ml4t-diagnostic 0.1.0a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (242) hide show
  1. ml4t/diagnostic/AGENT.md +25 -0
  2. ml4t/diagnostic/__init__.py +166 -0
  3. ml4t/diagnostic/backends/__init__.py +10 -0
  4. ml4t/diagnostic/backends/adapter.py +192 -0
  5. ml4t/diagnostic/backends/polars_backend.py +899 -0
  6. ml4t/diagnostic/caching/__init__.py +40 -0
  7. ml4t/diagnostic/caching/cache.py +331 -0
  8. ml4t/diagnostic/caching/decorators.py +131 -0
  9. ml4t/diagnostic/caching/smart_cache.py +339 -0
  10. ml4t/diagnostic/config/AGENT.md +24 -0
  11. ml4t/diagnostic/config/README.md +267 -0
  12. ml4t/diagnostic/config/__init__.py +219 -0
  13. ml4t/diagnostic/config/barrier_config.py +277 -0
  14. ml4t/diagnostic/config/base.py +301 -0
  15. ml4t/diagnostic/config/event_config.py +148 -0
  16. ml4t/diagnostic/config/feature_config.py +404 -0
  17. ml4t/diagnostic/config/multi_signal_config.py +55 -0
  18. ml4t/diagnostic/config/portfolio_config.py +215 -0
  19. ml4t/diagnostic/config/report_config.py +391 -0
  20. ml4t/diagnostic/config/sharpe_config.py +202 -0
  21. ml4t/diagnostic/config/signal_config.py +206 -0
  22. ml4t/diagnostic/config/trade_analysis_config.py +310 -0
  23. ml4t/diagnostic/config/validation.py +279 -0
  24. ml4t/diagnostic/core/__init__.py +29 -0
  25. ml4t/diagnostic/core/numba_utils.py +315 -0
  26. ml4t/diagnostic/core/purging.py +372 -0
  27. ml4t/diagnostic/core/sampling.py +471 -0
  28. ml4t/diagnostic/errors/__init__.py +205 -0
  29. ml4t/diagnostic/evaluation/AGENT.md +26 -0
  30. ml4t/diagnostic/evaluation/__init__.py +437 -0
  31. ml4t/diagnostic/evaluation/autocorrelation.py +531 -0
  32. ml4t/diagnostic/evaluation/barrier_analysis.py +1050 -0
  33. ml4t/diagnostic/evaluation/binary_metrics.py +910 -0
  34. ml4t/diagnostic/evaluation/dashboard.py +715 -0
  35. ml4t/diagnostic/evaluation/diagnostic_plots.py +1037 -0
  36. ml4t/diagnostic/evaluation/distribution/__init__.py +499 -0
  37. ml4t/diagnostic/evaluation/distribution/moments.py +299 -0
  38. ml4t/diagnostic/evaluation/distribution/tails.py +777 -0
  39. ml4t/diagnostic/evaluation/distribution/tests.py +470 -0
  40. ml4t/diagnostic/evaluation/drift/__init__.py +139 -0
  41. ml4t/diagnostic/evaluation/drift/analysis.py +432 -0
  42. ml4t/diagnostic/evaluation/drift/domain_classifier.py +517 -0
  43. ml4t/diagnostic/evaluation/drift/population_stability_index.py +310 -0
  44. ml4t/diagnostic/evaluation/drift/wasserstein.py +388 -0
  45. ml4t/diagnostic/evaluation/event_analysis.py +647 -0
  46. ml4t/diagnostic/evaluation/excursion.py +390 -0
  47. ml4t/diagnostic/evaluation/feature_diagnostics.py +873 -0
  48. ml4t/diagnostic/evaluation/feature_outcome.py +666 -0
  49. ml4t/diagnostic/evaluation/framework.py +935 -0
  50. ml4t/diagnostic/evaluation/metric_registry.py +255 -0
  51. ml4t/diagnostic/evaluation/metrics/AGENT.md +23 -0
  52. ml4t/diagnostic/evaluation/metrics/__init__.py +133 -0
  53. ml4t/diagnostic/evaluation/metrics/basic.py +160 -0
  54. ml4t/diagnostic/evaluation/metrics/conditional_ic.py +469 -0
  55. ml4t/diagnostic/evaluation/metrics/feature_outcome.py +475 -0
  56. ml4t/diagnostic/evaluation/metrics/ic_statistics.py +446 -0
  57. ml4t/diagnostic/evaluation/metrics/importance_analysis.py +338 -0
  58. ml4t/diagnostic/evaluation/metrics/importance_classical.py +375 -0
  59. ml4t/diagnostic/evaluation/metrics/importance_mda.py +371 -0
  60. ml4t/diagnostic/evaluation/metrics/importance_shap.py +715 -0
  61. ml4t/diagnostic/evaluation/metrics/information_coefficient.py +527 -0
  62. ml4t/diagnostic/evaluation/metrics/interactions.py +772 -0
  63. ml4t/diagnostic/evaluation/metrics/monotonicity.py +226 -0
  64. ml4t/diagnostic/evaluation/metrics/risk_adjusted.py +324 -0
  65. ml4t/diagnostic/evaluation/multi_signal.py +550 -0
  66. ml4t/diagnostic/evaluation/portfolio_analysis/__init__.py +83 -0
  67. ml4t/diagnostic/evaluation/portfolio_analysis/analysis.py +734 -0
  68. ml4t/diagnostic/evaluation/portfolio_analysis/metrics.py +589 -0
  69. ml4t/diagnostic/evaluation/portfolio_analysis/results.py +334 -0
  70. ml4t/diagnostic/evaluation/report_generation.py +824 -0
  71. ml4t/diagnostic/evaluation/signal_selector.py +452 -0
  72. ml4t/diagnostic/evaluation/stat_registry.py +139 -0
  73. ml4t/diagnostic/evaluation/stationarity/__init__.py +97 -0
  74. ml4t/diagnostic/evaluation/stationarity/analysis.py +518 -0
  75. ml4t/diagnostic/evaluation/stationarity/augmented_dickey_fuller.py +296 -0
  76. ml4t/diagnostic/evaluation/stationarity/kpss_test.py +308 -0
  77. ml4t/diagnostic/evaluation/stationarity/phillips_perron.py +365 -0
  78. ml4t/diagnostic/evaluation/stats/AGENT.md +43 -0
  79. ml4t/diagnostic/evaluation/stats/__init__.py +191 -0
  80. ml4t/diagnostic/evaluation/stats/backtest_overfitting.py +219 -0
  81. ml4t/diagnostic/evaluation/stats/bootstrap.py +228 -0
  82. ml4t/diagnostic/evaluation/stats/deflated_sharpe_ratio.py +591 -0
  83. ml4t/diagnostic/evaluation/stats/false_discovery_rate.py +295 -0
  84. ml4t/diagnostic/evaluation/stats/hac_standard_errors.py +108 -0
  85. ml4t/diagnostic/evaluation/stats/minimum_track_record.py +408 -0
  86. ml4t/diagnostic/evaluation/stats/moments.py +164 -0
  87. ml4t/diagnostic/evaluation/stats/rademacher_adjustment.py +436 -0
  88. ml4t/diagnostic/evaluation/stats/reality_check.py +155 -0
  89. ml4t/diagnostic/evaluation/stats/sharpe_inference.py +219 -0
  90. ml4t/diagnostic/evaluation/themes.py +330 -0
  91. ml4t/diagnostic/evaluation/threshold_analysis.py +957 -0
  92. ml4t/diagnostic/evaluation/trade_analysis.py +1136 -0
  93. ml4t/diagnostic/evaluation/trade_dashboard/__init__.py +32 -0
  94. ml4t/diagnostic/evaluation/trade_dashboard/app.py +315 -0
  95. ml4t/diagnostic/evaluation/trade_dashboard/export/__init__.py +18 -0
  96. ml4t/diagnostic/evaluation/trade_dashboard/export/csv.py +82 -0
  97. ml4t/diagnostic/evaluation/trade_dashboard/export/html.py +276 -0
  98. ml4t/diagnostic/evaluation/trade_dashboard/io.py +166 -0
  99. ml4t/diagnostic/evaluation/trade_dashboard/normalize.py +304 -0
  100. ml4t/diagnostic/evaluation/trade_dashboard/stats.py +386 -0
  101. ml4t/diagnostic/evaluation/trade_dashboard/style.py +79 -0
  102. ml4t/diagnostic/evaluation/trade_dashboard/tabs/__init__.py +21 -0
  103. ml4t/diagnostic/evaluation/trade_dashboard/tabs/patterns.py +354 -0
  104. ml4t/diagnostic/evaluation/trade_dashboard/tabs/shap_analysis.py +280 -0
  105. ml4t/diagnostic/evaluation/trade_dashboard/tabs/stat_validation.py +186 -0
  106. ml4t/diagnostic/evaluation/trade_dashboard/tabs/worst_trades.py +236 -0
  107. ml4t/diagnostic/evaluation/trade_dashboard/types.py +129 -0
  108. ml4t/diagnostic/evaluation/trade_shap/__init__.py +102 -0
  109. ml4t/diagnostic/evaluation/trade_shap/alignment.py +188 -0
  110. ml4t/diagnostic/evaluation/trade_shap/characterize.py +413 -0
  111. ml4t/diagnostic/evaluation/trade_shap/cluster.py +302 -0
  112. ml4t/diagnostic/evaluation/trade_shap/explain.py +208 -0
  113. ml4t/diagnostic/evaluation/trade_shap/hypotheses/__init__.py +23 -0
  114. ml4t/diagnostic/evaluation/trade_shap/hypotheses/generator.py +290 -0
  115. ml4t/diagnostic/evaluation/trade_shap/hypotheses/matcher.py +251 -0
  116. ml4t/diagnostic/evaluation/trade_shap/hypotheses/templates.yaml +467 -0
  117. ml4t/diagnostic/evaluation/trade_shap/models.py +386 -0
  118. ml4t/diagnostic/evaluation/trade_shap/normalize.py +116 -0
  119. ml4t/diagnostic/evaluation/trade_shap/pipeline.py +263 -0
  120. ml4t/diagnostic/evaluation/trade_shap_dashboard.py +283 -0
  121. ml4t/diagnostic/evaluation/trade_shap_diagnostics.py +588 -0
  122. ml4t/diagnostic/evaluation/validated_cv.py +535 -0
  123. ml4t/diagnostic/evaluation/visualization.py +1050 -0
  124. ml4t/diagnostic/evaluation/volatility/__init__.py +45 -0
  125. ml4t/diagnostic/evaluation/volatility/analysis.py +351 -0
  126. ml4t/diagnostic/evaluation/volatility/arch.py +258 -0
  127. ml4t/diagnostic/evaluation/volatility/garch.py +460 -0
  128. ml4t/diagnostic/integration/__init__.py +48 -0
  129. ml4t/diagnostic/integration/backtest_contract.py +671 -0
  130. ml4t/diagnostic/integration/data_contract.py +316 -0
  131. ml4t/diagnostic/integration/engineer_contract.py +226 -0
  132. ml4t/diagnostic/logging/__init__.py +77 -0
  133. ml4t/diagnostic/logging/logger.py +245 -0
  134. ml4t/diagnostic/logging/performance.py +234 -0
  135. ml4t/diagnostic/logging/progress.py +234 -0
  136. ml4t/diagnostic/logging/wandb.py +412 -0
  137. ml4t/diagnostic/metrics/__init__.py +9 -0
  138. ml4t/diagnostic/metrics/percentiles.py +128 -0
  139. ml4t/diagnostic/py.typed +1 -0
  140. ml4t/diagnostic/reporting/__init__.py +43 -0
  141. ml4t/diagnostic/reporting/base.py +130 -0
  142. ml4t/diagnostic/reporting/html_renderer.py +275 -0
  143. ml4t/diagnostic/reporting/json_renderer.py +51 -0
  144. ml4t/diagnostic/reporting/markdown_renderer.py +117 -0
  145. ml4t/diagnostic/results/AGENT.md +24 -0
  146. ml4t/diagnostic/results/__init__.py +105 -0
  147. ml4t/diagnostic/results/barrier_results/__init__.py +36 -0
  148. ml4t/diagnostic/results/barrier_results/hit_rate.py +304 -0
  149. ml4t/diagnostic/results/barrier_results/precision_recall.py +266 -0
  150. ml4t/diagnostic/results/barrier_results/profit_factor.py +297 -0
  151. ml4t/diagnostic/results/barrier_results/tearsheet.py +397 -0
  152. ml4t/diagnostic/results/barrier_results/time_to_target.py +305 -0
  153. ml4t/diagnostic/results/barrier_results/validation.py +38 -0
  154. ml4t/diagnostic/results/base.py +177 -0
  155. ml4t/diagnostic/results/event_results.py +349 -0
  156. ml4t/diagnostic/results/feature_results.py +787 -0
  157. ml4t/diagnostic/results/multi_signal_results.py +431 -0
  158. ml4t/diagnostic/results/portfolio_results.py +281 -0
  159. ml4t/diagnostic/results/sharpe_results.py +448 -0
  160. ml4t/diagnostic/results/signal_results/__init__.py +74 -0
  161. ml4t/diagnostic/results/signal_results/ic.py +581 -0
  162. ml4t/diagnostic/results/signal_results/irtc.py +110 -0
  163. ml4t/diagnostic/results/signal_results/quantile.py +392 -0
  164. ml4t/diagnostic/results/signal_results/tearsheet.py +456 -0
  165. ml4t/diagnostic/results/signal_results/turnover.py +213 -0
  166. ml4t/diagnostic/results/signal_results/validation.py +147 -0
  167. ml4t/diagnostic/signal/AGENT.md +17 -0
  168. ml4t/diagnostic/signal/__init__.py +69 -0
  169. ml4t/diagnostic/signal/_report.py +152 -0
  170. ml4t/diagnostic/signal/_utils.py +261 -0
  171. ml4t/diagnostic/signal/core.py +275 -0
  172. ml4t/diagnostic/signal/quantile.py +148 -0
  173. ml4t/diagnostic/signal/result.py +214 -0
  174. ml4t/diagnostic/signal/signal_ic.py +129 -0
  175. ml4t/diagnostic/signal/turnover.py +182 -0
  176. ml4t/diagnostic/splitters/AGENT.md +19 -0
  177. ml4t/diagnostic/splitters/__init__.py +36 -0
  178. ml4t/diagnostic/splitters/base.py +501 -0
  179. ml4t/diagnostic/splitters/calendar.py +421 -0
  180. ml4t/diagnostic/splitters/calendar_config.py +91 -0
  181. ml4t/diagnostic/splitters/combinatorial.py +1064 -0
  182. ml4t/diagnostic/splitters/config.py +322 -0
  183. ml4t/diagnostic/splitters/cpcv/__init__.py +57 -0
  184. ml4t/diagnostic/splitters/cpcv/combinations.py +119 -0
  185. ml4t/diagnostic/splitters/cpcv/partitioning.py +263 -0
  186. ml4t/diagnostic/splitters/cpcv/purge_engine.py +379 -0
  187. ml4t/diagnostic/splitters/cpcv/windows.py +190 -0
  188. ml4t/diagnostic/splitters/group_isolation.py +329 -0
  189. ml4t/diagnostic/splitters/persistence.py +316 -0
  190. ml4t/diagnostic/splitters/utils.py +207 -0
  191. ml4t/diagnostic/splitters/walk_forward.py +757 -0
  192. ml4t/diagnostic/utils/__init__.py +42 -0
  193. ml4t/diagnostic/utils/config.py +542 -0
  194. ml4t/diagnostic/utils/dependencies.py +318 -0
  195. ml4t/diagnostic/utils/sessions.py +127 -0
  196. ml4t/diagnostic/validation/__init__.py +54 -0
  197. ml4t/diagnostic/validation/dataframe.py +274 -0
  198. ml4t/diagnostic/validation/returns.py +280 -0
  199. ml4t/diagnostic/validation/timeseries.py +299 -0
  200. ml4t/diagnostic/visualization/AGENT.md +19 -0
  201. ml4t/diagnostic/visualization/__init__.py +223 -0
  202. ml4t/diagnostic/visualization/backtest/__init__.py +98 -0
  203. ml4t/diagnostic/visualization/backtest/cost_attribution.py +762 -0
  204. ml4t/diagnostic/visualization/backtest/executive_summary.py +895 -0
  205. ml4t/diagnostic/visualization/backtest/interactive_controls.py +673 -0
  206. ml4t/diagnostic/visualization/backtest/statistical_validity.py +874 -0
  207. ml4t/diagnostic/visualization/backtest/tearsheet.py +565 -0
  208. ml4t/diagnostic/visualization/backtest/template_system.py +373 -0
  209. ml4t/diagnostic/visualization/backtest/trade_plots.py +1172 -0
  210. ml4t/diagnostic/visualization/barrier_plots.py +782 -0
  211. ml4t/diagnostic/visualization/core.py +1060 -0
  212. ml4t/diagnostic/visualization/dashboards/__init__.py +36 -0
  213. ml4t/diagnostic/visualization/dashboards/base.py +582 -0
  214. ml4t/diagnostic/visualization/dashboards/importance.py +801 -0
  215. ml4t/diagnostic/visualization/dashboards/interaction.py +263 -0
  216. ml4t/diagnostic/visualization/dashboards.py +43 -0
  217. ml4t/diagnostic/visualization/data_extraction/__init__.py +48 -0
  218. ml4t/diagnostic/visualization/data_extraction/importance.py +649 -0
  219. ml4t/diagnostic/visualization/data_extraction/interaction.py +504 -0
  220. ml4t/diagnostic/visualization/data_extraction/types.py +113 -0
  221. ml4t/diagnostic/visualization/data_extraction/validation.py +66 -0
  222. ml4t/diagnostic/visualization/feature_plots.py +888 -0
  223. ml4t/diagnostic/visualization/interaction_plots.py +618 -0
  224. ml4t/diagnostic/visualization/portfolio/__init__.py +41 -0
  225. ml4t/diagnostic/visualization/portfolio/dashboard.py +514 -0
  226. ml4t/diagnostic/visualization/portfolio/drawdown_plots.py +341 -0
  227. ml4t/diagnostic/visualization/portfolio/returns_plots.py +487 -0
  228. ml4t/diagnostic/visualization/portfolio/risk_plots.py +301 -0
  229. ml4t/diagnostic/visualization/report_generation.py +1343 -0
  230. ml4t/diagnostic/visualization/signal/__init__.py +103 -0
  231. ml4t/diagnostic/visualization/signal/dashboard.py +911 -0
  232. ml4t/diagnostic/visualization/signal/event_plots.py +514 -0
  233. ml4t/diagnostic/visualization/signal/ic_plots.py +635 -0
  234. ml4t/diagnostic/visualization/signal/multi_signal_dashboard.py +974 -0
  235. ml4t/diagnostic/visualization/signal/multi_signal_plots.py +603 -0
  236. ml4t/diagnostic/visualization/signal/quantile_plots.py +625 -0
  237. ml4t/diagnostic/visualization/signal/turnover_plots.py +400 -0
  238. ml4t/diagnostic/visualization/trade_shap/__init__.py +90 -0
  239. ml4t_diagnostic-0.1.0a1.dist-info/METADATA +1044 -0
  240. ml4t_diagnostic-0.1.0a1.dist-info/RECORD +242 -0
  241. ml4t_diagnostic-0.1.0a1.dist-info/WHEEL +4 -0
  242. ml4t_diagnostic-0.1.0a1.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,295 @@
1
+ """False Discovery Rate (FDR) and Family-Wise Error Rate (FWER) corrections.
2
+
3
+ This module implements multiple testing corrections:
4
+ - Benjamini-Hochberg FDR (1995): Controls expected proportion of false discoveries
5
+ - Holm-Bonferroni FWER (1979): Controls probability of any false discovery
6
+
7
+ These methods are essential when testing multiple hypotheses simultaneously,
8
+ which is common in quantitative finance (testing many strategies, factors, etc.).
9
+ """
10
+
11
+ from collections.abc import Sequence
12
+ from typing import TYPE_CHECKING, Any, Union
13
+
14
+ import numpy as np
15
+
16
+ if TYPE_CHECKING:
17
+ from numpy.typing import NDArray
18
+
19
+
20
def benjamini_hochberg_fdr(
    p_values: Sequence[float],
    alpha: float = 0.05,
    return_details: bool = False,
) -> Union["NDArray[Any]", dict[str, Any]]:
    """Apply Benjamini-Hochberg False Discovery Rate correction.

    Controls the False Discovery Rate (FDR) - the expected proportion of false
    discoveries among the rejected hypotheses. More powerful than Bonferroni
    correction for multiple hypothesis testing.

    Based on Benjamini & Hochberg (1995): "Controlling the False Discovery Rate"

    Parameters
    ----------
    p_values : Sequence[float]
        P-values from multiple hypothesis tests
    alpha : float, default 0.05
        Target FDR level (e.g., 0.05 for 5% FDR)
    return_details : bool, default False
        Whether to return detailed information

    Returns
    -------
    Union[NDArray, dict]
        If return_details=False: Boolean array of rejected hypotheses
        If return_details=True: dict with 'rejected', 'adjusted_p_values',
        'critical_values', 'n_rejected'. All arrays are aligned with the
        original input order.

    Examples
    --------
    >>> p_values = [0.001, 0.01, 0.03, 0.08, 0.12]
    >>> rejected = benjamini_hochberg_fdr(p_values, alpha=0.05)
    >>> print(f"Rejected: {rejected}")
    Rejected: [ True  True  True False False]
    """
    p_array = np.array(p_values)
    n = len(p_array)

    # Degenerate case: no hypotheses -> nothing to reject.
    if n == 0:
        if return_details:
            return {
                "rejected": np.array([], dtype=bool),
                "adjusted_p_values": np.array([]),
                "critical_values": np.array([]),
                "n_rejected": 0,
            }
        return np.array([], dtype=bool)

    # Sort p-values and keep track of original indices
    sorted_indices = np.argsort(p_array)
    sorted_p_values = p_array[sorted_indices]

    # Calculate critical values: (i/n) * alpha for rank i = 1..n
    critical_values = np.arange(1, n + 1) / n * alpha

    # Find largest i such that P(i) <= (i/n) * alpha
    # Work backwards from largest p-value
    rejected_sorted = np.zeros(n, dtype=bool)

    for i in range(n - 1, -1, -1):
        if sorted_p_values[i] <= critical_values[i]:
            # Reject this and all smaller p-values
            rejected_sorted[: i + 1] = True
            break

    # Map back to original order (scatter via the sort permutation)
    rejected = np.zeros(n, dtype=bool)
    rejected[sorted_indices] = rejected_sorted

    if not return_details:
        return rejected

    # Calculate adjusted p-values (step-up method):
    # adj_p_(i) = min_{j >= i} { n * p_(j) / j }, scattered back to input order.
    adjusted_p_values = np.zeros(n)
    adjusted_p_values[sorted_indices] = np.minimum.accumulate(
        sorted_p_values[::-1] * n / np.arange(n, 0, -1),
    )[::-1]

    # Ensure adjusted p-values don't exceed 1
    adjusted_p_values = np.minimum(adjusted_p_values, 1.0)

    # BUG FIX: map critical values back to the input order with a scatter
    # (critical_original[sorted_indices] = critical_values), mirroring the
    # treatment of `rejected` and `adjusted_p_values` above. The previous
    # gather form `critical_values[sorted_indices]` applied the permutation
    # in the wrong direction, mis-assigning thresholds whenever the sort
    # permutation was not its own inverse.
    critical_original = np.empty(n)
    critical_original[sorted_indices] = critical_values

    return {
        "rejected": rejected,
        "adjusted_p_values": adjusted_p_values,
        "critical_values": critical_original,
        "n_rejected": int(np.sum(rejected)),
    }
108
+
109
+
110
def holm_bonferroni(
    p_values: Sequence[float],
    alpha: float = 0.05,
) -> dict[str, Any]:
    """Holm-Bonferroni step-down procedure for FWER control.

    Controls the Family-Wise Error Rate (FWER) - the probability of making
    at least one false discovery. More powerful than Bonferroni correction
    while maintaining strong FWER control.

    Based on Holm (1979): "A Simple Sequentially Rejective Multiple Test Procedure"

    Parameters
    ----------
    p_values : Sequence[float]
        P-values from multiple hypothesis tests
    alpha : float, default 0.05
        Target FWER significance level

    Returns
    -------
    dict
        Dictionary with:
        - rejected: list[bool] - Whether each hypothesis is rejected
        - adjusted_p_values: list[float] - Holm-adjusted p-values
        - n_rejected: int - Number of rejections
        - critical_values: list[float] - Holm critical thresholds

    Notes
    -----
    The Holm procedure is a step-down method:

    1. Sort p-values ascending: p_(1) <= p_(2) <= ... <= p_(m)
    2. For p_(i), compare to alpha / (m - i + 1)
    3. Reject all hypotheses up to (and including) the last rejection
    4. Stop at first non-rejection; accept remaining hypotheses

    This is uniformly more powerful than Bonferroni while controlling FWER.

    Examples
    --------
    >>> p_values = [0.001, 0.01, 0.03, 0.08, 0.12]
    >>> result = holm_bonferroni(p_values, alpha=0.05)
    >>> print(f"Rejected: {result['rejected']}")
    Rejected: [True, True, False, False, False]
    """
    pvals = np.asarray(p_values, dtype=np.float64)
    m = len(pvals)

    # Nothing to test: return empty result structure.
    if m == 0:
        return {
            "rejected": [],
            "adjusted_p_values": [],
            "n_rejected": 0,
            "critical_values": [],
        }

    # Permutation that visits hypotheses from smallest to largest p-value.
    order = np.argsort(pvals)

    # Outputs, indexed by the ORIGINAL hypothesis position.
    rejected = [False] * m
    adjusted = [0.0] * m
    thresholds = [0.0] * m

    # Single pass over the sorted sequence implements the step-down rule:
    # keep rejecting while each sorted p-value clears its Holm threshold;
    # the first failure switches the flag off for every later hypothesis.
    still_rejecting = True
    running_max = 0.0
    for rank, orig_idx in enumerate(order):
        p = float(pvals[orig_idx])
        threshold = alpha / (m - rank)
        thresholds[orig_idx] = threshold

        # Holm-adjusted p-value: running max of (m - rank) * p_(rank),
        # capped at 1 (and floored at 0 via the 0.0 starting accumulator).
        running_max = max(running_max, (m - rank) * p)
        adjusted[orig_idx] = min(running_max, 1.0)

        still_rejecting = still_rejecting and p <= threshold
        rejected[orig_idx] = still_rejecting

    return {
        "rejected": rejected,
        "adjusted_p_values": adjusted,
        "n_rejected": int(sum(rejected)),
        "critical_values": thresholds,
    }
206
+
207
+
208
def multiple_testing_summary(
    test_results: Sequence[dict[str, Any]],
    method: str = "benjamini_hochberg",
    alpha: float = 0.05,
) -> dict[str, Any]:
    """Summarize results from multiple statistical tests with corrections.

    Provides a comprehensive summary of multiple hypothesis testing results
    with appropriate corrections for multiple comparisons.

    Parameters
    ----------
    test_results : Sequence[dict]
        List of test result dictionaries (each should have 'p_value' key)
    method : str, default "benjamini_hochberg"
        Multiple testing correction method
    alpha : float, default 0.05
        Significance level

    Returns
    -------
    dict
        Summary with original and corrected results

    Examples
    --------
    >>> results = [{'name': 'Strategy A', 'p_value': 0.01},
    ...            {'name': 'Strategy B', 'p_value': 0.08}]
    >>> summary = multiple_testing_summary(results)
    >>> print(f"Significant after correction: {summary['n_significant_corrected']}")
    """
    # No tests supplied: empty summary.
    if not test_results:
        return {
            "n_tests": 0,
            "n_significant_uncorrected": 0,
            "n_significant_corrected": 0,
            "correction_method": method,
            "alpha": alpha,
        }

    # Pull p-values out of the result dicts; missing keys become NaN.
    raw_p_values = [entry.get("p_value", np.nan) for entry in test_results]
    usable_p_values = [p for p in raw_p_values if not np.isnan(p)]

    # All p-values missing or NaN: summary with an explicit warning.
    if not usable_p_values:
        return {
            "n_tests": len(test_results),
            "n_significant_uncorrected": 0,
            "n_significant_corrected": 0,
            "correction_method": method,
            "alpha": alpha,
            "warning": "No valid p-values found",
        }

    # Count of tests that clear alpha with no correction at all.
    n_uncorrected = sum(1 for p in usable_p_values if p <= alpha)

    # Apply the requested correction (only Benjamini-Hochberg is supported).
    if method != "benjamini_hochberg":
        raise ValueError(f"Unknown correction method: {method}")

    bh = benjamini_hochberg_fdr(
        usable_p_values,
        alpha=alpha,
        return_details=True,
    )

    # NOTE(review): when some p-values are NaN, 'adjusted_p_values' and
    # 'rejected_hypotheses' below align with the NaN-filtered list, not with
    # test_results (lengths can differ) — confirm callers expect this.
    return {
        "n_tests": len(test_results),
        "n_significant_uncorrected": n_uncorrected,
        "n_significant_corrected": bh["n_rejected"],
        "correction_method": method,
        "alpha": alpha,
        "adjusted_p_values": bh["adjusted_p_values"].tolist(),
        "rejected_hypotheses": bh["rejected"].tolist(),
        "uncorrected_rate": n_uncorrected / len(usable_p_values),
        "corrected_rate": bh["n_rejected"] / len(usable_p_values),
    }
289
+
290
+
291
# Public API: multiple-testing correction helpers exported by this module.
__all__ = [
    "benjamini_hochberg_fdr",
    "holm_bonferroni",
    "multiple_testing_summary",
]
@@ -0,0 +1,108 @@
1
+ """Robust standard errors for Information Coefficient estimation.
2
+
3
+ This module provides standard error estimation for rank correlation (IC)
4
+ with proper handling of temporal dependence through stationary bootstrap.
5
+
6
+ References
7
+ ----------
8
+ .. [1] Politis, D.N. & Romano, J.P. (1994). "The Stationary Bootstrap."
9
+ Journal of the American Statistical Association 89:1303-1313.
10
+
11
+ .. [2] Patton, A., Politis, D.N. & White, H. (2009). "Correction to
12
+ Automatic Block-Length Selection for the Dependent Bootstrap."
13
+ Econometric Reviews 28:372-375.
14
+ """
15
+
16
+ from typing import TYPE_CHECKING, Any, Union
17
+
18
+ import numpy as np
19
+ import pandas as pd
20
+ import polars as pl
21
+
22
+ from .bootstrap import stationary_bootstrap_ic
23
+
24
+ if TYPE_CHECKING:
25
+ from numpy.typing import NDArray
26
+
27
+
28
+ def robust_ic(
29
+ predictions: Union[pl.Series, pd.Series, "NDArray[Any]"],
30
+ returns: Union[pl.Series, pd.Series, "NDArray[Any]"],
31
+ n_samples: int = 1000,
32
+ return_details: bool = False,
33
+ ) -> dict[str, float] | float:
34
+ """Calculate Information Coefficient with robust standard errors.
35
+
36
+ Uses stationary bootstrap [1]_ to compute standard errors that properly
37
+ account for temporal dependence in time series data.
38
+
39
+ The stationary bootstrap is the correct method because:
40
+ 1. Preserves temporal dependence structure
41
+ 2. No asymptotic approximations required
42
+ 3. Theoretically valid for rank correlation (Spearman IC)
43
+
44
+ Parameters
45
+ ----------
46
+ predictions : Union[pl.Series, pd.Series, NDArray]
47
+ Model predictions or scores
48
+ returns : Union[pl.Series, pd.Series, NDArray]
49
+ Forward returns corresponding to predictions
50
+ n_samples : int, default 1000
51
+ Number of bootstrap samples
52
+ return_details : bool, default False
53
+ Whether to return detailed statistics
54
+
55
+ Returns
56
+ -------
57
+ Union[dict, float]
58
+ If return_details=False: t-statistic (IC / bootstrap_std)
59
+ If return_details=True: dict with 'ic', 'bootstrap_std', 't_stat',
60
+ 'p_value', 'ci_lower', 'ci_upper'
61
+
62
+ Examples
63
+ --------
64
+ >>> predictions = np.random.randn(252)
65
+ >>> returns = 0.1 * predictions + np.random.randn(252) * 0.5
66
+ >>> result = robust_ic(predictions, returns, return_details=True)
67
+ >>> print(f"IC: {result['ic']:.3f}, t-stat: {result['t_stat']:.3f}")
68
+
69
+ References
70
+ ----------
71
+ .. [1] Politis, D.N. & Romano, J.P. (1994). "The Stationary Bootstrap."
72
+ Journal of the American Statistical Association 89:1303-1313.
73
+ """
74
+ bootstrap_result = stationary_bootstrap_ic(
75
+ predictions, returns, n_samples=n_samples, return_details=True
76
+ )
77
+ assert isinstance(bootstrap_result, dict)
78
+
79
+ if not return_details:
80
+ if bootstrap_result["bootstrap_std"] > 0:
81
+ return bootstrap_result["ic"] / bootstrap_result["bootstrap_std"]
82
+ return np.nan
83
+
84
+ # Compute t-statistic
85
+ t_stat = (
86
+ bootstrap_result["ic"] / bootstrap_result["bootstrap_std"]
87
+ if bootstrap_result["bootstrap_std"] > 0
88
+ else np.nan
89
+ )
90
+
91
+ return {
92
+ "ic": bootstrap_result["ic"],
93
+ "bootstrap_std": bootstrap_result["bootstrap_std"],
94
+ "t_stat": t_stat,
95
+ "p_value": bootstrap_result.get("p_value", np.nan),
96
+ "ci_lower": bootstrap_result.get("ci_lower", np.nan),
97
+ "ci_upper": bootstrap_result.get("ci_upper", np.nan),
98
+ }
99
+
100
+
101
+ # Keep old name as alias for now
102
+ hac_adjusted_ic = robust_ic
103
+
104
+
105
+ __all__ = [
106
+ "robust_ic",
107
+ "hac_adjusted_ic", # Alias
108
+ ]