ml4t-diagnostic 0.1.0a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (242) hide show
  1. ml4t/diagnostic/AGENT.md +25 -0
  2. ml4t/diagnostic/__init__.py +166 -0
  3. ml4t/diagnostic/backends/__init__.py +10 -0
  4. ml4t/diagnostic/backends/adapter.py +192 -0
  5. ml4t/diagnostic/backends/polars_backend.py +899 -0
  6. ml4t/diagnostic/caching/__init__.py +40 -0
  7. ml4t/diagnostic/caching/cache.py +331 -0
  8. ml4t/diagnostic/caching/decorators.py +131 -0
  9. ml4t/diagnostic/caching/smart_cache.py +339 -0
  10. ml4t/diagnostic/config/AGENT.md +24 -0
  11. ml4t/diagnostic/config/README.md +267 -0
  12. ml4t/diagnostic/config/__init__.py +219 -0
  13. ml4t/diagnostic/config/barrier_config.py +277 -0
  14. ml4t/diagnostic/config/base.py +301 -0
  15. ml4t/diagnostic/config/event_config.py +148 -0
  16. ml4t/diagnostic/config/feature_config.py +404 -0
  17. ml4t/diagnostic/config/multi_signal_config.py +55 -0
  18. ml4t/diagnostic/config/portfolio_config.py +215 -0
  19. ml4t/diagnostic/config/report_config.py +391 -0
  20. ml4t/diagnostic/config/sharpe_config.py +202 -0
  21. ml4t/diagnostic/config/signal_config.py +206 -0
  22. ml4t/diagnostic/config/trade_analysis_config.py +310 -0
  23. ml4t/diagnostic/config/validation.py +279 -0
  24. ml4t/diagnostic/core/__init__.py +29 -0
  25. ml4t/diagnostic/core/numba_utils.py +315 -0
  26. ml4t/diagnostic/core/purging.py +372 -0
  27. ml4t/diagnostic/core/sampling.py +471 -0
  28. ml4t/diagnostic/errors/__init__.py +205 -0
  29. ml4t/diagnostic/evaluation/AGENT.md +26 -0
  30. ml4t/diagnostic/evaluation/__init__.py +437 -0
  31. ml4t/diagnostic/evaluation/autocorrelation.py +531 -0
  32. ml4t/diagnostic/evaluation/barrier_analysis.py +1050 -0
  33. ml4t/diagnostic/evaluation/binary_metrics.py +910 -0
  34. ml4t/diagnostic/evaluation/dashboard.py +715 -0
  35. ml4t/diagnostic/evaluation/diagnostic_plots.py +1037 -0
  36. ml4t/diagnostic/evaluation/distribution/__init__.py +499 -0
  37. ml4t/diagnostic/evaluation/distribution/moments.py +299 -0
  38. ml4t/diagnostic/evaluation/distribution/tails.py +777 -0
  39. ml4t/diagnostic/evaluation/distribution/tests.py +470 -0
  40. ml4t/diagnostic/evaluation/drift/__init__.py +139 -0
  41. ml4t/diagnostic/evaluation/drift/analysis.py +432 -0
  42. ml4t/diagnostic/evaluation/drift/domain_classifier.py +517 -0
  43. ml4t/diagnostic/evaluation/drift/population_stability_index.py +310 -0
  44. ml4t/diagnostic/evaluation/drift/wasserstein.py +388 -0
  45. ml4t/diagnostic/evaluation/event_analysis.py +647 -0
  46. ml4t/diagnostic/evaluation/excursion.py +390 -0
  47. ml4t/diagnostic/evaluation/feature_diagnostics.py +873 -0
  48. ml4t/diagnostic/evaluation/feature_outcome.py +666 -0
  49. ml4t/diagnostic/evaluation/framework.py +935 -0
  50. ml4t/diagnostic/evaluation/metric_registry.py +255 -0
  51. ml4t/diagnostic/evaluation/metrics/AGENT.md +23 -0
  52. ml4t/diagnostic/evaluation/metrics/__init__.py +133 -0
  53. ml4t/diagnostic/evaluation/metrics/basic.py +160 -0
  54. ml4t/diagnostic/evaluation/metrics/conditional_ic.py +469 -0
  55. ml4t/diagnostic/evaluation/metrics/feature_outcome.py +475 -0
  56. ml4t/diagnostic/evaluation/metrics/ic_statistics.py +446 -0
  57. ml4t/diagnostic/evaluation/metrics/importance_analysis.py +338 -0
  58. ml4t/diagnostic/evaluation/metrics/importance_classical.py +375 -0
  59. ml4t/diagnostic/evaluation/metrics/importance_mda.py +371 -0
  60. ml4t/diagnostic/evaluation/metrics/importance_shap.py +715 -0
  61. ml4t/diagnostic/evaluation/metrics/information_coefficient.py +527 -0
  62. ml4t/diagnostic/evaluation/metrics/interactions.py +772 -0
  63. ml4t/diagnostic/evaluation/metrics/monotonicity.py +226 -0
  64. ml4t/diagnostic/evaluation/metrics/risk_adjusted.py +324 -0
  65. ml4t/diagnostic/evaluation/multi_signal.py +550 -0
  66. ml4t/diagnostic/evaluation/portfolio_analysis/__init__.py +83 -0
  67. ml4t/diagnostic/evaluation/portfolio_analysis/analysis.py +734 -0
  68. ml4t/diagnostic/evaluation/portfolio_analysis/metrics.py +589 -0
  69. ml4t/diagnostic/evaluation/portfolio_analysis/results.py +334 -0
  70. ml4t/diagnostic/evaluation/report_generation.py +824 -0
  71. ml4t/diagnostic/evaluation/signal_selector.py +452 -0
  72. ml4t/diagnostic/evaluation/stat_registry.py +139 -0
  73. ml4t/diagnostic/evaluation/stationarity/__init__.py +97 -0
  74. ml4t/diagnostic/evaluation/stationarity/analysis.py +518 -0
  75. ml4t/diagnostic/evaluation/stationarity/augmented_dickey_fuller.py +296 -0
  76. ml4t/diagnostic/evaluation/stationarity/kpss_test.py +308 -0
  77. ml4t/diagnostic/evaluation/stationarity/phillips_perron.py +365 -0
  78. ml4t/diagnostic/evaluation/stats/AGENT.md +43 -0
  79. ml4t/diagnostic/evaluation/stats/__init__.py +191 -0
  80. ml4t/diagnostic/evaluation/stats/backtest_overfitting.py +219 -0
  81. ml4t/diagnostic/evaluation/stats/bootstrap.py +228 -0
  82. ml4t/diagnostic/evaluation/stats/deflated_sharpe_ratio.py +591 -0
  83. ml4t/diagnostic/evaluation/stats/false_discovery_rate.py +295 -0
  84. ml4t/diagnostic/evaluation/stats/hac_standard_errors.py +108 -0
  85. ml4t/diagnostic/evaluation/stats/minimum_track_record.py +408 -0
  86. ml4t/diagnostic/evaluation/stats/moments.py +164 -0
  87. ml4t/diagnostic/evaluation/stats/rademacher_adjustment.py +436 -0
  88. ml4t/diagnostic/evaluation/stats/reality_check.py +155 -0
  89. ml4t/diagnostic/evaluation/stats/sharpe_inference.py +219 -0
  90. ml4t/diagnostic/evaluation/themes.py +330 -0
  91. ml4t/diagnostic/evaluation/threshold_analysis.py +957 -0
  92. ml4t/diagnostic/evaluation/trade_analysis.py +1136 -0
  93. ml4t/diagnostic/evaluation/trade_dashboard/__init__.py +32 -0
  94. ml4t/diagnostic/evaluation/trade_dashboard/app.py +315 -0
  95. ml4t/diagnostic/evaluation/trade_dashboard/export/__init__.py +18 -0
  96. ml4t/diagnostic/evaluation/trade_dashboard/export/csv.py +82 -0
  97. ml4t/diagnostic/evaluation/trade_dashboard/export/html.py +276 -0
  98. ml4t/diagnostic/evaluation/trade_dashboard/io.py +166 -0
  99. ml4t/diagnostic/evaluation/trade_dashboard/normalize.py +304 -0
  100. ml4t/diagnostic/evaluation/trade_dashboard/stats.py +386 -0
  101. ml4t/diagnostic/evaluation/trade_dashboard/style.py +79 -0
  102. ml4t/diagnostic/evaluation/trade_dashboard/tabs/__init__.py +21 -0
  103. ml4t/diagnostic/evaluation/trade_dashboard/tabs/patterns.py +354 -0
  104. ml4t/diagnostic/evaluation/trade_dashboard/tabs/shap_analysis.py +280 -0
  105. ml4t/diagnostic/evaluation/trade_dashboard/tabs/stat_validation.py +186 -0
  106. ml4t/diagnostic/evaluation/trade_dashboard/tabs/worst_trades.py +236 -0
  107. ml4t/diagnostic/evaluation/trade_dashboard/types.py +129 -0
  108. ml4t/diagnostic/evaluation/trade_shap/__init__.py +102 -0
  109. ml4t/diagnostic/evaluation/trade_shap/alignment.py +188 -0
  110. ml4t/diagnostic/evaluation/trade_shap/characterize.py +413 -0
  111. ml4t/diagnostic/evaluation/trade_shap/cluster.py +302 -0
  112. ml4t/diagnostic/evaluation/trade_shap/explain.py +208 -0
  113. ml4t/diagnostic/evaluation/trade_shap/hypotheses/__init__.py +23 -0
  114. ml4t/diagnostic/evaluation/trade_shap/hypotheses/generator.py +290 -0
  115. ml4t/diagnostic/evaluation/trade_shap/hypotheses/matcher.py +251 -0
  116. ml4t/diagnostic/evaluation/trade_shap/hypotheses/templates.yaml +467 -0
  117. ml4t/diagnostic/evaluation/trade_shap/models.py +386 -0
  118. ml4t/diagnostic/evaluation/trade_shap/normalize.py +116 -0
  119. ml4t/diagnostic/evaluation/trade_shap/pipeline.py +263 -0
  120. ml4t/diagnostic/evaluation/trade_shap_dashboard.py +283 -0
  121. ml4t/diagnostic/evaluation/trade_shap_diagnostics.py +588 -0
  122. ml4t/diagnostic/evaluation/validated_cv.py +535 -0
  123. ml4t/diagnostic/evaluation/visualization.py +1050 -0
  124. ml4t/diagnostic/evaluation/volatility/__init__.py +45 -0
  125. ml4t/diagnostic/evaluation/volatility/analysis.py +351 -0
  126. ml4t/diagnostic/evaluation/volatility/arch.py +258 -0
  127. ml4t/diagnostic/evaluation/volatility/garch.py +460 -0
  128. ml4t/diagnostic/integration/__init__.py +48 -0
  129. ml4t/diagnostic/integration/backtest_contract.py +671 -0
  130. ml4t/diagnostic/integration/data_contract.py +316 -0
  131. ml4t/diagnostic/integration/engineer_contract.py +226 -0
  132. ml4t/diagnostic/logging/__init__.py +77 -0
  133. ml4t/diagnostic/logging/logger.py +245 -0
  134. ml4t/diagnostic/logging/performance.py +234 -0
  135. ml4t/diagnostic/logging/progress.py +234 -0
  136. ml4t/diagnostic/logging/wandb.py +412 -0
  137. ml4t/diagnostic/metrics/__init__.py +9 -0
  138. ml4t/diagnostic/metrics/percentiles.py +128 -0
  139. ml4t/diagnostic/py.typed +1 -0
  140. ml4t/diagnostic/reporting/__init__.py +43 -0
  141. ml4t/diagnostic/reporting/base.py +130 -0
  142. ml4t/diagnostic/reporting/html_renderer.py +275 -0
  143. ml4t/diagnostic/reporting/json_renderer.py +51 -0
  144. ml4t/diagnostic/reporting/markdown_renderer.py +117 -0
  145. ml4t/diagnostic/results/AGENT.md +24 -0
  146. ml4t/diagnostic/results/__init__.py +105 -0
  147. ml4t/diagnostic/results/barrier_results/__init__.py +36 -0
  148. ml4t/diagnostic/results/barrier_results/hit_rate.py +304 -0
  149. ml4t/diagnostic/results/barrier_results/precision_recall.py +266 -0
  150. ml4t/diagnostic/results/barrier_results/profit_factor.py +297 -0
  151. ml4t/diagnostic/results/barrier_results/tearsheet.py +397 -0
  152. ml4t/diagnostic/results/barrier_results/time_to_target.py +305 -0
  153. ml4t/diagnostic/results/barrier_results/validation.py +38 -0
  154. ml4t/diagnostic/results/base.py +177 -0
  155. ml4t/diagnostic/results/event_results.py +349 -0
  156. ml4t/diagnostic/results/feature_results.py +787 -0
  157. ml4t/diagnostic/results/multi_signal_results.py +431 -0
  158. ml4t/diagnostic/results/portfolio_results.py +281 -0
  159. ml4t/diagnostic/results/sharpe_results.py +448 -0
  160. ml4t/diagnostic/results/signal_results/__init__.py +74 -0
  161. ml4t/diagnostic/results/signal_results/ic.py +581 -0
  162. ml4t/diagnostic/results/signal_results/irtc.py +110 -0
  163. ml4t/diagnostic/results/signal_results/quantile.py +392 -0
  164. ml4t/diagnostic/results/signal_results/tearsheet.py +456 -0
  165. ml4t/diagnostic/results/signal_results/turnover.py +213 -0
  166. ml4t/diagnostic/results/signal_results/validation.py +147 -0
  167. ml4t/diagnostic/signal/AGENT.md +17 -0
  168. ml4t/diagnostic/signal/__init__.py +69 -0
  169. ml4t/diagnostic/signal/_report.py +152 -0
  170. ml4t/diagnostic/signal/_utils.py +261 -0
  171. ml4t/diagnostic/signal/core.py +275 -0
  172. ml4t/diagnostic/signal/quantile.py +148 -0
  173. ml4t/diagnostic/signal/result.py +214 -0
  174. ml4t/diagnostic/signal/signal_ic.py +129 -0
  175. ml4t/diagnostic/signal/turnover.py +182 -0
  176. ml4t/diagnostic/splitters/AGENT.md +19 -0
  177. ml4t/diagnostic/splitters/__init__.py +36 -0
  178. ml4t/diagnostic/splitters/base.py +501 -0
  179. ml4t/diagnostic/splitters/calendar.py +421 -0
  180. ml4t/diagnostic/splitters/calendar_config.py +91 -0
  181. ml4t/diagnostic/splitters/combinatorial.py +1064 -0
  182. ml4t/diagnostic/splitters/config.py +322 -0
  183. ml4t/diagnostic/splitters/cpcv/__init__.py +57 -0
  184. ml4t/diagnostic/splitters/cpcv/combinations.py +119 -0
  185. ml4t/diagnostic/splitters/cpcv/partitioning.py +263 -0
  186. ml4t/diagnostic/splitters/cpcv/purge_engine.py +379 -0
  187. ml4t/diagnostic/splitters/cpcv/windows.py +190 -0
  188. ml4t/diagnostic/splitters/group_isolation.py +329 -0
  189. ml4t/diagnostic/splitters/persistence.py +316 -0
  190. ml4t/diagnostic/splitters/utils.py +207 -0
  191. ml4t/diagnostic/splitters/walk_forward.py +757 -0
  192. ml4t/diagnostic/utils/__init__.py +42 -0
  193. ml4t/diagnostic/utils/config.py +542 -0
  194. ml4t/diagnostic/utils/dependencies.py +318 -0
  195. ml4t/diagnostic/utils/sessions.py +127 -0
  196. ml4t/diagnostic/validation/__init__.py +54 -0
  197. ml4t/diagnostic/validation/dataframe.py +274 -0
  198. ml4t/diagnostic/validation/returns.py +280 -0
  199. ml4t/diagnostic/validation/timeseries.py +299 -0
  200. ml4t/diagnostic/visualization/AGENT.md +19 -0
  201. ml4t/diagnostic/visualization/__init__.py +223 -0
  202. ml4t/diagnostic/visualization/backtest/__init__.py +98 -0
  203. ml4t/diagnostic/visualization/backtest/cost_attribution.py +762 -0
  204. ml4t/diagnostic/visualization/backtest/executive_summary.py +895 -0
  205. ml4t/diagnostic/visualization/backtest/interactive_controls.py +673 -0
  206. ml4t/diagnostic/visualization/backtest/statistical_validity.py +874 -0
  207. ml4t/diagnostic/visualization/backtest/tearsheet.py +565 -0
  208. ml4t/diagnostic/visualization/backtest/template_system.py +373 -0
  209. ml4t/diagnostic/visualization/backtest/trade_plots.py +1172 -0
  210. ml4t/diagnostic/visualization/barrier_plots.py +782 -0
  211. ml4t/diagnostic/visualization/core.py +1060 -0
  212. ml4t/diagnostic/visualization/dashboards/__init__.py +36 -0
  213. ml4t/diagnostic/visualization/dashboards/base.py +582 -0
  214. ml4t/diagnostic/visualization/dashboards/importance.py +801 -0
  215. ml4t/diagnostic/visualization/dashboards/interaction.py +263 -0
  216. ml4t/diagnostic/visualization/dashboards.py +43 -0
  217. ml4t/diagnostic/visualization/data_extraction/__init__.py +48 -0
  218. ml4t/diagnostic/visualization/data_extraction/importance.py +649 -0
  219. ml4t/diagnostic/visualization/data_extraction/interaction.py +504 -0
  220. ml4t/diagnostic/visualization/data_extraction/types.py +113 -0
  221. ml4t/diagnostic/visualization/data_extraction/validation.py +66 -0
  222. ml4t/diagnostic/visualization/feature_plots.py +888 -0
  223. ml4t/diagnostic/visualization/interaction_plots.py +618 -0
  224. ml4t/diagnostic/visualization/portfolio/__init__.py +41 -0
  225. ml4t/diagnostic/visualization/portfolio/dashboard.py +514 -0
  226. ml4t/diagnostic/visualization/portfolio/drawdown_plots.py +341 -0
  227. ml4t/diagnostic/visualization/portfolio/returns_plots.py +487 -0
  228. ml4t/diagnostic/visualization/portfolio/risk_plots.py +301 -0
  229. ml4t/diagnostic/visualization/report_generation.py +1343 -0
  230. ml4t/diagnostic/visualization/signal/__init__.py +103 -0
  231. ml4t/diagnostic/visualization/signal/dashboard.py +911 -0
  232. ml4t/diagnostic/visualization/signal/event_plots.py +514 -0
  233. ml4t/diagnostic/visualization/signal/ic_plots.py +635 -0
  234. ml4t/diagnostic/visualization/signal/multi_signal_dashboard.py +974 -0
  235. ml4t/diagnostic/visualization/signal/multi_signal_plots.py +603 -0
  236. ml4t/diagnostic/visualization/signal/quantile_plots.py +625 -0
  237. ml4t/diagnostic/visualization/signal/turnover_plots.py +400 -0
  238. ml4t/diagnostic/visualization/trade_shap/__init__.py +90 -0
  239. ml4t_diagnostic-0.1.0a1.dist-info/METADATA +1044 -0
  240. ml4t_diagnostic-0.1.0a1.dist-info/RECORD +242 -0
  241. ml4t_diagnostic-0.1.0a1.dist-info/WHEEL +4 -0
  242. ml4t_diagnostic-0.1.0a1.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,581 @@
1
+ """IC (Information Coefficient) result classes for signal analysis.
2
+
3
+ This module provides result classes for storing IC analysis outputs including
4
+ time series data, summary statistics, HAC-adjusted values, and RAS adjustments.
5
+
6
+ References
7
+ ----------
8
+ Lopez de Prado, M. (2018). "Advances in Financial Machine Learning"
9
+ Paleologo, G. (2024). "Elements of Quantitative Investing"
10
+ """
11
+
12
+ from __future__ import annotations
13
+
14
+ from dataclasses import dataclass
15
+ from typing import Any
16
+
17
+ import polars as pl
18
+ from pydantic import Field, model_validator
19
+
20
+ from ml4t.diagnostic.results.base import BaseResult
21
+ from ml4t.diagnostic.results.signal_results.validation import (
22
+ _normalize_period,
23
+ _validate_dict_keys_match,
24
+ )
25
+
26
+
27
+ @dataclass
28
+ class ICStats:
29
+ """IC statistics for a single period.
30
+
31
+ Provides a convenient typed container for all IC metrics
32
+ at a specific forward return period.
33
+
34
+ Examples
35
+ --------
36
+ >>> stats = ic_result.get_stats(21)
37
+ >>> if stats:
38
+ ... print(f"IC: {stats.mean:.4f} (t={stats.t_stat:.2f})")
39
+ """
40
+
41
+ mean: float
42
+ std: float
43
+ t_stat: float
44
+ p_value: float
45
+ positive_pct: float
46
+ ir: float # Information Ratio
47
+ t_stat_hac: float | None = None
48
+ p_value_hac: float | None = None
49
+ ras_adjusted: float | None = None
50
+ ras_significant: bool | None = None
51
+
52
+
53
class SignalICResult(BaseResult):
    """Results from Signal IC (Information Coefficient) analysis.

    Holds the per-date IC series, per-period summary statistics and,
    when supplied, HAC-adjusted and RAS-adjusted values.

    This is distinct from feature_results.ICAnalysisResult which
    handles single-feature IC analysis (Module C).

    Examples
    --------
    >>> result = signal_ic_result
    >>> print(result.summary())
    >>> df = result.get_dataframe("ic_by_date")
    """

    analysis_type: str = Field(default="signal_ic_analysis", frozen=True)

    # ----------------------------------------------------------------------
    # IC time series
    # ----------------------------------------------------------------------

    ic_by_date: dict[str, list[float]] = Field(
        ...,
        description="IC values by date for each period. Keys: period names, values: IC series",
    )
    dates: list[str] = Field(
        ...,
        description="Date strings (ISO format) corresponding to IC values",
    )

    # ----------------------------------------------------------------------
    # Per-period summary statistics
    # ----------------------------------------------------------------------

    ic_mean: dict[str, float] = Field(..., description="Mean IC for each period")
    ic_std: dict[str, float] = Field(..., description="Standard deviation of IC for each period")
    ic_t_stat: dict[str, float] = Field(..., description="T-statistic for IC mean != 0")
    ic_p_value: dict[str, float] = Field(
        ..., description="P-value for IC significance (two-tailed)"
    )
    ic_positive_pct: dict[str, float] = Field(
        ..., description="Percentage of periods with positive IC"
    )
    ic_ir: dict[str, float] = Field(..., description="Information Ratio (IC_mean / IC_std)")

    # ----------------------------------------------------------------------
    # HAC-adjusted statistics (Newey-West)
    # ----------------------------------------------------------------------

    ic_t_stat_hac: dict[str, float] | None = Field(
        default=None, description="HAC-adjusted t-statistic (Newey-West)"
    )
    ic_p_value_hac: dict[str, float] | None = Field(
        default=None, description="HAC-adjusted p-value"
    )
    hac_lags_used: int | None = Field(
        default=None, description="Number of lags used for HAC adjustment"
    )

    # ----------------------------------------------------------------------
    # RAS-adjusted values (Rademacher Anti-Serum)
    # ----------------------------------------------------------------------

    ras_adjusted_ic: dict[str, float] | None = Field(
        default=None, description="RAS-adjusted conservative IC lower bounds"
    )
    ras_complexity: float | None = Field(
        default=None, description="Rademacher complexity R^ used in adjustment"
    )
    ras_significant: dict[str, bool] | None = Field(
        default=None,
        description="Whether RAS-adjusted IC > 0 (significant after multiple testing)",
    )

    # ----------------------------------------------------------------------
    # Validation
    # ----------------------------------------------------------------------

    @model_validator(mode="after")
    def _validate_period_keys(self) -> SignalICResult:
        """Check the period-keyed dicts for matching keys, using ``ic_mean`` as reference.

        The check itself is delegated to ``_validate_dict_keys_match``, which
        receives the dumped model together with the required and optional
        field names.
        """
        mandatory = [
            "ic_by_date",
            "ic_mean",
            "ic_std",
            "ic_t_stat",
            "ic_p_value",
            "ic_positive_pct",
            "ic_ir",
        ]
        optional = [
            "ic_t_stat_hac",
            "ic_p_value_hac",
            "ras_adjusted_ic",
            "ras_significant",
        ]
        _validate_dict_keys_match(
            self.model_dump(),
            required_fields=mandatory,
            optional_fields=optional,
            reference_field="ic_mean",
        )
        return self

    # ----------------------------------------------------------------------
    # DataFrame / text views
    # ----------------------------------------------------------------------

    def get_dataframe(self, name: str | None = None) -> pl.DataFrame:
        """Return one of the tabular views of this result.

        Parameters
        ----------
        name : str | None
            View to build:
            - None or "ic_by_date": a ``date`` column plus one ``ic_<period>``
              column per entry of ``ic_by_date``
            - "summary": one row per period with the summary statistics
              (RAS columns are appended only when both ``ras_adjusted_ic``
              and ``ras_significant`` are set)

        Returns
        -------
        pl.DataFrame
            The requested view.

        Raises
        ------
        ValueError
            If ``name`` is not one of the supported views.
        """
        view = "ic_by_date" if name is None else name

        if view == "ic_by_date":
            columns: dict[str, Any] = {"date": self.dates}
            columns.update(
                {f"ic_{period}": series for period, series in self.ic_by_date.items()}
            )
            return pl.DataFrame(columns)

        if view != "summary":
            raise ValueError(f"Unknown DataFrame name: {name}. Available: 'ic_by_date', 'summary'")

        periods = list(self.ic_mean)
        # Column name -> source mapping, in output column order.
        per_period_sources: dict[str, dict[str, float]] = {
            "ic_mean": self.ic_mean,
            "ic_std": self.ic_std,
            "ic_t_stat": self.ic_t_stat,
            "ic_p_value": self.ic_p_value,
            "ic_positive_pct": self.ic_positive_pct,
            "ic_ir": self.ic_ir,
        }
        table: dict[str, Any] = {"period": periods}
        for column, source in per_period_sources.items():
            table[column] = [source[p] for p in periods]

        ras_ic, ras_flags = self.ras_adjusted_ic, self.ras_significant
        if ras_ic is not None and ras_flags is not None:
            table["ras_adjusted_ic"] = [ras_ic[p] for p in periods]
            table["ras_significant"] = [ras_flags[p] for p in periods]

        return pl.DataFrame(table)

    def list_available_dataframes(self) -> list[str]:
        """Return the view names accepted by ``get_dataframe``."""
        return ["ic_by_date", "summary"]

    def summary(self) -> str:
        """Render the per-period IC statistics as a plain-text report."""
        rule = "=" * 60
        report = [rule, "IC Analysis Summary", rule, ""]

        ras_ic, ras_flags = self.ras_adjusted_ic, self.ras_significant
        for period, mean_ic in self.ic_mean.items():
            report.extend(
                [
                    f"Period: {period}",
                    f" Mean IC: {mean_ic:>8.4f}",
                    f" Std IC: {self.ic_std[period]:>8.4f}",
                    f" IR: {self.ic_ir[period]:>8.4f}",
                    f" t-stat: {self.ic_t_stat[period]:>8.2f}",
                    f" p-value: {self.ic_p_value[period]:>8.4f}",
                    f" Positive %: {self.ic_positive_pct[period]:>8.1%}",
                ]
            )

            # RAS lines are emitted only when both RAS mappings are present.
            if ras_ic is not None and ras_flags is not None:
                report.append(f" RAS IC: {ras_ic[period]:>8.4f}")
                sig = "Y" if ras_flags[period] else "X"
                report.append(f" RAS Signif: {sig:>8}")
            report.append("")

        return "\n".join(report)

    # ----------------------------------------------------------------------
    # Convenience accessors
    # ----------------------------------------------------------------------

    @property
    def periods(self) -> list[str]:
        """Period names, taken from the keys of ``ic_mean`` (e.g., ['1D', '5D', '21D'])."""
        return list(self.ic_mean)

    def get_ic(self, period: int | str) -> float | None:
        """Look up the mean IC of a period.

        Parameters
        ----------
        period : int | str
            Period identifier; it is passed through ``_normalize_period``
            before the lookup.

        Returns
        -------
        float | None
            Mean IC for the period, or None if the normalized key is absent.

        Examples
        --------
        >>> ic_result.get_ic(21)
        >>> ic_result.get_ic('21')
        >>> ic_result.get_ic('21D')
        """
        return self.ic_mean.get(_normalize_period(period))

    def get_t_stat(self, period: int | str) -> float | None:
        """Look up the t-statistic of a period (None if absent)."""
        return self.ic_t_stat.get(_normalize_period(period))

    def get_p_value(self, period: int | str) -> float | None:
        """Look up the p-value of a period (None if absent)."""
        return self.ic_p_value.get(_normalize_period(period))

    def get_ir(self, period: int | str) -> float | None:
        """Look up the Information Ratio (IC/std) of a period (None if absent)."""
        return self.ic_ir.get(_normalize_period(period))

    def get_stats(self, period: int | str) -> ICStats | None:
        """Collect every IC statistic of a period into an ``ICStats`` object.

        This is the recommended way to access IC results: one typed object
        instead of several dict lookups.

        Parameters
        ----------
        period : int | str
            Period identifier; it is passed through ``_normalize_period``
            before the lookup.

        Returns
        -------
        ICStats | None
            Populated container, or None if the normalized key is not in
            ``ic_mean``.  Optional members are None when the matching
            mapping is unset, empty, or lacks the key.

        Examples
        --------
        >>> stats = ic_result.get_stats(21)
        >>> if stats:
        ...     print(f"IC: {stats.mean:.4f} (t={stats.t_stat:.2f}, p={stats.p_value:.4f})")
        ...     if stats.ras_significant:
        ...         print("Significant after RAS adjustment!")
        """
        key = _normalize_period(period)
        if key not in self.ic_mean:
            return None

        def _optional(mapping: dict[str, Any] | None) -> Any:
            # Unset or empty mappings yield None; otherwise a plain .get().
            return mapping.get(key) if mapping else None

        return ICStats(
            mean=self.ic_mean[key],
            std=self.ic_std[key],
            t_stat=self.ic_t_stat[key],
            p_value=self.ic_p_value[key],
            positive_pct=self.ic_positive_pct[key],
            ir=self.ic_ir[key],
            t_stat_hac=_optional(self.ic_t_stat_hac),
            p_value_hac=_optional(self.ic_p_value_hac),
            ras_adjusted=_optional(self.ras_adjusted_ic),
            ras_significant=_optional(self.ras_significant),
        )

    def is_significant(self, period: int | str, alpha: float = 0.05, use_hac: bool = True) -> bool:
        """Report whether the IC of a period is significant at level ``alpha``.

        Parameters
        ----------
        period : int | str
            Period to check.
        alpha : float, default 0.05
            Significance level.
        use_hac : bool, default True
            Prefer the HAC-adjusted p-value when one exists for the period;
            otherwise the unadjusted p-value is used.

        Returns
        -------
        bool
            True if the selected p-value is below ``alpha``; False when no
            p-value is stored for the period.
        """
        key = _normalize_period(period)
        hac_p_values = self.ic_p_value_hac

        p_val: float | None
        if use_hac and hac_p_values and key in hac_p_values:
            p_val = hac_p_values[key]
        else:
            p_val = self.ic_p_value.get(key)

        return p_val is not None and p_val < alpha
374
+
375
+
376
class RASICResult(BaseResult):
    """Results from RAS-adjusted IC analysis.

    Result container for Rademacher Anti-Serum adjustments used in
    multiple testing correction: the inputs and parameters of the run,
    the per-signal observed/adjusted IC, and aggregate terms.

    Examples
    --------
    >>> result = ras_ic_result
    >>> if result.any_significant:
    ...     print("Found significant signals after RAS adjustment")
    """

    analysis_type: str = Field(default="ras_ic_analysis", frozen=True)

    # ----------------------------------------------------------------------
    # Input summary
    # ----------------------------------------------------------------------

    n_signals: int = Field(..., description="Number of signals tested")
    n_samples: int = Field(..., description="Number of time periods used")

    # ----------------------------------------------------------------------
    # RAS parameters
    # ----------------------------------------------------------------------

    delta: float = Field(..., description="Significance level used (1-delta = confidence)")
    kappa: float = Field(..., description="IC bound used (|IC| <= kappa)")
    n_simulations: int = Field(..., description="Monte Carlo simulations used")

    # ----------------------------------------------------------------------
    # Results
    # ----------------------------------------------------------------------

    rademacher_complexity: float = Field(
        ..., description="Empirical Rademacher complexity R^"
    )
    massart_bound: float = Field(
        ..., description="Massart's theoretical upper bound sqrt(2logN/T)"
    )
    observed_ic: dict[str, float] = Field(..., description="Observed IC for each signal")
    adjusted_ic: dict[str, float] = Field(
        ..., description="RAS-adjusted conservative IC lower bounds"
    )
    is_significant: dict[str, bool] = Field(
        ..., description="Whether adjusted IC > 0 for each signal"
    )

    # ----------------------------------------------------------------------
    # Summary statistics
    # ----------------------------------------------------------------------

    n_significant: int = Field(..., description="Number of signals with adjusted IC > 0")
    any_significant: bool = Field(..., description="Whether any signal passed RAS test")
    data_snooping_term: float = Field(..., description="Data snooping penalty (2 * R^)")
    estimation_error_term: float = Field(
        ...,
        description="Estimation error term (2*kappa*sqrt(log(2/delta)/T))",
    )

    # ----------------------------------------------------------------------
    # Validation
    # ----------------------------------------------------------------------

    @model_validator(mode="after")
    def _validate_signal_keys(self) -> RASICResult:
        """Check the signal-keyed dicts for matching keys, using ``observed_ic`` as reference.

        The check itself is delegated to ``_validate_dict_keys_match``.
        """
        signal_keyed = ["observed_ic", "adjusted_ic", "is_significant"]
        _validate_dict_keys_match(
            self.model_dump(),
            required_fields=signal_keyed,
            reference_field="observed_ic",
        )
        return self

    # ----------------------------------------------------------------------
    # DataFrame / text views
    # ----------------------------------------------------------------------

    def get_dataframe(self, name: str | None = None) -> pl.DataFrame:
        """Return one of the tabular views of this result.

        Parameters
        ----------
        name : str | None
            View to build:
            - None or "signals": one row per signal with observed IC,
              adjusted IC and the significance flag
            - "summary": ``metric`` / ``value`` rows for the aggregate
              quantities (integer counts are converted to float)

        Returns
        -------
        pl.DataFrame
            The requested view.

        Raises
        ------
        ValueError
            If ``name`` is not one of the supported views.
        """
        view = "signals" if name is None else name

        if view == "signals":
            names = list(self.observed_ic)
            return pl.DataFrame(
                {
                    "signal": names,
                    "observed_ic": [self.observed_ic[s] for s in names],
                    "adjusted_ic": [self.adjusted_ic[s] for s in names],
                    "is_significant": [self.is_significant[s] for s in names],
                }
            )

        if view == "summary":
            # (metric, value) pairs in output row order.
            rows = [
                ("n_signals", float(self.n_signals)),
                ("n_samples", float(self.n_samples)),
                ("rademacher_complexity", self.rademacher_complexity),
                ("massart_bound", self.massart_bound),
                ("data_snooping_term", self.data_snooping_term),
                ("estimation_error_term", self.estimation_error_term),
                ("n_significant", float(self.n_significant)),
            ]
            return pl.DataFrame(
                {
                    "metric": [metric for metric, _ in rows],
                    "value": [value for _, value in rows],
                }
            )

        raise ValueError(f"Unknown DataFrame name: {name}. Available: 'signals', 'summary'")

    def list_available_dataframes(self) -> list[str]:
        """Return the view names accepted by ``get_dataframe``."""
        return ["signals", "summary"]

    def summary(self) -> str:
        """Render the RAS IC results as a plain-text report.

        When ``any_significant`` is true, the signals flagged in
        ``is_significant`` are listed with their observed and adjusted IC.
        """
        rule = "=" * 60
        report = [
            rule,
            "RAS IC Analysis Summary",
            rule,
            "",
            f"Signals Tested: {self.n_signals:>10}",
            f"Time Periods: {self.n_samples:>10}",
            f"Confidence Level: {1 - self.delta:>10.1%}",
            f"IC Bound (kappa): {self.kappa:>10.4f}",
            "",
            f"Rademacher Complexity:{self.rademacher_complexity:>10.4f}",
            f"Massart Bound: {self.massart_bound:>10.4f}",
            f"Data Snooping Term: {self.data_snooping_term:>10.4f}",
            f"Estimation Error: {self.estimation_error_term:>10.4f}",
            "",
            f"Significant Signals: {self.n_significant:>10} / {self.n_signals}",
            "",
        ]

        if not self.any_significant:
            return "\n".join(report)

        report.append("Significant signals (RAS-adjusted IC > 0):")
        for signal, passed in self.is_significant.items():
            if not passed:
                continue
            obs = self.observed_ic[signal]
            adj = self.adjusted_ic[signal]
            report.append(f" {signal}: observed={obs:.4f}, adjusted={adj:.4f}")

        return "\n".join(report)
@@ -0,0 +1,110 @@
1
+ """IR_tc (Transaction-Cost Adjusted Information Ratio) result classes.
2
+
3
+ This module provides result classes for storing transaction-cost-adjusted
4
+ Information Ratio analysis outputs.
5
+
6
+ References
7
+ ----------
8
+ Lopez de Prado, M. (2018). "Advances in Financial Machine Learning"
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+ import polars as pl
14
+ from pydantic import Field
15
+
16
+ from ml4t.diagnostic.results.base import BaseResult
17
+
18
+
19
class IRtcResult(BaseResult):
    """Results from transaction-cost-adjusted Information Ratio analysis.

    IR_tc measures the risk-adjusted IC after accounting for the cost
    of turnover required to maintain the signal-based portfolio.

    IR_tc = (IC * spread_return - turnover * cost) / volatility

    All per-period fields are dictionaries keyed by period label. The keys
    of ``ir_gross`` drive both the tabular and the text output, so every
    other per-period dictionary must contain at least those keys.

    Examples
    --------
    >>> result = ir_tc_result
    >>> print(result.summary())
    """

    analysis_type: str = Field(default="ir_tc_analysis", frozen=True)

    # ==========================================================================
    # Configuration
    # ==========================================================================

    cost_per_trade: float = Field(
        ...,
        description="Transaction cost per unit turnover used",
    )

    # ==========================================================================
    # Results by Period
    # ==========================================================================

    ir_gross: dict[str, float] = Field(
        ...,
        description="Gross IR (before transaction costs) per period",
    )

    ir_tc: dict[str, float] = Field(
        ...,
        description="Net IR (after transaction costs) per period",
    )

    implied_cost: dict[str, float] = Field(
        ...,
        description="Implied cost from turnover per period",
    )

    breakeven_cost: dict[str, float] = Field(
        ...,
        description="Breakeven cost (cost at which IR_tc = 0)",
    )

    cost_drag: dict[str, float] = Field(
        ...,
        description="Percentage of gross return lost to costs",
    )

    def get_dataframe(self, name: str | None = None) -> pl.DataFrame:
        """Get results as Polars DataFrame.

        Parameters
        ----------
        name : str or None
            View to return. ``None`` and ``"primary"`` both select the
            per-period table, which is the only view available.

        Returns
        -------
        pl.DataFrame
            One row per key of ``ir_gross`` with columns ``period``,
            ``ir_gross``, ``ir_tc``, ``implied_cost``, ``breakeven_cost``
            and ``cost_drag``.

        Raises
        ------
        ValueError
            If ``name`` is neither ``None`` nor ``"primary"``.
        KeyError
            If a per-period dictionary lacks a key present in ``ir_gross``.
        """
        # Reject unknown view names instead of silently returning the
        # primary table; the only advertised view is "primary".
        if name is not None and name != "primary":
            raise ValueError(f"Unknown DataFrame name: {name}. Available: 'primary'")

        periods = list(self.ir_gross.keys())
        return pl.DataFrame(
            {
                "period": periods,
                "ir_gross": [self.ir_gross[p] for p in periods],
                "ir_tc": [self.ir_tc[p] for p in periods],
                "implied_cost": [self.implied_cost[p] for p in periods],
                "breakeven_cost": [self.breakeven_cost[p] for p in periods],
                "cost_drag": [self.cost_drag[p] for p in periods],
            }
        )

    def list_available_dataframes(self) -> list[str]:
        """List available DataFrame views."""
        return ["primary"]

    def summary(self) -> str:
        """Get human-readable summary of IR_tc results.

        Returns
        -------
        str
            A banner, the configured cost (also shown in basis points as
            ``cost_per_trade * 10000``), and one row per key of
            ``ir_gross``. ``cost_drag`` is rendered with the ``%`` format
            specifier, which multiplies the stored value by 100.
        """
        lines = [
            "=" * 60,
            "Transaction-Cost Adjusted IR Summary",
            "=" * 60,
            "",
            f"Cost per Trade: {self.cost_per_trade:.4f} ({self.cost_per_trade * 10000:.0f} bps)",
            "",
            "Period IR_gross IR_tc Cost Drag Breakeven",
            "-" * 60,
        ]

        for period in self.ir_gross:
            lines.append(
                f"{period:<12} {self.ir_gross[period]:>8.4f} {self.ir_tc[period]:>8.4f} "
                f"{self.cost_drag[period]:>8.1%} {self.breakeven_cost[period]:>8.4f}"
            )

        return "\n".join(lines)