ml4t_diagnostic-0.1.0a1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (242)
  1. ml4t/diagnostic/AGENT.md +25 -0
  2. ml4t/diagnostic/__init__.py +166 -0
  3. ml4t/diagnostic/backends/__init__.py +10 -0
  4. ml4t/diagnostic/backends/adapter.py +192 -0
  5. ml4t/diagnostic/backends/polars_backend.py +899 -0
  6. ml4t/diagnostic/caching/__init__.py +40 -0
  7. ml4t/diagnostic/caching/cache.py +331 -0
  8. ml4t/diagnostic/caching/decorators.py +131 -0
  9. ml4t/diagnostic/caching/smart_cache.py +339 -0
  10. ml4t/diagnostic/config/AGENT.md +24 -0
  11. ml4t/diagnostic/config/README.md +267 -0
  12. ml4t/diagnostic/config/__init__.py +219 -0
  13. ml4t/diagnostic/config/barrier_config.py +277 -0
  14. ml4t/diagnostic/config/base.py +301 -0
  15. ml4t/diagnostic/config/event_config.py +148 -0
  16. ml4t/diagnostic/config/feature_config.py +404 -0
  17. ml4t/diagnostic/config/multi_signal_config.py +55 -0
  18. ml4t/diagnostic/config/portfolio_config.py +215 -0
  19. ml4t/diagnostic/config/report_config.py +391 -0
  20. ml4t/diagnostic/config/sharpe_config.py +202 -0
  21. ml4t/diagnostic/config/signal_config.py +206 -0
  22. ml4t/diagnostic/config/trade_analysis_config.py +310 -0
  23. ml4t/diagnostic/config/validation.py +279 -0
  24. ml4t/diagnostic/core/__init__.py +29 -0
  25. ml4t/diagnostic/core/numba_utils.py +315 -0
  26. ml4t/diagnostic/core/purging.py +372 -0
  27. ml4t/diagnostic/core/sampling.py +471 -0
  28. ml4t/diagnostic/errors/__init__.py +205 -0
  29. ml4t/diagnostic/evaluation/AGENT.md +26 -0
  30. ml4t/diagnostic/evaluation/__init__.py +437 -0
  31. ml4t/diagnostic/evaluation/autocorrelation.py +531 -0
  32. ml4t/diagnostic/evaluation/barrier_analysis.py +1050 -0
  33. ml4t/diagnostic/evaluation/binary_metrics.py +910 -0
  34. ml4t/diagnostic/evaluation/dashboard.py +715 -0
  35. ml4t/diagnostic/evaluation/diagnostic_plots.py +1037 -0
  36. ml4t/diagnostic/evaluation/distribution/__init__.py +499 -0
  37. ml4t/diagnostic/evaluation/distribution/moments.py +299 -0
  38. ml4t/diagnostic/evaluation/distribution/tails.py +777 -0
  39. ml4t/diagnostic/evaluation/distribution/tests.py +470 -0
  40. ml4t/diagnostic/evaluation/drift/__init__.py +139 -0
  41. ml4t/diagnostic/evaluation/drift/analysis.py +432 -0
  42. ml4t/diagnostic/evaluation/drift/domain_classifier.py +517 -0
  43. ml4t/diagnostic/evaluation/drift/population_stability_index.py +310 -0
  44. ml4t/diagnostic/evaluation/drift/wasserstein.py +388 -0
  45. ml4t/diagnostic/evaluation/event_analysis.py +647 -0
  46. ml4t/diagnostic/evaluation/excursion.py +390 -0
  47. ml4t/diagnostic/evaluation/feature_diagnostics.py +873 -0
  48. ml4t/diagnostic/evaluation/feature_outcome.py +666 -0
  49. ml4t/diagnostic/evaluation/framework.py +935 -0
  50. ml4t/diagnostic/evaluation/metric_registry.py +255 -0
  51. ml4t/diagnostic/evaluation/metrics/AGENT.md +23 -0
  52. ml4t/diagnostic/evaluation/metrics/__init__.py +133 -0
  53. ml4t/diagnostic/evaluation/metrics/basic.py +160 -0
  54. ml4t/diagnostic/evaluation/metrics/conditional_ic.py +469 -0
  55. ml4t/diagnostic/evaluation/metrics/feature_outcome.py +475 -0
  56. ml4t/diagnostic/evaluation/metrics/ic_statistics.py +446 -0
  57. ml4t/diagnostic/evaluation/metrics/importance_analysis.py +338 -0
  58. ml4t/diagnostic/evaluation/metrics/importance_classical.py +375 -0
  59. ml4t/diagnostic/evaluation/metrics/importance_mda.py +371 -0
  60. ml4t/diagnostic/evaluation/metrics/importance_shap.py +715 -0
  61. ml4t/diagnostic/evaluation/metrics/information_coefficient.py +527 -0
  62. ml4t/diagnostic/evaluation/metrics/interactions.py +772 -0
  63. ml4t/diagnostic/evaluation/metrics/monotonicity.py +226 -0
  64. ml4t/diagnostic/evaluation/metrics/risk_adjusted.py +324 -0
  65. ml4t/diagnostic/evaluation/multi_signal.py +550 -0
  66. ml4t/diagnostic/evaluation/portfolio_analysis/__init__.py +83 -0
  67. ml4t/diagnostic/evaluation/portfolio_analysis/analysis.py +734 -0
  68. ml4t/diagnostic/evaluation/portfolio_analysis/metrics.py +589 -0
  69. ml4t/diagnostic/evaluation/portfolio_analysis/results.py +334 -0
  70. ml4t/diagnostic/evaluation/report_generation.py +824 -0
  71. ml4t/diagnostic/evaluation/signal_selector.py +452 -0
  72. ml4t/diagnostic/evaluation/stat_registry.py +139 -0
  73. ml4t/diagnostic/evaluation/stationarity/__init__.py +97 -0
  74. ml4t/diagnostic/evaluation/stationarity/analysis.py +518 -0
  75. ml4t/diagnostic/evaluation/stationarity/augmented_dickey_fuller.py +296 -0
  76. ml4t/diagnostic/evaluation/stationarity/kpss_test.py +308 -0
  77. ml4t/diagnostic/evaluation/stationarity/phillips_perron.py +365 -0
  78. ml4t/diagnostic/evaluation/stats/AGENT.md +43 -0
  79. ml4t/diagnostic/evaluation/stats/__init__.py +191 -0
  80. ml4t/diagnostic/evaluation/stats/backtest_overfitting.py +219 -0
  81. ml4t/diagnostic/evaluation/stats/bootstrap.py +228 -0
  82. ml4t/diagnostic/evaluation/stats/deflated_sharpe_ratio.py +591 -0
  83. ml4t/diagnostic/evaluation/stats/false_discovery_rate.py +295 -0
  84. ml4t/diagnostic/evaluation/stats/hac_standard_errors.py +108 -0
  85. ml4t/diagnostic/evaluation/stats/minimum_track_record.py +408 -0
  86. ml4t/diagnostic/evaluation/stats/moments.py +164 -0
  87. ml4t/diagnostic/evaluation/stats/rademacher_adjustment.py +436 -0
  88. ml4t/diagnostic/evaluation/stats/reality_check.py +155 -0
  89. ml4t/diagnostic/evaluation/stats/sharpe_inference.py +219 -0
  90. ml4t/diagnostic/evaluation/themes.py +330 -0
  91. ml4t/diagnostic/evaluation/threshold_analysis.py +957 -0
  92. ml4t/diagnostic/evaluation/trade_analysis.py +1136 -0
  93. ml4t/diagnostic/evaluation/trade_dashboard/__init__.py +32 -0
  94. ml4t/diagnostic/evaluation/trade_dashboard/app.py +315 -0
  95. ml4t/diagnostic/evaluation/trade_dashboard/export/__init__.py +18 -0
  96. ml4t/diagnostic/evaluation/trade_dashboard/export/csv.py +82 -0
  97. ml4t/diagnostic/evaluation/trade_dashboard/export/html.py +276 -0
  98. ml4t/diagnostic/evaluation/trade_dashboard/io.py +166 -0
  99. ml4t/diagnostic/evaluation/trade_dashboard/normalize.py +304 -0
  100. ml4t/diagnostic/evaluation/trade_dashboard/stats.py +386 -0
  101. ml4t/diagnostic/evaluation/trade_dashboard/style.py +79 -0
  102. ml4t/diagnostic/evaluation/trade_dashboard/tabs/__init__.py +21 -0
  103. ml4t/diagnostic/evaluation/trade_dashboard/tabs/patterns.py +354 -0
  104. ml4t/diagnostic/evaluation/trade_dashboard/tabs/shap_analysis.py +280 -0
  105. ml4t/diagnostic/evaluation/trade_dashboard/tabs/stat_validation.py +186 -0
  106. ml4t/diagnostic/evaluation/trade_dashboard/tabs/worst_trades.py +236 -0
  107. ml4t/diagnostic/evaluation/trade_dashboard/types.py +129 -0
  108. ml4t/diagnostic/evaluation/trade_shap/__init__.py +102 -0
  109. ml4t/diagnostic/evaluation/trade_shap/alignment.py +188 -0
  110. ml4t/diagnostic/evaluation/trade_shap/characterize.py +413 -0
  111. ml4t/diagnostic/evaluation/trade_shap/cluster.py +302 -0
  112. ml4t/diagnostic/evaluation/trade_shap/explain.py +208 -0
  113. ml4t/diagnostic/evaluation/trade_shap/hypotheses/__init__.py +23 -0
  114. ml4t/diagnostic/evaluation/trade_shap/hypotheses/generator.py +290 -0
  115. ml4t/diagnostic/evaluation/trade_shap/hypotheses/matcher.py +251 -0
  116. ml4t/diagnostic/evaluation/trade_shap/hypotheses/templates.yaml +467 -0
  117. ml4t/diagnostic/evaluation/trade_shap/models.py +386 -0
  118. ml4t/diagnostic/evaluation/trade_shap/normalize.py +116 -0
  119. ml4t/diagnostic/evaluation/trade_shap/pipeline.py +263 -0
  120. ml4t/diagnostic/evaluation/trade_shap_dashboard.py +283 -0
  121. ml4t/diagnostic/evaluation/trade_shap_diagnostics.py +588 -0
  122. ml4t/diagnostic/evaluation/validated_cv.py +535 -0
  123. ml4t/diagnostic/evaluation/visualization.py +1050 -0
  124. ml4t/diagnostic/evaluation/volatility/__init__.py +45 -0
  125. ml4t/diagnostic/evaluation/volatility/analysis.py +351 -0
  126. ml4t/diagnostic/evaluation/volatility/arch.py +258 -0
  127. ml4t/diagnostic/evaluation/volatility/garch.py +460 -0
  128. ml4t/diagnostic/integration/__init__.py +48 -0
  129. ml4t/diagnostic/integration/backtest_contract.py +671 -0
  130. ml4t/diagnostic/integration/data_contract.py +316 -0
  131. ml4t/diagnostic/integration/engineer_contract.py +226 -0
  132. ml4t/diagnostic/logging/__init__.py +77 -0
  133. ml4t/diagnostic/logging/logger.py +245 -0
  134. ml4t/diagnostic/logging/performance.py +234 -0
  135. ml4t/diagnostic/logging/progress.py +234 -0
  136. ml4t/diagnostic/logging/wandb.py +412 -0
  137. ml4t/diagnostic/metrics/__init__.py +9 -0
  138. ml4t/diagnostic/metrics/percentiles.py +128 -0
  139. ml4t/diagnostic/py.typed +1 -0
  140. ml4t/diagnostic/reporting/__init__.py +43 -0
  141. ml4t/diagnostic/reporting/base.py +130 -0
  142. ml4t/diagnostic/reporting/html_renderer.py +275 -0
  143. ml4t/diagnostic/reporting/json_renderer.py +51 -0
  144. ml4t/diagnostic/reporting/markdown_renderer.py +117 -0
  145. ml4t/diagnostic/results/AGENT.md +24 -0
  146. ml4t/diagnostic/results/__init__.py +105 -0
  147. ml4t/diagnostic/results/barrier_results/__init__.py +36 -0
  148. ml4t/diagnostic/results/barrier_results/hit_rate.py +304 -0
  149. ml4t/diagnostic/results/barrier_results/precision_recall.py +266 -0
  150. ml4t/diagnostic/results/barrier_results/profit_factor.py +297 -0
  151. ml4t/diagnostic/results/barrier_results/tearsheet.py +397 -0
  152. ml4t/diagnostic/results/barrier_results/time_to_target.py +305 -0
  153. ml4t/diagnostic/results/barrier_results/validation.py +38 -0
  154. ml4t/diagnostic/results/base.py +177 -0
  155. ml4t/diagnostic/results/event_results.py +349 -0
  156. ml4t/diagnostic/results/feature_results.py +787 -0
  157. ml4t/diagnostic/results/multi_signal_results.py +431 -0
  158. ml4t/diagnostic/results/portfolio_results.py +281 -0
  159. ml4t/diagnostic/results/sharpe_results.py +448 -0
  160. ml4t/diagnostic/results/signal_results/__init__.py +74 -0
  161. ml4t/diagnostic/results/signal_results/ic.py +581 -0
  162. ml4t/diagnostic/results/signal_results/irtc.py +110 -0
  163. ml4t/diagnostic/results/signal_results/quantile.py +392 -0
  164. ml4t/diagnostic/results/signal_results/tearsheet.py +456 -0
  165. ml4t/diagnostic/results/signal_results/turnover.py +213 -0
  166. ml4t/diagnostic/results/signal_results/validation.py +147 -0
  167. ml4t/diagnostic/signal/AGENT.md +17 -0
  168. ml4t/diagnostic/signal/__init__.py +69 -0
  169. ml4t/diagnostic/signal/_report.py +152 -0
  170. ml4t/diagnostic/signal/_utils.py +261 -0
  171. ml4t/diagnostic/signal/core.py +275 -0
  172. ml4t/diagnostic/signal/quantile.py +148 -0
  173. ml4t/diagnostic/signal/result.py +214 -0
  174. ml4t/diagnostic/signal/signal_ic.py +129 -0
  175. ml4t/diagnostic/signal/turnover.py +182 -0
  176. ml4t/diagnostic/splitters/AGENT.md +19 -0
  177. ml4t/diagnostic/splitters/__init__.py +36 -0
  178. ml4t/diagnostic/splitters/base.py +501 -0
  179. ml4t/diagnostic/splitters/calendar.py +421 -0
  180. ml4t/diagnostic/splitters/calendar_config.py +91 -0
  181. ml4t/diagnostic/splitters/combinatorial.py +1064 -0
  182. ml4t/diagnostic/splitters/config.py +322 -0
  183. ml4t/diagnostic/splitters/cpcv/__init__.py +57 -0
  184. ml4t/diagnostic/splitters/cpcv/combinations.py +119 -0
  185. ml4t/diagnostic/splitters/cpcv/partitioning.py +263 -0
  186. ml4t/diagnostic/splitters/cpcv/purge_engine.py +379 -0
  187. ml4t/diagnostic/splitters/cpcv/windows.py +190 -0
  188. ml4t/diagnostic/splitters/group_isolation.py +329 -0
  189. ml4t/diagnostic/splitters/persistence.py +316 -0
  190. ml4t/diagnostic/splitters/utils.py +207 -0
  191. ml4t/diagnostic/splitters/walk_forward.py +757 -0
  192. ml4t/diagnostic/utils/__init__.py +42 -0
  193. ml4t/diagnostic/utils/config.py +542 -0
  194. ml4t/diagnostic/utils/dependencies.py +318 -0
  195. ml4t/diagnostic/utils/sessions.py +127 -0
  196. ml4t/diagnostic/validation/__init__.py +54 -0
  197. ml4t/diagnostic/validation/dataframe.py +274 -0
  198. ml4t/diagnostic/validation/returns.py +280 -0
  199. ml4t/diagnostic/validation/timeseries.py +299 -0
  200. ml4t/diagnostic/visualization/AGENT.md +19 -0
  201. ml4t/diagnostic/visualization/__init__.py +223 -0
  202. ml4t/diagnostic/visualization/backtest/__init__.py +98 -0
  203. ml4t/diagnostic/visualization/backtest/cost_attribution.py +762 -0
  204. ml4t/diagnostic/visualization/backtest/executive_summary.py +895 -0
  205. ml4t/diagnostic/visualization/backtest/interactive_controls.py +673 -0
  206. ml4t/diagnostic/visualization/backtest/statistical_validity.py +874 -0
  207. ml4t/diagnostic/visualization/backtest/tearsheet.py +565 -0
  208. ml4t/diagnostic/visualization/backtest/template_system.py +373 -0
  209. ml4t/diagnostic/visualization/backtest/trade_plots.py +1172 -0
  210. ml4t/diagnostic/visualization/barrier_plots.py +782 -0
  211. ml4t/diagnostic/visualization/core.py +1060 -0
  212. ml4t/diagnostic/visualization/dashboards/__init__.py +36 -0
  213. ml4t/diagnostic/visualization/dashboards/base.py +582 -0
  214. ml4t/diagnostic/visualization/dashboards/importance.py +801 -0
  215. ml4t/diagnostic/visualization/dashboards/interaction.py +263 -0
  216. ml4t/diagnostic/visualization/dashboards.py +43 -0
  217. ml4t/diagnostic/visualization/data_extraction/__init__.py +48 -0
  218. ml4t/diagnostic/visualization/data_extraction/importance.py +649 -0
  219. ml4t/diagnostic/visualization/data_extraction/interaction.py +504 -0
  220. ml4t/diagnostic/visualization/data_extraction/types.py +113 -0
  221. ml4t/diagnostic/visualization/data_extraction/validation.py +66 -0
  222. ml4t/diagnostic/visualization/feature_plots.py +888 -0
  223. ml4t/diagnostic/visualization/interaction_plots.py +618 -0
  224. ml4t/diagnostic/visualization/portfolio/__init__.py +41 -0
  225. ml4t/diagnostic/visualization/portfolio/dashboard.py +514 -0
  226. ml4t/diagnostic/visualization/portfolio/drawdown_plots.py +341 -0
  227. ml4t/diagnostic/visualization/portfolio/returns_plots.py +487 -0
  228. ml4t/diagnostic/visualization/portfolio/risk_plots.py +301 -0
  229. ml4t/diagnostic/visualization/report_generation.py +1343 -0
  230. ml4t/diagnostic/visualization/signal/__init__.py +103 -0
  231. ml4t/diagnostic/visualization/signal/dashboard.py +911 -0
  232. ml4t/diagnostic/visualization/signal/event_plots.py +514 -0
  233. ml4t/diagnostic/visualization/signal/ic_plots.py +635 -0
  234. ml4t/diagnostic/visualization/signal/multi_signal_dashboard.py +974 -0
  235. ml4t/diagnostic/visualization/signal/multi_signal_plots.py +603 -0
  236. ml4t/diagnostic/visualization/signal/quantile_plots.py +625 -0
  237. ml4t/diagnostic/visualization/signal/turnover_plots.py +400 -0
  238. ml4t/diagnostic/visualization/trade_shap/__init__.py +90 -0
  239. ml4t_diagnostic-0.1.0a1.dist-info/METADATA +1044 -0
  240. ml4t_diagnostic-0.1.0a1.dist-info/RECORD +242 -0
  241. ml4t_diagnostic-0.1.0a1.dist-info/WHEEL +4 -0
  242. ml4t_diagnostic-0.1.0a1.dist-info/licenses/LICENSE +21 -0
ml4t/diagnostic/results/barrier_results/profit_factor.py
@@ -0,0 +1,297 @@
+ """Profit factor analysis results for barrier outcomes.
+
+ This module provides the ProfitFactorResult class for storing profit factor
+ metrics (Sum(TP returns) / |Sum(SL returns)|) by signal quantile.
+ """
+
+ from __future__ import annotations
+
+ import polars as pl
+ from pydantic import Field, model_validator
+
+ from ml4t.diagnostic.results.barrier_results.validation import _validate_quantile_dict_keys
+ from ml4t.diagnostic.results.base import BaseResult
+
+
+ class ProfitFactorResult(BaseResult):
+     """Results from profit factor analysis by signal decile.
+
+     Profit Factor = Sum(TP returns) / |Sum(SL returns)|
+
+     A profit factor > 1 indicates net profitable trading in that decile.
+
+     Examples
+     --------
+     >>> result = profit_factor_result
+     >>> print(result.summary())
+     >>> df = result.get_dataframe()
+     """
+
+     analysis_type: str = Field(default="barrier_profit_factor", frozen=True)
+
+     # ==========================================================================
+     # Configuration
+     # ==========================================================================
+
+     n_quantiles: int = Field(
+         ...,
+         description="Number of quantiles used",
+     )
+
+     quantile_labels: list[str] = Field(
+         ...,
+         description="Labels for each quantile (e.g., ['D1', 'D2', ..., 'D10'])",
+     )
+
+     # ==========================================================================
+     # Profit Factor by Quantile
+     # ==========================================================================
+
+     profit_factor: dict[str, float] = Field(
+         ...,
+         description="Profit factor per quantile: Sum(TP returns) / |Sum(SL returns)|",
+     )
+
+     # ==========================================================================
+     # Component Sums
+     # ==========================================================================
+
+     sum_tp_returns: dict[str, float] = Field(
+         ...,
+         description="Sum of returns from TP outcomes per quantile",
+     )
+
+     sum_sl_returns: dict[str, float] = Field(
+         ...,
+         description="Sum of returns from SL outcomes per quantile (negative values)",
+     )
+
+     sum_timeout_returns: dict[str, float] = Field(
+         ...,
+         description="Sum of returns from timeout outcomes per quantile",
+     )
+
+     sum_all_returns: dict[str, float] = Field(
+         ...,
+         description="Sum of all returns per quantile",
+     )
+
+     # ==========================================================================
+     # Average Returns
+     # ==========================================================================
+
+     avg_tp_return: dict[str, float] = Field(
+         ...,
+         description="Average return per TP outcome per quantile",
+     )
+
+     avg_sl_return: dict[str, float] = Field(
+         ...,
+         description="Average return per SL outcome per quantile",
+     )
+
+     avg_return: dict[str, float] = Field(
+         ...,
+         description="Average return per quantile (all outcomes)",
+     )
+
+     # ==========================================================================
+     # Counts
+     # ==========================================================================
+
+     count_tp: dict[str, int] = Field(
+         ...,
+         description="Number of TP outcomes per quantile",
+     )
+
+     count_sl: dict[str, int] = Field(
+         ...,
+         description="Number of SL outcomes per quantile",
+     )
+
+     count_total: dict[str, int] = Field(
+         ...,
+         description="Total count per quantile",
+     )
+
+     # ==========================================================================
+     # Aggregates
+     # ==========================================================================
+
+     overall_profit_factor: float = Field(
+         ...,
+         description="Overall profit factor across all observations",
+     )
+
+     overall_sum_returns: float = Field(
+         ...,
+         description="Total sum of all returns",
+     )
+
+     overall_avg_return: float = Field(
+         ...,
+         description="Average return across all observations",
+     )
+
+     n_observations: int = Field(
+         ...,
+         description="Total number of observations analyzed",
+     )
+
+     # ==========================================================================
+     # Monotonicity
+     # ==========================================================================
+
+     pf_monotonic: bool = Field(
+         ...,
+         description="Whether profit factor is monotonic across quantiles",
+     )
+
+     pf_direction: str = Field(
+         ...,
+         description="Direction of PF change: 'increasing', 'decreasing', or 'none'",
+     )
+
+     pf_spearman: float = Field(
+         ...,
+         description="Spearman correlation between quantile rank and profit factor",
+     )
+
+     # ==========================================================================
+     # Validation
+     # ==========================================================================
+
+     @model_validator(mode="after")
+     def _validate_quantile_keys(self) -> ProfitFactorResult:
+         """Validate that all quantile-keyed dicts have consistent keys."""
+         if self.n_quantiles != len(self.quantile_labels):
+             raise ValueError(
+                 f"n_quantiles ({self.n_quantiles}) != len(quantile_labels) ({len(self.quantile_labels)})"
+             )
+         _validate_quantile_dict_keys(
+             self.quantile_labels,
+             [
+                 ("profit_factor", self.profit_factor),
+                 ("sum_tp_returns", self.sum_tp_returns),
+                 ("sum_sl_returns", self.sum_sl_returns),
+                 ("sum_timeout_returns", self.sum_timeout_returns),
+                 ("sum_all_returns", self.sum_all_returns),
+                 ("avg_tp_return", self.avg_tp_return),
+                 ("avg_sl_return", self.avg_sl_return),
+                 ("avg_return", self.avg_return),
+                 ("count_tp", self.count_tp),
+                 ("count_sl", self.count_sl),
+                 ("count_total", self.count_total),
+             ],
+         )
+         return self
+
+     def get_dataframe(self, name: str | None = None) -> pl.DataFrame:
+         """Get results as Polars DataFrame.
+
+         Parameters
+         ----------
+         name : str | None
+             DataFrame to retrieve:
+             - None or "profit_factor": Profit factor by quantile
+             - "returns": Detailed returns breakdown by quantile
+             - "summary": Single-row summary statistics
+
+         Returns
+         -------
+         pl.DataFrame
+             Requested DataFrame
+         """
+         if name is None or name == "profit_factor":
+             return pl.DataFrame(
+                 {
+                     "quantile": self.quantile_labels,
+                     "profit_factor": [self.profit_factor[q] for q in self.quantile_labels],
+                     "avg_return": [self.avg_return[q] for q in self.quantile_labels],
+                     "sum_returns": [self.sum_all_returns[q] for q in self.quantile_labels],
+                     "count_total": [self.count_total[q] for q in self.quantile_labels],
+                 }
+             )
+
+         if name == "returns":
+             return pl.DataFrame(
+                 {
+                     "quantile": self.quantile_labels,
+                     "sum_tp_returns": [self.sum_tp_returns[q] for q in self.quantile_labels],
+                     "sum_sl_returns": [self.sum_sl_returns[q] for q in self.quantile_labels],
+                     "sum_timeout_returns": [
+                         self.sum_timeout_returns[q] for q in self.quantile_labels
+                     ],
+                     "avg_tp_return": [self.avg_tp_return[q] for q in self.quantile_labels],
+                     "avg_sl_return": [self.avg_sl_return[q] for q in self.quantile_labels],
+                     "count_tp": [self.count_tp[q] for q in self.quantile_labels],
+                     "count_sl": [self.count_sl[q] for q in self.quantile_labels],
+                 }
+             )
+
+         if name == "summary":
+             return pl.DataFrame(
+                 {
+                     "metric": [
+                         "n_observations",
+                         "n_quantiles",
+                         "overall_profit_factor",
+                         "overall_sum_returns",
+                         "overall_avg_return",
+                         "pf_monotonic",
+                         "pf_spearman",
+                     ],
+                     "value": [
+                         float(self.n_observations),
+                         float(self.n_quantiles),
+                         self.overall_profit_factor,
+                         self.overall_sum_returns,
+                         self.overall_avg_return,
+                         float(self.pf_monotonic),
+                         self.pf_spearman,
+                     ],
+                 }
+             )
+
+         raise ValueError(
+             f"Unknown DataFrame name: {name}. Available: 'profit_factor', 'returns', 'summary'"
+         )
+
+     def list_available_dataframes(self) -> list[str]:
+         """List available DataFrame views."""
+         return ["profit_factor", "returns", "summary"]
+
+     def summary(self) -> str:
+         """Get human-readable summary of profit factor results."""
+         lines = [
+             "=" * 60,
+             "Barrier Profit Factor Analysis",
+             "=" * 60,
+             "",
+             f"Observations: {self.n_observations:>12,}",
+             f"Quantiles: {self.n_quantiles:>12}",
+             "",
+             "Overall Metrics:",
+             f" Profit Factor: {self.overall_profit_factor:>12.2f}",
+             f" Sum Returns: {self.overall_sum_returns:>12.4f}",
+             f" Avg Return: {self.overall_avg_return:>12.4%}",
+             "",
+             "Monotonicity (PF vs Signal Strength):",
+             f" Monotonic: {'Yes' if self.pf_monotonic else 'No':>12}",
+             f" Direction: {self.pf_direction:>12}",
+             f" Spearman rho: {self.pf_spearman:>12.4f}",
+             "",
+             "-" * 60,
+             "Profit Factor by Quantile:",
+             "-" * 60,
+             f"{'Quantile':<10} {'PF':>8} {'Avg Ret':>10} {'Sum Ret':>12} {'Count':>8}",
+         ]
+
+         for q in self.quantile_labels:
+             pf = self.profit_factor[q]
+             avg = self.avg_return[q]
+             total = self.sum_all_returns[q]
+             count = self.count_total[q]
+             lines.append(f"{q:<10} {pf:>8.2f} {avg:>10.4%} {total:>12.4f} {count:>8,}")
+
+         return "\n".join(lines)
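The profit factor stored per quantile above follows the formula in the module docstring: the sum of take-profit (TP) returns divided by the absolute sum of stop-loss (SL) returns. As a rough illustration of that calculation with Polars, independent of the package (the column names quantile, outcome, and ret are assumptions for this sketch, not the package's actual schema):

import polars as pl

# Hypothetical barrier outcomes: one row per signal observation.
outcomes = pl.DataFrame(
    {
        "quantile": ["D1", "D1", "D1", "D2", "D2", "D2"],
        "outcome": ["TP", "SL", "timeout", "TP", "TP", "SL"],
        "ret": [0.03, -0.02, 0.001, 0.04, 0.025, -0.015],
    }
)

# Profit factor per quantile: Sum(TP returns) / |Sum(SL returns)|.
pf_by_quantile = (
    outcomes.group_by("quantile")
    .agg(
        sum_tp=pl.col("ret").filter(pl.col("outcome") == "TP").sum(),
        sum_sl=pl.col("ret").filter(pl.col("outcome") == "SL").sum(),
    )
    .with_columns(profit_factor=pl.col("sum_tp") / pl.col("sum_sl").abs())
    .sort("quantile")
)
print(pf_by_quantile)

A value above 1 in a quantile means gross TP gains outweigh gross SL losses there, which matches the interpretation given in the ProfitFactorResult docstring.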
ml4t/diagnostic/results/barrier_results/tearsheet.py
@@ -0,0 +1,397 @@
+ """Barrier tear sheet containing all barrier analysis results.
+
+ This module provides the BarrierTearSheet class that aggregates all barrier
+ analysis results (hit rates, profit factor, precision/recall, time-to-target)
+ into a single exportable result object.
+ """
+
+ from __future__ import annotations
+
+ from pathlib import Path
+ from typing import Any
+
+ import polars as pl
+ from pydantic import Field
+
+ from ml4t.diagnostic.results.barrier_results.hit_rate import HitRateResult
+ from ml4t.diagnostic.results.barrier_results.precision_recall import PrecisionRecallResult
+ from ml4t.diagnostic.results.barrier_results.profit_factor import ProfitFactorResult
+ from ml4t.diagnostic.results.barrier_results.time_to_target import TimeToTargetResult
+ from ml4t.diagnostic.results.base import BaseResult
+
+
+ class BarrierTearSheet(BaseResult):
+     """Complete tear sheet containing all barrier analysis results.
+
+     Aggregates hit rates, profit factor, and visualization data into
+     a single exportable result object.
+
+     Examples
+     --------
+     >>> tear_sheet = barrier_analysis.create_tear_sheet()
+     >>> tear_sheet.show()  # Display in Jupyter
+     >>> tear_sheet.save_html("barrier_report.html")
+     """
+
+     analysis_type: str = Field(default="barrier_tear_sheet", frozen=True)
+
+     # ==========================================================================
+     # Component Results
+     # ==========================================================================
+
+     hit_rate_result: HitRateResult | None = Field(
+         default=None,
+         description="Hit rate analysis results",
+     )
+
+     profit_factor_result: ProfitFactorResult | None = Field(
+         default=None,
+         description="Profit factor analysis results",
+     )
+
+     precision_recall_result: PrecisionRecallResult | None = Field(
+         default=None,
+         description="Precision/recall analysis results",
+     )
+
+     time_to_target_result: TimeToTargetResult | None = Field(
+         default=None,
+         description="Time-to-target analysis results",
+     )
+
+     # ==========================================================================
+     # Metadata
+     # ==========================================================================
+
+     signal_name: str = Field(
+         default="signal",
+         description="Name of the signal analyzed",
+     )
+
+     n_assets: int = Field(
+         ...,
+         description="Number of unique assets",
+     )
+
+     n_dates: int = Field(
+         ...,
+         description="Number of unique dates",
+     )
+
+     n_observations: int = Field(
+         ...,
+         description="Total number of observations analyzed",
+     )
+
+     date_range: tuple[str, str] = Field(
+         ...,
+         description="Date range (start, end) in ISO format",
+     )
+
+     # ==========================================================================
+     # Figures (stored as JSON for serialization)
+     # ==========================================================================
+
+     figures: dict[str, Any] = Field(
+         default_factory=dict,
+         description="Plotly figures as JSON (for HTML export)",
+     )
+
+     def get_dataframe(self, name: str | None = None) -> pl.DataFrame:
+         """Get results as Polars DataFrame.
+
+         Parameters
+         ----------
+         name : str | None
+             DataFrame to retrieve - routes to component results
+
+         Returns
+         -------
+         pl.DataFrame
+             Requested DataFrame
+         """
+         if name is None or name == "summary":
+             return self._build_summary_df()
+
+         # Route to component results
+         if name.startswith("hit_rate_"):
+             if self.hit_rate_result is None:
+                 raise ValueError("Hit rate analysis not available")
+             component_name = name[9:] if name != "hit_rate_result" else None
+             return self.hit_rate_result.get_dataframe(component_name)
+
+         if name.startswith("profit_factor_"):
+             if self.profit_factor_result is None:
+                 raise ValueError("Profit factor analysis not available")
+             component_name = name[14:] if name != "profit_factor_result" else None
+             return self.profit_factor_result.get_dataframe(component_name)
+
+         if name.startswith("precision_recall_"):
+             if self.precision_recall_result is None:
+                 raise ValueError("Precision/recall analysis not available")
+             component_name = name[17:] if name != "precision_recall_result" else None
+             return self.precision_recall_result.get_dataframe(component_name)
+
+         if name.startswith("time_to_target_"):
+             if self.time_to_target_result is None:
+                 raise ValueError("Time-to-target analysis not available")
+             component_name = name[15:] if name != "time_to_target_result" else None
+             return self.time_to_target_result.get_dataframe(component_name)
+
+         raise ValueError(
+             f"Unknown DataFrame name: {name}. Use 'summary' or prefix with "
+             "'hit_rate_', 'profit_factor_', 'precision_recall_', 'time_to_target_'"
+         )
+
+     def list_available_dataframes(self) -> list[str]:
+         """List available DataFrame views."""
+         available = ["summary"]
+         if self.hit_rate_result:
+             available.extend(
+                 [f"hit_rate_{n}" for n in self.hit_rate_result.list_available_dataframes()]
+             )
+         if self.profit_factor_result:
+             available.extend(
+                 [
+                     f"profit_factor_{n}"
+                     for n in self.profit_factor_result.list_available_dataframes()
+                 ]
+             )
+         if self.precision_recall_result:
+             available.extend(
+                 [
+                     f"precision_recall_{n}"
+                     for n in self.precision_recall_result.list_available_dataframes()
+                 ]
+             )
+         if self.time_to_target_result:
+             available.extend(
+                 [
+                     f"time_to_target_{n}"
+                     for n in self.time_to_target_result.list_available_dataframes()
+                 ]
+             )
+         return available
+
+     def _build_summary_df(self) -> pl.DataFrame:
+         """Build summary DataFrame with key metrics."""
+         rows: list[dict[str, str]] = [
+             {"metric": "signal_name", "value": self.signal_name},
+             {"metric": "n_assets", "value": str(self.n_assets)},
+             {"metric": "n_dates", "value": str(self.n_dates)},
+             {"metric": "n_observations", "value": str(self.n_observations)},
+             {"metric": "date_range_start", "value": self.date_range[0]},
+             {"metric": "date_range_end", "value": self.date_range[1]},
+         ]
+
+         if self.hit_rate_result:
+             rows.append(
+                 {
+                     "metric": "overall_hit_rate_tp",
+                     "value": f"{self.hit_rate_result.overall_hit_rate_tp:.4f}",
+                 }
+             )
+             rows.append(
+                 {"metric": "chi2_significant", "value": str(self.hit_rate_result.is_significant)}
+             )
+
+         if self.profit_factor_result:
+             rows.append(
+                 {
+                     "metric": "overall_profit_factor",
+                     "value": f"{self.profit_factor_result.overall_profit_factor:.4f}",
+                 }
+             )
+
+         if self.precision_recall_result:
+             rows.append(
+                 {
+                     "metric": "baseline_tp_rate",
+                     "value": f"{self.precision_recall_result.baseline_tp_rate:.4f}",
+                 }
+             )
+             rows.append(
+                 {
+                     "metric": "best_f1_score",
+                     "value": f"{self.precision_recall_result.best_f1_score:.4f}",
+                 }
+             )
+             rows.append(
+                 {
+                     "metric": "best_f1_quantile",
+                     "value": self.precision_recall_result.best_f1_quantile,
+                 }
+             )
+
+         if self.time_to_target_result:
+             rows.append(
+                 {
+                     "metric": "overall_mean_bars",
+                     "value": f"{self.time_to_target_result.overall_mean_bars:.1f}",
+                 }
+             )
+             rows.append(
+                 {
+                     "metric": "overall_mean_bars_tp",
+                     "value": f"{self.time_to_target_result.overall_mean_bars_tp:.1f}",
+                 }
+             )
+
+         return pl.DataFrame(rows)
+
+     def summary(self) -> str:
+         """Get human-readable summary of complete tear sheet."""
+         lines = [
+             "=" * 60,
+             f"Barrier Analysis Tear Sheet: {self.signal_name}",
+             "=" * 60,
+             "",
+             f"Assets: {self.n_assets:>10,}",
+             f"Dates: {self.n_dates:>10,}",
+             f"Observations: {self.n_observations:>10,}",
+             f"Range: {self.date_range[0]} to {self.date_range[1]}",
+             f"Created: {self.created_at}",
+             "",
+         ]
+
+         if self.hit_rate_result:
+             lines.append("--- Hit Rate Analysis ---")
+             lines.append(self.hit_rate_result.summary())
+             lines.append("")
+
+         if self.profit_factor_result:
+             lines.append("--- Profit Factor Analysis ---")
+             lines.append(self.profit_factor_result.summary())
+             lines.append("")
+
+         if self.precision_recall_result:
+             lines.append("--- Precision/Recall Analysis ---")
+             lines.append(self.precision_recall_result.summary())
+             lines.append("")
+
+         if self.time_to_target_result:
+             lines.append("--- Time-to-Target Analysis ---")
+             lines.append(self.time_to_target_result.summary())
+
+         return "\n".join(lines)
+
+     def show(self) -> None:
+         """Display tear sheet in Jupyter notebook."""
+         try:
+             from IPython.display import HTML, display
+
+             display(HTML(f"<h2>Barrier Analysis: {self.signal_name}</h2>"))
+             display(
+                 HTML(
+                     f"<p>{self.n_assets} assets, {self.n_dates} dates, {self.n_observations} observations</p>"
+                 )
+             )
+
+             for _name, fig_json in self.figures.items():
+                 import plotly.io as pio
+
+                 fig = pio.from_json(fig_json)
+                 fig.show()
+
+         except ImportError:
+             print("IPython not available. Use save_html() instead.")
+             print(self.summary())
+
+     def save_html(
+         self,
+         path: str | Path,
+         include_plotlyjs: str | bool = "cdn",
+     ) -> Path:
+         """Save tear sheet as self-contained HTML file.
+
+         Parameters
+         ----------
+         path : str | Path
+             Output file path
+         include_plotlyjs : str | bool
+             How to include plotly.js: 'cdn', 'directory', True (embed), False
+
+         Returns
+         -------
+         Path
+             Path to saved file
+         """
+         import plotly.io as pio
+
+         path = Path(path)
+         path.parent.mkdir(parents=True, exist_ok=True)
+
+         # NOTE: Plotly.js is included via pio.to_html with include_plotlyjs parameter
+         # Do NOT add hardcoded CDN script here - it would duplicate the inclusion
+         html_parts = [
+             "<!DOCTYPE html>",
+             "<html>",
+             "<head>",
+             f"<title>Barrier Analysis: {self.signal_name}</title>",
+             "<style>",
+             "body { font-family: -apple-system, system-ui, sans-serif; margin: 40px; }",
+             "h1 { color: #2C3E50; }",
+             ".summary { background: #f8f9fa; padding: 20px; border-radius: 8px; margin-bottom: 30px; }",
+             ".plot-container { margin-bottom: 40px; }",
+             "</style>",
+             "</head>",
+             "<body>",
+             f"<h1>Barrier Analysis: {self.signal_name}</h1>",
+             "<div class='summary'>",
+             f"<p><strong>Assets:</strong> {self.n_assets:,}</p>",
+             f"<p><strong>Dates:</strong> {self.n_dates:,}</p>",
+             f"<p><strong>Observations:</strong> {self.n_observations:,}</p>",
+             f"<p><strong>Range:</strong> {self.date_range[0]} to {self.date_range[1]}</p>",
+             f"<p><strong>Generated:</strong> {self.created_at}</p>",
+             "</div>",
+         ]
+
+         # Add figures
+         plotlyjs_included = False
+         for name, fig_json in self.figures.items():
+             fig = pio.from_json(fig_json)
+             fig_html = pio.to_html(
+                 fig,
+                 include_plotlyjs=include_plotlyjs if not plotlyjs_included else False,
+                 full_html=False,
+             )
+             html_parts.append("<div class='plot-container'>")
+             html_parts.append(f"<h2>{name.replace('_', ' ').title()}</h2>")
+             html_parts.append(fig_html)
+             html_parts.append("</div>")
+             plotlyjs_included = True
+
+         html_parts.extend(["</body>", "</html>"])
+         path.write_text("\n".join(html_parts))
+
+         return path
+
+     def save_json(self, path: str | Path, exclude_figures: bool = False) -> Path:
+         """Export all metrics as structured JSON.
+
+         Parameters
+         ----------
+         path : str | Path
+             Output file path
+         exclude_figures : bool, default=False
+             If True, exclude figure JSON data to reduce file size
+
+         Returns
+         -------
+         Path
+             Path to saved file
+         """
+         import json
+
+         path = Path(path)
+         path.parent.mkdir(parents=True, exist_ok=True)
+
+         data = self.to_dict(exclude_none=True)
+
+         if exclude_figures:
+             data.pop("figures", None)
+
+         with open(path, "w", encoding="utf-8") as f:
+             json.dump(data, f, indent=2, default=str)
+
+         return path
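As a usage sketch for the prefixed routing in BarrierTearSheet.get_dataframe and the HTML export shown above: the helper below, export_tearsheet, is illustrative and not part of the package; it assumes the tear_sheet argument comes from an upstream barrier analysis step, for example the create_tear_sheet() call in the class docstring.

from pathlib import Path

from ml4t.diagnostic.results.barrier_results.tearsheet import BarrierTearSheet


def export_tearsheet(tear_sheet: BarrierTearSheet, out: str = "barrier_report.html") -> Path:
    """Pull the key views off a tear sheet and write an HTML report."""
    # Whole-tear-sheet overview (one metric per row).
    print(tear_sheet.get_dataframe("summary"))

    # Prefixed names route to component results: "profit_factor_returns"
    # becomes ProfitFactorResult.get_dataframe("returns"), but only when
    # that component was attached by the upstream analysis.
    if "profit_factor_returns" in tear_sheet.list_available_dataframes():
        print(tear_sheet.get_dataframe("profit_factor_returns"))

    # Write a standalone report; with "cdn", plotly.js is referenced once
    # for the first figure and skipped for the rest.
    return tear_sheet.save_html(out, include_plotlyjs="cdn")

Checking list_available_dataframes() before requesting a prefixed view avoids the ValueError that get_dataframe raises when the corresponding component result is None.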