ml4t-diagnostic 0.1.0a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (242) hide show
  1. ml4t/diagnostic/AGENT.md +25 -0
  2. ml4t/diagnostic/__init__.py +166 -0
  3. ml4t/diagnostic/backends/__init__.py +10 -0
  4. ml4t/diagnostic/backends/adapter.py +192 -0
  5. ml4t/diagnostic/backends/polars_backend.py +899 -0
  6. ml4t/diagnostic/caching/__init__.py +40 -0
  7. ml4t/diagnostic/caching/cache.py +331 -0
  8. ml4t/diagnostic/caching/decorators.py +131 -0
  9. ml4t/diagnostic/caching/smart_cache.py +339 -0
  10. ml4t/diagnostic/config/AGENT.md +24 -0
  11. ml4t/diagnostic/config/README.md +267 -0
  12. ml4t/diagnostic/config/__init__.py +219 -0
  13. ml4t/diagnostic/config/barrier_config.py +277 -0
  14. ml4t/diagnostic/config/base.py +301 -0
  15. ml4t/diagnostic/config/event_config.py +148 -0
  16. ml4t/diagnostic/config/feature_config.py +404 -0
  17. ml4t/diagnostic/config/multi_signal_config.py +55 -0
  18. ml4t/diagnostic/config/portfolio_config.py +215 -0
  19. ml4t/diagnostic/config/report_config.py +391 -0
  20. ml4t/diagnostic/config/sharpe_config.py +202 -0
  21. ml4t/diagnostic/config/signal_config.py +206 -0
  22. ml4t/diagnostic/config/trade_analysis_config.py +310 -0
  23. ml4t/diagnostic/config/validation.py +279 -0
  24. ml4t/diagnostic/core/__init__.py +29 -0
  25. ml4t/diagnostic/core/numba_utils.py +315 -0
  26. ml4t/diagnostic/core/purging.py +372 -0
  27. ml4t/diagnostic/core/sampling.py +471 -0
  28. ml4t/diagnostic/errors/__init__.py +205 -0
  29. ml4t/diagnostic/evaluation/AGENT.md +26 -0
  30. ml4t/diagnostic/evaluation/__init__.py +437 -0
  31. ml4t/diagnostic/evaluation/autocorrelation.py +531 -0
  32. ml4t/diagnostic/evaluation/barrier_analysis.py +1050 -0
  33. ml4t/diagnostic/evaluation/binary_metrics.py +910 -0
  34. ml4t/diagnostic/evaluation/dashboard.py +715 -0
  35. ml4t/diagnostic/evaluation/diagnostic_plots.py +1037 -0
  36. ml4t/diagnostic/evaluation/distribution/__init__.py +499 -0
  37. ml4t/diagnostic/evaluation/distribution/moments.py +299 -0
  38. ml4t/diagnostic/evaluation/distribution/tails.py +777 -0
  39. ml4t/diagnostic/evaluation/distribution/tests.py +470 -0
  40. ml4t/diagnostic/evaluation/drift/__init__.py +139 -0
  41. ml4t/diagnostic/evaluation/drift/analysis.py +432 -0
  42. ml4t/diagnostic/evaluation/drift/domain_classifier.py +517 -0
  43. ml4t/diagnostic/evaluation/drift/population_stability_index.py +310 -0
  44. ml4t/diagnostic/evaluation/drift/wasserstein.py +388 -0
  45. ml4t/diagnostic/evaluation/event_analysis.py +647 -0
  46. ml4t/diagnostic/evaluation/excursion.py +390 -0
  47. ml4t/diagnostic/evaluation/feature_diagnostics.py +873 -0
  48. ml4t/diagnostic/evaluation/feature_outcome.py +666 -0
  49. ml4t/diagnostic/evaluation/framework.py +935 -0
  50. ml4t/diagnostic/evaluation/metric_registry.py +255 -0
  51. ml4t/diagnostic/evaluation/metrics/AGENT.md +23 -0
  52. ml4t/diagnostic/evaluation/metrics/__init__.py +133 -0
  53. ml4t/diagnostic/evaluation/metrics/basic.py +160 -0
  54. ml4t/diagnostic/evaluation/metrics/conditional_ic.py +469 -0
  55. ml4t/diagnostic/evaluation/metrics/feature_outcome.py +475 -0
  56. ml4t/diagnostic/evaluation/metrics/ic_statistics.py +446 -0
  57. ml4t/diagnostic/evaluation/metrics/importance_analysis.py +338 -0
  58. ml4t/diagnostic/evaluation/metrics/importance_classical.py +375 -0
  59. ml4t/diagnostic/evaluation/metrics/importance_mda.py +371 -0
  60. ml4t/diagnostic/evaluation/metrics/importance_shap.py +715 -0
  61. ml4t/diagnostic/evaluation/metrics/information_coefficient.py +527 -0
  62. ml4t/diagnostic/evaluation/metrics/interactions.py +772 -0
  63. ml4t/diagnostic/evaluation/metrics/monotonicity.py +226 -0
  64. ml4t/diagnostic/evaluation/metrics/risk_adjusted.py +324 -0
  65. ml4t/diagnostic/evaluation/multi_signal.py +550 -0
  66. ml4t/diagnostic/evaluation/portfolio_analysis/__init__.py +83 -0
  67. ml4t/diagnostic/evaluation/portfolio_analysis/analysis.py +734 -0
  68. ml4t/diagnostic/evaluation/portfolio_analysis/metrics.py +589 -0
  69. ml4t/diagnostic/evaluation/portfolio_analysis/results.py +334 -0
  70. ml4t/diagnostic/evaluation/report_generation.py +824 -0
  71. ml4t/diagnostic/evaluation/signal_selector.py +452 -0
  72. ml4t/diagnostic/evaluation/stat_registry.py +139 -0
  73. ml4t/diagnostic/evaluation/stationarity/__init__.py +97 -0
  74. ml4t/diagnostic/evaluation/stationarity/analysis.py +518 -0
  75. ml4t/diagnostic/evaluation/stationarity/augmented_dickey_fuller.py +296 -0
  76. ml4t/diagnostic/evaluation/stationarity/kpss_test.py +308 -0
  77. ml4t/diagnostic/evaluation/stationarity/phillips_perron.py +365 -0
  78. ml4t/diagnostic/evaluation/stats/AGENT.md +43 -0
  79. ml4t/diagnostic/evaluation/stats/__init__.py +191 -0
  80. ml4t/diagnostic/evaluation/stats/backtest_overfitting.py +219 -0
  81. ml4t/diagnostic/evaluation/stats/bootstrap.py +228 -0
  82. ml4t/diagnostic/evaluation/stats/deflated_sharpe_ratio.py +591 -0
  83. ml4t/diagnostic/evaluation/stats/false_discovery_rate.py +295 -0
  84. ml4t/diagnostic/evaluation/stats/hac_standard_errors.py +108 -0
  85. ml4t/diagnostic/evaluation/stats/minimum_track_record.py +408 -0
  86. ml4t/diagnostic/evaluation/stats/moments.py +164 -0
  87. ml4t/diagnostic/evaluation/stats/rademacher_adjustment.py +436 -0
  88. ml4t/diagnostic/evaluation/stats/reality_check.py +155 -0
  89. ml4t/diagnostic/evaluation/stats/sharpe_inference.py +219 -0
  90. ml4t/diagnostic/evaluation/themes.py +330 -0
  91. ml4t/diagnostic/evaluation/threshold_analysis.py +957 -0
  92. ml4t/diagnostic/evaluation/trade_analysis.py +1136 -0
  93. ml4t/diagnostic/evaluation/trade_dashboard/__init__.py +32 -0
  94. ml4t/diagnostic/evaluation/trade_dashboard/app.py +315 -0
  95. ml4t/diagnostic/evaluation/trade_dashboard/export/__init__.py +18 -0
  96. ml4t/diagnostic/evaluation/trade_dashboard/export/csv.py +82 -0
  97. ml4t/diagnostic/evaluation/trade_dashboard/export/html.py +276 -0
  98. ml4t/diagnostic/evaluation/trade_dashboard/io.py +166 -0
  99. ml4t/diagnostic/evaluation/trade_dashboard/normalize.py +304 -0
  100. ml4t/diagnostic/evaluation/trade_dashboard/stats.py +386 -0
  101. ml4t/diagnostic/evaluation/trade_dashboard/style.py +79 -0
  102. ml4t/diagnostic/evaluation/trade_dashboard/tabs/__init__.py +21 -0
  103. ml4t/diagnostic/evaluation/trade_dashboard/tabs/patterns.py +354 -0
  104. ml4t/diagnostic/evaluation/trade_dashboard/tabs/shap_analysis.py +280 -0
  105. ml4t/diagnostic/evaluation/trade_dashboard/tabs/stat_validation.py +186 -0
  106. ml4t/diagnostic/evaluation/trade_dashboard/tabs/worst_trades.py +236 -0
  107. ml4t/diagnostic/evaluation/trade_dashboard/types.py +129 -0
  108. ml4t/diagnostic/evaluation/trade_shap/__init__.py +102 -0
  109. ml4t/diagnostic/evaluation/trade_shap/alignment.py +188 -0
  110. ml4t/diagnostic/evaluation/trade_shap/characterize.py +413 -0
  111. ml4t/diagnostic/evaluation/trade_shap/cluster.py +302 -0
  112. ml4t/diagnostic/evaluation/trade_shap/explain.py +208 -0
  113. ml4t/diagnostic/evaluation/trade_shap/hypotheses/__init__.py +23 -0
  114. ml4t/diagnostic/evaluation/trade_shap/hypotheses/generator.py +290 -0
  115. ml4t/diagnostic/evaluation/trade_shap/hypotheses/matcher.py +251 -0
  116. ml4t/diagnostic/evaluation/trade_shap/hypotheses/templates.yaml +467 -0
  117. ml4t/diagnostic/evaluation/trade_shap/models.py +386 -0
  118. ml4t/diagnostic/evaluation/trade_shap/normalize.py +116 -0
  119. ml4t/diagnostic/evaluation/trade_shap/pipeline.py +263 -0
  120. ml4t/diagnostic/evaluation/trade_shap_dashboard.py +283 -0
  121. ml4t/diagnostic/evaluation/trade_shap_diagnostics.py +588 -0
  122. ml4t/diagnostic/evaluation/validated_cv.py +535 -0
  123. ml4t/diagnostic/evaluation/visualization.py +1050 -0
  124. ml4t/diagnostic/evaluation/volatility/__init__.py +45 -0
  125. ml4t/diagnostic/evaluation/volatility/analysis.py +351 -0
  126. ml4t/diagnostic/evaluation/volatility/arch.py +258 -0
  127. ml4t/diagnostic/evaluation/volatility/garch.py +460 -0
  128. ml4t/diagnostic/integration/__init__.py +48 -0
  129. ml4t/diagnostic/integration/backtest_contract.py +671 -0
  130. ml4t/diagnostic/integration/data_contract.py +316 -0
  131. ml4t/diagnostic/integration/engineer_contract.py +226 -0
  132. ml4t/diagnostic/logging/__init__.py +77 -0
  133. ml4t/diagnostic/logging/logger.py +245 -0
  134. ml4t/diagnostic/logging/performance.py +234 -0
  135. ml4t/diagnostic/logging/progress.py +234 -0
  136. ml4t/diagnostic/logging/wandb.py +412 -0
  137. ml4t/diagnostic/metrics/__init__.py +9 -0
  138. ml4t/diagnostic/metrics/percentiles.py +128 -0
  139. ml4t/diagnostic/py.typed +1 -0
  140. ml4t/diagnostic/reporting/__init__.py +43 -0
  141. ml4t/diagnostic/reporting/base.py +130 -0
  142. ml4t/diagnostic/reporting/html_renderer.py +275 -0
  143. ml4t/diagnostic/reporting/json_renderer.py +51 -0
  144. ml4t/diagnostic/reporting/markdown_renderer.py +117 -0
  145. ml4t/diagnostic/results/AGENT.md +24 -0
  146. ml4t/diagnostic/results/__init__.py +105 -0
  147. ml4t/diagnostic/results/barrier_results/__init__.py +36 -0
  148. ml4t/diagnostic/results/barrier_results/hit_rate.py +304 -0
  149. ml4t/diagnostic/results/barrier_results/precision_recall.py +266 -0
  150. ml4t/diagnostic/results/barrier_results/profit_factor.py +297 -0
  151. ml4t/diagnostic/results/barrier_results/tearsheet.py +397 -0
  152. ml4t/diagnostic/results/barrier_results/time_to_target.py +305 -0
  153. ml4t/diagnostic/results/barrier_results/validation.py +38 -0
  154. ml4t/diagnostic/results/base.py +177 -0
  155. ml4t/diagnostic/results/event_results.py +349 -0
  156. ml4t/diagnostic/results/feature_results.py +787 -0
  157. ml4t/diagnostic/results/multi_signal_results.py +431 -0
  158. ml4t/diagnostic/results/portfolio_results.py +281 -0
  159. ml4t/diagnostic/results/sharpe_results.py +448 -0
  160. ml4t/diagnostic/results/signal_results/__init__.py +74 -0
  161. ml4t/diagnostic/results/signal_results/ic.py +581 -0
  162. ml4t/diagnostic/results/signal_results/irtc.py +110 -0
  163. ml4t/diagnostic/results/signal_results/quantile.py +392 -0
  164. ml4t/diagnostic/results/signal_results/tearsheet.py +456 -0
  165. ml4t/diagnostic/results/signal_results/turnover.py +213 -0
  166. ml4t/diagnostic/results/signal_results/validation.py +147 -0
  167. ml4t/diagnostic/signal/AGENT.md +17 -0
  168. ml4t/diagnostic/signal/__init__.py +69 -0
  169. ml4t/diagnostic/signal/_report.py +152 -0
  170. ml4t/diagnostic/signal/_utils.py +261 -0
  171. ml4t/diagnostic/signal/core.py +275 -0
  172. ml4t/diagnostic/signal/quantile.py +148 -0
  173. ml4t/diagnostic/signal/result.py +214 -0
  174. ml4t/diagnostic/signal/signal_ic.py +129 -0
  175. ml4t/diagnostic/signal/turnover.py +182 -0
  176. ml4t/diagnostic/splitters/AGENT.md +19 -0
  177. ml4t/diagnostic/splitters/__init__.py +36 -0
  178. ml4t/diagnostic/splitters/base.py +501 -0
  179. ml4t/diagnostic/splitters/calendar.py +421 -0
  180. ml4t/diagnostic/splitters/calendar_config.py +91 -0
  181. ml4t/diagnostic/splitters/combinatorial.py +1064 -0
  182. ml4t/diagnostic/splitters/config.py +322 -0
  183. ml4t/diagnostic/splitters/cpcv/__init__.py +57 -0
  184. ml4t/diagnostic/splitters/cpcv/combinations.py +119 -0
  185. ml4t/diagnostic/splitters/cpcv/partitioning.py +263 -0
  186. ml4t/diagnostic/splitters/cpcv/purge_engine.py +379 -0
  187. ml4t/diagnostic/splitters/cpcv/windows.py +190 -0
  188. ml4t/diagnostic/splitters/group_isolation.py +329 -0
  189. ml4t/diagnostic/splitters/persistence.py +316 -0
  190. ml4t/diagnostic/splitters/utils.py +207 -0
  191. ml4t/diagnostic/splitters/walk_forward.py +757 -0
  192. ml4t/diagnostic/utils/__init__.py +42 -0
  193. ml4t/diagnostic/utils/config.py +542 -0
  194. ml4t/diagnostic/utils/dependencies.py +318 -0
  195. ml4t/diagnostic/utils/sessions.py +127 -0
  196. ml4t/diagnostic/validation/__init__.py +54 -0
  197. ml4t/diagnostic/validation/dataframe.py +274 -0
  198. ml4t/diagnostic/validation/returns.py +280 -0
  199. ml4t/diagnostic/validation/timeseries.py +299 -0
  200. ml4t/diagnostic/visualization/AGENT.md +19 -0
  201. ml4t/diagnostic/visualization/__init__.py +223 -0
  202. ml4t/diagnostic/visualization/backtest/__init__.py +98 -0
  203. ml4t/diagnostic/visualization/backtest/cost_attribution.py +762 -0
  204. ml4t/diagnostic/visualization/backtest/executive_summary.py +895 -0
  205. ml4t/diagnostic/visualization/backtest/interactive_controls.py +673 -0
  206. ml4t/diagnostic/visualization/backtest/statistical_validity.py +874 -0
  207. ml4t/diagnostic/visualization/backtest/tearsheet.py +565 -0
  208. ml4t/diagnostic/visualization/backtest/template_system.py +373 -0
  209. ml4t/diagnostic/visualization/backtest/trade_plots.py +1172 -0
  210. ml4t/diagnostic/visualization/barrier_plots.py +782 -0
  211. ml4t/diagnostic/visualization/core.py +1060 -0
  212. ml4t/diagnostic/visualization/dashboards/__init__.py +36 -0
  213. ml4t/diagnostic/visualization/dashboards/base.py +582 -0
  214. ml4t/diagnostic/visualization/dashboards/importance.py +801 -0
  215. ml4t/diagnostic/visualization/dashboards/interaction.py +263 -0
  216. ml4t/diagnostic/visualization/dashboards.py +43 -0
  217. ml4t/diagnostic/visualization/data_extraction/__init__.py +48 -0
  218. ml4t/diagnostic/visualization/data_extraction/importance.py +649 -0
  219. ml4t/diagnostic/visualization/data_extraction/interaction.py +504 -0
  220. ml4t/diagnostic/visualization/data_extraction/types.py +113 -0
  221. ml4t/diagnostic/visualization/data_extraction/validation.py +66 -0
  222. ml4t/diagnostic/visualization/feature_plots.py +888 -0
  223. ml4t/diagnostic/visualization/interaction_plots.py +618 -0
  224. ml4t/diagnostic/visualization/portfolio/__init__.py +41 -0
  225. ml4t/diagnostic/visualization/portfolio/dashboard.py +514 -0
  226. ml4t/diagnostic/visualization/portfolio/drawdown_plots.py +341 -0
  227. ml4t/diagnostic/visualization/portfolio/returns_plots.py +487 -0
  228. ml4t/diagnostic/visualization/portfolio/risk_plots.py +301 -0
  229. ml4t/diagnostic/visualization/report_generation.py +1343 -0
  230. ml4t/diagnostic/visualization/signal/__init__.py +103 -0
  231. ml4t/diagnostic/visualization/signal/dashboard.py +911 -0
  232. ml4t/diagnostic/visualization/signal/event_plots.py +514 -0
  233. ml4t/diagnostic/visualization/signal/ic_plots.py +635 -0
  234. ml4t/diagnostic/visualization/signal/multi_signal_dashboard.py +974 -0
  235. ml4t/diagnostic/visualization/signal/multi_signal_plots.py +603 -0
  236. ml4t/diagnostic/visualization/signal/quantile_plots.py +625 -0
  237. ml4t/diagnostic/visualization/signal/turnover_plots.py +400 -0
  238. ml4t/diagnostic/visualization/trade_shap/__init__.py +90 -0
  239. ml4t_diagnostic-0.1.0a1.dist-info/METADATA +1044 -0
  240. ml4t_diagnostic-0.1.0a1.dist-info/RECORD +242 -0
  241. ml4t_diagnostic-0.1.0a1.dist-info/WHEEL +4 -0
  242. ml4t_diagnostic-0.1.0a1.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,895 @@
1
+ """Executive summary visualizations for backtest analysis.
2
+
3
+ Provides KPI cards with traffic lights (red/yellow/green) and
4
+ automated insight generation for backtest results.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ from dataclasses import dataclass
10
+ from typing import TYPE_CHECKING, Any, Literal
11
+
12
+ import numpy as np
13
+ import plotly.graph_objects as go
14
+ from plotly.subplots import make_subplots
15
+
16
+ from ml4t.diagnostic.visualization.core import (
17
+ get_theme_config,
18
+ validate_theme,
19
+ )
20
+
21
+ if TYPE_CHECKING:
22
+ import polars as pl
23
+
24
+
25
+ # =============================================================================
26
+ # Default Thresholds for Traffic Lights
27
+ # =============================================================================
28
+
29
# Per-metric configuration for the traffic-light KPI cards.
#
# Each entry maps a metric name to:
#   "red" / "yellow" / "green" : (low, high) bounds for that colour. The lookup
#       in get_traffic_light_color tests ``low <= value < high``, so the lower
#       bound is inclusive and the upper bound is exclusive.
#   "format" : a ``str.format`` template used to render the value as text.
#   "label"  : the human-readable name shown on the card.
#   "higher_is_better" : direction flag for the metric.
#       NOTE(review): no code visible in this part of the file reads this flag;
#       confirm where (or whether) it is consumed.
#
# max_drawdown and volatility place "green" on the low side, so their bands
# assume values expressed as positive fractions (e.g. 0.12 for 12%) -- TODO
# confirm against the code that produces these metrics.
DEFAULT_THRESHOLDS: dict[str, dict[str, Any]] = {
    "sharpe_ratio": {
        "red": (-float("inf"), 0.5),
        "yellow": (0.5, 1.5),
        "green": (1.5, float("inf")),
        "format": "{:.2f}",
        "label": "Sharpe Ratio",
        "higher_is_better": True,
    },
    "sortino_ratio": {
        "red": (-float("inf"), 0.5),
        "yellow": (0.5, 1.5),
        "green": (1.5, float("inf")),
        "format": "{:.2f}",
        "label": "Sortino Ratio",
        "higher_is_better": True,
    },
    "calmar_ratio": {
        "red": (-float("inf"), 0.5),
        "yellow": (0.5, 1.0),
        "green": (1.0, float("inf")),
        "format": "{:.2f}",
        "label": "Calmar Ratio",
        "higher_is_better": True,
    },
    "cagr": {
        "red": (-float("inf"), 0.05),
        "yellow": (0.05, 0.15),
        "green": (0.15, float("inf")),
        "format": "{:.1%}",
        "label": "CAGR",
        "higher_is_better": True,
    },
    "total_return": {
        "red": (-float("inf"), 0.0),
        "yellow": (0.0, 0.20),
        "green": (0.20, float("inf")),
        "format": "{:.1%}",
        "label": "Total Return",
        "higher_is_better": True,
    },
    "max_drawdown": {
        "red": (0.30, float("inf")),
        "yellow": (0.15, 0.30),
        "green": (-float("inf"), 0.15),
        "format": "{:.1%}",
        "label": "Max Drawdown",
        "higher_is_better": False,
    },
    "win_rate": {
        "red": (-float("inf"), 0.40),
        "yellow": (0.40, 0.55),
        "green": (0.55, float("inf")),
        "format": "{:.1%}",
        "label": "Win Rate",
        "higher_is_better": True,
    },
    "profit_factor": {
        "red": (-float("inf"), 1.0),
        "yellow": (1.0, 1.5),
        "green": (1.5, float("inf")),
        "format": "{:.2f}",
        "label": "Profit Factor",
        "higher_is_better": True,
    },
    "expectancy": {
        "red": (-float("inf"), 0.0),
        "yellow": (0.0, 50.0),
        "green": (50.0, float("inf")),
        "format": "${:.2f}",
        "label": "Expectancy",
        "higher_is_better": True,
    },
    "avg_trade": {
        "red": (-float("inf"), 0.0),
        "yellow": (0.0, 25.0),
        "green": (25.0, float("inf")),
        "format": "${:.2f}",
        "label": "Avg Trade",
        "higher_is_better": True,
    },
    "n_trades": {
        "red": (-float("inf"), 30),
        "yellow": (30, 100),
        "green": (100, float("inf")),
        "format": "{:.0f}",
        "label": "Total Trades",
        "higher_is_better": True,
    },
    "volatility": {
        "red": (0.30, float("inf")),
        "yellow": (0.15, 0.30),
        "green": (-float("inf"), 0.15),
        "format": "{:.1%}",
        "label": "Volatility",
        "higher_is_better": False,
    },
}
127
+
128
# Hex colours keyed by the traffic-light names returned by
# get_traffic_light_color. "neutral" is the fallback used when a metric has no
# thresholds, its value is NaN, or its value matches no band.
TRAFFIC_LIGHT_COLORS: dict[str, str] = {
    "green": "#28A745",
    "yellow": "#FFC107",
    "red": "#DC3545",
    "neutral": "#6C757D",
}
135
+
136
+
137
+ # =============================================================================
138
+ # Traffic Light Functions
139
+ # =============================================================================
140
+
141
+
142
def get_traffic_light_color(
    value: float,
    metric_name: str,
    thresholds: dict[str, dict[str, Any]] | None = None,
) -> str:
    """Determine the traffic light color for a metric value.

    Each color band is a ``(low, high)`` pair matched as ``low <= value < high``.
    A band whose upper bound is ``+inf`` additionally matches ``value == +inf``
    so that unbounded metrics (for example a profit factor with no losing
    trades) land in the open-ended band instead of falling through.

    Parameters
    ----------
    value : float
        The metric value to evaluate. ``None`` and NaN yield "neutral".
    metric_name : str
        Name of the metric. Metrics missing from ``thresholds`` yield "neutral".
    thresholds : dict, optional
        Custom thresholds. Uses DEFAULT_THRESHOLDS if None.

    Returns
    -------
    str
        Color code: "green", "yellow", "red", or "neutral"
    """
    if thresholds is None:
        thresholds = DEFAULT_THRESHOLDS

    config = thresholds.get(metric_name)
    if config is None:
        return "neutral"

    # Missing or undefined values cannot be ranked against any band.
    if value is None or np.isnan(value):
        return "neutral"

    positive_infinity = float("inf")

    # Bands are checked best-to-worst; the first matching band wins.
    for color in ("green", "yellow", "red"):
        band = config.get(color)
        if band is None:
            # Custom thresholds may define only some of the colors.
            continue
        low, high = band
        if low <= value < high:
            return color
        # The half-open test can never match +inf, so close the open-ended band.
        if value == high == positive_infinity:
            return color

    return "neutral"
182
+
183
+
184
def _format_metric_value(
    value: float,
    metric_name: str,
    thresholds: dict[str, dict[str, Any]] | None = None,
) -> str:
    """Render a metric value as display text.

    Parameters
    ----------
    value : float
        The metric value; NaN is rendered as "N/A"
    metric_name : str
        Name of the metric whose format template should be used
    thresholds : dict, optional
        Per-metric configuration holding the "format" templates.
        Uses DEFAULT_THRESHOLDS if None.

    Returns
    -------
    str
        The formatted value, using two decimals when the metric is unknown
        or its entry has no "format" template
    """
    lookup = DEFAULT_THRESHOLDS if thresholds is None else thresholds

    if np.isnan(value):
        return "N/A"

    # Unknown metrics fall back to a plain two-decimal rendering.
    if metric_name not in lookup:
        return f"{value:.2f}"

    template = lookup[metric_name].get("format", "{:.2f}")
    return template.format(value)
216
+
217
+
218
def _get_metric_label(
    metric_name: str,
    thresholds: dict[str, dict[str, Any]] | None = None,
) -> str:
    """Look up the display label for a metric.

    Parameters
    ----------
    metric_name : str
        Internal metric name (e.g. "max_drawdown")
    thresholds : dict, optional
        Per-metric configuration holding the "label" entries.
        Uses DEFAULT_THRESHOLDS if None.

    Returns
    -------
    str
        The configured label, or the metric name with underscores replaced
        by spaces and title-cased when no label is configured
    """
    table = DEFAULT_THRESHOLDS if thresholds is None else thresholds

    # Derived label used whenever no explicit one is configured.
    fallback = metric_name.replace("_", " ").title()

    if metric_name not in table:
        return fallback

    return table[metric_name].get("label", fallback)
243
+
244
+
245
+ # =============================================================================
246
+ # Metric Card Creation
247
+ # =============================================================================
248
+
249
+
250
def create_metric_card(
    metric_name: str,
    value: float,
    *,
    delta: float | None = None,
    delta_reference: str | None = None,
    sparkline_data: list[float] | None = None,
    thresholds: dict[str, dict[str, Any]] | None = None,
    theme: str | None = None,
) -> go.Figure:
    """Create a single KPI metric card with traffic light indicator.

    Parameters
    ----------
    metric_name : str
        Name of the metric (e.g., "sharpe_ratio", "max_drawdown")
    value : float
        Current metric value
    delta : float, optional
        Change from reference (e.g., vs benchmark or previous period).
        Displayed in the same number format as the metric itself.
    delta_reference : str, optional
        Label for delta reference (e.g., "vs Benchmark", "vs YTD")
    sparkline_data : list[float], optional
        Rolling values for mini sparkline. Drawn only when more than two
        points are supplied.
    thresholds : dict, optional
        Custom thresholds for traffic light
    theme : str, optional
        Plot theme

    Returns
    -------
    go.Figure
        Single metric card as Plotly figure
    """
    theme = validate_theme(theme)
    theme_config = get_theme_config(theme)

    # Traffic light color drives the number, sparkline and status dot.
    color_name = get_traffic_light_color(value, metric_name, thresholds)
    color = TRAFFIC_LIGHT_COLORS.get(color_name, TRAFFIC_LIGHT_COLORS["neutral"])

    label = _get_metric_label(metric_name, thresholds)
    value_format = _get_plotly_format(metric_name, thresholds)

    title_text = f"<b>{label}</b>"
    if delta_reference:
        title_text += f"<br><span style='font-size:12px'>{delta_reference}</span>"

    delta_config = None
    if delta is not None:
        delta_config = {
            # Plotly shows value - reference, so this reproduces ``delta``.
            "reference": value - delta,
            "relative": False,
            # Use the metric's own format; a fixed percent format would
            # mislabel ratio and currency deltas.
            "valueformat": value_format,
        }

    fig = go.Figure()

    fig.add_trace(
        go.Indicator(
            mode="number" if delta_config is None else "number+delta",
            value=value,
            number={
                "font": {"size": 48, "color": color},
                "valueformat": value_format,
            },
            delta=delta_config,
            title={"text": title_text, "font": {"size": 16}},
            domain={"x": [0, 1], "y": [0.3, 1]},
        )
    )

    # Add sparkline if provided
    if sparkline_data is not None and len(sparkline_data) > 2:
        # Convert "#RRGGBB" into components for a translucent fill.
        red, green, blue = (int(color[i : i + 2], 16) for i in (1, 3, 5))
        fig.add_trace(
            go.Scatter(
                y=sparkline_data,
                mode="lines",
                line={"color": color, "width": 2},
                fill="tozeroy",
                fillcolor=f"rgba({red}, {green}, {blue}, 0.2)",
                showlegend=False,
                xaxis="x2",
                yaxis="y2",
            )
        )

        # The sparkline lives on its own hidden axes below the indicator.
        hidden_axis = {"showticklabels": False, "showgrid": False, "zeroline": False}
        fig.update_layout(
            xaxis2={"domain": [0.1, 0.9], "anchor": "y2", **hidden_axis},
            yaxis2={"domain": [0.05, 0.25], "anchor": "x2", **hidden_axis},
        )

    # Add traffic light circle
    fig.add_shape(
        type="circle",
        x0=0.85,
        y0=0.85,
        x1=0.95,
        y1=0.95,
        xref="paper",
        yref="paper",
        fillcolor=color,
        line={"color": color},
    )

    # Merge instead of passing both as keyword arguments: a theme layout that
    # defines height, width or margin would otherwise raise a duplicate
    # keyword TypeError. The card's explicit settings take precedence.
    layout = dict(theme_config["layout"])
    layout.update(
        height=200,
        width=250,
        margin={"l": 20, "r": 20, "t": 40, "b": 20},
    )
    fig.update_layout(**layout)

    return fig
380
+
381
+
382
def _get_plotly_format(
    metric_name: str,
    thresholds: dict[str, dict[str, Any]] | None = None,
) -> str:
    """Convert a metric's Python format template to a Plotly d3 format.

    The format spec of the first replacement field is reused as-is, so the
    precision and type of custom templates are preserved (``"{:.2%}"`` gives
    ``".2%"``, ``"{:.3f}"`` gives ``".3f"``). A literal ``$`` in front of the
    field becomes the d3 currency symbol (``"${:.2f}"`` gives ``"$.2f"``).

    Parameters
    ----------
    metric_name : str
        Metric name
    thresholds : dict, optional
        Thresholds with format strings. Uses DEFAULT_THRESHOLDS if None.

    Returns
    -------
    str
        Plotly d3 format string; ".2f" when the metric is unknown or its
        template has no usable format spec
    """
    # Local import keeps this helper self-contained.
    from string import Formatter

    fallback = ".2f"

    if thresholds is None:
        thresholds = DEFAULT_THRESHOLDS

    if metric_name not in thresholds:
        return fallback

    py_fmt = thresholds[metric_name].get("format", "{:.2f}")

    try:
        pieces = list(Formatter().parse(py_fmt))
    except ValueError:
        # Malformed template (e.g. unbalanced braces).
        return fallback

    leading_text = ""
    for literal_text, field_name, format_spec, _conversion in pieces:
        leading_text += literal_text
        if field_name is None:
            # Pure literal chunk with no replacement field.
            continue
        if not format_spec:
            return fallback
        symbol = "$" if "$" in leading_text else ""
        return f"{symbol}{format_spec}"

    return fallback
419
+
420
+
421
+ # =============================================================================
422
+ # Executive Summary Grid
423
+ # =============================================================================
424
+
425
+
426
def create_executive_summary(
    metrics: dict[str, float],
    *,
    selected_metrics: list[str] | None = None,
    thresholds: dict[str, dict[str, Any]] | None = None,
    benchmark_metrics: dict[str, float] | None = None,
    rolling_metrics: dict[str, list[float]] | None = None,
    title: str = "Executive Summary",
    theme: str | None = None,
    cols: int = 3,
    height: int | None = None,
    width: int | None = None,
) -> go.Figure:
    """Create executive summary grid with KPI cards and traffic lights.

    Parameters
    ----------
    metrics : dict[str, float]
        Dictionary of metric name to value
    selected_metrics : list[str], optional
        Specific metrics to display. If None, uses sensible defaults. When
        none of the selected metrics is present in ``metrics``, the first six
        entries of ``metrics`` are shown instead.
    thresholds : dict, optional
        Custom thresholds for traffic lights
    benchmark_metrics : dict[str, float], optional
        Benchmark values for delta display. Deltas use the metric's own
        number format, and their colors follow the metric's
        "higher_is_better" flag (an increase is shown red when lower is
        better).
    rolling_metrics : dict[str, list[float]], optional
        Rolling values for sparklines. Accepted for interface compatibility;
        this grid does not currently draw sparklines.
    title : str, default "Executive Summary"
        Dashboard title
    theme : str, optional
        Plot theme ("default", "dark", "print", "presentation")
    cols : int, default 3
        Number of columns in the grid
    height : int, optional
        Figure height
    width : int, optional
        Figure width

    Returns
    -------
    go.Figure
        Executive summary dashboard with KPI cards

    Raises
    ------
    ValueError
        If ``cols`` is less than 1.

    Examples
    --------
    >>> from ml4t.diagnostic.visualization.backtest import create_executive_summary
    >>> metrics = {
    ...     "sharpe_ratio": 1.85,
    ...     "max_drawdown": 0.12,
    ...     "win_rate": 0.58,
    ...     "profit_factor": 1.75,
    ...     "cagr": 0.22,
    ...     "n_trades": 156,
    ... }
    >>> fig = create_executive_summary(metrics)
    >>> fig.show()
    """
    if cols < 1:
        raise ValueError(f"cols must be a positive integer, got {cols}")

    theme = validate_theme(theme)
    theme_config = get_theme_config(theme)

    if thresholds is None:
        thresholds = DEFAULT_THRESHOLDS

    # Default metrics selection
    if selected_metrics is None:
        selected_metrics = [
            "sharpe_ratio",
            "cagr",
            "max_drawdown",
            "win_rate",
            "profit_factor",
            "n_trades",
        ]

    # Filter to available metrics, falling back to whatever was supplied.
    available_metrics = [name for name in selected_metrics if name in metrics]
    if not available_metrics:
        available_metrics = list(metrics)[:6]

    # Keep at least one row so an empty metrics dict still yields a valid,
    # titled (but empty) figure instead of failing inside make_subplots.
    rows = max(1, (len(available_metrics) + cols - 1) // cols)

    fig = make_subplots(
        rows=rows,
        cols=cols,
        specs=[[{"type": "indicator"}] * cols for _ in range(rows)],
        vertical_spacing=0.15,
        horizontal_spacing=0.08,
    )

    good_color = TRAFFIC_LIGHT_COLORS["green"]
    bad_color = TRAFFIC_LIGHT_COLORS["red"]

    for idx, metric_name in enumerate(available_metrics):
        row_idx, col_idx = divmod(idx, cols)

        metric_value = metrics.get(metric_name, np.nan)

        color_name = get_traffic_light_color(metric_value, metric_name, thresholds)
        color = TRAFFIC_LIGHT_COLORS.get(color_name, TRAFFIC_LIGHT_COLORS["neutral"])

        label = _get_metric_label(metric_name, thresholds)
        value_format = _get_plotly_format(metric_name, thresholds)

        # Show a delta only when a benchmark value exists for this metric.
        benchmark_value = None
        if benchmark_metrics:
            benchmark_value = benchmark_metrics.get(metric_name)

        delta_config = None
        if benchmark_value is not None:
            # For metrics where lower is better (e.g. drawdown, volatility),
            # an increase over the benchmark is a deterioration.
            higher_is_better = thresholds.get(metric_name, {}).get("higher_is_better", True)
            up_color, down_color = (
                (good_color, bad_color) if higher_is_better else (bad_color, good_color)
            )
            delta_config = {
                "reference": benchmark_value,
                "relative": False,
                "valueformat": value_format,
                "increasing": {"color": up_color},
                "decreasing": {"color": down_color},
            }

        fig.add_trace(
            go.Indicator(
                mode="number" if delta_config is None else "number+delta",
                value=metric_value,
                number={
                    "font": {"size": 36, "color": color},
                    "valueformat": value_format,
                },
                delta=delta_config,
                title={"text": f"<b>{label}</b>", "font": {"size": 14}},
            ),
            row=row_idx + 1,
            col=col_idx + 1,
        )

    # Calculate dimensions
    card_height = 180
    if height is None:
        height = rows * card_height + 100

    if width is None:
        width = cols * 280 + 100

    # Theme layout first, then our explicit settings so they take precedence
    # over any title/height/width/margin the theme defines.
    layout_updates = {
        **theme_config["layout"],
        "title": {
            "text": f"<b>{title}</b>",
            "font": {"size": 20},
            "x": 0.5,
            "xanchor": "center",
        },
        "height": height,
        "width": width,
        "margin": {"l": 40, "r": 40, "t": 80, "b": 40},
    }

    fig.update_layout(**layout_updates)

    return fig
590
+
591
+
592
+ # =============================================================================
593
+ # Automated Insights Generation
594
+ # =============================================================================
595
+
596
+
597
@dataclass
class Insight:
    """One automatically generated observation about a backtest.

    Attributes
    ----------
    category : {"strength", "weakness", "warning", "info"}
        Kind of observation
    metric : str
        Name of the metric the observation refers to
    message : str
        Human-readable text of the observation
    severity : int
        Score on a 1-5 scale
    value : float, optional
        Metric value associated with the observation
    threshold : float, optional
        Cut-off associated with the observation
    """

    # Required fields, in positional order.
    category: Literal["strength", "weakness", "warning", "info"]
    metric: str
    message: str
    severity: int  # 1-5 scale

    # Optional numeric context.
    value: float | None = None
    threshold: float | None = None
607
+
608
+
609
def create_key_insights(
    metrics: dict[str, float],
    *,
    trades_df: pl.DataFrame | None = None,
    equity_df: pl.DataFrame | None = None,
    max_insights: int = 5,
    thresholds: dict[str, dict[str, Any]] | None = None,
) -> list[Insight]:
    """Generate automated insights from backtest metrics.

    Inspects a handful of well-known metrics and emits human-readable
    findings (strengths, weaknesses, warnings, informational notes).
    Metrics that are absent, ``None`` or NaN simply produce no insight.

    Parameters
    ----------
    metrics : dict[str, float]
        Dictionary of metric name to value. Recognised keys are
        ``sharpe_ratio``, ``max_drawdown``, ``win_rate``, ``profit_factor``,
        ``n_trades``, ``cagr``, ``volatility`` and ``expectancy``.
        ``max_drawdown`` is evaluated by magnitude, so both the positive
        (0.35) and negative (-0.35) conventions are handled.
    trades_df : pl.DataFrame, optional
        Trade-level data. Accepted for API compatibility; not consulted
        by the current implementation.
    equity_df : pl.DataFrame, optional
        Equity curve data. Accepted for API compatibility; not consulted
        by the current implementation.
    max_insights : int, default 5
        Maximum number of insights to return. Values below zero are
        treated as zero.
    thresholds : dict, optional
        Accepted for API compatibility; the cut-offs applied below are
        hard-coded and this argument is not consulted.

    Returns
    -------
    list[Insight]
        Insights sorted by descending severity (ties keep evaluation
        order), truncated to ``max_insights`` entries.

    Examples
    --------
    >>> insights = create_key_insights({"sharpe_ratio": 2.1, "max_drawdown": 0.35})
    >>> for insight in insights:
    ...     print(f"[{insight.category}] {insight.message}")
    [strength] Sharpe ratio of 2.10 is excellent (top 10% of strategies)
    [warning] Maximum drawdown of 35.0% exceeds typical institutional tolerance (20%)
    """
    insights: list[Insight] = []

    def _add(
        category: str,
        metric: str,
        message: str,
        severity: int,
        value: float,
        threshold: float | None = None,
    ) -> None:
        # Single place where findings are recorded; keeps the rules below terse.
        insights.append(
            Insight(
                category=category,
                metric=metric,
                message=message,
                severity=severity,
                value=value,
                threshold=threshold,
            )
        )

    # --- Sharpe Ratio Insights ---
    sharpe = metrics.get("sharpe_ratio")
    if sharpe is not None:
        if sharpe >= 2.0:
            _add(
                "strength",
                "sharpe_ratio",
                f"Sharpe ratio of {sharpe:.2f} is excellent (top 10% of strategies)",
                5,
                sharpe,
                2.0,
            )
        elif sharpe >= 1.5:
            _add(
                "strength",
                "sharpe_ratio",
                f"Sharpe ratio of {sharpe:.2f} indicates strong risk-adjusted performance",
                4,
                sharpe,
                1.5,
            )
        elif sharpe < 0.5:
            _add(
                "weakness",
                "sharpe_ratio",
                f"Sharpe ratio of {sharpe:.2f} suggests poor risk-adjusted returns",
                4,
                sharpe,
                0.5,
            )

    # --- Maximum Drawdown Insights ---
    raw_dd = metrics.get("max_drawdown")
    if raw_dd is not None:
        # Work on the magnitude: a drawdown reported as -0.35 must not be
        # mistaken for "below 10%" and praised as capital preservation.
        dd = abs(raw_dd)
        if dd > 0.30:
            # The reported threshold is the 20% tolerance quoted in the message.
            _add(
                "warning",
                "max_drawdown",
                f"Maximum drawdown of {dd:.1%} exceeds typical institutional tolerance (20%)",
                5,
                dd,
                0.20,
            )
        elif dd > 0.20:
            _add(
                "warning",
                "max_drawdown",
                f"Maximum drawdown of {dd:.1%} is elevated - consider risk controls",
                3,
                dd,
                0.20,
            )
        elif dd < 0.10:
            _add(
                "strength",
                "max_drawdown",
                f"Maximum drawdown of {dd:.1%} shows excellent capital preservation",
                4,
                dd,
                0.10,
            )

    # --- Win Rate + Profit Factor Combination ---
    wr = metrics.get("win_rate")
    pf = metrics.get("profit_factor")
    if wr is not None and pf is not None:
        if wr < 0.50 and pf > 1.5:
            _add(
                "info",
                "win_rate",
                f"Win rate of {wr:.1%} with profit factor {pf:.2f} suggests effective 'let winners run' approach",
                3,
                wr,
            )
        elif wr > 0.60 and pf < 1.2:
            _add(
                "warning",
                "profit_factor",
                f"High win rate ({wr:.1%}) but low profit factor ({pf:.2f}) - winners may be too small",
                3,
                pf,
            )

    # --- Trade Count Insights ---
    n = metrics.get("n_trades")
    if n is not None:
        if n < 30:
            _add(
                "warning",
                "n_trades",
                f"Only {n:.0f} trades - insufficient for statistical significance",
                4,
                n,
                30,
            )
        elif n > 500:
            # Record the cut-off that was actually applied.
            _add(
                "strength",
                "n_trades",
                f"{n:.0f} trades provides strong statistical validity",
                3,
                n,
                500,
            )

    # --- CAGR vs Volatility (Risk-adjusted) ---
    cagr = metrics.get("cagr")
    vol = metrics.get("volatility")
    if cagr is not None and vol is not None and cagr > 0 and vol > 0:
        return_per_risk = cagr / vol
        if return_per_risk > 1.0:
            _add(
                "strength",
                "cagr",
                f"Return/risk ratio of {return_per_risk:.2f} indicates efficient risk utilization",
                3,
                return_per_risk,
            )

    # --- Profit Factor Insights ---
    if pf is not None:
        if pf < 1.0:
            _add(
                "weakness",
                "profit_factor",
                f"Profit factor of {pf:.2f} indicates net losing strategy",
                5,
                pf,
                1.0,
            )
        elif pf > 2.0:
            _add(
                "strength",
                "profit_factor",
                f"Profit factor of {pf:.2f} shows strong edge in winner/loser ratio",
                4,
                pf,
                2.0,
            )

    # --- Expectancy Insights ---
    exp = metrics.get("expectancy")
    if exp is not None:
        if exp < 0:
            _add(
                "weakness",
                "expectancy",
                f"Negative expectancy (${exp:.2f}) - strategy loses money on average per trade",
                5,
                exp,
                0,
            )
        elif exp > 100:
            # Record the cut-off that was actually applied.
            _add(
                "strength",
                "expectancy",
                f"Strong expectancy of ${exp:.2f} per trade provides robust edge",
                4,
                exp,
                100,
            )

    # Most severe first; list.sort is stable so ties keep evaluation order.
    insights.sort(key=lambda item: item.severity, reverse=True)
    # Clamp so a negative limit yields an empty list rather than slicing
    # from the end.
    return insights[: max(max_insights, 0)]
851
+
852
+
853
def format_insights_html(insights: list[Insight]) -> str:
    """Format insights as HTML for embedding in reports.

    Each insight becomes one coloured card with a category icon and its
    message. The message text is HTML-escaped (``&``, ``<``, ``>``) so that
    arbitrary text cannot break or inject markup; quotes are left as-is.

    Parameters
    ----------
    insights : list[Insight]
        List of insights to format. Only ``category`` and ``message`` are
        read from each item.

    Returns
    -------
    str
        HTML string with styled insight cards wrapped in a single ``<div>``.
        An empty input yields just the empty wrapper.
    """
    # Local import keeps this block self-contained.
    from html import escape

    category_icons = {
        "strength": '<span style="color: #28A745; font-size: 18px;">&#10004;</span>',  # Checkmark
        "weakness": '<span style="color: #DC3545; font-size: 18px;">&#10006;</span>',  # X
        "warning": '<span style="color: #FFC107; font-size: 18px;">&#9888;</span>',  # Warning
        "info": '<span style="color: #17A2B8; font-size: 18px;">&#8505;</span>',  # Info
    }

    category_colors = {
        "strength": "#d4edda",
        "weakness": "#f8d7da",
        "warning": "#fff3cd",
        "info": "#d1ecf1",
    }

    html_parts = ['<div style="margin: 20px 0;">']

    for insight in insights:
        # Unknown categories fall back to no icon and a neutral background.
        icon = category_icons.get(insight.category, "")
        bg_color = category_colors.get(insight.category, "#f8f9fa")
        # Escape text content only; quote=False keeps apostrophes readable.
        message = escape(str(insight.message), quote=False)

        html_parts.append(f"""
        <div style="background-color: {bg_color}; padding: 12px 16px; margin: 8px 0;
                    border-radius: 6px; display: flex; align-items: center;">
            <span style="margin-right: 12px;">{icon}</span>
            <span style="flex: 1;">{message}</span>
        </div>
        """)

    html_parts.append("</div>")
    return "\n".join(html_parts)