ml4t-diagnostic 0.1.0a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (242) hide show
  1. ml4t/diagnostic/AGENT.md +25 -0
  2. ml4t/diagnostic/__init__.py +166 -0
  3. ml4t/diagnostic/backends/__init__.py +10 -0
  4. ml4t/diagnostic/backends/adapter.py +192 -0
  5. ml4t/diagnostic/backends/polars_backend.py +899 -0
  6. ml4t/diagnostic/caching/__init__.py +40 -0
  7. ml4t/diagnostic/caching/cache.py +331 -0
  8. ml4t/diagnostic/caching/decorators.py +131 -0
  9. ml4t/diagnostic/caching/smart_cache.py +339 -0
  10. ml4t/diagnostic/config/AGENT.md +24 -0
  11. ml4t/diagnostic/config/README.md +267 -0
  12. ml4t/diagnostic/config/__init__.py +219 -0
  13. ml4t/diagnostic/config/barrier_config.py +277 -0
  14. ml4t/diagnostic/config/base.py +301 -0
  15. ml4t/diagnostic/config/event_config.py +148 -0
  16. ml4t/diagnostic/config/feature_config.py +404 -0
  17. ml4t/diagnostic/config/multi_signal_config.py +55 -0
  18. ml4t/diagnostic/config/portfolio_config.py +215 -0
  19. ml4t/diagnostic/config/report_config.py +391 -0
  20. ml4t/diagnostic/config/sharpe_config.py +202 -0
  21. ml4t/diagnostic/config/signal_config.py +206 -0
  22. ml4t/diagnostic/config/trade_analysis_config.py +310 -0
  23. ml4t/diagnostic/config/validation.py +279 -0
  24. ml4t/diagnostic/core/__init__.py +29 -0
  25. ml4t/diagnostic/core/numba_utils.py +315 -0
  26. ml4t/diagnostic/core/purging.py +372 -0
  27. ml4t/diagnostic/core/sampling.py +471 -0
  28. ml4t/diagnostic/errors/__init__.py +205 -0
  29. ml4t/diagnostic/evaluation/AGENT.md +26 -0
  30. ml4t/diagnostic/evaluation/__init__.py +437 -0
  31. ml4t/diagnostic/evaluation/autocorrelation.py +531 -0
  32. ml4t/diagnostic/evaluation/barrier_analysis.py +1050 -0
  33. ml4t/diagnostic/evaluation/binary_metrics.py +910 -0
  34. ml4t/diagnostic/evaluation/dashboard.py +715 -0
  35. ml4t/diagnostic/evaluation/diagnostic_plots.py +1037 -0
  36. ml4t/diagnostic/evaluation/distribution/__init__.py +499 -0
  37. ml4t/diagnostic/evaluation/distribution/moments.py +299 -0
  38. ml4t/diagnostic/evaluation/distribution/tails.py +777 -0
  39. ml4t/diagnostic/evaluation/distribution/tests.py +470 -0
  40. ml4t/diagnostic/evaluation/drift/__init__.py +139 -0
  41. ml4t/diagnostic/evaluation/drift/analysis.py +432 -0
  42. ml4t/diagnostic/evaluation/drift/domain_classifier.py +517 -0
  43. ml4t/diagnostic/evaluation/drift/population_stability_index.py +310 -0
  44. ml4t/diagnostic/evaluation/drift/wasserstein.py +388 -0
  45. ml4t/diagnostic/evaluation/event_analysis.py +647 -0
  46. ml4t/diagnostic/evaluation/excursion.py +390 -0
  47. ml4t/diagnostic/evaluation/feature_diagnostics.py +873 -0
  48. ml4t/diagnostic/evaluation/feature_outcome.py +666 -0
  49. ml4t/diagnostic/evaluation/framework.py +935 -0
  50. ml4t/diagnostic/evaluation/metric_registry.py +255 -0
  51. ml4t/diagnostic/evaluation/metrics/AGENT.md +23 -0
  52. ml4t/diagnostic/evaluation/metrics/__init__.py +133 -0
  53. ml4t/diagnostic/evaluation/metrics/basic.py +160 -0
  54. ml4t/diagnostic/evaluation/metrics/conditional_ic.py +469 -0
  55. ml4t/diagnostic/evaluation/metrics/feature_outcome.py +475 -0
  56. ml4t/diagnostic/evaluation/metrics/ic_statistics.py +446 -0
  57. ml4t/diagnostic/evaluation/metrics/importance_analysis.py +338 -0
  58. ml4t/diagnostic/evaluation/metrics/importance_classical.py +375 -0
  59. ml4t/diagnostic/evaluation/metrics/importance_mda.py +371 -0
  60. ml4t/diagnostic/evaluation/metrics/importance_shap.py +715 -0
  61. ml4t/diagnostic/evaluation/metrics/information_coefficient.py +527 -0
  62. ml4t/diagnostic/evaluation/metrics/interactions.py +772 -0
  63. ml4t/diagnostic/evaluation/metrics/monotonicity.py +226 -0
  64. ml4t/diagnostic/evaluation/metrics/risk_adjusted.py +324 -0
  65. ml4t/diagnostic/evaluation/multi_signal.py +550 -0
  66. ml4t/diagnostic/evaluation/portfolio_analysis/__init__.py +83 -0
  67. ml4t/diagnostic/evaluation/portfolio_analysis/analysis.py +734 -0
  68. ml4t/diagnostic/evaluation/portfolio_analysis/metrics.py +589 -0
  69. ml4t/diagnostic/evaluation/portfolio_analysis/results.py +334 -0
  70. ml4t/diagnostic/evaluation/report_generation.py +824 -0
  71. ml4t/diagnostic/evaluation/signal_selector.py +452 -0
  72. ml4t/diagnostic/evaluation/stat_registry.py +139 -0
  73. ml4t/diagnostic/evaluation/stationarity/__init__.py +97 -0
  74. ml4t/diagnostic/evaluation/stationarity/analysis.py +518 -0
  75. ml4t/diagnostic/evaluation/stationarity/augmented_dickey_fuller.py +296 -0
  76. ml4t/diagnostic/evaluation/stationarity/kpss_test.py +308 -0
  77. ml4t/diagnostic/evaluation/stationarity/phillips_perron.py +365 -0
  78. ml4t/diagnostic/evaluation/stats/AGENT.md +43 -0
  79. ml4t/diagnostic/evaluation/stats/__init__.py +191 -0
  80. ml4t/diagnostic/evaluation/stats/backtest_overfitting.py +219 -0
  81. ml4t/diagnostic/evaluation/stats/bootstrap.py +228 -0
  82. ml4t/diagnostic/evaluation/stats/deflated_sharpe_ratio.py +591 -0
  83. ml4t/diagnostic/evaluation/stats/false_discovery_rate.py +295 -0
  84. ml4t/diagnostic/evaluation/stats/hac_standard_errors.py +108 -0
  85. ml4t/diagnostic/evaluation/stats/minimum_track_record.py +408 -0
  86. ml4t/diagnostic/evaluation/stats/moments.py +164 -0
  87. ml4t/diagnostic/evaluation/stats/rademacher_adjustment.py +436 -0
  88. ml4t/diagnostic/evaluation/stats/reality_check.py +155 -0
  89. ml4t/diagnostic/evaluation/stats/sharpe_inference.py +219 -0
  90. ml4t/diagnostic/evaluation/themes.py +330 -0
  91. ml4t/diagnostic/evaluation/threshold_analysis.py +957 -0
  92. ml4t/diagnostic/evaluation/trade_analysis.py +1136 -0
  93. ml4t/diagnostic/evaluation/trade_dashboard/__init__.py +32 -0
  94. ml4t/diagnostic/evaluation/trade_dashboard/app.py +315 -0
  95. ml4t/diagnostic/evaluation/trade_dashboard/export/__init__.py +18 -0
  96. ml4t/diagnostic/evaluation/trade_dashboard/export/csv.py +82 -0
  97. ml4t/diagnostic/evaluation/trade_dashboard/export/html.py +276 -0
  98. ml4t/diagnostic/evaluation/trade_dashboard/io.py +166 -0
  99. ml4t/diagnostic/evaluation/trade_dashboard/normalize.py +304 -0
  100. ml4t/diagnostic/evaluation/trade_dashboard/stats.py +386 -0
  101. ml4t/diagnostic/evaluation/trade_dashboard/style.py +79 -0
  102. ml4t/diagnostic/evaluation/trade_dashboard/tabs/__init__.py +21 -0
  103. ml4t/diagnostic/evaluation/trade_dashboard/tabs/patterns.py +354 -0
  104. ml4t/diagnostic/evaluation/trade_dashboard/tabs/shap_analysis.py +280 -0
  105. ml4t/diagnostic/evaluation/trade_dashboard/tabs/stat_validation.py +186 -0
  106. ml4t/diagnostic/evaluation/trade_dashboard/tabs/worst_trades.py +236 -0
  107. ml4t/diagnostic/evaluation/trade_dashboard/types.py +129 -0
  108. ml4t/diagnostic/evaluation/trade_shap/__init__.py +102 -0
  109. ml4t/diagnostic/evaluation/trade_shap/alignment.py +188 -0
  110. ml4t/diagnostic/evaluation/trade_shap/characterize.py +413 -0
  111. ml4t/diagnostic/evaluation/trade_shap/cluster.py +302 -0
  112. ml4t/diagnostic/evaluation/trade_shap/explain.py +208 -0
  113. ml4t/diagnostic/evaluation/trade_shap/hypotheses/__init__.py +23 -0
  114. ml4t/diagnostic/evaluation/trade_shap/hypotheses/generator.py +290 -0
  115. ml4t/diagnostic/evaluation/trade_shap/hypotheses/matcher.py +251 -0
  116. ml4t/diagnostic/evaluation/trade_shap/hypotheses/templates.yaml +467 -0
  117. ml4t/diagnostic/evaluation/trade_shap/models.py +386 -0
  118. ml4t/diagnostic/evaluation/trade_shap/normalize.py +116 -0
  119. ml4t/diagnostic/evaluation/trade_shap/pipeline.py +263 -0
  120. ml4t/diagnostic/evaluation/trade_shap_dashboard.py +283 -0
  121. ml4t/diagnostic/evaluation/trade_shap_diagnostics.py +588 -0
  122. ml4t/diagnostic/evaluation/validated_cv.py +535 -0
  123. ml4t/diagnostic/evaluation/visualization.py +1050 -0
  124. ml4t/diagnostic/evaluation/volatility/__init__.py +45 -0
  125. ml4t/diagnostic/evaluation/volatility/analysis.py +351 -0
  126. ml4t/diagnostic/evaluation/volatility/arch.py +258 -0
  127. ml4t/diagnostic/evaluation/volatility/garch.py +460 -0
  128. ml4t/diagnostic/integration/__init__.py +48 -0
  129. ml4t/diagnostic/integration/backtest_contract.py +671 -0
  130. ml4t/diagnostic/integration/data_contract.py +316 -0
  131. ml4t/diagnostic/integration/engineer_contract.py +226 -0
  132. ml4t/diagnostic/logging/__init__.py +77 -0
  133. ml4t/diagnostic/logging/logger.py +245 -0
  134. ml4t/diagnostic/logging/performance.py +234 -0
  135. ml4t/diagnostic/logging/progress.py +234 -0
  136. ml4t/diagnostic/logging/wandb.py +412 -0
  137. ml4t/diagnostic/metrics/__init__.py +9 -0
  138. ml4t/diagnostic/metrics/percentiles.py +128 -0
  139. ml4t/diagnostic/py.typed +1 -0
  140. ml4t/diagnostic/reporting/__init__.py +43 -0
  141. ml4t/diagnostic/reporting/base.py +130 -0
  142. ml4t/diagnostic/reporting/html_renderer.py +275 -0
  143. ml4t/diagnostic/reporting/json_renderer.py +51 -0
  144. ml4t/diagnostic/reporting/markdown_renderer.py +117 -0
  145. ml4t/diagnostic/results/AGENT.md +24 -0
  146. ml4t/diagnostic/results/__init__.py +105 -0
  147. ml4t/diagnostic/results/barrier_results/__init__.py +36 -0
  148. ml4t/diagnostic/results/barrier_results/hit_rate.py +304 -0
  149. ml4t/diagnostic/results/barrier_results/precision_recall.py +266 -0
  150. ml4t/diagnostic/results/barrier_results/profit_factor.py +297 -0
  151. ml4t/diagnostic/results/barrier_results/tearsheet.py +397 -0
  152. ml4t/diagnostic/results/barrier_results/time_to_target.py +305 -0
  153. ml4t/diagnostic/results/barrier_results/validation.py +38 -0
  154. ml4t/diagnostic/results/base.py +177 -0
  155. ml4t/diagnostic/results/event_results.py +349 -0
  156. ml4t/diagnostic/results/feature_results.py +787 -0
  157. ml4t/diagnostic/results/multi_signal_results.py +431 -0
  158. ml4t/diagnostic/results/portfolio_results.py +281 -0
  159. ml4t/diagnostic/results/sharpe_results.py +448 -0
  160. ml4t/diagnostic/results/signal_results/__init__.py +74 -0
  161. ml4t/diagnostic/results/signal_results/ic.py +581 -0
  162. ml4t/diagnostic/results/signal_results/irtc.py +110 -0
  163. ml4t/diagnostic/results/signal_results/quantile.py +392 -0
  164. ml4t/diagnostic/results/signal_results/tearsheet.py +456 -0
  165. ml4t/diagnostic/results/signal_results/turnover.py +213 -0
  166. ml4t/diagnostic/results/signal_results/validation.py +147 -0
  167. ml4t/diagnostic/signal/AGENT.md +17 -0
  168. ml4t/diagnostic/signal/__init__.py +69 -0
  169. ml4t/diagnostic/signal/_report.py +152 -0
  170. ml4t/diagnostic/signal/_utils.py +261 -0
  171. ml4t/diagnostic/signal/core.py +275 -0
  172. ml4t/diagnostic/signal/quantile.py +148 -0
  173. ml4t/diagnostic/signal/result.py +214 -0
  174. ml4t/diagnostic/signal/signal_ic.py +129 -0
  175. ml4t/diagnostic/signal/turnover.py +182 -0
  176. ml4t/diagnostic/splitters/AGENT.md +19 -0
  177. ml4t/diagnostic/splitters/__init__.py +36 -0
  178. ml4t/diagnostic/splitters/base.py +501 -0
  179. ml4t/diagnostic/splitters/calendar.py +421 -0
  180. ml4t/diagnostic/splitters/calendar_config.py +91 -0
  181. ml4t/diagnostic/splitters/combinatorial.py +1064 -0
  182. ml4t/diagnostic/splitters/config.py +322 -0
  183. ml4t/diagnostic/splitters/cpcv/__init__.py +57 -0
  184. ml4t/diagnostic/splitters/cpcv/combinations.py +119 -0
  185. ml4t/diagnostic/splitters/cpcv/partitioning.py +263 -0
  186. ml4t/diagnostic/splitters/cpcv/purge_engine.py +379 -0
  187. ml4t/diagnostic/splitters/cpcv/windows.py +190 -0
  188. ml4t/diagnostic/splitters/group_isolation.py +329 -0
  189. ml4t/diagnostic/splitters/persistence.py +316 -0
  190. ml4t/diagnostic/splitters/utils.py +207 -0
  191. ml4t/diagnostic/splitters/walk_forward.py +757 -0
  192. ml4t/diagnostic/utils/__init__.py +42 -0
  193. ml4t/diagnostic/utils/config.py +542 -0
  194. ml4t/diagnostic/utils/dependencies.py +318 -0
  195. ml4t/diagnostic/utils/sessions.py +127 -0
  196. ml4t/diagnostic/validation/__init__.py +54 -0
  197. ml4t/diagnostic/validation/dataframe.py +274 -0
  198. ml4t/diagnostic/validation/returns.py +280 -0
  199. ml4t/diagnostic/validation/timeseries.py +299 -0
  200. ml4t/diagnostic/visualization/AGENT.md +19 -0
  201. ml4t/diagnostic/visualization/__init__.py +223 -0
  202. ml4t/diagnostic/visualization/backtest/__init__.py +98 -0
  203. ml4t/diagnostic/visualization/backtest/cost_attribution.py +762 -0
  204. ml4t/diagnostic/visualization/backtest/executive_summary.py +895 -0
  205. ml4t/diagnostic/visualization/backtest/interactive_controls.py +673 -0
  206. ml4t/diagnostic/visualization/backtest/statistical_validity.py +874 -0
  207. ml4t/diagnostic/visualization/backtest/tearsheet.py +565 -0
  208. ml4t/diagnostic/visualization/backtest/template_system.py +373 -0
  209. ml4t/diagnostic/visualization/backtest/trade_plots.py +1172 -0
  210. ml4t/diagnostic/visualization/barrier_plots.py +782 -0
  211. ml4t/diagnostic/visualization/core.py +1060 -0
  212. ml4t/diagnostic/visualization/dashboards/__init__.py +36 -0
  213. ml4t/diagnostic/visualization/dashboards/base.py +582 -0
  214. ml4t/diagnostic/visualization/dashboards/importance.py +801 -0
  215. ml4t/diagnostic/visualization/dashboards/interaction.py +263 -0
  216. ml4t/diagnostic/visualization/dashboards.py +43 -0
  217. ml4t/diagnostic/visualization/data_extraction/__init__.py +48 -0
  218. ml4t/diagnostic/visualization/data_extraction/importance.py +649 -0
  219. ml4t/diagnostic/visualization/data_extraction/interaction.py +504 -0
  220. ml4t/diagnostic/visualization/data_extraction/types.py +113 -0
  221. ml4t/diagnostic/visualization/data_extraction/validation.py +66 -0
  222. ml4t/diagnostic/visualization/feature_plots.py +888 -0
  223. ml4t/diagnostic/visualization/interaction_plots.py +618 -0
  224. ml4t/diagnostic/visualization/portfolio/__init__.py +41 -0
  225. ml4t/diagnostic/visualization/portfolio/dashboard.py +514 -0
  226. ml4t/diagnostic/visualization/portfolio/drawdown_plots.py +341 -0
  227. ml4t/diagnostic/visualization/portfolio/returns_plots.py +487 -0
  228. ml4t/diagnostic/visualization/portfolio/risk_plots.py +301 -0
  229. ml4t/diagnostic/visualization/report_generation.py +1343 -0
  230. ml4t/diagnostic/visualization/signal/__init__.py +103 -0
  231. ml4t/diagnostic/visualization/signal/dashboard.py +911 -0
  232. ml4t/diagnostic/visualization/signal/event_plots.py +514 -0
  233. ml4t/diagnostic/visualization/signal/ic_plots.py +635 -0
  234. ml4t/diagnostic/visualization/signal/multi_signal_dashboard.py +974 -0
  235. ml4t/diagnostic/visualization/signal/multi_signal_plots.py +603 -0
  236. ml4t/diagnostic/visualization/signal/quantile_plots.py +625 -0
  237. ml4t/diagnostic/visualization/signal/turnover_plots.py +400 -0
  238. ml4t/diagnostic/visualization/trade_shap/__init__.py +90 -0
  239. ml4t_diagnostic-0.1.0a1.dist-info/METADATA +1044 -0
  240. ml4t_diagnostic-0.1.0a1.dist-info/RECORD +242 -0
  241. ml4t_diagnostic-0.1.0a1.dist-info/WHEEL +4 -0
  242. ml4t_diagnostic-0.1.0a1.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,1343 @@
1
+ """HTML report generation for feature evaluation results.
2
+
3
+ This module provides functions for generating comprehensive HTML reports that combine
4
+ multiple Plotly visualizations with narrative text, analysis summaries, and styling.
5
+
6
+ All report functions follow the standard API defined in docs/plot_api_standards.md:
7
+ - Accept evaluation results from analyze_*() functions
8
+ - Generate self-contained HTML files with embedded plots
9
+ - Support theme customization and styling
10
+ - Provide flexible report templates
11
+
12
+ Example workflow:
13
+ >>> from ml4t.diagnostic.evaluation import analyze_ml_importance, compute_shap_interactions
14
+ >>> from ml4t.diagnostic.visualization import generate_importance_report
15
+ >>>
16
+ >>> # Run evaluations
17
+ >>> importance = analyze_ml_importance(model, X, y)
18
+ >>> interactions = compute_shap_interactions(model, X)
19
+ >>>
20
+ >>> # Generate comprehensive HTML report
21
+ >>> report_path = generate_importance_report(
22
+ ... importance_results=importance,
23
+ ... interaction_results=interactions,
24
+ ... output_file="feature_analysis.html",
25
+ ... title="Feature Analysis Report",
26
+ ... theme="dark"
27
+ ... )
28
+ >>> print(f"Report saved to: {report_path}")
29
+ """
30
+
31
+ from datetime import datetime
32
+ from pathlib import Path
33
+ from typing import Any
34
+
35
+ import plotly.graph_objects as go
36
+
37
+ from ml4t.diagnostic.visualization.core import get_theme_config, validate_theme
38
+ from ml4t.diagnostic.visualization.feature_plots import (
39
+ plot_importance_bar,
40
+ plot_importance_distribution,
41
+ plot_importance_heatmap,
42
+ plot_importance_summary,
43
+ )
44
+ from ml4t.diagnostic.visualization.interaction_plots import (
45
+ plot_interaction_bar,
46
+ plot_interaction_heatmap,
47
+ plot_interaction_network,
48
+ )
49
+
50
+ __all__ = [
51
+ "generate_importance_report",
52
+ "generate_interaction_report",
53
+ "generate_combined_report",
54
+ "combine_figures_to_html",
55
+ "export_figures_to_pdf",
56
+ ]
57
+
58
+
59
def combine_figures_to_html(
    figures: list[go.Figure],
    *,
    title: str = "Analysis Report",
    sections: list[dict[str, Any]] | None = None,
    output_file: str | Path | None = None,
    theme: str | None = None,
    include_toc: bool = True,
) -> str:
    """Combine multiple Plotly figures into a single HTML document.

    This is the core function for generating HTML reports. It converts each
    figure to an HTML ``<div>`` and hands the fragments, together with the
    optional narrative sections, to ``_build_html_document``.

    Parameters
    ----------
    figures : list[go.Figure]
        List of Plotly figure objects to include in the report.
        Figure ``i`` is rendered into a div with id ``plot-{i}``.
    title : str, optional
        Report title. Default is "Analysis Report".
    sections : list[dict[str, Any]] | None, optional
        List of section dictionaries defining report structure. Each section can contain:
        - "title": str - Section heading
        - "text": str - Narrative text
        - "figure_index": int - Index of figure to include (from figures list)
        If None, no section structure is passed to the document builder.
    output_file : str | Path | None, optional
        Path where HTML file should be saved. Missing parent directories are
        created. If None, returns HTML string without saving.
    theme : str | None, optional
        Theme name ("default", "dark", "print", "presentation").
        If None, uses "default". The name is checked with ``validate_theme``.
    include_toc : bool, optional
        Whether to include a table of contents. Default is True.

    Returns
    -------
    str
        If output_file is None: HTML content as string
        If output_file is provided: absolute path to the saved HTML file

    Raises
    ------
    ValueError
        If figures list is empty
        If a section's figure_index is not an integer or is out of range
    TypeError
        If figures contains non-Figure objects

    Examples
    --------
    Generate report with multiple plots:

    >>> from ml4t.diagnostic.visualization import (
    ...     plot_importance_bar,
    ...     plot_importance_heatmap,
    ...     combine_figures_to_html
    ... )
    >>>
    >>> # Create figures
    >>> fig1 = plot_importance_bar(results, top_n=15)
    >>> fig2 = plot_importance_heatmap(results)
    >>>
    >>> # Define sections with narrative
    >>> sections = [
    ...     {
    ...         "title": "Feature Importance Rankings",
    ...         "text": "Top 15 features ranked by consensus importance across methods.",
    ...         "figure_index": 0
    ...     },
    ...     {
    ...         "title": "Method Agreement Analysis",
    ...         "text": "Correlation matrix showing agreement between importance methods.",
    ...         "figure_index": 1
    ...     }
    ... ]
    >>>
    >>> # Generate HTML report
    >>> html_path = combine_figures_to_html(
    ...     figures=[fig1, fig2],
    ...     title="Feature Importance Analysis",
    ...     sections=sections,
    ...     output_file="report.html",
    ...     theme="dark"
    ... )

    Generate simple report without sections:

    >>> figs = [plot_importance_bar(results), plot_importance_heatmap(results)]
    >>> html = combine_figures_to_html(figs, title="Quick Report")
    >>> print(html[:100])  # Preview HTML

    Notes
    -----
    - Plotly.js is referenced from the Plotly CDN (not inlined), so viewing
      the report requires network access.
    - Only the first figure's fragment carries the Plotly.js ``<script>`` tag;
      later figures reuse the already-loaded library.
    - File size depends on number of data points in figures
    """
    import operator

    # Validation
    if not figures:
        raise ValueError("At least one figure is required")

    if not all(isinstance(fig, go.Figure) for fig in figures):
        raise TypeError("All items in figures list must be plotly.graph_objects.Figure instances")

    if sections is not None:
        for i, section in enumerate(sections):
            if "figure_index" not in section:
                continue
            idx = section["figure_index"]
            # operator.index accepts int-like values (including NumPy integers)
            # and rejects floats/strings, which would otherwise surface as an
            # opaque TypeError here or an indexing failure much later.
            try:
                position = operator.index(idx)
            except TypeError:
                raise ValueError(
                    f"Section {i} has non-integer figure_index {idx!r}. "
                    f"Must be an integer between 0 and {len(figures) - 1}"
                ) from None
            if not 0 <= position < len(figures):
                raise ValueError(
                    f"Section {i} has invalid figure_index {idx}. Must be between 0 and {len(figures) - 1}"
                )

    # Validate theme
    theme = theme or "default"
    validate_theme(theme)
    theme_config = get_theme_config(theme)

    # Convert figures to HTML divs.
    # NOTE(review): the Plotly.js <script> tag travels with figure 0. If
    # _build_html_document ever places another figure ahead of figure 0, that
    # figure's inline script would run before the library loads - confirm
    # against the helper.
    figure_htmls = []
    for i, fig in enumerate(figures):
        # First figure includes Plotly.js from CDN, others don't
        include_plotlyjs = "cdn" if i == 0 else False

        fig_html = fig.to_html(
            full_html=False, include_plotlyjs=include_plotlyjs, div_id=f"plot-{i}"
        )
        figure_htmls.append(fig_html)

    # Build HTML content
    html_content = _build_html_document(
        title=title,
        figure_htmls=figure_htmls,
        sections=sections,
        theme_config=theme_config,
        include_toc=include_toc,
    )

    # Return the markup itself when no destination was requested
    if output_file is None:
        return html_content

    output_path = Path(output_file)
    output_path.parent.mkdir(parents=True, exist_ok=True)
    output_path.write_text(html_content, encoding="utf-8")
    return str(output_path.absolute())
210
+
211
+
212
def export_figures_to_pdf(
    figures: list[go.Figure],
    output_file: str | Path,
    *,
    layout: str = "vertical",
    page_size: tuple[int, int] = (800, 600),
    scale: float = 2.0,
) -> str:
    """Export multiple Plotly figures to a single PDF file.

    Arguments are validated first; only then is kaleido checked and the output
    directory created. Rendering itself is delegated to
    ``_export_figures_multipage``.

    Parameters
    ----------
    figures : list[go.Figure]
        List of Plotly figure objects to export.
    output_file : str | Path
        Path where PDF file should be saved. Missing parent directories
        are created.
    layout : str, optional
        Layout mode for figures:
        - "vertical": Each figure on its own page (default)
        - "compact": Multiple small figures per page (not implemented yet)
        Default is "vertical".
    page_size : tuple[int, int], optional
        Page size in pixels (width, height).
        Default is (800, 600), a 4:3 landscape page.
        Common sizes:
        - (800, 600): landscape
        - (600, 800): portrait
        - (1200, 900): Larger landscape
    scale : float, optional
        Resolution scale factor for rendering. Higher values produce
        better quality but larger files. Default is 2.0.

    Returns
    -------
    str
        Path to the generated PDF file, as returned by
        ``_export_figures_multipage``.

    Raises
    ------
    ValueError
        If figures list is empty
        If layout is neither "vertical" nor "compact"
    NotImplementedError
        If layout is "compact"
    ImportError
        If kaleido is not installed
    TypeError
        If figures contains non-Figure objects

    Examples
    --------
    Export multiple plots to PDF:

    >>> from ml4t.diagnostic.visualization import plot_importance_bar, export_figures_to_pdf
    >>>
    >>> fig1 = plot_importance_bar(results, top_n=15)
    >>> fig2 = plot_importance_heatmap(results)
    >>>
    >>> pdf_path = export_figures_to_pdf(
    ...     figures=[fig1, fig2],
    ...     output_file="analysis.pdf",
    ...     page_size=(800, 600),
    ...     scale=2.0
    ... )

    Export with custom page size:

    >>> pdf_path = export_figures_to_pdf(
    ...     figures=[fig1, fig2, fig3],
    ...     output_file="report.pdf",
    ...     page_size=(1200, 900),  # Larger pages
    ...     scale=3.0  # High resolution
    ... )

    Notes
    -----
    - Requires kaleido package: `pip install kaleido`
    - File size depends on plot complexity and scale factor
    - For print quality, use scale >= 2.0
    - For web sharing, use scale = 1.0 to reduce file size
    """
    # Validation
    if not figures:
        raise ValueError("At least one figure is required")

    if not all(isinstance(fig, go.Figure) for fig in figures):
        raise TypeError("All items in figures list must be plotly.graph_objects.Figure instances")

    # Reject unusable layouts up front so a bad argument neither creates
    # directories nor gets masked by a missing-kaleido ImportError.
    if layout == "compact":
        raise NotImplementedError("Compact layout is not yet implemented. Use 'vertical'.")
    if layout != "vertical":
        raise ValueError(f"Invalid layout '{layout}'. Must be 'vertical' or 'compact'.")

    # Check kaleido availability
    try:
        import kaleido  # noqa: F401
    except ImportError as e:
        raise ImportError(
            "kaleido is required for PDF export. Install it with: pip install kaleido"
        ) from e

    # Create output directory if needed
    output_path = Path(output_file)
    output_path.parent.mkdir(parents=True, exist_ok=True)

    # "vertical" is the only implemented layout: each figure gets its own page
    return _export_figures_multipage(figures, output_path, page_size, scale)
322
+
323
+
324
+ def generate_importance_report(
325
+ importance_results: dict[str, Any],
326
+ *,
327
+ output_file: str | Path,
328
+ title: str | None = None,
329
+ theme: str | None = None,
330
+ include_sections: list[str] | None = None,
331
+ top_n: int = 20,
332
+ export_pdf: bool = False,
333
+ pdf_page_size: tuple[int, int] = (800, 600),
334
+ pdf_scale: float = 2.0,
335
+ ) -> str:
336
+ """Generate comprehensive HTML report for feature importance analysis.
337
+
338
+ Creates a multi-section report combining:
339
+ - Executive summary with key findings
340
+ - Consensus importance rankings (bar chart)
341
+ - Method agreement analysis (heatmap)
342
+ - Importance score distributions
343
+ - Interpretation and recommendations
344
+
345
+ Parameters
346
+ ----------
347
+ importance_results : dict[str, Any]
348
+ Results from analyze_ml_importance() containing:
349
+ - "consensus_ranking": Features ranked by consensus
350
+ - "method_results": Individual method results
351
+ - "method_agreement": Cross-method correlations
352
+ - "top_features_consensus": Features in all top-10s
353
+ output_file : str | Path
354
+ Path where HTML report will be saved.
355
+ title : str | None, optional
356
+ Report title. If None, uses "Feature Importance Analysis Report".
357
+ theme : str | None, optional
358
+ Visual theme ("default", "dark", "print", "presentation").
359
+ If None, uses "default".
360
+ include_sections : list[str] | None, optional
361
+ Which sections to include in report. Options:
362
+ - "summary": Executive summary
363
+ - "rankings": Consensus rankings bar chart
364
+ - "agreement": Method agreement heatmap
365
+ - "distributions": Score distributions
366
+ - "recommendations": Interpretation and next steps
367
+ If None, includes all sections.
368
+ top_n : int, optional
369
+ Number of top features to display in charts. Default is 20.
370
+ export_pdf : bool, optional
371
+ If True, also export the report figures to PDF format.
372
+ Default is False (HTML only).
373
+ pdf_page_size : tuple[int, int], optional
374
+ Page size for PDF export (width, height) in pixels.
375
+ Default is (800, 600). Only used if export_pdf=True.
376
+ pdf_scale : float, optional
377
+ Resolution scale for PDF export. Higher = better quality.
378
+ Default is 2.0. Only used if export_pdf=True.
379
+
380
+ Returns
381
+ -------
382
+ str
383
+ Absolute path to generated HTML file.
384
+
385
+ Examples
386
+ --------
387
+ Generate full report:
388
+
389
+ >>> from ml4t.diagnostic.evaluation import analyze_ml_importance
390
+ >>> from ml4t.diagnostic.visualization import generate_importance_report
391
+ >>>
392
+ >>> results = analyze_ml_importance(model, X, y, methods=["mdi", "pfi", "shap"])
393
+ >>> report_path = generate_importance_report(
394
+ ... importance_results=results,
395
+ ... output_file="importance_report.html",
396
+ ... theme="dark"
397
+ ... )
398
+
399
+ Generate minimal report with specific sections:
400
+
401
+ >>> report_path = generate_importance_report(
402
+ ... importance_results=results,
403
+ ... output_file="quick_report.html",
404
+ ... include_sections=["summary", "rankings"],
405
+ ... top_n=10
406
+ ... )
407
+ """
408
+ # Default title
409
+ if title is None:
410
+ title = "Feature Importance Analysis Report"
411
+
412
+ # Default sections
413
+ if include_sections is None:
414
+ include_sections = ["summary", "rankings", "agreement", "distributions", "recommendations"]
415
+
416
+ # Validate sections
417
+ valid_sections = {"summary", "rankings", "agreement", "distributions", "recommendations"}
418
+ invalid = set(include_sections) - valid_sections
419
+ if invalid:
420
+ raise ValueError(f"Invalid sections: {invalid}. Valid options: {valid_sections}")
421
+
422
+ # Generate figures
423
+ figures: list[go.Figure] = []
424
+ sections: list[dict[str, str | int]] = []
425
+
426
+ # Add summary section
427
+ if "summary" in include_sections:
428
+ summary_text = _generate_importance_summary_text(importance_results)
429
+ sections.append({"title": "Executive Summary", "text": summary_text})
430
+
431
+ # Add consensus rankings
432
+ if "rankings" in include_sections:
433
+ fig_bar = plot_importance_bar(importance_results, top_n=top_n, theme=theme)
434
+ figures.append(fig_bar)
435
+ sections.append(
436
+ {
437
+ "title": "Consensus Feature Rankings",
438
+ "text": (
439
+ f"The top {top_n} features ranked by consensus across all importance methods. "
440
+ "Features appearing at the top are consistently identified as important by "
441
+ "multiple methodologies (MDI, PFI, SHAP)."
442
+ ),
443
+ "figure_index": len(figures) - 1,
444
+ }
445
+ )
446
+
447
+ # Add method agreement
448
+ if "agreement" in include_sections:
449
+ fig_heatmap = plot_importance_heatmap(importance_results, theme=theme)
450
+ figures.append(fig_heatmap)
451
+ sections.append(
452
+ {
453
+ "title": "Method Agreement Analysis",
454
+ "text": (
455
+ "Spearman correlation matrix showing agreement between different importance "
456
+ "methods. High correlation (>0.7) indicates methods agree on feature rankings. "
457
+ "Low correlation (<0.5) suggests method-specific biases or feature interactions."
458
+ ),
459
+ "figure_index": len(figures) - 1,
460
+ }
461
+ )
462
+
463
+ # Add distributions
464
+ if "distributions" in include_sections:
465
+ fig_dist = plot_importance_distribution(importance_results, theme=theme)
466
+ figures.append(fig_dist)
467
+ sections.append(
468
+ {
469
+ "title": "Importance Score Distributions",
470
+ "text": (
471
+ "Distribution of importance scores from each method. Overlapping distributions "
472
+ "indicate consensus, while separation suggests method disagreement."
473
+ ),
474
+ "figure_index": len(figures) - 1,
475
+ }
476
+ )
477
+
478
+ # Add recommendations
479
+ if "recommendations" in include_sections:
480
+ rec_text = _generate_importance_recommendations(importance_results)
481
+ sections.append({"title": "Interpretation & Recommendations", "text": rec_text})
482
+
483
+ # Generate HTML
484
+ html_path = combine_figures_to_html(
485
+ figures=figures,
486
+ title=title,
487
+ sections=sections,
488
+ output_file=output_file,
489
+ theme=theme,
490
+ include_toc=True,
491
+ )
492
+
493
+ # Optionally export to PDF
494
+ if export_pdf and figures:
495
+ pdf_path = Path(output_file).with_suffix(".pdf")
496
+ export_figures_to_pdf(
497
+ figures=figures,
498
+ output_file=pdf_path,
499
+ page_size=pdf_page_size,
500
+ scale=pdf_scale,
501
+ )
502
+
503
+ return html_path
504
+
505
+
506
+ def generate_interaction_report(
507
+ interaction_results: dict[str, Any],
508
+ *,
509
+ output_file: str | Path,
510
+ title: str | None = None,
511
+ theme: str | None = None,
512
+ include_sections: list[str] | None = None,
513
+ top_n: int = 20,
514
+ export_pdf: bool = False,
515
+ pdf_page_size: tuple[int, int] = (800, 600),
516
+ pdf_scale: float = 2.0,
517
+ ) -> str:
518
+ """Generate comprehensive HTML report for feature interaction analysis.
519
+
520
+ Creates a multi-section report combining:
521
+ - Top feature pair interactions (bar chart)
522
+ - Full interaction matrix (heatmap)
523
+ - Interaction network graph
524
+ - Interpretation and recommendations
525
+
526
+ Parameters
527
+ ----------
528
+ interaction_results : dict[str, Any]
529
+ Results from compute_shap_interactions() or analyze_interactions().
530
+ output_file : str | Path
531
+ Path where HTML report will be saved.
532
+ title : str | None, optional
533
+ Report title. If None, uses "Feature Interaction Analysis Report".
534
+ theme : str | None, optional
535
+ Visual theme. If None, uses "default".
536
+ include_sections : list[str] | None, optional
537
+ Which sections to include. Options:
538
+ - "top_pairs": Top N strongest interactions (bar)
539
+ - "matrix": Full interaction matrix (heatmap)
540
+ - "network": Interactive network graph
541
+ - "recommendations": Interpretation
542
+ If None, includes all sections.
543
+ top_n : int, optional
544
+ Number of top interactions to display. Default is 20.
545
+ export_pdf : bool, optional
546
+ If True, also export the report figures to PDF format.
547
+ Default is False (HTML only).
548
+ pdf_page_size : tuple[int, int], optional
549
+ Page size for PDF export (width, height) in pixels.
550
+ Default is (800, 600). Only used if export_pdf=True.
551
+ pdf_scale : float, optional
552
+ Resolution scale for PDF export. Higher = better quality.
553
+ Default is 2.0. Only used if export_pdf=True.
554
+
555
+ Returns
556
+ -------
557
+ str
558
+ Absolute path to generated HTML file.
559
+
560
+ Examples
561
+ --------
562
+ >>> from ml4t.diagnostic.evaluation import compute_shap_interactions
563
+ >>> from ml4t.diagnostic.visualization import generate_interaction_report
564
+ >>>
565
+ >>> interactions = compute_shap_interactions(model, X)
566
+ >>> report_path = generate_interaction_report(
567
+ ... interaction_results=interactions,
568
+ ... output_file="interactions.html"
569
+ ... )
570
+ """
571
+ # Default title
572
+ if title is None:
573
+ title = "Feature Interaction Analysis Report"
574
+
575
+ # Default sections
576
+ if include_sections is None:
577
+ include_sections = ["top_pairs", "matrix", "network", "recommendations"]
578
+
579
+ # Generate figures
580
+ figures: list[go.Figure] = []
581
+ sections: list[dict[str, str | int]] = []
582
+
583
+ # Top pairs
584
+ if "top_pairs" in include_sections:
585
+ fig_bar = plot_interaction_bar(interaction_results, top_n=top_n, theme=theme)
586
+ figures.append(fig_bar)
587
+ sections.append(
588
+ {
589
+ "title": f"Top {top_n} Feature Interactions",
590
+ "text": (
591
+ "Strongest pairwise feature interactions ranked by mean absolute interaction strength. "
592
+ "High interaction values indicate non-linear or conditional relationships."
593
+ ),
594
+ "figure_index": len(figures) - 1,
595
+ }
596
+ )
597
+
598
+ # Matrix
599
+ if "matrix" in include_sections:
600
+ fig_heatmap = plot_interaction_heatmap(interaction_results, theme=theme)
601
+ figures.append(fig_heatmap)
602
+ sections.append(
603
+ {
604
+ "title": "Interaction Strength Matrix",
605
+ "text": (
606
+ "Symmetric matrix showing pairwise interaction strengths. "
607
+ "Darker colors indicate stronger interactions."
608
+ ),
609
+ "figure_index": len(figures) - 1,
610
+ }
611
+ )
612
+
613
+ # Network
614
+ if "network" in include_sections:
615
+ fig_network = plot_interaction_network(interaction_results, theme=theme, top_n=top_n)
616
+ figures.append(fig_network)
617
+ sections.append(
618
+ {
619
+ "title": "Interaction Network Graph",
620
+ "text": (
621
+ "Network visualization of feature interactions. Node size represents "
622
+ "feature importance, edge thickness represents interaction strength. "
623
+ "Isolated nodes have weak interactions."
624
+ ),
625
+ "figure_index": len(figures) - 1,
626
+ }
627
+ )
628
+
629
+ # Recommendations
630
+ if "recommendations" in include_sections:
631
+ rec_text = _generate_interaction_recommendations(interaction_results)
632
+ sections.append({"title": "Interpretation & Recommendations", "text": rec_text})
633
+
634
+ # Generate HTML
635
+ html_path = combine_figures_to_html(
636
+ figures=figures,
637
+ title=title,
638
+ sections=sections,
639
+ output_file=output_file,
640
+ theme=theme,
641
+ include_toc=True,
642
+ )
643
+
644
+ # Optionally export to PDF
645
+ if export_pdf and figures:
646
+ pdf_path = Path(output_file).with_suffix(".pdf")
647
+ export_figures_to_pdf(
648
+ figures=figures,
649
+ output_file=pdf_path,
650
+ page_size=pdf_page_size,
651
+ scale=pdf_scale,
652
+ )
653
+
654
+ return html_path
655
+
656
+
657
def generate_combined_report(
    importance_results: dict[str, Any],
    interaction_results: dict[str, Any] | None = None,
    *,
    output_file: str | Path,
    title: str | None = None,
    theme: str | None = None,
    top_n: int = 20,
    export_pdf: bool = False,
    pdf_page_size: tuple[int, int] = (800, 600),
    pdf_scale: float = 2.0,
) -> str:
    """Build one HTML report covering importance and (optionally) interactions.

    The report always contains an overview, the multi-panel importance
    summary and a recommendations section. When ``interaction_results`` is
    supplied, an interaction network and an interaction heatmap are added
    as "Part 2".

    Parameters
    ----------
    importance_results : dict[str, Any]
        Results from analyze_ml_importance().
    interaction_results : dict[str, Any] | None, optional
        Results from compute_shap_interactions(). If None, only the
        importance analysis is included.
    output_file : str | Path
        Path where HTML report will be saved.
    title : str | None, optional
        Report title. If None, uses "Complete Feature Analysis Report".
    theme : str | None, optional
        Visual theme, forwarded unchanged to the plotting helpers and to
        combine_figures_to_html().
    top_n : int, optional
        Number of top interactions passed to the interaction network plot.
        Default is 20. The importance summary panel is rendered with a
        fixed ``top_n=15`` and does not use this value.
    export_pdf : bool, optional
        If True, also export the report figures to a PDF next to the HTML
        file (same path with a ``.pdf`` suffix). Default is False.
    pdf_page_size : tuple[int, int], optional
        Page size for PDF export (width, height) in pixels.
        Default is (800, 600). Only used if export_pdf=True.
    pdf_scale : float, optional
        Resolution scale for PDF export. Default is 2.0.
        Only used if export_pdf=True.

    Returns
    -------
    str
        Path to the generated HTML file, as returned by
        combine_figures_to_html().

    Examples
    --------
    >>> from ml4t.diagnostic.evaluation import analyze_ml_importance, compute_shap_interactions
    >>> from ml4t.diagnostic.visualization import generate_combined_report
    >>>
    >>> importance = analyze_ml_importance(model, X, y)
    >>> interactions = compute_shap_interactions(model, X)
    >>>
    >>> report_path = generate_combined_report(
    ...     importance_results=importance,
    ...     interaction_results=interactions,
    ...     output_file="complete_analysis.html",
    ...     theme="presentation"
    ... )
    """
    report_title = "Complete Feature Analysis Report" if title is None else title

    figures: list[go.Figure] = []
    sections: list[dict[str, str | int]] = []

    def add_figure_section(fig: go.Figure, heading: str, text: str) -> None:
        # Register the figure and a section that points at its position.
        figures.append(fig)
        sections.append({"title": heading, "text": text, "figure_index": len(figures) - 1})

    # --- Overview -----------------------------------------------------------
    sections.append(
        {
            "title": "Analysis Overview",
            "text": _generate_combined_overview(importance_results, interaction_results),
        }
    )

    # --- Part 1: importance -------------------------------------------------
    sections.append({"title": "Part 1: Feature Importance Analysis", "text": ""})

    # NOTE(review): top_n is hard-coded to 15 here rather than using the
    # ``top_n`` argument — confirm whether that is intentional.
    add_figure_section(
        plot_importance_summary(importance_results, top_n=15, theme=theme),
        "Importance Summary (Multi-Panel View)",
        "Comprehensive view of feature importance combining consensus rankings, "
        "method agreement, and score distributions in a single multi-panel visualization.",
    )

    # --- Part 2: interactions (only when results were supplied) -------------
    if interaction_results is not None:
        sections.append({"title": "Part 2: Feature Interaction Analysis", "text": ""})

        add_figure_section(
            plot_interaction_network(interaction_results, theme=theme, top_n=top_n),
            "Interaction Network",
            "Interactive network showing how features interact. Strong interactions "
            "may indicate opportunities for feature engineering.",
        )

        add_figure_section(
            plot_interaction_heatmap(interaction_results, theme=theme),
            "Interaction Matrix",
            "Complete pairwise interaction strength matrix.",
        )

    # --- Recommendations ----------------------------------------------------
    sections.append(
        {
            "title": "Actionable Recommendations",
            "text": _generate_combined_recommendations(importance_results, interaction_results),
        }
    )

    # --- Render ---------------------------------------------------------------
    html_path = combine_figures_to_html(
        figures=figures,
        title=report_title,
        sections=sections,
        output_file=output_file,
        theme=theme,
        include_toc=True,
    )

    if export_pdf and figures:
        export_figures_to_pdf(
            figures=figures,
            output_file=Path(output_file).with_suffix(".pdf"),
            page_size=pdf_page_size,
            scale=pdf_scale,
        )

    return html_path
800
+
801
+
802
+ # ============================================================================
803
+ # Private Helper Functions
804
+ # ============================================================================
805
+
806
+
807
def _build_html_document(
    title: str,
    figure_htmls: list[str],
    sections: list[dict[str, Any]] | None,
    theme_config: dict[str, Any],
    include_toc: bool,
) -> str:
    """Build complete HTML document from components.

    Parameters
    ----------
    title : str
        Document title. Treated as plain text: it is HTML-escaped before
        being placed in ``<title>`` and the page ``<h1>``.
    figure_htmls : list[str]
        List of figure HTML div strings
    sections : list[dict] | None
        Section definitions with title, text, figure_index
    theme_config : dict
        Theme configuration from get_theme_config(). The keys read here are
        "plot_bgcolor", "font_color", "gridcolor" and "template".
    include_toc : bool
        Whether to include table of contents. A TOC is only emitted when
        ``sections`` is also non-empty.

    Returns
    -------
    str
        Complete HTML document
    """
    # Local import keeps this block self-contained; the name ``html`` is
    # deliberately not used for a local variable below.
    from html import escape

    # Extract colors from theme, falling back to a light palette
    bg_color = theme_config.get("plot_bgcolor", "#FFFFFF")
    text_color = theme_config.get("font_color", "#1F1F1F")
    grid_color = theme_config.get("gridcolor", "#E5E5E5")

    # Determine if dark theme: either the template name says so or the
    # background is one of the known dark colors.
    is_dark = "dark" in theme_config.get("template", "").lower() or bg_color in [
        "#1E1E1E",
        "#0E0E0E",
    ]

    # Generate CSS
    css = _generate_css(bg_color, text_color, grid_color, is_dark)

    # Generate TOC if requested
    toc_html = ""
    if include_toc and sections:
        toc_html = _generate_toc(sections)

    # Generate body content
    body_html = _generate_body_content(figure_htmls, sections)

    # Generate timestamp (local time, naive)
    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

    # The title is plain text; escape it so characters such as "&" or "<"
    # cannot corrupt the markup.
    safe_title = escape(title)

    # NOTE(review): the footer link still contains the "yourusername"
    # placeholder — confirm the real repository URL.
    document = f"""<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <meta name="generator" content="ML4T Diagnostic Visualization Library">
    <title>{safe_title}</title>
    <style>
{css}
    </style>
</head>
<body>
    <div class="container">
        <header>
            <h1>{safe_title}</h1>
            <p class="timestamp">Generated: {timestamp}</p>
        </header>

{toc_html}

        <main>
{body_html}
        </main>

        <footer>
            <p>Generated by <a href="https://github.com/yourusername/ml4t-diagnostic" target="_blank">ML4T Diagnostic</a> - Quantitative Evaluation Library</p>
        </footer>
    </div>
</body>
</html>"""

    return document
892
+
893
+
894
+ def _generate_css(bg_color: str, text_color: str, grid_color: str, is_dark: bool) -> str:
895
+ """Generate CSS styles for report."""
896
+ # Derive additional colors
897
+ if is_dark:
898
+ header_bg = "#2A2A2A"
899
+ section_bg = "#252525"
900
+ border_color = "#404040"
901
+ link_color = "#6FA8DC"
902
+ else:
903
+ header_bg = "#F5F5F5"
904
+ section_bg = "#FAFAFA"
905
+ border_color = grid_color
906
+ link_color = "#1A73E8"
907
+
908
+ css = f"""
909
+ /* Reset and base styles */
910
+ * {{
911
+ margin: 0;
912
+ padding: 0;
913
+ box-sizing: border-box;
914
+ }}
915
+
916
+ body {{
917
+ font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, 'Helvetica Neue', Arial, sans-serif;
918
+ background-color: {bg_color};
919
+ color: {text_color};
920
+ line-height: 1.6;
921
+ padding: 20px;
922
+ }}
923
+
924
+ .container {{
925
+ max-width: 1400px;
926
+ margin: 0 auto;
927
+ }}
928
+
929
+ /* Header */
930
+ header {{
931
+ text-align: center;
932
+ padding: 40px 20px;
933
+ background-color: {header_bg};
934
+ border-radius: 8px;
935
+ margin-bottom: 30px;
936
+ }}
937
+
938
+ header h1 {{
939
+ font-size: 2.5em;
940
+ font-weight: 700;
941
+ margin-bottom: 10px;
942
+ }}
943
+
944
+ .timestamp {{
945
+ color: {text_color};
946
+ opacity: 0.7;
947
+ font-size: 0.9em;
948
+ }}
949
+
950
+ /* Table of Contents */
951
+ .toc {{
952
+ background-color: {section_bg};
953
+ padding: 20px;
954
+ border-radius: 8px;
955
+ margin-bottom: 30px;
956
+ border: 1px solid {border_color};
957
+ }}
958
+
959
+ .toc h2 {{
960
+ font-size: 1.5em;
961
+ margin-bottom: 15px;
962
+ }}
963
+
964
+ .toc ul {{
965
+ list-style: none;
966
+ padding-left: 0;
967
+ }}
968
+
969
+ .toc li {{
970
+ margin: 8px 0;
971
+ }}
972
+
973
+ .toc a {{
974
+ color: {link_color};
975
+ text-decoration: none;
976
+ transition: opacity 0.2s;
977
+ }}
978
+
979
+ .toc a:hover {{
980
+ opacity: 0.7;
981
+ }}
982
+
983
+ /* Sections */
984
+ .section {{
985
+ margin-bottom: 50px;
986
+ }}
987
+
988
+ .section-title {{
989
+ font-size: 1.8em;
990
+ font-weight: 600;
991
+ margin-bottom: 15px;
992
+ padding-bottom: 10px;
993
+ border-bottom: 2px solid {border_color};
994
+ }}
995
+
996
+ .section-text {{
997
+ font-size: 1.1em;
998
+ margin-bottom: 20px;
999
+ line-height: 1.8;
1000
+ }}
1001
+
1002
+ /* Plot containers */
1003
+ .plot-container {{
1004
+ margin: 30px 0;
1005
+ padding: 20px;
1006
+ background-color: {section_bg};
1007
+ border-radius: 8px;
1008
+ border: 1px solid {border_color};
1009
+ }}
1010
+
1011
+ /* Footer */
1012
+ footer {{
1013
+ text-align: center;
1014
+ padding: 30px 20px;
1015
+ margin-top: 50px;
1016
+ border-top: 1px solid {border_color};
1017
+ opacity: 0.7;
1018
+ }}
1019
+
1020
+ footer a {{
1021
+ color: {link_color};
1022
+ text-decoration: none;
1023
+ }}
1024
+
1025
+ footer a:hover {{
1026
+ text-decoration: underline;
1027
+ }}
1028
+
1029
+ /* Responsive design */
1030
+ @media (max-width: 768px) {{
1031
+ header h1 {{
1032
+ font-size: 2em;
1033
+ }}
1034
+
1035
+ .section-title {{
1036
+ font-size: 1.5em;
1037
+ }}
1038
+
1039
+ .plot-container {{
1040
+ padding: 10px;
1041
+ }}
1042
+ }}
1043
+
1044
+ /* Print styles */
1045
+ @media print {{
1046
+ body {{
1047
+ background-color: white;
1048
+ color: black;
1049
+ }}
1050
+
1051
+ .container {{
1052
+ max-width: none;
1053
+ }}
1054
+
1055
+ .plot-container {{
1056
+ page-break-inside: avoid;
1057
+ }}
1058
+ }}
1059
+ """
1060
+
1061
+ return css
1062
+
1063
+
1064
+ def _generate_toc(sections: list[dict[str, Any]]) -> str:
1065
+ """Generate table of contents HTML."""
1066
+ toc_items = []
1067
+
1068
+ for i, section in enumerate(sections):
1069
+ section_title = section.get("title", "")
1070
+ if section_title:
1071
+ # Create anchor-safe ID
1072
+ section_id = f"section-{i}"
1073
+ toc_items.append(f' <li><a href="#{section_id}">{section_title}</a></li>')
1074
+
1075
+ toc_html = f""" <nav class="toc">
1076
+ <h2>Table of Contents</h2>
1077
+ <ul>
1078
+ {chr(10).join(toc_items)}
1079
+ </ul>
1080
+ </nav>
1081
+ """
1082
+
1083
+ return toc_html
1084
+
1085
+
1086
+ def _generate_body_content(figure_htmls: list[str], sections: list[dict[str, Any]] | None) -> str:
1087
+ """Generate main body content HTML."""
1088
+ if sections is None:
1089
+ # Simple case: just render all figures sequentially
1090
+ body_parts = []
1091
+ for _i, fig_html in enumerate(figure_htmls):
1092
+ body_parts.append(f""" <div class="plot-container">
1093
+ {fig_html}
1094
+ </div>
1095
+ """)
1096
+ return "\n".join(body_parts)
1097
+
1098
+ # Complex case: render sections with associated figures
1099
+ body_parts = []
1100
+
1101
+ for i, section in enumerate(sections):
1102
+ section_id = f"section-{i}"
1103
+ section_title = section.get("title", "")
1104
+ section_text = section.get("text", "")
1105
+ figure_index = section.get("figure_index")
1106
+
1107
+ # Start section
1108
+ section_html = f' <section class="section" id="{section_id}">\n'
1109
+
1110
+ # Add title if present
1111
+ if section_title:
1112
+ section_html += f' <h2 class="section-title">{section_title}</h2>\n'
1113
+
1114
+ # Add text if present (section_text may contain HTML block elements,
1115
+ # so we don't wrap in <p> to avoid invalid nesting)
1116
+ if section_text:
1117
+ section_html += ' <div class="section-text">\n'
1118
+ section_html += f" {section_text}\n"
1119
+ section_html += " </div>\n"
1120
+
1121
+ # Add figure if specified
1122
+ if figure_index is not None and 0 <= figure_index < len(figure_htmls):
1123
+ section_html += ' <div class="plot-container">\n'
1124
+ section_html += figure_htmls[figure_index]
1125
+ section_html += "\n </div>\n"
1126
+
1127
+ # Close section
1128
+ section_html += " </section>\n"
1129
+
1130
+ body_parts.append(section_html)
1131
+
1132
+ return "\n".join(body_parts)
1133
+
1134
+
1135
+ def _generate_importance_summary_text(results: dict[str, Any]) -> str:
1136
+ """Generate executive summary text for importance analysis."""
1137
+ consensus_ranking = results.get("consensus_ranking", [])
1138
+ top_consensus = results.get("top_features_consensus", [])
1139
+ method_agreement = results.get("method_agreement", {})
1140
+
1141
+ # Calculate average agreement
1142
+ avg_agreement = (
1143
+ sum(method_agreement.values()) / len(method_agreement) if method_agreement else 0.0
1144
+ )
1145
+
1146
+ summary = f"""
1147
+ <p><strong>Key Findings:</strong></p>
1148
+ <ul>
1149
+ <li>Analyzed {len(consensus_ranking)} features across multiple importance methods</li>
1150
+ <li>Top consensus feature: <strong>{consensus_ranking[0] if consensus_ranking else "N/A"}</strong></li>
1151
+ <li>Features with strong consensus: {len(top_consensus)} features appear in all methods' top-10</li>
1152
+ <li>Average method agreement: {avg_agreement:.2f} (Spearman correlation)</li>
1153
+ </ul>
1154
+ """
1155
+
1156
+ return summary.strip()
1157
+
1158
+
1159
+ def _generate_importance_recommendations(_results: dict[str, Any]) -> str:
1160
+ """Generate recommendations text for importance analysis."""
1161
+ recommendations = """
1162
+ <p><strong>Interpretation Guidelines:</strong></p>
1163
+ <ul>
1164
+ <li><strong>High consensus + high agreement</strong>: Trust the rankings - features are robustly important</li>
1165
+ <li><strong>Method disagreement</strong>: Investigate feature-specific biases (MDI vs PFI patterns)</li>
1166
+ <li><strong>SHAP divergence</strong>: Indicates interaction effects - consider feature engineering</li>
1167
+ </ul>
1168
+
1169
+ <p><strong>Next Steps:</strong></p>
1170
+ <ul>
1171
+ <li>Focus on top consensus features for model interpretability</li>
1172
+ <li>Investigate features with large method disagreement</li>
1173
+ <li>Consider removing features with low importance across all methods</li>
1174
+ <li>Analyze SHAP interaction effects for top features</li>
1175
+ </ul>
1176
+ """
1177
+
1178
+ return recommendations.strip()
1179
+
1180
+
1181
+ def _generate_interaction_recommendations(_results: dict[str, Any]) -> str:
1182
+ """Generate recommendations text for interaction analysis."""
1183
+ recommendations = """
1184
+ <p><strong>Interpreting Interactions:</strong></p>
1185
+ <ul>
1186
+ <li><strong>Strong interactions</strong>: Non-linear or conditional relationships between features</li>
1187
+ <li><strong>Network clusters</strong>: Groups of related features that interact strongly</li>
1188
+ <li><strong>Isolated features</strong>: Features with weak interactions (may be independent)</li>
1189
+ </ul>
1190
+
1191
+ <p><strong>Feature Engineering Opportunities:</strong></p>
1192
+ <ul>
1193
+ <li>Create explicit interaction terms for top pairs (e.g., feature_A * feature_B)</li>
1194
+ <li>Consider non-linear transformations for interacting features</li>
1195
+ <li>Investigate domain-specific meanings of top interactions</li>
1196
+ </ul>
1197
+ """
1198
+
1199
+ return recommendations.strip()
1200
+
1201
+
1202
+ def _generate_combined_overview(
1203
+ importance_results: dict[str, Any], interaction_results: dict[str, Any] | None
1204
+ ) -> str:
1205
+ """Generate overview text for combined report."""
1206
+ n_features = len(importance_results.get("consensus_ranking", []))
1207
+
1208
+ overview = f"""
1209
+ <p>This comprehensive report analyzes feature importance and interactions for a machine learning model
1210
+ with {n_features} features. The analysis combines multiple methodologies to provide robust insights.</p>
1211
+
1212
+ <p><strong>Report Contents:</strong></p>
1213
+ <ul>
1214
+ <li><strong>Part 1: Feature Importance</strong> - Which features the model relies on most</li>
1215
+ """
1216
+
1217
+ if interaction_results is not None:
1218
+ overview += """ <li><strong>Part 2: Feature Interactions</strong> - How features combine and interact</li>
1219
+ """
1220
+
1221
+ overview += """ </ul>
1222
+ """
1223
+
1224
+ return overview.strip()
1225
+
1226
+
1227
+ def _generate_combined_recommendations(
1228
+ _importance_results: dict[str, Any], interaction_results: dict[str, Any] | None
1229
+ ) -> str:
1230
+ """Generate combined recommendations."""
1231
+ recommendations = """
1232
+ <p><strong>Prioritized Action Items:</strong></p>
1233
+ <ol>
1234
+ <li><strong>Focus on consensus features</strong>: Top features identified by multiple methods are most reliable</li>
1235
+ <li><strong>Investigate method disagreements</strong>: Understand why different methods rank features differently</li>
1236
+ """
1237
+
1238
+ if interaction_results is not None:
1239
+ recommendations += """ <li><strong>Engineer interaction terms</strong>: Create explicit features for strong interactions</li>
1240
+ <li><strong>Analyze interaction clusters</strong>: Groups of interacting features may represent domain concepts</li>
1241
+ """
1242
+
1243
+ recommendations += """ </ol>
1244
+
1245
+ <p><strong>Model Improvement Strategies:</strong></p>
1246
+ <ul>
1247
+ <li>Remove low-importance features to reduce overfitting risk</li>
1248
+ <li>Add domain knowledge to interpret top features and interactions</li>
1249
+ <li>Consider model architecture changes if interactions are prevalent</li>
1250
+ <li>Validate findings on out-of-sample data</li>
1251
+ </ul>
1252
+ """
1253
+
1254
+ return recommendations.strip()
1255
+
1256
+
1257
def _export_figures_multipage(
    figures: list[go.Figure],
    output_path: Path,
    page_size: tuple[int, int],
    scale: float,
) -> str:
    """Write all figures into one PDF, one figure per page.

    Each figure is rendered to its own temporary PDF through
    ``Figure.write_image`` and the pages are then concatenated with a
    ``PdfWriter`` from pypdf (or PyPDF2 when pypdf is not installed).

    Parameters
    ----------
    figures : list[go.Figure]
        Figures to export, in page order. The originals are not modified;
        a copy of each is resized before rendering.
    output_path : Path
        Destination of the merged PDF.
    page_size : tuple[int, int]
        Page dimensions (width, height) in pixels.
    scale : float
        Rendering scale factor passed to ``write_image``.

    Returns
    -------
    str
        Absolute path of the written PDF.

    Raises
    ------
    ImportError
        If neither pypdf nor PyPDF2 can be imported.
    """
    import tempfile
    from pathlib import Path as TempPath

    # Pick a merger implementation: prefer pypdf, fall back to PyPDF2.
    merger_cls: type
    try:
        from pypdf import PdfWriter as _PypdfWriter
    except ImportError:
        try:
            from PyPDF2 import (
                PdfWriter as _Pypdf2Writer,  # type: ignore[import-not-found,unused-ignore]
            )
        except ImportError as e:
            raise ImportError(
                "pypdf or PyPDF2 is required for PDF merging. Install it with: pip install pypdf"
            ) from e
        merger_cls = _Pypdf2Writer
    else:
        merger_cls = _PypdfWriter

    page_width, page_height = page_size

    # Intermediate single-page PDFs live in a scratch directory that is
    # removed automatically once the merged file has been written.
    with tempfile.TemporaryDirectory() as scratch_dir:
        scratch = TempPath(scratch_dir)
        page_files = []

        # Phase 1: render every figure to its own PDF
        for page_no, figure in enumerate(figures):
            page_file = scratch / f"page_{page_no}.pdf"

            # Work on a copy so the caller's figure keeps its layout
            printable = go.Figure(figure)
            printable.update_layout(
                width=page_width,
                height=page_height,
                margin={"l": 50, "r": 50, "t": 80, "b": 50},  # print margins
            )
            printable.write_image(
                str(page_file),
                format="pdf",
                width=page_width,
                height=page_height,
                scale=scale,
            )

            page_files.append(page_file)

        # Phase 2: concatenate the pages in order
        merged = merger_cls()
        for page_file in page_files:
            merged.append(str(page_file))

        # Phase 3: write the combined document
        with open(output_path, "wb") as sink:
            merged.write(sink)

    return str(output_path.absolute())