ml4t-diagnostic 0.1.0a1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ml4t/diagnostic/AGENT.md +25 -0
- ml4t/diagnostic/__init__.py +166 -0
- ml4t/diagnostic/backends/__init__.py +10 -0
- ml4t/diagnostic/backends/adapter.py +192 -0
- ml4t/diagnostic/backends/polars_backend.py +899 -0
- ml4t/diagnostic/caching/__init__.py +40 -0
- ml4t/diagnostic/caching/cache.py +331 -0
- ml4t/diagnostic/caching/decorators.py +131 -0
- ml4t/diagnostic/caching/smart_cache.py +339 -0
- ml4t/diagnostic/config/AGENT.md +24 -0
- ml4t/diagnostic/config/README.md +267 -0
- ml4t/diagnostic/config/__init__.py +219 -0
- ml4t/diagnostic/config/barrier_config.py +277 -0
- ml4t/diagnostic/config/base.py +301 -0
- ml4t/diagnostic/config/event_config.py +148 -0
- ml4t/diagnostic/config/feature_config.py +404 -0
- ml4t/diagnostic/config/multi_signal_config.py +55 -0
- ml4t/diagnostic/config/portfolio_config.py +215 -0
- ml4t/diagnostic/config/report_config.py +391 -0
- ml4t/diagnostic/config/sharpe_config.py +202 -0
- ml4t/diagnostic/config/signal_config.py +206 -0
- ml4t/diagnostic/config/trade_analysis_config.py +310 -0
- ml4t/diagnostic/config/validation.py +279 -0
- ml4t/diagnostic/core/__init__.py +29 -0
- ml4t/diagnostic/core/numba_utils.py +315 -0
- ml4t/diagnostic/core/purging.py +372 -0
- ml4t/diagnostic/core/sampling.py +471 -0
- ml4t/diagnostic/errors/__init__.py +205 -0
- ml4t/diagnostic/evaluation/AGENT.md +26 -0
- ml4t/diagnostic/evaluation/__init__.py +437 -0
- ml4t/diagnostic/evaluation/autocorrelation.py +531 -0
- ml4t/diagnostic/evaluation/barrier_analysis.py +1050 -0
- ml4t/diagnostic/evaluation/binary_metrics.py +910 -0
- ml4t/diagnostic/evaluation/dashboard.py +715 -0
- ml4t/diagnostic/evaluation/diagnostic_plots.py +1037 -0
- ml4t/diagnostic/evaluation/distribution/__init__.py +499 -0
- ml4t/diagnostic/evaluation/distribution/moments.py +299 -0
- ml4t/diagnostic/evaluation/distribution/tails.py +777 -0
- ml4t/diagnostic/evaluation/distribution/tests.py +470 -0
- ml4t/diagnostic/evaluation/drift/__init__.py +139 -0
- ml4t/diagnostic/evaluation/drift/analysis.py +432 -0
- ml4t/diagnostic/evaluation/drift/domain_classifier.py +517 -0
- ml4t/diagnostic/evaluation/drift/population_stability_index.py +310 -0
- ml4t/diagnostic/evaluation/drift/wasserstein.py +388 -0
- ml4t/diagnostic/evaluation/event_analysis.py +647 -0
- ml4t/diagnostic/evaluation/excursion.py +390 -0
- ml4t/diagnostic/evaluation/feature_diagnostics.py +873 -0
- ml4t/diagnostic/evaluation/feature_outcome.py +666 -0
- ml4t/diagnostic/evaluation/framework.py +935 -0
- ml4t/diagnostic/evaluation/metric_registry.py +255 -0
- ml4t/diagnostic/evaluation/metrics/AGENT.md +23 -0
- ml4t/diagnostic/evaluation/metrics/__init__.py +133 -0
- ml4t/diagnostic/evaluation/metrics/basic.py +160 -0
- ml4t/diagnostic/evaluation/metrics/conditional_ic.py +469 -0
- ml4t/diagnostic/evaluation/metrics/feature_outcome.py +475 -0
- ml4t/diagnostic/evaluation/metrics/ic_statistics.py +446 -0
- ml4t/diagnostic/evaluation/metrics/importance_analysis.py +338 -0
- ml4t/diagnostic/evaluation/metrics/importance_classical.py +375 -0
- ml4t/diagnostic/evaluation/metrics/importance_mda.py +371 -0
- ml4t/diagnostic/evaluation/metrics/importance_shap.py +715 -0
- ml4t/diagnostic/evaluation/metrics/information_coefficient.py +527 -0
- ml4t/diagnostic/evaluation/metrics/interactions.py +772 -0
- ml4t/diagnostic/evaluation/metrics/monotonicity.py +226 -0
- ml4t/diagnostic/evaluation/metrics/risk_adjusted.py +324 -0
- ml4t/diagnostic/evaluation/multi_signal.py +550 -0
- ml4t/diagnostic/evaluation/portfolio_analysis/__init__.py +83 -0
- ml4t/diagnostic/evaluation/portfolio_analysis/analysis.py +734 -0
- ml4t/diagnostic/evaluation/portfolio_analysis/metrics.py +589 -0
- ml4t/diagnostic/evaluation/portfolio_analysis/results.py +334 -0
- ml4t/diagnostic/evaluation/report_generation.py +824 -0
- ml4t/diagnostic/evaluation/signal_selector.py +452 -0
- ml4t/diagnostic/evaluation/stat_registry.py +139 -0
- ml4t/diagnostic/evaluation/stationarity/__init__.py +97 -0
- ml4t/diagnostic/evaluation/stationarity/analysis.py +518 -0
- ml4t/diagnostic/evaluation/stationarity/augmented_dickey_fuller.py +296 -0
- ml4t/diagnostic/evaluation/stationarity/kpss_test.py +308 -0
- ml4t/diagnostic/evaluation/stationarity/phillips_perron.py +365 -0
- ml4t/diagnostic/evaluation/stats/AGENT.md +43 -0
- ml4t/diagnostic/evaluation/stats/__init__.py +191 -0
- ml4t/diagnostic/evaluation/stats/backtest_overfitting.py +219 -0
- ml4t/diagnostic/evaluation/stats/bootstrap.py +228 -0
- ml4t/diagnostic/evaluation/stats/deflated_sharpe_ratio.py +591 -0
- ml4t/diagnostic/evaluation/stats/false_discovery_rate.py +295 -0
- ml4t/diagnostic/evaluation/stats/hac_standard_errors.py +108 -0
- ml4t/diagnostic/evaluation/stats/minimum_track_record.py +408 -0
- ml4t/diagnostic/evaluation/stats/moments.py +164 -0
- ml4t/diagnostic/evaluation/stats/rademacher_adjustment.py +436 -0
- ml4t/diagnostic/evaluation/stats/reality_check.py +155 -0
- ml4t/diagnostic/evaluation/stats/sharpe_inference.py +219 -0
- ml4t/diagnostic/evaluation/themes.py +330 -0
- ml4t/diagnostic/evaluation/threshold_analysis.py +957 -0
- ml4t/diagnostic/evaluation/trade_analysis.py +1136 -0
- ml4t/diagnostic/evaluation/trade_dashboard/__init__.py +32 -0
- ml4t/diagnostic/evaluation/trade_dashboard/app.py +315 -0
- ml4t/diagnostic/evaluation/trade_dashboard/export/__init__.py +18 -0
- ml4t/diagnostic/evaluation/trade_dashboard/export/csv.py +82 -0
- ml4t/diagnostic/evaluation/trade_dashboard/export/html.py +276 -0
- ml4t/diagnostic/evaluation/trade_dashboard/io.py +166 -0
- ml4t/diagnostic/evaluation/trade_dashboard/normalize.py +304 -0
- ml4t/diagnostic/evaluation/trade_dashboard/stats.py +386 -0
- ml4t/diagnostic/evaluation/trade_dashboard/style.py +79 -0
- ml4t/diagnostic/evaluation/trade_dashboard/tabs/__init__.py +21 -0
- ml4t/diagnostic/evaluation/trade_dashboard/tabs/patterns.py +354 -0
- ml4t/diagnostic/evaluation/trade_dashboard/tabs/shap_analysis.py +280 -0
- ml4t/diagnostic/evaluation/trade_dashboard/tabs/stat_validation.py +186 -0
- ml4t/diagnostic/evaluation/trade_dashboard/tabs/worst_trades.py +236 -0
- ml4t/diagnostic/evaluation/trade_dashboard/types.py +129 -0
- ml4t/diagnostic/evaluation/trade_shap/__init__.py +102 -0
- ml4t/diagnostic/evaluation/trade_shap/alignment.py +188 -0
- ml4t/diagnostic/evaluation/trade_shap/characterize.py +413 -0
- ml4t/diagnostic/evaluation/trade_shap/cluster.py +302 -0
- ml4t/diagnostic/evaluation/trade_shap/explain.py +208 -0
- ml4t/diagnostic/evaluation/trade_shap/hypotheses/__init__.py +23 -0
- ml4t/diagnostic/evaluation/trade_shap/hypotheses/generator.py +290 -0
- ml4t/diagnostic/evaluation/trade_shap/hypotheses/matcher.py +251 -0
- ml4t/diagnostic/evaluation/trade_shap/hypotheses/templates.yaml +467 -0
- ml4t/diagnostic/evaluation/trade_shap/models.py +386 -0
- ml4t/diagnostic/evaluation/trade_shap/normalize.py +116 -0
- ml4t/diagnostic/evaluation/trade_shap/pipeline.py +263 -0
- ml4t/diagnostic/evaluation/trade_shap_dashboard.py +283 -0
- ml4t/diagnostic/evaluation/trade_shap_diagnostics.py +588 -0
- ml4t/diagnostic/evaluation/validated_cv.py +535 -0
- ml4t/diagnostic/evaluation/visualization.py +1050 -0
- ml4t/diagnostic/evaluation/volatility/__init__.py +45 -0
- ml4t/diagnostic/evaluation/volatility/analysis.py +351 -0
- ml4t/diagnostic/evaluation/volatility/arch.py +258 -0
- ml4t/diagnostic/evaluation/volatility/garch.py +460 -0
- ml4t/diagnostic/integration/__init__.py +48 -0
- ml4t/diagnostic/integration/backtest_contract.py +671 -0
- ml4t/diagnostic/integration/data_contract.py +316 -0
- ml4t/diagnostic/integration/engineer_contract.py +226 -0
- ml4t/diagnostic/logging/__init__.py +77 -0
- ml4t/diagnostic/logging/logger.py +245 -0
- ml4t/diagnostic/logging/performance.py +234 -0
- ml4t/diagnostic/logging/progress.py +234 -0
- ml4t/diagnostic/logging/wandb.py +412 -0
- ml4t/diagnostic/metrics/__init__.py +9 -0
- ml4t/diagnostic/metrics/percentiles.py +128 -0
- ml4t/diagnostic/py.typed +1 -0
- ml4t/diagnostic/reporting/__init__.py +43 -0
- ml4t/diagnostic/reporting/base.py +130 -0
- ml4t/diagnostic/reporting/html_renderer.py +275 -0
- ml4t/diagnostic/reporting/json_renderer.py +51 -0
- ml4t/diagnostic/reporting/markdown_renderer.py +117 -0
- ml4t/diagnostic/results/AGENT.md +24 -0
- ml4t/diagnostic/results/__init__.py +105 -0
- ml4t/diagnostic/results/barrier_results/__init__.py +36 -0
- ml4t/diagnostic/results/barrier_results/hit_rate.py +304 -0
- ml4t/diagnostic/results/barrier_results/precision_recall.py +266 -0
- ml4t/diagnostic/results/barrier_results/profit_factor.py +297 -0
- ml4t/diagnostic/results/barrier_results/tearsheet.py +397 -0
- ml4t/diagnostic/results/barrier_results/time_to_target.py +305 -0
- ml4t/diagnostic/results/barrier_results/validation.py +38 -0
- ml4t/diagnostic/results/base.py +177 -0
- ml4t/diagnostic/results/event_results.py +349 -0
- ml4t/diagnostic/results/feature_results.py +787 -0
- ml4t/diagnostic/results/multi_signal_results.py +431 -0
- ml4t/diagnostic/results/portfolio_results.py +281 -0
- ml4t/diagnostic/results/sharpe_results.py +448 -0
- ml4t/diagnostic/results/signal_results/__init__.py +74 -0
- ml4t/diagnostic/results/signal_results/ic.py +581 -0
- ml4t/diagnostic/results/signal_results/irtc.py +110 -0
- ml4t/diagnostic/results/signal_results/quantile.py +392 -0
- ml4t/diagnostic/results/signal_results/tearsheet.py +456 -0
- ml4t/diagnostic/results/signal_results/turnover.py +213 -0
- ml4t/diagnostic/results/signal_results/validation.py +147 -0
- ml4t/diagnostic/signal/AGENT.md +17 -0
- ml4t/diagnostic/signal/__init__.py +69 -0
- ml4t/diagnostic/signal/_report.py +152 -0
- ml4t/diagnostic/signal/_utils.py +261 -0
- ml4t/diagnostic/signal/core.py +275 -0
- ml4t/diagnostic/signal/quantile.py +148 -0
- ml4t/diagnostic/signal/result.py +214 -0
- ml4t/diagnostic/signal/signal_ic.py +129 -0
- ml4t/diagnostic/signal/turnover.py +182 -0
- ml4t/diagnostic/splitters/AGENT.md +19 -0
- ml4t/diagnostic/splitters/__init__.py +36 -0
- ml4t/diagnostic/splitters/base.py +501 -0
- ml4t/diagnostic/splitters/calendar.py +421 -0
- ml4t/diagnostic/splitters/calendar_config.py +91 -0
- ml4t/diagnostic/splitters/combinatorial.py +1064 -0
- ml4t/diagnostic/splitters/config.py +322 -0
- ml4t/diagnostic/splitters/cpcv/__init__.py +57 -0
- ml4t/diagnostic/splitters/cpcv/combinations.py +119 -0
- ml4t/diagnostic/splitters/cpcv/partitioning.py +263 -0
- ml4t/diagnostic/splitters/cpcv/purge_engine.py +379 -0
- ml4t/diagnostic/splitters/cpcv/windows.py +190 -0
- ml4t/diagnostic/splitters/group_isolation.py +329 -0
- ml4t/diagnostic/splitters/persistence.py +316 -0
- ml4t/diagnostic/splitters/utils.py +207 -0
- ml4t/diagnostic/splitters/walk_forward.py +757 -0
- ml4t/diagnostic/utils/__init__.py +42 -0
- ml4t/diagnostic/utils/config.py +542 -0
- ml4t/diagnostic/utils/dependencies.py +318 -0
- ml4t/diagnostic/utils/sessions.py +127 -0
- ml4t/diagnostic/validation/__init__.py +54 -0
- ml4t/diagnostic/validation/dataframe.py +274 -0
- ml4t/diagnostic/validation/returns.py +280 -0
- ml4t/diagnostic/validation/timeseries.py +299 -0
- ml4t/diagnostic/visualization/AGENT.md +19 -0
- ml4t/diagnostic/visualization/__init__.py +223 -0
- ml4t/diagnostic/visualization/backtest/__init__.py +98 -0
- ml4t/diagnostic/visualization/backtest/cost_attribution.py +762 -0
- ml4t/diagnostic/visualization/backtest/executive_summary.py +895 -0
- ml4t/diagnostic/visualization/backtest/interactive_controls.py +673 -0
- ml4t/diagnostic/visualization/backtest/statistical_validity.py +874 -0
- ml4t/diagnostic/visualization/backtest/tearsheet.py +565 -0
- ml4t/diagnostic/visualization/backtest/template_system.py +373 -0
- ml4t/diagnostic/visualization/backtest/trade_plots.py +1172 -0
- ml4t/diagnostic/visualization/barrier_plots.py +782 -0
- ml4t/diagnostic/visualization/core.py +1060 -0
- ml4t/diagnostic/visualization/dashboards/__init__.py +36 -0
- ml4t/diagnostic/visualization/dashboards/base.py +582 -0
- ml4t/diagnostic/visualization/dashboards/importance.py +801 -0
- ml4t/diagnostic/visualization/dashboards/interaction.py +263 -0
- ml4t/diagnostic/visualization/dashboards.py +43 -0
- ml4t/diagnostic/visualization/data_extraction/__init__.py +48 -0
- ml4t/diagnostic/visualization/data_extraction/importance.py +649 -0
- ml4t/diagnostic/visualization/data_extraction/interaction.py +504 -0
- ml4t/diagnostic/visualization/data_extraction/types.py +113 -0
- ml4t/diagnostic/visualization/data_extraction/validation.py +66 -0
- ml4t/diagnostic/visualization/feature_plots.py +888 -0
- ml4t/diagnostic/visualization/interaction_plots.py +618 -0
- ml4t/diagnostic/visualization/portfolio/__init__.py +41 -0
- ml4t/diagnostic/visualization/portfolio/dashboard.py +514 -0
- ml4t/diagnostic/visualization/portfolio/drawdown_plots.py +341 -0
- ml4t/diagnostic/visualization/portfolio/returns_plots.py +487 -0
- ml4t/diagnostic/visualization/portfolio/risk_plots.py +301 -0
- ml4t/diagnostic/visualization/report_generation.py +1343 -0
- ml4t/diagnostic/visualization/signal/__init__.py +103 -0
- ml4t/diagnostic/visualization/signal/dashboard.py +911 -0
- ml4t/diagnostic/visualization/signal/event_plots.py +514 -0
- ml4t/diagnostic/visualization/signal/ic_plots.py +635 -0
- ml4t/diagnostic/visualization/signal/multi_signal_dashboard.py +974 -0
- ml4t/diagnostic/visualization/signal/multi_signal_plots.py +603 -0
- ml4t/diagnostic/visualization/signal/quantile_plots.py +625 -0
- ml4t/diagnostic/visualization/signal/turnover_plots.py +400 -0
- ml4t/diagnostic/visualization/trade_shap/__init__.py +90 -0
- ml4t_diagnostic-0.1.0a1.dist-info/METADATA +1044 -0
- ml4t_diagnostic-0.1.0a1.dist-info/RECORD +242 -0
- ml4t_diagnostic-0.1.0a1.dist-info/WHEEL +4 -0
- ml4t_diagnostic-0.1.0a1.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,1343 @@
|
|
|
1
|
+
"""HTML report generation for feature evaluation results.
|
|
2
|
+
|
|
3
|
+
This module provides functions for generating comprehensive HTML reports that combine
|
|
4
|
+
multiple Plotly visualizations with narrative text, analysis summaries, and styling.
|
|
5
|
+
|
|
6
|
+
All report functions follow the standard API defined in docs/plot_api_standards.md:
|
|
7
|
+
- Accept evaluation results from analyze_*() functions
|
|
8
|
+
- Generate self-contained HTML files with embedded plots
|
|
9
|
+
- Support theme customization and styling
|
|
10
|
+
- Provide flexible report templates
|
|
11
|
+
|
|
12
|
+
Example workflow:
|
|
13
|
+
>>> from ml4t.diagnostic.evaluation import analyze_ml_importance, compute_shap_interactions
|
|
14
|
+
>>> from ml4t.diagnostic.visualization import generate_importance_report
|
|
15
|
+
>>>
|
|
16
|
+
>>> # Run evaluations
|
|
17
|
+
>>> importance = analyze_ml_importance(model, X, y)
|
|
18
|
+
>>> interactions = compute_shap_interactions(model, X)
|
|
19
|
+
>>>
|
|
20
|
+
>>> # Generate comprehensive HTML report
|
|
21
|
+
>>> report_path = generate_importance_report(
|
|
22
|
+
... importance_results=importance,
|
|
23
|
+
... interaction_results=interactions,
|
|
24
|
+
... output_file="feature_analysis.html",
|
|
25
|
+
... title="Feature Analysis Report",
|
|
26
|
+
... theme="dark"
|
|
27
|
+
... )
|
|
28
|
+
>>> print(f"Report saved to: {report_path}")
|
|
29
|
+
"""
|
|
30
|
+
|
|
31
|
+
from datetime import datetime
|
|
32
|
+
from pathlib import Path
|
|
33
|
+
from typing import Any
|
|
34
|
+
|
|
35
|
+
import plotly.graph_objects as go
|
|
36
|
+
|
|
37
|
+
from ml4t.diagnostic.visualization.core import get_theme_config, validate_theme
|
|
38
|
+
from ml4t.diagnostic.visualization.feature_plots import (
|
|
39
|
+
plot_importance_bar,
|
|
40
|
+
plot_importance_distribution,
|
|
41
|
+
plot_importance_heatmap,
|
|
42
|
+
plot_importance_summary,
|
|
43
|
+
)
|
|
44
|
+
from ml4t.diagnostic.visualization.interaction_plots import (
|
|
45
|
+
plot_interaction_bar,
|
|
46
|
+
plot_interaction_heatmap,
|
|
47
|
+
plot_interaction_network,
|
|
48
|
+
)
|
|
49
|
+
|
|
50
|
+
__all__ = [
|
|
51
|
+
"generate_importance_report",
|
|
52
|
+
"generate_interaction_report",
|
|
53
|
+
"generate_combined_report",
|
|
54
|
+
"combine_figures_to_html",
|
|
55
|
+
"export_figures_to_pdf",
|
|
56
|
+
]
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def combine_figures_to_html(
    figures: list[go.Figure],
    *,
    title: str = "Analysis Report",
    sections: list[dict[str, Any]] | None = None,
    output_file: str | Path | None = None,
    theme: str | None = None,
    include_toc: bool = True,
) -> str:
    """Assemble Plotly figures and optional narrative sections into one HTML page.

    Each figure is rendered to an HTML fragment with ``Figure.to_html`` and the
    fragments are handed, together with the section layout and the theme
    configuration, to ``_build_html_document``. The resulting markup is either
    returned to the caller or written to disk.

    Parameters
    ----------
    figures : list[go.Figure]
        Figures to embed. Must be non-empty and contain only ``go.Figure``
        instances. Figure ``i`` is rendered into a div with id ``plot-i``.
    title : str, optional
        Report title, forwarded to the document builder.
        Default is "Analysis Report".
    sections : list[dict[str, Any]] | None, optional
        Optional report layout. Each dictionary may carry:
        - "title": section heading
        - "text": narrative text for the section
        - "figure_index": index into ``figures`` of the plot to show
        Only "figure_index" is validated here; the list is otherwise passed
        through unchanged to the document builder.
    output_file : str | Path | None, optional
        Destination of the HTML file. Missing parent directories are created
        and the file is written as UTF-8. If None, nothing is written.
    theme : str | None, optional
        Theme name. None (or any other falsy value) selects "default". The
        name is checked with ``validate_theme`` before its configuration is
        looked up.
    include_toc : bool, optional
        Table-of-contents flag, forwarded to the document builder.
        Default is True.

    Returns
    -------
    str
        The absolute path of the written file when ``output_file`` is given,
        otherwise the HTML markup itself.

    Raises
    ------
    ValueError
        If ``figures`` is empty, or a section's "figure_index" is negative or
        not smaller than ``len(figures)``.
    TypeError
        If ``figures`` contains anything that is not a ``go.Figure``.

    Examples
    --------
    >>> sections = [
    ...     {"title": "Rankings", "text": "Top features.", "figure_index": 0},
    ...     {"title": "Agreement", "text": "Method agreement.", "figure_index": 1},
    ... ]
    >>> path = combine_figures_to_html(
    ...     figures=[fig1, fig2],
    ...     title="Feature Importance Analysis",
    ...     sections=sections,
    ...     output_file="report.html",
    ...     theme="dark",
    ... )
    >>> html = combine_figures_to_html([fig1, fig2], title="Quick Report")

    Notes
    -----
    - Only the first figure's fragment carries the Plotly.js ``<script>`` tag,
      and that tag points at the Plotly CDN; later fragments rely on it.
    - Output size grows with the amount of data embedded in the figures.
    """
    # --- input checks -----------------------------------------------------
    if not figures:
        raise ValueError("At least one figure is required")

    for candidate in figures:
        if not isinstance(candidate, go.Figure):
            raise TypeError(
                "All items in figures list must be plotly.graph_objects.Figure instances"
            )

    n_figures = len(figures)
    if sections is not None:
        for position, entry in enumerate(sections):
            if "figure_index" not in entry:
                continue
            fig_idx = entry["figure_index"]
            if fig_idx < 0 or fig_idx >= n_figures:
                raise ValueError(
                    f"Section {position} has invalid figure_index {fig_idx}. "
                    f"Must be between 0 and {n_figures - 1}"
                )

    # --- theme --------------------------------------------------------------
    resolved_theme = theme if theme else "default"
    validate_theme(resolved_theme)
    page_theme = get_theme_config(resolved_theme)

    # --- render figures -----------------------------------------------------
    # The Plotly.js loader is emitted once, with the first fragment only.
    fragments = [
        figure.to_html(
            full_html=False,
            include_plotlyjs="cdn" if position == 0 else False,
            div_id=f"plot-{position}",
        )
        for position, figure in enumerate(figures)
    ]

    # --- assemble document --------------------------------------------------
    document = _build_html_document(
        title=title,
        figure_htmls=fragments,
        sections=sections,
        theme_config=page_theme,
        include_toc=include_toc,
    )

    # --- return markup, or persist and return the path ----------------------
    if output_file is None:
        return document

    destination = Path(output_file)
    destination.parent.mkdir(parents=True, exist_ok=True)
    destination.write_text(document, encoding="utf-8")
    return str(destination.absolute())
|
|
210
|
+
|
|
211
|
+
|
|
212
|
+
def export_figures_to_pdf(
    figures: list[go.Figure],
    output_file: str | Path,
    *,
    layout: str = "vertical",
    page_size: tuple[int, int] = (800, 600),
    scale: float = 2.0,
) -> str:
    """Export multiple Plotly figures to a single PDF file.

    Arguments are validated, the availability of ``kaleido`` is checked, and
    the actual rendering is delegated to ``_export_figures_multipage``. The
    output directory is only created once every argument has been accepted,
    so a rejected call leaves no directories behind.

    Parameters
    ----------
    figures : list[go.Figure]
        Figures to export. Must be non-empty and contain only ``go.Figure``
        instances.
    output_file : str | Path
        Path where the PDF file should be saved. Missing parent directories
        are created.
    layout : str, optional
        Layout mode for figures:
        - "vertical": each figure on its own page (default)
        - "compact": reserved for fitting several figures per page;
          not implemented yet
    page_size : tuple[int, int], optional
        Page size in pixels (width, height), forwarded to the exporter.
        Default is (800, 600), a 4:3 landscape page. Other typical choices:
        - (600, 800): 3:4 portrait
        - (1200, 900): larger 4:3 landscape
    scale : float, optional
        Resolution scale factor, forwarded to the exporter. Default is 2.0.

    Returns
    -------
    str
        The value returned by ``_export_figures_multipage`` (expected to be
        the absolute path of the generated PDF).

    Raises
    ------
    ValueError
        If ``figures`` is empty, or ``layout`` is neither "vertical" nor
        "compact".
    TypeError
        If ``figures`` contains non-Figure objects.
    ImportError
        If kaleido is not installed.
    NotImplementedError
        If ``layout`` is "compact".

    Examples
    --------
    >>> pdf_path = export_figures_to_pdf(
    ...     figures=[fig1, fig2],
    ...     output_file="analysis.pdf",
    ...     page_size=(800, 600),
    ...     scale=2.0,
    ... )

    Notes
    -----
    - Requires the kaleido package: ``pip install kaleido``
    - Checks run in this order: figures, kaleido availability, layout.
    """
    # Validation
    if not figures:
        raise ValueError("At least one figure is required")

    if not all(isinstance(fig, go.Figure) for fig in figures):
        raise TypeError("All items in figures list must be plotly.graph_objects.Figure instances")

    # Check kaleido availability (imported only to prove it is installed)
    try:
        import kaleido  # noqa: F401
    except ImportError as e:
        raise ImportError(
            "kaleido is required for PDF export. Install it with: pip install kaleido"
        ) from e

    # Reject unsupported layouts BEFORE touching the filesystem, so that a
    # failing call does not create the output directory as a side effect.
    if layout == "compact":
        # Try to fit multiple figures per page (not implemented yet)
        raise NotImplementedError("Compact layout is not yet implemented. Use 'vertical'.")
    if layout != "vertical":
        raise ValueError(f"Invalid layout '{layout}'. Must be 'vertical' or 'compact'.")

    # Create output directory if needed
    output_path = Path(output_file)
    output_path.parent.mkdir(parents=True, exist_ok=True)

    # "vertical": each figure gets its own page
    return _export_figures_multipage(figures, output_path, page_size, scale)
|
|
322
|
+
|
|
323
|
+
|
|
324
|
+
def generate_importance_report(
    importance_results: dict[str, Any],
    *,
    output_file: str | Path,
    title: str | None = None,
    theme: str | None = None,
    include_sections: list[str] | None = None,
    top_n: int = 20,
    export_pdf: bool = False,
    pdf_page_size: tuple[int, int] = (800, 600),
    pdf_scale: float = 2.0,
) -> str:
    """Generate an HTML report for feature importance analysis.

    Builds the requested sections in a fixed order (summary, rankings,
    agreement, distributions, recommendations), renders them with
    ``combine_figures_to_html`` and optionally exports the figures to a PDF
    stored next to the HTML file.

    Parameters
    ----------
    importance_results : dict[str, Any]
        Results from analyze_ml_importance(). The dictionary is passed
        unchanged to the plotting and text helpers and is not inspected here;
        upstream it is expected to contain "consensus_ranking",
        "method_results", "method_agreement" and "top_features_consensus".
    output_file : str | Path
        Path where the HTML report will be saved.
    title : str | None, optional
        Report title. If None, uses "Feature Importance Analysis Report".
    theme : str | None, optional
        Visual theme name, forwarded to the plot functions and to the HTML
        assembler (which falls back to "default" for None).
    include_sections : list[str] | None, optional
        Which sections to include in the report. Options:
        - "summary": Executive summary (text only)
        - "rankings": Consensus rankings bar chart
        - "agreement": Method agreement heatmap
        - "distributions": Score distributions
        - "recommendations": Interpretation and next steps (text only)
        If None, includes all sections. At least one figure-producing
        section ("rankings", "agreement", "distributions") is required.
    top_n : int, optional
        Number of top features shown in the rankings chart. Default is 20.
    export_pdf : bool, optional
        If True, also export the report figures to ``<output_file stem>.pdf``
        via ``export_figures_to_pdf``. Default is False (HTML only).
    pdf_page_size : tuple[int, int], optional
        Page size for PDF export (width, height) in pixels.
        Default is (800, 600). Only used if export_pdf=True.
    pdf_scale : float, optional
        Resolution scale for PDF export.
        Default is 2.0. Only used if export_pdf=True.

    Returns
    -------
    str
        Absolute path to the generated HTML file.

    Raises
    ------
    ValueError
        If ``include_sections`` contains an unknown section name, or selects
        no figure-producing section.

    Examples
    --------
    >>> results = analyze_ml_importance(model, X, y, methods=["mdi", "pfi", "shap"])
    >>> report_path = generate_importance_report(
    ...     importance_results=results,
    ...     output_file="importance_report.html",
    ...     theme="dark",
    ... )

    Minimal report with specific sections:

    >>> report_path = generate_importance_report(
    ...     importance_results=results,
    ...     output_file="quick_report.html",
    ...     include_sections=["summary", "rankings"],
    ...     top_n=10,
    ... )
    """
    # Default title
    if title is None:
        title = "Feature Importance Analysis Report"

    # Default sections
    if include_sections is None:
        include_sections = ["summary", "rankings", "agreement", "distributions", "recommendations"]

    # Validate sections
    valid_sections = {"summary", "rankings", "agreement", "distributions", "recommendations"}
    invalid = set(include_sections) - valid_sections
    if invalid:
        raise ValueError(f"Invalid sections: {invalid}. Valid options: {valid_sections}")

    # combine_figures_to_html() refuses an empty figure list, so a text-only
    # selection can never succeed. Fail up front with a message that points
    # at include_sections instead of failing late inside the HTML assembler.
    figure_sections = ("rankings", "agreement", "distributions")
    if not any(name in include_sections for name in figure_sections):
        raise ValueError(
            "At least one figure is required: include_sections must contain at least one of "
            f"{list(figure_sections)}"
        )

    figures: list[go.Figure] = []
    sections: list[dict[str, Any]] = []

    def _add_figure_section(fig: go.Figure, heading: str, text: str) -> None:
        """Register ``fig`` and a section that points at it."""
        figures.append(fig)
        sections.append({"title": heading, "text": text, "figure_index": len(figures) - 1})

    # Add summary section
    if "summary" in include_sections:
        summary_text = _generate_importance_summary_text(importance_results)
        sections.append({"title": "Executive Summary", "text": summary_text})

    # Add consensus rankings
    if "rankings" in include_sections:
        _add_figure_section(
            plot_importance_bar(importance_results, top_n=top_n, theme=theme),
            "Consensus Feature Rankings",
            (
                f"The top {top_n} features ranked by consensus across all importance methods. "
                "Features appearing at the top are consistently identified as important by "
                "multiple methodologies (MDI, PFI, SHAP)."
            ),
        )

    # Add method agreement
    if "agreement" in include_sections:
        _add_figure_section(
            plot_importance_heatmap(importance_results, theme=theme),
            "Method Agreement Analysis",
            (
                "Spearman correlation matrix showing agreement between different importance "
                "methods. High correlation (>0.7) indicates methods agree on feature rankings. "
                "Low correlation (<0.5) suggests method-specific biases or feature interactions."
            ),
        )

    # Add distributions
    if "distributions" in include_sections:
        _add_figure_section(
            plot_importance_distribution(importance_results, theme=theme),
            "Importance Score Distributions",
            (
                "Distribution of importance scores from each method. Overlapping distributions "
                "indicate consensus, while separation suggests method disagreement."
            ),
        )

    # Add recommendations
    if "recommendations" in include_sections:
        rec_text = _generate_importance_recommendations(importance_results)
        sections.append({"title": "Interpretation & Recommendations", "text": rec_text})

    # Generate HTML
    html_path = combine_figures_to_html(
        figures=figures,
        title=title,
        sections=sections,
        output_file=output_file,
        theme=theme,
        include_toc=True,
    )

    # Optionally export to PDF (figures is guaranteed non-empty at this point)
    if export_pdf:
        pdf_path = Path(output_file).with_suffix(".pdf")
        export_figures_to_pdf(
            figures=figures,
            output_file=pdf_path,
            page_size=pdf_page_size,
            scale=pdf_scale,
        )

    return html_path
|
|
504
|
+
|
|
505
|
+
|
|
506
|
+
def generate_interaction_report(
    interaction_results: dict[str, Any],
    *,
    output_file: str | Path,
    title: str | None = None,
    theme: str | None = None,
    include_sections: list[str] | None = None,
    top_n: int = 20,
    export_pdf: bool = False,
    pdf_page_size: tuple[int, int] = (800, 600),
    pdf_scale: float = 2.0,
) -> str:
    """Render a feature-interaction analysis as a sectioned HTML report.

    Up to four sections are produced, always in this order: top interacting
    pairs (bar chart), interaction matrix (heatmap), interaction network
    graph, and a static interpretation/recommendations text.

    Parameters
    ----------
    interaction_results : dict[str, Any]
        Interaction results; forwarded unchanged to the ``plot_interaction_*``
        helpers (e.g. the output of ``compute_shap_interactions()``).
    output_file : str | Path
        Destination of the HTML report.
    title : str | None, optional
        Report title. ``None`` selects "Feature Interaction Analysis Report".
    theme : str | None, optional
        Visual theme, passed through to the plotting helpers and to
        ``combine_figures_to_html``.
    include_sections : list[str] | None, optional
        Any of ``"top_pairs"``, ``"matrix"``, ``"network"`` and
        ``"recommendations"``. ``None`` selects all four. Names outside this
        set are ignored.
    top_n : int, optional
        Number of interactions shown in the bar chart and the network graph.
        Default is 20.
    export_pdf : bool, optional
        If True and at least one figure was generated, the figures are also
        exported to a PDF that shares ``output_file``'s name with a ``.pdf``
        suffix. Default is False.
    pdf_page_size : tuple[int, int], optional
        (width, height) handed to the PDF exporter. Default is (800, 600).
    pdf_scale : float, optional
        Scale factor handed to the PDF exporter. Default is 2.0.

    Returns
    -------
    str
        Path of the generated HTML file, as returned by
        ``combine_figures_to_html``.

    Examples
    --------
    >>> from ml4t.diagnostic.evaluation import compute_shap_interactions
    >>> from ml4t.diagnostic.visualization import generate_interaction_report
    >>>
    >>> interactions = compute_shap_interactions(model, X)
    >>> report_path = generate_interaction_report(
    ...     interaction_results=interactions,
    ...     output_file="interactions.html"
    ... )
    """
    report_title = "Feature Interaction Analysis Report" if title is None else title
    wanted = (
        ["top_pairs", "matrix", "network", "recommendations"]
        if include_sections is None
        else include_sections
    )

    figures: list[go.Figure] = []
    sections: list[dict[str, str | int]] = []

    def _add_figure_section(fig: go.Figure, heading: str, blurb: str) -> None:
        # Register the figure first so the section can point at its index.
        figures.append(fig)
        sections.append(
            {"title": heading, "text": blurb, "figure_index": len(figures) - 1}
        )

    if "top_pairs" in wanted:
        _add_figure_section(
            plot_interaction_bar(interaction_results, top_n=top_n, theme=theme),
            f"Top {top_n} Feature Interactions",
            "Strongest pairwise feature interactions ranked by mean absolute interaction strength. "
            "High interaction values indicate non-linear or conditional relationships.",
        )

    if "matrix" in wanted:
        _add_figure_section(
            plot_interaction_heatmap(interaction_results, theme=theme),
            "Interaction Strength Matrix",
            "Symmetric matrix showing pairwise interaction strengths. "
            "Darker colors indicate stronger interactions.",
        )

    if "network" in wanted:
        _add_figure_section(
            plot_interaction_network(interaction_results, theme=theme, top_n=top_n),
            "Interaction Network Graph",
            "Network visualization of feature interactions. Node size represents "
            "feature importance, edge thickness represents interaction strength. "
            "Isolated nodes have weak interactions.",
        )

    if "recommendations" in wanted:
        # Text-only section: no figure_index key.
        sections.append(
            {
                "title": "Interpretation & Recommendations",
                "text": _generate_interaction_recommendations(interaction_results),
            }
        )

    html_path = combine_figures_to_html(
        figures=figures,
        title=report_title,
        sections=sections,
        output_file=output_file,
        theme=theme,
        include_toc=True,
    )

    # The PDF only carries figures, so skip it when none were generated.
    if export_pdf and figures:
        export_figures_to_pdf(
            figures=figures,
            output_file=Path(output_file).with_suffix(".pdf"),
            page_size=pdf_page_size,
            scale=pdf_scale,
        )

    return html_path
|
|
655
|
+
|
|
656
|
+
|
|
657
|
+
def generate_combined_report(
    importance_results: dict[str, Any],
    interaction_results: dict[str, Any] | None = None,
    *,
    output_file: str | Path,
    title: str | None = None,
    theme: str | None = None,
    top_n: int = 20,
    export_pdf: bool = False,
    pdf_page_size: tuple[int, int] = (800, 600),
    pdf_scale: float = 2.0,
) -> str:
    """Build one HTML report covering importance and (optionally) interactions.

    The report starts with an overview, followed by "Part 1" (a multi-panel
    importance summary). When ``interaction_results`` is given, "Part 2" adds
    an interaction network and an interaction heatmap. A recommendations
    section always closes the report.

    Parameters
    ----------
    importance_results : dict[str, Any]
        Importance results, forwarded to ``plot_importance_summary`` (e.g. the
        output of ``analyze_ml_importance()``).
    interaction_results : dict[str, Any] | None, optional
        Interaction results (e.g. from ``compute_shap_interactions()``).
        ``None`` omits the interaction part.
    output_file : str | Path
        Destination of the HTML report.
    title : str | None, optional
        Report title. ``None`` selects "Complete Feature Analysis Report".
    theme : str | None, optional
        Visual theme, passed through to the plotting helpers and to
        ``combine_figures_to_html``.
    top_n : int, optional
        Number of interactions shown in the network graph. Default is 20.
        The importance summary always uses a fixed ``top_n=15``.
    export_pdf : bool, optional
        If True, the figures are also exported to a PDF that shares
        ``output_file``'s name with a ``.pdf`` suffix. Default is False.
    pdf_page_size : tuple[int, int], optional
        (width, height) handed to the PDF exporter. Default is (800, 600).
    pdf_scale : float, optional
        Scale factor handed to the PDF exporter. Default is 2.0.

    Returns
    -------
    str
        Path of the generated HTML file, as returned by
        ``combine_figures_to_html``.

    Examples
    --------
    >>> from ml4t.diagnostic.evaluation import analyze_ml_importance, compute_shap_interactions
    >>> from ml4t.diagnostic.visualization import generate_combined_report
    >>>
    >>> importance = analyze_ml_importance(model, X, y)
    >>> interactions = compute_shap_interactions(model, X)
    >>>
    >>> report_path = generate_combined_report(
    ...     importance_results=importance,
    ...     interaction_results=interactions,
    ...     output_file="complete_analysis.html",
    ...     theme="presentation"
    ... )
    """
    report_title = "Complete Feature Analysis Report" if title is None else title

    figures: list[go.Figure] = []
    sections: list[dict[str, str | int]] = []

    def _add_text_section(heading: str, blurb: str) -> None:
        sections.append({"title": heading, "text": blurb})

    def _add_figure_section(fig: go.Figure, heading: str, blurb: str) -> None:
        # Register the figure first so the section can point at its index.
        figures.append(fig)
        sections.append(
            {"title": heading, "text": blurb, "figure_index": len(figures) - 1}
        )

    _add_text_section(
        "Analysis Overview",
        _generate_combined_overview(importance_results, interaction_results),
    )

    # Part 1: importance. The heading section is title-only (empty text).
    _add_text_section("Part 1: Feature Importance Analysis", "")
    _add_figure_section(
        plot_importance_summary(importance_results, top_n=15, theme=theme),
        "Importance Summary (Multi-Panel View)",
        "Comprehensive view of feature importance combining consensus rankings, "
        "method agreement, and score distributions in a single multi-panel visualization.",
    )

    # Part 2: interactions, only when results were supplied.
    if interaction_results is not None:
        _add_text_section("Part 2: Feature Interaction Analysis", "")
        _add_figure_section(
            plot_interaction_network(interaction_results, theme=theme, top_n=top_n),
            "Interaction Network",
            "Interactive network showing how features interact. Strong interactions "
            "may indicate opportunities for feature engineering.",
        )
        _add_figure_section(
            plot_interaction_heatmap(interaction_results, theme=theme),
            "Interaction Matrix",
            "Complete pairwise interaction strength matrix.",
        )

    _add_text_section(
        "Actionable Recommendations",
        _generate_combined_recommendations(importance_results, interaction_results),
    )

    html_path = combine_figures_to_html(
        figures=figures,
        title=report_title,
        sections=sections,
        output_file=output_file,
        theme=theme,
        include_toc=True,
    )

    if export_pdf and figures:
        export_figures_to_pdf(
            figures=figures,
            output_file=Path(output_file).with_suffix(".pdf"),
            page_size=pdf_page_size,
            scale=pdf_scale,
        )

    return html_path
|
|
800
|
+
|
|
801
|
+
|
|
802
|
+
# ============================================================================
|
|
803
|
+
# Private Helper Functions
|
|
804
|
+
# ============================================================================
|
|
805
|
+
|
|
806
|
+
|
|
807
|
+
def _build_html_document(
    title: str,
    figure_htmls: list[str],
    sections: list[dict[str, Any]] | None,
    theme_config: dict[str, Any],
    include_toc: bool,
) -> str:
    """Build complete HTML document from components.

    Parameters
    ----------
    title : str
        Document title (plain text). It is HTML-escaped before being placed
        in ``<title>`` and ``<h1>``.
    figure_htmls : list[str]
        List of figure HTML div strings, inserted verbatim.
    sections : list[dict] | None
        Section definitions with title, text, figure_index
    theme_config : dict
        Theme configuration; the keys read here are ``plot_bgcolor``,
        ``font_color``, ``gridcolor`` and ``template``.
    include_toc : bool
        Whether to include table of contents (only rendered when
        ``sections`` is non-empty).

    Returns
    -------
    str
        Complete HTML document
    """
    # Local import keeps this block self-contained. The function is aliased
    # because "html" is a natural local name in this module.
    from html import escape as _escape_html

    # Extract colors from theme, falling back to a light palette.
    bg_color = theme_config.get("plot_bgcolor", "#FFFFFF")
    text_color = theme_config.get("font_color", "#1F1F1F")
    grid_color = theme_config.get("gridcolor", "#E5E5E5")

    # A theme is dark if its template name says so or it uses a known dark background.
    is_dark = "dark" in theme_config.get("template", "").lower() or bg_color in [
        "#1E1E1E",
        "#0E0E0E",
    ]

    css = _generate_css(bg_color, text_color, grid_color, is_dark)

    toc_html = ""
    if include_toc and sections:
        toc_html = _generate_toc(sections)

    body_html = _generate_body_content(figure_htmls, sections)

    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

    # The title is plain text: escape it so characters such as "<" or "&"
    # cannot break the markup.
    safe_title = _escape_html(str(title))

    document = f"""<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <meta name="generator" content="ML4T Diagnostic Visualization Library">
    <title>{safe_title}</title>
    <style>
{css}
    </style>
</head>
<body>
    <div class="container">
        <header>
            <h1>{safe_title}</h1>
            <p class="timestamp">Generated: {timestamp}</p>
        </header>

{toc_html}

        <main>
{body_html}
        </main>

        <footer>
            <p>Generated by <a href="https://github.com/yourusername/ml4t-diagnostic" target="_blank">ML4T Diagnostic</a> - Quantitative Evaluation Library</p>
        </footer>
    </div>
</body>
</html>"""

    return document
|
|
892
|
+
|
|
893
|
+
|
|
894
|
+
def _generate_css(bg_color: str, text_color: str, grid_color: str, is_dark: bool) -> str:
|
|
895
|
+
"""Generate CSS styles for report."""
|
|
896
|
+
# Derive additional colors
|
|
897
|
+
if is_dark:
|
|
898
|
+
header_bg = "#2A2A2A"
|
|
899
|
+
section_bg = "#252525"
|
|
900
|
+
border_color = "#404040"
|
|
901
|
+
link_color = "#6FA8DC"
|
|
902
|
+
else:
|
|
903
|
+
header_bg = "#F5F5F5"
|
|
904
|
+
section_bg = "#FAFAFA"
|
|
905
|
+
border_color = grid_color
|
|
906
|
+
link_color = "#1A73E8"
|
|
907
|
+
|
|
908
|
+
css = f"""
|
|
909
|
+
/* Reset and base styles */
|
|
910
|
+
* {{
|
|
911
|
+
margin: 0;
|
|
912
|
+
padding: 0;
|
|
913
|
+
box-sizing: border-box;
|
|
914
|
+
}}
|
|
915
|
+
|
|
916
|
+
body {{
|
|
917
|
+
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, 'Helvetica Neue', Arial, sans-serif;
|
|
918
|
+
background-color: {bg_color};
|
|
919
|
+
color: {text_color};
|
|
920
|
+
line-height: 1.6;
|
|
921
|
+
padding: 20px;
|
|
922
|
+
}}
|
|
923
|
+
|
|
924
|
+
.container {{
|
|
925
|
+
max-width: 1400px;
|
|
926
|
+
margin: 0 auto;
|
|
927
|
+
}}
|
|
928
|
+
|
|
929
|
+
/* Header */
|
|
930
|
+
header {{
|
|
931
|
+
text-align: center;
|
|
932
|
+
padding: 40px 20px;
|
|
933
|
+
background-color: {header_bg};
|
|
934
|
+
border-radius: 8px;
|
|
935
|
+
margin-bottom: 30px;
|
|
936
|
+
}}
|
|
937
|
+
|
|
938
|
+
header h1 {{
|
|
939
|
+
font-size: 2.5em;
|
|
940
|
+
font-weight: 700;
|
|
941
|
+
margin-bottom: 10px;
|
|
942
|
+
}}
|
|
943
|
+
|
|
944
|
+
.timestamp {{
|
|
945
|
+
color: {text_color};
|
|
946
|
+
opacity: 0.7;
|
|
947
|
+
font-size: 0.9em;
|
|
948
|
+
}}
|
|
949
|
+
|
|
950
|
+
/* Table of Contents */
|
|
951
|
+
.toc {{
|
|
952
|
+
background-color: {section_bg};
|
|
953
|
+
padding: 20px;
|
|
954
|
+
border-radius: 8px;
|
|
955
|
+
margin-bottom: 30px;
|
|
956
|
+
border: 1px solid {border_color};
|
|
957
|
+
}}
|
|
958
|
+
|
|
959
|
+
.toc h2 {{
|
|
960
|
+
font-size: 1.5em;
|
|
961
|
+
margin-bottom: 15px;
|
|
962
|
+
}}
|
|
963
|
+
|
|
964
|
+
.toc ul {{
|
|
965
|
+
list-style: none;
|
|
966
|
+
padding-left: 0;
|
|
967
|
+
}}
|
|
968
|
+
|
|
969
|
+
.toc li {{
|
|
970
|
+
margin: 8px 0;
|
|
971
|
+
}}
|
|
972
|
+
|
|
973
|
+
.toc a {{
|
|
974
|
+
color: {link_color};
|
|
975
|
+
text-decoration: none;
|
|
976
|
+
transition: opacity 0.2s;
|
|
977
|
+
}}
|
|
978
|
+
|
|
979
|
+
.toc a:hover {{
|
|
980
|
+
opacity: 0.7;
|
|
981
|
+
}}
|
|
982
|
+
|
|
983
|
+
/* Sections */
|
|
984
|
+
.section {{
|
|
985
|
+
margin-bottom: 50px;
|
|
986
|
+
}}
|
|
987
|
+
|
|
988
|
+
.section-title {{
|
|
989
|
+
font-size: 1.8em;
|
|
990
|
+
font-weight: 600;
|
|
991
|
+
margin-bottom: 15px;
|
|
992
|
+
padding-bottom: 10px;
|
|
993
|
+
border-bottom: 2px solid {border_color};
|
|
994
|
+
}}
|
|
995
|
+
|
|
996
|
+
.section-text {{
|
|
997
|
+
font-size: 1.1em;
|
|
998
|
+
margin-bottom: 20px;
|
|
999
|
+
line-height: 1.8;
|
|
1000
|
+
}}
|
|
1001
|
+
|
|
1002
|
+
/* Plot containers */
|
|
1003
|
+
.plot-container {{
|
|
1004
|
+
margin: 30px 0;
|
|
1005
|
+
padding: 20px;
|
|
1006
|
+
background-color: {section_bg};
|
|
1007
|
+
border-radius: 8px;
|
|
1008
|
+
border: 1px solid {border_color};
|
|
1009
|
+
}}
|
|
1010
|
+
|
|
1011
|
+
/* Footer */
|
|
1012
|
+
footer {{
|
|
1013
|
+
text-align: center;
|
|
1014
|
+
padding: 30px 20px;
|
|
1015
|
+
margin-top: 50px;
|
|
1016
|
+
border-top: 1px solid {border_color};
|
|
1017
|
+
opacity: 0.7;
|
|
1018
|
+
}}
|
|
1019
|
+
|
|
1020
|
+
footer a {{
|
|
1021
|
+
color: {link_color};
|
|
1022
|
+
text-decoration: none;
|
|
1023
|
+
}}
|
|
1024
|
+
|
|
1025
|
+
footer a:hover {{
|
|
1026
|
+
text-decoration: underline;
|
|
1027
|
+
}}
|
|
1028
|
+
|
|
1029
|
+
/* Responsive design */
|
|
1030
|
+
@media (max-width: 768px) {{
|
|
1031
|
+
header h1 {{
|
|
1032
|
+
font-size: 2em;
|
|
1033
|
+
}}
|
|
1034
|
+
|
|
1035
|
+
.section-title {{
|
|
1036
|
+
font-size: 1.5em;
|
|
1037
|
+
}}
|
|
1038
|
+
|
|
1039
|
+
.plot-container {{
|
|
1040
|
+
padding: 10px;
|
|
1041
|
+
}}
|
|
1042
|
+
}}
|
|
1043
|
+
|
|
1044
|
+
/* Print styles */
|
|
1045
|
+
@media print {{
|
|
1046
|
+
body {{
|
|
1047
|
+
background-color: white;
|
|
1048
|
+
color: black;
|
|
1049
|
+
}}
|
|
1050
|
+
|
|
1051
|
+
.container {{
|
|
1052
|
+
max-width: none;
|
|
1053
|
+
}}
|
|
1054
|
+
|
|
1055
|
+
.plot-container {{
|
|
1056
|
+
page-break-inside: avoid;
|
|
1057
|
+
}}
|
|
1058
|
+
}}
|
|
1059
|
+
"""
|
|
1060
|
+
|
|
1061
|
+
return css
|
|
1062
|
+
|
|
1063
|
+
|
|
1064
|
+
def _generate_toc(sections: list[dict[str, Any]]) -> str:
|
|
1065
|
+
"""Generate table of contents HTML."""
|
|
1066
|
+
toc_items = []
|
|
1067
|
+
|
|
1068
|
+
for i, section in enumerate(sections):
|
|
1069
|
+
section_title = section.get("title", "")
|
|
1070
|
+
if section_title:
|
|
1071
|
+
# Create anchor-safe ID
|
|
1072
|
+
section_id = f"section-{i}"
|
|
1073
|
+
toc_items.append(f' <li><a href="#{section_id}">{section_title}</a></li>')
|
|
1074
|
+
|
|
1075
|
+
toc_html = f""" <nav class="toc">
|
|
1076
|
+
<h2>Table of Contents</h2>
|
|
1077
|
+
<ul>
|
|
1078
|
+
{chr(10).join(toc_items)}
|
|
1079
|
+
</ul>
|
|
1080
|
+
</nav>
|
|
1081
|
+
"""
|
|
1082
|
+
|
|
1083
|
+
return toc_html
|
|
1084
|
+
|
|
1085
|
+
|
|
1086
|
+
def _generate_body_content(figure_htmls: list[str], sections: list[dict[str, Any]] | None) -> str:
|
|
1087
|
+
"""Generate main body content HTML."""
|
|
1088
|
+
if sections is None:
|
|
1089
|
+
# Simple case: just render all figures sequentially
|
|
1090
|
+
body_parts = []
|
|
1091
|
+
for _i, fig_html in enumerate(figure_htmls):
|
|
1092
|
+
body_parts.append(f""" <div class="plot-container">
|
|
1093
|
+
{fig_html}
|
|
1094
|
+
</div>
|
|
1095
|
+
""")
|
|
1096
|
+
return "\n".join(body_parts)
|
|
1097
|
+
|
|
1098
|
+
# Complex case: render sections with associated figures
|
|
1099
|
+
body_parts = []
|
|
1100
|
+
|
|
1101
|
+
for i, section in enumerate(sections):
|
|
1102
|
+
section_id = f"section-{i}"
|
|
1103
|
+
section_title = section.get("title", "")
|
|
1104
|
+
section_text = section.get("text", "")
|
|
1105
|
+
figure_index = section.get("figure_index")
|
|
1106
|
+
|
|
1107
|
+
# Start section
|
|
1108
|
+
section_html = f' <section class="section" id="{section_id}">\n'
|
|
1109
|
+
|
|
1110
|
+
# Add title if present
|
|
1111
|
+
if section_title:
|
|
1112
|
+
section_html += f' <h2 class="section-title">{section_title}</h2>\n'
|
|
1113
|
+
|
|
1114
|
+
# Add text if present (section_text may contain HTML block elements,
|
|
1115
|
+
# so we don't wrap in <p> to avoid invalid nesting)
|
|
1116
|
+
if section_text:
|
|
1117
|
+
section_html += ' <div class="section-text">\n'
|
|
1118
|
+
section_html += f" {section_text}\n"
|
|
1119
|
+
section_html += " </div>\n"
|
|
1120
|
+
|
|
1121
|
+
# Add figure if specified
|
|
1122
|
+
if figure_index is not None and 0 <= figure_index < len(figure_htmls):
|
|
1123
|
+
section_html += ' <div class="plot-container">\n'
|
|
1124
|
+
section_html += figure_htmls[figure_index]
|
|
1125
|
+
section_html += "\n </div>\n"
|
|
1126
|
+
|
|
1127
|
+
# Close section
|
|
1128
|
+
section_html += " </section>\n"
|
|
1129
|
+
|
|
1130
|
+
body_parts.append(section_html)
|
|
1131
|
+
|
|
1132
|
+
return "\n".join(body_parts)
|
|
1133
|
+
|
|
1134
|
+
|
|
1135
|
+
def _generate_importance_summary_text(results: dict[str, Any]) -> str:
|
|
1136
|
+
"""Generate executive summary text for importance analysis."""
|
|
1137
|
+
consensus_ranking = results.get("consensus_ranking", [])
|
|
1138
|
+
top_consensus = results.get("top_features_consensus", [])
|
|
1139
|
+
method_agreement = results.get("method_agreement", {})
|
|
1140
|
+
|
|
1141
|
+
# Calculate average agreement
|
|
1142
|
+
avg_agreement = (
|
|
1143
|
+
sum(method_agreement.values()) / len(method_agreement) if method_agreement else 0.0
|
|
1144
|
+
)
|
|
1145
|
+
|
|
1146
|
+
summary = f"""
|
|
1147
|
+
<p><strong>Key Findings:</strong></p>
|
|
1148
|
+
<ul>
|
|
1149
|
+
<li>Analyzed {len(consensus_ranking)} features across multiple importance methods</li>
|
|
1150
|
+
<li>Top consensus feature: <strong>{consensus_ranking[0] if consensus_ranking else "N/A"}</strong></li>
|
|
1151
|
+
<li>Features with strong consensus: {len(top_consensus)} features appear in all methods' top-10</li>
|
|
1152
|
+
<li>Average method agreement: {avg_agreement:.2f} (Spearman correlation)</li>
|
|
1153
|
+
</ul>
|
|
1154
|
+
"""
|
|
1155
|
+
|
|
1156
|
+
return summary.strip()
|
|
1157
|
+
|
|
1158
|
+
|
|
1159
|
+
def _generate_importance_recommendations(_results: dict[str, Any]) -> str:
|
|
1160
|
+
"""Generate recommendations text for importance analysis."""
|
|
1161
|
+
recommendations = """
|
|
1162
|
+
<p><strong>Interpretation Guidelines:</strong></p>
|
|
1163
|
+
<ul>
|
|
1164
|
+
<li><strong>High consensus + high agreement</strong>: Trust the rankings - features are robustly important</li>
|
|
1165
|
+
<li><strong>Method disagreement</strong>: Investigate feature-specific biases (MDI vs PFI patterns)</li>
|
|
1166
|
+
<li><strong>SHAP divergence</strong>: Indicates interaction effects - consider feature engineering</li>
|
|
1167
|
+
</ul>
|
|
1168
|
+
|
|
1169
|
+
<p><strong>Next Steps:</strong></p>
|
|
1170
|
+
<ul>
|
|
1171
|
+
<li>Focus on top consensus features for model interpretability</li>
|
|
1172
|
+
<li>Investigate features with large method disagreement</li>
|
|
1173
|
+
<li>Consider removing features with low importance across all methods</li>
|
|
1174
|
+
<li>Analyze SHAP interaction effects for top features</li>
|
|
1175
|
+
</ul>
|
|
1176
|
+
"""
|
|
1177
|
+
|
|
1178
|
+
return recommendations.strip()
|
|
1179
|
+
|
|
1180
|
+
|
|
1181
|
+
def _generate_interaction_recommendations(_results: dict[str, Any]) -> str:
|
|
1182
|
+
"""Generate recommendations text for interaction analysis."""
|
|
1183
|
+
recommendations = """
|
|
1184
|
+
<p><strong>Interpreting Interactions:</strong></p>
|
|
1185
|
+
<ul>
|
|
1186
|
+
<li><strong>Strong interactions</strong>: Non-linear or conditional relationships between features</li>
|
|
1187
|
+
<li><strong>Network clusters</strong>: Groups of related features that interact strongly</li>
|
|
1188
|
+
<li><strong>Isolated features</strong>: Features with weak interactions (may be independent)</li>
|
|
1189
|
+
</ul>
|
|
1190
|
+
|
|
1191
|
+
<p><strong>Feature Engineering Opportunities:</strong></p>
|
|
1192
|
+
<ul>
|
|
1193
|
+
<li>Create explicit interaction terms for top pairs (e.g., feature_A * feature_B)</li>
|
|
1194
|
+
<li>Consider non-linear transformations for interacting features</li>
|
|
1195
|
+
<li>Investigate domain-specific meanings of top interactions</li>
|
|
1196
|
+
</ul>
|
|
1197
|
+
"""
|
|
1198
|
+
|
|
1199
|
+
return recommendations.strip()
|
|
1200
|
+
|
|
1201
|
+
|
|
1202
|
+
def _generate_combined_overview(
|
|
1203
|
+
importance_results: dict[str, Any], interaction_results: dict[str, Any] | None
|
|
1204
|
+
) -> str:
|
|
1205
|
+
"""Generate overview text for combined report."""
|
|
1206
|
+
n_features = len(importance_results.get("consensus_ranking", []))
|
|
1207
|
+
|
|
1208
|
+
overview = f"""
|
|
1209
|
+
<p>This comprehensive report analyzes feature importance and interactions for a machine learning model
|
|
1210
|
+
with {n_features} features. The analysis combines multiple methodologies to provide robust insights.</p>
|
|
1211
|
+
|
|
1212
|
+
<p><strong>Report Contents:</strong></p>
|
|
1213
|
+
<ul>
|
|
1214
|
+
<li><strong>Part 1: Feature Importance</strong> - Which features the model relies on most</li>
|
|
1215
|
+
"""
|
|
1216
|
+
|
|
1217
|
+
if interaction_results is not None:
|
|
1218
|
+
overview += """ <li><strong>Part 2: Feature Interactions</strong> - How features combine and interact</li>
|
|
1219
|
+
"""
|
|
1220
|
+
|
|
1221
|
+
overview += """ </ul>
|
|
1222
|
+
"""
|
|
1223
|
+
|
|
1224
|
+
return overview.strip()
|
|
1225
|
+
|
|
1226
|
+
|
|
1227
|
+
def _generate_combined_recommendations(
|
|
1228
|
+
_importance_results: dict[str, Any], interaction_results: dict[str, Any] | None
|
|
1229
|
+
) -> str:
|
|
1230
|
+
"""Generate combined recommendations."""
|
|
1231
|
+
recommendations = """
|
|
1232
|
+
<p><strong>Prioritized Action Items:</strong></p>
|
|
1233
|
+
<ol>
|
|
1234
|
+
<li><strong>Focus on consensus features</strong>: Top features identified by multiple methods are most reliable</li>
|
|
1235
|
+
<li><strong>Investigate method disagreements</strong>: Understand why different methods rank features differently</li>
|
|
1236
|
+
"""
|
|
1237
|
+
|
|
1238
|
+
if interaction_results is not None:
|
|
1239
|
+
recommendations += """ <li><strong>Engineer interaction terms</strong>: Create explicit features for strong interactions</li>
|
|
1240
|
+
<li><strong>Analyze interaction clusters</strong>: Groups of interacting features may represent domain concepts</li>
|
|
1241
|
+
"""
|
|
1242
|
+
|
|
1243
|
+
recommendations += """ </ol>
|
|
1244
|
+
|
|
1245
|
+
<p><strong>Model Improvement Strategies:</strong></p>
|
|
1246
|
+
<ul>
|
|
1247
|
+
<li>Remove low-importance features to reduce overfitting risk</li>
|
|
1248
|
+
<li>Add domain knowledge to interpret top features and interactions</li>
|
|
1249
|
+
<li>Consider model architecture changes if interactions are prevalent</li>
|
|
1250
|
+
<li>Validate findings on out-of-sample data</li>
|
|
1251
|
+
</ul>
|
|
1252
|
+
"""
|
|
1253
|
+
|
|
1254
|
+
return recommendations.strip()
|
|
1255
|
+
|
|
1256
|
+
|
|
1257
|
+
def _export_figures_multipage(
    figures: list[go.Figure],
    output_path: Path,
    page_size: tuple[int, int],
    scale: float,
) -> str:
    """Write ``figures`` into one PDF file, one figure per page.

    Each figure is rendered to its own temporary PDF via ``write_image`` and
    the resulting files are then concatenated with a ``PdfWriter`` from
    ``pypdf`` (or ``PyPDF2`` if ``pypdf`` is not installed).

    Parameters
    ----------
    figures : list[go.Figure]
        Figures to export; the originals are not modified (a copy is resized).
    output_path : Path
        Output PDF file path
    page_size : tuple[int, int]
        (width, height) applied to each figure copy and passed to
        ``write_image``.
    scale : float
        Scale factor passed to ``write_image``.

    Returns
    -------
    str
        ``output_path`` made absolute, as a string.

    Raises
    ------
    ImportError
        If neither ``pypdf`` nor ``PyPDF2`` can be imported.
    """
    import tempfile

    # Prefer pypdf; fall back to PyPDF2 only if pypdf is missing.
    pdf_writer_class: type
    try:
        from pypdf import PdfWriter as _PypdfWriter
    except ImportError:
        try:
            from PyPDF2 import (
                PdfWriter as _Pypdf2Writer,  # type: ignore[import-not-found,unused-ignore]
            )
        except ImportError as e:
            raise ImportError(
                "pypdf or PyPDF2 is required for PDF merging. Install it with: pip install pypdf"
            ) from e
        pdf_writer_class = _Pypdf2Writer
    else:
        pdf_writer_class = _PypdfWriter

    page_width, page_height = page_size

    # The per-figure PDFs live in a scratch directory that is removed on exit,
    # so merging must happen inside this context.
    with tempfile.TemporaryDirectory() as scratch_dir:
        page_files = []

        for page_no, figure in enumerate(figures):
            page_file = Path(scratch_dir) / f"page_{page_no}.pdf"

            # Resize a copy so the caller's figure keeps its own layout.
            resized = go.Figure(figure)
            resized.update_layout(
                width=page_width,
                height=page_height,
                margin={"l": 50, "r": 50, "t": 80, "b": 50},  # print margins
            )
            resized.write_image(
                str(page_file),
                format="pdf",
                width=page_width,
                height=page_height,
                scale=scale,
            )
            page_files.append(page_file)

        # Concatenate the single-page PDFs in figure order.
        merged = pdf_writer_class()
        for page_file in page_files:
            merged.append(str(page_file))

        with open(output_path, "wb") as sink:
            merged.write(sink)

    return str(output_path.absolute())
|