ml4t-diagnostic 0.1.0a1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ml4t/diagnostic/AGENT.md +25 -0
- ml4t/diagnostic/__init__.py +166 -0
- ml4t/diagnostic/backends/__init__.py +10 -0
- ml4t/diagnostic/backends/adapter.py +192 -0
- ml4t/diagnostic/backends/polars_backend.py +899 -0
- ml4t/diagnostic/caching/__init__.py +40 -0
- ml4t/diagnostic/caching/cache.py +331 -0
- ml4t/diagnostic/caching/decorators.py +131 -0
- ml4t/diagnostic/caching/smart_cache.py +339 -0
- ml4t/diagnostic/config/AGENT.md +24 -0
- ml4t/diagnostic/config/README.md +267 -0
- ml4t/diagnostic/config/__init__.py +219 -0
- ml4t/diagnostic/config/barrier_config.py +277 -0
- ml4t/diagnostic/config/base.py +301 -0
- ml4t/diagnostic/config/event_config.py +148 -0
- ml4t/diagnostic/config/feature_config.py +404 -0
- ml4t/diagnostic/config/multi_signal_config.py +55 -0
- ml4t/diagnostic/config/portfolio_config.py +215 -0
- ml4t/diagnostic/config/report_config.py +391 -0
- ml4t/diagnostic/config/sharpe_config.py +202 -0
- ml4t/diagnostic/config/signal_config.py +206 -0
- ml4t/diagnostic/config/trade_analysis_config.py +310 -0
- ml4t/diagnostic/config/validation.py +279 -0
- ml4t/diagnostic/core/__init__.py +29 -0
- ml4t/diagnostic/core/numba_utils.py +315 -0
- ml4t/diagnostic/core/purging.py +372 -0
- ml4t/diagnostic/core/sampling.py +471 -0
- ml4t/diagnostic/errors/__init__.py +205 -0
- ml4t/diagnostic/evaluation/AGENT.md +26 -0
- ml4t/diagnostic/evaluation/__init__.py +437 -0
- ml4t/diagnostic/evaluation/autocorrelation.py +531 -0
- ml4t/diagnostic/evaluation/barrier_analysis.py +1050 -0
- ml4t/diagnostic/evaluation/binary_metrics.py +910 -0
- ml4t/diagnostic/evaluation/dashboard.py +715 -0
- ml4t/diagnostic/evaluation/diagnostic_plots.py +1037 -0
- ml4t/diagnostic/evaluation/distribution/__init__.py +499 -0
- ml4t/diagnostic/evaluation/distribution/moments.py +299 -0
- ml4t/diagnostic/evaluation/distribution/tails.py +777 -0
- ml4t/diagnostic/evaluation/distribution/tests.py +470 -0
- ml4t/diagnostic/evaluation/drift/__init__.py +139 -0
- ml4t/diagnostic/evaluation/drift/analysis.py +432 -0
- ml4t/diagnostic/evaluation/drift/domain_classifier.py +517 -0
- ml4t/diagnostic/evaluation/drift/population_stability_index.py +310 -0
- ml4t/diagnostic/evaluation/drift/wasserstein.py +388 -0
- ml4t/diagnostic/evaluation/event_analysis.py +647 -0
- ml4t/diagnostic/evaluation/excursion.py +390 -0
- ml4t/diagnostic/evaluation/feature_diagnostics.py +873 -0
- ml4t/diagnostic/evaluation/feature_outcome.py +666 -0
- ml4t/diagnostic/evaluation/framework.py +935 -0
- ml4t/diagnostic/evaluation/metric_registry.py +255 -0
- ml4t/diagnostic/evaluation/metrics/AGENT.md +23 -0
- ml4t/diagnostic/evaluation/metrics/__init__.py +133 -0
- ml4t/diagnostic/evaluation/metrics/basic.py +160 -0
- ml4t/diagnostic/evaluation/metrics/conditional_ic.py +469 -0
- ml4t/diagnostic/evaluation/metrics/feature_outcome.py +475 -0
- ml4t/diagnostic/evaluation/metrics/ic_statistics.py +446 -0
- ml4t/diagnostic/evaluation/metrics/importance_analysis.py +338 -0
- ml4t/diagnostic/evaluation/metrics/importance_classical.py +375 -0
- ml4t/diagnostic/evaluation/metrics/importance_mda.py +371 -0
- ml4t/diagnostic/evaluation/metrics/importance_shap.py +715 -0
- ml4t/diagnostic/evaluation/metrics/information_coefficient.py +527 -0
- ml4t/diagnostic/evaluation/metrics/interactions.py +772 -0
- ml4t/diagnostic/evaluation/metrics/monotonicity.py +226 -0
- ml4t/diagnostic/evaluation/metrics/risk_adjusted.py +324 -0
- ml4t/diagnostic/evaluation/multi_signal.py +550 -0
- ml4t/diagnostic/evaluation/portfolio_analysis/__init__.py +83 -0
- ml4t/diagnostic/evaluation/portfolio_analysis/analysis.py +734 -0
- ml4t/diagnostic/evaluation/portfolio_analysis/metrics.py +589 -0
- ml4t/diagnostic/evaluation/portfolio_analysis/results.py +334 -0
- ml4t/diagnostic/evaluation/report_generation.py +824 -0
- ml4t/diagnostic/evaluation/signal_selector.py +452 -0
- ml4t/diagnostic/evaluation/stat_registry.py +139 -0
- ml4t/diagnostic/evaluation/stationarity/__init__.py +97 -0
- ml4t/diagnostic/evaluation/stationarity/analysis.py +518 -0
- ml4t/diagnostic/evaluation/stationarity/augmented_dickey_fuller.py +296 -0
- ml4t/diagnostic/evaluation/stationarity/kpss_test.py +308 -0
- ml4t/diagnostic/evaluation/stationarity/phillips_perron.py +365 -0
- ml4t/diagnostic/evaluation/stats/AGENT.md +43 -0
- ml4t/diagnostic/evaluation/stats/__init__.py +191 -0
- ml4t/diagnostic/evaluation/stats/backtest_overfitting.py +219 -0
- ml4t/diagnostic/evaluation/stats/bootstrap.py +228 -0
- ml4t/diagnostic/evaluation/stats/deflated_sharpe_ratio.py +591 -0
- ml4t/diagnostic/evaluation/stats/false_discovery_rate.py +295 -0
- ml4t/diagnostic/evaluation/stats/hac_standard_errors.py +108 -0
- ml4t/diagnostic/evaluation/stats/minimum_track_record.py +408 -0
- ml4t/diagnostic/evaluation/stats/moments.py +164 -0
- ml4t/diagnostic/evaluation/stats/rademacher_adjustment.py +436 -0
- ml4t/diagnostic/evaluation/stats/reality_check.py +155 -0
- ml4t/diagnostic/evaluation/stats/sharpe_inference.py +219 -0
- ml4t/diagnostic/evaluation/themes.py +330 -0
- ml4t/diagnostic/evaluation/threshold_analysis.py +957 -0
- ml4t/diagnostic/evaluation/trade_analysis.py +1136 -0
- ml4t/diagnostic/evaluation/trade_dashboard/__init__.py +32 -0
- ml4t/diagnostic/evaluation/trade_dashboard/app.py +315 -0
- ml4t/diagnostic/evaluation/trade_dashboard/export/__init__.py +18 -0
- ml4t/diagnostic/evaluation/trade_dashboard/export/csv.py +82 -0
- ml4t/diagnostic/evaluation/trade_dashboard/export/html.py +276 -0
- ml4t/diagnostic/evaluation/trade_dashboard/io.py +166 -0
- ml4t/diagnostic/evaluation/trade_dashboard/normalize.py +304 -0
- ml4t/diagnostic/evaluation/trade_dashboard/stats.py +386 -0
- ml4t/diagnostic/evaluation/trade_dashboard/style.py +79 -0
- ml4t/diagnostic/evaluation/trade_dashboard/tabs/__init__.py +21 -0
- ml4t/diagnostic/evaluation/trade_dashboard/tabs/patterns.py +354 -0
- ml4t/diagnostic/evaluation/trade_dashboard/tabs/shap_analysis.py +280 -0
- ml4t/diagnostic/evaluation/trade_dashboard/tabs/stat_validation.py +186 -0
- ml4t/diagnostic/evaluation/trade_dashboard/tabs/worst_trades.py +236 -0
- ml4t/diagnostic/evaluation/trade_dashboard/types.py +129 -0
- ml4t/diagnostic/evaluation/trade_shap/__init__.py +102 -0
- ml4t/diagnostic/evaluation/trade_shap/alignment.py +188 -0
- ml4t/diagnostic/evaluation/trade_shap/characterize.py +413 -0
- ml4t/diagnostic/evaluation/trade_shap/cluster.py +302 -0
- ml4t/diagnostic/evaluation/trade_shap/explain.py +208 -0
- ml4t/diagnostic/evaluation/trade_shap/hypotheses/__init__.py +23 -0
- ml4t/diagnostic/evaluation/trade_shap/hypotheses/generator.py +290 -0
- ml4t/diagnostic/evaluation/trade_shap/hypotheses/matcher.py +251 -0
- ml4t/diagnostic/evaluation/trade_shap/hypotheses/templates.yaml +467 -0
- ml4t/diagnostic/evaluation/trade_shap/models.py +386 -0
- ml4t/diagnostic/evaluation/trade_shap/normalize.py +116 -0
- ml4t/diagnostic/evaluation/trade_shap/pipeline.py +263 -0
- ml4t/diagnostic/evaluation/trade_shap_dashboard.py +283 -0
- ml4t/diagnostic/evaluation/trade_shap_diagnostics.py +588 -0
- ml4t/diagnostic/evaluation/validated_cv.py +535 -0
- ml4t/diagnostic/evaluation/visualization.py +1050 -0
- ml4t/diagnostic/evaluation/volatility/__init__.py +45 -0
- ml4t/diagnostic/evaluation/volatility/analysis.py +351 -0
- ml4t/diagnostic/evaluation/volatility/arch.py +258 -0
- ml4t/diagnostic/evaluation/volatility/garch.py +460 -0
- ml4t/diagnostic/integration/__init__.py +48 -0
- ml4t/diagnostic/integration/backtest_contract.py +671 -0
- ml4t/diagnostic/integration/data_contract.py +316 -0
- ml4t/diagnostic/integration/engineer_contract.py +226 -0
- ml4t/diagnostic/logging/__init__.py +77 -0
- ml4t/diagnostic/logging/logger.py +245 -0
- ml4t/diagnostic/logging/performance.py +234 -0
- ml4t/diagnostic/logging/progress.py +234 -0
- ml4t/diagnostic/logging/wandb.py +412 -0
- ml4t/diagnostic/metrics/__init__.py +9 -0
- ml4t/diagnostic/metrics/percentiles.py +128 -0
- ml4t/diagnostic/py.typed +1 -0
- ml4t/diagnostic/reporting/__init__.py +43 -0
- ml4t/diagnostic/reporting/base.py +130 -0
- ml4t/diagnostic/reporting/html_renderer.py +275 -0
- ml4t/diagnostic/reporting/json_renderer.py +51 -0
- ml4t/diagnostic/reporting/markdown_renderer.py +117 -0
- ml4t/diagnostic/results/AGENT.md +24 -0
- ml4t/diagnostic/results/__init__.py +105 -0
- ml4t/diagnostic/results/barrier_results/__init__.py +36 -0
- ml4t/diagnostic/results/barrier_results/hit_rate.py +304 -0
- ml4t/diagnostic/results/barrier_results/precision_recall.py +266 -0
- ml4t/diagnostic/results/barrier_results/profit_factor.py +297 -0
- ml4t/diagnostic/results/barrier_results/tearsheet.py +397 -0
- ml4t/diagnostic/results/barrier_results/time_to_target.py +305 -0
- ml4t/diagnostic/results/barrier_results/validation.py +38 -0
- ml4t/diagnostic/results/base.py +177 -0
- ml4t/diagnostic/results/event_results.py +349 -0
- ml4t/diagnostic/results/feature_results.py +787 -0
- ml4t/diagnostic/results/multi_signal_results.py +431 -0
- ml4t/diagnostic/results/portfolio_results.py +281 -0
- ml4t/diagnostic/results/sharpe_results.py +448 -0
- ml4t/diagnostic/results/signal_results/__init__.py +74 -0
- ml4t/diagnostic/results/signal_results/ic.py +581 -0
- ml4t/diagnostic/results/signal_results/irtc.py +110 -0
- ml4t/diagnostic/results/signal_results/quantile.py +392 -0
- ml4t/diagnostic/results/signal_results/tearsheet.py +456 -0
- ml4t/diagnostic/results/signal_results/turnover.py +213 -0
- ml4t/diagnostic/results/signal_results/validation.py +147 -0
- ml4t/diagnostic/signal/AGENT.md +17 -0
- ml4t/diagnostic/signal/__init__.py +69 -0
- ml4t/diagnostic/signal/_report.py +152 -0
- ml4t/diagnostic/signal/_utils.py +261 -0
- ml4t/diagnostic/signal/core.py +275 -0
- ml4t/diagnostic/signal/quantile.py +148 -0
- ml4t/diagnostic/signal/result.py +214 -0
- ml4t/diagnostic/signal/signal_ic.py +129 -0
- ml4t/diagnostic/signal/turnover.py +182 -0
- ml4t/diagnostic/splitters/AGENT.md +19 -0
- ml4t/diagnostic/splitters/__init__.py +36 -0
- ml4t/diagnostic/splitters/base.py +501 -0
- ml4t/diagnostic/splitters/calendar.py +421 -0
- ml4t/diagnostic/splitters/calendar_config.py +91 -0
- ml4t/diagnostic/splitters/combinatorial.py +1064 -0
- ml4t/diagnostic/splitters/config.py +322 -0
- ml4t/diagnostic/splitters/cpcv/__init__.py +57 -0
- ml4t/diagnostic/splitters/cpcv/combinations.py +119 -0
- ml4t/diagnostic/splitters/cpcv/partitioning.py +263 -0
- ml4t/diagnostic/splitters/cpcv/purge_engine.py +379 -0
- ml4t/diagnostic/splitters/cpcv/windows.py +190 -0
- ml4t/diagnostic/splitters/group_isolation.py +329 -0
- ml4t/diagnostic/splitters/persistence.py +316 -0
- ml4t/diagnostic/splitters/utils.py +207 -0
- ml4t/diagnostic/splitters/walk_forward.py +757 -0
- ml4t/diagnostic/utils/__init__.py +42 -0
- ml4t/diagnostic/utils/config.py +542 -0
- ml4t/diagnostic/utils/dependencies.py +318 -0
- ml4t/diagnostic/utils/sessions.py +127 -0
- ml4t/diagnostic/validation/__init__.py +54 -0
- ml4t/diagnostic/validation/dataframe.py +274 -0
- ml4t/diagnostic/validation/returns.py +280 -0
- ml4t/diagnostic/validation/timeseries.py +299 -0
- ml4t/diagnostic/visualization/AGENT.md +19 -0
- ml4t/diagnostic/visualization/__init__.py +223 -0
- ml4t/diagnostic/visualization/backtest/__init__.py +98 -0
- ml4t/diagnostic/visualization/backtest/cost_attribution.py +762 -0
- ml4t/diagnostic/visualization/backtest/executive_summary.py +895 -0
- ml4t/diagnostic/visualization/backtest/interactive_controls.py +673 -0
- ml4t/diagnostic/visualization/backtest/statistical_validity.py +874 -0
- ml4t/diagnostic/visualization/backtest/tearsheet.py +565 -0
- ml4t/diagnostic/visualization/backtest/template_system.py +373 -0
- ml4t/diagnostic/visualization/backtest/trade_plots.py +1172 -0
- ml4t/diagnostic/visualization/barrier_plots.py +782 -0
- ml4t/diagnostic/visualization/core.py +1060 -0
- ml4t/diagnostic/visualization/dashboards/__init__.py +36 -0
- ml4t/diagnostic/visualization/dashboards/base.py +582 -0
- ml4t/diagnostic/visualization/dashboards/importance.py +801 -0
- ml4t/diagnostic/visualization/dashboards/interaction.py +263 -0
- ml4t/diagnostic/visualization/dashboards.py +43 -0
- ml4t/diagnostic/visualization/data_extraction/__init__.py +48 -0
- ml4t/diagnostic/visualization/data_extraction/importance.py +649 -0
- ml4t/diagnostic/visualization/data_extraction/interaction.py +504 -0
- ml4t/diagnostic/visualization/data_extraction/types.py +113 -0
- ml4t/diagnostic/visualization/data_extraction/validation.py +66 -0
- ml4t/diagnostic/visualization/feature_plots.py +888 -0
- ml4t/diagnostic/visualization/interaction_plots.py +618 -0
- ml4t/diagnostic/visualization/portfolio/__init__.py +41 -0
- ml4t/diagnostic/visualization/portfolio/dashboard.py +514 -0
- ml4t/diagnostic/visualization/portfolio/drawdown_plots.py +341 -0
- ml4t/diagnostic/visualization/portfolio/returns_plots.py +487 -0
- ml4t/diagnostic/visualization/portfolio/risk_plots.py +301 -0
- ml4t/diagnostic/visualization/report_generation.py +1343 -0
- ml4t/diagnostic/visualization/signal/__init__.py +103 -0
- ml4t/diagnostic/visualization/signal/dashboard.py +911 -0
- ml4t/diagnostic/visualization/signal/event_plots.py +514 -0
- ml4t/diagnostic/visualization/signal/ic_plots.py +635 -0
- ml4t/diagnostic/visualization/signal/multi_signal_dashboard.py +974 -0
- ml4t/diagnostic/visualization/signal/multi_signal_plots.py +603 -0
- ml4t/diagnostic/visualization/signal/quantile_plots.py +625 -0
- ml4t/diagnostic/visualization/signal/turnover_plots.py +400 -0
- ml4t/diagnostic/visualization/trade_shap/__init__.py +90 -0
- ml4t_diagnostic-0.1.0a1.dist-info/METADATA +1044 -0
- ml4t_diagnostic-0.1.0a1.dist-info/RECORD +242 -0
- ml4t_diagnostic-0.1.0a1.dist-info/WHEEL +4 -0
- ml4t_diagnostic-0.1.0a1.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,824 @@
|
|
|
1
|
+
"""Report generation for feature diagnostic analysis.
|
|
2
|
+
|
|
3
|
+
This module provides functionality to generate comprehensive reports from
|
|
4
|
+
FeatureDiagnostics results in multiple formats:
|
|
5
|
+
|
|
6
|
+
- **HTML**: Interactive reports with embedded Plotly charts
|
|
7
|
+
- **JSON**: Structured data for programmatic access
|
|
8
|
+
- **Markdown**: Documentation-friendly format
|
|
9
|
+
|
|
10
|
+
The reports can be generated for single features or multiple features
|
|
11
|
+
in comparative format.
|
|
12
|
+
|
|
13
|
+
Key Features:
|
|
14
|
+
- Interactive HTML reports with embedded visualizations
|
|
15
|
+
- Customizable templates for branding/styling
|
|
16
|
+
- JSON export with full diagnostic data
|
|
17
|
+
- Markdown reports for documentation
|
|
18
|
+
- Multi-feature comparison reports
|
|
19
|
+
- Standalone files (no external dependencies)
|
|
20
|
+
|
|
21
|
+
Example:
|
|
22
|
+
>>> from ml4t.diagnostic.evaluation import FeatureDiagnostics, generate_html_report
|
|
23
|
+
>>> import numpy as np
|
|
24
|
+
>>>
|
|
25
|
+
>>> # Run diagnostics
|
|
26
|
+
>>> diagnostics = FeatureDiagnostics()
|
|
27
|
+
>>> data = np.random.randn(1000)
|
|
28
|
+
>>> result = diagnostics.run_diagnostics(data, name="momentum")
|
|
29
|
+
>>>
|
|
30
|
+
>>> # Generate HTML report
|
|
31
|
+
>>> html = generate_html_report(result, include_plots=True)
|
|
32
|
+
>>> with open("diagnostics_report.html", "w") as f:
|
|
33
|
+
... f.write(html)
|
|
34
|
+
>>>
|
|
35
|
+
>>> # Generate JSON export
|
|
36
|
+
>>> json_data = generate_json_report(result)
|
|
37
|
+
>>> with open("diagnostics.json", "w") as f:
|
|
38
|
+
... f.write(json_data)
|
|
39
|
+
"""
|
|
40
|
+
|
|
41
|
+
from __future__ import annotations
|
|
42
|
+
|
|
43
|
+
import json
|
|
44
|
+
from datetime import datetime
|
|
45
|
+
from importlib.metadata import version as get_version
|
|
46
|
+
from pathlib import Path
|
|
47
|
+
from typing import TYPE_CHECKING, Any
|
|
48
|
+
|
|
49
|
+
if TYPE_CHECKING:
|
|
50
|
+
from .feature_diagnostics import FeatureDiagnosticsResult
|
|
51
|
+
|
|
52
|
+
__all__ = [
|
|
53
|
+
"generate_html_report",
|
|
54
|
+
"generate_json_report",
|
|
55
|
+
"generate_markdown_report",
|
|
56
|
+
"generate_multi_feature_html_report",
|
|
57
|
+
"save_report",
|
|
58
|
+
]
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def generate_html_report(
    result: FeatureDiagnosticsResult,
    include_plots: bool = True,
    title: str | None = None,
    template: str | None = None,
) -> str:
    """Render a feature-diagnostics result as an HTML document.

    Acts as a dispatcher: a caller-supplied ``template`` is handed to the
    custom-template renderer, otherwise the built-in default layout is used.

    Args:
        result: FeatureDiagnosticsResult to render.
        include_plots: Forwarded to the selected renderer; controls whether
            interactive Plotly charts are embedded.
        title: Custom report title, forwarded to the default renderer only.
            It is not passed along when ``template`` is supplied.
        template: Custom HTML template (None = use the default layout).

    Returns:
        The HTML document produced by the selected renderer.

    Example:
        >>> html = generate_html_report(result, include_plots=True)
        >>> with open("report.html", "w") as f:
        ...     f.write(html)
    """
    if template is None:
        # No template supplied: fall back to the built-in layout.
        return _generate_default_html(result, include_plots, title)

    return _render_custom_template(result, template, include_plots)
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
def generate_json_report(
    result: FeatureDiagnosticsResult,
    indent: int | None = 2,
) -> str:
    """Serialize diagnostic results to a JSON string.

    The result is first flattened to a plain dictionary, then a
    ``"_metadata"`` entry is attached carrying the generation timestamp, the
    installed package version and the export format version.

    Args:
        result: FeatureDiagnosticsResult from diagnostic analysis.
        indent: JSON indentation level passed to ``json.dumps``; ``None``
            produces compact single-line output.

    Returns:
        JSON string with the diagnostic data plus the ``"_metadata"`` entry.

    Example:
        >>> json_data = generate_json_report(result)
        >>> with open("diagnostics.json", "w") as f:
        ...     f.write(json_data)
    """
    payload = _result_to_dict(result)

    # Version lookup is best effort: the report must still be produced when
    # the distribution metadata cannot be read (e.g. running from a source
    # checkout), so any failure degrades to "unknown".
    try:
        pkg_version = get_version("ml4t-diagnostic")
    except Exception:
        pkg_version = "unknown"

    payload["_metadata"] = {
        # Naive local time, as returned by datetime.now().
        "generated_at": datetime.now().isoformat(),
        # Key name is kept as-is for consumers of the existing format; the
        # value is the ml4t-diagnostic package version.
        "qeval_version": pkg_version,
        "format_version": "1.0",
    }

    # default=str stringifies any value json cannot encode natively.
    return json.dumps(payload, indent=indent, default=str)
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
def generate_markdown_report(
    result: FeatureDiagnosticsResult,
    include_summary_table: bool = True,
    include_recommendations: bool = True,
) -> str:
    """Build a Markdown report for a feature-diagnostics result.

    The report starts with a header (feature name, generation time,
    observation count, health score) followed by one section per available
    piece of the result: flags, summary table, stationarity,
    autocorrelation, volatility, distribution and recommendations. Sections
    whose data is missing (``None``) or empty are left out.

    Args:
        result: FeatureDiagnosticsResult from diagnostic analysis.
        include_summary_table: Whether to render ``result.summary_df`` as a
            Markdown table (only when it is non-empty).
        include_recommendations: Whether to append the numbered
            recommendation list (only when there are recommendations).

    Returns:
        Markdown-formatted report; lines are joined with ``"\\n"``.

    Example:
        >>> markdown = generate_markdown_report(result)
        >>> with open("diagnostics.md", "w") as f:
        ...     f.write(markdown)
    """

    def yes_no(flag) -> str:
        return "Yes" if flag else "No"

    def significance(flag) -> str:
        return "significant" if flag else "not significant"

    # Header
    out: list[str] = [
        f"# Feature Diagnostics: {result.feature_name}",
        "",
        f"**Generated**: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}",
        f"**Observations**: {result.n_obs:,}",
        f"**Health Score**: {result.health_score:.2f}/1.00",
        "",
    ]

    # Flags (if any)
    if result.flags:
        out += ["## ⚠️ Flags", ""]
        out.extend(f"- {flag}" for flag in result.flags)
        out.append("")

    # Summary table
    if include_summary_table and not result.summary_df.empty:
        out += [
            "## Test Summary",
            "",
            result.summary_df.to_markdown(index=False),
            "",
        ]

    # Stationarity: one bullet per test that actually ran.
    stationarity = result.stationarity
    if stationarity is not None:
        out += [
            "## Stationarity Analysis",
            "",
            f"**Consensus**: {stationarity.consensus}",
            "",
        ]
        ran_tests = (
            ("ADF", stationarity.adf_result),
            ("KPSS", stationarity.kpss_result),
            ("PP", stationarity.pp_result),
        )
        for label, test in ran_tests:
            if test is None:
                continue
            out.append(
                f"- **{label}**: statistic={test.test_statistic:.4f}, p-value={test.p_value:.4f}"
            )
        out.append("")

    # Autocorrelation
    autocorr = result.autocorrelation
    if autocorr is not None:
        acf_count = len(autocorr.significant_acf_lags)
        pacf_count = len(autocorr.significant_pacf_lags)
        out += [
            "## Autocorrelation Analysis",
            "",
            f"- **Significant ACF lags**: {acf_count}",
            f"- **Significant PACF lags**: {pacf_count}",
            f"- **Suggested ARIMA order**: {autocorr.suggested_arima_order}",
            f"- **White noise**: {yes_no(autocorr.is_white_noise)}",
            "",
        ]

    # Volatility
    vol = result.volatility
    if vol is not None:
        out += [
            "## Volatility Analysis",
            "",
            f"- **Volatility clustering**: {yes_no(vol.has_volatility_clustering)}",
        ]
        arch = vol.arch_lm_result
        if arch is not None:
            out.append(
                f"- **ARCH-LM**: statistic={arch.test_statistic:.4f}, p-value={arch.p_value:.4f}"
            )
        out.append("")

    # Distribution
    dist = result.distribution
    if dist is not None:
        out += [
            "## Distribution Analysis",
            "",
            f"- **Recommended distribution**: {dist.recommended_distribution}",
            f"- **Is normal**: {yes_no(dist.is_normal)}",
        ]

        moments = dist.moments_result
        if moments is not None:
            out += [
                f"- **Mean**: {moments.mean:.6f}",
                f"- **Std Dev**: {moments.std:.6f}",
                f"- **Skewness**: {moments.skewness:.4f} ({significance(moments.skewness_significant)})",
                f"- **Excess Kurtosis**: {moments.excess_kurtosis:.4f} ({significance(moments.excess_kurtosis_significant)})",
            ]

        jb = dist.jarque_bera_result
        if jb is not None:
            out.append(
                f"- **Jarque-Bera**: statistic={jb.statistic:.4f}, p-value={jb.p_value:.4f}"
            )

        out.append("")

    # Recommendations, numbered from 1
    if include_recommendations and result.recommendations:
        out += ["## Recommendations", ""]
        out.extend(
            f"{idx}. {rec}" for idx, rec in enumerate(result.recommendations, 1)
        )
        out.append("")

    return "\n".join(out)
|
|
278
|
+
|
|
279
|
+
|
|
280
|
+
def generate_multi_feature_html_report(
    results: list[FeatureDiagnosticsResult],
    include_plots: bool = True,
    title: str = "Multi-Feature Diagnostic Report",
) -> str:
    """Generate a comparative HTML report for multiple features.

    The document consists of the shared HTML header, an overview (feature
    count and generation time), a comparison table with one row per feature,
    one detail section per feature, and the shared footer.

    Args:
        results: List of FeatureDiagnosticsResult objects; must not be empty.
        include_plots: Forwarded to the per-feature section renderer;
            controls whether interactive Plotly charts are embedded.
        title: Report title, passed to the header renderer.

    Returns:
        Complete HTML document as a string.

    Raises:
        ValueError: If ``results`` is empty.

    Example:
        >>> html = generate_multi_feature_html_report(results)
        >>> with open("portfolio_diagnostics.html", "w") as f:
        ...     f.write(html)
    """
    if not results:
        raise ValueError("results list cannot be empty")

    # Imported locally so importing this module does not require pandas.
    from html import escape

    import pandas as pd

    # Build comparison table: optional columns appear only for features
    # whose corresponding analysis was run.
    comparison_data = []
    for result in results:
        row = {
            "Feature": result.feature_name,
            "N": result.n_obs,
            "Health Score": f"{result.health_score:.2f}",
            "Flags": len(result.flags),
        }

        if result.stationarity is not None:
            row["Stationarity"] = result.stationarity.consensus

        if result.autocorrelation is not None:
            row["Significant ACF Lags"] = len(result.autocorrelation.significant_acf_lags)

        if result.volatility is not None:
            row["Vol Clustering"] = "Yes" if result.volatility.has_volatility_clustering else "No"

        if result.distribution is not None:
            row["Distribution"] = result.distribution.recommended_distribution

        comparison_data.append(row)

    comparison_df = pd.DataFrame(comparison_data)

    generated_at = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

    html_parts = [
        # Header
        _html_header(title),
        # Overview section
        "<h2>Overview</h2>",
        f"<p><strong>Features analyzed:</strong> {len(results)}</p>",
        f"<p><strong>Generated:</strong> {generated_at}</p>",
        # Comparison table (DataFrame.to_html escapes cell contents by default)
        "<h2>Comparison Table</h2>",
        comparison_df.to_html(index=False, classes="dataframe"),
    ]

    # Individual feature sections
    for result in results:
        html_parts.append("<hr>")
        # Escape the name so characters such as "<" or "&" in a feature name
        # cannot break the markup; this matches the escaping already applied
        # to the same value inside the comparison table.
        html_parts.append(f"<h2>Feature: {escape(str(result.feature_name))}</h2>")
        html_parts.append(_generate_feature_section_html(result, include_plots))

    # Footer
    html_parts.append(_html_footer())

    return "\n".join(html_parts)
|
|
366
|
+
|
|
367
|
+
|
|
368
|
+
def save_report(
    content: str,
    filepath: str | Path,
    overwrite: bool = False,
) -> Path:
    """Write report content to disk as UTF-8 text.

    Missing parent directories are created before writing.

    Args:
        content: Report content (HTML, JSON, or Markdown).
        filepath: Destination file path.
        overwrite: Whether an existing file may be replaced.

    Returns:
        Path to the saved file.

    Raises:
        FileExistsError: If the file exists and ``overwrite`` is False.

    Example:
        >>> html = generate_html_report(result)
        >>> save_report(html, "diagnostics.html", overwrite=True)
    """
    destination = Path(filepath)

    # Refuse to clobber an existing file unless explicitly allowed.
    if not overwrite and destination.exists():
        raise FileExistsError(f"File {destination} already exists. Set overwrite=True to replace.")

    destination.parent.mkdir(parents=True, exist_ok=True)
    destination.write_text(content, encoding="utf-8")

    return destination
|
|
401
|
+
|
|
402
|
+
|
|
403
|
+
# ============================================================================
|
|
404
|
+
# Private helper functions
|
|
405
|
+
# ============================================================================
|
|
406
|
+
|
|
407
|
+
|
|
408
|
+
def _result_to_dict(result: FeatureDiagnosticsResult) -> dict[str, Any]:
|
|
409
|
+
"""Convert FeatureDiagnosticsResult to dictionary.
|
|
410
|
+
|
|
411
|
+
Args:
|
|
412
|
+
result: Diagnostic result object
|
|
413
|
+
|
|
414
|
+
Returns:
|
|
415
|
+
Dictionary with all diagnostic data
|
|
416
|
+
"""
|
|
417
|
+
data: dict[str, Any] = {
|
|
418
|
+
"feature_name": result.feature_name,
|
|
419
|
+
"n_obs": result.n_obs,
|
|
420
|
+
"health_score": result.health_score,
|
|
421
|
+
"flags": result.flags,
|
|
422
|
+
"recommendations": result.recommendations,
|
|
423
|
+
}
|
|
424
|
+
|
|
425
|
+
# Summary DataFrame
|
|
426
|
+
if not result.summary_df.empty:
|
|
427
|
+
data["summary"] = result.summary_df.to_dict(orient="records")
|
|
428
|
+
|
|
429
|
+
# Module results
|
|
430
|
+
if result.stationarity is not None:
|
|
431
|
+
data["stationarity"] = _stationarity_to_dict(result.stationarity)
|
|
432
|
+
|
|
433
|
+
if result.autocorrelation is not None:
|
|
434
|
+
data["autocorrelation"] = _autocorrelation_to_dict(result.autocorrelation)
|
|
435
|
+
|
|
436
|
+
if result.volatility is not None:
|
|
437
|
+
data["volatility"] = _volatility_to_dict(result.volatility)
|
|
438
|
+
|
|
439
|
+
if result.distribution is not None:
|
|
440
|
+
data["distribution"] = _distribution_to_dict(result.distribution)
|
|
441
|
+
|
|
442
|
+
return data
|
|
443
|
+
|
|
444
|
+
|
|
445
|
+
def _stationarity_to_dict(result) -> dict[str, Any]:
    """Convert StationarityAnalysisResult to dict.

    The consensus is always exported; each of the ADF, KPSS and PP test
    results is exported under "adf" / "kpss" / "pp" only when present.
    """
    payload = {"consensus": result.consensus}

    # The three tests expose the same four fields, so handle them uniformly.
    for key in ("adf", "kpss", "pp"):
        test = getattr(result, f"{key}_result")
        if test is None:
            continue
        payload[key] = {
            "test_statistic": test.test_statistic,
            "p_value": test.p_value,
            "critical_values": test.critical_values,
            "is_stationary": test.is_stationary,
        }

    return payload
|
|
474
|
+
|
|
475
|
+
|
|
476
|
+
def _autocorrelation_to_dict(result) -> dict[str, Any]:
    """Convert AutocorrelationAnalysisResult to dict.

    Args:
        result: Autocorrelation analysis result. ``acf_result`` and
            ``pacf_result`` may be ``None``.

    Returns:
        Dictionary with the ACF/PACF values as lists (``None`` when the
        corresponding sub-result is missing), the significant lags, the
        suggested ARIMA order as a list, and the white-noise flag.
    """
    # Presence is tested with ``is not None`` (as the sibling serializers do)
    # rather than truthiness, so a result object that happens to evaluate as
    # falsy is still exported instead of being silently replaced by None.
    acf = result.acf_result
    pacf = result.pacf_result
    return {
        "acf_values": acf.acf_values.tolist() if acf is not None else None,
        "pacf_values": pacf.pacf_values.tolist() if pacf is not None else None,
        "significant_acf_lags": result.significant_acf_lags,
        "significant_pacf_lags": result.significant_pacf_lags,
        # Converted to a list so the order serializes as a JSON array.
        "suggested_arima_order": list(result.suggested_arima_order),
        "is_white_noise": result.is_white_noise,
    }
|
|
486
|
+
|
|
487
|
+
|
|
488
|
+
def _volatility_to_dict(result) -> dict[str, Any]:
    """Convert VolatilityAnalysisResult to dict.

    The clustering flag and persistence are always exported; the ARCH-LM
    and GARCH sub-results are added under "arch_lm" / "garch" when present.
    """
    payload = dict(
        has_volatility_clustering=result.has_volatility_clustering,
        persistence=result.persistence,
    )

    arch_lm = result.arch_lm_result
    if arch_lm is not None:
        payload["arch_lm"] = {
            field: getattr(arch_lm, field)
            for field in ("test_statistic", "p_value", "lags")
        }

    garch = result.garch_result
    if garch is not None:
        payload["garch"] = {
            field: getattr(garch, field)
            for field in ("omega", "alpha", "beta", "persistence", "half_life")
        }

    return payload
|
|
512
|
+
|
|
513
|
+
|
|
514
|
+
def _distribution_to_dict(result) -> dict[str, Any]:
    """Convert DistributionAnalysisResult to dict.

    Always exports the normality verdict and the recommended distribution
    (with its degrees of freedom); the moments, Jarque-Bera, Shapiro-Wilk
    and tail-analysis sub-results are added only when they are present.
    """
    payload = dict(
        is_normal=result.is_normal,
        recommended_distribution=result.recommended_distribution,
        recommended_df=result.recommended_df,
    )

    moments = result.moments_result
    if moments is not None:
        moment_fields = (
            "mean",
            "std",
            "skewness",
            "skewness_significant",
            "excess_kurtosis",
            "excess_kurtosis_significant",
        )
        payload["moments"] = {name: getattr(moments, name) for name in moment_fields}

    # Both normality tests expose the same three fields.
    normality_fields = ("statistic", "p_value", "is_normal")

    jarque_bera = result.jarque_bera_result
    if jarque_bera is not None:
        payload["jarque_bera"] = {name: getattr(jarque_bera, name) for name in normality_fields}

    shapiro_wilk = result.shapiro_wilk_result
    if shapiro_wilk is not None:
        payload["shapiro_wilk"] = {name: getattr(shapiro_wilk, name) for name in normality_fields}

    # Tail analysis is exported only when the Hill estimate exists.
    tails = result.tail_analysis_result
    hill = None if tails is None else tails.hill_result
    if hill is not None:
        payload["tail_analysis"] = {
            "classification": hill.classification,
            "tail_index": hill.tail_index,
            "has_heavy_tails": hill.classification in ["heavy", "very_heavy"],
            "best_fit": tails.best_fit,
        }

    return payload
|
|
559
|
+
|
|
560
|
+
|
|
561
|
+
def _generate_default_html(
    result: FeatureDiagnosticsResult,
    include_plots: bool,
    title: str | None,
) -> str:
    """Generate HTML report using default template.

    Free-text values taken from the result (feature name, flags and
    recommendations) are HTML-escaped before being placed in the page, so
    text such as ``p < 0.05`` or a feature called ``a<b`` renders literally
    instead of producing malformed markup.

    Args:
        result: Diagnostic result to render.
        include_plots: Forwarded to the per-feature section generator.
        title: Report title; when falsy, a title is derived from the
            feature name. It is passed to ``_html_header`` unchanged.

    Returns:
        The complete HTML document as a single string.
    """
    from html import escape  # function-scope import: only needed here

    html_parts = []

    # Header
    report_title = title or f"Feature Diagnostics: {result.feature_name}"
    html_parts.append(_html_header(report_title))

    # Summary section
    html_parts.append("<h2>Summary</h2>")
    html_parts.append(f"<p><strong>Feature:</strong> {escape(str(result.feature_name))}</p>")
    html_parts.append(f"<p><strong>Observations:</strong> {result.n_obs:,}</p>")
    html_parts.append(f"<p><strong>Health Score:</strong> {result.health_score:.2f}/1.00</p>")

    if result.flags:
        html_parts.append("<h3>⚠️ Flags</h3>")
        html_parts.append("<ul>")
        html_parts.extend(f"<li>{escape(str(flag))}</li>" for flag in result.flags)
        html_parts.append("</ul>")

    # Summary table (DataFrame.to_html escapes cell contents by default).
    # The None guard mirrors the check used when rendering custom templates.
    summary_df = result.summary_df
    if summary_df is not None and not summary_df.empty:
        html_parts.append("<h3>Test Summary</h3>")
        html_parts.append(summary_df.to_html(index=False, classes="dataframe"))

    # Feature section
    html_parts.append(_generate_feature_section_html(result, include_plots))

    # Recommendations
    if result.recommendations:
        html_parts.append("<h2>Recommendations</h2>")
        html_parts.append("<ol>")
        html_parts.extend(f"<li>{escape(str(rec))}</li>" for rec in result.recommendations)
        html_parts.append("</ol>")

    # Footer
    html_parts.append(_html_footer())

    return "\n".join(html_parts)
|
|
606
|
+
|
|
607
|
+
|
|
608
|
+
def _generate_feature_section_html(
    result: FeatureDiagnosticsResult,
    include_plots: bool,
) -> str:
    """Build the per-module HTML fragments for one feature.

    Emits one ``<h3>`` subsection for each analysis module present on
    ``result`` (stationarity, autocorrelation, volatility, distribution),
    followed by a placeholder "Visualizations" subsection when
    ``include_plots`` is true.

    Args:
        result: Diagnostic result whose module results are rendered.
        include_plots: Whether to append the visualization placeholder.

    Returns:
        The fragments joined with newlines (empty string if nothing applies).
    """
    chunks: list[str] = []
    emit = chunks.append

    # Stationarity
    if result.stationarity is not None:
        emit("<h3>Stationarity Analysis</h3>")
        emit(f"<p><strong>Consensus:</strong> {result.stationarity.consensus}</p>")

        if (adf := result.stationarity.adf_result) is not None:
            emit(
                f"<p><strong>ADF:</strong> statistic={adf.test_statistic:.4f}, p-value={adf.p_value:.4f}</p>"
            )

        if (kpss := result.stationarity.kpss_result) is not None:
            emit(
                f"<p><strong>KPSS:</strong> statistic={kpss.test_statistic:.4f}, p-value={kpss.p_value:.4f}</p>"
            )

        if (pp := result.stationarity.pp_result) is not None:
            emit(
                f"<p><strong>PP:</strong> statistic={pp.test_statistic:.4f}, p-value={pp.p_value:.4f}</p>"
            )

    # Autocorrelation
    if result.autocorrelation is not None:
        emit("<h3>Autocorrelation Analysis</h3>")
        n_sig_acf = len(result.autocorrelation.significant_acf_lags)
        n_sig_pacf = len(result.autocorrelation.significant_pacf_lags)
        chunks.extend(
            [
                f"<p><strong>Significant ACF lags:</strong> {n_sig_acf}</p>",
                f"<p><strong>Significant PACF lags:</strong> {n_sig_pacf}</p>",
                f"<p><strong>Suggested ARIMA order:</strong> {result.autocorrelation.suggested_arima_order}</p>",
                f"<p><strong>White noise:</strong> {'Yes' if result.autocorrelation.is_white_noise else 'No'}</p>",
            ]
        )

    # Volatility
    if result.volatility is not None:
        emit("<h3>Volatility Analysis</h3>")
        if result.volatility.has_volatility_clustering:
            has_clustering = "Yes"
        else:
            has_clustering = "No"
        emit(f"<p><strong>Volatility clustering:</strong> {has_clustering}</p>")

        if (arch := result.volatility.arch_lm_result) is not None:
            emit(
                f"<p><strong>ARCH-LM:</strong> statistic={arch.test_statistic:.4f}, p-value={arch.p_value:.4f}</p>"
            )

    # Distribution
    if result.distribution is not None:
        emit("<h3>Distribution Analysis</h3>")
        emit(
            f"<p><strong>Recommended distribution:</strong> {result.distribution.recommended_distribution}</p>"
        )
        emit(
            f"<p><strong>Is normal:</strong> {'Yes' if result.distribution.is_normal else 'No'}</p>"
        )

        if (mom := result.distribution.moments_result) is not None:
            emit(f"<p><strong>Mean:</strong> {mom.mean:.6f}</p>")
            emit(f"<p><strong>Std Dev:</strong> {mom.std:.6f}</p>")
            emit(
                f"<p><strong>Skewness:</strong> {mom.skewness:.4f} "
                f"({'significant' if mom.skewness_significant else 'not significant'})</p>"
            )
            emit(
                f"<p><strong>Excess Kurtosis:</strong> {mom.excess_kurtosis:.4f} "
                f"({'significant' if mom.excess_kurtosis_significant else 'not significant'})</p>"
            )

        if (jb := result.distribution.jarque_bera_result) is not None:
            emit(
                f"<p><strong>Jarque-Bera:</strong> statistic={jb.statistic:.4f}, p-value={jb.p_value:.4f}</p>"
            )

    # Plots (if requested) -- currently placeholder text only.
    # NOTE: Plot embedding planned for future version using diagnostic_plots module
    if include_plots:
        chunks.extend(
            [
                "<h3>Visualizations</h3>",
                "<p><em>Interactive Plotly charts would be embedded here.</em></p>",
                "<p><em>Implementation note: Requires creating plots from result data</em></p>",
            ]
        )

    return "\n".join(chunks)
|
|
703
|
+
|
|
704
|
+
|
|
705
|
+
def _html_header(title: str) -> str:
    """Generate HTML header with styling.

    The title is HTML-escaped before interpolation so that titles containing
    ``<``, ``>`` or ``&`` (for example one derived from a feature named
    ``a<b``) produce well-formed markup instead of being parsed as tags.

    Args:
        title: Plain-text report title, used for both ``<title>`` and the
            top-level ``<h1>`` heading.

    Returns:
        HTML from ``<!DOCTYPE html>`` through the ``<h1>`` heading. The
        ``<body>`` element is left open; closing tags are not emitted here.
    """
    from html import escape  # function-scope import: only needed here

    safe_title = escape(str(title))
    # Doubled braces are literal CSS braces inside the f-string.
    return f"""<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>{safe_title}</title>
    <script src="https://cdn.plot.ly/plotly-2.26.0.min.js"></script>
    <style>
        body {{
            font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, 'Helvetica Neue', Arial, sans-serif;
            max-width: 1200px;
            margin: 0 auto;
            padding: 20px;
            background-color: #f5f5f5;
            color: #333;
        }}
        h1, h2, h3 {{
            color: #2c3e50;
        }}
        h1 {{
            border-bottom: 3px solid #3498db;
            padding-bottom: 10px;
        }}
        h2 {{
            border-bottom: 2px solid #95a5a6;
            padding-bottom: 5px;
            margin-top: 30px;
        }}
        .dataframe {{
            border-collapse: collapse;
            margin: 20px 0;
            width: 100%;
            background-color: white;
            box-shadow: 0 2px 4px rgba(0,0,0,0.1);
        }}
        .dataframe th {{
            background-color: #3498db;
            color: white;
            padding: 12px;
            text-align: left;
        }}
        .dataframe td {{
            padding: 10px;
            border-bottom: 1px solid #ddd;
        }}
        .dataframe tr:hover {{
            background-color: #f1f1f1;
        }}
        ul, ol {{
            line-height: 1.8;
        }}
        p {{
            line-height: 1.6;
        }}
        hr {{
            border: none;
            border-top: 2px solid #95a5a6;
            margin: 40px 0;
        }}
        .footer {{
            margin-top: 50px;
            padding-top: 20px;
            border-top: 1px solid #ddd;
            text-align: center;
            color: #7f8c8d;
            font-size: 0.9em;
        }}
    </style>
</head>
<body>
    <h1>{safe_title}</h1>
"""
|
|
779
|
+
|
|
780
|
+
|
|
781
|
+
def _html_footer() -> str:
    """Generate HTML footer.

    Returns:
        The footer ``<div>`` stamped with the current local time
        (``YYYY-MM-DD HH:MM:SS``), followed by the closing ``</body>`` and
        ``</html>`` tags.
    """
    generated_at = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    return f"""
<div class="footer">
    <p>Generated by ML4T Diagnostic {generated_at}</p>
    <p>Interactive diagnostic reports for quantitative trading features</p>
</div>
</body>
</html>
"""
|
|
791
|
+
|
|
792
|
+
|
|
793
|
+
def _render_custom_template(
    result: FeatureDiagnosticsResult,
    template: str,
    _include_plots: bool,
) -> str:
    """Render custom template with result data.

    Placeholders available to the template:
    ``{feature_name}``, ``{n_obs}``, ``{health_score}`` (two decimals),
    ``{timestamp}``, ``{stationarity_consensus}`` and ``{summary_table}``.
    The last two render as an empty string when the result has no
    stationarity analysis / no summary rows, so a template may always
    reference them without raising ``KeyError``.

    Args:
        result: Diagnostic result
        template: Template string with {placeholders} in ``str.format``
            syntax (literal braces must be doubled)
        _include_plots: Whether to include plots (currently unused)

    Returns:
        Rendered HTML

    Raises:
        KeyError: If the template references a placeholder that is not
            listed above.
    """
    # Prepare template variables; optional ones start as empty strings so
    # they are always defined for str.format.
    template_vars: dict[str, Any] = {
        "feature_name": result.feature_name,
        "n_obs": result.n_obs,
        "health_score": f"{result.health_score:.2f}",
        "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
        "stationarity_consensus": "",
        "summary_table": "",
    }

    # Add test results
    if result.stationarity is not None:
        template_vars["stationarity_consensus"] = result.stationarity.consensus

    if result.summary_df is not None and not result.summary_df.empty:
        template_vars["summary_table"] = result.summary_df.to_html(index=False, classes="dataframe")

    # Render template
    return template.format(**template_vars)
|