ml4t-diagnostic 0.1.0a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (242) hide show
  1. ml4t/diagnostic/AGENT.md +25 -0
  2. ml4t/diagnostic/__init__.py +166 -0
  3. ml4t/diagnostic/backends/__init__.py +10 -0
  4. ml4t/diagnostic/backends/adapter.py +192 -0
  5. ml4t/diagnostic/backends/polars_backend.py +899 -0
  6. ml4t/diagnostic/caching/__init__.py +40 -0
  7. ml4t/diagnostic/caching/cache.py +331 -0
  8. ml4t/diagnostic/caching/decorators.py +131 -0
  9. ml4t/diagnostic/caching/smart_cache.py +339 -0
  10. ml4t/diagnostic/config/AGENT.md +24 -0
  11. ml4t/diagnostic/config/README.md +267 -0
  12. ml4t/diagnostic/config/__init__.py +219 -0
  13. ml4t/diagnostic/config/barrier_config.py +277 -0
  14. ml4t/diagnostic/config/base.py +301 -0
  15. ml4t/diagnostic/config/event_config.py +148 -0
  16. ml4t/diagnostic/config/feature_config.py +404 -0
  17. ml4t/diagnostic/config/multi_signal_config.py +55 -0
  18. ml4t/diagnostic/config/portfolio_config.py +215 -0
  19. ml4t/diagnostic/config/report_config.py +391 -0
  20. ml4t/diagnostic/config/sharpe_config.py +202 -0
  21. ml4t/diagnostic/config/signal_config.py +206 -0
  22. ml4t/diagnostic/config/trade_analysis_config.py +310 -0
  23. ml4t/diagnostic/config/validation.py +279 -0
  24. ml4t/diagnostic/core/__init__.py +29 -0
  25. ml4t/diagnostic/core/numba_utils.py +315 -0
  26. ml4t/diagnostic/core/purging.py +372 -0
  27. ml4t/diagnostic/core/sampling.py +471 -0
  28. ml4t/diagnostic/errors/__init__.py +205 -0
  29. ml4t/diagnostic/evaluation/AGENT.md +26 -0
  30. ml4t/diagnostic/evaluation/__init__.py +437 -0
  31. ml4t/diagnostic/evaluation/autocorrelation.py +531 -0
  32. ml4t/diagnostic/evaluation/barrier_analysis.py +1050 -0
  33. ml4t/diagnostic/evaluation/binary_metrics.py +910 -0
  34. ml4t/diagnostic/evaluation/dashboard.py +715 -0
  35. ml4t/diagnostic/evaluation/diagnostic_plots.py +1037 -0
  36. ml4t/diagnostic/evaluation/distribution/__init__.py +499 -0
  37. ml4t/diagnostic/evaluation/distribution/moments.py +299 -0
  38. ml4t/diagnostic/evaluation/distribution/tails.py +777 -0
  39. ml4t/diagnostic/evaluation/distribution/tests.py +470 -0
  40. ml4t/diagnostic/evaluation/drift/__init__.py +139 -0
  41. ml4t/diagnostic/evaluation/drift/analysis.py +432 -0
  42. ml4t/diagnostic/evaluation/drift/domain_classifier.py +517 -0
  43. ml4t/diagnostic/evaluation/drift/population_stability_index.py +310 -0
  44. ml4t/diagnostic/evaluation/drift/wasserstein.py +388 -0
  45. ml4t/diagnostic/evaluation/event_analysis.py +647 -0
  46. ml4t/diagnostic/evaluation/excursion.py +390 -0
  47. ml4t/diagnostic/evaluation/feature_diagnostics.py +873 -0
  48. ml4t/diagnostic/evaluation/feature_outcome.py +666 -0
  49. ml4t/diagnostic/evaluation/framework.py +935 -0
  50. ml4t/diagnostic/evaluation/metric_registry.py +255 -0
  51. ml4t/diagnostic/evaluation/metrics/AGENT.md +23 -0
  52. ml4t/diagnostic/evaluation/metrics/__init__.py +133 -0
  53. ml4t/diagnostic/evaluation/metrics/basic.py +160 -0
  54. ml4t/diagnostic/evaluation/metrics/conditional_ic.py +469 -0
  55. ml4t/diagnostic/evaluation/metrics/feature_outcome.py +475 -0
  56. ml4t/diagnostic/evaluation/metrics/ic_statistics.py +446 -0
  57. ml4t/diagnostic/evaluation/metrics/importance_analysis.py +338 -0
  58. ml4t/diagnostic/evaluation/metrics/importance_classical.py +375 -0
  59. ml4t/diagnostic/evaluation/metrics/importance_mda.py +371 -0
  60. ml4t/diagnostic/evaluation/metrics/importance_shap.py +715 -0
  61. ml4t/diagnostic/evaluation/metrics/information_coefficient.py +527 -0
  62. ml4t/diagnostic/evaluation/metrics/interactions.py +772 -0
  63. ml4t/diagnostic/evaluation/metrics/monotonicity.py +226 -0
  64. ml4t/diagnostic/evaluation/metrics/risk_adjusted.py +324 -0
  65. ml4t/diagnostic/evaluation/multi_signal.py +550 -0
  66. ml4t/diagnostic/evaluation/portfolio_analysis/__init__.py +83 -0
  67. ml4t/diagnostic/evaluation/portfolio_analysis/analysis.py +734 -0
  68. ml4t/diagnostic/evaluation/portfolio_analysis/metrics.py +589 -0
  69. ml4t/diagnostic/evaluation/portfolio_analysis/results.py +334 -0
  70. ml4t/diagnostic/evaluation/report_generation.py +824 -0
  71. ml4t/diagnostic/evaluation/signal_selector.py +452 -0
  72. ml4t/diagnostic/evaluation/stat_registry.py +139 -0
  73. ml4t/diagnostic/evaluation/stationarity/__init__.py +97 -0
  74. ml4t/diagnostic/evaluation/stationarity/analysis.py +518 -0
  75. ml4t/diagnostic/evaluation/stationarity/augmented_dickey_fuller.py +296 -0
  76. ml4t/diagnostic/evaluation/stationarity/kpss_test.py +308 -0
  77. ml4t/diagnostic/evaluation/stationarity/phillips_perron.py +365 -0
  78. ml4t/diagnostic/evaluation/stats/AGENT.md +43 -0
  79. ml4t/diagnostic/evaluation/stats/__init__.py +191 -0
  80. ml4t/diagnostic/evaluation/stats/backtest_overfitting.py +219 -0
  81. ml4t/diagnostic/evaluation/stats/bootstrap.py +228 -0
  82. ml4t/diagnostic/evaluation/stats/deflated_sharpe_ratio.py +591 -0
  83. ml4t/diagnostic/evaluation/stats/false_discovery_rate.py +295 -0
  84. ml4t/diagnostic/evaluation/stats/hac_standard_errors.py +108 -0
  85. ml4t/diagnostic/evaluation/stats/minimum_track_record.py +408 -0
  86. ml4t/diagnostic/evaluation/stats/moments.py +164 -0
  87. ml4t/diagnostic/evaluation/stats/rademacher_adjustment.py +436 -0
  88. ml4t/diagnostic/evaluation/stats/reality_check.py +155 -0
  89. ml4t/diagnostic/evaluation/stats/sharpe_inference.py +219 -0
  90. ml4t/diagnostic/evaluation/themes.py +330 -0
  91. ml4t/diagnostic/evaluation/threshold_analysis.py +957 -0
  92. ml4t/diagnostic/evaluation/trade_analysis.py +1136 -0
  93. ml4t/diagnostic/evaluation/trade_dashboard/__init__.py +32 -0
  94. ml4t/diagnostic/evaluation/trade_dashboard/app.py +315 -0
  95. ml4t/diagnostic/evaluation/trade_dashboard/export/__init__.py +18 -0
  96. ml4t/diagnostic/evaluation/trade_dashboard/export/csv.py +82 -0
  97. ml4t/diagnostic/evaluation/trade_dashboard/export/html.py +276 -0
  98. ml4t/diagnostic/evaluation/trade_dashboard/io.py +166 -0
  99. ml4t/diagnostic/evaluation/trade_dashboard/normalize.py +304 -0
  100. ml4t/diagnostic/evaluation/trade_dashboard/stats.py +386 -0
  101. ml4t/diagnostic/evaluation/trade_dashboard/style.py +79 -0
  102. ml4t/diagnostic/evaluation/trade_dashboard/tabs/__init__.py +21 -0
  103. ml4t/diagnostic/evaluation/trade_dashboard/tabs/patterns.py +354 -0
  104. ml4t/diagnostic/evaluation/trade_dashboard/tabs/shap_analysis.py +280 -0
  105. ml4t/diagnostic/evaluation/trade_dashboard/tabs/stat_validation.py +186 -0
  106. ml4t/diagnostic/evaluation/trade_dashboard/tabs/worst_trades.py +236 -0
  107. ml4t/diagnostic/evaluation/trade_dashboard/types.py +129 -0
  108. ml4t/diagnostic/evaluation/trade_shap/__init__.py +102 -0
  109. ml4t/diagnostic/evaluation/trade_shap/alignment.py +188 -0
  110. ml4t/diagnostic/evaluation/trade_shap/characterize.py +413 -0
  111. ml4t/diagnostic/evaluation/trade_shap/cluster.py +302 -0
  112. ml4t/diagnostic/evaluation/trade_shap/explain.py +208 -0
  113. ml4t/diagnostic/evaluation/trade_shap/hypotheses/__init__.py +23 -0
  114. ml4t/diagnostic/evaluation/trade_shap/hypotheses/generator.py +290 -0
  115. ml4t/diagnostic/evaluation/trade_shap/hypotheses/matcher.py +251 -0
  116. ml4t/diagnostic/evaluation/trade_shap/hypotheses/templates.yaml +467 -0
  117. ml4t/diagnostic/evaluation/trade_shap/models.py +386 -0
  118. ml4t/diagnostic/evaluation/trade_shap/normalize.py +116 -0
  119. ml4t/diagnostic/evaluation/trade_shap/pipeline.py +263 -0
  120. ml4t/diagnostic/evaluation/trade_shap_dashboard.py +283 -0
  121. ml4t/diagnostic/evaluation/trade_shap_diagnostics.py +588 -0
  122. ml4t/diagnostic/evaluation/validated_cv.py +535 -0
  123. ml4t/diagnostic/evaluation/visualization.py +1050 -0
  124. ml4t/diagnostic/evaluation/volatility/__init__.py +45 -0
  125. ml4t/diagnostic/evaluation/volatility/analysis.py +351 -0
  126. ml4t/diagnostic/evaluation/volatility/arch.py +258 -0
  127. ml4t/diagnostic/evaluation/volatility/garch.py +460 -0
  128. ml4t/diagnostic/integration/__init__.py +48 -0
  129. ml4t/diagnostic/integration/backtest_contract.py +671 -0
  130. ml4t/diagnostic/integration/data_contract.py +316 -0
  131. ml4t/diagnostic/integration/engineer_contract.py +226 -0
  132. ml4t/diagnostic/logging/__init__.py +77 -0
  133. ml4t/diagnostic/logging/logger.py +245 -0
  134. ml4t/diagnostic/logging/performance.py +234 -0
  135. ml4t/diagnostic/logging/progress.py +234 -0
  136. ml4t/diagnostic/logging/wandb.py +412 -0
  137. ml4t/diagnostic/metrics/__init__.py +9 -0
  138. ml4t/diagnostic/metrics/percentiles.py +128 -0
  139. ml4t/diagnostic/py.typed +1 -0
  140. ml4t/diagnostic/reporting/__init__.py +43 -0
  141. ml4t/diagnostic/reporting/base.py +130 -0
  142. ml4t/diagnostic/reporting/html_renderer.py +275 -0
  143. ml4t/diagnostic/reporting/json_renderer.py +51 -0
  144. ml4t/diagnostic/reporting/markdown_renderer.py +117 -0
  145. ml4t/diagnostic/results/AGENT.md +24 -0
  146. ml4t/diagnostic/results/__init__.py +105 -0
  147. ml4t/diagnostic/results/barrier_results/__init__.py +36 -0
  148. ml4t/diagnostic/results/barrier_results/hit_rate.py +304 -0
  149. ml4t/diagnostic/results/barrier_results/precision_recall.py +266 -0
  150. ml4t/diagnostic/results/barrier_results/profit_factor.py +297 -0
  151. ml4t/diagnostic/results/barrier_results/tearsheet.py +397 -0
  152. ml4t/diagnostic/results/barrier_results/time_to_target.py +305 -0
  153. ml4t/diagnostic/results/barrier_results/validation.py +38 -0
  154. ml4t/diagnostic/results/base.py +177 -0
  155. ml4t/diagnostic/results/event_results.py +349 -0
  156. ml4t/diagnostic/results/feature_results.py +787 -0
  157. ml4t/diagnostic/results/multi_signal_results.py +431 -0
  158. ml4t/diagnostic/results/portfolio_results.py +281 -0
  159. ml4t/diagnostic/results/sharpe_results.py +448 -0
  160. ml4t/diagnostic/results/signal_results/__init__.py +74 -0
  161. ml4t/diagnostic/results/signal_results/ic.py +581 -0
  162. ml4t/diagnostic/results/signal_results/irtc.py +110 -0
  163. ml4t/diagnostic/results/signal_results/quantile.py +392 -0
  164. ml4t/diagnostic/results/signal_results/tearsheet.py +456 -0
  165. ml4t/diagnostic/results/signal_results/turnover.py +213 -0
  166. ml4t/diagnostic/results/signal_results/validation.py +147 -0
  167. ml4t/diagnostic/signal/AGENT.md +17 -0
  168. ml4t/diagnostic/signal/__init__.py +69 -0
  169. ml4t/diagnostic/signal/_report.py +152 -0
  170. ml4t/diagnostic/signal/_utils.py +261 -0
  171. ml4t/diagnostic/signal/core.py +275 -0
  172. ml4t/diagnostic/signal/quantile.py +148 -0
  173. ml4t/diagnostic/signal/result.py +214 -0
  174. ml4t/diagnostic/signal/signal_ic.py +129 -0
  175. ml4t/diagnostic/signal/turnover.py +182 -0
  176. ml4t/diagnostic/splitters/AGENT.md +19 -0
  177. ml4t/diagnostic/splitters/__init__.py +36 -0
  178. ml4t/diagnostic/splitters/base.py +501 -0
  179. ml4t/diagnostic/splitters/calendar.py +421 -0
  180. ml4t/diagnostic/splitters/calendar_config.py +91 -0
  181. ml4t/diagnostic/splitters/combinatorial.py +1064 -0
  182. ml4t/diagnostic/splitters/config.py +322 -0
  183. ml4t/diagnostic/splitters/cpcv/__init__.py +57 -0
  184. ml4t/diagnostic/splitters/cpcv/combinations.py +119 -0
  185. ml4t/diagnostic/splitters/cpcv/partitioning.py +263 -0
  186. ml4t/diagnostic/splitters/cpcv/purge_engine.py +379 -0
  187. ml4t/diagnostic/splitters/cpcv/windows.py +190 -0
  188. ml4t/diagnostic/splitters/group_isolation.py +329 -0
  189. ml4t/diagnostic/splitters/persistence.py +316 -0
  190. ml4t/diagnostic/splitters/utils.py +207 -0
  191. ml4t/diagnostic/splitters/walk_forward.py +757 -0
  192. ml4t/diagnostic/utils/__init__.py +42 -0
  193. ml4t/diagnostic/utils/config.py +542 -0
  194. ml4t/diagnostic/utils/dependencies.py +318 -0
  195. ml4t/diagnostic/utils/sessions.py +127 -0
  196. ml4t/diagnostic/validation/__init__.py +54 -0
  197. ml4t/diagnostic/validation/dataframe.py +274 -0
  198. ml4t/diagnostic/validation/returns.py +280 -0
  199. ml4t/diagnostic/validation/timeseries.py +299 -0
  200. ml4t/diagnostic/visualization/AGENT.md +19 -0
  201. ml4t/diagnostic/visualization/__init__.py +223 -0
  202. ml4t/diagnostic/visualization/backtest/__init__.py +98 -0
  203. ml4t/diagnostic/visualization/backtest/cost_attribution.py +762 -0
  204. ml4t/diagnostic/visualization/backtest/executive_summary.py +895 -0
  205. ml4t/diagnostic/visualization/backtest/interactive_controls.py +673 -0
  206. ml4t/diagnostic/visualization/backtest/statistical_validity.py +874 -0
  207. ml4t/diagnostic/visualization/backtest/tearsheet.py +565 -0
  208. ml4t/diagnostic/visualization/backtest/template_system.py +373 -0
  209. ml4t/diagnostic/visualization/backtest/trade_plots.py +1172 -0
  210. ml4t/diagnostic/visualization/barrier_plots.py +782 -0
  211. ml4t/diagnostic/visualization/core.py +1060 -0
  212. ml4t/diagnostic/visualization/dashboards/__init__.py +36 -0
  213. ml4t/diagnostic/visualization/dashboards/base.py +582 -0
  214. ml4t/diagnostic/visualization/dashboards/importance.py +801 -0
  215. ml4t/diagnostic/visualization/dashboards/interaction.py +263 -0
  216. ml4t/diagnostic/visualization/dashboards.py +43 -0
  217. ml4t/diagnostic/visualization/data_extraction/__init__.py +48 -0
  218. ml4t/diagnostic/visualization/data_extraction/importance.py +649 -0
  219. ml4t/diagnostic/visualization/data_extraction/interaction.py +504 -0
  220. ml4t/diagnostic/visualization/data_extraction/types.py +113 -0
  221. ml4t/diagnostic/visualization/data_extraction/validation.py +66 -0
  222. ml4t/diagnostic/visualization/feature_plots.py +888 -0
  223. ml4t/diagnostic/visualization/interaction_plots.py +618 -0
  224. ml4t/diagnostic/visualization/portfolio/__init__.py +41 -0
  225. ml4t/diagnostic/visualization/portfolio/dashboard.py +514 -0
  226. ml4t/diagnostic/visualization/portfolio/drawdown_plots.py +341 -0
  227. ml4t/diagnostic/visualization/portfolio/returns_plots.py +487 -0
  228. ml4t/diagnostic/visualization/portfolio/risk_plots.py +301 -0
  229. ml4t/diagnostic/visualization/report_generation.py +1343 -0
  230. ml4t/diagnostic/visualization/signal/__init__.py +103 -0
  231. ml4t/diagnostic/visualization/signal/dashboard.py +911 -0
  232. ml4t/diagnostic/visualization/signal/event_plots.py +514 -0
  233. ml4t/diagnostic/visualization/signal/ic_plots.py +635 -0
  234. ml4t/diagnostic/visualization/signal/multi_signal_dashboard.py +974 -0
  235. ml4t/diagnostic/visualization/signal/multi_signal_plots.py +603 -0
  236. ml4t/diagnostic/visualization/signal/quantile_plots.py +625 -0
  237. ml4t/diagnostic/visualization/signal/turnover_plots.py +400 -0
  238. ml4t/diagnostic/visualization/trade_shap/__init__.py +90 -0
  239. ml4t_diagnostic-0.1.0a1.dist-info/METADATA +1044 -0
  240. ml4t_diagnostic-0.1.0a1.dist-info/RECORD +242 -0
  241. ml4t_diagnostic-0.1.0a1.dist-info/WHEEL +4 -0
  242. ml4t_diagnostic-0.1.0a1.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,391 @@
1
+ """Reporting configuration (Module E).
2
+
3
+ This module defines configuration for report generation:
4
+ - Output formats (HTML, JSON, PDF)
5
+ - HTML report settings (templates, themes, tables)
6
+ - Visualization settings (plots, colors, interactivity)
7
+ - JSON output structure
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ from pathlib import Path
13
+ from typing import Any
14
+
15
+ from pydantic import Field, field_validator
16
+
17
+ from ml4t.diagnostic.config.base import BaseConfig
18
+ from ml4t.diagnostic.config.validation import (
19
+ DataFrameExportFormat,
20
+ PositiveInt,
21
+ ReportFormat,
22
+ ReportTemplate,
23
+ ReportTheme,
24
+ TableFormat,
25
+ )
26
+
27
+
28
class OutputFormatConfig(BaseConfig):
    """Settings that decide which report files are written and where.

    Attributes:
        formats: Report formats to emit. Must hold at least one entry;
            defaults to HTML plus JSON.
        output_dir: Directory that receives the generated files. Defaults to
            ``diagnostic_reports`` under the current working directory.
        filename_template: Filename pattern. The field description lists the
            placeholders ``{date}``, ``{strategy_name}`` and ``{timestamp}``.
        compress: Create a .zip if multiple outputs are produced.
        overwrite_existing: Overwrite files that already exist.

    Note:
        ``model_post_init`` creates ``output_dir`` (parents included), so
        building this config touches the filesystem.

    Examples:
        >>> # Defaults: HTML + JSON
        >>> config = OutputFormatConfig()

        >>> # HTML only, with a custom filename
        >>> config = OutputFormatConfig(
        ...     formats=[ReportFormat.HTML],
        ...     filename_template="report_{strategy_name}_{date}.html"
        ... )
    """

    formats: list[ReportFormat] = Field(
        default_factory=lambda: [ReportFormat.HTML, ReportFormat.JSON],
        description="Output formats to generate",
    )
    output_dir: Path = Field(
        default_factory=lambda: Path.cwd().joinpath("diagnostic_reports"),
        description="Output directory",
    )
    filename_template: str = Field(
        default="qeval_report_{date}",
        description="Filename template (placeholders: {date}, {strategy_name}, {timestamp})",
    )
    compress: bool = Field(
        default=False,
        description="Create .zip if multiple outputs",
    )
    overwrite_existing: bool = Field(
        default=True,
        description="Overwrite existing files",
    )

    @field_validator("formats")
    @classmethod
    def check_formats(cls, v: list[ReportFormat]) -> list[ReportFormat]:
        """Reject an empty format list; return any non-empty list unchanged.

        Raises:
            ValueError: If ``v`` is empty.
        """
        if v:
            return v
        raise ValueError("Must specify at least one output format")

    def model_post_init(self, __context: Any) -> None:
        """Make sure ``output_dir`` exists, creating missing parents."""
        target_dir = self.output_dir
        # exist_ok=True keeps repeated instantiation from failing on an
        # already-present directory.
        target_dir.mkdir(parents=True, exist_ok=True)
75
+
76
+
77
class HTMLConfig(BaseConfig):
    """Configuration for HTML report generation.

    Attributes:
        template: HTML template to use (default: ``ReportTemplate.FULL``)
        theme: Visual theme (default: ``ReportTheme.LIGHT``)
        color_scheme: Color scheme for plots
        interactive_plots: Use Plotly (True) or matplotlib (False)
        include_sections: Which sections to include; every name must be one
            of the values accepted by ``check_sections``
        table_format: Table styling
        include_toc: Include table of contents
        include_summary: Include executive summary
        custom_css: Path to an existing custom CSS file, or None

    Examples:
        >>> # Default: Full report with light theme
        >>> config = HTMLConfig()

        >>> # Custom: Summary report with professional theme
        >>> config = HTMLConfig(
        ...     template=ReportTemplate.SUMMARY,
        ...     theme=ReportTheme.PROFESSIONAL,
        ...     include_sections=["ic", "sharpe"]
        ... )
    """

    template: ReportTemplate = Field(
        ReportTemplate.FULL, description="HTML template: full, summary, or diagnostic"
    )
    theme: ReportTheme = Field(
        ReportTheme.LIGHT, description="Visual theme: light, dark, or professional"
    )
    color_scheme: str = Field(
        "viridis", description="Color scheme for plots (matplotlib/plotly colormap)"
    )
    interactive_plots: bool = Field(
        True, description="Use Plotly (interactive) vs matplotlib (static)"
    )
    include_sections: list[str] = Field(
        default_factory=lambda: [
            "stationarity",
            "acf",
            "volatility",
            "distribution",
            "correlation",
            "ic",
            "sharpe",
        ],
        description="Which sections to include (stationarity, acf, volatility, distribution, correlation, pca, clustering, redundancy, ic, binary_classification, threshold_analysis, ml_diagnostics, sharpe, summary)",
    )
    table_format: TableFormat = Field(
        TableFormat.STYLED, description="Table format: styled, plain, or datatables"
    )
    include_toc: bool = Field(True, description="Include table of contents")
    include_summary: bool = Field(True, description="Include executive summary")
    custom_css: Path | None = Field(None, description="Path to custom CSS file")

    @field_validator("include_sections")
    @classmethod
    def check_sections(cls, v: list[str]) -> list[str]:
        """Validate section names against the supported set.

        Args:
            v: Section names requested for the report.

        Returns:
            ``v`` unchanged when every name is supported.

        Raises:
            ValueError: If any name is not a supported section.
        """
        valid_sections = {
            "stationarity",
            "acf",
            "volatility",
            "distribution",
            "correlation",
            "pca",
            "clustering",
            "redundancy",
            "ic",
            "binary_classification",
            "threshold_analysis",
            "ml_diagnostics",
            "sharpe",
            "summary",
        }
        invalid = set(v) - valid_sections
        if invalid:
            # Sort both collections so the message is identical from run to
            # run; raw set ordering depends on string hashing.
            raise ValueError(
                f"Invalid sections: {sorted(invalid)}. Valid: {sorted(valid_sections)}"
            )
        return v

    @field_validator("custom_css")
    @classmethod
    def check_custom_css(cls, v: Path | None) -> Path | None:
        """Validate that the custom CSS path, if given, is an existing file.

        Args:
            v: Path to a CSS file, or None when no custom CSS is used.

        Returns:
            ``v`` unchanged.

        Raises:
            ValueError: If ``v`` is set but does not point to a regular file.
        """
        # is_file() rather than exists(): a directory at this path is not a
        # usable stylesheet and should fail validation too.
        if v is not None and not v.is_file():
            raise ValueError(f"Custom CSS file not found: {v}")
        return v
166
+
167
+
168
class VisualizationConfig(BaseConfig):
    """Plot sizing, plot selection and plot export settings.

    Attributes:
        plot_dpi: DPI for static plots
        plot_width: Plot width in pixels
        plot_height: Plot height in pixels
        max_features_plot: Upper bound on features drawn (avoids clutter)
        max_points_plot: Upper bound on points per plot; None means no limit
        correlation_heatmap: Include the correlation heatmap
        time_series_plots: Include time series plots
        distribution_plots: Include distribution plots
        scatter_plots: Include scatter plots
        save_plots: Save plots as separate files
        plot_format: Plot file format. The validator accepts png, svg, pdf,
            jpg and jpeg in any letter case and stores the lower-case form.

    Examples:
        >>> # Defaults: every plot type, 100 DPI
        >>> config = VisualizationConfig()

        >>> # High-resolution plots for publication
        >>> config = VisualizationConfig(
        ...     plot_dpi=300,
        ...     plot_format="pdf",
        ...     save_plots=True
        ... )
    """

    # Sizing and resolution
    plot_dpi: PositiveInt = Field(default=100, description="DPI for static plots")
    plot_width: PositiveInt = Field(default=800, description="Plot width in pixels")
    plot_height: PositiveInt = Field(default=600, description="Plot height in pixels")

    # Limits that keep plots readable
    max_features_plot: PositiveInt = Field(
        default=50,
        description="Max features to plot (avoid clutter)",
    )
    max_points_plot: PositiveInt | None = Field(
        default=10000,
        description="Max points per plot (subsample if needed, None = no limit)",
    )

    # Which plot families to produce
    correlation_heatmap: bool = Field(default=True, description="Include correlation heatmap")
    time_series_plots: bool = Field(default=True, description="Include time series plots")
    distribution_plots: bool = Field(default=True, description="Include distribution plots")
    scatter_plots: bool = Field(default=True, description="Include scatter plots")

    # Export
    save_plots: bool = Field(default=False, description="Save plots as separate files")
    plot_format: str = Field(default="png", description="Plot file format (png, svg, pdf)")

    @field_validator("plot_format")
    @classmethod
    def check_plot_format(cls, v: str) -> str:
        """Normalise the plot format to lower case and check it is supported.

        Raises:
            ValueError: If the lower-cased value is not a supported format.
        """
        valid_formats = {"png", "svg", "pdf", "jpg", "jpeg"}
        normalised = v.lower()
        if normalised in valid_formats:
            return normalised
        # The message reports the value exactly as the caller supplied it.
        raise ValueError(f"Invalid plot format: {v}. Valid: {valid_formats}")
218
+
219
+
220
class JSONConfig(BaseConfig):
    """Settings that shape the JSON report output.

    Attributes:
        pretty_print: Emit indented JSON instead of compact JSON
        include_metadata: Include metadata (timestamp, config, versions)
        export_dataframes: How DataFrames are serialised
            (default: ``DataFrameExportFormat.RECORDS``)
        include_raw_data: Include raw data (features, returns) in the output
        indent: Indentation width, relevant when ``pretty_print`` is True

    Examples:
        >>> # Defaults: pretty-printed JSON with metadata, no raw data
        >>> config = JSONConfig()

        >>> # Compact JSON, still without raw data
        >>> config = JSONConfig(
        ...     pretty_print=False,
        ...     include_raw_data=False
        ... )
    """

    pretty_print: bool = Field(
        default=True,
        description="Pretty-print JSON (vs compact)",
    )
    include_metadata: bool = Field(
        default=True,
        description="Include metadata (timestamp, config, versions)",
    )
    export_dataframes: DataFrameExportFormat = Field(
        default=DataFrameExportFormat.RECORDS,
        description="DataFrame serialization format",
    )
    include_raw_data: bool = Field(
        default=False,
        description="Include raw data (features, returns) in output",
    )
    indent: PositiveInt = Field(
        default=2,
        description="JSON indentation (if pretty_print=True)",
    )
252
+
253
+
254
class ReportConfig(BaseConfig):
    """Top-level reporting configuration (Module E).

    Bundles the four sub-configurations with a few performance switches:
    - Output formats (HTML, JSON, PDF)
    - HTML settings (templates, themes, tables)
    - Visualization (plots, colors, interactivity)
    - JSON structure

    Attributes:
        output_format: Output format configuration
        html: HTML report configuration
        visualization: Visualization configuration
        json_config: JSON output configuration
        lazy_rendering: Don't generate plots until accessed
        cache_plots: Cache generated plots
        parallel_plotting: Generate plots in parallel
        n_jobs: Parallel jobs for plotting (-1 = all cores)

    Examples:
        >>> # Quick start with defaults
        >>> # (Reporter is not defined in this module - confirm its import path)
        >>> config = ReportConfig()
        >>> reporter = Reporter(config)
        >>> reporter.generate(results, output_name="my_strategy")

        >>> # Load from YAML (from_yaml is presumably inherited from BaseConfig)
        >>> config = ReportConfig.from_yaml("report_config.yaml")

        >>> # Custom configuration
        >>> config = ReportConfig(
        ...     output_format=OutputFormatConfig(
        ...         formats=[ReportFormat.HTML, ReportFormat.PDF]
        ...     ),
        ...     html=HTMLConfig(
        ...         template=ReportTemplate.SUMMARY,
        ...         theme=ReportTheme.PROFESSIONAL
        ...     ),
        ...     visualization=VisualizationConfig(
        ...         plot_dpi=300,
        ...         save_plots=True
        ...     )
        ... )
    """

    # Sub-configurations; each falls back to its own defaults.
    output_format: OutputFormatConfig = Field(
        default_factory=OutputFormatConfig,
        description="Output format configuration",
    )
    html: HTMLConfig = Field(
        default_factory=HTMLConfig,
        description="HTML report configuration",
    )
    visualization: VisualizationConfig = Field(
        default_factory=VisualizationConfig,
        description="Visualization configuration",
    )
    json_config: JSONConfig = Field(
        default_factory=JSONConfig,
        description="JSON output configuration",
    )

    # Performance switches
    lazy_rendering: bool = Field(
        default=False,
        description="Don't generate plots until accessed (saves time)",
    )
    cache_plots: bool = Field(default=True, description="Cache generated plots")
    parallel_plotting: bool = Field(default=False, description="Generate plots in parallel")
    # NOTE(review): ge=-1 also admits 0; confirm how the plotting code treats
    # n_jobs=0 before relying on it.
    n_jobs: int = Field(
        default=-1,
        ge=-1,
        description="Parallel jobs for plotting (-1 = all cores)",
    )

    @classmethod
    def for_quick_report(cls) -> ReportConfig:
        """Build a speed-oriented preset: HTML only, minimal plots.

        Returns:
            Config with the summary template, static plots, only the
            correlation heatmap enabled, and lazy rendering switched on.
        """
        html_only = OutputFormatConfig(formats=[ReportFormat.HTML])
        summary_page = HTMLConfig(
            template=ReportTemplate.SUMMARY,
            interactive_plots=False,  # Faster static plots
        )
        heatmap_only = VisualizationConfig(
            correlation_heatmap=True,
            time_series_plots=False,
            distribution_plots=False,
            scatter_plots=False,
        )
        return cls(
            output_format=html_only,
            html=summary_page,
            visualization=heatmap_only,
            lazy_rendering=True,
        )

    @classmethod
    def for_publication(cls) -> ReportConfig:
        """Build a publication preset: high resolution, every plot type.

        Returns:
            Config producing HTML and PDF (compressed), with 300 DPI PDF
            plots saved to disk, plot caching and parallel plotting enabled.
        """
        html_and_pdf = OutputFormatConfig(
            formats=[ReportFormat.HTML, ReportFormat.PDF],
            compress=True,
        )
        full_page = HTMLConfig(
            template=ReportTemplate.FULL,
            theme=ReportTheme.PROFESSIONAL,
            table_format=TableFormat.STYLED,
        )
        all_plots_high_res = VisualizationConfig(
            plot_dpi=300,
            plot_format="pdf",
            save_plots=True,
            correlation_heatmap=True,
            time_series_plots=True,
            distribution_plots=True,
            scatter_plots=True,
        )
        readable_json = JSONConfig(pretty_print=True, include_metadata=True)
        return cls(
            output_format=html_and_pdf,
            html=full_page,
            visualization=all_plots_high_res,
            json_config=readable_json,
            cache_plots=True,
            parallel_plotting=True,
        )

    @classmethod
    def for_programmatic_access(cls) -> ReportConfig:
        """Build a machine-consumption preset: JSON only, no plots.

        Returns:
            Config emitting compact JSON that includes raw data, with every
            plot type disabled and lazy rendering switched on.
        """
        json_only = OutputFormatConfig(formats=[ReportFormat.JSON])
        no_plots = VisualizationConfig(
            correlation_heatmap=False,
            time_series_plots=False,
            distribution_plots=False,
            scatter_plots=False,
        )
        compact_json = JSONConfig(
            pretty_print=False,  # Compact for parsing
            include_raw_data=True,  # Include data for downstream processing
            export_dataframes=DataFrameExportFormat.SPLIT,  # Efficient format
        )
        return cls(
            output_format=json_only,
            visualization=no_plots,
            json_config=compact_json,
            lazy_rendering=True,
        )
@@ -0,0 +1,202 @@
1
+ """Statistical Testing Configuration.
2
+
3
+ This module provides configuration for advanced statistical testing:
4
+ - **PSR**: Probabilistic Sharpe Ratio (confidence in positive Sharpe)
5
+ - **MinTRL**: Minimum Track Record Length (required sample size)
6
+ - **DSR**: Deflated Sharpe Ratio (correction for multiple testing)
7
+ - **FDR**: False Discovery Rate control (limits the expected proportion of false discoveries)
8
+
9
+ These methods address the critical problem of overfitting and false discoveries
10
+ in quantitative strategy research.
11
+
12
+ Consolidated Config:
13
+ - StatisticalConfig: Single config with all statistical test settings
14
+ """
15
+
16
+ from __future__ import annotations
17
+
18
+ from pathlib import Path
19
+ from typing import Literal
20
+
21
+ from pydantic import Field, field_validator, model_validator
22
+
23
+ from ml4t.diagnostic.config.base import BaseConfig
24
+ from ml4t.diagnostic.config.validation import (
25
+ FDRMethod,
26
+ NonNegativeFloat,
27
+ PositiveFloat,
28
+ PositiveInt,
29
+ Probability,
30
+ )
31
+
32
+ # =============================================================================
33
+ # Settings Classes (Single-Level Nesting)
34
+ # =============================================================================
35
+
36
+
37
class PSRSettings(BaseConfig):
    """Options for the Probabilistic Sharpe Ratio (PSR).

    PSR is the probability that the true Sharpe ratio lies above a chosen
    threshold; it accounts for higher moments (skewness, kurtosis) and for
    estimation uncertainty.
    """

    enabled: bool = Field(default=True, description="Compute PSR")
    confidence_level: Probability = Field(default=0.95, description="Confidence level")
    target_sharpe: NonNegativeFloat = Field(
        default=0.0,
        description="Target SR to test against",
    )
    # Accepts either a positive number or the literal string "auto".
    adjustment_factor: PositiveFloat | Literal["auto"] = Field(
        default="auto",
        description="Higher moment adjustment",
    )
    # Optional list of thresholds; defaults to None.
    compute_for_thresholds: list[float] | None = Field(
        default=None,
        description="Multiple target values",
    )
51
+
52
+
53
class MinTRLSettings(BaseConfig):
    """Options for the Minimum Track Record Length (MinTRL).

    MinTRL is the smallest sample size needed to be confident that the
    true Sharpe ratio exceeds a target value.
    """

    enabled: bool = Field(default=True, description="Compute MinTRL")
    confidence_level: Probability = Field(default=0.95, description="Confidence level")
    target_sharpe: NonNegativeFloat = Field(
        default=0.0,
        description="Target SR to detect",
    )
    # Optional list of thresholds; defaults to None.
    compute_for_thresholds: list[float] | None = Field(
        default=None,
        description="Multiple target values",
    )
64
+
65
+
66
class DSRSettings(BaseConfig):
    """Options for the Deflated Sharpe Ratio (DSR).

    DSR adjusts for the multiple-testing bias that arises when many
    strategies are evaluated.
    """

    enabled: bool = Field(default=True, description="Compute DSR")
    n_trials: PositiveInt = Field(default=100, description="Number of strategies tested")
    prob_zero_sharpe: Probability = Field(default=0.5, description="Prior probability SR=0")
    variance_inflation: PositiveFloat = Field(
        default=1.0,
        description="Variance inflation factor",
    )
    # Accepts either a float or the literal string "auto".
    expected_max_sharpe: float | Literal["auto"] = Field(
        default="auto",
        description="Expected max SR under null",
    )

    @field_validator("n_trials")
    @classmethod
    def check_n_trials(cls, v: int) -> int:
        """Warn when fewer than 10 trials are declared; the value is returned unchanged."""
        if v >= 10:
            return v

        # Imported here because the module does not import ``warnings`` at the top.
        import warnings

        warnings.warn(
            f"n_trials={v} seems low. Include ALL strategies tested.",
            stacklevel=2,
        )
        return v
92
+
93
+
94
class FDRSettings(BaseConfig):
    """Settings for False Discovery Rate control.

    FDR controls the expected proportion of false discoveries among all
    rejected hypotheses.
    """

    enabled: bool = Field(True, description="Apply FDR control")
    # NOTE(review): the description calls this a family-wise error rate; for
    # FDR procedures alpha is normally the target false discovery rate --
    # confirm the intended wording against how ``method`` is consumed.
    alpha: Probability = Field(0.05, description="Family-wise error rate")
    method: FDRMethod = Field(FDRMethod.BENJAMINI_HOCHBERG, description="FDR method")
    independent_tests: bool = Field(False, description="Are tests independent?")

    @model_validator(mode="after")
    def validate_method_independence(self) -> FDRSettings:
        """Warn if Benjamini-Hochberg is selected for tests not marked independent.

        The check is skipped when FDR control is disabled: a correction that
        is switched off should not produce advice about its method.

        Returns:
            The validated settings instance, unchanged.
        """
        if not self.enabled:
            # Disabled settings are never applied, so there is nothing to warn about.
            return self

        if self.method == FDRMethod.BENJAMINI_HOCHBERG and not self.independent_tests:
            import warnings

            warnings.warn(
                "Benjamini-Hochberg assumes independence. Consider BY method.",
                stacklevel=2,
            )
        return self
117
+
118
+
119
+ # =============================================================================
120
+ # Consolidated Config
121
+ # =============================================================================
122
+
123
+
124
class StatisticalConfig(BaseConfig):
    """Single entry point bundling every statistical-testing setting.

    Groups the PSR, MinTRL, DSR and FDR settings together with a few
    output/caching switches, and offers ready-made presets.

    Examples
    --------
    >>> config = StatisticalConfig(
    ...     psr=PSRSettings(target_sharpe=1.0),
    ...     dsr=DSRSettings(n_trials=500),
    ... )
    >>> # Presets are available as classmethods
    >>> config = StatisticalConfig.for_research()
    """

    # Per-test settings; each defaults to that settings class built with no arguments.
    psr: PSRSettings = Field(default_factory=PSRSettings, description="PSR settings")
    mintrl: MinTRLSettings = Field(
        default_factory=MinTRLSettings,
        description="MinTRL settings",
    )
    dsr: DSRSettings = Field(default_factory=DSRSettings, description="DSR settings")
    fdr: FDRSettings = Field(default_factory=FDRSettings, description="FDR settings")

    # Output settings
    return_dataframes: bool = Field(default=True, description="Return as DataFrames")
    cache_enabled: bool = Field(default=True, description="Enable caching")
    # The default path is computed lazily, relative to the current user's home directory.
    cache_dir: Path = Field(
        default_factory=lambda: Path.home().joinpath(".cache", "ml4t-diagnostic", "sharpe"),
        description="Cache directory",
    )
    verbose: bool = Field(default=False, description="Verbose output")

    @classmethod
    def for_quick_check(cls) -> StatisticalConfig:
        """Return the quick overfitting-check preset (MinTRL and FDR disabled)."""
        psr_cfg = PSRSettings(compute_for_thresholds=None)
        mintrl_cfg = MinTRLSettings(enabled=False)
        dsr_cfg = DSRSettings(n_trials=100)
        fdr_cfg = FDRSettings(enabled=False)
        return cls(psr=psr_cfg, mintrl=mintrl_cfg, dsr=dsr_cfg, fdr=fdr_cfg)

    @classmethod
    def for_research(cls) -> StatisticalConfig:
        """Return the comprehensive preset aimed at academic research."""
        psr_cfg = PSRSettings(
            compute_for_thresholds=[0.0, 0.5, 1.0, 1.5, 2.0],
            confidence_level=0.99,
        )
        mintrl_cfg = MinTRLSettings(compute_for_thresholds=[0.0, 0.5, 1.0])
        dsr_cfg = DSRSettings(n_trials=500, prob_zero_sharpe=0.5)
        fdr_cfg = FDRSettings(
            method=FDRMethod.BENJAMINI_YEKUTIELI,
            alpha=0.05,
        )
        return cls(psr=psr_cfg, mintrl=mintrl_cfg, dsr=dsr_cfg, fdr=fdr_cfg)

    @classmethod
    def for_publication(cls) -> StatisticalConfig:
        """Return the very conservative preset aimed at academic publication."""
        psr_cfg = PSRSettings(confidence_level=0.99, target_sharpe=0.5)
        mintrl_cfg = MinTRLSettings(confidence_level=0.99, target_sharpe=0.5)
        dsr_cfg = DSRSettings(
            n_trials=1000,
            prob_zero_sharpe=0.8,
            variance_inflation=1.5,
        )
        fdr_cfg = FDRSettings(
            method=FDRMethod.BONFERRONI,
            alpha=0.01,
        )
        return cls(psr=psr_cfg, mintrl=mintrl_cfg, dsr=dsr_cfg, fdr=fdr_cfg)
195
+
196
+
197
# Rebuild every model defined in this module, in definition order.
for _settings_model in (
    PSRSettings,
    MinTRLSettings,
    DSRSettings,
    FDRSettings,
    StatisticalConfig,
):
    _settings_model.model_rebuild()
# Drop the loop variable so the module namespace gains no extra name.
del _settings_model