ml4t-diagnostic 0.1.0a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (242) hide show
  1. ml4t/diagnostic/AGENT.md +25 -0
  2. ml4t/diagnostic/__init__.py +166 -0
  3. ml4t/diagnostic/backends/__init__.py +10 -0
  4. ml4t/diagnostic/backends/adapter.py +192 -0
  5. ml4t/diagnostic/backends/polars_backend.py +899 -0
  6. ml4t/diagnostic/caching/__init__.py +40 -0
  7. ml4t/diagnostic/caching/cache.py +331 -0
  8. ml4t/diagnostic/caching/decorators.py +131 -0
  9. ml4t/diagnostic/caching/smart_cache.py +339 -0
  10. ml4t/diagnostic/config/AGENT.md +24 -0
  11. ml4t/diagnostic/config/README.md +267 -0
  12. ml4t/diagnostic/config/__init__.py +219 -0
  13. ml4t/diagnostic/config/barrier_config.py +277 -0
  14. ml4t/diagnostic/config/base.py +301 -0
  15. ml4t/diagnostic/config/event_config.py +148 -0
  16. ml4t/diagnostic/config/feature_config.py +404 -0
  17. ml4t/diagnostic/config/multi_signal_config.py +55 -0
  18. ml4t/diagnostic/config/portfolio_config.py +215 -0
  19. ml4t/diagnostic/config/report_config.py +391 -0
  20. ml4t/diagnostic/config/sharpe_config.py +202 -0
  21. ml4t/diagnostic/config/signal_config.py +206 -0
  22. ml4t/diagnostic/config/trade_analysis_config.py +310 -0
  23. ml4t/diagnostic/config/validation.py +279 -0
  24. ml4t/diagnostic/core/__init__.py +29 -0
  25. ml4t/diagnostic/core/numba_utils.py +315 -0
  26. ml4t/diagnostic/core/purging.py +372 -0
  27. ml4t/diagnostic/core/sampling.py +471 -0
  28. ml4t/diagnostic/errors/__init__.py +205 -0
  29. ml4t/diagnostic/evaluation/AGENT.md +26 -0
  30. ml4t/diagnostic/evaluation/__init__.py +437 -0
  31. ml4t/diagnostic/evaluation/autocorrelation.py +531 -0
  32. ml4t/diagnostic/evaluation/barrier_analysis.py +1050 -0
  33. ml4t/diagnostic/evaluation/binary_metrics.py +910 -0
  34. ml4t/diagnostic/evaluation/dashboard.py +715 -0
  35. ml4t/diagnostic/evaluation/diagnostic_plots.py +1037 -0
  36. ml4t/diagnostic/evaluation/distribution/__init__.py +499 -0
  37. ml4t/diagnostic/evaluation/distribution/moments.py +299 -0
  38. ml4t/diagnostic/evaluation/distribution/tails.py +777 -0
  39. ml4t/diagnostic/evaluation/distribution/tests.py +470 -0
  40. ml4t/diagnostic/evaluation/drift/__init__.py +139 -0
  41. ml4t/diagnostic/evaluation/drift/analysis.py +432 -0
  42. ml4t/diagnostic/evaluation/drift/domain_classifier.py +517 -0
  43. ml4t/diagnostic/evaluation/drift/population_stability_index.py +310 -0
  44. ml4t/diagnostic/evaluation/drift/wasserstein.py +388 -0
  45. ml4t/diagnostic/evaluation/event_analysis.py +647 -0
  46. ml4t/diagnostic/evaluation/excursion.py +390 -0
  47. ml4t/diagnostic/evaluation/feature_diagnostics.py +873 -0
  48. ml4t/diagnostic/evaluation/feature_outcome.py +666 -0
  49. ml4t/diagnostic/evaluation/framework.py +935 -0
  50. ml4t/diagnostic/evaluation/metric_registry.py +255 -0
  51. ml4t/diagnostic/evaluation/metrics/AGENT.md +23 -0
  52. ml4t/diagnostic/evaluation/metrics/__init__.py +133 -0
  53. ml4t/diagnostic/evaluation/metrics/basic.py +160 -0
  54. ml4t/diagnostic/evaluation/metrics/conditional_ic.py +469 -0
  55. ml4t/diagnostic/evaluation/metrics/feature_outcome.py +475 -0
  56. ml4t/diagnostic/evaluation/metrics/ic_statistics.py +446 -0
  57. ml4t/diagnostic/evaluation/metrics/importance_analysis.py +338 -0
  58. ml4t/diagnostic/evaluation/metrics/importance_classical.py +375 -0
  59. ml4t/diagnostic/evaluation/metrics/importance_mda.py +371 -0
  60. ml4t/diagnostic/evaluation/metrics/importance_shap.py +715 -0
  61. ml4t/diagnostic/evaluation/metrics/information_coefficient.py +527 -0
  62. ml4t/diagnostic/evaluation/metrics/interactions.py +772 -0
  63. ml4t/diagnostic/evaluation/metrics/monotonicity.py +226 -0
  64. ml4t/diagnostic/evaluation/metrics/risk_adjusted.py +324 -0
  65. ml4t/diagnostic/evaluation/multi_signal.py +550 -0
  66. ml4t/diagnostic/evaluation/portfolio_analysis/__init__.py +83 -0
  67. ml4t/diagnostic/evaluation/portfolio_analysis/analysis.py +734 -0
  68. ml4t/diagnostic/evaluation/portfolio_analysis/metrics.py +589 -0
  69. ml4t/diagnostic/evaluation/portfolio_analysis/results.py +334 -0
  70. ml4t/diagnostic/evaluation/report_generation.py +824 -0
  71. ml4t/diagnostic/evaluation/signal_selector.py +452 -0
  72. ml4t/diagnostic/evaluation/stat_registry.py +139 -0
  73. ml4t/diagnostic/evaluation/stationarity/__init__.py +97 -0
  74. ml4t/diagnostic/evaluation/stationarity/analysis.py +518 -0
  75. ml4t/diagnostic/evaluation/stationarity/augmented_dickey_fuller.py +296 -0
  76. ml4t/diagnostic/evaluation/stationarity/kpss_test.py +308 -0
  77. ml4t/diagnostic/evaluation/stationarity/phillips_perron.py +365 -0
  78. ml4t/diagnostic/evaluation/stats/AGENT.md +43 -0
  79. ml4t/diagnostic/evaluation/stats/__init__.py +191 -0
  80. ml4t/diagnostic/evaluation/stats/backtest_overfitting.py +219 -0
  81. ml4t/diagnostic/evaluation/stats/bootstrap.py +228 -0
  82. ml4t/diagnostic/evaluation/stats/deflated_sharpe_ratio.py +591 -0
  83. ml4t/diagnostic/evaluation/stats/false_discovery_rate.py +295 -0
  84. ml4t/diagnostic/evaluation/stats/hac_standard_errors.py +108 -0
  85. ml4t/diagnostic/evaluation/stats/minimum_track_record.py +408 -0
  86. ml4t/diagnostic/evaluation/stats/moments.py +164 -0
  87. ml4t/diagnostic/evaluation/stats/rademacher_adjustment.py +436 -0
  88. ml4t/diagnostic/evaluation/stats/reality_check.py +155 -0
  89. ml4t/diagnostic/evaluation/stats/sharpe_inference.py +219 -0
  90. ml4t/diagnostic/evaluation/themes.py +330 -0
  91. ml4t/diagnostic/evaluation/threshold_analysis.py +957 -0
  92. ml4t/diagnostic/evaluation/trade_analysis.py +1136 -0
  93. ml4t/diagnostic/evaluation/trade_dashboard/__init__.py +32 -0
  94. ml4t/diagnostic/evaluation/trade_dashboard/app.py +315 -0
  95. ml4t/diagnostic/evaluation/trade_dashboard/export/__init__.py +18 -0
  96. ml4t/diagnostic/evaluation/trade_dashboard/export/csv.py +82 -0
  97. ml4t/diagnostic/evaluation/trade_dashboard/export/html.py +276 -0
  98. ml4t/diagnostic/evaluation/trade_dashboard/io.py +166 -0
  99. ml4t/diagnostic/evaluation/trade_dashboard/normalize.py +304 -0
  100. ml4t/diagnostic/evaluation/trade_dashboard/stats.py +386 -0
  101. ml4t/diagnostic/evaluation/trade_dashboard/style.py +79 -0
  102. ml4t/diagnostic/evaluation/trade_dashboard/tabs/__init__.py +21 -0
  103. ml4t/diagnostic/evaluation/trade_dashboard/tabs/patterns.py +354 -0
  104. ml4t/diagnostic/evaluation/trade_dashboard/tabs/shap_analysis.py +280 -0
  105. ml4t/diagnostic/evaluation/trade_dashboard/tabs/stat_validation.py +186 -0
  106. ml4t/diagnostic/evaluation/trade_dashboard/tabs/worst_trades.py +236 -0
  107. ml4t/diagnostic/evaluation/trade_dashboard/types.py +129 -0
  108. ml4t/diagnostic/evaluation/trade_shap/__init__.py +102 -0
  109. ml4t/diagnostic/evaluation/trade_shap/alignment.py +188 -0
  110. ml4t/diagnostic/evaluation/trade_shap/characterize.py +413 -0
  111. ml4t/diagnostic/evaluation/trade_shap/cluster.py +302 -0
  112. ml4t/diagnostic/evaluation/trade_shap/explain.py +208 -0
  113. ml4t/diagnostic/evaluation/trade_shap/hypotheses/__init__.py +23 -0
  114. ml4t/diagnostic/evaluation/trade_shap/hypotheses/generator.py +290 -0
  115. ml4t/diagnostic/evaluation/trade_shap/hypotheses/matcher.py +251 -0
  116. ml4t/diagnostic/evaluation/trade_shap/hypotheses/templates.yaml +467 -0
  117. ml4t/diagnostic/evaluation/trade_shap/models.py +386 -0
  118. ml4t/diagnostic/evaluation/trade_shap/normalize.py +116 -0
  119. ml4t/diagnostic/evaluation/trade_shap/pipeline.py +263 -0
  120. ml4t/diagnostic/evaluation/trade_shap_dashboard.py +283 -0
  121. ml4t/diagnostic/evaluation/trade_shap_diagnostics.py +588 -0
  122. ml4t/diagnostic/evaluation/validated_cv.py +535 -0
  123. ml4t/diagnostic/evaluation/visualization.py +1050 -0
  124. ml4t/diagnostic/evaluation/volatility/__init__.py +45 -0
  125. ml4t/diagnostic/evaluation/volatility/analysis.py +351 -0
  126. ml4t/diagnostic/evaluation/volatility/arch.py +258 -0
  127. ml4t/diagnostic/evaluation/volatility/garch.py +460 -0
  128. ml4t/diagnostic/integration/__init__.py +48 -0
  129. ml4t/diagnostic/integration/backtest_contract.py +671 -0
  130. ml4t/diagnostic/integration/data_contract.py +316 -0
  131. ml4t/diagnostic/integration/engineer_contract.py +226 -0
  132. ml4t/diagnostic/logging/__init__.py +77 -0
  133. ml4t/diagnostic/logging/logger.py +245 -0
  134. ml4t/diagnostic/logging/performance.py +234 -0
  135. ml4t/diagnostic/logging/progress.py +234 -0
  136. ml4t/diagnostic/logging/wandb.py +412 -0
  137. ml4t/diagnostic/metrics/__init__.py +9 -0
  138. ml4t/diagnostic/metrics/percentiles.py +128 -0
  139. ml4t/diagnostic/py.typed +1 -0
  140. ml4t/diagnostic/reporting/__init__.py +43 -0
  141. ml4t/diagnostic/reporting/base.py +130 -0
  142. ml4t/diagnostic/reporting/html_renderer.py +275 -0
  143. ml4t/diagnostic/reporting/json_renderer.py +51 -0
  144. ml4t/diagnostic/reporting/markdown_renderer.py +117 -0
  145. ml4t/diagnostic/results/AGENT.md +24 -0
  146. ml4t/diagnostic/results/__init__.py +105 -0
  147. ml4t/diagnostic/results/barrier_results/__init__.py +36 -0
  148. ml4t/diagnostic/results/barrier_results/hit_rate.py +304 -0
  149. ml4t/diagnostic/results/barrier_results/precision_recall.py +266 -0
  150. ml4t/diagnostic/results/barrier_results/profit_factor.py +297 -0
  151. ml4t/diagnostic/results/barrier_results/tearsheet.py +397 -0
  152. ml4t/diagnostic/results/barrier_results/time_to_target.py +305 -0
  153. ml4t/diagnostic/results/barrier_results/validation.py +38 -0
  154. ml4t/diagnostic/results/base.py +177 -0
  155. ml4t/diagnostic/results/event_results.py +349 -0
  156. ml4t/diagnostic/results/feature_results.py +787 -0
  157. ml4t/diagnostic/results/multi_signal_results.py +431 -0
  158. ml4t/diagnostic/results/portfolio_results.py +281 -0
  159. ml4t/diagnostic/results/sharpe_results.py +448 -0
  160. ml4t/diagnostic/results/signal_results/__init__.py +74 -0
  161. ml4t/diagnostic/results/signal_results/ic.py +581 -0
  162. ml4t/diagnostic/results/signal_results/irtc.py +110 -0
  163. ml4t/diagnostic/results/signal_results/quantile.py +392 -0
  164. ml4t/diagnostic/results/signal_results/tearsheet.py +456 -0
  165. ml4t/diagnostic/results/signal_results/turnover.py +213 -0
  166. ml4t/diagnostic/results/signal_results/validation.py +147 -0
  167. ml4t/diagnostic/signal/AGENT.md +17 -0
  168. ml4t/diagnostic/signal/__init__.py +69 -0
  169. ml4t/diagnostic/signal/_report.py +152 -0
  170. ml4t/diagnostic/signal/_utils.py +261 -0
  171. ml4t/diagnostic/signal/core.py +275 -0
  172. ml4t/diagnostic/signal/quantile.py +148 -0
  173. ml4t/diagnostic/signal/result.py +214 -0
  174. ml4t/diagnostic/signal/signal_ic.py +129 -0
  175. ml4t/diagnostic/signal/turnover.py +182 -0
  176. ml4t/diagnostic/splitters/AGENT.md +19 -0
  177. ml4t/diagnostic/splitters/__init__.py +36 -0
  178. ml4t/diagnostic/splitters/base.py +501 -0
  179. ml4t/diagnostic/splitters/calendar.py +421 -0
  180. ml4t/diagnostic/splitters/calendar_config.py +91 -0
  181. ml4t/diagnostic/splitters/combinatorial.py +1064 -0
  182. ml4t/diagnostic/splitters/config.py +322 -0
  183. ml4t/diagnostic/splitters/cpcv/__init__.py +57 -0
  184. ml4t/diagnostic/splitters/cpcv/combinations.py +119 -0
  185. ml4t/diagnostic/splitters/cpcv/partitioning.py +263 -0
  186. ml4t/diagnostic/splitters/cpcv/purge_engine.py +379 -0
  187. ml4t/diagnostic/splitters/cpcv/windows.py +190 -0
  188. ml4t/diagnostic/splitters/group_isolation.py +329 -0
  189. ml4t/diagnostic/splitters/persistence.py +316 -0
  190. ml4t/diagnostic/splitters/utils.py +207 -0
  191. ml4t/diagnostic/splitters/walk_forward.py +757 -0
  192. ml4t/diagnostic/utils/__init__.py +42 -0
  193. ml4t/diagnostic/utils/config.py +542 -0
  194. ml4t/diagnostic/utils/dependencies.py +318 -0
  195. ml4t/diagnostic/utils/sessions.py +127 -0
  196. ml4t/diagnostic/validation/__init__.py +54 -0
  197. ml4t/diagnostic/validation/dataframe.py +274 -0
  198. ml4t/diagnostic/validation/returns.py +280 -0
  199. ml4t/diagnostic/validation/timeseries.py +299 -0
  200. ml4t/diagnostic/visualization/AGENT.md +19 -0
  201. ml4t/diagnostic/visualization/__init__.py +223 -0
  202. ml4t/diagnostic/visualization/backtest/__init__.py +98 -0
  203. ml4t/diagnostic/visualization/backtest/cost_attribution.py +762 -0
  204. ml4t/diagnostic/visualization/backtest/executive_summary.py +895 -0
  205. ml4t/diagnostic/visualization/backtest/interactive_controls.py +673 -0
  206. ml4t/diagnostic/visualization/backtest/statistical_validity.py +874 -0
  207. ml4t/diagnostic/visualization/backtest/tearsheet.py +565 -0
  208. ml4t/diagnostic/visualization/backtest/template_system.py +373 -0
  209. ml4t/diagnostic/visualization/backtest/trade_plots.py +1172 -0
  210. ml4t/diagnostic/visualization/barrier_plots.py +782 -0
  211. ml4t/diagnostic/visualization/core.py +1060 -0
  212. ml4t/diagnostic/visualization/dashboards/__init__.py +36 -0
  213. ml4t/diagnostic/visualization/dashboards/base.py +582 -0
  214. ml4t/diagnostic/visualization/dashboards/importance.py +801 -0
  215. ml4t/diagnostic/visualization/dashboards/interaction.py +263 -0
  216. ml4t/diagnostic/visualization/dashboards.py +43 -0
  217. ml4t/diagnostic/visualization/data_extraction/__init__.py +48 -0
  218. ml4t/diagnostic/visualization/data_extraction/importance.py +649 -0
  219. ml4t/diagnostic/visualization/data_extraction/interaction.py +504 -0
  220. ml4t/diagnostic/visualization/data_extraction/types.py +113 -0
  221. ml4t/diagnostic/visualization/data_extraction/validation.py +66 -0
  222. ml4t/diagnostic/visualization/feature_plots.py +888 -0
  223. ml4t/diagnostic/visualization/interaction_plots.py +618 -0
  224. ml4t/diagnostic/visualization/portfolio/__init__.py +41 -0
  225. ml4t/diagnostic/visualization/portfolio/dashboard.py +514 -0
  226. ml4t/diagnostic/visualization/portfolio/drawdown_plots.py +341 -0
  227. ml4t/diagnostic/visualization/portfolio/returns_plots.py +487 -0
  228. ml4t/diagnostic/visualization/portfolio/risk_plots.py +301 -0
  229. ml4t/diagnostic/visualization/report_generation.py +1343 -0
  230. ml4t/diagnostic/visualization/signal/__init__.py +103 -0
  231. ml4t/diagnostic/visualization/signal/dashboard.py +911 -0
  232. ml4t/diagnostic/visualization/signal/event_plots.py +514 -0
  233. ml4t/diagnostic/visualization/signal/ic_plots.py +635 -0
  234. ml4t/diagnostic/visualization/signal/multi_signal_dashboard.py +974 -0
  235. ml4t/diagnostic/visualization/signal/multi_signal_plots.py +603 -0
  236. ml4t/diagnostic/visualization/signal/quantile_plots.py +625 -0
  237. ml4t/diagnostic/visualization/signal/turnover_plots.py +400 -0
  238. ml4t/diagnostic/visualization/trade_shap/__init__.py +90 -0
  239. ml4t_diagnostic-0.1.0a1.dist-info/METADATA +1044 -0
  240. ml4t_diagnostic-0.1.0a1.dist-info/RECORD +242 -0
  241. ml4t_diagnostic-0.1.0a1.dist-info/WHEEL +4 -0
  242. ml4t_diagnostic-0.1.0a1.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,299 @@
1
+ """Time series validation utilities."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import SupportsFloat, SupportsInt, cast
6
+
7
+ import polars as pl
8
+
9
+ from ml4t.diagnostic.validation.dataframe import ValidationError
10
+
11
+
12
class TimeSeriesValidator:
    """Validator for time series DataFrames.

    Each ``check_*`` method raises ``ValidationError`` when its condition is
    violated and otherwise returns the validator itself, so checks can be
    chained.

    Examples:
        >>> validator = TimeSeriesValidator(df, index_col="date")
        >>> validator.check_sorted()
        >>> validator.check_duplicates()
        >>> validator.check_gaps(max_gap_days=7)
    """

    def __init__(self, df: pl.DataFrame, index_col: str = "date"):
        """Initialize validator.

        Args:
            df: DataFrame to validate
            index_col: Name of index/date column
        """
        self.df = df
        self.index_col = index_col

    def check_index_exists(self) -> TimeSeriesValidator:
        """Check that index column exists.

        Returns:
            Self for chaining

        Raises:
            ValidationError: If index column missing
        """
        if self.index_col not in self.df.columns:
            raise ValidationError(
                f"Index column '{self.index_col}' not found",
                context={"available_columns": self.df.columns},
            )

        return self

    def check_index_type(self) -> TimeSeriesValidator:
        """Check that index is datetime/date type.

        Returns:
            Self for chaining

        Raises:
            ValidationError: If index column is missing or not a temporal type
        """
        self.check_index_exists()

        dtype = self.df[self.index_col].dtype

        # The string comparison is a fallback next to the dtype's own
        # ``is_temporal()`` answer; either one is enough to accept the column.
        if not (dtype.is_temporal() or str(dtype) in ["Date", "Datetime", "Time"]):
            raise ValidationError(
                "Index column must be temporal type",
                context={
                    "column": self.index_col,
                    "actual_type": str(dtype),
                    "expected": "Date, Datetime, or Time",
                },
            )

        return self

    def check_sorted(self, ascending: bool = True) -> TimeSeriesValidator:
        """Check that time series is sorted.

        Args:
            ascending: Whether series should be ascending

        Returns:
            Self for chaining

        Raises:
            ValidationError: If index column is missing or not sorted in the
                requested direction
        """
        self.check_index_exists()

        index = self.df[self.index_col]

        # Check if sorted using is_sorted() method
        is_sorted = index.is_sorted() if ascending else index.is_sorted(descending=True)

        if not is_sorted:
            direction = "ascending" if ascending else "descending"
            raise ValidationError(
                f"Time series not sorted in {direction} order",
                context={"index_column": self.index_col},
            )

        return self

    def check_duplicates(self) -> TimeSeriesValidator:
        """Check for duplicate timestamps.

        Returns:
            Self for chaining

        Raises:
            ValidationError: If index column is missing or duplicates are
                found. The error context carries the duplicate count and up
                to five distinct example values.
        """
        self.check_index_exists()

        duplicates = self.df[self.index_col].is_duplicated().sum()

        if duplicates > 0:
            # Get some example duplicates
            dup_values = (
                self.df.filter(pl.col(self.index_col).is_duplicated())
                .select(self.index_col)
                .unique()
                .head(5)
                .to_series()
                .to_list()
            )

            raise ValidationError(
                f"Found {duplicates} duplicate timestamps",
                context={
                    "index_column": self.index_col,
                    "duplicate_count": duplicates,
                    "examples": dup_values,
                },
            )

        return self

    def check_gaps(self, max_gap_days: int | None = None) -> TimeSeriesValidator:
        """Check for large gaps in time series.

        The gap is the largest difference between consecutive rows of the
        index column. This method does not verify ordering itself; run
        ``check_sorted()`` first if the order is not already guaranteed.

        Args:
            max_gap_days: Maximum allowed gap in days (None = no check)

        Returns:
            Self for chaining

        Raises:
            ValidationError: If index column is missing or the largest gap
                exceeds the threshold
        """
        if max_gap_days is None:
            return self

        self.check_index_exists()

        # Calculate gaps
        gaps = self.df[self.index_col].diff().drop_nulls()

        if len(gaps) == 0:
            return self

        max_gap_raw = gaps.max()

        from datetime import timedelta

        context: dict[str, object] = {"max_allowed": max_gap_days}

        if isinstance(max_gap_raw, timedelta):
            max_gap_days_actual = max_gap_raw.days
            # Compare the full duration, not just ``.days``: the latter drops
            # the sub-day remainder, so e.g. a gap of 7 days 12 hours would
            # wrongly pass a 7-day limit.
            exceeds = max_gap_raw > timedelta(days=max_gap_days)
            context["max_gap"] = max_gap_days_actual
            context["max_gap_exact"] = str(max_gap_raw)
        else:
            # Non-timedelta difference: treated as a number of days
            max_gap_days_actual = int(cast(SupportsInt, max_gap_raw))
            exceeds = max_gap_days_actual > max_gap_days
            context["max_gap"] = max_gap_days_actual

        if exceeds:
            raise ValidationError(
                f"Time series has gap of {max_gap_days_actual} days",
                context=context,
            )

        return self
182
+
183
+
184
def validate_timeseries(
    df: pl.DataFrame,
    index_col: str = "date",
    require_sorted: bool = True,
    check_duplicates: bool = True,
    max_gap_days: int | None = None,
) -> None:
    """Run the standard set of time series checks on a DataFrame.

    The index column must exist and be of a temporal type; the remaining
    checks are optional and controlled by the flags below.

    Args:
        df: DataFrame to validate
        index_col: Name of index/date column
        require_sorted: Whether series must be sorted
        check_duplicates: Whether to check for duplicate timestamps
        max_gap_days: Maximum allowed gap in days (None = no check)

    Raises:
        ValidationError: If validation fails

    Examples:
        >>> validate_timeseries(
        ...     df,
        ...     index_col="date",
        ...     require_sorted=True,
        ...     check_duplicates=True,
        ...     max_gap_days=7
        ... )
    """
    ts_validator = TimeSeriesValidator(df, index_col)

    # Mandatory checks: column presence, then column type.
    ts_validator.check_index_exists()
    ts_validator.check_index_type()

    # Optional checks, in the same order as the flags are declared.
    if require_sorted:
        ts_validator.check_sorted()
    if check_duplicates:
        ts_validator.check_duplicates()
    if max_gap_days is not None:
        ts_validator.check_gaps(max_gap_days)
224
+
225
+
226
def validate_index(df: pl.DataFrame, index_col: str = "date") -> None:
    """Check that the index column is present and has a temporal type.

    Args:
        df: DataFrame to validate
        index_col: Name of index column

    Raises:
        ValidationError: If index invalid
    """
    index_validator = TimeSeriesValidator(df, index_col)
    index_validator.check_index_exists()
    index_validator.check_index_type()
238
+
239
+
240
def validate_frequency(
    df: pl.DataFrame,
    index_col: str = "date",
    expected_freq: str | None = None,
) -> None:
    """Validate time series frequency.

    The index column must exist and be sorted ascending. When
    ``expected_freq`` is given, the spacing between consecutive rows must
    also be regular: no gap may deviate from the median gap by more than 20%
    of that median.

    Args:
        df: DataFrame to validate
        index_col: Name of index column
        expected_freq: Expected frequency ("daily", "weekly", "monthly").
            Only enables the spacing check and labels the error; the value
            is not compared against the observed gap size.

    Raises:
        ValidationError: If the index column is missing or unsorted, or,
            when ``expected_freq`` is given, if the index is not a temporal
            type or its spacing is inconsistent

    Note:
        Basic implementation checks consistent spacing.
        Full frequency detection would require more sophisticated logic.
    """
    validator = TimeSeriesValidator(df, index_col)
    validator.check_index_exists().check_sorted()

    if expected_freq is None:
        return

    # The spacing check below uses the ``.dt`` namespace on the differences,
    # which only exists for temporal data. Fail with the documented
    # ValidationError instead of leaking a backend error.
    validator.check_index_type()

    # Basic frequency validation - check consistent spacing
    gaps = df[index_col].diff().drop_nulls()

    if len(gaps) == 0:
        return

    # Convert Duration to microseconds for numeric comparison
    from datetime import timedelta

    gaps_us = gaps.dt.total_microseconds()
    median_gap_us = gaps_us.median()
    max_deviation_us = (gaps_us - median_gap_us).abs().max()

    # Handle None cases (shouldn't happen with valid data)
    if median_gap_us is None or max_deviation_us is None:
        return

    # Cast to float for arithmetic
    median_gap = float(cast(SupportsFloat, median_gap_us))
    max_deviation = float(cast(SupportsFloat, max_deviation_us))

    # Allow 20% deviation
    tolerance = median_gap * 0.2

    if max_deviation > tolerance:
        # Convert back to timedelta for human-readable output
        median_td = timedelta(microseconds=median_gap)
        max_dev_td = timedelta(microseconds=max_deviation)
        raise ValidationError(
            f"Inconsistent {expected_freq} frequency detected",
            context={
                "expected": expected_freq,
                "median_gap": str(median_td),
                "max_deviation": str(max_dev_td),
            },
        )
@@ -0,0 +1,19 @@
1
+ # visualization/ - Plotly Charts
2
+
3
+ ## Modules
4
+
5
+ | File | Lines | Purpose |
6
+ |------|-------|---------|
7
+ | core.py | 1060 | Theme, base utilities |
8
+ | barrier_plots.py | 782 | Barrier analysis charts |
9
+ | feature_plots.py | 888 | Importance charts |
10
+ | interaction_plots.py | 618 | Interaction visualization |
11
+ | report_generation.py | 1343 | HTML/PDF export |
12
+
13
+ ## Subdirectories
14
+
15
+ dashboards/, data_extraction/, backtest/, signal/, portfolio/
16
+
17
+ ## Key Functions
18
+
19
+ `plot_hit_rate_heatmap()`, `plot_importance_bar()`, `plot_interaction_heatmap()`, `generate_importance_report()`, `combine_figures_to_html()`
@@ -0,0 +1,223 @@
1
+ """ML4T Diagnostic Visualization Module.
2
+
3
+ Provides interactive Plotly-based visualizations for all analysis results.
4
+
5
+ This module implements the four-layer visualization architecture:
6
+ - Layer 1: Analysis (compute_*, analyze_*) - Returns dicts
7
+ - Layer 2: Visualization (plot_*) - Returns Plotly Figures
8
+ - Layer 3: Reporting (generate_*_report) - HTML/PDF outputs
9
+ - Layer 4: Dashboard (Streamlit) - Separate package
10
+
11
+ All plot functions follow consistent patterns:
12
+ - Accept results dict from analyze_*() functions
13
+ - Return go.Figure objects
14
+ - Support theming and customization
15
+ - Interactive by default
16
+
17
+ Examples
18
+ --------
19
+ >>> from ml4t.diagnostic import analyze_ml_importance
20
+ >>> from ml4t.diagnostic.visualization import plot_importance_bar
21
+ >>>
22
+ >>> # Analyze
23
+ >>> results = analyze_ml_importance(model, X, y)
24
+ >>>
25
+ >>> # Visualize
26
+ >>> fig = plot_importance_bar(results)
27
+ >>> fig.show()
28
+ >>>
29
+ >>> # Or save
30
+ >>> fig.write_html("importance.html")
31
+ """
32
+
33
+ from ml4t.diagnostic.visualization.barrier_plots import (
34
+ # Barrier analysis plots (Phase 4)
35
+ plot_hit_rate_heatmap,
36
+ plot_precision_recall_curve,
37
+ plot_profit_factor_bar,
38
+ plot_time_to_target_box,
39
+ )
40
+ from ml4t.diagnostic.visualization.core import (
41
+ # Common plot elements
42
+ add_annotation,
43
+ add_confidence_band,
44
+ add_threshold_line,
45
+ apply_responsive_layout,
46
+ # Layout helpers
47
+ create_base_figure,
48
+ # Color schemes
49
+ get_color_scheme,
50
+ get_colorscale,
51
+ get_plot_theme,
52
+ get_theme_config,
53
+ # Theme management
54
+ set_plot_theme,
55
+ # Validation
56
+ validate_plot_results,
57
+ validate_positive_int,
58
+ validate_theme,
59
+ )
60
+ from ml4t.diagnostic.visualization.dashboards import (
61
+ # Dashboard base classes
62
+ BaseDashboard,
63
+ DashboardSection,
64
+ # Interactive dashboards
65
+ FeatureImportanceDashboard,
66
+ FeatureInteractionDashboard,
67
+ )
68
+ from ml4t.diagnostic.visualization.data_extraction import (
69
+ # TypedDict structures
70
+ ImportanceVizData,
71
+ InteractionVizData,
72
+ # Data extraction functions
73
+ extract_importance_viz_data,
74
+ extract_interaction_viz_data,
75
+ )
76
+ from ml4t.diagnostic.visualization.feature_plots import (
77
+ # Feature importance visualizations
78
+ plot_importance_bar,
79
+ plot_importance_distribution,
80
+ plot_importance_heatmap,
81
+ plot_importance_summary,
82
+ )
83
+ from ml4t.diagnostic.visualization.interaction_plots import (
84
+ # Feature interaction visualizations
85
+ plot_interaction_bar,
86
+ plot_interaction_heatmap,
87
+ plot_interaction_network,
88
+ )
89
+ from ml4t.diagnostic.visualization.portfolio import (
90
+ create_portfolio_dashboard,
91
+ plot_annual_returns_bar,
92
+ plot_drawdown_periods,
93
+ plot_drawdown_underwater,
94
+ plot_monthly_returns_heatmap,
95
+ plot_returns_distribution,
96
+ plot_rolling_beta,
97
+ plot_rolling_sharpe,
98
+ plot_rolling_volatility,
99
+ )
100
+ from ml4t.diagnostic.visualization.portfolio import (
101
+ # Portfolio tear sheet (pyfolio replacement)
102
+ plot_cumulative_returns as plot_portfolio_cumulative_returns,
103
+ )
104
+ from ml4t.diagnostic.visualization.portfolio import (
105
+ plot_rolling_returns as plot_portfolio_rolling_returns,
106
+ )
107
+ from ml4t.diagnostic.visualization.report_generation import (
108
+ combine_figures_to_html,
109
+ # PDF export
110
+ export_figures_to_pdf,
111
+ generate_combined_report,
112
+ # HTML report generation
113
+ generate_importance_report,
114
+ generate_interaction_report,
115
+ )
116
+ from ml4t.diagnostic.visualization.signal import (
117
+ MultiSignalDashboard,
118
+ # Dashboards
119
+ SignalDashboard,
120
+ # Turnover plots
121
+ plot_autocorrelation,
122
+ # Quantile plots
123
+ plot_cumulative_returns,
124
+ # IC plots
125
+ plot_ic_heatmap,
126
+ plot_ic_histogram,
127
+ plot_ic_qq,
128
+ # Multi-signal plots (Phase 3)
129
+ plot_ic_ridge,
130
+ plot_ic_ts,
131
+ plot_pareto_frontier,
132
+ plot_quantile_returns_bar,
133
+ plot_quantile_returns_violin,
134
+ plot_signal_correlation_heatmap,
135
+ plot_signal_ranking_bar,
136
+ plot_spread_timeseries,
137
+ plot_top_bottom_turnover,
138
+ )
139
+
140
+ __all__ = [
141
+ # Theme management
142
+ "set_plot_theme",
143
+ "get_plot_theme",
144
+ "get_theme_config",
145
+ # Color schemes
146
+ "get_color_scheme",
147
+ "get_colorscale",
148
+ # Validation
149
+ "validate_plot_results",
150
+ "validate_positive_int",
151
+ "validate_theme",
152
+ # Layout helpers
153
+ "create_base_figure",
154
+ "apply_responsive_layout",
155
+ # Common plot elements
156
+ "add_annotation",
157
+ "add_threshold_line",
158
+ "add_confidence_band",
159
+ # Feature importance plots
160
+ "plot_importance_bar",
161
+ "plot_importance_heatmap",
162
+ "plot_importance_distribution",
163
+ "plot_importance_summary",
164
+ # Feature interaction plots
165
+ "plot_interaction_bar",
166
+ "plot_interaction_heatmap",
167
+ "plot_interaction_network",
168
+ # HTML report generation
169
+ "generate_importance_report",
170
+ "generate_interaction_report",
171
+ "generate_combined_report",
172
+ "combine_figures_to_html",
173
+ # PDF export
174
+ "export_figures_to_pdf",
175
+ # Data extraction
176
+ "extract_importance_viz_data",
177
+ "extract_interaction_viz_data",
178
+ "ImportanceVizData",
179
+ "InteractionVizData",
180
+ # Dashboard components
181
+ "BaseDashboard",
182
+ "DashboardSection",
183
+ "FeatureImportanceDashboard",
184
+ "FeatureInteractionDashboard",
185
+ # Signal IC plots
186
+ "plot_ic_ts",
187
+ "plot_ic_histogram",
188
+ "plot_ic_qq",
189
+ "plot_ic_heatmap",
190
+ # Signal quantile plots
191
+ "plot_quantile_returns_bar",
192
+ "plot_quantile_returns_violin",
193
+ "plot_cumulative_returns",
194
+ "plot_spread_timeseries",
195
+ # Signal turnover plots
196
+ "plot_top_bottom_turnover",
197
+ "plot_autocorrelation",
198
+ # Multi-signal plots (Phase 3)
199
+ "plot_ic_ridge",
200
+ "plot_signal_ranking_bar",
201
+ "plot_signal_correlation_heatmap",
202
+ "plot_pareto_frontier",
203
+ # Signal dashboards
204
+ "SignalDashboard",
205
+ "MultiSignalDashboard",
206
+ # Barrier analysis plots (Phase 4)
207
+ "plot_hit_rate_heatmap",
208
+ "plot_profit_factor_bar",
209
+ "plot_precision_recall_curve",
210
+ "plot_time_to_target_box",
211
+ # Portfolio tear sheet (pyfolio replacement)
212
+ "plot_portfolio_cumulative_returns",
213
+ "plot_portfolio_rolling_returns",
214
+ "plot_annual_returns_bar",
215
+ "plot_monthly_returns_heatmap",
216
+ "plot_returns_distribution",
217
+ "plot_rolling_volatility",
218
+ "plot_rolling_sharpe",
219
+ "plot_rolling_beta",
220
+ "plot_drawdown_underwater",
221
+ "plot_drawdown_periods",
222
+ "create_portfolio_dashboard",
223
+ ]
@@ -0,0 +1,98 @@
1
+ """Backtest visualization module.
2
+
3
+ Plotly-based interactive visualizations for backtest analysis.
4
+ State-of-the-art tearsheet generation exceeding QuantStats.
5
+
6
+ This module provides:
7
+ - Executive summary with KPI cards and traffic lights
8
+ - Trade-level visualizations (MFE/MAE, exit reasons, waterfall)
9
+ - Cost attribution analysis (gross-to-net decomposition)
10
+ - Statistical validity displays (DSR gauge, confidence intervals)
11
+ - Unified tearsheet generation with template system
12
+ """
13
+
14
+ from .cost_attribution import (
15
+ plot_cost_by_asset,
16
+ plot_cost_over_time,
17
+ plot_cost_pie,
18
+ plot_cost_sensitivity,
19
+ plot_cost_waterfall,
20
+ )
21
+ from .executive_summary import (
22
+ create_executive_summary,
23
+ create_key_insights,
24
+ create_metric_card,
25
+ get_traffic_light_color,
26
+ )
27
+ from .interactive_controls import (
28
+ get_date_range_html,
29
+ get_drill_down_modal_html,
30
+ get_interactive_toolbar_html,
31
+ get_metric_filter_html,
32
+ get_section_navigation_html,
33
+ get_theme_switcher_html,
34
+ )
35
+ from .statistical_validity import (
36
+ plot_confidence_intervals,
37
+ plot_dsr_gauge,
38
+ plot_minimum_track_record,
39
+ plot_ras_analysis,
40
+ plot_statistical_summary_card,
41
+ )
42
+ from .tearsheet import (
43
+ BacktestTearsheet,
44
+ generate_backtest_tearsheet,
45
+ )
46
+ from .template_system import (
47
+ TearsheetSection,
48
+ TearsheetTemplate,
49
+ get_template,
50
+ )
51
+ from .trade_plots import (
52
+ plot_consecutive_analysis,
53
+ plot_exit_reason_breakdown,
54
+ plot_mfe_mae_scatter,
55
+ plot_trade_duration_distribution,
56
+ plot_trade_size_vs_return,
57
+ plot_trade_waterfall,
58
+ )
59
+
60
+ __all__ = [
61
+ # Executive Summary
62
+ "create_executive_summary",
63
+ "create_key_insights",
64
+ "create_metric_card",
65
+ "get_traffic_light_color",
66
+ # Trade Plots (Phase 2)
67
+ "plot_mfe_mae_scatter",
68
+ "plot_exit_reason_breakdown",
69
+ "plot_trade_waterfall",
70
+ "plot_trade_duration_distribution",
71
+ "plot_trade_size_vs_return",
72
+ "plot_consecutive_analysis",
73
+ # Cost Attribution (Phase 3)
74
+ "plot_cost_waterfall",
75
+ "plot_cost_sensitivity",
76
+ "plot_cost_over_time",
77
+ "plot_cost_by_asset",
78
+ "plot_cost_pie",
79
+ # Statistical Validity (Phase 4)
80
+ "plot_dsr_gauge",
81
+ "plot_confidence_intervals",
82
+ "plot_ras_analysis",
83
+ "plot_minimum_track_record",
84
+ "plot_statistical_summary_card",
85
+ # Unified Tearsheet (Phase 5)
86
+ "generate_backtest_tearsheet",
87
+ "BacktestTearsheet",
88
+ "get_template",
89
+ "TearsheetTemplate",
90
+ "TearsheetSection",
91
+ # Interactive Controls (Phase 6)
92
+ "get_date_range_html",
93
+ "get_metric_filter_html",
94
+ "get_section_navigation_html",
95
+ "get_drill_down_modal_html",
96
+ "get_interactive_toolbar_html",
97
+ "get_theme_switcher_html",
98
+ ]