ml4t-diagnostic 0.1.0a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (242)
  1. ml4t/diagnostic/AGENT.md +25 -0
  2. ml4t/diagnostic/__init__.py +166 -0
  3. ml4t/diagnostic/backends/__init__.py +10 -0
  4. ml4t/diagnostic/backends/adapter.py +192 -0
  5. ml4t/diagnostic/backends/polars_backend.py +899 -0
  6. ml4t/diagnostic/caching/__init__.py +40 -0
  7. ml4t/diagnostic/caching/cache.py +331 -0
  8. ml4t/diagnostic/caching/decorators.py +131 -0
  9. ml4t/diagnostic/caching/smart_cache.py +339 -0
  10. ml4t/diagnostic/config/AGENT.md +24 -0
  11. ml4t/diagnostic/config/README.md +267 -0
  12. ml4t/diagnostic/config/__init__.py +219 -0
  13. ml4t/diagnostic/config/barrier_config.py +277 -0
  14. ml4t/diagnostic/config/base.py +301 -0
  15. ml4t/diagnostic/config/event_config.py +148 -0
  16. ml4t/diagnostic/config/feature_config.py +404 -0
  17. ml4t/diagnostic/config/multi_signal_config.py +55 -0
  18. ml4t/diagnostic/config/portfolio_config.py +215 -0
  19. ml4t/diagnostic/config/report_config.py +391 -0
  20. ml4t/diagnostic/config/sharpe_config.py +202 -0
  21. ml4t/diagnostic/config/signal_config.py +206 -0
  22. ml4t/diagnostic/config/trade_analysis_config.py +310 -0
  23. ml4t/diagnostic/config/validation.py +279 -0
  24. ml4t/diagnostic/core/__init__.py +29 -0
  25. ml4t/diagnostic/core/numba_utils.py +315 -0
  26. ml4t/diagnostic/core/purging.py +372 -0
  27. ml4t/diagnostic/core/sampling.py +471 -0
  28. ml4t/diagnostic/errors/__init__.py +205 -0
  29. ml4t/diagnostic/evaluation/AGENT.md +26 -0
  30. ml4t/diagnostic/evaluation/__init__.py +437 -0
  31. ml4t/diagnostic/evaluation/autocorrelation.py +531 -0
  32. ml4t/diagnostic/evaluation/barrier_analysis.py +1050 -0
  33. ml4t/diagnostic/evaluation/binary_metrics.py +910 -0
  34. ml4t/diagnostic/evaluation/dashboard.py +715 -0
  35. ml4t/diagnostic/evaluation/diagnostic_plots.py +1037 -0
  36. ml4t/diagnostic/evaluation/distribution/__init__.py +499 -0
  37. ml4t/diagnostic/evaluation/distribution/moments.py +299 -0
  38. ml4t/diagnostic/evaluation/distribution/tails.py +777 -0
  39. ml4t/diagnostic/evaluation/distribution/tests.py +470 -0
  40. ml4t/diagnostic/evaluation/drift/__init__.py +139 -0
  41. ml4t/diagnostic/evaluation/drift/analysis.py +432 -0
  42. ml4t/diagnostic/evaluation/drift/domain_classifier.py +517 -0
  43. ml4t/diagnostic/evaluation/drift/population_stability_index.py +310 -0
  44. ml4t/diagnostic/evaluation/drift/wasserstein.py +388 -0
  45. ml4t/diagnostic/evaluation/event_analysis.py +647 -0
  46. ml4t/diagnostic/evaluation/excursion.py +390 -0
  47. ml4t/diagnostic/evaluation/feature_diagnostics.py +873 -0
  48. ml4t/diagnostic/evaluation/feature_outcome.py +666 -0
  49. ml4t/diagnostic/evaluation/framework.py +935 -0
  50. ml4t/diagnostic/evaluation/metric_registry.py +255 -0
  51. ml4t/diagnostic/evaluation/metrics/AGENT.md +23 -0
  52. ml4t/diagnostic/evaluation/metrics/__init__.py +133 -0
  53. ml4t/diagnostic/evaluation/metrics/basic.py +160 -0
  54. ml4t/diagnostic/evaluation/metrics/conditional_ic.py +469 -0
  55. ml4t/diagnostic/evaluation/metrics/feature_outcome.py +475 -0
  56. ml4t/diagnostic/evaluation/metrics/ic_statistics.py +446 -0
  57. ml4t/diagnostic/evaluation/metrics/importance_analysis.py +338 -0
  58. ml4t/diagnostic/evaluation/metrics/importance_classical.py +375 -0
  59. ml4t/diagnostic/evaluation/metrics/importance_mda.py +371 -0
  60. ml4t/diagnostic/evaluation/metrics/importance_shap.py +715 -0
  61. ml4t/diagnostic/evaluation/metrics/information_coefficient.py +527 -0
  62. ml4t/diagnostic/evaluation/metrics/interactions.py +772 -0
  63. ml4t/diagnostic/evaluation/metrics/monotonicity.py +226 -0
  64. ml4t/diagnostic/evaluation/metrics/risk_adjusted.py +324 -0
  65. ml4t/diagnostic/evaluation/multi_signal.py +550 -0
  66. ml4t/diagnostic/evaluation/portfolio_analysis/__init__.py +83 -0
  67. ml4t/diagnostic/evaluation/portfolio_analysis/analysis.py +734 -0
  68. ml4t/diagnostic/evaluation/portfolio_analysis/metrics.py +589 -0
  69. ml4t/diagnostic/evaluation/portfolio_analysis/results.py +334 -0
  70. ml4t/diagnostic/evaluation/report_generation.py +824 -0
  71. ml4t/diagnostic/evaluation/signal_selector.py +452 -0
  72. ml4t/diagnostic/evaluation/stat_registry.py +139 -0
  73. ml4t/diagnostic/evaluation/stationarity/__init__.py +97 -0
  74. ml4t/diagnostic/evaluation/stationarity/analysis.py +518 -0
  75. ml4t/diagnostic/evaluation/stationarity/augmented_dickey_fuller.py +296 -0
  76. ml4t/diagnostic/evaluation/stationarity/kpss_test.py +308 -0
  77. ml4t/diagnostic/evaluation/stationarity/phillips_perron.py +365 -0
  78. ml4t/diagnostic/evaluation/stats/AGENT.md +43 -0
  79. ml4t/diagnostic/evaluation/stats/__init__.py +191 -0
  80. ml4t/diagnostic/evaluation/stats/backtest_overfitting.py +219 -0
  81. ml4t/diagnostic/evaluation/stats/bootstrap.py +228 -0
  82. ml4t/diagnostic/evaluation/stats/deflated_sharpe_ratio.py +591 -0
  83. ml4t/diagnostic/evaluation/stats/false_discovery_rate.py +295 -0
  84. ml4t/diagnostic/evaluation/stats/hac_standard_errors.py +108 -0
  85. ml4t/diagnostic/evaluation/stats/minimum_track_record.py +408 -0
  86. ml4t/diagnostic/evaluation/stats/moments.py +164 -0
  87. ml4t/diagnostic/evaluation/stats/rademacher_adjustment.py +436 -0
  88. ml4t/diagnostic/evaluation/stats/reality_check.py +155 -0
  89. ml4t/diagnostic/evaluation/stats/sharpe_inference.py +219 -0
  90. ml4t/diagnostic/evaluation/themes.py +330 -0
  91. ml4t/diagnostic/evaluation/threshold_analysis.py +957 -0
  92. ml4t/diagnostic/evaluation/trade_analysis.py +1136 -0
  93. ml4t/diagnostic/evaluation/trade_dashboard/__init__.py +32 -0
  94. ml4t/diagnostic/evaluation/trade_dashboard/app.py +315 -0
  95. ml4t/diagnostic/evaluation/trade_dashboard/export/__init__.py +18 -0
  96. ml4t/diagnostic/evaluation/trade_dashboard/export/csv.py +82 -0
  97. ml4t/diagnostic/evaluation/trade_dashboard/export/html.py +276 -0
  98. ml4t/diagnostic/evaluation/trade_dashboard/io.py +166 -0
  99. ml4t/diagnostic/evaluation/trade_dashboard/normalize.py +304 -0
  100. ml4t/diagnostic/evaluation/trade_dashboard/stats.py +386 -0
  101. ml4t/diagnostic/evaluation/trade_dashboard/style.py +79 -0
  102. ml4t/diagnostic/evaluation/trade_dashboard/tabs/__init__.py +21 -0
  103. ml4t/diagnostic/evaluation/trade_dashboard/tabs/patterns.py +354 -0
  104. ml4t/diagnostic/evaluation/trade_dashboard/tabs/shap_analysis.py +280 -0
  105. ml4t/diagnostic/evaluation/trade_dashboard/tabs/stat_validation.py +186 -0
  106. ml4t/diagnostic/evaluation/trade_dashboard/tabs/worst_trades.py +236 -0
  107. ml4t/diagnostic/evaluation/trade_dashboard/types.py +129 -0
  108. ml4t/diagnostic/evaluation/trade_shap/__init__.py +102 -0
  109. ml4t/diagnostic/evaluation/trade_shap/alignment.py +188 -0
  110. ml4t/diagnostic/evaluation/trade_shap/characterize.py +413 -0
  111. ml4t/diagnostic/evaluation/trade_shap/cluster.py +302 -0
  112. ml4t/diagnostic/evaluation/trade_shap/explain.py +208 -0
  113. ml4t/diagnostic/evaluation/trade_shap/hypotheses/__init__.py +23 -0
  114. ml4t/diagnostic/evaluation/trade_shap/hypotheses/generator.py +290 -0
  115. ml4t/diagnostic/evaluation/trade_shap/hypotheses/matcher.py +251 -0
  116. ml4t/diagnostic/evaluation/trade_shap/hypotheses/templates.yaml +467 -0
  117. ml4t/diagnostic/evaluation/trade_shap/models.py +386 -0
  118. ml4t/diagnostic/evaluation/trade_shap/normalize.py +116 -0
  119. ml4t/diagnostic/evaluation/trade_shap/pipeline.py +263 -0
  120. ml4t/diagnostic/evaluation/trade_shap_dashboard.py +283 -0
  121. ml4t/diagnostic/evaluation/trade_shap_diagnostics.py +588 -0
  122. ml4t/diagnostic/evaluation/validated_cv.py +535 -0
  123. ml4t/diagnostic/evaluation/visualization.py +1050 -0
  124. ml4t/diagnostic/evaluation/volatility/__init__.py +45 -0
  125. ml4t/diagnostic/evaluation/volatility/analysis.py +351 -0
  126. ml4t/diagnostic/evaluation/volatility/arch.py +258 -0
  127. ml4t/diagnostic/evaluation/volatility/garch.py +460 -0
  128. ml4t/diagnostic/integration/__init__.py +48 -0
  129. ml4t/diagnostic/integration/backtest_contract.py +671 -0
  130. ml4t/diagnostic/integration/data_contract.py +316 -0
  131. ml4t/diagnostic/integration/engineer_contract.py +226 -0
  132. ml4t/diagnostic/logging/__init__.py +77 -0
  133. ml4t/diagnostic/logging/logger.py +245 -0
  134. ml4t/diagnostic/logging/performance.py +234 -0
  135. ml4t/diagnostic/logging/progress.py +234 -0
  136. ml4t/diagnostic/logging/wandb.py +412 -0
  137. ml4t/diagnostic/metrics/__init__.py +9 -0
  138. ml4t/diagnostic/metrics/percentiles.py +128 -0
  139. ml4t/diagnostic/py.typed +1 -0
  140. ml4t/diagnostic/reporting/__init__.py +43 -0
  141. ml4t/diagnostic/reporting/base.py +130 -0
  142. ml4t/diagnostic/reporting/html_renderer.py +275 -0
  143. ml4t/diagnostic/reporting/json_renderer.py +51 -0
  144. ml4t/diagnostic/reporting/markdown_renderer.py +117 -0
  145. ml4t/diagnostic/results/AGENT.md +24 -0
  146. ml4t/diagnostic/results/__init__.py +105 -0
  147. ml4t/diagnostic/results/barrier_results/__init__.py +36 -0
  148. ml4t/diagnostic/results/barrier_results/hit_rate.py +304 -0
  149. ml4t/diagnostic/results/barrier_results/precision_recall.py +266 -0
  150. ml4t/diagnostic/results/barrier_results/profit_factor.py +297 -0
  151. ml4t/diagnostic/results/barrier_results/tearsheet.py +397 -0
  152. ml4t/diagnostic/results/barrier_results/time_to_target.py +305 -0
  153. ml4t/diagnostic/results/barrier_results/validation.py +38 -0
  154. ml4t/diagnostic/results/base.py +177 -0
  155. ml4t/diagnostic/results/event_results.py +349 -0
  156. ml4t/diagnostic/results/feature_results.py +787 -0
  157. ml4t/diagnostic/results/multi_signal_results.py +431 -0
  158. ml4t/diagnostic/results/portfolio_results.py +281 -0
  159. ml4t/diagnostic/results/sharpe_results.py +448 -0
  160. ml4t/diagnostic/results/signal_results/__init__.py +74 -0
  161. ml4t/diagnostic/results/signal_results/ic.py +581 -0
  162. ml4t/diagnostic/results/signal_results/irtc.py +110 -0
  163. ml4t/diagnostic/results/signal_results/quantile.py +392 -0
  164. ml4t/diagnostic/results/signal_results/tearsheet.py +456 -0
  165. ml4t/diagnostic/results/signal_results/turnover.py +213 -0
  166. ml4t/diagnostic/results/signal_results/validation.py +147 -0
  167. ml4t/diagnostic/signal/AGENT.md +17 -0
  168. ml4t/diagnostic/signal/__init__.py +69 -0
  169. ml4t/diagnostic/signal/_report.py +152 -0
  170. ml4t/diagnostic/signal/_utils.py +261 -0
  171. ml4t/diagnostic/signal/core.py +275 -0
  172. ml4t/diagnostic/signal/quantile.py +148 -0
  173. ml4t/diagnostic/signal/result.py +214 -0
  174. ml4t/diagnostic/signal/signal_ic.py +129 -0
  175. ml4t/diagnostic/signal/turnover.py +182 -0
  176. ml4t/diagnostic/splitters/AGENT.md +19 -0
  177. ml4t/diagnostic/splitters/__init__.py +36 -0
  178. ml4t/diagnostic/splitters/base.py +501 -0
  179. ml4t/diagnostic/splitters/calendar.py +421 -0
  180. ml4t/diagnostic/splitters/calendar_config.py +91 -0
  181. ml4t/diagnostic/splitters/combinatorial.py +1064 -0
  182. ml4t/diagnostic/splitters/config.py +322 -0
  183. ml4t/diagnostic/splitters/cpcv/__init__.py +57 -0
  184. ml4t/diagnostic/splitters/cpcv/combinations.py +119 -0
  185. ml4t/diagnostic/splitters/cpcv/partitioning.py +263 -0
  186. ml4t/diagnostic/splitters/cpcv/purge_engine.py +379 -0
  187. ml4t/diagnostic/splitters/cpcv/windows.py +190 -0
  188. ml4t/diagnostic/splitters/group_isolation.py +329 -0
  189. ml4t/diagnostic/splitters/persistence.py +316 -0
  190. ml4t/diagnostic/splitters/utils.py +207 -0
  191. ml4t/diagnostic/splitters/walk_forward.py +757 -0
  192. ml4t/diagnostic/utils/__init__.py +42 -0
  193. ml4t/diagnostic/utils/config.py +542 -0
  194. ml4t/diagnostic/utils/dependencies.py +318 -0
  195. ml4t/diagnostic/utils/sessions.py +127 -0
  196. ml4t/diagnostic/validation/__init__.py +54 -0
  197. ml4t/diagnostic/validation/dataframe.py +274 -0
  198. ml4t/diagnostic/validation/returns.py +280 -0
  199. ml4t/diagnostic/validation/timeseries.py +299 -0
  200. ml4t/diagnostic/visualization/AGENT.md +19 -0
  201. ml4t/diagnostic/visualization/__init__.py +223 -0
  202. ml4t/diagnostic/visualization/backtest/__init__.py +98 -0
  203. ml4t/diagnostic/visualization/backtest/cost_attribution.py +762 -0
  204. ml4t/diagnostic/visualization/backtest/executive_summary.py +895 -0
  205. ml4t/diagnostic/visualization/backtest/interactive_controls.py +673 -0
  206. ml4t/diagnostic/visualization/backtest/statistical_validity.py +874 -0
  207. ml4t/diagnostic/visualization/backtest/tearsheet.py +565 -0
  208. ml4t/diagnostic/visualization/backtest/template_system.py +373 -0
  209. ml4t/diagnostic/visualization/backtest/trade_plots.py +1172 -0
  210. ml4t/diagnostic/visualization/barrier_plots.py +782 -0
  211. ml4t/diagnostic/visualization/core.py +1060 -0
  212. ml4t/diagnostic/visualization/dashboards/__init__.py +36 -0
  213. ml4t/diagnostic/visualization/dashboards/base.py +582 -0
  214. ml4t/diagnostic/visualization/dashboards/importance.py +801 -0
  215. ml4t/diagnostic/visualization/dashboards/interaction.py +263 -0
  216. ml4t/diagnostic/visualization/dashboards.py +43 -0
  217. ml4t/diagnostic/visualization/data_extraction/__init__.py +48 -0
  218. ml4t/diagnostic/visualization/data_extraction/importance.py +649 -0
  219. ml4t/diagnostic/visualization/data_extraction/interaction.py +504 -0
  220. ml4t/diagnostic/visualization/data_extraction/types.py +113 -0
  221. ml4t/diagnostic/visualization/data_extraction/validation.py +66 -0
  222. ml4t/diagnostic/visualization/feature_plots.py +888 -0
  223. ml4t/diagnostic/visualization/interaction_plots.py +618 -0
  224. ml4t/diagnostic/visualization/portfolio/__init__.py +41 -0
  225. ml4t/diagnostic/visualization/portfolio/dashboard.py +514 -0
  226. ml4t/diagnostic/visualization/portfolio/drawdown_plots.py +341 -0
  227. ml4t/diagnostic/visualization/portfolio/returns_plots.py +487 -0
  228. ml4t/diagnostic/visualization/portfolio/risk_plots.py +301 -0
  229. ml4t/diagnostic/visualization/report_generation.py +1343 -0
  230. ml4t/diagnostic/visualization/signal/__init__.py +103 -0
  231. ml4t/diagnostic/visualization/signal/dashboard.py +911 -0
  232. ml4t/diagnostic/visualization/signal/event_plots.py +514 -0
  233. ml4t/diagnostic/visualization/signal/ic_plots.py +635 -0
  234. ml4t/diagnostic/visualization/signal/multi_signal_dashboard.py +974 -0
  235. ml4t/diagnostic/visualization/signal/multi_signal_plots.py +603 -0
  236. ml4t/diagnostic/visualization/signal/quantile_plots.py +625 -0
  237. ml4t/diagnostic/visualization/signal/turnover_plots.py +400 -0
  238. ml4t/diagnostic/visualization/trade_shap/__init__.py +90 -0
  239. ml4t_diagnostic-0.1.0a1.dist-info/METADATA +1044 -0
  240. ml4t_diagnostic-0.1.0a1.dist-info/RECORD +242 -0
  241. ml4t_diagnostic-0.1.0a1.dist-info/WHEEL +4 -0
  242. ml4t_diagnostic-0.1.0a1.dist-info/licenses/LICENSE +21 -0
ml4t/diagnostic/evaluation/trade_dashboard/normalize.py
@@ -0,0 +1,304 @@
+"""Dashboard data normalization.
+
+Converts various input formats (dict, TradeShapResult) into the unified
+DashboardBundle for consumption by all dashboard tabs.
+"""
+
+from __future__ import annotations
+
+from datetime import datetime
+from typing import TYPE_CHECKING, Any
+
+import numpy as np
+import pandas as pd
+
+from ml4t.diagnostic.evaluation.trade_dashboard.io import coerce_result_to_dict
+from ml4t.diagnostic.evaluation.trade_dashboard.types import DashboardBundle, DashboardConfig
+
+if TYPE_CHECKING:
+    from ml4t.diagnostic.evaluation.trade_shap.models import TradeShapResult
+
+
+def normalize_result(
+    result: TradeShapResult | dict[str, Any],
+    config: DashboardConfig | None = None,
+) -> DashboardBundle:
+    """Normalize result into a DashboardBundle.
+
+    This is the single point of schema adaptation. All tabs receive the
+    normalized DashboardBundle and don't need to handle dict/object branching.
+
+    Parameters
+    ----------
+    result : TradeShapResult or dict
+        Analysis result in either format.
+    config : DashboardConfig, optional
+        Dashboard configuration.
+
+    Returns
+    -------
+    DashboardBundle
+        Normalized data container with:
+        - trades_df sorted chronologically
+        - returns array (prefers return_pct, falls back to pnl)
+        - normalized explanations and patterns
+    """
+    if config is None:
+        config = DashboardConfig()
+
+    # Convert to dict if needed
+    result_dict = coerce_result_to_dict(result)
+
+    # Extract and normalize explanations
+    explanations = result_dict.get("explanations", [])
+    normalized_explanations = [_normalize_explanation(exp) for exp in explanations]
+
+    # Build trades DataFrame
+    trades_df = _build_trades_df(normalized_explanations)
+
+    # Sort chronologically for time-series tests
+    if "entry_time" in trades_df.columns and not trades_df["entry_time"].isna().all():
+        trades_df = trades_df.sort_values("entry_time", ascending=True).reset_index(drop=True)
+
+    # Extract returns (prefer return_pct, fall back to pnl)
+    returns, returns_label = _extract_returns(trades_df)
+
+    # Build patterns DataFrame
+    patterns = result_dict.get("error_patterns", [])
+    patterns_df = _build_patterns_df(patterns)
+
+    # Extract metadata
+    n_analyzed = result_dict.get("n_trades_analyzed", len(explanations))
+    n_explained = result_dict.get("n_trades_explained", len(explanations))
+    n_failed = result_dict.get("n_trades_failed", 0)
+    failed_trades = result_dict.get("failed_trades", [])
+
+    return DashboardBundle(
+        trades_df=trades_df,
+        returns=returns,
+        returns_label=returns_label,
+        explanations=normalized_explanations,
+        patterns_df=patterns_df,
+        n_trades_analyzed=n_analyzed,
+        n_trades_explained=n_explained,
+        n_trades_failed=n_failed,
+        failed_trades=failed_trades,
+        config=config,
+    )
+
+
+def _normalize_explanation(exp: dict[str, Any]) -> dict[str, Any]:
+    """Normalize a single explanation to stable format.
+
+    Returns dict with stable keys:
+    - trade_id: str
+    - timestamp: datetime | None
+    - shap_vector: list[float]
+    - top_features: list[tuple[str, float]]
+    - trade_metrics: dict | None
+    """
+    result: dict[str, Any] = {
+        "trade_id": str(exp.get("trade_id", "")),
+        "timestamp": _parse_timestamp(exp.get("timestamp")),
+        "shap_vector": list(exp.get("shap_vector", [])),
+        "top_features": list(exp.get("top_features", [])),
+        "trade_metrics": None,
+    }
+
+    # Normalize trade_metrics if present
+    if exp.get("trade_metrics"):
+        tm = exp["trade_metrics"]
+        result["trade_metrics"] = {
+            "pnl": _safe_float(tm.get("pnl")),
+            "return_pct": _safe_float(tm.get("return_pct")),
+            "entry_time": _parse_timestamp(tm.get("entry_time")),
+            "exit_time": _parse_timestamp(tm.get("exit_time")),
+            "duration_days": _safe_float(tm.get("duration_days")),
+            "entry_price": _safe_float(tm.get("entry_price")),
+            "exit_price": _safe_float(tm.get("exit_price")),
+            "symbol": tm.get("symbol"),
+        }
+
+    return result
+
+
+def _build_trades_df(explanations: list[dict[str, Any]]) -> pd.DataFrame:
+    """Build trades DataFrame from normalized explanations.
+
+    Returns DataFrame with columns:
+    - trade_id: str
+    - entry_time: datetime
+    - exit_time: datetime (optional)
+    - pnl: float
+    - return_pct: float (optional)
+    - symbol: str (optional)
+    - top_feature: str
+    - top_shap_value: float
+    """
+    records = []
+
+    for exp in explanations:
+        tm = exp.get("trade_metrics") or {}
+        top_features = exp.get("top_features", [])
+
+        record = {
+            "trade_id": exp.get("trade_id", ""),
+            "entry_time": tm.get("entry_time") or exp.get("timestamp"),
+            "exit_time": tm.get("exit_time"),
+            "pnl": tm.get("pnl"),
+            "return_pct": tm.get("return_pct"),
+            "duration_days": tm.get("duration_days"),
+            "entry_price": tm.get("entry_price"),
+            "exit_price": tm.get("exit_price"),
+            "symbol": tm.get("symbol"),
+            "top_feature": top_features[0][0] if top_features else None,
+            "top_shap_value": top_features[0][1] if top_features else None,
+        }
+        records.append(record)
+
+    if not records:
+        # Return empty DataFrame with expected columns
+        return pd.DataFrame(
+            columns=[
+                "trade_id",
+                "entry_time",
+                "exit_time",
+                "pnl",
+                "return_pct",
+                "duration_days",
+                "entry_price",
+                "exit_price",
+                "symbol",
+                "top_feature",
+                "top_shap_value",
+            ]
+        )
+
+    return pd.DataFrame(records)
+
+
+def _extract_returns(trades_df: pd.DataFrame) -> tuple[np.ndarray | None, str]:
+    """Extract returns array from trades DataFrame.
+
+    Prefers return_pct if available, falls back to pnl.
+
+    Returns
+    -------
+    tuple[np.ndarray | None, str]
+        Returns array and label ("return_pct", "pnl", or "none").
+    """
+    if trades_df.empty:
+        return None, "none"
+
+    # Prefer return_pct (normalized returns)
+    if "return_pct" in trades_df.columns:
+        return_pct = trades_df["return_pct"].dropna()
+        if len(return_pct) > 0:
+            return return_pct.to_numpy(dtype=float), "return_pct"
+
+    # Fall back to pnl (dollar amounts)
+    if "pnl" in trades_df.columns:
+        pnl = trades_df["pnl"].dropna()
+        if len(pnl) > 0:
+            return pnl.to_numpy(dtype=float), "pnl"
+
+    return None, "none"
+
+
+def _build_patterns_df(patterns: list[dict[str, Any] | Any]) -> pd.DataFrame:
+    """Build patterns DataFrame from pattern list.
+
+    Returns DataFrame with columns:
+    - cluster_id: int
+    - n_trades: int
+    - description: str
+    - top_features: list[tuple]
+    - hypothesis: str (optional)
+    - actions: list[str] (optional)
+    - confidence: float (optional)
+    - separation_score: float (optional)
+    - distinctiveness: float (optional)
+    """
+    records = []
+
+    for pattern in patterns:
+        if isinstance(pattern, dict):
+            record = {
+                "cluster_id": pattern.get("cluster_id", 0),
+                "n_trades": pattern.get("n_trades", 0),
+                "description": pattern.get("description", ""),
+                "top_features": pattern.get("top_features", []),
+                "separation_score": pattern.get("separation_score"),
+                "distinctiveness": pattern.get("distinctiveness"),
+                "hypothesis": pattern.get("hypothesis"),
+                "actions": pattern.get("actions", []),
+                "confidence": pattern.get("confidence"),
+            }
+        else:
+            record = {
+                "cluster_id": getattr(pattern, "cluster_id", 0),
+                "n_trades": getattr(pattern, "n_trades", 0),
+                "description": getattr(pattern, "description", ""),
+                "top_features": list(getattr(pattern, "top_features", [])),
+                "separation_score": getattr(pattern, "separation_score", None),
+                "distinctiveness": getattr(pattern, "distinctiveness", None),
+                "hypothesis": getattr(pattern, "hypothesis", None),
+                "actions": list(getattr(pattern, "actions", []) or []),
+                "confidence": getattr(pattern, "confidence", None),
+            }
+        records.append(record)
+
+    if not records:
+        return pd.DataFrame(
+            columns=[
+                "cluster_id",
+                "n_trades",
+                "description",
+                "top_features",
+                "separation_score",
+                "distinctiveness",
+                "hypothesis",
+                "actions",
+                "confidence",
+            ]
+        )
+
+    return pd.DataFrame(records)
+
+
+def _parse_timestamp(value: Any) -> datetime | None:
+    """Parse a value into datetime or None."""
+    if value is None:
+        return None
+    if isinstance(value, datetime):
+        return value
+    if isinstance(value, str):
+        if not value or value == "N/A" or value == "None":
+            return None
+        try:
+            # Try ISO format first
+            return datetime.fromisoformat(value.replace("Z", "+00:00"))
+        except ValueError:
+            try:
+                # Try common datetime formats
+                for fmt in ["%Y-%m-%d %H:%M:%S", "%Y-%m-%d", "%Y/%m/%d"]:
+                    try:
+                        return datetime.strptime(value, fmt)
+                    except ValueError:
+                        continue
+            except Exception:
+                pass
+    return None
+
+
+def _safe_float(value: Any) -> float | None:
+    """Safely convert value to float or None.
+
+    Fixes the float(None) bug in the original dashboard.
+    """
+    if value is None:
+        return None
+    try:
+        return float(value)
+    except (ValueError, TypeError):
+        return None
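A minimal usage sketch for normalize_result (illustrative, not from the package diff): the dict keys mirror those read by the function above, the trade values and feature names are hypothetical, and attribute access on DashboardBundle (trades_df, returns_label) is assumed from the keyword construction inside normalize_result.

# Hypothetical input; keys follow what normalize_result and _normalize_explanation read.
from ml4t.diagnostic.evaluation.trade_dashboard.normalize import normalize_result

raw_result = {
    "explanations": [
        {
            "trade_id": "T-0001",
            "timestamp": "2024-01-02T15:30:00Z",
            "shap_vector": [0.12, -0.05, 0.01],
            "top_features": [("momentum_20d", 0.12), ("atr_14", -0.05)],  # hypothetical feature names
            "trade_metrics": {
                "pnl": -150.0,
                "return_pct": -0.012,
                "entry_time": "2024-01-02 15:30:00",
                "exit_time": "2024-01-03 10:00:00",
                "symbol": "AAPL",
            },
        },
    ],
    "error_patterns": [],
    "n_trades_analyzed": 1,
    "n_trades_explained": 1,
    "n_trades_failed": 0,
}

bundle = normalize_result(raw_result)
# return_pct is populated, so _extract_returns labels the array "return_pct" rather than "pnl".
print(bundle.returns_label)
print(bundle.trades_df[["trade_id", "entry_time", "pnl", "return_pct"]])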
ml4t/diagnostic/evaluation/trade_dashboard/stats.py
@@ -0,0 +1,386 @@
+"""Dashboard statistical computations.
+
+Pure statistical functions for the dashboard, including PSR (Probabilistic
+Sharpe Ratio) which replaces the incorrectly-used DSR for single-strategy analysis.
+"""
+
+from __future__ import annotations
+
+from typing import Any, Literal, overload
+
+import numpy as np
+import pandas as pd
+from scipy import stats
+from scipy.stats import norm
+
+from ml4t.diagnostic.evaluation.trade_dashboard.types import ReturnSummary
+
+
+def compute_return_summary(returns: np.ndarray) -> ReturnSummary:
+    """Compute summary statistics for a returns series.
+
+    Parameters
+    ----------
+    returns : np.ndarray
+        Array of returns (can be return_pct or pnl).
+
+    Returns
+    -------
+    ReturnSummary
+        Summary statistics including mean, std, Sharpe, skewness, kurtosis.
+    """
+    n = len(returns)
+    if n == 0:
+        return ReturnSummary(
+            n_samples=0,
+            mean=np.nan,
+            std=np.nan,
+            sharpe=np.nan,
+            skewness=np.nan,
+            kurtosis=np.nan,
+            min_val=np.nan,
+            max_val=np.nan,
+            win_rate=np.nan,
+        )
+
+    mean = float(np.mean(returns))
+    std = float(np.std(returns, ddof=1)) if n > 1 else 0.0
+    sharpe = mean / std if std > 0 else np.nan
+
+    # Skewness and kurtosis require minimum samples
+    skewness = float(stats.skew(returns)) if n > 2 else 0.0
+    # Use Fisher=False to get actual kurtosis (3.0 for normal), not excess
+    kurtosis = float(stats.kurtosis(returns, fisher=False)) if n > 3 else 3.0
+
+    win_rate = float(np.mean(returns > 0))
+
+    return ReturnSummary(
+        n_samples=n,
+        mean=mean,
+        std=std,
+        sharpe=sharpe,
+        skewness=skewness,
+        kurtosis=kurtosis,
+        min_val=float(np.min(returns)),
+        max_val=float(np.max(returns)),
+        win_rate=win_rate,
+    )
+
+
+@overload
+def probabilistic_sharpe_ratio(
+    observed_sharpe: float,
+    benchmark_sharpe: float = ...,
+    n_samples: int = ...,
+    skewness: float = ...,
+    kurtosis: float = ...,
+    return_components: Literal[False] = ...,
+) -> float: ...
+
+
+@overload
+def probabilistic_sharpe_ratio(
+    observed_sharpe: float,
+    benchmark_sharpe: float = ...,
+    n_samples: int = ...,
+    skewness: float = ...,
+    kurtosis: float = ...,
+    return_components: Literal[True] = ...,
+) -> dict[str, float]: ...
+
+
+def probabilistic_sharpe_ratio(
+    observed_sharpe: float,
+    benchmark_sharpe: float = 0.0,
+    n_samples: int = 1,
+    skewness: float = 0.0,
+    kurtosis: float = 3.0,
+    return_components: bool = False,
+) -> float | dict[str, float]:
+    """Calculate Probabilistic Sharpe Ratio (PSR).
+
+    PSR gives the probability that the true Sharpe ratio exceeds a benchmark,
+    accounting for sample size and return distribution characteristics.
+
+    Unlike DSR (which corrects for multiple testing across K strategies),
+    PSR is applicable to a SINGLE strategy's performance evaluation.
+
+    Parameters
+    ----------
+    observed_sharpe : float
+        Observed Sharpe ratio of the strategy.
+    benchmark_sharpe : float, default 0.0
+        Benchmark Sharpe ratio (typically 0 for testing significance).
+    n_samples : int, default 1
+        Number of return observations (T).
+    skewness : float, default 0.0
+        Skewness of returns distribution.
+    kurtosis : float, default 3.0
+        Kurtosis of returns (3.0 for normal, NOT excess kurtosis).
+    return_components : bool, default False
+        If True, return dict with intermediate calculations.
+
+    Returns
+    -------
+    float or dict
+        PSR probability in [0, 1], or dict with 'psr', 'z_score', 'std_sr'.
+
+    Notes
+    -----
+    Formula (Bailey & Lopez de Prado 2012):
+
+    PSR = Phi[(SR - SR_0) * sqrt(T-1) / sqrt(1 - gamma_3*SR + (gamma_4-1)/4*SR^2)]
+
+    where:
+    - SR = observed Sharpe ratio
+    - SR_0 = benchmark Sharpe ratio
+    - T = number of samples
+    - gamma_3 = skewness
+    - gamma_4 = kurtosis (not excess)
+    - Phi = standard normal CDF
+
+    Interpretation:
+    - PSR > 0.95: 95% confidence true SR > benchmark (significant at alpha=0.05)
+    - PSR < 0.50: More likely true SR < benchmark
+    - PSR = 0.50: No evidence either way
+
+    Examples
+    --------
+    >>> psr = probabilistic_sharpe_ratio(
+    ...     observed_sharpe=1.5,
+    ...     benchmark_sharpe=0.0,
+    ...     n_samples=252,
+    ...     skewness=-0.5,
+    ...     kurtosis=4.0,
+    ... )
+    >>> print(f"PSR: {psr:.3f}")
+    PSR: 1.000
+
+    References
+    ----------
+    Bailey, D. H., & Lopez de Prado, M. (2012).
+    "The Sharpe Ratio Efficient Frontier."
+    Journal of Risk, 15(2), 3-44.
+    """
+    if n_samples < 2:
+        # Need at least 2 samples for meaningful calculation
+        if return_components:
+            return {"psr": 0.5, "z_score": 0.0, "std_sr": np.inf}
+        return 0.5
+
+    # Calculate denominator of z-score
+    # V[SR] = 1 - gamma_3*SR + (gamma_4-1)/4*SR^2
+    sr_squared = observed_sharpe**2
+    variance_component = 1 - skewness * observed_sharpe + (kurtosis - 1) / 4 * sr_squared
+
+    # Guard against negative variance (can happen with extreme skewness)
+    if variance_component <= 0:
+        variance_component = 0.01  # Small positive value
+
+    std_sr = np.sqrt(variance_component / (n_samples - 1))
+
+    # Calculate z-score
+    if std_sr > 0:
+        z_score = (observed_sharpe - benchmark_sharpe) / std_sr
+    else:
+        z_score = np.inf if observed_sharpe > benchmark_sharpe else -np.inf
+
+    # Convert to probability
+    psr = float(norm.cdf(z_score))
+
+    if return_components:
+        return {
+            "psr": psr,
+            "z_score": float(z_score) if np.isfinite(z_score) else 0.0,
+            "std_sr": float(std_sr),
+        }
+
+    return psr
+
+
+def compute_distribution_tests(
+    returns: np.ndarray,
+) -> pd.DataFrame:
+    """Compute distribution tests for returns.
+
+    Parameters
+    ----------
+    returns : np.ndarray
+        Array of returns.
+
+    Returns
+    -------
+    pd.DataFrame
+        DataFrame with test results:
+        - test: Test name
+        - statistic: Test statistic
+        - p_value: P-value
+        - interpretation: Human-readable interpretation
+    """
+    results = []
+
+    n = len(returns)
+
+    # Shapiro-Wilk test (for n <= 5000)
+    if 3 <= n <= 5000:
+        try:
+            from scipy.stats import shapiro
+
+            stat, p = shapiro(returns)
+            results.append(
+                {
+                    "test": "Shapiro-Wilk",
+                    "statistic": stat,
+                    "p_value": p,
+                    "interpretation": "Normal" if p > 0.05 else "Non-normal",
+                }
+            )
+        except Exception:
+            pass
+
+    # Anderson-Darling test
+    if n >= 4:
+        try:
+            from scipy.stats import anderson
+
+            result = anderson(returns, dist="norm")
+            # Use 5% significance level
+            critical_idx = 2  # Index for 5% level
+            stat = result.statistic
+            critical = result.critical_values[critical_idx]
+            is_normal = stat < critical
+            results.append(
+                {
+                    "test": "Anderson-Darling",
+                    "statistic": stat,
+                    "p_value": None,  # Anderson doesn't provide p-value directly
+                    "interpretation": "Normal" if is_normal else "Non-normal",
+                }
+            )
+        except Exception:
+            pass
+
+    # Jarque-Bera test
+    if n >= 20:
+        try:
+            from scipy.stats import jarque_bera
+
+            stat, p = jarque_bera(returns)
+            results.append(
+                {
+                    "test": "Jarque-Bera",
+                    "statistic": stat,
+                    "p_value": p,
+                    "interpretation": "Normal" if p > 0.05 else "Non-normal",
+                }
+            )
+        except Exception:
+            pass
+
+    if not results:
+        return pd.DataFrame(columns=["test", "statistic", "p_value", "interpretation"])
+
+    return pd.DataFrame(results)
+
+
+def compute_time_series_tests(
+    returns: np.ndarray,
+    max_lags: int = 10,
+) -> pd.DataFrame:
+    """Compute time-series tests (requires chronologically sorted data).
+
+    Parameters
+    ----------
+    returns : np.ndarray
+        Array of returns (MUST be in chronological order).
+    max_lags : int, default 10
+        Maximum lags for Ljung-Box test.
+
+    Returns
+    -------
+    pd.DataFrame
+        DataFrame with test results.
+
+    Notes
+    -----
+    These tests are only meaningful on chronologically ordered data.
+    The dashboard normalizes data by sorting trades by entry_time.
+    """
+    results = []
+
+    n = len(returns)
+
+    # Ljung-Box test for autocorrelation
+    if n > max_lags + 5:
+        try:
+            from statsmodels.stats.diagnostic import acorr_ljungbox
+
+            lb_result = acorr_ljungbox(returns, lags=[max_lags], return_df=True)
+            stat = lb_result["lb_stat"].iloc[0]
+            p = lb_result["lb_pvalue"].iloc[0]
+            results.append(
+                {
+                    "test": f"Ljung-Box (lag={max_lags})",
+                    "statistic": stat,
+                    "p_value": p,
+                    "interpretation": "No autocorrelation"
+                    if p > 0.05
+                    else "Autocorrelation detected",
+                }
+            )
+        except Exception:
+            pass
+
+    # ADF test for stationarity
+    if n >= 20:
+        try:
+            from statsmodels.tsa.stattools import adfuller
+
+            adf_result = adfuller(returns, autolag="AIC")
+            stat = adf_result[0]
+            p = adf_result[1]
+            results.append(
+                {
+                    "test": "ADF (stationarity)",
+                    "statistic": stat,
+                    "p_value": p,
+                    "interpretation": "Stationary" if p < 0.05 else "Non-stationary",
+                }
+            )
+        except Exception:
+            pass
+
+    if not results:
+        return pd.DataFrame(columns=["test", "statistic", "p_value", "interpretation"])
+
+    return pd.DataFrame(results)
+
+
+def benjamini_hochberg_fdr(
+    p_values: list[float] | np.ndarray,
+    alpha: float = 0.05,
+) -> dict[str, Any]:
+    """Apply Benjamini-Hochberg FDR correction.
+
+    Parameters
+    ----------
+    p_values : list or ndarray
+        Raw p-values.
+    alpha : float, default 0.05
+        Target FDR level.
+
+    Returns
+    -------
+    dict
+        - rejected: boolean array of rejected hypotheses
+        - adjusted_p_values: BH-adjusted p-values
+        - n_rejected: number of rejections
+    """
+    from ml4t.diagnostic.evaluation.stats import benjamini_hochberg_fdr as bh_fdr
+
+    result = bh_fdr(p_values, alpha=alpha, return_details=True)
+    return {
+        "rejected": result["rejected"],
+        "adjusted_p_values": result["adjusted_p_values"],
+        "n_rejected": result["n_rejected"],
+    }
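A hedged sketch of how compute_return_summary and probabilistic_sharpe_ratio compose (illustrative, not from the package diff): the return series is synthetic, and ReturnSummary is assumed to expose its fields as attributes, as the keyword construction above suggests. Note that compute_return_summary reports plain kurtosis (fisher=False), which is exactly the convention probabilistic_sharpe_ratio expects.

# Assumed composition of the two functions above on a synthetic per-trade return series.
import numpy as np

from ml4t.diagnostic.evaluation.trade_dashboard.stats import (
    compute_return_summary,
    probabilistic_sharpe_ratio,
)

rng = np.random.default_rng(0)
per_trade_returns = rng.normal(loc=0.002, scale=0.01, size=120)  # synthetic returns

summary = compute_return_summary(per_trade_returns)
psr = probabilistic_sharpe_ratio(
    observed_sharpe=summary.sharpe,   # per-observation Sharpe (mean/std), as computed above
    benchmark_sharpe=0.0,
    n_samples=summary.n_samples,
    skewness=summary.skewness,
    kurtosis=summary.kurtosis,        # plain (not excess) kurtosis
)
print(f"Sharpe={summary.sharpe:.2f}, PSR={psr:.3f}")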