ml4t-diagnostic 0.1.0a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (242) hide show
  1. ml4t/diagnostic/AGENT.md +25 -0
  2. ml4t/diagnostic/__init__.py +166 -0
  3. ml4t/diagnostic/backends/__init__.py +10 -0
  4. ml4t/diagnostic/backends/adapter.py +192 -0
  5. ml4t/diagnostic/backends/polars_backend.py +899 -0
  6. ml4t/diagnostic/caching/__init__.py +40 -0
  7. ml4t/diagnostic/caching/cache.py +331 -0
  8. ml4t/diagnostic/caching/decorators.py +131 -0
  9. ml4t/diagnostic/caching/smart_cache.py +339 -0
  10. ml4t/diagnostic/config/AGENT.md +24 -0
  11. ml4t/diagnostic/config/README.md +267 -0
  12. ml4t/diagnostic/config/__init__.py +219 -0
  13. ml4t/diagnostic/config/barrier_config.py +277 -0
  14. ml4t/diagnostic/config/base.py +301 -0
  15. ml4t/diagnostic/config/event_config.py +148 -0
  16. ml4t/diagnostic/config/feature_config.py +404 -0
  17. ml4t/diagnostic/config/multi_signal_config.py +55 -0
  18. ml4t/diagnostic/config/portfolio_config.py +215 -0
  19. ml4t/diagnostic/config/report_config.py +391 -0
  20. ml4t/diagnostic/config/sharpe_config.py +202 -0
  21. ml4t/diagnostic/config/signal_config.py +206 -0
  22. ml4t/diagnostic/config/trade_analysis_config.py +310 -0
  23. ml4t/diagnostic/config/validation.py +279 -0
  24. ml4t/diagnostic/core/__init__.py +29 -0
  25. ml4t/diagnostic/core/numba_utils.py +315 -0
  26. ml4t/diagnostic/core/purging.py +372 -0
  27. ml4t/diagnostic/core/sampling.py +471 -0
  28. ml4t/diagnostic/errors/__init__.py +205 -0
  29. ml4t/diagnostic/evaluation/AGENT.md +26 -0
  30. ml4t/diagnostic/evaluation/__init__.py +437 -0
  31. ml4t/diagnostic/evaluation/autocorrelation.py +531 -0
  32. ml4t/diagnostic/evaluation/barrier_analysis.py +1050 -0
  33. ml4t/diagnostic/evaluation/binary_metrics.py +910 -0
  34. ml4t/diagnostic/evaluation/dashboard.py +715 -0
  35. ml4t/diagnostic/evaluation/diagnostic_plots.py +1037 -0
  36. ml4t/diagnostic/evaluation/distribution/__init__.py +499 -0
  37. ml4t/diagnostic/evaluation/distribution/moments.py +299 -0
  38. ml4t/diagnostic/evaluation/distribution/tails.py +777 -0
  39. ml4t/diagnostic/evaluation/distribution/tests.py +470 -0
  40. ml4t/diagnostic/evaluation/drift/__init__.py +139 -0
  41. ml4t/diagnostic/evaluation/drift/analysis.py +432 -0
  42. ml4t/diagnostic/evaluation/drift/domain_classifier.py +517 -0
  43. ml4t/diagnostic/evaluation/drift/population_stability_index.py +310 -0
  44. ml4t/diagnostic/evaluation/drift/wasserstein.py +388 -0
  45. ml4t/diagnostic/evaluation/event_analysis.py +647 -0
  46. ml4t/diagnostic/evaluation/excursion.py +390 -0
  47. ml4t/diagnostic/evaluation/feature_diagnostics.py +873 -0
  48. ml4t/diagnostic/evaluation/feature_outcome.py +666 -0
  49. ml4t/diagnostic/evaluation/framework.py +935 -0
  50. ml4t/diagnostic/evaluation/metric_registry.py +255 -0
  51. ml4t/diagnostic/evaluation/metrics/AGENT.md +23 -0
  52. ml4t/diagnostic/evaluation/metrics/__init__.py +133 -0
  53. ml4t/diagnostic/evaluation/metrics/basic.py +160 -0
  54. ml4t/diagnostic/evaluation/metrics/conditional_ic.py +469 -0
  55. ml4t/diagnostic/evaluation/metrics/feature_outcome.py +475 -0
  56. ml4t/diagnostic/evaluation/metrics/ic_statistics.py +446 -0
  57. ml4t/diagnostic/evaluation/metrics/importance_analysis.py +338 -0
  58. ml4t/diagnostic/evaluation/metrics/importance_classical.py +375 -0
  59. ml4t/diagnostic/evaluation/metrics/importance_mda.py +371 -0
  60. ml4t/diagnostic/evaluation/metrics/importance_shap.py +715 -0
  61. ml4t/diagnostic/evaluation/metrics/information_coefficient.py +527 -0
  62. ml4t/diagnostic/evaluation/metrics/interactions.py +772 -0
  63. ml4t/diagnostic/evaluation/metrics/monotonicity.py +226 -0
  64. ml4t/diagnostic/evaluation/metrics/risk_adjusted.py +324 -0
  65. ml4t/diagnostic/evaluation/multi_signal.py +550 -0
  66. ml4t/diagnostic/evaluation/portfolio_analysis/__init__.py +83 -0
  67. ml4t/diagnostic/evaluation/portfolio_analysis/analysis.py +734 -0
  68. ml4t/diagnostic/evaluation/portfolio_analysis/metrics.py +589 -0
  69. ml4t/diagnostic/evaluation/portfolio_analysis/results.py +334 -0
  70. ml4t/diagnostic/evaluation/report_generation.py +824 -0
  71. ml4t/diagnostic/evaluation/signal_selector.py +452 -0
  72. ml4t/diagnostic/evaluation/stat_registry.py +139 -0
  73. ml4t/diagnostic/evaluation/stationarity/__init__.py +97 -0
  74. ml4t/diagnostic/evaluation/stationarity/analysis.py +518 -0
  75. ml4t/diagnostic/evaluation/stationarity/augmented_dickey_fuller.py +296 -0
  76. ml4t/diagnostic/evaluation/stationarity/kpss_test.py +308 -0
  77. ml4t/diagnostic/evaluation/stationarity/phillips_perron.py +365 -0
  78. ml4t/diagnostic/evaluation/stats/AGENT.md +43 -0
  79. ml4t/diagnostic/evaluation/stats/__init__.py +191 -0
  80. ml4t/diagnostic/evaluation/stats/backtest_overfitting.py +219 -0
  81. ml4t/diagnostic/evaluation/stats/bootstrap.py +228 -0
  82. ml4t/diagnostic/evaluation/stats/deflated_sharpe_ratio.py +591 -0
  83. ml4t/diagnostic/evaluation/stats/false_discovery_rate.py +295 -0
  84. ml4t/diagnostic/evaluation/stats/hac_standard_errors.py +108 -0
  85. ml4t/diagnostic/evaluation/stats/minimum_track_record.py +408 -0
  86. ml4t/diagnostic/evaluation/stats/moments.py +164 -0
  87. ml4t/diagnostic/evaluation/stats/rademacher_adjustment.py +436 -0
  88. ml4t/diagnostic/evaluation/stats/reality_check.py +155 -0
  89. ml4t/diagnostic/evaluation/stats/sharpe_inference.py +219 -0
  90. ml4t/diagnostic/evaluation/themes.py +330 -0
  91. ml4t/diagnostic/evaluation/threshold_analysis.py +957 -0
  92. ml4t/diagnostic/evaluation/trade_analysis.py +1136 -0
  93. ml4t/diagnostic/evaluation/trade_dashboard/__init__.py +32 -0
  94. ml4t/diagnostic/evaluation/trade_dashboard/app.py +315 -0
  95. ml4t/diagnostic/evaluation/trade_dashboard/export/__init__.py +18 -0
  96. ml4t/diagnostic/evaluation/trade_dashboard/export/csv.py +82 -0
  97. ml4t/diagnostic/evaluation/trade_dashboard/export/html.py +276 -0
  98. ml4t/diagnostic/evaluation/trade_dashboard/io.py +166 -0
  99. ml4t/diagnostic/evaluation/trade_dashboard/normalize.py +304 -0
  100. ml4t/diagnostic/evaluation/trade_dashboard/stats.py +386 -0
  101. ml4t/diagnostic/evaluation/trade_dashboard/style.py +79 -0
  102. ml4t/diagnostic/evaluation/trade_dashboard/tabs/__init__.py +21 -0
  103. ml4t/diagnostic/evaluation/trade_dashboard/tabs/patterns.py +354 -0
  104. ml4t/diagnostic/evaluation/trade_dashboard/tabs/shap_analysis.py +280 -0
  105. ml4t/diagnostic/evaluation/trade_dashboard/tabs/stat_validation.py +186 -0
  106. ml4t/diagnostic/evaluation/trade_dashboard/tabs/worst_trades.py +236 -0
  107. ml4t/diagnostic/evaluation/trade_dashboard/types.py +129 -0
  108. ml4t/diagnostic/evaluation/trade_shap/__init__.py +102 -0
  109. ml4t/diagnostic/evaluation/trade_shap/alignment.py +188 -0
  110. ml4t/diagnostic/evaluation/trade_shap/characterize.py +413 -0
  111. ml4t/diagnostic/evaluation/trade_shap/cluster.py +302 -0
  112. ml4t/diagnostic/evaluation/trade_shap/explain.py +208 -0
  113. ml4t/diagnostic/evaluation/trade_shap/hypotheses/__init__.py +23 -0
  114. ml4t/diagnostic/evaluation/trade_shap/hypotheses/generator.py +290 -0
  115. ml4t/diagnostic/evaluation/trade_shap/hypotheses/matcher.py +251 -0
  116. ml4t/diagnostic/evaluation/trade_shap/hypotheses/templates.yaml +467 -0
  117. ml4t/diagnostic/evaluation/trade_shap/models.py +386 -0
  118. ml4t/diagnostic/evaluation/trade_shap/normalize.py +116 -0
  119. ml4t/diagnostic/evaluation/trade_shap/pipeline.py +263 -0
  120. ml4t/diagnostic/evaluation/trade_shap_dashboard.py +283 -0
  121. ml4t/diagnostic/evaluation/trade_shap_diagnostics.py +588 -0
  122. ml4t/diagnostic/evaluation/validated_cv.py +535 -0
  123. ml4t/diagnostic/evaluation/visualization.py +1050 -0
  124. ml4t/diagnostic/evaluation/volatility/__init__.py +45 -0
  125. ml4t/diagnostic/evaluation/volatility/analysis.py +351 -0
  126. ml4t/diagnostic/evaluation/volatility/arch.py +258 -0
  127. ml4t/diagnostic/evaluation/volatility/garch.py +460 -0
  128. ml4t/diagnostic/integration/__init__.py +48 -0
  129. ml4t/diagnostic/integration/backtest_contract.py +671 -0
  130. ml4t/diagnostic/integration/data_contract.py +316 -0
  131. ml4t/diagnostic/integration/engineer_contract.py +226 -0
  132. ml4t/diagnostic/logging/__init__.py +77 -0
  133. ml4t/diagnostic/logging/logger.py +245 -0
  134. ml4t/diagnostic/logging/performance.py +234 -0
  135. ml4t/diagnostic/logging/progress.py +234 -0
  136. ml4t/diagnostic/logging/wandb.py +412 -0
  137. ml4t/diagnostic/metrics/__init__.py +9 -0
  138. ml4t/diagnostic/metrics/percentiles.py +128 -0
  139. ml4t/diagnostic/py.typed +1 -0
  140. ml4t/diagnostic/reporting/__init__.py +43 -0
  141. ml4t/diagnostic/reporting/base.py +130 -0
  142. ml4t/diagnostic/reporting/html_renderer.py +275 -0
  143. ml4t/diagnostic/reporting/json_renderer.py +51 -0
  144. ml4t/diagnostic/reporting/markdown_renderer.py +117 -0
  145. ml4t/diagnostic/results/AGENT.md +24 -0
  146. ml4t/diagnostic/results/__init__.py +105 -0
  147. ml4t/diagnostic/results/barrier_results/__init__.py +36 -0
  148. ml4t/diagnostic/results/barrier_results/hit_rate.py +304 -0
  149. ml4t/diagnostic/results/barrier_results/precision_recall.py +266 -0
  150. ml4t/diagnostic/results/barrier_results/profit_factor.py +297 -0
  151. ml4t/diagnostic/results/barrier_results/tearsheet.py +397 -0
  152. ml4t/diagnostic/results/barrier_results/time_to_target.py +305 -0
  153. ml4t/diagnostic/results/barrier_results/validation.py +38 -0
  154. ml4t/diagnostic/results/base.py +177 -0
  155. ml4t/diagnostic/results/event_results.py +349 -0
  156. ml4t/diagnostic/results/feature_results.py +787 -0
  157. ml4t/diagnostic/results/multi_signal_results.py +431 -0
  158. ml4t/diagnostic/results/portfolio_results.py +281 -0
  159. ml4t/diagnostic/results/sharpe_results.py +448 -0
  160. ml4t/diagnostic/results/signal_results/__init__.py +74 -0
  161. ml4t/diagnostic/results/signal_results/ic.py +581 -0
  162. ml4t/diagnostic/results/signal_results/irtc.py +110 -0
  163. ml4t/diagnostic/results/signal_results/quantile.py +392 -0
  164. ml4t/diagnostic/results/signal_results/tearsheet.py +456 -0
  165. ml4t/diagnostic/results/signal_results/turnover.py +213 -0
  166. ml4t/diagnostic/results/signal_results/validation.py +147 -0
  167. ml4t/diagnostic/signal/AGENT.md +17 -0
  168. ml4t/diagnostic/signal/__init__.py +69 -0
  169. ml4t/diagnostic/signal/_report.py +152 -0
  170. ml4t/diagnostic/signal/_utils.py +261 -0
  171. ml4t/diagnostic/signal/core.py +275 -0
  172. ml4t/diagnostic/signal/quantile.py +148 -0
  173. ml4t/diagnostic/signal/result.py +214 -0
  174. ml4t/diagnostic/signal/signal_ic.py +129 -0
  175. ml4t/diagnostic/signal/turnover.py +182 -0
  176. ml4t/diagnostic/splitters/AGENT.md +19 -0
  177. ml4t/diagnostic/splitters/__init__.py +36 -0
  178. ml4t/diagnostic/splitters/base.py +501 -0
  179. ml4t/diagnostic/splitters/calendar.py +421 -0
  180. ml4t/diagnostic/splitters/calendar_config.py +91 -0
  181. ml4t/diagnostic/splitters/combinatorial.py +1064 -0
  182. ml4t/diagnostic/splitters/config.py +322 -0
  183. ml4t/diagnostic/splitters/cpcv/__init__.py +57 -0
  184. ml4t/diagnostic/splitters/cpcv/combinations.py +119 -0
  185. ml4t/diagnostic/splitters/cpcv/partitioning.py +263 -0
  186. ml4t/diagnostic/splitters/cpcv/purge_engine.py +379 -0
  187. ml4t/diagnostic/splitters/cpcv/windows.py +190 -0
  188. ml4t/diagnostic/splitters/group_isolation.py +329 -0
  189. ml4t/diagnostic/splitters/persistence.py +316 -0
  190. ml4t/diagnostic/splitters/utils.py +207 -0
  191. ml4t/diagnostic/splitters/walk_forward.py +757 -0
  192. ml4t/diagnostic/utils/__init__.py +42 -0
  193. ml4t/diagnostic/utils/config.py +542 -0
  194. ml4t/diagnostic/utils/dependencies.py +318 -0
  195. ml4t/diagnostic/utils/sessions.py +127 -0
  196. ml4t/diagnostic/validation/__init__.py +54 -0
  197. ml4t/diagnostic/validation/dataframe.py +274 -0
  198. ml4t/diagnostic/validation/returns.py +280 -0
  199. ml4t/diagnostic/validation/timeseries.py +299 -0
  200. ml4t/diagnostic/visualization/AGENT.md +19 -0
  201. ml4t/diagnostic/visualization/__init__.py +223 -0
  202. ml4t/diagnostic/visualization/backtest/__init__.py +98 -0
  203. ml4t/diagnostic/visualization/backtest/cost_attribution.py +762 -0
  204. ml4t/diagnostic/visualization/backtest/executive_summary.py +895 -0
  205. ml4t/diagnostic/visualization/backtest/interactive_controls.py +673 -0
  206. ml4t/diagnostic/visualization/backtest/statistical_validity.py +874 -0
  207. ml4t/diagnostic/visualization/backtest/tearsheet.py +565 -0
  208. ml4t/diagnostic/visualization/backtest/template_system.py +373 -0
  209. ml4t/diagnostic/visualization/backtest/trade_plots.py +1172 -0
  210. ml4t/diagnostic/visualization/barrier_plots.py +782 -0
  211. ml4t/diagnostic/visualization/core.py +1060 -0
  212. ml4t/diagnostic/visualization/dashboards/__init__.py +36 -0
  213. ml4t/diagnostic/visualization/dashboards/base.py +582 -0
  214. ml4t/diagnostic/visualization/dashboards/importance.py +801 -0
  215. ml4t/diagnostic/visualization/dashboards/interaction.py +263 -0
  216. ml4t/diagnostic/visualization/dashboards.py +43 -0
  217. ml4t/diagnostic/visualization/data_extraction/__init__.py +48 -0
  218. ml4t/diagnostic/visualization/data_extraction/importance.py +649 -0
  219. ml4t/diagnostic/visualization/data_extraction/interaction.py +504 -0
  220. ml4t/diagnostic/visualization/data_extraction/types.py +113 -0
  221. ml4t/diagnostic/visualization/data_extraction/validation.py +66 -0
  222. ml4t/diagnostic/visualization/feature_plots.py +888 -0
  223. ml4t/diagnostic/visualization/interaction_plots.py +618 -0
  224. ml4t/diagnostic/visualization/portfolio/__init__.py +41 -0
  225. ml4t/diagnostic/visualization/portfolio/dashboard.py +514 -0
  226. ml4t/diagnostic/visualization/portfolio/drawdown_plots.py +341 -0
  227. ml4t/diagnostic/visualization/portfolio/returns_plots.py +487 -0
  228. ml4t/diagnostic/visualization/portfolio/risk_plots.py +301 -0
  229. ml4t/diagnostic/visualization/report_generation.py +1343 -0
  230. ml4t/diagnostic/visualization/signal/__init__.py +103 -0
  231. ml4t/diagnostic/visualization/signal/dashboard.py +911 -0
  232. ml4t/diagnostic/visualization/signal/event_plots.py +514 -0
  233. ml4t/diagnostic/visualization/signal/ic_plots.py +635 -0
  234. ml4t/diagnostic/visualization/signal/multi_signal_dashboard.py +974 -0
  235. ml4t/diagnostic/visualization/signal/multi_signal_plots.py +603 -0
  236. ml4t/diagnostic/visualization/signal/quantile_plots.py +625 -0
  237. ml4t/diagnostic/visualization/signal/turnover_plots.py +400 -0
  238. ml4t/diagnostic/visualization/trade_shap/__init__.py +90 -0
  239. ml4t_diagnostic-0.1.0a1.dist-info/METADATA +1044 -0
  240. ml4t_diagnostic-0.1.0a1.dist-info/RECORD +242 -0
  241. ml4t_diagnostic-0.1.0a1.dist-info/WHEEL +4 -0
  242. ml4t_diagnostic-0.1.0a1.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,1044 @@
1
+ Metadata-Version: 2.4
2
+ Name: ml4t-diagnostic
3
+ Version: 0.1.0a1
4
+ Summary: Comprehensive diagnostic and evaluation framework for quantitative finance ML workflows
5
+ Project-URL: Homepage, https://github.com/ml4t/ml4t-evaluation
6
+ Project-URL: Documentation, https://ml4t-evaluation.readthedocs.io
7
+ Project-URL: Repository, https://github.com/ml4t/ml4t-evaluation
8
+ Project-URL: Issues, https://github.com/ml4t/ml4t-evaluation/issues
9
+ Project-URL: Changelog, https://github.com/ml4t/ml4t-evaluation/blob/main/CHANGELOG.md
10
+ Author-email: QuantLab Team <info@quantlab.io>
11
+ Maintainer-email: QuantLab Contributors <dev@quantlab.io>
12
+ License: MIT
13
+ License-File: LICENSE
14
+ Keywords: backtesting,cross-validation,embargo,finance,machine-learning,polars,purging,quantitative-finance,statistical-tests,trading,validation
15
+ Classifier: Development Status :: 3 - Alpha
16
+ Classifier: Intended Audience :: Developers
17
+ Classifier: Intended Audience :: Financial and Insurance Industry
18
+ Classifier: Intended Audience :: Science/Research
19
+ Classifier: License :: OSI Approved :: MIT License
20
+ Classifier: Operating System :: OS Independent
21
+ Classifier: Programming Language :: Python :: 3
22
+ Classifier: Programming Language :: Python :: 3.11
23
+ Classifier: Programming Language :: Python :: 3.12
24
+ Classifier: Programming Language :: Python :: 3.13
25
+ Classifier: Topic :: Office/Business :: Financial :: Investment
26
+ Classifier: Topic :: Scientific/Engineering :: Information Analysis
27
+ Classifier: Topic :: Scientific/Engineering :: Mathematics
28
+ Classifier: Typing :: Typed
29
+ Requires-Python: >=3.11
30
+ Requires-Dist: anywidget>=0.9.21
31
+ Requires-Dist: arch>=7.2.0
32
+ Requires-Dist: jinja2>=3.1.0
33
+ Requires-Dist: lightgbm>=4.6.0
34
+ Requires-Dist: numba>=0.57.0
35
+ Requires-Dist: numpy>=1.24.0
36
+ Requires-Dist: pandas-datareader>=0.10.0
37
+ Requires-Dist: pandas-market-calendars>=4.0.0
38
+ Requires-Dist: pandas>=2.0.0
39
+ Requires-Dist: plotly>=5.15.0
40
+ Requires-Dist: polars>=0.20.0
41
+ Requires-Dist: pyarrow>=14.0.0
42
+ Requires-Dist: pydantic>=2.0.0
43
+ Requires-Dist: pyyaml>=6.0
44
+ Requires-Dist: riskfolio-lib>=7.1.0
45
+ Requires-Dist: scikit-learn>=1.3.0
46
+ Requires-Dist: scipy>=1.10.0
47
+ Requires-Dist: setuptools>=80.9.0
48
+ Requires-Dist: statsmodels>=0.14.0
49
+ Requires-Dist: structlog>=23.0.0
50
+ Requires-Dist: sympy>=1.14.0
51
+ Requires-Dist: tabulate>=0.9.0
52
+ Requires-Dist: vectorbt>=0.28.2
53
+ Requires-Dist: xgboost>=2.1.4
54
+ Provides-Extra: advanced
55
+ Requires-Dist: arch>=6.0.0; extra == 'advanced'
56
+ Provides-Extra: all
57
+ Requires-Dist: arch>=6.0.0; extra == 'all'
58
+ Requires-Dist: hypothesis>=6.80.0; extra == 'all'
59
+ Requires-Dist: ipdb>=0.13.0; extra == 'all'
60
+ Requires-Dist: ipython>=8.14.0; extra == 'all'
61
+ Requires-Dist: kaleido>=0.2.0; extra == 'all'
62
+ Requires-Dist: lightgbm>=4.0.0; extra == 'all'
63
+ Requires-Dist: matplotlib>=3.7.0; extra == 'all'
64
+ Requires-Dist: myst-parser>=2.0.0; extra == 'all'
65
+ Requires-Dist: nbsphinx>=0.9.0; extra == 'all'
66
+ Requires-Dist: plotly>=5.15.0; extra == 'all'
67
+ Requires-Dist: pre-commit>=3.3.0; extra == 'all'
68
+ Requires-Dist: pypdf>=5.0.0; extra == 'all'
69
+ Requires-Dist: pytest-benchmark>=4.0.0; extra == 'all'
70
+ Requires-Dist: pytest-cov>=4.1.0; extra == 'all'
71
+ Requires-Dist: pytest-timeout>=2.1.0; extra == 'all'
72
+ Requires-Dist: pytest-xdist>=3.3.0; extra == 'all'
73
+ Requires-Dist: pytest>=7.4.0; extra == 'all'
74
+ Requires-Dist: ruff>=0.1.0; extra == 'all'
75
+ Requires-Dist: seaborn>=0.12.0; extra == 'all'
76
+ Requires-Dist: shap<0.50.0,>=0.41.0; extra == 'all'
77
+ Requires-Dist: sphinx-autodoc-typehints>=1.24.0; extra == 'all'
78
+ Requires-Dist: sphinx-rtd-theme>=1.3.0; extra == 'all'
79
+ Requires-Dist: sphinx>=7.0.0; extra == 'all'
80
+ Requires-Dist: streamlit>=1.28.0; extra == 'all'
81
+ Requires-Dist: ty; extra == 'all'
82
+ Requires-Dist: xgboost>=2.0.0; extra == 'all'
83
+ Provides-Extra: all-ml
84
+ Requires-Dist: cupy-cuda11x>=11.0.0; extra == 'all-ml'
85
+ Requires-Dist: lightgbm>=4.0.0; extra == 'all-ml'
86
+ Requires-Dist: shap<0.50.0,>=0.41.0; extra == 'all-ml'
87
+ Requires-Dist: tensorflow>=2.0.0; extra == 'all-ml'
88
+ Requires-Dist: xgboost>=2.0.0; extra == 'all-ml'
89
+ Provides-Extra: dashboard
90
+ Requires-Dist: streamlit>=1.28.0; extra == 'dashboard'
91
+ Provides-Extra: deep
92
+ Requires-Dist: shap<0.50.0,>=0.41.0; extra == 'deep'
93
+ Requires-Dist: tensorflow>=2.0.0; extra == 'deep'
94
+ Provides-Extra: dev
95
+ Requires-Dist: hypothesis>=6.80.0; extra == 'dev'
96
+ Requires-Dist: ipdb>=0.13.0; extra == 'dev'
97
+ Requires-Dist: ipython>=8.14.0; extra == 'dev'
98
+ Requires-Dist: pre-commit>=3.3.0; extra == 'dev'
99
+ Requires-Dist: pytest-benchmark>=4.0.0; extra == 'dev'
100
+ Requires-Dist: pytest-cov>=4.1.0; extra == 'dev'
101
+ Requires-Dist: pytest-timeout>=2.1.0; extra == 'dev'
102
+ Requires-Dist: pytest-xdist>=3.3.0; extra == 'dev'
103
+ Requires-Dist: pytest>=7.4.0; extra == 'dev'
104
+ Requires-Dist: ruff>=0.1.0; extra == 'dev'
105
+ Requires-Dist: ty; extra == 'dev'
106
+ Provides-Extra: docs
107
+ Requires-Dist: mkdocs-gen-files>=0.5.0; extra == 'docs'
108
+ Requires-Dist: mkdocs-literate-nav>=0.6.0; extra == 'docs'
109
+ Requires-Dist: mkdocs-material>=9.5.0; extra == 'docs'
110
+ Requires-Dist: mkdocs>=1.5.0; extra == 'docs'
111
+ Requires-Dist: mkdocstrings[python]>=0.24.0; extra == 'docs'
112
+ Provides-Extra: gpu
113
+ Requires-Dist: cupy-cuda11x>=11.0.0; extra == 'gpu'
114
+ Provides-Extra: integration
115
+ Provides-Extra: ml
116
+ Requires-Dist: lightgbm>=4.0.0; extra == 'ml'
117
+ Requires-Dist: shap<0.50.0,>=0.41.0; extra == 'ml'
118
+ Requires-Dist: xgboost>=2.0.0; extra == 'ml'
119
+ Provides-Extra: viz
120
+ Requires-Dist: kaleido>=0.2.0; extra == 'viz'
121
+ Requires-Dist: matplotlib>=3.7.0; extra == 'viz'
122
+ Requires-Dist: plotly>=5.15.0; extra == 'viz'
123
+ Requires-Dist: pypdf>=5.0.0; extra == 'viz'
124
+ Requires-Dist: seaborn>=0.12.0; extra == 'viz'
125
+ Description-Content-Type: text/markdown
126
+
127
+ # ML4T Diagnostic: Comprehensive Diagnostics for Quantitative Finance
128
+
129
+ **Statistical rigor meets actionable insights for ML trading strategies**
130
+
131
+ [![Python 3.11+](https://img.shields.io/badge/python-3.11+-blue.svg)](https://www.python.org/downloads/)
132
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
133
+ [![Code style: ruff](https://img.shields.io/badge/code%20style-ruff-000000.svg)](https://github.com/astral-sh/ruff)
134
+ [![Type checked: ty](https://img.shields.io/badge/type%20checked-ty-blue.svg)](https://github.com/astral-sh/ty)
135
+
136
+ ---
137
+
138
+ ## What is ML4T Diagnostic?
139
+
140
+ **ML4T Diagnostic** is a comprehensive evaluation library for quantitative trading strategies, spanning the entire ML workflow from feature analysis to portfolio performance.
141
+
142
+ ### Key Improvements
143
+
144
+ | Capability | What's New |
145
+ |------------|------------|
146
+ | **Performance** | Polars-powered for 10-100x faster analysis |
147
+ | **Visualizations** | Interactive Plotly charts |
148
+ | **Insights** | Auto-interpretation with warnings |
149
+ | **Statistics** | DSR, CPCV, RAS, PBO, FDR corrections |
150
+ | **Exploratory** | Stationarity, ACF, volatility, distribution tests |
151
+ | **Signal Analysis** | Multi-signal comparison and selection |
152
+ | **Trade Diagnostics** | SHAP-based error pattern discovery |
153
+ | **Binary Metrics** | Precision, recall, lift, coverage with Wilson intervals |
154
+ | **Threshold Analysis** | Threshold sweep, optimization, monotonicity checks |
155
+
156
+ ---
157
+
158
+ ## Quick Start
159
+
160
+ ### Installation
161
+
162
+ ```bash
163
+ # Core library
164
+ pip install ml4t-diagnostic
165
+
166
+ # With ML dependencies (for SHAP, importance, interactions)
167
+ pip install ml4t-diagnostic[ml]
168
+
169
+ # With visualization (for interactive reports)
170
+ pip install ml4t-diagnostic[viz]
171
+
172
+ # Everything (ML + viz + dashboard)
173
+ pip install ml4t-diagnostic[all]
174
+ ```
175
+
176
+ ### Example 1: Trade Diagnostics
177
+
178
+ **Close the ML→Trading feedback loop**: Understand why specific trades fail and get actionable improvement suggestions.
179
+
180
+ ```python
181
+ from ml4t.diagnostic.evaluation import TradeAnalysis, TradeShapAnalyzer
182
+ from ml4t.diagnostic.config import TradeShapConfig
183
+
184
+ # 1. Identify worst trades from backtest
185
+ analyzer = TradeAnalysis(trade_records)
186
+ worst_trades = analyzer.worst_trades(n=20)
187
+
188
+ # 2. Explain with SHAP
189
+ config = TradeShapConfig.for_quick_diagnostics()
190
+ shap_analyzer = TradeShapAnalyzer(
191
+ model=trained_model,
192
+ features_df=features_df, # Features with timestamps
193
+ shap_values=shap_values, # Precomputed SHAP values
194
+ config=config
195
+ )
196
+
197
+ # 3. Discover error patterns
198
+ result = shap_analyzer.explain_worst_trades(worst_trades)
199
+
200
+ # 4. Get actionable hypotheses
201
+ for pattern in result.error_patterns:
202
+ print(f"Pattern {pattern.cluster_id}: {pattern.hypothesis}")
203
+ print(f" Actions: {pattern.actions}")
204
+ print(f" Confidence: {pattern.confidence:.2%}")
205
+ print(f" Potential savings: ${pattern.potential_impact:,.2f}")
206
+ ```
207
+
208
+ **Output example**:
209
+ ```
210
+ Pattern 1: High momentum + Low volatility → Reversals
211
+ Actions: ['Add volatility regime filter', 'Shorten holding period in low vol']
212
+ Confidence: 85%
213
+ Potential savings: $12,450.00
214
+
215
+ Pattern 2: Low liquidity + Wide spreads → Poor execution
216
+ Actions: ['Add minimum liquidity filter', 'Widen entry criteria']
217
+ Confidence: 78%
218
+ Potential savings: $8,230.00
219
+ ```
220
+
221
+ See **[examples/trade_diagnostics_example.ipynb](examples/trade_diagnostics_example.ipynb)** for complete end-to-end workflow.
222
+
223
+ ### Example 2: Feature Importance Analysis
224
+
225
+ ```python
226
+ import polars as pl
227
+ from ml4t.diagnostic.evaluation import analyze_ml_importance
228
+
229
+ # Your data
230
+ X = pl.read_parquet("features.parquet")
231
+ y = pl.read_parquet("labels.parquet")
232
+
233
+ # Analyze feature importance (combines MDI, PFI, MDA, SHAP)
234
+ results = analyze_ml_importance(model, X, y)
235
+
236
+ # Get consensus ranking
237
+ print(results.consensus_ranking)
238
+ # [('momentum', 1.2), ('volatility', 2.1), ...]
239
+
240
+ # Check warnings
241
+ print(results.warnings)
242
+ # ["High SHAP importance but low PFI for 'spread' - possible overfitting"]
243
+
244
+ # Get interpretation
245
+ print(results.interpretation)
246
+ # "Strong consensus across methods. Top 3 features: momentum, volatility, volume..."
247
+ ```
248
+
249
+ ### Example 3: Feature Interactions
250
+
251
+ ```python
252
+ from ml4t.diagnostic.evaluation import analyze_interactions
253
+
254
+ # Detect feature interactions (Conditional IC, H-stat, SHAP)
255
+ results = analyze_interactions(model, X, y)
256
+
257
+ # Top interactions by consensus
258
+ print(results.top_interactions_consensus)
259
+ # [('momentum', 'volatility'), ('volume', 'spread'), ...]
260
+
261
+ # Method agreement
262
+ print(results.method_agreement)
263
+ # {('h_statistic', 'shap'): 0.85, ...} # High agreement = robust finding
264
+ ```
265
+
266
+ ### Example 4: Statistical Validation (DSR)
267
+
268
+ ```python
269
+ from ml4t.diagnostic.evaluation import stats
270
+
271
+ # Your backtest results
272
+ returns = strategy.compute_returns()
273
+
274
+ # Statistical validation with multiple testing correction
275
+ dsr_result = stats.compute_dsr(
276
+ returns=returns,
277
+ benchmark_sr=0.0,
278
+ n_trials=100, # Number of strategies tested
279
+ expected_max_sharpe=1.5
280
+ )
281
+
282
+ print(f"Sharpe Ratio: {dsr_result['sr']:.2f}")
283
+ print(f"Deflated Sharpe: {dsr_result['dsr']:.2f}") # Accounts for multiple testing
284
+ print(f"p-value: {dsr_result['pval']:.4f}")
285
+ print(f"Significant: {dsr_result['is_significant']}")
286
+ ```
287
+
288
+ ### Example 5: Binary Classification Metrics
289
+
290
+ **Evaluate discrete trading signals** with proper statistical inference:
291
+
292
+ ```python
293
+ from ml4t.diagnostic.evaluation import (
294
+ binary_classification_report,
295
+ precision, recall, lift, coverage, f1_score,
296
+ wilson_score_interval,
297
+ binomial_test_precision,
298
+ )
299
+
300
+ # Your signals and outcomes
301
+ signals = momentum > threshold # Binary signals
302
+ labels = forward_returns > 0 # Binary outcomes (profitable or not)
303
+
304
+ # Comprehensive report with confidence intervals
305
+ report = binary_classification_report(signals, labels)
306
+
307
+ print(f"Precision: {report['precision']:.2%} ± {report['precision_ci_width']:.2%}")
308
+ print(f"Lift: {report['lift']:.2f}x (vs random)")
309
+ print(f"Coverage: {report['coverage']:.1%} of observations")
310
+ print(f"Statistically significant: {report['binomial_pvalue'] < 0.05}")
311
+ ```
312
+
313
+ **Key metrics**:
314
+ - **Precision**: When you signal, how often are you right?
315
+ - **Lift**: How much better than random selection?
316
+ - **Coverage**: What fraction of time are you in a position?
317
+ - **Wilson interval**: Accurate confidence bounds for proportions
318
+
319
+ ### Example 6: Threshold Optimization
320
+
321
+ **Find optimal signal thresholds** with train-only selection:
322
+
323
+ ```python
324
+ from ml4t.diagnostic.evaluation import (
325
+ evaluate_threshold_sweep,
326
+ find_optimal_threshold,
327
+ check_monotonicity,
328
+ )
329
+
330
+ # Sweep thresholds and compute metrics at each
331
+ results = evaluate_threshold_sweep(
332
+ indicator=momentum_values,
333
+ label=future_profitable,
334
+ thresholds=[0.1, 0.3, 0.5, 0.7, 0.9],
335
+ direction='above'
336
+ )
337
+
338
+ # Find optimal with constraints
339
+ optimal = find_optimal_threshold(
340
+ indicator=momentum_values,
341
+ label=future_profitable,
342
+ metric="f1_score",
343
+ min_coverage=0.02, # At least 2% signal frequency
344
+ require_significant=True # Must pass binomial test
345
+ )
346
+
347
+ print(f"Optimal threshold: {optimal['threshold']:.2f}")
348
+ print(f"F1 Score: {optimal['f1_score']:.2%}")
349
+
350
+ # Check if relationship is monotonic (good) or non-monotonic (investigate)
351
+ mono = check_monotonicity(results, metric="precision")
352
+ print(f"Monotonicity score: {mono['score']:.2f}")
353
+ ```
354
+
355
+ **Critical**: Use train-only threshold selection in cross-validation to prevent leakage.
356
+
357
+ ---
358
+
359
+ ## Library Overview
360
+
361
+ ML4T Diagnostic provides **three complementary capabilities** across **four application domains**:
362
+
363
+ ### Three Pillars of Analysis
364
+
365
+ | Pillar | Purpose | Examples |
366
+ |--------|---------|----------|
367
+ | **Explore** | Understand patterns before modeling | Stationarity tests, ACF/PACF, distribution analysis |
368
+ | **Validate** | Test significance and prevent overfitting | DSR, CPCV, RAS, FDR corrections |
369
+ | **Visualize** | Communicate findings effectively | Interactive Plotly charts, dashboards, reports |
370
+
371
+ ### Four Application Domains
372
+
373
+ | Domain | Stage | Key Classes |
374
+ |--------|-------|-------------|
375
+ | **Features & Data** | Pre-modeling | `FeatureDiagnostics`, `analyze_stationarity()` |
376
+ | **Signals & Models** | Modeling | `SignalAnalysis`, `MultiSignalAnalysis` |
377
+ | **Trades & Backtest** | Post-modeling | `TradeAnalysis`, `TradeShapAnalyzer` |
378
+ | **Portfolio** | Production | `PortfolioAnalysis`, rolling metrics |
379
+
380
+ This architecture ensures you can **explore, validate, and visualize** at every stage of the ML workflow.
381
+
382
+ ---
383
+
384
+ ## Architecture: Four-Tier Diagnostic Framework
385
+
386
+ ML4T Diagnostic covers **four tiers** of the quantitative workflow:
387
+
388
+ ```
389
+ ┌──────────────────────────────────────────────────────────────┐
390
+ │ Tier 1: Feature Analysis (Pre-Modeling) │
391
+ │ ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ │
392
+ │ • Time series diagnostics (stationarity, ACF, volatility) │
393
+ │ • Distribution analysis (moments, normality, tails) │
394
+ │ • Feature-outcome predictiveness (IC, MI, quantiles) │
395
+ │ • Feature importance (MDI, PFI, MDA, SHAP consensus) │
396
+ │ • Feature interactions (Conditional IC, H-stat, SHAP) │
397
+ │ • Drift detection (PSI, domain classifier) │
398
+ └──────────────────────────────────────────────────────────────┘
399
+ ┌──────────────────────────────────────────────────────────────┐
400
+ │ Tier 2: Signal Analysis (Model Outputs) │
401
+ │ ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ │
402
+ │ • IC analysis (time series, histogram, heatmap) │
403
+ │ • Quantile returns (bar, violin, cumulative) │
404
+ │ • Turnover analysis (top/bottom basket, autocorrelation) │
405
+ │ • Multi-signal comparison and ranking │
406
+ │ • Signal selection framework │
407
+ └──────────────────────────────────────────────────────────────┘
408
+ ┌──────────────────────────────────────────────────────────────┐
409
+ │ Tier 3: Backtest Analysis (Post-Modeling) │
410
+ │ ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ │
411
+ │ • Trade analysis (win/loss, PnL, holding periods) │
412
+ │ • Statistical validity (DSR, RAS, PBO, FDR corrections) │
413
+ │ • Trade-SHAP diagnostics (error pattern discovery) │
414
+ │ • Excursion analysis (TP/SL parameter optimization) │
415
+ └──────────────────────────────────────────────────────────────┘
416
+ ┌──────────────────────────────────────────────────────────────┐
417
+ │ Tier 4: Portfolio Analysis (Production) │
418
+ │ ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ │
419
+ │ • Performance metrics (Sharpe, Sortino, Calmar, Omega) │
420
+ │ • Drawdown analysis (underwater curve, top drawdowns) │
421
+ │ • Rolling metrics (Sharpe, volatility, beta windows) │
422
+ │ • Risk metrics (VaR, CVaR, tail ratio) │
423
+ │ • Monthly/annual returns visualization │
424
+ └──────────────────────────────────────────────────────────────┘
425
+ ```
426
+
427
+ **See [docs/architecture.md](docs/architecture.md) for complete technical details.**
428
+
429
+ ---
430
+
431
+ ## Key Features
432
+
433
+ ### Trade-Level Diagnostics
434
+
435
+ Connect SHAP explanations to trade outcomes for systematic continuous improvement.
436
+
437
+ **Core workflow**:
438
+ 1. Extract worst trades from backtest
439
+ 2. Align SHAP values to trade entry timestamps
440
+ 3. Cluster trades by SHAP similarity (hierarchical clustering)
441
+ 4. Generate actionable hypotheses for improvement
442
+ 5. Iterate: Adjust features/model based on insights
443
+
444
+ **Benefits**:
445
+ - **Systematic debugging**: Understand exactly why trades fail
446
+ - **Pattern discovery**: Find recurring error modes
447
+ - **Actionable insights**: Get specific improvement suggestions
448
+ - **Continuous improvement**: Close the ML→trading feedback loop
449
+
450
+ ### Performance (10-100x Faster)
451
+
452
+ **Polars + Numba optimization** for blazing fast analysis:
453
+
454
+ | Operation | Dataset | Time |
455
+ |-----------|---------|------|
456
+ | 5-fold CV | 1M rows | <10 seconds |
457
+ | Feature importance | 100 features | <5 seconds |
458
+ | CPCV backtest | 100K bars | <30 seconds |
459
+ | DSR calculation | 252 returns | <50ms |
460
+
461
+ ### Interactive Visualizations
462
+
463
+ **Modern Plotly charts** (not outdated matplotlib):
464
+ - Hover for details
465
+ - Zoom and pan
466
+ - Responsive design
467
+ - Publication-ready
468
+ - Export to HTML/PDF
469
+
470
+ ### Auto-Interpretation
471
+
472
+ **Human-readable insights**, not just numbers:
473
+ ```python
474
+ results.warnings
475
+ # ["High Conditional IC but low H-statistic for (momentum, volatility)",
476
+ # "Suggests regime-specific interaction - investigate market conditions"]
477
+
478
+ results.interpretation
479
+ # "Strong consensus across 3 methods. Top interaction: momentum × volatility.
480
+ # High agreement (Spearman 0.85+) indicates robust finding."
481
+ ```
482
+
483
+ ### Advanced Statistics
484
+
485
+ **State-of-the-art methods** from López de Prado and others:
486
+ - **DSR** (Deflated Sharpe Ratio) - Corrects for multiple testing
487
+ - **CPCV** (Combinatorial Purged Cross-Validation) - Leak-free validation
488
+ - **RAS** (Rademacher Anti-Serum) - Backtest overfitting detection
489
+ - **PBO** (Probability of Backtest Overfitting) - Overfitting probability
490
+ - **HAC-adjusted IC** - Autocorrelation-robust information coefficient
491
+ - **FDR control** (Benjamini-Hochberg) - Multiple comparisons
492
+ - **SHAP-based diagnostics** - Trade-level error analysis
493
+
494
+ ### Time Series Diagnostics
495
+
496
+ **Understand your data before making decisions**:
497
+
498
+ ```python
499
+ from ml4t.diagnostic.evaluation import (
500
+ analyze_stationarity,
501
+ analyze_autocorrelation,
502
+ analyze_volatility,
503
+ analyze_distribution,
504
+ )
505
+
506
+ # Stationarity: ADF, KPSS, Phillips-Perron with consensus
507
+ result = analyze_stationarity(returns)
508
+ print(f"Consensus: {result.consensus}") # 'stationary', 'non_stationary', 'inconclusive'
509
+ print(f"ADF p-value: {result.adf_result.pvalue:.4f}")
510
+
511
+ # Autocorrelation: ACF/PACF with significance bands
512
+ acf_result = analyze_autocorrelation(returns, nlags=20)
513
+ print(f"Significant lags: {acf_result.significant_lags}")
514
+
515
+ # Volatility: ARCH-LM test, GARCH(1,1) fitting
516
+ vol_result = analyze_volatility(returns)
517
+ print(f"ARCH effects: {vol_result.has_arch_effects}")
518
+
519
+ # Distribution: moments, normality, tail analysis
520
+ dist_result = analyze_distribution(returns)
521
+ print(f"Skewness: {dist_result.skewness:.3f}")
522
+ print(f"Jarque-Bera p-value: {dist_result.jb_pvalue:.4f}")
523
+ ```
524
+
525
+ ### Signal Analysis
526
+
527
+ **Full signal evaluation framework**:
528
+
529
+ ```python
530
+ from ml4t.diagnostic.evaluation import SignalAnalysis, MultiSignalAnalysis
531
+
532
+ # Single signal analysis
533
+ signal_analyzer = SignalAnalysis(
534
+ signal=factor_data,
535
+ returns=forward_returns,
536
+ periods=[1, 5, 21], # 1D, 1W, 1M
537
+ )
538
+
539
+ # IC analysis with HAC adjustment
540
+ ic_result = signal_analyzer.compute_ic_analysis()
541
+ print(f"IC Mean: {ic_result.ic_mean:.4f}")
542
+ print(f"IC IR: {ic_result.ic_ir:.4f}")
543
+ print(f"HAC t-stat: {ic_result.hac_tstat:.2f}")
544
+
545
+ # Quantile returns
546
+ quantile_result = signal_analyzer.compute_quantile_analysis()
547
+ print(f"Q5-Q1 spread: {quantile_result.spread:.2%}")
548
+
549
+ # Turnover analysis
550
+ turnover = signal_analyzer.compute_turnover_analysis()
551
+
552
+ # Multi-signal comparison
553
+ multi_analyzer = MultiSignalAnalysis(signals_dict, returns)
554
+ ranking = multi_analyzer.rank_signals(metric='ic_ir')
555
+ ```
556
+
557
+ ### Portfolio Analysis
558
+
559
+ **Full portfolio tear sheet** with modern visualizations:
560
+
561
+ ```python
562
+ from ml4t.diagnostic.evaluation import PortfolioAnalysis
563
+
564
+ # Initialize with returns
565
+ portfolio = PortfolioAnalysis(returns, benchmark=spy_returns)
566
+
567
+ # Summary statistics
568
+ metrics = portfolio.compute_summary_stats()
569
+ print(f"Sharpe: {metrics.sharpe_ratio:.2f}")
570
+ print(f"Sortino: {metrics.sortino_ratio:.2f}")
571
+ print(f"Calmar: {metrics.calmar_ratio:.2f}")
572
+ print(f"Omega: {metrics.omega_ratio:.2f}")
573
+ print(f"Max Drawdown: {metrics.max_drawdown:.2%}")
574
+
575
+ # Rolling metrics
576
+ rolling = portfolio.compute_rolling_metrics(window=252)
577
+ rolling_df = rolling.to_dataframe() # rolling Sharpe, vol, beta
578
+
579
+ # Drawdown analysis
580
+ drawdowns = portfolio.compute_drawdown_analysis(top_n=5)
581
+ print(f"Worst drawdown: {drawdowns.max_drawdown:.2%}")
582
+ print(f"Recovery days: {drawdowns.max_duration}")
583
+
584
+ # Generate tear sheet
585
+ portfolio.generate_tear_sheet() # Interactive Plotly dashboard
586
+ ```
587
+
588
+ ### Seamless Integration
589
+
590
+ **Works with your existing tools**:
591
+ ```python
592
+ # Supports pandas, polars, numpy
593
+ X_pandas = pd.DataFrame(...)
594
+ X_polars = pl.DataFrame(...)
595
+ X_numpy = np.array(...)
596
+
597
+ # All work seamlessly
598
+ analyze_ml_importance(model, X_pandas, y)
599
+ analyze_ml_importance(model, X_polars, y)
600
+ analyze_ml_importance(model, X_numpy, y)
601
+ ```
602
+
603
+ **Integrates with popular backtesting engines**:
604
+ - ml4t-backtest (native support)
605
+ - zipline-reloaded (via adapter)
606
+ - VectorBT (via adapter)
607
+ - Custom engines (implement TradeRecord schema)
608
+
609
+ ---
610
+
611
+ ## Modular Design
612
+
613
+ Like AlphaLens, **every function works standalone or composed**:
614
+
615
+ ```python
616
+ # Use individual metrics
617
+ from ml4t.diagnostic.evaluation import compute_ic_series, compute_h_statistic
618
+
619
+ ic = compute_ic_series(features, returns)
620
+ h_stat = compute_h_statistic(model, X)
621
+
622
+ # Or use tear sheets (combines multiple metrics)
623
+ from ml4t.diagnostic.evaluation import analyze_ml_importance
624
+
625
+ importance = analyze_ml_importance(model, X, y)
626
+ # → Combines MDI, PFI, MDA, SHAP
627
+ # → Consensus ranking
628
+ # → Warnings and interpretation
629
+
630
+ # Or use full workflow
631
+ from ml4t.diagnostic.evaluation import TradeShapAnalyzer
632
+
633
+ analyzer = TradeShapAnalyzer(model, features_df, shap_values, config)
634
+ result = analyzer.explain_worst_trades(worst_trades)
635
+ # → Trade analysis + SHAP + clustering + hypotheses
636
+ ```
637
+
638
+ ---
639
+
640
+ ## Documentation
641
+
642
+ ### User Guides
643
+ - **[Trade Diagnostics Example](examples/trade_diagnostics_example.ipynb)** - Complete end-to-end tutorial
644
+ - **[Architecture Guide](docs/architecture.md)** - Technical deep dive
645
+ - **[Visualization Strategy](docs/visualization_strategy.md)** - Plotly + reporting
646
+ - **[Data Schemas](docs/schemas.md)** - Integration contracts
647
+ - **[Installation Guide](docs/INSTALLATION.md)** - Detailed installation options
648
+
649
+ ### Academic References
650
+ - **[Academic References](docs/REFERENCES.md)** - Comprehensive citations for all implemented methods
651
+
652
+ ### Integration Guides
653
+ - **[Book Integration](docs/book_integration.md)** - ML4T 3rd Edition alignment
654
+ - **[Backtest Integration](docs/integration_backtest.md)** - Backtesting engine
655
+ - **[Engineer Integration](docs/integration_engineer.md)** - Feature engineering
656
+
657
+ ### Technical Documentation
658
+ - **[Optional Dependencies](docs/OPTIONAL_DEPENDENCIES.md)** - ML libraries and graceful degradation
659
+ - **[Dashboard Guide](docs/DASHBOARD.md)** - Interactive Streamlit dashboard
660
+ - **[Error Handling](docs/error_handling.md)** - Best practices
661
+ - **[Logging](docs/logging.md)** - Structured logging with structlog
662
+
663
+ ### Migration
664
+ - **[Migration Guide](docs/MIGRATION.md)** - Upgrade from ml4t.evaluation to ml4t.diagnostic
665
+
666
+ ---
667
+
668
+ ## Optional Dependencies
669
+
670
+ ML4T Diagnostic is designed with minimal required dependencies. Optional ML libraries enhance functionality but are NOT required:
671
+
672
+ **Available Features**:
673
+ - **Core Analysis** - Always available (IC, statistics, distributions, DSR, RAS)
674
+ - **ML Importance** - Requires `lightgbm` or `xgboost`
675
+ - **SHAP Analysis** - Requires `shap` (interpretability)
676
+ - **Deep Learning** (v1.1+) - Requires `tensorflow` or `pytorch`
677
+ - **GPU Acceleration** (v1.1+) - Requires `cupy`
678
+ - **Dashboards** - Requires `streamlit` (interactive viz)
679
+
680
+ **Quick Check**:
681
+ ```python
682
+ from ml4t.diagnostic.utils import get_dependency_summary
683
+ print(get_dependency_summary())
684
+ ```
685
+
686
+ **Installation Options**:
687
+ ```bash
688
+ # Core library (no ML dependencies)
689
+ pip install ml4t-diagnostic
690
+
691
+ # Standard ML support (Tree, Linear, Kernel explainers)
692
+ pip install ml4t-diagnostic[ml] # LightGBM, XGBoost, SHAP
693
+
694
+ # Neural network support (adds Deep explainer)
695
+ pip install ml4t-diagnostic[deep] # + TensorFlow
696
+
697
+ # GPU acceleration (10-50x speedup for large datasets)
698
+ pip install ml4t-diagnostic[gpu] # + cupy
699
+
700
+ # Visualization and dashboards
701
+ pip install ml4t-diagnostic[viz] # + Plotly, Streamlit
702
+
703
+ # Everything (all explainers + GPU + viz)
704
+ pip install ml4t-diagnostic[all-ml] # ml + deep + gpu
705
+ pip install ml4t-diagnostic[all] # all-ml + viz
706
+ ```
707
+
708
+ **Explainer Availability (v1.1)**:
709
+
710
+ | Explainer | Dependency Group | Required Packages |
711
+ |-----------|-----------------|-------------------|
712
+ | TreeExplainer | `[ml]` | shap, lightgbm/xgboost |
713
+ | LinearExplainer | `[ml]` | shap, scikit-learn |
714
+ | KernelExplainer | `[ml]` | shap, scikit-learn |
715
+ | DeepExplainer | `[deep]` | shap, tensorflow or pytorch |
716
+ | GPU Support | `[gpu]` | cupy |
717
+
718
+ **Graceful Degradation**: Missing dependencies trigger clear warnings, not crashes. See [docs/OPTIONAL_DEPENDENCIES.md](docs/OPTIONAL_DEPENDENCIES.md) for details.
719
+
720
+ ---
721
+
722
+ ## API Stability
723
+
724
+ **ML4T Diagnostic follows [Semantic Versioning](https://semver.org/)**.
725
+
726
+ | Version Type | API Changes | Examples |
727
+ |--------------|-------------|----------|
728
+ | **Patch** (1.3.x) | Bug fixes only | Performance improvements, docs |
729
+ | **Minor** (1.x.0) | Backward compatible | New features, new config options |
730
+ | **Major** (x.0.0) | Breaking changes | Removed functions, renamed params |
731
+
732
+ **Public API**: Everything in `__all__` exports is considered stable. Internal modules (prefixed with `_`) may change without notice.
733
+
734
+ **Current Stability**: While in alpha (v0.1.0a1), the API may still change between minor releases; it will be considered **stable** for production use from v1.0.0 onward.
735
+
736
+ ---
737
+
738
+ ## Development Status
739
+
740
+ **Current**: v0.1.0a1
741
+
742
+ ### v1.3 - Module Decomposition & UX Improvements
743
+
744
+ **Major Feature**: Large monolithic modules decomposed into focused submodules for better maintainability.
745
+
746
+ **Key improvements**:
747
+ - **Module Decomposition**: 5 large modules (~12,000 lines) split into focused submodules
748
+ - `metrics.py` (5,643 lines) → 13 modules in `metrics/`
749
+ - `distribution.py`, `drift.py`, `stationarity.py` → dedicated packages
750
+ - **ValidatedCrossValidation**: One-step CPCV + DSR validation (20 lines → 5 lines)
751
+ - **Result.interpret()**: Human-readable insights on all key result classes
752
+ - **Data Quality Integration**: `DataQualityReport` contract with ml4t-data
753
+ - **Backward Compatible**: All old imports still work via `__init__.py` exports
754
+ - **Type Stubs**: Added `py.typed` marker for better IDE support
755
+
756
+ **New Usage Pattern**:
757
+ ```python
758
+ from ml4t.diagnostic import ValidatedCrossValidation
759
+
760
+ # One-step validated cross-validation (combines CPCV + DSR)
761
+ vcv = ValidatedCrossValidation(n_splits=10, embargo_pct=0.01)
762
+ result = vcv.fit_evaluate(X, y, model, times=times)
763
+
764
+ if result.is_significant:
765
+ print(f"Strategy passes DSR at {result.significance_level:.0%} confidence")
766
+ print(result.summary())
767
+ else:
768
+ print("Strategy may be overfit - DSR test failed")
769
+ for insight in result.interpretation:
770
+ print(f" • {insight}")
771
+ ```
772
+
773
+ ### v1.2 - Configuration Consolidation
774
+
775
+ **Major Feature**: Reduced 61+ config classes to 10 primary configs with single-level nesting.
776
+
777
+ **Key improvements**:
778
+ - **Config Consolidation**: `DiagnosticConfig`, `StatisticalConfig`, `TradeConfig`, etc.
779
+ - **Single-Level Nesting**: `config.stationarity.enabled` (not deeply nested)
780
+ - **Presets Preserved**: `for_quick_analysis()`, `for_research()`, `for_production()`
781
+ - **Backward Compatible**: Old class names work as deprecated aliases
782
+
783
+ ### v1.1 - Model-Agnostic SHAP Support
784
+
785
+ **Major Feature**: SHAP importance now works with **ANY sklearn-compatible model**, not just tree models!
786
+
787
+ **Key improvements**:
788
+ - **Multi-Explainer Support**: Auto-selects best explainer (Tree, Linear, Kernel, Deep)
789
+ - **Universal Compatibility**: Works with SVM, KNN, neural networks, ANY model
790
+ - **Smart Performance**: Automatic cascade (Tree → Linear → Kernel)
791
+ - **GPU Acceleration**: Optional GPU support for large datasets
792
+ - **Backward Compatible**: 100% compatible with v1.0 API
793
+
794
+ **Explainer Comparison**:
795
+
796
+ | Explainer | Models | Speed | Quality | Installation |
797
+ |-----------|--------|-------|---------|--------------|
798
+ | **Tree** | LightGBM, XGBoost, RF | <10ms/sample | Exact | `[ml]` |
799
+ | **Linear** | LogisticReg, Ridge, Lasso | <100ms/sample | Exact | `[ml]` |
800
+ | **Deep** | TensorFlow, PyTorch | <500ms/sample | Approx | `[deep]` |
801
+ | **Kernel** | ANY sklearn model | 100-5000ms/sample | Approx | `[ml]` |
802
+
803
+ **Installation**:
804
+ ```bash
805
+ # Standard ML support (Tree, Linear, Kernel explainers)
806
+ pip install ml4t-diagnostic[ml]
807
+
808
+ # Neural network support (adds Deep explainer)
809
+ pip install ml4t-diagnostic[deep]
810
+
811
+ # GPU acceleration (10-50x speedup for large datasets)
812
+ pip install ml4t-diagnostic[gpu]
813
+
814
+ # Everything (all explainers + GPU)
815
+ pip install ml4t-diagnostic[all-ml]
816
+ ```
817
+
818
+ **Migration from v1.0**:
819
+ - **Zero changes required** - All v1.0 code works unchanged
820
+ - **Auto-selection** - Tree models automatically use TreeExplainer
821
+ - **New models supported** - Linear and other models now work automatically
822
+ - **Explicit control** - Set `explainer_type='kernel'` to force specific explainer
823
+ - **Check explainer** - Use `result['explainer_type']` to see which was used
824
+
825
+ **Example (New in v1.1)**:
826
+ ```python
827
+ from sklearn.svm import SVC
828
+ from ml4t.diagnostic.evaluation import compute_shap_importance
829
+
830
+ # Train ANY model (SVM example - not supported in v1.0!)
831
+ model = SVC(kernel='rbf', probability=True)
832
+ model.fit(X_train, y_train)
833
+
834
+ # Compute SHAP importance (auto-selects KernelExplainer)
835
+ result = compute_shap_importance(model, X_test, max_samples=100)
836
+ print(f"Explainer used: {result['explainer_type']}") # 'kernel'
837
+
838
+ # Works with linear models too
839
+ from sklearn.linear_model import LogisticRegression
840
+ model = LogisticRegression()
841
+ model.fit(X_train, y_train)
842
+ result = compute_shap_importance(model, X_test) # Auto-selects LinearExplainer
843
+ ```
844
+
845
+ ### v1.0 - Trade Diagnostics Framework
846
+ - Trade analysis framework (TradeAnalysis, TradeMetrics)
847
+ - Trade-SHAP diagnostics (TradeShapAnalyzer)
848
+ - Error pattern clustering (hierarchical clustering)
849
+ - Hypothesis generation (rule-based templates)
850
+ - Interactive dashboard (Streamlit)
851
+ - Feature importance analysis (MDI, PFI, MDA, SHAP consensus)
852
+ - Feature interactions (Conditional IC, H-statistic, SHAP)
853
+ - Statistical framework (CPCV, DSR, RAS, FDR, HAC-adjusted IC)
854
+ - Time-series cross-validation (purging, embargo)
855
+ - Comprehensive example notebook
856
+
857
+ ### Roadmap
858
+ - **v0.1**: Alpha release - Core diagnostics framework
859
+ - **v0.2**: Event studies and barrier analysis
860
+ - **v1.0**: Full book integration (ML4T 3rd Edition)
861
+
862
+ ---
863
+
864
+ ## Performance Benchmarks
865
+
866
+ **Rigorous time-series validation** (After Numba optimization):
867
+
868
+ | Operation | Dataset Size | Time | vs Pandas |
869
+ |-----------|-------------|------|-----------|
870
+ | Maximum Drawdown | 10K points | 2ms | **6x faster** |
871
+ | Block Bootstrap | 100K samples | 30ms | **5x faster** |
872
+ | Rolling Sharpe | 50K window | 8ms | **12x faster** |
873
+ | Information Coefficient | 1M points | 10ms | **5x faster** |
874
+ | DSR Calculation | 252 returns | 50ms | **10x faster** |
875
+
876
+ **Target achieved**: 5-fold CV on 1M rows < 10 seconds
877
+
878
+ ---
879
+
880
+ ## Leakage Prevention
881
+
882
+ **Information leakage** in validation causes inflated performance estimates. ML4T Diagnostic provides tools to prevent common validation pitfalls:
883
+
884
+ ### 1. Cross-Validation Leakage
885
+
886
+ **Wrong**: Using standard k-fold on time-series data
887
+ ```python
888
+ # BAD - future data leaks into training
889
+ from sklearn.model_selection import KFold
890
+ kf = KFold(n_splits=5)
891
+ for train, test in kf.split(X):
892
+ model.fit(X[train], y[train]) # WRONG: Train may contain future data
893
+ ```
894
+
895
+ **Right**: Purged walk-forward or CPCV
896
+ ```python
897
+ # GOOD - proper temporal separation with purging
898
+ from ml4t.diagnostic.splitters import PurgedWalkForwardCV
899
+
900
+ cv = PurgedWalkForwardCV(
901
+ n_splits=5,
902
+ embargo_pct=0.01, # Gap between train/test
903
+ purge_pct=0.02 # Remove overlapping labels
904
+ )
905
+ for train, test in cv.split(X, y, times):
906
+ model.fit(X[train], y[train]) # Strictly past data only
907
+ ```
908
+
909
+ ### 2. Threshold Selection Leakage
910
+
911
+ **Wrong**: Optimizing thresholds on full dataset
912
+ ```python
913
+ # BAD - uses test data to select threshold
914
+ from sklearn.metrics import f1_score
915
+ best_threshold = max(thresholds, key=lambda t: f1_score(y, pred > t)) # WRONG
916
+ ```
917
+
918
+ **Right**: Train-only threshold optimization
919
+ ```python
920
+ # GOOD - optimize on training fold only
921
+ from ml4t.diagnostic.evaluation import find_optimal_threshold
922
+
923
+ for train_idx, test_idx in cv.split(X, y, times):
924
+ # Find optimal threshold using ONLY training data
925
+ optimal = find_optimal_threshold(
926
+ indicator=predictions[train_idx],
927
+ label=y[train_idx],
928
+ metric="f1_score",
929
+ min_coverage=0.02
930
+ )
931
+ # Apply to test set
932
+ test_signals = predictions[test_idx] > optimal['threshold'] # OK
933
+ ```
934
+
935
+ ### 3. Multiple Testing Correction
936
+
937
+ **Wrong**: Ignoring number of strategies tested
938
+ ```python
939
+ # BAD - reports raw Sharpe without correction
940
+ sharpe = returns.mean() / returns.std() * np.sqrt(252)
941
+ print(f"Sharpe: {sharpe:.2f}") # WRONG: May be spurious from many trials
942
+ ```
943
+
944
+ **Right**: Deflated Sharpe Ratio accounts for trials
945
+ ```python
946
+ # GOOD - corrects for multiple testing
947
+ from ml4t.diagnostic.evaluation import comprehensive_sharpe_evaluation
948
+
949
+ results = comprehensive_sharpe_evaluation(
950
+ returns=strategy_returns,
951
+ SR_benchmark=0.0,
952
+ K_trials=100, # Account for all strategies tested
953
+ variance_trials=0.1, # Variance across trials
954
+ alpha=0.05
955
+ )
956
+ print(f"Raw Sharpe: {results['SR_observed']:.2f}")
957
+ print(f"Deflated Sharpe: {results['DSR']:.2f}") # Adjusted for trials
958
+ print(f"Significant: {results['is_significant']}")
959
+ ```
960
+
961
+ ### Best Practice: Use CPCV for All Validation
962
+
963
+ The `CombinatorialPurgedCV` ensures leak-proof validation by construction:
964
+
965
+ ```python
966
+ from ml4t.diagnostic.splitters import CombinatorialPurgedCV
967
+
968
+ cv = CombinatorialPurgedCV(
969
+ n_splits=10,
970
+ embargo_pct=0.01, # Gap after test period
971
+ purge_pct=0.05 # Remove label overlap
972
+ )
973
+
974
+ # Each fold is leak-proof by design
975
+ for train_idx, test_idx in cv.split(X, y, timestamps):
976
+ # Training data strictly precedes test data
977
+ # Embargo prevents information bleeding
978
+ # Purging handles overlapping label windows
979
+ pass
980
+ ```
981
+
982
+ ---
983
+
984
+ ## For ML4T Book Readers
985
+
986
+ ML4T Diagnostic is the **reference implementation** for the ML4T 3rd Edition book.
987
+
988
+ **Chapter mapping** (ML4T 3rd Edition):
989
+ - Chapter 6 (Alpha Factor Engineering) → `FeatureDiagnostics`, feature importance, interactions
990
+ - Chapter 7 (Evaluating Alpha Factors) → `SignalAnalysis`, IC analysis, RAS
991
+ - Chapter 9 (Backtesting) → `TradeAnalysis`, DSR, CPCV, `TradeShapAnalyzer`
992
+ - Chapter 10 (Portfolio Construction) → `PortfolioAnalysis`, rolling metrics, drawdowns
993
+ - Chapter 12 (Risk Management) → Risk metrics, VaR, stress tests
994
+
995
+ See [docs/book_integration.md](docs/book_integration.md) for complete mapping.
996
+
997
+ ---
998
+
999
+ ## Contributing
1000
+
1001
+ We welcome contributions! See [CLAUDE.md](CLAUDE.md) for:
1002
+ - Development setup
1003
+ - Code standards (ruff, mypy, pytest)
1004
+ - Architecture principles
1005
+ - How to add new metrics/tear sheets
1006
+
1007
+ ---
1008
+
1009
+ ## Citation
1010
+
1011
+ If you use ML4T Diagnostic in your research, please cite:
1012
+
1013
+ ```bibtex
1014
+ @software{ml4t_diagnostic2025,
1015
+ author = {Stefan Jansen},
1016
+ title = {ML4T Diagnostic: Comprehensive Diagnostics for Quantitative Finance},
1017
+ year = {2025},
1018
+ version = {0.1.0a1},
1019
+ publisher = {GitHub},
1020
+ url = {https://github.com/stefan-jansen/ml4t-diagnostic}
1021
+ }
1022
+ ```
1023
+
1024
+ For academic references to the statistical methods implemented in this library, see **[docs/REFERENCES.md](docs/REFERENCES.md)**.
1025
+
1026
+ ---
1027
+
1028
+ ## License
1029
+
1030
+ MIT License - See [LICENSE](LICENSE) for details.
1031
+
1032
+ ---
1033
+
1034
+ ## Related Projects
1035
+
1036
+ Part of the **ML4T ecosystem**:
1037
+ - **[ml4t-data](../data/)** - Market data infrastructure
1038
+ - **[ml4t-engineer](../engineer/)** - Feature engineering toolkit
1039
+ - **[ml4t-backtest](../backtest/)** - Event-driven backtest engine
1040
+ - **[ml4t-diagnostic](../diagnostic/)** - This library
1041
+
1042
+ ---
1043
+
1044
+ **Ready to get started?** See [Quick Start](#quick-start) above or dive into the [Trade Diagnostics Example](examples/trade_diagnostics_example.ipynb).