ml4t-diagnostic 0.1.0a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (242) hide show
  1. ml4t/diagnostic/AGENT.md +25 -0
  2. ml4t/diagnostic/__init__.py +166 -0
  3. ml4t/diagnostic/backends/__init__.py +10 -0
  4. ml4t/diagnostic/backends/adapter.py +192 -0
  5. ml4t/diagnostic/backends/polars_backend.py +899 -0
  6. ml4t/diagnostic/caching/__init__.py +40 -0
  7. ml4t/diagnostic/caching/cache.py +331 -0
  8. ml4t/diagnostic/caching/decorators.py +131 -0
  9. ml4t/diagnostic/caching/smart_cache.py +339 -0
  10. ml4t/diagnostic/config/AGENT.md +24 -0
  11. ml4t/diagnostic/config/README.md +267 -0
  12. ml4t/diagnostic/config/__init__.py +219 -0
  13. ml4t/diagnostic/config/barrier_config.py +277 -0
  14. ml4t/diagnostic/config/base.py +301 -0
  15. ml4t/diagnostic/config/event_config.py +148 -0
  16. ml4t/diagnostic/config/feature_config.py +404 -0
  17. ml4t/diagnostic/config/multi_signal_config.py +55 -0
  18. ml4t/diagnostic/config/portfolio_config.py +215 -0
  19. ml4t/diagnostic/config/report_config.py +391 -0
  20. ml4t/diagnostic/config/sharpe_config.py +202 -0
  21. ml4t/diagnostic/config/signal_config.py +206 -0
  22. ml4t/diagnostic/config/trade_analysis_config.py +310 -0
  23. ml4t/diagnostic/config/validation.py +279 -0
  24. ml4t/diagnostic/core/__init__.py +29 -0
  25. ml4t/diagnostic/core/numba_utils.py +315 -0
  26. ml4t/diagnostic/core/purging.py +372 -0
  27. ml4t/diagnostic/core/sampling.py +471 -0
  28. ml4t/diagnostic/errors/__init__.py +205 -0
  29. ml4t/diagnostic/evaluation/AGENT.md +26 -0
  30. ml4t/diagnostic/evaluation/__init__.py +437 -0
  31. ml4t/diagnostic/evaluation/autocorrelation.py +531 -0
  32. ml4t/diagnostic/evaluation/barrier_analysis.py +1050 -0
  33. ml4t/diagnostic/evaluation/binary_metrics.py +910 -0
  34. ml4t/diagnostic/evaluation/dashboard.py +715 -0
  35. ml4t/diagnostic/evaluation/diagnostic_plots.py +1037 -0
  36. ml4t/diagnostic/evaluation/distribution/__init__.py +499 -0
  37. ml4t/diagnostic/evaluation/distribution/moments.py +299 -0
  38. ml4t/diagnostic/evaluation/distribution/tails.py +777 -0
  39. ml4t/diagnostic/evaluation/distribution/tests.py +470 -0
  40. ml4t/diagnostic/evaluation/drift/__init__.py +139 -0
  41. ml4t/diagnostic/evaluation/drift/analysis.py +432 -0
  42. ml4t/diagnostic/evaluation/drift/domain_classifier.py +517 -0
  43. ml4t/diagnostic/evaluation/drift/population_stability_index.py +310 -0
  44. ml4t/diagnostic/evaluation/drift/wasserstein.py +388 -0
  45. ml4t/diagnostic/evaluation/event_analysis.py +647 -0
  46. ml4t/diagnostic/evaluation/excursion.py +390 -0
  47. ml4t/diagnostic/evaluation/feature_diagnostics.py +873 -0
  48. ml4t/diagnostic/evaluation/feature_outcome.py +666 -0
  49. ml4t/diagnostic/evaluation/framework.py +935 -0
  50. ml4t/diagnostic/evaluation/metric_registry.py +255 -0
  51. ml4t/diagnostic/evaluation/metrics/AGENT.md +23 -0
  52. ml4t/diagnostic/evaluation/metrics/__init__.py +133 -0
  53. ml4t/diagnostic/evaluation/metrics/basic.py +160 -0
  54. ml4t/diagnostic/evaluation/metrics/conditional_ic.py +469 -0
  55. ml4t/diagnostic/evaluation/metrics/feature_outcome.py +475 -0
  56. ml4t/diagnostic/evaluation/metrics/ic_statistics.py +446 -0
  57. ml4t/diagnostic/evaluation/metrics/importance_analysis.py +338 -0
  58. ml4t/diagnostic/evaluation/metrics/importance_classical.py +375 -0
  59. ml4t/diagnostic/evaluation/metrics/importance_mda.py +371 -0
  60. ml4t/diagnostic/evaluation/metrics/importance_shap.py +715 -0
  61. ml4t/diagnostic/evaluation/metrics/information_coefficient.py +527 -0
  62. ml4t/diagnostic/evaluation/metrics/interactions.py +772 -0
  63. ml4t/diagnostic/evaluation/metrics/monotonicity.py +226 -0
  64. ml4t/diagnostic/evaluation/metrics/risk_adjusted.py +324 -0
  65. ml4t/diagnostic/evaluation/multi_signal.py +550 -0
  66. ml4t/diagnostic/evaluation/portfolio_analysis/__init__.py +83 -0
  67. ml4t/diagnostic/evaluation/portfolio_analysis/analysis.py +734 -0
  68. ml4t/diagnostic/evaluation/portfolio_analysis/metrics.py +589 -0
  69. ml4t/diagnostic/evaluation/portfolio_analysis/results.py +334 -0
  70. ml4t/diagnostic/evaluation/report_generation.py +824 -0
  71. ml4t/diagnostic/evaluation/signal_selector.py +452 -0
  72. ml4t/diagnostic/evaluation/stat_registry.py +139 -0
  73. ml4t/diagnostic/evaluation/stationarity/__init__.py +97 -0
  74. ml4t/diagnostic/evaluation/stationarity/analysis.py +518 -0
  75. ml4t/diagnostic/evaluation/stationarity/augmented_dickey_fuller.py +296 -0
  76. ml4t/diagnostic/evaluation/stationarity/kpss_test.py +308 -0
  77. ml4t/diagnostic/evaluation/stationarity/phillips_perron.py +365 -0
  78. ml4t/diagnostic/evaluation/stats/AGENT.md +43 -0
  79. ml4t/diagnostic/evaluation/stats/__init__.py +191 -0
  80. ml4t/diagnostic/evaluation/stats/backtest_overfitting.py +219 -0
  81. ml4t/diagnostic/evaluation/stats/bootstrap.py +228 -0
  82. ml4t/diagnostic/evaluation/stats/deflated_sharpe_ratio.py +591 -0
  83. ml4t/diagnostic/evaluation/stats/false_discovery_rate.py +295 -0
  84. ml4t/diagnostic/evaluation/stats/hac_standard_errors.py +108 -0
  85. ml4t/diagnostic/evaluation/stats/minimum_track_record.py +408 -0
  86. ml4t/diagnostic/evaluation/stats/moments.py +164 -0
  87. ml4t/diagnostic/evaluation/stats/rademacher_adjustment.py +436 -0
  88. ml4t/diagnostic/evaluation/stats/reality_check.py +155 -0
  89. ml4t/diagnostic/evaluation/stats/sharpe_inference.py +219 -0
  90. ml4t/diagnostic/evaluation/themes.py +330 -0
  91. ml4t/diagnostic/evaluation/threshold_analysis.py +957 -0
  92. ml4t/diagnostic/evaluation/trade_analysis.py +1136 -0
  93. ml4t/diagnostic/evaluation/trade_dashboard/__init__.py +32 -0
  94. ml4t/diagnostic/evaluation/trade_dashboard/app.py +315 -0
  95. ml4t/diagnostic/evaluation/trade_dashboard/export/__init__.py +18 -0
  96. ml4t/diagnostic/evaluation/trade_dashboard/export/csv.py +82 -0
  97. ml4t/diagnostic/evaluation/trade_dashboard/export/html.py +276 -0
  98. ml4t/diagnostic/evaluation/trade_dashboard/io.py +166 -0
  99. ml4t/diagnostic/evaluation/trade_dashboard/normalize.py +304 -0
  100. ml4t/diagnostic/evaluation/trade_dashboard/stats.py +386 -0
  101. ml4t/diagnostic/evaluation/trade_dashboard/style.py +79 -0
  102. ml4t/diagnostic/evaluation/trade_dashboard/tabs/__init__.py +21 -0
  103. ml4t/diagnostic/evaluation/trade_dashboard/tabs/patterns.py +354 -0
  104. ml4t/diagnostic/evaluation/trade_dashboard/tabs/shap_analysis.py +280 -0
  105. ml4t/diagnostic/evaluation/trade_dashboard/tabs/stat_validation.py +186 -0
  106. ml4t/diagnostic/evaluation/trade_dashboard/tabs/worst_trades.py +236 -0
  107. ml4t/diagnostic/evaluation/trade_dashboard/types.py +129 -0
  108. ml4t/diagnostic/evaluation/trade_shap/__init__.py +102 -0
  109. ml4t/diagnostic/evaluation/trade_shap/alignment.py +188 -0
  110. ml4t/diagnostic/evaluation/trade_shap/characterize.py +413 -0
  111. ml4t/diagnostic/evaluation/trade_shap/cluster.py +302 -0
  112. ml4t/diagnostic/evaluation/trade_shap/explain.py +208 -0
  113. ml4t/diagnostic/evaluation/trade_shap/hypotheses/__init__.py +23 -0
  114. ml4t/diagnostic/evaluation/trade_shap/hypotheses/generator.py +290 -0
  115. ml4t/diagnostic/evaluation/trade_shap/hypotheses/matcher.py +251 -0
  116. ml4t/diagnostic/evaluation/trade_shap/hypotheses/templates.yaml +467 -0
  117. ml4t/diagnostic/evaluation/trade_shap/models.py +386 -0
  118. ml4t/diagnostic/evaluation/trade_shap/normalize.py +116 -0
  119. ml4t/diagnostic/evaluation/trade_shap/pipeline.py +263 -0
  120. ml4t/diagnostic/evaluation/trade_shap_dashboard.py +283 -0
  121. ml4t/diagnostic/evaluation/trade_shap_diagnostics.py +588 -0
  122. ml4t/diagnostic/evaluation/validated_cv.py +535 -0
  123. ml4t/diagnostic/evaluation/visualization.py +1050 -0
  124. ml4t/diagnostic/evaluation/volatility/__init__.py +45 -0
  125. ml4t/diagnostic/evaluation/volatility/analysis.py +351 -0
  126. ml4t/diagnostic/evaluation/volatility/arch.py +258 -0
  127. ml4t/diagnostic/evaluation/volatility/garch.py +460 -0
  128. ml4t/diagnostic/integration/__init__.py +48 -0
  129. ml4t/diagnostic/integration/backtest_contract.py +671 -0
  130. ml4t/diagnostic/integration/data_contract.py +316 -0
  131. ml4t/diagnostic/integration/engineer_contract.py +226 -0
  132. ml4t/diagnostic/logging/__init__.py +77 -0
  133. ml4t/diagnostic/logging/logger.py +245 -0
  134. ml4t/diagnostic/logging/performance.py +234 -0
  135. ml4t/diagnostic/logging/progress.py +234 -0
  136. ml4t/diagnostic/logging/wandb.py +412 -0
  137. ml4t/diagnostic/metrics/__init__.py +9 -0
  138. ml4t/diagnostic/metrics/percentiles.py +128 -0
  139. ml4t/diagnostic/py.typed +1 -0
  140. ml4t/diagnostic/reporting/__init__.py +43 -0
  141. ml4t/diagnostic/reporting/base.py +130 -0
  142. ml4t/diagnostic/reporting/html_renderer.py +275 -0
  143. ml4t/diagnostic/reporting/json_renderer.py +51 -0
  144. ml4t/diagnostic/reporting/markdown_renderer.py +117 -0
  145. ml4t/diagnostic/results/AGENT.md +24 -0
  146. ml4t/diagnostic/results/__init__.py +105 -0
  147. ml4t/diagnostic/results/barrier_results/__init__.py +36 -0
  148. ml4t/diagnostic/results/barrier_results/hit_rate.py +304 -0
  149. ml4t/diagnostic/results/barrier_results/precision_recall.py +266 -0
  150. ml4t/diagnostic/results/barrier_results/profit_factor.py +297 -0
  151. ml4t/diagnostic/results/barrier_results/tearsheet.py +397 -0
  152. ml4t/diagnostic/results/barrier_results/time_to_target.py +305 -0
  153. ml4t/diagnostic/results/barrier_results/validation.py +38 -0
  154. ml4t/diagnostic/results/base.py +177 -0
  155. ml4t/diagnostic/results/event_results.py +349 -0
  156. ml4t/diagnostic/results/feature_results.py +787 -0
  157. ml4t/diagnostic/results/multi_signal_results.py +431 -0
  158. ml4t/diagnostic/results/portfolio_results.py +281 -0
  159. ml4t/diagnostic/results/sharpe_results.py +448 -0
  160. ml4t/diagnostic/results/signal_results/__init__.py +74 -0
  161. ml4t/diagnostic/results/signal_results/ic.py +581 -0
  162. ml4t/diagnostic/results/signal_results/irtc.py +110 -0
  163. ml4t/diagnostic/results/signal_results/quantile.py +392 -0
  164. ml4t/diagnostic/results/signal_results/tearsheet.py +456 -0
  165. ml4t/diagnostic/results/signal_results/turnover.py +213 -0
  166. ml4t/diagnostic/results/signal_results/validation.py +147 -0
  167. ml4t/diagnostic/signal/AGENT.md +17 -0
  168. ml4t/diagnostic/signal/__init__.py +69 -0
  169. ml4t/diagnostic/signal/_report.py +152 -0
  170. ml4t/diagnostic/signal/_utils.py +261 -0
  171. ml4t/diagnostic/signal/core.py +275 -0
  172. ml4t/diagnostic/signal/quantile.py +148 -0
  173. ml4t/diagnostic/signal/result.py +214 -0
  174. ml4t/diagnostic/signal/signal_ic.py +129 -0
  175. ml4t/diagnostic/signal/turnover.py +182 -0
  176. ml4t/diagnostic/splitters/AGENT.md +19 -0
  177. ml4t/diagnostic/splitters/__init__.py +36 -0
  178. ml4t/diagnostic/splitters/base.py +501 -0
  179. ml4t/diagnostic/splitters/calendar.py +421 -0
  180. ml4t/diagnostic/splitters/calendar_config.py +91 -0
  181. ml4t/diagnostic/splitters/combinatorial.py +1064 -0
  182. ml4t/diagnostic/splitters/config.py +322 -0
  183. ml4t/diagnostic/splitters/cpcv/__init__.py +57 -0
  184. ml4t/diagnostic/splitters/cpcv/combinations.py +119 -0
  185. ml4t/diagnostic/splitters/cpcv/partitioning.py +263 -0
  186. ml4t/diagnostic/splitters/cpcv/purge_engine.py +379 -0
  187. ml4t/diagnostic/splitters/cpcv/windows.py +190 -0
  188. ml4t/diagnostic/splitters/group_isolation.py +329 -0
  189. ml4t/diagnostic/splitters/persistence.py +316 -0
  190. ml4t/diagnostic/splitters/utils.py +207 -0
  191. ml4t/diagnostic/splitters/walk_forward.py +757 -0
  192. ml4t/diagnostic/utils/__init__.py +42 -0
  193. ml4t/diagnostic/utils/config.py +542 -0
  194. ml4t/diagnostic/utils/dependencies.py +318 -0
  195. ml4t/diagnostic/utils/sessions.py +127 -0
  196. ml4t/diagnostic/validation/__init__.py +54 -0
  197. ml4t/diagnostic/validation/dataframe.py +274 -0
  198. ml4t/diagnostic/validation/returns.py +280 -0
  199. ml4t/diagnostic/validation/timeseries.py +299 -0
  200. ml4t/diagnostic/visualization/AGENT.md +19 -0
  201. ml4t/diagnostic/visualization/__init__.py +223 -0
  202. ml4t/diagnostic/visualization/backtest/__init__.py +98 -0
  203. ml4t/diagnostic/visualization/backtest/cost_attribution.py +762 -0
  204. ml4t/diagnostic/visualization/backtest/executive_summary.py +895 -0
  205. ml4t/diagnostic/visualization/backtest/interactive_controls.py +673 -0
  206. ml4t/diagnostic/visualization/backtest/statistical_validity.py +874 -0
  207. ml4t/diagnostic/visualization/backtest/tearsheet.py +565 -0
  208. ml4t/diagnostic/visualization/backtest/template_system.py +373 -0
  209. ml4t/diagnostic/visualization/backtest/trade_plots.py +1172 -0
  210. ml4t/diagnostic/visualization/barrier_plots.py +782 -0
  211. ml4t/diagnostic/visualization/core.py +1060 -0
  212. ml4t/diagnostic/visualization/dashboards/__init__.py +36 -0
  213. ml4t/diagnostic/visualization/dashboards/base.py +582 -0
  214. ml4t/diagnostic/visualization/dashboards/importance.py +801 -0
  215. ml4t/diagnostic/visualization/dashboards/interaction.py +263 -0
  216. ml4t/diagnostic/visualization/dashboards.py +43 -0
  217. ml4t/diagnostic/visualization/data_extraction/__init__.py +48 -0
  218. ml4t/diagnostic/visualization/data_extraction/importance.py +649 -0
  219. ml4t/diagnostic/visualization/data_extraction/interaction.py +504 -0
  220. ml4t/diagnostic/visualization/data_extraction/types.py +113 -0
  221. ml4t/diagnostic/visualization/data_extraction/validation.py +66 -0
  222. ml4t/diagnostic/visualization/feature_plots.py +888 -0
  223. ml4t/diagnostic/visualization/interaction_plots.py +618 -0
  224. ml4t/diagnostic/visualization/portfolio/__init__.py +41 -0
  225. ml4t/diagnostic/visualization/portfolio/dashboard.py +514 -0
  226. ml4t/diagnostic/visualization/portfolio/drawdown_plots.py +341 -0
  227. ml4t/diagnostic/visualization/portfolio/returns_plots.py +487 -0
  228. ml4t/diagnostic/visualization/portfolio/risk_plots.py +301 -0
  229. ml4t/diagnostic/visualization/report_generation.py +1343 -0
  230. ml4t/diagnostic/visualization/signal/__init__.py +103 -0
  231. ml4t/diagnostic/visualization/signal/dashboard.py +911 -0
  232. ml4t/diagnostic/visualization/signal/event_plots.py +514 -0
  233. ml4t/diagnostic/visualization/signal/ic_plots.py +635 -0
  234. ml4t/diagnostic/visualization/signal/multi_signal_dashboard.py +974 -0
  235. ml4t/diagnostic/visualization/signal/multi_signal_plots.py +603 -0
  236. ml4t/diagnostic/visualization/signal/quantile_plots.py +625 -0
  237. ml4t/diagnostic/visualization/signal/turnover_plots.py +400 -0
  238. ml4t/diagnostic/visualization/trade_shap/__init__.py +90 -0
  239. ml4t_diagnostic-0.1.0a1.dist-info/METADATA +1044 -0
  240. ml4t_diagnostic-0.1.0a1.dist-info/RECORD +242 -0
  241. ml4t_diagnostic-0.1.0a1.dist-info/WHEEL +4 -0
  242. ml4t_diagnostic-0.1.0a1.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,25 @@
1
+ # ml4t.diagnostic - Package Index
2
+
3
+ ## Public API
4
+
5
+ `analyze_signal`, `ValidatedCrossValidation`, `Evaluator`, `BarrierAnalysis`
6
+
7
+ ## Subpackages
8
+
9
+ | Package | Purpose |
10
+ |---------|---------|
11
+ | [signal/](signal/AGENT.md) | Factor signal analysis |
12
+ | [splitters/](splitters/AGENT.md) | Cross-validation |
13
+ | [evaluation/](evaluation/AGENT.md) | Analysis framework |
14
+ | [visualization/](visualization/AGENT.md) | Plotly charts |
15
+ | [results/](results/AGENT.md) | Result dataclasses |
16
+ | [config/](config/AGENT.md) | Pydantic configuration |
17
+
18
+ ## Large Files (>500 lines)
19
+
20
+ | File | Lines |
21
+ |------|-------|
22
+ | evaluation/stats/deflated_sharpe_ratio.py | 591 |
23
+ | splitters/combinatorial.py | 1064 |
24
+ | visualization/report_generation.py | 1343 |
25
+ | evaluation/barrier_analysis.py | 1050 |
@@ -0,0 +1,166 @@
1
+ """ml4t-diagnostic - A hierarchical framework for financial time-series validation.
2
+
3
+ ml4t-diagnostic provides rigorous validation tools for financial machine learning models,
4
+ implementing a Four-Tier Validation Framework to combat data leakage, backtest
5
+ overfitting, and statistical fallacies.
6
+
7
+ Main Features
8
+ -------------
9
+ - **Cross-Validation**: CPCV, Purged Walk-Forward with proper embargo/purging
10
+ - **Statistical Validity**: DSR, RAS, FDR corrections for multiple testing
11
+ - **Feature Analysis**: IC, importance (MDI/PFI/MDA/SHAP), interactions
12
+ - **Trade Diagnostics**: SHAP-based error pattern analysis
13
+ - **Data Quality**: Integration contracts with ml4t-data
14
+
15
+ Quick Start
16
+ -----------
17
+ >>> from ml4t.diagnostic import ValidatedCrossValidation
18
+ >>> from ml4t.diagnostic.splitters import CombinatorialPurgedCV
19
+ >>>
20
+ >>> # One-step validated cross-validation
21
+ >>> vcv = ValidatedCrossValidation(n_splits=10)
22
+ >>> result = vcv.fit_validate(model, X, y, times)
23
+ >>> if result.is_significant:
24
+ ... print(f"Sharpe: {result.sharpe:.2f}, DSR p-value: {result.dsr_pvalue:.4f}")
25
+
26
+ API Stability
27
+ -------------
28
+ This library follows semantic versioning. The public API consists of all symbols
29
+ exported in __all__. Breaking changes will only occur in major version bumps.
30
+ """
31
+
32
+ __version__ = "0.1.0a1"
33
+
34
+ # Sub-modules for advanced usage
35
+ from . import backends, caching, config, core, evaluation, integration, logging, signal, splitters
36
+
37
+ # Configuration classes
38
+ from .config import (
39
+ BarrierConfig,
40
+ DiagnosticConfig,
41
+ EventConfig,
42
+ PortfolioConfig,
43
+ ReportConfig,
44
+ RuntimeConfig,
45
+ SignalConfig,
46
+ StatisticalConfig,
47
+ TradeConfig,
48
+ )
49
+
50
+ # Main evaluation framework
51
+ from .evaluation import BarrierAnalysis, EvaluationResult, Evaluator
52
+
53
+ # ValidatedCrossValidation - combines CPCV + DSR in one step
54
+ from .evaluation.validated_cv import ValidatedCrossValidation
55
+
56
+ # Data quality integration
57
+ from .integration.data_contract import (
58
+ AnomalyType,
59
+ DataAnomaly,
60
+ DataQualityMetrics,
61
+ DataQualityReport,
62
+ DataValidationRequest,
63
+ Severity,
64
+ )
65
+
66
+ # Signal analysis (new clean API)
67
+ from .signal import SignalResult, analyze_signal
68
+
69
+ # Visualization (optional - may fail if plotly not installed)
70
+ try:
71
+ from .visualization import (
72
+ plot_hit_rate_heatmap,
73
+ plot_precision_recall_curve,
74
+ plot_profit_factor_bar,
75
+ plot_time_to_target_box,
76
+ )
77
+
78
+ _VIZ_AVAILABLE = True
79
+ except ImportError:
80
+ _VIZ_AVAILABLE = False
81
+ plot_hit_rate_heatmap = None
82
+ plot_precision_recall_curve = None
83
+ plot_profit_factor_bar = None
84
+ plot_time_to_target_box = None
85
+
86
+
87
def get_agent_docs() -> dict[str, str]:
    """Get AGENT.md documentation for AI agent navigation.

    Recursively searches the directory containing this module for files
    named ``AGENT.md`` and returns their contents keyed by relative path.
    Useful for AI agents to understand the library structure.

    Returns
    -------
    dict[str, str]
        Mapping of relative path to AGENT.md content. Keys always use
        forward slashes (POSIX style), independent of the host platform,
        and are inserted in sorted path order. Files that cannot be read
        or are not valid UTF-8 are skipped.

    Example
    -------
    >>> docs = get_agent_docs()
    >>> print(docs.keys())
    dict_keys(['AGENT.md', 'config/AGENT.md', 'evaluation/AGENT.md', ...])
    """
    from pathlib import Path

    package_dir = Path(__file__).parent
    agent_docs: dict[str, str] = {}

    # rglob yields entries in filesystem order; sort so the result is
    # deterministic across platforms and runs.
    for agent_file in sorted(package_dir.rglob("AGENT.md")):
        try:
            # Decode explicitly as UTF-8 rather than relying on the locale's
            # preferred encoding, which differs between platforms.
            content = agent_file.read_text(encoding="utf-8")
        except (OSError, UnicodeDecodeError):
            # Best effort: an unreadable or undecodable file must not
            # prevent the remaining documents from being returned.
            continue
        # as_posix() keeps keys like 'signal/AGENT.md' on Windows as well.
        agent_docs[agent_file.relative_to(package_dir).as_posix()] = content

    return agent_docs
118
+
119
+
120
+ __all__ = [
121
+ # Version
122
+ "__version__",
123
+ # Agent Navigation
124
+ "get_agent_docs",
125
+ # Core Framework
126
+ "Evaluator",
127
+ "EvaluationResult",
128
+ "ValidatedCrossValidation",
129
+ # Signal Analysis (new clean API)
130
+ "analyze_signal",
131
+ "SignalResult",
132
+ # Barrier Analysis
133
+ "BarrierAnalysis",
134
+ # Configuration (9 primary configs)
135
+ "DiagnosticConfig",
136
+ "StatisticalConfig",
137
+ "PortfolioConfig",
138
+ "TradeConfig",
139
+ "SignalConfig",
140
+ "EventConfig",
141
+ "BarrierConfig",
142
+ "ReportConfig",
143
+ "RuntimeConfig",
144
+ # Data Quality Integration
145
+ "DataQualityReport",
146
+ "DataQualityMetrics",
147
+ "DataAnomaly",
148
+ "DataValidationRequest",
149
+ "AnomalyType",
150
+ "Severity",
151
+ # Visualization (optional)
152
+ "plot_hit_rate_heatmap",
153
+ "plot_profit_factor_bar",
154
+ "plot_precision_recall_curve",
155
+ "plot_time_to_target_box",
156
+ # Sub-modules
157
+ "backends",
158
+ "caching",
159
+ "config",
160
+ "core",
161
+ "evaluation",
162
+ "integration",
163
+ "logging",
164
+ "signal",
165
+ "splitters",
166
+ ]
@@ -0,0 +1,10 @@
1
+ """Backend adapters for DataFrame compatibility.
2
+
3
+ This module provides adapters to seamlessly work with both Polars (internal)
4
+ and Pandas (compatibility) DataFrames.
5
+ """
6
+
7
+ from ml4t.diagnostic.backends.adapter import DataFrameAdapter
8
+ from ml4t.diagnostic.backends.polars_backend import PolarsBackend
9
+
10
+ __all__ = ["DataFrameAdapter", "PolarsBackend"]
@@ -0,0 +1,192 @@
1
+ """Adapter layer for seamless DataFrame conversion between Polars and Pandas.
2
+
3
+ This module provides utilities to convert between different DataFrame representations.
4
+ The internal implementation uses Polars for performance, but the adapter ensures
5
+ compatibility with Pandas-based workflows.
6
+
7
+ Note: MultiIndex preservation/restoration has been removed as it was unused.
8
+ If you need MultiIndex support, use pandas directly or convert after receiving
9
+ the Polars DataFrame.
10
+ """
11
+
12
+ from typing import TYPE_CHECKING, Any, Union
13
+
14
+ import numpy as np
15
+ import pandas as pd
16
+ import polars as pl
17
+
18
+ if TYPE_CHECKING:
19
+ from numpy.typing import NDArray
20
+
21
+
22
class DataFrameAdapter:
    """Adapter for converting between Polars and Pandas DataFrames.

    This class handles conversions between different DataFrame representations.
    It's designed to be used internally by ml4t-diagnostic to ensure consistent
    behavior regardless of the input format. All methods are static; the class
    holds no state.

    Methods
    -------
    to_polars(data, columns=None)
        Convert input data to Polars DataFrame.
    to_numpy(data)
        Convert any supported data type to numpy array.
    get_shape(data)
        Get the shape of the data regardless of type.
    """

    @staticmethod
    def to_polars(
        data: Union[pl.DataFrame, pd.DataFrame, "NDArray[Any]"],
        columns: list[str] | None = None,
    ) -> tuple[pl.DataFrame, None]:
        """Convert input data to Polars DataFrame.

        Parameters
        ----------
        data : polars.DataFrame, pandas.DataFrame, or numpy.ndarray
            The input data to convert. A Polars DataFrame is returned as-is
            (no copy). For a pandas DataFrame, any index other than a
            ``RangeIndex`` starting at 0 with step 1 (including a
            ``MultiIndex``) is first moved into regular columns via
            ``reset_index(drop=False)``.
        columns : list of str, optional
            Column names to use if data is a numpy array. Defaults to
            ``column_0``, ``column_1``, ... when omitted. Ignored for
            DataFrame inputs.

        Returns
        -------
        df : polars.DataFrame
            The data as a Polars DataFrame.
        index : None
            Always None. Kept for backward compatibility with existing code
            that unpacks the tuple return value.

        Raises
        ------
        TypeError
            If the input type is not supported.
        ValueError
            If the number of column names does not match the array, or the
            array is neither 1D nor 2D.

        Examples
        --------
        >>> import pandas as pd
        >>> import polars as pl
        >>> df_pd = pd.DataFrame({"a": [1, 2], "b": [3, 4]})
        >>> df_pl, _ = DataFrameAdapter.to_polars(df_pd)
        >>> isinstance(df_pl, pl.DataFrame)
        True
        """
        if isinstance(data, pl.DataFrame):
            return data, None

        if isinstance(data, pd.DataFrame):
            index = data.index
            # A MultiIndex is never a RangeIndex, so this single check covers
            # both the MultiIndex and the custom-index cases.
            has_default_index = (
                isinstance(index, pd.RangeIndex) and index.start == 0 and index.step == 1
            )
            if not has_default_index:
                # Move the index into columns before converting so its
                # values are carried over as ordinary data.
                data = data.reset_index(drop=False)
            return pl.from_pandas(data), None

        if isinstance(data, np.ndarray):
            if data.ndim == 1:
                # 1D array, treat as single column
                if columns is None:
                    columns = ["column_0"]
                elif len(columns) != 1:
                    raise ValueError(
                        f"1D array requires exactly 1 column name, got {len(columns)}",
                    )
                return pl.DataFrame({columns[0]: data}), None

            if data.ndim == 2:
                if columns is None:
                    columns = [f"column_{i}" for i in range(data.shape[1])]
                elif len(columns) != data.shape[1]:
                    raise ValueError(
                        f"Number of columns ({len(columns)}) doesn't match array shape ({data.shape[1]})",
                    )
                # The names were validated against axis 1, so axis 0 holds the
                # rows; state that explicitly instead of relying on inference.
                return pl.DataFrame(data, schema=columns, orient="row"), None

            raise ValueError(f"Arrays must be 1D or 2D, got {data.ndim}D")

        raise TypeError(
            f"Data must be a Polars DataFrame, Pandas DataFrame, or numpy array. "
            f"Got {type(data).__name__}",
        )

    @staticmethod
    def to_numpy(
        data: Union[pl.DataFrame, pl.Series, pd.DataFrame, pd.Series, "NDArray[Any]"],
    ) -> "NDArray[Any]":
        """Convert any supported data type to numpy array.

        Parameters
        ----------
        data : polars.DataFrame/Series, pandas.DataFrame/Series, or numpy.ndarray
            The data to convert. A numpy array is returned unchanged (the
            same object, not a copy).

        Returns
        -------
        array : numpy.ndarray
            The data as a numpy array, produced by the input's own
            ``to_numpy()`` method for DataFrame/Series inputs.

        Raises
        ------
        TypeError
            If the input type is not supported.

        Examples
        --------
        >>> import polars as pl
        >>> s = pl.Series([1, 2, 3])
        >>> arr = DataFrameAdapter.to_numpy(s)
        >>> arr.tolist()
        [1, 2, 3]
        """
        if isinstance(data, np.ndarray):
            return data
        if isinstance(data, pl.DataFrame | pl.Series | pd.DataFrame | pd.Series):
            return data.to_numpy()
        raise TypeError(f"Cannot convert {type(data).__name__} to numpy array")

    @staticmethod
    def get_shape(
        data: Union[pl.DataFrame, pd.DataFrame, "NDArray[Any]"],
    ) -> tuple[int, int]:
        """Get the shape of the data regardless of type.

        Parameters
        ----------
        data : polars.DataFrame, pandas.DataFrame, or numpy.ndarray
            The data to get the shape from.

        Returns
        -------
        shape : tuple of int
            (n_rows, n_cols) for 2D data, (n_rows, 1) for 1D data.
            NOTE: a numpy array that is not 1D is returned with its raw
            ``shape``, so arrays of any other rank do not yield a 2-tuple.

        Raises
        ------
        TypeError
            If the input type is not supported.

        Examples
        --------
        >>> import numpy as np
        >>> arr = np.array([1, 2, 3])
        >>> DataFrameAdapter.get_shape(arr)
        (3, 1)
        """
        if isinstance(data, pl.DataFrame | pd.DataFrame):
            return data.shape
        if isinstance(data, np.ndarray):
            if data.ndim == 1:
                # Report a 1D array as a single-column table.
                return (data.shape[0], 1)
            return data.shape
        raise TypeError(f"Cannot get shape of {type(data).__name__}")