ml4t-diagnostic 0.1.0a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (242) hide show
  1. ml4t/diagnostic/AGENT.md +25 -0
  2. ml4t/diagnostic/__init__.py +166 -0
  3. ml4t/diagnostic/backends/__init__.py +10 -0
  4. ml4t/diagnostic/backends/adapter.py +192 -0
  5. ml4t/diagnostic/backends/polars_backend.py +899 -0
  6. ml4t/diagnostic/caching/__init__.py +40 -0
  7. ml4t/diagnostic/caching/cache.py +331 -0
  8. ml4t/diagnostic/caching/decorators.py +131 -0
  9. ml4t/diagnostic/caching/smart_cache.py +339 -0
  10. ml4t/diagnostic/config/AGENT.md +24 -0
  11. ml4t/diagnostic/config/README.md +267 -0
  12. ml4t/diagnostic/config/__init__.py +219 -0
  13. ml4t/diagnostic/config/barrier_config.py +277 -0
  14. ml4t/diagnostic/config/base.py +301 -0
  15. ml4t/diagnostic/config/event_config.py +148 -0
  16. ml4t/diagnostic/config/feature_config.py +404 -0
  17. ml4t/diagnostic/config/multi_signal_config.py +55 -0
  18. ml4t/diagnostic/config/portfolio_config.py +215 -0
  19. ml4t/diagnostic/config/report_config.py +391 -0
  20. ml4t/diagnostic/config/sharpe_config.py +202 -0
  21. ml4t/diagnostic/config/signal_config.py +206 -0
  22. ml4t/diagnostic/config/trade_analysis_config.py +310 -0
  23. ml4t/diagnostic/config/validation.py +279 -0
  24. ml4t/diagnostic/core/__init__.py +29 -0
  25. ml4t/diagnostic/core/numba_utils.py +315 -0
  26. ml4t/diagnostic/core/purging.py +372 -0
  27. ml4t/diagnostic/core/sampling.py +471 -0
  28. ml4t/diagnostic/errors/__init__.py +205 -0
  29. ml4t/diagnostic/evaluation/AGENT.md +26 -0
  30. ml4t/diagnostic/evaluation/__init__.py +437 -0
  31. ml4t/diagnostic/evaluation/autocorrelation.py +531 -0
  32. ml4t/diagnostic/evaluation/barrier_analysis.py +1050 -0
  33. ml4t/diagnostic/evaluation/binary_metrics.py +910 -0
  34. ml4t/diagnostic/evaluation/dashboard.py +715 -0
  35. ml4t/diagnostic/evaluation/diagnostic_plots.py +1037 -0
  36. ml4t/diagnostic/evaluation/distribution/__init__.py +499 -0
  37. ml4t/diagnostic/evaluation/distribution/moments.py +299 -0
  38. ml4t/diagnostic/evaluation/distribution/tails.py +777 -0
  39. ml4t/diagnostic/evaluation/distribution/tests.py +470 -0
  40. ml4t/diagnostic/evaluation/drift/__init__.py +139 -0
  41. ml4t/diagnostic/evaluation/drift/analysis.py +432 -0
  42. ml4t/diagnostic/evaluation/drift/domain_classifier.py +517 -0
  43. ml4t/diagnostic/evaluation/drift/population_stability_index.py +310 -0
  44. ml4t/diagnostic/evaluation/drift/wasserstein.py +388 -0
  45. ml4t/diagnostic/evaluation/event_analysis.py +647 -0
  46. ml4t/diagnostic/evaluation/excursion.py +390 -0
  47. ml4t/diagnostic/evaluation/feature_diagnostics.py +873 -0
  48. ml4t/diagnostic/evaluation/feature_outcome.py +666 -0
  49. ml4t/diagnostic/evaluation/framework.py +935 -0
  50. ml4t/diagnostic/evaluation/metric_registry.py +255 -0
  51. ml4t/diagnostic/evaluation/metrics/AGENT.md +23 -0
  52. ml4t/diagnostic/evaluation/metrics/__init__.py +133 -0
  53. ml4t/diagnostic/evaluation/metrics/basic.py +160 -0
  54. ml4t/diagnostic/evaluation/metrics/conditional_ic.py +469 -0
  55. ml4t/diagnostic/evaluation/metrics/feature_outcome.py +475 -0
  56. ml4t/diagnostic/evaluation/metrics/ic_statistics.py +446 -0
  57. ml4t/diagnostic/evaluation/metrics/importance_analysis.py +338 -0
  58. ml4t/diagnostic/evaluation/metrics/importance_classical.py +375 -0
  59. ml4t/diagnostic/evaluation/metrics/importance_mda.py +371 -0
  60. ml4t/diagnostic/evaluation/metrics/importance_shap.py +715 -0
  61. ml4t/diagnostic/evaluation/metrics/information_coefficient.py +527 -0
  62. ml4t/diagnostic/evaluation/metrics/interactions.py +772 -0
  63. ml4t/diagnostic/evaluation/metrics/monotonicity.py +226 -0
  64. ml4t/diagnostic/evaluation/metrics/risk_adjusted.py +324 -0
  65. ml4t/diagnostic/evaluation/multi_signal.py +550 -0
  66. ml4t/diagnostic/evaluation/portfolio_analysis/__init__.py +83 -0
  67. ml4t/diagnostic/evaluation/portfolio_analysis/analysis.py +734 -0
  68. ml4t/diagnostic/evaluation/portfolio_analysis/metrics.py +589 -0
  69. ml4t/diagnostic/evaluation/portfolio_analysis/results.py +334 -0
  70. ml4t/diagnostic/evaluation/report_generation.py +824 -0
  71. ml4t/diagnostic/evaluation/signal_selector.py +452 -0
  72. ml4t/diagnostic/evaluation/stat_registry.py +139 -0
  73. ml4t/diagnostic/evaluation/stationarity/__init__.py +97 -0
  74. ml4t/diagnostic/evaluation/stationarity/analysis.py +518 -0
  75. ml4t/diagnostic/evaluation/stationarity/augmented_dickey_fuller.py +296 -0
  76. ml4t/diagnostic/evaluation/stationarity/kpss_test.py +308 -0
  77. ml4t/diagnostic/evaluation/stationarity/phillips_perron.py +365 -0
  78. ml4t/diagnostic/evaluation/stats/AGENT.md +43 -0
  79. ml4t/diagnostic/evaluation/stats/__init__.py +191 -0
  80. ml4t/diagnostic/evaluation/stats/backtest_overfitting.py +219 -0
  81. ml4t/diagnostic/evaluation/stats/bootstrap.py +228 -0
  82. ml4t/diagnostic/evaluation/stats/deflated_sharpe_ratio.py +591 -0
  83. ml4t/diagnostic/evaluation/stats/false_discovery_rate.py +295 -0
  84. ml4t/diagnostic/evaluation/stats/hac_standard_errors.py +108 -0
  85. ml4t/diagnostic/evaluation/stats/minimum_track_record.py +408 -0
  86. ml4t/diagnostic/evaluation/stats/moments.py +164 -0
  87. ml4t/diagnostic/evaluation/stats/rademacher_adjustment.py +436 -0
  88. ml4t/diagnostic/evaluation/stats/reality_check.py +155 -0
  89. ml4t/diagnostic/evaluation/stats/sharpe_inference.py +219 -0
  90. ml4t/diagnostic/evaluation/themes.py +330 -0
  91. ml4t/diagnostic/evaluation/threshold_analysis.py +957 -0
  92. ml4t/diagnostic/evaluation/trade_analysis.py +1136 -0
  93. ml4t/diagnostic/evaluation/trade_dashboard/__init__.py +32 -0
  94. ml4t/diagnostic/evaluation/trade_dashboard/app.py +315 -0
  95. ml4t/diagnostic/evaluation/trade_dashboard/export/__init__.py +18 -0
  96. ml4t/diagnostic/evaluation/trade_dashboard/export/csv.py +82 -0
  97. ml4t/diagnostic/evaluation/trade_dashboard/export/html.py +276 -0
  98. ml4t/diagnostic/evaluation/trade_dashboard/io.py +166 -0
  99. ml4t/diagnostic/evaluation/trade_dashboard/normalize.py +304 -0
  100. ml4t/diagnostic/evaluation/trade_dashboard/stats.py +386 -0
  101. ml4t/diagnostic/evaluation/trade_dashboard/style.py +79 -0
  102. ml4t/diagnostic/evaluation/trade_dashboard/tabs/__init__.py +21 -0
  103. ml4t/diagnostic/evaluation/trade_dashboard/tabs/patterns.py +354 -0
  104. ml4t/diagnostic/evaluation/trade_dashboard/tabs/shap_analysis.py +280 -0
  105. ml4t/diagnostic/evaluation/trade_dashboard/tabs/stat_validation.py +186 -0
  106. ml4t/diagnostic/evaluation/trade_dashboard/tabs/worst_trades.py +236 -0
  107. ml4t/diagnostic/evaluation/trade_dashboard/types.py +129 -0
  108. ml4t/diagnostic/evaluation/trade_shap/__init__.py +102 -0
  109. ml4t/diagnostic/evaluation/trade_shap/alignment.py +188 -0
  110. ml4t/diagnostic/evaluation/trade_shap/characterize.py +413 -0
  111. ml4t/diagnostic/evaluation/trade_shap/cluster.py +302 -0
  112. ml4t/diagnostic/evaluation/trade_shap/explain.py +208 -0
  113. ml4t/diagnostic/evaluation/trade_shap/hypotheses/__init__.py +23 -0
  114. ml4t/diagnostic/evaluation/trade_shap/hypotheses/generator.py +290 -0
  115. ml4t/diagnostic/evaluation/trade_shap/hypotheses/matcher.py +251 -0
  116. ml4t/diagnostic/evaluation/trade_shap/hypotheses/templates.yaml +467 -0
  117. ml4t/diagnostic/evaluation/trade_shap/models.py +386 -0
  118. ml4t/diagnostic/evaluation/trade_shap/normalize.py +116 -0
  119. ml4t/diagnostic/evaluation/trade_shap/pipeline.py +263 -0
  120. ml4t/diagnostic/evaluation/trade_shap_dashboard.py +283 -0
  121. ml4t/diagnostic/evaluation/trade_shap_diagnostics.py +588 -0
  122. ml4t/diagnostic/evaluation/validated_cv.py +535 -0
  123. ml4t/diagnostic/evaluation/visualization.py +1050 -0
  124. ml4t/diagnostic/evaluation/volatility/__init__.py +45 -0
  125. ml4t/diagnostic/evaluation/volatility/analysis.py +351 -0
  126. ml4t/diagnostic/evaluation/volatility/arch.py +258 -0
  127. ml4t/diagnostic/evaluation/volatility/garch.py +460 -0
  128. ml4t/diagnostic/integration/__init__.py +48 -0
  129. ml4t/diagnostic/integration/backtest_contract.py +671 -0
  130. ml4t/diagnostic/integration/data_contract.py +316 -0
  131. ml4t/diagnostic/integration/engineer_contract.py +226 -0
  132. ml4t/diagnostic/logging/__init__.py +77 -0
  133. ml4t/diagnostic/logging/logger.py +245 -0
  134. ml4t/diagnostic/logging/performance.py +234 -0
  135. ml4t/diagnostic/logging/progress.py +234 -0
  136. ml4t/diagnostic/logging/wandb.py +412 -0
  137. ml4t/diagnostic/metrics/__init__.py +9 -0
  138. ml4t/diagnostic/metrics/percentiles.py +128 -0
  139. ml4t/diagnostic/py.typed +1 -0
  140. ml4t/diagnostic/reporting/__init__.py +43 -0
  141. ml4t/diagnostic/reporting/base.py +130 -0
  142. ml4t/diagnostic/reporting/html_renderer.py +275 -0
  143. ml4t/diagnostic/reporting/json_renderer.py +51 -0
  144. ml4t/diagnostic/reporting/markdown_renderer.py +117 -0
  145. ml4t/diagnostic/results/AGENT.md +24 -0
  146. ml4t/diagnostic/results/__init__.py +105 -0
  147. ml4t/diagnostic/results/barrier_results/__init__.py +36 -0
  148. ml4t/diagnostic/results/barrier_results/hit_rate.py +304 -0
  149. ml4t/diagnostic/results/barrier_results/precision_recall.py +266 -0
  150. ml4t/diagnostic/results/barrier_results/profit_factor.py +297 -0
  151. ml4t/diagnostic/results/barrier_results/tearsheet.py +397 -0
  152. ml4t/diagnostic/results/barrier_results/time_to_target.py +305 -0
  153. ml4t/diagnostic/results/barrier_results/validation.py +38 -0
  154. ml4t/diagnostic/results/base.py +177 -0
  155. ml4t/diagnostic/results/event_results.py +349 -0
  156. ml4t/diagnostic/results/feature_results.py +787 -0
  157. ml4t/diagnostic/results/multi_signal_results.py +431 -0
  158. ml4t/diagnostic/results/portfolio_results.py +281 -0
  159. ml4t/diagnostic/results/sharpe_results.py +448 -0
  160. ml4t/diagnostic/results/signal_results/__init__.py +74 -0
  161. ml4t/diagnostic/results/signal_results/ic.py +581 -0
  162. ml4t/diagnostic/results/signal_results/irtc.py +110 -0
  163. ml4t/diagnostic/results/signal_results/quantile.py +392 -0
  164. ml4t/diagnostic/results/signal_results/tearsheet.py +456 -0
  165. ml4t/diagnostic/results/signal_results/turnover.py +213 -0
  166. ml4t/diagnostic/results/signal_results/validation.py +147 -0
  167. ml4t/diagnostic/signal/AGENT.md +17 -0
  168. ml4t/diagnostic/signal/__init__.py +69 -0
  169. ml4t/diagnostic/signal/_report.py +152 -0
  170. ml4t/diagnostic/signal/_utils.py +261 -0
  171. ml4t/diagnostic/signal/core.py +275 -0
  172. ml4t/diagnostic/signal/quantile.py +148 -0
  173. ml4t/diagnostic/signal/result.py +214 -0
  174. ml4t/diagnostic/signal/signal_ic.py +129 -0
  175. ml4t/diagnostic/signal/turnover.py +182 -0
  176. ml4t/diagnostic/splitters/AGENT.md +19 -0
  177. ml4t/diagnostic/splitters/__init__.py +36 -0
  178. ml4t/diagnostic/splitters/base.py +501 -0
  179. ml4t/diagnostic/splitters/calendar.py +421 -0
  180. ml4t/diagnostic/splitters/calendar_config.py +91 -0
  181. ml4t/diagnostic/splitters/combinatorial.py +1064 -0
  182. ml4t/diagnostic/splitters/config.py +322 -0
  183. ml4t/diagnostic/splitters/cpcv/__init__.py +57 -0
  184. ml4t/diagnostic/splitters/cpcv/combinations.py +119 -0
  185. ml4t/diagnostic/splitters/cpcv/partitioning.py +263 -0
  186. ml4t/diagnostic/splitters/cpcv/purge_engine.py +379 -0
  187. ml4t/diagnostic/splitters/cpcv/windows.py +190 -0
  188. ml4t/diagnostic/splitters/group_isolation.py +329 -0
  189. ml4t/diagnostic/splitters/persistence.py +316 -0
  190. ml4t/diagnostic/splitters/utils.py +207 -0
  191. ml4t/diagnostic/splitters/walk_forward.py +757 -0
  192. ml4t/diagnostic/utils/__init__.py +42 -0
  193. ml4t/diagnostic/utils/config.py +542 -0
  194. ml4t/diagnostic/utils/dependencies.py +318 -0
  195. ml4t/diagnostic/utils/sessions.py +127 -0
  196. ml4t/diagnostic/validation/__init__.py +54 -0
  197. ml4t/diagnostic/validation/dataframe.py +274 -0
  198. ml4t/diagnostic/validation/returns.py +280 -0
  199. ml4t/diagnostic/validation/timeseries.py +299 -0
  200. ml4t/diagnostic/visualization/AGENT.md +19 -0
  201. ml4t/diagnostic/visualization/__init__.py +223 -0
  202. ml4t/diagnostic/visualization/backtest/__init__.py +98 -0
  203. ml4t/diagnostic/visualization/backtest/cost_attribution.py +762 -0
  204. ml4t/diagnostic/visualization/backtest/executive_summary.py +895 -0
  205. ml4t/diagnostic/visualization/backtest/interactive_controls.py +673 -0
  206. ml4t/diagnostic/visualization/backtest/statistical_validity.py +874 -0
  207. ml4t/diagnostic/visualization/backtest/tearsheet.py +565 -0
  208. ml4t/diagnostic/visualization/backtest/template_system.py +373 -0
  209. ml4t/diagnostic/visualization/backtest/trade_plots.py +1172 -0
  210. ml4t/diagnostic/visualization/barrier_plots.py +782 -0
  211. ml4t/diagnostic/visualization/core.py +1060 -0
  212. ml4t/diagnostic/visualization/dashboards/__init__.py +36 -0
  213. ml4t/diagnostic/visualization/dashboards/base.py +582 -0
  214. ml4t/diagnostic/visualization/dashboards/importance.py +801 -0
  215. ml4t/diagnostic/visualization/dashboards/interaction.py +263 -0
  216. ml4t/diagnostic/visualization/dashboards.py +43 -0
  217. ml4t/diagnostic/visualization/data_extraction/__init__.py +48 -0
  218. ml4t/diagnostic/visualization/data_extraction/importance.py +649 -0
  219. ml4t/diagnostic/visualization/data_extraction/interaction.py +504 -0
  220. ml4t/diagnostic/visualization/data_extraction/types.py +113 -0
  221. ml4t/diagnostic/visualization/data_extraction/validation.py +66 -0
  222. ml4t/diagnostic/visualization/feature_plots.py +888 -0
  223. ml4t/diagnostic/visualization/interaction_plots.py +618 -0
  224. ml4t/diagnostic/visualization/portfolio/__init__.py +41 -0
  225. ml4t/diagnostic/visualization/portfolio/dashboard.py +514 -0
  226. ml4t/diagnostic/visualization/portfolio/drawdown_plots.py +341 -0
  227. ml4t/diagnostic/visualization/portfolio/returns_plots.py +487 -0
  228. ml4t/diagnostic/visualization/portfolio/risk_plots.py +301 -0
  229. ml4t/diagnostic/visualization/report_generation.py +1343 -0
  230. ml4t/diagnostic/visualization/signal/__init__.py +103 -0
  231. ml4t/diagnostic/visualization/signal/dashboard.py +911 -0
  232. ml4t/diagnostic/visualization/signal/event_plots.py +514 -0
  233. ml4t/diagnostic/visualization/signal/ic_plots.py +635 -0
  234. ml4t/diagnostic/visualization/signal/multi_signal_dashboard.py +974 -0
  235. ml4t/diagnostic/visualization/signal/multi_signal_plots.py +603 -0
  236. ml4t/diagnostic/visualization/signal/quantile_plots.py +625 -0
  237. ml4t/diagnostic/visualization/signal/turnover_plots.py +400 -0
  238. ml4t/diagnostic/visualization/trade_shap/__init__.py +90 -0
  239. ml4t_diagnostic-0.1.0a1.dist-info/METADATA +1044 -0
  240. ml4t_diagnostic-0.1.0a1.dist-info/RECORD +242 -0
  241. ml4t_diagnostic-0.1.0a1.dist-info/WHEEL +4 -0
  242. ml4t_diagnostic-0.1.0a1.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,318 @@
1
+ """Optional dependency checking and validation utilities.
2
+
3
+ This module provides centralized dependency checking for optional ML libraries.
4
+ It ensures clear error messages and graceful degradation when dependencies
5
+ are unavailable.
6
+
7
+ Example:
8
+ >>> from ml4t.diagnostic.utils.dependencies import check_dependency, DEPS
9
+ >>>
10
+ >>> # Check if LightGBM is available
11
+ >>> if check_dependency("lightgbm"):
12
+ ... import lightgbm as lgb
13
+ ... # Use LightGBM
14
+ ... else:
15
+ ... print("LightGBM not available, using fallback")
16
+ >>>
17
+ >>> # Get dependency information
18
+ >>> print(DEPS.lightgbm.install_cmd) # pip install lightgbm
19
+ >>> print(DEPS.lightgbm.purpose) # Feature importance (MDI, permutation), boosting models
20
+ """
21
+
22
+ from __future__ import annotations
23
+
24
+ import importlib
25
+ import warnings
26
+ from dataclasses import dataclass
27
+
28
+
29
@dataclass
class DependencyInfo:
    """Descriptive record for a single optional package.

    Attributes:
        name: Human-readable package name (e.g., "LightGBM").
        import_name: Module name passed to ``importlib.import_module``
            (e.g., "lightgbm").
        install_cmd: Command shown to the user for installing the package.
        purpose: Short description of what the package is used for.
        features: Names of the features that need this package.
        alternatives: Other packages/approaches that can stand in for this
            one. ``None`` is normalised to an empty list after construction.
    """

    name: str
    import_name: str
    install_cmd: str
    purpose: str
    features: list[str]
    alternatives: list[str] | None = None

    def __post_init__(self) -> None:
        # Give every instance its own list so callers can always iterate
        # ``alternatives`` without a None check.
        if self.alternatives is None:
            self.alternatives = []

    @property
    def is_available(self) -> bool:
        """Return True when ``import_name`` can be imported, else False."""
        try:
            importlib.import_module(self.import_name)
        except ImportError:
            return False
        return True

    def require(self, feature: str | None = None) -> None:
        """Fail with an actionable ImportError when the package is absent.

        Args:
            feature: Optional name of the feature that needs the package;
                included in the error message when given.

        Raises:
            ImportError: If the package cannot be imported.
        """
        if self.is_available:
            return

        pieces = [f"{self.name} is required"]
        if feature:
            pieces.append(f" for {feature}")
        pieces.append(f". Install with: {self.install_cmd}")
        if self.alternatives:
            pieces.append(f"\n Alternatives: {', '.join(self.alternatives)}")

        raise ImportError("".join(pieces))

    def warn_if_missing(self, feature: str | None = None, action: str = "skipping") -> bool:
        """Emit a UserWarning when the package is absent.

        Args:
            feature: Optional name of the feature that wanted the package.
            action: What happens instead (e.g., "skipping", "using fallback").

        Returns:
            bool: True if the package is importable, False if it is missing
            (in which case a warning has been issued).
        """
        if self.is_available:
            return True

        pieces = [f"{self.name} not available - {action}"]
        if feature:
            pieces.append(f" {feature}")
        pieces.append(f". Install with: {self.install_cmd}")
        if self.alternatives:
            pieces.append(f" (or use: {', '.join(self.alternatives)})")

        # stacklevel=2 attributes the warning to whoever called this method.
        warnings.warn("".join(pieces), UserWarning, stacklevel=2)
        return False
104
+
105
+
106
class OptionalDependencies:
    """Registry of all optional dependencies with their metadata.

    Entries are stored in ``self._deps`` keyed by a lowercase identifier
    (e.g., "lightgbm") and can be reached as attributes (``DEPS.lightgbm``),
    as items (``DEPS["lightgbm"]``) or through :meth:`get`.
    """

    def __init__(self) -> None:
        self._deps: dict[str, DependencyInfo] = {}
        self._register_dependencies()

    def _register_dependencies(self) -> None:
        """Register all known optional dependencies."""

        # ML Libraries
        self._deps["lightgbm"] = DependencyInfo(
            name="LightGBM",
            import_name="lightgbm",
            install_cmd="pip install lightgbm",
            purpose="Feature importance (MDI, permutation), boosting models",
            features=[
                "FeatureOutcome.run_analysis (ML importance)",
                "MDI feature importance",
                "Permutation importance",
            ],
            alternatives=["xgboost", "scikit-learn RandomForest"],
        )

        self._deps["xgboost"] = DependencyInfo(
            name="XGBoost",
            import_name="xgboost",
            install_cmd="pip install xgboost",
            purpose="Domain classifier drift detection, boosting models",
            features=[
                "compute_domain_classifier_drift (XGBoost backend)",
                "Drift detection with XGBoost",
            ],
            alternatives=["lightgbm", "scikit-learn RandomForest"],
        )

        self._deps["shap"] = DependencyInfo(
            name="SHAP",
            import_name="shap",
            install_cmd="pip install shap",
            purpose="Shapley value feature importance and interactions",
            features=[
                "SHAP-based feature importance",
                "Feature interactions analysis",
                "Model interpretation",
            ],
            alternatives=["Permutation importance", "MDI importance"],
        )

        # Other optional dependencies
        self._deps["plotly"] = DependencyInfo(
            name="Plotly",
            import_name="plotly",
            install_cmd="pip install plotly",
            purpose="Interactive visualizations and dashboards",
            features=[
                "create_evaluation_dashboard",
                "Interactive plots",
                "HTML reports",
            ],
            alternatives=["matplotlib", "seaborn"],
        )

    def __getattr__(self, name: str) -> DependencyInfo:
        """Access dependencies as attributes (e.g., DEPS.lightgbm).

        Only invoked when normal attribute lookup fails.

        Raises:
            AttributeError: If ``name`` is not a registered dependency.
        """
        # Read the registry straight from the instance dict. Going through
        # ``self._deps`` would re-enter __getattr__ whenever ``_deps`` is not
        # set yet (copy.copy/deepcopy and unpickling create the instance
        # without running __init__ and then probe for ``__setstate__``),
        # ending in RecursionError instead of AttributeError.
        registry = self.__dict__.get("_deps")
        if registry is not None and name in registry:
            return registry[name]
        raise AttributeError(f"Unknown dependency: {name}")

    def __getitem__(self, name: str) -> DependencyInfo:
        """Access dependencies as items (e.g., DEPS["lightgbm"]).

        Raises:
            KeyError: If ``name`` is not a registered dependency.
        """
        return self._deps[name]

    def get(self, name: str, default: DependencyInfo | None = None) -> DependencyInfo | None:
        """Get dependency info, return default if not found."""
        return self._deps.get(name, default)

    def check(self, name: str) -> bool:
        """Check if a dependency is available.

        Names that are not registered are reported as unavailable.
        """
        info = self._deps.get(name)
        return info.is_available if info is not None else False

    def check_multiple(self, names: list[str]) -> dict[str, bool]:
        """Check availability of multiple dependencies.

        Args:
            names: List of dependency names to check

        Returns:
            Dict mapping dependency name to availability status
        """
        return {name: self.check(name) for name in names}

    def get_missing(self, names: list[str]) -> list[str]:
        """Get list of missing dependencies from a list.

        Args:
            names: List of dependency names to check

        Returns:
            List of missing dependency names (unregistered names count as
            missing, because :meth:`check` returns False for them)
        """
        return [name for name in names if not self.check(name)]

    def warn_missing(self, names: list[str], feature: str | None = None) -> list[str]:
        """Warn about missing dependencies, return list of missing ones.

        Unregistered names are skipped silently: they produce no warning and
        are not included in the result.

        Args:
            names: List of dependency names to check
            feature: Feature name using these dependencies

        Returns:
            List of registered dependency names that are missing
        """
        missing = []
        for name in names:
            info = self._deps.get(name)
            # warn_if_missing both emits the warning and reports availability,
            # so the import is probed only once per dependency.
            if info is not None and not info.warn_if_missing(feature):
                missing.append(name)
        return missing

    def summary(self) -> str:
        """Generate summary of all dependencies and their status.

        Returns:
            Multi-line string with one row per registered dependency (sorted
            by registry key) plus an install hint for each missing one.
        """
        lines = ["Optional Dependencies Status:", "=" * 60]

        for key in sorted(self._deps):
            info = self._deps[key]
            available = info.is_available  # probe the import once per entry
            status = "✓ Installed" if available else "✗ Missing"
            lines.append(f"{info.name:15} {status:15} {info.purpose}")
            if not available:
                lines.append(f" → Install: {info.install_cmd}")

        return "\n".join(lines)
240
+
241
+
242
+ # Global instance
243
+ DEPS = OptionalDependencies()
244
+
245
+
246
def check_dependency(name: str) -> bool:
    """Report whether a registered optional dependency can be imported.

    Thin convenience wrapper around ``DEPS.check``.

    Args:
        name: Registry key of the dependency (e.g., "lightgbm", "shap")

    Returns:
        bool: Whatever ``DEPS.check(name)`` reports for ``name``

    Example:
        >>> if check_dependency("lightgbm"):
        ...     import lightgbm as lgb
        ...     # Use LightGBM
    """
    is_present = DEPS.check(name)
    return is_present
261
+
262
+
263
def require_dependency(name: str, feature: str | None = None) -> None:
    """Insist that an optional dependency is installed.

    Args:
        name: Registry key of the dependency
        feature: Name of the feature that needs it (used in the error text)

    Raises:
        ImportError: If ``name`` is not registered, or if it is registered
            but cannot be imported

    Example:
        >>> require_dependency("shap", "SHAP analysis")
        >>> import shap  # Safe to import now
    """
    info = DEPS.get(name)
    if info is None:
        # Unknown keys are reported with the same exception type so callers
        # only have to handle ImportError.
        raise ImportError(f"Unknown dependency: {name}")
    info.require(feature)
281
+
282
+
283
def warn_if_missing(name: str, feature: str | None = None, action: str = "skipping") -> bool:
    """Warn when an optional dependency is absent and report its status.

    Args:
        name: Registry key of the dependency
        feature: Name of the feature that wanted the dependency
        action: What will happen without this dependency

    Returns:
        bool: True if the dependency is importable; False if it is missing
        or if ``name`` is not registered (a warning is issued in both cases)

    Example:
        >>> if warn_if_missing("lightgbm", "feature importance", "using fallback"):
        ...     import lightgbm as lgb
        ...     # Use LightGBM
        ... else:
        ...     # Use fallback method
    """
    info = DEPS.get(name)
    if info is None:
        # stacklevel=2 attributes the warning to the caller of this function.
        warnings.warn(f"Unknown dependency: {name}", stacklevel=2)
        return False
    return info.warn_if_missing(feature, action)
305
+
306
+
307
def get_dependency_summary() -> str:
    """Return the status report produced by ``DEPS.summary()``.

    Returns:
        str: Formatted, multi-line summary of the registered dependencies

    Example:
        >>> print(get_dependency_summary())
        Optional Dependencies Status:
        ...
    """
    report = DEPS.summary()
    return report
@@ -0,0 +1,127 @@
1
+ """Session assignment utilities for financial time-series data.
2
+
3
+ This module provides utilities to assign session dates to intraday data,
4
+ enabling session-aware cross-validation where sessions are the atomic unit.
5
+ """
6
+
7
+ import pandas as pd
8
+
9
+ try:
10
+ import pandas_market_calendars as mcal # noqa: F401 (availability check)
11
+
12
+ HAS_MARKET_CALENDARS = True
13
+ except ImportError:
14
+ HAS_MARKET_CALENDARS = False
15
+
16
+
17
def assign_session_dates(
    df: pd.DataFrame,
    calendar: str = "CME_Equity",
    timezone: str = "UTC",
    session_column: str = "session_date",
) -> pd.DataFrame:
    """Label every row of an intraday frame with its trading session.

    A copy of ``df`` is returned with one extra column holding the value
    produced by ``TradingCalendar.get_sessions`` for the frame's index.
    The input frame itself is not modified.

    Parameters
    ----------
    df : pd.DataFrame
        Frame indexed by a ``pd.DatetimeIndex``. The calendar is configured
        with ``localize_naive=True``.
        NOTE(review): presumably this is what lets tz-naive indexes through;
        confirm against ``CalendarConfig``.
    calendar : str, default='CME_Equity'
        Exchange name handed to ``CalendarConfig`` (the original docs list
        'CME_Equity', 'NYSE', 'LSE', 'TSX' as examples).
    timezone : str, default='UTC'
        Timezone handed to ``CalendarConfig``.
    session_column : str, default='session_date'
        Name of the column that receives the session labels.

    Returns
    -------
    pd.DataFrame
        Copy of ``df`` with ``session_column`` added.

    Raises
    ------
    ImportError
        If ``pandas_market_calendars`` is not installed.
    ValueError
        If ``df.index`` is not a ``pd.DatetimeIndex``.

    Notes
    -----
    - If ``df`` already has ``session_column``, the copy's column is
      overwritten.
    - Session boundaries are decided entirely by the calendar. The original
      notes describe CME futures as Sunday 5pm CT - Friday 4pm CT and US
      equities as 9:30am - 4pm ET; that is not verifiable from this
      function - TODO confirm against ``TradingCalendar``.

    Examples
    --------
    >>> df = pd.read_parquet('nq_data.parquet')  # Has DatetimeIndex
    >>> df = assign_session_dates(df, calendar='CME_Equity', timezone='America/Chicago')
    >>> df.groupby('session_date').size()  # Samples per session

    For data that already has session_date:
    >>> if 'session_date' not in df.columns:
    ...     df = assign_session_dates(df)
    """
    if not HAS_MARKET_CALENDARS:
        raise ImportError(
            "pandas_market_calendars is required for session assignment. "
            "Install with: pip install pandas_market_calendars"
        )

    if not isinstance(df.index, pd.DatetimeIndex):
        raise ValueError(f"DataFrame must have a DatetimeIndex. Got index type: {type(df.index)}")

    # Deferred imports: the splitters package would otherwise form an import
    # cycle with this module.
    from ml4t.diagnostic.splitters.calendar import TradingCalendar
    from ml4t.diagnostic.splitters.calendar_config import CalendarConfig

    session_calendar = TradingCalendar(
        CalendarConfig(exchange=calendar, timezone=timezone, localize_naive=True)
    )
    session_labels = session_calendar.get_sessions(df.index)

    # Work on a copy so the caller's frame is left untouched.
    labelled = df.copy()
    labelled[session_column] = session_labels
    return labelled
89
+
90
+
91
def get_complete_sessions(
    df: pd.DataFrame, session_column: str = "session_date", min_samples: int = 100
) -> pd.Series:
    """Return the sessions that contain at least ``min_samples`` rows.

    Parameters
    ----------
    df : pd.DataFrame
        Frame that already carries a session column
    session_column : str, default='session_date'
        Name of the column containing session labels
    min_samples : int, default=100
        Minimum number of rows a session needs to count as complete

    Returns
    -------
    pd.Series
        The qualifying session labels as a Series named ``session_column``

    Raises
    ------
    ValueError
        If ``session_column`` is not a column of ``df``

    Examples
    --------
    >>> df = assign_session_dates(df)
    >>> complete = get_complete_sessions(df, min_samples=500)
    >>> df_clean = df[df['session_date'].isin(complete)]
    """
    if session_column not in df.columns:
        raise ValueError(
            f"DataFrame does not have '{session_column}' column. Run assign_session_dates() first."
        )

    # Rows per session, then keep only the sessions that reach the threshold.
    rows_per_session = df.groupby(session_column).size()
    has_enough = rows_per_session >= min_samples
    kept_labels = rows_per_session[has_enough].index

    return kept_labels.to_series(name=session_column)
@@ -0,0 +1,54 @@
1
+ """Data validation utilities for ML4T Diagnostic inputs.
2
+
3
+ Provides comprehensive validation for DataFrames, time series, returns,
4
+ and other common financial data inputs.
5
+
6
+ Examples:
7
+ >>> from ml4t.diagnostic.validation import validate_dataframe, validate_returns
8
+ >>>
9
+ >>> # Validate DataFrame structure
10
+ >>> validate_dataframe(
11
+ ... df,
12
+ ... required_columns=["close", "volume"],
13
+ ... numeric_columns=["close", "volume"]
14
+ ... )
15
+ >>>
16
+ >>> # Validate returns series
17
+ >>> validate_returns(returns, allow_nulls=False, bounds=(-0.5, 0.5))
18
+ """
19
+
20
+ from ml4t.diagnostic.validation.dataframe import (
21
+ DataFrameValidator,
22
+ ValidationError,
23
+ validate_dataframe,
24
+ validate_schema,
25
+ )
26
+ from ml4t.diagnostic.validation.returns import (
27
+ ReturnsValidator,
28
+ validate_bounds,
29
+ validate_returns,
30
+ )
31
+ from ml4t.diagnostic.validation.timeseries import (
32
+ TimeSeriesValidator,
33
+ validate_frequency,
34
+ validate_index,
35
+ validate_timeseries,
36
+ )
37
+
38
+ __all__ = [
39
+ # Core
40
+ "ValidationError",
41
+ # DataFrame validation
42
+ "DataFrameValidator",
43
+ "validate_dataframe",
44
+ "validate_schema",
45
+ # Time series validation
46
+ "TimeSeriesValidator",
47
+ "validate_timeseries",
48
+ "validate_index",
49
+ "validate_frequency",
50
+ # Returns validation
51
+ "ReturnsValidator",
52
+ "validate_returns",
53
+ "validate_bounds",
54
+ ]