ml4t-diagnostic 0.1.0a1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ml4t/diagnostic/AGENT.md +25 -0
- ml4t/diagnostic/__init__.py +166 -0
- ml4t/diagnostic/backends/__init__.py +10 -0
- ml4t/diagnostic/backends/adapter.py +192 -0
- ml4t/diagnostic/backends/polars_backend.py +899 -0
- ml4t/diagnostic/caching/__init__.py +40 -0
- ml4t/diagnostic/caching/cache.py +331 -0
- ml4t/diagnostic/caching/decorators.py +131 -0
- ml4t/diagnostic/caching/smart_cache.py +339 -0
- ml4t/diagnostic/config/AGENT.md +24 -0
- ml4t/diagnostic/config/README.md +267 -0
- ml4t/diagnostic/config/__init__.py +219 -0
- ml4t/diagnostic/config/barrier_config.py +277 -0
- ml4t/diagnostic/config/base.py +301 -0
- ml4t/diagnostic/config/event_config.py +148 -0
- ml4t/diagnostic/config/feature_config.py +404 -0
- ml4t/diagnostic/config/multi_signal_config.py +55 -0
- ml4t/diagnostic/config/portfolio_config.py +215 -0
- ml4t/diagnostic/config/report_config.py +391 -0
- ml4t/diagnostic/config/sharpe_config.py +202 -0
- ml4t/diagnostic/config/signal_config.py +206 -0
- ml4t/diagnostic/config/trade_analysis_config.py +310 -0
- ml4t/diagnostic/config/validation.py +279 -0
- ml4t/diagnostic/core/__init__.py +29 -0
- ml4t/diagnostic/core/numba_utils.py +315 -0
- ml4t/diagnostic/core/purging.py +372 -0
- ml4t/diagnostic/core/sampling.py +471 -0
- ml4t/diagnostic/errors/__init__.py +205 -0
- ml4t/diagnostic/evaluation/AGENT.md +26 -0
- ml4t/diagnostic/evaluation/__init__.py +437 -0
- ml4t/diagnostic/evaluation/autocorrelation.py +531 -0
- ml4t/diagnostic/evaluation/barrier_analysis.py +1050 -0
- ml4t/diagnostic/evaluation/binary_metrics.py +910 -0
- ml4t/diagnostic/evaluation/dashboard.py +715 -0
- ml4t/diagnostic/evaluation/diagnostic_plots.py +1037 -0
- ml4t/diagnostic/evaluation/distribution/__init__.py +499 -0
- ml4t/diagnostic/evaluation/distribution/moments.py +299 -0
- ml4t/diagnostic/evaluation/distribution/tails.py +777 -0
- ml4t/diagnostic/evaluation/distribution/tests.py +470 -0
- ml4t/diagnostic/evaluation/drift/__init__.py +139 -0
- ml4t/diagnostic/evaluation/drift/analysis.py +432 -0
- ml4t/diagnostic/evaluation/drift/domain_classifier.py +517 -0
- ml4t/diagnostic/evaluation/drift/population_stability_index.py +310 -0
- ml4t/diagnostic/evaluation/drift/wasserstein.py +388 -0
- ml4t/diagnostic/evaluation/event_analysis.py +647 -0
- ml4t/diagnostic/evaluation/excursion.py +390 -0
- ml4t/diagnostic/evaluation/feature_diagnostics.py +873 -0
- ml4t/diagnostic/evaluation/feature_outcome.py +666 -0
- ml4t/diagnostic/evaluation/framework.py +935 -0
- ml4t/diagnostic/evaluation/metric_registry.py +255 -0
- ml4t/diagnostic/evaluation/metrics/AGENT.md +23 -0
- ml4t/diagnostic/evaluation/metrics/__init__.py +133 -0
- ml4t/diagnostic/evaluation/metrics/basic.py +160 -0
- ml4t/diagnostic/evaluation/metrics/conditional_ic.py +469 -0
- ml4t/diagnostic/evaluation/metrics/feature_outcome.py +475 -0
- ml4t/diagnostic/evaluation/metrics/ic_statistics.py +446 -0
- ml4t/diagnostic/evaluation/metrics/importance_analysis.py +338 -0
- ml4t/diagnostic/evaluation/metrics/importance_classical.py +375 -0
- ml4t/diagnostic/evaluation/metrics/importance_mda.py +371 -0
- ml4t/diagnostic/evaluation/metrics/importance_shap.py +715 -0
- ml4t/diagnostic/evaluation/metrics/information_coefficient.py +527 -0
- ml4t/diagnostic/evaluation/metrics/interactions.py +772 -0
- ml4t/diagnostic/evaluation/metrics/monotonicity.py +226 -0
- ml4t/diagnostic/evaluation/metrics/risk_adjusted.py +324 -0
- ml4t/diagnostic/evaluation/multi_signal.py +550 -0
- ml4t/diagnostic/evaluation/portfolio_analysis/__init__.py +83 -0
- ml4t/diagnostic/evaluation/portfolio_analysis/analysis.py +734 -0
- ml4t/diagnostic/evaluation/portfolio_analysis/metrics.py +589 -0
- ml4t/diagnostic/evaluation/portfolio_analysis/results.py +334 -0
- ml4t/diagnostic/evaluation/report_generation.py +824 -0
- ml4t/diagnostic/evaluation/signal_selector.py +452 -0
- ml4t/diagnostic/evaluation/stat_registry.py +139 -0
- ml4t/diagnostic/evaluation/stationarity/__init__.py +97 -0
- ml4t/diagnostic/evaluation/stationarity/analysis.py +518 -0
- ml4t/diagnostic/evaluation/stationarity/augmented_dickey_fuller.py +296 -0
- ml4t/diagnostic/evaluation/stationarity/kpss_test.py +308 -0
- ml4t/diagnostic/evaluation/stationarity/phillips_perron.py +365 -0
- ml4t/diagnostic/evaluation/stats/AGENT.md +43 -0
- ml4t/diagnostic/evaluation/stats/__init__.py +191 -0
- ml4t/diagnostic/evaluation/stats/backtest_overfitting.py +219 -0
- ml4t/diagnostic/evaluation/stats/bootstrap.py +228 -0
- ml4t/diagnostic/evaluation/stats/deflated_sharpe_ratio.py +591 -0
- ml4t/diagnostic/evaluation/stats/false_discovery_rate.py +295 -0
- ml4t/diagnostic/evaluation/stats/hac_standard_errors.py +108 -0
- ml4t/diagnostic/evaluation/stats/minimum_track_record.py +408 -0
- ml4t/diagnostic/evaluation/stats/moments.py +164 -0
- ml4t/diagnostic/evaluation/stats/rademacher_adjustment.py +436 -0
- ml4t/diagnostic/evaluation/stats/reality_check.py +155 -0
- ml4t/diagnostic/evaluation/stats/sharpe_inference.py +219 -0
- ml4t/diagnostic/evaluation/themes.py +330 -0
- ml4t/diagnostic/evaluation/threshold_analysis.py +957 -0
- ml4t/diagnostic/evaluation/trade_analysis.py +1136 -0
- ml4t/diagnostic/evaluation/trade_dashboard/__init__.py +32 -0
- ml4t/diagnostic/evaluation/trade_dashboard/app.py +315 -0
- ml4t/diagnostic/evaluation/trade_dashboard/export/__init__.py +18 -0
- ml4t/diagnostic/evaluation/trade_dashboard/export/csv.py +82 -0
- ml4t/diagnostic/evaluation/trade_dashboard/export/html.py +276 -0
- ml4t/diagnostic/evaluation/trade_dashboard/io.py +166 -0
- ml4t/diagnostic/evaluation/trade_dashboard/normalize.py +304 -0
- ml4t/diagnostic/evaluation/trade_dashboard/stats.py +386 -0
- ml4t/diagnostic/evaluation/trade_dashboard/style.py +79 -0
- ml4t/diagnostic/evaluation/trade_dashboard/tabs/__init__.py +21 -0
- ml4t/diagnostic/evaluation/trade_dashboard/tabs/patterns.py +354 -0
- ml4t/diagnostic/evaluation/trade_dashboard/tabs/shap_analysis.py +280 -0
- ml4t/diagnostic/evaluation/trade_dashboard/tabs/stat_validation.py +186 -0
- ml4t/diagnostic/evaluation/trade_dashboard/tabs/worst_trades.py +236 -0
- ml4t/diagnostic/evaluation/trade_dashboard/types.py +129 -0
- ml4t/diagnostic/evaluation/trade_shap/__init__.py +102 -0
- ml4t/diagnostic/evaluation/trade_shap/alignment.py +188 -0
- ml4t/diagnostic/evaluation/trade_shap/characterize.py +413 -0
- ml4t/diagnostic/evaluation/trade_shap/cluster.py +302 -0
- ml4t/diagnostic/evaluation/trade_shap/explain.py +208 -0
- ml4t/diagnostic/evaluation/trade_shap/hypotheses/__init__.py +23 -0
- ml4t/diagnostic/evaluation/trade_shap/hypotheses/generator.py +290 -0
- ml4t/diagnostic/evaluation/trade_shap/hypotheses/matcher.py +251 -0
- ml4t/diagnostic/evaluation/trade_shap/hypotheses/templates.yaml +467 -0
- ml4t/diagnostic/evaluation/trade_shap/models.py +386 -0
- ml4t/diagnostic/evaluation/trade_shap/normalize.py +116 -0
- ml4t/diagnostic/evaluation/trade_shap/pipeline.py +263 -0
- ml4t/diagnostic/evaluation/trade_shap_dashboard.py +283 -0
- ml4t/diagnostic/evaluation/trade_shap_diagnostics.py +588 -0
- ml4t/diagnostic/evaluation/validated_cv.py +535 -0
- ml4t/diagnostic/evaluation/visualization.py +1050 -0
- ml4t/diagnostic/evaluation/volatility/__init__.py +45 -0
- ml4t/diagnostic/evaluation/volatility/analysis.py +351 -0
- ml4t/diagnostic/evaluation/volatility/arch.py +258 -0
- ml4t/diagnostic/evaluation/volatility/garch.py +460 -0
- ml4t/diagnostic/integration/__init__.py +48 -0
- ml4t/diagnostic/integration/backtest_contract.py +671 -0
- ml4t/diagnostic/integration/data_contract.py +316 -0
- ml4t/diagnostic/integration/engineer_contract.py +226 -0
- ml4t/diagnostic/logging/__init__.py +77 -0
- ml4t/diagnostic/logging/logger.py +245 -0
- ml4t/diagnostic/logging/performance.py +234 -0
- ml4t/diagnostic/logging/progress.py +234 -0
- ml4t/diagnostic/logging/wandb.py +412 -0
- ml4t/diagnostic/metrics/__init__.py +9 -0
- ml4t/diagnostic/metrics/percentiles.py +128 -0
- ml4t/diagnostic/py.typed +1 -0
- ml4t/diagnostic/reporting/__init__.py +43 -0
- ml4t/diagnostic/reporting/base.py +130 -0
- ml4t/diagnostic/reporting/html_renderer.py +275 -0
- ml4t/diagnostic/reporting/json_renderer.py +51 -0
- ml4t/diagnostic/reporting/markdown_renderer.py +117 -0
- ml4t/diagnostic/results/AGENT.md +24 -0
- ml4t/diagnostic/results/__init__.py +105 -0
- ml4t/diagnostic/results/barrier_results/__init__.py +36 -0
- ml4t/diagnostic/results/barrier_results/hit_rate.py +304 -0
- ml4t/diagnostic/results/barrier_results/precision_recall.py +266 -0
- ml4t/diagnostic/results/barrier_results/profit_factor.py +297 -0
- ml4t/diagnostic/results/barrier_results/tearsheet.py +397 -0
- ml4t/diagnostic/results/barrier_results/time_to_target.py +305 -0
- ml4t/diagnostic/results/barrier_results/validation.py +38 -0
- ml4t/diagnostic/results/base.py +177 -0
- ml4t/diagnostic/results/event_results.py +349 -0
- ml4t/diagnostic/results/feature_results.py +787 -0
- ml4t/diagnostic/results/multi_signal_results.py +431 -0
- ml4t/diagnostic/results/portfolio_results.py +281 -0
- ml4t/diagnostic/results/sharpe_results.py +448 -0
- ml4t/diagnostic/results/signal_results/__init__.py +74 -0
- ml4t/diagnostic/results/signal_results/ic.py +581 -0
- ml4t/diagnostic/results/signal_results/irtc.py +110 -0
- ml4t/diagnostic/results/signal_results/quantile.py +392 -0
- ml4t/diagnostic/results/signal_results/tearsheet.py +456 -0
- ml4t/diagnostic/results/signal_results/turnover.py +213 -0
- ml4t/diagnostic/results/signal_results/validation.py +147 -0
- ml4t/diagnostic/signal/AGENT.md +17 -0
- ml4t/diagnostic/signal/__init__.py +69 -0
- ml4t/diagnostic/signal/_report.py +152 -0
- ml4t/diagnostic/signal/_utils.py +261 -0
- ml4t/diagnostic/signal/core.py +275 -0
- ml4t/diagnostic/signal/quantile.py +148 -0
- ml4t/diagnostic/signal/result.py +214 -0
- ml4t/diagnostic/signal/signal_ic.py +129 -0
- ml4t/diagnostic/signal/turnover.py +182 -0
- ml4t/diagnostic/splitters/AGENT.md +19 -0
- ml4t/diagnostic/splitters/__init__.py +36 -0
- ml4t/diagnostic/splitters/base.py +501 -0
- ml4t/diagnostic/splitters/calendar.py +421 -0
- ml4t/diagnostic/splitters/calendar_config.py +91 -0
- ml4t/diagnostic/splitters/combinatorial.py +1064 -0
- ml4t/diagnostic/splitters/config.py +322 -0
- ml4t/diagnostic/splitters/cpcv/__init__.py +57 -0
- ml4t/diagnostic/splitters/cpcv/combinations.py +119 -0
- ml4t/diagnostic/splitters/cpcv/partitioning.py +263 -0
- ml4t/diagnostic/splitters/cpcv/purge_engine.py +379 -0
- ml4t/diagnostic/splitters/cpcv/windows.py +190 -0
- ml4t/diagnostic/splitters/group_isolation.py +329 -0
- ml4t/diagnostic/splitters/persistence.py +316 -0
- ml4t/diagnostic/splitters/utils.py +207 -0
- ml4t/diagnostic/splitters/walk_forward.py +757 -0
- ml4t/diagnostic/utils/__init__.py +42 -0
- ml4t/diagnostic/utils/config.py +542 -0
- ml4t/diagnostic/utils/dependencies.py +318 -0
- ml4t/diagnostic/utils/sessions.py +127 -0
- ml4t/diagnostic/validation/__init__.py +54 -0
- ml4t/diagnostic/validation/dataframe.py +274 -0
- ml4t/diagnostic/validation/returns.py +280 -0
- ml4t/diagnostic/validation/timeseries.py +299 -0
- ml4t/diagnostic/visualization/AGENT.md +19 -0
- ml4t/diagnostic/visualization/__init__.py +223 -0
- ml4t/diagnostic/visualization/backtest/__init__.py +98 -0
- ml4t/diagnostic/visualization/backtest/cost_attribution.py +762 -0
- ml4t/diagnostic/visualization/backtest/executive_summary.py +895 -0
- ml4t/diagnostic/visualization/backtest/interactive_controls.py +673 -0
- ml4t/diagnostic/visualization/backtest/statistical_validity.py +874 -0
- ml4t/diagnostic/visualization/backtest/tearsheet.py +565 -0
- ml4t/diagnostic/visualization/backtest/template_system.py +373 -0
- ml4t/diagnostic/visualization/backtest/trade_plots.py +1172 -0
- ml4t/diagnostic/visualization/barrier_plots.py +782 -0
- ml4t/diagnostic/visualization/core.py +1060 -0
- ml4t/diagnostic/visualization/dashboards/__init__.py +36 -0
- ml4t/diagnostic/visualization/dashboards/base.py +582 -0
- ml4t/diagnostic/visualization/dashboards/importance.py +801 -0
- ml4t/diagnostic/visualization/dashboards/interaction.py +263 -0
- ml4t/diagnostic/visualization/dashboards.py +43 -0
- ml4t/diagnostic/visualization/data_extraction/__init__.py +48 -0
- ml4t/diagnostic/visualization/data_extraction/importance.py +649 -0
- ml4t/diagnostic/visualization/data_extraction/interaction.py +504 -0
- ml4t/diagnostic/visualization/data_extraction/types.py +113 -0
- ml4t/diagnostic/visualization/data_extraction/validation.py +66 -0
- ml4t/diagnostic/visualization/feature_plots.py +888 -0
- ml4t/diagnostic/visualization/interaction_plots.py +618 -0
- ml4t/diagnostic/visualization/portfolio/__init__.py +41 -0
- ml4t/diagnostic/visualization/portfolio/dashboard.py +514 -0
- ml4t/diagnostic/visualization/portfolio/drawdown_plots.py +341 -0
- ml4t/diagnostic/visualization/portfolio/returns_plots.py +487 -0
- ml4t/diagnostic/visualization/portfolio/risk_plots.py +301 -0
- ml4t/diagnostic/visualization/report_generation.py +1343 -0
- ml4t/diagnostic/visualization/signal/__init__.py +103 -0
- ml4t/diagnostic/visualization/signal/dashboard.py +911 -0
- ml4t/diagnostic/visualization/signal/event_plots.py +514 -0
- ml4t/diagnostic/visualization/signal/ic_plots.py +635 -0
- ml4t/diagnostic/visualization/signal/multi_signal_dashboard.py +974 -0
- ml4t/diagnostic/visualization/signal/multi_signal_plots.py +603 -0
- ml4t/diagnostic/visualization/signal/quantile_plots.py +625 -0
- ml4t/diagnostic/visualization/signal/turnover_plots.py +400 -0
- ml4t/diagnostic/visualization/trade_shap/__init__.py +90 -0
- ml4t_diagnostic-0.1.0a1.dist-info/METADATA +1044 -0
- ml4t_diagnostic-0.1.0a1.dist-info/RECORD +242 -0
- ml4t_diagnostic-0.1.0a1.dist-info/WHEEL +4 -0
- ml4t_diagnostic-0.1.0a1.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,318 @@
|
|
|
1
|
+
"""Optional dependency checking and validation utilities.
|
|
2
|
+
|
|
3
|
+
This module provides centralized dependency checking for optional ML libraries.
|
|
4
|
+
It ensures clear error messages and graceful degradation when dependencies
|
|
5
|
+
are unavailable.
|
|
6
|
+
|
|
7
|
+
Example:
|
|
8
|
+
>>> from ml4t.diagnostic.utils.dependencies import check_dependency, DEPS
|
|
9
|
+
>>>
|
|
10
|
+
>>> # Check if LightGBM is available
|
|
11
|
+
>>> if check_dependency("lightgbm"):
|
|
12
|
+
... import lightgbm as lgb
|
|
13
|
+
... # Use LightGBM
|
|
14
|
+
... else:
|
|
15
|
+
... print("LightGBM not available, using fallback")
|
|
16
|
+
>>>
|
|
17
|
+
>>> # Get dependency information
|
|
18
|
+
>>> print(DEPS.lightgbm.install_cmd) # pip install lightgbm
|
|
19
|
+
>>> print(DEPS.lightgbm.purpose) # Feature importance, boosting models
|
|
20
|
+
"""
|
|
21
|
+
|
|
22
|
+
from __future__ import annotations
|
|
23
|
+
|
|
24
|
+
import importlib
|
|
25
|
+
import warnings
|
|
26
|
+
from dataclasses import dataclass
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
@dataclass
class DependencyInfo:
    """Metadata record for a single optional dependency.

    Attributes:
        name: Package name (e.g., "lightgbm")
        import_name: Module name handed to ``importlib.import_module``
        install_cmd: pip install command shown in messages
        purpose: What this dependency is used for
        features: List of features requiring this dependency
        alternatives: Alternative packages that can be used instead;
            ``None`` is replaced by an empty list after construction
    """

    name: str
    import_name: str
    install_cmd: str
    purpose: str
    features: list[str]
    alternatives: list[str] | None = None

    def __post_init__(self) -> None:
        # Normalize the "not given" sentinel so the message builders below
        # can treat ``alternatives`` as a plain (possibly empty) list.
        if self.alternatives is None:
            self.alternatives = []

    @property
    def is_available(self) -> bool:
        """Return True when ``import_name`` can be imported, False on ImportError."""
        try:
            importlib.import_module(self.import_name)
        except ImportError:
            return False
        return True

    def require(self, feature: str | None = None) -> None:
        """Fail with an actionable message when the dependency cannot be imported.

        Args:
            feature: Specific feature name requesting this dependency

        Raises:
            ImportError: If dependency is not available
        """
        if self.is_available:
            return

        # Assemble the message from fragments; optional parts are only
        # appended when they carry information.
        pieces = [f"{self.name} is required"]
        if feature:
            pieces.append(f" for {feature}")
        pieces.append(f". Install with: {self.install_cmd}")
        if self.alternatives:
            pieces.append(f"\n Alternatives: {', '.join(self.alternatives)}")

        raise ImportError("".join(pieces))

    def warn_if_missing(self, feature: str | None = None, action: str = "skipping") -> bool:
        """Emit a ``UserWarning`` when the dependency is missing.

        Args:
            feature: Specific feature name requesting this dependency
            action: What will happen without this dependency (e.g., "skipping", "using fallback")

        Returns:
            bool: True if available, False if missing
        """
        if self.is_available:
            return True

        pieces = [f"{self.name} not available - {action}"]
        if feature:
            pieces.append(f" {feature}")
        pieces.append(f". Install with: {self.install_cmd}")
        if self.alternatives:
            pieces.append(f" (or use: {', '.join(self.alternatives)})")

        # stacklevel=2 attributes the warning to the caller of this method.
        warnings.warn("".join(pieces), UserWarning, stacklevel=2)
        return False
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
class OptionalDependencies:
    """Registry of all optional dependencies with their metadata.

    Entries are stored in ``self._deps`` keyed by a lowercase identifier and
    can be reached as attributes (``DEPS.lightgbm``), as items
    (``DEPS["lightgbm"]``) or through ``get``.
    """

    def __init__(self) -> None:
        self._deps: dict[str, DependencyInfo] = {}
        self._register_dependencies()

    def _register_dependencies(self) -> None:
        """Register all known optional dependencies."""

        # ML Libraries
        self._deps["lightgbm"] = DependencyInfo(
            name="LightGBM",
            import_name="lightgbm",
            install_cmd="pip install lightgbm",
            purpose="Feature importance (MDI, permutation), boosting models",
            features=[
                "FeatureOutcome.run_analysis (ML importance)",
                "MDI feature importance",
                "Permutation importance",
            ],
            alternatives=["xgboost", "scikit-learn RandomForest"],
        )

        self._deps["xgboost"] = DependencyInfo(
            name="XGBoost",
            import_name="xgboost",
            install_cmd="pip install xgboost",
            purpose="Domain classifier drift detection, boosting models",
            features=[
                "compute_domain_classifier_drift (XGBoost backend)",
                "Drift detection with XGBoost",
            ],
            alternatives=["lightgbm", "scikit-learn RandomForest"],
        )

        self._deps["shap"] = DependencyInfo(
            name="SHAP",
            import_name="shap",
            install_cmd="pip install shap",
            purpose="Shapley value feature importance and interactions",
            features=[
                "SHAP-based feature importance",
                "Feature interactions analysis",
                "Model interpretation",
            ],
            alternatives=["Permutation importance", "MDI importance"],
        )

        # Other optional dependencies
        self._deps["plotly"] = DependencyInfo(
            name="Plotly",
            import_name="plotly",
            install_cmd="pip install plotly",
            purpose="Interactive visualizations and dashboards",
            features=[
                "create_evaluation_dashboard",
                "Interactive plots",
                "HTML reports",
            ],
            alternatives=["matplotlib", "seaborn"],
        )

    def __getattr__(self, name: str) -> DependencyInfo:
        """Access dependencies as attributes (e.g., DEPS.lightgbm).

        Raises:
            AttributeError: If ``name`` is not a registered dependency.
        """
        # __getattr__ only runs after normal lookup fails. Read the registry
        # straight from the instance dict: on an instance whose __init__ has
        # not run yet (copy/pickle reconstruction, hasattr probes for dunder
        # hooks) ``self._deps`` would re-enter __getattr__ and recurse until
        # RecursionError instead of reporting a missing attribute.
        deps = self.__dict__.get("_deps")
        if deps is not None and name in deps:
            return deps[name]
        raise AttributeError(f"Unknown dependency: {name}")

    def __getitem__(self, name: str) -> DependencyInfo:
        """Access dependencies as items (e.g., DEPS["lightgbm"]).

        Raises:
            KeyError: If ``name`` is not a registered dependency.
        """
        return self._deps[name]

    def get(self, name: str, default=None) -> DependencyInfo | None:
        """Get dependency info, return default if not found."""
        return self._deps.get(name, default)

    def check(self, name: str) -> bool:
        """Check if a dependency is available.

        Names that are not registered are reported as unavailable (False).
        """
        info = self._deps.get(name)
        return info.is_available if info is not None else False

    def check_multiple(self, names: list[str]) -> dict[str, bool]:
        """Check availability of multiple dependencies.

        Args:
            names: List of dependency names to check

        Returns:
            Dict mapping dependency name to availability status
        """
        return {name: self.check(name) for name in names}

    def get_missing(self, names: list[str]) -> list[str]:
        """Get list of missing dependencies from a list.

        Unregistered names count as missing because ``check`` returns False
        for them.

        Args:
            names: List of dependency names to check

        Returns:
            List of missing dependency names
        """
        return [name for name in names if not self.check(name)]

    def warn_missing(self, names: list[str], feature: str | None = None) -> list[str]:
        """Warn about missing dependencies, return list of missing ones.

        Unlike ``get_missing``, names that are not registered are silently
        ignored here: they produce no warning and are not returned.

        Args:
            names: List of dependency names to check
            feature: Feature name using these dependencies

        Returns:
            List of missing dependency names
        """
        missing = []
        for name in names:
            info = self._deps.get(name)
            # warn_if_missing both emits the warning and reports availability,
            # so one call replaces a separate is_available probe (each probe
            # is a real import attempt).
            if info is not None and not info.warn_if_missing(feature):
                missing.append(name)
        return missing

    def summary(self) -> str:
        """Generate summary of all dependencies and their status.

        Returns:
            Multi-line text with one status row per registered dependency
            (sorted by registry key) and an install hint under missing ones.
        """
        lines = ["Optional Dependencies Status:"]
        lines.append("=" * 60)

        for _name, info in sorted(self._deps.items()):
            # Probe once per dependency; is_available attempts an import.
            available = info.is_available
            status = "✓ Installed" if available else "✗ Missing"
            lines.append(f"{info.name:15} {status:15} {info.purpose}")
            if not available:
                lines.append(f" → Install: {info.install_cmd}")

        return "\n".join(lines)
|
|
240
|
+
|
|
241
|
+
|
|
242
|
+
# Global instance
|
|
243
|
+
DEPS = OptionalDependencies()
|
|
244
|
+
|
|
245
|
+
|
|
246
|
+
def check_dependency(name: str) -> bool:
    """Report whether a registered optional dependency can be used.

    Delegates to the module-level ``DEPS`` registry.

    Args:
        name: Dependency name (e.g., "lightgbm", "shap")

    Returns:
        bool: True if available, False otherwise

    Example:
        >>> if check_dependency("lightgbm"):
        ...     import lightgbm as lgb
        ...     # Use LightGBM
    """
    is_installed = DEPS.check(name)
    return is_installed
|
|
261
|
+
|
|
262
|
+
|
|
263
|
+
def require_dependency(name: str, feature: str | None = None) -> None:
    """Insist that a registered dependency is importable.

    Args:
        name: Dependency name
        feature: Feature name requiring this dependency

    Raises:
        ImportError: If dependency is not available, or if ``name`` is not
            a registered dependency

    Example:
        >>> require_dependency("shap", "SHAP analysis")
        >>> import shap  # Safe to import now
    """
    info = DEPS.get(name)
    if info is None:
        # Unregistered names are reported with the same exception type so
        # callers need a single ``except ImportError``.
        raise ImportError(f"Unknown dependency: {name}")
    info.require(feature)
|
|
281
|
+
|
|
282
|
+
|
|
283
|
+
def warn_if_missing(name: str, feature: str | None = None, action: str = "skipping") -> bool:
    """Warn when a dependency cannot be used and report its availability.

    Args:
        name: Dependency name
        feature: Feature name requesting this dependency
        action: What will happen without this dependency

    Returns:
        bool: True if available, False if missing (an unregistered ``name``
        also yields a warning and False)

    Example:
        >>> if warn_if_missing("lightgbm", "feature importance", "using fallback"):
        ...     import lightgbm as lgb
        ...     # Use LightGBM
        ... else:
        ...     # Use fallback method
    """
    info = DEPS.get(name)
    if info is None:
        warnings.warn(f"Unknown dependency: {name}", stacklevel=2)
        return False
    return info.warn_if_missing(feature, action)
|
|
305
|
+
|
|
306
|
+
|
|
307
|
+
def get_dependency_summary() -> str:
    """Return the status report produced by the module-level ``DEPS`` registry.

    Returns:
        str: Formatted summary of dependencies

    Example:
        >>> print(get_dependency_summary())
        Optional Dependencies Status:
        ...
    """
    report = DEPS.summary()
    return report
|
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
"""Session assignment utilities for financial time-series data.
|
|
2
|
+
|
|
3
|
+
This module provides utilities to assign session dates to intraday data,
|
|
4
|
+
enabling session-aware cross-validation where sessions are the atomic unit.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import pandas as pd
|
|
8
|
+
|
|
9
|
+
try:
|
|
10
|
+
import pandas_market_calendars as mcal # noqa: F401 (availability check)
|
|
11
|
+
|
|
12
|
+
HAS_MARKET_CALENDARS = True
|
|
13
|
+
except ImportError:
|
|
14
|
+
HAS_MARKET_CALENDARS = False
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def assign_session_dates(
    df: pd.DataFrame,
    calendar: str = "CME_Equity",
    timezone: str = "UTC",
    session_column: str = "session_date",
) -> pd.DataFrame:
    """Return a copy of ``df`` with a trading-session column added.

    Each index timestamp is mapped to a session by
    ``TradingCalendar.get_sessions``; the result is stored under
    ``session_column`` on a copy, leaving the input frame untouched.
    Sessions are intended as the atomic unit for cross-validation.

    Parameters
    ----------
    df : pd.DataFrame
        DataFrame with DatetimeIndex (may be tz-naive or tz-aware)
    calendar : str, default='CME_Equity'
        Name of pandas_market_calendars calendar
        Examples: 'CME_Equity', 'NYSE', 'LSE', 'TSX'
    timezone : str, default='UTC'
        Timezone for calendar operations
    session_column : str, default='session_date'
        Name of the column to add with session dates

    Returns
    -------
    pd.DataFrame
        Copy of ``df`` with the session column added

    Raises
    ------
    ImportError
        If pandas_market_calendars is not installed
    ValueError
        If ``df.index`` is not a ``pd.DatetimeIndex``

    Notes
    -----
    - Session boundaries are whatever the selected exchange calendar defines
    - If df already has the session_column, it will be overwritten in the copy

    Examples
    --------
    >>> df = pd.read_parquet('nq_data.parquet')  # Has DatetimeIndex
    >>> df = assign_session_dates(df, calendar='CME_Equity', timezone='America/Chicago')
    >>> df.groupby('session_date').size()  # Samples per session

    For data that already has session_date:
    >>> if 'session_date' not in df.columns:
    ...     df = assign_session_dates(df)
    """
    if not HAS_MARKET_CALENDARS:
        raise ImportError(
            "pandas_market_calendars is required for session assignment. "
            "Install with: pip install pandas_market_calendars"
        )

    timestamps = df.index
    if not isinstance(timestamps, pd.DatetimeIndex):
        raise ValueError(f"DataFrame must have a DatetimeIndex. Got index type: {type(timestamps)}")

    # Deferred imports: pulling these in at module import time would create
    # a circular dependency with the splitters package.
    from ml4t.diagnostic.splitters.calendar import TradingCalendar
    from ml4t.diagnostic.splitters.calendar_config import CalendarConfig

    # NOTE(review): localize_naive=True presumably localizes tz-naive
    # timestamps to `timezone`; confirm against CalendarConfig.
    calendar_settings = CalendarConfig(exchange=calendar, timezone=timezone, localize_naive=True)
    session_labels = TradingCalendar(calendar_settings).get_sessions(timestamps)

    # Work on a copy so the caller's frame is not modified.
    annotated = df.copy()
    annotated[session_column] = session_labels
    return annotated
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def get_complete_sessions(
    df: pd.DataFrame, session_column: str = "session_date", min_samples: int = 100
) -> pd.Series:
    """Select the sessions that contain at least ``min_samples`` rows.

    Parameters
    ----------
    df : pd.DataFrame
        DataFrame with session_date column
    session_column : str, default='session_date'
        Name of the column containing session dates
    min_samples : int, default=100
        Minimum samples per session to consider complete

    Returns
    -------
    pd.Series
        Session dates that are complete (have >= min_samples), named
        ``session_column`` and indexed by the session values themselves

    Raises
    ------
    ValueError
        If ``session_column`` is not a column of ``df``

    Examples
    --------
    >>> df = assign_session_dates(df)
    >>> complete = get_complete_sessions(df, min_samples=500)
    >>> df_clean = df[df['session_date'].isin(complete)]
    """
    if session_column not in df.columns:
        raise ValueError(
            f"DataFrame does not have '{session_column}' column. Run assign_session_dates() first."
        )

    # Rows per session, then keep only sessions meeting the threshold.
    rows_per_session = df.groupby(session_column).size()
    is_complete = rows_per_session >= min_samples
    kept = rows_per_session.loc[is_complete]

    return kept.index.to_series(name=session_column)
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
"""Data validation utilities for ML4T Diagnostic inputs.
|
|
2
|
+
|
|
3
|
+
Provides comprehensive validation for DataFrames, time series, returns,
|
|
4
|
+
and other common financial data inputs.
|
|
5
|
+
|
|
6
|
+
Examples:
|
|
7
|
+
>>> from ml4t.diagnostic.validation import validate_dataframe, validate_returns
|
|
8
|
+
>>>
|
|
9
|
+
>>> # Validate DataFrame structure
|
|
10
|
+
>>> validate_dataframe(
|
|
11
|
+
... df,
|
|
12
|
+
... required_columns=["close", "volume"],
|
|
13
|
+
... numeric_columns=["close", "volume"]
|
|
14
|
+
... )
|
|
15
|
+
>>>
|
|
16
|
+
>>> # Validate returns series
|
|
17
|
+
>>> validate_returns(returns, allow_nulls=False, bounds=(-0.5, 0.5))
|
|
18
|
+
"""
|
|
19
|
+
|
|
20
|
+
from ml4t.diagnostic.validation.dataframe import (
|
|
21
|
+
DataFrameValidator,
|
|
22
|
+
ValidationError,
|
|
23
|
+
validate_dataframe,
|
|
24
|
+
validate_schema,
|
|
25
|
+
)
|
|
26
|
+
from ml4t.diagnostic.validation.returns import (
|
|
27
|
+
ReturnsValidator,
|
|
28
|
+
validate_bounds,
|
|
29
|
+
validate_returns,
|
|
30
|
+
)
|
|
31
|
+
from ml4t.diagnostic.validation.timeseries import (
|
|
32
|
+
TimeSeriesValidator,
|
|
33
|
+
validate_frequency,
|
|
34
|
+
validate_index,
|
|
35
|
+
validate_timeseries,
|
|
36
|
+
)
|
|
37
|
+
|
|
38
|
+
__all__ = [
|
|
39
|
+
# Core
|
|
40
|
+
"ValidationError",
|
|
41
|
+
# DataFrame validation
|
|
42
|
+
"DataFrameValidator",
|
|
43
|
+
"validate_dataframe",
|
|
44
|
+
"validate_schema",
|
|
45
|
+
# Time series validation
|
|
46
|
+
"TimeSeriesValidator",
|
|
47
|
+
"validate_timeseries",
|
|
48
|
+
"validate_index",
|
|
49
|
+
"validate_frequency",
|
|
50
|
+
# Returns validation
|
|
51
|
+
"ReturnsValidator",
|
|
52
|
+
"validate_returns",
|
|
53
|
+
"validate_bounds",
|
|
54
|
+
]
|