ml4t-diagnostic 0.1.0a1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ml4t/diagnostic/AGENT.md +25 -0
- ml4t/diagnostic/__init__.py +166 -0
- ml4t/diagnostic/backends/__init__.py +10 -0
- ml4t/diagnostic/backends/adapter.py +192 -0
- ml4t/diagnostic/backends/polars_backend.py +899 -0
- ml4t/diagnostic/caching/__init__.py +40 -0
- ml4t/diagnostic/caching/cache.py +331 -0
- ml4t/diagnostic/caching/decorators.py +131 -0
- ml4t/diagnostic/caching/smart_cache.py +339 -0
- ml4t/diagnostic/config/AGENT.md +24 -0
- ml4t/diagnostic/config/README.md +267 -0
- ml4t/diagnostic/config/__init__.py +219 -0
- ml4t/diagnostic/config/barrier_config.py +277 -0
- ml4t/diagnostic/config/base.py +301 -0
- ml4t/diagnostic/config/event_config.py +148 -0
- ml4t/diagnostic/config/feature_config.py +404 -0
- ml4t/diagnostic/config/multi_signal_config.py +55 -0
- ml4t/diagnostic/config/portfolio_config.py +215 -0
- ml4t/diagnostic/config/report_config.py +391 -0
- ml4t/diagnostic/config/sharpe_config.py +202 -0
- ml4t/diagnostic/config/signal_config.py +206 -0
- ml4t/diagnostic/config/trade_analysis_config.py +310 -0
- ml4t/diagnostic/config/validation.py +279 -0
- ml4t/diagnostic/core/__init__.py +29 -0
- ml4t/diagnostic/core/numba_utils.py +315 -0
- ml4t/diagnostic/core/purging.py +372 -0
- ml4t/diagnostic/core/sampling.py +471 -0
- ml4t/diagnostic/errors/__init__.py +205 -0
- ml4t/diagnostic/evaluation/AGENT.md +26 -0
- ml4t/diagnostic/evaluation/__init__.py +437 -0
- ml4t/diagnostic/evaluation/autocorrelation.py +531 -0
- ml4t/diagnostic/evaluation/barrier_analysis.py +1050 -0
- ml4t/diagnostic/evaluation/binary_metrics.py +910 -0
- ml4t/diagnostic/evaluation/dashboard.py +715 -0
- ml4t/diagnostic/evaluation/diagnostic_plots.py +1037 -0
- ml4t/diagnostic/evaluation/distribution/__init__.py +499 -0
- ml4t/diagnostic/evaluation/distribution/moments.py +299 -0
- ml4t/diagnostic/evaluation/distribution/tails.py +777 -0
- ml4t/diagnostic/evaluation/distribution/tests.py +470 -0
- ml4t/diagnostic/evaluation/drift/__init__.py +139 -0
- ml4t/diagnostic/evaluation/drift/analysis.py +432 -0
- ml4t/diagnostic/evaluation/drift/domain_classifier.py +517 -0
- ml4t/diagnostic/evaluation/drift/population_stability_index.py +310 -0
- ml4t/diagnostic/evaluation/drift/wasserstein.py +388 -0
- ml4t/diagnostic/evaluation/event_analysis.py +647 -0
- ml4t/diagnostic/evaluation/excursion.py +390 -0
- ml4t/diagnostic/evaluation/feature_diagnostics.py +873 -0
- ml4t/diagnostic/evaluation/feature_outcome.py +666 -0
- ml4t/diagnostic/evaluation/framework.py +935 -0
- ml4t/diagnostic/evaluation/metric_registry.py +255 -0
- ml4t/diagnostic/evaluation/metrics/AGENT.md +23 -0
- ml4t/diagnostic/evaluation/metrics/__init__.py +133 -0
- ml4t/diagnostic/evaluation/metrics/basic.py +160 -0
- ml4t/diagnostic/evaluation/metrics/conditional_ic.py +469 -0
- ml4t/diagnostic/evaluation/metrics/feature_outcome.py +475 -0
- ml4t/diagnostic/evaluation/metrics/ic_statistics.py +446 -0
- ml4t/diagnostic/evaluation/metrics/importance_analysis.py +338 -0
- ml4t/diagnostic/evaluation/metrics/importance_classical.py +375 -0
- ml4t/diagnostic/evaluation/metrics/importance_mda.py +371 -0
- ml4t/diagnostic/evaluation/metrics/importance_shap.py +715 -0
- ml4t/diagnostic/evaluation/metrics/information_coefficient.py +527 -0
- ml4t/diagnostic/evaluation/metrics/interactions.py +772 -0
- ml4t/diagnostic/evaluation/metrics/monotonicity.py +226 -0
- ml4t/diagnostic/evaluation/metrics/risk_adjusted.py +324 -0
- ml4t/diagnostic/evaluation/multi_signal.py +550 -0
- ml4t/diagnostic/evaluation/portfolio_analysis/__init__.py +83 -0
- ml4t/diagnostic/evaluation/portfolio_analysis/analysis.py +734 -0
- ml4t/diagnostic/evaluation/portfolio_analysis/metrics.py +589 -0
- ml4t/diagnostic/evaluation/portfolio_analysis/results.py +334 -0
- ml4t/diagnostic/evaluation/report_generation.py +824 -0
- ml4t/diagnostic/evaluation/signal_selector.py +452 -0
- ml4t/diagnostic/evaluation/stat_registry.py +139 -0
- ml4t/diagnostic/evaluation/stationarity/__init__.py +97 -0
- ml4t/diagnostic/evaluation/stationarity/analysis.py +518 -0
- ml4t/diagnostic/evaluation/stationarity/augmented_dickey_fuller.py +296 -0
- ml4t/diagnostic/evaluation/stationarity/kpss_test.py +308 -0
- ml4t/diagnostic/evaluation/stationarity/phillips_perron.py +365 -0
- ml4t/diagnostic/evaluation/stats/AGENT.md +43 -0
- ml4t/diagnostic/evaluation/stats/__init__.py +191 -0
- ml4t/diagnostic/evaluation/stats/backtest_overfitting.py +219 -0
- ml4t/diagnostic/evaluation/stats/bootstrap.py +228 -0
- ml4t/diagnostic/evaluation/stats/deflated_sharpe_ratio.py +591 -0
- ml4t/diagnostic/evaluation/stats/false_discovery_rate.py +295 -0
- ml4t/diagnostic/evaluation/stats/hac_standard_errors.py +108 -0
- ml4t/diagnostic/evaluation/stats/minimum_track_record.py +408 -0
- ml4t/diagnostic/evaluation/stats/moments.py +164 -0
- ml4t/diagnostic/evaluation/stats/rademacher_adjustment.py +436 -0
- ml4t/diagnostic/evaluation/stats/reality_check.py +155 -0
- ml4t/diagnostic/evaluation/stats/sharpe_inference.py +219 -0
- ml4t/diagnostic/evaluation/themes.py +330 -0
- ml4t/diagnostic/evaluation/threshold_analysis.py +957 -0
- ml4t/diagnostic/evaluation/trade_analysis.py +1136 -0
- ml4t/diagnostic/evaluation/trade_dashboard/__init__.py +32 -0
- ml4t/diagnostic/evaluation/trade_dashboard/app.py +315 -0
- ml4t/diagnostic/evaluation/trade_dashboard/export/__init__.py +18 -0
- ml4t/diagnostic/evaluation/trade_dashboard/export/csv.py +82 -0
- ml4t/diagnostic/evaluation/trade_dashboard/export/html.py +276 -0
- ml4t/diagnostic/evaluation/trade_dashboard/io.py +166 -0
- ml4t/diagnostic/evaluation/trade_dashboard/normalize.py +304 -0
- ml4t/diagnostic/evaluation/trade_dashboard/stats.py +386 -0
- ml4t/diagnostic/evaluation/trade_dashboard/style.py +79 -0
- ml4t/diagnostic/evaluation/trade_dashboard/tabs/__init__.py +21 -0
- ml4t/diagnostic/evaluation/trade_dashboard/tabs/patterns.py +354 -0
- ml4t/diagnostic/evaluation/trade_dashboard/tabs/shap_analysis.py +280 -0
- ml4t/diagnostic/evaluation/trade_dashboard/tabs/stat_validation.py +186 -0
- ml4t/diagnostic/evaluation/trade_dashboard/tabs/worst_trades.py +236 -0
- ml4t/diagnostic/evaluation/trade_dashboard/types.py +129 -0
- ml4t/diagnostic/evaluation/trade_shap/__init__.py +102 -0
- ml4t/diagnostic/evaluation/trade_shap/alignment.py +188 -0
- ml4t/diagnostic/evaluation/trade_shap/characterize.py +413 -0
- ml4t/diagnostic/evaluation/trade_shap/cluster.py +302 -0
- ml4t/diagnostic/evaluation/trade_shap/explain.py +208 -0
- ml4t/diagnostic/evaluation/trade_shap/hypotheses/__init__.py +23 -0
- ml4t/diagnostic/evaluation/trade_shap/hypotheses/generator.py +290 -0
- ml4t/diagnostic/evaluation/trade_shap/hypotheses/matcher.py +251 -0
- ml4t/diagnostic/evaluation/trade_shap/hypotheses/templates.yaml +467 -0
- ml4t/diagnostic/evaluation/trade_shap/models.py +386 -0
- ml4t/diagnostic/evaluation/trade_shap/normalize.py +116 -0
- ml4t/diagnostic/evaluation/trade_shap/pipeline.py +263 -0
- ml4t/diagnostic/evaluation/trade_shap_dashboard.py +283 -0
- ml4t/diagnostic/evaluation/trade_shap_diagnostics.py +588 -0
- ml4t/diagnostic/evaluation/validated_cv.py +535 -0
- ml4t/diagnostic/evaluation/visualization.py +1050 -0
- ml4t/diagnostic/evaluation/volatility/__init__.py +45 -0
- ml4t/diagnostic/evaluation/volatility/analysis.py +351 -0
- ml4t/diagnostic/evaluation/volatility/arch.py +258 -0
- ml4t/diagnostic/evaluation/volatility/garch.py +460 -0
- ml4t/diagnostic/integration/__init__.py +48 -0
- ml4t/diagnostic/integration/backtest_contract.py +671 -0
- ml4t/diagnostic/integration/data_contract.py +316 -0
- ml4t/diagnostic/integration/engineer_contract.py +226 -0
- ml4t/diagnostic/logging/__init__.py +77 -0
- ml4t/diagnostic/logging/logger.py +245 -0
- ml4t/diagnostic/logging/performance.py +234 -0
- ml4t/diagnostic/logging/progress.py +234 -0
- ml4t/diagnostic/logging/wandb.py +412 -0
- ml4t/diagnostic/metrics/__init__.py +9 -0
- ml4t/diagnostic/metrics/percentiles.py +128 -0
- ml4t/diagnostic/py.typed +1 -0
- ml4t/diagnostic/reporting/__init__.py +43 -0
- ml4t/diagnostic/reporting/base.py +130 -0
- ml4t/diagnostic/reporting/html_renderer.py +275 -0
- ml4t/diagnostic/reporting/json_renderer.py +51 -0
- ml4t/diagnostic/reporting/markdown_renderer.py +117 -0
- ml4t/diagnostic/results/AGENT.md +24 -0
- ml4t/diagnostic/results/__init__.py +105 -0
- ml4t/diagnostic/results/barrier_results/__init__.py +36 -0
- ml4t/diagnostic/results/barrier_results/hit_rate.py +304 -0
- ml4t/diagnostic/results/barrier_results/precision_recall.py +266 -0
- ml4t/diagnostic/results/barrier_results/profit_factor.py +297 -0
- ml4t/diagnostic/results/barrier_results/tearsheet.py +397 -0
- ml4t/diagnostic/results/barrier_results/time_to_target.py +305 -0
- ml4t/diagnostic/results/barrier_results/validation.py +38 -0
- ml4t/diagnostic/results/base.py +177 -0
- ml4t/diagnostic/results/event_results.py +349 -0
- ml4t/diagnostic/results/feature_results.py +787 -0
- ml4t/diagnostic/results/multi_signal_results.py +431 -0
- ml4t/diagnostic/results/portfolio_results.py +281 -0
- ml4t/diagnostic/results/sharpe_results.py +448 -0
- ml4t/diagnostic/results/signal_results/__init__.py +74 -0
- ml4t/diagnostic/results/signal_results/ic.py +581 -0
- ml4t/diagnostic/results/signal_results/irtc.py +110 -0
- ml4t/diagnostic/results/signal_results/quantile.py +392 -0
- ml4t/diagnostic/results/signal_results/tearsheet.py +456 -0
- ml4t/diagnostic/results/signal_results/turnover.py +213 -0
- ml4t/diagnostic/results/signal_results/validation.py +147 -0
- ml4t/diagnostic/signal/AGENT.md +17 -0
- ml4t/diagnostic/signal/__init__.py +69 -0
- ml4t/diagnostic/signal/_report.py +152 -0
- ml4t/diagnostic/signal/_utils.py +261 -0
- ml4t/diagnostic/signal/core.py +275 -0
- ml4t/diagnostic/signal/quantile.py +148 -0
- ml4t/diagnostic/signal/result.py +214 -0
- ml4t/diagnostic/signal/signal_ic.py +129 -0
- ml4t/diagnostic/signal/turnover.py +182 -0
- ml4t/diagnostic/splitters/AGENT.md +19 -0
- ml4t/diagnostic/splitters/__init__.py +36 -0
- ml4t/diagnostic/splitters/base.py +501 -0
- ml4t/diagnostic/splitters/calendar.py +421 -0
- ml4t/diagnostic/splitters/calendar_config.py +91 -0
- ml4t/diagnostic/splitters/combinatorial.py +1064 -0
- ml4t/diagnostic/splitters/config.py +322 -0
- ml4t/diagnostic/splitters/cpcv/__init__.py +57 -0
- ml4t/diagnostic/splitters/cpcv/combinations.py +119 -0
- ml4t/diagnostic/splitters/cpcv/partitioning.py +263 -0
- ml4t/diagnostic/splitters/cpcv/purge_engine.py +379 -0
- ml4t/diagnostic/splitters/cpcv/windows.py +190 -0
- ml4t/diagnostic/splitters/group_isolation.py +329 -0
- ml4t/diagnostic/splitters/persistence.py +316 -0
- ml4t/diagnostic/splitters/utils.py +207 -0
- ml4t/diagnostic/splitters/walk_forward.py +757 -0
- ml4t/diagnostic/utils/__init__.py +42 -0
- ml4t/diagnostic/utils/config.py +542 -0
- ml4t/diagnostic/utils/dependencies.py +318 -0
- ml4t/diagnostic/utils/sessions.py +127 -0
- ml4t/diagnostic/validation/__init__.py +54 -0
- ml4t/diagnostic/validation/dataframe.py +274 -0
- ml4t/diagnostic/validation/returns.py +280 -0
- ml4t/diagnostic/validation/timeseries.py +299 -0
- ml4t/diagnostic/visualization/AGENT.md +19 -0
- ml4t/diagnostic/visualization/__init__.py +223 -0
- ml4t/diagnostic/visualization/backtest/__init__.py +98 -0
- ml4t/diagnostic/visualization/backtest/cost_attribution.py +762 -0
- ml4t/diagnostic/visualization/backtest/executive_summary.py +895 -0
- ml4t/diagnostic/visualization/backtest/interactive_controls.py +673 -0
- ml4t/diagnostic/visualization/backtest/statistical_validity.py +874 -0
- ml4t/diagnostic/visualization/backtest/tearsheet.py +565 -0
- ml4t/diagnostic/visualization/backtest/template_system.py +373 -0
- ml4t/diagnostic/visualization/backtest/trade_plots.py +1172 -0
- ml4t/diagnostic/visualization/barrier_plots.py +782 -0
- ml4t/diagnostic/visualization/core.py +1060 -0
- ml4t/diagnostic/visualization/dashboards/__init__.py +36 -0
- ml4t/diagnostic/visualization/dashboards/base.py +582 -0
- ml4t/diagnostic/visualization/dashboards/importance.py +801 -0
- ml4t/diagnostic/visualization/dashboards/interaction.py +263 -0
- ml4t/diagnostic/visualization/dashboards.py +43 -0
- ml4t/diagnostic/visualization/data_extraction/__init__.py +48 -0
- ml4t/diagnostic/visualization/data_extraction/importance.py +649 -0
- ml4t/diagnostic/visualization/data_extraction/interaction.py +504 -0
- ml4t/diagnostic/visualization/data_extraction/types.py +113 -0
- ml4t/diagnostic/visualization/data_extraction/validation.py +66 -0
- ml4t/diagnostic/visualization/feature_plots.py +888 -0
- ml4t/diagnostic/visualization/interaction_plots.py +618 -0
- ml4t/diagnostic/visualization/portfolio/__init__.py +41 -0
- ml4t/diagnostic/visualization/portfolio/dashboard.py +514 -0
- ml4t/diagnostic/visualization/portfolio/drawdown_plots.py +341 -0
- ml4t/diagnostic/visualization/portfolio/returns_plots.py +487 -0
- ml4t/diagnostic/visualization/portfolio/risk_plots.py +301 -0
- ml4t/diagnostic/visualization/report_generation.py +1343 -0
- ml4t/diagnostic/visualization/signal/__init__.py +103 -0
- ml4t/diagnostic/visualization/signal/dashboard.py +911 -0
- ml4t/diagnostic/visualization/signal/event_plots.py +514 -0
- ml4t/diagnostic/visualization/signal/ic_plots.py +635 -0
- ml4t/diagnostic/visualization/signal/multi_signal_dashboard.py +974 -0
- ml4t/diagnostic/visualization/signal/multi_signal_plots.py +603 -0
- ml4t/diagnostic/visualization/signal/quantile_plots.py +625 -0
- ml4t/diagnostic/visualization/signal/turnover_plots.py +400 -0
- ml4t/diagnostic/visualization/trade_shap/__init__.py +90 -0
- ml4t_diagnostic-0.1.0a1.dist-info/METADATA +1044 -0
- ml4t_diagnostic-0.1.0a1.dist-info/RECORD +242 -0
- ml4t_diagnostic-0.1.0a1.dist-info/WHEEL +4 -0
- ml4t_diagnostic-0.1.0a1.dist-info/licenses/LICENSE +21 -0
ml4t/diagnostic/AGENT.md
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
# ml4t.diagnostic - Package Index
|
|
2
|
+
|
|
3
|
+
## Public API
|
|
4
|
+
|
|
5
|
+
`analyze_signal`, `ValidatedCrossValidation`, `Evaluator`, `BarrierAnalysis`
|
|
6
|
+
|
|
7
|
+
## Subpackages
|
|
8
|
+
|
|
9
|
+
| Package | Purpose |
|
|
10
|
+
|---------|---------|
|
|
11
|
+
| [signal/](signal/AGENT.md) | Factor signal analysis |
|
|
12
|
+
| [splitters/](splitters/AGENT.md) | Cross-validation |
|
|
13
|
+
| [evaluation/](evaluation/AGENT.md) | Analysis framework |
|
|
14
|
+
| [visualization/](visualization/AGENT.md) | Plotly charts |
|
|
15
|
+
| [results/](results/AGENT.md) | Result dataclasses |
|
|
16
|
+
| [config/](config/AGENT.md) | Pydantic configuration |
|
|
17
|
+
|
|
18
|
+
## Large Files (>500 lines)
|
|
19
|
+
|
|
20
|
+
| File | Lines |
|
|
21
|
+
|------|-------|
|
|
22
|
+
| evaluation/stats/deflated_sharpe_ratio.py | 591 |
|
|
23
|
+
| splitters/combinatorial.py | 1064 |
|
|
24
|
+
| visualization/report_generation.py | 1343 |
|
|
25
|
+
| evaluation/barrier_analysis.py | 1050 |
|
|
@@ -0,0 +1,166 @@
|
|
|
1
|
+
"""ml4t-diagnostic - A hierarchical framework for financial time-series validation.
|
|
2
|
+
|
|
3
|
+
ml4t-diagnostic provides rigorous validation tools for financial machine learning models,
|
|
4
|
+
implementing a Four-Tier Validation Framework to combat data leakage, backtest
|
|
5
|
+
overfitting, and statistical fallacies.
|
|
6
|
+
|
|
7
|
+
Main Features
|
|
8
|
+
-------------
|
|
9
|
+
- **Cross-Validation**: CPCV, Purged Walk-Forward with proper embargo/purging
|
|
10
|
+
- **Statistical Validity**: DSR, RAS, FDR corrections for multiple testing
|
|
11
|
+
- **Feature Analysis**: IC, importance (MDI/PFI/MDA/SHAP), interactions
|
|
12
|
+
- **Trade Diagnostics**: SHAP-based error pattern analysis
|
|
13
|
+
- **Data Quality**: Integration contracts with ml4t-data
|
|
14
|
+
|
|
15
|
+
Quick Start
|
|
16
|
+
-----------
|
|
17
|
+
>>> from ml4t.diagnostic import ValidatedCrossValidation
|
|
18
|
+
>>> from ml4t.diagnostic.splitters import CombinatorialPurgedCV
|
|
19
|
+
>>>
|
|
20
|
+
>>> # One-step validated cross-validation
|
|
21
|
+
>>> vcv = ValidatedCrossValidation(n_splits=10)
|
|
22
|
+
>>> result = vcv.fit_validate(model, X, y, times)
|
|
23
|
+
>>> if result.is_significant:
|
|
24
|
+
... print(f"Sharpe: {result.sharpe:.2f}, DSR p-value: {result.dsr_pvalue:.4f}")
|
|
25
|
+
|
|
26
|
+
API Stability
|
|
27
|
+
-------------
|
|
28
|
+
This library follows semantic versioning. The public API consists of all symbols
|
|
29
|
+
exported in __all__. Breaking changes will only occur in major version bumps.
|
|
30
|
+
"""
|
|
31
|
+
|
|
32
|
+
__version__ = "0.1.0a1"
|
|
33
|
+
|
|
34
|
+
# Sub-modules for advanced usage
|
|
35
|
+
from . import backends, caching, config, core, evaluation, integration, logging, signal, splitters
|
|
36
|
+
|
|
37
|
+
# Configuration classes
|
|
38
|
+
from .config import (
|
|
39
|
+
BarrierConfig,
|
|
40
|
+
DiagnosticConfig,
|
|
41
|
+
EventConfig,
|
|
42
|
+
PortfolioConfig,
|
|
43
|
+
ReportConfig,
|
|
44
|
+
RuntimeConfig,
|
|
45
|
+
SignalConfig,
|
|
46
|
+
StatisticalConfig,
|
|
47
|
+
TradeConfig,
|
|
48
|
+
)
|
|
49
|
+
|
|
50
|
+
# Main evaluation framework
|
|
51
|
+
from .evaluation import BarrierAnalysis, EvaluationResult, Evaluator
|
|
52
|
+
|
|
53
|
+
# ValidatedCrossValidation - combines CPCV + DSR in one step
|
|
54
|
+
from .evaluation.validated_cv import ValidatedCrossValidation
|
|
55
|
+
|
|
56
|
+
# Data quality integration
|
|
57
|
+
from .integration.data_contract import (
|
|
58
|
+
AnomalyType,
|
|
59
|
+
DataAnomaly,
|
|
60
|
+
DataQualityMetrics,
|
|
61
|
+
DataQualityReport,
|
|
62
|
+
DataValidationRequest,
|
|
63
|
+
Severity,
|
|
64
|
+
)
|
|
65
|
+
|
|
66
|
+
# Signal analysis (new clean API)
|
|
67
|
+
from .signal import SignalResult, analyze_signal
|
|
68
|
+
|
|
69
|
+
# Visualization (optional - may fail if plotly not installed)
|
|
70
|
+
try:
|
|
71
|
+
from .visualization import (
|
|
72
|
+
plot_hit_rate_heatmap,
|
|
73
|
+
plot_precision_recall_curve,
|
|
74
|
+
plot_profit_factor_bar,
|
|
75
|
+
plot_time_to_target_box,
|
|
76
|
+
)
|
|
77
|
+
|
|
78
|
+
_VIZ_AVAILABLE = True
|
|
79
|
+
except ImportError:
|
|
80
|
+
_VIZ_AVAILABLE = False
|
|
81
|
+
plot_hit_rate_heatmap = None
|
|
82
|
+
plot_precision_recall_curve = None
|
|
83
|
+
plot_profit_factor_bar = None
|
|
84
|
+
plot_time_to_target_box = None
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def get_agent_docs() -> dict[str, str]:
    """Get AGENT.md documentation for AI agent navigation.

    Recursively collects every ``AGENT.md`` file located under the directory
    that contains this module and returns their contents keyed by path.
    Useful for AI agents to understand the library structure.

    Returns
    -------
    dict[str, str]
        Mapping of relative path to AGENT.md content. Paths are relative to
        the package directory and always use ``/`` as the separator, on
        every platform. Files that cannot be read or decoded as UTF-8 are
        skipped.

    Example
    -------
    >>> docs = get_agent_docs()
    >>> print(docs.keys())
    dict_keys(['AGENT.md', 'signal/AGENT.md', 'splitters/AGENT.md', ...])
    """
    from pathlib import Path

    package_dir = Path(__file__).parent
    agent_docs: dict[str, str] = {}

    # Sort the matches so the key order is deterministic; the order in which
    # rglob yields entries depends on the underlying filesystem.
    for agent_file in sorted(package_dir.rglob("AGENT.md")):
        # as_posix() keeps keys "/"-separated everywhere, matching the
        # documented 'signal/AGENT.md' form (str() yields "\\" on Windows).
        rel_path = agent_file.relative_to(package_dir).as_posix()
        try:
            # Read as UTF-8 explicitly instead of relying on the locale's
            # default encoding, so the result does not vary by platform.
            agent_docs[rel_path] = agent_file.read_text(encoding="utf-8")
        except (OSError, UnicodeDecodeError):
            # Best-effort collection: skip documents that cannot be read
            # or decoded rather than failing the whole call.
            continue

    return agent_docs
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
__all__ = [
|
|
121
|
+
# Version
|
|
122
|
+
"__version__",
|
|
123
|
+
# Agent Navigation
|
|
124
|
+
"get_agent_docs",
|
|
125
|
+
# Core Framework
|
|
126
|
+
"Evaluator",
|
|
127
|
+
"EvaluationResult",
|
|
128
|
+
"ValidatedCrossValidation",
|
|
129
|
+
# Signal Analysis (new clean API)
|
|
130
|
+
"analyze_signal",
|
|
131
|
+
"SignalResult",
|
|
132
|
+
# Barrier Analysis
|
|
133
|
+
"BarrierAnalysis",
|
|
134
|
+
# Configuration (9 primary configs re-exported from .config)
|
|
135
|
+
"DiagnosticConfig",
|
|
136
|
+
"StatisticalConfig",
|
|
137
|
+
"PortfolioConfig",
|
|
138
|
+
"TradeConfig",
|
|
139
|
+
"SignalConfig",
|
|
140
|
+
"EventConfig",
|
|
141
|
+
"BarrierConfig",
|
|
142
|
+
"ReportConfig",
|
|
143
|
+
"RuntimeConfig",
|
|
144
|
+
# Data Quality Integration
|
|
145
|
+
"DataQualityReport",
|
|
146
|
+
"DataQualityMetrics",
|
|
147
|
+
"DataAnomaly",
|
|
148
|
+
"DataValidationRequest",
|
|
149
|
+
"AnomalyType",
|
|
150
|
+
"Severity",
|
|
151
|
+
# Visualization (optional)
|
|
152
|
+
"plot_hit_rate_heatmap",
|
|
153
|
+
"plot_profit_factor_bar",
|
|
154
|
+
"plot_precision_recall_curve",
|
|
155
|
+
"plot_time_to_target_box",
|
|
156
|
+
# Sub-modules
|
|
157
|
+
"backends",
|
|
158
|
+
"caching",
|
|
159
|
+
"config",
|
|
160
|
+
"core",
|
|
161
|
+
"evaluation",
|
|
162
|
+
"integration",
|
|
163
|
+
"logging",
|
|
164
|
+
"signal",
|
|
165
|
+
"splitters",
|
|
166
|
+
]
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
"""Backend adapters for DataFrame compatibility.
|
|
2
|
+
|
|
3
|
+
This module provides adapters to seamlessly work with both Polars (internal)
|
|
4
|
+
and Pandas (compatibility) DataFrames.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from ml4t.diagnostic.backends.adapter import DataFrameAdapter
|
|
8
|
+
from ml4t.diagnostic.backends.polars_backend import PolarsBackend
|
|
9
|
+
|
|
10
|
+
__all__ = ["DataFrameAdapter", "PolarsBackend"]
|
|
@@ -0,0 +1,192 @@
|
|
|
1
|
+
"""Adapter layer for seamless DataFrame conversion between Polars and Pandas.
|
|
2
|
+
|
|
3
|
+
This module provides utilities to convert between different DataFrame representations.
|
|
4
|
+
The internal implementation uses Polars for performance, but the adapter ensures
|
|
5
|
+
compatibility with Pandas-based workflows.
|
|
6
|
+
|
|
7
|
+
Note: MultiIndex preservation/restoration has been removed as it was unused.
|
|
8
|
+
If you need MultiIndex support, use pandas directly or convert after receiving
|
|
9
|
+
the Polars DataFrame.
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
from typing import TYPE_CHECKING, Any, Union
|
|
13
|
+
|
|
14
|
+
import numpy as np
|
|
15
|
+
import pandas as pd
|
|
16
|
+
import polars as pl
|
|
17
|
+
|
|
18
|
+
if TYPE_CHECKING:
|
|
19
|
+
from numpy.typing import NDArray
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class DataFrameAdapter:
    """Stateless converter between Polars, Pandas, and NumPy containers.

    All functionality is exposed through static methods, so the class is
    never instantiated; it only groups the conversion helpers used to
    normalise inputs of differing container types.

    Methods
    -------
    to_polars(data, columns=None)
        Turn a supported input into a Polars DataFrame.
    to_numpy(data)
        Turn a supported input into a numpy array.
    get_shape(data)
        Report ``(n_rows, n_cols)`` for a supported input.
    """

    @staticmethod
    def to_polars(
        data: Union[pl.DataFrame, pd.DataFrame, "NDArray[Any]"],
        columns: list[str] | None = None,
    ) -> tuple[pl.DataFrame, None]:
        """Turn the input into a Polars DataFrame.

        Parameters
        ----------
        data : polars.DataFrame, pandas.DataFrame, or numpy.ndarray
            Input to convert. A Polars DataFrame is handed back as-is.
            A Pandas DataFrame whose index is anything other than a
            ``RangeIndex`` starting at 0 with step 1 has that index moved
            into regular columns before conversion.
        columns : list of str, optional
            Column names for numpy input. When omitted, names of the form
            ``column_<i>`` are generated. Not consulted for DataFrame input.

        Returns
        -------
        df : polars.DataFrame
            The converted data.
        index : None
            Always None; the second tuple slot exists so callers that
            unpack two values keep working.

        Raises
        ------
        TypeError
            If ``data`` is none of the supported container types.
        ValueError
            If the number of supplied column names does not fit the array,
            or the array is neither 1D nor 2D.

        Examples
        --------
        >>> import pandas as pd
        >>> import polars as pl
        >>> df_pd = pd.DataFrame({"a": [1, 2], "b": [3, 4]})
        >>> df_pl, _ = DataFrameAdapter.to_polars(df_pd)
        >>> isinstance(df_pl, pl.DataFrame)
        True
        """
        # Already Polars: nothing to do.
        if isinstance(data, pl.DataFrame):
            return data, None

        if isinstance(data, pd.DataFrame):
            idx = data.index
            # Only a plain 0..n-1 RangeIndex carries no information; every
            # other index (including MultiIndex) is kept by turning it into
            # ordinary columns.
            has_default_index = (
                isinstance(idx, pd.RangeIndex) and idx.start == 0 and idx.step == 1
            )
            frame = data if has_default_index else data.reset_index(drop=False)
            return pl.from_pandas(frame), None

        if not isinstance(data, np.ndarray):
            raise TypeError(
                f"Data must be a Polars DataFrame, Pandas DataFrame, or numpy array. "
                f"Got {type(data).__name__}",
            )

        if data.ndim == 1:
            # A flat array becomes a single-column frame.
            if columns is None:
                return pl.DataFrame({"column_0": data}), None
            if len(columns) != 1:
                raise ValueError(
                    f"1D array requires exactly 1 column name, got {len(columns)}",
                )
            return pl.DataFrame({columns[0]: data}), None

        if data.ndim == 2:
            width = data.shape[1]
            if columns is None:
                columns = [f"column_{i}" for i in range(width)]
            elif len(columns) != width:
                raise ValueError(
                    f"Number of columns ({len(columns)}) doesn't match array shape ({data.shape[1]})",
                )
            return pl.DataFrame(data, schema=columns), None

        raise ValueError(f"Arrays must be 1D or 2D, got {data.ndim}D")

    @staticmethod
    def to_numpy(
        data: Union[pl.DataFrame, pl.Series, pd.DataFrame, pd.Series, "NDArray[Any]"],
    ) -> "NDArray[Any]":
        """Turn the input into a numpy array.

        Parameters
        ----------
        data : polars.DataFrame/Series, pandas.DataFrame/Series, or numpy.ndarray
            Input to convert. A numpy array is handed back as-is; the other
            supported types are converted through their own ``to_numpy()``.

        Returns
        -------
        array : numpy.ndarray
            The converted data.

        Raises
        ------
        TypeError
            If ``data`` is none of the supported container types.

        Examples
        --------
        >>> import polars as pl
        >>> s = pl.Series([1, 2, 3])
        >>> arr = DataFrameAdapter.to_numpy(s)
        >>> arr.tolist()
        [1, 2, 3]
        """
        if isinstance(data, np.ndarray):
            return data

        convertible = (pl.DataFrame, pl.Series, pd.DataFrame, pd.Series)
        if not isinstance(data, convertible):
            raise TypeError(f"Cannot convert {type(data).__name__} to numpy array")
        return data.to_numpy()

    @staticmethod
    def get_shape(
        data: Union[pl.DataFrame, pd.DataFrame, "NDArray[Any]"],
    ) -> tuple[int, int]:
        """Report the shape of the input independent of its container type.

        Parameters
        ----------
        data : polars.DataFrame, pandas.DataFrame, or numpy.ndarray
            Object whose shape is requested.

        Returns
        -------
        shape : tuple of int
            ``(n_rows, n_cols)`` for 2D data, ``(n_rows, 1)`` for a 1D array.
            Arrays of any other dimensionality return their ``shape``
            attribute unchanged.

        Raises
        ------
        TypeError
            If ``data`` is none of the supported container types.

        Examples
        --------
        >>> import numpy as np
        >>> arr = np.array([1, 2, 3])
        >>> DataFrameAdapter.get_shape(arr)
        (3, 1)
        """
        if isinstance(data, (pl.DataFrame, pd.DataFrame)):
            return data.shape

        if not isinstance(data, np.ndarray):
            raise TypeError(f"Cannot get shape of {type(data).__name__}")

        # Present a flat array as a single column.
        return (data.shape[0], 1) if data.ndim == 1 else data.shape
|