ml4t-diagnostic 0.1.0a1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ml4t/diagnostic/AGENT.md +25 -0
- ml4t/diagnostic/__init__.py +166 -0
- ml4t/diagnostic/backends/__init__.py +10 -0
- ml4t/diagnostic/backends/adapter.py +192 -0
- ml4t/diagnostic/backends/polars_backend.py +899 -0
- ml4t/diagnostic/caching/__init__.py +40 -0
- ml4t/diagnostic/caching/cache.py +331 -0
- ml4t/diagnostic/caching/decorators.py +131 -0
- ml4t/diagnostic/caching/smart_cache.py +339 -0
- ml4t/diagnostic/config/AGENT.md +24 -0
- ml4t/diagnostic/config/README.md +267 -0
- ml4t/diagnostic/config/__init__.py +219 -0
- ml4t/diagnostic/config/barrier_config.py +277 -0
- ml4t/diagnostic/config/base.py +301 -0
- ml4t/diagnostic/config/event_config.py +148 -0
- ml4t/diagnostic/config/feature_config.py +404 -0
- ml4t/diagnostic/config/multi_signal_config.py +55 -0
- ml4t/diagnostic/config/portfolio_config.py +215 -0
- ml4t/diagnostic/config/report_config.py +391 -0
- ml4t/diagnostic/config/sharpe_config.py +202 -0
- ml4t/diagnostic/config/signal_config.py +206 -0
- ml4t/diagnostic/config/trade_analysis_config.py +310 -0
- ml4t/diagnostic/config/validation.py +279 -0
- ml4t/diagnostic/core/__init__.py +29 -0
- ml4t/diagnostic/core/numba_utils.py +315 -0
- ml4t/diagnostic/core/purging.py +372 -0
- ml4t/diagnostic/core/sampling.py +471 -0
- ml4t/diagnostic/errors/__init__.py +205 -0
- ml4t/diagnostic/evaluation/AGENT.md +26 -0
- ml4t/diagnostic/evaluation/__init__.py +437 -0
- ml4t/diagnostic/evaluation/autocorrelation.py +531 -0
- ml4t/diagnostic/evaluation/barrier_analysis.py +1050 -0
- ml4t/diagnostic/evaluation/binary_metrics.py +910 -0
- ml4t/diagnostic/evaluation/dashboard.py +715 -0
- ml4t/diagnostic/evaluation/diagnostic_plots.py +1037 -0
- ml4t/diagnostic/evaluation/distribution/__init__.py +499 -0
- ml4t/diagnostic/evaluation/distribution/moments.py +299 -0
- ml4t/diagnostic/evaluation/distribution/tails.py +777 -0
- ml4t/diagnostic/evaluation/distribution/tests.py +470 -0
- ml4t/diagnostic/evaluation/drift/__init__.py +139 -0
- ml4t/diagnostic/evaluation/drift/analysis.py +432 -0
- ml4t/diagnostic/evaluation/drift/domain_classifier.py +517 -0
- ml4t/diagnostic/evaluation/drift/population_stability_index.py +310 -0
- ml4t/diagnostic/evaluation/drift/wasserstein.py +388 -0
- ml4t/diagnostic/evaluation/event_analysis.py +647 -0
- ml4t/diagnostic/evaluation/excursion.py +390 -0
- ml4t/diagnostic/evaluation/feature_diagnostics.py +873 -0
- ml4t/diagnostic/evaluation/feature_outcome.py +666 -0
- ml4t/diagnostic/evaluation/framework.py +935 -0
- ml4t/diagnostic/evaluation/metric_registry.py +255 -0
- ml4t/diagnostic/evaluation/metrics/AGENT.md +23 -0
- ml4t/diagnostic/evaluation/metrics/__init__.py +133 -0
- ml4t/diagnostic/evaluation/metrics/basic.py +160 -0
- ml4t/diagnostic/evaluation/metrics/conditional_ic.py +469 -0
- ml4t/diagnostic/evaluation/metrics/feature_outcome.py +475 -0
- ml4t/diagnostic/evaluation/metrics/ic_statistics.py +446 -0
- ml4t/diagnostic/evaluation/metrics/importance_analysis.py +338 -0
- ml4t/diagnostic/evaluation/metrics/importance_classical.py +375 -0
- ml4t/diagnostic/evaluation/metrics/importance_mda.py +371 -0
- ml4t/diagnostic/evaluation/metrics/importance_shap.py +715 -0
- ml4t/diagnostic/evaluation/metrics/information_coefficient.py +527 -0
- ml4t/diagnostic/evaluation/metrics/interactions.py +772 -0
- ml4t/diagnostic/evaluation/metrics/monotonicity.py +226 -0
- ml4t/diagnostic/evaluation/metrics/risk_adjusted.py +324 -0
- ml4t/diagnostic/evaluation/multi_signal.py +550 -0
- ml4t/diagnostic/evaluation/portfolio_analysis/__init__.py +83 -0
- ml4t/diagnostic/evaluation/portfolio_analysis/analysis.py +734 -0
- ml4t/diagnostic/evaluation/portfolio_analysis/metrics.py +589 -0
- ml4t/diagnostic/evaluation/portfolio_analysis/results.py +334 -0
- ml4t/diagnostic/evaluation/report_generation.py +824 -0
- ml4t/diagnostic/evaluation/signal_selector.py +452 -0
- ml4t/diagnostic/evaluation/stat_registry.py +139 -0
- ml4t/diagnostic/evaluation/stationarity/__init__.py +97 -0
- ml4t/diagnostic/evaluation/stationarity/analysis.py +518 -0
- ml4t/diagnostic/evaluation/stationarity/augmented_dickey_fuller.py +296 -0
- ml4t/diagnostic/evaluation/stationarity/kpss_test.py +308 -0
- ml4t/diagnostic/evaluation/stationarity/phillips_perron.py +365 -0
- ml4t/diagnostic/evaluation/stats/AGENT.md +43 -0
- ml4t/diagnostic/evaluation/stats/__init__.py +191 -0
- ml4t/diagnostic/evaluation/stats/backtest_overfitting.py +219 -0
- ml4t/diagnostic/evaluation/stats/bootstrap.py +228 -0
- ml4t/diagnostic/evaluation/stats/deflated_sharpe_ratio.py +591 -0
- ml4t/diagnostic/evaluation/stats/false_discovery_rate.py +295 -0
- ml4t/diagnostic/evaluation/stats/hac_standard_errors.py +108 -0
- ml4t/diagnostic/evaluation/stats/minimum_track_record.py +408 -0
- ml4t/diagnostic/evaluation/stats/moments.py +164 -0
- ml4t/diagnostic/evaluation/stats/rademacher_adjustment.py +436 -0
- ml4t/diagnostic/evaluation/stats/reality_check.py +155 -0
- ml4t/diagnostic/evaluation/stats/sharpe_inference.py +219 -0
- ml4t/diagnostic/evaluation/themes.py +330 -0
- ml4t/diagnostic/evaluation/threshold_analysis.py +957 -0
- ml4t/diagnostic/evaluation/trade_analysis.py +1136 -0
- ml4t/diagnostic/evaluation/trade_dashboard/__init__.py +32 -0
- ml4t/diagnostic/evaluation/trade_dashboard/app.py +315 -0
- ml4t/diagnostic/evaluation/trade_dashboard/export/__init__.py +18 -0
- ml4t/diagnostic/evaluation/trade_dashboard/export/csv.py +82 -0
- ml4t/diagnostic/evaluation/trade_dashboard/export/html.py +276 -0
- ml4t/diagnostic/evaluation/trade_dashboard/io.py +166 -0
- ml4t/diagnostic/evaluation/trade_dashboard/normalize.py +304 -0
- ml4t/diagnostic/evaluation/trade_dashboard/stats.py +386 -0
- ml4t/diagnostic/evaluation/trade_dashboard/style.py +79 -0
- ml4t/diagnostic/evaluation/trade_dashboard/tabs/__init__.py +21 -0
- ml4t/diagnostic/evaluation/trade_dashboard/tabs/patterns.py +354 -0
- ml4t/diagnostic/evaluation/trade_dashboard/tabs/shap_analysis.py +280 -0
- ml4t/diagnostic/evaluation/trade_dashboard/tabs/stat_validation.py +186 -0
- ml4t/diagnostic/evaluation/trade_dashboard/tabs/worst_trades.py +236 -0
- ml4t/diagnostic/evaluation/trade_dashboard/types.py +129 -0
- ml4t/diagnostic/evaluation/trade_shap/__init__.py +102 -0
- ml4t/diagnostic/evaluation/trade_shap/alignment.py +188 -0
- ml4t/diagnostic/evaluation/trade_shap/characterize.py +413 -0
- ml4t/diagnostic/evaluation/trade_shap/cluster.py +302 -0
- ml4t/diagnostic/evaluation/trade_shap/explain.py +208 -0
- ml4t/diagnostic/evaluation/trade_shap/hypotheses/__init__.py +23 -0
- ml4t/diagnostic/evaluation/trade_shap/hypotheses/generator.py +290 -0
- ml4t/diagnostic/evaluation/trade_shap/hypotheses/matcher.py +251 -0
- ml4t/diagnostic/evaluation/trade_shap/hypotheses/templates.yaml +467 -0
- ml4t/diagnostic/evaluation/trade_shap/models.py +386 -0
- ml4t/diagnostic/evaluation/trade_shap/normalize.py +116 -0
- ml4t/diagnostic/evaluation/trade_shap/pipeline.py +263 -0
- ml4t/diagnostic/evaluation/trade_shap_dashboard.py +283 -0
- ml4t/diagnostic/evaluation/trade_shap_diagnostics.py +588 -0
- ml4t/diagnostic/evaluation/validated_cv.py +535 -0
- ml4t/diagnostic/evaluation/visualization.py +1050 -0
- ml4t/diagnostic/evaluation/volatility/__init__.py +45 -0
- ml4t/diagnostic/evaluation/volatility/analysis.py +351 -0
- ml4t/diagnostic/evaluation/volatility/arch.py +258 -0
- ml4t/diagnostic/evaluation/volatility/garch.py +460 -0
- ml4t/diagnostic/integration/__init__.py +48 -0
- ml4t/diagnostic/integration/backtest_contract.py +671 -0
- ml4t/diagnostic/integration/data_contract.py +316 -0
- ml4t/diagnostic/integration/engineer_contract.py +226 -0
- ml4t/diagnostic/logging/__init__.py +77 -0
- ml4t/diagnostic/logging/logger.py +245 -0
- ml4t/diagnostic/logging/performance.py +234 -0
- ml4t/diagnostic/logging/progress.py +234 -0
- ml4t/diagnostic/logging/wandb.py +412 -0
- ml4t/diagnostic/metrics/__init__.py +9 -0
- ml4t/diagnostic/metrics/percentiles.py +128 -0
- ml4t/diagnostic/py.typed +1 -0
- ml4t/diagnostic/reporting/__init__.py +43 -0
- ml4t/diagnostic/reporting/base.py +130 -0
- ml4t/diagnostic/reporting/html_renderer.py +275 -0
- ml4t/diagnostic/reporting/json_renderer.py +51 -0
- ml4t/diagnostic/reporting/markdown_renderer.py +117 -0
- ml4t/diagnostic/results/AGENT.md +24 -0
- ml4t/diagnostic/results/__init__.py +105 -0
- ml4t/diagnostic/results/barrier_results/__init__.py +36 -0
- ml4t/diagnostic/results/barrier_results/hit_rate.py +304 -0
- ml4t/diagnostic/results/barrier_results/precision_recall.py +266 -0
- ml4t/diagnostic/results/barrier_results/profit_factor.py +297 -0
- ml4t/diagnostic/results/barrier_results/tearsheet.py +397 -0
- ml4t/diagnostic/results/barrier_results/time_to_target.py +305 -0
- ml4t/diagnostic/results/barrier_results/validation.py +38 -0
- ml4t/diagnostic/results/base.py +177 -0
- ml4t/diagnostic/results/event_results.py +349 -0
- ml4t/diagnostic/results/feature_results.py +787 -0
- ml4t/diagnostic/results/multi_signal_results.py +431 -0
- ml4t/diagnostic/results/portfolio_results.py +281 -0
- ml4t/diagnostic/results/sharpe_results.py +448 -0
- ml4t/diagnostic/results/signal_results/__init__.py +74 -0
- ml4t/diagnostic/results/signal_results/ic.py +581 -0
- ml4t/diagnostic/results/signal_results/irtc.py +110 -0
- ml4t/diagnostic/results/signal_results/quantile.py +392 -0
- ml4t/diagnostic/results/signal_results/tearsheet.py +456 -0
- ml4t/diagnostic/results/signal_results/turnover.py +213 -0
- ml4t/diagnostic/results/signal_results/validation.py +147 -0
- ml4t/diagnostic/signal/AGENT.md +17 -0
- ml4t/diagnostic/signal/__init__.py +69 -0
- ml4t/diagnostic/signal/_report.py +152 -0
- ml4t/diagnostic/signal/_utils.py +261 -0
- ml4t/diagnostic/signal/core.py +275 -0
- ml4t/diagnostic/signal/quantile.py +148 -0
- ml4t/diagnostic/signal/result.py +214 -0
- ml4t/diagnostic/signal/signal_ic.py +129 -0
- ml4t/diagnostic/signal/turnover.py +182 -0
- ml4t/diagnostic/splitters/AGENT.md +19 -0
- ml4t/diagnostic/splitters/__init__.py +36 -0
- ml4t/diagnostic/splitters/base.py +501 -0
- ml4t/diagnostic/splitters/calendar.py +421 -0
- ml4t/diagnostic/splitters/calendar_config.py +91 -0
- ml4t/diagnostic/splitters/combinatorial.py +1064 -0
- ml4t/diagnostic/splitters/config.py +322 -0
- ml4t/diagnostic/splitters/cpcv/__init__.py +57 -0
- ml4t/diagnostic/splitters/cpcv/combinations.py +119 -0
- ml4t/diagnostic/splitters/cpcv/partitioning.py +263 -0
- ml4t/diagnostic/splitters/cpcv/purge_engine.py +379 -0
- ml4t/diagnostic/splitters/cpcv/windows.py +190 -0
- ml4t/diagnostic/splitters/group_isolation.py +329 -0
- ml4t/diagnostic/splitters/persistence.py +316 -0
- ml4t/diagnostic/splitters/utils.py +207 -0
- ml4t/diagnostic/splitters/walk_forward.py +757 -0
- ml4t/diagnostic/utils/__init__.py +42 -0
- ml4t/diagnostic/utils/config.py +542 -0
- ml4t/diagnostic/utils/dependencies.py +318 -0
- ml4t/diagnostic/utils/sessions.py +127 -0
- ml4t/diagnostic/validation/__init__.py +54 -0
- ml4t/diagnostic/validation/dataframe.py +274 -0
- ml4t/diagnostic/validation/returns.py +280 -0
- ml4t/diagnostic/validation/timeseries.py +299 -0
- ml4t/diagnostic/visualization/AGENT.md +19 -0
- ml4t/diagnostic/visualization/__init__.py +223 -0
- ml4t/diagnostic/visualization/backtest/__init__.py +98 -0
- ml4t/diagnostic/visualization/backtest/cost_attribution.py +762 -0
- ml4t/diagnostic/visualization/backtest/executive_summary.py +895 -0
- ml4t/diagnostic/visualization/backtest/interactive_controls.py +673 -0
- ml4t/diagnostic/visualization/backtest/statistical_validity.py +874 -0
- ml4t/diagnostic/visualization/backtest/tearsheet.py +565 -0
- ml4t/diagnostic/visualization/backtest/template_system.py +373 -0
- ml4t/diagnostic/visualization/backtest/trade_plots.py +1172 -0
- ml4t/diagnostic/visualization/barrier_plots.py +782 -0
- ml4t/diagnostic/visualization/core.py +1060 -0
- ml4t/diagnostic/visualization/dashboards/__init__.py +36 -0
- ml4t/diagnostic/visualization/dashboards/base.py +582 -0
- ml4t/diagnostic/visualization/dashboards/importance.py +801 -0
- ml4t/diagnostic/visualization/dashboards/interaction.py +263 -0
- ml4t/diagnostic/visualization/dashboards.py +43 -0
- ml4t/diagnostic/visualization/data_extraction/__init__.py +48 -0
- ml4t/diagnostic/visualization/data_extraction/importance.py +649 -0
- ml4t/diagnostic/visualization/data_extraction/interaction.py +504 -0
- ml4t/diagnostic/visualization/data_extraction/types.py +113 -0
- ml4t/diagnostic/visualization/data_extraction/validation.py +66 -0
- ml4t/diagnostic/visualization/feature_plots.py +888 -0
- ml4t/diagnostic/visualization/interaction_plots.py +618 -0
- ml4t/diagnostic/visualization/portfolio/__init__.py +41 -0
- ml4t/diagnostic/visualization/portfolio/dashboard.py +514 -0
- ml4t/diagnostic/visualization/portfolio/drawdown_plots.py +341 -0
- ml4t/diagnostic/visualization/portfolio/returns_plots.py +487 -0
- ml4t/diagnostic/visualization/portfolio/risk_plots.py +301 -0
- ml4t/diagnostic/visualization/report_generation.py +1343 -0
- ml4t/diagnostic/visualization/signal/__init__.py +103 -0
- ml4t/diagnostic/visualization/signal/dashboard.py +911 -0
- ml4t/diagnostic/visualization/signal/event_plots.py +514 -0
- ml4t/diagnostic/visualization/signal/ic_plots.py +635 -0
- ml4t/diagnostic/visualization/signal/multi_signal_dashboard.py +974 -0
- ml4t/diagnostic/visualization/signal/multi_signal_plots.py +603 -0
- ml4t/diagnostic/visualization/signal/quantile_plots.py +625 -0
- ml4t/diagnostic/visualization/signal/turnover_plots.py +400 -0
- ml4t/diagnostic/visualization/trade_shap/__init__.py +90 -0
- ml4t_diagnostic-0.1.0a1.dist-info/METADATA +1044 -0
- ml4t_diagnostic-0.1.0a1.dist-info/RECORD +242 -0
- ml4t_diagnostic-0.1.0a1.dist-info/WHEEL +4 -0
- ml4t_diagnostic-0.1.0a1.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,448 @@
|
|
|
1
|
+
"""Result schemas for Enhanced Sharpe Ratio Framework.
|
|
2
|
+
|
|
3
|
+
Implements proper statistical evaluation of Sharpe ratios following
|
|
4
|
+
López de Prado, Lipton & Zoonekynd (2025) "How to Use the Sharpe Ratio".
|
|
5
|
+
|
|
6
|
+
Includes:
|
|
7
|
+
- Probabilistic Sharpe Ratio (PSR)
|
|
8
|
+
- Minimum Track Record Length (MinTRL)
|
|
9
|
+
- Deflated Sharpe Ratio (DSR)
|
|
10
|
+
- Bayesian False Discovery Rate (FDR)
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from __future__ import annotations
|
|
14
|
+
|
|
15
|
+
import polars as pl
|
|
16
|
+
from pydantic import Field
|
|
17
|
+
|
|
18
|
+
from ml4t.diagnostic.results.base import BaseResult
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class PSRResult(BaseResult):
    """Container for a Probabilistic Sharpe Ratio (PSR) analysis.

    The PSR is the probability that the true Sharpe ratio exceeds a target,
    taking non-normality of returns and the sample length into account.

    Reference: López de Prado et al. (2025), Equation 9

    Attributes:
        observed_sharpe: Sharpe ratio measured on the returns.
        target_sharpe: Sharpe ratio the strategy is compared against.
        psr_value: Probability that the true SR is above the target;
            validated to lie in [0, 1].
        confidence_level: Confidence level attached to the analysis
            (defaults to 0.95).
        skewness: Skewness of the returns.
        kurtosis: Excess kurtosis of the returns.
        n_observations: Number of observations; must be positive.
    """

    analysis_type: str = "probabilistic_sharpe_ratio"

    observed_sharpe: float = Field(..., description="Observed Sharpe ratio")
    target_sharpe: float = Field(..., description="Target Sharpe to exceed")
    psr_value: float = Field(..., ge=0.0, le=1.0, description="P(true SR > target)")
    confidence_level: float = Field(default=0.95, description="Confidence level")

    # Distribution parameters
    skewness: float = Field(..., description="Return skewness")
    kurtosis: float = Field(..., description="Excess kurtosis")
    n_observations: int = Field(..., gt=0, description="Sample size")

    def get_dataframe(self, name: str | None = None) -> pl.DataFrame:
        """Return the PSR statistics as a one-row DataFrame.

        Args:
            name: Not used by this class; every call returns the same frame.

        Returns:
            DataFrame with one column per statistic and a single row.
        """
        return pl.DataFrame(
            {
                "observed_sharpe": [self.observed_sharpe],
                "target_sharpe": [self.target_sharpe],
                "psr": [self.psr_value],
                "confidence_level": [self.confidence_level],
                "skewness": [self.skewness],
                "kurtosis": [self.kurtosis],
                "n_obs": [self.n_observations],
            }
        )

    def summary(self) -> str:
        """Render the PSR analysis as a multi-line text report.

        Returns:
            Title, headline statistics, sample description and the
            conclusion from ``interpret()``, joined by newlines.
        """
        report = [
            "Probabilistic Sharpe Ratio (PSR)",
            "=" * 40,
            f"Observed Sharpe: {self.observed_sharpe:.3f}",
            f"Target Sharpe: {self.target_sharpe:.3f}",
            f"PSR: {self.psr_value:.1%}",
            "",
            f"Sample size: {self.n_observations}",
            f"Skewness: {self.skewness:.3f}",
            f"Kurtosis: {self.kurtosis:.3f}",
            "",
        ]
        # The verdict lines close the report.
        report.extend(self.interpret())
        return "\n".join(report)

    def interpret(self) -> list[str]:
        """Translate ``psr_value`` into a one-line verbal conclusion.

        Returns:
            Single-element list holding the conclusion for the first
            threshold (0.95, 0.80, 0.50) that ``psr_value`` reaches, or the
            "unlikely" conclusion when it is below all of them.
        """
        # Ordered from strictest to loosest so the first match wins.
        tiers = (
            (0.95, "Conclusion: High confidence that SR exceeds target"),
            (0.80, "Conclusion: Moderate confidence that SR exceeds target"),
            (0.50, "Conclusion: Weak evidence that SR exceeds target"),
        )
        for floor, verdict in tiers:
            if self.psr_value >= floor:
                return [verdict]
        return ["Conclusion: Unlikely that SR exceeds target"]
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
class MinTRLResult(BaseResult):
    """Container for a Minimum Track Record Length (MinTRL) analysis.

    MinTRL is the minimum sample size needed to reject the null hypothesis
    that the true Sharpe ratio is at or below the target at a given
    confidence level.

    Reference: López de Prado et al. (2025), Equation 11

    Attributes:
        observed_sharpe: Sharpe ratio measured on the returns.
        target_sharpe: Sharpe ratio the strategy is compared against.
        min_trl_days: Required track record length in days; must be positive.
        actual_days: Available sample size in days; must be positive.
        is_sufficient: Flag stating whether the available sample is long
            enough. It is stored as supplied, not derived from the day counts.
        confidence_level: Confidence level of the test (defaults to 0.95).
        skewness: Skewness of the returns.
        kurtosis: Excess kurtosis of the returns.
    """

    analysis_type: str = "minimum_track_record_length"

    observed_sharpe: float = Field(..., description="Observed Sharpe ratio")
    target_sharpe: float = Field(..., description="Target Sharpe ratio")

    min_trl_days: int = Field(..., gt=0, description="Minimum TRL in days")
    actual_days: int = Field(..., gt=0, description="Actual sample size in days")
    is_sufficient: bool = Field(..., description="Whether sample is sufficient")

    confidence_level: float = Field(default=0.95, description="Confidence level")
    skewness: float = Field(..., description="Return skewness")
    kurtosis: float = Field(..., description="Excess kurtosis")

    def get_dataframe(self, name: str | None = None) -> pl.DataFrame:
        """Return the MinTRL statistics as a one-row DataFrame.

        Args:
            name: Not used by this class; every call returns the same frame.

        Returns:
            DataFrame with one column per statistic and a single row.
            Skewness and kurtosis are not part of the frame.
        """
        return pl.DataFrame(
            {
                "observed_sharpe": [self.observed_sharpe],
                "target_sharpe": [self.target_sharpe],
                "min_trl_days": [self.min_trl_days],
                "actual_days": [self.actual_days],
                "is_sufficient": [self.is_sufficient],
                "confidence_level": [self.confidence_level],
            }
        )

    def summary(self) -> str:
        """Render the MinTRL analysis as a multi-line text report.

        Returns:
            Title, Sharpe figures, required versus actual sample length and
            a closing conclusion, joined by newlines.
        """
        report = [
            "Minimum Track Record Length (MinTRL)",
            "=" * 40,
            f"Observed Sharpe: {self.observed_sharpe:.3f}",
            f"Target Sharpe: {self.target_sharpe:.3f}",
            "",
            f"Minimum TRL: {self.min_trl_days} days",
            f"Actual sample: {self.actual_days} days",
            f"Sufficient: {'Yes' if self.is_sufficient else 'No'}",
            "",
        ]

        if self.is_sufficient:
            verdict = "Conclusion: Sample size is adequate"
        else:
            # Express the gap in days and in years of 252 days each.
            missing_days = self.min_trl_days - self.actual_days
            verdict = (
                f"Conclusion: Need {missing_days} more days of data ({missing_days / 252:.1f} years)"
            )
        report.append(verdict)

        return "\n".join(report)
|
|
171
|
+
|
|
172
|
+
|
|
173
|
+
class DSRResult(BaseResult):
    """Container for a Deflated Sharpe Ratio (DSR) analysis.

    The DSR corrects the Sharpe ratio for backtest overfitting caused by
    multiple testing, using the number of trials and the variance of the
    Sharpe ratio across those trials.

    Reference: Bailey & López de Prado (2014), López de Prado et al. (2025)

    Attributes:
        observed_sharpe: Sharpe ratio measured on the returns.
        dsr_value: Deflated Sharpe ratio (adjusted for multiple testing).
        adjusted_pvalue: FWER-adjusted p-value; validated to lie in [0, 1].
        is_significant: Whether the result is significant at ``alpha``.
            It is stored as supplied, not recomputed here.
        n_trials: Number of trials/strategies tested; must be positive.
        variance_trials: Variance of Sharpe ratios across trials; must be
            non-negative.
        alpha: Significance level (defaults to 0.05).
    """

    analysis_type: str = "deflated_sharpe_ratio"

    observed_sharpe: float = Field(..., description="Observed Sharpe ratio")
    dsr_value: float = Field(..., description="Deflated Sharpe ratio")
    adjusted_pvalue: float = Field(..., ge=0.0, le=1.0, description="FWER-adjusted p-value")
    is_significant: bool = Field(..., description="Is significant at alpha")

    n_trials: int = Field(..., gt=0, description="Number of trials tested")
    variance_trials: float = Field(..., ge=0.0, description="Variance of Sharpe across trials")
    alpha: float = Field(default=0.05, description="Significance level")

    def get_dataframe(self, name: str | None = None) -> pl.DataFrame:
        """Return the DSR statistics as a one-row DataFrame.

        Args:
            name: Not used by this class; every call returns the same frame.

        Returns:
            DataFrame with one column per statistic and a single row.
        """
        return pl.DataFrame(
            {
                "observed_sharpe": [self.observed_sharpe],
                "dsr": [self.dsr_value],
                "adjusted_pvalue": [self.adjusted_pvalue],
                "is_significant": [self.is_significant],
                "n_trials": [self.n_trials],
                "variance_trials": [self.variance_trials],
                "alpha": [self.alpha],
            }
        )

    def summary(self) -> str:
        """Render the DSR analysis as a multi-line text report.

        Returns:
            Title, Sharpe figures, trial information and the lines from
            ``interpret()``, joined by newlines.
        """
        report = [
            "Deflated Sharpe Ratio (DSR)",
            "=" * 40,
            f"Observed Sharpe: {self.observed_sharpe:.3f}",
            f"Deflated Sharpe: {self.dsr_value:.3f}",
            f"Adjusted p-value: {self.adjusted_pvalue:.4f}",
            "",
            f"Number of trials: {self.n_trials}",
            f"Trial variance: {self.variance_trials:.4f}",
            f"Significance level: {self.alpha}",
            "",
        ]
        # Conclusion and recommendation close the report.
        report.extend(self.interpret())
        return "\n".join(report)

    def interpret(self) -> list[str]:
        """Produce a conclusion and a recommendation for the DSR result.

        Returns:
            Two strings: a conclusion based on ``is_significant`` and a
            recommendation. For significant results the recommendation
            depends on whether ``observed_sharpe`` is above 1.0.
        """
        if not self.is_significant:
            return [
                "Conclusion: Not significant (likely due to overfitting)",
                "Recommendation: Revisit feature selection or reduce strategy complexity",
            ]

        if self.observed_sharpe > 1.0:
            advice = "Recommendation: Strong evidence of skill, proceed to paper trading"
        else:
            advice = "Recommendation: Modest but real edge, investigate improvements"
        return ["Conclusion: Significant after multiple testing correction", advice]
|
|
260
|
+
|
|
261
|
+
|
|
262
|
+
class FDRResult(BaseResult):
    """Container for a Bayesian False Discovery Rate (FDR) analysis.

    Holds the probability of no skill given the observed data, computed
    with Bayesian inference.

    Reference: López de Prado et al. (2025), Equations 21 & 24

    Attributes:
        observed_sharpe: Sharpe ratio measured on the returns.
        null_sharpe: Sharpe ratio under H0 (no skill).
        alternative_sharpe: Sharpe ratio under H1 (has skill).
        prior_h0: Prior probability of no skill; validated to lie in [0, 1].
        ofdr: Observed FDR, P(H0 | data); validated to lie in [0, 1].
        pfdr: Planned FDR (expected FDR at the planning stage). Optional;
            when given it is validated to lie in [0, 1].
    """

    analysis_type: str = "bayesian_fdr"

    observed_sharpe: float = Field(..., description="Observed Sharpe ratio")
    null_sharpe: float = Field(..., description="Sharpe under H0 (no skill)")
    alternative_sharpe: float = Field(..., description="Sharpe under H1 (skill)")

    prior_h0: float = Field(..., ge=0.0, le=1.0, description="Prior P(no skill)")
    ofdr: float = Field(..., ge=0.0, le=1.0, description="Observed FDR: P(H0 | data)")
    pfdr: float | None = Field(None, ge=0.0, le=1.0, description="Planned FDR (at design stage)")

    def get_dataframe(self, name: str | None = None) -> pl.DataFrame:
        """Return the FDR statistics as a one-row DataFrame.

        Args:
            name: Not used by this class; every call returns the same frame.

        Returns:
            DataFrame with one column per statistic and a single row. The
            ``pfdr`` column is always Float64 and holds a null when no
            planned FDR was supplied.
        """
        data = {
            "observed_sharpe": [self.observed_sharpe],
            "null_sharpe": [self.null_sharpe],
            "alt_sharpe": [self.alternative_sharpe],
            "prior_h0": [self.prior_h0],
            "ofdr": [self.ofdr],
            "pfdr": [self.pfdr],
        }
        # ``pfdr`` is optional. Left to inference, a lone None produces a
        # Null-typed column, so the schema would differ between results with
        # and without a planned FDR. Pinning the dtype keeps it stable.
        return pl.DataFrame(data, schema_overrides={"pfdr": pl.Float64})

    def summary(self) -> str:
        """Render the FDR analysis as a multi-line text report.

        Returns:
            Title, Sharpe figures, prior and observed FDR, the planned FDR
            when one is set, and a conclusion chosen from ``ofdr``, joined
            by newlines.
        """
        lines = ["Bayesian False Discovery Rate (FDR)", "=" * 40]
        lines.append(f"Observed Sharpe: {self.observed_sharpe:.3f}")
        lines.append(f"H0 Sharpe: {self.null_sharpe:.3f}")
        lines.append(f"H1 Sharpe: {self.alternative_sharpe:.3f}")
        lines.append("")
        lines.append(f"Prior P(no skill): {self.prior_h0:.1%}")
        lines.append(f"Observed FDR - P(no skill | data): {self.ofdr:.1%}")
        if self.pfdr is not None:
            lines.append(f"Planned FDR: {self.pfdr:.1%}")
        lines.append("")

        # A lower observed FDR means stronger evidence of skill.
        if self.ofdr < 0.05:
            lines.append("Conclusion: Strong evidence of skill")
        elif self.ofdr < 0.20:
            lines.append("Conclusion: Moderate evidence of skill")
        elif self.ofdr < 0.50:
            lines.append("Conclusion: Weak evidence of skill")
        else:
            lines.append("Conclusion: Likely no skill (false discovery)")

        return "\n".join(lines)
|
|
329
|
+
|
|
330
|
+
|
|
331
|
+
class SharpeFrameworkResult(BaseResult):
    """Aggregate of the results produced by the Enhanced Sharpe Framework.

    Bundles up to four optional analyses:
    - PSR (non-normality adjustment)
    - MinTRL (sample adequacy)
    - DSR (multiple testing correction)
    - FDR (Bayesian interpretation)

    Attributes:
        psr: Probabilistic Sharpe Ratio results, if computed.
        min_trl: Minimum Track Record Length results, if computed.
        dsr: Deflated Sharpe Ratio results, if computed.
        fdr_results: Bayesian FDR results, if computed.
    """

    analysis_type: str = "sharpe_framework"

    psr: PSRResult | None = Field(None, description="PSR analysis")
    min_trl: MinTRLResult | None = Field(None, description="MinTRL analysis")
    dsr: DSRResult | None = Field(None, description="DSR analysis")
    fdr_results: FDRResult | None = Field(None, description="Bayesian FDR")

    def get_dataframe(self, name: str | None = None) -> pl.DataFrame:
        """Return one section's DataFrame, or a combined overview.

        Args:
            name: 'psr', 'min_trl', 'dsr', or 'fdr' selects that section's
                own DataFrame when the section is present. Any other value
                (including None), or the name of a section that is absent,
                yields the combined overview.

        Returns:
            The selected section's DataFrame, or an overview with one row
            per available section and the columns ``metric``, ``value`` and
            ``sharpe``. The overview is an empty, column-less DataFrame when
            no section is available.
        """
        sections = {
            "psr": self.psr,
            "min_trl": self.min_trl,
            "dsr": self.dsr,
            "fdr": self.fdr_results,
        }
        selected = sections.get(name)
        if selected is not None:
            return selected.get_dataframe()

        # Combined overview: one row per available section.
        rows = []
        if self.psr is not None:
            rows.append(
                {
                    "metric": "PSR",
                    "value": self.psr.psr_value,
                    "sharpe": self.psr.observed_sharpe,
                }
            )
        if self.min_trl is not None:
            rows.append(
                {
                    "metric": "MinTRL",
                    # The day count is an int while every other ``value`` is
                    # a float. Cast it so the column holds one type and its
                    # dtype does not depend on which sections are present.
                    "value": float(self.min_trl.min_trl_days),
                    "sharpe": self.min_trl.observed_sharpe,
                }
            )
        if self.dsr is not None:
            rows.append(
                {
                    "metric": "DSR",
                    "value": self.dsr.dsr_value,
                    "sharpe": self.dsr.observed_sharpe,
                }
            )
        if self.fdr_results is not None:
            rows.append(
                {
                    "metric": "oFDR",
                    "value": self.fdr_results.ofdr,
                    "sharpe": self.fdr_results.observed_sharpe,
                }
            )
        return pl.DataFrame(rows) if rows else pl.DataFrame()

    def summary(self) -> str:
        """Render every available section plus an overall assessment.

        Returns:
            The summaries of the available sections, each preceded by a
            blank line, followed by an "Overall Assessment" block. That
            block lists only the checks that pass; the warning line appears
            only when none of them pass.
        """
        lines = ["Enhanced Sharpe Framework Summary", "=" * 60]

        for section in (self.psr, self.min_trl, self.dsr, self.fdr_results):
            if section is not None:
                lines.append("")
                lines.append(section.summary())

        # Overall conclusion
        lines.append("")
        lines.append("Overall Assessment")
        lines.append("-" * 60)

        checks = []
        if self.psr is not None and self.psr.psr_value >= 0.80:
            checks.append("PSR: High probability of exceeding target")
        if self.min_trl is not None and self.min_trl.is_sufficient:
            checks.append("MinTRL: Adequate sample size")
        if self.dsr is not None and self.dsr.is_significant:
            checks.append("DSR: Significant after multiple testing")
        if self.fdr_results is not None and self.fdr_results.ofdr < 0.20:
            checks.append("FDR: Low probability of false discovery")

        # NOTE(review): checks that fail are not reported individually, so a
        # single passing check suppresses the warning. Confirm this is intended.
        if checks:
            lines.extend(checks)
        else:
            lines.append("⚠ Concerns about statistical significance")

        return "\n".join(lines)
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
"""Signal analysis result classes.
|
|
2
|
+
|
|
3
|
+
This package provides Pydantic result classes for storing and serializing
|
|
4
|
+
signal analysis outputs including IC metrics, quantile analysis, turnover,
|
|
5
|
+
and tear sheet data.
|
|
6
|
+
|
|
7
|
+
The package is decomposed into focused submodules:
|
|
8
|
+
- validation: Helper functions for key validation and period normalization
|
|
9
|
+
- ic: IC-related classes (ICStats, SignalICResult, RASICResult)
|
|
10
|
+
- quantile: Quantile analysis (QuantileAnalysisResult)
|
|
11
|
+
- turnover: Turnover analysis (TurnoverAnalysisResult)
|
|
12
|
+
- irtc: Transaction-cost adjusted IR (IRtcResult)
|
|
13
|
+
- tearsheet: Complete tear sheet (SignalTearSheet)
|
|
14
|
+
|
|
15
|
+
References
|
|
16
|
+
----------
|
|
17
|
+
Lopez de Prado, M. (2018). "Advances in Financial Machine Learning"
|
|
18
|
+
Paleologo, G. (2024). "Elements of Quantitative Investing"
|
|
19
|
+
"""
|
|
20
|
+
|
|
21
|
+
from __future__ import annotations
|
|
22
|
+
|
|
23
|
+
# IC-related classes
|
|
24
|
+
from ml4t.diagnostic.results.signal_results.ic import (
|
|
25
|
+
ICStats,
|
|
26
|
+
RASICResult,
|
|
27
|
+
SignalICResult,
|
|
28
|
+
)
|
|
29
|
+
|
|
30
|
+
# IR_tc analysis
|
|
31
|
+
from ml4t.diagnostic.results.signal_results.irtc import (
|
|
32
|
+
IRtcResult,
|
|
33
|
+
)
|
|
34
|
+
|
|
35
|
+
# Quantile analysis
|
|
36
|
+
from ml4t.diagnostic.results.signal_results.quantile import (
|
|
37
|
+
QuantileAnalysisResult,
|
|
38
|
+
)
|
|
39
|
+
|
|
40
|
+
# Complete tear sheet
|
|
41
|
+
from ml4t.diagnostic.results.signal_results.tearsheet import (
|
|
42
|
+
SignalTearSheet,
|
|
43
|
+
)
|
|
44
|
+
|
|
45
|
+
# Turnover analysis
|
|
46
|
+
from ml4t.diagnostic.results.signal_results.turnover import (
|
|
47
|
+
TurnoverAnalysisResult,
|
|
48
|
+
)
|
|
49
|
+
|
|
50
|
+
# Validation helpers (for internal use, but exported for testing)
|
|
51
|
+
from ml4t.diagnostic.results.signal_results.validation import (
|
|
52
|
+
_figure_from_data,
|
|
53
|
+
_normalize_period,
|
|
54
|
+
_validate_dict_keys_match,
|
|
55
|
+
)
|
|
56
|
+
|
|
57
|
+
# Names re-exported by this package, grouped by the submodule they come from.
__all__ = [
    # validation: internal helpers, listed here so tests can import them
    "_figure_from_data",
    "_normalize_period",
    "_validate_dict_keys_match",
    # ic: information-coefficient results
    "ICStats",
    "SignalICResult",
    "RASICResult",
    # quantile: quantile analysis
    "QuantileAnalysisResult",
    # turnover: turnover analysis
    "TurnoverAnalysisResult",
    # irtc: transaction-cost adjusted IR
    "IRtcResult",
    # tearsheet: complete tear sheet
    "SignalTearSheet",
]
|