ml4t-diagnostic 0.1.0a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (242) hide show
  1. ml4t/diagnostic/AGENT.md +25 -0
  2. ml4t/diagnostic/__init__.py +166 -0
  3. ml4t/diagnostic/backends/__init__.py +10 -0
  4. ml4t/diagnostic/backends/adapter.py +192 -0
  5. ml4t/diagnostic/backends/polars_backend.py +899 -0
  6. ml4t/diagnostic/caching/__init__.py +40 -0
  7. ml4t/diagnostic/caching/cache.py +331 -0
  8. ml4t/diagnostic/caching/decorators.py +131 -0
  9. ml4t/diagnostic/caching/smart_cache.py +339 -0
  10. ml4t/diagnostic/config/AGENT.md +24 -0
  11. ml4t/diagnostic/config/README.md +267 -0
  12. ml4t/diagnostic/config/__init__.py +219 -0
  13. ml4t/diagnostic/config/barrier_config.py +277 -0
  14. ml4t/diagnostic/config/base.py +301 -0
  15. ml4t/diagnostic/config/event_config.py +148 -0
  16. ml4t/diagnostic/config/feature_config.py +404 -0
  17. ml4t/diagnostic/config/multi_signal_config.py +55 -0
  18. ml4t/diagnostic/config/portfolio_config.py +215 -0
  19. ml4t/diagnostic/config/report_config.py +391 -0
  20. ml4t/diagnostic/config/sharpe_config.py +202 -0
  21. ml4t/diagnostic/config/signal_config.py +206 -0
  22. ml4t/diagnostic/config/trade_analysis_config.py +310 -0
  23. ml4t/diagnostic/config/validation.py +279 -0
  24. ml4t/diagnostic/core/__init__.py +29 -0
  25. ml4t/diagnostic/core/numba_utils.py +315 -0
  26. ml4t/diagnostic/core/purging.py +372 -0
  27. ml4t/diagnostic/core/sampling.py +471 -0
  28. ml4t/diagnostic/errors/__init__.py +205 -0
  29. ml4t/diagnostic/evaluation/AGENT.md +26 -0
  30. ml4t/diagnostic/evaluation/__init__.py +437 -0
  31. ml4t/diagnostic/evaluation/autocorrelation.py +531 -0
  32. ml4t/diagnostic/evaluation/barrier_analysis.py +1050 -0
  33. ml4t/diagnostic/evaluation/binary_metrics.py +910 -0
  34. ml4t/diagnostic/evaluation/dashboard.py +715 -0
  35. ml4t/diagnostic/evaluation/diagnostic_plots.py +1037 -0
  36. ml4t/diagnostic/evaluation/distribution/__init__.py +499 -0
  37. ml4t/diagnostic/evaluation/distribution/moments.py +299 -0
  38. ml4t/diagnostic/evaluation/distribution/tails.py +777 -0
  39. ml4t/diagnostic/evaluation/distribution/tests.py +470 -0
  40. ml4t/diagnostic/evaluation/drift/__init__.py +139 -0
  41. ml4t/diagnostic/evaluation/drift/analysis.py +432 -0
  42. ml4t/diagnostic/evaluation/drift/domain_classifier.py +517 -0
  43. ml4t/diagnostic/evaluation/drift/population_stability_index.py +310 -0
  44. ml4t/diagnostic/evaluation/drift/wasserstein.py +388 -0
  45. ml4t/diagnostic/evaluation/event_analysis.py +647 -0
  46. ml4t/diagnostic/evaluation/excursion.py +390 -0
  47. ml4t/diagnostic/evaluation/feature_diagnostics.py +873 -0
  48. ml4t/diagnostic/evaluation/feature_outcome.py +666 -0
  49. ml4t/diagnostic/evaluation/framework.py +935 -0
  50. ml4t/diagnostic/evaluation/metric_registry.py +255 -0
  51. ml4t/diagnostic/evaluation/metrics/AGENT.md +23 -0
  52. ml4t/diagnostic/evaluation/metrics/__init__.py +133 -0
  53. ml4t/diagnostic/evaluation/metrics/basic.py +160 -0
  54. ml4t/diagnostic/evaluation/metrics/conditional_ic.py +469 -0
  55. ml4t/diagnostic/evaluation/metrics/feature_outcome.py +475 -0
  56. ml4t/diagnostic/evaluation/metrics/ic_statistics.py +446 -0
  57. ml4t/diagnostic/evaluation/metrics/importance_analysis.py +338 -0
  58. ml4t/diagnostic/evaluation/metrics/importance_classical.py +375 -0
  59. ml4t/diagnostic/evaluation/metrics/importance_mda.py +371 -0
  60. ml4t/diagnostic/evaluation/metrics/importance_shap.py +715 -0
  61. ml4t/diagnostic/evaluation/metrics/information_coefficient.py +527 -0
  62. ml4t/diagnostic/evaluation/metrics/interactions.py +772 -0
  63. ml4t/diagnostic/evaluation/metrics/monotonicity.py +226 -0
  64. ml4t/diagnostic/evaluation/metrics/risk_adjusted.py +324 -0
  65. ml4t/diagnostic/evaluation/multi_signal.py +550 -0
  66. ml4t/diagnostic/evaluation/portfolio_analysis/__init__.py +83 -0
  67. ml4t/diagnostic/evaluation/portfolio_analysis/analysis.py +734 -0
  68. ml4t/diagnostic/evaluation/portfolio_analysis/metrics.py +589 -0
  69. ml4t/diagnostic/evaluation/portfolio_analysis/results.py +334 -0
  70. ml4t/diagnostic/evaluation/report_generation.py +824 -0
  71. ml4t/diagnostic/evaluation/signal_selector.py +452 -0
  72. ml4t/diagnostic/evaluation/stat_registry.py +139 -0
  73. ml4t/diagnostic/evaluation/stationarity/__init__.py +97 -0
  74. ml4t/diagnostic/evaluation/stationarity/analysis.py +518 -0
  75. ml4t/diagnostic/evaluation/stationarity/augmented_dickey_fuller.py +296 -0
  76. ml4t/diagnostic/evaluation/stationarity/kpss_test.py +308 -0
  77. ml4t/diagnostic/evaluation/stationarity/phillips_perron.py +365 -0
  78. ml4t/diagnostic/evaluation/stats/AGENT.md +43 -0
  79. ml4t/diagnostic/evaluation/stats/__init__.py +191 -0
  80. ml4t/diagnostic/evaluation/stats/backtest_overfitting.py +219 -0
  81. ml4t/diagnostic/evaluation/stats/bootstrap.py +228 -0
  82. ml4t/diagnostic/evaluation/stats/deflated_sharpe_ratio.py +591 -0
  83. ml4t/diagnostic/evaluation/stats/false_discovery_rate.py +295 -0
  84. ml4t/diagnostic/evaluation/stats/hac_standard_errors.py +108 -0
  85. ml4t/diagnostic/evaluation/stats/minimum_track_record.py +408 -0
  86. ml4t/diagnostic/evaluation/stats/moments.py +164 -0
  87. ml4t/diagnostic/evaluation/stats/rademacher_adjustment.py +436 -0
  88. ml4t/diagnostic/evaluation/stats/reality_check.py +155 -0
  89. ml4t/diagnostic/evaluation/stats/sharpe_inference.py +219 -0
  90. ml4t/diagnostic/evaluation/themes.py +330 -0
  91. ml4t/diagnostic/evaluation/threshold_analysis.py +957 -0
  92. ml4t/diagnostic/evaluation/trade_analysis.py +1136 -0
  93. ml4t/diagnostic/evaluation/trade_dashboard/__init__.py +32 -0
  94. ml4t/diagnostic/evaluation/trade_dashboard/app.py +315 -0
  95. ml4t/diagnostic/evaluation/trade_dashboard/export/__init__.py +18 -0
  96. ml4t/diagnostic/evaluation/trade_dashboard/export/csv.py +82 -0
  97. ml4t/diagnostic/evaluation/trade_dashboard/export/html.py +276 -0
  98. ml4t/diagnostic/evaluation/trade_dashboard/io.py +166 -0
  99. ml4t/diagnostic/evaluation/trade_dashboard/normalize.py +304 -0
  100. ml4t/diagnostic/evaluation/trade_dashboard/stats.py +386 -0
  101. ml4t/diagnostic/evaluation/trade_dashboard/style.py +79 -0
  102. ml4t/diagnostic/evaluation/trade_dashboard/tabs/__init__.py +21 -0
  103. ml4t/diagnostic/evaluation/trade_dashboard/tabs/patterns.py +354 -0
  104. ml4t/diagnostic/evaluation/trade_dashboard/tabs/shap_analysis.py +280 -0
  105. ml4t/diagnostic/evaluation/trade_dashboard/tabs/stat_validation.py +186 -0
  106. ml4t/diagnostic/evaluation/trade_dashboard/tabs/worst_trades.py +236 -0
  107. ml4t/diagnostic/evaluation/trade_dashboard/types.py +129 -0
  108. ml4t/diagnostic/evaluation/trade_shap/__init__.py +102 -0
  109. ml4t/diagnostic/evaluation/trade_shap/alignment.py +188 -0
  110. ml4t/diagnostic/evaluation/trade_shap/characterize.py +413 -0
  111. ml4t/diagnostic/evaluation/trade_shap/cluster.py +302 -0
  112. ml4t/diagnostic/evaluation/trade_shap/explain.py +208 -0
  113. ml4t/diagnostic/evaluation/trade_shap/hypotheses/__init__.py +23 -0
  114. ml4t/diagnostic/evaluation/trade_shap/hypotheses/generator.py +290 -0
  115. ml4t/diagnostic/evaluation/trade_shap/hypotheses/matcher.py +251 -0
  116. ml4t/diagnostic/evaluation/trade_shap/hypotheses/templates.yaml +467 -0
  117. ml4t/diagnostic/evaluation/trade_shap/models.py +386 -0
  118. ml4t/diagnostic/evaluation/trade_shap/normalize.py +116 -0
  119. ml4t/diagnostic/evaluation/trade_shap/pipeline.py +263 -0
  120. ml4t/diagnostic/evaluation/trade_shap_dashboard.py +283 -0
  121. ml4t/diagnostic/evaluation/trade_shap_diagnostics.py +588 -0
  122. ml4t/diagnostic/evaluation/validated_cv.py +535 -0
  123. ml4t/diagnostic/evaluation/visualization.py +1050 -0
  124. ml4t/diagnostic/evaluation/volatility/__init__.py +45 -0
  125. ml4t/diagnostic/evaluation/volatility/analysis.py +351 -0
  126. ml4t/diagnostic/evaluation/volatility/arch.py +258 -0
  127. ml4t/diagnostic/evaluation/volatility/garch.py +460 -0
  128. ml4t/diagnostic/integration/__init__.py +48 -0
  129. ml4t/diagnostic/integration/backtest_contract.py +671 -0
  130. ml4t/diagnostic/integration/data_contract.py +316 -0
  131. ml4t/diagnostic/integration/engineer_contract.py +226 -0
  132. ml4t/diagnostic/logging/__init__.py +77 -0
  133. ml4t/diagnostic/logging/logger.py +245 -0
  134. ml4t/diagnostic/logging/performance.py +234 -0
  135. ml4t/diagnostic/logging/progress.py +234 -0
  136. ml4t/diagnostic/logging/wandb.py +412 -0
  137. ml4t/diagnostic/metrics/__init__.py +9 -0
  138. ml4t/diagnostic/metrics/percentiles.py +128 -0
  139. ml4t/diagnostic/py.typed +1 -0
  140. ml4t/diagnostic/reporting/__init__.py +43 -0
  141. ml4t/diagnostic/reporting/base.py +130 -0
  142. ml4t/diagnostic/reporting/html_renderer.py +275 -0
  143. ml4t/diagnostic/reporting/json_renderer.py +51 -0
  144. ml4t/diagnostic/reporting/markdown_renderer.py +117 -0
  145. ml4t/diagnostic/results/AGENT.md +24 -0
  146. ml4t/diagnostic/results/__init__.py +105 -0
  147. ml4t/diagnostic/results/barrier_results/__init__.py +36 -0
  148. ml4t/diagnostic/results/barrier_results/hit_rate.py +304 -0
  149. ml4t/diagnostic/results/barrier_results/precision_recall.py +266 -0
  150. ml4t/diagnostic/results/barrier_results/profit_factor.py +297 -0
  151. ml4t/diagnostic/results/barrier_results/tearsheet.py +397 -0
  152. ml4t/diagnostic/results/barrier_results/time_to_target.py +305 -0
  153. ml4t/diagnostic/results/barrier_results/validation.py +38 -0
  154. ml4t/diagnostic/results/base.py +177 -0
  155. ml4t/diagnostic/results/event_results.py +349 -0
  156. ml4t/diagnostic/results/feature_results.py +787 -0
  157. ml4t/diagnostic/results/multi_signal_results.py +431 -0
  158. ml4t/diagnostic/results/portfolio_results.py +281 -0
  159. ml4t/diagnostic/results/sharpe_results.py +448 -0
  160. ml4t/diagnostic/results/signal_results/__init__.py +74 -0
  161. ml4t/diagnostic/results/signal_results/ic.py +581 -0
  162. ml4t/diagnostic/results/signal_results/irtc.py +110 -0
  163. ml4t/diagnostic/results/signal_results/quantile.py +392 -0
  164. ml4t/diagnostic/results/signal_results/tearsheet.py +456 -0
  165. ml4t/diagnostic/results/signal_results/turnover.py +213 -0
  166. ml4t/diagnostic/results/signal_results/validation.py +147 -0
  167. ml4t/diagnostic/signal/AGENT.md +17 -0
  168. ml4t/diagnostic/signal/__init__.py +69 -0
  169. ml4t/diagnostic/signal/_report.py +152 -0
  170. ml4t/diagnostic/signal/_utils.py +261 -0
  171. ml4t/diagnostic/signal/core.py +275 -0
  172. ml4t/diagnostic/signal/quantile.py +148 -0
  173. ml4t/diagnostic/signal/result.py +214 -0
  174. ml4t/diagnostic/signal/signal_ic.py +129 -0
  175. ml4t/diagnostic/signal/turnover.py +182 -0
  176. ml4t/diagnostic/splitters/AGENT.md +19 -0
  177. ml4t/diagnostic/splitters/__init__.py +36 -0
  178. ml4t/diagnostic/splitters/base.py +501 -0
  179. ml4t/diagnostic/splitters/calendar.py +421 -0
  180. ml4t/diagnostic/splitters/calendar_config.py +91 -0
  181. ml4t/diagnostic/splitters/combinatorial.py +1064 -0
  182. ml4t/diagnostic/splitters/config.py +322 -0
  183. ml4t/diagnostic/splitters/cpcv/__init__.py +57 -0
  184. ml4t/diagnostic/splitters/cpcv/combinations.py +119 -0
  185. ml4t/diagnostic/splitters/cpcv/partitioning.py +263 -0
  186. ml4t/diagnostic/splitters/cpcv/purge_engine.py +379 -0
  187. ml4t/diagnostic/splitters/cpcv/windows.py +190 -0
  188. ml4t/diagnostic/splitters/group_isolation.py +329 -0
  189. ml4t/diagnostic/splitters/persistence.py +316 -0
  190. ml4t/diagnostic/splitters/utils.py +207 -0
  191. ml4t/diagnostic/splitters/walk_forward.py +757 -0
  192. ml4t/diagnostic/utils/__init__.py +42 -0
  193. ml4t/diagnostic/utils/config.py +542 -0
  194. ml4t/diagnostic/utils/dependencies.py +318 -0
  195. ml4t/diagnostic/utils/sessions.py +127 -0
  196. ml4t/diagnostic/validation/__init__.py +54 -0
  197. ml4t/diagnostic/validation/dataframe.py +274 -0
  198. ml4t/diagnostic/validation/returns.py +280 -0
  199. ml4t/diagnostic/validation/timeseries.py +299 -0
  200. ml4t/diagnostic/visualization/AGENT.md +19 -0
  201. ml4t/diagnostic/visualization/__init__.py +223 -0
  202. ml4t/diagnostic/visualization/backtest/__init__.py +98 -0
  203. ml4t/diagnostic/visualization/backtest/cost_attribution.py +762 -0
  204. ml4t/diagnostic/visualization/backtest/executive_summary.py +895 -0
  205. ml4t/diagnostic/visualization/backtest/interactive_controls.py +673 -0
  206. ml4t/diagnostic/visualization/backtest/statistical_validity.py +874 -0
  207. ml4t/diagnostic/visualization/backtest/tearsheet.py +565 -0
  208. ml4t/diagnostic/visualization/backtest/template_system.py +373 -0
  209. ml4t/diagnostic/visualization/backtest/trade_plots.py +1172 -0
  210. ml4t/diagnostic/visualization/barrier_plots.py +782 -0
  211. ml4t/diagnostic/visualization/core.py +1060 -0
  212. ml4t/diagnostic/visualization/dashboards/__init__.py +36 -0
  213. ml4t/diagnostic/visualization/dashboards/base.py +582 -0
  214. ml4t/diagnostic/visualization/dashboards/importance.py +801 -0
  215. ml4t/diagnostic/visualization/dashboards/interaction.py +263 -0
  216. ml4t/diagnostic/visualization/dashboards.py +43 -0
  217. ml4t/diagnostic/visualization/data_extraction/__init__.py +48 -0
  218. ml4t/diagnostic/visualization/data_extraction/importance.py +649 -0
  219. ml4t/diagnostic/visualization/data_extraction/interaction.py +504 -0
  220. ml4t/diagnostic/visualization/data_extraction/types.py +113 -0
  221. ml4t/diagnostic/visualization/data_extraction/validation.py +66 -0
  222. ml4t/diagnostic/visualization/feature_plots.py +888 -0
  223. ml4t/diagnostic/visualization/interaction_plots.py +618 -0
  224. ml4t/diagnostic/visualization/portfolio/__init__.py +41 -0
  225. ml4t/diagnostic/visualization/portfolio/dashboard.py +514 -0
  226. ml4t/diagnostic/visualization/portfolio/drawdown_plots.py +341 -0
  227. ml4t/diagnostic/visualization/portfolio/returns_plots.py +487 -0
  228. ml4t/diagnostic/visualization/portfolio/risk_plots.py +301 -0
  229. ml4t/diagnostic/visualization/report_generation.py +1343 -0
  230. ml4t/diagnostic/visualization/signal/__init__.py +103 -0
  231. ml4t/diagnostic/visualization/signal/dashboard.py +911 -0
  232. ml4t/diagnostic/visualization/signal/event_plots.py +514 -0
  233. ml4t/diagnostic/visualization/signal/ic_plots.py +635 -0
  234. ml4t/diagnostic/visualization/signal/multi_signal_dashboard.py +974 -0
  235. ml4t/diagnostic/visualization/signal/multi_signal_plots.py +603 -0
  236. ml4t/diagnostic/visualization/signal/quantile_plots.py +625 -0
  237. ml4t/diagnostic/visualization/signal/turnover_plots.py +400 -0
  238. ml4t/diagnostic/visualization/trade_shap/__init__.py +90 -0
  239. ml4t_diagnostic-0.1.0a1.dist-info/METADATA +1044 -0
  240. ml4t_diagnostic-0.1.0a1.dist-info/RECORD +242 -0
  241. ml4t_diagnostic-0.1.0a1.dist-info/WHEEL +4 -0
  242. ml4t_diagnostic-0.1.0a1.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,591 @@
1
+ """Deflated Sharpe Ratio (DSR) and Probabilistic Sharpe Ratio (PSR).
2
+
3
+ This module provides the main entry points for Sharpe ratio inference:
4
+
5
+ - deflated_sharpe_ratio: Compute DSR/PSR from raw returns (recommended)
6
+ - deflated_sharpe_ratio_from_statistics: Compute DSR from pre-computed statistics
7
+
8
+ The underlying components are in separate modules:
9
+ - moments.py: Return statistics computation
10
+ - sharpe_inference.py: Variance estimation and expected max
11
+ - minimum_track_record.py: Minimum Track Record Length
12
+ - backtest_overfitting.py: Probability of Backtest Overfitting
13
+
14
+ References
15
+ ----------
16
+ López de Prado, M., Lipton, A., & Zoonekynd, V. (2025).
17
+ "How to Use the Sharpe Ratio." ADIA Lab Research Paper Series, No. 19.
18
+
19
+ Bailey, D. H., & López de Prado, M. (2014).
20
+ "The Deflated Sharpe Ratio." Journal of Portfolio Management.
21
+ """
22
+
23
+ from __future__ import annotations
24
+
25
+ import math
26
+ from collections.abc import Sequence
27
+ from dataclasses import dataclass
28
+ from typing import Any, Literal
29
+
30
+ import numpy as np
31
+ from numpy.typing import ArrayLike
32
+ from scipy.stats import norm
33
+
34
+ from ml4t.diagnostic.evaluation.stats.backtest_overfitting import PBOResult, compute_pbo
35
+
36
+ # Import from decomposed modules
37
+ from ml4t.diagnostic.evaluation.stats.minimum_track_record import (
38
+ DEFAULT_PERIODS_PER_YEAR,
39
+ MinTRLResult,
40
+ _compute_min_trl_core,
41
+ compute_min_trl,
42
+ min_trl_fwer,
43
+ )
44
+ from ml4t.diagnostic.evaluation.stats.moments import compute_return_statistics
45
+ from ml4t.diagnostic.evaluation.stats.sharpe_inference import (
46
+ VARIANCE_RESCALING_FACTORS,
47
+ compute_expected_max_sharpe,
48
+ compute_sharpe_variance,
49
+ get_variance_rescaling_factor,
50
+ )
51
+
52
# Closed set of return sampling frequencies accepted by the entry points below.
Frequency = Literal["daily", "weekly", "monthly"]
54
+
55
+
56
@dataclass
class DSRResult:
    """Container for the outcome of a PSR / DSR significance test.

    Attributes
    ----------
    probability : float
        Probability that the true Sharpe ratio exceeds the benchmark,
        after any multiple-testing correction. Lies in [0, 1]; higher is better.
    is_significant : bool
        Whether the result is significant at ``confidence_level``.
    z_score : float
        Test statistic of the hypothesis test.
    p_value : float
        P-value for the null hypothesis that the true SR <= benchmark.

    sharpe_ratio : float
        Observed Sharpe ratio at the native return frequency.
    sharpe_ratio_annualized : float
        Annualized Sharpe ratio (for interpretation).
    benchmark_sharpe : float
        Null hypothesis threshold (default 0).

    n_samples : int
        Number of return observations (T).
    n_trials : int
        Number of strategies tested (K). K=1 means PSR, K>1 means DSR.
    frequency : str
        Return frequency ("daily", "weekly", "monthly").
    periods_per_year : int
        Number of periods per year associated with ``frequency``; shown by
        ``interpret()`` next to the frequency.

    skewness : float
        Return distribution skewness (gamma_3).
    excess_kurtosis : float
        Return distribution excess kurtosis (gamma_4 - 3). Normal = 0.
        This is what scipy.stats.kurtosis() returns by default.
    autocorrelation : float
        First-order return autocorrelation (rho).

    expected_max_sharpe : float
        Expected maximum Sharpe from noise under multiple testing.
        Zero for a single strategy (PSR).
    deflated_sharpe : float
        Observed Sharpe minus expected max: SR - E[max{SR}].
    variance_trials : float
        Cross-sectional variance of Sharpe ratios across trials.

    min_trl : float
        Minimum Track Record Length in observations.
        Can be math.inf if observed SR <= target SR.
    min_trl_years : float
        Minimum Track Record Length in calendar years.
        Can be math.inf if observed SR <= target SR.
    has_adequate_sample : bool
        Whether n_samples >= min_trl.

    confidence_level : float
        Confidence level used for significance testing.
    """

    # --- inference outcome ---
    probability: float
    is_significant: bool
    z_score: float
    p_value: float

    # --- Sharpe ratios ---
    sharpe_ratio: float
    sharpe_ratio_annualized: float
    benchmark_sharpe: float

    # --- sample description ---
    n_samples: int
    n_trials: int
    frequency: str
    periods_per_year: int

    # --- distribution statistics ---
    skewness: float
    excess_kurtosis: float  # Fisher convention: normal = 0
    autocorrelation: float

    # --- multiple-testing adjustment ---
    expected_max_sharpe: float
    deflated_sharpe: float
    variance_trials: float

    # --- minimum track record ---
    min_trl: float  # may be inf
    min_trl_years: float  # may be inf
    has_adequate_sample: bool

    # --- configuration ---
    confidence_level: float

    def interpret(self) -> str:
        """Return a multi-line, human-readable summary of the result."""
        if self.n_trials == 1:
            header = "Probabilistic Sharpe Ratio (PSR)"
            selection_note = ""
        else:
            header = f"Deflated Sharpe Ratio (DSR) - best of {self.n_trials} strategies"
            selection_note = (
                f"\n Expected max from noise: {self.expected_max_sharpe:.4f}"
                f"\n Deflated Sharpe: {self.deflated_sharpe:.4f}"
            )

        verdict = "Yes" if self.is_significant else "No"

        report: list[str] = []
        report.append(header)
        report.append(f" Frequency: {self.frequency} ({self.periods_per_year} periods/year)")
        report.append(f" Sample size: {self.n_samples} observations")
        report.append("")
        report.append(
            f" Sharpe ratio: {self.sharpe_ratio:.4f} "
            f"({self.sharpe_ratio_annualized:.2f} annualized)"
        )
        report.append(f" Benchmark: {self.benchmark_sharpe:.4f}")
        # Empty for PSR; for DSR it carries its own leading newline.
        report.append(selection_note)
        report.append("")
        report.append(f" Probability of skill: {self.probability:.1%}")
        report.append(f" Significant at {self.confidence_level * 100:.0f}%: {verdict}")
        report.append(f" P-value: {self.p_value:.4f}")
        report.append("")
        report.append(" Statistics used:")
        report.append(f" Skewness (gamma_3): {self.skewness:.3f}")
        report.append(f" Excess kurtosis (gamma_4-3): {self.excess_kurtosis:.3f}")
        report.append(f" Autocorrelation (rho): {self.autocorrelation:.3f}")

        # At most one sample-size warning is appended; the infinite case wins.
        if math.isinf(self.min_trl):
            report += [
                "",
                " WARNING: MinTRL is infinite (observed SR <= target SR)",
                " Cannot reject null hypothesis at any sample size",
            ]
        elif not self.has_adequate_sample:
            shortfall = self.min_trl - self.n_samples
            report += [
                "",
                " WARNING: Insufficient sample size",
                f" Need {shortfall:.0f} more observations ({self.min_trl_years:.1f} years total)",
            ]

        return "\n".join(report)

    def to_dict(self) -> dict[str, Any]:
        """Return every field as a plain ``dict`` keyed by field name."""
        exported = (
            "probability",
            "is_significant",
            "z_score",
            "p_value",
            "sharpe_ratio",
            "sharpe_ratio_annualized",
            "benchmark_sharpe",
            "n_samples",
            "n_trials",
            "frequency",
            "periods_per_year",
            "skewness",
            "excess_kurtosis",
            "autocorrelation",
            "expected_max_sharpe",
            "deflated_sharpe",
            "variance_trials",
            "min_trl",
            "min_trl_years",
            "has_adequate_sample",
            "confidence_level",
        )
        return {key: getattr(self, key) for key in exported}
231
+
232
+
233
def _flatten_and_drop_nan(values: ArrayLike) -> np.ndarray:
    """Return *values* as a flat 1-D array with NaN entries removed."""
    flat = np.asarray(values).flatten()
    return flat[~np.isnan(flat)]


def deflated_sharpe_ratio(
    returns: ArrayLike | Sequence[ArrayLike],
    frequency: Frequency = "daily",
    benchmark_sharpe: float = 0.0,
    confidence_level: float = 0.95,
    periods_per_year: int | None = None,
    *,
    skewness: float | None = None,
    excess_kurtosis: float | None = None,
    autocorrelation: float | None = None,
) -> DSRResult:
    """Compute Deflated Sharpe Ratio (DSR) or Probabilistic Sharpe Ratio (PSR).

    This function computes the probability that the true Sharpe ratio exceeds
    a benchmark threshold, correcting for:

    - **Non-normality**: Skewness and excess kurtosis of returns
    - **Serial correlation**: First-order autocorrelation of returns
    - **Multiple testing**: Selection bias when choosing the best of K strategies

    **Single strategy (PSR)**: Pass a single returns array.
    **Multiple strategies (DSR)**: Pass a list of returns arrays.

    NaN observations are removed from every returns array, in both the
    single- and multi-strategy paths, before statistics are computed.

    Parameters
    ----------
    returns : array-like or Sequence[array-like]
        Strategy returns at the specified frequency.
        - Single array (or a list/tuple of scalars): computes PSR
          (no multiple testing adjustment)
        - list/tuple of K > 1 arrays: computes DSR for the strategy with
          the highest observed Sharpe ratio
    frequency : {"daily", "weekly", "monthly"}, default "daily"
        Return frequency. Selects the default ``periods_per_year`` and is
        recorded on the result.
    benchmark_sharpe : float, default 0.0
        Null hypothesis threshold (SR_0) at native frequency.
    confidence_level : float, default 0.95
        Confidence level for significance testing.
    periods_per_year : int, optional
        Trading periods per year. When omitted it is looked up in
        ``DEFAULT_PERIODS_PER_YEAR`` by ``frequency``.
    skewness : float, optional
        Override computed skewness.
    excess_kurtosis : float, optional
        Override computed excess kurtosis (Fisher convention, normal=0).
    autocorrelation : float, optional
        Override computed autocorrelation.

    Returns
    -------
    DSRResult
        Comprehensive results. Use `.interpret()` for human-readable summary.

    Examples
    --------
    Single strategy (PSR):

    >>> result = deflated_sharpe_ratio(daily_returns, frequency="daily")
    >>> print(f"Probability of skill: {result.probability:.1%}")

    Multiple strategies (DSR):

    >>> strategies = [strat1_returns, strat2_returns, strat3_returns]
    >>> result = deflated_sharpe_ratio(strategies, frequency="daily")
    >>> print(f"Probability after deflation: {result.probability:.1%}")

    References
    ----------
    Lopez de Prado et al. (2025). "How to Use the Sharpe Ratio."
    """
    # Resolve periods per year
    if periods_per_year is None:
        periods_per_year = DEFAULT_PERIODS_PER_YEAR[frequency]

    annualization_factor = np.sqrt(periods_per_year)

    # A list/tuple counts as "K strategies" only when its elements are
    # themselves array-like. np.ndim also recognises NumPy scalar types
    # (np.float32, np.int64, ...) as scalars, which a plain int/float
    # isinstance check does not.
    is_sequence = isinstance(returns, list | tuple)
    is_multiple = is_sequence and len(returns) > 1 and np.ndim(returns[0]) > 0

    if is_multiple:
        # Multiple strategies - DSR. Compute the statistics once per strategy
        # and reuse the winner's tuple instead of recomputing it.
        per_strategy = [
            compute_return_statistics(_flatten_and_drop_nan(ret)) for ret in returns
        ]
        n_trials = len(per_strategy)
        sharpe_ratios = [stats[0] for stats in per_strategy]

        # Best strategy
        best_idx = int(np.argmax(sharpe_ratios))
        observed_sharpe, comp_skew, comp_kurt, comp_rho, n_samples = per_strategy[best_idx]

        # Cross-sectional variance (n_trials > 1 is guaranteed in this branch)
        variance_trials = float(np.var(sharpe_ratios, ddof=1))

    else:
        # Single strategy - PSR
        n_trials = 1
        variance_trials = 0.0

        # Unwrap a one-element list/tuple so [returns] behaves like returns.
        single = returns[0] if is_sequence and len(returns) == 1 else returns

        observed_sharpe, comp_skew, comp_kurt, comp_rho, n_samples = compute_return_statistics(
            _flatten_and_drop_nan(single)
        )

    # Use provided statistics or computed ones. The computed kurtosis is used
    # as-is where the override has 3.0 added, i.e. `kurt` is on the Pearson
    # scale (normal = 3) from here on.
    skew = skewness if skewness is not None else comp_skew
    if excess_kurtosis is not None:
        kurt = excess_kurtosis + 3.0  # Fisher -> Pearson
    else:
        kurt = comp_kurt
    rho = autocorrelation if autocorrelation is not None else comp_rho

    # Expected max Sharpe (multiple testing adjustment)
    expected_max = compute_expected_max_sharpe(n_trials, variance_trials)
    adjusted_threshold = benchmark_sharpe + expected_max

    # Variance of Sharpe estimator, evaluated at the adjusted threshold
    variance_sr = compute_sharpe_variance(
        sharpe=adjusted_threshold,
        n_samples=n_samples,
        skewness=skew,
        kurtosis=kurt,
        autocorrelation=rho,
        n_trials=n_trials,
    )
    std_sr = np.sqrt(variance_sr)

    # Z-score; a non-positive (or NaN) standard error degenerates to +/- inf
    if std_sr > 0:
        z_score = (observed_sharpe - adjusted_threshold) / std_sr
    else:
        z_score = np.inf if observed_sharpe > adjusted_threshold else -np.inf

    # Probability and p-value
    probability = float(norm.cdf(z_score))
    p_value = float(1 - probability)
    is_significant = probability >= confidence_level

    # Annualized Sharpe
    sharpe_annualized = observed_sharpe * annualization_factor
    deflated = observed_sharpe - expected_max

    # MinTRL is measured against the raw benchmark, not the adjusted threshold
    min_trl = _compute_min_trl_core(
        observed_sharpe=observed_sharpe,
        target_sharpe=benchmark_sharpe,
        confidence_level=confidence_level,
        skewness=skew,
        kurtosis=kurt,
        autocorrelation=rho,
    )
    min_trl_years = min_trl / periods_per_year
    has_adequate = bool(n_samples >= min_trl)

    return DSRResult(
        probability=probability,
        is_significant=is_significant,
        z_score=float(z_score),
        p_value=p_value,
        sharpe_ratio=float(observed_sharpe),
        sharpe_ratio_annualized=float(sharpe_annualized),
        benchmark_sharpe=benchmark_sharpe,
        n_samples=n_samples,
        n_trials=n_trials,
        frequency=frequency,
        periods_per_year=periods_per_year,
        skewness=float(skew),
        excess_kurtosis=float(kurt - 3.0),  # Pearson -> Fisher for reporting
        autocorrelation=float(rho),
        expected_max_sharpe=float(expected_max),
        deflated_sharpe=float(deflated),
        variance_trials=float(variance_trials),
        min_trl=min_trl,
        min_trl_years=float(min_trl_years),
        has_adequate_sample=has_adequate,
        confidence_level=confidence_level,
    )
424
+
425
+
426
def deflated_sharpe_ratio_from_statistics(
    observed_sharpe: float,
    n_samples: int,
    n_trials: int = 1,
    variance_trials: float = 0.0,
    benchmark_sharpe: float = 0.0,
    skewness: float = 0.0,
    excess_kurtosis: float = 0.0,
    autocorrelation: float = 0.0,
    confidence_level: float = 0.95,
    frequency: Frequency = "daily",
    periods_per_year: int | None = None,
) -> DSRResult:
    """Run the DSR/PSR test on summary statistics instead of raw returns.

    Intended for callers who already hold the required statistics; with raw
    returns available, `deflated_sharpe_ratio()` is the recommended entry point.

    Parameters
    ----------
    observed_sharpe : float
        Observed Sharpe ratio at native frequency.
    n_samples : int
        Number of return observations (T).
    n_trials : int, default 1
        Number of strategies tested (K).
    variance_trials : float, default 0.0
        Cross-sectional variance of Sharpe ratios.
    benchmark_sharpe : float, default 0.0
        Null hypothesis threshold.
    skewness : float, default 0.0
        Return skewness.
    excess_kurtosis : float, default 0.0
        Return excess kurtosis (Fisher, normal=0).
    autocorrelation : float, default 0.0
        First-order autocorrelation.
    confidence_level : float, default 0.95
        Confidence level for testing.
    frequency : {"daily", "weekly", "monthly"}, default "daily"
        Return frequency.
    periods_per_year : int, optional
        Periods per year. When omitted it is looked up in
        ``DEFAULT_PERIODS_PER_YEAR`` by ``frequency``.

    Returns
    -------
    DSRResult
        Same as `deflated_sharpe_ratio()`.

    Raises
    ------
    ValueError
        If ``n_samples`` or ``n_trials`` is below 1, if ``variance_trials``
        is not positive while ``n_trials > 1``, or if ``autocorrelation``
        lies outside the open interval (-1, 1).
    """
    # Input validation (checked in this order)
    if n_samples < 1:
        raise ValueError("n_samples must be positive")
    if n_trials < 1:
        raise ValueError("n_trials must be positive")
    if n_trials > 1 and variance_trials <= 0:
        raise ValueError("variance_trials must be positive when n_trials > 1")
    if abs(autocorrelation) >= 1:
        raise ValueError("autocorrelation must be in (-1, 1)")

    # The downstream helpers take kurtosis with the +3 offset applied.
    pearson_kurtosis = excess_kurtosis + 3.0

    resolved_periods = (
        DEFAULT_PERIODS_PER_YEAR[frequency] if periods_per_year is None else periods_per_year
    )
    annualizer = np.sqrt(resolved_periods)

    # Selection-bias hurdle: benchmark plus the expected maximum from noise
    noise_max = compute_expected_max_sharpe(n_trials, variance_trials)
    hurdle = benchmark_sharpe + noise_max

    # Standard error of the Sharpe estimator, evaluated at the hurdle
    sr_std = np.sqrt(
        compute_sharpe_variance(
            sharpe=hurdle,
            n_samples=n_samples,
            skewness=skewness,
            kurtosis=pearson_kurtosis,
            autocorrelation=autocorrelation,
            n_trials=n_trials,
        )
    )

    # Test statistic; a non-positive standard error degenerates to +/- inf
    if sr_std > 0:
        z_stat = (observed_sharpe - hurdle) / sr_std
    elif observed_sharpe > hurdle:
        z_stat = np.inf
    else:
        z_stat = -np.inf

    prob = float(norm.cdf(z_stat))

    # Minimum track record length is measured against the raw benchmark
    track_record = _compute_min_trl_core(
        observed_sharpe=observed_sharpe,
        target_sharpe=benchmark_sharpe,
        confidence_level=confidence_level,
        skewness=skewness,
        kurtosis=pearson_kurtosis,
        autocorrelation=autocorrelation,
    )

    return DSRResult(
        probability=prob,
        is_significant=prob >= confidence_level,
        z_score=float(z_stat),
        p_value=float(1 - prob),
        sharpe_ratio=float(observed_sharpe),
        sharpe_ratio_annualized=float(observed_sharpe * annualizer),
        benchmark_sharpe=benchmark_sharpe,
        n_samples=n_samples,
        n_trials=n_trials,
        frequency=frequency,
        periods_per_year=resolved_periods,
        skewness=float(skewness),
        excess_kurtosis=float(excess_kurtosis),
        autocorrelation=float(autocorrelation),
        expected_max_sharpe=float(noise_max),
        deflated_sharpe=float(observed_sharpe - noise_max),
        variance_trials=float(variance_trials),
        min_trl=track_record,
        min_trl_years=float(track_record / resolved_periods),
        has_adequate_sample=n_samples >= track_record,
        confidence_level=confidence_level,
    )
554
+
555
+
556
# -----------------------------------------------------------------------------
# Backward-compatibility aliases
# -----------------------------------------------------------------------------
# The helpers below are imported from their own modules at the top of this
# file. The underscore-prefixed names are kept as aliases so that code written
# against the earlier single-module layout (dsr.py) continues to import them
# from here.

_VARIANCE_RESCALING_FACTORS = VARIANCE_RESCALING_FACTORS
_get_variance_rescaling_factor = get_variance_rescaling_factor
_compute_return_statistics = compute_return_statistics
_compute_sharpe_variance = compute_sharpe_variance
_compute_expected_max_sharpe = compute_expected_max_sharpe
_compute_min_trl = _compute_min_trl_core

__all__ = [
    # Result container defined in this module
    "DSRResult",
    # Entry points defined in this module
    "deflated_sharpe_ratio",
    "deflated_sharpe_ratio_from_statistics",
    # Names re-exported from sibling modules
    "MinTRLResult",
    "PBOResult",
    "compute_min_trl",
    "min_trl_fwer",
    "compute_pbo",
    "DEFAULT_PERIODS_PER_YEAR",
    # Type alias
    "Frequency",
    # Underscore-prefixed compatibility aliases
    "_VARIANCE_RESCALING_FACTORS",
    "_get_variance_rescaling_factor",
    "_compute_return_statistics",
    "_compute_sharpe_variance",
    "_compute_expected_max_sharpe",
    "_compute_min_trl",
]