ml4t-diagnostic 0.1.0a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (242)
  1. ml4t/diagnostic/AGENT.md +25 -0
  2. ml4t/diagnostic/__init__.py +166 -0
  3. ml4t/diagnostic/backends/__init__.py +10 -0
  4. ml4t/diagnostic/backends/adapter.py +192 -0
  5. ml4t/diagnostic/backends/polars_backend.py +899 -0
  6. ml4t/diagnostic/caching/__init__.py +40 -0
  7. ml4t/diagnostic/caching/cache.py +331 -0
  8. ml4t/diagnostic/caching/decorators.py +131 -0
  9. ml4t/diagnostic/caching/smart_cache.py +339 -0
  10. ml4t/diagnostic/config/AGENT.md +24 -0
  11. ml4t/diagnostic/config/README.md +267 -0
  12. ml4t/diagnostic/config/__init__.py +219 -0
  13. ml4t/diagnostic/config/barrier_config.py +277 -0
  14. ml4t/diagnostic/config/base.py +301 -0
  15. ml4t/diagnostic/config/event_config.py +148 -0
  16. ml4t/diagnostic/config/feature_config.py +404 -0
  17. ml4t/diagnostic/config/multi_signal_config.py +55 -0
  18. ml4t/diagnostic/config/portfolio_config.py +215 -0
  19. ml4t/diagnostic/config/report_config.py +391 -0
  20. ml4t/diagnostic/config/sharpe_config.py +202 -0
  21. ml4t/diagnostic/config/signal_config.py +206 -0
  22. ml4t/diagnostic/config/trade_analysis_config.py +310 -0
  23. ml4t/diagnostic/config/validation.py +279 -0
  24. ml4t/diagnostic/core/__init__.py +29 -0
  25. ml4t/diagnostic/core/numba_utils.py +315 -0
  26. ml4t/diagnostic/core/purging.py +372 -0
  27. ml4t/diagnostic/core/sampling.py +471 -0
  28. ml4t/diagnostic/errors/__init__.py +205 -0
  29. ml4t/diagnostic/evaluation/AGENT.md +26 -0
  30. ml4t/diagnostic/evaluation/__init__.py +437 -0
  31. ml4t/diagnostic/evaluation/autocorrelation.py +531 -0
  32. ml4t/diagnostic/evaluation/barrier_analysis.py +1050 -0
  33. ml4t/diagnostic/evaluation/binary_metrics.py +910 -0
  34. ml4t/diagnostic/evaluation/dashboard.py +715 -0
  35. ml4t/diagnostic/evaluation/diagnostic_plots.py +1037 -0
  36. ml4t/diagnostic/evaluation/distribution/__init__.py +499 -0
  37. ml4t/diagnostic/evaluation/distribution/moments.py +299 -0
  38. ml4t/diagnostic/evaluation/distribution/tails.py +777 -0
  39. ml4t/diagnostic/evaluation/distribution/tests.py +470 -0
  40. ml4t/diagnostic/evaluation/drift/__init__.py +139 -0
  41. ml4t/diagnostic/evaluation/drift/analysis.py +432 -0
  42. ml4t/diagnostic/evaluation/drift/domain_classifier.py +517 -0
  43. ml4t/diagnostic/evaluation/drift/population_stability_index.py +310 -0
  44. ml4t/diagnostic/evaluation/drift/wasserstein.py +388 -0
  45. ml4t/diagnostic/evaluation/event_analysis.py +647 -0
  46. ml4t/diagnostic/evaluation/excursion.py +390 -0
  47. ml4t/diagnostic/evaluation/feature_diagnostics.py +873 -0
  48. ml4t/diagnostic/evaluation/feature_outcome.py +666 -0
  49. ml4t/diagnostic/evaluation/framework.py +935 -0
  50. ml4t/diagnostic/evaluation/metric_registry.py +255 -0
  51. ml4t/diagnostic/evaluation/metrics/AGENT.md +23 -0
  52. ml4t/diagnostic/evaluation/metrics/__init__.py +133 -0
  53. ml4t/diagnostic/evaluation/metrics/basic.py +160 -0
  54. ml4t/diagnostic/evaluation/metrics/conditional_ic.py +469 -0
  55. ml4t/diagnostic/evaluation/metrics/feature_outcome.py +475 -0
  56. ml4t/diagnostic/evaluation/metrics/ic_statistics.py +446 -0
  57. ml4t/diagnostic/evaluation/metrics/importance_analysis.py +338 -0
  58. ml4t/diagnostic/evaluation/metrics/importance_classical.py +375 -0
  59. ml4t/diagnostic/evaluation/metrics/importance_mda.py +371 -0
  60. ml4t/diagnostic/evaluation/metrics/importance_shap.py +715 -0
  61. ml4t/diagnostic/evaluation/metrics/information_coefficient.py +527 -0
  62. ml4t/diagnostic/evaluation/metrics/interactions.py +772 -0
  63. ml4t/diagnostic/evaluation/metrics/monotonicity.py +226 -0
  64. ml4t/diagnostic/evaluation/metrics/risk_adjusted.py +324 -0
  65. ml4t/diagnostic/evaluation/multi_signal.py +550 -0
  66. ml4t/diagnostic/evaluation/portfolio_analysis/__init__.py +83 -0
  67. ml4t/diagnostic/evaluation/portfolio_analysis/analysis.py +734 -0
  68. ml4t/diagnostic/evaluation/portfolio_analysis/metrics.py +589 -0
  69. ml4t/diagnostic/evaluation/portfolio_analysis/results.py +334 -0
  70. ml4t/diagnostic/evaluation/report_generation.py +824 -0
  71. ml4t/diagnostic/evaluation/signal_selector.py +452 -0
  72. ml4t/diagnostic/evaluation/stat_registry.py +139 -0
  73. ml4t/diagnostic/evaluation/stationarity/__init__.py +97 -0
  74. ml4t/diagnostic/evaluation/stationarity/analysis.py +518 -0
  75. ml4t/diagnostic/evaluation/stationarity/augmented_dickey_fuller.py +296 -0
  76. ml4t/diagnostic/evaluation/stationarity/kpss_test.py +308 -0
  77. ml4t/diagnostic/evaluation/stationarity/phillips_perron.py +365 -0
  78. ml4t/diagnostic/evaluation/stats/AGENT.md +43 -0
  79. ml4t/diagnostic/evaluation/stats/__init__.py +191 -0
  80. ml4t/diagnostic/evaluation/stats/backtest_overfitting.py +219 -0
  81. ml4t/diagnostic/evaluation/stats/bootstrap.py +228 -0
  82. ml4t/diagnostic/evaluation/stats/deflated_sharpe_ratio.py +591 -0
  83. ml4t/diagnostic/evaluation/stats/false_discovery_rate.py +295 -0
  84. ml4t/diagnostic/evaluation/stats/hac_standard_errors.py +108 -0
  85. ml4t/diagnostic/evaluation/stats/minimum_track_record.py +408 -0
  86. ml4t/diagnostic/evaluation/stats/moments.py +164 -0
  87. ml4t/diagnostic/evaluation/stats/rademacher_adjustment.py +436 -0
  88. ml4t/diagnostic/evaluation/stats/reality_check.py +155 -0
  89. ml4t/diagnostic/evaluation/stats/sharpe_inference.py +219 -0
  90. ml4t/diagnostic/evaluation/themes.py +330 -0
  91. ml4t/diagnostic/evaluation/threshold_analysis.py +957 -0
  92. ml4t/diagnostic/evaluation/trade_analysis.py +1136 -0
  93. ml4t/diagnostic/evaluation/trade_dashboard/__init__.py +32 -0
  94. ml4t/diagnostic/evaluation/trade_dashboard/app.py +315 -0
  95. ml4t/diagnostic/evaluation/trade_dashboard/export/__init__.py +18 -0
  96. ml4t/diagnostic/evaluation/trade_dashboard/export/csv.py +82 -0
  97. ml4t/diagnostic/evaluation/trade_dashboard/export/html.py +276 -0
  98. ml4t/diagnostic/evaluation/trade_dashboard/io.py +166 -0
  99. ml4t/diagnostic/evaluation/trade_dashboard/normalize.py +304 -0
  100. ml4t/diagnostic/evaluation/trade_dashboard/stats.py +386 -0
  101. ml4t/diagnostic/evaluation/trade_dashboard/style.py +79 -0
  102. ml4t/diagnostic/evaluation/trade_dashboard/tabs/__init__.py +21 -0
  103. ml4t/diagnostic/evaluation/trade_dashboard/tabs/patterns.py +354 -0
  104. ml4t/diagnostic/evaluation/trade_dashboard/tabs/shap_analysis.py +280 -0
  105. ml4t/diagnostic/evaluation/trade_dashboard/tabs/stat_validation.py +186 -0
  106. ml4t/diagnostic/evaluation/trade_dashboard/tabs/worst_trades.py +236 -0
  107. ml4t/diagnostic/evaluation/trade_dashboard/types.py +129 -0
  108. ml4t/diagnostic/evaluation/trade_shap/__init__.py +102 -0
  109. ml4t/diagnostic/evaluation/trade_shap/alignment.py +188 -0
  110. ml4t/diagnostic/evaluation/trade_shap/characterize.py +413 -0
  111. ml4t/diagnostic/evaluation/trade_shap/cluster.py +302 -0
  112. ml4t/diagnostic/evaluation/trade_shap/explain.py +208 -0
  113. ml4t/diagnostic/evaluation/trade_shap/hypotheses/__init__.py +23 -0
  114. ml4t/diagnostic/evaluation/trade_shap/hypotheses/generator.py +290 -0
  115. ml4t/diagnostic/evaluation/trade_shap/hypotheses/matcher.py +251 -0
  116. ml4t/diagnostic/evaluation/trade_shap/hypotheses/templates.yaml +467 -0
  117. ml4t/diagnostic/evaluation/trade_shap/models.py +386 -0
  118. ml4t/diagnostic/evaluation/trade_shap/normalize.py +116 -0
  119. ml4t/diagnostic/evaluation/trade_shap/pipeline.py +263 -0
  120. ml4t/diagnostic/evaluation/trade_shap_dashboard.py +283 -0
  121. ml4t/diagnostic/evaluation/trade_shap_diagnostics.py +588 -0
  122. ml4t/diagnostic/evaluation/validated_cv.py +535 -0
  123. ml4t/diagnostic/evaluation/visualization.py +1050 -0
  124. ml4t/diagnostic/evaluation/volatility/__init__.py +45 -0
  125. ml4t/diagnostic/evaluation/volatility/analysis.py +351 -0
  126. ml4t/diagnostic/evaluation/volatility/arch.py +258 -0
  127. ml4t/diagnostic/evaluation/volatility/garch.py +460 -0
  128. ml4t/diagnostic/integration/__init__.py +48 -0
  129. ml4t/diagnostic/integration/backtest_contract.py +671 -0
  130. ml4t/diagnostic/integration/data_contract.py +316 -0
  131. ml4t/diagnostic/integration/engineer_contract.py +226 -0
  132. ml4t/diagnostic/logging/__init__.py +77 -0
  133. ml4t/diagnostic/logging/logger.py +245 -0
  134. ml4t/diagnostic/logging/performance.py +234 -0
  135. ml4t/diagnostic/logging/progress.py +234 -0
  136. ml4t/diagnostic/logging/wandb.py +412 -0
  137. ml4t/diagnostic/metrics/__init__.py +9 -0
  138. ml4t/diagnostic/metrics/percentiles.py +128 -0
  139. ml4t/diagnostic/py.typed +1 -0
  140. ml4t/diagnostic/reporting/__init__.py +43 -0
  141. ml4t/diagnostic/reporting/base.py +130 -0
  142. ml4t/diagnostic/reporting/html_renderer.py +275 -0
  143. ml4t/diagnostic/reporting/json_renderer.py +51 -0
  144. ml4t/diagnostic/reporting/markdown_renderer.py +117 -0
  145. ml4t/diagnostic/results/AGENT.md +24 -0
  146. ml4t/diagnostic/results/__init__.py +105 -0
  147. ml4t/diagnostic/results/barrier_results/__init__.py +36 -0
  148. ml4t/diagnostic/results/barrier_results/hit_rate.py +304 -0
  149. ml4t/diagnostic/results/barrier_results/precision_recall.py +266 -0
  150. ml4t/diagnostic/results/barrier_results/profit_factor.py +297 -0
  151. ml4t/diagnostic/results/barrier_results/tearsheet.py +397 -0
  152. ml4t/diagnostic/results/barrier_results/time_to_target.py +305 -0
  153. ml4t/diagnostic/results/barrier_results/validation.py +38 -0
  154. ml4t/diagnostic/results/base.py +177 -0
  155. ml4t/diagnostic/results/event_results.py +349 -0
  156. ml4t/diagnostic/results/feature_results.py +787 -0
  157. ml4t/diagnostic/results/multi_signal_results.py +431 -0
  158. ml4t/diagnostic/results/portfolio_results.py +281 -0
  159. ml4t/diagnostic/results/sharpe_results.py +448 -0
  160. ml4t/diagnostic/results/signal_results/__init__.py +74 -0
  161. ml4t/diagnostic/results/signal_results/ic.py +581 -0
  162. ml4t/diagnostic/results/signal_results/irtc.py +110 -0
  163. ml4t/diagnostic/results/signal_results/quantile.py +392 -0
  164. ml4t/diagnostic/results/signal_results/tearsheet.py +456 -0
  165. ml4t/diagnostic/results/signal_results/turnover.py +213 -0
  166. ml4t/diagnostic/results/signal_results/validation.py +147 -0
  167. ml4t/diagnostic/signal/AGENT.md +17 -0
  168. ml4t/diagnostic/signal/__init__.py +69 -0
  169. ml4t/diagnostic/signal/_report.py +152 -0
  170. ml4t/diagnostic/signal/_utils.py +261 -0
  171. ml4t/diagnostic/signal/core.py +275 -0
  172. ml4t/diagnostic/signal/quantile.py +148 -0
  173. ml4t/diagnostic/signal/result.py +214 -0
  174. ml4t/diagnostic/signal/signal_ic.py +129 -0
  175. ml4t/diagnostic/signal/turnover.py +182 -0
  176. ml4t/diagnostic/splitters/AGENT.md +19 -0
  177. ml4t/diagnostic/splitters/__init__.py +36 -0
  178. ml4t/diagnostic/splitters/base.py +501 -0
  179. ml4t/diagnostic/splitters/calendar.py +421 -0
  180. ml4t/diagnostic/splitters/calendar_config.py +91 -0
  181. ml4t/diagnostic/splitters/combinatorial.py +1064 -0
  182. ml4t/diagnostic/splitters/config.py +322 -0
  183. ml4t/diagnostic/splitters/cpcv/__init__.py +57 -0
  184. ml4t/diagnostic/splitters/cpcv/combinations.py +119 -0
  185. ml4t/diagnostic/splitters/cpcv/partitioning.py +263 -0
  186. ml4t/diagnostic/splitters/cpcv/purge_engine.py +379 -0
  187. ml4t/diagnostic/splitters/cpcv/windows.py +190 -0
  188. ml4t/diagnostic/splitters/group_isolation.py +329 -0
  189. ml4t/diagnostic/splitters/persistence.py +316 -0
  190. ml4t/diagnostic/splitters/utils.py +207 -0
  191. ml4t/diagnostic/splitters/walk_forward.py +757 -0
  192. ml4t/diagnostic/utils/__init__.py +42 -0
  193. ml4t/diagnostic/utils/config.py +542 -0
  194. ml4t/diagnostic/utils/dependencies.py +318 -0
  195. ml4t/diagnostic/utils/sessions.py +127 -0
  196. ml4t/diagnostic/validation/__init__.py +54 -0
  197. ml4t/diagnostic/validation/dataframe.py +274 -0
  198. ml4t/diagnostic/validation/returns.py +280 -0
  199. ml4t/diagnostic/validation/timeseries.py +299 -0
  200. ml4t/diagnostic/visualization/AGENT.md +19 -0
  201. ml4t/diagnostic/visualization/__init__.py +223 -0
  202. ml4t/diagnostic/visualization/backtest/__init__.py +98 -0
  203. ml4t/diagnostic/visualization/backtest/cost_attribution.py +762 -0
  204. ml4t/diagnostic/visualization/backtest/executive_summary.py +895 -0
  205. ml4t/diagnostic/visualization/backtest/interactive_controls.py +673 -0
  206. ml4t/diagnostic/visualization/backtest/statistical_validity.py +874 -0
  207. ml4t/diagnostic/visualization/backtest/tearsheet.py +565 -0
  208. ml4t/diagnostic/visualization/backtest/template_system.py +373 -0
  209. ml4t/diagnostic/visualization/backtest/trade_plots.py +1172 -0
  210. ml4t/diagnostic/visualization/barrier_plots.py +782 -0
  211. ml4t/diagnostic/visualization/core.py +1060 -0
  212. ml4t/diagnostic/visualization/dashboards/__init__.py +36 -0
  213. ml4t/diagnostic/visualization/dashboards/base.py +582 -0
  214. ml4t/diagnostic/visualization/dashboards/importance.py +801 -0
  215. ml4t/diagnostic/visualization/dashboards/interaction.py +263 -0
  216. ml4t/diagnostic/visualization/dashboards.py +43 -0
  217. ml4t/diagnostic/visualization/data_extraction/__init__.py +48 -0
  218. ml4t/diagnostic/visualization/data_extraction/importance.py +649 -0
  219. ml4t/diagnostic/visualization/data_extraction/interaction.py +504 -0
  220. ml4t/diagnostic/visualization/data_extraction/types.py +113 -0
  221. ml4t/diagnostic/visualization/data_extraction/validation.py +66 -0
  222. ml4t/diagnostic/visualization/feature_plots.py +888 -0
  223. ml4t/diagnostic/visualization/interaction_plots.py +618 -0
  224. ml4t/diagnostic/visualization/portfolio/__init__.py +41 -0
  225. ml4t/diagnostic/visualization/portfolio/dashboard.py +514 -0
  226. ml4t/diagnostic/visualization/portfolio/drawdown_plots.py +341 -0
  227. ml4t/diagnostic/visualization/portfolio/returns_plots.py +487 -0
  228. ml4t/diagnostic/visualization/portfolio/risk_plots.py +301 -0
  229. ml4t/diagnostic/visualization/report_generation.py +1343 -0
  230. ml4t/diagnostic/visualization/signal/__init__.py +103 -0
  231. ml4t/diagnostic/visualization/signal/dashboard.py +911 -0
  232. ml4t/diagnostic/visualization/signal/event_plots.py +514 -0
  233. ml4t/diagnostic/visualization/signal/ic_plots.py +635 -0
  234. ml4t/diagnostic/visualization/signal/multi_signal_dashboard.py +974 -0
  235. ml4t/diagnostic/visualization/signal/multi_signal_plots.py +603 -0
  236. ml4t/diagnostic/visualization/signal/quantile_plots.py +625 -0
  237. ml4t/diagnostic/visualization/signal/turnover_plots.py +400 -0
  238. ml4t/diagnostic/visualization/trade_shap/__init__.py +90 -0
  239. ml4t_diagnostic-0.1.0a1.dist-info/METADATA +1044 -0
  240. ml4t_diagnostic-0.1.0a1.dist-info/RECORD +242 -0
  241. ml4t_diagnostic-0.1.0a1.dist-info/WHEEL +4 -0
  242. ml4t_diagnostic-0.1.0a1.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,734 @@
+ """PortfolioAnalysis class - modern pyfolio replacement.
+
+ This module provides the PortfolioAnalysis class for comprehensive
+ portfolio tear sheet generation with:
+ - Polars backend (10-100x faster than pandas)
+ - Plotly visualizations (interactive, shareable)
+ - Enhanced statistics (DSR, regime analysis, Bayesian comparison)
+ - Pyfolio feature parity plus modern enhancements
+
+ Example:
+     >>> from ml4t.diagnostic.evaluation import PortfolioAnalysis
+     >>>
+     >>> analysis = PortfolioAnalysis(
+     ...     returns=strategy_returns,
+     ...     benchmark=spy_returns,
+     ... )
+     >>> metrics = analysis.compute_summary_stats()
+     >>> print(metrics.summary())
+ """
+
+ from __future__ import annotations
+
+ from typing import TYPE_CHECKING, Union
+
+ import numpy as np
+ import polars as pl
+ from scipy import stats
+
+ from .metrics import (
+     _safe_cumprod,
+     _safe_prod,
+     _to_numpy,
+     alpha_beta,
+     annual_return,
+     annual_volatility,
+     calmar_ratio,
+     conditional_var,
+     information_ratio,
+     max_drawdown,
+     omega_ratio,
+     sharpe_ratio,
+     sortino_ratio,
+     stability_of_timeseries,
+     tail_ratio,
+     up_down_capture,
+     value_at_risk,
+ )
+ from .results import (
+     DistributionResult,
+     DrawdownPeriod,
+     DrawdownResult,
+     PortfolioMetrics,
+     RollingMetricsResult,
+ )
+
+ if TYPE_CHECKING:
+     import pandas as pd
+
+ # Type aliases - use Union for Python 3.9 compatibility
+ ArrayLike = Union[np.ndarray, pl.Series, "list[float]"]
+ DataFrameLike = Union[pl.DataFrame, "pd.DataFrame"]
+
+
+ class PortfolioAnalysis:
+     """Modern replacement for pyfolio.create_full_tear_sheet().
+
+     Provides comprehensive portfolio analysis with:
+     - Polars backend (10-100x faster than pandas)
+     - Plotly visualizations (interactive, shareable)
+     - Enhanced statistics (DSR, regime analysis, Bayesian comparison)
+     - Full pyfolio feature parity plus modern enhancements
+
+     Parameters
+     ----------
+     returns : Series
+         Daily returns of the strategy (non-cumulative).
+         Accepts Polars Series, Pandas Series, or numpy array.
+     benchmark : Series, optional
+         Benchmark returns for alpha/beta calculation (e.g., SPY).
+     positions : DataFrame, optional
+         Daily position values by asset.
+         Columns: [date, asset, value] or pivoted with assets as columns.
+     transactions : DataFrame, optional
+         Trade execution records.
+         Columns: [date, asset, quantity, price, commission]
+     dates : Series, optional
+         Dates aligned with ``returns``. If omitted, a synthetic daily
+         calendar starting 2000-01-01 is generated.
+     risk_free : float, default 0.0
+         Annual risk-free rate for Sharpe/Sortino calculation.
+     periods_per_year : int, default 252
+         Trading periods per year (252 for daily data).
+
+     Examples
+     --------
+     >>> # Basic usage
+     >>> analysis = PortfolioAnalysis(returns=daily_returns)
+     >>> metrics = analysis.compute_summary_stats()
+     >>> print(metrics.summary())
+
+     >>> # With benchmark
+     >>> analysis = PortfolioAnalysis(
+     ...     returns=strategy_returns,
+     ...     benchmark=spy_returns,
+     ... )
+     >>> metrics = analysis.compute_summary_stats()
+     >>> print(f"Alpha: {metrics.alpha:.2%}")
+     >>> print(f"Beta: {metrics.beta:.2f}")
+
+     >>> # With positions and transactions
+     >>> analysis = PortfolioAnalysis(
+     ...     returns=strategy_returns,
+     ...     positions=position_df,
+     ...     transactions=trades_df,
+     ... )
+     >>> tear_sheet = analysis.create_tear_sheet()
+     >>> tear_sheet.save_html("report.html")
+     """
+
+     def __init__(
+         self,
+         returns: ArrayLike | pl.Series,
+         benchmark: ArrayLike | pl.Series | None = None,
+         positions: DataFrameLike | None = None,
+         transactions: DataFrameLike | None = None,
+         dates: ArrayLike | pl.Series | None = None,
+         risk_free: float = 0.0,
+         periods_per_year: int = 252,
+     ):
+         # Convert returns to numpy
+         self._returns = _to_numpy(returns)
+
+         # Handle dates
+         if dates is not None:
+             if isinstance(dates, pl.Series):
+                 self._dates = dates
+             else:
+                 self._dates = pl.Series("date", dates)
+         else:
+             # Generate synthetic dates
+             self._dates = pl.Series(
+                 "date",
+                 pl.date_range(
+                     pl.date(2000, 1, 1),
+                     pl.date(2000, 1, 1) + pl.duration(days=len(self._returns) - 1),
+                     eager=True,
+                 ),
+             )
+
+         # Convert benchmark if provided
+         self._benchmark = _to_numpy(benchmark) if benchmark is not None else None
+
+         # Store positions and transactions (convert to Polars if needed)
+         self._positions = self._to_polars_df(positions) if positions is not None else None
+         self._transactions = self._to_polars_df(transactions) if transactions is not None else None
+
+         # Configuration
+         self._risk_free = risk_free
+         self._periods_per_year = periods_per_year
+
+         # Cached results (drawdown cache is keyed by its arguments; see compute_drawdown_analysis)
+         self._metrics_cache: PortfolioMetrics | None = None
+         self._rolling_cache: dict[tuple, RollingMetricsResult] = {}
+         self._drawdown_cache: dict[tuple[int, float], DrawdownResult] = {}
+
+     @staticmethod
+     def _to_polars_df(df: DataFrameLike | None) -> pl.DataFrame | None:
+         """Convert DataFrame to Polars."""
+         if df is None:
+             return None
+         if isinstance(df, pl.DataFrame):
+             return df
+         # Assume pandas DataFrame
+         return pl.from_pandas(df)
+
+     @property
+     def returns(self) -> np.ndarray:
+         """Get returns as numpy array."""
+         return self._returns
+
+     @property
+     def dates(self) -> pl.Series:
+         """Get dates as Polars Series."""
+         return self._dates
+
+     @property
+     def benchmark(self) -> np.ndarray | None:
+         """Get benchmark returns as numpy array."""
+         return self._benchmark
+
+     @property
+     def has_benchmark(self) -> bool:
+         """Check if benchmark was provided."""
+         return self._benchmark is not None
+
+     @property
+     def has_positions(self) -> bool:
+         """Check if positions data was provided."""
+         return self._positions is not None
+
+     @property
+     def has_transactions(self) -> bool:
+         """Check if transactions data was provided."""
+         return self._transactions is not None
+
+     # =========================================================================
+     # Core Metric Methods
+     # =========================================================================
+
+     def compute_summary_stats(self, force_recompute: bool = False) -> PortfolioMetrics:
+         """Compute all standard portfolio metrics.
+
+         This is the main method for getting performance statistics,
+         equivalent to pyfolio's perf_stats output.
+
+         Parameters
+         ----------
+         force_recompute : bool, default False
+             Force recomputation even if cached
+
+         Returns
+         -------
+         PortfolioMetrics
+             Complete set of portfolio metrics
+
+         Examples
+         --------
+         >>> metrics = analysis.compute_summary_stats()
+         >>> print(f"Sharpe: {metrics.sharpe_ratio:.2f}")
+         >>> print(f"Max Drawdown: {metrics.max_drawdown:.1%}")
+         """
+         if self._metrics_cache is not None and not force_recompute:
+             return self._metrics_cache
+
+         returns = self._returns
+         rf = self._risk_free
+         ppy = self._periods_per_year
+
+         # Basic returns
+         total_ret = float(_safe_prod(1 + returns) - 1)
+         ann_ret = annual_return(returns, ppy)
+         ann_vol = annual_volatility(returns, ppy)
+
+         # Risk-adjusted
+         sr = sharpe_ratio(returns, rf, ppy)
+         sortino = sortino_ratio(returns, rf, ppy)
+         calmar = calmar_ratio(returns, ppy)
+         omega = omega_ratio(returns)
+         tail = tail_ratio(returns)
+
+         # Drawdown
+         max_dd = max_drawdown(returns)
+
+         # Distribution
+         skew = float(stats.skew(returns[~np.isnan(returns)]))
+         kurt = float(stats.kurtosis(returns[~np.isnan(returns)]))
+
+         # Risk
+         var95 = value_at_risk(returns, 0.95)
+         cvar95 = conditional_var(returns, 0.95)
+
+         # Stability
+         stability = stability_of_timeseries(returns)
+
+         # Win/loss
+         wins = returns[returns > 0]
+         losses = returns[returns < 0]
+
+         win_rate = len(wins) / len(returns) if len(returns) > 0 else np.nan
+         avg_win = float(np.mean(wins)) if len(wins) > 0 else 0.0
+         avg_loss = float(np.mean(losses)) if len(losses) > 0 else 0.0
+
+         gross_profit = float(np.sum(wins)) if len(wins) > 0 else 0.0
+         gross_loss = float(abs(np.sum(losses))) if len(losses) > 0 else 0.0
+         profit_factor = gross_profit / gross_loss if gross_loss > 0 else np.inf
+
+         # Benchmark metrics
+         alpha_val = beta_val = ir = up_cap = down_cap = None
+
+         if self.has_benchmark and self._benchmark is not None:
+             alpha_val, beta_val = alpha_beta(returns, self._benchmark, rf, ppy)
+             ir = information_ratio(returns, self._benchmark, ppy)
+             up_cap, down_cap = up_down_capture(returns, self._benchmark)
+
+         self._metrics_cache = PortfolioMetrics(
+             total_return=total_ret,
+             annual_return=ann_ret,
+             annual_volatility=ann_vol,
+             sharpe_ratio=sr,
+             sortino_ratio=sortino,
+             calmar_ratio=calmar,
+             omega_ratio=omega,
+             tail_ratio=tail,
+             max_drawdown=max_dd,
+             skewness=skew,
+             kurtosis=kurt,
+             var_95=var95,
+             cvar_95=cvar95,
+             stability=stability,
+             win_rate=win_rate,
+             profit_factor=profit_factor,
+             avg_win=avg_win,
+             avg_loss=avg_loss,
+             alpha=alpha_val,
+             beta=beta_val,
+             information_ratio=ir,
+             up_capture=up_cap,
+             down_capture=down_cap,
+         )
+
+         return self._metrics_cache
+
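
Exercising the summary path end to end is straightforward with synthetic data. A minimal sketch (hedged: it assumes the alpha wheel installs cleanly and exposes `PortfolioAnalysis` at the import path shown in the module docstring; the random returns are stand-ins, not real strategy data):

```python
import numpy as np

from ml4t.diagnostic.evaluation import PortfolioAnalysis

rng = np.random.default_rng(42)
strategy = rng.normal(0.0005, 0.01, size=504)    # ~2 years of synthetic daily returns
benchmark = rng.normal(0.0003, 0.008, size=504)  # stand-in for SPY

analysis = PortfolioAnalysis(
    returns=strategy,
    benchmark=benchmark,
    risk_free=0.02,        # 2% annual risk-free rate
    periods_per_year=252,
)
metrics = analysis.compute_summary_stats()
print(f"Sharpe:       {metrics.sharpe_ratio:.2f}")
print(f"Max drawdown: {metrics.max_drawdown:.1%}")
print(f"Beta:         {metrics.beta:.2f}")   # populated because a benchmark was passed
```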
+     def compute_rolling_metrics(
+         self,
+         windows: list[int] | None = None,
+         metrics: list[str] | None = None,
+     ) -> RollingMetricsResult:
+         """Compute rolling metrics over multiple windows.
+
+         Parameters
+         ----------
+         windows : list[int], optional
+             Window sizes in periods. Default [21, 63, 126, 252].
+         metrics : list[str], optional
+             Which metrics to compute. Default ["sharpe", "volatility", "returns"].
+             "beta" is also supported when a benchmark was provided.
+
+         Returns
+         -------
+         RollingMetricsResult
+             Rolling metrics for each window
+
+         Examples
+         --------
+         >>> rolling = analysis.compute_rolling_metrics(windows=[21, 63, 252])
+         >>> sharpe_df = rolling.to_dataframe("sharpe")
+         """
+         if windows is None:
+             windows = [21, 63, 126, 252]
+
+         if metrics is None:
+             metrics = ["sharpe", "volatility", "returns"]
+
+         cache_key = (tuple(windows), tuple(metrics))
+         if cache_key in self._rolling_cache:
+             return self._rolling_cache[cache_key]
+
+         returns = self._returns
+         rf = self._risk_free
+         ppy = self._periods_per_year
+
+         result = RollingMetricsResult(windows=windows, dates=self._dates)
+
+         for window in windows:
+             if "sharpe" in metrics:
+                 rolling_sharpe = self._rolling_sharpe(returns, window, rf, ppy)
+                 result.sharpe[window] = pl.Series(f"sharpe_{window}d", rolling_sharpe)
+
+             if "volatility" in metrics:
+                 rolling_vol = self._rolling_volatility(returns, window, ppy)
+                 result.volatility[window] = pl.Series(f"vol_{window}d", rolling_vol)
+
+             if "returns" in metrics:
+                 rolling_ret = self._rolling_returns(returns, window)
+                 result.returns[window] = pl.Series(f"ret_{window}d", rolling_ret)
+
+             if "beta" in metrics and self.has_benchmark and self._benchmark is not None:
+                 rolling_beta = self._rolling_beta(returns, self._benchmark, window)
+                 result.beta[window] = pl.Series(f"beta_{window}d", rolling_beta)
+
+         self._rolling_cache[cache_key] = result
+         return result
+
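
Note the shape of the result: each requested metric is stored as a `{window: pl.Series}` mapping on `RollingMetricsResult`, with series named like `sharpe_63d`. A short usage sketch, reusing `analysis` from the example above:

```python
rolling = analysis.compute_rolling_metrics(windows=[21, 63], metrics=["sharpe", "volatility"])

sharpe_63 = rolling.sharpe[63]   # pl.Series named "sharpe_63d"; NaN over the warm-up period
vol_21 = rolling.volatility[21]  # pl.Series named "vol_21d"
print(sharpe_63.name, vol_21.name)
```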
+     @staticmethod
+     def _rolling_sharpe(
+         returns: np.ndarray,
+         window: int,
+         risk_free: float,
+         periods_per_year: int,
+     ) -> np.ndarray:
+         """Compute rolling Sharpe ratio using vectorized sliding_window_view."""
+         from numpy.lib.stride_tricks import sliding_window_view
+
+         n = len(returns)
+         result = np.full(n, np.nan)
+
+         if n < window:
+             return result
+
+         daily_rf = (1 + risk_free) ** (1 / periods_per_year) - 1
+
+         # Vectorized: create all windows at once
+         windows = sliding_window_view(returns, window)
+         excess = windows - daily_rf
+
+         # Compute mean and std across each window (axis=1)
+         mu = np.mean(excess, axis=1)
+         sd = np.std(excess, axis=1, ddof=1)
+
+         # Sharpe where std > 0
+         sharpe = np.where(sd > 0, (mu / sd) * np.sqrt(periods_per_year), np.nan)
+         result[window - 1 :] = sharpe
+
+         return result
+
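
`sliding_window_view` materializes a strided `(n - window + 1, window)` view, so the per-window mean/std reduce along `axis=1` with no Python loop. A self-contained check of that formulation against a naive loop (pure NumPy, independent of this class):

```python
import numpy as np
from numpy.lib.stride_tricks import sliding_window_view

def rolling_sharpe_naive(returns: np.ndarray, window: int, daily_rf: float, ppy: int) -> np.ndarray:
    out = np.full(len(returns), np.nan)
    for i in range(window - 1, len(returns)):
        w = returns[i - window + 1 : i + 1] - daily_rf
        sd = w.std(ddof=1)
        if sd > 0:
            out[i] = w.mean() / sd * np.sqrt(ppy)
    return out

rng = np.random.default_rng(0)
r = rng.normal(0.0005, 0.01, 300)
window, ppy = 63, 252
daily_rf = (1 + 0.02) ** (1 / ppy) - 1   # same de-annualization as _rolling_sharpe

excess = sliding_window_view(r, window) - daily_rf
fast = np.full(len(r), np.nan)
fast[window - 1 :] = excess.mean(axis=1) / excess.std(axis=1, ddof=1) * np.sqrt(ppy)

naive = rolling_sharpe_naive(r, window, daily_rf, ppy)
assert np.allclose(fast[window - 1 :], naive[window - 1 :])
```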
+     @staticmethod
+     def _rolling_volatility(
+         returns: np.ndarray,
+         window: int,
+         periods_per_year: int,
+     ) -> np.ndarray:
+         """Compute rolling annualized volatility using vectorized sliding_window_view."""
+         from numpy.lib.stride_tricks import sliding_window_view
+
+         n = len(returns)
+         result = np.full(n, np.nan)
+
+         if n < window:
+             return result
+
+         # Vectorized: create all windows at once
+         windows = sliding_window_view(returns, window)
+         sd = np.std(windows, axis=1, ddof=1)
+         result[window - 1 :] = sd * np.sqrt(periods_per_year)
+
+         return result
+
+     @staticmethod
+     def _rolling_returns(
+         returns: np.ndarray,
+         window: int,
+     ) -> np.ndarray:
+         """Compute rolling cumulative returns using O(n) log1p cumsum."""
+         n = len(returns)
+         result = np.full(n, np.nan)
+
+         if n < window:
+             return result
+
+         # Vectorized O(n): use log1p cumsum for compound returns
+         # Requires returns > -1 (valid for typical financial returns)
+         # Clip to prevent log of non-positive numbers
+         safe_returns = np.clip(returns, -0.9999, None)
+         log_returns = np.log1p(safe_returns)
+         cumsum = np.concatenate(([0.0], np.cumsum(log_returns)))
+
+         # Rolling sum of log returns = log(compound return + 1)
+         window_log_sum = cumsum[window:] - cumsum[:-window]
+         result[window - 1 :] = np.expm1(window_log_sum)
+
+         return result
+
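
The trick in `_rolling_returns` is the identity `prod(1 + r_t) - 1 == expm1(sum(log1p(r_t)))` for returns above -1, with each windowed sum obtained as a prefix-sum difference in O(n). A quick numerical confirmation against the direct product:

```python
import numpy as np

rng = np.random.default_rng(1)
r = rng.normal(0.0005, 0.01, 250)
window = 21

# O(n) prefix-sum formulation, as in _rolling_returns
cumsum = np.concatenate(([0.0], np.cumsum(np.log1p(r))))
fast = np.expm1(cumsum[window:] - cumsum[:-window])

# Direct O(n * window) product, for comparison
slow = np.array([np.prod(1 + r[i : i + window]) - 1 for i in range(len(r) - window + 1)])

assert np.allclose(fast, slow)
```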
+     @staticmethod
+     def _rolling_beta(
+         returns: np.ndarray,
+         benchmark: np.ndarray,
+         window: int,
+     ) -> np.ndarray:
+         """Compute rolling beta using vectorized sliding_window_view."""
+         from numpy.lib.stride_tricks import sliding_window_view
+
+         n = len(returns)
+         result = np.full(n, np.nan)
+
+         if n < window:
+             return result
+
+         # Vectorized: create all windows at once
+         ret_windows = sliding_window_view(returns, window)
+         bench_windows = sliding_window_view(benchmark, window)
+
+         # Compute means
+         ret_mean = np.mean(ret_windows, axis=1, keepdims=True)
+         bench_mean = np.mean(bench_windows, axis=1, keepdims=True)
+
+         # Deviations from mean
+         ret_dev = ret_windows - ret_mean
+         bench_dev = bench_windows - bench_mean
+
+         # Covariance and variance (using ddof=1 for sample variance)
+         cov = np.sum(ret_dev * bench_dev, axis=1) / (window - 1)
+         var = np.sum(bench_dev * bench_dev, axis=1) / (window - 1)
+
+         # Beta = cov / var where var > 0
+         beta = np.where(var > 0, cov / var, np.nan)
+         result[window - 1 :] = beta
+
+         return result
+
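
Each window's beta is sample covariance over sample variance, so any single window can be cross-checked against `np.cov` (which also defaults to ddof=1); the explicit `(window - 1)` denominators in the method cancel in the ratio. A short consistency check:

```python
import numpy as np
from numpy.lib.stride_tricks import sliding_window_view

rng = np.random.default_rng(2)
bench = rng.normal(0.0003, 0.009, 200)
ret = 1.2 * bench + rng.normal(0.0, 0.004, 200)   # true beta around 1.2
window = 60

rw = sliding_window_view(ret, window)
bw = sliding_window_view(bench, window)
rd = rw - rw.mean(axis=1, keepdims=True)
bd = bw - bw.mean(axis=1, keepdims=True)
beta = (rd * bd).sum(axis=1) / (bd * bd).sum(axis=1)  # (window - 1) factors cancel

# Cross-check the final window against np.cov
c = np.cov(ret[-window:], bench[-window:])
assert np.isclose(beta[-1], c[0, 1] / c[1, 1])
print(f"rolling beta, last window: {beta[-1]:.2f}")
```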
+     def compute_drawdown_analysis(
+         self,
+         top_n: int = 5,
+         threshold: float = 0.01,
+     ) -> DrawdownResult:
+         """Compute detailed drawdown analysis.
+
+         Parameters
+         ----------
+         top_n : int, default 5
+             Number of top drawdowns to identify
+         threshold : float, default 0.01
+             Minimum drawdown depth to count (1%)
+
+         Returns
+         -------
+         DrawdownResult
+             Detailed drawdown statistics
+
+         Examples
+         --------
+         >>> dd = analysis.compute_drawdown_analysis(top_n=10)
+         >>> print(f"Max drawdown: {dd.max_drawdown:.1%}")
+         >>> print(f"Avg duration: {dd.avg_duration_days:.0f} days")
+         """
+         # Key the cache by the arguments so calls with a different top_n or
+         # threshold do not return stale results
+         cache_key = (top_n, threshold)
+         if cache_key in self._drawdown_cache:
+             return self._drawdown_cache[cache_key]
+
+         returns = self._returns
+         dates = self._dates
+
+         # Compute cumulative returns and running max
+         cum_returns = _safe_cumprod(1 + returns)
+         running_max = np.maximum.accumulate(cum_returns)
+
+         # Underwater curve
+         underwater = (cum_returns - running_max) / running_max
+
+         # Identify drawdown periods
+         drawdown_periods = self._identify_drawdown_periods(underwater, dates, threshold)
+
+         # Sort by depth (most negative first) and take top N
+         drawdown_periods.sort(key=lambda x: x.depth)
+         top_drawdowns = drawdown_periods[:top_n]
+
+         # Statistics (guard against empty input)
+         current_dd = float(underwater[-1]) if len(underwater) > 0 else 0.0
+         max_dd = float(np.min(underwater)) if len(underwater) > 0 else 0.0
+         avg_dd = (
+             float(np.mean(underwater[underwater < -threshold]))
+             if np.any(underwater < -threshold)
+             else 0.0
+         )
+
+         durations = [p.duration_days for p in drawdown_periods if p.duration_days > 0]
+         max_duration = max(durations) if durations else 0
+         avg_duration = float(np.mean(durations)) if durations else 0.0
+
+         self._drawdown_cache[cache_key] = DrawdownResult(
+             current_drawdown=current_dd,
+             max_drawdown=max_dd,
+             avg_drawdown=avg_dd,
+             underwater_curve=pl.Series("drawdown", underwater),
+             top_drawdowns=top_drawdowns,
+             max_duration_days=max_duration,
+             avg_duration_days=avg_duration,
+             num_drawdowns=len(drawdown_periods),
+             dates=dates,
+         )
+
+         return self._drawdown_cache[cache_key]
+
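
The underwater curve is the heart of this method: cumulative wealth divided by its running maximum, minus one, which is non-positive by construction. A standalone sketch of the same computation (pure NumPy; `_safe_cumprod` is assumed to act like `np.cumprod` on clean inputs):

```python
import numpy as np

r = np.array([0.02, -0.01, -0.03, 0.01, 0.04, -0.02])
wealth = np.cumprod(1 + r)                          # growth of $1
running_max = np.maximum.accumulate(wealth)
underwater = (wealth - running_max) / running_max   # 0 at new highs, negative in drawdowns

print(np.round(underwater, 4))
print(f"max drawdown: {underwater.min():.2%}")
```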
+     def _identify_drawdown_periods(
+         self,
+         underwater: np.ndarray,
+         dates: pl.Series,
+         threshold: float,
+     ) -> list[DrawdownPeriod]:
+         """Identify individual drawdown periods."""
+         periods = []
+
+         in_drawdown = False
+         peak_idx = 0
+         valley_idx = 0
+         valley_depth = 0.0
+
+         for i, dd in enumerate(underwater):
+             if dd < -threshold and not in_drawdown:
+                 # Start of drawdown
+                 in_drawdown = True
+                 peak_idx = i - 1 if i > 0 else 0
+                 valley_idx = i
+                 valley_depth = dd
+             elif in_drawdown:
+                 if dd < valley_depth:
+                     # New valley
+                     valley_idx = i
+                     valley_depth = dd
+                 elif dd >= 0:
+                     # Recovery
+                     period = DrawdownPeriod(
+                         peak_date=dates[peak_idx],
+                         valley_date=dates[valley_idx],
+                         recovery_date=dates[i],
+                         depth=valley_depth,
+                         duration_days=valley_idx - peak_idx,
+                         recovery_days=i - valley_idx,
+                     )
+                     periods.append(period)
+                     in_drawdown = False
+
+         # Handle ongoing drawdown
+         if in_drawdown:
+             period = DrawdownPeriod(
+                 peak_date=dates[peak_idx],
+                 valley_date=dates[valley_idx],
+                 recovery_date=None,
+                 depth=valley_depth,
+                 duration_days=valley_idx - peak_idx,
+                 recovery_days=None,
+             )
+             periods.append(period)
+
+         return periods
+
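
The period detector is a small state machine: a drawdown opens when the curve falls below `-threshold`, the valley tracks the running minimum, and the period closes when the curve touches zero again. Tracing it on a hand-built curve (a standalone reimplementation of the same logic, with indices instead of dates):

```python
underwater = [0.0, -0.005, -0.02, -0.05, -0.01, 0.0, -0.03]
threshold = 0.01

periods, in_dd = [], False
for i, dd in enumerate(underwater):
    if dd < -threshold and not in_dd:
        in_dd, peak, valley, depth = True, max(i - 1, 0), i, dd
    elif in_dd:
        if dd < depth:
            valley, depth = i, dd                     # deeper valley
        elif dd >= 0:
            periods.append((peak, valley, i, depth))  # closed by recovery
            in_dd = False
if in_dd:
    periods.append((peak, valley, None, depth))       # still-open drawdown

print(periods)  # [(1, 3, 5, -0.05), (5, 6, None, -0.03)]
```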
+     def compute_returns_distribution(self) -> DistributionResult:
+         """Compute returns distribution analysis.
+
+         Returns
+         -------
+         DistributionResult
+             Distribution statistics and normality tests
+         """
+         returns = self._returns
+         clean_returns = returns[~np.isnan(returns)]
+
+         # Moments
+         mean = float(np.mean(clean_returns))
+         std = float(np.std(clean_returns, ddof=1))
+         skew = float(stats.skew(clean_returns))
+         kurt = float(stats.kurtosis(clean_returns))
+
+         # Jarque-Bera test
+         jb_stat, jb_pval = stats.jarque_bera(clean_returns)
+
+         # VaR/CVaR
+         var95 = value_at_risk(returns, 0.95)
+         var99 = value_at_risk(returns, 0.99)
+         cvar95 = conditional_var(returns, 0.95)
+         cvar99 = conditional_var(returns, 0.99)
+
+         return DistributionResult(
+             mean=mean,
+             std=std,
+             skewness=skew,
+             kurtosis=kurt,
+             jarque_bera_stat=float(jb_stat),
+             jarque_bera_pvalue=float(jb_pval),
+             is_normal=jb_pval > 0.05,
+             var_95=var95,
+             var_99=var99,
+             cvar_95=cvar95,
+             cvar_99=cvar99,
+             best_day=float(np.max(clean_returns)),
+             worst_day=float(np.min(clean_returns)),
+         )
+
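
`is_normal` is simply a Jarque-Bera test at the 5% level, which fat-tailed daily returns will usually fail even when mean and variance look benign. A minimal illustration with SciPy (illustrative only; the cutoff mirrors the `jb_pval > 0.05` check above):

```python
import numpy as np
from scipy import stats

rng = np.random.default_rng(3)
gaussian = rng.normal(0.0, 0.01, 2000)
fat_tailed = rng.standard_t(df=3, size=2000) * 0.01

for name, sample in [("gaussian", gaussian), ("student-t df=3", fat_tailed)]:
    jb_stat, jb_pval = stats.jarque_bera(sample)
    print(f"{name}: JB={jb_stat:.1f}, p={jb_pval:.4f}, is_normal={jb_pval > 0.05}")
```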
+     # =========================================================================
+     # Monthly / Annual Returns
+     # =========================================================================
+
+     def compute_monthly_returns(self) -> pl.DataFrame:
+         """Compute monthly returns.
+
+         Returns
+         -------
+         pl.DataFrame
+             Monthly returns with year and month columns
+         """
+         df = pl.DataFrame(
+             {
+                 "date": self._dates,
+                 "return": self._returns,
+             }
+         )
+
+         # Group by year-month and compound
+         monthly = (
+             df.with_columns(
+                 [
+                     pl.col("date").dt.year().alias("year"),
+                     pl.col("date").dt.month().alias("month"),
+                 ]
+             )
+             .group_by(["year", "month"])
+             .agg(((1 + pl.col("return")).product() - 1).alias("monthly_return"))
+             .sort(["year", "month"])
+         )
+
+         return monthly
+
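
The aggregation compounds within each (year, month) group rather than summing daily returns. A self-contained Polars sketch of the same pattern on toy data (flat 10 bp/day, so each month compounds to roughly days x 0.1%):

```python
from datetime import date

import polars as pl

df = pl.DataFrame({
    "date": pl.date_range(date(2024, 1, 1), date(2024, 3, 31), "1d", eager=True),
    "return": [0.001] * 91,   # 91 calendar days: Jan 31 + Feb 29 + Mar 31
})

monthly = (
    df.with_columns(
        pl.col("date").dt.year().alias("year"),
        pl.col("date").dt.month().alias("month"),
    )
    .group_by(["year", "month"])
    .agg(((1 + pl.col("return")).product() - 1).alias("monthly_return"))
    .sort(["year", "month"])
)
print(monthly)   # three rows; January compounds to (1.001 ** 31) - 1, about 3.15%
```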
+     def compute_annual_returns(self) -> pl.DataFrame:
+         """Compute annual returns.
+
+         Returns
+         -------
+         pl.DataFrame
+             Annual returns with year column
+         """
+         df = pl.DataFrame(
+             {
+                 "date": self._dates,
+                 "return": self._returns,
+             }
+         )
+
+         # Group by year and compound
+         annual = (
+             df.with_columns(
+                 [
+                     pl.col("date").dt.year().alias("year"),
+                 ]
+             )
+             .group_by("year")
+             .agg(((1 + pl.col("return")).product() - 1).alias("annual_return"))
+             .sort("year")
+         )
+
+         return annual
+
+     def get_monthly_returns_matrix(self) -> pl.DataFrame:
+         """Get monthly returns as year x month matrix (for heatmap).
+
+         Returns
+         -------
+         pl.DataFrame
+             Pivoted DataFrame with years as rows, months as columns
+         """
+         monthly = self.compute_monthly_returns()
+
+         # Pivot to matrix form
+         return monthly.pivot(
+             values="monthly_return",
+             index="year",
+             on="month",
+         ).sort("year")
+
+
+ __all__ = ["PortfolioAnalysis"]
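
Putting the calendar helpers together, with the synthetic-calendar fallback from `__init__` supplying dates (hedged: a sketch against the alpha API, not canonical usage):

```python
import numpy as np

from ml4t.diagnostic.evaluation import PortfolioAnalysis

rng = np.random.default_rng(7)
analysis = PortfolioAnalysis(returns=rng.normal(0.0005, 0.01, size=3 * 252))

monthly = analysis.compute_monthly_returns()    # long form: year / month / monthly_return
annual = analysis.compute_annual_returns()      # one compounded return per year
matrix = analysis.get_monthly_returns_matrix()  # wide form: years as rows, months as columns
print(matrix.head())
```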