ml4t-diagnostic 0.1.0a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (242)
  1. ml4t/diagnostic/AGENT.md +25 -0
  2. ml4t/diagnostic/__init__.py +166 -0
  3. ml4t/diagnostic/backends/__init__.py +10 -0
  4. ml4t/diagnostic/backends/adapter.py +192 -0
  5. ml4t/diagnostic/backends/polars_backend.py +899 -0
  6. ml4t/diagnostic/caching/__init__.py +40 -0
  7. ml4t/diagnostic/caching/cache.py +331 -0
  8. ml4t/diagnostic/caching/decorators.py +131 -0
  9. ml4t/diagnostic/caching/smart_cache.py +339 -0
  10. ml4t/diagnostic/config/AGENT.md +24 -0
  11. ml4t/diagnostic/config/README.md +267 -0
  12. ml4t/diagnostic/config/__init__.py +219 -0
  13. ml4t/diagnostic/config/barrier_config.py +277 -0
  14. ml4t/diagnostic/config/base.py +301 -0
  15. ml4t/diagnostic/config/event_config.py +148 -0
  16. ml4t/diagnostic/config/feature_config.py +404 -0
  17. ml4t/diagnostic/config/multi_signal_config.py +55 -0
  18. ml4t/diagnostic/config/portfolio_config.py +215 -0
  19. ml4t/diagnostic/config/report_config.py +391 -0
  20. ml4t/diagnostic/config/sharpe_config.py +202 -0
  21. ml4t/diagnostic/config/signal_config.py +206 -0
  22. ml4t/diagnostic/config/trade_analysis_config.py +310 -0
  23. ml4t/diagnostic/config/validation.py +279 -0
  24. ml4t/diagnostic/core/__init__.py +29 -0
  25. ml4t/diagnostic/core/numba_utils.py +315 -0
  26. ml4t/diagnostic/core/purging.py +372 -0
  27. ml4t/diagnostic/core/sampling.py +471 -0
  28. ml4t/diagnostic/errors/__init__.py +205 -0
  29. ml4t/diagnostic/evaluation/AGENT.md +26 -0
  30. ml4t/diagnostic/evaluation/__init__.py +437 -0
  31. ml4t/diagnostic/evaluation/autocorrelation.py +531 -0
  32. ml4t/diagnostic/evaluation/barrier_analysis.py +1050 -0
  33. ml4t/diagnostic/evaluation/binary_metrics.py +910 -0
  34. ml4t/diagnostic/evaluation/dashboard.py +715 -0
  35. ml4t/diagnostic/evaluation/diagnostic_plots.py +1037 -0
  36. ml4t/diagnostic/evaluation/distribution/__init__.py +499 -0
  37. ml4t/diagnostic/evaluation/distribution/moments.py +299 -0
  38. ml4t/diagnostic/evaluation/distribution/tails.py +777 -0
  39. ml4t/diagnostic/evaluation/distribution/tests.py +470 -0
  40. ml4t/diagnostic/evaluation/drift/__init__.py +139 -0
  41. ml4t/diagnostic/evaluation/drift/analysis.py +432 -0
  42. ml4t/diagnostic/evaluation/drift/domain_classifier.py +517 -0
  43. ml4t/diagnostic/evaluation/drift/population_stability_index.py +310 -0
  44. ml4t/diagnostic/evaluation/drift/wasserstein.py +388 -0
  45. ml4t/diagnostic/evaluation/event_analysis.py +647 -0
  46. ml4t/diagnostic/evaluation/excursion.py +390 -0
  47. ml4t/diagnostic/evaluation/feature_diagnostics.py +873 -0
  48. ml4t/diagnostic/evaluation/feature_outcome.py +666 -0
  49. ml4t/diagnostic/evaluation/framework.py +935 -0
  50. ml4t/diagnostic/evaluation/metric_registry.py +255 -0
  51. ml4t/diagnostic/evaluation/metrics/AGENT.md +23 -0
  52. ml4t/diagnostic/evaluation/metrics/__init__.py +133 -0
  53. ml4t/diagnostic/evaluation/metrics/basic.py +160 -0
  54. ml4t/diagnostic/evaluation/metrics/conditional_ic.py +469 -0
  55. ml4t/diagnostic/evaluation/metrics/feature_outcome.py +475 -0
  56. ml4t/diagnostic/evaluation/metrics/ic_statistics.py +446 -0
  57. ml4t/diagnostic/evaluation/metrics/importance_analysis.py +338 -0
  58. ml4t/diagnostic/evaluation/metrics/importance_classical.py +375 -0
  59. ml4t/diagnostic/evaluation/metrics/importance_mda.py +371 -0
  60. ml4t/diagnostic/evaluation/metrics/importance_shap.py +715 -0
  61. ml4t/diagnostic/evaluation/metrics/information_coefficient.py +527 -0
  62. ml4t/diagnostic/evaluation/metrics/interactions.py +772 -0
  63. ml4t/diagnostic/evaluation/metrics/monotonicity.py +226 -0
  64. ml4t/diagnostic/evaluation/metrics/risk_adjusted.py +324 -0
  65. ml4t/diagnostic/evaluation/multi_signal.py +550 -0
  66. ml4t/diagnostic/evaluation/portfolio_analysis/__init__.py +83 -0
  67. ml4t/diagnostic/evaluation/portfolio_analysis/analysis.py +734 -0
  68. ml4t/diagnostic/evaluation/portfolio_analysis/metrics.py +589 -0
  69. ml4t/diagnostic/evaluation/portfolio_analysis/results.py +334 -0
  70. ml4t/diagnostic/evaluation/report_generation.py +824 -0
  71. ml4t/diagnostic/evaluation/signal_selector.py +452 -0
  72. ml4t/diagnostic/evaluation/stat_registry.py +139 -0
  73. ml4t/diagnostic/evaluation/stationarity/__init__.py +97 -0
  74. ml4t/diagnostic/evaluation/stationarity/analysis.py +518 -0
  75. ml4t/diagnostic/evaluation/stationarity/augmented_dickey_fuller.py +296 -0
  76. ml4t/diagnostic/evaluation/stationarity/kpss_test.py +308 -0
  77. ml4t/diagnostic/evaluation/stationarity/phillips_perron.py +365 -0
  78. ml4t/diagnostic/evaluation/stats/AGENT.md +43 -0
  79. ml4t/diagnostic/evaluation/stats/__init__.py +191 -0
  80. ml4t/diagnostic/evaluation/stats/backtest_overfitting.py +219 -0
  81. ml4t/diagnostic/evaluation/stats/bootstrap.py +228 -0
  82. ml4t/diagnostic/evaluation/stats/deflated_sharpe_ratio.py +591 -0
  83. ml4t/diagnostic/evaluation/stats/false_discovery_rate.py +295 -0
  84. ml4t/diagnostic/evaluation/stats/hac_standard_errors.py +108 -0
  85. ml4t/diagnostic/evaluation/stats/minimum_track_record.py +408 -0
  86. ml4t/diagnostic/evaluation/stats/moments.py +164 -0
  87. ml4t/diagnostic/evaluation/stats/rademacher_adjustment.py +436 -0
  88. ml4t/diagnostic/evaluation/stats/reality_check.py +155 -0
  89. ml4t/diagnostic/evaluation/stats/sharpe_inference.py +219 -0
  90. ml4t/diagnostic/evaluation/themes.py +330 -0
  91. ml4t/diagnostic/evaluation/threshold_analysis.py +957 -0
  92. ml4t/diagnostic/evaluation/trade_analysis.py +1136 -0
  93. ml4t/diagnostic/evaluation/trade_dashboard/__init__.py +32 -0
  94. ml4t/diagnostic/evaluation/trade_dashboard/app.py +315 -0
  95. ml4t/diagnostic/evaluation/trade_dashboard/export/__init__.py +18 -0
  96. ml4t/diagnostic/evaluation/trade_dashboard/export/csv.py +82 -0
  97. ml4t/diagnostic/evaluation/trade_dashboard/export/html.py +276 -0
  98. ml4t/diagnostic/evaluation/trade_dashboard/io.py +166 -0
  99. ml4t/diagnostic/evaluation/trade_dashboard/normalize.py +304 -0
  100. ml4t/diagnostic/evaluation/trade_dashboard/stats.py +386 -0
  101. ml4t/diagnostic/evaluation/trade_dashboard/style.py +79 -0
  102. ml4t/diagnostic/evaluation/trade_dashboard/tabs/__init__.py +21 -0
  103. ml4t/diagnostic/evaluation/trade_dashboard/tabs/patterns.py +354 -0
  104. ml4t/diagnostic/evaluation/trade_dashboard/tabs/shap_analysis.py +280 -0
  105. ml4t/diagnostic/evaluation/trade_dashboard/tabs/stat_validation.py +186 -0
  106. ml4t/diagnostic/evaluation/trade_dashboard/tabs/worst_trades.py +236 -0
  107. ml4t/diagnostic/evaluation/trade_dashboard/types.py +129 -0
  108. ml4t/diagnostic/evaluation/trade_shap/__init__.py +102 -0
  109. ml4t/diagnostic/evaluation/trade_shap/alignment.py +188 -0
  110. ml4t/diagnostic/evaluation/trade_shap/characterize.py +413 -0
  111. ml4t/diagnostic/evaluation/trade_shap/cluster.py +302 -0
  112. ml4t/diagnostic/evaluation/trade_shap/explain.py +208 -0
  113. ml4t/diagnostic/evaluation/trade_shap/hypotheses/__init__.py +23 -0
  114. ml4t/diagnostic/evaluation/trade_shap/hypotheses/generator.py +290 -0
  115. ml4t/diagnostic/evaluation/trade_shap/hypotheses/matcher.py +251 -0
  116. ml4t/diagnostic/evaluation/trade_shap/hypotheses/templates.yaml +467 -0
  117. ml4t/diagnostic/evaluation/trade_shap/models.py +386 -0
  118. ml4t/diagnostic/evaluation/trade_shap/normalize.py +116 -0
  119. ml4t/diagnostic/evaluation/trade_shap/pipeline.py +263 -0
  120. ml4t/diagnostic/evaluation/trade_shap_dashboard.py +283 -0
  121. ml4t/diagnostic/evaluation/trade_shap_diagnostics.py +588 -0
  122. ml4t/diagnostic/evaluation/validated_cv.py +535 -0
  123. ml4t/diagnostic/evaluation/visualization.py +1050 -0
  124. ml4t/diagnostic/evaluation/volatility/__init__.py +45 -0
  125. ml4t/diagnostic/evaluation/volatility/analysis.py +351 -0
  126. ml4t/diagnostic/evaluation/volatility/arch.py +258 -0
  127. ml4t/diagnostic/evaluation/volatility/garch.py +460 -0
  128. ml4t/diagnostic/integration/__init__.py +48 -0
  129. ml4t/diagnostic/integration/backtest_contract.py +671 -0
  130. ml4t/diagnostic/integration/data_contract.py +316 -0
  131. ml4t/diagnostic/integration/engineer_contract.py +226 -0
  132. ml4t/diagnostic/logging/__init__.py +77 -0
  133. ml4t/diagnostic/logging/logger.py +245 -0
  134. ml4t/diagnostic/logging/performance.py +234 -0
  135. ml4t/diagnostic/logging/progress.py +234 -0
  136. ml4t/diagnostic/logging/wandb.py +412 -0
  137. ml4t/diagnostic/metrics/__init__.py +9 -0
  138. ml4t/diagnostic/metrics/percentiles.py +128 -0
  139. ml4t/diagnostic/py.typed +1 -0
  140. ml4t/diagnostic/reporting/__init__.py +43 -0
  141. ml4t/diagnostic/reporting/base.py +130 -0
  142. ml4t/diagnostic/reporting/html_renderer.py +275 -0
  143. ml4t/diagnostic/reporting/json_renderer.py +51 -0
  144. ml4t/diagnostic/reporting/markdown_renderer.py +117 -0
  145. ml4t/diagnostic/results/AGENT.md +24 -0
  146. ml4t/diagnostic/results/__init__.py +105 -0
  147. ml4t/diagnostic/results/barrier_results/__init__.py +36 -0
  148. ml4t/diagnostic/results/barrier_results/hit_rate.py +304 -0
  149. ml4t/diagnostic/results/barrier_results/precision_recall.py +266 -0
  150. ml4t/diagnostic/results/barrier_results/profit_factor.py +297 -0
  151. ml4t/diagnostic/results/barrier_results/tearsheet.py +397 -0
  152. ml4t/diagnostic/results/barrier_results/time_to_target.py +305 -0
  153. ml4t/diagnostic/results/barrier_results/validation.py +38 -0
  154. ml4t/diagnostic/results/base.py +177 -0
  155. ml4t/diagnostic/results/event_results.py +349 -0
  156. ml4t/diagnostic/results/feature_results.py +787 -0
  157. ml4t/diagnostic/results/multi_signal_results.py +431 -0
  158. ml4t/diagnostic/results/portfolio_results.py +281 -0
  159. ml4t/diagnostic/results/sharpe_results.py +448 -0
  160. ml4t/diagnostic/results/signal_results/__init__.py +74 -0
  161. ml4t/diagnostic/results/signal_results/ic.py +581 -0
  162. ml4t/diagnostic/results/signal_results/irtc.py +110 -0
  163. ml4t/diagnostic/results/signal_results/quantile.py +392 -0
  164. ml4t/diagnostic/results/signal_results/tearsheet.py +456 -0
  165. ml4t/diagnostic/results/signal_results/turnover.py +213 -0
  166. ml4t/diagnostic/results/signal_results/validation.py +147 -0
  167. ml4t/diagnostic/signal/AGENT.md +17 -0
  168. ml4t/diagnostic/signal/__init__.py +69 -0
  169. ml4t/diagnostic/signal/_report.py +152 -0
  170. ml4t/diagnostic/signal/_utils.py +261 -0
  171. ml4t/diagnostic/signal/core.py +275 -0
  172. ml4t/diagnostic/signal/quantile.py +148 -0
  173. ml4t/diagnostic/signal/result.py +214 -0
  174. ml4t/diagnostic/signal/signal_ic.py +129 -0
  175. ml4t/diagnostic/signal/turnover.py +182 -0
  176. ml4t/diagnostic/splitters/AGENT.md +19 -0
  177. ml4t/diagnostic/splitters/__init__.py +36 -0
  178. ml4t/diagnostic/splitters/base.py +501 -0
  179. ml4t/diagnostic/splitters/calendar.py +421 -0
  180. ml4t/diagnostic/splitters/calendar_config.py +91 -0
  181. ml4t/diagnostic/splitters/combinatorial.py +1064 -0
  182. ml4t/diagnostic/splitters/config.py +322 -0
  183. ml4t/diagnostic/splitters/cpcv/__init__.py +57 -0
  184. ml4t/diagnostic/splitters/cpcv/combinations.py +119 -0
  185. ml4t/diagnostic/splitters/cpcv/partitioning.py +263 -0
  186. ml4t/diagnostic/splitters/cpcv/purge_engine.py +379 -0
  187. ml4t/diagnostic/splitters/cpcv/windows.py +190 -0
  188. ml4t/diagnostic/splitters/group_isolation.py +329 -0
  189. ml4t/diagnostic/splitters/persistence.py +316 -0
  190. ml4t/diagnostic/splitters/utils.py +207 -0
  191. ml4t/diagnostic/splitters/walk_forward.py +757 -0
  192. ml4t/diagnostic/utils/__init__.py +42 -0
  193. ml4t/diagnostic/utils/config.py +542 -0
  194. ml4t/diagnostic/utils/dependencies.py +318 -0
  195. ml4t/diagnostic/utils/sessions.py +127 -0
  196. ml4t/diagnostic/validation/__init__.py +54 -0
  197. ml4t/diagnostic/validation/dataframe.py +274 -0
  198. ml4t/diagnostic/validation/returns.py +280 -0
  199. ml4t/diagnostic/validation/timeseries.py +299 -0
  200. ml4t/diagnostic/visualization/AGENT.md +19 -0
  201. ml4t/diagnostic/visualization/__init__.py +223 -0
  202. ml4t/diagnostic/visualization/backtest/__init__.py +98 -0
  203. ml4t/diagnostic/visualization/backtest/cost_attribution.py +762 -0
  204. ml4t/diagnostic/visualization/backtest/executive_summary.py +895 -0
  205. ml4t/diagnostic/visualization/backtest/interactive_controls.py +673 -0
  206. ml4t/diagnostic/visualization/backtest/statistical_validity.py +874 -0
  207. ml4t/diagnostic/visualization/backtest/tearsheet.py +565 -0
  208. ml4t/diagnostic/visualization/backtest/template_system.py +373 -0
  209. ml4t/diagnostic/visualization/backtest/trade_plots.py +1172 -0
  210. ml4t/diagnostic/visualization/barrier_plots.py +782 -0
  211. ml4t/diagnostic/visualization/core.py +1060 -0
  212. ml4t/diagnostic/visualization/dashboards/__init__.py +36 -0
  213. ml4t/diagnostic/visualization/dashboards/base.py +582 -0
  214. ml4t/diagnostic/visualization/dashboards/importance.py +801 -0
  215. ml4t/diagnostic/visualization/dashboards/interaction.py +263 -0
  216. ml4t/diagnostic/visualization/dashboards.py +43 -0
  217. ml4t/diagnostic/visualization/data_extraction/__init__.py +48 -0
  218. ml4t/diagnostic/visualization/data_extraction/importance.py +649 -0
  219. ml4t/diagnostic/visualization/data_extraction/interaction.py +504 -0
  220. ml4t/diagnostic/visualization/data_extraction/types.py +113 -0
  221. ml4t/diagnostic/visualization/data_extraction/validation.py +66 -0
  222. ml4t/diagnostic/visualization/feature_plots.py +888 -0
  223. ml4t/diagnostic/visualization/interaction_plots.py +618 -0
  224. ml4t/diagnostic/visualization/portfolio/__init__.py +41 -0
  225. ml4t/diagnostic/visualization/portfolio/dashboard.py +514 -0
  226. ml4t/diagnostic/visualization/portfolio/drawdown_plots.py +341 -0
  227. ml4t/diagnostic/visualization/portfolio/returns_plots.py +487 -0
  228. ml4t/diagnostic/visualization/portfolio/risk_plots.py +301 -0
  229. ml4t/diagnostic/visualization/report_generation.py +1343 -0
  230. ml4t/diagnostic/visualization/signal/__init__.py +103 -0
  231. ml4t/diagnostic/visualization/signal/dashboard.py +911 -0
  232. ml4t/diagnostic/visualization/signal/event_plots.py +514 -0
  233. ml4t/diagnostic/visualization/signal/ic_plots.py +635 -0
  234. ml4t/diagnostic/visualization/signal/multi_signal_dashboard.py +974 -0
  235. ml4t/diagnostic/visualization/signal/multi_signal_plots.py +603 -0
  236. ml4t/diagnostic/visualization/signal/quantile_plots.py +625 -0
  237. ml4t/diagnostic/visualization/signal/turnover_plots.py +400 -0
  238. ml4t/diagnostic/visualization/trade_shap/__init__.py +90 -0
  239. ml4t_diagnostic-0.1.0a1.dist-info/METADATA +1044 -0
  240. ml4t_diagnostic-0.1.0a1.dist-info/RECORD +242 -0
  241. ml4t_diagnostic-0.1.0a1.dist-info/WHEEL +4 -0
  242. ml4t_diagnostic-0.1.0a1.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,550 @@
1
+ """Multi-Signal Analysis module for batch signal evaluation.
2
+
3
+ This module provides efficient analysis of 50-200 signals with:
4
+ - Parallel computation via joblib
5
+ - Smart caching with Polars fingerprinting
6
+ - FDR and FWER multiple testing corrections
7
+ - Signal selection algorithms for comparison
8
+ - Focus + Context visualization patterns
9
+
10
+ References
11
+ ----------
12
+ Benjamini, Y., & Hochberg, Y. (1995). "Controlling the False Discovery Rate"
13
+ Holm, S. (1979). "A Simple Sequentially Rejective Multiple Test Procedure"
14
+ López de Prado, M. (2018). "Advances in Financial Machine Learning"
15
+ """
16
+
17
+ from __future__ import annotations
18
+
19
+ import warnings
20
+ from typing import TYPE_CHECKING, Any, Literal
21
+
22
+ import numpy as np
23
+ import polars as pl
24
+ from tqdm import tqdm
25
+
26
+ from ml4t.diagnostic.backends.adapter import DataFrameAdapter
27
+ from ml4t.diagnostic.caching.smart_cache import SmartCache
28
+ from ml4t.diagnostic.config.multi_signal_config import MultiSignalAnalysisConfig
29
+ from ml4t.diagnostic.evaluation.signal_selector import SignalSelector
30
+ from ml4t.diagnostic.evaluation.stats import benjamini_hochberg_fdr, holm_bonferroni
31
+ from ml4t.diagnostic.results.multi_signal_results import ComparisonResult, MultiSignalSummary
32
+ from ml4t.diagnostic.signal import SignalResult, analyze_signal
33
+
34
+ if TYPE_CHECKING:
35
+ import pandas as pd
36
+
37
+
38
class MultiSignalAnalysis:
    """Batch analysis of multiple signals with statistical corrections.

    Runs the single-signal diagnostic pipeline over many signals (50-200)
    at once, with optional parallel computation and smart caching, then
    applies FDR and FWER multiple-testing corrections to the results.

    Parameters
    ----------
    signals : dict[str, pl.DataFrame | pd.DataFrame]
        Mapping from signal name to factor DataFrame; every frame must
        carry the columns ``date``, ``asset`` and ``factor``.
    prices : pl.DataFrame | pd.DataFrame
        Price data with columns ``date``, ``asset`` and ``price``.
    config : MultiSignalAnalysisConfig | None
        Configuration object; defaults are used when None.

    Examples
    --------
    >>> # Basic usage
    >>> signals = {
    ...     'momentum_12m': mom_df,
    ...     'value_btm': val_df,
    ...     'quality': qual_df,
    ... }
    >>> analyzer = MultiSignalAnalysis(signals, prices)
    >>> summary = analyzer.compute_summary()
    >>> print(f"Significant: {summary.n_fdr_significant}/{summary.n_signals}")

    >>> # Compare top uncorrelated signals
    >>> comparison = analyzer.compare(selection="uncorrelated", n=5)
    >>> comparison.save_html("top_signals.html")

    >>> # Custom configuration
    >>> config = MultiSignalAnalysisConfig(
    ...     fdr_alpha=0.01,
    ...     fwer_alpha=0.01,
    ...     n_jobs=-1,  # All cores
    ... )
    >>> analyzer = MultiSignalAnalysis(signals, prices, config=config)
    """
78
+
79
def __init__(
    self,
    signals: dict[str, pl.DataFrame | pd.DataFrame],
    prices: pl.DataFrame | pd.DataFrame,
    config: MultiSignalAnalysisConfig | None = None,
) -> None:
    """Initialize MultiSignalAnalysis."""
    self.config = config if config is not None else MultiSignalAnalysisConfig()

    # Normalize every signal frame to Polars via the shared adapter.
    self._signals: dict[str, pl.DataFrame] = {}
    for name, frame in signals.items():
        as_polars, _ = DataFrameAdapter.to_polars(frame)
        self._signals[name] = as_polars

    # Price data goes through the same conversion.
    self._prices, _ = DataFrameAdapter.to_polars(prices)

    # Fail fast on malformed inputs.
    self._validate_inputs()

    # Optional smart cache for per-signal metric dicts.
    self._cache: SmartCache | None = None
    if self.config.cache_enabled:
        self._cache = SmartCache(
            max_items=self.config.cache_max_items,
            ttl_seconds=self.config.cache_ttl,
        )

    # Lazily populated, memoized analysis results.
    self._summary: MultiSignalSummary | None = None
    self._individual_results: dict[str, SignalResult] = {}
    self._correlation_matrix: pl.DataFrame | None = None
112
+
113
def _validate_inputs(self) -> None:
    """Validate input data structure.

    Raises
    ------
    ValueError
        If no signals were supplied, or any signal/price frame lacks
        one of its required columns.
    """
    if not self._signals:
        raise ValueError("No signals provided")

    # Every signal frame must expose date/asset/factor.
    needed = {"date", "asset", "factor"}
    for name, frame in self._signals.items():
        missing = needed - set(frame.columns)
        if missing:
            raise ValueError(f"Signal '{name}' missing required columns: {missing}")

    # Price frame must expose date/asset/price.
    missing_price = {"date", "asset", "price"} - set(self._prices.columns)
    if missing_price:
        raise ValueError(f"Price data missing required columns: {missing_price}")
130
+
131
@property
def signal_names(self) -> list[str]:
    """Names of all registered signals, in insertion order."""
    return [*self._signals]
135
+
136
@property
def n_signals(self) -> int:
    """Count of signals under analysis."""
    return len(self._signals)
140
+
141
+ def get_individual(self, signal_name: str) -> SignalResult:
142
+ """Get or create SignalResult for a specific signal.
143
+
144
+ Parameters
145
+ ----------
146
+ signal_name : str
147
+ Name of signal
148
+
149
+ Returns
150
+ -------
151
+ SignalResult
152
+ Analysis result for the signal
153
+ """
154
+ if signal_name not in self._signals:
155
+ raise ValueError(f"Signal '{signal_name}' not found. Available: {self.signal_names}")
156
+
157
+ if signal_name not in self._individual_results:
158
+ self._individual_results[signal_name] = analyze_signal(
159
+ self._signals[signal_name],
160
+ self._prices,
161
+ periods=tuple(self.config.signal_config.periods),
162
+ quantiles=self.config.signal_config.quantiles,
163
+ filter_zscore=self.config.signal_config.filter_zscore,
164
+ compute_turnover_flag=self.config.signal_config.compute_turnover,
165
+ )
166
+
167
+ return self._individual_results[signal_name]
168
+
169
def _compute_signal_metrics(self, signal_name: str) -> dict[str, Any]:
    """Compute summary metrics for a single signal.

    This is the parallelizable unit of work. On failure, every metric is
    NaN and the "error" field carries the exception message, so one bad
    signal never aborts the whole batch.
    """
    # Consult the smart cache first, when one is configured.
    key = None
    if self._cache is not None:
        key = self._cache.make_key(
            signal_name,
            self._signals[signal_name],
            self.config.signal_config,
        )
        hit = self._cache.get(key)
        if hit is not None:
            return hit

    sig_cfg = self.config.signal_config
    try:
        # Run the functional single-signal analysis.
        result = analyze_signal(
            self._signals[signal_name],
            self._prices,
            periods=tuple(sig_cfg.periods),
            quantiles=sig_cfg.quantiles,
            filter_zscore=sig_cfg.filter_zscore,
            compute_turnover_flag=sig_cfg.compute_turnover,
        )

        # Metrics are reported for the first configured period only
        # (the most common use case).
        horizon = f"{sig_cfg.periods[0]}D"

        metrics = {
            "signal_name": signal_name,
            "ic_mean": result.ic.get(horizon, np.nan),
            "ic_std": result.ic_std.get(horizon, np.nan),
            "ic_t_stat": result.ic_t_stat.get(horizon, np.nan),
            "ic_p_value": result.ic_p_value.get(horizon, np.nan),
            "ic_ir": result.ic_ir.get(horizon, np.nan),
            "ic_positive_pct": result.ic_positive_pct.get(horizon, np.nan),
            "n_observations": result.n_dates,
            "error": None,
        }

        # Turnover is optional; fall back to NaN when it was not computed.
        turnover = result.turnover
        metrics["turnover_mean"] = (
            turnover.get(horizon, np.nan) if turnover is not None else np.nan
        )

        # First autocorrelation entry, when available.
        autocorr = result.autocorrelation
        metrics["autocorr_1"] = (
            autocorr[0] if autocorr is not None and len(autocorr) > 0 else np.nan
        )

    except Exception as exc:
        # Record the failure as an all-NaN row instead of raising.
        metrics = {
            "signal_name": signal_name,
            "ic_mean": np.nan,
            "ic_std": np.nan,
            "ic_t_stat": np.nan,
            "ic_p_value": np.nan,
            "ic_ir": np.nan,
            "ic_positive_pct": np.nan,
            "n_observations": 0,
            "turnover_mean": np.nan,
            "autocorr_1": np.nan,
            "error": str(exc),
        }

    # Persist for the next call with identical inputs.
    if self._cache is not None and key is not None:
        self._cache.set(key, metrics)

    return metrics
244
+
245
def compute_summary(
    self,
    progress: bool = True,
) -> MultiSignalSummary:
    """Compute summary metrics for all signals with FDR/FWER correction.

    The result is memoized; repeated calls return the first summary.

    Parameters
    ----------
    progress : bool, default True
        Show progress bar

    Returns
    -------
    MultiSignalSummary
        Summary with metrics and multiple testing corrections
    """
    if self._summary is not None:
        return self._summary

    def _serial_metrics() -> list[dict[str, Any]]:
        # Single serial path, shared by n_jobs == 1 and the joblib fallback
        # (previously duplicated verbatim in both branches).
        bar = tqdm(self.signal_names, disable=not progress, desc="Analyzing signals")
        return [self._compute_signal_metrics(name) for name in bar]

    if self.config.n_jobs == 1:
        results = _serial_metrics()
    else:
        # try narrowed to the import itself; the Parallel call runs in `else`.
        try:
            from joblib import Parallel, delayed
        except ImportError:
            warnings.warn(
                "joblib not available, falling back to serial execution",
                UserWarning,
                stacklevel=2,
            )
            results = _serial_metrics()
        else:
            results = Parallel(
                n_jobs=self.config.n_jobs,
                backend=self.config.backend,
            )(
                delayed(self._compute_signal_metrics)(name)
                for name in tqdm(
                    self.signal_names, disable=not progress, desc="Analyzing signals"
                )
            )

    # Assemble columnar summary data; key order matters downstream.
    summary_data: dict[str, list[Any]] = {
        "signal_name": [],
        "ic_mean": [],
        "ic_std": [],
        "ic_t_stat": [],
        "ic_p_value": [],
        "ic_ir": [],
        "ic_positive_pct": [],
        "n_observations": [],
        "turnover_mean": [],
        "autocorr_1": [],
    }
    for row in results:
        for key in summary_data:
            summary_data[key].append(row.get(key, np.nan))

    # FDR (Benjamini-Hochberg); NaN p-values are treated as 1.0 so a
    # failed signal can never be flagged significant.
    p_values = summary_data["ic_p_value"]
    valid_p_values = [p if not np.isnan(p) else 1.0 for p in p_values]

    fdr_result = benjamini_hochberg_fdr(
        valid_p_values,
        alpha=self.config.fdr_alpha,
        return_details=True,
    )
    summary_data["fdr_significant"] = list(fdr_result["rejected"])
    summary_data["fdr_adjusted_p"] = list(fdr_result["adjusted_p_values"])

    # FWER (Holm-Bonferroni).
    fwer_result = holm_bonferroni(valid_p_values, alpha=self.config.fwer_alpha)
    summary_data["fwer_significant"] = fwer_result["rejected"]
    summary_data["fwer_adjusted_p"] = fwer_result["adjusted_p_values"]

    n_fdr_sig = sum(summary_data["fdr_significant"])
    n_fwer_sig = sum(summary_data["fwer_significant"])

    self._summary = MultiSignalSummary(
        summary_data=summary_data,
        n_signals=self.n_signals,
        n_fdr_significant=n_fdr_sig,
        n_fwer_significant=n_fwer_sig,
        periods=self.config.signal_config.periods,
        fdr_alpha=self.config.fdr_alpha,
        fwer_alpha=self.config.fwer_alpha,
    )

    return self._summary
347
+
348
def correlation_matrix(
    self,
    method: Literal["returns", "ic"] = "returns",
) -> pl.DataFrame:
    """Compute pairwise signal correlation matrix.

    Parameters
    ----------
    method : str, default "returns"
        Correlation method:
        - "returns": Correlation of signal-weighted returns
        - "ic": Correlation of IC time series

    Returns
    -------
    pl.DataFrame
        Correlation matrix with signal names as columns

    Raises
    ------
    ValueError
        If the signals share no common dates.

    Notes
    -----
    Both methods currently fall back to a cross-sectional correlation of
    raw factor values (a reasonable proxy for signal similarity); the
    ``method`` parameter is not yet differentiated, and the memoized
    result is reused regardless of ``method``.
    """
    if self._correlation_matrix is not None:
        return self._correlation_matrix

    # Intersect the date sets of all signals.
    common_dates: set[Any] | None = None
    for frame in self._signals.values():
        frame_dates = set(frame["date"].unique().to_list())
        common_dates = frame_dates if common_dates is None else common_dates & frame_dates

    if not common_dates:
        raise ValueError("No overlapping dates across signals")

    # Hoisted out of the pair loop: previously list(all_dates) was rebuilt
    # and each frame re-filtered O(n^2) times.
    date_list = list(common_dates)
    filtered = {
        name: frame.filter(pl.col("date").is_in(date_list))
        for name, frame in self._signals.items()
    }

    names = self.signal_names
    n = self.n_signals
    corr_matrix = np.eye(n)

    for i, name_i in enumerate(names):
        # Select the left side once per outer signal.
        left = filtered[name_i].select(["date", "asset", "factor"])
        for j in range(i + 1, n):
            # Align the two factor series on (date, asset).
            merged = left.join(
                filtered[names[j]].select(
                    ["date", "asset", pl.col("factor").alias("factor_j")]
                ),
                on=["date", "asset"],
                how="inner",
            )

            # Require a minimal overlap before trusting the estimate.
            if merged.height > 10:
                corr = np.corrcoef(
                    merged["factor"].to_numpy(),
                    merged["factor_j"].to_numpy(),
                )[0, 1]
                if not np.isnan(corr):
                    corr_matrix[i, j] = corr
                    corr_matrix[j, i] = corr

    # Memoize as a Polars frame keyed by signal name.
    self._correlation_matrix = pl.DataFrame(
        corr_matrix,
        schema=self.signal_names,
    )

    return self._correlation_matrix
420
+
421
def compare(
    self,
    selection: Literal["top_n", "uncorrelated", "pareto", "cluster", "manual"] = "top_n",
    n: int = 10,
    signals: list[str] | None = None,
    **kwargs: Any,
) -> ComparisonResult:
    """Create detailed comparison of selected signals.

    Parameters
    ----------
    selection : str, default "top_n"
        Selection method:
        - "top_n": Best N by metric (default: ic_ir)
        - "uncorrelated": Diverse signals with low correlation
        - "pareto": Signals on efficient frontier
        - "cluster": Representative from each cluster
        - "manual": Use provided signal list
    n : int, default 10
        Number of signals to select (ignored for "manual")
    signals : list[str] | None
        Signal names for "manual" selection
    **kwargs : Any
        Additional parameters for selection methods

    Returns
    -------
    ComparisonResult
        Detailed comparison with tear sheet data
    """
    # Every strategy needs the summary metrics table.
    summary_df = self.compute_summary(progress=False).get_dataframe()

    # Only the diversity-aware strategies need the correlation matrix.
    corr_matrix = None
    if selection in ("uncorrelated", "cluster"):
        corr_matrix = self.correlation_matrix()

    # Dispatch on the selection strategy.
    if selection == "manual":
        if signals is None:
            raise ValueError("signals parameter required for manual selection")
        selected = signals
    elif selection == "top_n":
        metric = kwargs.get("metric", self.config.default_selection_metric)
        selected = SignalSelector.select_top_n(summary_df, n=n, metric=metric, **kwargs)
    elif selection == "uncorrelated":
        if corr_matrix is None:
            raise ValueError("Correlation matrix required for uncorrelated selection")
        max_corr = kwargs.get("max_correlation", self.config.default_correlation_threshold)
        selected = SignalSelector.select_uncorrelated(
            summary_df, corr_matrix, n=n, max_correlation=max_corr, **kwargs
        )
    elif selection == "pareto":
        selected = SignalSelector.select_pareto_frontier(summary_df, **kwargs)
        if len(selected) > n:
            selected = selected[:n]
    elif selection == "cluster":
        if corr_matrix is None:
            raise ValueError("Correlation matrix required for cluster selection")
        n_clusters = kwargs.get("n_clusters", n)
        selected = SignalSelector.select_by_cluster(
            corr_matrix, summary_df, n_clusters=n_clusters, **kwargs
        )
    else:
        raise ValueError(f"Unknown selection method: {selection}")

    # Cap the comparison size at the configured maximum.
    limit = self.config.max_signals_comparison
    if len(selected) > limit:
        selected = selected[:limit]

    # Full tear sheet per surviving signal; a failure is recorded, not fatal.
    tear_sheets: dict[str, dict[str, Any]] = {}
    for name in selected:
        try:
            tear_sheets[name] = self.get_individual(name).to_dict()
        except Exception as exc:
            warnings.warn(
                f"Failed to analyze signal {name}: {exc}",
                UserWarning,
                stacklevel=2,
            )
            tear_sheets[name] = {"error": str(exc)}

    # Restrict the full correlation matrix to the selected signals.
    full_corr = self.correlation_matrix()
    selected_corr: dict[str, list[float]] = {}
    for name in selected:
        if name in full_corr.columns:
            row = self.signal_names.index(name)
            selected_corr[name] = [full_corr[other][row] for other in selected]
        else:
            selected_corr[name] = [np.nan] * len(selected)

    return ComparisonResult(
        signals=selected,
        selection_method=selection,
        selection_params={"n": n, **kwargs},
        tear_sheets=tear_sheets,
        correlation_matrix=selected_corr,
    )
524
+
525
+ def cache_stats(self) -> dict[str, Any] | None:
526
+ """Get cache statistics.
527
+
528
+ Returns
529
+ -------
530
+ dict | None
531
+ Cache statistics if caching enabled, else None
532
+ """
533
+ if self._cache is None:
534
+ return None
535
+ return self._cache.stats
536
+
537
def clear_cache(self) -> None:
    """Clear the smart cache and all memoized analysis results."""
    if self._cache is not None:
        self._cache.clear()
    # Reset every memoized artifact so the next call recomputes.
    self._summary = None
    self._individual_results.clear()
    self._correlation_matrix = None
544
+
545
def __repr__(self) -> str:
    """Developer representation."""
    cache_state = "enabled" if self._cache else "disabled"
    return f"MultiSignalAnalysis(n_signals={self.n_signals}, cache={cache_state})"
@@ -0,0 +1,83 @@
1
+ """Portfolio analysis module.
2
+
3
+ This package provides comprehensive portfolio performance analysis:
4
+ - PortfolioAnalysis: Main analyzer class for portfolio diagnostics
5
+ - PortfolioMetrics: Complete portfolio performance metrics
6
+ - RollingMetricsResult: Rolling metrics over multiple windows
7
+ - DrawdownResult: Detailed drawdown analysis
8
+ - DistributionResult: Returns distribution analysis
9
+
10
+ Decomposed from portfolio_analysis.py (1,620 lines) into:
11
+ - results.py: Result dataclasses (~335 lines)
12
+ - metrics.py: Core metric functions (~588 lines)
13
+ - analysis.py: PortfolioAnalysis class (~672 lines)
14
+ """
15
+
16
+ from __future__ import annotations
17
+
18
+ # Main analysis class
19
+ from ml4t.diagnostic.evaluation.portfolio_analysis.analysis import PortfolioAnalysis
20
+
21
+ # Core metric functions
22
+ from ml4t.diagnostic.evaluation.portfolio_analysis.metrics import (
23
+ _annualization_factor,
24
+ _safe_cumprod,
25
+ _safe_prod,
26
+ _to_numpy,
27
+ alpha_beta,
28
+ annual_return,
29
+ annual_volatility,
30
+ calmar_ratio,
31
+ compute_portfolio_turnover,
32
+ conditional_var,
33
+ information_ratio,
34
+ max_drawdown,
35
+ omega_ratio,
36
+ sharpe_ratio,
37
+ sortino_ratio,
38
+ stability_of_timeseries,
39
+ tail_ratio,
40
+ up_down_capture,
41
+ value_at_risk,
42
+ )
43
+
44
+ # Result classes
45
+ from ml4t.diagnostic.evaluation.portfolio_analysis.results import (
46
+ DistributionResult,
47
+ DrawdownPeriod,
48
+ DrawdownResult,
49
+ PortfolioMetrics,
50
+ RollingMetricsResult,
51
+ )
52
+
53
+ __all__ = [
54
+ # Main class
55
+ "PortfolioAnalysis",
56
+ # Result classes
57
+ "PortfolioMetrics",
58
+ "RollingMetricsResult",
59
+ "DrawdownPeriod",
60
+ "DrawdownResult",
61
+ "DistributionResult",
62
+ # Core metric functions
63
+ "sharpe_ratio",
64
+ "sortino_ratio",
65
+ "calmar_ratio",
66
+ "omega_ratio",
67
+ "tail_ratio",
68
+ "max_drawdown",
69
+ "annual_return",
70
+ "annual_volatility",
71
+ "value_at_risk",
72
+ "conditional_var",
73
+ "stability_of_timeseries",
74
+ "alpha_beta",
75
+ "information_ratio",
76
+ "up_down_capture",
77
+ "compute_portfolio_turnover",
78
+ # Internal helpers (exported for testing)
79
+ "_to_numpy",
80
+ "_safe_prod",
81
+ "_safe_cumprod",
82
+ "_annualization_factor",
83
+ ]