ml4t-diagnostic 0.1.0a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (242) hide show
  1. ml4t/diagnostic/AGENT.md +25 -0
  2. ml4t/diagnostic/__init__.py +166 -0
  3. ml4t/diagnostic/backends/__init__.py +10 -0
  4. ml4t/diagnostic/backends/adapter.py +192 -0
  5. ml4t/diagnostic/backends/polars_backend.py +899 -0
  6. ml4t/diagnostic/caching/__init__.py +40 -0
  7. ml4t/diagnostic/caching/cache.py +331 -0
  8. ml4t/diagnostic/caching/decorators.py +131 -0
  9. ml4t/diagnostic/caching/smart_cache.py +339 -0
  10. ml4t/diagnostic/config/AGENT.md +24 -0
  11. ml4t/diagnostic/config/README.md +267 -0
  12. ml4t/diagnostic/config/__init__.py +219 -0
  13. ml4t/diagnostic/config/barrier_config.py +277 -0
  14. ml4t/diagnostic/config/base.py +301 -0
  15. ml4t/diagnostic/config/event_config.py +148 -0
  16. ml4t/diagnostic/config/feature_config.py +404 -0
  17. ml4t/diagnostic/config/multi_signal_config.py +55 -0
  18. ml4t/diagnostic/config/portfolio_config.py +215 -0
  19. ml4t/diagnostic/config/report_config.py +391 -0
  20. ml4t/diagnostic/config/sharpe_config.py +202 -0
  21. ml4t/diagnostic/config/signal_config.py +206 -0
  22. ml4t/diagnostic/config/trade_analysis_config.py +310 -0
  23. ml4t/diagnostic/config/validation.py +279 -0
  24. ml4t/diagnostic/core/__init__.py +29 -0
  25. ml4t/diagnostic/core/numba_utils.py +315 -0
  26. ml4t/diagnostic/core/purging.py +372 -0
  27. ml4t/diagnostic/core/sampling.py +471 -0
  28. ml4t/diagnostic/errors/__init__.py +205 -0
  29. ml4t/diagnostic/evaluation/AGENT.md +26 -0
  30. ml4t/diagnostic/evaluation/__init__.py +437 -0
  31. ml4t/diagnostic/evaluation/autocorrelation.py +531 -0
  32. ml4t/diagnostic/evaluation/barrier_analysis.py +1050 -0
  33. ml4t/diagnostic/evaluation/binary_metrics.py +910 -0
  34. ml4t/diagnostic/evaluation/dashboard.py +715 -0
  35. ml4t/diagnostic/evaluation/diagnostic_plots.py +1037 -0
  36. ml4t/diagnostic/evaluation/distribution/__init__.py +499 -0
  37. ml4t/diagnostic/evaluation/distribution/moments.py +299 -0
  38. ml4t/diagnostic/evaluation/distribution/tails.py +777 -0
  39. ml4t/diagnostic/evaluation/distribution/tests.py +470 -0
  40. ml4t/diagnostic/evaluation/drift/__init__.py +139 -0
  41. ml4t/diagnostic/evaluation/drift/analysis.py +432 -0
  42. ml4t/diagnostic/evaluation/drift/domain_classifier.py +517 -0
  43. ml4t/diagnostic/evaluation/drift/population_stability_index.py +310 -0
  44. ml4t/diagnostic/evaluation/drift/wasserstein.py +388 -0
  45. ml4t/diagnostic/evaluation/event_analysis.py +647 -0
  46. ml4t/diagnostic/evaluation/excursion.py +390 -0
  47. ml4t/diagnostic/evaluation/feature_diagnostics.py +873 -0
  48. ml4t/diagnostic/evaluation/feature_outcome.py +666 -0
  49. ml4t/diagnostic/evaluation/framework.py +935 -0
  50. ml4t/diagnostic/evaluation/metric_registry.py +255 -0
  51. ml4t/diagnostic/evaluation/metrics/AGENT.md +23 -0
  52. ml4t/diagnostic/evaluation/metrics/__init__.py +133 -0
  53. ml4t/diagnostic/evaluation/metrics/basic.py +160 -0
  54. ml4t/diagnostic/evaluation/metrics/conditional_ic.py +469 -0
  55. ml4t/diagnostic/evaluation/metrics/feature_outcome.py +475 -0
  56. ml4t/diagnostic/evaluation/metrics/ic_statistics.py +446 -0
  57. ml4t/diagnostic/evaluation/metrics/importance_analysis.py +338 -0
  58. ml4t/diagnostic/evaluation/metrics/importance_classical.py +375 -0
  59. ml4t/diagnostic/evaluation/metrics/importance_mda.py +371 -0
  60. ml4t/diagnostic/evaluation/metrics/importance_shap.py +715 -0
  61. ml4t/diagnostic/evaluation/metrics/information_coefficient.py +527 -0
  62. ml4t/diagnostic/evaluation/metrics/interactions.py +772 -0
  63. ml4t/diagnostic/evaluation/metrics/monotonicity.py +226 -0
  64. ml4t/diagnostic/evaluation/metrics/risk_adjusted.py +324 -0
  65. ml4t/diagnostic/evaluation/multi_signal.py +550 -0
  66. ml4t/diagnostic/evaluation/portfolio_analysis/__init__.py +83 -0
  67. ml4t/diagnostic/evaluation/portfolio_analysis/analysis.py +734 -0
  68. ml4t/diagnostic/evaluation/portfolio_analysis/metrics.py +589 -0
  69. ml4t/diagnostic/evaluation/portfolio_analysis/results.py +334 -0
  70. ml4t/diagnostic/evaluation/report_generation.py +824 -0
  71. ml4t/diagnostic/evaluation/signal_selector.py +452 -0
  72. ml4t/diagnostic/evaluation/stat_registry.py +139 -0
  73. ml4t/diagnostic/evaluation/stationarity/__init__.py +97 -0
  74. ml4t/diagnostic/evaluation/stationarity/analysis.py +518 -0
  75. ml4t/diagnostic/evaluation/stationarity/augmented_dickey_fuller.py +296 -0
  76. ml4t/diagnostic/evaluation/stationarity/kpss_test.py +308 -0
  77. ml4t/diagnostic/evaluation/stationarity/phillips_perron.py +365 -0
  78. ml4t/diagnostic/evaluation/stats/AGENT.md +43 -0
  79. ml4t/diagnostic/evaluation/stats/__init__.py +191 -0
  80. ml4t/diagnostic/evaluation/stats/backtest_overfitting.py +219 -0
  81. ml4t/diagnostic/evaluation/stats/bootstrap.py +228 -0
  82. ml4t/diagnostic/evaluation/stats/deflated_sharpe_ratio.py +591 -0
  83. ml4t/diagnostic/evaluation/stats/false_discovery_rate.py +295 -0
  84. ml4t/diagnostic/evaluation/stats/hac_standard_errors.py +108 -0
  85. ml4t/diagnostic/evaluation/stats/minimum_track_record.py +408 -0
  86. ml4t/diagnostic/evaluation/stats/moments.py +164 -0
  87. ml4t/diagnostic/evaluation/stats/rademacher_adjustment.py +436 -0
  88. ml4t/diagnostic/evaluation/stats/reality_check.py +155 -0
  89. ml4t/diagnostic/evaluation/stats/sharpe_inference.py +219 -0
  90. ml4t/diagnostic/evaluation/themes.py +330 -0
  91. ml4t/diagnostic/evaluation/threshold_analysis.py +957 -0
  92. ml4t/diagnostic/evaluation/trade_analysis.py +1136 -0
  93. ml4t/diagnostic/evaluation/trade_dashboard/__init__.py +32 -0
  94. ml4t/diagnostic/evaluation/trade_dashboard/app.py +315 -0
  95. ml4t/diagnostic/evaluation/trade_dashboard/export/__init__.py +18 -0
  96. ml4t/diagnostic/evaluation/trade_dashboard/export/csv.py +82 -0
  97. ml4t/diagnostic/evaluation/trade_dashboard/export/html.py +276 -0
  98. ml4t/diagnostic/evaluation/trade_dashboard/io.py +166 -0
  99. ml4t/diagnostic/evaluation/trade_dashboard/normalize.py +304 -0
  100. ml4t/diagnostic/evaluation/trade_dashboard/stats.py +386 -0
  101. ml4t/diagnostic/evaluation/trade_dashboard/style.py +79 -0
  102. ml4t/diagnostic/evaluation/trade_dashboard/tabs/__init__.py +21 -0
  103. ml4t/diagnostic/evaluation/trade_dashboard/tabs/patterns.py +354 -0
  104. ml4t/diagnostic/evaluation/trade_dashboard/tabs/shap_analysis.py +280 -0
  105. ml4t/diagnostic/evaluation/trade_dashboard/tabs/stat_validation.py +186 -0
  106. ml4t/diagnostic/evaluation/trade_dashboard/tabs/worst_trades.py +236 -0
  107. ml4t/diagnostic/evaluation/trade_dashboard/types.py +129 -0
  108. ml4t/diagnostic/evaluation/trade_shap/__init__.py +102 -0
  109. ml4t/diagnostic/evaluation/trade_shap/alignment.py +188 -0
  110. ml4t/diagnostic/evaluation/trade_shap/characterize.py +413 -0
  111. ml4t/diagnostic/evaluation/trade_shap/cluster.py +302 -0
  112. ml4t/diagnostic/evaluation/trade_shap/explain.py +208 -0
  113. ml4t/diagnostic/evaluation/trade_shap/hypotheses/__init__.py +23 -0
  114. ml4t/diagnostic/evaluation/trade_shap/hypotheses/generator.py +290 -0
  115. ml4t/diagnostic/evaluation/trade_shap/hypotheses/matcher.py +251 -0
  116. ml4t/diagnostic/evaluation/trade_shap/hypotheses/templates.yaml +467 -0
  117. ml4t/diagnostic/evaluation/trade_shap/models.py +386 -0
  118. ml4t/diagnostic/evaluation/trade_shap/normalize.py +116 -0
  119. ml4t/diagnostic/evaluation/trade_shap/pipeline.py +263 -0
  120. ml4t/diagnostic/evaluation/trade_shap_dashboard.py +283 -0
  121. ml4t/diagnostic/evaluation/trade_shap_diagnostics.py +588 -0
  122. ml4t/diagnostic/evaluation/validated_cv.py +535 -0
  123. ml4t/diagnostic/evaluation/visualization.py +1050 -0
  124. ml4t/diagnostic/evaluation/volatility/__init__.py +45 -0
  125. ml4t/diagnostic/evaluation/volatility/analysis.py +351 -0
  126. ml4t/diagnostic/evaluation/volatility/arch.py +258 -0
  127. ml4t/diagnostic/evaluation/volatility/garch.py +460 -0
  128. ml4t/diagnostic/integration/__init__.py +48 -0
  129. ml4t/diagnostic/integration/backtest_contract.py +671 -0
  130. ml4t/diagnostic/integration/data_contract.py +316 -0
  131. ml4t/diagnostic/integration/engineer_contract.py +226 -0
  132. ml4t/diagnostic/logging/__init__.py +77 -0
  133. ml4t/diagnostic/logging/logger.py +245 -0
  134. ml4t/diagnostic/logging/performance.py +234 -0
  135. ml4t/diagnostic/logging/progress.py +234 -0
  136. ml4t/diagnostic/logging/wandb.py +412 -0
  137. ml4t/diagnostic/metrics/__init__.py +9 -0
  138. ml4t/diagnostic/metrics/percentiles.py +128 -0
  139. ml4t/diagnostic/py.typed +1 -0
  140. ml4t/diagnostic/reporting/__init__.py +43 -0
  141. ml4t/diagnostic/reporting/base.py +130 -0
  142. ml4t/diagnostic/reporting/html_renderer.py +275 -0
  143. ml4t/diagnostic/reporting/json_renderer.py +51 -0
  144. ml4t/diagnostic/reporting/markdown_renderer.py +117 -0
  145. ml4t/diagnostic/results/AGENT.md +24 -0
  146. ml4t/diagnostic/results/__init__.py +105 -0
  147. ml4t/diagnostic/results/barrier_results/__init__.py +36 -0
  148. ml4t/diagnostic/results/barrier_results/hit_rate.py +304 -0
  149. ml4t/diagnostic/results/barrier_results/precision_recall.py +266 -0
  150. ml4t/diagnostic/results/barrier_results/profit_factor.py +297 -0
  151. ml4t/diagnostic/results/barrier_results/tearsheet.py +397 -0
  152. ml4t/diagnostic/results/barrier_results/time_to_target.py +305 -0
  153. ml4t/diagnostic/results/barrier_results/validation.py +38 -0
  154. ml4t/diagnostic/results/base.py +177 -0
  155. ml4t/diagnostic/results/event_results.py +349 -0
  156. ml4t/diagnostic/results/feature_results.py +787 -0
  157. ml4t/diagnostic/results/multi_signal_results.py +431 -0
  158. ml4t/diagnostic/results/portfolio_results.py +281 -0
  159. ml4t/diagnostic/results/sharpe_results.py +448 -0
  160. ml4t/diagnostic/results/signal_results/__init__.py +74 -0
  161. ml4t/diagnostic/results/signal_results/ic.py +581 -0
  162. ml4t/diagnostic/results/signal_results/irtc.py +110 -0
  163. ml4t/diagnostic/results/signal_results/quantile.py +392 -0
  164. ml4t/diagnostic/results/signal_results/tearsheet.py +456 -0
  165. ml4t/diagnostic/results/signal_results/turnover.py +213 -0
  166. ml4t/diagnostic/results/signal_results/validation.py +147 -0
  167. ml4t/diagnostic/signal/AGENT.md +17 -0
  168. ml4t/diagnostic/signal/__init__.py +69 -0
  169. ml4t/diagnostic/signal/_report.py +152 -0
  170. ml4t/diagnostic/signal/_utils.py +261 -0
  171. ml4t/diagnostic/signal/core.py +275 -0
  172. ml4t/diagnostic/signal/quantile.py +148 -0
  173. ml4t/diagnostic/signal/result.py +214 -0
  174. ml4t/diagnostic/signal/signal_ic.py +129 -0
  175. ml4t/diagnostic/signal/turnover.py +182 -0
  176. ml4t/diagnostic/splitters/AGENT.md +19 -0
  177. ml4t/diagnostic/splitters/__init__.py +36 -0
  178. ml4t/diagnostic/splitters/base.py +501 -0
  179. ml4t/diagnostic/splitters/calendar.py +421 -0
  180. ml4t/diagnostic/splitters/calendar_config.py +91 -0
  181. ml4t/diagnostic/splitters/combinatorial.py +1064 -0
  182. ml4t/diagnostic/splitters/config.py +322 -0
  183. ml4t/diagnostic/splitters/cpcv/__init__.py +57 -0
  184. ml4t/diagnostic/splitters/cpcv/combinations.py +119 -0
  185. ml4t/diagnostic/splitters/cpcv/partitioning.py +263 -0
  186. ml4t/diagnostic/splitters/cpcv/purge_engine.py +379 -0
  187. ml4t/diagnostic/splitters/cpcv/windows.py +190 -0
  188. ml4t/diagnostic/splitters/group_isolation.py +329 -0
  189. ml4t/diagnostic/splitters/persistence.py +316 -0
  190. ml4t/diagnostic/splitters/utils.py +207 -0
  191. ml4t/diagnostic/splitters/walk_forward.py +757 -0
  192. ml4t/diagnostic/utils/__init__.py +42 -0
  193. ml4t/diagnostic/utils/config.py +542 -0
  194. ml4t/diagnostic/utils/dependencies.py +318 -0
  195. ml4t/diagnostic/utils/sessions.py +127 -0
  196. ml4t/diagnostic/validation/__init__.py +54 -0
  197. ml4t/diagnostic/validation/dataframe.py +274 -0
  198. ml4t/diagnostic/validation/returns.py +280 -0
  199. ml4t/diagnostic/validation/timeseries.py +299 -0
  200. ml4t/diagnostic/visualization/AGENT.md +19 -0
  201. ml4t/diagnostic/visualization/__init__.py +223 -0
  202. ml4t/diagnostic/visualization/backtest/__init__.py +98 -0
  203. ml4t/diagnostic/visualization/backtest/cost_attribution.py +762 -0
  204. ml4t/diagnostic/visualization/backtest/executive_summary.py +895 -0
  205. ml4t/diagnostic/visualization/backtest/interactive_controls.py +673 -0
  206. ml4t/diagnostic/visualization/backtest/statistical_validity.py +874 -0
  207. ml4t/diagnostic/visualization/backtest/tearsheet.py +565 -0
  208. ml4t/diagnostic/visualization/backtest/template_system.py +373 -0
  209. ml4t/diagnostic/visualization/backtest/trade_plots.py +1172 -0
  210. ml4t/diagnostic/visualization/barrier_plots.py +782 -0
  211. ml4t/diagnostic/visualization/core.py +1060 -0
  212. ml4t/diagnostic/visualization/dashboards/__init__.py +36 -0
  213. ml4t/diagnostic/visualization/dashboards/base.py +582 -0
  214. ml4t/diagnostic/visualization/dashboards/importance.py +801 -0
  215. ml4t/diagnostic/visualization/dashboards/interaction.py +263 -0
  216. ml4t/diagnostic/visualization/dashboards.py +43 -0
  217. ml4t/diagnostic/visualization/data_extraction/__init__.py +48 -0
  218. ml4t/diagnostic/visualization/data_extraction/importance.py +649 -0
  219. ml4t/diagnostic/visualization/data_extraction/interaction.py +504 -0
  220. ml4t/diagnostic/visualization/data_extraction/types.py +113 -0
  221. ml4t/diagnostic/visualization/data_extraction/validation.py +66 -0
  222. ml4t/diagnostic/visualization/feature_plots.py +888 -0
  223. ml4t/diagnostic/visualization/interaction_plots.py +618 -0
  224. ml4t/diagnostic/visualization/portfolio/__init__.py +41 -0
  225. ml4t/diagnostic/visualization/portfolio/dashboard.py +514 -0
  226. ml4t/diagnostic/visualization/portfolio/drawdown_plots.py +341 -0
  227. ml4t/diagnostic/visualization/portfolio/returns_plots.py +487 -0
  228. ml4t/diagnostic/visualization/portfolio/risk_plots.py +301 -0
  229. ml4t/diagnostic/visualization/report_generation.py +1343 -0
  230. ml4t/diagnostic/visualization/signal/__init__.py +103 -0
  231. ml4t/diagnostic/visualization/signal/dashboard.py +911 -0
  232. ml4t/diagnostic/visualization/signal/event_plots.py +514 -0
  233. ml4t/diagnostic/visualization/signal/ic_plots.py +635 -0
  234. ml4t/diagnostic/visualization/signal/multi_signal_dashboard.py +974 -0
  235. ml4t/diagnostic/visualization/signal/multi_signal_plots.py +603 -0
  236. ml4t/diagnostic/visualization/signal/quantile_plots.py +625 -0
  237. ml4t/diagnostic/visualization/signal/turnover_plots.py +400 -0
  238. ml4t/diagnostic/visualization/trade_shap/__init__.py +90 -0
  239. ml4t_diagnostic-0.1.0a1.dist-info/METADATA +1044 -0
  240. ml4t_diagnostic-0.1.0a1.dist-info/RECORD +242 -0
  241. ml4t_diagnostic-0.1.0a1.dist-info/WHEEL +4 -0
  242. ml4t_diagnostic-0.1.0a1.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,431 @@
1
+ """Result classes for Multi-Signal Analysis module.
2
+
3
+ This module provides Pydantic result classes for storing and serializing
4
+ multi-signal analysis outputs including summary metrics across many signals,
5
+ multiple testing corrections, and signal comparisons.
6
+
7
+ References
8
+ ----------
9
+ Benjamini, Y., & Hochberg, Y. (1995). "Controlling the False Discovery Rate"
10
+ Holm, S. (1979). "A Simple Sequentially Rejective Multiple Test Procedure"
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ from typing import Any
16
+
17
+ import polars as pl
18
+ from pydantic import Field
19
+
20
+ from ml4t.diagnostic.results.base import BaseResult
21
+
22
+
23
class MultiSignalSummary(BaseResult):
    """Summary metrics for all analyzed signals.

    Contains aggregated metrics across 50-200 signals with FDR and FWER
    corrections for multiple testing. Provides ranking, filtering, and
    DataFrame access for downstream analysis and visualization.

    The per-signal table is stored column-wise in ``summary_data`` so the
    model stays serializable; the accessors below rebuild a Polars
    DataFrame from it on demand.

    Examples
    --------
    >>> summary = multi_signal_analysis.compute_summary()
    >>> print(f"Significant: {summary.n_fdr_significant}/{summary.n_signals}")
    >>> df = summary.get_dataframe()
    >>> top_signals = summary.get_significant_signals(method="fdr")
    """

    analysis_type: str = Field(default="multi_signal_summary", frozen=True)

    # ==========================================================================
    # Core Summary Data
    # ==========================================================================

    # Column name -> column values. The methods below read the columns
    # "signal_name", "<method>_significant", "ic_mean", "ic_ir" and
    # "turnover_mean" when they are present.
    summary_data: dict[str, list[Any]] = Field(
        ...,
        description="DataFrame columns as dict of lists. Keys: column names",
    )

    # ==========================================================================
    # Metadata
    # ==========================================================================

    n_signals: int = Field(
        ...,
        ge=1,
        description="Total number of signals analyzed",
    )

    n_fdr_significant: int = Field(
        ...,
        ge=0,
        description="Number of signals significant after FDR correction",
    )

    n_fwer_significant: int = Field(
        ...,
        ge=0,
        description="Number of signals significant after FWER correction",
    )

    periods: tuple[int, ...] = Field(
        ...,
        description="Forward return periods analyzed (e.g., (1, 5, 10))",
    )

    fdr_alpha: float = Field(
        ...,
        ge=0.0,
        le=1.0,
        description="FDR significance level used",
    )

    fwer_alpha: float = Field(
        ...,
        ge=0.0,
        le=1.0,
        description="FWER significance level used",
    )

    # ==========================================================================
    # Correlation Data (Optional)
    # ==========================================================================

    correlation_data: dict[str, list[float]] | None = Field(
        default=None,
        description="Signal correlation matrix as dict of lists (optional)",
    )

    # ==========================================================================
    # Methods
    # ==========================================================================

    def get_dataframe(self, name: str | None = None) -> pl.DataFrame:
        """Get results as Polars DataFrame.

        Parameters
        ----------
        name : str | None
            DataFrame to retrieve:
            - None or "summary": Main summary with all signals
            - "correlation": Signal correlation matrix (if available)

        Returns
        -------
        pl.DataFrame
            Requested DataFrame

        Raises
        ------
        ValueError
            If ``name`` is not a known view, or "correlation" is requested
            but no correlation data was stored.
        """
        if name is None or name == "summary":
            return pl.DataFrame(self.summary_data)

        if name == "correlation":
            if self.correlation_data is None:
                raise ValueError("Correlation data not computed")
            return pl.DataFrame(self.correlation_data)

        available = self.list_available_dataframes()
        raise ValueError(f"Unknown DataFrame '{name}'. Available: {available}")

    def list_available_dataframes(self) -> list[str]:
        """List available DataFrame views."""
        available = ["summary"]
        if self.correlation_data is not None:
            available.append("correlation")
        return available

    def get_significant_signals(
        self,
        method: str = "fdr",
    ) -> list[str]:
        """Get list of significant signal names.

        Parameters
        ----------
        method : str, default "fdr"
            Correction method: "fdr" or "fwer". The flags are read from the
            ``"<method>_significant"`` column of ``summary_data``.

        Returns
        -------
        list[str]
            Names of significant signals, in ``summary_data`` order

        Raises
        ------
        ValueError
            If the ``"<method>_significant"`` column is not present.
        """
        col = f"{method}_significant"
        if col not in self.summary_data:
            raise ValueError(f"Column '{col}' not in summary data")

        signal_names = self.summary_data["signal_name"]
        significant = self.summary_data[col]

        # Any falsy flag (False, None, 0) counts as "not significant".
        return [name for name, sig in zip(signal_names, significant) if sig]

    def get_ranking(
        self,
        metric: str = "ic_ir",
        ascending: bool = False,
        n: int | None = None,
    ) -> list[str]:
        """Get signal names ranked by metric.

        Signals whose ``metric`` value is missing (null) are always placed
        at the end of the ranking, regardless of sort direction, so they
        cannot displace real values from the top of the list.

        Parameters
        ----------
        metric : str, default "ic_ir"
            Metric (summary column) to rank by
        ascending : bool, default False
            If True, return lowest values first
        n : int | None
            Number of signals to return (None = all)

        Returns
        -------
        list[str]
            Ranked signal names
        """
        ranked = self.get_dataframe().sort(
            metric,
            descending=not ascending,
            # Polars puts nulls first by default, which would rank signals
            # with no metric value ahead of every real one.
            nulls_last=True,
        )
        if n is not None:
            ranked = ranked.head(n)
        return ranked["signal_name"].to_list()

    def filter_signals(
        self,
        min_ic: float | None = None,
        min_ic_ir: float | None = None,
        max_turnover: float | None = None,
        significant_only: bool = False,
        significance_method: str = "fdr",
    ) -> pl.DataFrame:
        """Filter signals by criteria.

        Each criterion is applied only when its column exists in the
        summary; a criterion whose column is absent is skipped without
        error, so the result may be less restrictive than requested.

        Parameters
        ----------
        min_ic : float | None
            Minimum IC mean (column ``ic_mean``)
        min_ic_ir : float | None
            Minimum IC IR (column ``ic_ir``)
        max_turnover : float | None
            Maximum turnover (column ``turnover_mean``)
        significant_only : bool
            Only include significant signals
        significance_method : str
            "fdr" or "fwer" for significance filter

        Returns
        -------
        pl.DataFrame
            Filtered summary DataFrame
        """
        df = self.get_dataframe()

        if min_ic is not None and "ic_mean" in df.columns:
            df = df.filter(pl.col("ic_mean") >= min_ic)
        if min_ic_ir is not None and "ic_ir" in df.columns:
            df = df.filter(pl.col("ic_ir") >= min_ic_ir)
        if max_turnover is not None and "turnover_mean" in df.columns:
            df = df.filter(pl.col("turnover_mean") <= max_turnover)
        if significant_only:
            sig_col = f"{significance_method}_significant"
            if sig_col in df.columns:
                df = df.filter(pl.col(sig_col))

        return df

    def summary(self) -> str:
        """Get human-readable summary of results."""
        rule = "=" * 60
        # n_signals is validated as >= 1, so these ratios cannot divide by zero.
        fdr_share = self.n_fdr_significant / self.n_signals
        fwer_share = self.n_fwer_significant / self.n_signals

        lines = [
            rule,
            "Multi-Signal Analysis Summary",
            rule,
            f"Signals Analyzed: {self.n_signals}",
            f"Periods: {self.periods}",
            "",
            "Multiple Testing Corrections:",
            f"  FDR ({self.fdr_alpha:.0%}): {self.n_fdr_significant} significant ({fdr_share:.1%})",
            f"  FWER ({self.fwer_alpha:.0%}): {self.n_fwer_significant} significant ({fwer_share:.1%})",
        ]

        # Add top signals if we have IC IR
        if "ic_ir" in self.summary_data:
            top = self.get_ranking("ic_ir", n=5)
            lines.extend(["", "Top 5 Signals by IC IR:"])
            lines.extend(f"  {i}. {name}" for i, name in enumerate(top, 1))

        lines.append(rule)
        return "\n".join(lines)
254
+
255
+
256
class ComparisonResult(BaseResult):
    """Detailed comparison of selected signals.

    Contains individual tear sheet data for a subset of signals
    selected for detailed comparison, along with correlation information
    and selection metadata.

    Examples
    --------
    >>> comparison = analyzer.compare(selection="uncorrelated", n=5)
    >>> for signal in comparison.signals:
    ...     data = comparison.get_tear_sheet_data(signal)
    ...     print(f"{signal}: {sorted(data)}")
    >>> corr = comparison.get_correlation_dataframe()
    """

    analysis_type: str = Field(default="signal_comparison", frozen=True)

    # ==========================================================================
    # Selection Metadata
    # ==========================================================================

    signals: list[str] = Field(
        ...,
        description="Names of selected signals",
    )

    selection_method: str = Field(
        ...,
        description="Selection method used: 'top_n', 'uncorrelated', 'pareto', 'cluster', 'manual'",
    )

    selection_params: dict[str, Any] = Field(
        default_factory=dict,
        description="Parameters used for selection",
    )

    # ==========================================================================
    # Tear Sheet Data
    # ==========================================================================

    # Signal name -> serialized tear sheet. get_dataframe() reads the optional
    # "ic_analysis" entry, whose "ic_mean" / "ic_ir" values are mappings of
    # period -> value.
    tear_sheets: dict[str, dict[str, Any]] = Field(
        ...,
        description="Serialized SignalTearSheet data per signal",
    )

    # ==========================================================================
    # Correlation Data
    # ==========================================================================

    # Signal name -> column of correlations. get_pairwise_correlation()
    # indexes each column by the position of the other signal in `signals`.
    correlation_matrix: dict[str, list[float]] = Field(
        ...,
        description="Pairwise correlation matrix for selected signals",
    )

    # ==========================================================================
    # Methods
    # ==========================================================================

    def get_dataframe(self, name: str | None = None) -> pl.DataFrame:
        """Get results as Polars DataFrame.

        Parameters
        ----------
        name : str | None
            DataFrame to retrieve:
            - None or "summary": Summary metrics for compared signals
            - "correlation": Correlation matrix

        Returns
        -------
        pl.DataFrame
            Requested DataFrame. The summary has one row per tear sheet with
            ``signal_name`` plus, when available, ``ic_mean_<period>`` and
            ``ic_ir_<period>`` for the first period stored in the tear sheet.

        Raises
        ------
        ValueError
            If ``name`` is not a known view.
        """
        if name is None or name == "summary":
            # Build summary from tear sheets
            rows: list[dict[str, Any]] = []
            for signal_name, data in self.tear_sheets.items():
                row: dict[str, Any] = {"signal_name": signal_name}
                ic_data = data.get("ic_analysis")
                if ic_data:
                    for metric in ("ic_mean", "ic_ir"):
                        if metric not in ic_data:
                            continue
                        # Only the first period is reported in the summary.
                        first = next(iter(ic_data[metric].items()), None)
                        if first is not None:
                            period, value = first
                            row[f"{metric}_{period}"] = value
                rows.append(row)
            return pl.DataFrame(rows)

        if name == "correlation":
            return pl.DataFrame(self.correlation_matrix)

        available = self.list_available_dataframes()
        raise ValueError(f"Unknown DataFrame '{name}'. Available: {available}")

    def list_available_dataframes(self) -> list[str]:
        """List available DataFrame views."""
        return ["summary", "correlation"]

    def get_tear_sheet_data(self, signal_name: str) -> dict[str, Any]:
        """Get tear sheet data for a specific signal.

        Parameters
        ----------
        signal_name : str
            Name of signal

        Returns
        -------
        dict
            Serialized tear sheet data

        Raises
        ------
        ValueError
            If no tear sheet is stored for ``signal_name``.
        """
        if signal_name not in self.tear_sheets:
            # Report the names that can actually be looked up here, i.e. the
            # tear sheet keys rather than the separate `signals` list.
            available = list(self.tear_sheets)
            raise ValueError(f"Signal '{signal_name}' not in comparison. Available: {available}")
        return self.tear_sheets[signal_name]

    def get_correlation_dataframe(self) -> pl.DataFrame:
        """Get correlation matrix as DataFrame.

        Returns
        -------
        pl.DataFrame
            Correlation matrix with signal names as columns
        """
        return pl.DataFrame(self.correlation_matrix)

    def get_pairwise_correlation(self, signal1: str, signal2: str) -> float:
        """Get correlation between two signals.

        ``signal1`` selects the column of ``correlation_matrix``; the
        position of ``signal2`` in ``signals`` selects the entry within it.

        Parameters
        ----------
        signal1 : str
            First signal name
        signal2 : str
            Second signal name

        Returns
        -------
        float
            Correlation coefficient

        Raises
        ------
        ValueError
            If ``signal1`` has no column in the correlation matrix or
            ``signal2`` is not one of the selected signals.
        """
        if signal1 not in self.correlation_matrix:
            raise ValueError(f"Signal '{signal1}' not found")
        if signal2 not in self.signals:
            raise ValueError(f"Signal '{signal2}' not found")

        idx = self.signals.index(signal2)
        return self.correlation_matrix[signal1][idx]

    def summary(self) -> str:
        """Get human-readable summary of comparison."""
        rule = "=" * 60
        lines = [
            rule,
            "Signal Comparison",
            rule,
            f"Selection Method: {self.selection_method}",
            f"Signals Compared: {len(self.signals)}",
            "",
            "Signals:",
        ]
        lines.extend(f"  {i}. {signal}" for i, signal in enumerate(self.signals, 1))

        if self.selection_params:
            lines.extend(["", "Selection Parameters:"])
            lines.extend(f"  {key}: {value}" for key, value in self.selection_params.items())

        lines.append(rule)
        return "\n".join(lines)