ml4t-diagnostic 0.1.0a1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ml4t/diagnostic/AGENT.md +25 -0
- ml4t/diagnostic/__init__.py +166 -0
- ml4t/diagnostic/backends/__init__.py +10 -0
- ml4t/diagnostic/backends/adapter.py +192 -0
- ml4t/diagnostic/backends/polars_backend.py +899 -0
- ml4t/diagnostic/caching/__init__.py +40 -0
- ml4t/diagnostic/caching/cache.py +331 -0
- ml4t/diagnostic/caching/decorators.py +131 -0
- ml4t/diagnostic/caching/smart_cache.py +339 -0
- ml4t/diagnostic/config/AGENT.md +24 -0
- ml4t/diagnostic/config/README.md +267 -0
- ml4t/diagnostic/config/__init__.py +219 -0
- ml4t/diagnostic/config/barrier_config.py +277 -0
- ml4t/diagnostic/config/base.py +301 -0
- ml4t/diagnostic/config/event_config.py +148 -0
- ml4t/diagnostic/config/feature_config.py +404 -0
- ml4t/diagnostic/config/multi_signal_config.py +55 -0
- ml4t/diagnostic/config/portfolio_config.py +215 -0
- ml4t/diagnostic/config/report_config.py +391 -0
- ml4t/diagnostic/config/sharpe_config.py +202 -0
- ml4t/diagnostic/config/signal_config.py +206 -0
- ml4t/diagnostic/config/trade_analysis_config.py +310 -0
- ml4t/diagnostic/config/validation.py +279 -0
- ml4t/diagnostic/core/__init__.py +29 -0
- ml4t/diagnostic/core/numba_utils.py +315 -0
- ml4t/diagnostic/core/purging.py +372 -0
- ml4t/diagnostic/core/sampling.py +471 -0
- ml4t/diagnostic/errors/__init__.py +205 -0
- ml4t/diagnostic/evaluation/AGENT.md +26 -0
- ml4t/diagnostic/evaluation/__init__.py +437 -0
- ml4t/diagnostic/evaluation/autocorrelation.py +531 -0
- ml4t/diagnostic/evaluation/barrier_analysis.py +1050 -0
- ml4t/diagnostic/evaluation/binary_metrics.py +910 -0
- ml4t/diagnostic/evaluation/dashboard.py +715 -0
- ml4t/diagnostic/evaluation/diagnostic_plots.py +1037 -0
- ml4t/diagnostic/evaluation/distribution/__init__.py +499 -0
- ml4t/diagnostic/evaluation/distribution/moments.py +299 -0
- ml4t/diagnostic/evaluation/distribution/tails.py +777 -0
- ml4t/diagnostic/evaluation/distribution/tests.py +470 -0
- ml4t/diagnostic/evaluation/drift/__init__.py +139 -0
- ml4t/diagnostic/evaluation/drift/analysis.py +432 -0
- ml4t/diagnostic/evaluation/drift/domain_classifier.py +517 -0
- ml4t/diagnostic/evaluation/drift/population_stability_index.py +310 -0
- ml4t/diagnostic/evaluation/drift/wasserstein.py +388 -0
- ml4t/diagnostic/evaluation/event_analysis.py +647 -0
- ml4t/diagnostic/evaluation/excursion.py +390 -0
- ml4t/diagnostic/evaluation/feature_diagnostics.py +873 -0
- ml4t/diagnostic/evaluation/feature_outcome.py +666 -0
- ml4t/diagnostic/evaluation/framework.py +935 -0
- ml4t/diagnostic/evaluation/metric_registry.py +255 -0
- ml4t/diagnostic/evaluation/metrics/AGENT.md +23 -0
- ml4t/diagnostic/evaluation/metrics/__init__.py +133 -0
- ml4t/diagnostic/evaluation/metrics/basic.py +160 -0
- ml4t/diagnostic/evaluation/metrics/conditional_ic.py +469 -0
- ml4t/diagnostic/evaluation/metrics/feature_outcome.py +475 -0
- ml4t/diagnostic/evaluation/metrics/ic_statistics.py +446 -0
- ml4t/diagnostic/evaluation/metrics/importance_analysis.py +338 -0
- ml4t/diagnostic/evaluation/metrics/importance_classical.py +375 -0
- ml4t/diagnostic/evaluation/metrics/importance_mda.py +371 -0
- ml4t/diagnostic/evaluation/metrics/importance_shap.py +715 -0
- ml4t/diagnostic/evaluation/metrics/information_coefficient.py +527 -0
- ml4t/diagnostic/evaluation/metrics/interactions.py +772 -0
- ml4t/diagnostic/evaluation/metrics/monotonicity.py +226 -0
- ml4t/diagnostic/evaluation/metrics/risk_adjusted.py +324 -0
- ml4t/diagnostic/evaluation/multi_signal.py +550 -0
- ml4t/diagnostic/evaluation/portfolio_analysis/__init__.py +83 -0
- ml4t/diagnostic/evaluation/portfolio_analysis/analysis.py +734 -0
- ml4t/diagnostic/evaluation/portfolio_analysis/metrics.py +589 -0
- ml4t/diagnostic/evaluation/portfolio_analysis/results.py +334 -0
- ml4t/diagnostic/evaluation/report_generation.py +824 -0
- ml4t/diagnostic/evaluation/signal_selector.py +452 -0
- ml4t/diagnostic/evaluation/stat_registry.py +139 -0
- ml4t/diagnostic/evaluation/stationarity/__init__.py +97 -0
- ml4t/diagnostic/evaluation/stationarity/analysis.py +518 -0
- ml4t/diagnostic/evaluation/stationarity/augmented_dickey_fuller.py +296 -0
- ml4t/diagnostic/evaluation/stationarity/kpss_test.py +308 -0
- ml4t/diagnostic/evaluation/stationarity/phillips_perron.py +365 -0
- ml4t/diagnostic/evaluation/stats/AGENT.md +43 -0
- ml4t/diagnostic/evaluation/stats/__init__.py +191 -0
- ml4t/diagnostic/evaluation/stats/backtest_overfitting.py +219 -0
- ml4t/diagnostic/evaluation/stats/bootstrap.py +228 -0
- ml4t/diagnostic/evaluation/stats/deflated_sharpe_ratio.py +591 -0
- ml4t/diagnostic/evaluation/stats/false_discovery_rate.py +295 -0
- ml4t/diagnostic/evaluation/stats/hac_standard_errors.py +108 -0
- ml4t/diagnostic/evaluation/stats/minimum_track_record.py +408 -0
- ml4t/diagnostic/evaluation/stats/moments.py +164 -0
- ml4t/diagnostic/evaluation/stats/rademacher_adjustment.py +436 -0
- ml4t/diagnostic/evaluation/stats/reality_check.py +155 -0
- ml4t/diagnostic/evaluation/stats/sharpe_inference.py +219 -0
- ml4t/diagnostic/evaluation/themes.py +330 -0
- ml4t/diagnostic/evaluation/threshold_analysis.py +957 -0
- ml4t/diagnostic/evaluation/trade_analysis.py +1136 -0
- ml4t/diagnostic/evaluation/trade_dashboard/__init__.py +32 -0
- ml4t/diagnostic/evaluation/trade_dashboard/app.py +315 -0
- ml4t/diagnostic/evaluation/trade_dashboard/export/__init__.py +18 -0
- ml4t/diagnostic/evaluation/trade_dashboard/export/csv.py +82 -0
- ml4t/diagnostic/evaluation/trade_dashboard/export/html.py +276 -0
- ml4t/diagnostic/evaluation/trade_dashboard/io.py +166 -0
- ml4t/diagnostic/evaluation/trade_dashboard/normalize.py +304 -0
- ml4t/diagnostic/evaluation/trade_dashboard/stats.py +386 -0
- ml4t/diagnostic/evaluation/trade_dashboard/style.py +79 -0
- ml4t/diagnostic/evaluation/trade_dashboard/tabs/__init__.py +21 -0
- ml4t/diagnostic/evaluation/trade_dashboard/tabs/patterns.py +354 -0
- ml4t/diagnostic/evaluation/trade_dashboard/tabs/shap_analysis.py +280 -0
- ml4t/diagnostic/evaluation/trade_dashboard/tabs/stat_validation.py +186 -0
- ml4t/diagnostic/evaluation/trade_dashboard/tabs/worst_trades.py +236 -0
- ml4t/diagnostic/evaluation/trade_dashboard/types.py +129 -0
- ml4t/diagnostic/evaluation/trade_shap/__init__.py +102 -0
- ml4t/diagnostic/evaluation/trade_shap/alignment.py +188 -0
- ml4t/diagnostic/evaluation/trade_shap/characterize.py +413 -0
- ml4t/diagnostic/evaluation/trade_shap/cluster.py +302 -0
- ml4t/diagnostic/evaluation/trade_shap/explain.py +208 -0
- ml4t/diagnostic/evaluation/trade_shap/hypotheses/__init__.py +23 -0
- ml4t/diagnostic/evaluation/trade_shap/hypotheses/generator.py +290 -0
- ml4t/diagnostic/evaluation/trade_shap/hypotheses/matcher.py +251 -0
- ml4t/diagnostic/evaluation/trade_shap/hypotheses/templates.yaml +467 -0
- ml4t/diagnostic/evaluation/trade_shap/models.py +386 -0
- ml4t/diagnostic/evaluation/trade_shap/normalize.py +116 -0
- ml4t/diagnostic/evaluation/trade_shap/pipeline.py +263 -0
- ml4t/diagnostic/evaluation/trade_shap_dashboard.py +283 -0
- ml4t/diagnostic/evaluation/trade_shap_diagnostics.py +588 -0
- ml4t/diagnostic/evaluation/validated_cv.py +535 -0
- ml4t/diagnostic/evaluation/visualization.py +1050 -0
- ml4t/diagnostic/evaluation/volatility/__init__.py +45 -0
- ml4t/diagnostic/evaluation/volatility/analysis.py +351 -0
- ml4t/diagnostic/evaluation/volatility/arch.py +258 -0
- ml4t/diagnostic/evaluation/volatility/garch.py +460 -0
- ml4t/diagnostic/integration/__init__.py +48 -0
- ml4t/diagnostic/integration/backtest_contract.py +671 -0
- ml4t/diagnostic/integration/data_contract.py +316 -0
- ml4t/diagnostic/integration/engineer_contract.py +226 -0
- ml4t/diagnostic/logging/__init__.py +77 -0
- ml4t/diagnostic/logging/logger.py +245 -0
- ml4t/diagnostic/logging/performance.py +234 -0
- ml4t/diagnostic/logging/progress.py +234 -0
- ml4t/diagnostic/logging/wandb.py +412 -0
- ml4t/diagnostic/metrics/__init__.py +9 -0
- ml4t/diagnostic/metrics/percentiles.py +128 -0
- ml4t/diagnostic/py.typed +1 -0
- ml4t/diagnostic/reporting/__init__.py +43 -0
- ml4t/diagnostic/reporting/base.py +130 -0
- ml4t/diagnostic/reporting/html_renderer.py +275 -0
- ml4t/diagnostic/reporting/json_renderer.py +51 -0
- ml4t/diagnostic/reporting/markdown_renderer.py +117 -0
- ml4t/diagnostic/results/AGENT.md +24 -0
- ml4t/diagnostic/results/__init__.py +105 -0
- ml4t/diagnostic/results/barrier_results/__init__.py +36 -0
- ml4t/diagnostic/results/barrier_results/hit_rate.py +304 -0
- ml4t/diagnostic/results/barrier_results/precision_recall.py +266 -0
- ml4t/diagnostic/results/barrier_results/profit_factor.py +297 -0
- ml4t/diagnostic/results/barrier_results/tearsheet.py +397 -0
- ml4t/diagnostic/results/barrier_results/time_to_target.py +305 -0
- ml4t/diagnostic/results/barrier_results/validation.py +38 -0
- ml4t/diagnostic/results/base.py +177 -0
- ml4t/diagnostic/results/event_results.py +349 -0
- ml4t/diagnostic/results/feature_results.py +787 -0
- ml4t/diagnostic/results/multi_signal_results.py +431 -0
- ml4t/diagnostic/results/portfolio_results.py +281 -0
- ml4t/diagnostic/results/sharpe_results.py +448 -0
- ml4t/diagnostic/results/signal_results/__init__.py +74 -0
- ml4t/diagnostic/results/signal_results/ic.py +581 -0
- ml4t/diagnostic/results/signal_results/irtc.py +110 -0
- ml4t/diagnostic/results/signal_results/quantile.py +392 -0
- ml4t/diagnostic/results/signal_results/tearsheet.py +456 -0
- ml4t/diagnostic/results/signal_results/turnover.py +213 -0
- ml4t/diagnostic/results/signal_results/validation.py +147 -0
- ml4t/diagnostic/signal/AGENT.md +17 -0
- ml4t/diagnostic/signal/__init__.py +69 -0
- ml4t/diagnostic/signal/_report.py +152 -0
- ml4t/diagnostic/signal/_utils.py +261 -0
- ml4t/diagnostic/signal/core.py +275 -0
- ml4t/diagnostic/signal/quantile.py +148 -0
- ml4t/diagnostic/signal/result.py +214 -0
- ml4t/diagnostic/signal/signal_ic.py +129 -0
- ml4t/diagnostic/signal/turnover.py +182 -0
- ml4t/diagnostic/splitters/AGENT.md +19 -0
- ml4t/diagnostic/splitters/__init__.py +36 -0
- ml4t/diagnostic/splitters/base.py +501 -0
- ml4t/diagnostic/splitters/calendar.py +421 -0
- ml4t/diagnostic/splitters/calendar_config.py +91 -0
- ml4t/diagnostic/splitters/combinatorial.py +1064 -0
- ml4t/diagnostic/splitters/config.py +322 -0
- ml4t/diagnostic/splitters/cpcv/__init__.py +57 -0
- ml4t/diagnostic/splitters/cpcv/combinations.py +119 -0
- ml4t/diagnostic/splitters/cpcv/partitioning.py +263 -0
- ml4t/diagnostic/splitters/cpcv/purge_engine.py +379 -0
- ml4t/diagnostic/splitters/cpcv/windows.py +190 -0
- ml4t/diagnostic/splitters/group_isolation.py +329 -0
- ml4t/diagnostic/splitters/persistence.py +316 -0
- ml4t/diagnostic/splitters/utils.py +207 -0
- ml4t/diagnostic/splitters/walk_forward.py +757 -0
- ml4t/diagnostic/utils/__init__.py +42 -0
- ml4t/diagnostic/utils/config.py +542 -0
- ml4t/diagnostic/utils/dependencies.py +318 -0
- ml4t/diagnostic/utils/sessions.py +127 -0
- ml4t/diagnostic/validation/__init__.py +54 -0
- ml4t/diagnostic/validation/dataframe.py +274 -0
- ml4t/diagnostic/validation/returns.py +280 -0
- ml4t/diagnostic/validation/timeseries.py +299 -0
- ml4t/diagnostic/visualization/AGENT.md +19 -0
- ml4t/diagnostic/visualization/__init__.py +223 -0
- ml4t/diagnostic/visualization/backtest/__init__.py +98 -0
- ml4t/diagnostic/visualization/backtest/cost_attribution.py +762 -0
- ml4t/diagnostic/visualization/backtest/executive_summary.py +895 -0
- ml4t/diagnostic/visualization/backtest/interactive_controls.py +673 -0
- ml4t/diagnostic/visualization/backtest/statistical_validity.py +874 -0
- ml4t/diagnostic/visualization/backtest/tearsheet.py +565 -0
- ml4t/diagnostic/visualization/backtest/template_system.py +373 -0
- ml4t/diagnostic/visualization/backtest/trade_plots.py +1172 -0
- ml4t/diagnostic/visualization/barrier_plots.py +782 -0
- ml4t/diagnostic/visualization/core.py +1060 -0
- ml4t/diagnostic/visualization/dashboards/__init__.py +36 -0
- ml4t/diagnostic/visualization/dashboards/base.py +582 -0
- ml4t/diagnostic/visualization/dashboards/importance.py +801 -0
- ml4t/diagnostic/visualization/dashboards/interaction.py +263 -0
- ml4t/diagnostic/visualization/dashboards.py +43 -0
- ml4t/diagnostic/visualization/data_extraction/__init__.py +48 -0
- ml4t/diagnostic/visualization/data_extraction/importance.py +649 -0
- ml4t/diagnostic/visualization/data_extraction/interaction.py +504 -0
- ml4t/diagnostic/visualization/data_extraction/types.py +113 -0
- ml4t/diagnostic/visualization/data_extraction/validation.py +66 -0
- ml4t/diagnostic/visualization/feature_plots.py +888 -0
- ml4t/diagnostic/visualization/interaction_plots.py +618 -0
- ml4t/diagnostic/visualization/portfolio/__init__.py +41 -0
- ml4t/diagnostic/visualization/portfolio/dashboard.py +514 -0
- ml4t/diagnostic/visualization/portfolio/drawdown_plots.py +341 -0
- ml4t/diagnostic/visualization/portfolio/returns_plots.py +487 -0
- ml4t/diagnostic/visualization/portfolio/risk_plots.py +301 -0
- ml4t/diagnostic/visualization/report_generation.py +1343 -0
- ml4t/diagnostic/visualization/signal/__init__.py +103 -0
- ml4t/diagnostic/visualization/signal/dashboard.py +911 -0
- ml4t/diagnostic/visualization/signal/event_plots.py +514 -0
- ml4t/diagnostic/visualization/signal/ic_plots.py +635 -0
- ml4t/diagnostic/visualization/signal/multi_signal_dashboard.py +974 -0
- ml4t/diagnostic/visualization/signal/multi_signal_plots.py +603 -0
- ml4t/diagnostic/visualization/signal/quantile_plots.py +625 -0
- ml4t/diagnostic/visualization/signal/turnover_plots.py +400 -0
- ml4t/diagnostic/visualization/trade_shap/__init__.py +90 -0
- ml4t_diagnostic-0.1.0a1.dist-info/METADATA +1044 -0
- ml4t_diagnostic-0.1.0a1.dist-info/RECORD +242 -0
- ml4t_diagnostic-0.1.0a1.dist-info/WHEEL +4 -0
- ml4t_diagnostic-0.1.0a1.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,431 @@
|
|
|
1
|
+
"""Result classes for Multi-Signal Analysis module.
|
|
2
|
+
|
|
3
|
+
This module provides Pydantic result classes for storing and serializing
|
|
4
|
+
multi-signal analysis outputs including summary metrics across many signals,
|
|
5
|
+
multiple testing corrections, and signal comparisons.
|
|
6
|
+
|
|
7
|
+
References
|
|
8
|
+
----------
|
|
9
|
+
Benjamini, Y., & Hochberg, Y. (1995). "Controlling the False Discovery Rate"
|
|
10
|
+
Holm, S. (1979). "A Simple Sequentially Rejective Multiple Test Procedure"
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from __future__ import annotations
|
|
14
|
+
|
|
15
|
+
from typing import Any
|
|
16
|
+
|
|
17
|
+
import polars as pl
|
|
18
|
+
from pydantic import Field
|
|
19
|
+
|
|
20
|
+
from ml4t.diagnostic.results.base import BaseResult
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class MultiSignalSummary(BaseResult):
    """Summary metrics for all analyzed signals.

    Contains aggregated metrics across 50-200 signals with FDR and FWER
    corrections for multiple testing. Provides ranking, filtering, and
    DataFrame access for downstream analysis and visualization.

    The per-signal table is stored column-wise in ``summary_data`` (a dict of
    equal-length lists) and is turned into a Polars DataFrame on demand. The
    helper methods read these columns when present: ``signal_name``,
    ``ic_mean``, ``ic_ir``, ``turnover_mean``, ``fdr_significant`` and
    ``fwer_significant``.

    Examples
    --------
    >>> summary = multi_signal_analysis.compute_summary()
    >>> print(f"Significant: {summary.n_fdr_significant}/{summary.n_signals}")
    >>> df = summary.get_dataframe()
    >>> top_signals = summary.get_significant_signals(method="fdr")
    """

    analysis_type: str = Field(default="multi_signal_summary", frozen=True)

    # ==========================================================================
    # Core Summary Data
    # ==========================================================================

    summary_data: dict[str, list[Any]] = Field(
        ...,
        description="DataFrame columns as dict of lists. Keys: column names",
    )

    # ==========================================================================
    # Metadata
    # ==========================================================================

    n_signals: int = Field(
        ...,
        ge=1,
        description="Total number of signals analyzed",
    )

    n_fdr_significant: int = Field(
        ...,
        ge=0,
        description="Number of signals significant after FDR correction",
    )

    n_fwer_significant: int = Field(
        ...,
        ge=0,
        description="Number of signals significant after FWER correction",
    )

    periods: tuple[int, ...] = Field(
        ...,
        description="Forward return periods analyzed (e.g., (1, 5, 10))",
    )

    fdr_alpha: float = Field(
        ...,
        ge=0.0,
        le=1.0,
        description="FDR significance level used",
    )

    fwer_alpha: float = Field(
        ...,
        ge=0.0,
        le=1.0,
        description="FWER significance level used",
    )

    # ==========================================================================
    # Correlation Data (Optional)
    # ==========================================================================

    correlation_data: dict[str, list[float]] | None = Field(
        default=None,
        description="Signal correlation matrix as dict of lists (optional)",
    )

    # ==========================================================================
    # Methods
    # ==========================================================================

    def get_dataframe(self, name: str | None = None) -> pl.DataFrame:
        """Get results as Polars DataFrame.

        Parameters
        ----------
        name : str | None
            DataFrame to retrieve:
            - None or "summary": Main summary with all signals
            - "correlation": Signal correlation matrix (if available)

        Returns
        -------
        pl.DataFrame
            Requested DataFrame, built from the stored dict of lists.

        Raises
        ------
        ValueError
            If ``name`` is "correlation" but no correlation data is stored,
            or if ``name`` is not a known view.
        """
        if name is None or name == "summary":
            return pl.DataFrame(self.summary_data)

        if name == "correlation":
            if self.correlation_data is None:
                raise ValueError("Correlation data not computed")
            return pl.DataFrame(self.correlation_data)

        available = self.list_available_dataframes()
        raise ValueError(f"Unknown DataFrame '{name}'. Available: {available}")

    def list_available_dataframes(self) -> list[str]:
        """List available DataFrame views.

        Returns
        -------
        list[str]
            Always contains "summary"; "correlation" is added only when
            correlation data is stored.
        """
        available = ["summary"]
        if self.correlation_data is not None:
            available.append("correlation")
        return available

    def get_significant_signals(
        self,
        method: str = "fdr",
    ) -> list[str]:
        """Get list of significant signal names.

        Parameters
        ----------
        method : str, default "fdr"
            Correction method: "fdr" or "fwer". The flags are read from the
            ``f"{method}_significant"`` column of ``summary_data``.

        Returns
        -------
        list[str]
            Names of significant signals, in the order they are stored.

        Raises
        ------
        ValueError
            If the ``f"{method}_significant"`` column is not in the summary.
        """
        col = f"{method}_significant"
        if col not in self.summary_data:
            raise ValueError(f"Column '{col}' not in summary data")

        signal_names = self.summary_data["signal_name"]
        flags = self.summary_data[col]

        # Keep every name whose flag is truthy.
        return [name for name, is_significant in zip(signal_names, flags) if is_significant]

    def get_ranking(
        self,
        metric: str = "ic_ir",
        ascending: bool = False,
        n: int | None = None,
    ) -> list[str]:
        """Get signal names ranked by metric.

        Signals whose metric is missing (null or NaN) are always placed at the
        end of the ranking, whichever direction is requested, so they can
        never be reported as "top" signals.

        Parameters
        ----------
        metric : str, default "ic_ir"
            Summary column to rank by
        ascending : bool, default False
            If True, return lowest values first
        n : int | None
            Number of signals to return (None = all)

        Returns
        -------
        list[str]
            Ranked signal names
        """
        df = self.get_dataframe()

        sort_key = pl.col(metric)
        # Polars orders NaN above every number, so a descending sort would put
        # NaN metrics first. Map NaN to null for float columns and let
        # ``nulls_last`` push all missing values to the bottom instead.
        if metric in df.columns and df.schema[metric] in (pl.Float32, pl.Float64):
            sort_key = sort_key.fill_nan(None)

        ranked = df.sort(sort_key, descending=not ascending, nulls_last=True)
        if n is not None:
            ranked = ranked.head(n)
        return ranked["signal_name"].to_list()

    def filter_signals(
        self,
        min_ic: float | None = None,
        min_ic_ir: float | None = None,
        max_turnover: float | None = None,
        significant_only: bool = False,
        significance_method: str = "fdr",
    ) -> pl.DataFrame:
        """Filter signals by criteria.

        Each criterion is applied only when it is requested *and* the column
        it needs exists in the summary; a criterion whose column is absent is
        skipped rather than raising.

        Parameters
        ----------
        min_ic : float | None
            Minimum IC mean (column ``ic_mean``)
        min_ic_ir : float | None
            Minimum IC IR (column ``ic_ir``)
        max_turnover : float | None
            Maximum turnover (column ``turnover_mean``)
        significant_only : bool
            Only include significant signals
        significance_method : str
            "fdr" or "fwer" for significance filter; selects the
            ``f"{significance_method}_significant"`` column

        Returns
        -------
        pl.DataFrame
            Filtered summary DataFrame
        """
        df = self.get_dataframe()
        columns = df.columns

        if min_ic is not None and "ic_mean" in columns:
            df = df.filter(pl.col("ic_mean") >= min_ic)
        if min_ic_ir is not None and "ic_ir" in columns:
            df = df.filter(pl.col("ic_ir") >= min_ic_ir)
        if max_turnover is not None and "turnover_mean" in columns:
            df = df.filter(pl.col("turnover_mean") <= max_turnover)
        if significant_only:
            sig_col = f"{significance_method}_significant"
            # If the flag column is absent, no significance filter is applied.
            if sig_col in columns:
                df = df.filter(pl.col(sig_col))

        return df

    def summary(self) -> str:
        """Get human-readable summary of results.

        Returns
        -------
        str
            Multi-line report with the signal count, periods, the FDR/FWER
            significant counts (with their share of all signals) and, when an
            ``ic_ir`` column is stored, the top five signals by IC IR.
        """
        rule = "=" * 60
        lines = [
            rule,
            "Multi-Signal Analysis Summary",
            rule,
            f"Signals Analyzed: {self.n_signals}",
            f"Periods: {self.periods}",
            "",
            "Multiple Testing Corrections:",
            f" FDR ({self.fdr_alpha:.0%}): {self.n_fdr_significant} significant ({self.n_fdr_significant / self.n_signals:.1%})",
            f" FWER ({self.fwer_alpha:.0%}): {self.n_fwer_significant} significant ({self.n_fwer_significant / self.n_signals:.1%})",
        ]

        # Add top signals if we have IC IR
        if "ic_ir" in self.summary_data:
            top = self.get_ranking("ic_ir", n=5)
            lines.extend(["", "Top 5 Signals by IC IR:"])
            lines.extend(f" {i}. {name}" for i, name in enumerate(top, 1))

        lines.append(rule)
        return "\n".join(lines)
|
|
254
|
+
|
|
255
|
+
|
|
256
|
+
class ComparisonResult(BaseResult):
    """Detailed comparison of selected signals.

    Contains individual tear sheet data for a subset of signals
    selected for detailed comparison, along with correlation information
    and selection metadata.

    Tear sheets are stored as plain serialized dicts, so they are read with
    dict access rather than attribute access.

    Examples
    --------
    >>> comparison = analyzer.compare(selection="uncorrelated", n=5)
    >>> for signal in comparison.signals:
    ...     tear_sheet = comparison.get_tear_sheet_data(signal)
    ...     ic_analysis = tear_sheet.get("ic_analysis") or {}
    ...     print(f"{signal}: IC IR = {ic_analysis.get('ic_ir')}")
    """

    analysis_type: str = Field(default="signal_comparison", frozen=True)

    # ==========================================================================
    # Selection Metadata
    # ==========================================================================

    signals: list[str] = Field(
        ...,
        description="Names of selected signals",
    )

    selection_method: str = Field(
        ...,
        description="Selection method used: 'top_n', 'uncorrelated', 'pareto', 'cluster', 'manual'",
    )

    selection_params: dict[str, Any] = Field(
        default_factory=dict,
        description="Parameters used for selection",
    )

    # ==========================================================================
    # Tear Sheet Data
    # ==========================================================================

    tear_sheets: dict[str, dict[str, Any]] = Field(
        ...,
        description="Serialized SignalTearSheet data per signal",
    )

    # ==========================================================================
    # Correlation Data
    # ==========================================================================

    correlation_matrix: dict[str, list[float]] = Field(
        ...,
        description="Pairwise correlation matrix for selected signals",
    )

    # ==========================================================================
    # Methods
    # ==========================================================================

    def get_dataframe(self, name: str | None = None) -> pl.DataFrame:
        """Get results as Polars DataFrame.

        Parameters
        ----------
        name : str | None
            DataFrame to retrieve:
            - None or "summary": Summary metrics for compared signals
            - "correlation": Correlation matrix

        Returns
        -------
        pl.DataFrame
            Requested DataFrame. The summary has one row per tear sheet with
            a ``signal_name`` column plus ``ic_mean_<period>`` and
            ``ic_ir_<period>`` columns for the first period found in that
            signal's ``ic_analysis`` data; metrics a signal does not provide
            are left as null.

        Raises
        ------
        ValueError
            If ``name`` is not a known view.
        """
        if name is None or name == "summary":
            rows: list[dict[str, Any]] = []
            for signal_name, data in self.tear_sheets.items():
                row: dict[str, Any] = {"signal_name": signal_name}
                ic_data = data.get("ic_analysis")
                if ic_data:
                    for metric in ("ic_mean", "ic_ir"):
                        if metric not in ic_data:
                            continue
                        # Only the first period is reported in the summary.
                        first = next(iter(ic_data[metric].items()), None)
                        if first is not None:
                            period, value = first
                            row[f"{metric}_{period}"] = value
                rows.append(row)

            # Give every row the same keys (missing metrics become None) so
            # the frame's columns do not depend on which signals happen to
            # carry IC data or on the order they are stored in.
            columns = list(dict.fromkeys(key for row in rows for key in row))
            rows = [{column: row.get(column) for column in columns} for row in rows]
            return pl.DataFrame(rows)

        if name == "correlation":
            return pl.DataFrame(self.correlation_matrix)

        available = self.list_available_dataframes()
        raise ValueError(f"Unknown DataFrame '{name}'. Available: {available}")

    def list_available_dataframes(self) -> list[str]:
        """List available DataFrame views.

        Returns
        -------
        list[str]
            Always ``["summary", "correlation"]``.
        """
        return ["summary", "correlation"]

    def get_tear_sheet_data(self, signal_name: str) -> dict[str, Any]:
        """Get tear sheet data for a specific signal.

        Parameters
        ----------
        signal_name : str
            Name of signal

        Returns
        -------
        dict
            Serialized tear sheet data

        Raises
        ------
        ValueError
            If no tear sheet is stored under ``signal_name``.
        """
        if signal_name not in self.tear_sheets:
            raise ValueError(f"Signal '{signal_name}' not in comparison. Available: {self.signals}")
        return self.tear_sheets[signal_name]

    def get_correlation_dataframe(self) -> pl.DataFrame:
        """Get correlation matrix as DataFrame.

        Returns
        -------
        pl.DataFrame
            Correlation matrix with signal names as columns
        """
        return pl.DataFrame(self.correlation_matrix)

    def get_pairwise_correlation(self, signal1: str, signal2: str) -> float:
        """Get correlation between two signals.

        ``signal1`` selects a column of ``correlation_matrix``; the entry
        within that column is taken at the position ``signal2`` has in
        ``signals``.

        Parameters
        ----------
        signal1 : str
            First signal name (must be a key of ``correlation_matrix``)
        signal2 : str
            Second signal name (must be listed in ``signals``)

        Returns
        -------
        float
            Correlation coefficient

        Raises
        ------
        ValueError
            If either signal is not found.
        """
        if signal1 not in self.correlation_matrix:
            raise ValueError(f"Signal '{signal1}' not found")
        if signal2 not in self.signals:
            raise ValueError(f"Signal '{signal2}' not found")

        row_position = self.signals.index(signal2)
        return self.correlation_matrix[signal1][row_position]

    def summary(self) -> str:
        """Get human-readable summary of comparison.

        Returns
        -------
        str
            Multi-line report with the selection method, the numbered list of
            compared signals and, when any were recorded, the selection
            parameters.
        """
        rule = "=" * 60
        lines = [
            rule,
            "Signal Comparison",
            rule,
            f"Selection Method: {self.selection_method}",
            f"Signals Compared: {len(self.signals)}",
            "",
            "Signals:",
        ]
        lines.extend(f" {i}. {signal}" for i, signal in enumerate(self.signals, 1))

        if self.selection_params:
            lines.extend(["", "Selection Parameters:"])
            lines.extend(f" {key}: {value}" for key, value in self.selection_params.items())

        lines.append(rule)
        return "\n".join(lines)
|