ml4t-diagnostic 0.1.0a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (242)
  1. ml4t/diagnostic/AGENT.md +25 -0
  2. ml4t/diagnostic/__init__.py +166 -0
  3. ml4t/diagnostic/backends/__init__.py +10 -0
  4. ml4t/diagnostic/backends/adapter.py +192 -0
  5. ml4t/diagnostic/backends/polars_backend.py +899 -0
  6. ml4t/diagnostic/caching/__init__.py +40 -0
  7. ml4t/diagnostic/caching/cache.py +331 -0
  8. ml4t/diagnostic/caching/decorators.py +131 -0
  9. ml4t/diagnostic/caching/smart_cache.py +339 -0
  10. ml4t/diagnostic/config/AGENT.md +24 -0
  11. ml4t/diagnostic/config/README.md +267 -0
  12. ml4t/diagnostic/config/__init__.py +219 -0
  13. ml4t/diagnostic/config/barrier_config.py +277 -0
  14. ml4t/diagnostic/config/base.py +301 -0
  15. ml4t/diagnostic/config/event_config.py +148 -0
  16. ml4t/diagnostic/config/feature_config.py +404 -0
  17. ml4t/diagnostic/config/multi_signal_config.py +55 -0
  18. ml4t/diagnostic/config/portfolio_config.py +215 -0
  19. ml4t/diagnostic/config/report_config.py +391 -0
  20. ml4t/diagnostic/config/sharpe_config.py +202 -0
  21. ml4t/diagnostic/config/signal_config.py +206 -0
  22. ml4t/diagnostic/config/trade_analysis_config.py +310 -0
  23. ml4t/diagnostic/config/validation.py +279 -0
  24. ml4t/diagnostic/core/__init__.py +29 -0
  25. ml4t/diagnostic/core/numba_utils.py +315 -0
  26. ml4t/diagnostic/core/purging.py +372 -0
  27. ml4t/diagnostic/core/sampling.py +471 -0
  28. ml4t/diagnostic/errors/__init__.py +205 -0
  29. ml4t/diagnostic/evaluation/AGENT.md +26 -0
  30. ml4t/diagnostic/evaluation/__init__.py +437 -0
  31. ml4t/diagnostic/evaluation/autocorrelation.py +531 -0
  32. ml4t/diagnostic/evaluation/barrier_analysis.py +1050 -0
  33. ml4t/diagnostic/evaluation/binary_metrics.py +910 -0
  34. ml4t/diagnostic/evaluation/dashboard.py +715 -0
  35. ml4t/diagnostic/evaluation/diagnostic_plots.py +1037 -0
  36. ml4t/diagnostic/evaluation/distribution/__init__.py +499 -0
  37. ml4t/diagnostic/evaluation/distribution/moments.py +299 -0
  38. ml4t/diagnostic/evaluation/distribution/tails.py +777 -0
  39. ml4t/diagnostic/evaluation/distribution/tests.py +470 -0
  40. ml4t/diagnostic/evaluation/drift/__init__.py +139 -0
  41. ml4t/diagnostic/evaluation/drift/analysis.py +432 -0
  42. ml4t/diagnostic/evaluation/drift/domain_classifier.py +517 -0
  43. ml4t/diagnostic/evaluation/drift/population_stability_index.py +310 -0
  44. ml4t/diagnostic/evaluation/drift/wasserstein.py +388 -0
  45. ml4t/diagnostic/evaluation/event_analysis.py +647 -0
  46. ml4t/diagnostic/evaluation/excursion.py +390 -0
  47. ml4t/diagnostic/evaluation/feature_diagnostics.py +873 -0
  48. ml4t/diagnostic/evaluation/feature_outcome.py +666 -0
  49. ml4t/diagnostic/evaluation/framework.py +935 -0
  50. ml4t/diagnostic/evaluation/metric_registry.py +255 -0
  51. ml4t/diagnostic/evaluation/metrics/AGENT.md +23 -0
  52. ml4t/diagnostic/evaluation/metrics/__init__.py +133 -0
  53. ml4t/diagnostic/evaluation/metrics/basic.py +160 -0
  54. ml4t/diagnostic/evaluation/metrics/conditional_ic.py +469 -0
  55. ml4t/diagnostic/evaluation/metrics/feature_outcome.py +475 -0
  56. ml4t/diagnostic/evaluation/metrics/ic_statistics.py +446 -0
  57. ml4t/diagnostic/evaluation/metrics/importance_analysis.py +338 -0
  58. ml4t/diagnostic/evaluation/metrics/importance_classical.py +375 -0
  59. ml4t/diagnostic/evaluation/metrics/importance_mda.py +371 -0
  60. ml4t/diagnostic/evaluation/metrics/importance_shap.py +715 -0
  61. ml4t/diagnostic/evaluation/metrics/information_coefficient.py +527 -0
  62. ml4t/diagnostic/evaluation/metrics/interactions.py +772 -0
  63. ml4t/diagnostic/evaluation/metrics/monotonicity.py +226 -0
  64. ml4t/diagnostic/evaluation/metrics/risk_adjusted.py +324 -0
  65. ml4t/diagnostic/evaluation/multi_signal.py +550 -0
  66. ml4t/diagnostic/evaluation/portfolio_analysis/__init__.py +83 -0
  67. ml4t/diagnostic/evaluation/portfolio_analysis/analysis.py +734 -0
  68. ml4t/diagnostic/evaluation/portfolio_analysis/metrics.py +589 -0
  69. ml4t/diagnostic/evaluation/portfolio_analysis/results.py +334 -0
  70. ml4t/diagnostic/evaluation/report_generation.py +824 -0
  71. ml4t/diagnostic/evaluation/signal_selector.py +452 -0
  72. ml4t/diagnostic/evaluation/stat_registry.py +139 -0
  73. ml4t/diagnostic/evaluation/stationarity/__init__.py +97 -0
  74. ml4t/diagnostic/evaluation/stationarity/analysis.py +518 -0
  75. ml4t/diagnostic/evaluation/stationarity/augmented_dickey_fuller.py +296 -0
  76. ml4t/diagnostic/evaluation/stationarity/kpss_test.py +308 -0
  77. ml4t/diagnostic/evaluation/stationarity/phillips_perron.py +365 -0
  78. ml4t/diagnostic/evaluation/stats/AGENT.md +43 -0
  79. ml4t/diagnostic/evaluation/stats/__init__.py +191 -0
  80. ml4t/diagnostic/evaluation/stats/backtest_overfitting.py +219 -0
  81. ml4t/diagnostic/evaluation/stats/bootstrap.py +228 -0
  82. ml4t/diagnostic/evaluation/stats/deflated_sharpe_ratio.py +591 -0
  83. ml4t/diagnostic/evaluation/stats/false_discovery_rate.py +295 -0
  84. ml4t/diagnostic/evaluation/stats/hac_standard_errors.py +108 -0
  85. ml4t/diagnostic/evaluation/stats/minimum_track_record.py +408 -0
  86. ml4t/diagnostic/evaluation/stats/moments.py +164 -0
  87. ml4t/diagnostic/evaluation/stats/rademacher_adjustment.py +436 -0
  88. ml4t/diagnostic/evaluation/stats/reality_check.py +155 -0
  89. ml4t/diagnostic/evaluation/stats/sharpe_inference.py +219 -0
  90. ml4t/diagnostic/evaluation/themes.py +330 -0
  91. ml4t/diagnostic/evaluation/threshold_analysis.py +957 -0
  92. ml4t/diagnostic/evaluation/trade_analysis.py +1136 -0
  93. ml4t/diagnostic/evaluation/trade_dashboard/__init__.py +32 -0
  94. ml4t/diagnostic/evaluation/trade_dashboard/app.py +315 -0
  95. ml4t/diagnostic/evaluation/trade_dashboard/export/__init__.py +18 -0
  96. ml4t/diagnostic/evaluation/trade_dashboard/export/csv.py +82 -0
  97. ml4t/diagnostic/evaluation/trade_dashboard/export/html.py +276 -0
  98. ml4t/diagnostic/evaluation/trade_dashboard/io.py +166 -0
  99. ml4t/diagnostic/evaluation/trade_dashboard/normalize.py +304 -0
  100. ml4t/diagnostic/evaluation/trade_dashboard/stats.py +386 -0
  101. ml4t/diagnostic/evaluation/trade_dashboard/style.py +79 -0
  102. ml4t/diagnostic/evaluation/trade_dashboard/tabs/__init__.py +21 -0
  103. ml4t/diagnostic/evaluation/trade_dashboard/tabs/patterns.py +354 -0
  104. ml4t/diagnostic/evaluation/trade_dashboard/tabs/shap_analysis.py +280 -0
  105. ml4t/diagnostic/evaluation/trade_dashboard/tabs/stat_validation.py +186 -0
  106. ml4t/diagnostic/evaluation/trade_dashboard/tabs/worst_trades.py +236 -0
  107. ml4t/diagnostic/evaluation/trade_dashboard/types.py +129 -0
  108. ml4t/diagnostic/evaluation/trade_shap/__init__.py +102 -0
  109. ml4t/diagnostic/evaluation/trade_shap/alignment.py +188 -0
  110. ml4t/diagnostic/evaluation/trade_shap/characterize.py +413 -0
  111. ml4t/diagnostic/evaluation/trade_shap/cluster.py +302 -0
  112. ml4t/diagnostic/evaluation/trade_shap/explain.py +208 -0
  113. ml4t/diagnostic/evaluation/trade_shap/hypotheses/__init__.py +23 -0
  114. ml4t/diagnostic/evaluation/trade_shap/hypotheses/generator.py +290 -0
  115. ml4t/diagnostic/evaluation/trade_shap/hypotheses/matcher.py +251 -0
  116. ml4t/diagnostic/evaluation/trade_shap/hypotheses/templates.yaml +467 -0
  117. ml4t/diagnostic/evaluation/trade_shap/models.py +386 -0
  118. ml4t/diagnostic/evaluation/trade_shap/normalize.py +116 -0
  119. ml4t/diagnostic/evaluation/trade_shap/pipeline.py +263 -0
  120. ml4t/diagnostic/evaluation/trade_shap_dashboard.py +283 -0
  121. ml4t/diagnostic/evaluation/trade_shap_diagnostics.py +588 -0
  122. ml4t/diagnostic/evaluation/validated_cv.py +535 -0
  123. ml4t/diagnostic/evaluation/visualization.py +1050 -0
  124. ml4t/diagnostic/evaluation/volatility/__init__.py +45 -0
  125. ml4t/diagnostic/evaluation/volatility/analysis.py +351 -0
  126. ml4t/diagnostic/evaluation/volatility/arch.py +258 -0
  127. ml4t/diagnostic/evaluation/volatility/garch.py +460 -0
  128. ml4t/diagnostic/integration/__init__.py +48 -0
  129. ml4t/diagnostic/integration/backtest_contract.py +671 -0
  130. ml4t/diagnostic/integration/data_contract.py +316 -0
  131. ml4t/diagnostic/integration/engineer_contract.py +226 -0
  132. ml4t/diagnostic/logging/__init__.py +77 -0
  133. ml4t/diagnostic/logging/logger.py +245 -0
  134. ml4t/diagnostic/logging/performance.py +234 -0
  135. ml4t/diagnostic/logging/progress.py +234 -0
  136. ml4t/diagnostic/logging/wandb.py +412 -0
  137. ml4t/diagnostic/metrics/__init__.py +9 -0
  138. ml4t/diagnostic/metrics/percentiles.py +128 -0
  139. ml4t/diagnostic/py.typed +1 -0
  140. ml4t/diagnostic/reporting/__init__.py +43 -0
  141. ml4t/diagnostic/reporting/base.py +130 -0
  142. ml4t/diagnostic/reporting/html_renderer.py +275 -0
  143. ml4t/diagnostic/reporting/json_renderer.py +51 -0
  144. ml4t/diagnostic/reporting/markdown_renderer.py +117 -0
  145. ml4t/diagnostic/results/AGENT.md +24 -0
  146. ml4t/diagnostic/results/__init__.py +105 -0
  147. ml4t/diagnostic/results/barrier_results/__init__.py +36 -0
  148. ml4t/diagnostic/results/barrier_results/hit_rate.py +304 -0
  149. ml4t/diagnostic/results/barrier_results/precision_recall.py +266 -0
  150. ml4t/diagnostic/results/barrier_results/profit_factor.py +297 -0
  151. ml4t/diagnostic/results/barrier_results/tearsheet.py +397 -0
  152. ml4t/diagnostic/results/barrier_results/time_to_target.py +305 -0
  153. ml4t/diagnostic/results/barrier_results/validation.py +38 -0
  154. ml4t/diagnostic/results/base.py +177 -0
  155. ml4t/diagnostic/results/event_results.py +349 -0
  156. ml4t/diagnostic/results/feature_results.py +787 -0
  157. ml4t/diagnostic/results/multi_signal_results.py +431 -0
  158. ml4t/diagnostic/results/portfolio_results.py +281 -0
  159. ml4t/diagnostic/results/sharpe_results.py +448 -0
  160. ml4t/diagnostic/results/signal_results/__init__.py +74 -0
  161. ml4t/diagnostic/results/signal_results/ic.py +581 -0
  162. ml4t/diagnostic/results/signal_results/irtc.py +110 -0
  163. ml4t/diagnostic/results/signal_results/quantile.py +392 -0
  164. ml4t/diagnostic/results/signal_results/tearsheet.py +456 -0
  165. ml4t/diagnostic/results/signal_results/turnover.py +213 -0
  166. ml4t/diagnostic/results/signal_results/validation.py +147 -0
  167. ml4t/diagnostic/signal/AGENT.md +17 -0
  168. ml4t/diagnostic/signal/__init__.py +69 -0
  169. ml4t/diagnostic/signal/_report.py +152 -0
  170. ml4t/diagnostic/signal/_utils.py +261 -0
  171. ml4t/diagnostic/signal/core.py +275 -0
  172. ml4t/diagnostic/signal/quantile.py +148 -0
  173. ml4t/diagnostic/signal/result.py +214 -0
  174. ml4t/diagnostic/signal/signal_ic.py +129 -0
  175. ml4t/diagnostic/signal/turnover.py +182 -0
  176. ml4t/diagnostic/splitters/AGENT.md +19 -0
  177. ml4t/diagnostic/splitters/__init__.py +36 -0
  178. ml4t/diagnostic/splitters/base.py +501 -0
  179. ml4t/diagnostic/splitters/calendar.py +421 -0
  180. ml4t/diagnostic/splitters/calendar_config.py +91 -0
  181. ml4t/diagnostic/splitters/combinatorial.py +1064 -0
  182. ml4t/diagnostic/splitters/config.py +322 -0
  183. ml4t/diagnostic/splitters/cpcv/__init__.py +57 -0
  184. ml4t/diagnostic/splitters/cpcv/combinations.py +119 -0
  185. ml4t/diagnostic/splitters/cpcv/partitioning.py +263 -0
  186. ml4t/diagnostic/splitters/cpcv/purge_engine.py +379 -0
  187. ml4t/diagnostic/splitters/cpcv/windows.py +190 -0
  188. ml4t/diagnostic/splitters/group_isolation.py +329 -0
  189. ml4t/diagnostic/splitters/persistence.py +316 -0
  190. ml4t/diagnostic/splitters/utils.py +207 -0
  191. ml4t/diagnostic/splitters/walk_forward.py +757 -0
  192. ml4t/diagnostic/utils/__init__.py +42 -0
  193. ml4t/diagnostic/utils/config.py +542 -0
  194. ml4t/diagnostic/utils/dependencies.py +318 -0
  195. ml4t/diagnostic/utils/sessions.py +127 -0
  196. ml4t/diagnostic/validation/__init__.py +54 -0
  197. ml4t/diagnostic/validation/dataframe.py +274 -0
  198. ml4t/diagnostic/validation/returns.py +280 -0
  199. ml4t/diagnostic/validation/timeseries.py +299 -0
  200. ml4t/diagnostic/visualization/AGENT.md +19 -0
  201. ml4t/diagnostic/visualization/__init__.py +223 -0
  202. ml4t/diagnostic/visualization/backtest/__init__.py +98 -0
  203. ml4t/diagnostic/visualization/backtest/cost_attribution.py +762 -0
  204. ml4t/diagnostic/visualization/backtest/executive_summary.py +895 -0
  205. ml4t/diagnostic/visualization/backtest/interactive_controls.py +673 -0
  206. ml4t/diagnostic/visualization/backtest/statistical_validity.py +874 -0
  207. ml4t/diagnostic/visualization/backtest/tearsheet.py +565 -0
  208. ml4t/diagnostic/visualization/backtest/template_system.py +373 -0
  209. ml4t/diagnostic/visualization/backtest/trade_plots.py +1172 -0
  210. ml4t/diagnostic/visualization/barrier_plots.py +782 -0
  211. ml4t/diagnostic/visualization/core.py +1060 -0
  212. ml4t/diagnostic/visualization/dashboards/__init__.py +36 -0
  213. ml4t/diagnostic/visualization/dashboards/base.py +582 -0
  214. ml4t/diagnostic/visualization/dashboards/importance.py +801 -0
  215. ml4t/diagnostic/visualization/dashboards/interaction.py +263 -0
  216. ml4t/diagnostic/visualization/dashboards.py +43 -0
  217. ml4t/diagnostic/visualization/data_extraction/__init__.py +48 -0
  218. ml4t/diagnostic/visualization/data_extraction/importance.py +649 -0
  219. ml4t/diagnostic/visualization/data_extraction/interaction.py +504 -0
  220. ml4t/diagnostic/visualization/data_extraction/types.py +113 -0
  221. ml4t/diagnostic/visualization/data_extraction/validation.py +66 -0
  222. ml4t/diagnostic/visualization/feature_plots.py +888 -0
  223. ml4t/diagnostic/visualization/interaction_plots.py +618 -0
  224. ml4t/diagnostic/visualization/portfolio/__init__.py +41 -0
  225. ml4t/diagnostic/visualization/portfolio/dashboard.py +514 -0
  226. ml4t/diagnostic/visualization/portfolio/drawdown_plots.py +341 -0
  227. ml4t/diagnostic/visualization/portfolio/returns_plots.py +487 -0
  228. ml4t/diagnostic/visualization/portfolio/risk_plots.py +301 -0
  229. ml4t/diagnostic/visualization/report_generation.py +1343 -0
  230. ml4t/diagnostic/visualization/signal/__init__.py +103 -0
  231. ml4t/diagnostic/visualization/signal/dashboard.py +911 -0
  232. ml4t/diagnostic/visualization/signal/event_plots.py +514 -0
  233. ml4t/diagnostic/visualization/signal/ic_plots.py +635 -0
  234. ml4t/diagnostic/visualization/signal/multi_signal_dashboard.py +974 -0
  235. ml4t/diagnostic/visualization/signal/multi_signal_plots.py +603 -0
  236. ml4t/diagnostic/visualization/signal/quantile_plots.py +625 -0
  237. ml4t/diagnostic/visualization/signal/turnover_plots.py +400 -0
  238. ml4t/diagnostic/visualization/trade_shap/__init__.py +90 -0
  239. ml4t_diagnostic-0.1.0a1.dist-info/METADATA +1044 -0
  240. ml4t_diagnostic-0.1.0a1.dist-info/RECORD +242 -0
  241. ml4t_diagnostic-0.1.0a1.dist-info/WHEEL +4 -0
  242. ml4t_diagnostic-0.1.0a1.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,275 @@
1
+ """Core signal analysis functions.
2
+
3
+ The main entry point is `analyze_signal()` - one function for 95% of use cases.
4
+ For power users, `prepare_data()` allows custom workflows.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ from typing import TYPE_CHECKING
10
+
11
+ import polars as pl
12
+
13
+ from ml4t.diagnostic.signal._utils import (
14
+ QuantileMethod,
15
+ compute_forward_returns,
16
+ ensure_polars,
17
+ filter_outliers,
18
+ quantize_factor,
19
+ )
20
+ from ml4t.diagnostic.signal.quantile import (
21
+ compute_monotonicity,
22
+ compute_quantile_returns,
23
+ compute_spread,
24
+ )
25
+ from ml4t.diagnostic.signal.result import SignalResult
26
+ from ml4t.diagnostic.signal.signal_ic import compute_ic_series, compute_ic_summary
27
+ from ml4t.diagnostic.signal.turnover import (
28
+ compute_autocorrelation,
29
+ compute_turnover,
30
+ estimate_half_life,
31
+ )
32
+
33
+ if TYPE_CHECKING:
34
+ import pandas as pd
35
+
36
+
37
def prepare_data(
    factor: pl.DataFrame | pd.DataFrame,
    prices: pl.DataFrame | pd.DataFrame,
    periods: tuple[int, ...] = (1, 5, 21),
    quantiles: int = 5,
    filter_zscore: float | None = 3.0,
    quantile_method: str = "quantile",
    factor_col: str = "factor",
    date_col: str = "date",
    asset_col: str = "asset",
    price_col: str = "price",
) -> pl.DataFrame:
    """Build the analysis-ready factor table.

    Joins the factor with prices, attaches one forward-return column per
    period, optionally removes factor outliers, and assigns each row to a
    quantile bucket.

    Parameters
    ----------
    factor : DataFrame
        Factor observations with date, asset, and factor columns.
    prices : DataFrame
        Prices with date, asset, and price columns.
    periods : tuple[int, ...]
        Forward-return horizons, in trading days.
    quantiles : int
        Number of quantile buckets to assign.
    filter_zscore : float | None
        Drop factor values beyond this z-score; ``None`` (or <= 0) disables.
    quantile_method : str
        ``"quantile"`` for equal-frequency buckets, ``"uniform"`` for
        equal-width buckets.
    factor_col, date_col, asset_col, price_col : str
        Column names in the input frames.

    Returns
    -------
    pl.DataFrame
        Columns: date, asset, factor, quantile, ``{period}D_fwd_return``.
    """
    # Normalize both inputs to Polars before any joining.
    factor_df = ensure_polars(factor)
    price_df = ensure_polars(prices)

    # Join factor with prices and add one forward-return column per period.
    prepared = compute_forward_returns(
        factor_df, price_df, periods, date_col, asset_col, price_col
    )

    # Optional cross-sectional outlier filter on the factor values.
    if filter_zscore is not None and filter_zscore > 0:
        prepared = filter_outliers(prepared, filter_zscore, factor_col, date_col)

    # Bucket the factor into quantiles; anything other than "quantile"
    # falls back to equal-width bucketing, matching the documented contract.
    if quantile_method == "quantile":
        method = QuantileMethod.QUANTILE
    else:
        method = QuantileMethod.UNIFORM
    return quantize_factor(prepared, quantiles, method, factor_col, date_col)
92
+
93
+
94
def analyze_signal(
    factor: pl.DataFrame | pd.DataFrame,
    prices: pl.DataFrame | pd.DataFrame,
    *,
    periods: tuple[int, ...] = (1, 5, 21),
    quantiles: int = 5,
    filter_zscore: float | None = 3.0,
    quantile_method: str = "quantile",
    ic_method: str = "spearman",
    compute_turnover_flag: bool = True,
    autocorrelation_lags: int = 10,
    min_assets: int = 10,
    factor_col: str = "factor",
    date_col: str = "date",
    asset_col: str = "asset",
    price_col: str = "price",
) -> SignalResult:
    """Run the full signal diagnostic and return a :class:`SignalResult`.

    This is the main entry point: it computes IC statistics, quantile
    returns, the long-short spread, monotonicity, and (optionally)
    turnover / autocorrelation metrics for every forward-return period.

    Parameters
    ----------
    factor : DataFrame
        Factor data with date, asset, and factor columns. Higher factor
        values should predict higher returns.
    prices : DataFrame
        Price data with date, asset, and price columns.
    periods : tuple[int, ...]
        Forward-return horizons in trading days (default 1, 5, 21).
    quantiles : int
        Number of quantile buckets (default 5, i.e. quintiles).
    filter_zscore : float | None
        Z-score threshold for outlier filtering; ``None`` disables.
    quantile_method : str
        ``"quantile"`` (equal frequency) or ``"uniform"`` (equal width).
    ic_method : str
        ``"spearman"`` (rank) or ``"pearson"`` (linear) correlation.
    compute_turnover_flag : bool
        Whether to also compute turnover and factor autocorrelation.
    autocorrelation_lags : int
        Number of lags for the autocorrelation series.
    min_assets : int
        Minimum assets per date required for IC computation.
    factor_col, date_col, asset_col, price_col : str
        Column names in the input frames.

    Returns
    -------
    SignalResult
        Immutable container with all computed metrics.

    Examples
    --------
    Basic usage:

    >>> result = analyze_signal(factor_df, prices_df)
    >>> print(result.summary())
    >>> result.to_json("results.json")

    With custom parameters:

    >>> result = analyze_signal(
    ...     factor_df, prices_df,
    ...     periods=(1, 5, 21, 63),
    ...     quantiles=10,
    ...     ic_method="pearson",
    ... )
    """
    prepared = prepare_data(
        factor,
        prices,
        periods,
        quantiles,
        filter_zscore,
        quantile_method,
        factor_col,
        date_col,
        asset_col,
        price_col,
    )

    # Dataset-level metadata.
    n_assets = prepared.select(asset_col).n_unique()
    n_dates = prepared.select(date_col).n_unique()
    unique_dates = prepared.select(date_col).unique().sort(date_col).to_series().to_list()
    if unique_dates:
        date_range = (str(unique_dates[0]), str(unique_dates[-1]))
    else:
        date_range = ("", "")

    # Per-period accumulators, keyed "1D", "5D", ...
    ic: dict[str, float] = {}
    ic_std: dict[str, float] = {}
    ic_t_stat: dict[str, float] = {}
    ic_p_value: dict[str, float] = {}
    ic_ir: dict[str, float] = {}
    ic_positive_pct: dict[str, float] = {}
    ic_series: dict[str, list[float]] = {}
    quantile_returns: dict[str, dict[int, float]] = {}
    spread: dict[str, float] = {}
    spread_t_stat: dict[str, float] = {}
    spread_p_value: dict[str, float] = {}
    monotonicity: dict[str, float] = {}

    for period in periods:
        key = f"{period}D"

        # Information coefficient time series and its summary stats.
        _, ic_values = compute_ic_series(
            prepared, period, ic_method, factor_col, date_col, min_assets
        )
        stats = compute_ic_summary(ic_values)

        ic[key] = stats["mean"]
        ic_std[key] = stats["std"]
        ic_t_stat[key] = stats["t_stat"]
        ic_p_value[key] = stats["p_value"]
        ic_series[key] = ic_values

        # Information Ratio (guard against zero/degenerate std) and the
        # share of dates with a positive IC.
        ic_ir[key] = stats["mean"] / stats["std"] if stats["std"] > 0 else 0.0
        if ic_values:
            positive = sum(1 for v in ic_values if v > 0)
            ic_positive_pct[key] = positive / len(ic_values) * 100
        else:
            ic_positive_pct[key] = 0.0

        # Quantile-level returns and how monotonic they are.
        per_quantile = compute_quantile_returns(prepared, period, quantiles)
        quantile_returns[key] = per_quantile
        monotonicity[key] = compute_monotonicity(per_quantile)

        # Top-minus-bottom spread with t-test.
        spread_stats = compute_spread(prepared, period, quantiles)
        spread[key] = spread_stats["spread"]
        spread_t_stat[key] = spread_stats["t_stat"]
        spread_p_value[key] = spread_stats["p_value"]

    # Optional turnover / persistence metrics.
    turnover_by_period: dict[str, float] | None = None
    autocorr: list[float] | None = None
    half_life: float | None = None

    if compute_turnover_flag:
        # One turnover rate is computed and reported under every period key.
        rate = compute_turnover(prepared, quantiles, date_col, asset_col)
        turnover_by_period = {f"{p}D": rate for p in periods}

        autocorr = compute_autocorrelation(
            prepared,
            list(range(1, autocorrelation_lags + 1)),
            date_col,
            asset_col,
            factor_col,
        )
        half_life = estimate_half_life(autocorr)

    return SignalResult(
        ic=ic,
        ic_std=ic_std,
        ic_t_stat=ic_t_stat,
        ic_p_value=ic_p_value,
        ic_ir=ic_ir,
        ic_positive_pct=ic_positive_pct,
        ic_series=ic_series,
        quantile_returns=quantile_returns,
        spread=spread,
        spread_t_stat=spread_t_stat,
        spread_p_value=spread_p_value,
        monotonicity=monotonicity,
        turnover=turnover_by_period,
        autocorrelation=autocorr,
        half_life=half_life,
        n_assets=n_assets,
        n_dates=n_dates,
        date_range=date_range,
        periods=periods,
        quantiles=quantiles,
    )
273
+
274
+
275
+ __all__ = ["prepare_data", "analyze_signal"]
@@ -0,0 +1,148 @@
1
+ """Quantile analysis functions.
2
+
3
+ Simple, pure functions for analyzing returns by quantile.
4
+ """
5
+
6
+ from __future__ import annotations
7
+
8
+ import numpy as np
9
+ import polars as pl
10
+ from scipy.stats import spearmanr, ttest_ind
11
+
12
+
13
+ def compute_quantile_returns(
14
+ data: pl.DataFrame,
15
+ period: int,
16
+ n_quantiles: int,
17
+ quantile_col: str = "quantile",
18
+ ) -> dict[int, float]:
19
+ """Compute mean forward returns by quantile.
20
+
21
+ Parameters
22
+ ----------
23
+ data : pl.DataFrame
24
+ Data with quantile and forward return columns.
25
+ period : int
26
+ Forward return period in days.
27
+ n_quantiles : int
28
+ Number of quantiles.
29
+ quantile_col : str, default "quantile"
30
+ Quantile column name.
31
+
32
+ Returns
33
+ -------
34
+ dict[int, float]
35
+ Mean return by quantile (1 = lowest factor).
36
+ """
37
+ return_col = f"{period}D_fwd_return"
38
+
39
+ if return_col not in data.columns:
40
+ return dict.fromkeys(range(1, n_quantiles + 1), float("nan"))
41
+
42
+ result: dict[int, float] = {}
43
+
44
+ quantile_means = (
45
+ data.filter(pl.col(return_col).is_not_null())
46
+ .group_by(quantile_col)
47
+ .agg(pl.col(return_col).mean().alias("mean_return"))
48
+ .sort(quantile_col)
49
+ )
50
+
51
+ for row in quantile_means.iter_rows(named=True):
52
+ result[int(row[quantile_col])] = float(row["mean_return"])
53
+
54
+ # Fill missing quantiles
55
+ for q in range(1, n_quantiles + 1):
56
+ if q not in result:
57
+ result[q] = float("nan")
58
+
59
+ return result
60
+
61
+
62
+ def compute_spread(
63
+ data: pl.DataFrame,
64
+ period: int,
65
+ n_quantiles: int,
66
+ quantile_col: str = "quantile",
67
+ ) -> dict[str, float]:
68
+ """Compute long-short spread and statistics.
69
+
70
+ Parameters
71
+ ----------
72
+ data : pl.DataFrame
73
+ Data with quantile and forward return columns.
74
+ period : int
75
+ Forward return period in days.
76
+ n_quantiles : int
77
+ Number of quantiles.
78
+ quantile_col : str, default "quantile"
79
+ Quantile column name.
80
+
81
+ Returns
82
+ -------
83
+ dict[str, float]
84
+ spread, t_stat, p_value
85
+ """
86
+ return_col = f"{period}D_fwd_return"
87
+
88
+ if return_col not in data.columns:
89
+ return {
90
+ "spread": float("nan"),
91
+ "t_stat": float("nan"),
92
+ "p_value": float("nan"),
93
+ }
94
+
95
+ top_returns = data.filter(pl.col(quantile_col) == n_quantiles)[return_col].to_numpy()
96
+ bottom_returns = data.filter(pl.col(quantile_col) == 1)[return_col].to_numpy()
97
+
98
+ top_returns = top_returns[~np.isnan(top_returns)]
99
+ bottom_returns = bottom_returns[~np.isnan(bottom_returns)]
100
+
101
+ if len(top_returns) < 2 or len(bottom_returns) < 2:
102
+ return {
103
+ "spread": float("nan"),
104
+ "t_stat": float("nan"),
105
+ "p_value": float("nan"),
106
+ }
107
+
108
+ spread = float(np.mean(top_returns) - np.mean(bottom_returns))
109
+ t_stat, p_value = ttest_ind(top_returns, bottom_returns)
110
+
111
+ return {
112
+ "spread": spread,
113
+ "t_stat": float(t_stat),
114
+ "p_value": float(p_value),
115
+ }
116
+
117
+
118
+ def compute_monotonicity(
119
+ quantile_returns: dict[int, float],
120
+ ) -> float:
121
+ """Compute monotonicity of quantile returns.
122
+
123
+ Measures how well returns increase monotonically across quantiles.
124
+ Uses Spearman correlation: 1.0 = perfect increase, -1.0 = perfect decrease.
125
+
126
+ Parameters
127
+ ----------
128
+ quantile_returns : dict[int, float]
129
+ Mean return by quantile.
130
+
131
+ Returns
132
+ -------
133
+ float
134
+ Monotonicity score (-1 to 1).
135
+ """
136
+ # Sort by quantile
137
+ sorted_items = sorted(quantile_returns.items())
138
+ quantiles = [q for q, r in sorted_items if not np.isnan(r)]
139
+ returns = [r for q, r in sorted_items if not np.isnan(r)]
140
+
141
+ if len(quantiles) < 3:
142
+ return float("nan")
143
+
144
+ rho, _ = spearmanr(quantiles, returns)
145
+ return float(rho) if not np.isnan(rho) else float("nan")
146
+
147
+
148
+ __all__ = ["compute_quantile_returns", "compute_spread", "compute_monotonicity"]
@@ -0,0 +1,214 @@
1
+ """Signal analysis result dataclass.
2
+
3
+ Simple, immutable result container for signal analysis.
4
+ No Pydantic, no inheritance - just a frozen dataclass.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import json
10
+ from dataclasses import asdict, dataclass, field
11
+ from typing import Any
12
+
13
+
14
@dataclass(frozen=True)
class SignalResult:
    """Immutable result from signal analysis.

    All metrics are keyed by period (e.g., "1D", "5D", "21D").

    Attributes
    ----------
    ic : dict[str, float]
        Mean IC by period.
    ic_std : dict[str, float]
        IC standard deviation by period.
    ic_t_stat : dict[str, float]
        T-statistic for IC != 0.
    ic_p_value : dict[str, float]
        P-value for IC significance.
    ic_ir : dict[str, float]
        Information Ratio (IC mean / IC std) by period.
    ic_positive_pct : dict[str, float]
        Percentage of periods with positive IC.
    ic_series : dict[str, list[float]]
        IC time series by period.
    quantile_returns : dict[str, dict[int, float]]
        Mean returns by period and quantile.
    spread : dict[str, float]
        Top minus bottom quantile spread.
    spread_t_stat : dict[str, float]
        T-statistic for spread.
    spread_p_value : dict[str, float]
        P-value for spread significance.
    monotonicity : dict[str, float]
        Rank correlation of quantile returns (how monotonic).
    turnover : dict[str, float] | None
        Mean turnover rate by period.
    autocorrelation : list[float] | None
        Factor autocorrelation at lags 1, 2, ...
    half_life : float | None
        Estimated signal half-life in periods.
    n_assets : int
        Number of unique assets.
    n_dates : int
        Number of unique dates.
    date_range : tuple[str, str]
        (first_date, last_date).
    periods : tuple[int, ...]
        Forward return periods analyzed.
    quantiles : int
        Number of quantiles used.
    """

    # IC metrics
    ic: dict[str, float]
    ic_std: dict[str, float]
    ic_t_stat: dict[str, float]
    ic_p_value: dict[str, float]
    ic_ir: dict[str, float] = field(default_factory=dict)  # Information Ratio (ic/ic_std)
    ic_positive_pct: dict[str, float] = field(default_factory=dict)  # % of positive ICs
    ic_series: dict[str, list[float]] = field(default_factory=dict)

    # Quantile metrics
    quantile_returns: dict[str, dict[int, float]] = field(default_factory=dict)
    spread: dict[str, float] = field(default_factory=dict)
    spread_t_stat: dict[str, float] = field(default_factory=dict)
    spread_p_value: dict[str, float] = field(default_factory=dict)
    monotonicity: dict[str, float] = field(default_factory=dict)

    # Turnover (optional)
    turnover: dict[str, float] | None = None
    autocorrelation: list[float] | None = None
    half_life: float | None = None

    # Metadata
    n_assets: int = 0
    n_dates: int = 0
    date_range: tuple[str, str] = ("", "")
    periods: tuple[int, ...] = ()
    quantiles: int = 5

    def summary(self) -> str:
        """Return a human-readable, multi-line summary of the results.

        Metrics missing for a period render as ``nan``; a trailing ``*``
        marks p-values below 0.05 (a NaN p-value is never starred).
        """
        lines = [
            f"Signal Analysis: {self.n_assets} assets, {self.n_dates} dates",
            f"Date range: {self.date_range[0]} to {self.date_range[1]}",
            f"Periods: {self.periods}, Quantiles: {self.quantiles}",
            "",
            "IC Summary:",
        ]

        for period in [f"{p}D" for p in self.periods]:
            ic_val = self.ic.get(period, float("nan"))
            t = self.ic_t_stat.get(period, float("nan"))
            p = self.ic_p_value.get(period, float("nan"))
            ir = self.ic_ir.get(period, float("nan"))
            pos_pct = self.ic_positive_pct.get(period, float("nan"))
            sig = "*" if p < 0.05 else ""
            lines.append(
                f"  {period}: IC={ic_val:+.4f} (t={t:.2f}, p={p:.3f}){sig}, IR={ir:.2f}, +%={pos_pct:.0f}%"
            )

        lines.append("\nSpread (Top - Bottom):")
        for period in [f"{p}D" for p in self.periods]:
            spread = self.spread.get(period, float("nan"))
            t = self.spread_t_stat.get(period, float("nan"))
            p = self.spread_p_value.get(period, float("nan"))
            sig = "*" if p < 0.05 else ""
            lines.append(f"  {period}: {spread:+.4f} (t={t:.2f}, p={p:.3f}){sig}")

        lines.append("\nMonotonicity:")
        for period in [f"{p}D" for p in self.periods]:
            mono = self.monotonicity.get(period, float("nan"))
            lines.append(f"  {period}: {mono:+.3f}")

        if self.turnover:
            lines.append("\nTurnover:")
            for period in [f"{p}D" for p in self.periods]:
                t = self.turnover.get(period, float("nan"))
                lines.append(f"  {period}: {t:.1%}")

        if self.half_life is not None:
            lines.append(f"\nHalf-life: {self.half_life:.1f} periods")

        return "\n".join(lines)

    def to_dict(self) -> dict[str, Any]:
        """Export to a plain dictionary (recursive, via dataclasses.asdict)."""
        return asdict(self)

    def to_json(self, path: str | None = None, indent: int = 2) -> str:
        """Export to JSON string or file.

        NaN values become ``null`` and tuples become lists so the output
        is valid, portable JSON; dict keys are stringified.

        Parameters
        ----------
        path : str | None
            If provided, also write the JSON to this file (UTF-8).
        indent : int
            JSON indentation level.

        Returns
        -------
        str
            JSON string.
        """

        def _clean(obj: Any) -> Any:
            # Single recursive pass: stringify dict keys, turn tuples into
            # lists, and map NaN -> None. Recursing into tuples matters —
            # otherwise a NaN nested inside a tuple would leak through.
            if isinstance(obj, dict):
                return {str(k): _clean(v) for k, v in obj.items()}
            if isinstance(obj, (list, tuple)):
                return [_clean(v) for v in obj]
            if isinstance(obj, float) and obj != obj:  # NaN check
                return None
            return obj

        json_str = json.dumps(_clean(self.to_dict()), indent=indent)

        if path:
            # Explicit encoding: don't depend on the platform default.
            with open(path, "w", encoding="utf-8") as f:
                f.write(json_str)

        return json_str

    @classmethod
    def from_json(cls, path: str) -> SignalResult:
        """Load a result previously written by :meth:`to_json`.

        Parameters
        ----------
        path : str
            Path to the JSON file.

        Returns
        -------
        SignalResult
            Loaded result. NaNs serialized as ``null`` come back as None.
        """
        with open(path, encoding="utf-8") as f:
            data = json.load(f)

        # JSON has no tuples: restore the tuple-typed metadata fields.
        if "date_range" in data:
            data["date_range"] = tuple(data["date_range"])
        if "periods" in data:
            data["periods"] = tuple(data["periods"])

        # JSON object keys are strings: restore int quantile keys.
        if "quantile_returns" in data:
            data["quantile_returns"] = {
                period: {int(k): v for k, v in qr.items()}
                for period, qr in data["quantile_returns"].items()
            }

        return cls(**data)
212
+
213
+
214
+ __all__ = ["SignalResult"]