ml4t-diagnostic 0.1.0a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (242) hide show
  1. ml4t/diagnostic/AGENT.md +25 -0
  2. ml4t/diagnostic/__init__.py +166 -0
  3. ml4t/diagnostic/backends/__init__.py +10 -0
  4. ml4t/diagnostic/backends/adapter.py +192 -0
  5. ml4t/diagnostic/backends/polars_backend.py +899 -0
  6. ml4t/diagnostic/caching/__init__.py +40 -0
  7. ml4t/diagnostic/caching/cache.py +331 -0
  8. ml4t/diagnostic/caching/decorators.py +131 -0
  9. ml4t/diagnostic/caching/smart_cache.py +339 -0
  10. ml4t/diagnostic/config/AGENT.md +24 -0
  11. ml4t/diagnostic/config/README.md +267 -0
  12. ml4t/diagnostic/config/__init__.py +219 -0
  13. ml4t/diagnostic/config/barrier_config.py +277 -0
  14. ml4t/diagnostic/config/base.py +301 -0
  15. ml4t/diagnostic/config/event_config.py +148 -0
  16. ml4t/diagnostic/config/feature_config.py +404 -0
  17. ml4t/diagnostic/config/multi_signal_config.py +55 -0
  18. ml4t/diagnostic/config/portfolio_config.py +215 -0
  19. ml4t/diagnostic/config/report_config.py +391 -0
  20. ml4t/diagnostic/config/sharpe_config.py +202 -0
  21. ml4t/diagnostic/config/signal_config.py +206 -0
  22. ml4t/diagnostic/config/trade_analysis_config.py +310 -0
  23. ml4t/diagnostic/config/validation.py +279 -0
  24. ml4t/diagnostic/core/__init__.py +29 -0
  25. ml4t/diagnostic/core/numba_utils.py +315 -0
  26. ml4t/diagnostic/core/purging.py +372 -0
  27. ml4t/diagnostic/core/sampling.py +471 -0
  28. ml4t/diagnostic/errors/__init__.py +205 -0
  29. ml4t/diagnostic/evaluation/AGENT.md +26 -0
  30. ml4t/diagnostic/evaluation/__init__.py +437 -0
  31. ml4t/diagnostic/evaluation/autocorrelation.py +531 -0
  32. ml4t/diagnostic/evaluation/barrier_analysis.py +1050 -0
  33. ml4t/diagnostic/evaluation/binary_metrics.py +910 -0
  34. ml4t/diagnostic/evaluation/dashboard.py +715 -0
  35. ml4t/diagnostic/evaluation/diagnostic_plots.py +1037 -0
  36. ml4t/diagnostic/evaluation/distribution/__init__.py +499 -0
  37. ml4t/diagnostic/evaluation/distribution/moments.py +299 -0
  38. ml4t/diagnostic/evaluation/distribution/tails.py +777 -0
  39. ml4t/diagnostic/evaluation/distribution/tests.py +470 -0
  40. ml4t/diagnostic/evaluation/drift/__init__.py +139 -0
  41. ml4t/diagnostic/evaluation/drift/analysis.py +432 -0
  42. ml4t/diagnostic/evaluation/drift/domain_classifier.py +517 -0
  43. ml4t/diagnostic/evaluation/drift/population_stability_index.py +310 -0
  44. ml4t/diagnostic/evaluation/drift/wasserstein.py +388 -0
  45. ml4t/diagnostic/evaluation/event_analysis.py +647 -0
  46. ml4t/diagnostic/evaluation/excursion.py +390 -0
  47. ml4t/diagnostic/evaluation/feature_diagnostics.py +873 -0
  48. ml4t/diagnostic/evaluation/feature_outcome.py +666 -0
  49. ml4t/diagnostic/evaluation/framework.py +935 -0
  50. ml4t/diagnostic/evaluation/metric_registry.py +255 -0
  51. ml4t/diagnostic/evaluation/metrics/AGENT.md +23 -0
  52. ml4t/diagnostic/evaluation/metrics/__init__.py +133 -0
  53. ml4t/diagnostic/evaluation/metrics/basic.py +160 -0
  54. ml4t/diagnostic/evaluation/metrics/conditional_ic.py +469 -0
  55. ml4t/diagnostic/evaluation/metrics/feature_outcome.py +475 -0
  56. ml4t/diagnostic/evaluation/metrics/ic_statistics.py +446 -0
  57. ml4t/diagnostic/evaluation/metrics/importance_analysis.py +338 -0
  58. ml4t/diagnostic/evaluation/metrics/importance_classical.py +375 -0
  59. ml4t/diagnostic/evaluation/metrics/importance_mda.py +371 -0
  60. ml4t/diagnostic/evaluation/metrics/importance_shap.py +715 -0
  61. ml4t/diagnostic/evaluation/metrics/information_coefficient.py +527 -0
  62. ml4t/diagnostic/evaluation/metrics/interactions.py +772 -0
  63. ml4t/diagnostic/evaluation/metrics/monotonicity.py +226 -0
  64. ml4t/diagnostic/evaluation/metrics/risk_adjusted.py +324 -0
  65. ml4t/diagnostic/evaluation/multi_signal.py +550 -0
  66. ml4t/diagnostic/evaluation/portfolio_analysis/__init__.py +83 -0
  67. ml4t/diagnostic/evaluation/portfolio_analysis/analysis.py +734 -0
  68. ml4t/diagnostic/evaluation/portfolio_analysis/metrics.py +589 -0
  69. ml4t/diagnostic/evaluation/portfolio_analysis/results.py +334 -0
  70. ml4t/diagnostic/evaluation/report_generation.py +824 -0
  71. ml4t/diagnostic/evaluation/signal_selector.py +452 -0
  72. ml4t/diagnostic/evaluation/stat_registry.py +139 -0
  73. ml4t/diagnostic/evaluation/stationarity/__init__.py +97 -0
  74. ml4t/diagnostic/evaluation/stationarity/analysis.py +518 -0
  75. ml4t/diagnostic/evaluation/stationarity/augmented_dickey_fuller.py +296 -0
  76. ml4t/diagnostic/evaluation/stationarity/kpss_test.py +308 -0
  77. ml4t/diagnostic/evaluation/stationarity/phillips_perron.py +365 -0
  78. ml4t/diagnostic/evaluation/stats/AGENT.md +43 -0
  79. ml4t/diagnostic/evaluation/stats/__init__.py +191 -0
  80. ml4t/diagnostic/evaluation/stats/backtest_overfitting.py +219 -0
  81. ml4t/diagnostic/evaluation/stats/bootstrap.py +228 -0
  82. ml4t/diagnostic/evaluation/stats/deflated_sharpe_ratio.py +591 -0
  83. ml4t/diagnostic/evaluation/stats/false_discovery_rate.py +295 -0
  84. ml4t/diagnostic/evaluation/stats/hac_standard_errors.py +108 -0
  85. ml4t/diagnostic/evaluation/stats/minimum_track_record.py +408 -0
  86. ml4t/diagnostic/evaluation/stats/moments.py +164 -0
  87. ml4t/diagnostic/evaluation/stats/rademacher_adjustment.py +436 -0
  88. ml4t/diagnostic/evaluation/stats/reality_check.py +155 -0
  89. ml4t/diagnostic/evaluation/stats/sharpe_inference.py +219 -0
  90. ml4t/diagnostic/evaluation/themes.py +330 -0
  91. ml4t/diagnostic/evaluation/threshold_analysis.py +957 -0
  92. ml4t/diagnostic/evaluation/trade_analysis.py +1136 -0
  93. ml4t/diagnostic/evaluation/trade_dashboard/__init__.py +32 -0
  94. ml4t/diagnostic/evaluation/trade_dashboard/app.py +315 -0
  95. ml4t/diagnostic/evaluation/trade_dashboard/export/__init__.py +18 -0
  96. ml4t/diagnostic/evaluation/trade_dashboard/export/csv.py +82 -0
  97. ml4t/diagnostic/evaluation/trade_dashboard/export/html.py +276 -0
  98. ml4t/diagnostic/evaluation/trade_dashboard/io.py +166 -0
  99. ml4t/diagnostic/evaluation/trade_dashboard/normalize.py +304 -0
  100. ml4t/diagnostic/evaluation/trade_dashboard/stats.py +386 -0
  101. ml4t/diagnostic/evaluation/trade_dashboard/style.py +79 -0
  102. ml4t/diagnostic/evaluation/trade_dashboard/tabs/__init__.py +21 -0
  103. ml4t/diagnostic/evaluation/trade_dashboard/tabs/patterns.py +354 -0
  104. ml4t/diagnostic/evaluation/trade_dashboard/tabs/shap_analysis.py +280 -0
  105. ml4t/diagnostic/evaluation/trade_dashboard/tabs/stat_validation.py +186 -0
  106. ml4t/diagnostic/evaluation/trade_dashboard/tabs/worst_trades.py +236 -0
  107. ml4t/diagnostic/evaluation/trade_dashboard/types.py +129 -0
  108. ml4t/diagnostic/evaluation/trade_shap/__init__.py +102 -0
  109. ml4t/diagnostic/evaluation/trade_shap/alignment.py +188 -0
  110. ml4t/diagnostic/evaluation/trade_shap/characterize.py +413 -0
  111. ml4t/diagnostic/evaluation/trade_shap/cluster.py +302 -0
  112. ml4t/diagnostic/evaluation/trade_shap/explain.py +208 -0
  113. ml4t/diagnostic/evaluation/trade_shap/hypotheses/__init__.py +23 -0
  114. ml4t/diagnostic/evaluation/trade_shap/hypotheses/generator.py +290 -0
  115. ml4t/diagnostic/evaluation/trade_shap/hypotheses/matcher.py +251 -0
  116. ml4t/diagnostic/evaluation/trade_shap/hypotheses/templates.yaml +467 -0
  117. ml4t/diagnostic/evaluation/trade_shap/models.py +386 -0
  118. ml4t/diagnostic/evaluation/trade_shap/normalize.py +116 -0
  119. ml4t/diagnostic/evaluation/trade_shap/pipeline.py +263 -0
  120. ml4t/diagnostic/evaluation/trade_shap_dashboard.py +283 -0
  121. ml4t/diagnostic/evaluation/trade_shap_diagnostics.py +588 -0
  122. ml4t/diagnostic/evaluation/validated_cv.py +535 -0
  123. ml4t/diagnostic/evaluation/visualization.py +1050 -0
  124. ml4t/diagnostic/evaluation/volatility/__init__.py +45 -0
  125. ml4t/diagnostic/evaluation/volatility/analysis.py +351 -0
  126. ml4t/diagnostic/evaluation/volatility/arch.py +258 -0
  127. ml4t/diagnostic/evaluation/volatility/garch.py +460 -0
  128. ml4t/diagnostic/integration/__init__.py +48 -0
  129. ml4t/diagnostic/integration/backtest_contract.py +671 -0
  130. ml4t/diagnostic/integration/data_contract.py +316 -0
  131. ml4t/diagnostic/integration/engineer_contract.py +226 -0
  132. ml4t/diagnostic/logging/__init__.py +77 -0
  133. ml4t/diagnostic/logging/logger.py +245 -0
  134. ml4t/diagnostic/logging/performance.py +234 -0
  135. ml4t/diagnostic/logging/progress.py +234 -0
  136. ml4t/diagnostic/logging/wandb.py +412 -0
  137. ml4t/diagnostic/metrics/__init__.py +9 -0
  138. ml4t/diagnostic/metrics/percentiles.py +128 -0
  139. ml4t/diagnostic/py.typed +1 -0
  140. ml4t/diagnostic/reporting/__init__.py +43 -0
  141. ml4t/diagnostic/reporting/base.py +130 -0
  142. ml4t/diagnostic/reporting/html_renderer.py +275 -0
  143. ml4t/diagnostic/reporting/json_renderer.py +51 -0
  144. ml4t/diagnostic/reporting/markdown_renderer.py +117 -0
  145. ml4t/diagnostic/results/AGENT.md +24 -0
  146. ml4t/diagnostic/results/__init__.py +105 -0
  147. ml4t/diagnostic/results/barrier_results/__init__.py +36 -0
  148. ml4t/diagnostic/results/barrier_results/hit_rate.py +304 -0
  149. ml4t/diagnostic/results/barrier_results/precision_recall.py +266 -0
  150. ml4t/diagnostic/results/barrier_results/profit_factor.py +297 -0
  151. ml4t/diagnostic/results/barrier_results/tearsheet.py +397 -0
  152. ml4t/diagnostic/results/barrier_results/time_to_target.py +305 -0
  153. ml4t/diagnostic/results/barrier_results/validation.py +38 -0
  154. ml4t/diagnostic/results/base.py +177 -0
  155. ml4t/diagnostic/results/event_results.py +349 -0
  156. ml4t/diagnostic/results/feature_results.py +787 -0
  157. ml4t/diagnostic/results/multi_signal_results.py +431 -0
  158. ml4t/diagnostic/results/portfolio_results.py +281 -0
  159. ml4t/diagnostic/results/sharpe_results.py +448 -0
  160. ml4t/diagnostic/results/signal_results/__init__.py +74 -0
  161. ml4t/diagnostic/results/signal_results/ic.py +581 -0
  162. ml4t/diagnostic/results/signal_results/irtc.py +110 -0
  163. ml4t/diagnostic/results/signal_results/quantile.py +392 -0
  164. ml4t/diagnostic/results/signal_results/tearsheet.py +456 -0
  165. ml4t/diagnostic/results/signal_results/turnover.py +213 -0
  166. ml4t/diagnostic/results/signal_results/validation.py +147 -0
  167. ml4t/diagnostic/signal/AGENT.md +17 -0
  168. ml4t/diagnostic/signal/__init__.py +69 -0
  169. ml4t/diagnostic/signal/_report.py +152 -0
  170. ml4t/diagnostic/signal/_utils.py +261 -0
  171. ml4t/diagnostic/signal/core.py +275 -0
  172. ml4t/diagnostic/signal/quantile.py +148 -0
  173. ml4t/diagnostic/signal/result.py +214 -0
  174. ml4t/diagnostic/signal/signal_ic.py +129 -0
  175. ml4t/diagnostic/signal/turnover.py +182 -0
  176. ml4t/diagnostic/splitters/AGENT.md +19 -0
  177. ml4t/diagnostic/splitters/__init__.py +36 -0
  178. ml4t/diagnostic/splitters/base.py +501 -0
  179. ml4t/diagnostic/splitters/calendar.py +421 -0
  180. ml4t/diagnostic/splitters/calendar_config.py +91 -0
  181. ml4t/diagnostic/splitters/combinatorial.py +1064 -0
  182. ml4t/diagnostic/splitters/config.py +322 -0
  183. ml4t/diagnostic/splitters/cpcv/__init__.py +57 -0
  184. ml4t/diagnostic/splitters/cpcv/combinations.py +119 -0
  185. ml4t/diagnostic/splitters/cpcv/partitioning.py +263 -0
  186. ml4t/diagnostic/splitters/cpcv/purge_engine.py +379 -0
  187. ml4t/diagnostic/splitters/cpcv/windows.py +190 -0
  188. ml4t/diagnostic/splitters/group_isolation.py +329 -0
  189. ml4t/diagnostic/splitters/persistence.py +316 -0
  190. ml4t/diagnostic/splitters/utils.py +207 -0
  191. ml4t/diagnostic/splitters/walk_forward.py +757 -0
  192. ml4t/diagnostic/utils/__init__.py +42 -0
  193. ml4t/diagnostic/utils/config.py +542 -0
  194. ml4t/diagnostic/utils/dependencies.py +318 -0
  195. ml4t/diagnostic/utils/sessions.py +127 -0
  196. ml4t/diagnostic/validation/__init__.py +54 -0
  197. ml4t/diagnostic/validation/dataframe.py +274 -0
  198. ml4t/diagnostic/validation/returns.py +280 -0
  199. ml4t/diagnostic/validation/timeseries.py +299 -0
  200. ml4t/diagnostic/visualization/AGENT.md +19 -0
  201. ml4t/diagnostic/visualization/__init__.py +223 -0
  202. ml4t/diagnostic/visualization/backtest/__init__.py +98 -0
  203. ml4t/diagnostic/visualization/backtest/cost_attribution.py +762 -0
  204. ml4t/diagnostic/visualization/backtest/executive_summary.py +895 -0
  205. ml4t/diagnostic/visualization/backtest/interactive_controls.py +673 -0
  206. ml4t/diagnostic/visualization/backtest/statistical_validity.py +874 -0
  207. ml4t/diagnostic/visualization/backtest/tearsheet.py +565 -0
  208. ml4t/diagnostic/visualization/backtest/template_system.py +373 -0
  209. ml4t/diagnostic/visualization/backtest/trade_plots.py +1172 -0
  210. ml4t/diagnostic/visualization/barrier_plots.py +782 -0
  211. ml4t/diagnostic/visualization/core.py +1060 -0
  212. ml4t/diagnostic/visualization/dashboards/__init__.py +36 -0
  213. ml4t/diagnostic/visualization/dashboards/base.py +582 -0
  214. ml4t/diagnostic/visualization/dashboards/importance.py +801 -0
  215. ml4t/diagnostic/visualization/dashboards/interaction.py +263 -0
  216. ml4t/diagnostic/visualization/dashboards.py +43 -0
  217. ml4t/diagnostic/visualization/data_extraction/__init__.py +48 -0
  218. ml4t/diagnostic/visualization/data_extraction/importance.py +649 -0
  219. ml4t/diagnostic/visualization/data_extraction/interaction.py +504 -0
  220. ml4t/diagnostic/visualization/data_extraction/types.py +113 -0
  221. ml4t/diagnostic/visualization/data_extraction/validation.py +66 -0
  222. ml4t/diagnostic/visualization/feature_plots.py +888 -0
  223. ml4t/diagnostic/visualization/interaction_plots.py +618 -0
  224. ml4t/diagnostic/visualization/portfolio/__init__.py +41 -0
  225. ml4t/diagnostic/visualization/portfolio/dashboard.py +514 -0
  226. ml4t/diagnostic/visualization/portfolio/drawdown_plots.py +341 -0
  227. ml4t/diagnostic/visualization/portfolio/returns_plots.py +487 -0
  228. ml4t/diagnostic/visualization/portfolio/risk_plots.py +301 -0
  229. ml4t/diagnostic/visualization/report_generation.py +1343 -0
  230. ml4t/diagnostic/visualization/signal/__init__.py +103 -0
  231. ml4t/diagnostic/visualization/signal/dashboard.py +911 -0
  232. ml4t/diagnostic/visualization/signal/event_plots.py +514 -0
  233. ml4t/diagnostic/visualization/signal/ic_plots.py +635 -0
  234. ml4t/diagnostic/visualization/signal/multi_signal_dashboard.py +974 -0
  235. ml4t/diagnostic/visualization/signal/multi_signal_plots.py +603 -0
  236. ml4t/diagnostic/visualization/signal/quantile_plots.py +625 -0
  237. ml4t/diagnostic/visualization/signal/turnover_plots.py +400 -0
  238. ml4t/diagnostic/visualization/trade_shap/__init__.py +90 -0
  239. ml4t_diagnostic-0.1.0a1.dist-info/METADATA +1044 -0
  240. ml4t_diagnostic-0.1.0a1.dist-info/RECORD +242 -0
  241. ml4t_diagnostic-0.1.0a1.dist-info/WHEEL +4 -0
  242. ml4t_diagnostic-0.1.0a1.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,147 @@
1
+ """Validation helper functions for signal result classes.
2
+
3
+ This module provides utility functions for validating dictionary key consistency
4
+ and normalizing period strings used in signal analysis results.
5
+
6
+ References
7
+ ----------
8
+ Lopez de Prado, M. (2018). "Advances in Financial Machine Learning"
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+ from typing import Any
14
+
15
+
16
+ def _validate_dict_keys_match(
17
+ data: dict[str, Any],
18
+ required_fields: list[str],
19
+ optional_fields: list[str] | None = None,
20
+ reference_field: str | None = None,
21
+ ) -> None:
22
+ """Validate that all dict fields share the same keys.
23
+
24
+ Parameters
25
+ ----------
26
+ data : dict
27
+ Model data dictionary.
28
+ required_fields : list[str]
29
+ Required dict field names that must all share the same keys.
30
+ optional_fields : list[str] | None
31
+ Optional dict field names that, if present and not None, must also share the same keys.
32
+ reference_field : str | None
33
+ Field to use as reference for key set. If None, uses first required field.
34
+
35
+ Raises
36
+ ------
37
+ ValueError
38
+ If any dict field has different keys than the reference.
39
+ """
40
+ if not required_fields:
41
+ return
42
+
43
+ ref_field = reference_field or required_fields[0]
44
+ ref_keys = set(data.get(ref_field, {}).keys())
45
+
46
+ if not ref_keys:
47
+ return # Empty reference, nothing to validate
48
+
49
+ # Check required fields
50
+ for field in required_fields:
51
+ if field == ref_field:
52
+ continue
53
+ field_data = data.get(field)
54
+ if field_data is None:
55
+ raise ValueError(
56
+ f"Required field '{field}' is None but '{ref_field}' has keys: {ref_keys}"
57
+ )
58
+ field_keys = set(field_data.keys())
59
+ if field_keys != ref_keys:
60
+ missing = ref_keys - field_keys
61
+ extra = field_keys - ref_keys
62
+ raise ValueError(
63
+ f"Key mismatch in '{field}': "
64
+ f"missing={missing or 'none'}, extra={extra or 'none'} "
65
+ f"(reference: '{ref_field}')"
66
+ )
67
+
68
+ # Check optional fields (only if they exist and are not None)
69
+ for field in optional_fields or []:
70
+ field_data = data.get(field)
71
+ if field_data is None:
72
+ continue
73
+ field_keys = set(field_data.keys())
74
+ if field_keys != ref_keys:
75
+ missing = ref_keys - field_keys
76
+ extra = field_keys - ref_keys
77
+ raise ValueError(
78
+ f"Key mismatch in '{field}': "
79
+ f"missing={missing or 'none'}, extra={extra or 'none'} "
80
+ f"(reference: '{ref_field}')"
81
+ )
82
+
83
+
84
+ def _normalize_period(period: int | str) -> str:
85
+ """Normalize period to canonical string format used internally.
86
+
87
+ Accepts:
88
+ - int: 21 -> "21D"
89
+ - str without suffix: "21" -> "21D"
90
+ - str with suffix: "21D" -> "21D"
91
+
92
+ Parameters
93
+ ----------
94
+ period : int | str
95
+ Period as integer or string, with or without 'D' suffix.
96
+
97
+ Returns
98
+ -------
99
+ str
100
+ Canonical period key with 'D' suffix (e.g., "21D").
101
+
102
+ Examples
103
+ --------
104
+ >>> _normalize_period(21)
105
+ '21D'
106
+ >>> _normalize_period('21')
107
+ '21D'
108
+ >>> _normalize_period('21D')
109
+ '21D'
110
+ """
111
+ if isinstance(period, int):
112
+ return f"{period}D"
113
+ period_str = str(period).strip()
114
+ if period_str.endswith("D"):
115
+ return period_str
116
+ return f"{period_str}D"
117
+
118
+
119
def _figure_from_data(data: dict | str) -> Any:
    """Build a Plotly Figure from stored figure data.

    Figures may be persisted either as plain Python dicts or as JSON
    strings; this helper accepts both representations so callers do not
    need to know which one they hold.

    Parameters
    ----------
    data : dict | str
        Figure data as Python dict or JSON string.

    Returns
    -------
    plotly.graph_objects.Figure
        Plotly Figure object.

    Raises
    ------
    TypeError
        If ``data`` is neither a dict nor a string.
    """
    import plotly.io as pio

    # JSON-string form: let plotly.io parse it.
    if isinstance(data, str):
        return pio.from_json(data)

    # Dict form: construct the Figure directly.
    if isinstance(data, dict):
        import plotly.graph_objects as go

        return go.Figure(data)

    raise TypeError(f"Expected dict or str for figure data, got {type(data)}")
@@ -0,0 +1,17 @@
1
+ # signal/ - Factor Signal Analysis
2
+
3
+ Alphalens-style signal quality analysis.
4
+
5
+ ## Modules
6
+
7
+ | File | Purpose |
8
+ |------|---------|
9
+ | core.py | `analyze_signal()` entry point |
10
+ | result.py | `SignalResult` dataclass |
11
+ | ic.py | IC computation |
12
+ | quantile.py | Quantile returns, spread |
13
+ | turnover.py | Turnover, autocorrelation |
14
+
15
+ ## Key Functions
16
+
17
+ `analyze_signal()`, `compute_ic_series()`, `compute_quantile_returns()`, `compute_turnover()`
@@ -0,0 +1,69 @@
1
+ """Signal analysis for factor/alpha evaluation.
2
+
3
+ This module provides tools for analyzing the predictive power of signals
4
+ (factors) for future returns.
5
+
6
+ Main Entry Point
7
+ ----------------
8
+ analyze_signal : Compute IC, quantile returns, spread, and turnover
9
+ for a factor signal. This is the recommended way to use this module.
10
+
11
+ Example
12
+ -------
13
+ >>> from ml4t.diagnostic.signal import analyze_signal
14
+ >>> result = analyze_signal(factor_df, prices_df)
15
+ >>> print(result.summary())
16
+ >>> result.to_json("results.json")
17
+
18
+ Building Blocks
19
+ ---------------
20
+ For custom workflows, use the component functions:
21
+
22
+ - prepare_data : Join factor with prices and compute forward returns
23
+ - compute_ic_series : Compute IC time series
24
+ - compute_quantile_returns : Compute returns by quantile
25
+ - compute_turnover : Compute factor turnover rate
26
+ - filter_outliers : Remove cross-sectional outliers
27
+ - quantize_factor : Assign quantile labels
28
+ """
29
+
30
+ from ml4t.diagnostic.signal._utils import (
31
+ QuantileMethod,
32
+ filter_outliers,
33
+ quantize_factor,
34
+ )
35
+ from ml4t.diagnostic.signal.core import analyze_signal, prepare_data
36
+ from ml4t.diagnostic.signal.quantile import (
37
+ compute_monotonicity,
38
+ compute_quantile_returns,
39
+ compute_spread,
40
+ )
41
+ from ml4t.diagnostic.signal.result import SignalResult
42
+ from ml4t.diagnostic.signal.signal_ic import compute_ic_series, compute_ic_summary
43
+ from ml4t.diagnostic.signal.turnover import (
44
+ compute_autocorrelation,
45
+ compute_turnover,
46
+ estimate_half_life,
47
+ )
48
+
49
+ __all__ = [
50
+ # Main entry point
51
+ "analyze_signal",
52
+ "SignalResult",
53
+ # Data preparation
54
+ "prepare_data",
55
+ "filter_outliers",
56
+ "quantize_factor",
57
+ "QuantileMethod",
58
+ # IC functions
59
+ "compute_ic_series",
60
+ "compute_ic_summary",
61
+ # Quantile functions
62
+ "compute_quantile_returns",
63
+ "compute_spread",
64
+ "compute_monotonicity",
65
+ # Turnover functions
66
+ "compute_turnover",
67
+ "compute_autocorrelation",
68
+ "estimate_half_life",
69
+ ]
@@ -0,0 +1,152 @@
1
+ """Report generation for signal analysis.
2
+
3
+ Internal module for HTML report generation.
4
+ """
5
+
6
+ from __future__ import annotations
7
+
8
+ from typing import TYPE_CHECKING
9
+
10
+ if TYPE_CHECKING:
11
+ from ml4t.diagnostic.signal.result import SignalResult
12
+
13
+
14
def generate_html(result: SignalResult, path: str) -> None:
    """Generate HTML report from signal analysis results.

    Builds a 2x2 Plotly figure — IC time series, quantile-return bars, and
    two summary tables (IC and spread) — and writes it to ``path``. When
    Plotly is not installed, falls back to a text-only HTML report.

    Parameters
    ----------
    result : SignalResult
        Analysis results.
    path : str
        Output file path.
    """
    try:
        import plotly.graph_objects as go
        from plotly.subplots import make_subplots
    except ImportError:
        # Plotly is optional: degrade gracefully to a text-only report.
        _generate_text_html(result, path)
        return

    # Create figure with subplots
    fig = make_subplots(
        rows=2,
        cols=2,
        subplot_titles=("IC Time Series", "Quantile Returns", "IC Summary", "Spread Summary"),
        specs=[
            [{"type": "scatter"}, {"type": "bar"}],
            [{"type": "table"}, {"type": "table"}],
        ],
    )

    # Top-left: one IC line per forward-return period (empty series skipped).
    for period_key, ic_vals in result.ic_series.items():
        if ic_vals:
            fig.add_trace(
                go.Scatter(y=ic_vals, mode="lines", name=f"IC {period_key}"),
                row=1,
                col=1,
            )

    # Top-right: quantile returns, first configured period only.
    if result.periods:
        first_period = f"{result.periods[0]}D"
        q_returns = result.quantile_returns.get(first_period, {})
        if q_returns:
            quantiles = sorted(q_returns.keys())
            fig.add_trace(
                go.Bar(
                    x=[f"Q{q}" for q in quantiles],
                    y=[q_returns[q] for q in quantiles],
                    name=f"Returns {first_period}",
                ),
                row=1,
                col=2,
            )

    # Bottom row: IC and spread summary tables, built in a single pass over
    # the configured periods. Missing metrics render as "nan".
    ic_rows = []
    spread_rows = []
    for period in result.periods:
        period_key = f"{period}D"
        ic_rows.append(
            [
                period_key,
                f"{result.ic.get(period_key, float('nan')):.4f}",
                f"{result.ic_t_stat.get(period_key, float('nan')):.2f}",
                f"{result.ic_p_value.get(period_key, float('nan')):.4f}",
            ]
        )
        spread_rows.append(
            [
                period_key,
                f"{result.spread.get(period_key, float('nan')):.4f}",
                f"{result.spread_t_stat.get(period_key, float('nan')):.2f}",
                f"{result.monotonicity.get(period_key, float('nan')):.3f}",
            ]
        )

    _add_summary_table(fig, go, ["Period", "IC", "t-stat", "p-value"], ic_rows, col=1)
    _add_summary_table(fig, go, ["Period", "Spread", "t-stat", "Monotonicity"], spread_rows, col=2)

    # Update layout
    fig.update_layout(
        title_text=f"Signal Analysis: {result.n_assets} assets, {result.n_dates} dates",
        height=800,
        showlegend=True,
    )

    # Write HTML
    fig.write_html(path, include_plotlyjs=True)


def _add_summary_table(fig, go_module, headers, rows, *, col):
    """Add a 4-column summary table trace in the bottom subplot row.

    Parameters
    ----------
    fig
        Target Plotly figure (modified in place).
    go_module
        The ``plotly.graph_objects`` module, passed in because it is
        imported lazily by the caller.
    headers : list[str]
        Column header labels.
    rows : list[list[str]]
        Table rows; transposed here into Plotly's column-major cell format.
    col : int
        Subplot column (1 or 2); summary tables always live in row 2.
    """
    fig.add_trace(
        go_module.Table(
            header={"values": headers},
            cells={"values": list(zip(*rows)) if rows else [[], [], [], []]},
        ),
        row=2,
        col=col,
    )
126
+
127
+
128
+ def _generate_text_html(result: SignalResult, path: str) -> None:
129
+ """Generate text-only HTML report (no Plotly)."""
130
+ html = f"""<!DOCTYPE html>
131
+ <html>
132
+ <head>
133
+ <title>Signal Analysis Report</title>
134
+ <style>
135
+ body {{ font-family: monospace; padding: 20px; }}
136
+ table {{ border-collapse: collapse; margin: 10px 0; }}
137
+ th, td {{ border: 1px solid #ddd; padding: 8px; text-align: right; }}
138
+ th {{ background-color: #f2f2f2; }}
139
+ pre {{ background-color: #f5f5f5; padding: 15px; }}
140
+ </style>
141
+ </head>
142
+ <body>
143
+ <h1>Signal Analysis Report</h1>
144
+ <pre>{result.summary()}</pre>
145
+ </body>
146
+ </html>"""
147
+
148
+ with open(path, "w") as f:
149
+ f.write(html)
150
+
151
+
152
+ __all__ = ["generate_html"]
@@ -0,0 +1,261 @@
1
+ """Internal utilities for signal analysis.
2
+
3
+ Simple, pure functions for data preparation.
4
+ """
5
+
6
+ from __future__ import annotations
7
+
8
+ from enum import Enum
9
+ from typing import TYPE_CHECKING
10
+
11
+ import polars as pl
12
+
13
+ if TYPE_CHECKING:
14
+ import pandas as pd
15
+
16
+
17
class QuantileMethod(str, Enum):
    """Method for quantile assignment.

    Subclasses ``str`` as well as ``Enum``, so members compare equal to
    their string values (e.g. ``QuantileMethod.QUANTILE == "quantile"``),
    letting callers pass either the enum member or the raw string.
    """

    QUANTILE = "quantile"  # Equal frequency (rank-based)
    UNIFORM = "uniform"  # Equal width
22
+
23
+
24
def ensure_polars(df: pl.DataFrame | pd.DataFrame) -> pl.DataFrame:
    """Return *df* as a Polars DataFrame, converting from pandas if needed.

    Parameters
    ----------
    df : pl.DataFrame | pd.DataFrame
        Input DataFrame.

    Returns
    -------
    pl.DataFrame
        The input unchanged when it is already Polars, otherwise the
        result of ``pl.from_pandas``.
    """
    if not isinstance(df, pl.DataFrame):
        # Anything that is not Polars is treated as pandas.
        df = pl.from_pandas(df)
    return df
41
+
42
+
43
def filter_outliers(
    data: pl.DataFrame,
    z_threshold: float = 3.0,
    factor_col: str = "factor",
    date_col: str = "date",
) -> pl.DataFrame:
    """Remove cross-sectional outliers by z-score.

    Within each date's cross-section, drops rows whose factor z-score
    exceeds ``z_threshold`` in absolute value. Cross-sections with zero
    standard deviation (constant factor) get a null z-score and are kept
    in full.

    Parameters
    ----------
    data : pl.DataFrame
        Data with date and factor columns.
    z_threshold : float, default 3.0
        Z-score threshold. Values <= 0 disable filtering.
    factor_col : str, default "factor"
        Factor column name.
    date_col : str, default "date"
        Date column name.

    Returns
    -------
    pl.DataFrame
        Data with outliers removed.
    """
    if z_threshold <= 0:
        return data

    factor = pl.col(factor_col)
    mean = factor.mean().over(date_col)
    std = factor.std().over(date_col)

    # Null z-score marks a constant cross-section (std == 0); such rows
    # survive the filter below.
    zscore = (
        pl.when(std > 0)
        .then((factor - mean) / std)
        .otherwise(pl.lit(None))
        .alias("_zscore")
    )

    keep = pl.col("_zscore").is_null() | (pl.col("_zscore").abs() <= z_threshold)
    return data.with_columns(zscore).filter(keep).drop("_zscore")
87
+
88
+
89
def quantize_factor(
    data: pl.DataFrame,
    n_quantiles: int = 5,
    method: QuantileMethod = QuantileMethod.QUANTILE,
    factor_col: str = "factor",
    date_col: str = "date",
) -> pl.DataFrame:
    """Assign quantile labels to factor values within each date.

    The label is computed per-date cross-section and stored in a new
    "quantile" column as Int32 in the range [1, n_quantiles] (the clip to
    [0, n_quantiles - 1] before the +1 guarantees this).

    Parameters
    ----------
    data : pl.DataFrame
        Data with date and factor columns.
    n_quantiles : int, default 5
        Number of quantiles.
    method : QuantileMethod, default QUANTILE
        QUANTILE = equal frequency, UNIFORM = equal width.
    factor_col : str, default "factor"
        Factor column name.
    date_col : str, default "date"
        Date column name.

    Returns
    -------
    pl.DataFrame
        Data with "quantile" column (1 = lowest, n = highest).
    """
    if method == QuantileMethod.QUANTILE:
        # Rank-based (equal count per quantile)
        # Bucket index = floor((rank - 1) / count * n_quantiles).
        # NOTE(review): rank() uses polars' default tie method, so tied
        # factor values may receive fractional ranks — confirm the desired
        # tie-handling behavior.
        data = data.with_columns(
            (
                (pl.col(factor_col).rank().over(date_col) - 1)
                / pl.col(factor_col).count().over(date_col)
                * n_quantiles
            )
            .floor()
            .cast(pl.Int32)
            .clip(0, n_quantiles - 1)
            .alias("_rank")
        )
        data = data.with_columns((pl.col("_rank") + 1).alias("quantile"))
        return data.drop("_rank")
    else:
        # Equal width
        # Scale each value into [0, n_quantiles) by min-max normalization;
        # the +1e-10 in the denominator avoids division by zero when the
        # cross-section is constant (max == min).
        data = data.with_columns(
            (
                (pl.col(factor_col) - pl.col(factor_col).min().over(date_col))
                / (
                    pl.col(factor_col).max().over(date_col)
                    - pl.col(factor_col).min().over(date_col)
                    + 1e-10
                )
                * n_quantiles
            )
            .floor()
            .cast(pl.Int32)
            .clip(0, n_quantiles - 1)
            .alias("_pct")
        )
        data = data.with_columns((pl.col("_pct") + 1).alias("quantile"))
        return data.drop("_pct")
150
+
151
+
152
def compute_forward_returns(
    data: pl.DataFrame,
    prices: pl.DataFrame,
    periods: tuple[int, ...],
    date_col: str = "date",
    asset_col: str = "asset",
    price_col: str = "price",
) -> pl.DataFrame:
    """Compute forward returns for each period using vectorized operations.

    For each (date, asset), computes return from date to date + period.
    Forward returns are computed using the factor data's date universe,
    so period N means "N dates forward in the factor dates", not calendar days.

    Rows whose current or future price is missing, NaN, or whose current
    price is zero get a null forward return (see the when/then below).

    Parameters
    ----------
    data : pl.DataFrame
        Factor data with date and asset columns.
    prices : pl.DataFrame
        Price data with date, asset, and price columns.
    periods : tuple[int, ...]
        Forward return periods in trading days (factor date indices).
    date_col, asset_col, price_col : str
        Column names.

    Returns
    -------
    pl.DataFrame
        Data with forward return columns (e.g., "1D_fwd_return").
    """
    if data.is_empty():
        # Add empty columns for each period so the output schema is stable
        # even with no rows.
        for p in periods:
            data = data.with_columns(pl.lit(None).cast(pl.Float64).alias(f"{p}D_fwd_return"))
        return data

    # 1. Create date index mapping from FACTOR data (not prices)
    # This ensures forward returns align with factor date universe
    factor_dates = data.select(date_col).unique().sort(date_col)
    factor_dates = factor_dates.with_row_index("_factor_date_idx")

    # 2. Join data with current prices
    # Left join: factor rows without a matching price get a null
    # _current_price and therefore a null forward return.
    result = data.join(
        prices.select([date_col, asset_col, price_col]).rename({price_col: "_current_price"}),
        on=[date_col, asset_col],
        how="left",
    )

    # 3. Join to get factor date index for each row
    result = result.join(factor_dates, on=date_col, how="left")

    # 4. For each period, compute forward return via joins
    for p in periods:
        col_name = f"{p}D_fwd_return"

        # Create mapping: current_factor_idx -> future_factor_date
        # future_factor_idx = current_factor_idx + p
        # The filter drops indices that would point before the first date.
        future_date_map = factor_dates.with_columns(
            (pl.col("_factor_date_idx") - p).alias("_current_idx")
        ).filter(pl.col("_current_idx") >= 0)

        # Join to get future date (from factor date sequence)
        result = result.join(
            future_date_map.select([date_col, "_current_idx"]).rename(
                {date_col: f"_future_date_{p}"}
            ),
            left_on="_factor_date_idx",
            right_on="_current_idx",
            how="left",
        )

        # Join to get future price (from price data)
        result = result.join(
            prices.select([date_col, asset_col, price_col]).rename(
                {price_col: f"_future_price_{p}"}
            ),
            left_on=[f"_future_date_{p}", asset_col],
            right_on=[date_col, asset_col],
            how="left",
        )

        # Compute return: (future - current) / current
        # Handle NaN in current price (use is_nan check)
        result = result.with_columns(
            pl.when(
                pl.col("_current_price").is_not_null()
                & pl.col("_current_price").is_not_nan()
                & pl.col(f"_future_price_{p}").is_not_null()
                & pl.col(f"_future_price_{p}").is_not_nan()
                & (pl.col("_current_price") != 0)
            )
            .then(
                (pl.col(f"_future_price_{p}") - pl.col("_current_price")) / pl.col("_current_price")
            )
            .otherwise(None)
            .alias(col_name)
        )

    # 5. Clean up temporary columns
    # NOTE(review): this drops EVERY column whose name starts with "_",
    # including any caller-supplied ones — confirm inputs never use that
    # prefix.
    temp_cols = [c for c in result.columns if c.startswith("_")]
    return result.drop(temp_cols)
253
+
254
+
255
+ __all__ = [
256
+ "QuantileMethod",
257
+ "ensure_polars",
258
+ "filter_outliers",
259
+ "quantize_factor",
260
+ "compute_forward_returns",
261
+ ]