ml4t-diagnostic 0.1.0a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (242) hide show
  1. ml4t/diagnostic/AGENT.md +25 -0
  2. ml4t/diagnostic/__init__.py +166 -0
  3. ml4t/diagnostic/backends/__init__.py +10 -0
  4. ml4t/diagnostic/backends/adapter.py +192 -0
  5. ml4t/diagnostic/backends/polars_backend.py +899 -0
  6. ml4t/diagnostic/caching/__init__.py +40 -0
  7. ml4t/diagnostic/caching/cache.py +331 -0
  8. ml4t/diagnostic/caching/decorators.py +131 -0
  9. ml4t/diagnostic/caching/smart_cache.py +339 -0
  10. ml4t/diagnostic/config/AGENT.md +24 -0
  11. ml4t/diagnostic/config/README.md +267 -0
  12. ml4t/diagnostic/config/__init__.py +219 -0
  13. ml4t/diagnostic/config/barrier_config.py +277 -0
  14. ml4t/diagnostic/config/base.py +301 -0
  15. ml4t/diagnostic/config/event_config.py +148 -0
  16. ml4t/diagnostic/config/feature_config.py +404 -0
  17. ml4t/diagnostic/config/multi_signal_config.py +55 -0
  18. ml4t/diagnostic/config/portfolio_config.py +215 -0
  19. ml4t/diagnostic/config/report_config.py +391 -0
  20. ml4t/diagnostic/config/sharpe_config.py +202 -0
  21. ml4t/diagnostic/config/signal_config.py +206 -0
  22. ml4t/diagnostic/config/trade_analysis_config.py +310 -0
  23. ml4t/diagnostic/config/validation.py +279 -0
  24. ml4t/diagnostic/core/__init__.py +29 -0
  25. ml4t/diagnostic/core/numba_utils.py +315 -0
  26. ml4t/diagnostic/core/purging.py +372 -0
  27. ml4t/diagnostic/core/sampling.py +471 -0
  28. ml4t/diagnostic/errors/__init__.py +205 -0
  29. ml4t/diagnostic/evaluation/AGENT.md +26 -0
  30. ml4t/diagnostic/evaluation/__init__.py +437 -0
  31. ml4t/diagnostic/evaluation/autocorrelation.py +531 -0
  32. ml4t/diagnostic/evaluation/barrier_analysis.py +1050 -0
  33. ml4t/diagnostic/evaluation/binary_metrics.py +910 -0
  34. ml4t/diagnostic/evaluation/dashboard.py +715 -0
  35. ml4t/diagnostic/evaluation/diagnostic_plots.py +1037 -0
  36. ml4t/diagnostic/evaluation/distribution/__init__.py +499 -0
  37. ml4t/diagnostic/evaluation/distribution/moments.py +299 -0
  38. ml4t/diagnostic/evaluation/distribution/tails.py +777 -0
  39. ml4t/diagnostic/evaluation/distribution/tests.py +470 -0
  40. ml4t/diagnostic/evaluation/drift/__init__.py +139 -0
  41. ml4t/diagnostic/evaluation/drift/analysis.py +432 -0
  42. ml4t/diagnostic/evaluation/drift/domain_classifier.py +517 -0
  43. ml4t/diagnostic/evaluation/drift/population_stability_index.py +310 -0
  44. ml4t/diagnostic/evaluation/drift/wasserstein.py +388 -0
  45. ml4t/diagnostic/evaluation/event_analysis.py +647 -0
  46. ml4t/diagnostic/evaluation/excursion.py +390 -0
  47. ml4t/diagnostic/evaluation/feature_diagnostics.py +873 -0
  48. ml4t/diagnostic/evaluation/feature_outcome.py +666 -0
  49. ml4t/diagnostic/evaluation/framework.py +935 -0
  50. ml4t/diagnostic/evaluation/metric_registry.py +255 -0
  51. ml4t/diagnostic/evaluation/metrics/AGENT.md +23 -0
  52. ml4t/diagnostic/evaluation/metrics/__init__.py +133 -0
  53. ml4t/diagnostic/evaluation/metrics/basic.py +160 -0
  54. ml4t/diagnostic/evaluation/metrics/conditional_ic.py +469 -0
  55. ml4t/diagnostic/evaluation/metrics/feature_outcome.py +475 -0
  56. ml4t/diagnostic/evaluation/metrics/ic_statistics.py +446 -0
  57. ml4t/diagnostic/evaluation/metrics/importance_analysis.py +338 -0
  58. ml4t/diagnostic/evaluation/metrics/importance_classical.py +375 -0
  59. ml4t/diagnostic/evaluation/metrics/importance_mda.py +371 -0
  60. ml4t/diagnostic/evaluation/metrics/importance_shap.py +715 -0
  61. ml4t/diagnostic/evaluation/metrics/information_coefficient.py +527 -0
  62. ml4t/diagnostic/evaluation/metrics/interactions.py +772 -0
  63. ml4t/diagnostic/evaluation/metrics/monotonicity.py +226 -0
  64. ml4t/diagnostic/evaluation/metrics/risk_adjusted.py +324 -0
  65. ml4t/diagnostic/evaluation/multi_signal.py +550 -0
  66. ml4t/diagnostic/evaluation/portfolio_analysis/__init__.py +83 -0
  67. ml4t/diagnostic/evaluation/portfolio_analysis/analysis.py +734 -0
  68. ml4t/diagnostic/evaluation/portfolio_analysis/metrics.py +589 -0
  69. ml4t/diagnostic/evaluation/portfolio_analysis/results.py +334 -0
  70. ml4t/diagnostic/evaluation/report_generation.py +824 -0
  71. ml4t/diagnostic/evaluation/signal_selector.py +452 -0
  72. ml4t/diagnostic/evaluation/stat_registry.py +139 -0
  73. ml4t/diagnostic/evaluation/stationarity/__init__.py +97 -0
  74. ml4t/diagnostic/evaluation/stationarity/analysis.py +518 -0
  75. ml4t/diagnostic/evaluation/stationarity/augmented_dickey_fuller.py +296 -0
  76. ml4t/diagnostic/evaluation/stationarity/kpss_test.py +308 -0
  77. ml4t/diagnostic/evaluation/stationarity/phillips_perron.py +365 -0
  78. ml4t/diagnostic/evaluation/stats/AGENT.md +43 -0
  79. ml4t/diagnostic/evaluation/stats/__init__.py +191 -0
  80. ml4t/diagnostic/evaluation/stats/backtest_overfitting.py +219 -0
  81. ml4t/diagnostic/evaluation/stats/bootstrap.py +228 -0
  82. ml4t/diagnostic/evaluation/stats/deflated_sharpe_ratio.py +591 -0
  83. ml4t/diagnostic/evaluation/stats/false_discovery_rate.py +295 -0
  84. ml4t/diagnostic/evaluation/stats/hac_standard_errors.py +108 -0
  85. ml4t/diagnostic/evaluation/stats/minimum_track_record.py +408 -0
  86. ml4t/diagnostic/evaluation/stats/moments.py +164 -0
  87. ml4t/diagnostic/evaluation/stats/rademacher_adjustment.py +436 -0
  88. ml4t/diagnostic/evaluation/stats/reality_check.py +155 -0
  89. ml4t/diagnostic/evaluation/stats/sharpe_inference.py +219 -0
  90. ml4t/diagnostic/evaluation/themes.py +330 -0
  91. ml4t/diagnostic/evaluation/threshold_analysis.py +957 -0
  92. ml4t/diagnostic/evaluation/trade_analysis.py +1136 -0
  93. ml4t/diagnostic/evaluation/trade_dashboard/__init__.py +32 -0
  94. ml4t/diagnostic/evaluation/trade_dashboard/app.py +315 -0
  95. ml4t/diagnostic/evaluation/trade_dashboard/export/__init__.py +18 -0
  96. ml4t/diagnostic/evaluation/trade_dashboard/export/csv.py +82 -0
  97. ml4t/diagnostic/evaluation/trade_dashboard/export/html.py +276 -0
  98. ml4t/diagnostic/evaluation/trade_dashboard/io.py +166 -0
  99. ml4t/diagnostic/evaluation/trade_dashboard/normalize.py +304 -0
  100. ml4t/diagnostic/evaluation/trade_dashboard/stats.py +386 -0
  101. ml4t/diagnostic/evaluation/trade_dashboard/style.py +79 -0
  102. ml4t/diagnostic/evaluation/trade_dashboard/tabs/__init__.py +21 -0
  103. ml4t/diagnostic/evaluation/trade_dashboard/tabs/patterns.py +354 -0
  104. ml4t/diagnostic/evaluation/trade_dashboard/tabs/shap_analysis.py +280 -0
  105. ml4t/diagnostic/evaluation/trade_dashboard/tabs/stat_validation.py +186 -0
  106. ml4t/diagnostic/evaluation/trade_dashboard/tabs/worst_trades.py +236 -0
  107. ml4t/diagnostic/evaluation/trade_dashboard/types.py +129 -0
  108. ml4t/diagnostic/evaluation/trade_shap/__init__.py +102 -0
  109. ml4t/diagnostic/evaluation/trade_shap/alignment.py +188 -0
  110. ml4t/diagnostic/evaluation/trade_shap/characterize.py +413 -0
  111. ml4t/diagnostic/evaluation/trade_shap/cluster.py +302 -0
  112. ml4t/diagnostic/evaluation/trade_shap/explain.py +208 -0
  113. ml4t/diagnostic/evaluation/trade_shap/hypotheses/__init__.py +23 -0
  114. ml4t/diagnostic/evaluation/trade_shap/hypotheses/generator.py +290 -0
  115. ml4t/diagnostic/evaluation/trade_shap/hypotheses/matcher.py +251 -0
  116. ml4t/diagnostic/evaluation/trade_shap/hypotheses/templates.yaml +467 -0
  117. ml4t/diagnostic/evaluation/trade_shap/models.py +386 -0
  118. ml4t/diagnostic/evaluation/trade_shap/normalize.py +116 -0
  119. ml4t/diagnostic/evaluation/trade_shap/pipeline.py +263 -0
  120. ml4t/diagnostic/evaluation/trade_shap_dashboard.py +283 -0
  121. ml4t/diagnostic/evaluation/trade_shap_diagnostics.py +588 -0
  122. ml4t/diagnostic/evaluation/validated_cv.py +535 -0
  123. ml4t/diagnostic/evaluation/visualization.py +1050 -0
  124. ml4t/diagnostic/evaluation/volatility/__init__.py +45 -0
  125. ml4t/diagnostic/evaluation/volatility/analysis.py +351 -0
  126. ml4t/diagnostic/evaluation/volatility/arch.py +258 -0
  127. ml4t/diagnostic/evaluation/volatility/garch.py +460 -0
  128. ml4t/diagnostic/integration/__init__.py +48 -0
  129. ml4t/diagnostic/integration/backtest_contract.py +671 -0
  130. ml4t/diagnostic/integration/data_contract.py +316 -0
  131. ml4t/diagnostic/integration/engineer_contract.py +226 -0
  132. ml4t/diagnostic/logging/__init__.py +77 -0
  133. ml4t/diagnostic/logging/logger.py +245 -0
  134. ml4t/diagnostic/logging/performance.py +234 -0
  135. ml4t/diagnostic/logging/progress.py +234 -0
  136. ml4t/diagnostic/logging/wandb.py +412 -0
  137. ml4t/diagnostic/metrics/__init__.py +9 -0
  138. ml4t/diagnostic/metrics/percentiles.py +128 -0
  139. ml4t/diagnostic/py.typed +1 -0
  140. ml4t/diagnostic/reporting/__init__.py +43 -0
  141. ml4t/diagnostic/reporting/base.py +130 -0
  142. ml4t/diagnostic/reporting/html_renderer.py +275 -0
  143. ml4t/diagnostic/reporting/json_renderer.py +51 -0
  144. ml4t/diagnostic/reporting/markdown_renderer.py +117 -0
  145. ml4t/diagnostic/results/AGENT.md +24 -0
  146. ml4t/diagnostic/results/__init__.py +105 -0
  147. ml4t/diagnostic/results/barrier_results/__init__.py +36 -0
  148. ml4t/diagnostic/results/barrier_results/hit_rate.py +304 -0
  149. ml4t/diagnostic/results/barrier_results/precision_recall.py +266 -0
  150. ml4t/diagnostic/results/barrier_results/profit_factor.py +297 -0
  151. ml4t/diagnostic/results/barrier_results/tearsheet.py +397 -0
  152. ml4t/diagnostic/results/barrier_results/time_to_target.py +305 -0
  153. ml4t/diagnostic/results/barrier_results/validation.py +38 -0
  154. ml4t/diagnostic/results/base.py +177 -0
  155. ml4t/diagnostic/results/event_results.py +349 -0
  156. ml4t/diagnostic/results/feature_results.py +787 -0
  157. ml4t/diagnostic/results/multi_signal_results.py +431 -0
  158. ml4t/diagnostic/results/portfolio_results.py +281 -0
  159. ml4t/diagnostic/results/sharpe_results.py +448 -0
  160. ml4t/diagnostic/results/signal_results/__init__.py +74 -0
  161. ml4t/diagnostic/results/signal_results/ic.py +581 -0
  162. ml4t/diagnostic/results/signal_results/irtc.py +110 -0
  163. ml4t/diagnostic/results/signal_results/quantile.py +392 -0
  164. ml4t/diagnostic/results/signal_results/tearsheet.py +456 -0
  165. ml4t/diagnostic/results/signal_results/turnover.py +213 -0
  166. ml4t/diagnostic/results/signal_results/validation.py +147 -0
  167. ml4t/diagnostic/signal/AGENT.md +17 -0
  168. ml4t/diagnostic/signal/__init__.py +69 -0
  169. ml4t/diagnostic/signal/_report.py +152 -0
  170. ml4t/diagnostic/signal/_utils.py +261 -0
  171. ml4t/diagnostic/signal/core.py +275 -0
  172. ml4t/diagnostic/signal/quantile.py +148 -0
  173. ml4t/diagnostic/signal/result.py +214 -0
  174. ml4t/diagnostic/signal/signal_ic.py +129 -0
  175. ml4t/diagnostic/signal/turnover.py +182 -0
  176. ml4t/diagnostic/splitters/AGENT.md +19 -0
  177. ml4t/diagnostic/splitters/__init__.py +36 -0
  178. ml4t/diagnostic/splitters/base.py +501 -0
  179. ml4t/diagnostic/splitters/calendar.py +421 -0
  180. ml4t/diagnostic/splitters/calendar_config.py +91 -0
  181. ml4t/diagnostic/splitters/combinatorial.py +1064 -0
  182. ml4t/diagnostic/splitters/config.py +322 -0
  183. ml4t/diagnostic/splitters/cpcv/__init__.py +57 -0
  184. ml4t/diagnostic/splitters/cpcv/combinations.py +119 -0
  185. ml4t/diagnostic/splitters/cpcv/partitioning.py +263 -0
  186. ml4t/diagnostic/splitters/cpcv/purge_engine.py +379 -0
  187. ml4t/diagnostic/splitters/cpcv/windows.py +190 -0
  188. ml4t/diagnostic/splitters/group_isolation.py +329 -0
  189. ml4t/diagnostic/splitters/persistence.py +316 -0
  190. ml4t/diagnostic/splitters/utils.py +207 -0
  191. ml4t/diagnostic/splitters/walk_forward.py +757 -0
  192. ml4t/diagnostic/utils/__init__.py +42 -0
  193. ml4t/diagnostic/utils/config.py +542 -0
  194. ml4t/diagnostic/utils/dependencies.py +318 -0
  195. ml4t/diagnostic/utils/sessions.py +127 -0
  196. ml4t/diagnostic/validation/__init__.py +54 -0
  197. ml4t/diagnostic/validation/dataframe.py +274 -0
  198. ml4t/diagnostic/validation/returns.py +280 -0
  199. ml4t/diagnostic/validation/timeseries.py +299 -0
  200. ml4t/diagnostic/visualization/AGENT.md +19 -0
  201. ml4t/diagnostic/visualization/__init__.py +223 -0
  202. ml4t/diagnostic/visualization/backtest/__init__.py +98 -0
  203. ml4t/diagnostic/visualization/backtest/cost_attribution.py +762 -0
  204. ml4t/diagnostic/visualization/backtest/executive_summary.py +895 -0
  205. ml4t/diagnostic/visualization/backtest/interactive_controls.py +673 -0
  206. ml4t/diagnostic/visualization/backtest/statistical_validity.py +874 -0
  207. ml4t/diagnostic/visualization/backtest/tearsheet.py +565 -0
  208. ml4t/diagnostic/visualization/backtest/template_system.py +373 -0
  209. ml4t/diagnostic/visualization/backtest/trade_plots.py +1172 -0
  210. ml4t/diagnostic/visualization/barrier_plots.py +782 -0
  211. ml4t/diagnostic/visualization/core.py +1060 -0
  212. ml4t/diagnostic/visualization/dashboards/__init__.py +36 -0
  213. ml4t/diagnostic/visualization/dashboards/base.py +582 -0
  214. ml4t/diagnostic/visualization/dashboards/importance.py +801 -0
  215. ml4t/diagnostic/visualization/dashboards/interaction.py +263 -0
  216. ml4t/diagnostic/visualization/dashboards.py +43 -0
  217. ml4t/diagnostic/visualization/data_extraction/__init__.py +48 -0
  218. ml4t/diagnostic/visualization/data_extraction/importance.py +649 -0
  219. ml4t/diagnostic/visualization/data_extraction/interaction.py +504 -0
  220. ml4t/diagnostic/visualization/data_extraction/types.py +113 -0
  221. ml4t/diagnostic/visualization/data_extraction/validation.py +66 -0
  222. ml4t/diagnostic/visualization/feature_plots.py +888 -0
  223. ml4t/diagnostic/visualization/interaction_plots.py +618 -0
  224. ml4t/diagnostic/visualization/portfolio/__init__.py +41 -0
  225. ml4t/diagnostic/visualization/portfolio/dashboard.py +514 -0
  226. ml4t/diagnostic/visualization/portfolio/drawdown_plots.py +341 -0
  227. ml4t/diagnostic/visualization/portfolio/returns_plots.py +487 -0
  228. ml4t/diagnostic/visualization/portfolio/risk_plots.py +301 -0
  229. ml4t/diagnostic/visualization/report_generation.py +1343 -0
  230. ml4t/diagnostic/visualization/signal/__init__.py +103 -0
  231. ml4t/diagnostic/visualization/signal/dashboard.py +911 -0
  232. ml4t/diagnostic/visualization/signal/event_plots.py +514 -0
  233. ml4t/diagnostic/visualization/signal/ic_plots.py +635 -0
  234. ml4t/diagnostic/visualization/signal/multi_signal_dashboard.py +974 -0
  235. ml4t/diagnostic/visualization/signal/multi_signal_plots.py +603 -0
  236. ml4t/diagnostic/visualization/signal/quantile_plots.py +625 -0
  237. ml4t/diagnostic/visualization/signal/turnover_plots.py +400 -0
  238. ml4t/diagnostic/visualization/trade_shap/__init__.py +90 -0
  239. ml4t_diagnostic-0.1.0a1.dist-info/METADATA +1044 -0
  240. ml4t_diagnostic-0.1.0a1.dist-info/RECORD +242 -0
  241. ml4t_diagnostic-0.1.0a1.dist-info/WHEEL +4 -0
  242. ml4t_diagnostic-0.1.0a1.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,45 @@
1
+ """Volatility clustering detection and modeling for time series.
2
+
3
+ This module provides statistical tests and models for analyzing conditional
4
+ heteroscedasticity (volatility clustering) in financial time series:
5
+
6
+ - ARCH-LM test - Tests for autoregressive conditional heteroscedasticity (ARCH effects)
7
+ - GARCH(p,q) fitting - Models time-varying volatility dynamics
8
+ - Comprehensive volatility analysis - Combines ARCH-LM and GARCH
9
+
10
+ Volatility clustering is a key stylized fact of financial returns where large
11
+ changes tend to be followed by large changes, and small changes by small changes.
12
+
13
+ Example:
14
+ >>> import numpy as np
15
+ >>> from ml4t.diagnostic.evaluation.volatility import arch_lm_test, analyze_volatility
16
+ >>>
17
+ >>> # White noise (no ARCH effects)
18
+ >>> white_noise = np.random.randn(1000)
19
+ >>> result = arch_lm_test(white_noise)
20
+ >>> print(f"Has ARCH effects: {result.has_arch_effects}") # Should be False
21
+ >>>
22
+ >>> # Comprehensive analysis
23
+ >>> analysis = analyze_volatility(returns_data)
24
+ >>> print(analysis.summary())
25
+
26
+ References:
27
+ - Engle, R. F. (1982). Autoregressive Conditional Heteroscedasticity.
28
+ - Bollerslev, T. (1986). Generalized Autoregressive Conditional Heteroskedasticity.
29
+ """
30
+
31
+ from .analysis import VolatilityAnalysisResult, analyze_volatility
32
+ from .arch import ARCHLMResult, arch_lm_test
33
+ from .garch import GARCHResult, fit_garch
34
+
35
+ __all__ = [
36
+ # ARCH-LM test
37
+ "ARCHLMResult",
38
+ "arch_lm_test",
39
+ # GARCH model
40
+ "GARCHResult",
41
+ "fit_garch",
42
+ # Combined analysis
43
+ "VolatilityAnalysisResult",
44
+ "analyze_volatility",
45
+ ]
@@ -0,0 +1,351 @@
1
+ """Comprehensive volatility analysis combining ARCH-LM and GARCH.
2
+
3
+ This module provides a unified interface for volatility analysis, combining
4
+ the ARCH-LM test for detecting volatility clustering with GARCH model
5
+ fitting for estimating conditional volatility dynamics.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import numpy as np
11
+ import pandas as pd
12
+
13
+ from ml4t.diagnostic.errors import ComputationError, ValidationError
14
+ from ml4t.diagnostic.logging import get_logger
15
+
16
+ from .arch import ARCHLMResult, arch_lm_test
17
+ from .garch import GARCHResult, fit_garch
18
+
19
+ logger = get_logger(__name__)
20
+
21
+
22
+ class VolatilityAnalysisResult:
23
+ """Comprehensive volatility analysis results combining ARCH-LM and GARCH.
24
+
25
+ This class provides a unified interface for volatility analysis, combining
26
+ the ARCH-LM test for detecting volatility clustering with GARCH model
27
+ fitting for estimating conditional volatility dynamics.
28
+
29
+ Workflow:
30
+ 1. Run ARCH-LM test to detect volatility clustering
31
+ 2. If clustering detected AND fit_garch=True, fit GARCH model
32
+ 3. Provide comprehensive summary and recommendations
33
+
34
+ Attributes:
35
+ arch_lm_result: Results from ARCH-LM test
36
+ garch_result: Results from GARCH fitting (None if not fitted or no ARCH effects)
37
+ has_volatility_clustering: Whether volatility clustering was detected
38
+ persistence: Overall volatility persistence (alpha + beta from GARCH, None if not fitted)
39
+ interpretation: Human-readable interpretation of results
40
+ """
41
+
42
+ def __init__(
43
+ self,
44
+ arch_lm_result: ARCHLMResult,
45
+ garch_result: GARCHResult | None = None,
46
+ ):
47
+ """Initialize volatility analysis result.
48
+
49
+ Args:
50
+ arch_lm_result: Results from ARCH-LM test
51
+ garch_result: Results from GARCH fitting (optional)
52
+ """
53
+ self.arch_lm_result = arch_lm_result
54
+ self.garch_result = garch_result
55
+ self.has_volatility_clustering = arch_lm_result.has_arch_effects
56
+
57
+ # Extract persistence if GARCH was fitted
58
+ self.persistence: float | None
59
+ if garch_result is not None:
60
+ self.persistence = garch_result.persistence
61
+ else:
62
+ self.persistence = None
63
+
64
+ # Generate interpretation
65
+ self.interpretation = self._generate_interpretation()
66
+
67
+ def _generate_interpretation(self) -> str:
68
+ """Generate human-readable interpretation of results."""
69
+ lines = []
70
+
71
+ # ARCH-LM test interpretation
72
+ if self.has_volatility_clustering:
73
+ lines.append("✓ Volatility clustering detected (ARCH effects present)")
74
+ lines.append(" - Time-varying volatility in returns")
75
+ lines.append(" - Large changes tend to follow large changes")
76
+ else:
77
+ lines.append("✗ No volatility clustering detected (no ARCH effects)")
78
+ lines.append(" - Constant variance assumption reasonable")
79
+ lines.append(" - Classical methods with homoscedasticity appropriate")
80
+
81
+ # GARCH model interpretation (if fitted)
82
+ if self.garch_result is not None:
83
+ lines.append("")
84
+ lines.append("GARCH Model Results:")
85
+
86
+ persistence = self.persistence
87
+ if persistence is not None:
88
+ lines.append(f" - Persistence (α+β): {persistence:.4f}")
89
+
90
+ if persistence >= 1.0:
91
+ lines.append(" ⚠ WARNING: Non-stationary (persistence ≥ 1)")
92
+ lines.append(" - Volatility shocks do not decay")
93
+ lines.append(" - Consider IGARCH or alternative models")
94
+ elif persistence >= 0.99:
95
+ lines.append(" ⚠ Very high persistence (near unit root)")
96
+ lines.append(" - Volatility shocks decay very slowly")
97
+ lines.append(" - Risk forecasts remain elevated for long periods")
98
+ elif persistence > 0.95:
99
+ lines.append(" → High persistence (slow mean reversion)")
100
+ lines.append(" - Typical for daily financial returns")
101
+ lines.append(" - Volatility shocks persist for many periods")
102
+ else:
103
+ lines.append(" → Moderate persistence (faster mean reversion)")
104
+ lines.append(" - Volatility shocks decay relatively quickly")
105
+
106
+ # Compute half-life if stationary and positive
107
+ # Guard against persistence <= 0 which would make log undefined
108
+ if 0.0 < persistence < 1.0:
109
+ half_life = np.log(0.5) / np.log(persistence)
110
+ lines.append(f" - Shock half-life: {half_life:.1f} periods")
111
+
112
+ # Recommendations
113
+ lines.append("")
114
+ lines.append("Recommendations:")
115
+ if self.has_volatility_clustering:
116
+ if self.garch_result is not None:
117
+ lines.append(" 1. Use fitted GARCH model for volatility forecasting")
118
+ lines.append(" 2. Apply conditional volatility in risk models (VaR, CVaR)")
119
+ lines.append(" 3. Consider HAC-adjusted standard errors for inference")
120
+ lines.append(" 4. Account for volatility clustering in trading strategies")
121
+ else:
122
+ lines.append(" 1. Consider fitting GARCH/EGARCH models")
123
+ lines.append(" 2. Use HAC-adjusted standard errors")
124
+ lines.append(" 3. Account for time-varying volatility in risk models")
125
+ else:
126
+ lines.append(" 1. Constant variance models appropriate")
127
+ lines.append(" 2. Standard OLS methods valid")
128
+ lines.append(" 3. Classical risk models acceptable")
129
+
130
+ return "\n".join(lines)
131
+
132
+ def __repr__(self) -> str:
133
+ """String representation."""
134
+ garch_info = (
135
+ f", persistence={self.persistence:.4f}"
136
+ if self.persistence is not None
137
+ else ", no_garch"
138
+ )
139
+ return (
140
+ f"VolatilityAnalysisResult("
141
+ f"has_clustering={self.has_volatility_clustering}, "
142
+ f"arch_p={self.arch_lm_result.p_value:.4f}"
143
+ f"{garch_info})"
144
+ )
145
+
146
+ def summary(self) -> str:
147
+ """Comprehensive volatility analysis summary.
148
+
149
+ Returns:
150
+ Formatted summary string with all analysis results
151
+ """
152
+ lines = [
153
+ "=" * 70,
154
+ "Comprehensive Volatility Analysis",
155
+ "=" * 70,
156
+ ]
157
+
158
+ # Section 1: ARCH-LM Test
159
+ lines.append("")
160
+ lines.append("1. ARCH-LM Test for Volatility Clustering")
161
+ lines.append("-" * 70)
162
+ lines.append(f"Test Statistic: {self.arch_lm_result.test_statistic:.4f}")
163
+ lines.append(f"P-value: {self.arch_lm_result.p_value:.4f}")
164
+ lines.append(f"Lags Used: {self.arch_lm_result.lags}")
165
+ lines.append(f"Observations: {self.arch_lm_result.n_obs}")
166
+ lines.append("")
167
+ conclusion = (
168
+ "ARCH effects detected (volatility clustering present)"
169
+ if self.has_volatility_clustering
170
+ else "No ARCH effects (constant variance)"
171
+ )
172
+ lines.append(f"Conclusion: {conclusion}")
173
+
174
+ # Section 2: GARCH Model (if fitted)
175
+ if self.garch_result is not None:
176
+ lines.append("")
177
+ lines.append("2. GARCH Model Fitting Results")
178
+ lines.append("-" * 70)
179
+ # Infer p and q from coefficient shapes
180
+ p = (
181
+ len(self.garch_result.alpha)
182
+ if isinstance(self.garch_result.alpha, tuple | list)
183
+ else 1
184
+ )
185
+ q = (
186
+ len(self.garch_result.beta)
187
+ if isinstance(self.garch_result.beta, tuple | list)
188
+ else 1
189
+ )
190
+ lines.append(f"Model: GARCH({p},{q})")
191
+ lines.append(f"Converged: {'Yes' if self.garch_result.converged else 'No'}")
192
+ lines.append(f"Iterations: {self.garch_result.iterations}")
193
+ lines.append("")
194
+ lines.append("Parameters:")
195
+ lines.append(f" ω (omega): {self.garch_result.omega:.6f}")
196
+
197
+ if isinstance(self.garch_result.alpha, tuple | list):
198
+ for i, a in enumerate(self.garch_result.alpha, 1):
199
+ lines.append(f" α{i} (alpha): {a:.6f}")
200
+ else:
201
+ lines.append(f" α (alpha): {self.garch_result.alpha:.6f}")
202
+
203
+ if isinstance(self.garch_result.beta, tuple | list):
204
+ for i, b in enumerate(self.garch_result.beta, 1):
205
+ lines.append(f" β{i} (beta): {b:.6f}")
206
+ else:
207
+ lines.append(f" β (beta): {self.garch_result.beta:.6f}")
208
+
209
+ lines.append("")
210
+ lines.append(f"Persistence (α+β): {self.persistence:.6f}")
211
+
212
+ # Model fit statistics
213
+ lines.append("")
214
+ lines.append("Model Fit:")
215
+ lines.append(f" Log-Likelihood: {self.garch_result.log_likelihood:.4f}")
216
+ lines.append(f" AIC: {self.garch_result.aic:.4f}")
217
+ lines.append(f" BIC: {self.garch_result.bic:.4f}")
218
+
219
+ elif self.has_volatility_clustering:
220
+ lines.append("")
221
+ lines.append("2. GARCH Model")
222
+ lines.append("-" * 70)
223
+ lines.append("Not fitted (fit_garch=False or fitting skipped)")
224
+
225
+ # Section 3: Interpretation
226
+ lines.append("")
227
+ lines.append("3. Interpretation")
228
+ lines.append("-" * 70)
229
+ lines.append(self.interpretation)
230
+
231
+ lines.append("")
232
+ lines.append("=" * 70)
233
+
234
+ return "\n".join(lines)
235
+
236
+
237
def analyze_volatility(
    returns: pd.Series | np.ndarray,
    arch_lags: int = 12,
    fit_garch_model: bool = True,
    garch_p: int = 1,
    garch_q: int = 1,
    alpha: float = 0.05,
) -> VolatilityAnalysisResult:
    """Comprehensive volatility analysis combining ARCH-LM and GARCH.

    This function provides a complete workflow for volatility analysis:
    1. Tests for volatility clustering using ARCH-LM test
    2. If clustering detected AND fit_garch=True, fits GARCH model
    3. Returns comprehensive summary with interpretation and recommendations

    The ARCH-LM test detects autoregressive conditional heteroscedasticity
    (volatility clustering), and the GARCH model quantifies the dynamics
    of time-varying volatility.

    Args:
        returns: Returns series (NOT prices) to analyze
        arch_lags: Number of lags for ARCH-LM test (default 12)
        fit_garch_model: Whether to fit GARCH model if ARCH effects detected (default True)
        garch_p: GARCH AR order (default 1)
        garch_q: GARCH MA order (default 1)
        alpha: Significance level for ARCH-LM test (default 0.05)

    Returns:
        VolatilityAnalysisResult with comprehensive analysis

    Raises:
        ValidationError: If data is invalid
        ComputationError: If analysis fails

    Notes:
        - Always run ARCH-LM test first (even if fit_garch_model=False)
        - GARCH fitting only attempted if ARCH effects detected
        - Set fit_garch_model=False to skip GARCH (faster, detection only)
        - GARCH requires 'arch' package (pip install arch)
        - Default GARCH(1,1) sufficient for most financial applications
        - Results include interpretation and actionable recommendations

    References:
        Engle, R. F. (1982). Autoregressive Conditional Heteroscedasticity.
        Bollerslev, T. (1986). Generalized Autoregressive Conditional Heteroskedasticity.
    """
    # Lazy %-style logging arguments avoid formatting cost when the level is disabled.
    logger.debug(
        "Running comprehensive volatility analysis: arch_lags=%s, fit_garch_model=%s, garch_p=%s, garch_q=%s",
        arch_lags,
        fit_garch_model,
        garch_p,
        garch_q,
    )

    # Step 1: Run ARCH-LM test (always, regardless of fit_garch_model)
    try:
        arch_result = arch_lm_test(returns, lags=arch_lags, demean=True, alpha=alpha)
        logger.info(
            "ARCH-LM test complete: has_arch=%s, p_value=%.4f",
            arch_result.has_arch_effects,
            arch_result.p_value,
        )
    except ValidationError:
        # Let validation errors pass through (invalid inputs)
        raise
    except Exception as e:
        # Wrap other errors as computation errors; chain the cause explicitly
        # with `raise ... from e` so the traceback shows the original failure.
        logger.error("ARCH-LM test failed: %s", e)
        raise ComputationError(
            f"ARCH-LM test failed during volatility analysis: {e}",
            context={"arch_lags": arch_lags},
            cause=e,
        ) from e

    # Step 2: Fit GARCH if ARCH effects detected and requested
    garch_result = None
    if arch_result.has_arch_effects and fit_garch_model:
        logger.debug(
            "ARCH effects detected (p=%.4f), fitting GARCH(%s,%s) model",
            arch_result.p_value,
            garch_p,
            garch_q,
        )
        try:
            garch_result = fit_garch(returns, p=garch_p, q=garch_q)
            logger.info(
                "GARCH(%s,%s) fitted successfully: persistence=%.4f, converged=%s",
                garch_p,
                garch_q,
                garch_result.persistence,
                garch_result.converged,
            )
        except ValidationError as e:
            # If arch package not installed, degrade gracefully to
            # detection-only results; other validation errors still propagate.
            if "arch" in str(e).lower() and "package" in str(e).lower():
                logger.warning(
                    "GARCH fitting skipped: arch package not installed. Install with: pip install arch"
                )
            else:
                # Re-raise other validation errors
                raise
        except Exception as e:
            # Best-effort: a failed fit should not discard the ARCH-LM results.
            logger.warning("GARCH fitting failed: %s. Continuing with ARCH-LM results only.", e)
    elif not arch_result.has_arch_effects:
        logger.info(
            "No ARCH effects detected (p=%.4f), skipping GARCH fitting",
            arch_result.p_value,
        )
    else:
        logger.debug("fit_garch_model=False, skipping GARCH fitting")

    # Step 3: Create comprehensive result
    result = VolatilityAnalysisResult(
        arch_lm_result=arch_result,
        garch_result=garch_result,
    )

    logger.info(
        "Volatility analysis complete: has_clustering=%s, persistence=%s",
        result.has_volatility_clustering,
        result.persistence,
    )

    return result
@@ -0,0 +1,258 @@
1
+ """ARCH Lagrange Multiplier test for conditional heteroscedasticity.
2
+
3
+ The ARCH-LM test (Engle, 1982) detects autoregressive conditional
4
+ heteroscedasticity (volatility clustering) in time series data.
5
+
6
+ References:
7
+ Engle, R. F. (1982). Autoregressive Conditional Heteroscedasticity with
8
+ Estimates of the Variance of United Kingdom Inflation. Econometrica, 50(4),
9
+ 987-1007. DOI: 10.2307/1912773
10
+ """
11
+
12
+ from __future__ import annotations
13
+
14
+ import numpy as np
15
+ import pandas as pd
16
+
17
+ # het_arch is in statsmodels (required dependency)
18
+ from statsmodels.stats.diagnostic import het_arch
19
+
20
+ from ml4t.diagnostic.errors import ComputationError, ValidationError
21
+ from ml4t.diagnostic.logging import get_logger
22
+
23
+ logger = get_logger(__name__)
24
+
25
+
26
class ARCHLMResult:
    """Outcome of an ARCH Lagrange Multiplier test.

    The ARCH-LM test detects autoregressive conditional heteroscedasticity
    (volatility clustering) in time series data. The null hypothesis is
    that there are no ARCH effects (constant variance).

    Attributes:
        test_statistic: LM test statistic (n * R² from auxiliary regression)
        p_value: P-value for null hypothesis (no ARCH effects)
        lags: Number of lags tested in auxiliary regression
        n_obs: Number of observations used in test
        alpha: Significance level used for the test
        has_arch_effects: Whether ARCH effects detected (p < alpha)
    """

    def __init__(
        self,
        test_statistic: float,
        p_value: float,
        lags: int,
        n_obs: int,
        alpha: float = 0.05,
    ):
        """Store test outputs and derive the accept/reject verdict.

        Args:
            test_statistic: LM test statistic
            p_value: P-value for no ARCH effects hypothesis
            lags: Number of lags used in test
            n_obs: Number of observations
            alpha: Significance level for the test (default 0.05)
        """
        self.test_statistic = test_statistic
        self.p_value = p_value
        self.lags = lags
        self.n_obs = n_obs
        self.alpha = alpha
        # Reject H0 (constant variance) when p-value falls below alpha,
        # i.e. ARCH effects are present.
        self.has_arch_effects = p_value < alpha

    def __repr__(self) -> str:
        """Concise one-line representation for debugging."""
        stat = self.test_statistic
        pv = self.p_value
        flag = self.has_arch_effects
        return f"ARCHLMResult(statistic={stat:.4f}, p_value={pv:.4f}, has_arch_effects={flag})"

    def summary(self) -> str:
        """Build a human-readable, multi-line report of the test results.

        Returns:
            Formatted summary string
        """
        out = [
            "ARCH Lagrange Multiplier Test Results",
            "=" * 50,
            f"Test Statistic: {self.test_statistic:.4f}",
            f"P-value: {self.p_value:.4f}",
            f"Lags Used: {self.lags}",
            f"Observations: {self.n_obs}",
            "",
        ]

        # Verdict line plus the decision at the configured significance level.
        verdict = "ARCH effects detected" if self.has_arch_effects else "No ARCH effects"
        out.append(f"Conclusion: {verdict}")
        decision = "Reject" if self.has_arch_effects else "Fail to reject"
        pct = self.alpha * 100
        out.append(f" ({decision} H0 at {pct:.0f}% level)")

        out.append("")
        out.append("Interpretation:")
        if self.has_arch_effects:
            out.extend(
                [
                    " - Volatility clustering present (time-varying variance)",
                    " - Consider GARCH/EGARCH models for volatility forecasting",
                    " - Standard errors may be unreliable without correction",
                    " - Risk models should account for conditional heteroscedasticity",
                ]
            )
        else:
            out.extend(
                [
                    " - No evidence of volatility clustering",
                    " - Constant variance assumption is reasonable",
                    " - Classical methods with homoscedasticity are appropriate",
                ]
            )

        out.extend(
            [
                "",
                "Test Methodology:",
                " - Auxiliary regression: ε²_t = α₀ + Σ(α_i * ε²_{t-i})",
                f" - LM statistic = n * R² ~ χ²({self.lags})",
                " - H0: No ARCH effects (α₁ = α₂ = ... = α_lags = 0)",
            ]
        )

        return "\n".join(out)
122
+
123
+
124
def arch_lm_test(
    data: pd.Series | np.ndarray,
    lags: int = 12,
    demean: bool = True,
    alpha: float = 0.05,
) -> ARCHLMResult:
    """Perform ARCH Lagrange Multiplier test for conditional heteroscedasticity.

    The ARCH-LM test (Engle, 1982) tests for autoregressive conditional
    heteroscedasticity (volatility clustering) in time series data. The test
    is based on the principle that if ARCH effects are present, squared
    residuals will be autocorrelated.

    Test Methodology:
        1. Compute residuals: ε_t (de-meaned if demean=True)
        2. Square residuals: ε²_t
        3. Regress ε²_t on ε²_{t-1}, ..., ε²_{t-lags}
        4. LM statistic = n * R² from auxiliary regression
        5. Under H0 (no ARCH): LM ~ χ²(lags)

    Args:
        data: Time series data to test (1D array or Series)
        lags: Number of lags to test (default 12, ~1 year of monthly data)
        demean: Whether to subtract mean before computing squared residuals.
            True is common for returns which are approximately zero-mean.
        alpha: Significance level for the test (default 0.05). Must be in (0, 1).

    Returns:
        ARCHLMResult with test statistics and conclusion

    Raises:
        ValidationError: If data is invalid (empty, wrong shape, non-finite,
            too short for the requested lags) or a parameter is out of range.
        ComputationError: If test computation fails

    Notes:
        - De-meaning (demean=True) is standard for return series
        - Lag selection: 12 for monthly, ~250 for daily returns
        - Test is asymptotically valid (needs large sample)
        - Presence of ARCH effects suggests GARCH models may be appropriate
        - Uses statsmodels.stats.diagnostic.het_arch (core dependency)

    References:
        Engle, R. F. (1982). Autoregressive Conditional Heteroscedasticity with
        Estimates of the Variance of United Kingdom Inflation. Econometrica,
        50(4), 987-1007. DOI: 10.2307/1912773
    """
    logger.debug(f"Running ARCH-LM test with lags={lags}, demean={demean}")

    # Normalize input to a 1D numpy array for validation and computation.
    arr = data.to_numpy() if isinstance(data, pd.Series) else np.asarray(data)

    # --- Input validation (raises ValidationError, never wrapped below) ---
    if arr.size == 0:
        raise ValidationError(
            "Cannot perform ARCH-LM test on empty data",
            context={"data_size": 0},
        )

    if arr.ndim != 1:
        raise ValidationError(
            f"Data must be 1-dimensional, got shape {arr.shape}",
            context={"data_shape": arr.shape},
        )

    if np.any(~np.isfinite(arr)):
        # Cast to int so the context dict holds a plain Python scalar,
        # not a NumPy integer.
        n_invalid = int(np.sum(~np.isfinite(arr)))
        raise ValidationError(
            f"Data contains {n_invalid} NaN or infinite values",
            context={"n_invalid": n_invalid, "data_size": arr.size},
        )

    # Validate lags parameter FIRST (before computing min_obs)
    if lags < 1:
        raise ValidationError(
            f"Number of lags must be positive, got {lags}",
            context={"lags": lags},
        )

    # alpha drives the accept/reject decision in ARCHLMResult; an out-of-range
    # value would silently produce a meaningless conclusion.
    if not 0.0 < alpha < 1.0:
        raise ValidationError(
            f"Significance level alpha must be in (0, 1), got {alpha}",
            context={"alpha": alpha},
        )

    # Check minimum sample size (now safe since lags >= 1). This also
    # guarantees arr.size > lags, so no separate lags-vs-size check is needed.
    min_obs = lags + 10  # Need at least lags + some buffer
    if arr.size < min_obs:
        raise ValidationError(
            f"Insufficient data for ARCH-LM test with {lags} lags. "
            f"Need at least {min_obs} observations, got {arr.size}",
            context={"n_obs": arr.size, "lags": lags, "min_required": min_obs},
        )

    try:
        # De-mean the data if requested (standard for returns)
        if demean:
            mean = float(np.mean(arr))  # hoisted: computed once, logged once
            residuals = arr - mean
            logger.debug(f"De-meaned data: mean={mean:.6f}")
        else:
            residuals = arr.copy()

        # Run ARCH-LM test using statsmodels
        # het_arch returns (statistic, p-value, f-stat, f-pvalue)
        # We use the LM test statistic (first two values)
        result_tuple = het_arch(residuals, nlags=lags)
        lm_stat = result_tuple[0]
        p_value = result_tuple[1]

        logger.info(
            f"ARCH-LM test complete: statistic={lm_stat:.4f}, p-value={p_value:.4f}",
            lags=lags,
            n_obs=arr.size,
        )

        return ARCHLMResult(
            test_statistic=float(lm_stat),
            p_value=float(p_value),
            lags=lags,
            n_obs=arr.size,
            alpha=alpha,
        )

    except Exception as e:
        # Wrap computation failures; explicit chaining (`from e`) preserves the
        # traceback without needing a lint suppression.
        logger.error(f"ARCH-LM test failed: {e}", lags=lags, n_obs=arr.size)
        raise ComputationError(
            f"ARCH-LM test computation failed: {e}",
            context={
                "n_obs": arr.size,
                "lags": lags,
                "demean": demean,
            },
            cause=e,
        ) from e