ml4t-diagnostic 0.1.0a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (242) hide show
  1. ml4t/diagnostic/AGENT.md +25 -0
  2. ml4t/diagnostic/__init__.py +166 -0
  3. ml4t/diagnostic/backends/__init__.py +10 -0
  4. ml4t/diagnostic/backends/adapter.py +192 -0
  5. ml4t/diagnostic/backends/polars_backend.py +899 -0
  6. ml4t/diagnostic/caching/__init__.py +40 -0
  7. ml4t/diagnostic/caching/cache.py +331 -0
  8. ml4t/diagnostic/caching/decorators.py +131 -0
  9. ml4t/diagnostic/caching/smart_cache.py +339 -0
  10. ml4t/diagnostic/config/AGENT.md +24 -0
  11. ml4t/diagnostic/config/README.md +267 -0
  12. ml4t/diagnostic/config/__init__.py +219 -0
  13. ml4t/diagnostic/config/barrier_config.py +277 -0
  14. ml4t/diagnostic/config/base.py +301 -0
  15. ml4t/diagnostic/config/event_config.py +148 -0
  16. ml4t/diagnostic/config/feature_config.py +404 -0
  17. ml4t/diagnostic/config/multi_signal_config.py +55 -0
  18. ml4t/diagnostic/config/portfolio_config.py +215 -0
  19. ml4t/diagnostic/config/report_config.py +391 -0
  20. ml4t/diagnostic/config/sharpe_config.py +202 -0
  21. ml4t/diagnostic/config/signal_config.py +206 -0
  22. ml4t/diagnostic/config/trade_analysis_config.py +310 -0
  23. ml4t/diagnostic/config/validation.py +279 -0
  24. ml4t/diagnostic/core/__init__.py +29 -0
  25. ml4t/diagnostic/core/numba_utils.py +315 -0
  26. ml4t/diagnostic/core/purging.py +372 -0
  27. ml4t/diagnostic/core/sampling.py +471 -0
  28. ml4t/diagnostic/errors/__init__.py +205 -0
  29. ml4t/diagnostic/evaluation/AGENT.md +26 -0
  30. ml4t/diagnostic/evaluation/__init__.py +437 -0
  31. ml4t/diagnostic/evaluation/autocorrelation.py +531 -0
  32. ml4t/diagnostic/evaluation/barrier_analysis.py +1050 -0
  33. ml4t/diagnostic/evaluation/binary_metrics.py +910 -0
  34. ml4t/diagnostic/evaluation/dashboard.py +715 -0
  35. ml4t/diagnostic/evaluation/diagnostic_plots.py +1037 -0
  36. ml4t/diagnostic/evaluation/distribution/__init__.py +499 -0
  37. ml4t/diagnostic/evaluation/distribution/moments.py +299 -0
  38. ml4t/diagnostic/evaluation/distribution/tails.py +777 -0
  39. ml4t/diagnostic/evaluation/distribution/tests.py +470 -0
  40. ml4t/diagnostic/evaluation/drift/__init__.py +139 -0
  41. ml4t/diagnostic/evaluation/drift/analysis.py +432 -0
  42. ml4t/diagnostic/evaluation/drift/domain_classifier.py +517 -0
  43. ml4t/diagnostic/evaluation/drift/population_stability_index.py +310 -0
  44. ml4t/diagnostic/evaluation/drift/wasserstein.py +388 -0
  45. ml4t/diagnostic/evaluation/event_analysis.py +647 -0
  46. ml4t/diagnostic/evaluation/excursion.py +390 -0
  47. ml4t/diagnostic/evaluation/feature_diagnostics.py +873 -0
  48. ml4t/diagnostic/evaluation/feature_outcome.py +666 -0
  49. ml4t/diagnostic/evaluation/framework.py +935 -0
  50. ml4t/diagnostic/evaluation/metric_registry.py +255 -0
  51. ml4t/diagnostic/evaluation/metrics/AGENT.md +23 -0
  52. ml4t/diagnostic/evaluation/metrics/__init__.py +133 -0
  53. ml4t/diagnostic/evaluation/metrics/basic.py +160 -0
  54. ml4t/diagnostic/evaluation/metrics/conditional_ic.py +469 -0
  55. ml4t/diagnostic/evaluation/metrics/feature_outcome.py +475 -0
  56. ml4t/diagnostic/evaluation/metrics/ic_statistics.py +446 -0
  57. ml4t/diagnostic/evaluation/metrics/importance_analysis.py +338 -0
  58. ml4t/diagnostic/evaluation/metrics/importance_classical.py +375 -0
  59. ml4t/diagnostic/evaluation/metrics/importance_mda.py +371 -0
  60. ml4t/diagnostic/evaluation/metrics/importance_shap.py +715 -0
  61. ml4t/diagnostic/evaluation/metrics/information_coefficient.py +527 -0
  62. ml4t/diagnostic/evaluation/metrics/interactions.py +772 -0
  63. ml4t/diagnostic/evaluation/metrics/monotonicity.py +226 -0
  64. ml4t/diagnostic/evaluation/metrics/risk_adjusted.py +324 -0
  65. ml4t/diagnostic/evaluation/multi_signal.py +550 -0
  66. ml4t/diagnostic/evaluation/portfolio_analysis/__init__.py +83 -0
  67. ml4t/diagnostic/evaluation/portfolio_analysis/analysis.py +734 -0
  68. ml4t/diagnostic/evaluation/portfolio_analysis/metrics.py +589 -0
  69. ml4t/diagnostic/evaluation/portfolio_analysis/results.py +334 -0
  70. ml4t/diagnostic/evaluation/report_generation.py +824 -0
  71. ml4t/diagnostic/evaluation/signal_selector.py +452 -0
  72. ml4t/diagnostic/evaluation/stat_registry.py +139 -0
  73. ml4t/diagnostic/evaluation/stationarity/__init__.py +97 -0
  74. ml4t/diagnostic/evaluation/stationarity/analysis.py +518 -0
  75. ml4t/diagnostic/evaluation/stationarity/augmented_dickey_fuller.py +296 -0
  76. ml4t/diagnostic/evaluation/stationarity/kpss_test.py +308 -0
  77. ml4t/diagnostic/evaluation/stationarity/phillips_perron.py +365 -0
  78. ml4t/diagnostic/evaluation/stats/AGENT.md +43 -0
  79. ml4t/diagnostic/evaluation/stats/__init__.py +191 -0
  80. ml4t/diagnostic/evaluation/stats/backtest_overfitting.py +219 -0
  81. ml4t/diagnostic/evaluation/stats/bootstrap.py +228 -0
  82. ml4t/diagnostic/evaluation/stats/deflated_sharpe_ratio.py +591 -0
  83. ml4t/diagnostic/evaluation/stats/false_discovery_rate.py +295 -0
  84. ml4t/diagnostic/evaluation/stats/hac_standard_errors.py +108 -0
  85. ml4t/diagnostic/evaluation/stats/minimum_track_record.py +408 -0
  86. ml4t/diagnostic/evaluation/stats/moments.py +164 -0
  87. ml4t/diagnostic/evaluation/stats/rademacher_adjustment.py +436 -0
  88. ml4t/diagnostic/evaluation/stats/reality_check.py +155 -0
  89. ml4t/diagnostic/evaluation/stats/sharpe_inference.py +219 -0
  90. ml4t/diagnostic/evaluation/themes.py +330 -0
  91. ml4t/diagnostic/evaluation/threshold_analysis.py +957 -0
  92. ml4t/diagnostic/evaluation/trade_analysis.py +1136 -0
  93. ml4t/diagnostic/evaluation/trade_dashboard/__init__.py +32 -0
  94. ml4t/diagnostic/evaluation/trade_dashboard/app.py +315 -0
  95. ml4t/diagnostic/evaluation/trade_dashboard/export/__init__.py +18 -0
  96. ml4t/diagnostic/evaluation/trade_dashboard/export/csv.py +82 -0
  97. ml4t/diagnostic/evaluation/trade_dashboard/export/html.py +276 -0
  98. ml4t/diagnostic/evaluation/trade_dashboard/io.py +166 -0
  99. ml4t/diagnostic/evaluation/trade_dashboard/normalize.py +304 -0
  100. ml4t/diagnostic/evaluation/trade_dashboard/stats.py +386 -0
  101. ml4t/diagnostic/evaluation/trade_dashboard/style.py +79 -0
  102. ml4t/diagnostic/evaluation/trade_dashboard/tabs/__init__.py +21 -0
  103. ml4t/diagnostic/evaluation/trade_dashboard/tabs/patterns.py +354 -0
  104. ml4t/diagnostic/evaluation/trade_dashboard/tabs/shap_analysis.py +280 -0
  105. ml4t/diagnostic/evaluation/trade_dashboard/tabs/stat_validation.py +186 -0
  106. ml4t/diagnostic/evaluation/trade_dashboard/tabs/worst_trades.py +236 -0
  107. ml4t/diagnostic/evaluation/trade_dashboard/types.py +129 -0
  108. ml4t/diagnostic/evaluation/trade_shap/__init__.py +102 -0
  109. ml4t/diagnostic/evaluation/trade_shap/alignment.py +188 -0
  110. ml4t/diagnostic/evaluation/trade_shap/characterize.py +413 -0
  111. ml4t/diagnostic/evaluation/trade_shap/cluster.py +302 -0
  112. ml4t/diagnostic/evaluation/trade_shap/explain.py +208 -0
  113. ml4t/diagnostic/evaluation/trade_shap/hypotheses/__init__.py +23 -0
  114. ml4t/diagnostic/evaluation/trade_shap/hypotheses/generator.py +290 -0
  115. ml4t/diagnostic/evaluation/trade_shap/hypotheses/matcher.py +251 -0
  116. ml4t/diagnostic/evaluation/trade_shap/hypotheses/templates.yaml +467 -0
  117. ml4t/diagnostic/evaluation/trade_shap/models.py +386 -0
  118. ml4t/diagnostic/evaluation/trade_shap/normalize.py +116 -0
  119. ml4t/diagnostic/evaluation/trade_shap/pipeline.py +263 -0
  120. ml4t/diagnostic/evaluation/trade_shap_dashboard.py +283 -0
  121. ml4t/diagnostic/evaluation/trade_shap_diagnostics.py +588 -0
  122. ml4t/diagnostic/evaluation/validated_cv.py +535 -0
  123. ml4t/diagnostic/evaluation/visualization.py +1050 -0
  124. ml4t/diagnostic/evaluation/volatility/__init__.py +45 -0
  125. ml4t/diagnostic/evaluation/volatility/analysis.py +351 -0
  126. ml4t/diagnostic/evaluation/volatility/arch.py +258 -0
  127. ml4t/diagnostic/evaluation/volatility/garch.py +460 -0
  128. ml4t/diagnostic/integration/__init__.py +48 -0
  129. ml4t/diagnostic/integration/backtest_contract.py +671 -0
  130. ml4t/diagnostic/integration/data_contract.py +316 -0
  131. ml4t/diagnostic/integration/engineer_contract.py +226 -0
  132. ml4t/diagnostic/logging/__init__.py +77 -0
  133. ml4t/diagnostic/logging/logger.py +245 -0
  134. ml4t/diagnostic/logging/performance.py +234 -0
  135. ml4t/diagnostic/logging/progress.py +234 -0
  136. ml4t/diagnostic/logging/wandb.py +412 -0
  137. ml4t/diagnostic/metrics/__init__.py +9 -0
  138. ml4t/diagnostic/metrics/percentiles.py +128 -0
  139. ml4t/diagnostic/py.typed +1 -0
  140. ml4t/diagnostic/reporting/__init__.py +43 -0
  141. ml4t/diagnostic/reporting/base.py +130 -0
  142. ml4t/diagnostic/reporting/html_renderer.py +275 -0
  143. ml4t/diagnostic/reporting/json_renderer.py +51 -0
  144. ml4t/diagnostic/reporting/markdown_renderer.py +117 -0
  145. ml4t/diagnostic/results/AGENT.md +24 -0
  146. ml4t/diagnostic/results/__init__.py +105 -0
  147. ml4t/diagnostic/results/barrier_results/__init__.py +36 -0
  148. ml4t/diagnostic/results/barrier_results/hit_rate.py +304 -0
  149. ml4t/diagnostic/results/barrier_results/precision_recall.py +266 -0
  150. ml4t/diagnostic/results/barrier_results/profit_factor.py +297 -0
  151. ml4t/diagnostic/results/barrier_results/tearsheet.py +397 -0
  152. ml4t/diagnostic/results/barrier_results/time_to_target.py +305 -0
  153. ml4t/diagnostic/results/barrier_results/validation.py +38 -0
  154. ml4t/diagnostic/results/base.py +177 -0
  155. ml4t/diagnostic/results/event_results.py +349 -0
  156. ml4t/diagnostic/results/feature_results.py +787 -0
  157. ml4t/diagnostic/results/multi_signal_results.py +431 -0
  158. ml4t/diagnostic/results/portfolio_results.py +281 -0
  159. ml4t/diagnostic/results/sharpe_results.py +448 -0
  160. ml4t/diagnostic/results/signal_results/__init__.py +74 -0
  161. ml4t/diagnostic/results/signal_results/ic.py +581 -0
  162. ml4t/diagnostic/results/signal_results/irtc.py +110 -0
  163. ml4t/diagnostic/results/signal_results/quantile.py +392 -0
  164. ml4t/diagnostic/results/signal_results/tearsheet.py +456 -0
  165. ml4t/diagnostic/results/signal_results/turnover.py +213 -0
  166. ml4t/diagnostic/results/signal_results/validation.py +147 -0
  167. ml4t/diagnostic/signal/AGENT.md +17 -0
  168. ml4t/diagnostic/signal/__init__.py +69 -0
  169. ml4t/diagnostic/signal/_report.py +152 -0
  170. ml4t/diagnostic/signal/_utils.py +261 -0
  171. ml4t/diagnostic/signal/core.py +275 -0
  172. ml4t/diagnostic/signal/quantile.py +148 -0
  173. ml4t/diagnostic/signal/result.py +214 -0
  174. ml4t/diagnostic/signal/signal_ic.py +129 -0
  175. ml4t/diagnostic/signal/turnover.py +182 -0
  176. ml4t/diagnostic/splitters/AGENT.md +19 -0
  177. ml4t/diagnostic/splitters/__init__.py +36 -0
  178. ml4t/diagnostic/splitters/base.py +501 -0
  179. ml4t/diagnostic/splitters/calendar.py +421 -0
  180. ml4t/diagnostic/splitters/calendar_config.py +91 -0
  181. ml4t/diagnostic/splitters/combinatorial.py +1064 -0
  182. ml4t/diagnostic/splitters/config.py +322 -0
  183. ml4t/diagnostic/splitters/cpcv/__init__.py +57 -0
  184. ml4t/diagnostic/splitters/cpcv/combinations.py +119 -0
  185. ml4t/diagnostic/splitters/cpcv/partitioning.py +263 -0
  186. ml4t/diagnostic/splitters/cpcv/purge_engine.py +379 -0
  187. ml4t/diagnostic/splitters/cpcv/windows.py +190 -0
  188. ml4t/diagnostic/splitters/group_isolation.py +329 -0
  189. ml4t/diagnostic/splitters/persistence.py +316 -0
  190. ml4t/diagnostic/splitters/utils.py +207 -0
  191. ml4t/diagnostic/splitters/walk_forward.py +757 -0
  192. ml4t/diagnostic/utils/__init__.py +42 -0
  193. ml4t/diagnostic/utils/config.py +542 -0
  194. ml4t/diagnostic/utils/dependencies.py +318 -0
  195. ml4t/diagnostic/utils/sessions.py +127 -0
  196. ml4t/diagnostic/validation/__init__.py +54 -0
  197. ml4t/diagnostic/validation/dataframe.py +274 -0
  198. ml4t/diagnostic/validation/returns.py +280 -0
  199. ml4t/diagnostic/validation/timeseries.py +299 -0
  200. ml4t/diagnostic/visualization/AGENT.md +19 -0
  201. ml4t/diagnostic/visualization/__init__.py +223 -0
  202. ml4t/diagnostic/visualization/backtest/__init__.py +98 -0
  203. ml4t/diagnostic/visualization/backtest/cost_attribution.py +762 -0
  204. ml4t/diagnostic/visualization/backtest/executive_summary.py +895 -0
  205. ml4t/diagnostic/visualization/backtest/interactive_controls.py +673 -0
  206. ml4t/diagnostic/visualization/backtest/statistical_validity.py +874 -0
  207. ml4t/diagnostic/visualization/backtest/tearsheet.py +565 -0
  208. ml4t/diagnostic/visualization/backtest/template_system.py +373 -0
  209. ml4t/diagnostic/visualization/backtest/trade_plots.py +1172 -0
  210. ml4t/diagnostic/visualization/barrier_plots.py +782 -0
  211. ml4t/diagnostic/visualization/core.py +1060 -0
  212. ml4t/diagnostic/visualization/dashboards/__init__.py +36 -0
  213. ml4t/diagnostic/visualization/dashboards/base.py +582 -0
  214. ml4t/diagnostic/visualization/dashboards/importance.py +801 -0
  215. ml4t/diagnostic/visualization/dashboards/interaction.py +263 -0
  216. ml4t/diagnostic/visualization/dashboards.py +43 -0
  217. ml4t/diagnostic/visualization/data_extraction/__init__.py +48 -0
  218. ml4t/diagnostic/visualization/data_extraction/importance.py +649 -0
  219. ml4t/diagnostic/visualization/data_extraction/interaction.py +504 -0
  220. ml4t/diagnostic/visualization/data_extraction/types.py +113 -0
  221. ml4t/diagnostic/visualization/data_extraction/validation.py +66 -0
  222. ml4t/diagnostic/visualization/feature_plots.py +888 -0
  223. ml4t/diagnostic/visualization/interaction_plots.py +618 -0
  224. ml4t/diagnostic/visualization/portfolio/__init__.py +41 -0
  225. ml4t/diagnostic/visualization/portfolio/dashboard.py +514 -0
  226. ml4t/diagnostic/visualization/portfolio/drawdown_plots.py +341 -0
  227. ml4t/diagnostic/visualization/portfolio/returns_plots.py +487 -0
  228. ml4t/diagnostic/visualization/portfolio/risk_plots.py +301 -0
  229. ml4t/diagnostic/visualization/report_generation.py +1343 -0
  230. ml4t/diagnostic/visualization/signal/__init__.py +103 -0
  231. ml4t/diagnostic/visualization/signal/dashboard.py +911 -0
  232. ml4t/diagnostic/visualization/signal/event_plots.py +514 -0
  233. ml4t/diagnostic/visualization/signal/ic_plots.py +635 -0
  234. ml4t/diagnostic/visualization/signal/multi_signal_dashboard.py +974 -0
  235. ml4t/diagnostic/visualization/signal/multi_signal_plots.py +603 -0
  236. ml4t/diagnostic/visualization/signal/quantile_plots.py +625 -0
  237. ml4t/diagnostic/visualization/signal/turnover_plots.py +400 -0
  238. ml4t/diagnostic/visualization/trade_shap/__init__.py +90 -0
  239. ml4t_diagnostic-0.1.0a1.dist-info/METADATA +1044 -0
  240. ml4t_diagnostic-0.1.0a1.dist-info/RECORD +242 -0
  241. ml4t_diagnostic-0.1.0a1.dist-info/WHEEL +4 -0
  242. ml4t_diagnostic-0.1.0a1.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,460 @@
1
+ """GARCH model fitting for time-varying volatility.
2
+
3
+ GARCH (Generalized Autoregressive Conditional Heteroskedasticity) models
4
+ capture time-varying volatility in financial time series.
5
+
6
+ References:
7
+ Bollerslev, T. (1986). Generalized Autoregressive Conditional Heteroskedasticity.
8
+ Journal of Econometrics, 31(3), 307-327. DOI: 10.1016/0304-4076(86)90063-1
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+ from collections.abc import Callable
14
+ from typing import TYPE_CHECKING, Any, Literal
15
+
16
+ import numpy as np
17
+ import pandas as pd
18
+
19
+ from ml4t.diagnostic.errors import ComputationError, ValidationError
20
+ from ml4t.diagnostic.logging import get_logger
21
+
22
+ if TYPE_CHECKING:
23
+ pass
24
+
25
# Module-level logger, named after this module.
logger = get_logger(__name__)


# GARCH model fitting requires the arch package (optional dependency).
# The import is deferred until first use to avoid a slow module-level
# import (~200ms); the three globals below remember the outcome.

# Tri-state availability flag: None until _check_arch_available() has run,
# then True if the import succeeded and False if it raised ImportError.
HAS_ARCH: bool | None = None  # Will be set on first check
# Holds arch.arch_model after a successful import, otherwise None.
_arch_model_cache: Callable[..., Any] | None = None
# Holds arch.univariate.base.ARCHModelResult after a successful import,
# otherwise None.
_ARCHModelResult_cache: type[Any] | None = None
33
+
34
+
35
def _check_arch_available() -> bool:
    """Report whether the optional ``arch`` package can be imported.

    The import is attempted only while ``HAS_ARCH`` is still ``None``. Its
    outcome is stored in the module-level ``HAS_ARCH`` flag; on success the
    imported ``arch_model`` callable and ``ARCHModelResult`` class are stored
    in their module-level caches, on failure both caches are reset to
    ``None``. Later calls return the remembered flag without re-importing.

    Returns:
        True if ``arch`` was imported successfully, False otherwise.
    """
    global HAS_ARCH, _arch_model_cache, _ARCHModelResult_cache

    # Already probed: reuse the remembered answer.
    if HAS_ARCH is not None:
        return HAS_ARCH

    try:
        from arch import arch_model as _impl
        from arch.univariate.base import ARCHModelResult as _ARCHModelResultImpl
    except ImportError:
        _arch_model_cache, _ARCHModelResult_cache = None, None
        HAS_ARCH = False
    else:
        _arch_model_cache, _ARCHModelResult_cache = _impl, _ARCHModelResultImpl
        HAS_ARCH = True

    return HAS_ARCH
51
+
52
+
53
def _get_arch_model() -> Callable[..., Any]:
    """Return the lazily imported ``arch_model`` factory.

    Triggers the deferred import check first, then hands back the cached
    callable.

    Returns:
        The cached ``arch_model`` callable.

    Raises:
        ImportError: If no ``arch_model`` callable is cached after the check.
    """
    _check_arch_available()

    factory = _arch_model_cache
    if factory is not None:
        return factory

    raise ImportError(
        "GARCH fitting requires the 'arch' package. Install with: pip install arch"
    )
61
+
62
+
63
def _compute_skewness(data: np.ndarray) -> float:
    """Compute the sample skewness (third standardized moment).

    The population standard deviation (``ddof=0``) is used, i.e. this is the
    biased moment estimator.

    Args:
        data: Observations; converted to a float64 array.

    Returns:
        Mean of the cubed z-scores. ``0.0`` for constant data, ``nan`` for
        empty input (and for input containing NaN).
    """
    values = np.asarray(data, dtype=np.float64)
    if values.size == 0:
        return float("nan")

    # Exact constancy test. Checking ``std == 0`` alone is not enough:
    # rounding in the mean (e.g. for [0.1, 0.1, 0.1]) leaves a tiny non-zero
    # std, and the z-scores then collapse to +/-1 instead of 0.
    if np.max(values) == np.min(values):
        return 0.0

    std = np.std(values)
    if std == 0:
        return 0.0
    return float(np.mean(((values - np.mean(values)) / std) ** 3))
70
+
71
+
72
def _compute_kurtosis(data: np.ndarray) -> float:
    """Compute the sample excess kurtosis (fourth standardized moment minus 3).

    The population standard deviation (``ddof=0``) is used, i.e. this is the
    biased moment estimator.

    Args:
        data: Observations; converted to a float64 array.

    Returns:
        Mean of the fourth power of the z-scores, minus 3. ``0.0`` for
        constant data, ``nan`` for empty input (and for input containing NaN).
    """
    values = np.asarray(data, dtype=np.float64)
    if values.size == 0:
        return float("nan")

    # Exact constancy test. Checking ``std == 0`` alone is not enough:
    # rounding in the mean (e.g. for [0.1, 0.1, 0.1]) leaves a tiny non-zero
    # std, and the result then becomes -2 instead of 0.
    if np.max(values) == np.min(values):
        return 0.0

    std = np.std(values)
    if std == 0:
        return 0.0
    return float(np.mean(((values - np.mean(values)) / std) ** 4) - 3)
79
+
80
+
81
class GARCHResult:
    """Results from GARCH model fitting.

    GARCH (Generalized Autoregressive Conditional Heteroskedasticity) models
    capture time-varying volatility in financial time series. The GARCH(p,q)
    model specifies conditional variance as:

        σ²ₜ = ω + Σ(αᵢ·ε²ₜ₋ᵢ) + Σ(βⱼ·σ²ₜ₋ⱼ)

    For GARCH(1,1):
        σ²ₜ = ω + α·ε²ₜ₋₁ + β·σ²ₜ₋₁

    Attributes:
        omega: Constant term (long-run variance component)
        alpha: ARCH coefficient (impact of past squared errors); a float for
            a single lag, a tuple for several
        beta: GARCH coefficient (impact of past conditional variance); a
            float for a single lag, a tuple for several
        persistence: α + β (should be < 1 for stationarity)
        log_likelihood: Log-likelihood of fitted model
        aic: Akaike Information Criterion
        bic: Bayesian Information Criterion
        conditional_volatility: Fitted conditional volatility (σₜ)
        standardized_residuals: Residuals divided by conditional volatility
        converged: Whether optimization converged successfully
        iterations: Number of iterations taken
        n_obs: Number of observations used in fitting
    """

    def __init__(
        self,
        omega: float,
        alpha: float | tuple[float, ...],
        beta: float | tuple[float, ...],
        persistence: float,
        log_likelihood: float,
        aic: float,
        bic: float,
        conditional_volatility: pd.Series,
        standardized_residuals: pd.Series,
        converged: bool,
        iterations: int,
        n_obs: int,
    ):
        """Initialize GARCH result.

        Args:
            omega: Constant term
            alpha: ARCH coefficient(s)
            beta: GARCH coefficient(s)
            persistence: Sum of alpha and beta (alpha + beta)
            log_likelihood: Log-likelihood value
            aic: Akaike Information Criterion
            bic: Bayesian Information Criterion
            conditional_volatility: Fitted conditional volatility series
            standardized_residuals: Standardized residuals
            converged: Whether optimization converged
            iterations: Number of iterations
            n_obs: Number of observations
        """
        self.omega = omega
        self.alpha = alpha
        self.beta = beta
        self.persistence = persistence
        self.log_likelihood = log_likelihood
        self.aic = aic
        self.bic = bic
        self.conditional_volatility = conditional_volatility
        self.standardized_residuals = standardized_residuals
        self.converged = converged
        self.iterations = iterations
        self.n_obs = n_obs

    def __repr__(self) -> str:
        """String representation."""
        return (
            f"GARCHResult(omega={self.omega:.6f}, "
            f"alpha={self.alpha}, "
            f"beta={self.beta}, "
            f"persistence={self.persistence:.4f})"
        )

    @staticmethod
    def _finite_values(series: pd.Series) -> np.ndarray:
        """Return the finite entries of ``series`` as a float64 array."""
        values = np.asarray(series.to_numpy(), dtype=np.float64)
        return values[np.isfinite(values)]

    @staticmethod
    def _coefficient_lines(
        symbol: str, name: str, value: float | tuple[float, ...]
    ) -> list[str]:
        """Format a scalar coefficient as one line, a sequence as one line per lag."""
        # Tuple form of isinstance also works on interpreters older than 3.10.
        if isinstance(value, (tuple, list)):
            return [
                f" {symbol}{i} ({name}[{i}]): {coef:.6f}"
                for i, coef in enumerate(value, 1)
            ]
        return [f" {symbol} ({name}): {value:.6f}"]

    def summary(self) -> str:
        """Human-readable summary of GARCH model results.

        Descriptive statistics are computed over the finite entries of the
        volatility and residual series only, so a few NaN values do not turn
        every statistic into ``nan``. A series without any finite value is
        reported as such instead of raising.

        Returns:
            Formatted summary string
        """
        lines = [
            "GARCH Model Fitting Results",
            "=" * 50,
            f"Observations: {self.n_obs}",
            f"Converged: {'Yes' if self.converged else 'No'}",
            f"Iterations: {self.iterations}",
            "",
            "Model Parameters:",
            f" ω (omega): {self.omega:.6f}",
        ]

        # Handle scalar or vector alpha/beta
        lines.extend(self._coefficient_lines("α", "alpha", self.alpha))
        lines.extend(self._coefficient_lines("β", "beta", self.beta))

        lines.append("")
        lines.append(f"Persistence (α+β): {self.persistence:.6f}")

        if self.persistence >= 1.0:
            lines.append(" ⚠ WARNING: Persistence ≥ 1 (non-stationary)")
        elif self.persistence > 0.95:
            lines.append(" → High persistence (slow mean reversion)")
        else:
            lines.append(" → Stationary process")

        lines.append("")
        lines.append("Model Fit Statistics:")
        lines.append(f" Log-Likelihood: {self.log_likelihood:.4f}")
        lines.append(f" AIC: {self.aic:.4f}")
        lines.append(f" BIC: {self.bic:.4f}")

        lines.append("")
        lines.append("Conditional Volatility:")
        vol = self._finite_values(self.conditional_volatility)
        if vol.size:
            lines.append(f" Mean: {float(np.mean(vol)):.6f}")
            lines.append(f" Std Dev: {float(np.std(vol)):.6f}")
            lines.append(f" Min: {np.min(vol):.6f}")
            lines.append(f" Max: {np.max(vol):.6f}")
        else:
            # np.min / np.max raise ValueError on an empty array.
            lines.append(" (no finite values)")

        lines.append("")
        lines.append("Standardized Residuals:")
        resid = self._finite_values(self.standardized_residuals)
        if resid.size:
            lines.append(f" Mean: {float(np.mean(resid)):.6f}")
            lines.append(f" Std Dev: {float(np.std(resid)):.6f}")
            lines.append(f" Skewness: {_compute_skewness(resid):.4f}")
            lines.append(f" Kurtosis: {_compute_kurtosis(resid):.4f}")
        else:
            lines.append(" (no finite values)")

        lines.append("")
        lines.append("Interpretation:")
        lines.append(" - ω: Long-run unconditional variance = ω / (1 - α - β)")
        lines.append(" - α: Sensitivity to recent shocks (news impact)")
        lines.append(" - β: Persistence of past volatility")
        lines.append(" - α+β: Overall persistence (< 1 for stationarity)")

        return "\n".join(lines)
232
+
233
+
234
def _extract_lag_coefficients(params: Any, prefix: str, order: int) -> list[float]:
    """Collect ``prefix[1]`` .. ``prefix[order]`` from ``params``, skipping absent lags."""
    keys = (f"{prefix}[{lag}]" for lag in range(1, order + 1))
    return [float(params[key]) for key in keys if key in params]


def _extract_iteration_count(fitted: Any) -> int:
    """Best-effort optimizer iteration count for a fitted model; 0 if unknown."""
    # NOTE(review): arch results are expected to expose the SciPy
    # OptimizeResult as ``optimization_result`` (iteration count in ``nit``)
    # -- confirm against the pinned arch version.
    try:
        return int(fitted.optimization_result.nit)
    except (AttributeError, TypeError, ValueError):
        pass

    # Previous lookup, kept as a fallback for result objects carrying a
    # ``fit_info`` mapping.
    try:
        iterations = fitted.fit_info.get("iterations", 0)
    except (AttributeError, TypeError):
        return 0
    return iterations if isinstance(iterations, int) else 0


def fit_garch(
    returns: pd.Series | np.ndarray,
    p: int = 1,
    q: int = 1,
    mean_model: Literal[
        "Constant", "Zero", "LS", "AR", "ARX", "HAR", "HARX", "constant", "zero"
    ] = "Zero",
    dist: Literal[
        "normal", "gaussian", "t", "studentst", "skewstudent", "skewt", "ged", "generalized error"
    ] = "normal",
) -> GARCHResult:
    """Fit GARCH(p, q) model to returns series.

    GARCH (Generalized Autoregressive Conditional Heteroskedasticity) models
    are used to model time-varying volatility in financial time series. The
    GARCH(p,q) model specifies conditional variance as:

        σ²ₜ = ω + Σ(αᵢ·ε²ₜ₋ᵢ) + Σ(βⱼ·σ²ₜ₋ⱼ)

    For the common GARCH(1,1):
        σ²ₜ = ω + α·ε²ₜ₋₁ + β·σ²ₜ₋₁

    Where:
        - ω (omega): Constant term
        - α (alpha): ARCH coefficient (impact of past squared errors)
        - β (beta): GARCH coefficient (impact of past conditional variance)

    Persistence (α + β) should be < 1 for stationarity. Values close to 1
    indicate high volatility persistence.

    Args:
        returns: Returns series (NOT prices) to fit GARCH model. The data is
            passed to the model unscaled, so fitted parameters are in the
            units of the input.
        p: ARCH order (number of lagged squared errors), default 1
        q: GARCH order (number of lagged conditional variances), default 1
        mean_model: Mean model specification, one of:
            - "Zero": Zero mean (default, common for returns)
            - "Constant": Constant mean
            - "AR": Autoregressive mean
            - "ARX": AR with exogenous regressors
            - "HAR": Heterogeneous AR
            - "LS": Least squares
        dist: Error distribution, one of:
            - "normal": Normal distribution (default)
            - "t": Student's t distribution (fat tails)
            - "skewt": Skewed Student's t distribution
            - "ged": Generalized Error Distribution

    Returns:
        GARCHResult with fitted parameters and diagnostics

    Raises:
        ValidationError: If data is invalid (empty, not 1-dimensional,
            non-numeric, non-finite, too short), if p or q is below 1, or
            if the arch package is not installed
        ComputationError: If GARCH fitting fails

    Notes:
        - Requires arch package: pip install arch
        - GARCH(1,1) is sufficient for most financial applications
        - Higher orders (p>1, q>1) rarely improve fit significantly
        - Use ARCH-LM test first to check if GARCH is appropriate
        - Convergence can be sensitive to starting values
        - Consider Student's t or skewed t for fat-tailed returns

    References:
        Bollerslev, T. (1986). Generalized Autoregressive Conditional
        Heteroskedasticity. Journal of Econometrics, 31(3), 307-327.
        DOI: 10.1016/0304-4076(86)90063-1
    """
    # Check if arch package is available (lazy check)
    if not _check_arch_available():
        raise ValidationError(
            "GARCH fitting requires the 'arch' package. Install with: pip install arch",
            context={"available": False},
        )
    logger.debug(f"Fitting GARCH({p},{q}) model with mean_model={mean_model}, dist={dist}")

    # Convert to a float array for validation. Non-numeric input is reported
    # as a ValidationError instead of surfacing as a TypeError from np.isfinite.
    raw = returns.to_numpy() if isinstance(returns, pd.Series) else np.asarray(returns)
    try:
        arr = np.asarray(raw, dtype=np.float64)
    except (TypeError, ValueError) as exc:
        raise ValidationError(
            f"Returns must be numeric, got dtype {raw.dtype}",
            context={"dtype": str(raw.dtype)},
        ) from exc

    # Validate input
    if arr.size == 0:
        raise ValidationError(
            "Cannot fit GARCH on empty data",
            context={"data_size": 0},
        )

    if arr.ndim != 1:
        raise ValidationError(
            f"Returns must be 1-dimensional, got shape {arr.shape}",
            context={"data_shape": arr.shape},
        )

    invalid_mask = ~np.isfinite(arr)
    if invalid_mask.any():
        # Plain int so the context holds no numpy scalars.
        n_invalid = int(invalid_mask.sum())
        raise ValidationError(
            f"Returns contain {n_invalid} NaN or infinite values",
            context={"n_invalid": n_invalid, "data_size": arr.size},
        )

    # Validate model parameters before the sample-size check, so an invalid
    # order is reported as such rather than as "insufficient data".
    if p < 1:
        raise ValidationError(
            f"ARCH order (p) must be at least 1, got {p}",
            context={"p": p},
        )

    if q < 1:
        raise ValidationError(
            f"GARCH order (q) must be at least 1, got {q}",
            context={"q": q},
        )

    # Check minimum sample size
    min_obs = max(p, q) * 10 + 50  # Need sufficient data for estimation
    if arr.size < min_obs:
        raise ValidationError(
            f"Insufficient data for GARCH({p},{q}). Need at least {min_obs} observations, got {arr.size}",
            context={"n_obs": arr.size, "p": p, "q": q, "min_required": min_obs},
        )

    try:
        # Hand arch a pandas Series: wrap array input, copy Series input so
        # the caller's object is never touched. No rescaling is applied.
        returns_series = (
            pd.Series(arr, name="returns") if not isinstance(returns, pd.Series) else returns.copy()
        )

        # Create and fit GARCH model using arch library (lazy import)
        model = _get_arch_model()(
            returns_series,
            mean=mean_model,
            vol="GARCH",
            p=p,
            q=q,
            dist=dist,
        )

        # Fit model (may take time for complex models)
        fitted = model.fit(disp="off", show_warning=False)

        # Extract parameters. For GARCH(1,1) these are typically:
        # omega (constant), alpha[1] (ARCH), beta[1] (GARCH)
        params = fitted.params
        omega = float(params.get("omega", 0.0))
        alpha_list = _extract_lag_coefficients(params, "alpha", p)
        beta_list = _extract_lag_coefficients(params, "beta", q)

        # Expose a single lag as a scalar, anything else as a tuple
        alpha: float | tuple[float, ...] = (
            alpha_list[0] if len(alpha_list) == 1 else tuple(alpha_list)
        )
        beta: float | tuple[float, ...] = (
            beta_list[0] if len(beta_list) == 1 else tuple(beta_list)
        )

        # Compute persistence (sum of all alpha and beta coefficients)
        persistence = float(sum(alpha_list) + sum(beta_list))

        # Extract convergence info; a flag of 0 means success
        converged = bool(fitted.convergence_flag == 0)
        iterations = _extract_iteration_count(fitted)

        logger.info(
            f"GARCH({p},{q}) fitted successfully",
            omega=omega,
            alpha=alpha,
            beta=beta,
            persistence=persistence,
            converged=converged,
        )

        return GARCHResult(
            omega=omega,
            alpha=alpha,
            beta=beta,
            persistence=persistence,
            log_likelihood=float(fitted.loglikelihood),
            aic=float(fitted.aic),
            bic=float(fitted.bic),
            conditional_volatility=fitted.conditional_volatility,
            standardized_residuals=fitted.std_resid,
            converged=converged,
            iterations=iterations,
            n_obs=arr.size,
        )

    except Exception as e:
        # Any failure while building, fitting or unpacking the model is
        # reported as a ComputationError, chained to the original exception.
        logger.error(f"GARCH fitting failed: {e}", p=p, q=q, n_obs=arr.size)
        raise ComputationError(
            f"GARCH({p},{q}) fitting failed: {e}",
            context={
                "n_obs": arr.size,
                "p": p,
                "q": q,
                "mean_model": mean_model,
                "dist": dist,
            },
            cause=e,
        ) from e
@@ -0,0 +1,48 @@
1
+ """Integration contracts for external libraries."""
2
+
3
+ from ml4t.diagnostic.integration.backtest_contract import (
4
+ ComparisonRequest,
5
+ ComparisonResult,
6
+ ComparisonType,
7
+ EnvironmentType,
8
+ EvaluationExport,
9
+ PromotionWorkflow,
10
+ StrategyMetadata,
11
+ TradeRecord,
12
+ )
13
+ from ml4t.diagnostic.integration.data_contract import (
14
+ AnomalyType,
15
+ DataAnomaly,
16
+ DataQualityMetrics,
17
+ DataQualityReport,
18
+ DataValidationRequest,
19
+ Severity,
20
+ )
21
+ from ml4t.diagnostic.integration.engineer_contract import (
22
+ EngineerConfig,
23
+ PreprocessingRecommendation,
24
+ TransformType,
25
+ )
26
+
27
# Names re-exported by this package, grouped by the contract module each
# one is imported from above.
__all__ = [
    # ml4t.data integration (from data_contract)
    "AnomalyType",
    "DataAnomaly",
    "DataQualityMetrics",
    "DataQualityReport",
    "DataValidationRequest",
    "Severity",
    # ml4t.engineer integration (from engineer_contract)
    "PreprocessingRecommendation",
    "EngineerConfig",
    "TransformType",
    # ml4t.backtest integration (from backtest_contract)
    "ComparisonRequest",
    "ComparisonResult",
    "ComparisonType",
    "EnvironmentType",
    "EvaluationExport",
    "PromotionWorkflow",
    "StrategyMetadata",
    "TradeRecord",
]