ml4t-diagnostic 0.1.0a1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ml4t/diagnostic/AGENT.md +25 -0
- ml4t/diagnostic/__init__.py +166 -0
- ml4t/diagnostic/backends/__init__.py +10 -0
- ml4t/diagnostic/backends/adapter.py +192 -0
- ml4t/diagnostic/backends/polars_backend.py +899 -0
- ml4t/diagnostic/caching/__init__.py +40 -0
- ml4t/diagnostic/caching/cache.py +331 -0
- ml4t/diagnostic/caching/decorators.py +131 -0
- ml4t/diagnostic/caching/smart_cache.py +339 -0
- ml4t/diagnostic/config/AGENT.md +24 -0
- ml4t/diagnostic/config/README.md +267 -0
- ml4t/diagnostic/config/__init__.py +219 -0
- ml4t/diagnostic/config/barrier_config.py +277 -0
- ml4t/diagnostic/config/base.py +301 -0
- ml4t/diagnostic/config/event_config.py +148 -0
- ml4t/diagnostic/config/feature_config.py +404 -0
- ml4t/diagnostic/config/multi_signal_config.py +55 -0
- ml4t/diagnostic/config/portfolio_config.py +215 -0
- ml4t/diagnostic/config/report_config.py +391 -0
- ml4t/diagnostic/config/sharpe_config.py +202 -0
- ml4t/diagnostic/config/signal_config.py +206 -0
- ml4t/diagnostic/config/trade_analysis_config.py +310 -0
- ml4t/diagnostic/config/validation.py +279 -0
- ml4t/diagnostic/core/__init__.py +29 -0
- ml4t/diagnostic/core/numba_utils.py +315 -0
- ml4t/diagnostic/core/purging.py +372 -0
- ml4t/diagnostic/core/sampling.py +471 -0
- ml4t/diagnostic/errors/__init__.py +205 -0
- ml4t/diagnostic/evaluation/AGENT.md +26 -0
- ml4t/diagnostic/evaluation/__init__.py +437 -0
- ml4t/diagnostic/evaluation/autocorrelation.py +531 -0
- ml4t/diagnostic/evaluation/barrier_analysis.py +1050 -0
- ml4t/diagnostic/evaluation/binary_metrics.py +910 -0
- ml4t/diagnostic/evaluation/dashboard.py +715 -0
- ml4t/diagnostic/evaluation/diagnostic_plots.py +1037 -0
- ml4t/diagnostic/evaluation/distribution/__init__.py +499 -0
- ml4t/diagnostic/evaluation/distribution/moments.py +299 -0
- ml4t/diagnostic/evaluation/distribution/tails.py +777 -0
- ml4t/diagnostic/evaluation/distribution/tests.py +470 -0
- ml4t/diagnostic/evaluation/drift/__init__.py +139 -0
- ml4t/diagnostic/evaluation/drift/analysis.py +432 -0
- ml4t/diagnostic/evaluation/drift/domain_classifier.py +517 -0
- ml4t/diagnostic/evaluation/drift/population_stability_index.py +310 -0
- ml4t/diagnostic/evaluation/drift/wasserstein.py +388 -0
- ml4t/diagnostic/evaluation/event_analysis.py +647 -0
- ml4t/diagnostic/evaluation/excursion.py +390 -0
- ml4t/diagnostic/evaluation/feature_diagnostics.py +873 -0
- ml4t/diagnostic/evaluation/feature_outcome.py +666 -0
- ml4t/diagnostic/evaluation/framework.py +935 -0
- ml4t/diagnostic/evaluation/metric_registry.py +255 -0
- ml4t/diagnostic/evaluation/metrics/AGENT.md +23 -0
- ml4t/diagnostic/evaluation/metrics/__init__.py +133 -0
- ml4t/diagnostic/evaluation/metrics/basic.py +160 -0
- ml4t/diagnostic/evaluation/metrics/conditional_ic.py +469 -0
- ml4t/diagnostic/evaluation/metrics/feature_outcome.py +475 -0
- ml4t/diagnostic/evaluation/metrics/ic_statistics.py +446 -0
- ml4t/diagnostic/evaluation/metrics/importance_analysis.py +338 -0
- ml4t/diagnostic/evaluation/metrics/importance_classical.py +375 -0
- ml4t/diagnostic/evaluation/metrics/importance_mda.py +371 -0
- ml4t/diagnostic/evaluation/metrics/importance_shap.py +715 -0
- ml4t/diagnostic/evaluation/metrics/information_coefficient.py +527 -0
- ml4t/diagnostic/evaluation/metrics/interactions.py +772 -0
- ml4t/diagnostic/evaluation/metrics/monotonicity.py +226 -0
- ml4t/diagnostic/evaluation/metrics/risk_adjusted.py +324 -0
- ml4t/diagnostic/evaluation/multi_signal.py +550 -0
- ml4t/diagnostic/evaluation/portfolio_analysis/__init__.py +83 -0
- ml4t/diagnostic/evaluation/portfolio_analysis/analysis.py +734 -0
- ml4t/diagnostic/evaluation/portfolio_analysis/metrics.py +589 -0
- ml4t/diagnostic/evaluation/portfolio_analysis/results.py +334 -0
- ml4t/diagnostic/evaluation/report_generation.py +824 -0
- ml4t/diagnostic/evaluation/signal_selector.py +452 -0
- ml4t/diagnostic/evaluation/stat_registry.py +139 -0
- ml4t/diagnostic/evaluation/stationarity/__init__.py +97 -0
- ml4t/diagnostic/evaluation/stationarity/analysis.py +518 -0
- ml4t/diagnostic/evaluation/stationarity/augmented_dickey_fuller.py +296 -0
- ml4t/diagnostic/evaluation/stationarity/kpss_test.py +308 -0
- ml4t/diagnostic/evaluation/stationarity/phillips_perron.py +365 -0
- ml4t/diagnostic/evaluation/stats/AGENT.md +43 -0
- ml4t/diagnostic/evaluation/stats/__init__.py +191 -0
- ml4t/diagnostic/evaluation/stats/backtest_overfitting.py +219 -0
- ml4t/diagnostic/evaluation/stats/bootstrap.py +228 -0
- ml4t/diagnostic/evaluation/stats/deflated_sharpe_ratio.py +591 -0
- ml4t/diagnostic/evaluation/stats/false_discovery_rate.py +295 -0
- ml4t/diagnostic/evaluation/stats/hac_standard_errors.py +108 -0
- ml4t/diagnostic/evaluation/stats/minimum_track_record.py +408 -0
- ml4t/diagnostic/evaluation/stats/moments.py +164 -0
- ml4t/diagnostic/evaluation/stats/rademacher_adjustment.py +436 -0
- ml4t/diagnostic/evaluation/stats/reality_check.py +155 -0
- ml4t/diagnostic/evaluation/stats/sharpe_inference.py +219 -0
- ml4t/diagnostic/evaluation/themes.py +330 -0
- ml4t/diagnostic/evaluation/threshold_analysis.py +957 -0
- ml4t/diagnostic/evaluation/trade_analysis.py +1136 -0
- ml4t/diagnostic/evaluation/trade_dashboard/__init__.py +32 -0
- ml4t/diagnostic/evaluation/trade_dashboard/app.py +315 -0
- ml4t/diagnostic/evaluation/trade_dashboard/export/__init__.py +18 -0
- ml4t/diagnostic/evaluation/trade_dashboard/export/csv.py +82 -0
- ml4t/diagnostic/evaluation/trade_dashboard/export/html.py +276 -0
- ml4t/diagnostic/evaluation/trade_dashboard/io.py +166 -0
- ml4t/diagnostic/evaluation/trade_dashboard/normalize.py +304 -0
- ml4t/diagnostic/evaluation/trade_dashboard/stats.py +386 -0
- ml4t/diagnostic/evaluation/trade_dashboard/style.py +79 -0
- ml4t/diagnostic/evaluation/trade_dashboard/tabs/__init__.py +21 -0
- ml4t/diagnostic/evaluation/trade_dashboard/tabs/patterns.py +354 -0
- ml4t/diagnostic/evaluation/trade_dashboard/tabs/shap_analysis.py +280 -0
- ml4t/diagnostic/evaluation/trade_dashboard/tabs/stat_validation.py +186 -0
- ml4t/diagnostic/evaluation/trade_dashboard/tabs/worst_trades.py +236 -0
- ml4t/diagnostic/evaluation/trade_dashboard/types.py +129 -0
- ml4t/diagnostic/evaluation/trade_shap/__init__.py +102 -0
- ml4t/diagnostic/evaluation/trade_shap/alignment.py +188 -0
- ml4t/diagnostic/evaluation/trade_shap/characterize.py +413 -0
- ml4t/diagnostic/evaluation/trade_shap/cluster.py +302 -0
- ml4t/diagnostic/evaluation/trade_shap/explain.py +208 -0
- ml4t/diagnostic/evaluation/trade_shap/hypotheses/__init__.py +23 -0
- ml4t/diagnostic/evaluation/trade_shap/hypotheses/generator.py +290 -0
- ml4t/diagnostic/evaluation/trade_shap/hypotheses/matcher.py +251 -0
- ml4t/diagnostic/evaluation/trade_shap/hypotheses/templates.yaml +467 -0
- ml4t/diagnostic/evaluation/trade_shap/models.py +386 -0
- ml4t/diagnostic/evaluation/trade_shap/normalize.py +116 -0
- ml4t/diagnostic/evaluation/trade_shap/pipeline.py +263 -0
- ml4t/diagnostic/evaluation/trade_shap_dashboard.py +283 -0
- ml4t/diagnostic/evaluation/trade_shap_diagnostics.py +588 -0
- ml4t/diagnostic/evaluation/validated_cv.py +535 -0
- ml4t/diagnostic/evaluation/visualization.py +1050 -0
- ml4t/diagnostic/evaluation/volatility/__init__.py +45 -0
- ml4t/diagnostic/evaluation/volatility/analysis.py +351 -0
- ml4t/diagnostic/evaluation/volatility/arch.py +258 -0
- ml4t/diagnostic/evaluation/volatility/garch.py +460 -0
- ml4t/diagnostic/integration/__init__.py +48 -0
- ml4t/diagnostic/integration/backtest_contract.py +671 -0
- ml4t/diagnostic/integration/data_contract.py +316 -0
- ml4t/diagnostic/integration/engineer_contract.py +226 -0
- ml4t/diagnostic/logging/__init__.py +77 -0
- ml4t/diagnostic/logging/logger.py +245 -0
- ml4t/diagnostic/logging/performance.py +234 -0
- ml4t/diagnostic/logging/progress.py +234 -0
- ml4t/diagnostic/logging/wandb.py +412 -0
- ml4t/diagnostic/metrics/__init__.py +9 -0
- ml4t/diagnostic/metrics/percentiles.py +128 -0
- ml4t/diagnostic/py.typed +1 -0
- ml4t/diagnostic/reporting/__init__.py +43 -0
- ml4t/diagnostic/reporting/base.py +130 -0
- ml4t/diagnostic/reporting/html_renderer.py +275 -0
- ml4t/diagnostic/reporting/json_renderer.py +51 -0
- ml4t/diagnostic/reporting/markdown_renderer.py +117 -0
- ml4t/diagnostic/results/AGENT.md +24 -0
- ml4t/diagnostic/results/__init__.py +105 -0
- ml4t/diagnostic/results/barrier_results/__init__.py +36 -0
- ml4t/diagnostic/results/barrier_results/hit_rate.py +304 -0
- ml4t/diagnostic/results/barrier_results/precision_recall.py +266 -0
- ml4t/diagnostic/results/barrier_results/profit_factor.py +297 -0
- ml4t/diagnostic/results/barrier_results/tearsheet.py +397 -0
- ml4t/diagnostic/results/barrier_results/time_to_target.py +305 -0
- ml4t/diagnostic/results/barrier_results/validation.py +38 -0
- ml4t/diagnostic/results/base.py +177 -0
- ml4t/diagnostic/results/event_results.py +349 -0
- ml4t/diagnostic/results/feature_results.py +787 -0
- ml4t/diagnostic/results/multi_signal_results.py +431 -0
- ml4t/diagnostic/results/portfolio_results.py +281 -0
- ml4t/diagnostic/results/sharpe_results.py +448 -0
- ml4t/diagnostic/results/signal_results/__init__.py +74 -0
- ml4t/diagnostic/results/signal_results/ic.py +581 -0
- ml4t/diagnostic/results/signal_results/irtc.py +110 -0
- ml4t/diagnostic/results/signal_results/quantile.py +392 -0
- ml4t/diagnostic/results/signal_results/tearsheet.py +456 -0
- ml4t/diagnostic/results/signal_results/turnover.py +213 -0
- ml4t/diagnostic/results/signal_results/validation.py +147 -0
- ml4t/diagnostic/signal/AGENT.md +17 -0
- ml4t/diagnostic/signal/__init__.py +69 -0
- ml4t/diagnostic/signal/_report.py +152 -0
- ml4t/diagnostic/signal/_utils.py +261 -0
- ml4t/diagnostic/signal/core.py +275 -0
- ml4t/diagnostic/signal/quantile.py +148 -0
- ml4t/diagnostic/signal/result.py +214 -0
- ml4t/diagnostic/signal/signal_ic.py +129 -0
- ml4t/diagnostic/signal/turnover.py +182 -0
- ml4t/diagnostic/splitters/AGENT.md +19 -0
- ml4t/diagnostic/splitters/__init__.py +36 -0
- ml4t/diagnostic/splitters/base.py +501 -0
- ml4t/diagnostic/splitters/calendar.py +421 -0
- ml4t/diagnostic/splitters/calendar_config.py +91 -0
- ml4t/diagnostic/splitters/combinatorial.py +1064 -0
- ml4t/diagnostic/splitters/config.py +322 -0
- ml4t/diagnostic/splitters/cpcv/__init__.py +57 -0
- ml4t/diagnostic/splitters/cpcv/combinations.py +119 -0
- ml4t/diagnostic/splitters/cpcv/partitioning.py +263 -0
- ml4t/diagnostic/splitters/cpcv/purge_engine.py +379 -0
- ml4t/diagnostic/splitters/cpcv/windows.py +190 -0
- ml4t/diagnostic/splitters/group_isolation.py +329 -0
- ml4t/diagnostic/splitters/persistence.py +316 -0
- ml4t/diagnostic/splitters/utils.py +207 -0
- ml4t/diagnostic/splitters/walk_forward.py +757 -0
- ml4t/diagnostic/utils/__init__.py +42 -0
- ml4t/diagnostic/utils/config.py +542 -0
- ml4t/diagnostic/utils/dependencies.py +318 -0
- ml4t/diagnostic/utils/sessions.py +127 -0
- ml4t/diagnostic/validation/__init__.py +54 -0
- ml4t/diagnostic/validation/dataframe.py +274 -0
- ml4t/diagnostic/validation/returns.py +280 -0
- ml4t/diagnostic/validation/timeseries.py +299 -0
- ml4t/diagnostic/visualization/AGENT.md +19 -0
- ml4t/diagnostic/visualization/__init__.py +223 -0
- ml4t/diagnostic/visualization/backtest/__init__.py +98 -0
- ml4t/diagnostic/visualization/backtest/cost_attribution.py +762 -0
- ml4t/diagnostic/visualization/backtest/executive_summary.py +895 -0
- ml4t/diagnostic/visualization/backtest/interactive_controls.py +673 -0
- ml4t/diagnostic/visualization/backtest/statistical_validity.py +874 -0
- ml4t/diagnostic/visualization/backtest/tearsheet.py +565 -0
- ml4t/diagnostic/visualization/backtest/template_system.py +373 -0
- ml4t/diagnostic/visualization/backtest/trade_plots.py +1172 -0
- ml4t/diagnostic/visualization/barrier_plots.py +782 -0
- ml4t/diagnostic/visualization/core.py +1060 -0
- ml4t/diagnostic/visualization/dashboards/__init__.py +36 -0
- ml4t/diagnostic/visualization/dashboards/base.py +582 -0
- ml4t/diagnostic/visualization/dashboards/importance.py +801 -0
- ml4t/diagnostic/visualization/dashboards/interaction.py +263 -0
- ml4t/diagnostic/visualization/dashboards.py +43 -0
- ml4t/diagnostic/visualization/data_extraction/__init__.py +48 -0
- ml4t/diagnostic/visualization/data_extraction/importance.py +649 -0
- ml4t/diagnostic/visualization/data_extraction/interaction.py +504 -0
- ml4t/diagnostic/visualization/data_extraction/types.py +113 -0
- ml4t/diagnostic/visualization/data_extraction/validation.py +66 -0
- ml4t/diagnostic/visualization/feature_plots.py +888 -0
- ml4t/diagnostic/visualization/interaction_plots.py +618 -0
- ml4t/diagnostic/visualization/portfolio/__init__.py +41 -0
- ml4t/diagnostic/visualization/portfolio/dashboard.py +514 -0
- ml4t/diagnostic/visualization/portfolio/drawdown_plots.py +341 -0
- ml4t/diagnostic/visualization/portfolio/returns_plots.py +487 -0
- ml4t/diagnostic/visualization/portfolio/risk_plots.py +301 -0
- ml4t/diagnostic/visualization/report_generation.py +1343 -0
- ml4t/diagnostic/visualization/signal/__init__.py +103 -0
- ml4t/diagnostic/visualization/signal/dashboard.py +911 -0
- ml4t/diagnostic/visualization/signal/event_plots.py +514 -0
- ml4t/diagnostic/visualization/signal/ic_plots.py +635 -0
- ml4t/diagnostic/visualization/signal/multi_signal_dashboard.py +974 -0
- ml4t/diagnostic/visualization/signal/multi_signal_plots.py +603 -0
- ml4t/diagnostic/visualization/signal/quantile_plots.py +625 -0
- ml4t/diagnostic/visualization/signal/turnover_plots.py +400 -0
- ml4t/diagnostic/visualization/trade_shap/__init__.py +90 -0
- ml4t_diagnostic-0.1.0a1.dist-info/METADATA +1044 -0
- ml4t_diagnostic-0.1.0a1.dist-info/RECORD +242 -0
- ml4t_diagnostic-0.1.0a1.dist-info/WHEEL +4 -0
- ml4t_diagnostic-0.1.0a1.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,460 @@
|
|
|
1
|
+
"""GARCH model fitting for time-varying volatility.
|
|
2
|
+
|
|
3
|
+
GARCH (Generalized Autoregressive Conditional Heteroskedasticity) models
|
|
4
|
+
capture time-varying volatility in financial time series.
|
|
5
|
+
|
|
6
|
+
References:
|
|
7
|
+
Bollerslev, T. (1986). Generalized Autoregressive Conditional Heteroskedasticity.
|
|
8
|
+
Journal of Econometrics, 31(3), 307-327. DOI: 10.1016/0304-4076(86)90063-1
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from __future__ import annotations
|
|
12
|
+
|
|
13
|
+
from collections.abc import Callable
|
|
14
|
+
from typing import TYPE_CHECKING, Any, Literal
|
|
15
|
+
|
|
16
|
+
import numpy as np
|
|
17
|
+
import pandas as pd
|
|
18
|
+
|
|
19
|
+
from ml4t.diagnostic.errors import ComputationError, ValidationError
|
|
20
|
+
from ml4t.diagnostic.logging import get_logger
|
|
21
|
+
|
|
22
|
+
if TYPE_CHECKING:
|
|
23
|
+
pass
|
|
24
|
+
|
|
25
|
+
logger = get_logger(__name__)
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
# The optional ``arch`` dependency is imported lazily, on first use, so that
# importing this module does not pay its import cost (~200ms).
# HAS_ARCH is None until the first availability probe, then True or False.
HAS_ARCH: bool | None = None
# Populated by the availability probe; both stay None when ``arch`` is missing.
_arch_model_cache: Callable[..., Any] | None = None
_ARCHModelResult_cache: type[Any] | None = None
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def _check_arch_available() -> bool:
    """Report whether the optional ``arch`` package can be imported.

    The import is attempted only on the first call; the outcome is stored in
    the module-level ``HAS_ARCH`` flag and reused afterwards. On success the
    ``arch_model`` factory and the ``ARCHModelResult`` class are cached in
    ``_arch_model_cache`` and ``_ARCHModelResult_cache``; on failure both
    caches are reset to ``None``.

    Returns:
        True if ``arch`` was imported successfully, False otherwise.
    """
    global HAS_ARCH, _arch_model_cache, _ARCHModelResult_cache

    # Already probed: reuse the stored answer.
    if HAS_ARCH is not None:
        return HAS_ARCH

    try:
        from arch import arch_model as _impl
        from arch.univariate.base import ARCHModelResult as _ARCHModelResultImpl
    except ImportError:
        HAS_ARCH = False
        _arch_model_cache = None
        _ARCHModelResult_cache = None
    else:
        _arch_model_cache = _impl
        _ARCHModelResult_cache = _ARCHModelResultImpl
        HAS_ARCH = True

    return HAS_ARCH
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def _get_arch_model() -> Callable[..., Any]:
    """Return the cached ``arch.arch_model`` factory, importing it if needed.

    Returns:
        The ``arch_model`` callable stored in ``_arch_model_cache``.

    Raises:
        ImportError: If the cache is still empty after the availability
            probe, i.e. the ``arch`` package could not be imported.
    """
    _check_arch_available()
    factory = _arch_model_cache
    if factory is not None:
        return factory
    raise ImportError(
        "GARCH fitting requires the 'arch' package. Install with: pip install arch"
    )
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def _compute_skewness(data: np.ndarray) -> float:
|
|
64
|
+
"""Compute sample skewness."""
|
|
65
|
+
mean = np.mean(data)
|
|
66
|
+
std = np.std(data)
|
|
67
|
+
if std == 0:
|
|
68
|
+
return 0.0
|
|
69
|
+
return float(np.mean(((data - mean) / std) ** 3))
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def _compute_kurtosis(data: np.ndarray) -> float:
|
|
73
|
+
"""Compute sample excess kurtosis."""
|
|
74
|
+
mean = np.mean(data)
|
|
75
|
+
std = np.std(data)
|
|
76
|
+
if std == 0:
|
|
77
|
+
return 0.0
|
|
78
|
+
return float(np.mean(((data - mean) / std) ** 4) - 3)
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
class GARCHResult:
    """Container for the outputs of a fitted GARCH(p, q) model.

    The GARCH(p,q) conditional variance equation is:

        σ²ₜ = ω + Σ(αᵢ·ε²ₜ₋ᵢ) + Σ(βⱼ·σ²ₜ₋ⱼ)

    which for GARCH(1,1) reduces to:

        σ²ₜ = ω + α·ε²ₜ₋₁ + β·σ²ₜ₋₁

    Attributes:
        omega: Constant term (long-run variance component)
        alpha: ARCH coefficient(s) (impact of past squared errors); a float
            for a single lag, a tuple for several
        beta: GARCH coefficient(s) (impact of past conditional variance); a
            float for a single lag, a tuple for several
        persistence: α + β (should be < 1 for stationarity)
        log_likelihood: Log-likelihood of fitted model
        aic: Akaike Information Criterion
        bic: Bayesian Information Criterion
        conditional_volatility: Fitted conditional volatility (σₜ)
        standardized_residuals: Residuals divided by conditional volatility
        converged: Whether optimization converged successfully
        iterations: Number of iterations taken
        n_obs: Number of observations used in fitting
    """

    def __init__(
        self,
        omega: float,
        alpha: float | tuple[float, ...],
        beta: float | tuple[float, ...],
        persistence: float,
        log_likelihood: float,
        aic: float,
        bic: float,
        conditional_volatility: pd.Series,
        standardized_residuals: pd.Series,
        converged: bool,
        iterations: int,
        n_obs: int,
    ):
        """Store the fitted parameters and diagnostics unchanged.

        Args:
            omega: Constant term
            alpha: ARCH coefficient(s)
            beta: GARCH coefficient(s)
            persistence: Sum of alpha and beta (alpha + beta)
            log_likelihood: Log-likelihood value
            aic: Akaike Information Criterion
            bic: Bayesian Information Criterion
            conditional_volatility: Fitted conditional volatility series
            standardized_residuals: Standardized residuals
            converged: Whether optimization converged
            iterations: Number of iterations
            n_obs: Number of observations
        """
        # Model parameters
        self.omega = omega
        self.alpha = alpha
        self.beta = beta
        self.persistence = persistence
        # Goodness-of-fit statistics
        self.log_likelihood = log_likelihood
        self.aic = aic
        self.bic = bic
        # Fitted series
        self.conditional_volatility = conditional_volatility
        self.standardized_residuals = standardized_residuals
        # Optimizer bookkeeping
        self.converged = converged
        self.iterations = iterations
        self.n_obs = n_obs

    def __repr__(self) -> str:
        """Return a compact one-line description of the key parameters."""
        fields = ", ".join(
            (
                f"omega={self.omega:.6f}",
                f"alpha={self.alpha}",
                f"beta={self.beta}",
                f"persistence={self.persistence:.4f}",
            )
        )
        return f"GARCHResult({fields})"

    def summary(self) -> str:
        """Build a multi-line, human-readable report of the fitted model.

        The report lists fit metadata, the parameters, a persistence
        assessment, fit statistics, descriptive statistics of the conditional
        volatility and standardized residuals, and a short interpretation key.

        Returns:
            Formatted summary string
        """
        report = [
            "GARCH Model Fitting Results",
            "=" * 50,
            f"Observations: {self.n_obs}",
            f"Converged: {'Yes' if self.converged else 'No'}",
            f"Iterations: {self.iterations}",
            "",
            "Model Parameters:",
            f" ω (omega): {self.omega:.6f}",
        ]

        # alpha/beta are either a single float or a sequence with one entry per lag
        if isinstance(self.alpha, (tuple, list)):
            report.extend(
                f" α{lag} (alpha[{lag}]): {coef:.6f}"
                for lag, coef in enumerate(self.alpha, 1)
            )
        else:
            report.append(f" α (alpha): {self.alpha:.6f}")

        if isinstance(self.beta, (tuple, list)):
            report.extend(
                f" β{lag} (beta[{lag}]): {coef:.6f}"
                for lag, coef in enumerate(self.beta, 1)
            )
        else:
            report.append(f" β (beta): {self.beta:.6f}")

        # Classify persistence: >= 1 non-stationary, > 0.95 slow mean reversion
        if self.persistence >= 1.0:
            verdict = " ⚠ WARNING: Persistence ≥ 1 (non-stationary)"
        elif self.persistence > 0.95:
            verdict = " → High persistence (slow mean reversion)"
        else:
            verdict = " → Stationary process"
        report += ["", f"Persistence (α+β): {self.persistence:.6f}", verdict]

        report += [
            "",
            "Model Fit Statistics:",
            f" Log-Likelihood: {self.log_likelihood:.4f}",
            f" AIC: {self.aic:.4f}",
            f" BIC: {self.bic:.4f}",
        ]

        sigma = np.asarray(self.conditional_volatility.to_numpy(), dtype=np.float64)
        report += [
            "",
            "Conditional Volatility:",
            f" Mean: {float(np.mean(sigma)):.6f}",
            f" Std Dev: {float(np.std(sigma)):.6f}",
            f" Min: {np.min(sigma):.6f}",
            f" Max: {np.max(sigma):.6f}",
        ]

        z = np.asarray(self.standardized_residuals.to_numpy(), dtype=np.float64)
        report += [
            "",
            "Standardized Residuals:",
            f" Mean: {float(np.mean(z)):.6f}",
            f" Std Dev: {float(np.std(z)):.6f}",
            f" Skewness: {_compute_skewness(z):.4f}",
            # The value reported under "Kurtosis" is excess kurtosis
            f" Kurtosis: {_compute_kurtosis(z):.4f}",
        ]

        report += [
            "",
            "Interpretation:",
            " - ω: Long-run unconditional variance = ω / (1 - α - β)",
            " - α: Sensitivity to recent shocks (news impact)",
            " - β: Persistence of past volatility",
            " - α+β: Overall persistence (< 1 for stationarity)",
        ]

        return "\n".join(report)
|
|
232
|
+
|
|
233
|
+
|
|
234
|
+
def _extract_lag_coefficients(params: Any, prefix: str, order: int) -> list[float]:
    """Collect ``prefix[1]`` .. ``prefix[order]`` from a fitted parameter vector.

    Labels missing from ``params`` are skipped rather than treated as errors.
    """
    labels = (f"{prefix}[{lag}]" for lag in range(1, order + 1))
    return [float(params[label]) for label in labels if label in params]


def _extract_iteration_count(fitted: Any) -> int:
    """Best-effort optimizer iteration count for a fitted model, 0 if unknown."""
    # arch stores the SciPy OptimizeResult on ``optimization_result``; its
    # ``nit`` field holds the iteration count.
    nit = getattr(getattr(fitted, "optimization_result", None), "nit", None)
    if nit is None:
        # Fallback lookup kept for result objects that expose a ``fit_info`` mapping.
        fit_info = getattr(fitted, "fit_info", None)
        nit = fit_info.get("iterations") if isinstance(fit_info, dict) else None
    try:
        return int(nit)
    except (TypeError, ValueError):
        return 0


def fit_garch(
    returns: pd.Series | np.ndarray,
    p: int = 1,
    q: int = 1,
    mean_model: Literal[
        "Constant", "Zero", "LS", "AR", "ARX", "HAR", "HARX", "constant", "zero"
    ] = "Zero",
    dist: Literal[
        "normal", "gaussian", "t", "studentst", "skewstudent", "skewt", "ged", "generalized error"
    ] = "normal",
) -> GARCHResult:
    """Fit GARCH(p, q) model to returns series.

    GARCH (Generalized Autoregressive Conditional Heteroskedasticity) models
    are used to model time-varying volatility in financial time series. The
    GARCH(p,q) model specifies conditional variance as:

        σ²ₜ = ω + Σ(αᵢ·ε²ₜ₋ᵢ) + Σ(βⱼ·σ²ₜ₋ⱼ)

    For the common GARCH(1,1):
        σ²ₜ = ω + α·ε²ₜ₋₁ + β·σ²ₜ₋₁

    Where:
        - ω (omega): Constant term
        - α (alpha): ARCH coefficient (impact of past squared errors)
        - β (beta): GARCH coefficient (impact of past conditional variance)

    Persistence (α + β) should be < 1 for stationarity. Values close to 1
    indicate high volatility persistence.

    Args:
        returns: Returns series (NOT prices) to fit GARCH model. The data is
            passed to the estimator exactly as given (no rescaling), so the
            fitted omega and conditional volatility are in the input's units.
        p: ARCH order (number of lagged squared errors), default 1
        q: GARCH order (number of lagged conditional variances), default 1
        mean_model: Mean model specification, one of:
            - "Zero": Zero mean (default, common for returns)
            - "Constant": Constant mean
            - "AR": Autoregressive mean
            - "ARX": AR with exogenous regressors
            - "HAR": Heterogeneous AR
            - "LS": Least squares
        dist: Error distribution, one of:
            - "normal": Normal distribution (default)
            - "t": Student's t distribution (fat tails)
            - "skewt": Skewed Student's t distribution
            - "ged": Generalized Error Distribution

    Returns:
        GARCHResult with fitted parameters and diagnostics. ``alpha`` and
        ``beta`` are floats when exactly one coefficient was extracted and
        tuples otherwise.

    Raises:
        ValidationError: If the arch package is not installed, the data is
            empty, not 1-dimensional, contains NaN/infinite values, p or q is
            below 1, or there are fewer than ``max(p, q) * 10 + 50``
            observations
        ComputationError: If GARCH fitting fails

    Notes:
        - Requires arch package: pip install arch
        - GARCH(1,1) is sufficient for most financial applications
        - Higher orders (p>1, q>1) rarely improve fit significantly
        - Use ARCH-LM test first to check if GARCH is appropriate
        - Convergence can be sensitive to starting values
        - Consider Student's t or skewed t for fat-tailed returns

    References:
        Bollerslev, T. (1986). Generalized Autoregressive Conditional
        Heteroskedasticity. Journal of Econometrics, 31(3), 307-327.
        DOI: 10.1016/0304-4076(86)90063-1
    """
    # Check if arch package is available (lazy check)
    if not _check_arch_available():
        raise ValidationError(
            "GARCH fitting requires the 'arch' package. Install with: pip install arch",
            context={"available": False},
        )
    logger.debug(f"Fitting GARCH({p},{q}) model with mean_model={mean_model}, dist={dist}")

    # Work on a numpy view for validation
    arr = returns.to_numpy() if isinstance(returns, pd.Series) else np.asarray(returns)

    if arr.size == 0:
        raise ValidationError(
            "Cannot fit GARCH on empty data",
            context={"data_size": 0},
        )

    if arr.ndim != 1:
        raise ValidationError(
            f"Returns must be 1-dimensional, got shape {arr.shape}",
            context={"data_shape": arr.shape},
        )

    invalid_mask = ~np.isfinite(arr)
    if invalid_mask.any():
        # Plain int keeps numpy scalars out of the message and error context
        n_invalid = int(invalid_mask.sum())
        raise ValidationError(
            f"Returns contain {n_invalid} NaN or infinite values",
            context={"n_invalid": n_invalid, "data_size": arr.size},
        )

    # Validate the model orders BEFORE deriving the sample-size requirement
    # from them, so an invalid order is reported as such instead of as
    # "insufficient data".
    if p < 1:
        raise ValidationError(
            f"ARCH order (p) must be at least 1, got {p}",
            context={"p": p},
        )

    if q < 1:
        raise ValidationError(
            f"GARCH order (q) must be at least 1, got {q}",
            context={"q": q},
        )

    # Check minimum sample size
    min_obs = max(p, q) * 10 + 50  # Need sufficient data for estimation
    if arr.size < min_obs:
        raise ValidationError(
            f"Insufficient data for GARCH({p},{q}). Need at least {min_obs} observations, got {arr.size}",
            context={"n_obs": arr.size, "p": p, "q": q, "min_required": min_obs},
        )

    try:
        # Hand the estimator a pandas Series: reuse (a copy of) the caller's
        # Series to keep its index, otherwise wrap the array. No scaling is applied.
        returns_series = (
            returns.copy() if isinstance(returns, pd.Series) else pd.Series(arr, name="returns")
        )

        # Create and fit GARCH model using arch library (lazy import)
        model = _get_arch_model()(
            returns_series,
            mean=mean_model,
            vol="GARCH",
            p=p,
            q=q,
            dist=dist,
        )

        # Fit model (may take time for complex models)
        fitted = model.fit(disp="off", show_warning=False)

        # Parameters are labelled omega, alpha[1..p], beta[1..q]
        params = fitted.params
        omega = float(params.get("omega", 0.0))
        alpha_list = _extract_lag_coefficients(params, "alpha", p)
        beta_list = _extract_lag_coefficients(params, "beta", q)

        # A single coefficient is exposed as a scalar, anything else as a tuple
        alpha: float | tuple[float, ...] = (
            alpha_list[0] if len(alpha_list) == 1 else tuple(alpha_list)
        )
        beta: float | tuple[float, ...] = (
            beta_list[0] if len(beta_list) == 1 else tuple(beta_list)
        )

        # Persistence is the sum of all alpha and beta coefficients
        persistence = sum(alpha_list) + sum(beta_list)

        # convergence_flag == 0 means success; bool() avoids leaking a numpy bool
        converged = bool(fitted.convergence_flag == 0)
        iterations = _extract_iteration_count(fitted)

        logger.info(
            f"GARCH({p},{q}) fitted successfully",
            omega=omega,
            alpha=alpha,
            beta=beta,
            persistence=persistence,
            converged=converged,
        )

        return GARCHResult(
            omega=omega,
            alpha=alpha,
            beta=beta,
            persistence=persistence,
            log_likelihood=float(fitted.loglikelihood),
            aic=float(fitted.aic),
            bic=float(fitted.bic),
            conditional_volatility=fitted.conditional_volatility,
            standardized_residuals=fitted.std_resid,
            converged=converged,
            iterations=iterations,
            n_obs=arr.size,
        )

    except Exception as e:
        # Boundary handler: any failure during model construction, fitting or
        # result extraction is reported as a ComputationError, with the
        # original exception chained for the traceback.
        logger.error(f"GARCH fitting failed: {e}", p=p, q=q, n_obs=arr.size)
        raise ComputationError(
            f"GARCH({p},{q}) fitting failed: {e}",
            context={
                "n_obs": arr.size,
                "p": p,
                "q": q,
                "mean_model": mean_model,
                "dist": dist,
            },
            cause=e,
        ) from e
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
"""Integration contracts for external libraries."""
|
|
2
|
+
|
|
3
|
+
from ml4t.diagnostic.integration.backtest_contract import (
|
|
4
|
+
ComparisonRequest,
|
|
5
|
+
ComparisonResult,
|
|
6
|
+
ComparisonType,
|
|
7
|
+
EnvironmentType,
|
|
8
|
+
EvaluationExport,
|
|
9
|
+
PromotionWorkflow,
|
|
10
|
+
StrategyMetadata,
|
|
11
|
+
TradeRecord,
|
|
12
|
+
)
|
|
13
|
+
from ml4t.diagnostic.integration.data_contract import (
|
|
14
|
+
AnomalyType,
|
|
15
|
+
DataAnomaly,
|
|
16
|
+
DataQualityMetrics,
|
|
17
|
+
DataQualityReport,
|
|
18
|
+
DataValidationRequest,
|
|
19
|
+
Severity,
|
|
20
|
+
)
|
|
21
|
+
from ml4t.diagnostic.integration.engineer_contract import (
|
|
22
|
+
EngineerConfig,
|
|
23
|
+
PreprocessingRecommendation,
|
|
24
|
+
TransformType,
|
|
25
|
+
)
|
|
26
|
+
|
|
27
|
+
__all__ = [
|
|
28
|
+
# ml4t.data integration
|
|
29
|
+
"AnomalyType",
|
|
30
|
+
"DataAnomaly",
|
|
31
|
+
"DataQualityMetrics",
|
|
32
|
+
"DataQualityReport",
|
|
33
|
+
"DataValidationRequest",
|
|
34
|
+
"Severity",
|
|
35
|
+
# ml4t.engineer integration
|
|
36
|
+
"PreprocessingRecommendation",
|
|
37
|
+
"EngineerConfig",
|
|
38
|
+
"TransformType",
|
|
39
|
+
# ml4t.backtest integration
|
|
40
|
+
"ComparisonRequest",
|
|
41
|
+
"ComparisonResult",
|
|
42
|
+
"ComparisonType",
|
|
43
|
+
"EnvironmentType",
|
|
44
|
+
"EvaluationExport",
|
|
45
|
+
"PromotionWorkflow",
|
|
46
|
+
"StrategyMetadata",
|
|
47
|
+
"TradeRecord",
|
|
48
|
+
]
|