ml4t-diagnostic 0.1.0a1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ml4t/diagnostic/AGENT.md +25 -0
- ml4t/diagnostic/__init__.py +166 -0
- ml4t/diagnostic/backends/__init__.py +10 -0
- ml4t/diagnostic/backends/adapter.py +192 -0
- ml4t/diagnostic/backends/polars_backend.py +899 -0
- ml4t/diagnostic/caching/__init__.py +40 -0
- ml4t/diagnostic/caching/cache.py +331 -0
- ml4t/diagnostic/caching/decorators.py +131 -0
- ml4t/diagnostic/caching/smart_cache.py +339 -0
- ml4t/diagnostic/config/AGENT.md +24 -0
- ml4t/diagnostic/config/README.md +267 -0
- ml4t/diagnostic/config/__init__.py +219 -0
- ml4t/diagnostic/config/barrier_config.py +277 -0
- ml4t/diagnostic/config/base.py +301 -0
- ml4t/diagnostic/config/event_config.py +148 -0
- ml4t/diagnostic/config/feature_config.py +404 -0
- ml4t/diagnostic/config/multi_signal_config.py +55 -0
- ml4t/diagnostic/config/portfolio_config.py +215 -0
- ml4t/diagnostic/config/report_config.py +391 -0
- ml4t/diagnostic/config/sharpe_config.py +202 -0
- ml4t/diagnostic/config/signal_config.py +206 -0
- ml4t/diagnostic/config/trade_analysis_config.py +310 -0
- ml4t/diagnostic/config/validation.py +279 -0
- ml4t/diagnostic/core/__init__.py +29 -0
- ml4t/diagnostic/core/numba_utils.py +315 -0
- ml4t/diagnostic/core/purging.py +372 -0
- ml4t/diagnostic/core/sampling.py +471 -0
- ml4t/diagnostic/errors/__init__.py +205 -0
- ml4t/diagnostic/evaluation/AGENT.md +26 -0
- ml4t/diagnostic/evaluation/__init__.py +437 -0
- ml4t/diagnostic/evaluation/autocorrelation.py +531 -0
- ml4t/diagnostic/evaluation/barrier_analysis.py +1050 -0
- ml4t/diagnostic/evaluation/binary_metrics.py +910 -0
- ml4t/diagnostic/evaluation/dashboard.py +715 -0
- ml4t/diagnostic/evaluation/diagnostic_plots.py +1037 -0
- ml4t/diagnostic/evaluation/distribution/__init__.py +499 -0
- ml4t/diagnostic/evaluation/distribution/moments.py +299 -0
- ml4t/diagnostic/evaluation/distribution/tails.py +777 -0
- ml4t/diagnostic/evaluation/distribution/tests.py +470 -0
- ml4t/diagnostic/evaluation/drift/__init__.py +139 -0
- ml4t/diagnostic/evaluation/drift/analysis.py +432 -0
- ml4t/diagnostic/evaluation/drift/domain_classifier.py +517 -0
- ml4t/diagnostic/evaluation/drift/population_stability_index.py +310 -0
- ml4t/diagnostic/evaluation/drift/wasserstein.py +388 -0
- ml4t/diagnostic/evaluation/event_analysis.py +647 -0
- ml4t/diagnostic/evaluation/excursion.py +390 -0
- ml4t/diagnostic/evaluation/feature_diagnostics.py +873 -0
- ml4t/diagnostic/evaluation/feature_outcome.py +666 -0
- ml4t/diagnostic/evaluation/framework.py +935 -0
- ml4t/diagnostic/evaluation/metric_registry.py +255 -0
- ml4t/diagnostic/evaluation/metrics/AGENT.md +23 -0
- ml4t/diagnostic/evaluation/metrics/__init__.py +133 -0
- ml4t/diagnostic/evaluation/metrics/basic.py +160 -0
- ml4t/diagnostic/evaluation/metrics/conditional_ic.py +469 -0
- ml4t/diagnostic/evaluation/metrics/feature_outcome.py +475 -0
- ml4t/diagnostic/evaluation/metrics/ic_statistics.py +446 -0
- ml4t/diagnostic/evaluation/metrics/importance_analysis.py +338 -0
- ml4t/diagnostic/evaluation/metrics/importance_classical.py +375 -0
- ml4t/diagnostic/evaluation/metrics/importance_mda.py +371 -0
- ml4t/diagnostic/evaluation/metrics/importance_shap.py +715 -0
- ml4t/diagnostic/evaluation/metrics/information_coefficient.py +527 -0
- ml4t/diagnostic/evaluation/metrics/interactions.py +772 -0
- ml4t/diagnostic/evaluation/metrics/monotonicity.py +226 -0
- ml4t/diagnostic/evaluation/metrics/risk_adjusted.py +324 -0
- ml4t/diagnostic/evaluation/multi_signal.py +550 -0
- ml4t/diagnostic/evaluation/portfolio_analysis/__init__.py +83 -0
- ml4t/diagnostic/evaluation/portfolio_analysis/analysis.py +734 -0
- ml4t/diagnostic/evaluation/portfolio_analysis/metrics.py +589 -0
- ml4t/diagnostic/evaluation/portfolio_analysis/results.py +334 -0
- ml4t/diagnostic/evaluation/report_generation.py +824 -0
- ml4t/diagnostic/evaluation/signal_selector.py +452 -0
- ml4t/diagnostic/evaluation/stat_registry.py +139 -0
- ml4t/diagnostic/evaluation/stationarity/__init__.py +97 -0
- ml4t/diagnostic/evaluation/stationarity/analysis.py +518 -0
- ml4t/diagnostic/evaluation/stationarity/augmented_dickey_fuller.py +296 -0
- ml4t/diagnostic/evaluation/stationarity/kpss_test.py +308 -0
- ml4t/diagnostic/evaluation/stationarity/phillips_perron.py +365 -0
- ml4t/diagnostic/evaluation/stats/AGENT.md +43 -0
- ml4t/diagnostic/evaluation/stats/__init__.py +191 -0
- ml4t/diagnostic/evaluation/stats/backtest_overfitting.py +219 -0
- ml4t/diagnostic/evaluation/stats/bootstrap.py +228 -0
- ml4t/diagnostic/evaluation/stats/deflated_sharpe_ratio.py +591 -0
- ml4t/diagnostic/evaluation/stats/false_discovery_rate.py +295 -0
- ml4t/diagnostic/evaluation/stats/hac_standard_errors.py +108 -0
- ml4t/diagnostic/evaluation/stats/minimum_track_record.py +408 -0
- ml4t/diagnostic/evaluation/stats/moments.py +164 -0
- ml4t/diagnostic/evaluation/stats/rademacher_adjustment.py +436 -0
- ml4t/diagnostic/evaluation/stats/reality_check.py +155 -0
- ml4t/diagnostic/evaluation/stats/sharpe_inference.py +219 -0
- ml4t/diagnostic/evaluation/themes.py +330 -0
- ml4t/diagnostic/evaluation/threshold_analysis.py +957 -0
- ml4t/diagnostic/evaluation/trade_analysis.py +1136 -0
- ml4t/diagnostic/evaluation/trade_dashboard/__init__.py +32 -0
- ml4t/diagnostic/evaluation/trade_dashboard/app.py +315 -0
- ml4t/diagnostic/evaluation/trade_dashboard/export/__init__.py +18 -0
- ml4t/diagnostic/evaluation/trade_dashboard/export/csv.py +82 -0
- ml4t/diagnostic/evaluation/trade_dashboard/export/html.py +276 -0
- ml4t/diagnostic/evaluation/trade_dashboard/io.py +166 -0
- ml4t/diagnostic/evaluation/trade_dashboard/normalize.py +304 -0
- ml4t/diagnostic/evaluation/trade_dashboard/stats.py +386 -0
- ml4t/diagnostic/evaluation/trade_dashboard/style.py +79 -0
- ml4t/diagnostic/evaluation/trade_dashboard/tabs/__init__.py +21 -0
- ml4t/diagnostic/evaluation/trade_dashboard/tabs/patterns.py +354 -0
- ml4t/diagnostic/evaluation/trade_dashboard/tabs/shap_analysis.py +280 -0
- ml4t/diagnostic/evaluation/trade_dashboard/tabs/stat_validation.py +186 -0
- ml4t/diagnostic/evaluation/trade_dashboard/tabs/worst_trades.py +236 -0
- ml4t/diagnostic/evaluation/trade_dashboard/types.py +129 -0
- ml4t/diagnostic/evaluation/trade_shap/__init__.py +102 -0
- ml4t/diagnostic/evaluation/trade_shap/alignment.py +188 -0
- ml4t/diagnostic/evaluation/trade_shap/characterize.py +413 -0
- ml4t/diagnostic/evaluation/trade_shap/cluster.py +302 -0
- ml4t/diagnostic/evaluation/trade_shap/explain.py +208 -0
- ml4t/diagnostic/evaluation/trade_shap/hypotheses/__init__.py +23 -0
- ml4t/diagnostic/evaluation/trade_shap/hypotheses/generator.py +290 -0
- ml4t/diagnostic/evaluation/trade_shap/hypotheses/matcher.py +251 -0
- ml4t/diagnostic/evaluation/trade_shap/hypotheses/templates.yaml +467 -0
- ml4t/diagnostic/evaluation/trade_shap/models.py +386 -0
- ml4t/diagnostic/evaluation/trade_shap/normalize.py +116 -0
- ml4t/diagnostic/evaluation/trade_shap/pipeline.py +263 -0
- ml4t/diagnostic/evaluation/trade_shap_dashboard.py +283 -0
- ml4t/diagnostic/evaluation/trade_shap_diagnostics.py +588 -0
- ml4t/diagnostic/evaluation/validated_cv.py +535 -0
- ml4t/diagnostic/evaluation/visualization.py +1050 -0
- ml4t/diagnostic/evaluation/volatility/__init__.py +45 -0
- ml4t/diagnostic/evaluation/volatility/analysis.py +351 -0
- ml4t/diagnostic/evaluation/volatility/arch.py +258 -0
- ml4t/diagnostic/evaluation/volatility/garch.py +460 -0
- ml4t/diagnostic/integration/__init__.py +48 -0
- ml4t/diagnostic/integration/backtest_contract.py +671 -0
- ml4t/diagnostic/integration/data_contract.py +316 -0
- ml4t/diagnostic/integration/engineer_contract.py +226 -0
- ml4t/diagnostic/logging/__init__.py +77 -0
- ml4t/diagnostic/logging/logger.py +245 -0
- ml4t/diagnostic/logging/performance.py +234 -0
- ml4t/diagnostic/logging/progress.py +234 -0
- ml4t/diagnostic/logging/wandb.py +412 -0
- ml4t/diagnostic/metrics/__init__.py +9 -0
- ml4t/diagnostic/metrics/percentiles.py +128 -0
- ml4t/diagnostic/py.typed +1 -0
- ml4t/diagnostic/reporting/__init__.py +43 -0
- ml4t/diagnostic/reporting/base.py +130 -0
- ml4t/diagnostic/reporting/html_renderer.py +275 -0
- ml4t/diagnostic/reporting/json_renderer.py +51 -0
- ml4t/diagnostic/reporting/markdown_renderer.py +117 -0
- ml4t/diagnostic/results/AGENT.md +24 -0
- ml4t/diagnostic/results/__init__.py +105 -0
- ml4t/diagnostic/results/barrier_results/__init__.py +36 -0
- ml4t/diagnostic/results/barrier_results/hit_rate.py +304 -0
- ml4t/diagnostic/results/barrier_results/precision_recall.py +266 -0
- ml4t/diagnostic/results/barrier_results/profit_factor.py +297 -0
- ml4t/diagnostic/results/barrier_results/tearsheet.py +397 -0
- ml4t/diagnostic/results/barrier_results/time_to_target.py +305 -0
- ml4t/diagnostic/results/barrier_results/validation.py +38 -0
- ml4t/diagnostic/results/base.py +177 -0
- ml4t/diagnostic/results/event_results.py +349 -0
- ml4t/diagnostic/results/feature_results.py +787 -0
- ml4t/diagnostic/results/multi_signal_results.py +431 -0
- ml4t/diagnostic/results/portfolio_results.py +281 -0
- ml4t/diagnostic/results/sharpe_results.py +448 -0
- ml4t/diagnostic/results/signal_results/__init__.py +74 -0
- ml4t/diagnostic/results/signal_results/ic.py +581 -0
- ml4t/diagnostic/results/signal_results/irtc.py +110 -0
- ml4t/diagnostic/results/signal_results/quantile.py +392 -0
- ml4t/diagnostic/results/signal_results/tearsheet.py +456 -0
- ml4t/diagnostic/results/signal_results/turnover.py +213 -0
- ml4t/diagnostic/results/signal_results/validation.py +147 -0
- ml4t/diagnostic/signal/AGENT.md +17 -0
- ml4t/diagnostic/signal/__init__.py +69 -0
- ml4t/diagnostic/signal/_report.py +152 -0
- ml4t/diagnostic/signal/_utils.py +261 -0
- ml4t/diagnostic/signal/core.py +275 -0
- ml4t/diagnostic/signal/quantile.py +148 -0
- ml4t/diagnostic/signal/result.py +214 -0
- ml4t/diagnostic/signal/signal_ic.py +129 -0
- ml4t/diagnostic/signal/turnover.py +182 -0
- ml4t/diagnostic/splitters/AGENT.md +19 -0
- ml4t/diagnostic/splitters/__init__.py +36 -0
- ml4t/diagnostic/splitters/base.py +501 -0
- ml4t/diagnostic/splitters/calendar.py +421 -0
- ml4t/diagnostic/splitters/calendar_config.py +91 -0
- ml4t/diagnostic/splitters/combinatorial.py +1064 -0
- ml4t/diagnostic/splitters/config.py +322 -0
- ml4t/diagnostic/splitters/cpcv/__init__.py +57 -0
- ml4t/diagnostic/splitters/cpcv/combinations.py +119 -0
- ml4t/diagnostic/splitters/cpcv/partitioning.py +263 -0
- ml4t/diagnostic/splitters/cpcv/purge_engine.py +379 -0
- ml4t/diagnostic/splitters/cpcv/windows.py +190 -0
- ml4t/diagnostic/splitters/group_isolation.py +329 -0
- ml4t/diagnostic/splitters/persistence.py +316 -0
- ml4t/diagnostic/splitters/utils.py +207 -0
- ml4t/diagnostic/splitters/walk_forward.py +757 -0
- ml4t/diagnostic/utils/__init__.py +42 -0
- ml4t/diagnostic/utils/config.py +542 -0
- ml4t/diagnostic/utils/dependencies.py +318 -0
- ml4t/diagnostic/utils/sessions.py +127 -0
- ml4t/diagnostic/validation/__init__.py +54 -0
- ml4t/diagnostic/validation/dataframe.py +274 -0
- ml4t/diagnostic/validation/returns.py +280 -0
- ml4t/diagnostic/validation/timeseries.py +299 -0
- ml4t/diagnostic/visualization/AGENT.md +19 -0
- ml4t/diagnostic/visualization/__init__.py +223 -0
- ml4t/diagnostic/visualization/backtest/__init__.py +98 -0
- ml4t/diagnostic/visualization/backtest/cost_attribution.py +762 -0
- ml4t/diagnostic/visualization/backtest/executive_summary.py +895 -0
- ml4t/diagnostic/visualization/backtest/interactive_controls.py +673 -0
- ml4t/diagnostic/visualization/backtest/statistical_validity.py +874 -0
- ml4t/diagnostic/visualization/backtest/tearsheet.py +565 -0
- ml4t/diagnostic/visualization/backtest/template_system.py +373 -0
- ml4t/diagnostic/visualization/backtest/trade_plots.py +1172 -0
- ml4t/diagnostic/visualization/barrier_plots.py +782 -0
- ml4t/diagnostic/visualization/core.py +1060 -0
- ml4t/diagnostic/visualization/dashboards/__init__.py +36 -0
- ml4t/diagnostic/visualization/dashboards/base.py +582 -0
- ml4t/diagnostic/visualization/dashboards/importance.py +801 -0
- ml4t/diagnostic/visualization/dashboards/interaction.py +263 -0
- ml4t/diagnostic/visualization/dashboards.py +43 -0
- ml4t/diagnostic/visualization/data_extraction/__init__.py +48 -0
- ml4t/diagnostic/visualization/data_extraction/importance.py +649 -0
- ml4t/diagnostic/visualization/data_extraction/interaction.py +504 -0
- ml4t/diagnostic/visualization/data_extraction/types.py +113 -0
- ml4t/diagnostic/visualization/data_extraction/validation.py +66 -0
- ml4t/diagnostic/visualization/feature_plots.py +888 -0
- ml4t/diagnostic/visualization/interaction_plots.py +618 -0
- ml4t/diagnostic/visualization/portfolio/__init__.py +41 -0
- ml4t/diagnostic/visualization/portfolio/dashboard.py +514 -0
- ml4t/diagnostic/visualization/portfolio/drawdown_plots.py +341 -0
- ml4t/diagnostic/visualization/portfolio/returns_plots.py +487 -0
- ml4t/diagnostic/visualization/portfolio/risk_plots.py +301 -0
- ml4t/diagnostic/visualization/report_generation.py +1343 -0
- ml4t/diagnostic/visualization/signal/__init__.py +103 -0
- ml4t/diagnostic/visualization/signal/dashboard.py +911 -0
- ml4t/diagnostic/visualization/signal/event_plots.py +514 -0
- ml4t/diagnostic/visualization/signal/ic_plots.py +635 -0
- ml4t/diagnostic/visualization/signal/multi_signal_dashboard.py +974 -0
- ml4t/diagnostic/visualization/signal/multi_signal_plots.py +603 -0
- ml4t/diagnostic/visualization/signal/quantile_plots.py +625 -0
- ml4t/diagnostic/visualization/signal/turnover_plots.py +400 -0
- ml4t/diagnostic/visualization/trade_shap/__init__.py +90 -0
- ml4t_diagnostic-0.1.0a1.dist-info/METADATA +1044 -0
- ml4t_diagnostic-0.1.0a1.dist-info/RECORD +242 -0
- ml4t_diagnostic-0.1.0a1.dist-info/WHEEL +4 -0
- ml4t_diagnostic-0.1.0a1.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,1037 @@
|
|
|
1
|
+
"""Interactive diagnostic visualizations for feature analysis.
|
|
2
|
+
|
|
3
|
+
This module provides Plotly-based interactive diagnostic plots for the Feature
|
|
4
|
+
Diagnostics framework (Module A), including:
|
|
5
|
+
|
|
6
|
+
- ACF/PACF plots with confidence bands
|
|
7
|
+
- QQ plots for normality assessment
|
|
8
|
+
- Volatility clustering visualizations
|
|
9
|
+
- Distribution analysis with fitted curves
|
|
10
|
+
|
|
11
|
+
All visualizations are interactive (zoom, hover, pan) and designed for
|
|
12
|
+
browser-based dashboards. Static exports (PNG, PDF) are available via
|
|
13
|
+
the export_static() function.
|
|
14
|
+
|
|
15
|
+
References
|
|
16
|
+
----------
|
|
17
|
+
.. [1] Box, G. E. P., & Jenkins, G. M. (1976). Time Series Analysis: Forecasting and Control.
|
|
18
|
+
.. [2] Hamilton, J. D. (1994). Time Series Analysis. Princeton University Press.
|
|
19
|
+
.. [3] Tsay, R. S. (2005). Analysis of Financial Time Series. Wiley.
|
|
20
|
+
"""
|
|
21
|
+
|
|
22
|
+
from typing import TYPE_CHECKING
|
|
23
|
+
|
|
24
|
+
import numpy as np
|
|
25
|
+
import pandas as pd
|
|
26
|
+
import plotly.graph_objects as go
|
|
27
|
+
from plotly.subplots import make_subplots
|
|
28
|
+
from scipy import stats
|
|
29
|
+
|
|
30
|
+
if TYPE_CHECKING:
|
|
31
|
+
from numpy.typing import NDArray
|
|
32
|
+
|
|
33
|
+
# Shared palette for the financial diagnostic plots in this module
# (kept consistent with viz.py). Values are CSS colour strings that are
# passed straight to Plotly.
COLORS = dict(
    primary="#3366CC",  # Blue
    secondary="#FF9900",  # Orange
    positive="#00CC88",  # Green
    negative="#FF4444",  # Red
    neutral="#888888",  # Gray
    confidence="rgba(255, 68, 68, 0.2)",  # Light red fill
)
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def plot_acf_pacf(
    data: "NDArray | pd.Series",
    max_lags: int = 40,
    alpha: float = 0.05,
    title: str | None = None,
    height: int = 400,
) -> go.Figure:
    """Create interactive ACF and PACF plots with confidence bands.

    Creates a two-panel interactive figure showing:
    1. Autocorrelation Function (ACF) - correlation with lagged values
    2. Partial Autocorrelation Function (PACF) - correlation controlling for intermediate lags

    Includes confidence bands based on the specified significance level (alpha).
    Hover over bars to see exact values. Zoom and pan for detailed exploration.

    Parameters
    ----------
    data : ndarray, pd.Series or other array-like of numbers
        Time series data to analyze. The input is coerced to a float array
        and NaN values are dropped before any computation.
    max_lags : int, default 40
        Maximum number of lags to display. Clamped to ``n - 1`` when the
        cleaned series has ``n <= max_lags`` observations.
    alpha : float, default 0.05
        Significance level for confidence bands (default: 95% confidence).
        Must lie strictly between 0 and 1.
    title : str, optional
        Figure title. If None, uses "ACF and PACF Analysis"
    height : int, default 400
        Figure height in pixels

    Returns
    -------
    go.Figure
        Interactive Plotly figure with ACF and PACF plots

    Raises
    ------
    ValueError
        If ``alpha`` is not strictly between 0 and 1, or if no observations
        remain after removing NaN values.

    Examples
    --------
    >>> import numpy as np
    >>> # AR(1) process
    >>> data = np.random.randn(1000)
    >>> for i in range(1, len(data)):
    ...     data[i] = 0.7 * data[i-1] + np.random.randn()
    >>> fig = plot_acf_pacf(data)
    >>> fig.show()  # Opens in browser
    >>> # Or in dashboard:
    >>> import streamlit as st
    >>> st.plotly_chart(fig)

    Notes
    -----
    The confidence bands are computed as ±z * sqrt(1/n) where z is the
    critical value for the specified alpha level and n is the sample size.
    This assumes the series is white noise under the null hypothesis.

    For ACF, significant lags indicate autocorrelation that may violate
    assumptions of many statistical tests.

    For PACF, the number of significant lags helps identify AR order:
    - PACF cuts off after lag p → AR(p) process
    - ACF cuts off after lag q → MA(q) process
    - Both decay gradually → ARMA process

    See Also
    --------
    ml4t.diagnostic.evaluation.autocorrelation : Statistical autocorrelation tests
    statsmodels.graphics.tsaplots : Alternative ACF/PACF plotting

    References
    ----------
    .. [1] Box, G. E. P., & Jenkins, G. M. (1976). Time Series Analysis:
           Forecasting and Control.
    """
    # An alpha outside (0, 1) makes norm.ppf return NaN/inf, which would
    # silently draw a meaningless band; reject it up front instead.
    if not 0 < alpha < 1:
        raise ValueError(f"alpha must be in the open interval (0, 1), got {alpha}")

    # Coerce any array-like (Series, ndarray, list, ...) to a float array so
    # that the NaN mask below works regardless of the input container.
    data_array = np.asarray(data, dtype=float)

    # Remove NaN values
    data_array = data_array[~np.isnan(data_array)]

    n = len(data_array)
    if n == 0:
        raise ValueError("Input data is empty after removing NaN values")

    # Cannot estimate more lags than there are observations minus one.
    if max_lags >= n:
        max_lags = n - 1

    # Compute ACF and PACF
    acf_values = _compute_acf(data_array, max_lags)
    pacf_values = _compute_pacf(data_array, max_lags)

    # Two-sided band half-width: z_{1 - alpha/2} / sqrt(n)
    z_crit = stats.norm.ppf(1 - alpha / 2)
    conf_level = z_crit / np.sqrt(n)

    band_label = f"({100 * (1 - alpha):.0f}% Confidence Band)"
    fig = make_subplots(
        rows=1,
        cols=2,
        subplot_titles=(f"ACF {band_label}", f"PACF {band_label}"),
        horizontal_spacing=0.12,
    )

    lags = np.arange(max_lags + 1)

    def _add_band(col: int, **extra) -> None:
        """Draw the upper and lower confidence lines in column ``col``."""
        shared = {
            "x": lags,
            "mode": "lines",
            "line": {"color": COLORS["negative"], "dash": "dash", "width": 1},
            "showlegend": False,
            "hoverinfo": "skip",
        }
        shared.update(extra)
        # Upper line first: the lower line uses fill="tonexty", which shades
        # the region between itself and the trace added immediately before.
        fig.add_trace(
            go.Scatter(y=[conf_level] * len(lags), **shared),
            row=1,
            col=col,
        )
        fig.add_trace(
            go.Scatter(
                y=[-conf_level] * len(lags),
                fill="tonexty",
                fillcolor=COLORS["confidence"],
                **shared,
            ),
            row=1,
            col=col,
        )

    # (column, label, values, bar colour, extra kwargs for the band traces).
    # Only the ACF band traces carry an explicit name, as before.
    panels = (
        (1, "ACF", acf_values, COLORS["primary"], {"name": "Confidence Band"}),
        (2, "PACF", pacf_values, COLORS["secondary"], {}),
    )
    for col, label, values, color, band_kwargs in panels:
        fig.add_trace(
            go.Bar(
                x=lags,
                y=values,
                marker_color=color,
                name=label,
                hovertemplate=f"Lag: %{{x}}<br>{label}: %{{y:.4f}}<extra></extra>",
                showlegend=False,
            ),
            row=1,
            col=col,
        )
        _add_band(col, **band_kwargs)

    # Add zero lines
    for col in (1, 2):
        fig.add_hline(y=0, line_color="black", line_width=0.5, row=1, col=col)

    # Axis titles
    for col in (1, 2):
        fig.update_xaxes(title_text="Lag", row=1, col=col)
    fig.update_yaxes(title_text="Autocorrelation", row=1, col=1)
    fig.update_yaxes(title_text="Partial Autocorrelation", row=1, col=2)

    if title is None:
        title = "ACF and PACF Analysis"

    fig.update_layout(
        title={"text": title, "x": 0.5, "xanchor": "center"},
        height=height,
        hovermode="x unified",
        plot_bgcolor="white",
        paper_bgcolor="white",
    )

    # Grid styling
    fig.update_xaxes(showgrid=True, gridwidth=1, gridcolor="lightgray")
    fig.update_yaxes(showgrid=True, gridwidth=1, gridcolor="lightgray")

    return fig
|
|
263
|
+
|
|
264
|
+
|
|
265
|
+
def plot_qq(
    data: "NDArray | pd.Series",
    distribution: str = "norm",
    title: str | None = None,
    height: int = 500,
    width: int = 500,
) -> go.Figure:
    """Create interactive QQ plot for assessing distributional assumptions.

    A Quantile-Quantile (QQ) plot compares the quantiles of the data against
    the quantiles of a theoretical distribution. Points falling along the
    diagonal line indicate the data follows the theoretical distribution.

    Interactive features: hover for exact values, zoom to focus on tails.

    Deviations from the diagonal indicate departures from the assumed distribution:
    - S-shaped curve: Heavy tails (fat-tailed distribution)
    - Inverted S: Light tails (thin-tailed distribution)
    - Points above/below line at extremes: Asymmetric tails

    Parameters
    ----------
    data : ndarray, pd.Series or other array-like of numbers
        Data to assess. The input is coerced to a float array and NaN values
        are dropped before any computation.
    distribution : str, default "norm"
        Theoretical distribution to compare against.
        Options: "norm" (normal), "t" (Student's t), "uniform".
        For "t" the degrees of freedom are estimated from the data with
        ``scipy.stats.t.fit``.
    title : str, optional
        Plot title. If None, uses "QQ Plot vs {name} Distribution", where
        ``name`` is the display name of the chosen distribution
        (e.g. "Normal").
    height : int, default 500
        Figure height in pixels
    width : int, default 500
        Figure width in pixels

    Returns
    -------
    go.Figure
        Interactive Plotly figure with QQ plot

    Raises
    ------
    ValueError
        If no observations remain after removing NaN values, or if
        ``distribution`` is not one of "norm", "t" or "uniform".

    Examples
    --------
    >>> import numpy as np
    >>> # Normal data
    >>> data = np.random.randn(1000)
    >>> fig = plot_qq(data)
    >>> fig.show()

    >>> # Heavy-tailed data
    >>> data = np.random.standard_t(df=3, size=1000)
    >>> fig = plot_qq(data, distribution='t')
    >>> fig.show()

    Notes
    -----
    The QQ plot is a graphical complement to normality tests like Jarque-Bera
    or Shapiro-Wilk. It provides visual insight into *how* the data deviates
    from normality, not just whether it does.

    Common patterns:
    - Normal: Points on diagonal
    - Skewed: Curved pattern
    - Heavy-tailed: Points diverge at extremes (S-curve)
    - Light-tailed: Points converge at extremes (inverted S)

    For financial returns, heavy tails (leptokurtosis) are common, so observing
    departures at the extremes is typical.

    See Also
    --------
    ml4t.diagnostic.evaluation.distribution : Distribution diagnostic tests
    scipy.stats.probplot : Underlying QQ plot function

    References
    ----------
    .. [1] Wilk, M. B., & Gnanadesikan, R. (1968). "Probability plotting
           methods for the analysis of data." Biometrika, 55(1), 1-17.
    """
    # Coerce any array-like (Series, ndarray, list, ...) to a float array so
    # that the NaN mask below works regardless of the input container.
    sample = np.asarray(data, dtype=float)

    # Remove NaN values
    sample = sample[~np.isnan(sample)]

    if len(sample) == 0:
        raise ValueError("Input data is empty after removing NaN values")

    # probplot returns ((theoretical, ordered sample), (slope, intercept, r));
    # the correlation coefficient r is not used by this plot.
    if distribution == "norm":
        (theoretical_q, sample_q), (slope, intercept, _) = stats.probplot(sample, dist="norm")
        dist_name = "Normal"
    elif distribution == "t":
        # Estimate degrees of freedom; fit() returns (df, loc, scale) and only
        # df is passed on as the shape parameter.
        df = stats.t.fit(sample)[0]
        (theoretical_q, sample_q), (slope, intercept, _) = stats.probplot(
            sample, dist=stats.t, sparams=(df,)
        )
        dist_name = f"Student's t (df={df:.1f})"
    elif distribution == "uniform":
        (theoretical_q, sample_q), (slope, intercept, _) = stats.probplot(sample, dist="uniform")
        dist_name = "Uniform"
    else:
        raise ValueError(f"Unknown distribution: {distribution}. Use 'norm', 't', or 'uniform'")

    fig = go.Figure()

    # Sample points: ordered data against theoretical quantiles.
    fig.add_trace(
        go.Scatter(
            x=theoretical_q,
            y=sample_q,
            mode="markers",
            marker={"color": COLORS["primary"], "size": 5, "opacity": 0.6},
            name="Sample Data",
            hovertemplate="Theoretical: %{x:.3f}<br>Sample: %{y:.3f}<extra></extra>",
        )
    )

    # Reference line: least-squares fit returned by probplot.
    fitted_line = slope * theoretical_q + intercept
    fig.add_trace(
        go.Scatter(
            x=theoretical_q,
            y=fitted_line,
            mode="lines",
            line={"color": COLORS["negative"], "dash": "dash", "width": 2},
            name="Reference Line",
            hovertemplate="Theoretical: %{x:.3f}<br>Expected: %{y:.3f}<extra></extra>",
        )
    )

    if title is None:
        title = f"QQ Plot vs {dist_name} Distribution"

    fig.update_layout(
        title={"text": title, "x": 0.5, "xanchor": "center"},
        xaxis_title="Theoretical Quantiles",
        yaxis_title="Sample Quantiles",
        height=height,
        width=width,
        hovermode="closest",
        plot_bgcolor="white",
        paper_bgcolor="white",
        showlegend=True,
        legend={"x": 0.02, "y": 0.98, "bgcolor": "rgba(255,255,255,0.8)"},
    )

    # Grid styling
    fig.update_xaxes(showgrid=True, gridwidth=1, gridcolor="lightgray")
    fig.update_yaxes(showgrid=True, gridwidth=1, gridcolor="lightgray")

    # Interpretation hint, anchored in paper coordinates (lower-left corner).
    fig.add_annotation(
        text=(
            "Points on diagonal → data follows distribution<br>S-curve → heavy tails<br>Inverted S → light tails"
        ),
        xref="paper",
        yref="paper",
        x=0.02,
        y=0.10,
        showarrow=False,
        bgcolor="rgba(255, 248, 220, 0.8)",
        bordercolor="gray",
        borderwidth=1,
        font={"size": 9},
        align="left",
    )

    return fig
|
|
437
|
+
|
|
438
|
+
|
|
439
|
+
def plot_volatility_clustering(
    data: "NDArray | pd.Series",
    window: int = 20,
    title: str | None = None,
    height: int = 800,
) -> go.Figure:
    """Build a four-panel interactive view of volatility clustering.

    Volatility clustering means large moves tend to follow large moves and
    small moves tend to follow small moves, regardless of sign. The figure
    stacks four line charts that share one x-axis:

    1. The series itself, with a zero reference line
    2. Absolute values, with a dashed line at their mean
    3. Squared values, with a dashed line at their mean
    4. Rolling standard deviation over ``window`` observations

    Parameters
    ----------
    data : ndarray or pd.Series
        Time series data (typically returns). NaN entries are dropped before
        anything is computed. For a Series, the index labels of the remaining
        observations are used as x coordinates; otherwise 0..n-1 is used.
    window : int, default 20
        Rolling window size for the standard deviation in panel 4. The rolling
        call uses ``min_periods=1``, so partial windows at the start are
        evaluated as well.
    title : str, optional
        Figure title. If None, uses "Volatility Clustering Analysis"
    height : int, default 800
        Figure height in pixels

    Returns
    -------
    go.Figure
        Interactive Plotly figure with the four panels described above

    Raises
    ------
    ValueError
        If no observations remain after removing NaN values.

    Examples
    --------
    >>> import numpy as np
    >>> # GARCH-like data
    >>> n = 1000
    >>> returns = np.zeros(n)
    >>> sigma = np.zeros(n)
    >>> sigma[0] = 0.1
    >>> for t in range(1, n):
    ...     sigma[t] = np.sqrt(0.01 + 0.05 * returns[t-1]**2 + 0.9 * sigma[t-1]**2)
    ...     returns[t] = sigma[t] * np.random.randn()
    >>> fig = plot_volatility_clustering(returns)
    >>> fig.show()

    Notes
    -----
    Volatility clustering violates the constant-variance (homoscedasticity)
    assumption of many statistical models. If present, consider:

    - GARCH models for volatility forecasting
    - Robust standard errors in regressions
    - Volatility-adjusted metrics

    Visual signs of clustering:

    - Alternating calm and turbulent stretches in the first panel
    - Persistent bursts in the squared series
    - A rolling volatility line that varies over time

    See Also
    --------
    ml4t-diagnostic.evaluation.volatility : ARCH/GARCH tests for volatility clustering

    References
    ----------
    .. [1] Engle, R. F. (1982). "Autoregressive Conditional Heteroscedasticity
           with Estimates of the Variance of United Kingdom Inflation."
           Econometrica, 50(4), 987-1007.
    .. [2] Bollerslev, T. (1986). "Generalized autoregressive conditional
           heteroskedasticity." Journal of Econometrics, 31(3), 307-327.
    """
    # Separate raw values from the index; plain arrays carry no index.
    came_as_series = isinstance(data, pd.Series)
    series_index: pd.Index | None = data.index if came_as_series else None
    values = data.to_numpy() if came_as_series else data

    # Discard NaN observations up front so every panel uses the same sample.
    keep = ~np.isnan(values)
    values = values[keep]
    if len(values) == 0:
        raise ValueError("Input data is empty after removing NaN values")

    # X coordinates: surviving index labels, or sequential integers.
    if series_index is None:
        time_axis: NDArray = np.arange(len(values))
    else:
        time_axis = series_index[keep].to_numpy()

    # Volatility measures shown in panels 2-4.
    magnitude = np.abs(values)
    squared = values**2
    rolling_std = pd.Series(values).rolling(window=window, min_periods=1).std().to_numpy()

    fig = make_subplots(
        rows=4,
        cols=1,
        subplot_titles=(
            "Returns Series",
            "Absolute Returns (Magnitude)",
            "Squared Returns (Volatility Proxy)",
            f"Rolling Volatility (window={window})",
        ),
        shared_xaxes=True,
        vertical_spacing=0.06,
    )

    def add_line_panel(row, y_values, color, line_width, name, hover, **trace_extras):
        # One line trace per panel; all panels share the same x coordinates.
        fig.add_trace(
            go.Scatter(
                x=time_axis,
                y=y_values,
                mode="lines",
                line={"color": color, "width": line_width},
                name=name,
                hovertemplate=hover,
                **trace_extras,
            ),
            row=row,
            col=1,
        )

    def add_mean_line(row, level, label):
        # Dashed horizontal reference at the panel's mean, labelled on the right.
        fig.add_hline(
            y=level,
            line_color=COLORS["negative"],
            line_dash="dash",
            line_width=1.5,
            annotation_text=label,
            annotation_position="right",
            row=row,
            col=1,
        )

    # Panel 1: the series itself, with a thin zero line.
    add_line_panel(
        1,
        values,
        COLORS["primary"],
        0.8,
        "Returns",
        "Time: %{x}<br>Return: %{y:.4f}<extra></extra>",
    )
    fig.add_hline(y=0, line_color="black", line_width=0.5, row=1, col=1)

    # Panel 2: absolute values and their mean.
    magnitude_mean = np.mean(magnitude)
    add_line_panel(
        2,
        magnitude,
        COLORS["secondary"],
        0.8,
        "Absolute Returns",
        "Time: %{x}<br>|Return|: %{y:.4f}<extra></extra>",
    )
    add_mean_line(2, magnitude_mean, f"Mean: {magnitude_mean:.4f}")

    # Panel 3: squared values and their mean.
    squared_mean = np.mean(squared)
    add_line_panel(
        3,
        squared,
        COLORS["positive"],
        0.8,
        "Squared Returns",
        "Time: %{x}<br>Return²: %{y:.6f}<extra></extra>",
    )
    add_mean_line(3, squared_mean, f"Mean: {squared_mean:.6f}")

    # Panel 4: rolling standard deviation, filled down to zero.
    add_line_panel(
        4,
        rolling_std,
        COLORS["negative"],
        1.2,
        "Rolling Volatility",
        "Time: %{x}<br>Volatility: %{y:.4f}<extra></extra>",
        fill="tozeroy",
        fillcolor="rgba(255, 68, 68, 0.2)",
    )

    # Axis titles: one y label per panel, x label only on the bottom panel.
    y_labels = ("Returns", "|Returns|", "Returns²", "Volatility")
    for row, label in enumerate(y_labels, start=1):
        fig.update_yaxes(title_text=label, row=row, col=1)
    fig.update_xaxes(title_text="Time", row=4, col=1)

    heading = "Volatility Clustering Analysis" if title is None else title
    fig.update_layout(
        title={"text": heading, "x": 0.5, "xanchor": "center"},
        height=height,
        hovermode="x unified",
        plot_bgcolor="white",
        paper_bgcolor="white",
        showlegend=False,
    )

    # Light grid on every axis.
    grid_style = {"showgrid": True, "gridwidth": 1, "gridcolor": "lightgray"}
    fig.update_xaxes(**grid_style)
    fig.update_yaxes(**grid_style)

    return fig
|
|
662
|
+
|
|
663
|
+
|
|
664
|
+
def plot_distribution(
    data: "NDArray | pd.Series",
    bins: int = 50,
    fit_normal: bool = True,
    fit_t: bool = False,
    show_moments: bool = True,
    title: str | None = None,
    height: int = 500,
) -> go.Figure:
    """Plot an interactive density histogram with optional fitted curves.

    The figure contains:

    - A histogram of the observed values, normalised to a probability density
    - A normal density using the sample mean and sample standard deviation
      (optional)
    - A Student's t density fitted with ``scipy.stats.t.fit`` (optional)
    - A text box with mean, standard deviation, skewness and excess kurtosis
      (optional)

    Useful for assessing normality and spotting skewness or heavy tails.

    Parameters
    ----------
    data : ndarray or pd.Series
        Data to plot. NaN entries are removed first.
    bins : int, default 50
        Number of histogram bins (passed to Plotly as ``nbinsx``)
    fit_normal : bool, default True
        Whether to overlay the normal density
    fit_t : bool, default False
        Whether to overlay the fitted Student's t density
    show_moments : bool, default True
        Whether to display the moment statistics box
    title : str, optional
        Plot title. If None, uses "Distribution Analysis"
    height : int, default 500
        Figure height in pixels

    Returns
    -------
    go.Figure
        Interactive Plotly figure with the distribution plot

    Raises
    ------
    ValueError
        If no observations remain after removing NaN values.

    Examples
    --------
    >>> import numpy as np
    >>> # Normal data
    >>> data = np.random.randn(1000)
    >>> fig = plot_distribution(data)
    >>> fig.show()

    >>> # Heavy-tailed data
    >>> data = np.random.standard_t(df=3, size=1000)
    >>> fig = plot_distribution(data, fit_t=True)
    >>> fig.show()

    Notes
    -----
    Financial returns typically exhibit:

    - Near-zero mean (if de-meaned)
    - Positive excess kurtosis (heavy tails)
    - Slight negative skewness (larger losses than gains)

    The kurtosis shown is *excess* kurtosis (Fisher definition), for which a
    normal distribution scores 0:

    - Normal: good fit if excess kurtosis ≈ 0 and skewness ≈ 0
    - Student's t: better fit for heavy tails (excess kurtosis > 0)

    See Also
    --------
    ml4t-diagnostic.evaluation.distribution : Statistical distribution tests
    plot_qq : QQ plot for normality assessment

    References
    ----------
    .. [1] Mandelbrot, B. (1963). "The variation of certain speculative prices."
           Journal of Business, 36(4), 394-419.
    .. [2] Fama, E. F. (1965). "The behavior of stock-market prices."
           Journal of Business, 38(1), 34-105.
    """
    # Work on a bare array with NaN observations removed.
    sample = data.to_numpy() if isinstance(data, pd.Series) else data
    sample = sample[~np.isnan(sample)]
    if len(sample) == 0:
        raise ValueError("Input data is empty after removing NaN values")

    # Sample moments; the standard deviation uses ddof=1, kurtosis is excess.
    mu = np.mean(sample)
    sigma = np.std(sample, ddof=1)
    skew_value = stats.skew(sample)
    excess_kurt = stats.kurtosis(sample, fisher=True)

    density_hover = "Value: %{x:.4f}<br>Density: %{y:.4f}<extra></extra>"

    fig = go.Figure()
    fig.add_trace(
        go.Histogram(
            x=sample,
            nbinsx=bins,
            histnorm="probability density",
            marker={
                "color": COLORS["primary"],
                "opacity": 0.6,
                "line": {"color": "black", "width": 0.5},
            },
            name="Empirical",
            hovertemplate=density_hover,
        )
    )

    # Evaluate fitted densities on an even grid spanning the observed range.
    grid = np.linspace(sample.min(), sample.max(), 500)

    # Collect (density, line style, legend label) for each requested overlay.
    overlays = []
    if fit_normal:
        overlays.append(
            (
                stats.norm.pdf(grid, mu, sigma),
                {"color": COLORS["negative"], "width": 2},
                f"Normal(μ={mu:.3f}, σ={sigma:.3f})",
            )
        )
    if fit_t:
        dof, t_loc, t_scale = stats.t.fit(sample)
        overlays.append(
            (
                stats.t.pdf(grid, dof, t_loc, t_scale),
                {"color": COLORS["positive"], "width": 2, "dash": "dash"},
                f"Student's t (df={dof:.1f})",
            )
        )
    for density, line_style, label in overlays:
        fig.add_trace(
            go.Scatter(
                x=grid,
                y=density,
                mode="lines",
                line=line_style,
                name=label,
                hovertemplate=density_hover,
            )
        )

    heading = "Distribution Analysis" if title is None else title
    fig.update_layout(
        title={"text": heading, "x": 0.5, "xanchor": "center"},
        xaxis_title="Value",
        yaxis_title="Density",
        height=height,
        hovermode="closest",
        plot_bgcolor="white",
        paper_bgcolor="white",
        showlegend=True,
        legend={"x": 0.98, "y": 0.98, "xanchor": "right", "bgcolor": "rgba(255,255,255,0.8)"},
    )

    # Light grid on both axes.
    grid_style = {"showgrid": True, "gridwidth": 1, "gridcolor": "lightgray"}
    fig.update_xaxes(**grid_style)
    fig.update_yaxes(**grid_style)

    if show_moments:
        # Moment summary pinned to the top-left corner in paper coordinates.
        moment_lines = [
            "<b>Moments</b>",
            f"Mean: {mu:.4f}",
            f"Std Dev: {sigma:.4f}",
            f"Skewness: {skew_value:.4f}",
            f"Excess Kurtosis: {excess_kurt:.4f}",
        ]
        fig.add_annotation(
            text="<br>".join(moment_lines),
            xref="paper",
            yref="paper",
            x=0.02,
            y=0.98,
            showarrow=False,
            bgcolor="rgba(255, 248, 220, 0.8)",
            bordercolor="gray",
            borderwidth=1,
            font={"size": 10, "family": "monospace"},
            align="left",
            valign="top",
        )

    return fig
|
|
858
|
+
|
|
859
|
+
|
|
860
|
+
# Helper functions for ACF/PACF computation (unchanged from matplotlib version)
|
|
861
|
+
|
|
862
|
+
|
|
863
|
+
def _compute_acf(data: "NDArray", max_lags: int) -> "NDArray":
    """Return the sample autocorrelation function for lags 0..max_lags.

    The series is de-meaned, then each lag-k autocovariance is computed as the
    dot product of the series with its k-shifted copy divided by the full
    sample length (the same divisor at every lag), and finally normalised by
    the lag-0 autocovariance.

    Parameters
    ----------
    data : ndarray
        Time series data
    max_lags : int
        Maximum number of lags

    Returns
    -------
    ndarray
        Array of length ``max_lags + 1``; element 0 is always 1.0
    """
    centered = data - np.mean(data)
    n_obs = len(centered)
    variance = np.dot(centered, centered) / n_obs

    correlations = np.zeros(max_lags + 1)
    correlations[0] = 1.0  # a series is perfectly correlated with itself

    for lag in range(1, max_lags + 1):
        # Overlap of the series with itself shifted by `lag` positions.
        autocovariance = np.dot(centered[:-lag], centered[lag:]) / n_obs
        correlations[lag] = autocovariance / variance

    return correlations
|
|
889
|
+
|
|
890
|
+
|
|
891
|
+
def _compute_pacf(data: "NDArray", max_lags: int) -> "NDArray":
    """Return the partial autocorrelation function via Durbin-Levinson recursion.

    The autocorrelations from ``_compute_acf`` feed a recursion that, for each
    order k, derives the last coefficient of the order-k autoregression from
    the order-(k-1) coefficients; that last coefficient is the PACF at lag k.

    Parameters
    ----------
    data : ndarray
        Time series data
    max_lags : int
        Maximum number of lags

    Returns
    -------
    ndarray
        PACF values for lags 0 to max_lags; element 0 is always 1.0

    References
    ----------
    .. [1] Durbin, J. (1960). "The fitting of time-series models."
           Revue de l'Institut International de Statistique, 233-244.
    """
    rho = _compute_acf(data, max_lags)

    partial = np.zeros(max_lags + 1)
    partial[0] = 1.0  # lag 0 is 1 by convention

    if max_lags == 0:
        return partial

    # coeffs[k, j] holds the j-th coefficient of the order-k autoregression.
    coeffs = np.zeros((max_lags + 1, max_lags + 1))
    coeffs[1, 1] = rho[1]
    partial[1] = rho[1]

    for order in range(2, max_lags + 1):
        previous = coeffs[order - 1]

        # Numerator and denominator of the order-k reflection coefficient,
        # each accumulated term by term over the previous order's coefficients.
        numerator = rho[order]
        denominator = 1.0
        for j in range(1, order):
            numerator -= previous[j] * rho[order - j]
            denominator -= previous[j] * rho[j]

        coeffs[order, order] = numerator / denominator
        partial[order] = coeffs[order, order]

        # Refresh the lower-order coefficients for use in the next step.
        for j in range(1, order):
            coeffs[order, j] = previous[j] - coeffs[order, order] * previous[order - j]

    return partial
|
|
942
|
+
|
|
943
|
+
|
|
944
|
+
def export_static(fig: go.Figure, filename: str, format: str = "png", **kwargs) -> None:
    """Write a Plotly figure to disk as a static image.

    Turns an interactive figure into a static file (PNG, PDF, SVG, JPEG) for
    presentations, papers, or printable reports.

    Requires kaleido package: `pip install kaleido`

    The export is best-effort: any exception raised while exporting is caught
    and reported on standard output together with a kaleido installation hint;
    nothing is re-raised.

    Parameters
    ----------
    fig : go.Figure
        Plotly figure to export
    filename : str
        Output filename (without extension); ``.{format}`` is appended
    format : str, default "png"
        Export format: "png", "pdf", "svg", "jpeg"
    **kwargs
        Additional arguments passed to fig.write_image()
        Common options:
        - width: int, image width in pixels
        - height: int, image height in pixels
        - scale: float, image scale factor

    Examples
    --------
    >>> fig = plot_acf_pacf(data)
    >>> export_static(fig, "acf_pacf_report", format="pdf", width=1200, height=400)
    >>> # Creates: acf_pacf_report.pdf

    Notes
    -----
    For best quality PDFs:
    - Use format="pdf"
    - Set scale=2 or higher
    - Specify explicit width/height matching your document

    For web use:
    - Use format="png" or "svg"
    - SVG is vector (scales infinitely) but larger file size

    See Also
    --------
    plotly.graph_objects.Figure.write_image : Underlying export function
    """
    try:
        # The extension is derived from the requested format.
        target_path = f"{filename}.{format}"
        fig.write_image(target_path, format=format, **kwargs)
        print(f"✓ Exported static image: {target_path}")
    except Exception as error:
        # Deliberately non-fatal: report the problem and carry on.
        print(f"❌ Export failed: {error}")
        print("Install kaleido for static export: pip install kaleido")
|
|
995
|
+
|
|
996
|
+
|
|
997
|
+
def get_figure_data(fig: go.Figure) -> pd.DataFrame:
    """Extract the x/y data of every trace in a Plotly figure as a DataFrame.

    Retrieves the numerical data used to create the visualization,
    enabling custom analysis or alternative visualizations.

    Parameters
    ----------
    fig : go.Figure
        Plotly figure

    Returns
    -------
    pd.DataFrame
        One column per populated trace axis, named ``"<trace name>_x"`` and
        ``"<trace name>_y"``. Traces without a name use ``"trace_<i>"``, where
        ``i`` is the trace's position in ``fig.data``. Rows are positional:
        row ``k`` holds the k-th point of each trace. Traces shorter than the
        longest one are padded with NaN.

    Examples
    --------
    >>> fig = plot_distribution(data)
    >>> df = get_figure_data(fig)
    >>> print(df.columns)  # e.g. 'Empirical_x', then one _x/_y pair per curve
    >>> # Custom analysis on a single trace
    >>> histogram_values = df['Empirical_x'].dropna()

    Notes
    -----
    The DataFrame structure depends on the plot type.
    Inspect df.columns to understand available data.

    Traces that share a name map to the same column names, so the later
    trace's data replaces the earlier one's.
    """
    columns: dict[str, pd.Series] = {}

    for position, trace in enumerate(fig.data):
        label = trace.name or f"trace_{position}"

        for axis in ("x", "y"):
            values = getattr(trace, axis, None)
            if values is None:
                continue
            # Wrapping in a Series with a fresh positional index lets the
            # DataFrame constructor align columns of different lengths and
            # pad the shorter ones with NaN instead of raising ValueError.
            columns[f"{label}_{axis}"] = pd.Series(values).reset_index(drop=True)

    return pd.DataFrame(columns)
|