ml4t-diagnostic 0.1.0a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (242) hide show
  1. ml4t/diagnostic/AGENT.md +25 -0
  2. ml4t/diagnostic/__init__.py +166 -0
  3. ml4t/diagnostic/backends/__init__.py +10 -0
  4. ml4t/diagnostic/backends/adapter.py +192 -0
  5. ml4t/diagnostic/backends/polars_backend.py +899 -0
  6. ml4t/diagnostic/caching/__init__.py +40 -0
  7. ml4t/diagnostic/caching/cache.py +331 -0
  8. ml4t/diagnostic/caching/decorators.py +131 -0
  9. ml4t/diagnostic/caching/smart_cache.py +339 -0
  10. ml4t/diagnostic/config/AGENT.md +24 -0
  11. ml4t/diagnostic/config/README.md +267 -0
  12. ml4t/diagnostic/config/__init__.py +219 -0
  13. ml4t/diagnostic/config/barrier_config.py +277 -0
  14. ml4t/diagnostic/config/base.py +301 -0
  15. ml4t/diagnostic/config/event_config.py +148 -0
  16. ml4t/diagnostic/config/feature_config.py +404 -0
  17. ml4t/diagnostic/config/multi_signal_config.py +55 -0
  18. ml4t/diagnostic/config/portfolio_config.py +215 -0
  19. ml4t/diagnostic/config/report_config.py +391 -0
  20. ml4t/diagnostic/config/sharpe_config.py +202 -0
  21. ml4t/diagnostic/config/signal_config.py +206 -0
  22. ml4t/diagnostic/config/trade_analysis_config.py +310 -0
  23. ml4t/diagnostic/config/validation.py +279 -0
  24. ml4t/diagnostic/core/__init__.py +29 -0
  25. ml4t/diagnostic/core/numba_utils.py +315 -0
  26. ml4t/diagnostic/core/purging.py +372 -0
  27. ml4t/diagnostic/core/sampling.py +471 -0
  28. ml4t/diagnostic/errors/__init__.py +205 -0
  29. ml4t/diagnostic/evaluation/AGENT.md +26 -0
  30. ml4t/diagnostic/evaluation/__init__.py +437 -0
  31. ml4t/diagnostic/evaluation/autocorrelation.py +531 -0
  32. ml4t/diagnostic/evaluation/barrier_analysis.py +1050 -0
  33. ml4t/diagnostic/evaluation/binary_metrics.py +910 -0
  34. ml4t/diagnostic/evaluation/dashboard.py +715 -0
  35. ml4t/diagnostic/evaluation/diagnostic_plots.py +1037 -0
  36. ml4t/diagnostic/evaluation/distribution/__init__.py +499 -0
  37. ml4t/diagnostic/evaluation/distribution/moments.py +299 -0
  38. ml4t/diagnostic/evaluation/distribution/tails.py +777 -0
  39. ml4t/diagnostic/evaluation/distribution/tests.py +470 -0
  40. ml4t/diagnostic/evaluation/drift/__init__.py +139 -0
  41. ml4t/diagnostic/evaluation/drift/analysis.py +432 -0
  42. ml4t/diagnostic/evaluation/drift/domain_classifier.py +517 -0
  43. ml4t/diagnostic/evaluation/drift/population_stability_index.py +310 -0
  44. ml4t/diagnostic/evaluation/drift/wasserstein.py +388 -0
  45. ml4t/diagnostic/evaluation/event_analysis.py +647 -0
  46. ml4t/diagnostic/evaluation/excursion.py +390 -0
  47. ml4t/diagnostic/evaluation/feature_diagnostics.py +873 -0
  48. ml4t/diagnostic/evaluation/feature_outcome.py +666 -0
  49. ml4t/diagnostic/evaluation/framework.py +935 -0
  50. ml4t/diagnostic/evaluation/metric_registry.py +255 -0
  51. ml4t/diagnostic/evaluation/metrics/AGENT.md +23 -0
  52. ml4t/diagnostic/evaluation/metrics/__init__.py +133 -0
  53. ml4t/diagnostic/evaluation/metrics/basic.py +160 -0
  54. ml4t/diagnostic/evaluation/metrics/conditional_ic.py +469 -0
  55. ml4t/diagnostic/evaluation/metrics/feature_outcome.py +475 -0
  56. ml4t/diagnostic/evaluation/metrics/ic_statistics.py +446 -0
  57. ml4t/diagnostic/evaluation/metrics/importance_analysis.py +338 -0
  58. ml4t/diagnostic/evaluation/metrics/importance_classical.py +375 -0
  59. ml4t/diagnostic/evaluation/metrics/importance_mda.py +371 -0
  60. ml4t/diagnostic/evaluation/metrics/importance_shap.py +715 -0
  61. ml4t/diagnostic/evaluation/metrics/information_coefficient.py +527 -0
  62. ml4t/diagnostic/evaluation/metrics/interactions.py +772 -0
  63. ml4t/diagnostic/evaluation/metrics/monotonicity.py +226 -0
  64. ml4t/diagnostic/evaluation/metrics/risk_adjusted.py +324 -0
  65. ml4t/diagnostic/evaluation/multi_signal.py +550 -0
  66. ml4t/diagnostic/evaluation/portfolio_analysis/__init__.py +83 -0
  67. ml4t/diagnostic/evaluation/portfolio_analysis/analysis.py +734 -0
  68. ml4t/diagnostic/evaluation/portfolio_analysis/metrics.py +589 -0
  69. ml4t/diagnostic/evaluation/portfolio_analysis/results.py +334 -0
  70. ml4t/diagnostic/evaluation/report_generation.py +824 -0
  71. ml4t/diagnostic/evaluation/signal_selector.py +452 -0
  72. ml4t/diagnostic/evaluation/stat_registry.py +139 -0
  73. ml4t/diagnostic/evaluation/stationarity/__init__.py +97 -0
  74. ml4t/diagnostic/evaluation/stationarity/analysis.py +518 -0
  75. ml4t/diagnostic/evaluation/stationarity/augmented_dickey_fuller.py +296 -0
  76. ml4t/diagnostic/evaluation/stationarity/kpss_test.py +308 -0
  77. ml4t/diagnostic/evaluation/stationarity/phillips_perron.py +365 -0
  78. ml4t/diagnostic/evaluation/stats/AGENT.md +43 -0
  79. ml4t/diagnostic/evaluation/stats/__init__.py +191 -0
  80. ml4t/diagnostic/evaluation/stats/backtest_overfitting.py +219 -0
  81. ml4t/diagnostic/evaluation/stats/bootstrap.py +228 -0
  82. ml4t/diagnostic/evaluation/stats/deflated_sharpe_ratio.py +591 -0
  83. ml4t/diagnostic/evaluation/stats/false_discovery_rate.py +295 -0
  84. ml4t/diagnostic/evaluation/stats/hac_standard_errors.py +108 -0
  85. ml4t/diagnostic/evaluation/stats/minimum_track_record.py +408 -0
  86. ml4t/diagnostic/evaluation/stats/moments.py +164 -0
  87. ml4t/diagnostic/evaluation/stats/rademacher_adjustment.py +436 -0
  88. ml4t/diagnostic/evaluation/stats/reality_check.py +155 -0
  89. ml4t/diagnostic/evaluation/stats/sharpe_inference.py +219 -0
  90. ml4t/diagnostic/evaluation/themes.py +330 -0
  91. ml4t/diagnostic/evaluation/threshold_analysis.py +957 -0
  92. ml4t/diagnostic/evaluation/trade_analysis.py +1136 -0
  93. ml4t/diagnostic/evaluation/trade_dashboard/__init__.py +32 -0
  94. ml4t/diagnostic/evaluation/trade_dashboard/app.py +315 -0
  95. ml4t/diagnostic/evaluation/trade_dashboard/export/__init__.py +18 -0
  96. ml4t/diagnostic/evaluation/trade_dashboard/export/csv.py +82 -0
  97. ml4t/diagnostic/evaluation/trade_dashboard/export/html.py +276 -0
  98. ml4t/diagnostic/evaluation/trade_dashboard/io.py +166 -0
  99. ml4t/diagnostic/evaluation/trade_dashboard/normalize.py +304 -0
  100. ml4t/diagnostic/evaluation/trade_dashboard/stats.py +386 -0
  101. ml4t/diagnostic/evaluation/trade_dashboard/style.py +79 -0
  102. ml4t/diagnostic/evaluation/trade_dashboard/tabs/__init__.py +21 -0
  103. ml4t/diagnostic/evaluation/trade_dashboard/tabs/patterns.py +354 -0
  104. ml4t/diagnostic/evaluation/trade_dashboard/tabs/shap_analysis.py +280 -0
  105. ml4t/diagnostic/evaluation/trade_dashboard/tabs/stat_validation.py +186 -0
  106. ml4t/diagnostic/evaluation/trade_dashboard/tabs/worst_trades.py +236 -0
  107. ml4t/diagnostic/evaluation/trade_dashboard/types.py +129 -0
  108. ml4t/diagnostic/evaluation/trade_shap/__init__.py +102 -0
  109. ml4t/diagnostic/evaluation/trade_shap/alignment.py +188 -0
  110. ml4t/diagnostic/evaluation/trade_shap/characterize.py +413 -0
  111. ml4t/diagnostic/evaluation/trade_shap/cluster.py +302 -0
  112. ml4t/diagnostic/evaluation/trade_shap/explain.py +208 -0
  113. ml4t/diagnostic/evaluation/trade_shap/hypotheses/__init__.py +23 -0
  114. ml4t/diagnostic/evaluation/trade_shap/hypotheses/generator.py +290 -0
  115. ml4t/diagnostic/evaluation/trade_shap/hypotheses/matcher.py +251 -0
  116. ml4t/diagnostic/evaluation/trade_shap/hypotheses/templates.yaml +467 -0
  117. ml4t/diagnostic/evaluation/trade_shap/models.py +386 -0
  118. ml4t/diagnostic/evaluation/trade_shap/normalize.py +116 -0
  119. ml4t/diagnostic/evaluation/trade_shap/pipeline.py +263 -0
  120. ml4t/diagnostic/evaluation/trade_shap_dashboard.py +283 -0
  121. ml4t/diagnostic/evaluation/trade_shap_diagnostics.py +588 -0
  122. ml4t/diagnostic/evaluation/validated_cv.py +535 -0
  123. ml4t/diagnostic/evaluation/visualization.py +1050 -0
  124. ml4t/diagnostic/evaluation/volatility/__init__.py +45 -0
  125. ml4t/diagnostic/evaluation/volatility/analysis.py +351 -0
  126. ml4t/diagnostic/evaluation/volatility/arch.py +258 -0
  127. ml4t/diagnostic/evaluation/volatility/garch.py +460 -0
  128. ml4t/diagnostic/integration/__init__.py +48 -0
  129. ml4t/diagnostic/integration/backtest_contract.py +671 -0
  130. ml4t/diagnostic/integration/data_contract.py +316 -0
  131. ml4t/diagnostic/integration/engineer_contract.py +226 -0
  132. ml4t/diagnostic/logging/__init__.py +77 -0
  133. ml4t/diagnostic/logging/logger.py +245 -0
  134. ml4t/diagnostic/logging/performance.py +234 -0
  135. ml4t/diagnostic/logging/progress.py +234 -0
  136. ml4t/diagnostic/logging/wandb.py +412 -0
  137. ml4t/diagnostic/metrics/__init__.py +9 -0
  138. ml4t/diagnostic/metrics/percentiles.py +128 -0
  139. ml4t/diagnostic/py.typed +1 -0
  140. ml4t/diagnostic/reporting/__init__.py +43 -0
  141. ml4t/diagnostic/reporting/base.py +130 -0
  142. ml4t/diagnostic/reporting/html_renderer.py +275 -0
  143. ml4t/diagnostic/reporting/json_renderer.py +51 -0
  144. ml4t/diagnostic/reporting/markdown_renderer.py +117 -0
  145. ml4t/diagnostic/results/AGENT.md +24 -0
  146. ml4t/diagnostic/results/__init__.py +105 -0
  147. ml4t/diagnostic/results/barrier_results/__init__.py +36 -0
  148. ml4t/diagnostic/results/barrier_results/hit_rate.py +304 -0
  149. ml4t/diagnostic/results/barrier_results/precision_recall.py +266 -0
  150. ml4t/diagnostic/results/barrier_results/profit_factor.py +297 -0
  151. ml4t/diagnostic/results/barrier_results/tearsheet.py +397 -0
  152. ml4t/diagnostic/results/barrier_results/time_to_target.py +305 -0
  153. ml4t/diagnostic/results/barrier_results/validation.py +38 -0
  154. ml4t/diagnostic/results/base.py +177 -0
  155. ml4t/diagnostic/results/event_results.py +349 -0
  156. ml4t/diagnostic/results/feature_results.py +787 -0
  157. ml4t/diagnostic/results/multi_signal_results.py +431 -0
  158. ml4t/diagnostic/results/portfolio_results.py +281 -0
  159. ml4t/diagnostic/results/sharpe_results.py +448 -0
  160. ml4t/diagnostic/results/signal_results/__init__.py +74 -0
  161. ml4t/diagnostic/results/signal_results/ic.py +581 -0
  162. ml4t/diagnostic/results/signal_results/irtc.py +110 -0
  163. ml4t/diagnostic/results/signal_results/quantile.py +392 -0
  164. ml4t/diagnostic/results/signal_results/tearsheet.py +456 -0
  165. ml4t/diagnostic/results/signal_results/turnover.py +213 -0
  166. ml4t/diagnostic/results/signal_results/validation.py +147 -0
  167. ml4t/diagnostic/signal/AGENT.md +17 -0
  168. ml4t/diagnostic/signal/__init__.py +69 -0
  169. ml4t/diagnostic/signal/_report.py +152 -0
  170. ml4t/diagnostic/signal/_utils.py +261 -0
  171. ml4t/diagnostic/signal/core.py +275 -0
  172. ml4t/diagnostic/signal/quantile.py +148 -0
  173. ml4t/diagnostic/signal/result.py +214 -0
  174. ml4t/diagnostic/signal/signal_ic.py +129 -0
  175. ml4t/diagnostic/signal/turnover.py +182 -0
  176. ml4t/diagnostic/splitters/AGENT.md +19 -0
  177. ml4t/diagnostic/splitters/__init__.py +36 -0
  178. ml4t/diagnostic/splitters/base.py +501 -0
  179. ml4t/diagnostic/splitters/calendar.py +421 -0
  180. ml4t/diagnostic/splitters/calendar_config.py +91 -0
  181. ml4t/diagnostic/splitters/combinatorial.py +1064 -0
  182. ml4t/diagnostic/splitters/config.py +322 -0
  183. ml4t/diagnostic/splitters/cpcv/__init__.py +57 -0
  184. ml4t/diagnostic/splitters/cpcv/combinations.py +119 -0
  185. ml4t/diagnostic/splitters/cpcv/partitioning.py +263 -0
  186. ml4t/diagnostic/splitters/cpcv/purge_engine.py +379 -0
  187. ml4t/diagnostic/splitters/cpcv/windows.py +190 -0
  188. ml4t/diagnostic/splitters/group_isolation.py +329 -0
  189. ml4t/diagnostic/splitters/persistence.py +316 -0
  190. ml4t/diagnostic/splitters/utils.py +207 -0
  191. ml4t/diagnostic/splitters/walk_forward.py +757 -0
  192. ml4t/diagnostic/utils/__init__.py +42 -0
  193. ml4t/diagnostic/utils/config.py +542 -0
  194. ml4t/diagnostic/utils/dependencies.py +318 -0
  195. ml4t/diagnostic/utils/sessions.py +127 -0
  196. ml4t/diagnostic/validation/__init__.py +54 -0
  197. ml4t/diagnostic/validation/dataframe.py +274 -0
  198. ml4t/diagnostic/validation/returns.py +280 -0
  199. ml4t/diagnostic/validation/timeseries.py +299 -0
  200. ml4t/diagnostic/visualization/AGENT.md +19 -0
  201. ml4t/diagnostic/visualization/__init__.py +223 -0
  202. ml4t/diagnostic/visualization/backtest/__init__.py +98 -0
  203. ml4t/diagnostic/visualization/backtest/cost_attribution.py +762 -0
  204. ml4t/diagnostic/visualization/backtest/executive_summary.py +895 -0
  205. ml4t/diagnostic/visualization/backtest/interactive_controls.py +673 -0
  206. ml4t/diagnostic/visualization/backtest/statistical_validity.py +874 -0
  207. ml4t/diagnostic/visualization/backtest/tearsheet.py +565 -0
  208. ml4t/diagnostic/visualization/backtest/template_system.py +373 -0
  209. ml4t/diagnostic/visualization/backtest/trade_plots.py +1172 -0
  210. ml4t/diagnostic/visualization/barrier_plots.py +782 -0
  211. ml4t/diagnostic/visualization/core.py +1060 -0
  212. ml4t/diagnostic/visualization/dashboards/__init__.py +36 -0
  213. ml4t/diagnostic/visualization/dashboards/base.py +582 -0
  214. ml4t/diagnostic/visualization/dashboards/importance.py +801 -0
  215. ml4t/diagnostic/visualization/dashboards/interaction.py +263 -0
  216. ml4t/diagnostic/visualization/dashboards.py +43 -0
  217. ml4t/diagnostic/visualization/data_extraction/__init__.py +48 -0
  218. ml4t/diagnostic/visualization/data_extraction/importance.py +649 -0
  219. ml4t/diagnostic/visualization/data_extraction/interaction.py +504 -0
  220. ml4t/diagnostic/visualization/data_extraction/types.py +113 -0
  221. ml4t/diagnostic/visualization/data_extraction/validation.py +66 -0
  222. ml4t/diagnostic/visualization/feature_plots.py +888 -0
  223. ml4t/diagnostic/visualization/interaction_plots.py +618 -0
  224. ml4t/diagnostic/visualization/portfolio/__init__.py +41 -0
  225. ml4t/diagnostic/visualization/portfolio/dashboard.py +514 -0
  226. ml4t/diagnostic/visualization/portfolio/drawdown_plots.py +341 -0
  227. ml4t/diagnostic/visualization/portfolio/returns_plots.py +487 -0
  228. ml4t/diagnostic/visualization/portfolio/risk_plots.py +301 -0
  229. ml4t/diagnostic/visualization/report_generation.py +1343 -0
  230. ml4t/diagnostic/visualization/signal/__init__.py +103 -0
  231. ml4t/diagnostic/visualization/signal/dashboard.py +911 -0
  232. ml4t/diagnostic/visualization/signal/event_plots.py +514 -0
  233. ml4t/diagnostic/visualization/signal/ic_plots.py +635 -0
  234. ml4t/diagnostic/visualization/signal/multi_signal_dashboard.py +974 -0
  235. ml4t/diagnostic/visualization/signal/multi_signal_plots.py +603 -0
  236. ml4t/diagnostic/visualization/signal/quantile_plots.py +625 -0
  237. ml4t/diagnostic/visualization/signal/turnover_plots.py +400 -0
  238. ml4t/diagnostic/visualization/trade_shap/__init__.py +90 -0
  239. ml4t_diagnostic-0.1.0a1.dist-info/METADATA +1044 -0
  240. ml4t_diagnostic-0.1.0a1.dist-info/RECORD +242 -0
  241. ml4t_diagnostic-0.1.0a1.dist-info/WHEEL +4 -0
  242. ml4t_diagnostic-0.1.0a1.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,1037 @@
1
+ """Interactive diagnostic visualizations for feature analysis.
2
+
3
+ This module provides Plotly-based interactive diagnostic plots for the Feature
4
+ Diagnostics framework (Module A), including:
5
+
6
+ - ACF/PACF plots with confidence bands
7
+ - QQ plots for normality assessment
8
+ - Volatility clustering visualizations
9
+ - Distribution analysis with fitted curves
10
+
11
+ All visualizations are interactive (zoom, hover, pan) and designed for
12
+ browser-based dashboards. Static exports (PNG, PDF) are available via
13
+ the export_static() function.
14
+
15
+ References
16
+ ----------
17
+ .. [1] Box, G. E. P., & Jenkins, G. M. (1976). Time Series Analysis: Forecasting and Control.
18
+ .. [2] Hamilton, J. D. (1994). Time Series Analysis. Princeton University Press.
19
+ .. [3] Tsay, R. S. (2005). Analysis of Financial Time Series. Wiley.
20
+ """
21
+
22
+ from typing import TYPE_CHECKING
23
+
24
+ import numpy as np
25
+ import pandas as pd
26
+ import plotly.graph_objects as go
27
+ from plotly.subplots import make_subplots
28
+ from scipy import stats
29
+
30
+ if TYPE_CHECKING:
31
+ from numpy.typing import NDArray
32
+
33
# Shared palette for the financial diagnostic figures in this module
# (kept consistent with viz.py). Values are CSS colour strings that are
# passed straight to Plotly.
COLORS = dict(
    primary="#3366CC",  # Blue
    secondary="#FF9900",  # Orange
    positive="#00CC88",  # Green
    negative="#FF4444",  # Red
    neutral="#888888",  # Gray
    confidence="rgba(255, 68, 68, 0.2)",  # Light red fill
)
42
+
43
+
44
def _add_correlogram_panel(
    fig: go.Figure,
    lags: "NDArray",
    values: "NDArray",
    conf_level: float,
    col: int,
    color: str,
    label: str,
) -> None:
    """Add one correlogram (bars plus shaded ± confidence band) to column ``col``."""
    fig.add_trace(
        go.Bar(
            x=lags,
            y=values,
            marker_color=color,
            name=label,
            hovertemplate=f"Lag: %{{x}}<br>{label}: %{{y:.4f}}<extra></extra>",
            showlegend=False,
        ),
        row=1,
        col=col,
    )

    # Styling shared by the upper and lower band edges.
    band_style = {
        "x": lags,
        "mode": "lines",
        "line": {"color": COLORS["negative"], "dash": "dash", "width": 1},
        "name": "Confidence Band",
        "showlegend": False,
        "hoverinfo": "skip",
    }

    # The upper edge must be added first: the lower edge uses fill="tonexty",
    # which shades the area between it and the trace added just before it.
    fig.add_trace(
        go.Scatter(y=[conf_level] * len(lags), **band_style),
        row=1,
        col=col,
    )
    fig.add_trace(
        go.Scatter(
            y=[-conf_level] * len(lags),
            fill="tonexty",
            fillcolor=COLORS["confidence"],
            **band_style,
        ),
        row=1,
        col=col,
    )


def plot_acf_pacf(
    data: "NDArray | pd.Series",
    max_lags: int = 40,
    alpha: float = 0.05,
    title: str | None = None,
    height: int = 400,
) -> go.Figure:
    """Create interactive ACF and PACF plots with confidence bands.

    Builds a one-row, two-column figure:

    1. Autocorrelation Function (ACF) - correlation with lagged values
    2. Partial Autocorrelation Function (PACF) - correlation controlling for
       intermediate lags

    Each panel shows the values as bars for lags ``0..max_lags`` together with
    a shaded band at ``±z / sqrt(n)``. Hover over bars to see exact values.

    Parameters
    ----------
    data : ndarray or pd.Series
        Time series data to analyze. Any array-like that converts to a float
        array is accepted; NaN entries are dropped before the analysis.
    max_lags : int, default 40
        Maximum number of lags to display. Values of ``n`` or more are clamped
        to ``n - 1``, where ``n`` is the number of non-NaN observations.
    alpha : float, default 0.05
        Significance level for confidence bands (default: 95% confidence).
        Must lie strictly between 0 and 1.
    title : str, optional
        Figure title. If None, uses "ACF and PACF Analysis"
    height : int, default 400
        Figure height in pixels

    Returns
    -------
    go.Figure
        Interactive Plotly figure with ACF and PACF plots

    Raises
    ------
    ValueError
        If no observations remain after dropping NaN values, if ``alpha`` is
        not strictly between 0 and 1, or if ``max_lags`` is negative.

    Examples
    --------
    >>> import numpy as np
    >>> # AR(1) process
    >>> data = np.random.randn(1000)
    >>> for i in range(1, len(data)):
    ...     data[i] = 0.7 * data[i-1] + np.random.randn()
    >>> fig = plot_acf_pacf(data)
    >>> fig.show()  # Opens in browser
    >>> # Or in dashboard:
    >>> import streamlit as st
    >>> st.plotly_chart(fig)

    Notes
    -----
    The confidence bands are computed as ±z * sqrt(1/n) where z is the
    critical value for the specified alpha level and n is the sample size.
    This assumes the series is white noise under the null hypothesis.

    For ACF, significant lags indicate autocorrelation that may violate
    assumptions of many statistical tests.

    For PACF, the number of significant lags helps identify AR order:

    - PACF cuts off after lag p → AR(p) process
    - ACF cuts off after lag q → MA(q) process
    - Both decay gradually → ARMA process

    See Also
    --------
    ml4t.diagnostic.evaluation.autocorrelation : Statistical autocorrelation tests
    statsmodels.graphics.tsaplots : Alternative ACF/PACF plotting

    References
    ----------
    .. [1] Box, G. E. P., & Jenkins, G. M. (1976). Time Series Analysis:
           Forecasting and Control.
    """
    # Without this check an out-of-range alpha gives a NaN or infinite critical
    # value, which would silently draw a meaningless band.
    if not 0 < alpha < 1:
        raise ValueError(f"alpha must be strictly between 0 and 1, got {alpha}")
    if max_lags < 0:
        raise ValueError(f"max_lags must be non-negative, got {max_lags}")

    # Coerce to a float ndarray so that lists, tuples and Series all support
    # the boolean-mask indexing used to drop NaN values.
    data_array = np.asarray(data, dtype=float)
    data_array = data_array[~np.isnan(data_array)]

    n = data_array.size
    if n == 0:
        raise ValueError("Input data is empty after removing NaN values")

    # A series of n points supports at most n - 1 lags.
    max_lags = min(max_lags, n - 1)

    # Compute ACF and PACF (helpers defined elsewhere in this module).
    acf_values = _compute_acf(data_array, max_lags)
    pacf_values = _compute_pacf(data_array, max_lags)

    # Two-sided band half-width under the white-noise null.
    z_crit = stats.norm.ppf(1 - alpha / 2)
    conf_level = z_crit / np.sqrt(n)

    confidence_pct = f"{100 * (1 - alpha):.0f}"
    fig = make_subplots(
        rows=1,
        cols=2,
        subplot_titles=(
            f"ACF ({confidence_pct}% Confidence Band)",
            f"PACF ({confidence_pct}% Confidence Band)",
        ),
        horizontal_spacing=0.12,
    )

    lags = np.arange(max_lags + 1)

    _add_correlogram_panel(
        fig, lags, acf_values, conf_level, col=1, color=COLORS["primary"], label="ACF"
    )
    _add_correlogram_panel(
        fig, lags, pacf_values, conf_level, col=2, color=COLORS["secondary"], label="PACF"
    )

    # Zero lines and axis titles, added once both panels contain traces.
    for col, y_title in ((1, "Autocorrelation"), (2, "Partial Autocorrelation")):
        fig.add_hline(y=0, line_color="black", line_width=0.5, row=1, col=col)
        fig.update_xaxes(title_text="Lag", row=1, col=col)
        fig.update_yaxes(title_text=y_title, row=1, col=col)

    fig.update_layout(
        title={
            "text": "ACF and PACF Analysis" if title is None else title,
            "x": 0.5,
            "xanchor": "center",
        },
        height=height,
        hovermode="x unified",
        plot_bgcolor="white",
        paper_bgcolor="white",
    )

    # Grid styling
    fig.update_xaxes(showgrid=True, gridwidth=1, gridcolor="lightgray")
    fig.update_yaxes(showgrid=True, gridwidth=1, gridcolor="lightgray")

    return fig
263
+
264
+
265
def plot_qq(
    data: "NDArray | pd.Series",
    distribution: str = "norm",
    title: str | None = None,
    height: int = 500,
    width: int = 500,
) -> go.Figure:
    """Create interactive QQ plot for assessing distributional assumptions.

    A Quantile-Quantile (QQ) plot compares the quantiles of the data against
    the quantiles of a theoretical distribution. Points falling along the
    reference line indicate the data follows the theoretical distribution.

    Interactive features: hover for exact values, zoom to focus on tails.

    Deviations from the line indicate departures from the assumed distribution:

    - S-shaped curve: Heavy tails (fat-tailed distribution)
    - Inverted S: Light tails (thin-tailed distribution)
    - Points above/below line at extremes: Asymmetric tails

    Parameters
    ----------
    data : ndarray or pd.Series
        Data to assess. Any array-like that converts to a float array is
        accepted; NaN entries are dropped before plotting.
    distribution : str, default "norm"
        Theoretical distribution to compare against.
        Options: "norm" (normal), "t" (Student's t), "uniform".
        For "t" the degrees of freedom are estimated from the data with
        ``scipy.stats.t.fit``.
    title : str, optional
        Plot title. If None, uses "QQ Plot vs {name} Distribution", where
        ``name`` is the display name of the chosen distribution.
    height : int, default 500
        Figure height in pixels
    width : int, default 500
        Figure width in pixels

    Returns
    -------
    go.Figure
        Interactive Plotly figure with QQ plot

    Raises
    ------
    ValueError
        If no observations remain after dropping NaN values, or if
        ``distribution`` is not one of the supported names.

    Examples
    --------
    >>> import numpy as np
    >>> # Normal data
    >>> data = np.random.randn(1000)
    >>> fig = plot_qq(data)
    >>> fig.show()

    >>> # Heavy-tailed data
    >>> data = np.random.standard_t(df=3, size=1000)
    >>> fig = plot_qq(data, distribution='t')
    >>> fig.show()

    Notes
    -----
    The QQ plot is a graphical complement to normality tests like Jarque-Bera
    or Shapiro-Wilk. It provides visual insight into *how* the data deviates
    from normality, not just whether it does.

    Common patterns:

    - Normal: Points on diagonal
    - Skewed: Curved pattern
    - Heavy-tailed: Points diverge at extremes (S-curve)
    - Light-tailed: Points converge at extremes (inverted S)

    For financial returns, heavy tails (leptokurtosis) are common, so observing
    departures at the extremes is typical.

    The reference line is the least-squares fit returned by
    ``scipy.stats.probplot``.

    See Also
    --------
    ml4t.diagnostic.evaluation.distribution : Distribution diagnostic tests
    scipy.stats.probplot : Underlying QQ plot function

    References
    ----------
    .. [1] Wilk, M. B., & Gnanadesikan, R. (1968). "Probability plotting
           methods for the analysis of data." Biometrika, 55(1), 1-17.
    """
    # Coerce to a float ndarray so that lists, tuples and Series all support
    # the boolean-mask indexing used to drop NaN values.
    values = np.asarray(data, dtype=float)
    values = values[~np.isnan(values)]

    if values.size == 0:
        raise ValueError("Input data is empty after removing NaN values")

    # probplot returns ((theoretical, ordered sample), (slope, intercept, r));
    # the correlation coefficient r is not displayed, so it is discarded.
    if distribution == "norm":
        (theoretical_q, sample_q), (slope, intercept, _) = stats.probplot(values, dist="norm")
        dist_name = "Normal"
    elif distribution == "t":
        # Estimate degrees of freedom; the first fitted parameter is the shape.
        df = stats.t.fit(values)[0]
        (theoretical_q, sample_q), (slope, intercept, _) = stats.probplot(
            values, dist=stats.t, sparams=(df,)
        )
        dist_name = f"Student's t (df={df:.1f})"
    elif distribution == "uniform":
        (theoretical_q, sample_q), (slope, intercept, _) = stats.probplot(values, dist="uniform")
        dist_name = "Uniform"
    else:
        raise ValueError(f"Unknown distribution: {distribution}. Use 'norm', 't', or 'uniform'")

    fig = go.Figure()

    # Sample quantiles against theoretical quantiles.
    fig.add_trace(
        go.Scatter(
            x=theoretical_q,
            y=sample_q,
            mode="markers",
            marker={"color": COLORS["primary"], "size": 5, "opacity": 0.6},
            name="Sample Data",
            hovertemplate="Theoretical: %{x:.3f}<br>Sample: %{y:.3f}<extra></extra>",
        )
    )

    # Reference line from the probplot least-squares fit.
    fig.add_trace(
        go.Scatter(
            x=theoretical_q,
            y=slope * theoretical_q + intercept,
            mode="lines",
            line={"color": COLORS["negative"], "dash": "dash", "width": 2},
            name="Reference Line",
            hovertemplate="Theoretical: %{x:.3f}<br>Expected: %{y:.3f}<extra></extra>",
        )
    )

    if title is None:
        title = f"QQ Plot vs {dist_name} Distribution"

    fig.update_layout(
        title={"text": title, "x": 0.5, "xanchor": "center"},
        xaxis_title="Theoretical Quantiles",
        yaxis_title="Sample Quantiles",
        height=height,
        width=width,
        hovermode="closest",
        plot_bgcolor="white",
        paper_bgcolor="white",
        showlegend=True,
        legend={"x": 0.02, "y": 0.98, "bgcolor": "rgba(255,255,255,0.8)"},
    )

    # Grid styling
    fig.update_xaxes(showgrid=True, gridwidth=1, gridcolor="lightgray")
    fig.update_yaxes(showgrid=True, gridwidth=1, gridcolor="lightgray")

    # Interpretation hint, positioned in paper coordinates (lower-left area).
    fig.add_annotation(
        text=(
            "Points on diagonal → data follows distribution<br>S-curve → heavy tails<br>Inverted S → light tails"
        ),
        xref="paper",
        yref="paper",
        x=0.02,
        y=0.10,
        showarrow=False,
        bgcolor="rgba(255, 248, 220, 0.8)",
        bordercolor="gray",
        borderwidth=1,
        font={"size": 9},
        align="left",
    )

    return fig
437
+
438
+
439
+ def plot_volatility_clustering(
440
+ data: "NDArray | pd.Series",
441
+ window: int = 20,
442
+ title: str | None = None,
443
+ height: int = 800,
444
+ ) -> go.Figure:
445
+ """Create interactive volatility clustering visualization.
446
+
447
+ Volatility clustering is a common feature in financial time series where
448
+ large changes tend to be followed by large changes (of either sign), and
449
+ small changes tend to be followed by small changes.
450
+
451
+ This creates a 4-panel interactive figure showing:
452
+ 1. Original returns series
453
+ 2. Absolute returns (magnitude of changes)
454
+ 3. Squared returns (volatility proxy)
455
+ 4. Rolling volatility (rolling standard deviation)
456
+
457
+ Hover for exact values, zoom to focus on volatility episodes, linked x-axes.
458
+
459
+ Parameters
460
+ ----------
461
+ data : ndarray or pd.Series
462
+ Time series data (typically returns)
463
+ window : int, default 20
464
+ Rolling window size for volatility calculation
465
+ title : str, optional
466
+ Figure title. If None, uses "Volatility Clustering Analysis"
467
+ height : int, default 800
468
+ Figure height in pixels
469
+
470
+ Returns
471
+ -------
472
+ go.Figure
473
+ Interactive Plotly figure with 4-panel volatility analysis
474
+
475
+ Examples
476
+ --------
477
+ >>> import numpy as np
478
+ >>> # GARCH-like data
479
+ >>> n = 1000
480
+ >>> returns = np.zeros(n)
481
+ >>> sigma = np.zeros(n)
482
+ >>> sigma[0] = 0.1
483
+ >>> for t in range(1, n):
484
+ ... sigma[t] = np.sqrt(0.01 + 0.05 * returns[t-1]**2 + 0.9 * sigma[t-1]**2)
485
+ ... returns[t] = sigma[t] * np.random.randn()
486
+ >>> fig = plot_volatility_clustering(returns)
487
+ >>> fig.show()
488
+
489
+ Notes
490
+ -----
491
+ Volatility clustering violates the constant variance (homoscedasticity)
492
+ assumption of many statistical models. If present, consider:
493
+ - GARCH models for volatility forecasting
494
+ - Robust standard errors in regressions
495
+ - Volatility-adjusted metrics
496
+
497
+ Visual signs of clustering:
498
+ - Periods of high/low volatility in returns plot
499
+ - Autocorrelation in squared returns (clustering persists)
500
+ - Time-varying rolling volatility
501
+
502
+ See Also
503
+ --------
504
+ ml4t-diagnostic.evaluation.volatility : ARCH/GARCH tests for volatility clustering
505
+
506
+ References
507
+ ----------
508
+ .. [1] Engle, R. F. (1982). "Autoregressive Conditional Heteroscedasticity
509
+ with Estimates of the Variance of United Kingdom Inflation."
510
+ Econometrica, 50(4), 987-1007.
511
+ .. [2] Bollerslev, T. (1986). "Generalized autoregressive conditional
512
+ heteroskedasticity." Journal of Econometrics, 31(3), 307-327.
513
+ """
514
+ # Convert to numpy array if pandas Series
515
+ original_index: pd.Index | None
516
+ if isinstance(data, pd.Series):
517
+ original_index = data.index
518
+ data_values = data.to_numpy()
519
+ else:
520
+ original_index = None
521
+ data_values = data
522
+
523
+ # Remove NaN values
524
+ valid_idx = ~np.isnan(data_values)
525
+ data_values = data_values[valid_idx]
526
+
527
+ if len(data_values) == 0:
528
+ raise ValueError("Input data is empty after removing NaN values")
529
+
530
+ # Create time index - either filtered original index or sequential integers
531
+ time_index: NDArray = (
532
+ original_index[valid_idx].to_numpy()
533
+ if original_index is not None
534
+ else np.arange(len(data_values))
535
+ )
536
+
537
+ # Compute volatility measures
538
+ abs_returns = np.abs(data_values)
539
+ squared_returns = data_values**2
540
+ rolling_vol = pd.Series(data_values).rolling(window=window, min_periods=1).std().values
541
+
542
+ # Create 4-panel figure
543
+ fig = make_subplots(
544
+ rows=4,
545
+ cols=1,
546
+ subplot_titles=(
547
+ "Returns Series",
548
+ "Absolute Returns (Magnitude)",
549
+ "Squared Returns (Volatility Proxy)",
550
+ f"Rolling Volatility (window={window})",
551
+ ),
552
+ shared_xaxes=True,
553
+ vertical_spacing=0.06,
554
+ )
555
+
556
+ # 1. Original returns
557
+ fig.add_trace(
558
+ go.Scatter(
559
+ x=time_index,
560
+ y=data_values,
561
+ mode="lines",
562
+ line={"color": COLORS["primary"], "width": 0.8},
563
+ name="Returns",
564
+ hovertemplate="Time: %{x}<br>Return: %{y:.4f}<extra></extra>",
565
+ ),
566
+ row=1,
567
+ col=1,
568
+ )
569
+ fig.add_hline(y=0, line_color="black", line_width=0.5, row=1, col=1)
570
+
571
+ # 2. Absolute returns
572
+ mean_abs = np.mean(abs_returns)
573
+ fig.add_trace(
574
+ go.Scatter(
575
+ x=time_index,
576
+ y=abs_returns,
577
+ mode="lines",
578
+ line={"color": COLORS["secondary"], "width": 0.8},
579
+ name="Absolute Returns",
580
+ hovertemplate="Time: %{x}<br>|Return|: %{y:.4f}<extra></extra>",
581
+ ),
582
+ row=2,
583
+ col=1,
584
+ )
585
+ fig.add_hline(
586
+ y=mean_abs,
587
+ line_color=COLORS["negative"],
588
+ line_dash="dash",
589
+ line_width=1.5,
590
+ annotation_text=f"Mean: {mean_abs:.4f}",
591
+ annotation_position="right",
592
+ row=2,
593
+ col=1,
594
+ )
595
+
596
+ # 3. Squared returns
597
+ mean_sq = np.mean(squared_returns)
598
+ fig.add_trace(
599
+ go.Scatter(
600
+ x=time_index,
601
+ y=squared_returns,
602
+ mode="lines",
603
+ line={"color": COLORS["positive"], "width": 0.8},
604
+ name="Squared Returns",
605
+ hovertemplate="Time: %{x}<br>Return²: %{y:.6f}<extra></extra>",
606
+ ),
607
+ row=3,
608
+ col=1,
609
+ )
610
+ fig.add_hline(
611
+ y=mean_sq,
612
+ line_color=COLORS["negative"],
613
+ line_dash="dash",
614
+ line_width=1.5,
615
+ annotation_text=f"Mean: {mean_sq:.6f}",
616
+ annotation_position="right",
617
+ row=3,
618
+ col=1,
619
+ )
620
+
621
+ # 4. Rolling volatility
622
+ fig.add_trace(
623
+ go.Scatter(
624
+ x=time_index,
625
+ y=rolling_vol,
626
+ mode="lines",
627
+ line={"color": COLORS["negative"], "width": 1.2},
628
+ fill="tozeroy",
629
+ fillcolor="rgba(255, 68, 68, 0.2)",
630
+ name="Rolling Volatility",
631
+ hovertemplate="Time: %{x}<br>Volatility: %{y:.4f}<extra></extra>",
632
+ ),
633
+ row=4,
634
+ col=1,
635
+ )
636
+
637
+ # Update axes
638
+ fig.update_yaxes(title_text="Returns", row=1, col=1)
639
+ fig.update_yaxes(title_text="|Returns|", row=2, col=1)
640
+ fig.update_yaxes(title_text="Returns²", row=3, col=1)
641
+ fig.update_yaxes(title_text="Volatility", row=4, col=1)
642
+ fig.update_xaxes(title_text="Time", row=4, col=1)
643
+
644
+ # Update layout
645
+ if title is None:
646
+ title = "Volatility Clustering Analysis"
647
+
648
+ fig.update_layout(
649
+ title={"text": title, "x": 0.5, "xanchor": "center"},
650
+ height=height,
651
+ hovermode="x unified",
652
+ plot_bgcolor="white",
653
+ paper_bgcolor="white",
654
+ showlegend=False,
655
+ )
656
+
657
+ # Grid styling
658
+ fig.update_xaxes(showgrid=True, gridwidth=1, gridcolor="lightgray")
659
+ fig.update_yaxes(showgrid=True, gridwidth=1, gridcolor="lightgray")
660
+
661
+ return fig
662
+
663
+
664
def plot_distribution(
    data: "NDArray | pd.Series",
    bins: int = 50,
    fit_normal: bool = True,
    fit_t: bool = False,
    show_moments: bool = True,
    title: str | None = None,
    height: int = 500,
) -> go.Figure:
    """Create interactive distribution histogram with fitted curves.

    Visualizes the empirical distribution of data with:
    - Interactive histogram of observed values (density-normalized)
    - Fitted normal distribution (optional)
    - Fitted Student's t distribution (optional)
    - Moment statistics annotation (mean, std, skewness, excess kurtosis)

    Hover for bin details, toggle fitted distributions on/off.

    Useful for assessing normality and identifying distributional characteristics
    such as skewness and heavy tails.

    Parameters
    ----------
    data : ndarray or pd.Series
        Data to plot. Converted to a float array; NaN values are dropped
        before any statistic is computed.
    bins : int, default 50
        Number of histogram bins
    fit_normal : bool, default True
        Whether to overlay a normal density using the sample mean and the
        sample standard deviation (ddof=1)
    fit_t : bool, default False
        Whether to overlay a Student's t density fitted with
        ``scipy.stats.t.fit``
    show_moments : bool, default True
        Whether to display moment statistics on plot
    title : str, optional
        Plot title. If None, uses "Distribution Analysis"
    height : int, default 500
        Figure height in pixels

    Returns
    -------
    go.Figure
        Interactive Plotly figure with distribution plot

    Raises
    ------
    ValueError
        If no observations remain after removing NaN values.

    Examples
    --------
    >>> import numpy as np
    >>> # Normal data
    >>> data = np.random.randn(1000)
    >>> fig = plot_distribution(data)
    >>> fig.show()

    >>> # Heavy-tailed data
    >>> data = np.random.standard_t(df=3, size=1000)
    >>> fig = plot_distribution(data, fit_t=True)
    >>> fig.show()

    Notes
    -----
    Financial returns typically exhibit:
    - Near-zero mean (if de-meaned)
    - Positive excess kurtosis (heavy tails)
    - Slight negative skewness (larger losses than gains)

    The annotation reports *excess* kurtosis (Fisher definition), which is 0
    for a normal distribution. The fitted distributions help identify:
    - Normal: Good fit if excess kurtosis ≈ 0 (raw kurtosis ≈ 3), skewness ≈ 0
    - Student's t: Better fit for heavy tails (excess kurtosis > 0)

    See Also
    --------
    ml4t.diagnostic.evaluation.distribution : Statistical distribution tests
    plot_qq : QQ plot for normality assessment

    References
    ----------
    .. [1] Mandelbrot, B. (1963). "The variation of certain speculative prices."
           Journal of Business, 36(4), 394-419.
    .. [2] Fama, E. F. (1965). "The behavior of stock-market prices."
           Journal of Business, 38(1), 34-105.
    """
    # Coerce to a float array up front so that np.isnan below works for
    # integer, nullable and object-typed inputs as well as plain sequences.
    if isinstance(data, pd.Series):
        values = data.to_numpy(dtype=float, na_value=np.nan)
    else:
        values = np.asarray(data, dtype=float)

    # Remove NaN values (boolean masking also flattens the array)
    values = values[~np.isnan(values)]

    if values.size == 0:
        raise ValueError("Input data is empty after removing NaN values")

    # Compute moments
    mean = np.mean(values)
    std = np.std(values, ddof=1)
    skewness = stats.skew(values)
    kurtosis = stats.kurtosis(values, fisher=True)  # Excess kurtosis

    # Create figure
    fig = go.Figure()

    # Add histogram, normalized so it is comparable with the fitted densities
    fig.add_trace(
        go.Histogram(
            x=values,
            nbinsx=bins,
            histnorm="probability density",
            marker={
                "color": COLORS["primary"],
                "opacity": 0.6,
                "line": {"color": "black", "width": 0.5},
            },
            name="Empirical",
            hovertemplate="Value: %{x:.4f}<br>Density: %{y:.4f}<extra></extra>",
        )
    )

    # Generate x values for fitted distributions over the observed range
    x = np.linspace(values.min(), values.max(), 500)

    # Fit and plot normal distribution
    if fit_normal:
        normal_pdf = stats.norm.pdf(x, mean, std)
        fig.add_trace(
            go.Scatter(
                x=x,
                y=normal_pdf,
                mode="lines",
                line={"color": COLORS["negative"], "width": 2},
                name=f"Normal(μ={mean:.3f}, σ={std:.3f})",
                hovertemplate="Value: %{x:.4f}<br>Density: %{y:.4f}<extra></extra>",
            )
        )

    # Fit and plot Student's t distribution
    if fit_t:
        df, loc, scale = stats.t.fit(values)
        t_pdf = stats.t.pdf(x, df, loc, scale)
        fig.add_trace(
            go.Scatter(
                x=x,
                y=t_pdf,
                mode="lines",
                line={"color": COLORS["positive"], "width": 2, "dash": "dash"},
                name=f"Student's t (df={df:.1f})",
                hovertemplate="Value: %{x:.4f}<br>Density: %{y:.4f}<extra></extra>",
            )
        )

    # Update layout
    if title is None:
        title = "Distribution Analysis"

    fig.update_layout(
        title={"text": title, "x": 0.5, "xanchor": "center"},
        xaxis_title="Value",
        yaxis_title="Density",
        height=height,
        hovermode="closest",
        plot_bgcolor="white",
        paper_bgcolor="white",
        showlegend=True,
        legend={"x": 0.98, "y": 0.98, "xanchor": "right", "bgcolor": "rgba(255,255,255,0.8)"},
    )

    # Grid styling
    fig.update_xaxes(showgrid=True, gridwidth=1, gridcolor="lightgray")
    fig.update_yaxes(showgrid=True, gridwidth=1, gridcolor="lightgray")

    # Add moment statistics (top-left, opposite the legend)
    if show_moments:
        textstr = (
            f"<b>Moments</b><br>"
            f"Mean: {mean:.4f}<br>"
            f"Std Dev: {std:.4f}<br>"
            f"Skewness: {skewness:.4f}<br>"
            f"Excess Kurtosis: {kurtosis:.4f}"
        )
        fig.add_annotation(
            text=textstr,
            xref="paper",
            yref="paper",
            x=0.02,
            y=0.98,
            showarrow=False,
            bgcolor="rgba(255, 248, 220, 0.8)",
            bordercolor="gray",
            borderwidth=1,
            font={"size": 10, "family": "monospace"},
            align="left",
            valign="top",
        )

    return fig
858
+
859
+
860
+ # Helper functions for ACF/PACF computation (unchanged from matplotlib version)
861
+
862
+
863
def _compute_acf(data: "NDArray", max_lags: int) -> "NDArray":
    """Return the sample autocorrelation of a series for lags 0..max_lags.

    The series is mean-centered, and every lagged autocovariance is divided
    by the full sample length before being normalized by the lag-0
    autocovariance.

    Parameters
    ----------
    data : ndarray
        Time series data
    max_lags : int
        Maximum number of lags

    Returns
    -------
    ndarray
        Array of length ``max_lags + 1``; element 0 is always 1.0
    """
    centered = data - np.mean(data)
    n_obs = len(centered)
    lag0_cov = np.dot(centered, centered) / n_obs

    out = np.zeros(max_lags + 1)
    out[0] = 1.0  # a series is perfectly correlated with itself
    out[1:] = [
        (np.dot(centered[:-lag], centered[lag:]) / n_obs) / lag0_cov
        for lag in range(1, max_lags + 1)
    ]
    return out
889
+
890
+
891
def _compute_pacf(data: "NDArray", max_lags: int) -> "NDArray":
    """Compute partial autocorrelation function using Durbin-Levinson recursion.

    The autocorrelations come from ``_compute_acf``; each recursion step
    derives the order-k coefficients from those of order k-1, and the last
    coefficient of each order is the partial autocorrelation at that lag.

    Parameters
    ----------
    data : ndarray
        Time series data
    max_lags : int
        Maximum number of lags

    Returns
    -------
    ndarray
        PACF values for lags 0 to max_lags (element 0 is always 1.0)

    References
    ----------
    .. [1] Durbin, J. (1960). "The fitting of time-series models."
           Revue de l'Institut International de Statistique, 233-244.
    """
    rho = _compute_acf(data, max_lags)

    partial = np.zeros(max_lags + 1)
    partial[0] = 1.0  # PACF at lag 0 is 1

    if max_lags == 0:
        return partial

    # coeffs[k, j] holds the j-th coefficient of the order-k fit
    coeffs = np.zeros((max_lags + 1, max_lags + 1))
    coeffs[1, 1] = partial[1] = rho[1]

    for order in range(2, max_lags + 1):
        previous = coeffs[order - 1]

        # Accumulate numerator and denominator in one pass over the
        # previous order's coefficients (same term order as two passes).
        top = rho[order]
        bottom = 1.0
        for lag in range(1, order):
            top -= previous[lag] * rho[order - lag]
            bottom -= previous[lag] * rho[lag]

        reflection = top / bottom
        coeffs[order, order] = partial[order] = reflection

        # coeffs[order, j] = previous[j] - reflection * previous[order - j]
        coeffs[order, 1:order] = (
            previous[1:order] - reflection * previous[order - 1 : 0 : -1]
        )

    return partial
942
+
943
+
944
def export_static(fig: go.Figure, filename: str, format: str = "png", **kwargs) -> None:
    """Export Plotly figure as static image.

    Converts interactive Plotly figure to static format (PNG, PDF, SVG) for
    presentations, papers, or printable reports.

    Requires kaleido package: `pip install kaleido`

    The export is best-effort: a failure is reported on stdout and never
    raised to the caller. The kaleido installation hint is printed only when
    the kaleido package cannot be found, so unrelated failures (bad path,
    unsupported format, permissions) are not misreported as a missing
    dependency.

    Parameters
    ----------
    fig : go.Figure
        Plotly figure to export
    filename : str
        Output filename (without extension); ``.{format}`` is appended
    format : str, default "png"
        Export format: "png", "pdf", "svg", "jpeg"
    **kwargs
        Additional arguments passed to fig.write_image()
        Common options:
        - width: int, image width in pixels
        - height: int, image height in pixels
        - scale: float, image scale factor

    Returns
    -------
    None

    Examples
    --------
    >>> fig = plot_acf_pacf(data)
    >>> export_static(fig, "acf_pacf_report", format="pdf", width=1200, height=400)
    >>> # Creates: acf_pacf_report.pdf

    Notes
    -----
    For best quality PDFs:
    - Use format="pdf"
    - Set scale=2 or higher
    - Specify explicit width/height matching your document

    For web use:
    - Use format="png" or "svg"
    - SVG is vector (scales infinitely) but larger file size

    See Also
    --------
    plotly.graph_objects.Figure.write_image : Underlying export function
    """
    # Function-scope import: only needed on the failure path below.
    from importlib.util import find_spec

    output_file = f"{filename}.{format}"

    # Keep the guarded region to the single call that can actually fail.
    try:
        fig.write_image(output_file, format=format, **kwargs)
    except Exception as e:  # deliberate best-effort: report, do not raise
        print(f"❌ Export failed: {e}")
        try:
            kaleido_missing = find_spec("kaleido") is None
        except (ImportError, ValueError):
            # A broken/partial installation counts as missing for the hint.
            kaleido_missing = True
        if kaleido_missing:
            print("Install kaleido for static export: pip install kaleido")
    else:
        print(f"✓ Exported static image: {output_file}")
995
+
996
+
997
def get_figure_data(fig: go.Figure) -> pd.DataFrame:
    """Extract underlying data from Plotly figure as DataFrame.

    Retrieves the numerical data used to create the visualization,
    enabling custom analysis or alternative visualizations.

    Each trace contributes up to two columns, ``<trace name>_x`` and
    ``<trace name>_y`` (unnamed traces use ``trace_<index>``). Traces may
    have different lengths; shorter columns are padded with NaN. If several
    traces share a name, later ones get their trace index appended so that
    no trace's data is overwritten.

    Parameters
    ----------
    fig : go.Figure
        Plotly figure

    Returns
    -------
    pd.DataFrame
        Data from all traces in the figure; empty if the figure has no
        traces with x or y data

    Examples
    --------
    >>> fig = plot_distribution(data)
    >>> df = get_figure_data(fig)
    >>> print(df.columns)
    >>> # Values behind the histogram trace named "Empirical"
    >>> observed = df["Empirical_x"].dropna()

    Notes
    -----
    The DataFrame structure depends on the plot type.
    Inspect df.columns to understand available data.
    """
    columns: dict[str, pd.Series] = {}
    used_names: set[str] = set()

    for i, trace in enumerate(fig.data):
        trace_name = trace.name or f"trace_{i}"

        # Disambiguate repeated trace names instead of overwriting columns.
        while trace_name in used_names:
            trace_name = f"{trace_name}_{i}"
        used_names.add(trace_name)

        for axis in ("x", "y"):
            values = getattr(trace, axis, None)
            if values is not None:
                # Wrapping in a Series lets DataFrame align columns of
                # unequal length (NaN-padded) rather than raising.
                columns[f"{trace_name}_{axis}"] = pd.Series(values)

    return pd.DataFrame(columns)