ml4t-diagnostic 0.1.0a1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ml4t/diagnostic/AGENT.md +25 -0
- ml4t/diagnostic/__init__.py +166 -0
- ml4t/diagnostic/backends/__init__.py +10 -0
- ml4t/diagnostic/backends/adapter.py +192 -0
- ml4t/diagnostic/backends/polars_backend.py +899 -0
- ml4t/diagnostic/caching/__init__.py +40 -0
- ml4t/diagnostic/caching/cache.py +331 -0
- ml4t/diagnostic/caching/decorators.py +131 -0
- ml4t/diagnostic/caching/smart_cache.py +339 -0
- ml4t/diagnostic/config/AGENT.md +24 -0
- ml4t/diagnostic/config/README.md +267 -0
- ml4t/diagnostic/config/__init__.py +219 -0
- ml4t/diagnostic/config/barrier_config.py +277 -0
- ml4t/diagnostic/config/base.py +301 -0
- ml4t/diagnostic/config/event_config.py +148 -0
- ml4t/diagnostic/config/feature_config.py +404 -0
- ml4t/diagnostic/config/multi_signal_config.py +55 -0
- ml4t/diagnostic/config/portfolio_config.py +215 -0
- ml4t/diagnostic/config/report_config.py +391 -0
- ml4t/diagnostic/config/sharpe_config.py +202 -0
- ml4t/diagnostic/config/signal_config.py +206 -0
- ml4t/diagnostic/config/trade_analysis_config.py +310 -0
- ml4t/diagnostic/config/validation.py +279 -0
- ml4t/diagnostic/core/__init__.py +29 -0
- ml4t/diagnostic/core/numba_utils.py +315 -0
- ml4t/diagnostic/core/purging.py +372 -0
- ml4t/diagnostic/core/sampling.py +471 -0
- ml4t/diagnostic/errors/__init__.py +205 -0
- ml4t/diagnostic/evaluation/AGENT.md +26 -0
- ml4t/diagnostic/evaluation/__init__.py +437 -0
- ml4t/diagnostic/evaluation/autocorrelation.py +531 -0
- ml4t/diagnostic/evaluation/barrier_analysis.py +1050 -0
- ml4t/diagnostic/evaluation/binary_metrics.py +910 -0
- ml4t/diagnostic/evaluation/dashboard.py +715 -0
- ml4t/diagnostic/evaluation/diagnostic_plots.py +1037 -0
- ml4t/diagnostic/evaluation/distribution/__init__.py +499 -0
- ml4t/diagnostic/evaluation/distribution/moments.py +299 -0
- ml4t/diagnostic/evaluation/distribution/tails.py +777 -0
- ml4t/diagnostic/evaluation/distribution/tests.py +470 -0
- ml4t/diagnostic/evaluation/drift/__init__.py +139 -0
- ml4t/diagnostic/evaluation/drift/analysis.py +432 -0
- ml4t/diagnostic/evaluation/drift/domain_classifier.py +517 -0
- ml4t/diagnostic/evaluation/drift/population_stability_index.py +310 -0
- ml4t/diagnostic/evaluation/drift/wasserstein.py +388 -0
- ml4t/diagnostic/evaluation/event_analysis.py +647 -0
- ml4t/diagnostic/evaluation/excursion.py +390 -0
- ml4t/diagnostic/evaluation/feature_diagnostics.py +873 -0
- ml4t/diagnostic/evaluation/feature_outcome.py +666 -0
- ml4t/diagnostic/evaluation/framework.py +935 -0
- ml4t/diagnostic/evaluation/metric_registry.py +255 -0
- ml4t/diagnostic/evaluation/metrics/AGENT.md +23 -0
- ml4t/diagnostic/evaluation/metrics/__init__.py +133 -0
- ml4t/diagnostic/evaluation/metrics/basic.py +160 -0
- ml4t/diagnostic/evaluation/metrics/conditional_ic.py +469 -0
- ml4t/diagnostic/evaluation/metrics/feature_outcome.py +475 -0
- ml4t/diagnostic/evaluation/metrics/ic_statistics.py +446 -0
- ml4t/diagnostic/evaluation/metrics/importance_analysis.py +338 -0
- ml4t/diagnostic/evaluation/metrics/importance_classical.py +375 -0
- ml4t/diagnostic/evaluation/metrics/importance_mda.py +371 -0
- ml4t/diagnostic/evaluation/metrics/importance_shap.py +715 -0
- ml4t/diagnostic/evaluation/metrics/information_coefficient.py +527 -0
- ml4t/diagnostic/evaluation/metrics/interactions.py +772 -0
- ml4t/diagnostic/evaluation/metrics/monotonicity.py +226 -0
- ml4t/diagnostic/evaluation/metrics/risk_adjusted.py +324 -0
- ml4t/diagnostic/evaluation/multi_signal.py +550 -0
- ml4t/diagnostic/evaluation/portfolio_analysis/__init__.py +83 -0
- ml4t/diagnostic/evaluation/portfolio_analysis/analysis.py +734 -0
- ml4t/diagnostic/evaluation/portfolio_analysis/metrics.py +589 -0
- ml4t/diagnostic/evaluation/portfolio_analysis/results.py +334 -0
- ml4t/diagnostic/evaluation/report_generation.py +824 -0
- ml4t/diagnostic/evaluation/signal_selector.py +452 -0
- ml4t/diagnostic/evaluation/stat_registry.py +139 -0
- ml4t/diagnostic/evaluation/stationarity/__init__.py +97 -0
- ml4t/diagnostic/evaluation/stationarity/analysis.py +518 -0
- ml4t/diagnostic/evaluation/stationarity/augmented_dickey_fuller.py +296 -0
- ml4t/diagnostic/evaluation/stationarity/kpss_test.py +308 -0
- ml4t/diagnostic/evaluation/stationarity/phillips_perron.py +365 -0
- ml4t/diagnostic/evaluation/stats/AGENT.md +43 -0
- ml4t/diagnostic/evaluation/stats/__init__.py +191 -0
- ml4t/diagnostic/evaluation/stats/backtest_overfitting.py +219 -0
- ml4t/diagnostic/evaluation/stats/bootstrap.py +228 -0
- ml4t/diagnostic/evaluation/stats/deflated_sharpe_ratio.py +591 -0
- ml4t/diagnostic/evaluation/stats/false_discovery_rate.py +295 -0
- ml4t/diagnostic/evaluation/stats/hac_standard_errors.py +108 -0
- ml4t/diagnostic/evaluation/stats/minimum_track_record.py +408 -0
- ml4t/diagnostic/evaluation/stats/moments.py +164 -0
- ml4t/diagnostic/evaluation/stats/rademacher_adjustment.py +436 -0
- ml4t/diagnostic/evaluation/stats/reality_check.py +155 -0
- ml4t/diagnostic/evaluation/stats/sharpe_inference.py +219 -0
- ml4t/diagnostic/evaluation/themes.py +330 -0
- ml4t/diagnostic/evaluation/threshold_analysis.py +957 -0
- ml4t/diagnostic/evaluation/trade_analysis.py +1136 -0
- ml4t/diagnostic/evaluation/trade_dashboard/__init__.py +32 -0
- ml4t/diagnostic/evaluation/trade_dashboard/app.py +315 -0
- ml4t/diagnostic/evaluation/trade_dashboard/export/__init__.py +18 -0
- ml4t/diagnostic/evaluation/trade_dashboard/export/csv.py +82 -0
- ml4t/diagnostic/evaluation/trade_dashboard/export/html.py +276 -0
- ml4t/diagnostic/evaluation/trade_dashboard/io.py +166 -0
- ml4t/diagnostic/evaluation/trade_dashboard/normalize.py +304 -0
- ml4t/diagnostic/evaluation/trade_dashboard/stats.py +386 -0
- ml4t/diagnostic/evaluation/trade_dashboard/style.py +79 -0
- ml4t/diagnostic/evaluation/trade_dashboard/tabs/__init__.py +21 -0
- ml4t/diagnostic/evaluation/trade_dashboard/tabs/patterns.py +354 -0
- ml4t/diagnostic/evaluation/trade_dashboard/tabs/shap_analysis.py +280 -0
- ml4t/diagnostic/evaluation/trade_dashboard/tabs/stat_validation.py +186 -0
- ml4t/diagnostic/evaluation/trade_dashboard/tabs/worst_trades.py +236 -0
- ml4t/diagnostic/evaluation/trade_dashboard/types.py +129 -0
- ml4t/diagnostic/evaluation/trade_shap/__init__.py +102 -0
- ml4t/diagnostic/evaluation/trade_shap/alignment.py +188 -0
- ml4t/diagnostic/evaluation/trade_shap/characterize.py +413 -0
- ml4t/diagnostic/evaluation/trade_shap/cluster.py +302 -0
- ml4t/diagnostic/evaluation/trade_shap/explain.py +208 -0
- ml4t/diagnostic/evaluation/trade_shap/hypotheses/__init__.py +23 -0
- ml4t/diagnostic/evaluation/trade_shap/hypotheses/generator.py +290 -0
- ml4t/diagnostic/evaluation/trade_shap/hypotheses/matcher.py +251 -0
- ml4t/diagnostic/evaluation/trade_shap/hypotheses/templates.yaml +467 -0
- ml4t/diagnostic/evaluation/trade_shap/models.py +386 -0
- ml4t/diagnostic/evaluation/trade_shap/normalize.py +116 -0
- ml4t/diagnostic/evaluation/trade_shap/pipeline.py +263 -0
- ml4t/diagnostic/evaluation/trade_shap_dashboard.py +283 -0
- ml4t/diagnostic/evaluation/trade_shap_diagnostics.py +588 -0
- ml4t/diagnostic/evaluation/validated_cv.py +535 -0
- ml4t/diagnostic/evaluation/visualization.py +1050 -0
- ml4t/diagnostic/evaluation/volatility/__init__.py +45 -0
- ml4t/diagnostic/evaluation/volatility/analysis.py +351 -0
- ml4t/diagnostic/evaluation/volatility/arch.py +258 -0
- ml4t/diagnostic/evaluation/volatility/garch.py +460 -0
- ml4t/diagnostic/integration/__init__.py +48 -0
- ml4t/diagnostic/integration/backtest_contract.py +671 -0
- ml4t/diagnostic/integration/data_contract.py +316 -0
- ml4t/diagnostic/integration/engineer_contract.py +226 -0
- ml4t/diagnostic/logging/__init__.py +77 -0
- ml4t/diagnostic/logging/logger.py +245 -0
- ml4t/diagnostic/logging/performance.py +234 -0
- ml4t/diagnostic/logging/progress.py +234 -0
- ml4t/diagnostic/logging/wandb.py +412 -0
- ml4t/diagnostic/metrics/__init__.py +9 -0
- ml4t/diagnostic/metrics/percentiles.py +128 -0
- ml4t/diagnostic/py.typed +1 -0
- ml4t/diagnostic/reporting/__init__.py +43 -0
- ml4t/diagnostic/reporting/base.py +130 -0
- ml4t/diagnostic/reporting/html_renderer.py +275 -0
- ml4t/diagnostic/reporting/json_renderer.py +51 -0
- ml4t/diagnostic/reporting/markdown_renderer.py +117 -0
- ml4t/diagnostic/results/AGENT.md +24 -0
- ml4t/diagnostic/results/__init__.py +105 -0
- ml4t/diagnostic/results/barrier_results/__init__.py +36 -0
- ml4t/diagnostic/results/barrier_results/hit_rate.py +304 -0
- ml4t/diagnostic/results/barrier_results/precision_recall.py +266 -0
- ml4t/diagnostic/results/barrier_results/profit_factor.py +297 -0
- ml4t/diagnostic/results/barrier_results/tearsheet.py +397 -0
- ml4t/diagnostic/results/barrier_results/time_to_target.py +305 -0
- ml4t/diagnostic/results/barrier_results/validation.py +38 -0
- ml4t/diagnostic/results/base.py +177 -0
- ml4t/diagnostic/results/event_results.py +349 -0
- ml4t/diagnostic/results/feature_results.py +787 -0
- ml4t/diagnostic/results/multi_signal_results.py +431 -0
- ml4t/diagnostic/results/portfolio_results.py +281 -0
- ml4t/diagnostic/results/sharpe_results.py +448 -0
- ml4t/diagnostic/results/signal_results/__init__.py +74 -0
- ml4t/diagnostic/results/signal_results/ic.py +581 -0
- ml4t/diagnostic/results/signal_results/irtc.py +110 -0
- ml4t/diagnostic/results/signal_results/quantile.py +392 -0
- ml4t/diagnostic/results/signal_results/tearsheet.py +456 -0
- ml4t/diagnostic/results/signal_results/turnover.py +213 -0
- ml4t/diagnostic/results/signal_results/validation.py +147 -0
- ml4t/diagnostic/signal/AGENT.md +17 -0
- ml4t/diagnostic/signal/__init__.py +69 -0
- ml4t/diagnostic/signal/_report.py +152 -0
- ml4t/diagnostic/signal/_utils.py +261 -0
- ml4t/diagnostic/signal/core.py +275 -0
- ml4t/diagnostic/signal/quantile.py +148 -0
- ml4t/diagnostic/signal/result.py +214 -0
- ml4t/diagnostic/signal/signal_ic.py +129 -0
- ml4t/diagnostic/signal/turnover.py +182 -0
- ml4t/diagnostic/splitters/AGENT.md +19 -0
- ml4t/diagnostic/splitters/__init__.py +36 -0
- ml4t/diagnostic/splitters/base.py +501 -0
- ml4t/diagnostic/splitters/calendar.py +421 -0
- ml4t/diagnostic/splitters/calendar_config.py +91 -0
- ml4t/diagnostic/splitters/combinatorial.py +1064 -0
- ml4t/diagnostic/splitters/config.py +322 -0
- ml4t/diagnostic/splitters/cpcv/__init__.py +57 -0
- ml4t/diagnostic/splitters/cpcv/combinations.py +119 -0
- ml4t/diagnostic/splitters/cpcv/partitioning.py +263 -0
- ml4t/diagnostic/splitters/cpcv/purge_engine.py +379 -0
- ml4t/diagnostic/splitters/cpcv/windows.py +190 -0
- ml4t/diagnostic/splitters/group_isolation.py +329 -0
- ml4t/diagnostic/splitters/persistence.py +316 -0
- ml4t/diagnostic/splitters/utils.py +207 -0
- ml4t/diagnostic/splitters/walk_forward.py +757 -0
- ml4t/diagnostic/utils/__init__.py +42 -0
- ml4t/diagnostic/utils/config.py +542 -0
- ml4t/diagnostic/utils/dependencies.py +318 -0
- ml4t/diagnostic/utils/sessions.py +127 -0
- ml4t/diagnostic/validation/__init__.py +54 -0
- ml4t/diagnostic/validation/dataframe.py +274 -0
- ml4t/diagnostic/validation/returns.py +280 -0
- ml4t/diagnostic/validation/timeseries.py +299 -0
- ml4t/diagnostic/visualization/AGENT.md +19 -0
- ml4t/diagnostic/visualization/__init__.py +223 -0
- ml4t/diagnostic/visualization/backtest/__init__.py +98 -0
- ml4t/diagnostic/visualization/backtest/cost_attribution.py +762 -0
- ml4t/diagnostic/visualization/backtest/executive_summary.py +895 -0
- ml4t/diagnostic/visualization/backtest/interactive_controls.py +673 -0
- ml4t/diagnostic/visualization/backtest/statistical_validity.py +874 -0
- ml4t/diagnostic/visualization/backtest/tearsheet.py +565 -0
- ml4t/diagnostic/visualization/backtest/template_system.py +373 -0
- ml4t/diagnostic/visualization/backtest/trade_plots.py +1172 -0
- ml4t/diagnostic/visualization/barrier_plots.py +782 -0
- ml4t/diagnostic/visualization/core.py +1060 -0
- ml4t/diagnostic/visualization/dashboards/__init__.py +36 -0
- ml4t/diagnostic/visualization/dashboards/base.py +582 -0
- ml4t/diagnostic/visualization/dashboards/importance.py +801 -0
- ml4t/diagnostic/visualization/dashboards/interaction.py +263 -0
- ml4t/diagnostic/visualization/dashboards.py +43 -0
- ml4t/diagnostic/visualization/data_extraction/__init__.py +48 -0
- ml4t/diagnostic/visualization/data_extraction/importance.py +649 -0
- ml4t/diagnostic/visualization/data_extraction/interaction.py +504 -0
- ml4t/diagnostic/visualization/data_extraction/types.py +113 -0
- ml4t/diagnostic/visualization/data_extraction/validation.py +66 -0
- ml4t/diagnostic/visualization/feature_plots.py +888 -0
- ml4t/diagnostic/visualization/interaction_plots.py +618 -0
- ml4t/diagnostic/visualization/portfolio/__init__.py +41 -0
- ml4t/diagnostic/visualization/portfolio/dashboard.py +514 -0
- ml4t/diagnostic/visualization/portfolio/drawdown_plots.py +341 -0
- ml4t/diagnostic/visualization/portfolio/returns_plots.py +487 -0
- ml4t/diagnostic/visualization/portfolio/risk_plots.py +301 -0
- ml4t/diagnostic/visualization/report_generation.py +1343 -0
- ml4t/diagnostic/visualization/signal/__init__.py +103 -0
- ml4t/diagnostic/visualization/signal/dashboard.py +911 -0
- ml4t/diagnostic/visualization/signal/event_plots.py +514 -0
- ml4t/diagnostic/visualization/signal/ic_plots.py +635 -0
- ml4t/diagnostic/visualization/signal/multi_signal_dashboard.py +974 -0
- ml4t/diagnostic/visualization/signal/multi_signal_plots.py +603 -0
- ml4t/diagnostic/visualization/signal/quantile_plots.py +625 -0
- ml4t/diagnostic/visualization/signal/turnover_plots.py +400 -0
- ml4t/diagnostic/visualization/trade_shap/__init__.py +90 -0
- ml4t_diagnostic-0.1.0a1.dist-info/METADATA +1044 -0
- ml4t_diagnostic-0.1.0a1.dist-info/RECORD +242 -0
- ml4t_diagnostic-0.1.0a1.dist-info/WHEEL +4 -0
- ml4t_diagnostic-0.1.0a1.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,299 @@
|
|
|
1
|
+
"""Distribution moments analysis with significance tests.
|
|
2
|
+
|
|
3
|
+
This module provides moment computation (skewness and kurtosis) with statistical
|
|
4
|
+
significance testing for financial returns analysis.
|
|
5
|
+
|
|
6
|
+
Moment Interpretation:
|
|
7
|
+
- Skewness = 0: Symmetric distribution (normal)
|
|
8
|
+
- Skewness > 0: Right-skewed (long right tail)
|
|
9
|
+
- Skewness < 0: Left-skewed (long left tail, common for equity returns)
|
|
10
|
+
- Excess Kurtosis = 0: Normal tail thickness
|
|
11
|
+
- Excess Kurtosis > 0: Fat tails (leptokurtic, more extreme events)
|
|
12
|
+
- Excess Kurtosis < 0: Thin tails (platykurtic, fewer extreme events)
|
|
13
|
+
|
|
14
|
+
References:
|
|
15
|
+
- D'Agostino, R. B., & Pearson, E. S. (1973). Tests for departure from
|
|
16
|
+
normality. Biometrika, 60(3), 613-622. (Standard errors for moments)
|
|
17
|
+
"""
|
|
18
|
+
|
|
19
|
+
from __future__ import annotations
|
|
20
|
+
|
|
21
|
+
from dataclasses import dataclass
|
|
22
|
+
|
|
23
|
+
import numpy as np
|
|
24
|
+
import pandas as pd
|
|
25
|
+
from scipy import stats
|
|
26
|
+
|
|
27
|
+
from ml4t.diagnostic.errors import ComputationError, ValidationError
|
|
28
|
+
from ml4t.diagnostic.logging import get_logger
|
|
29
|
+
|
|
30
|
+
# Module-level logger obtained from the package's logging helper; calls in this
# module pass extra context as keyword arguments (e.g. ``n_obs=...``).
logger = get_logger(__name__)
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
@dataclass
class MomentsResult:
    """Distribution moments (skewness and excess kurtosis) with significance tests.

    Attributes:
        mean: Sample mean
        std: Sample standard deviation
        skewness: Sample skewness (third standardized moment)
        skewness_se: Standard error of skewness
        excess_kurtosis: Sample excess kurtosis (Fisher: normal=0)
        excess_kurtosis_se: Standard error of excess kurtosis
        skewness_significant: Whether skewness is significantly different from 0
        excess_kurtosis_significant: Whether excess kurtosis is significantly different from 0
        n_obs: Number of observations
        alpha: Significance level used for tests
    """

    mean: float
    std: float
    skewness: float
    skewness_se: float
    excess_kurtosis: float
    excess_kurtosis_se: float
    skewness_significant: bool
    excess_kurtosis_significant: bool
    n_obs: int
    alpha: float = 0.05

    def __repr__(self) -> str:
        """String representation."""
        return f"MomentsResult(skewness={self.skewness:.4f}, excess_kurtosis={self.excess_kurtosis:.4f}, n={self.n_obs})"

    @staticmethod
    def _z_score(value: float, se: float) -> float:
        """Return ``value / se``, or NaN when the standard error is zero."""
        # The dataclass can be built directly with se=0; avoid ZeroDivisionError
        # so that summary() always renders.
        return value / se if se else float("nan")

    def summary(self) -> str:
        """Human-readable summary of moment analysis.

        The report lists the sample size, mean and standard deviation, then for
        skewness and excess kurtosis the value, standard error, z-score,
        significance flag and a qualitative interpretation, followed by the
        practical implications of any significant departure from normality.

        Returns:
            Formatted summary string
        """
        lines = [
            "Distribution Moments Analysis",
            "=" * 50,
            f"Observations: {self.n_obs}",
            f"Mean: {self.mean:.6f}",
            f"Std Dev: {self.std:.6f}",
        ]

        lines.append("")
        lines.append("Skewness:")
        lines.append(f" Value: {self.skewness:.4f}")
        lines.append(f" Std Error: {self.skewness_se:.4f}")
        lines.append(f" Z-score: {self._z_score(self.skewness, self.skewness_se):.4f}")
        lines.append(
            f" Significant: {'Yes' if self.skewness_significant else 'No'} (α={self.alpha})"
        )

        # Values within +/-0.1 are described as effectively zero.
        if abs(self.skewness) < 0.1:
            interpretation = "approximately symmetric"
        elif self.skewness > 0:
            interpretation = "right-skewed (positive, long right tail)"
        else:
            interpretation = "left-skewed (negative, long left tail)"
        lines.append(f" Interpretation: {interpretation}")

        lines.append("")
        lines.append("Excess Kurtosis:")
        lines.append(f" Value: {self.excess_kurtosis:.4f}")
        lines.append(f" Std Error: {self.excess_kurtosis_se:.4f}")
        lines.append(
            f" Z-score: {self._z_score(self.excess_kurtosis, self.excess_kurtosis_se):.4f}"
        )
        lines.append(
            f" Significant: {'Yes' if self.excess_kurtosis_significant else 'No'} (α={self.alpha})"
        )

        if abs(self.excess_kurtosis) < 0.1:
            interpretation = "approximately normal (mesokurtic)"
        elif self.excess_kurtosis > 0:
            interpretation = "fat tails (leptokurtic, more extreme events)"
        else:
            interpretation = "thin tails (platykurtic, fewer extreme events)"
        lines.append(f" Interpretation: {interpretation}")

        lines.append("")
        lines.append("Implications:")
        if self.skewness_significant or self.excess_kurtosis_significant:
            lines.append(" - Distribution deviates significantly from normality")
            if self.skewness_significant:
                lines.append(" - Skewness affects mean-variance optimization")
                lines.append(" - Asymmetric risk profiles require adjusted risk measures")
            if self.excess_kurtosis_significant:
                # The implication depends on the sign: only positive excess
                # kurtosis means fat tails; a significant negative value means
                # thinner-than-normal tails.
                if self.excess_kurtosis > 0:
                    lines.append(" - Fat tails increase probability of extreme events")
                    lines.append(" - VaR/CVaR estimates may underestimate tail risk")
                else:
                    lines.append(" - Thin tails reduce probability of extreme events")
                    lines.append(" - Normal-based VaR/CVaR estimates may overstate tail risk")
        else:
            lines.append(" - Moments consistent with normal distribution")
            lines.append(" - Classical statistical methods appropriate")

        return "\n".join(lines)
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
def compute_moments(
    data: pd.Series | np.ndarray,
    test_significance: bool = True,
    alpha: float = 0.05,
) -> MomentsResult:
    """Compute distribution moments with significance tests.

    Calculates sample skewness and excess kurtosis with standard errors.
    Optionally tests whether moments are significantly different from zero
    (normal distribution values) using z-tests.

    Standard Errors (large-sample approximations under normality):
        - SE(skewness) = sqrt(6/n)
        - SE(kurtosis) = sqrt(24/n)

    Significance Test:
        - H0: moment = 0 (consistent with normal distribution)
        - Reject if |moment| > z * SE, where z = norm.ppf(1 - alpha / 2)
          (z ≈ 1.96 for α=0.05, two-tailed)

    Args:
        data: Time series data (1D array or Series)
        test_significance: Whether to test significance (default True).
            When False, both significance flags are returned as False.
        alpha: Significance level for tests (default 0.05). Must lie strictly
            between 0 and 1 when ``test_significance`` is True.

    Returns:
        MomentsResult with moments and significance tests

    Raises:
        ValidationError: If data is invalid (None, wrong type or shape, empty,
            non-numeric, contains NaN/inf, fewer than 20 observations, or
            constant), or if ``alpha`` is outside (0, 1) while testing
            significance
        ComputationError: If computation fails

    Example:
        >>> import numpy as np
        >>> # Normal data (skewness ≈ 0, excess_kurtosis ≈ 0)
        >>> normal = np.random.normal(0, 1, 1000)
        >>> result = compute_moments(normal)
        >>> print(result.summary())
        >>>
        >>> # Lognormal data (skewed, fat-tailed)
        >>> lognormal = np.random.lognormal(0, 0.5, 1000)
        >>> result = compute_moments(lognormal)
        >>> print(f"Skewness significant: {result.skewness_significant}")
        >>> print(f"Excess kurtosis significant: {result.excess_kurtosis_significant}")

    Notes:
        - Skewness = 0 for symmetric distributions (normal)
        - Excess kurtosis = 0 for normal distribution (Fisher convention)
        - Financial returns typically show negative skew, positive excess kurtosis
        - Large samples (n > 1000) recommended for reliable inference
    """
    # Input validation
    if data is None:
        raise ValidationError("Data cannot be None", context={"function": "compute_moments"})

    # An out-of-range alpha makes norm.ppf return NaN, which would silently
    # turn every significance test into "not significant".
    if test_significance and not 0 < alpha < 1:
        raise ValidationError(
            f"alpha must be between 0 and 1 (exclusive), got {alpha}",
            context={"function": "compute_moments", "alpha": alpha},
        )

    # Convert to numpy array
    if isinstance(data, pd.Series):
        arr = data.to_numpy()
        logger.debug("Converted pandas Series to numpy array", shape=arr.shape)
    elif isinstance(data, np.ndarray):
        arr = data
    else:
        raise ValidationError(
            f"Data must be pandas Series or numpy array, got {type(data)}",
            context={"function": "compute_moments", "data_type": type(data).__name__},
        )

    # Check array properties
    if arr.ndim != 1:
        raise ValidationError(
            f"Data must be 1-dimensional, got {arr.ndim}D",
            context={"function": "compute_moments", "shape": arr.shape},
        )

    if len(arr) == 0:
        raise ValidationError(
            "Data cannot be empty", context={"function": "compute_moments", "length": 0}
        )

    # Object arrays (e.g. Series holding pd.NA/None or mixed types) make
    # np.isnan raise a bare TypeError; coerce them to float up front and report
    # failures as ValidationError instead.
    if arr.dtype.kind == "O":
        try:
            arr = arr.astype(float)
        except (TypeError, ValueError) as exc:
            raise ValidationError(
                "Data must be numeric (object dtype could not be converted to float)",
                context={"function": "compute_moments", "dtype": str(arr.dtype)},
            ) from exc

    # Check for missing values
    nan_mask = np.isnan(arr)
    if np.any(nan_mask):
        n_missing = int(np.sum(nan_mask))
        raise ValidationError(
            f"Data contains {n_missing} missing values (NaN)",
            context={"function": "compute_moments", "n_missing": n_missing, "length": len(arr)},
        )

    # Check for infinite values
    inf_mask = np.isinf(arr)
    if np.any(inf_mask):
        n_inf = int(np.sum(inf_mask))
        raise ValidationError(
            f"Data contains {n_inf} infinite values",
            context={"function": "compute_moments", "n_inf": n_inf, "length": len(arr)},
        )

    # Check minimum length
    min_length = 20  # Need reasonable sample size for moments
    if len(arr) < min_length:
        raise ValidationError(
            f"Insufficient data for moment computation (need at least {min_length} observations)",
            context={"function": "compute_moments", "length": len(arr), "min_length": min_length},
        )

    # Check for constant series. Comparing min and max is exact, whereas
    # np.std(arr) == 0 can miss constant float data: rounding in the mean
    # (e.g. for an array of repeated 0.1) leaves a tiny non-zero std.
    if np.max(arr) == np.min(arr):
        raise ValidationError(
            "Data is constant (zero variance)",
            context={
                "function": "compute_moments",
                "length": len(arr),
                "mean": float(np.mean(arr)),
            },
        )

    logger.info("Computing distribution moments", n_obs=len(arr))

    try:
        # Compute basic statistics
        mean = float(np.mean(arr))
        std = float(np.std(arr, ddof=1))  # Sample std (n-1 denominator)

        # Compute skewness and excess kurtosis using scipy
        skewness = float(stats.skew(arr, bias=False))  # Sample skewness (bias=False)
        excess_kurtosis = float(
            stats.kurtosis(arr, bias=False)
        )  # Excess kurtosis (Fisher: normal=0)

        # Compute standard errors
        n = len(arr)
        skewness_se = float(np.sqrt(6 / n))
        excess_kurtosis_se = float(np.sqrt(24 / n))

        # Test significance if requested
        if test_significance:
            # Significance test: |moment| > critical_value * SE
            # For α=0.05 (two-tailed), critical value ≈ 1.96 ≈ 2
            critical_value = stats.norm.ppf(1 - alpha / 2)
            skewness_significant = bool(abs(skewness) > critical_value * skewness_se)
            excess_kurtosis_significant = bool(
                abs(excess_kurtosis) > critical_value * excess_kurtosis_se
            )
        else:
            skewness_significant = False
            excess_kurtosis_significant = False

        logger.info(
            "Moments computed",
            skewness=skewness,
            excess_kurtosis=excess_kurtosis,
            skewness_sig=skewness_significant,
            excess_kurtosis_sig=excess_kurtosis_significant,
        )

        return MomentsResult(
            mean=mean,
            std=std,
            skewness=skewness,
            skewness_se=skewness_se,
            excess_kurtosis=excess_kurtosis,
            excess_kurtosis_se=excess_kurtosis_se,
            skewness_significant=skewness_significant,
            excess_kurtosis_significant=excess_kurtosis_significant,
            n_obs=n,
            alpha=alpha,
        )

    except Exception as e:
        logger.error("Moment computation failed", error=str(e), n_obs=len(arr))
        raise ComputationError(
            f"Moment computation failed: {e}",
            context={"function": "compute_moments", "n_obs": len(arr)},
            cause=e,
        ) from e
|