aigroup-econ-mcp 1.3.3__py3-none-any.whl → 1.4.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- .gitignore +253 -0
- PKG-INFO +710 -0
- README.md +672 -0
- __init__.py +14 -0
- aigroup_econ_mcp-1.4.3.dist-info/METADATA +710 -0
- aigroup_econ_mcp-1.4.3.dist-info/RECORD +92 -0
- aigroup_econ_mcp-1.4.3.dist-info/entry_points.txt +2 -0
- aigroup_econ_mcp-1.4.3.dist-info/licenses/LICENSE +21 -0
- cli.py +28 -0
- econometrics/README.md +18 -0
- econometrics/__init__.py +191 -0
- econometrics/advanced_methods/modern_computing_machine_learning/__init__.py +0 -0
- econometrics/basic_parametric_estimation/__init__.py +31 -0
- econometrics/basic_parametric_estimation/gmm/__init__.py +13 -0
- econometrics/basic_parametric_estimation/gmm/gmm_model.py +256 -0
- econometrics/basic_parametric_estimation/mle/__init__.py +13 -0
- econometrics/basic_parametric_estimation/mle/mle_model.py +241 -0
- econometrics/basic_parametric_estimation/ols/__init__.py +13 -0
- econometrics/basic_parametric_estimation/ols/ols_model.py +141 -0
- econometrics/causal_inference/causal_identification_strategy/__init__.py +0 -0
- econometrics/missing_data/missing_data_measurement_error/__init__.py +0 -0
- econometrics/model_specification_diagnostics_robust_inference/README.md +173 -0
- econometrics/model_specification_diagnostics_robust_inference/__init__.py +78 -0
- econometrics/model_specification_diagnostics_robust_inference/diagnostic_tests/__init__.py +20 -0
- econometrics/model_specification_diagnostics_robust_inference/diagnostic_tests/diagnostic_tests_model.py +149 -0
- econometrics/model_specification_diagnostics_robust_inference/generalized_least_squares/__init__.py +15 -0
- econometrics/model_specification_diagnostics_robust_inference/generalized_least_squares/gls_model.py +130 -0
- econometrics/model_specification_diagnostics_robust_inference/model_selection/__init__.py +18 -0
- econometrics/model_specification_diagnostics_robust_inference/model_selection/model_selection_model.py +286 -0
- econometrics/model_specification_diagnostics_robust_inference/regularization/__init__.py +15 -0
- econometrics/model_specification_diagnostics_robust_inference/regularization/regularization_model.py +177 -0
- econometrics/model_specification_diagnostics_robust_inference/robust_errors/__init__.py +15 -0
- econometrics/model_specification_diagnostics_robust_inference/robust_errors/robust_errors_model.py +122 -0
- econometrics/model_specification_diagnostics_robust_inference/simultaneous_equations/__init__.py +15 -0
- econometrics/model_specification_diagnostics_robust_inference/simultaneous_equations/simultaneous_equations_model.py +246 -0
- econometrics/model_specification_diagnostics_robust_inference/weighted_least_squares/__init__.py +15 -0
- econometrics/model_specification_diagnostics_robust_inference/weighted_least_squares/wls_model.py +127 -0
- econometrics/nonparametric/nonparametric_semiparametric_methods/__init__.py +0 -0
- econometrics/spatial_econometrics/spatial_econometrics_new/__init__.py +0 -0
- econometrics/specific_data_modeling/micro_discrete_limited_data/__init__.py +0 -0
- econometrics/specific_data_modeling/survival_duration_data/__init__.py +0 -0
- econometrics/specific_data_modeling/time_series_panel_data/__init__.py +143 -0
- econometrics/specific_data_modeling/time_series_panel_data/arima_model.py +104 -0
- econometrics/specific_data_modeling/time_series_panel_data/cointegration_vecm.py +334 -0
- econometrics/specific_data_modeling/time_series_panel_data/dynamic_panel_models.py +653 -0
- econometrics/specific_data_modeling/time_series_panel_data/exponential_smoothing.py +176 -0
- econometrics/specific_data_modeling/time_series_panel_data/garch_model.py +198 -0
- econometrics/specific_data_modeling/time_series_panel_data/panel_diagnostics.py +125 -0
- econometrics/specific_data_modeling/time_series_panel_data/panel_var.py +60 -0
- econometrics/specific_data_modeling/time_series_panel_data/structural_break_tests.py +87 -0
- econometrics/specific_data_modeling/time_series_panel_data/time_varying_parameter_models.py +106 -0
- econometrics/specific_data_modeling/time_series_panel_data/unit_root_tests.py +204 -0
- econometrics/specific_data_modeling/time_series_panel_data/var_svar_model.py +372 -0
- econometrics/statistical_inference/statistical_inference_techniques/__init__.py +0 -0
- econometrics/statistics/distribution_decomposition_methods/__init__.py +0 -0
- econometrics/tests/basic_parametric_estimation_tests/__init__.py +3 -0
- econometrics/tests/basic_parametric_estimation_tests/test_gmm.py +128 -0
- econometrics/tests/basic_parametric_estimation_tests/test_mle.py +127 -0
- econometrics/tests/basic_parametric_estimation_tests/test_ols.py +100 -0
- econometrics/tests/model_specification_diagnostics_tests/__init__.py +3 -0
- econometrics/tests/model_specification_diagnostics_tests/test_diagnostic_tests.py +86 -0
- econometrics/tests/model_specification_diagnostics_tests/test_robust_errors.py +89 -0
- econometrics/tests/specific_data_modeling_tests/__init__.py +3 -0
- econometrics/tests/specific_data_modeling_tests/test_arima.py +98 -0
- econometrics/tests/specific_data_modeling_tests/test_dynamic_panel.py +198 -0
- econometrics/tests/specific_data_modeling_tests/test_exponential_smoothing.py +105 -0
- econometrics/tests/specific_data_modeling_tests/test_garch.py +118 -0
- econometrics/tests/specific_data_modeling_tests/test_unit_root.py +156 -0
- econometrics/tests/specific_data_modeling_tests/test_var.py +124 -0
- prompts/__init__.py +0 -0
- prompts/analysis_guides.py +43 -0
- pyproject.toml +78 -0
- resources/MCP_MASTER_GUIDE.md +422 -0
- resources/MCP_TOOLS_DATA_FORMAT_GUIDE.md +185 -0
- resources/__init__.py +0 -0
- server.py +83 -0
- tools/README.md +88 -0
- tools/__init__.py +45 -0
- tools/data_loader.py +213 -0
- tools/decorators.py +38 -0
- tools/econometrics_adapter.py +286 -0
- tools/mcp_tool_groups/__init__.py +1 -0
- tools/mcp_tool_groups/basic_parametric_tools.py +173 -0
- tools/mcp_tool_groups/model_specification_tools.py +402 -0
- tools/mcp_tool_groups/time_series_tools.py +494 -0
- tools/mcp_tools_registry.py +114 -0
- tools/model_specification_adapter.py +369 -0
- tools/output_formatter.py +563 -0
- tools/time_series_panel_data_adapter.py +858 -0
- tools/time_series_panel_data_tools.py +65 -0
- aigroup_econ_mcp/__init__.py +0 -19
- aigroup_econ_mcp/cli.py +0 -82
- aigroup_econ_mcp/config.py +0 -561
- aigroup_econ_mcp/server.py +0 -452
- aigroup_econ_mcp/tools/__init__.py +0 -19
- aigroup_econ_mcp/tools/base.py +0 -470
- aigroup_econ_mcp/tools/cache.py +0 -533
- aigroup_econ_mcp/tools/data_loader.py +0 -195
- aigroup_econ_mcp/tools/file_parser.py +0 -1027
- aigroup_econ_mcp/tools/machine_learning.py +0 -60
- aigroup_econ_mcp/tools/ml_ensemble.py +0 -210
- aigroup_econ_mcp/tools/ml_evaluation.py +0 -272
- aigroup_econ_mcp/tools/ml_models.py +0 -54
- aigroup_econ_mcp/tools/ml_regularization.py +0 -186
- aigroup_econ_mcp/tools/monitoring.py +0 -555
- aigroup_econ_mcp/tools/optimized_example.py +0 -229
- aigroup_econ_mcp/tools/panel_data.py +0 -619
- aigroup_econ_mcp/tools/regression.py +0 -214
- aigroup_econ_mcp/tools/statistics.py +0 -154
- aigroup_econ_mcp/tools/time_series.py +0 -698
- aigroup_econ_mcp/tools/timeout.py +0 -283
- aigroup_econ_mcp/tools/tool_descriptions.py +0 -410
- aigroup_econ_mcp/tools/tool_handlers.py +0 -1016
- aigroup_econ_mcp/tools/tool_registry.py +0 -478
- aigroup_econ_mcp/tools/validation.py +0 -482
- aigroup_econ_mcp-1.3.3.dist-info/METADATA +0 -525
- aigroup_econ_mcp-1.3.3.dist-info/RECORD +0 -30
- aigroup_econ_mcp-1.3.3.dist-info/entry_points.txt +0 -2
- /aigroup_econ_mcp-1.3.3.dist-info/licenses/LICENSE → /LICENSE +0 -0
- {aigroup_econ_mcp-1.3.3.dist-info → aigroup_econ_mcp-1.4.3.dist-info}/WHEEL +0 -0
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
"""
|
|
2
|
+
时变参数模型实现(门限模型/TAR、STAR模型、马尔科夫转换模型)
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from typing import List, Optional
|
|
6
|
+
from pydantic import BaseModel, Field
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class TimeVaryingParameterResult(BaseModel):
    """Result container shared by the time-varying-parameter models (TAR / STAR / Markov switching)."""
    # Human-readable model label, e.g. "TAR Model (2 regimes)".
    model_type: str = Field(..., description="模型类型")
    # Number of regimes the model distinguishes.
    regimes: int = Field(..., description="机制数量")
    # Regression coefficients as a flat list.
    coefficients: List[float] = Field(..., description="回归系数")
    # Standard errors / t-statistics / p-values, when the estimator provides them.
    std_errors: Optional[List[float]] = Field(None, description="系数标准误")
    t_values: Optional[List[float]] = Field(None, description="t统计量")
    p_values: Optional[List[float]] = Field(None, description="p值")
    # Name of the regime-transition function (e.g. "Heaviside", "Logistic", "Markov Chain").
    transition_function: Optional[str] = Field(None, description="转换函数")
    # Threshold value(s) for threshold-type models.
    threshold_values: Optional[List[float]] = Field(None, description="门限值")
    # Transition-speed coefficients, or flattened Markov transition probabilities.
    transition_coefficients: Optional[List[float]] = Field(None, description="转换系数")
    log_likelihood: Optional[float] = Field(None, description="对数似然值")
    aic: Optional[float] = Field(None, description="赤池信息准则")
    bic: Optional[float] = Field(None, description="贝叶斯信息准则")
    # Number of observations used.
    n_obs: int = Field(..., description="观测数量")
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def tar_model(
    y_data: List[float],
    x_data: List[List[float]],
    threshold_variable: List[float],
    n_regimes: int = 2
) -> TimeVaryingParameterResult:
    """Threshold autoregressive (TAR) model.

    NOTE: placeholder implementation — no estimation is performed yet;
    fixed illustrative coefficients and threshold values are returned.

    Args:
        y_data: Dependent-variable series.
        x_data: Regressor series (currently unused by the placeholder).
        threshold_variable: Series driving the regime switch.
        n_regimes: Number of regimes (must be at least 2).

    Returns:
        TimeVaryingParameterResult: Placeholder TAR result.

    Raises:
        ValueError: On empty input, inconsistent lengths, or n_regimes < 2.
    """
    # Basic input validation so malformed calls fail loudly even while
    # the estimator itself is still a stub.
    if not y_data:
        raise ValueError("y_data不能为空")
    if len(threshold_variable) != len(y_data):
        raise ValueError("threshold_variable长度必须与y_data一致")
    if n_regimes < 2:
        raise ValueError("n_regimes必须至少为2")
    return TimeVaryingParameterResult(
        model_type=f"TAR Model ({n_regimes} regimes)",
        regimes=n_regimes,
        coefficients=[0.5, 0.2, 0.3],  # placeholder coefficients
        transition_function="Heaviside",
        threshold_values=[0.0],  # placeholder threshold value
        n_obs=len(y_data)
    )
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def star_model(
    y_data: List[float],
    x_data: List[List[float]],
    threshold_variable: List[float],
    star_type: str = "logistic"
) -> TimeVaryingParameterResult:
    """Smooth transition autoregressive (STAR) model.

    NOTE: placeholder implementation — no estimation is performed yet;
    fixed illustrative coefficients are returned.

    Args:
        y_data: Dependent-variable series.
        x_data: Regressor series (currently unused by the placeholder).
        threshold_variable: Series driving the smooth transition.
        star_type: Transition type, "logistic" or "exponential".

    Returns:
        TimeVaryingParameterResult: Placeholder STAR result.

    Raises:
        ValueError: On empty input or inconsistent series lengths.
    """
    # Basic input validation so malformed calls fail loudly even while
    # the estimator itself is still a stub.
    if not y_data:
        raise ValueError("y_data不能为空")
    if len(threshold_variable) != len(y_data):
        raise ValueError("threshold_variable长度必须与y_data一致")
    return TimeVaryingParameterResult(
        model_type=f"{star_type.upper()}-STAR Model",
        regimes=2,
        coefficients=[0.4, 0.3, 0.2],  # placeholder coefficients
        transition_function=star_type.capitalize(),
        threshold_values=[0.1],  # placeholder threshold value
        transition_coefficients=[0.8],  # placeholder transition-speed coefficient
        n_obs=len(y_data)
    )
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
def markov_switching_model(
    y_data: List[float],
    x_data: List[List[float]],
    n_regimes: int = 2
) -> TimeVaryingParameterResult:
    """Markov regime-switching model.

    NOTE: placeholder implementation — no estimation is performed yet;
    fixed illustrative coefficients and transition probabilities are
    returned.

    Args:
        y_data: Dependent-variable series.
        x_data: Regressor series (currently unused by the placeholder).
        n_regimes: Number of regimes (must be at least 2).

    Returns:
        TimeVaryingParameterResult: Placeholder Markov-switching result;
        ``transition_coefficients`` holds a flattened transition matrix.

    Raises:
        ValueError: On empty input or n_regimes < 2.
    """
    # Basic input validation so malformed calls fail loudly even while
    # the estimator itself is still a stub.
    if not y_data:
        raise ValueError("y_data不能为空")
    if n_regimes < 2:
        raise ValueError("n_regimes必须至少为2")
    return TimeVaryingParameterResult(
        model_type=f"Markov Switching Model ({n_regimes} regimes)",
        regimes=n_regimes,
        coefficients=[0.6, 0.1, 0.25],  # placeholder coefficients
        transition_function="Markov Chain",
        transition_coefficients=[0.9, 0.1, 0.2, 0.8],  # placeholder flattened 2x2 transition matrix
        n_obs=len(y_data)
    )
|
|
@@ -0,0 +1,204 @@
|
|
|
1
|
+
"""
|
|
2
|
+
单位根检验实现(ADF、PP、KPSS)
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from typing import List, Optional
|
|
6
|
+
from pydantic import BaseModel, Field
|
|
7
|
+
import numpy as np
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class UnitRootTestResult(BaseModel):
    """Result container shared by the unit-root tests (ADF / PP / KPSS)."""
    # Human-readable test name, e.g. "Augmented Dickey-Fuller Test".
    test_type: str = Field(..., description="检验类型")
    test_statistic: float = Field(..., description="检验统计量")
    p_value: Optional[float] = Field(None, description="p值")
    # Mapping of significance level (e.g. "5%") to critical value.
    critical_values: Optional[dict] = Field(None, description="临界值")
    # Lag order used by the test.
    lags: Optional[int] = Field(None, description="滞后阶数")
    # NOTE(review): KPSS's null hypothesis is stationarity, so there
    # p < 0.05 is evidence of NON-stationarity — the "unified p<0.05 =>
    # stationary" convention described below inverts the KPSS conclusion;
    # see kpss_test before relying on this flag.
    stationary: Optional[bool] = Field(None, description="是否平稳 (ADF/PP: p<0.05为平稳; KPSS: p<0.05为非平稳,但接口统一返回p<0.05为平稳)")
    # Number of observations the test was computed on.
    n_obs: int = Field(..., description="观测数量")
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def adf_test(
    data: List[float],
    max_lags: Optional[int] = None,
    regression_type: str = "c"
) -> UnitRootTestResult:
    """Augmented Dickey-Fuller (ADF) unit-root test.

    Args:
        data: Time series to test.
        max_lags: Maximum lag order forwarded to statsmodels (None = automatic).
        regression_type: "c" constant, "ct" constant and trend, "nc" no constant.

    Returns:
        UnitRootTestResult: ``stationary`` is True when p < 0.05 (the
        unit-root null hypothesis is rejected).

    Raises:
        ValueError: If input is empty or the underlying test fails.
    """
    if not data:
        raise ValueError("数据不能为空")
    try:
        from statsmodels.tsa.stattools import adfuller

        # adfuller returns (statistic, pvalue, usedlag, nobs, critvalues, icbest).
        result = adfuller(data, maxlag=max_lags, regression=regression_type)

        test_statistic = float(result[0])
        p_value = float(result[1])
        lags = int(result[2])
        # Use the effective sample size reported by statsmodels: it accounts
        # for BOTH the first-difference and the lag truncation, whereas
        # len(data) - lags over-counts by the differenced observation.
        n_obs = int(result[3])

        critical_values = {}
        if result[4] is not None:
            critical_values = {key: float(value) for key, value in result[4].items()}

        return UnitRootTestResult(
            test_type="Augmented Dickey-Fuller Test",
            test_statistic=test_statistic,
            p_value=p_value,
            critical_values=critical_values,
            lags=lags,
            stationary=p_value < 0.05,
            n_obs=n_obs
        )
    except ValueError:
        # Keep our own validation messages intact instead of double-wrapping.
        raise
    except Exception as e:
        raise ValueError(f"ADF检验失败: {str(e)}")
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def pp_test(
    data: List[float],
    regression_type: str = "c"
) -> UnitRootTestResult:
    """Phillips-Perron (PP) style unit-root test.

    NOTE: statsmodels does not ship a ``PhillipsPerron`` class (one exists
    in the third-party ``arch`` package), so the previous "try the class,
    fall back to adfuller" code always took the fallback — and the dead
    branch would have crashed anyway (it indexed the class call like a
    tuple). This version keeps the only path that ever ran: a fixed-lag
    Dickey-Fuller regression (``adfuller`` with ``autolag=None``) as a
    PP approximation (same regression, no HAC correction).

    Args:
        data: Time series to test.
        regression_type: "c" (constant) or "ct" (constant and trend).

    Returns:
        UnitRootTestResult: ``stationary`` is True when p < 0.05 (the
        unit-root null hypothesis is rejected).

    Raises:
        ValueError: If the test cannot be computed.
    """
    try:
        from statsmodels.tsa.stattools import adfuller

        result = adfuller(data, regression=regression_type, autolag=None)

        test_statistic = float(result[0])
        p_value = float(result[1])
        lags = int(result[2])
        # Preserve the original convention of reporting the full series length.
        n_obs = len(data)

        critical_values = {}
        if len(result) > 4 and result[4] is not None:
            critical_values = {key: float(value) for key, value in result[4].items()}

        return UnitRootTestResult(
            test_type="Phillips-Perron Test",
            test_statistic=test_statistic,
            p_value=p_value,
            critical_values=critical_values,
            lags=lags,
            stationary=p_value < 0.05,
            n_obs=n_obs
        )
    except Exception as e:
        raise ValueError(f"PP检验失败: {str(e)}")
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
def kpss_test(
    data: List[float],
    regression_type: str = "c"
) -> UnitRootTestResult:
    """KPSS stationarity test.

    Unlike ADF/PP, the KPSS null hypothesis is that the series IS
    stationary, so a SMALL p-value is evidence of NON-stationarity.
    The previous implementation reported ``stationary = p < 0.05``,
    which inverted the conclusion; this version correctly returns
    ``stationary = p >= 0.05`` (stationarity null not rejected).

    Args:
        data: Time series to test.
        regression_type: "c" (level-stationary null) or
            "ct" (trend-stationary null).

    Returns:
        UnitRootTestResult: Note that statsmodels interpolates KPSS
        p-values only within [0.01, 0.10]; values outside are clipped.

    Raises:
        ValueError: If the test cannot be computed.
    """
    try:
        from statsmodels.tsa.stattools import kpss

        # kpss returns (statistic, pvalue, lags, critvalues).
        result = kpss(data, regression=regression_type)

        test_statistic = float(result[0])
        p_value = float(result[1])
        lags = int(result[2])
        n_obs = len(data)

        critical_values = {}
        if result[3] is not None:
            critical_values = {key: float(value) for key, value in result[3].items()}

        # KPSS null = stationary: failing to reject means stationary.
        stationary = p_value >= 0.05

        return UnitRootTestResult(
            test_type="KPSS Test",
            test_statistic=test_statistic,
            p_value=p_value,
            critical_values=critical_values,
            lags=lags,
            stationary=stationary,
            n_obs=n_obs
        )
    except Exception as e:
        raise ValueError(f"KPSS检验失败: {str(e)}")
|
|
@@ -0,0 +1,372 @@
|
|
|
1
|
+
"""
|
|
2
|
+
VAR/SVAR模型实现
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from typing import List, Optional
|
|
6
|
+
from pydantic import BaseModel, Field
|
|
7
|
+
import numpy as np
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class VARResult(BaseModel):
    """Result container shared by the VAR and SVAR estimators."""
    # Human-readable model label, e.g. "VAR(2)" or "SVAR(1)".
    model_type: str = Field(..., description="模型类型")
    lags: int = Field(..., description="滞后期数")
    variables: List[str] = Field(..., description="变量名称")
    # One inner list of coefficients per equation (per dependent variable).
    coefficients: List[List[float]] = Field(..., description="回归系数矩阵")
    std_errors: Optional[List[List[float]]] = Field(None, description="系数标准误矩阵")
    t_values: Optional[List[List[float]]] = Field(None, description="t统计量矩阵")
    p_values: Optional[List[List[float]]] = Field(None, description="p值矩阵")
    aic: Optional[float] = Field(None, description="赤池信息准则")
    bic: Optional[float] = Field(None, description="贝叶斯信息准则")
    # Final prediction error.
    fpe: Optional[float] = Field(None, description="最终预测误差")
    # Hannan-Quinn information criterion.
    hqic: Optional[float] = Field(None, description="汉南-奎因信息准则")
    # Flattened impulse-response array (10 steps ahead).
    irf: Optional[List[float]] = Field(None, description="脉冲响应函数")
    # Flattened forecast-error variance decomposition (10 steps ahead).
    fevd: Optional[List[float]] = Field(None, description="方差分解")
    n_obs: int = Field(..., description="观测数量")
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def var_model(
    data: List[List[float]],
    lags: int = 1,
    variables: Optional[List[str]] = None
) -> VARResult:
    """Fit a vector autoregression (VAR) model.

    Fixes over the previous version: the debug ``print`` is removed, and
    coefficients are read from ``fitted_model.params`` column-wise (the
    old shape probing never matched — ``coefs`` is a 3-D array and
    ``params`` rows never equal n_vars — so all-zero coefficients were
    silently returned).

    Args:
        data: Multivariate time series; each inner list is one variable's series.
        lags: Lag order (positive, smaller than the sample size).
        variables: Optional variable names; defaults to "Variable_i".

    Returns:
        VARResult: Per-equation coefficient lists (column i of the params
        array: intercept first, then lagged terms), standard errors,
        t/p values, information criteria and flattened 10-step IRF/FEVD.

    Raises:
        ValueError: On invalid input or estimation failure.
    """
    try:
        from statsmodels.tsa.vector_ar.var_model import VAR
        import pandas as pd

        # ---- input validation -------------------------------------------
        if not data:
            raise ValueError("数据不能为空")
        if not all(isinstance(series, (list, tuple)) for series in data):
            raise ValueError("数据必须是二维列表格式,每个子列表代表一个变量的时间序列")
        series_lengths = [len(series) for series in data]
        if len(set(series_lengths)) > 1:
            raise ValueError(f"所有时间序列的长度必须一致,当前长度分别为: {series_lengths}")

        # Transpose so rows are observations and columns are variables.
        data_array = np.array(data, dtype=np.float64).T
        if np.isnan(data_array).any():
            raise ValueError("数据中包含缺失值(NaN)")
        if np.isinf(data_array).any():
            raise ValueError("数据中包含无穷大值")

        if variables is None:
            variables = [f"Variable_{i}" for i in range(len(data))]
        if len(variables) != len(data):
            raise ValueError(f"变量名称数量({len(variables)})与数据列数({len(data)})不一致")

        df = pd.DataFrame(data_array, columns=variables)

        if lags <= 0:
            raise ValueError("滞后期数必须为正整数")
        if lags >= len(df):
            raise ValueError("滞后期数必须小于样本数量")

        # ---- estimation -------------------------------------------------
        model = VAR(df)
        try:
            fitted_model = model.fit(lags)
        except Exception as fit_error:
            # Fall back to plain OLS with a fixed lag order.
            try:
                fitted_model = model.fit(maxlags=lags, ic=None, method='ols')
            except Exception:
                raise fit_error

        # ---- parameter extraction ---------------------------------------
        # statsmodels stores params as a (1 + n_vars * lags, n_vars) array:
        # column i holds equation i's intercept followed by lag coefficients.
        n_vars = len(variables)
        n_params = 1 + n_vars * lags

        def _column(matrix, i):
            """Column i of a statsmodels result matrix as a float list, or None."""
            if matrix is None:
                return None
            return np.asarray(matrix, dtype=np.float64)[:, i].tolist()

        params = getattr(fitted_model, 'params', None)
        stderr = getattr(fitted_model, 'stderr', None)
        tvalues = getattr(fitted_model, 'tvalues', None)
        pvalues = getattr(fitted_model, 'pvalues', None)

        coeffs = []
        std_errors = []
        t_values = []
        p_values = []
        for i in range(n_vars):
            eq_coeffs = _column(params, i)
            # Neutral fallbacks keep the result shape stable if a statistic
            # is unavailable (matches the previous defaulting behavior).
            coeffs.append(eq_coeffs if eq_coeffs is not None else [0.0] * n_params)
            eq_stderr = _column(stderr, i)
            std_errors.append(eq_stderr if eq_stderr is not None else [1.0] * len(coeffs[i]))
            eq_tvals = _column(tvalues, i)
            t_values.append(eq_tvals if eq_tvals is not None else [0.0] * len(coeffs[i]))
            eq_pvals = _column(pvalues, i)
            p_values.append(eq_pvals if eq_pvals is not None else [1.0] * len(coeffs[i]))

        # ---- information criteria ---------------------------------------
        aic = float(fitted_model.aic) if hasattr(fitted_model, 'aic') else None
        bic = float(fitted_model.bic) if hasattr(fitted_model, 'bic') else None
        fpe = float(fitted_model.fpe) if hasattr(fitted_model, 'fpe') else None
        hqic = float(fitted_model.hqic) if hasattr(fitted_model, 'hqic') else None

        # ---- IRF / FEVD (10 steps) --------------------------------------
        # Both can fail for unstable systems; treat them as optional output
        # instead of failing the whole estimation.
        irf = None
        try:
            irf_result = fitted_model.irf(10)
            if irf_result.irfs is not None:
                irf = np.asarray(irf_result.irfs).flatten().tolist()
        except Exception:
            pass
        fevd = None
        try:
            fevd_result = fitted_model.fevd(10)
            if fevd_result.decomp is not None:
                fevd = np.asarray(fevd_result.decomp).flatten().tolist()
        except Exception:
            pass

        return VARResult(
            model_type=f"VAR({lags})",
            lags=lags,
            variables=variables,
            coefficients=coeffs,
            std_errors=std_errors if std_errors else None,
            t_values=t_values if t_values else None,
            p_values=p_values if p_values else None,
            aic=aic,
            bic=bic,
            fpe=fpe,
            hqic=hqic,
            irf=irf,
            fevd=fevd,
            n_obs=len(data[0]) if data else 0
        )
    except ValueError:
        # Keep our own validation messages intact instead of double-wrapping.
        raise
    except Exception as e:
        raise ValueError(f"VAR模型拟合失败: {str(e)}")
|
|
202
|
+
|
|
203
|
+
|
|
204
|
+
def svar_model(
    data: List[List[float]],
    lags: int = 1,
    variables: Optional[List[str]] = None,
    a_matrix: Optional[List[List[float]]] = None,
    b_matrix: Optional[List[List[float]]] = None
) -> VARResult:
    """Fit a structural vector autoregression (SVAR) model.

    Args:
        data: Multivariate time series; each inner list is one variable's series.
        lags: Lag order (positive, smaller than the sample size).
        variables: Optional variable names; defaults to "Variable_i".
        a_matrix: A-side restriction matrix, shape (n_vars, n_vars).
        b_matrix: B-side restriction matrix, shape (n_vars, n_vars).

    Returns:
        VARResult: Per-equation coefficients, optional standard errors /
        t-values / p-values, information criteria, and flattened
        10-step IRF / FEVD arrays when available.

    Raises:
        ValueError: On invalid input or estimation failure (all errors
        inside the body are re-wrapped as ValueError).
    """
    try:
        from statsmodels.tsa.vector_ar.svar_model import SVAR
        import pandas as pd
        import numpy as np

        # Input validation.
        if not data:
            raise ValueError("数据不能为空")

        # Transpose so rows are observations, columns are variables.
        data_array = np.array(data, dtype=np.float64).T

        if np.isnan(data_array).any():
            raise ValueError("数据中包含缺失值(NaN)")

        if np.isinf(data_array).any():
            raise ValueError("数据中包含无穷大值")

        # Default variable names.
        if variables is None:
            variables = [f"Variable_{i}" for i in range(len(data))]

        if len(variables) != len(data):
            raise ValueError("变量名称数量与数据列数不一致")

        df = pd.DataFrame(data_array, columns=variables)

        # Lag-order sanity checks.
        if lags <= 0:
            raise ValueError("滞后期数必须为正整数")

        if lags >= len(df):
            raise ValueError("滞后期数必须小于样本数量")

        # Restriction matrices: converted to float arrays and shape-checked.
        # NOTE(review): statsmodels' SVAR expects restriction matrices with
        # the string 'E' marking free (estimated) elements; a purely numeric
        # matrix leaves nothing to estimate — confirm intended usage.
        A = None
        B = None

        if a_matrix is not None:
            try:
                A = np.array(a_matrix, dtype=np.float64)
                if A.shape != (len(variables), len(variables)):
                    raise ValueError(f"A矩阵维度不正确,应为({len(variables)}, {len(variables)})")
            except Exception as e:
                raise ValueError(f"A矩阵处理失败: {str(e)}")

        if b_matrix is not None:
            try:
                B = np.array(b_matrix, dtype=np.float64)
                if B.shape != (len(variables), len(variables)):
                    raise ValueError(f"B矩阵维度不正确,应为({len(variables)}, {len(variables)})")
            except Exception as e:
                raise ValueError(f"B矩阵处理失败: {str(e)}")

        # Fit the AB-form SVAR.
        model = SVAR(df, svar_type='AB', A=A, B=B)
        fitted_model = model.fit(lags, maxiter=1000)

        # Per-equation parameter containers.
        coeffs = []
        std_errors = []
        t_values = []
        p_values = []

        n_vars = len(variables)

        # NOTE(review): this indexing assumes ``coefs`` is a 2-D
        # (n_vars * lags, n_vars) array, but statsmodels documents it as
        # 3-D (lags, neqs, neqs); if so, the slices below yield nested
        # lists rather than flat floats — verify against the installed
        # statsmodels version.
        if hasattr(fitted_model, 'coefs'):
            coef_matrix = fitted_model.coefs
            for i in range(n_vars):  # one equation per dependent variable
                eq_coeffs = []
                # Collect this equation's coefficients lag by lag.
                for lag in range(lags):
                    start_idx = lag * n_vars
                    end_idx = (lag + 1) * n_vars
                    eq_coeffs.extend(coef_matrix[start_idx:end_idx, i].tolist())
                coeffs.append(eq_coeffs)

        # Standard errors, t-values and p-values, when exposed by the result
        # object (same assumed layout as the coefficients above).
        if hasattr(fitted_model, 'stderr') and fitted_model.stderr is not None:
            stderr_matrix = fitted_model.stderr
            for i in range(n_vars):
                eq_std_errors = []
                for lag in range(lags):
                    start_idx = lag * n_vars
                    end_idx = (lag + 1) * n_vars
                    eq_std_errors.extend(stderr_matrix[start_idx:end_idx, i].tolist())
                std_errors.append(eq_std_errors)

        if hasattr(fitted_model, 'tvalues') and fitted_model.tvalues is not None:
            tvalues_matrix = fitted_model.tvalues
            for i in range(n_vars):
                eq_t_values = []
                for lag in range(lags):
                    start_idx = lag * n_vars
                    end_idx = (lag + 1) * n_vars
                    eq_t_values.extend(tvalues_matrix[start_idx:end_idx, i].tolist())
                t_values.append(eq_t_values)

        if hasattr(fitted_model, 'pvalues') and fitted_model.pvalues is not None:
            pvalues_matrix = fitted_model.pvalues
            for i in range(n_vars):
                eq_p_values = []
                for lag in range(lags):
                    start_idx = lag * n_vars
                    end_idx = (lag + 1) * n_vars
                    eq_p_values.extend(pvalues_matrix[start_idx:end_idx, i].tolist())
                p_values.append(eq_p_values)

        # Information criteria (None when the result object lacks them).
        aic = float(fitted_model.aic) if hasattr(fitted_model, 'aic') else None
        bic = float(fitted_model.bic) if hasattr(fitted_model, 'bic') else None
        fpe = float(fitted_model.fpe) if hasattr(fitted_model, 'fpe') else None
        hqic = float(fitted_model.hqic) if hasattr(fitted_model, 'hqic') else None

        # Impulse responses, 10 steps ahead, flattened.
        irf_result = fitted_model.irf(10)
        irf = irf_result.irfs.flatten().tolist() if hasattr(irf_result, 'irfs') and irf_result.irfs is not None else None

        # Forecast-error variance decomposition, 10 steps ahead, flattened.
        fevd_result = fitted_model.fevd(10)
        fevd = fevd_result.decomp.flatten().tolist() if hasattr(fevd_result, 'decomp') and fevd_result.decomp is not None else None

        return VARResult(
            model_type=f"SVAR({lags})",
            lags=lags,
            variables=variables,
            coefficients=coeffs,
            std_errors=std_errors if std_errors else None,
            t_values=t_values if t_values else None,
            p_values=p_values if p_values else None,
            aic=aic,
            bic=bic,
            fpe=fpe,
            hqic=hqic,
            irf=irf,
            fevd=fevd,
            n_obs=len(data[0]) if data else 0
        )
    except Exception as e:
        # Re-wrap every failure (including our own validation errors) as
        # ValueError with a uniform message prefix.
        raise ValueError(f"SVAR模型拟合失败: {str(e)}")
|
|
File without changes
|
|
File without changes
|