aigroup-econ-mcp 0.4.2__py3-none-any.whl → 1.4.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (119)
  1. .gitignore +253 -0
  2. PKG-INFO +710 -0
  3. README.md +672 -0
  4. __init__.py +14 -0
  5. aigroup_econ_mcp-1.4.3.dist-info/METADATA +710 -0
  6. aigroup_econ_mcp-1.4.3.dist-info/RECORD +92 -0
  7. aigroup_econ_mcp-1.4.3.dist-info/entry_points.txt +2 -0
  8. aigroup_econ_mcp-1.4.3.dist-info/licenses/LICENSE +21 -0
  9. cli.py +28 -0
  10. econometrics/README.md +18 -0
  11. econometrics/__init__.py +191 -0
  12. econometrics/advanced_methods/modern_computing_machine_learning/__init__.py +0 -0
  13. econometrics/basic_parametric_estimation/__init__.py +31 -0
  14. econometrics/basic_parametric_estimation/gmm/__init__.py +13 -0
  15. econometrics/basic_parametric_estimation/gmm/gmm_model.py +256 -0
  16. econometrics/basic_parametric_estimation/mle/__init__.py +13 -0
  17. econometrics/basic_parametric_estimation/mle/mle_model.py +241 -0
  18. econometrics/basic_parametric_estimation/ols/__init__.py +13 -0
  19. econometrics/basic_parametric_estimation/ols/ols_model.py +141 -0
  20. econometrics/causal_inference/causal_identification_strategy/__init__.py +0 -0
  21. econometrics/missing_data/missing_data_measurement_error/__init__.py +0 -0
  22. econometrics/model_specification_diagnostics_robust_inference/README.md +173 -0
  23. econometrics/model_specification_diagnostics_robust_inference/__init__.py +78 -0
  24. econometrics/model_specification_diagnostics_robust_inference/diagnostic_tests/__init__.py +20 -0
  25. econometrics/model_specification_diagnostics_robust_inference/diagnostic_tests/diagnostic_tests_model.py +148 -0
  26. econometrics/model_specification_diagnostics_robust_inference/generalized_least_squares/__init__.py +15 -0
  27. econometrics/model_specification_diagnostics_robust_inference/generalized_least_squares/gls_model.py +130 -0
  28. econometrics/model_specification_diagnostics_robust_inference/model_selection/__init__.py +18 -0
  29. econometrics/model_specification_diagnostics_robust_inference/model_selection/model_selection_model.py +282 -0
  30. econometrics/model_specification_diagnostics_robust_inference/regularization/__init__.py +15 -0
  31. econometrics/model_specification_diagnostics_robust_inference/regularization/regularization_model.py +177 -0
  32. econometrics/model_specification_diagnostics_robust_inference/robust_errors/__init__.py +15 -0
  33. econometrics/model_specification_diagnostics_robust_inference/robust_errors/robust_errors_model.py +122 -0
  34. econometrics/model_specification_diagnostics_robust_inference/simultaneous_equations/__init__.py +15 -0
  35. econometrics/model_specification_diagnostics_robust_inference/simultaneous_equations/simultaneous_equations_model.py +246 -0
  36. econometrics/model_specification_diagnostics_robust_inference/weighted_least_squares/__init__.py +15 -0
  37. econometrics/model_specification_diagnostics_robust_inference/weighted_least_squares/wls_model.py +127 -0
  38. econometrics/nonparametric/nonparametric_semiparametric_methods/__init__.py +0 -0
  39. econometrics/spatial_econometrics/spatial_econometrics_new/__init__.py +0 -0
  40. econometrics/specific_data_modeling/micro_discrete_limited_data/__init__.py +0 -0
  41. econometrics/specific_data_modeling/survival_duration_data/__init__.py +0 -0
  42. econometrics/specific_data_modeling/time_series_panel_data/__init__.py +143 -0
  43. econometrics/specific_data_modeling/time_series_panel_data/arima_model.py +104 -0
  44. econometrics/specific_data_modeling/time_series_panel_data/cointegration_vecm.py +334 -0
  45. econometrics/specific_data_modeling/time_series_panel_data/dynamic_panel_models.py +653 -0
  46. econometrics/specific_data_modeling/time_series_panel_data/exponential_smoothing.py +176 -0
  47. econometrics/specific_data_modeling/time_series_panel_data/garch_model.py +198 -0
  48. econometrics/specific_data_modeling/time_series_panel_data/panel_diagnostics.py +125 -0
  49. econometrics/specific_data_modeling/time_series_panel_data/panel_var.py +60 -0
  50. econometrics/specific_data_modeling/time_series_panel_data/structural_break_tests.py +87 -0
  51. econometrics/specific_data_modeling/time_series_panel_data/time_varying_parameter_models.py +106 -0
  52. econometrics/specific_data_modeling/time_series_panel_data/unit_root_tests.py +204 -0
  53. econometrics/specific_data_modeling/time_series_panel_data/var_svar_model.py +372 -0
  54. econometrics/statistical_inference/statistical_inference_techniques/__init__.py +0 -0
  55. econometrics/statistics/distribution_decomposition_methods/__init__.py +0 -0
  56. econometrics/tests/basic_parametric_estimation_tests/__init__.py +3 -0
  57. econometrics/tests/basic_parametric_estimation_tests/test_gmm.py +128 -0
  58. econometrics/tests/basic_parametric_estimation_tests/test_mle.py +127 -0
  59. econometrics/tests/basic_parametric_estimation_tests/test_ols.py +100 -0
  60. econometrics/tests/model_specification_diagnostics_tests/__init__.py +3 -0
  61. econometrics/tests/model_specification_diagnostics_tests/test_diagnostic_tests.py +86 -0
  62. econometrics/tests/model_specification_diagnostics_tests/test_robust_errors.py +89 -0
  63. econometrics/tests/specific_data_modeling_tests/__init__.py +3 -0
  64. econometrics/tests/specific_data_modeling_tests/test_arima.py +98 -0
  65. econometrics/tests/specific_data_modeling_tests/test_dynamic_panel.py +198 -0
  66. econometrics/tests/specific_data_modeling_tests/test_exponential_smoothing.py +105 -0
  67. econometrics/tests/specific_data_modeling_tests/test_garch.py +118 -0
  68. econometrics/tests/specific_data_modeling_tests/test_unit_root.py +156 -0
  69. econometrics/tests/specific_data_modeling_tests/test_var.py +124 -0
  70. prompts/__init__.py +0 -0
  71. prompts/analysis_guides.py +43 -0
  72. pyproject.toml +78 -0
  73. resources/MCP_MASTER_GUIDE.md +422 -0
  74. resources/MCP_TOOLS_DATA_FORMAT_GUIDE.md +185 -0
  75. resources/__init__.py +0 -0
  76. server.py +83 -0
  77. tools/README.md +88 -0
  78. tools/__init__.py +45 -0
  79. tools/data_loader.py +213 -0
  80. tools/decorators.py +38 -0
  81. tools/econometrics_adapter.py +286 -0
  82. tools/mcp_tool_groups/__init__.py +1 -0
  83. tools/mcp_tool_groups/basic_parametric_tools.py +173 -0
  84. tools/mcp_tool_groups/model_specification_tools.py +402 -0
  85. tools/mcp_tool_groups/time_series_tools.py +494 -0
  86. tools/mcp_tools_registry.py +114 -0
  87. tools/model_specification_adapter.py +369 -0
  88. tools/output_formatter.py +563 -0
  89. tools/time_series_panel_data_adapter.py +858 -0
  90. tools/time_series_panel_data_tools.py +65 -0
  91. aigroup_econ_mcp/__init__.py +0 -19
  92. aigroup_econ_mcp/cli.py +0 -82
  93. aigroup_econ_mcp/config.py +0 -561
  94. aigroup_econ_mcp/server.py +0 -452
  95. aigroup_econ_mcp/tools/__init__.py +0 -18
  96. aigroup_econ_mcp/tools/base.py +0 -470
  97. aigroup_econ_mcp/tools/cache.py +0 -533
  98. aigroup_econ_mcp/tools/data_loader.py +0 -171
  99. aigroup_econ_mcp/tools/file_parser.py +0 -829
  100. aigroup_econ_mcp/tools/machine_learning.py +0 -60
  101. aigroup_econ_mcp/tools/ml_ensemble.py +0 -210
  102. aigroup_econ_mcp/tools/ml_evaluation.py +0 -272
  103. aigroup_econ_mcp/tools/ml_models.py +0 -54
  104. aigroup_econ_mcp/tools/ml_regularization.py +0 -172
  105. aigroup_econ_mcp/tools/monitoring.py +0 -555
  106. aigroup_econ_mcp/tools/optimized_example.py +0 -229
  107. aigroup_econ_mcp/tools/panel_data.py +0 -553
  108. aigroup_econ_mcp/tools/regression.py +0 -214
  109. aigroup_econ_mcp/tools/statistics.py +0 -154
  110. aigroup_econ_mcp/tools/time_series.py +0 -667
  111. aigroup_econ_mcp/tools/timeout.py +0 -283
  112. aigroup_econ_mcp/tools/tool_handlers.py +0 -378
  113. aigroup_econ_mcp/tools/tool_registry.py +0 -170
  114. aigroup_econ_mcp/tools/validation.py +0 -482
  115. aigroup_econ_mcp-0.4.2.dist-info/METADATA +0 -360
  116. aigroup_econ_mcp-0.4.2.dist-info/RECORD +0 -29
  117. aigroup_econ_mcp-0.4.2.dist-info/entry_points.txt +0 -2
  118. /aigroup_econ_mcp-0.4.2.dist-info/licenses/LICENSE → /LICENSE +0 -0
  119. {aigroup_econ_mcp-0.4.2.dist-info → aigroup_econ_mcp-1.4.3.dist-info}/WHEEL +0 -0
@@ -0,0 +1,148 @@
+ """
+ Model diagnostic tests (Diagnostic Tests) module implementation.
+
+ Includes the following statistical tests:
+ - Heteroskedasticity tests (White, Breusch-Pagan)
+ - Autocorrelation tests (Durbin-Watson, Ljung-Box)
+ - Normality test (Jarque-Bera)
+ - Multicollinearity diagnostics (VIF)
+ - Endogeneity test (Durbin-Wu-Hausman)
+ """
+
+ from typing import List, Dict, Any, Optional, Tuple
+ from dataclasses import dataclass
+ from pydantic import BaseModel, Field
+ import numpy as np
+ import pandas as pd
+ from scipy import stats
+ import statsmodels.api as sm
+ from statsmodels.stats.diagnostic import het_breuschpagan, het_white, acorr_ljungbox
+ from statsmodels.stats.stattools import jarque_bera
+ from statsmodels.stats.outliers_influence import variance_inflation_factor
+
+ from tools.decorators import with_file_support_decorator as econometric_tool, validate_input
+
+
+ class DiagnosticTestsResult(BaseModel):
+     """Model diagnostic test results"""
+     het_breuschpagan_stat: Optional[float] = Field(None, description="Breusch-Pagan heteroskedasticity test statistic")
+     het_breuschpagan_pvalue: Optional[float] = Field(None, description="Breusch-Pagan heteroskedasticity test p-value")
+     het_white_stat: Optional[float] = Field(None, description="White heteroskedasticity test statistic")
+     het_white_pvalue: Optional[float] = Field(None, description="White heteroskedasticity test p-value")
+     dw_statistic: Optional[float] = Field(None, description="Durbin-Watson autocorrelation test statistic")
+     jb_statistic: Optional[float] = Field(None, description="Jarque-Bera normality test statistic")
+     jb_pvalue: Optional[float] = Field(None, description="Jarque-Bera normality test p-value")
+     vif_values: Optional[List[float]] = Field(None, description="Variance inflation factors (VIF)")
+     feature_names: Optional[List[str]] = Field(None, description="Feature names")
+
+
+ @econometric_tool("diagnostic_tests")
+ @validate_input(data_type="econometric")
+ def diagnostic_tests(
+     y_data: List[float],
+     x_data: List[List[float]],
+     feature_names: Optional[List[str]] = None,
+     constant: bool = True
+ ) -> DiagnosticTestsResult:
+     """
+     Run a battery of model diagnostic tests.
+
+     Args:
+         y_data: Dependent variable data
+         x_data: Independent variable data
+         feature_names: Feature names
+         constant: Whether to include a constant term
+
+     Returns:
+         DiagnosticTestsResult: Diagnostic test results
+     """
+     # Convert to numpy arrays with float64 precision
+     y = np.asarray(y_data, dtype=np.float64)
+     X = np.asarray(x_data, dtype=np.float64)
+
+     # Add constant term
+     if constant:
+         X = sm.add_constant(X)
+         if feature_names:
+             feature_names = ["const"] + feature_names
+         else:
+             feature_names = [f"x{i}" for i in range(X.shape[1])]
+     else:
+         if not feature_names:
+             feature_names = [f"x{i}" for i in range(X.shape[1])]
+
+     # Fit OLS regression
+     try:
+         ols_model = sm.OLS(y, X)
+         ols_results = ols_model.fit()
+     except Exception:
+         # If OLS fails, return an empty result
+         return DiagnosticTestsResult(
+             het_breuschpagan_stat=None,
+             het_breuschpagan_pvalue=None,
+             het_white_stat=None,
+             het_white_pvalue=None,
+             dw_statistic=None,
+             jb_statistic=None,
+             jb_pvalue=None,
+             vif_values=None,
+             feature_names=feature_names[1:] if feature_names and len(feature_names) > 1 else None
+         )
+
+     # Residuals used by all of the tests below
+     residuals = ols_results.resid
+
+     # Breusch-Pagan heteroskedasticity test
+     try:
+         bp_stat, bp_pvalue, _, _ = het_breuschpagan(residuals, X)
+         bp_stat = float(bp_stat)
+         bp_pvalue = float(bp_pvalue)
+     except Exception:
+         bp_stat = None
+         bp_pvalue = None
+
+     # White heteroskedasticity test
+     try:
+         white_stat, white_pvalue, _, _ = het_white(residuals, X)
+         white_stat = float(white_stat)
+         white_pvalue = float(white_pvalue)
+     except Exception:
+         white_stat = None
+         white_pvalue = None
+
+     # Durbin-Watson autocorrelation test
+     try:
+         dw_stat = float(sm.stats.durbin_watson(residuals))
+     except Exception:
+         dw_stat = None
+
+     # Jarque-Bera normality test
+     try:
+         jb_stat, jb_pvalue, _, _ = jarque_bera(residuals)
+         jb_stat = float(jb_stat)
+         jb_pvalue = float(jb_pvalue)
+     except Exception:
+         jb_stat = None
+         jb_pvalue = None
+
+     # Variance inflation factors (VIF)
+     try:
+         vif_values = []
+         # Compute VIF only for the regressors (skip the constant term)
+         for i in range(1 if constant else 0, X.shape[1]):
+             vif = variance_inflation_factor(X, i)
+             vif_values.append(float(vif))
+     except Exception:
+         vif_values = None
+
+     return DiagnosticTestsResult(
+         het_breuschpagan_stat=bp_stat,
+         het_breuschpagan_pvalue=bp_pvalue,
+         het_white_stat=white_stat,
+         het_white_pvalue=white_pvalue,
+         dw_statistic=dw_stat,
+         jb_statistic=jb_stat,
+         jb_pvalue=jb_pvalue,
+         vif_values=vif_values,
+         feature_names=feature_names[1:] if constant and feature_names and len(feature_names) > 1 else feature_names
+     )
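Taken together, the `diagnostic_tests_model.py` hunk above wraps a battery of statsmodels checks behind one call. As a minimal sketch (not part of the diff), assuming the package layout shown in the file list, that `tools` is on the import path, and that the `econometric_tool`/`validate_input` decorators pass plain list inputs through unchanged:

```python
import numpy as np

from econometrics.model_specification_diagnostics_robust_inference.diagnostic_tests.diagnostic_tests_model import (
    diagnostic_tests,
)

# Synthetic data: y depends linearly on two regressors plus noise
rng = np.random.default_rng(0)
x1 = rng.normal(size=100)
x2 = rng.normal(size=100)
y = 1.0 + 2.0 * x1 - 0.5 * x2 + rng.normal(scale=0.3, size=100)

result = diagnostic_tests(
    y_data=y.tolist(),
    x_data=np.column_stack([x1, x2]).tolist(),
    feature_names=["x1", "x2"],
    constant=True,
)
# Large Breusch-Pagan/White p-values suggest no heteroskedasticity; a DW statistic
# near 2 suggests no first-order autocorrelation; VIF near 1 suggests little collinearity
print(result.het_breuschpagan_pvalue, result.dw_statistic, result.vif_values)
```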
@@ -0,0 +1,15 @@
+ """
+ Generalized Least Squares (GLS) module
+
+ GLS method implementation
+ """
+
+ from .gls_model import (
+     GLSResult,
+     gls_regression
+ )
+
+ __all__ = [
+     "GLSResult",
+     "gls_regression"
+ ]
@@ -0,0 +1,130 @@
+ """
+ Generalized Least Squares (GLS) model implementation
+ """
+
+ from typing import List, Dict, Any, Optional
+ from dataclasses import dataclass
+ from pydantic import BaseModel, Field
+ import numpy as np
+ import pandas as pd
+ from scipy import stats
+ import statsmodels.api as sm
+
+ from tools.decorators import with_file_support_decorator as econometric_tool, validate_input
+
+
+ class GLSResult(BaseModel):
+     """GLS regression results"""
+     coefficients: List[float] = Field(..., description="Regression coefficients")
+     std_errors: List[float] = Field(..., description="Coefficient standard errors")
+     t_values: List[float] = Field(..., description="t statistics")
+     p_values: List[float] = Field(..., description="p-values")
+     conf_int_lower: List[float] = Field(..., description="Confidence interval lower bounds")
+     conf_int_upper: List[float] = Field(..., description="Confidence interval upper bounds")
+     r_squared: float = Field(..., description="R-squared")
+     adj_r_squared: float = Field(..., description="Adjusted R-squared")
+     f_statistic: float = Field(..., description="F statistic")
+     f_p_value: float = Field(..., description="F statistic p-value")
+     n_obs: int = Field(..., description="Number of observations")
+     feature_names: List[str] = Field(..., description="Feature names")
+     log_likelihood: float = Field(..., description="Log-likelihood")
+
+
+ @econometric_tool("gls_regression")
+ @validate_input(data_type="econometric")
+ def gls_regression(
+     y_data: List[float],
+     x_data: List[List[float]],
+     sigma: Optional[List[List[float]]] = None,
+     feature_names: Optional[List[str]] = None,
+     constant: bool = True,
+     confidence_level: float = 0.95
+ ) -> GLSResult:
+     """
+     Generalized least squares regression.
+
+     Args:
+         y_data: Dependent variable data
+         x_data: Independent variable data
+         sigma: Error covariance matrix (optional; an identity matrix is used if omitted)
+         feature_names: Feature names
+         constant: Whether to include a constant term
+         confidence_level: Confidence level
+
+     Returns:
+         GLSResult: GLS regression results
+     """
+     # Convert to numpy arrays
+     y = np.asarray(y_data, dtype=np.float64)
+     X = np.asarray(x_data, dtype=np.float64)
+
+     # Add constant term
+     if constant:
+         X = sm.add_constant(X)
+         if feature_names:
+             feature_names = ["const"] + feature_names
+         else:
+             feature_names = [f"x{i}" for i in range(X.shape[1])]
+     else:
+         if not feature_names:
+             feature_names = [f"x{i}" for i in range(X.shape[1])]
+
+     # Check data dimensions
+     n, k = X.shape
+     if n <= k:
+         raise ValueError(f"Number of observations ({n}) must exceed number of variables ({k})")
+
+     # Without a covariance matrix, GLS with an identity matrix is equivalent to OLS
+     if sigma is None:
+         model = sm.GLS(y, X)
+     else:
+         sigma_array = np.asarray(sigma, dtype=np.float64)
+         # Check covariance matrix dimensions
+         if sigma_array.shape != (n, n):
+             raise ValueError(f"Covariance matrix sigma must have shape ({n}, {n}), got {sigma_array.shape}")
+         model = sm.GLS(y, X, sigma=sigma_array)
+
+     # Fit the model
+     try:
+         results = model.fit()
+     except Exception as e:
+         raise ValueError(f"Failed to fit GLS model: {str(e)}") from e
+
+     # Extract results
+     coefficients = results.params.tolist()
+     std_errors = results.bse.tolist()
+     t_values = results.tvalues.tolist()
+     p_values = results.pvalues.tolist()
+
+     # Confidence intervals
+     alpha = 1 - confidence_level
+     conf_int = results.conf_int(alpha=alpha)
+     conf_int_lower = conf_int[:, 0].tolist()
+     conf_int_upper = conf_int[:, 1].tolist()
+
+     # Other statistics
+     r_squared = float(results.rsquared)
+     adj_r_squared = float(results.rsquared_adj)
+
+     # F statistic
+     f_statistic = float(results.fvalue) if not np.isnan(results.fvalue) else 0.0
+     f_p_value = float(results.f_pvalue) if not np.isnan(results.f_pvalue) else 1.0
+
+     # Log-likelihood
+     log_likelihood = float(results.llf)
+
+     return GLSResult(
+         coefficients=coefficients,
+         std_errors=std_errors,
+         t_values=t_values,
+         p_values=p_values,
+         conf_int_lower=conf_int_lower,
+         conf_int_upper=conf_int_upper,
+         r_squared=r_squared,
+         adj_r_squared=adj_r_squared,
+         f_statistic=f_statistic,
+         f_p_value=f_p_value,
+         n_obs=int(results.nobs),
+         feature_names=feature_names,
+         log_likelihood=log_likelihood
+     )
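For context, `gls_regression` in the `gls_model.py` hunk above only differs from OLS when a non-identity `sigma` is supplied. A minimal sketch (not part of the diff), under the same import-path and decorator pass-through assumptions as the earlier example, using an assumed AR(1)-style Toeplitz error covariance:

```python
import numpy as np

from econometrics.model_specification_diagnostics_robust_inference.generalized_least_squares.gls_model import (
    gls_regression,
)

# Synthetic single-regressor model
rng = np.random.default_rng(1)
n = 50
x = rng.normal(size=n)
y = 0.5 + 1.5 * x + rng.normal(size=n)

# Assumed AR(1) error covariance: Sigma[i, j] = rho ** |i - j|
rho = 0.6
sigma = rho ** np.abs(np.subtract.outer(np.arange(n), np.arange(n)))

result = gls_regression(
    y_data=y.tolist(),
    x_data=[[v] for v in x.tolist()],
    sigma=sigma.tolist(),
    feature_names=["x1"],
)
# With correlated errors, GLS standard errors differ from plain OLS
print(result.coefficients, result.std_errors)
```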
@@ -0,0 +1,18 @@
+ """
+ Model Selection module
+
+ Includes:
+ - Information criteria (AIC/BIC/HQIC)
+ - Cross-validation (K-fold, leave-one-out)
+ - Granger causality test
+ """
+
+ from .model_selection_model import (
+     ModelSelectionResult,
+     model_selection_criteria
+ )
+
+ __all__ = [
+     "ModelSelectionResult",
+     "model_selection_criteria"
+ ]
@@ -0,0 +1,282 @@
+ """
+ Model Selection module implementation
+
+ Includes:
+ - Information criteria (AIC/BIC/HQIC)
+ - Cross-validation (K-fold, leave-one-out)
+ - Granger causality test
+ """
+
+ from typing import List, Dict, Any, Optional, Tuple
+ from dataclasses import dataclass
+ from pydantic import BaseModel, Field
+ import numpy as np
+ import pandas as pd
+ from scipy import stats
+ import statsmodels.api as sm
+ from statsmodels.tsa.stattools import grangercausalitytests
+
+ from tools.decorators import with_file_support_decorator as econometric_tool, validate_input
+
+
+ class GrangerCausalityResult(BaseModel):
+     """Granger causality test results"""
+     f_statistic: float = Field(..., description="F statistic")
+     p_value: float = Field(..., description="p-value")
+     lag_order: int = Field(..., description="Lag order")
+     n_obs: int = Field(..., description="Number of observations")
+     dependent_variable: str = Field(..., description="Dependent variable")
+     independent_variable: str = Field(..., description="Granger-causing variable")
+
+
+ class ModelSelectionResult(BaseModel):
+     """Model selection results"""
+     aic: float = Field(..., description="Akaike information criterion (AIC)")
+     bic: float = Field(..., description="Bayesian information criterion (BIC)")
+     hqic: float = Field(..., description="Hannan-Quinn information criterion (HQIC)")
+     r_squared: float = Field(..., description="R-squared")
+     adj_r_squared: float = Field(..., description="Adjusted R-squared")
+     log_likelihood: float = Field(..., description="Log-likelihood")
+     n_obs: int = Field(..., description="Number of observations")
+     n_params: int = Field(..., description="Number of parameters")
+     cv_score: Optional[float] = Field(None, description="Cross-validation score")
+
+
+ @econometric_tool("granger_causality_test")
+ @validate_input(data_type="timeseries")
+ def granger_causality_test(
+     x_data: List[float],
+     y_data: List[float],
+     max_lag: int = 1,
+     add_constant: bool = True
+ ) -> GrangerCausalityResult:
+     """
+     Granger causality test.
+
+     Args:
+         x_data: Candidate Granger-causing variable
+         y_data: Dependent variable
+         max_lag: Maximum lag order
+         add_constant: Whether to add a constant term
+
+     Returns:
+         GrangerCausalityResult: Granger causality test results
+     """
+     # Convert to numpy arrays
+     x = np.asarray(x_data, dtype=np.float64)
+     y = np.asarray(y_data, dtype=np.float64)
+
+     # Check data lengths
+     if len(x) != len(y):
+         raise ValueError("x_data and y_data must have the same length")
+
+     if len(x) <= max_lag:
+         raise ValueError("Data length must exceed the lag order")
+
+     # Build a DataFrame for statsmodels; the second column is tested as the cause of the first
+     data = pd.DataFrame({'y': y, 'x': x})
+
+     # Run the Granger causality test
+     try:
+         # grangercausalitytests returns a dict keyed by lag order
+         test_result = grangercausalitytests(data, max_lag, addconst=add_constant, verbose=False)
+
+         # Use the results at the maximum lag order
+         lag_order = max_lag
+         test_stats = test_result[lag_order][0]
+
+         # Extract the F statistic and p-value (using the ssr F-test)
+         ssr_ftest = test_stats['ssr_ftest']
+         f_stat = ssr_ftest[0]  # F statistic
+         p_value = ssr_ftest[1]  # p-value
+
+     except Exception:
+         # Fall back to defaults if the test fails
+         f_stat = 0.0
+         p_value = 1.0
+         lag_order = max_lag
+
+     return GrangerCausalityResult(
+         f_statistic=float(f_stat),
+         p_value=float(p_value),
+         lag_order=lag_order,
+         n_obs=len(y) - lag_order,  # effective observations after lagging
+         dependent_variable="y",
+         independent_variable="x"
+     )
+
+
+ @econometric_tool("model_selection_criteria")
+ @validate_input(data_type="econometric")
+ def model_selection_criteria(
+     y_data: List[float],
+     x_data: List[List[float]],
+     feature_names: Optional[List[str]] = None,
+     constant: bool = True,
+     cv_folds: Optional[int] = None
+ ) -> ModelSelectionResult:
+     """
+     Compute model selection information criteria.
+
+     Args:
+         y_data: Dependent variable data
+         x_data: Independent variable data
+         feature_names: Feature names
+         constant: Whether to include a constant term
+         cv_folds: Number of cross-validation folds (None skips cross-validation; -1 uses leave-one-out)
+
+     Returns:
+         ModelSelectionResult: Model selection results
+     """
+     # Convert to numpy arrays
+     y = np.asarray(y_data, dtype=np.float64)
+     X = np.asarray(x_data, dtype=np.float64)
+
+     # Add constant term
+     if constant:
+         X = sm.add_constant(X)
+         if feature_names:
+             feature_names = ["const"] + feature_names
+         else:
+             feature_names = [f"x{i}" for i in range(X.shape[1])]
+     else:
+         if not feature_names:
+             feature_names = [f"x{i}" for i in range(X.shape[1])]
+
+     # Fit OLS regression
+     try:
+         model = sm.OLS(y, X)
+         results = model.fit()
+     except Exception as e:
+         raise ValueError(f"Failed to fit model: {str(e)}") from e
+
+     # Extract statistics
+     n = int(results.nobs)
+     k = len(results.params)
+     r_squared = float(results.rsquared)
+     adj_r_squared = float(results.rsquared_adj)
+     log_likelihood = float(results.llf)
+     aic = float(results.aic)
+     bic = float(results.bic)
+
+     # Compute HQIC = -2*lnL + 2*k*ln(ln(n)) (statsmodels does not expose it for OLS)
+     if n > 1:
+         hqic = -2 * log_likelihood + 2 * k * np.log(np.log(n))
+     else:
+         hqic = np.inf
+
+     # Cross-validation
+     cv_score = None
+     if cv_folds is not None:
+         cv_score = _cross_validation(y, X, cv_folds)
+
+     return ModelSelectionResult(
+         aic=aic,
+         bic=bic,
+         hqic=float(hqic) if np.isfinite(hqic) else np.inf,
+         r_squared=r_squared,
+         adj_r_squared=adj_r_squared,
+         log_likelihood=log_likelihood,
+         n_obs=n,
+         n_params=k,
+         cv_score=float(cv_score) if cv_score is not None else None
+     )
+
+
+ def _cross_validation(y: np.ndarray, X: np.ndarray, folds: Optional[int]) -> Optional[float]:
+     """
+     Run cross-validation.
+
+     Args:
+         y: Dependent variable
+         X: Independent variable matrix
+         folds: Number of folds (-1 means leave-one-out; any other positive value means K-fold)
+
+     Returns:
+         Optional[float]: Cross-validation score (mean MSE), or None if unavailable
+     """
+     n = len(y)
+
+     if folds is None or folds == 0:
+         return None
+
+     if folds == -1 or folds >= n:
+         # Leave-one-out cross-validation
+         folds = n
+
+     if folds <= 1 or X.shape[0] != n:
+         return None
+
+     # Check that there is enough data to train on at all
+     if X.shape[0] < X.shape[1]:
+         return None
+
+     # Build shuffled fold indices
+     indices = np.arange(n)
+     np.random.seed(42)  # fixed seed for reproducibility
+     np.random.shuffle(indices)
+
+     # Compute the size of each fold
+     fold_sizes = np.full(folds, n // folds)
+     fold_sizes[:n % folds] += 1
+
+     current = 0
+     mse_scores = []
+
+     for fold_size in fold_sizes:
+         start, stop = current, current + fold_size
+         current = stop  # advance before any early exit so fold boundaries stay correct
+         test_idx = indices[start:stop]
+         train_idx = np.concatenate([indices[:start], indices[stop:]])
+
+         # Split the data
+         X_train, X_test = X[train_idx], X[test_idx]
+         y_train, y_test = y[train_idx], y[test_idx]
+
+         try:
+             # Ensure both splits have enough data for training and testing
+             if X_train.shape[0] < X_train.shape[1] or X_train.shape[0] == 0 or X_test.shape[0] == 0:
+                 continue
+
+             # Fit the training model, falling back to a regularized solver if needed
+             try:
+                 # Prefer statsmodels for a more stable fit
+                 train_model = sm.OLS(y_train, X_train)
+                 train_results = train_model.fit()
+                 beta_train = train_results.params
+             except Exception:
+                 # If statsmodels fails, fall back to numpy least squares
+                 # with a small ridge term to guard against singular matrices
+                 XtX = X_train.T @ X_train
+                 if XtX.shape[0] > 0:
+                     # Add a small regularization term
+                     reg_param = 1e-10 * np.trace(XtX) / XtX.shape[0] if np.trace(XtX) > 0 else 1e-10
+                     XtX_reg = XtX + reg_param * np.eye(XtX.shape[0])
+                     try:
+                         beta_train = np.linalg.solve(XtX_reg, X_train.T @ y_train)
+                     except np.linalg.LinAlgError:
+                         # If that still fails, use the pseudo-inverse
+                         beta_train = np.linalg.pinv(XtX_reg) @ X_train.T @ y_train
+                 else:
+                     continue
+
+             # Predict
+             try:
+                 y_pred = X_test @ beta_train
+             except Exception:
+                 continue
+
+             # Check that the predictions are valid
+             if not np.all(np.isfinite(y_pred)):
+                 continue
+
+             # Compute the MSE
+             mse = np.mean((y_test - y_pred) ** 2)
+             # Keep only valid MSE values
+             if np.isfinite(mse):
+                 mse_scores.append(mse)
+         except Exception:
+             # Skip this fold on numerical problems
+             pass
+
+     return float(np.mean(mse_scores)) if mse_scores else None
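A minimal sketch of `model_selection_criteria` from the `model_selection_model.py` hunk above (not part of the diff), under the same import-path and decorator assumptions as the earlier examples, comparing a correct specification against an over-specified one:

```python
import numpy as np

from econometrics.model_specification_diagnostics_robust_inference.model_selection.model_selection_model import (
    model_selection_criteria,
)

rng = np.random.default_rng(2)
x1 = rng.normal(size=120)
x2 = rng.normal(size=120)
y = 1.0 + 2.0 * x1 + rng.normal(size=120)  # x2 is irrelevant by construction

# Score both specifications with 5-fold cross-validation
small = model_selection_criteria(y.tolist(), [[v] for v in x1.tolist()], cv_folds=5)
large = model_selection_criteria(y.tolist(), np.column_stack([x1, x2]).tolist(), cv_folds=5)

# Lower AIC/BIC and CV MSE favor a model; BIC penalizes the extra regressor hardest
print(small.aic, large.aic, small.cv_score, large.cv_score)
```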
@@ -0,0 +1,15 @@
+ """
+ Regularization Methods module
+
+ Includes ridge regression, LASSO, and elastic net, for handling multicollinearity and high-dimensional data
+ """
+
+ from .regularization_model import (
+     RegularizationResult,
+     regularized_regression
+ )
+
+ __all__ = [
+     "RegularizationResult",
+     "regularized_regression"
+ ]