aigroup-econ-mcp 0.4.2__py3-none-any.whl → 1.4.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- .gitignore +253 -0
- PKG-INFO +710 -0
- README.md +672 -0
- __init__.py +14 -0
- aigroup_econ_mcp-1.4.3.dist-info/METADATA +710 -0
- aigroup_econ_mcp-1.4.3.dist-info/RECORD +92 -0
- aigroup_econ_mcp-1.4.3.dist-info/entry_points.txt +2 -0
- aigroup_econ_mcp-1.4.3.dist-info/licenses/LICENSE +21 -0
- cli.py +28 -0
- econometrics/README.md +18 -0
- econometrics/__init__.py +191 -0
- econometrics/advanced_methods/modern_computing_machine_learning/__init__.py +0 -0
- econometrics/basic_parametric_estimation/__init__.py +31 -0
- econometrics/basic_parametric_estimation/gmm/__init__.py +13 -0
- econometrics/basic_parametric_estimation/gmm/gmm_model.py +256 -0
- econometrics/basic_parametric_estimation/mle/__init__.py +13 -0
- econometrics/basic_parametric_estimation/mle/mle_model.py +241 -0
- econometrics/basic_parametric_estimation/ols/__init__.py +13 -0
- econometrics/basic_parametric_estimation/ols/ols_model.py +141 -0
- econometrics/causal_inference/causal_identification_strategy/__init__.py +0 -0
- econometrics/missing_data/missing_data_measurement_error/__init__.py +0 -0
- econometrics/model_specification_diagnostics_robust_inference/README.md +173 -0
- econometrics/model_specification_diagnostics_robust_inference/__init__.py +78 -0
- econometrics/model_specification_diagnostics_robust_inference/diagnostic_tests/__init__.py +20 -0
- econometrics/model_specification_diagnostics_robust_inference/diagnostic_tests/diagnostic_tests_model.py +149 -0
- econometrics/model_specification_diagnostics_robust_inference/generalized_least_squares/__init__.py +15 -0
- econometrics/model_specification_diagnostics_robust_inference/generalized_least_squares/gls_model.py +130 -0
- econometrics/model_specification_diagnostics_robust_inference/model_selection/__init__.py +18 -0
- econometrics/model_specification_diagnostics_robust_inference/model_selection/model_selection_model.py +286 -0
- econometrics/model_specification_diagnostics_robust_inference/regularization/__init__.py +15 -0
- econometrics/model_specification_diagnostics_robust_inference/regularization/regularization_model.py +177 -0
- econometrics/model_specification_diagnostics_robust_inference/robust_errors/__init__.py +15 -0
- econometrics/model_specification_diagnostics_robust_inference/robust_errors/robust_errors_model.py +122 -0
- econometrics/model_specification_diagnostics_robust_inference/simultaneous_equations/__init__.py +15 -0
- econometrics/model_specification_diagnostics_robust_inference/simultaneous_equations/simultaneous_equations_model.py +246 -0
- econometrics/model_specification_diagnostics_robust_inference/weighted_least_squares/__init__.py +15 -0
- econometrics/model_specification_diagnostics_robust_inference/weighted_least_squares/wls_model.py +127 -0
- econometrics/nonparametric/nonparametric_semiparametric_methods/__init__.py +0 -0
- econometrics/spatial_econometrics/spatial_econometrics_new/__init__.py +0 -0
- econometrics/specific_data_modeling/micro_discrete_limited_data/__init__.py +0 -0
- econometrics/specific_data_modeling/survival_duration_data/__init__.py +0 -0
- econometrics/specific_data_modeling/time_series_panel_data/__init__.py +143 -0
- econometrics/specific_data_modeling/time_series_panel_data/arima_model.py +104 -0
- econometrics/specific_data_modeling/time_series_panel_data/cointegration_vecm.py +334 -0
- econometrics/specific_data_modeling/time_series_panel_data/dynamic_panel_models.py +653 -0
- econometrics/specific_data_modeling/time_series_panel_data/exponential_smoothing.py +176 -0
- econometrics/specific_data_modeling/time_series_panel_data/garch_model.py +198 -0
- econometrics/specific_data_modeling/time_series_panel_data/panel_diagnostics.py +125 -0
- econometrics/specific_data_modeling/time_series_panel_data/panel_var.py +60 -0
- econometrics/specific_data_modeling/time_series_panel_data/structural_break_tests.py +87 -0
- econometrics/specific_data_modeling/time_series_panel_data/time_varying_parameter_models.py +106 -0
- econometrics/specific_data_modeling/time_series_panel_data/unit_root_tests.py +204 -0
- econometrics/specific_data_modeling/time_series_panel_data/var_svar_model.py +372 -0
- econometrics/statistical_inference/statistical_inference_techniques/__init__.py +0 -0
- econometrics/statistics/distribution_decomposition_methods/__init__.py +0 -0
- econometrics/tests/basic_parametric_estimation_tests/__init__.py +3 -0
- econometrics/tests/basic_parametric_estimation_tests/test_gmm.py +128 -0
- econometrics/tests/basic_parametric_estimation_tests/test_mle.py +127 -0
- econometrics/tests/basic_parametric_estimation_tests/test_ols.py +100 -0
- econometrics/tests/model_specification_diagnostics_tests/__init__.py +3 -0
- econometrics/tests/model_specification_diagnostics_tests/test_diagnostic_tests.py +86 -0
- econometrics/tests/model_specification_diagnostics_tests/test_robust_errors.py +89 -0
- econometrics/tests/specific_data_modeling_tests/__init__.py +3 -0
- econometrics/tests/specific_data_modeling_tests/test_arima.py +98 -0
- econometrics/tests/specific_data_modeling_tests/test_dynamic_panel.py +198 -0
- econometrics/tests/specific_data_modeling_tests/test_exponential_smoothing.py +105 -0
- econometrics/tests/specific_data_modeling_tests/test_garch.py +118 -0
- econometrics/tests/specific_data_modeling_tests/test_unit_root.py +156 -0
- econometrics/tests/specific_data_modeling_tests/test_var.py +124 -0
- prompts/__init__.py +0 -0
- prompts/analysis_guides.py +43 -0
- pyproject.toml +78 -0
- resources/MCP_MASTER_GUIDE.md +422 -0
- resources/MCP_TOOLS_DATA_FORMAT_GUIDE.md +185 -0
- resources/__init__.py +0 -0
- server.py +83 -0
- tools/README.md +88 -0
- tools/__init__.py +45 -0
- tools/data_loader.py +213 -0
- tools/decorators.py +38 -0
- tools/econometrics_adapter.py +286 -0
- tools/mcp_tool_groups/__init__.py +1 -0
- tools/mcp_tool_groups/basic_parametric_tools.py +173 -0
- tools/mcp_tool_groups/model_specification_tools.py +402 -0
- tools/mcp_tool_groups/time_series_tools.py +494 -0
- tools/mcp_tools_registry.py +114 -0
- tools/model_specification_adapter.py +369 -0
- tools/output_formatter.py +563 -0
- tools/time_series_panel_data_adapter.py +858 -0
- tools/time_series_panel_data_tools.py +65 -0
- aigroup_econ_mcp/__init__.py +0 -19
- aigroup_econ_mcp/cli.py +0 -82
- aigroup_econ_mcp/config.py +0 -561
- aigroup_econ_mcp/server.py +0 -452
- aigroup_econ_mcp/tools/__init__.py +0 -18
- aigroup_econ_mcp/tools/base.py +0 -470
- aigroup_econ_mcp/tools/cache.py +0 -533
- aigroup_econ_mcp/tools/data_loader.py +0 -171
- aigroup_econ_mcp/tools/file_parser.py +0 -829
- aigroup_econ_mcp/tools/machine_learning.py +0 -60
- aigroup_econ_mcp/tools/ml_ensemble.py +0 -210
- aigroup_econ_mcp/tools/ml_evaluation.py +0 -272
- aigroup_econ_mcp/tools/ml_models.py +0 -54
- aigroup_econ_mcp/tools/ml_regularization.py +0 -172
- aigroup_econ_mcp/tools/monitoring.py +0 -555
- aigroup_econ_mcp/tools/optimized_example.py +0 -229
- aigroup_econ_mcp/tools/panel_data.py +0 -553
- aigroup_econ_mcp/tools/regression.py +0 -214
- aigroup_econ_mcp/tools/statistics.py +0 -154
- aigroup_econ_mcp/tools/time_series.py +0 -667
- aigroup_econ_mcp/tools/timeout.py +0 -283
- aigroup_econ_mcp/tools/tool_handlers.py +0 -378
- aigroup_econ_mcp/tools/tool_registry.py +0 -170
- aigroup_econ_mcp/tools/validation.py +0 -482
- aigroup_econ_mcp-0.4.2.dist-info/METADATA +0 -360
- aigroup_econ_mcp-0.4.2.dist-info/RECORD +0 -29
- aigroup_econ_mcp-0.4.2.dist-info/entry_points.txt +0 -2
- /aigroup_econ_mcp-0.4.2.dist-info/licenses/LICENSE → /LICENSE +0 -0
- {aigroup_econ_mcp-0.4.2.dist-info → aigroup_econ_mcp-1.4.3.dist-info}/WHEEL +0 -0
|
@@ -0,0 +1,241 @@
|
|
|
1
|
+
"""
|
|
2
|
+
最大似然估计 (MLE) 模型实现
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from typing import List, Dict, Any, Optional, Callable
|
|
6
|
+
from dataclasses import dataclass
|
|
7
|
+
from pydantic import BaseModel, Field
|
|
8
|
+
import numpy as np
|
|
9
|
+
import pandas as pd
|
|
10
|
+
from scipy.optimize import minimize
|
|
11
|
+
from scipy import stats
|
|
12
|
+
import statsmodels.api as sm
|
|
13
|
+
from statsmodels.base.model import GenericLikelihoodModel
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class MLEResult(BaseModel):
    """Result of a maximum likelihood estimation."""
    # The list-valued fields are parallel: position i in `parameters`,
    # `std_errors`, `conf_int_lower` and `conf_int_upper` all refer to the
    # parameter whose name is `param_names[i]`.
    # Every field is required (`...` means "no default").

    # Point estimates of the distribution parameters.
    parameters: List[float] = Field(..., description="估计参数")
    # Standard error of each estimate.
    std_errors: List[float] = Field(..., description="参数标准误")
    # Lower confidence bound of each estimate.
    conf_int_lower: List[float] = Field(..., description="置信区间下界")
    # Upper confidence bound of each estimate.
    conf_int_upper: List[float] = Field(..., description="置信区间上界")
    # Log-likelihood of the data evaluated at the estimates.
    log_likelihood: float = Field(..., description="对数似然值")
    # Akaike information criterion.
    aic: float = Field(..., description="赤池信息准则")
    # Bayesian information criterion.
    bic: float = Field(..., description="贝叶斯信息准则")
    # Whether the estimation converged.
    convergence: bool = Field(..., description="是否收敛")
    # Number of observations used.
    n_obs: int = Field(..., description="观测数量")
    # Parameter names, in the same order as `parameters`.
    param_names: List[str] = Field(..., description="参数名称")
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def mle_estimation(
    data: List[float],
    distribution: str = "normal",
    initial_params: Optional[List[float]] = None,
    confidence_level: float = 0.95
) -> MLEResult:
    """
    Fit a one-dimensional distribution to ``data`` by maximum likelihood.

    Args:
        data: Observations to fit. Must be non-empty and contain no NaN or
            infinite values. Exponential data must be non-negative; Poisson
            data must be non-negative integers.
        distribution: Distribution family: ``'normal'``, ``'poisson'`` or
            ``'exponential'``.
        initial_params: Starting parameter values; forwarded unchanged to the
            per-distribution estimator.
        confidence_level: Coverage of the reported confidence intervals;
            must lie strictly between 0 and 1.

    Returns:
        MLEResult: Estimates, standard errors, confidence intervals,
        log-likelihood and information criteria.

    Raises:
        ValueError: If the data are empty or invalid for the chosen
            distribution, if ``confidence_level`` is outside (0, 1), or if
            ``distribution`` is not supported.
    """
    # Use len() instead of truthiness so array-like input does not trigger
    # NumPy's "truth value of an array is ambiguous" error.
    if data is None or len(data) == 0:
        raise ValueError("数据不能为空")

    # Outside (0, 1) the normal quantile behind the intervals is NaN or
    # infinite, which would silently produce meaningless bounds.
    if not 0 < confidence_level < 1:
        raise ValueError(f"置信水平必须在0和1之间(不含端点): {confidence_level}")

    data = np.array(data, dtype=np.float64)

    # Generic validity checks.
    if np.isnan(data).any():
        raise ValueError("数据中包含缺失值(NaN)")

    if np.isinf(data).any():
        raise ValueError("数据中包含无穷大值")

    # Support checks specific to each distribution.
    if distribution == "exponential" and np.any(data < 0):
        raise ValueError("指数分布的数据必须为非负数")

    if distribution == "poisson" and (np.any(data < 0) or not np.all(data == np.floor(data))):
        raise ValueError("泊松分布的数据必须为非负整数")

    # Dispatch to the estimator for the requested family.
    if distribution == "normal":
        return _normal_mle(data, initial_params, confidence_level)
    elif distribution == "poisson":
        return _poisson_mle(data, initial_params, confidence_level)
    elif distribution == "exponential":
        return _exponential_mle(data, initial_params, confidence_level)
    else:
        raise ValueError(f"不支持的分布类型: {distribution}")
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def _normal_mle(data: np.ndarray, initial_params: Optional[List[float]], confidence_level: float) -> MLEResult:
    """Closed-form maximum likelihood fit of a normal distribution.

    Args:
        data: Observations, already validated by the caller.
        initial_params: Accepted for a uniform estimator signature; not used
            because the estimates have a closed form.
        confidence_level: Coverage of the Wald confidence intervals.

    Returns:
        MLEResult with parameters ``[mu, sigma]``.

    Raises:
        ValueError: If the data have zero (or undefined) spread, or if any
            step of the computation fails.
    """
    n = len(data)
    mu_hat = np.mean(data)
    # The maximum likelihood estimator of sigma divides by n (ddof=0). The
    # bias-corrected ddof=1 estimator is not the MLE, so evaluating the
    # log-likelihood there would understate it and distort AIC/BIC.
    sigma_hat = np.std(data, ddof=0)

    # A degenerate sample (all values equal, including a single observation)
    # has no valid normal likelihood.
    if not np.isfinite(sigma_hat) or sigma_hat == 0:
        raise ValueError("数据标准差为零,无法进行正态分布MLE估计")

    try:
        log_likelihood = float(np.sum(stats.norm.logpdf(data, loc=mu_hat, scale=sigma_hat)))

        # Asymptotic standard errors from the inverse Fisher information.
        std_error_mu = float(sigma_hat / np.sqrt(n))
        std_error_sigma = float(sigma_hat / np.sqrt(2 * n))
        std_errors = [std_error_mu, std_error_sigma]

        # Two-sided Wald intervals based on the normal quantile.
        alpha = 1 - confidence_level
        z_value = stats.norm.ppf(1 - alpha/2)
        conf_int_lower = [
            float(mu_hat - z_value * std_error_mu),
            float(sigma_hat - z_value * std_error_sigma),
        ]
        conf_int_upper = [
            float(mu_hat + z_value * std_error_mu),
            float(sigma_hat + z_value * std_error_sigma),
        ]

        # Information criteria with k = 2 estimated parameters.
        k = 2
        aic = -2 * log_likelihood + 2 * k
        bic = -2 * log_likelihood + k * np.log(n)

        return MLEResult(
            parameters=[float(mu_hat), float(sigma_hat)],
            std_errors=std_errors,
            conf_int_lower=conf_int_lower,
            conf_int_upper=conf_int_upper,
            log_likelihood=log_likelihood,
            aic=float(aic),
            bic=float(bic),
            convergence=True,
            n_obs=n,
            param_names=["mu", "sigma"]
        )
    except Exception as e:
        # Re-raise as ValueError (the type callers are documented to expect)
        # while keeping the original exception chained for debugging.
        raise ValueError(f"正态分布MLE估计失败: {str(e)}") from e
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
def _poisson_mle(data: np.ndarray, initial_params: Optional[List[float]], confidence_level: float) -> MLEResult:
    """Closed-form maximum likelihood fit of a Poisson rate.

    Args:
        data: Non-negative integer-valued observations, already validated by
            the caller.
        initial_params: Accepted for a uniform estimator signature; not used
            because the estimate has a closed form.
        confidence_level: Coverage of the Wald confidence interval.

    Returns:
        MLEResult with the single parameter ``lambda``.

    Raises:
        ValueError: If the sample mean is zero, or if any step of the
            computation fails.
    """
    # The Poisson MLE is the sample mean.
    lambda_hat = np.mean(data)
    n = len(data)

    # An all-zero sample puts the estimate on the boundary of the parameter
    # space, where the standard error below collapses to zero.
    if lambda_hat == 0:
        raise ValueError("数据均值为零,无法进行泊松分布MLE估计")

    try:
        log_likelihood = float(np.sum(stats.poisson.logpmf(data, lambda_hat)))

        # Asymptotic standard error: sqrt(lambda / n).
        std_error = float(np.sqrt(lambda_hat / n))
        std_errors = [std_error]

        # Two-sided Wald interval based on the normal quantile.
        alpha = 1 - confidence_level
        z_value = stats.norm.ppf(1 - alpha/2)
        lower = float(lambda_hat - z_value * std_error)
        upper = float(lambda_hat + z_value * std_error)

        # The rate is strictly positive, so truncate the lower bound the same
        # way the exponential estimator does instead of reporting a negative
        # rate for small total counts.
        conf_int_lower = [max(lower, 1e-10)]
        conf_int_upper = [upper]

        # Information criteria with k = 1 estimated parameter.
        k = 1
        aic = -2 * log_likelihood + 2 * k
        bic = -2 * log_likelihood + k * np.log(n)

        return MLEResult(
            parameters=[float(lambda_hat)],
            std_errors=std_errors,
            conf_int_lower=conf_int_lower,
            conf_int_upper=conf_int_upper,
            log_likelihood=log_likelihood,
            aic=float(aic),
            bic=float(bic),
            convergence=True,
            n_obs=n,
            param_names=["lambda"]
        )
    except Exception as e:
        # Re-raise as ValueError while keeping the original cause chained.
        raise ValueError(f"泊松分布MLE估计失败: {str(e)}") from e
|
|
176
|
+
|
|
177
|
+
|
|
178
|
+
def _exponential_mle(data: np.ndarray, initial_params: Optional[List[float]], confidence_level: float) -> MLEResult:
    """Closed-form maximum likelihood fit of an exponential rate.

    The rate estimate is the reciprocal of the sample mean and its standard
    error is ``rate / sqrt(n)``. ``initial_params`` is accepted but not used.

    Raises:
        ValueError: If the sample mean is not positive, if the rate is not
            finite, or if any later step fails (the inner message is wrapped).
    """
    sample_mean = np.mean(data)
    if sample_mean <= 0:
        raise ValueError("指数分布的数据均值必须为正数")

    rate = 1.0 / sample_mean
    sample_size = len(data)

    # Guard against overflow in the reciprocal.
    if not np.isfinite(rate):
        raise ValueError("计算出的参数值无效")

    try:
        loglik = float(np.sum(stats.expon.logpdf(data, scale=1/rate)))

        # Standard error of the rate estimate.
        rate_se = rate / np.sqrt(sample_size)
        if rate_se <= 0 or not np.isfinite(rate_se):
            raise ValueError("计算出的标准误无效")

        # Critical value for a two-sided interval.
        tail_mass = 1 - confidence_level
        critical = stats.norm.ppf(1 - tail_mass/2)
        if not np.isfinite(critical):
            raise ValueError("计算出的临界值无效")

        half_width = critical * rate_se
        lower_bound = rate - half_width
        upper_bound = rate + half_width
        if not np.isfinite(lower_bound) or not np.isfinite(upper_bound):
            raise ValueError("计算出的置信区间无效")

        # The rate is positive, so keep the lower bound above zero.
        lower_bound = max(lower_bound, 1e-10)

        # Information criteria for a one-parameter model.
        n_params = 1
        penalty_free = -2 * loglik
        aic_value = penalty_free + 2 * n_params
        bic_value = penalty_free + n_params * np.log(sample_size)

        return MLEResult(
            parameters=[float(rate)],
            std_errors=[rate_se],
            conf_int_lower=[lower_bound],
            conf_int_upper=[upper_bound],
            log_likelihood=loglik,
            aic=float(aic_value),
            bic=float(bic_value),
            convergence=True,
            n_obs=sample_size,
            param_names=["lambda"]
        )
    except Exception as exc:
        raise ValueError(f"指数分布MLE估计失败: {str(exc)}")
|
|
@@ -0,0 +1,141 @@
|
|
|
1
|
+
"""
|
|
2
|
+
普通最小二乘法 (OLS) 模型实现
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from typing import List, Dict, Any, Optional, Union
|
|
6
|
+
from dataclasses import dataclass
|
|
7
|
+
from pydantic import BaseModel, Field
|
|
8
|
+
import numpy as np
|
|
9
|
+
import pandas as pd
|
|
10
|
+
from scipy import stats
|
|
11
|
+
import statsmodels.api as sm
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class OLSResult(BaseModel):
    """Result of an ordinary least squares regression."""
    # The list-valued fields are parallel: position i in each of them refers
    # to the regressor named `feature_names[i]` (the intercept, when present,
    # is named "const" and comes first).
    # Every field is required (`...` means "no default").

    # Estimated regression coefficients.
    coefficients: List[float] = Field(..., description="回归系数")
    # Standard error of each coefficient.
    std_errors: List[float] = Field(..., description="系数标准误")
    # t statistic of each coefficient.
    t_values: List[float] = Field(..., description="t统计量")
    # p-value of each coefficient.
    p_values: List[float] = Field(..., description="p值")
    # Lower confidence bound of each coefficient.
    conf_int_lower: List[float] = Field(..., description="置信区间下界")
    # Upper confidence bound of each coefficient.
    conf_int_upper: List[float] = Field(..., description="置信区间上界")
    # Coefficient of determination.
    r_squared: float = Field(..., description="R方")
    # Adjusted coefficient of determination.
    adj_r_squared: float = Field(..., description="调整R方")
    # F statistic of the regression.
    f_statistic: float = Field(..., description="F统计量")
    # p-value of the F statistic.
    f_p_value: float = Field(..., description="F统计量p值")
    # Akaike information criterion.
    aic: float = Field(..., description="赤池信息准则")
    # Bayesian information criterion.
    bic: float = Field(..., description="贝叶斯信息准则")
    # Number of observations used in the fit.
    n_obs: int = Field(..., description="观测数量")
    # Regressor names, in the same order as `coefficients`.
    feature_names: List[str] = Field(..., description="特征名称")
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def ols_regression(
    y_data: List[float],
    x_data: List[List[float]],
    feature_names: Optional[List[str]] = None,
    constant: bool = True,
    confidence_level: float = 0.95
) -> OLSResult:
    """
    Fit an ordinary least squares regression with statsmodels.

    Args:
        y_data: Dependent variable, one value per observation.
        x_data: Regressors, one row per observation. A flat list of numbers
            is treated as a single regressor.
        feature_names: Names of the regressors (without the intercept).
            Defaults to ``x0, x1, ...``.
        constant: Whether to add an intercept column, reported as ``const``.
        confidence_level: Coverage of the coefficient confidence intervals.

    Returns:
        OLSResult: Coefficients, inference statistics and fit measures.

    Raises:
        ValueError: If the inputs are empty, have inconsistent lengths, have
            too few observations, contain NaN/inf, or the model cannot be
            fitted.
    """
    # Input validation.
    if not y_data or not x_data:
        raise ValueError("因变量和自变量数据不能为空")

    y = np.array(y_data, dtype=np.float64)

    # Make X two-dimensional: a flat list of scalars is one regressor column.
    if x_data and isinstance(x_data[0], (int, float)):
        X = np.array(x_data, dtype=np.float64).reshape(-1, 1)
    else:
        X = np.array(x_data, dtype=np.float64)

    if len(y) != X.shape[0]:
        raise ValueError(f"因变量长度({len(y)})与自变量长度({X.shape[0]})不一致")

    # Need at least as many observations as estimated parameters.
    if len(y) < X.shape[1] + (1 if constant else 0):
        raise ValueError("数据点数量不足以估计模型参数")

    if np.isnan(y).any() or np.isnan(X).any():
        raise ValueError("数据中包含缺失值(NaN)")

    if np.isinf(y).any() or np.isinf(X).any():
        raise ValueError("数据中包含无穷大值")

    # Resolve regressor names before the intercept is added so the count
    # matches the user-supplied columns.
    n_regressors = X.shape[1]
    if feature_names:
        names = list(feature_names)
    else:
        names = [f"x{i}" for i in range(n_regressors)]

    if constant:
        # has_constant="add" forces the intercept column. The default
        # ("skip") silently omits it when X already contains a constant
        # column (or has a single row), which would leave "const" in the
        # names without a matching coefficient.
        X = sm.add_constant(X, has_constant="add")
        names = ["const"] + names
    feature_names = names

    # Fit the model.
    try:
        model = sm.OLS(y, X)
        results = model.fit()
    except Exception as e:
        # Re-raise as ValueError while keeping the original cause chained.
        raise ValueError(f"无法拟合OLS模型: {str(e)}") from e

    # Per-coefficient results.
    coefficients = results.params.tolist()
    std_errors = results.bse.tolist()
    t_values = results.tvalues.tolist()
    p_values = results.pvalues.tolist()

    # Confidence intervals at the requested level.
    alpha = 1 - confidence_level
    conf_int = results.conf_int(alpha=alpha)
    conf_int_lower = conf_int[:, 0].tolist()
    conf_int_upper = conf_int[:, 1].tolist()

    # Goodness of fit.
    r_squared = float(results.rsquared)
    adj_r_squared = float(results.rsquared_adj)

    # The F statistic is NaN when there is nothing to test (e.g. an
    # intercept-only model); report neutral values instead.
    f_statistic = float(results.fvalue) if not np.isnan(results.fvalue) else 0.0
    f_p_value = float(results.f_pvalue) if not np.isnan(results.f_pvalue) else 1.0

    # Information criteria.
    aic = float(results.aic)
    bic = float(results.bic)

    return OLSResult(
        coefficients=coefficients,
        std_errors=std_errors,
        t_values=t_values,
        p_values=p_values,
        conf_int_lower=conf_int_lower,
        conf_int_upper=conf_int_upper,
        r_squared=r_squared,
        adj_r_squared=adj_r_squared,
        f_statistic=f_statistic,
        f_p_value=f_p_value,
        aic=aic,
        bic=bic,
        n_obs=int(results.nobs),
        feature_names=feature_names
    )
|
|
File without changes
|
|
File without changes
|
|
@@ -0,0 +1,173 @@
|
|
|
1
|
+
# 模型规范、诊断和稳健推断工具
|
|
2
|
+
|
|
3
|
+
本模块提供了完整的模型规范检验、诊断测试和稳健推断方法工具集。
|
|
4
|
+
|
|
5
|
+
## 工具列表
|
|
6
|
+
|
|
7
|
+
### 1. 模型诊断检验 (Model Diagnostic Tests)
|
|
8
|
+
**工具名称**: `model_diagnostic_tests`
|
|
9
|
+
|
|
10
|
+
**功能**: 执行综合的模型诊断测试,包括:
|
|
11
|
+
- 异方差检验(Breusch-Pagan、White检验)
|
|
12
|
+
- 自相关检验(Durbin-Watson检验)
|
|
13
|
+
- 正态性检验(Jarque-Bera检验)
|
|
14
|
+
- 多重共线性诊断(方差膨胀因子VIF)
|
|
15
|
+
|
|
16
|
+
**使用场景**:
|
|
17
|
+
- OLS回归后的模型验证
|
|
18
|
+
- 检测模型假设是否满足
|
|
19
|
+
- 识别数据质量问题
|
|
20
|
+
|
|
21
|
+
### 2. 广义最小二乘法 (Generalized Least Squares - GLS)
|
|
22
|
+
**工具名称**: `generalized_least_squares`
|
|
23
|
+
|
|
24
|
+
**功能**: 处理异方差性和自相关的回归方法
|
|
25
|
+
|
|
26
|
+
**主要特点**:
|
|
27
|
+
- 可指定误差项协方差矩阵
|
|
28
|
+
- 在满足GLS假设时比OLS更有效
|
|
29
|
+
- 适用于存在异方差或自相关的数据
|
|
30
|
+
|
|
31
|
+
**使用场景**:
|
|
32
|
+
- 时间序列数据回归
|
|
33
|
+
- 存在已知异方差模式的数据
|
|
34
|
+
|
|
35
|
+
### 3. 加权最小二乘法 (Weighted Least Squares - WLS)
|
|
36
|
+
**工具名称**: `weighted_least_squares`
|
|
37
|
+
|
|
38
|
+
**功能**: 使用权重处理已知异方差性的回归方法
|
|
39
|
+
|
|
40
|
+
**主要特点**:
|
|
41
|
+
- 需要提供观测值权重
|
|
42
|
+
- 权重通常为方差的倒数
|
|
43
|
+
- 适用于分组数据或调查数据
|
|
44
|
+
|
|
45
|
+
**使用场景**:
|
|
46
|
+
- 调查数据分析
|
|
47
|
+
- 分组数据回归
|
|
48
|
+
- 已知误差方差的数据
|
|
49
|
+
|
|
50
|
+
### 4. 稳健标准误回归 (Robust Standard Errors)
|
|
51
|
+
**工具名称**: `robust_errors_regression`
|
|
52
|
+
|
|
53
|
+
**功能**: 计算异方差稳健的标准误
|
|
54
|
+
|
|
55
|
+
**主要特点**:
|
|
56
|
+
- 支持多种协方差矩阵类型(HC0、HC1、HC2、HC3)
|
|
57
|
+
- 不改变系数估计,只调整标准误
|
|
58
|
+
- 在存在异方差时提供有效推断
|
|
59
|
+
|
|
60
|
+
**使用场景**:
|
|
61
|
+
- 横截面数据分析
|
|
62
|
+
- 异方差问题明显但形式未知
|
|
63
|
+
- 需要稳健推断的场景
|
|
64
|
+
|
|
65
|
+
### 5. 模型选择准则 (Model Selection Criteria)
|
|
66
|
+
**工具名称**: `model_selection_criteria`
|
|
67
|
+
|
|
68
|
+
**功能**: 计算多种模型选择信息准则
|
|
69
|
+
|
|
70
|
+
**提供指标**:
|
|
71
|
+
- AIC(赤池信息准则)
|
|
72
|
+
- BIC(贝叶斯信息准则)
|
|
73
|
+
- HQIC(汉南-奎因信息准则)
|
|
74
|
+
- 交叉验证得分(可选)
|
|
75
|
+
|
|
76
|
+
**使用场景**:
|
|
77
|
+
- 比较不同模型规格
|
|
78
|
+
- 变量选择
|
|
79
|
+
- 确定最优模型
|
|
80
|
+
|
|
81
|
+
### 6. 正则化回归 (Regularized Regression)
|
|
82
|
+
**工具名称**: `regularized_regression`
|
|
83
|
+
|
|
84
|
+
**功能**: 处理多重共线性和高维数据的正则化方法
|
|
85
|
+
|
|
86
|
+
**支持方法**:
|
|
87
|
+
- 岭回归(Ridge):L2惩罚
|
|
88
|
+
- LASSO:L1惩罚,可进行变量选择
|
|
89
|
+
- 弹性网络(Elastic Net):L1和L2的组合
|
|
90
|
+
|
|
91
|
+
**使用场景**:
|
|
92
|
+
- 高维数据回归
|
|
93
|
+
- 变量选择
|
|
94
|
+
- 处理多重共线性
|
|
95
|
+
|
|
96
|
+
### 7. 联立方程模型 (Simultaneous Equations Model)
|
|
97
|
+
**工具名称**: `simultaneous_equations_model`
|
|
98
|
+
|
|
99
|
+
**功能**: 两阶段最小二乘法(2SLS)处理联立方程系统
|
|
100
|
+
|
|
101
|
+
**主要特点**:
|
|
102
|
+
- 处理内生性问题
|
|
103
|
+
- 需要有效的工具变量
|
|
104
|
+
- 支持多方程系统
|
|
105
|
+
|
|
106
|
+
**使用场景**:
|
|
107
|
+
- 供需模型
|
|
108
|
+
- 宏观经济模型
|
|
109
|
+
- 存在双向因果关系的模型
|
|
110
|
+
|
|
111
|
+
## 使用示例
|
|
112
|
+
|
|
113
|
+
### 诊断检验示例
|
|
114
|
+
```python
|
|
115
|
+
# 使用MCP工具
|
|
116
|
+
{
|
|
117
|
+
"y_data": [1.0, 2.0, 3.0, 4.0, 5.0],
|
|
118
|
+
"x_data": [[1.0, 2.0], [2.0, 3.0], [3.0, 4.0], [4.0, 5.0], [5.0, 6.0]],
|
|
119
|
+
"feature_names": ["x1", "x2"],
|
|
120
|
+
"constant": true
|
|
121
|
+
}
|
|
122
|
+
```
|
|
123
|
+
|
|
124
|
+
### 稳健标准误回归示例
|
|
125
|
+
```json
|
|
126
|
+
{
|
|
127
|
+
"y_data": [1.0, 2.0, 3.0, 4.0, 5.0],
|
|
128
|
+
"x_data": [[1.0, 2.0], [2.0, 3.0], [3.0, 4.0], [4.0, 5.0], [5.0, 6.0]],
|
|
129
|
+
"cov_type": "HC1",
|
|
130
|
+
"confidence_level": 0.95
|
|
131
|
+
}
|
|
132
|
+
```
|
|
133
|
+
|
|
134
|
+
### 正则化回归示例
|
|
135
|
+
```json
|
|
136
|
+
{
|
|
137
|
+
"y_data": [1.0, 2.0, 3.0, 4.0, 5.0],
|
|
138
|
+
"x_data": [[1.0, 2.0], [2.0, 3.0], [3.0, 4.0], [4.0, 5.0], [5.0, 6.0]],
|
|
139
|
+
"method": "ridge",
|
|
140
|
+
"alpha": 1.0
|
|
141
|
+
}
|
|
142
|
+
```
|
|
143
|
+
|
|
144
|
+
## 技术细节
|
|
145
|
+
|
|
146
|
+
### 实现架构
|
|
147
|
+
- **核心算法**: 位于各子模块的 `*_model.py` 文件
|
|
148
|
+
- **MCP适配器**: `tools/model_specification_adapter.py`
|
|
149
|
+
- **工具注册**: `tools/mcp_tool_groups/model_specification_tools.py`
|
|
150
|
+
|
|
151
|
+
### 依赖库
|
|
152
|
+
- `statsmodels`: 用于统计模型和诊断检验
|
|
153
|
+
- `scikit-learn`: 用于正则化方法
|
|
154
|
+
- `linearmodels`: 用于联立方程模型
|
|
155
|
+
- `numpy`, `pandas`: 基础数据处理
|
|
156
|
+
|
|
157
|
+
### 数据格式支持
|
|
158
|
+
- **输入**: JSON、CSV、Excel、TXT
|
|
159
|
+
- **输出**: JSON、Markdown、HTML
|
|
160
|
+
|
|
161
|
+
## 注意事项
|
|
162
|
+
|
|
163
|
+
1. **诊断检验**: 应在OLS回归后使用,检验模型假设
|
|
164
|
+
2. **GLS/WLS**: 需要正确指定协方差矩阵或权重
|
|
165
|
+
3. **稳健标准误**: 不改变系数估计,仅影响推断
|
|
166
|
+
4. **正则化**: alpha参数需要通过交叉验证选择
|
|
167
|
+
5. **联立方程**: 需要有效且足够数量的工具变量
|
|
168
|
+
|
|
169
|
+
## 贡献者
|
|
170
|
+
AIGroup Economics Team
|
|
171
|
+
|
|
172
|
+
## 许可证
|
|
173
|
+
MIT License
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
"""
|
|
2
|
+
模型设定、诊断与稳健推断 (Model Specification, Diagnostics and Robust Inference)
|
|
3
|
+
|
|
4
|
+
当基础模型的理想假设不成立时,修正模型或调整推断;对模型进行诊断和选择。
|
|
5
|
+
|
|
6
|
+
主要方法包括:
|
|
7
|
+
- 稳健标准误(处理异方差/自相关)
|
|
8
|
+
- 广义最小二乘法 (GLS)
|
|
9
|
+
- 加权最小二乘法 (WLS)
|
|
10
|
+
- 岭回归/LASSO/弹性网络(处理多重共线性/高维数据)
|
|
11
|
+
- 联立方程模型(处理双向因果关系)
|
|
12
|
+
|
|
13
|
+
模型诊断:
|
|
14
|
+
- 异方差检验(White、Breusch-Pagan)
|
|
15
|
+
- 自相关检验(Durbin-Watson、Ljung-Box)
|
|
16
|
+
- 正态性检验(Jarque-Bera)
|
|
17
|
+
- 多重共线性诊断(VIF)
|
|
18
|
+
- 内生性检验(Durbin-Wu-Hausman)
|
|
19
|
+
- 残差诊断、影响点分析
|
|
20
|
+
|
|
21
|
+
模型选择:
|
|
22
|
+
- 信息准则(AIC/BIC/HQIC)
|
|
23
|
+
- 交叉验证(K折、留一法)
|
|
24
|
+
- 格兰杰因果检验
|
|
25
|
+
"""
|
|
26
|
+
|
|
27
|
+
# 导入子模块
|
|
28
|
+
from .robust_errors import (
|
|
29
|
+
RobustErrorsResult,
|
|
30
|
+
robust_errors_regression
|
|
31
|
+
)
|
|
32
|
+
|
|
33
|
+
from .diagnostic_tests import (
|
|
34
|
+
DiagnosticTestsResult,
|
|
35
|
+
diagnostic_tests
|
|
36
|
+
)
|
|
37
|
+
|
|
38
|
+
from .model_selection import (
|
|
39
|
+
ModelSelectionResult,
|
|
40
|
+
model_selection_criteria
|
|
41
|
+
)
|
|
42
|
+
|
|
43
|
+
from .generalized_least_squares import (
|
|
44
|
+
GLSResult,
|
|
45
|
+
gls_regression
|
|
46
|
+
)
|
|
47
|
+
|
|
48
|
+
from .weighted_least_squares import (
|
|
49
|
+
WLSResult,
|
|
50
|
+
wls_regression
|
|
51
|
+
)
|
|
52
|
+
|
|
53
|
+
from .regularization import (
|
|
54
|
+
RegularizationResult,
|
|
55
|
+
regularized_regression
|
|
56
|
+
)
|
|
57
|
+
|
|
58
|
+
from .simultaneous_equations import (
|
|
59
|
+
SimultaneousEquationsResult,
|
|
60
|
+
two_stage_least_squares
|
|
61
|
+
)
|
|
62
|
+
|
|
63
|
+
__all__ = [
|
|
64
|
+
"RobustErrorsResult",
|
|
65
|
+
"robust_errors_regression",
|
|
66
|
+
"DiagnosticTestsResult",
|
|
67
|
+
"diagnostic_tests",
|
|
68
|
+
"ModelSelectionResult",
|
|
69
|
+
"model_selection_criteria",
|
|
70
|
+
"GLSResult",
|
|
71
|
+
"gls_regression",
|
|
72
|
+
"WLSResult",
|
|
73
|
+
"wls_regression",
|
|
74
|
+
"RegularizationResult",
|
|
75
|
+
"regularized_regression",
|
|
76
|
+
"SimultaneousEquationsResult",
|
|
77
|
+
"two_stage_least_squares"
|
|
78
|
+
]
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
"""
|
|
2
|
+
模型诊断测试 (Diagnostic Tests) 模块
|
|
3
|
+
|
|
4
|
+
包括各种统计检验方法:
|
|
5
|
+
- 异方差检验(White、Breusch-Pagan)
|
|
6
|
+
- 自相关检验(Durbin-Watson、Ljung-Box)
|
|
7
|
+
- 正态性检验(Jarque-Bera)
|
|
8
|
+
- 多重共线性诊断(VIF)
|
|
9
|
+
- 内生性检验(Durbin-Wu-Hausman)
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
from .diagnostic_tests_model import (
|
|
13
|
+
DiagnosticTestsResult,
|
|
14
|
+
diagnostic_tests
|
|
15
|
+
)
|
|
16
|
+
|
|
17
|
+
__all__ = [
|
|
18
|
+
"DiagnosticTestsResult",
|
|
19
|
+
"diagnostic_tests"
|
|
20
|
+
]
|