aigroup-econ-mcp 1.4.3__py3-none-any.whl → 2.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- PKG-INFO +344 -322
- README.md +335 -320
- __init__.py +1 -1
- aigroup_econ_mcp-2.0.1.dist-info/METADATA +732 -0
- aigroup_econ_mcp-2.0.1.dist-info/RECORD +170 -0
- cli.py +4 -0
- econometrics/advanced_methods/modern_computing_machine_learning/__init__.py +30 -0
- econometrics/advanced_methods/modern_computing_machine_learning/causal_forest.py +253 -0
- econometrics/advanced_methods/modern_computing_machine_learning/double_ml.py +268 -0
- econometrics/advanced_methods/modern_computing_machine_learning/gradient_boosting.py +249 -0
- econometrics/advanced_methods/modern_computing_machine_learning/hierarchical_clustering.py +243 -0
- econometrics/advanced_methods/modern_computing_machine_learning/kmeans_clustering.py +293 -0
- econometrics/advanced_methods/modern_computing_machine_learning/neural_network.py +264 -0
- econometrics/advanced_methods/modern_computing_machine_learning/random_forest.py +195 -0
- econometrics/advanced_methods/modern_computing_machine_learning/support_vector_machine.py +226 -0
- econometrics/advanced_methods/modern_computing_machine_learning/test_all_modules.py +329 -0
- econometrics/advanced_methods/modern_computing_machine_learning/test_report.md +107 -0
- econometrics/causal_inference/__init__.py +66 -0
- econometrics/causal_inference/causal_identification_strategy/__init__.py +104 -0
- econometrics/causal_inference/causal_identification_strategy/control_function.py +112 -0
- econometrics/causal_inference/causal_identification_strategy/difference_in_differences.py +107 -0
- econometrics/causal_inference/causal_identification_strategy/event_study.py +119 -0
- econometrics/causal_inference/causal_identification_strategy/first_difference.py +89 -0
- econometrics/causal_inference/causal_identification_strategy/fixed_effects.py +103 -0
- econometrics/causal_inference/causal_identification_strategy/hausman_test.py +69 -0
- econometrics/causal_inference/causal_identification_strategy/instrumental_variables.py +145 -0
- econometrics/causal_inference/causal_identification_strategy/mediation_analysis.py +121 -0
- econometrics/causal_inference/causal_identification_strategy/moderation_analysis.py +109 -0
- econometrics/causal_inference/causal_identification_strategy/propensity_score_matching.py +140 -0
- econometrics/causal_inference/causal_identification_strategy/random_effects.py +100 -0
- econometrics/causal_inference/causal_identification_strategy/regression_discontinuity.py +98 -0
- econometrics/causal_inference/causal_identification_strategy/synthetic_control.py +111 -0
- econometrics/causal_inference/causal_identification_strategy/triple_difference.py +86 -0
- econometrics/distribution_analysis/__init__.py +28 -0
- econometrics/distribution_analysis/oaxaca_blinder.py +184 -0
- econometrics/distribution_analysis/time_series_decomposition.py +152 -0
- econometrics/distribution_analysis/variance_decomposition.py +179 -0
- econometrics/missing_data/__init__.py +18 -0
- econometrics/missing_data/imputation_methods.py +219 -0
- econometrics/nonparametric/__init__.py +35 -0
- econometrics/nonparametric/gam_model.py +117 -0
- econometrics/nonparametric/kernel_regression.py +161 -0
- econometrics/nonparametric/quantile_regression.py +249 -0
- econometrics/nonparametric/spline_regression.py +100 -0
- econometrics/spatial_econometrics/__init__.py +68 -0
- econometrics/spatial_econometrics/geographically_weighted_regression.py +211 -0
- econometrics/spatial_econometrics/gwr_simple.py +154 -0
- econometrics/spatial_econometrics/spatial_autocorrelation.py +356 -0
- econometrics/spatial_econometrics/spatial_durbin_model.py +177 -0
- econometrics/spatial_econometrics/spatial_regression.py +315 -0
- econometrics/spatial_econometrics/spatial_weights.py +226 -0
- econometrics/specific_data_modeling/micro_discrete_limited_data/README.md +164 -0
- econometrics/specific_data_modeling/micro_discrete_limited_data/__init__.py +40 -0
- econometrics/specific_data_modeling/micro_discrete_limited_data/count_data_models.py +311 -0
- econometrics/specific_data_modeling/micro_discrete_limited_data/discrete_choice_models.py +294 -0
- econometrics/specific_data_modeling/micro_discrete_limited_data/limited_dependent_variable_models.py +282 -0
- econometrics/statistical_inference/__init__.py +21 -0
- econometrics/statistical_inference/bootstrap_methods.py +162 -0
- econometrics/statistical_inference/permutation_test.py +177 -0
- econometrics/survival_analysis/__init__.py +18 -0
- econometrics/survival_analysis/survival_models.py +259 -0
- econometrics/tests/causal_inference_tests/__init__.py +3 -0
- econometrics/tests/causal_inference_tests/detailed_test.py +441 -0
- econometrics/tests/causal_inference_tests/test_all_methods.py +418 -0
- econometrics/tests/causal_inference_tests/test_causal_identification_strategy.py +202 -0
- econometrics/tests/causal_inference_tests/test_difference_in_differences.py +53 -0
- econometrics/tests/causal_inference_tests/test_instrumental_variables.py +44 -0
- econometrics/tests/specific_data_modeling_tests/test_micro_discrete_limited_data.py +189 -0
- econometrics//321/206/320/254/320/272/321/205/342/225/235/320/220/321/205/320/237/320/241/321/205/320/264/320/267/321/207/342/226/222/342/225/227/321/204/342/225/235/320/250/321/205/320/225/320/230/321/207/342/225/221/320/267/321/205/320/230/320/226/321/206/320/256/320/240.md +544 -0
- pyproject.toml +9 -2
- server.py +15 -1
- tools/__init__.py +75 -1
- tools/causal_inference_adapter.py +658 -0
- tools/distribution_analysis_adapter.py +121 -0
- tools/gwr_simple_adapter.py +54 -0
- tools/machine_learning_adapter.py +567 -0
- tools/mcp_tool_groups/__init__.py +15 -1
- tools/mcp_tool_groups/causal_inference_tools.py +643 -0
- tools/mcp_tool_groups/distribution_analysis_tools.py +169 -0
- tools/mcp_tool_groups/machine_learning_tools.py +422 -0
- tools/mcp_tool_groups/microecon_tools.py +325 -0
- tools/mcp_tool_groups/missing_data_tools.py +117 -0
- tools/mcp_tool_groups/nonparametric_tools.py +225 -0
- tools/mcp_tool_groups/spatial_econometrics_tools.py +323 -0
- tools/mcp_tool_groups/statistical_inference_tools.py +131 -0
- tools/mcp_tools_registry.py +13 -3
- tools/microecon_adapter.py +412 -0
- tools/missing_data_adapter.py +73 -0
- tools/nonparametric_adapter.py +190 -0
- tools/spatial_econometrics_adapter.py +318 -0
- tools/statistical_inference_adapter.py +90 -0
- tools/survival_analysis_adapter.py +46 -0
- aigroup_econ_mcp-1.4.3.dist-info/METADATA +0 -710
- aigroup_econ_mcp-1.4.3.dist-info/RECORD +0 -92
- {aigroup_econ_mcp-1.4.3.dist-info → aigroup_econ_mcp-2.0.1.dist-info}/WHEEL +0 -0
- {aigroup_econ_mcp-1.4.3.dist-info → aigroup_econ_mcp-2.0.1.dist-info}/entry_points.txt +0 -0
- {aigroup_econ_mcp-1.4.3.dist-info → aigroup_econ_mcp-2.0.1.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,161 @@
|
|
|
1
|
+
"""
|
|
2
|
+
核回归 (Kernel Regression)
|
|
3
|
+
基于 statsmodels.nonparametric 库实现
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from typing import List, Optional, Tuple
|
|
7
|
+
from pydantic import BaseModel, Field
|
|
8
|
+
import numpy as np
|
|
9
|
+
|
|
10
|
+
try:
|
|
11
|
+
from statsmodels.nonparametric.kernel_regression import KernelReg
|
|
12
|
+
STATSMODELS_AVAILABLE = True
|
|
13
|
+
except ImportError:
|
|
14
|
+
STATSMODELS_AVAILABLE = False
|
|
15
|
+
KernelReg = None
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
# Result container returned by kernel_regression().
# NOTE: the class docstring is left verbatim because pydantic exposes a
# model's docstring as the "description" of its generated JSON schema.
class KernelRegressionResult(BaseModel):
    """核回归结果"""
    # Fitted (in-sample predicted) values, one per observation.
    fitted_values: List[float] = Field(..., description="拟合值")
    # Residuals, computed by the caller as y minus the fitted values.
    residuals: List[float] = Field(..., description="残差")
    # Bandwidth parameters reported by the fitted estimator.
    bandwidth: List[float] = Field(..., description="带宽参数")
    # Kernel type label as supplied by the caller.
    kernel_type: str = Field(..., description="核函数类型")
    # Number of observations.
    n_observations: int = Field(..., description="观测数量")
    # Number of predictor variables.
    n_predictors: int = Field(..., description="预测变量数量")
    # R-squared statistic.
    r_squared: float = Field(..., description="R²统计量")
    # AIC information criterion; optional, defaults to None.
    aic: Optional[float] = Field(None, description="AIC信息准则")
    # Human-readable summary text.
    summary: str = Field(..., description="摘要信息")
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def kernel_regression(
    y_data: List[float],
    x_data: List[List[float]],
    kernel_type: str = "gaussian",
    bandwidth: Optional[List[float]] = None,
    bandwidth_method: str = "cv_ls",
    variable_type: Optional[str] = None
) -> KernelRegressionResult:
    """
    Estimate a local-linear kernel regression with statsmodels' ``KernelReg``.

    Args:
        y_data: Dependent variable, one value per observation.
        x_data: Predictors as a 2-D list (a 1-D list is treated as a single
            predictor column).
        kernel_type: Kernel label echoed back in the result and summary.
            NOTE: this function does not forward it to ``KernelReg``, so the
            estimator runs with its own default kernels regardless of the
            value given here.
        bandwidth: One bandwidth per predictor. When ``None`` the bandwidth is
            selected automatically using ``bandwidth_method``.
        bandwidth_method: Bandwidth selection rule passed to ``KernelReg`` as
            ``bw`` when ``bandwidth`` is ``None`` (e.g. "cv_ls", "aic",
            "normal_reference").
        variable_type: One character per predictor: "c" (continuous),
            "u" (unordered categorical) or "o" (ordered categorical), e.g.
            "cco". ``None`` means all predictors are continuous.

    Returns:
        KernelRegressionResult: fitted values, residuals, bandwidths, R² and an
        approximate Gaussian AIC (``None`` when it cannot be computed, e.g.
        for a perfect fit).

    Raises:
        ImportError: statsmodels is not installed.
        ValueError: The inputs are empty or inconsistent, or the model could
            not be built.
    """
    if not STATSMODELS_AVAILABLE:
        raise ImportError(
            "statsmodels库未安装。请运行: pip install statsmodels"
        )

    # Input validation
    if not y_data or not x_data:
        raise ValueError("y_data和x_data不能为空")

    # Data preparation
    y = np.array(y_data, dtype=np.float64)
    X = np.array(x_data, dtype=np.float64)

    # Make sure X is two-dimensional
    if X.ndim == 1:
        X = X.reshape(-1, 1)

    n = len(y)
    k = X.shape[1]

    if n != X.shape[0]:
        raise ValueError(f"因变量长度({len(y)})与自变量长度({X.shape[0]})不一致")

    # Variable types: default to all-continuous
    var_type = 'c' * k if variable_type is None else variable_type
    if len(var_type) != k:
        raise ValueError(f"variable_type长度({len(var_type)})与自变量数量({k})不一致")
    invalid_types = sorted(set(var_type) - {'c', 'u', 'o'})
    if invalid_types:
        raise ValueError(f"variable_type包含无效字符: {invalid_types}")

    # Validate a user-supplied bandwidth up front so that this error is not
    # re-wrapped as a "model build failed" error below.
    if bandwidth is not None and len(bandwidth) != k:
        raise ValueError(f"bandwidth长度({len(bandwidth)})与自变量数量({k})不一致")

    # Build the model; both branches differ only in the ``bw`` argument.
    try:
        bw_spec = bandwidth_method if bandwidth is None else np.array(bandwidth)
        kr = KernelReg(
            endog=y,
            exog=X,
            var_type=var_type,
            reg_type='ll',  # local linear regression
            bw=bw_spec
        )
    except Exception as e:
        raise ValueError(f"核回归模型构建失败: {str(e)}") from e

    # In-sample fitted values (second return value is discarded)
    fitted_values, _ = kr.fit(X)
    fitted_values = fitted_values.flatten()

    residuals = y - fitted_values

    bw = kr.bw.tolist() if hasattr(kr.bw, 'tolist') else [float(kr.bw)]

    # R²
    ss_res = np.sum(residuals ** 2)
    ss_tot = np.sum((y - np.mean(y)) ** 2)
    r_squared = float(1 - ss_res / ss_tot) if ss_tot > 0 else 0.0

    # Approximate Gaussian AIC. log(0) does not raise in numpy (it yields
    # -inf), so guard explicitly and report None instead of a non-finite value.
    mse = ss_res / n
    if np.isfinite(mse) and mse > 0:
        log_likelihood = -0.5 * n * (np.log(2 * np.pi) + np.log(mse) + 1)
        aic = float(2 * k - 2 * log_likelihood)
    else:
        aic = None

    # Build the summary
    summary = f"""核回归分析:
- 观测数量: {n}
- 预测变量: {k}
- 核函数: {kernel_type}
- 带宽: {[f'{b:.4f}' for b in bw]}
- 带宽方法: {bandwidth_method}
- R²: {r_squared:.4f}
"""
    if aic is not None:
        summary += f"- AIC: {aic:.2f}\n"

    return KernelRegressionResult(
        fitted_values=fitted_values.tolist(),
        residuals=residuals.tolist(),
        bandwidth=bw,
        kernel_type=kernel_type,
        n_observations=n,
        n_predictors=k,
        r_squared=r_squared,
        aic=aic,
        summary=summary
    )
|
|
@@ -0,0 +1,249 @@
|
|
|
1
|
+
"""
|
|
2
|
+
分位数回归 (Quantile Regression)
|
|
3
|
+
基于 statsmodels.regression.quantile_regression 库实现
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from typing import List, Optional, Dict
|
|
7
|
+
from pydantic import BaseModel, Field
|
|
8
|
+
import numpy as np
|
|
9
|
+
|
|
10
|
+
try:
|
|
11
|
+
import statsmodels.api as sm
|
|
12
|
+
from statsmodels.regression.quantile_regression import QuantReg
|
|
13
|
+
STATSMODELS_AVAILABLE = True
|
|
14
|
+
except ImportError:
|
|
15
|
+
STATSMODELS_AVAILABLE = False
|
|
16
|
+
QuantReg = None
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
# Result container returned by quantile_regression().
# NOTE: the class docstring is left verbatim because pydantic exposes a
# model's docstring as the "description" of its generated JSON schema.
class QuantileRegressionResult(BaseModel):
    """分位数回归结果"""
    # Quantile level that was estimated.
    quantile: float = Field(..., description="分位数水平")
    # Regression coefficients.
    coefficients: List[float] = Field(..., description="回归系数")
    # Standard errors of the coefficients.
    std_errors: List[float] = Field(..., description="标准误")
    # t statistics.
    t_values: List[float] = Field(..., description="t统计量")
    # p-values.
    p_values: List[float] = Field(..., description="p值")
    # Lower bounds of the confidence intervals.
    conf_int_lower: List[float] = Field(..., description="置信区间下界")
    # Upper bounds of the confidence intervals.
    conf_int_upper: List[float] = Field(..., description="置信区间上界")
    # Feature names.
    feature_names: List[str] = Field(..., description="特征名称")
    # Pseudo R-squared.
    pseudo_r_squared: float = Field(..., description="伪R²")
    # Number of observations.
    n_observations: int = Field(..., description="观测数量")
    # Human-readable summary text.
    summary: str = Field(..., description="摘要信息")
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
# Result container returned by multi_quantile_regression().
# NOTE: the class docstring is left verbatim because pydantic exposes a
# model's docstring as the "description" of its generated JSON schema.
class MultiQuantileResult(BaseModel):
    """多分位数回归结果"""
    # List of quantile levels that were estimated.
    quantiles: List[float] = Field(..., description="分位数水平列表")
    # Coefficients for each quantile, keyed by a string label.
    coefficients_by_quantile: Dict[str, List[float]] = Field(..., description="各分位数的系数")
    # Feature names.
    feature_names: List[str] = Field(..., description="特征名称")
    # Number of observations.
    n_observations: int = Field(..., description="观测数量")
    # Human-readable summary text.
    summary: str = Field(..., description="摘要信息")
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def quantile_regression(
    y_data: List[float],
    x_data: List[List[float]],
    quantile: float = 0.5,
    feature_names: Optional[List[str]] = None,
    confidence_level: float = 0.95
) -> QuantileRegressionResult:
    """
    Fit a quantile regression with statsmodels' ``QuantReg``.

    Args:
        y_data: Dependent variable, one value per observation.
        x_data: Predictors as a 2-D list (a 1-D list is treated as a single
            predictor column). A constant is added via ``sm.add_constant``.
        quantile: Quantile level, strictly between 0 and 1. The default 0.5
            gives median regression.
        feature_names: One name per predictor column. Defaults to
            "X1", "X2", ...
        confidence_level: Confidence level for the coefficient intervals,
            strictly between 0 and 1.

    Returns:
        QuantileRegressionResult: coefficients, standard errors, t/p values,
        confidence bounds, pseudo R² and a text summary. ``feature_names`` in
        the result is prefixed with "const" when a constant column was added.

    Raises:
        ImportError: statsmodels is not installed.
        ValueError: The inputs are empty or inconsistent, or fitting failed.
    """
    if not STATSMODELS_AVAILABLE:
        raise ImportError(
            "statsmodels库未安装。请运行: pip install statsmodels"
        )

    # Input validation
    if not y_data or not x_data:
        raise ValueError("y_data和x_data不能为空")

    if not 0 < quantile < 1:
        raise ValueError("quantile必须在0和1之间")

    if not 0 < confidence_level < 1:
        raise ValueError("confidence_level必须在0和1之间")

    # Data preparation
    y = np.array(y_data, dtype=np.float64)
    X = np.array(x_data, dtype=np.float64)

    # Make sure X is two-dimensional
    if X.ndim == 1:
        X = X.reshape(-1, 1)

    n = len(y)
    k = X.shape[1]

    if n != X.shape[0]:
        raise ValueError(f"因变量长度({len(y)})与自变量长度({X.shape[0]})不一致")

    # Feature names
    if feature_names is None:
        feature_names = [f"X{i+1}" for i in range(k)]
    elif len(feature_names) != k:
        raise ValueError(f"feature_names长度({len(feature_names)})与自变量数量({k})不一致")

    # Add the constant term. add_constant leaves X unchanged when it already
    # contains a constant column, so only label a "const" coefficient when a
    # column was actually added; otherwise the labels would be shifted by one.
    X_with_const = sm.add_constant(X)
    if X_with_const.shape[1] == k:
        all_feature_names = list(feature_names)
    else:
        all_feature_names = ["const"] + list(feature_names)

    # Build and fit the model
    try:
        model = QuantReg(y, X_with_const)
        results = model.fit(q=quantile)
    except Exception as e:
        raise ValueError(f"分位数回归拟合失败: {str(e)}") from e

    coefficients = results.params.tolist()

    # Standard errors as reported by the fit; best-effort fallback to zeros.
    try:
        std_errors = results.bse.tolist()
    except Exception:
        std_errors = [0.0] * len(coefficients)

    # t statistics and p-values; best-effort fallback to neutral values.
    try:
        t_values = results.tvalues.tolist()
        p_values = results.pvalues.tolist()
    except Exception:
        t_values = [0.0] * len(coefficients)
        p_values = [1.0] * len(coefficients)

    # Confidence intervals. conf_int() returns an ndarray for ndarray inputs
    # (a DataFrame only for pandas inputs), so normalise through np.asarray
    # instead of relying on ``.iloc``.
    alpha = 1 - confidence_level
    try:
        conf_int = np.asarray(results.conf_int(alpha=alpha), dtype=np.float64)
        conf_int_lower = conf_int[:, 0].tolist()
        conf_int_upper = conf_int[:, 1].tolist()
    except Exception:
        # Normal-approximation fallback that honours the requested level.
        from statistics import NormalDist
        z = NormalDist().inv_cdf(1 - alpha / 2)
        conf_int_lower = [c - z * se for c, se in zip(coefficients, std_errors)]
        conf_int_upper = [c + z * se for c, se in zip(coefficients, std_errors)]

    # Pseudo R²
    try:
        pseudo_r_squared = float(results.prsquared)
    except Exception:
        pseudo_r_squared = 0.0

    # Build the summary
    summary = f"""分位数回归分析:
- 分位数τ: {quantile}
- 观测数量: {n}
- 协变量数: {k}
- 伪R²: {pseudo_r_squared:.4f}

系数估计:
"""
    for name, coef, se, t, p in zip(
        all_feature_names, coefficients, std_errors, t_values, p_values
    ):
        sig = "***" if p < 0.01 else "**" if p < 0.05 else "*" if p < 0.10 else ""
        summary += f" {name}: {coef:.4f} (SE: {se:.4f}, t={t:.2f}, p={p:.4f}){sig}\n"

    return QuantileRegressionResult(
        quantile=quantile,
        coefficients=coefficients,
        std_errors=std_errors,
        t_values=t_values,
        p_values=p_values,
        conf_int_lower=conf_int_lower,
        conf_int_upper=conf_int_upper,
        feature_names=all_feature_names,
        pseudo_r_squared=pseudo_r_squared,
        n_observations=n,
        summary=summary
    )
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
def multi_quantile_regression(
    y_data: List[float],
    x_data: List[List[float]],
    quantiles: List[float] = [0.1, 0.25, 0.5, 0.75, 0.9],
    feature_names: Optional[List[str]] = None
) -> MultiQuantileResult:
    """
    Fit quantile regressions at several quantile levels on the same data.

    Args:
        y_data: Dependent variable, one value per observation.
        x_data: Predictors as a 2-D list (a 1-D list is treated as a single
            predictor column). A constant is added via ``sm.add_constant``.
        quantiles: Quantile levels to estimate.
        feature_names: One name per predictor column. Defaults to
            "X1", "X2", ...

    Returns:
        MultiQuantileResult: coefficients keyed by "τ=<q>". A quantile whose
        fit fails gets NaN coefficients and is listed in the summary.

    Raises:
        ImportError: statsmodels is not installed.
        ValueError: The inputs are empty or inconsistent, or the model could
            not be constructed.
    """
    if not STATSMODELS_AVAILABLE:
        raise ImportError("statsmodels库未安装")

    # Input validation
    if not y_data or not x_data:
        raise ValueError("y_data和x_data不能为空")

    # Work on a copy so the shared default list is never aliased.
    quantiles = list(quantiles)

    # Data preparation
    y = np.array(y_data, dtype=np.float64)
    X = np.array(x_data, dtype=np.float64)

    if X.ndim == 1:
        X = X.reshape(-1, 1)

    n = len(y)
    k = X.shape[1]

    if n != X.shape[0]:
        raise ValueError(f"因变量长度({len(y)})与自变量长度({X.shape[0]})不一致")

    # Feature names
    if feature_names is None:
        feature_names = [f"X{i+1}" for i in range(k)]
    elif len(feature_names) != k:
        raise ValueError(f"feature_names长度({len(feature_names)})与自变量数量({k})不一致")

    # Add the constant term. add_constant leaves X unchanged when it already
    # contains a constant column, so derive the parameter count and labels
    # from the actual design matrix rather than assuming k + 1.
    X_with_const = sm.add_constant(X)
    n_params = X_with_const.shape[1]
    if n_params == k:
        all_feature_names = list(feature_names)
    else:
        all_feature_names = ["const"] + list(feature_names)

    # The model does not depend on q, so build it once.
    try:
        model = QuantReg(y, X_with_const)
    except Exception as e:
        raise ValueError(f"分位数回归拟合失败: {str(e)}") from e

    # Fit each quantile; a failed fit is recorded as NaN (best effort).
    coefficients_by_quantile = {}
    failed_quantiles = []

    for q in quantiles:
        key = f"τ={q}"
        try:
            coefficients_by_quantile[key] = model.fit(q=q).params.tolist()
        except Exception:
            coefficients_by_quantile[key] = [np.nan] * n_params
            failed_quantiles.append(q)

    # Build the summary
    summary = f"""多分位数回归分析:
- 观测数量: {n}
- 协变量数: {k}
- 分位数: {quantiles}

各分位数的系数估计:
"""
    for name_idx, name in enumerate(all_feature_names):
        summary += f"\n{name}:\n"
        for q in quantiles:
            coef = coefficients_by_quantile[f"τ={q}"][name_idx]
            summary += f" τ={q}: {coef:.4f}\n"

    if failed_quantiles:
        summary += f"\n注意: 以下分位数拟合失败,系数以NaN表示: {failed_quantiles}\n"

    return MultiQuantileResult(
        quantiles=quantiles,
        coefficients_by_quantile=coefficients_by_quantile,
        feature_names=all_feature_names,
        n_observations=n,
        summary=summary
    )
|
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
"""
|
|
2
|
+
样条回归
|
|
3
|
+
基于 sklearn 和 scipy 实现
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from typing import List, Optional
|
|
7
|
+
from pydantic import BaseModel, Field
|
|
8
|
+
import numpy as np
|
|
9
|
+
|
|
10
|
+
try:
|
|
11
|
+
from sklearn.preprocessing import SplineTransformer
|
|
12
|
+
from sklearn.linear_model import LinearRegression
|
|
13
|
+
from sklearn.pipeline import Pipeline
|
|
14
|
+
SKLEARN_AVAILABLE = True
|
|
15
|
+
except ImportError:
|
|
16
|
+
SKLEARN_AVAILABLE = False
|
|
17
|
+
SplineTransformer = None
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
# Result container returned by spline_regression().
# NOTE: the class docstring is left verbatim because pydantic exposes a
# model's docstring as the "description" of its generated JSON schema.
class SplineRegressionResult(BaseModel):
    """样条回归结果"""
    # Fitted (in-sample predicted) values.
    fitted_values: List[float] = Field(..., description="拟合值")
    # Residuals.
    residuals: List[float] = Field(..., description="残差")
    # Coefficients on the spline basis functions.
    coefficients: List[float] = Field(..., description="样条基函数系数")
    # Number of knots.
    n_knots: int = Field(..., description="节点数")
    # Degree of the spline.
    degree: int = Field(..., description="样条次数")
    # R-squared.
    r_squared: float = Field(..., description="R²")
    # Number of observations.
    n_observations: int = Field(..., description="观测数量")
    # Human-readable summary text.
    summary: str = Field(..., description="摘要信息")
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def spline_regression(
    y_data: List[float],
    x_data: List[float],
    n_knots: int = 5,
    degree: int = 3,
    knots: str = "uniform"
) -> SplineRegressionResult:
    """
    Fit a univariate spline regression.

    The predictor is expanded with sklearn's ``SplineTransformer`` and the
    expanded basis is fitted with ``LinearRegression`` in a single pipeline.

    Args:
        y_data: Dependent variable, one value per observation.
        x_data: Single predictor, one value per observation.
        n_knots: Number of knots passed to ``SplineTransformer``.
        degree: Spline degree (3 gives a cubic spline).
        knots: Knot placement passed to ``SplineTransformer``:
            "uniform" or "quantile".

    Returns:
        SplineRegressionResult: fitted values, residuals, the linear
        coefficients on the spline basis, R² and a text summary.

    Raises:
        ImportError: scikit-learn is not installed.
        ValueError: The inputs are empty or their lengths differ.
    """
    if not SKLEARN_AVAILABLE:
        raise ImportError("sklearn库未安装。请运行: pip install scikit-learn")

    # Input validation
    if len(y_data) == 0 or len(x_data) == 0:
        raise ValueError("y_data和x_data不能为空")

    # Data preparation: the predictor becomes a single column
    y = np.array(y_data, dtype=np.float64)
    X = np.array(x_data, dtype=np.float64).reshape(-1, 1)

    n = len(y)

    # Also catches a multi-column x_data, which the reshape above would
    # otherwise flatten into a different number of rows.
    if n != X.shape[0]:
        raise ValueError(f"因变量长度({n})与自变量长度({X.shape[0]})不一致")

    # Spline basis expansion followed by ordinary least squares
    pipeline = Pipeline([
        ('spline', SplineTransformer(n_knots=n_knots, degree=degree, knots=knots)),
        ('linear', LinearRegression())
    ])

    pipeline.fit(X, y)

    # In-sample predictions
    y_pred = pipeline.predict(X)

    # Residuals and R² (0.0 when y has no variation)
    residuals = y - y_pred
    ss_res = np.sum(residuals ** 2)
    ss_tot = np.sum((y - y.mean()) ** 2)
    r_squared = float(1 - ss_res / ss_tot) if ss_tot > 0 else 0.0

    # Coefficients of the linear step, one per spline basis function
    coefficients = pipeline.named_steps['linear'].coef_.tolist()

    summary = f"""样条回归:
- 观测数量: {n}
- 节点数: {n_knots}
- 样条次数: {degree}
- 节点分布: {knots}
- R²: {r_squared:.4f}
- 样条基函数数量: {len(coefficients)}
"""

    return SplineRegressionResult(
        fitted_values=y_pred.tolist(),
        residuals=residuals.tolist(),
        coefficients=coefficients,
        n_knots=n_knots,
        degree=degree,
        r_squared=r_squared,
        n_observations=n,
        summary=summary
    )
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
"""
|
|
2
|
+
空间计量经济学模块
|
|
3
|
+
处理空间依赖性和空间异质性
|
|
4
|
+
|
|
5
|
+
主要功能:
|
|
6
|
+
1. 空间权重矩阵构建
|
|
7
|
+
2. 空间自相关检验(Moran's I, Geary's C, Local LISA)
|
|
8
|
+
3. 空间回归模型(SAR, SEM, SDM)
|
|
9
|
+
4. 地理加权回归(GWR)
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
# 空间权重矩阵
|
|
13
|
+
from .spatial_weights import (
|
|
14
|
+
create_spatial_weights,
|
|
15
|
+
SpatialWeightsResult
|
|
16
|
+
)
|
|
17
|
+
|
|
18
|
+
# 空间自相关检验
|
|
19
|
+
from .spatial_autocorrelation import (
|
|
20
|
+
morans_i_test,
|
|
21
|
+
gearys_c_test,
|
|
22
|
+
local_morans_i,
|
|
23
|
+
MoranIResult,
|
|
24
|
+
GearysCResult,
|
|
25
|
+
LocalMoranResult
|
|
26
|
+
)
|
|
27
|
+
|
|
28
|
+
# 空间回归模型
|
|
29
|
+
from .spatial_regression import (
|
|
30
|
+
spatial_lag_model,
|
|
31
|
+
spatial_error_model,
|
|
32
|
+
SpatialRegressionResult
|
|
33
|
+
)
|
|
34
|
+
|
|
35
|
+
# 空间杜宾模型
|
|
36
|
+
from .spatial_durbin_model import (
|
|
37
|
+
spatial_durbin_model,
|
|
38
|
+
SpatialDurbinResult
|
|
39
|
+
)
|
|
40
|
+
|
|
41
|
+
# 地理加权回归
|
|
42
|
+
from .geographically_weighted_regression import (
|
|
43
|
+
geographically_weighted_regression,
|
|
44
|
+
GWRResult
|
|
45
|
+
)
|
|
46
|
+
|
|
47
|
+
# Public API of the spatial econometrics package: every name re-exported
# from the submodules imported above.
__all__ = [
    # Spatial weights
    'create_spatial_weights',
    'SpatialWeightsResult',
    # Spatial autocorrelation
    'morans_i_test',
    'gearys_c_test',
    'local_morans_i',
    'MoranIResult',
    'GearysCResult',
    'LocalMoranResult',
    # Spatial regression
    'spatial_lag_model',
    'spatial_error_model',
    'SpatialRegressionResult',
    # Spatial Durbin model
    'spatial_durbin_model',
    'SpatialDurbinResult',
    # Geographically weighted regression
    'geographically_weighted_regression',
    'GWRResult'
]
|