aigroup-econ-mcp 1.4.3 → 2.0.1 (py3-none-any.whl)
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- PKG-INFO +344 -322
- README.md +335 -320
- __init__.py +1 -1
- aigroup_econ_mcp-2.0.1.dist-info/METADATA +732 -0
- aigroup_econ_mcp-2.0.1.dist-info/RECORD +170 -0
- cli.py +4 -0
- econometrics/advanced_methods/modern_computing_machine_learning/__init__.py +30 -0
- econometrics/advanced_methods/modern_computing_machine_learning/causal_forest.py +253 -0
- econometrics/advanced_methods/modern_computing_machine_learning/double_ml.py +268 -0
- econometrics/advanced_methods/modern_computing_machine_learning/gradient_boosting.py +249 -0
- econometrics/advanced_methods/modern_computing_machine_learning/hierarchical_clustering.py +243 -0
- econometrics/advanced_methods/modern_computing_machine_learning/kmeans_clustering.py +293 -0
- econometrics/advanced_methods/modern_computing_machine_learning/neural_network.py +264 -0
- econometrics/advanced_methods/modern_computing_machine_learning/random_forest.py +195 -0
- econometrics/advanced_methods/modern_computing_machine_learning/support_vector_machine.py +226 -0
- econometrics/advanced_methods/modern_computing_machine_learning/test_all_modules.py +329 -0
- econometrics/advanced_methods/modern_computing_machine_learning/test_report.md +107 -0
- econometrics/causal_inference/__init__.py +66 -0
- econometrics/causal_inference/causal_identification_strategy/__init__.py +104 -0
- econometrics/causal_inference/causal_identification_strategy/control_function.py +112 -0
- econometrics/causal_inference/causal_identification_strategy/difference_in_differences.py +107 -0
- econometrics/causal_inference/causal_identification_strategy/event_study.py +119 -0
- econometrics/causal_inference/causal_identification_strategy/first_difference.py +89 -0
- econometrics/causal_inference/causal_identification_strategy/fixed_effects.py +103 -0
- econometrics/causal_inference/causal_identification_strategy/hausman_test.py +69 -0
- econometrics/causal_inference/causal_identification_strategy/instrumental_variables.py +145 -0
- econometrics/causal_inference/causal_identification_strategy/mediation_analysis.py +121 -0
- econometrics/causal_inference/causal_identification_strategy/moderation_analysis.py +109 -0
- econometrics/causal_inference/causal_identification_strategy/propensity_score_matching.py +140 -0
- econometrics/causal_inference/causal_identification_strategy/random_effects.py +100 -0
- econometrics/causal_inference/causal_identification_strategy/regression_discontinuity.py +98 -0
- econometrics/causal_inference/causal_identification_strategy/synthetic_control.py +111 -0
- econometrics/causal_inference/causal_identification_strategy/triple_difference.py +86 -0
- econometrics/distribution_analysis/__init__.py +28 -0
- econometrics/distribution_analysis/oaxaca_blinder.py +184 -0
- econometrics/distribution_analysis/time_series_decomposition.py +152 -0
- econometrics/distribution_analysis/variance_decomposition.py +179 -0
- econometrics/missing_data/__init__.py +18 -0
- econometrics/missing_data/imputation_methods.py +219 -0
- econometrics/nonparametric/__init__.py +35 -0
- econometrics/nonparametric/gam_model.py +117 -0
- econometrics/nonparametric/kernel_regression.py +161 -0
- econometrics/nonparametric/quantile_regression.py +249 -0
- econometrics/nonparametric/spline_regression.py +100 -0
- econometrics/spatial_econometrics/__init__.py +68 -0
- econometrics/spatial_econometrics/geographically_weighted_regression.py +211 -0
- econometrics/spatial_econometrics/gwr_simple.py +154 -0
- econometrics/spatial_econometrics/spatial_autocorrelation.py +356 -0
- econometrics/spatial_econometrics/spatial_durbin_model.py +177 -0
- econometrics/spatial_econometrics/spatial_regression.py +315 -0
- econometrics/spatial_econometrics/spatial_weights.py +226 -0
- econometrics/specific_data_modeling/micro_discrete_limited_data/README.md +164 -0
- econometrics/specific_data_modeling/micro_discrete_limited_data/__init__.py +40 -0
- econometrics/specific_data_modeling/micro_discrete_limited_data/count_data_models.py +311 -0
- econometrics/specific_data_modeling/micro_discrete_limited_data/discrete_choice_models.py +294 -0
- econometrics/specific_data_modeling/micro_discrete_limited_data/limited_dependent_variable_models.py +282 -0
- econometrics/statistical_inference/__init__.py +21 -0
- econometrics/statistical_inference/bootstrap_methods.py +162 -0
- econometrics/statistical_inference/permutation_test.py +177 -0
- econometrics/survival_analysis/__init__.py +18 -0
- econometrics/survival_analysis/survival_models.py +259 -0
- econometrics/tests/causal_inference_tests/__init__.py +3 -0
- econometrics/tests/causal_inference_tests/detailed_test.py +441 -0
- econometrics/tests/causal_inference_tests/test_all_methods.py +418 -0
- econometrics/tests/causal_inference_tests/test_causal_identification_strategy.py +202 -0
- econometrics/tests/causal_inference_tests/test_difference_in_differences.py +53 -0
- econometrics/tests/causal_inference_tests/test_instrumental_variables.py +44 -0
- econometrics/tests/specific_data_modeling_tests/test_micro_discrete_limited_data.py +189 -0
- econometrics//321/206/320/254/320/272/321/205/342/225/235/320/220/321/205/320/237/320/241/321/205/320/264/320/267/321/207/342/226/222/342/225/227/321/204/342/225/235/320/250/321/205/320/225/320/230/321/207/342/225/221/320/267/321/205/320/230/320/226/321/206/320/256/320/240.md +544 -0
- pyproject.toml +9 -2
- server.py +15 -1
- tools/__init__.py +75 -1
- tools/causal_inference_adapter.py +658 -0
- tools/distribution_analysis_adapter.py +121 -0
- tools/gwr_simple_adapter.py +54 -0
- tools/machine_learning_adapter.py +567 -0
- tools/mcp_tool_groups/__init__.py +15 -1
- tools/mcp_tool_groups/causal_inference_tools.py +643 -0
- tools/mcp_tool_groups/distribution_analysis_tools.py +169 -0
- tools/mcp_tool_groups/machine_learning_tools.py +422 -0
- tools/mcp_tool_groups/microecon_tools.py +325 -0
- tools/mcp_tool_groups/missing_data_tools.py +117 -0
- tools/mcp_tool_groups/nonparametric_tools.py +225 -0
- tools/mcp_tool_groups/spatial_econometrics_tools.py +323 -0
- tools/mcp_tool_groups/statistical_inference_tools.py +131 -0
- tools/mcp_tools_registry.py +13 -3
- tools/microecon_adapter.py +412 -0
- tools/missing_data_adapter.py +73 -0
- tools/nonparametric_adapter.py +190 -0
- tools/spatial_econometrics_adapter.py +318 -0
- tools/statistical_inference_adapter.py +90 -0
- tools/survival_analysis_adapter.py +46 -0
- aigroup_econ_mcp-1.4.3.dist-info/METADATA +0 -710
- aigroup_econ_mcp-1.4.3.dist-info/RECORD +0 -92
- {aigroup_econ_mcp-1.4.3.dist-info → aigroup_econ_mcp-2.0.1.dist-info}/WHEEL +0 -0
- {aigroup_econ_mcp-1.4.3.dist-info → aigroup_econ_mcp-2.0.1.dist-info}/entry_points.txt +0 -0
- {aigroup_econ_mcp-1.4.3.dist-info → aigroup_econ_mcp-2.0.1.dist-info}/licenses/LICENSE +0 -0
econometrics/spatial_econometrics/spatial_regression.py
@@ -0,0 +1,315 @@

```python
"""
Spatial regression models.

Implements the spatial lag model (SAR) and the spatial error model (SEM)
on top of the spreg library.
"""

from typing import List, Optional, Dict, Any
from pydantic import BaseModel, Field
import numpy as np

try:
    from spreg import OLS as Spreg_OLS
    from spreg import ML_Lag, ML_Error, GM_Lag, GM_Error
    from libpysal.weights import W
    SPREG_AVAILABLE = True
except ImportError:
    SPREG_AVAILABLE = False
    ML_Lag = None
    ML_Error = None
    GM_Lag = None
    GM_Error = None
    W = None


class SpatialRegressionResult(BaseModel):
    """Result of a spatial regression model."""
    model_type: str = Field(..., description="Model type (SAR/SEM)")
    method: str = Field(..., description="Estimation method (ML/GMM)")
    coefficients: List[float] = Field(..., description="Regression coefficients")
    std_errors: List[float] = Field(..., description="Standard errors")
    z_scores: List[float] = Field(..., description="Z statistics")
    p_values: List[float] = Field(..., description="P values")
    feature_names: List[str] = Field(..., description="Feature names")
    spatial_param: float = Field(..., description="Spatial parameter (rho or lambda)")
    spatial_param_se: float = Field(..., description="Standard error of the spatial parameter")
    r_squared: Optional[float] = Field(None, description="Pseudo R-squared")
    log_likelihood: float = Field(..., description="Log-likelihood")
    aic: float = Field(..., description="Akaike information criterion (AIC)")
    schwarz: float = Field(..., description="Schwarz criterion (BIC)")
    n_observations: int = Field(..., description="Number of observations")
    summary: str = Field(..., description="Summary text")


def spatial_lag_model(
    y_data: List[float],
    x_data: List[List[float]],
    neighbors: dict,
    weights: Optional[dict] = None,
    feature_names: Optional[List[str]] = None,
    method: str = "ml"
) -> SpatialRegressionResult:
    """
    Spatial lag model (SAR).

    Model form: y = ρWy + Xβ + ε

    Args:
        y_data: Dependent variable.
        x_data: Independent variables (2-D list).
        neighbors: Neighbor dictionary.
        weights: Weight dictionary.
        feature_names: Feature names.
        method: Estimation method - "ml" (maximum likelihood) or
            "gmm" (generalized method of moments).

    Returns:
        SpatialRegressionResult: Spatial lag model result.

    Raises:
        ImportError: The spreg library is not installed.
        ValueError: Invalid input data.
    """
    if not SPREG_AVAILABLE:
        raise ImportError(
            "The spreg library is not installed. Run: pip install spreg\n"
            "or: pip install pysal"
        )

    # Input validation
    if not y_data or not x_data:
        raise ValueError("y_data and x_data must not be empty")

    # Data preparation
    y = np.array(y_data).reshape(-1, 1)
    X = np.array(x_data)

    # Make sure X is a 2-D array
    if X.ndim == 1:
        X = X.reshape(-1, 1)

    n = len(y)
    k = X.shape[1]

    # Build the weights object
    if weights is None:
        weights = {i: [1.0] * len(neighbors[i]) for i in neighbors}

    # Make sure the neighbor dictionary uses integer keys
    neighbors_int = {int(key): [int(j) for j in v] for key, v in neighbors.items()}
    weights_int = {int(key): v for key, v in weights.items()}

    w = W(neighbors_int, weights_int)
    w.transform = 'r'  # row standardization

    # Feature names
    if feature_names is None:
        feature_names = [f"X{i+1}" for i in range(k)]

    # Estimate the model
    if method.lower() == "ml":
        model = ML_Lag(y, X, w, name_y='y', name_x=feature_names)
    elif method.lower() == "gmm":
        # w must be passed by keyword: GM_Lag's positional parameters after
        # y and x are the endogenous variables and instruments (yend, q)
        model = GM_Lag(y, X, w=w, name_y='y', name_x=feature_names)
    else:
        raise ValueError(f"Unsupported method: {method}. Supported: ml, gmm")

    # Extract results
    # The coefficients include the constant term and the regressors
    coefficients = model.betas.flatten().tolist()

    # Standard errors, z statistics, p values
    std_errors = np.sqrt(np.diag(model.vm)).tolist()

    # z_stat may be a list or a numpy array
    if hasattr(model.z_stat, 'shape'):
        # numpy array
        z_scores = model.z_stat[:, 0].tolist()
        p_values = model.z_stat[:, 1].tolist()
    else:
        # list of (z, p) pairs
        z_scores = [stat[0] for stat in model.z_stat] if model.z_stat else []
        p_values = [stat[1] for stat in model.z_stat] if model.z_stat else []

    # Spatial parameter (rho)
    # In ML_Lag, rho is the last parameter
    spatial_param = float(model.rho)

    # Try to recover the standard error of rho
    try:
        # It is usually the last diagonal element of the vm matrix
        spatial_param_se = float(np.sqrt(model.vm[-1, -1]))
    except Exception:
        spatial_param_se = 0.0

    # Pseudo R-squared (if available)
    try:
        r_squared = float(model.pr2) if hasattr(model, 'pr2') else None
    except Exception:
        r_squared = None

    # Log-likelihood
    log_likelihood = float(model.logll) if hasattr(model, 'logll') else 0.0

    # Information criteria
    aic = float(model.aic) if hasattr(model, 'aic') else 0.0
    schwarz = float(model.schwarz) if hasattr(model, 'schwarz') else 0.0

    # Prepend the constant term to the feature names
    all_feature_names = ['const'] + feature_names

    # Build the summary
    summary = f"""Spatial lag model (SAR) - {method.upper()} estimation:
- Observations: {n}
- Regressors: {k}
- Spatial parameter ρ: {spatial_param:.4f} (std. error: {spatial_param_se:.4f})
- Log-likelihood: {log_likelihood:.2f}
- AIC: {aic:.2f}
- BIC: {schwarz:.2f}
"""
    if r_squared is not None:
        summary += f"- Pseudo R²: {r_squared:.4f}\n"

    return SpatialRegressionResult(
        model_type="SAR",
        method=method.upper(),
        coefficients=coefficients,
        std_errors=std_errors,
        z_scores=z_scores,
        p_values=p_values,
        feature_names=all_feature_names,
        spatial_param=spatial_param,
        spatial_param_se=spatial_param_se,
        r_squared=r_squared,
        log_likelihood=log_likelihood,
        aic=aic,
        schwarz=schwarz,
        n_observations=n,
        summary=summary
    )


def spatial_error_model(
    y_data: List[float],
    x_data: List[List[float]],
    neighbors: dict,
    weights: Optional[dict] = None,
    feature_names: Optional[List[str]] = None,
    method: str = "ml"
) -> SpatialRegressionResult:
    """
    Spatial error model (SEM).

    Model form: y = Xβ + u, u = λWu + ε

    Args:
        y_data: Dependent variable.
        x_data: Independent variables (2-D list).
        neighbors: Neighbor dictionary.
        weights: Weight dictionary.
        feature_names: Feature names.
        method: Estimation method - "ml" (maximum likelihood) or
            "gmm" (generalized method of moments).

    Returns:
        SpatialRegressionResult: Spatial error model result.
    """
    if not SPREG_AVAILABLE:
        raise ImportError("The spreg library is not installed")

    # Input validation
    if not y_data or not x_data:
        raise ValueError("y_data and x_data must not be empty")

    # Data preparation
    y = np.array(y_data).reshape(-1, 1)
    X = np.array(x_data)

    if X.ndim == 1:
        X = X.reshape(-1, 1)

    n = len(y)
    k = X.shape[1]

    # Build the weights object
    if weights is None:
        weights = {i: [1.0] * len(neighbors[i]) for i in neighbors}

    # Make sure the neighbor dictionary uses integer keys
    neighbors_int = {int(key): [int(j) for j in v] for key, v in neighbors.items()}
    weights_int = {int(key): v for key, v in weights.items()}

    w = W(neighbors_int, weights_int)
    w.transform = 'r'

    # Feature names
    if feature_names is None:
        feature_names = [f"X{i+1}" for i in range(k)]

    # Estimate the model
    if method.lower() == "ml":
        model = ML_Error(y, X, w, name_y='y', name_x=feature_names)
    elif method.lower() == "gmm":
        model = GM_Error(y, X, w=w, name_y='y', name_x=feature_names)
    else:
        raise ValueError(f"Unsupported method: {method}")

    # Extract results
    coefficients = model.betas.flatten().tolist()
    std_errors = np.sqrt(np.diag(model.vm)).tolist()

    # z_stat may be a list or a numpy array
    if hasattr(model.z_stat, 'shape'):
        # numpy array
        z_scores = model.z_stat[:, 0].tolist()
        p_values = model.z_stat[:, 1].tolist()
    else:
        # list of (z, p) pairs
        z_scores = [stat[0] for stat in model.z_stat] if model.z_stat else []
        p_values = [stat[1] for stat in model.z_stat] if model.z_stat else []

    # Spatial parameter (lambda)
    spatial_param = float(model.lam)

    try:
        spatial_param_se = float(np.sqrt(model.vm[-1, -1]))
    except Exception:
        spatial_param_se = 0.0

    # Pseudo R-squared
    try:
        r_squared = float(model.pr2) if hasattr(model, 'pr2') else None
    except Exception:
        r_squared = None

    # Log-likelihood and information criteria
    log_likelihood = float(model.logll) if hasattr(model, 'logll') else 0.0
    aic = float(model.aic) if hasattr(model, 'aic') else 0.0
    schwarz = float(model.schwarz) if hasattr(model, 'schwarz') else 0.0

    all_feature_names = ['const'] + feature_names

    # Build the summary
    summary = f"""Spatial error model (SEM) - {method.upper()} estimation:
- Observations: {n}
- Regressors: {k}
- Spatial parameter λ: {spatial_param:.4f} (std. error: {spatial_param_se:.4f})
- Log-likelihood: {log_likelihood:.2f}
- AIC: {aic:.2f}
- BIC: {schwarz:.2f}
"""
    if r_squared is not None:
        summary += f"- Pseudo R²: {r_squared:.4f}\n"

    return SpatialRegressionResult(
        model_type="SEM",
        method=method.upper(),
        coefficients=coefficients,
        std_errors=std_errors,
        z_scores=z_scores,
        p_values=p_values,
        feature_names=all_feature_names,
        spatial_param=spatial_param,
        spatial_param_se=spatial_param_se,
        r_squared=r_squared,
        log_likelihood=log_likelihood,
        aic=aic,
        schwarz=schwarz,
        n_observations=n,
        summary=summary
    )
```
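For orientation, a minimal usage sketch of the new `spatial_lag_model` entry point. The ring neighborhood and the random data below are purely illustrative, and the snippet assumes `spreg` and `libpysal` are installed:

```python
import numpy as np

from econometrics.spatial_econometrics.spatial_regression import spatial_lag_model

# Hypothetical data: 20 units on a ring, each neighboring the two adjacent
# units; y and x are random draws used only to exercise the API.
rng = np.random.default_rng(0)
n = 20
neighbors = {i: [(i - 1) % n, (i + 1) % n] for i in range(n)}
x = rng.normal(size=(n, 2))
y = x @ np.array([1.0, -0.5]) + rng.normal(scale=0.5, size=n)

result = spatial_lag_model(
    y_data=y.tolist(),
    x_data=x.tolist(),
    neighbors=neighbors,
    feature_names=["x1", "x2"],
    method="ml",
)
print(result.summary)        # formatted summary text built by the function
print(result.spatial_param)  # estimated rho
```

`spatial_error_model` takes the same arguments and returns the same result type, with λ reported in place of ρ.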
econometrics/spatial_econometrics/spatial_weights.py
@@ -0,0 +1,226 @@

```python
"""
Spatial weight matrix construction.

Implements several spatial weight matrix constructions on top of the
libpysal library.
"""

from typing import List, Dict, Any, Optional, Tuple
from pydantic import BaseModel, Field
import numpy as np

try:
    from libpysal.weights import Queen, Rook, KNN, DistanceBand, Kernel
    from libpysal.weights import W
    LIBPYSAL_AVAILABLE = True
except ImportError:
    LIBPYSAL_AVAILABLE = False
    W = None


class SpatialWeightsResult(BaseModel):
    """Result of a spatial weight matrix."""
    n_observations: int = Field(..., description="Number of observations")
    weight_type: str = Field(..., description="Weight type")
    n_neighbors_mean: float = Field(..., description="Mean number of neighbors")
    n_neighbors_min: int = Field(..., description="Minimum number of neighbors")
    n_neighbors_max: int = Field(..., description="Maximum number of neighbors")
    pct_nonzero: float = Field(..., description="Percentage of nonzero weights")
    weights_matrix: List[List[float]] = Field(..., description="Weight matrix (sparse representation)")
    neighbors: Dict[int, List[int]] = Field(..., description="Neighbor dictionary")
    is_symmetric: bool = Field(..., description="Whether the matrix is symmetric")
    summary: str = Field(..., description="Summary text")


def create_spatial_weights(
    coordinates: Optional[List[Tuple[float, float]]] = None,
    adjacency_matrix: Optional[List[List[int]]] = None,
    weight_type: str = "queen",
    k: int = 4,
    distance_threshold: Optional[float] = None,
    bandwidth: Optional[float] = None,
    kernel_type: str = "triangular",
    row_standardize: bool = True
) -> SpatialWeightsResult:
    """
    Create a spatial weight matrix.

    Args:
        coordinates: List of coordinates [(x1, y1), (x2, y2), ...].
        adjacency_matrix: Adjacency matrix (for contiguity-based weights).
        weight_type: Weight type - "queen" (queen contiguity), "rook" (rook contiguity),
            "knn" (k-nearest neighbors), "distance" (distance band), "kernel" (kernel weights).
        k: Number of neighbors for KNN.
        distance_threshold: Threshold for distance-band weights.
        bandwidth: Bandwidth for kernel weights.
        kernel_type: Kernel function type.
        row_standardize: Whether to row-standardize.

    Returns:
        SpatialWeightsResult: Spatial weight matrix result.

    Raises:
        ImportError: The libpysal library is not installed.
        ValueError: Invalid input data.
    """
    if not LIBPYSAL_AVAILABLE:
        raise ImportError(
            "The libpysal library is not installed. Run: pip install libpysal\n"
            "or: pip install pysal"
        )

    # Input validation
    if coordinates is None and adjacency_matrix is None:
        raise ValueError("Either coordinates or adjacency_matrix must be provided")

    # Build the spatial weights object
    w = None

    if weight_type == "queen":
        if adjacency_matrix is not None:
            # Build from the adjacency matrix
            w = _create_from_adjacency(adjacency_matrix, "queen")
        else:
            raise ValueError("Queen contiguity requires adjacency_matrix")

    elif weight_type == "rook":
        if adjacency_matrix is not None:
            w = _create_from_adjacency(adjacency_matrix, "rook")
        else:
            raise ValueError("Rook contiguity requires adjacency_matrix")

    elif weight_type == "knn":
        if coordinates is None:
            raise ValueError("KNN requires coordinates")
        coords_array = np.array(coordinates)
        w = KNN.from_array(coords_array, k=k)

    elif weight_type == "distance":
        if coordinates is None:
            raise ValueError("Distance-band weights require coordinates")
        if distance_threshold is None:
            raise ValueError("Distance-band weights require distance_threshold")
        coords_array = np.array(coordinates)
        w = DistanceBand.from_array(coords_array, threshold=distance_threshold)

    elif weight_type == "kernel":
        if coordinates is None:
            raise ValueError("Kernel weights require coordinates")
        coords_array = np.array(coordinates)
        if bandwidth is None:
            # No bandwidth given: let libpysal choose its default
            w = Kernel.from_array(coords_array, function=kernel_type)
        else:
            w = Kernel.from_array(coords_array, bandwidth=bandwidth, function=kernel_type)

    else:
        raise ValueError(
            f"Unsupported weight type: {weight_type}. "
            f"Supported types: queen, rook, knn, distance, kernel"
        )

    # Row standardization
    if row_standardize and weight_type != "kernel":  # kernel weights are usually already standardized
        w.transform = 'r'

    # Extract results
    n = w.n

    # Neighbor statistics
    cardinalities = w.cardinalities
    n_neighbors_mean = float(np.mean(list(cardinalities.values())))
    n_neighbors_min = int(min(cardinalities.values()))
    n_neighbors_max = int(max(cardinalities.values()))

    # Percentage of nonzero weights
    pct_nonzero = float(w.pct_nonzero)

    # Convert to a sparse representation (list of triples)
    weights_matrix = _convert_to_sparse_matrix(w)

    # Neighbor dictionary
    neighbors = {int(i): [int(j) for j in w.neighbors[i]] for i in w.neighbors}

    # Check symmetry
    is_symmetric = _check_symmetry(w)

    # Build the summary
    summary = f"""Spatial weight matrix summary:
- Observations: {n}
- Weight type: {weight_type}
- Mean neighbors: {n_neighbors_mean:.2f}
- Neighbor range: [{n_neighbors_min}, {n_neighbors_max}]
- Nonzero weights: {pct_nonzero:.2f}%
- Symmetric: {'yes' if is_symmetric else 'no'}
- Row-standardized: {'yes' if row_standardize else 'no'}
"""

    return SpatialWeightsResult(
        n_observations=n,
        weight_type=weight_type,
        n_neighbors_mean=n_neighbors_mean,
        n_neighbors_min=n_neighbors_min,
        n_neighbors_max=n_neighbors_max,
        pct_nonzero=pct_nonzero,
        weights_matrix=weights_matrix,
        neighbors=neighbors,
        is_symmetric=is_symmetric,
        summary=summary
    )


def _create_from_adjacency(adjacency_matrix: List[List[int]], contiguity_type: str) -> W:
    """Create spatial weights from an adjacency matrix."""
    n = len(adjacency_matrix)

    # Convert to neighbor and weight dictionaries
    neighbors = {}
    weights = {}

    for i in range(n):
        neighbors[i] = []
        weights[i] = []
        for j in range(n):
            if i != j and adjacency_matrix[i][j] > 0:
                neighbors[i].append(j)
                weights[i].append(float(adjacency_matrix[i][j]))

    # Create the weights object
    w = W(neighbors, weights)
    return w


def _convert_to_sparse_matrix(w: W) -> List[List[float]]:
    """Convert a weights object into a sparse (i, j, weight) representation."""
    # Return at most the first 100 nonzero entries (to keep the payload small)
    sparse_repr = []
    count = 0
    max_elements = 100

    for i in w.neighbors:
        for j_idx, j in enumerate(w.neighbors[i]):
            if count >= max_elements:
                break
            weight = w.weights[i][j_idx]
            sparse_repr.append([int(i), int(j), float(weight)])
            count += 1
        if count >= max_elements:
            break

    return sparse_repr


def _check_symmetry(w: W) -> bool:
    """Check whether the weight matrix is symmetric."""
    try:
        # Simple check: for every i -> j, does j -> i exist?
        for i in w.neighbors:
            for j_idx, j in enumerate(w.neighbors[i]):
                # Check whether i is among j's neighbors
                if i not in w.neighbors.get(j, []):
                    return False
                # Check whether the weights match
                j_i_idx = w.neighbors[j].index(i)
                if abs(w.weights[i][j_idx] - w.weights[j][j_i_idx]) > 1e-10:
                    return False
        return True
    except Exception:
        return False
```
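Likewise, a small sketch of `create_spatial_weights`, using hypothetical coordinates and KNN weights (chosen because they need no adjacency matrix); the snippet assumes `libpysal` is installed:

```python
import numpy as np

from econometrics.spatial_econometrics.spatial_weights import create_spatial_weights

# Hypothetical data: 30 random points; build a row-standardized
# 4-nearest-neighbor weight matrix from the coordinates.
rng = np.random.default_rng(0)
coords = [tuple(p) for p in rng.uniform(0, 10, size=(30, 2))]

result = create_spatial_weights(coordinates=coords, weight_type="knn", k=4)
print(result.summary)       # observation count, neighbor stats, symmetry
print(result.neighbors[0])  # indices of the 4 nearest neighbors of unit 0
```

The returned `neighbors` dictionary has the same shape as the `neighbors` argument expected by `spatial_lag_model` and `spatial_error_model` in spatial_regression.py.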
econometrics/specific_data_modeling/micro_discrete_limited_data/README.md
@@ -0,0 +1,164 @@

# Micro Discrete and Limited Dependent Variable Models

This module implements the discrete choice and limited dependent variable models commonly used in microeconometrics, for data whose dependent variable is categorical, a count, censored, or otherwise non-continuous. It is built on existing libraries such as statsmodels rather than reinventing the wheel.

## Dependencies

- statsmodels >= 0.13.0
- numpy
- pandas
- scipy

Install the dependencies:
```bash
pip install statsmodels numpy pandas scipy
```

## Models

### Discrete choice models

1. **LogitModel** - logistic regression
   - Built on the statsmodels Logit model
   - For binary choice problems

2. **ProbitModel** - probit regression
   - Built on the statsmodels Probit model
   - Assumes normally distributed errors

3. **MultinomialLogit** - multinomial logit
   - Built on the statsmodels MNLogit model
   - For unordered multi-category choice problems

4. **OrderedLogit** - ordered logit
   - Built on the statsmodels OrderedModel
   - For ordered multi-category choice problems

5. **ConditionalLogit** - conditional logit
   - Built on the statsmodels ConditionalLogit
   - For matched-pair and other conditional choice settings

### Limited dependent variable models

1. **TobitModel** - Tobit model (censored regression)
   - Built on statsmodels
   - For dependent variables censored at a threshold
   - Supports lower and upper censoring

2. **HeckmanModel** - Heckman two-step selection model
   - Built on statsmodels
   - Corrects for sample selection bias

### Count data models

1. **PoissonModel** - Poisson regression
   - Built on the statsmodels Poisson model
   - For count data
   - Assumes the mean equals the variance

2. **NegativeBinomialModel** - negative binomial regression
   - Built on the statsmodels NegativeBinomial model
   - For overdispersed count data
   - Allows the variance to exceed the mean

3. **ZeroInflatedPoissonModel** - zero-inflated Poisson
   - Built on the statsmodels ZeroInflatedPoisson model
   - For count data with excess zeros

4. **ZeroInflatedNegativeBinomialModel** - zero-inflated negative binomial
   - Built on the statsmodels ZeroInflatedNegativeBinomialP model
   - For count data with excess zeros and overdispersion

## Usage examples

### Logit model example

```python
from econometrics.specific_data_modeling.micro_discrete_limited_data import LogitModel
import numpy as np

# Generate example data
X = np.random.normal(0, 1, (1000, 2))
y = np.random.binomial(1, 1 / (1 + np.exp(-(0.5*X[:, 0] - 0.3*X[:, 1]))))

# Fit the model
model = LogitModel()
model.fit(X, y)

# Predict
probabilities = model.predict_proba(X)
predictions = model.predict(X)

# Print the model summary
print(model.summary())
```

### Tobit model example

```python
from econometrics.specific_data_modeling.micro_discrete_limited_data import TobitModel
import numpy as np

# Generate example data
X = np.random.normal(0, 1, (1000, 2))
y_latent = 1.0*X[:, 0] - 0.5*X[:, 1] + np.random.normal(0, 0.5, 1000)
y = np.maximum(y_latent, 0)  # left-censored at 0

# Fit the model
model = TobitModel(lower_bound=0)
model.fit(X, y)

# Predict
predictions = model.predict(X)

# Print the model summary
print(model.summary())
```

### Poisson model example

```python
from econometrics.specific_data_modeling.micro_discrete_limited_data import PoissonModel
import numpy as np

# Generate example data
X = np.random.normal(0, 1, (1000, 2))
mu = np.exp(0.5*X[:, 0] - 0.3*X[:, 1])
y = np.random.poisson(mu)

# Fit the model
model = PoissonModel()
model.fit(X, y)

# Predict
predictions = model.predict(X)

# Print the model summary
print(model.summary())
```

## Model outputs

All models provide the following outputs (a short access sketch follows this list):

- Parameter estimates (via .results_.params)
- Standard errors (via .results_.bse)
- t statistics (via .results_.tvalues)
- p values (via .results_.pvalues)
- Model fit statistics (AIC, BIC, log-likelihood, etc.)
- Prediction methods (predict, predict_proba, etc.)
- A model summary (the summary method)

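A minimal sketch of that interface, reusing the fitted LogitModel from the example above; the attribute names are exactly those listed here, and everything else about the snippet is illustrative:

```python
# Assumes `model` is the LogitModel fitted in the Logit example above;
# .results_ wraps the underlying statsmodels results object.
print(model.results_.params)   # parameter estimates
print(model.results_.bse)      # standard errors
print(model.results_.tvalues)  # t statistics
print(model.results_.pvalues)  # p values
```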
## Notes

1. Data preprocessing: make sure the data satisfy the model assumptions.
2. Model diagnostics: check goodness of fit and the residuals.
3. Overdispersion: for count data with overdispersion, use the negative binomial model rather than the Poisson model.
4. Boundary values: the models already handle numerical edge cases, but users should still watch data quality.
5. Dependencies: make sure statsmodels and the other dependencies are installed.

## References

1. Cameron, A. C., & Trivedi, P. K. (2013). Regression Analysis of Count Data. Cambridge University Press.
2. Greene, W. H. (2003). Econometric Analysis. Pearson Education India.
3. Wooldridge, J. M. (2010). Econometric Analysis of Cross Section and Panel Data. MIT Press.
4. statsmodels documentation: https://www.statsmodels.org/stable/index.html