aigroup-econ-mcp 1.4.3__py3-none-any.whl → 2.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- PKG-INFO +344 -322
- README.md +335 -320
- __init__.py +1 -1
- aigroup_econ_mcp-2.0.1.dist-info/METADATA +732 -0
- aigroup_econ_mcp-2.0.1.dist-info/RECORD +170 -0
- cli.py +4 -0
- econometrics/advanced_methods/modern_computing_machine_learning/__init__.py +30 -0
- econometrics/advanced_methods/modern_computing_machine_learning/causal_forest.py +253 -0
- econometrics/advanced_methods/modern_computing_machine_learning/double_ml.py +268 -0
- econometrics/advanced_methods/modern_computing_machine_learning/gradient_boosting.py +249 -0
- econometrics/advanced_methods/modern_computing_machine_learning/hierarchical_clustering.py +243 -0
- econometrics/advanced_methods/modern_computing_machine_learning/kmeans_clustering.py +293 -0
- econometrics/advanced_methods/modern_computing_machine_learning/neural_network.py +264 -0
- econometrics/advanced_methods/modern_computing_machine_learning/random_forest.py +195 -0
- econometrics/advanced_methods/modern_computing_machine_learning/support_vector_machine.py +226 -0
- econometrics/advanced_methods/modern_computing_machine_learning/test_all_modules.py +329 -0
- econometrics/advanced_methods/modern_computing_machine_learning/test_report.md +107 -0
- econometrics/causal_inference/__init__.py +66 -0
- econometrics/causal_inference/causal_identification_strategy/__init__.py +104 -0
- econometrics/causal_inference/causal_identification_strategy/control_function.py +112 -0
- econometrics/causal_inference/causal_identification_strategy/difference_in_differences.py +107 -0
- econometrics/causal_inference/causal_identification_strategy/event_study.py +119 -0
- econometrics/causal_inference/causal_identification_strategy/first_difference.py +89 -0
- econometrics/causal_inference/causal_identification_strategy/fixed_effects.py +103 -0
- econometrics/causal_inference/causal_identification_strategy/hausman_test.py +69 -0
- econometrics/causal_inference/causal_identification_strategy/instrumental_variables.py +145 -0
- econometrics/causal_inference/causal_identification_strategy/mediation_analysis.py +121 -0
- econometrics/causal_inference/causal_identification_strategy/moderation_analysis.py +109 -0
- econometrics/causal_inference/causal_identification_strategy/propensity_score_matching.py +140 -0
- econometrics/causal_inference/causal_identification_strategy/random_effects.py +100 -0
- econometrics/causal_inference/causal_identification_strategy/regression_discontinuity.py +98 -0
- econometrics/causal_inference/causal_identification_strategy/synthetic_control.py +111 -0
- econometrics/causal_inference/causal_identification_strategy/triple_difference.py +86 -0
- econometrics/distribution_analysis/__init__.py +28 -0
- econometrics/distribution_analysis/oaxaca_blinder.py +184 -0
- econometrics/distribution_analysis/time_series_decomposition.py +152 -0
- econometrics/distribution_analysis/variance_decomposition.py +179 -0
- econometrics/missing_data/__init__.py +18 -0
- econometrics/missing_data/imputation_methods.py +219 -0
- econometrics/nonparametric/__init__.py +35 -0
- econometrics/nonparametric/gam_model.py +117 -0
- econometrics/nonparametric/kernel_regression.py +161 -0
- econometrics/nonparametric/quantile_regression.py +249 -0
- econometrics/nonparametric/spline_regression.py +100 -0
- econometrics/spatial_econometrics/__init__.py +68 -0
- econometrics/spatial_econometrics/geographically_weighted_regression.py +211 -0
- econometrics/spatial_econometrics/gwr_simple.py +154 -0
- econometrics/spatial_econometrics/spatial_autocorrelation.py +356 -0
- econometrics/spatial_econometrics/spatial_durbin_model.py +177 -0
- econometrics/spatial_econometrics/spatial_regression.py +315 -0
- econometrics/spatial_econometrics/spatial_weights.py +226 -0
- econometrics/specific_data_modeling/micro_discrete_limited_data/README.md +164 -0
- econometrics/specific_data_modeling/micro_discrete_limited_data/__init__.py +40 -0
- econometrics/specific_data_modeling/micro_discrete_limited_data/count_data_models.py +311 -0
- econometrics/specific_data_modeling/micro_discrete_limited_data/discrete_choice_models.py +294 -0
- econometrics/specific_data_modeling/micro_discrete_limited_data/limited_dependent_variable_models.py +282 -0
- econometrics/statistical_inference/__init__.py +21 -0
- econometrics/statistical_inference/bootstrap_methods.py +162 -0
- econometrics/statistical_inference/permutation_test.py +177 -0
- econometrics/survival_analysis/__init__.py +18 -0
- econometrics/survival_analysis/survival_models.py +259 -0
- econometrics/tests/causal_inference_tests/__init__.py +3 -0
- econometrics/tests/causal_inference_tests/detailed_test.py +441 -0
- econometrics/tests/causal_inference_tests/test_all_methods.py +418 -0
- econometrics/tests/causal_inference_tests/test_causal_identification_strategy.py +202 -0
- econometrics/tests/causal_inference_tests/test_difference_in_differences.py +53 -0
- econometrics/tests/causal_inference_tests/test_instrumental_variables.py +44 -0
- econometrics/tests/specific_data_modeling_tests/test_micro_discrete_limited_data.py +189 -0
- econometrics//321/206/320/254/320/272/321/205/342/225/235/320/220/321/205/320/237/320/241/321/205/320/264/320/267/321/207/342/226/222/342/225/227/321/204/342/225/235/320/250/321/205/320/225/320/230/321/207/342/225/221/320/267/321/205/320/230/320/226/321/206/320/256/320/240.md +544 -0
- pyproject.toml +9 -2
- server.py +15 -1
- tools/__init__.py +75 -1
- tools/causal_inference_adapter.py +658 -0
- tools/distribution_analysis_adapter.py +121 -0
- tools/gwr_simple_adapter.py +54 -0
- tools/machine_learning_adapter.py +567 -0
- tools/mcp_tool_groups/__init__.py +15 -1
- tools/mcp_tool_groups/causal_inference_tools.py +643 -0
- tools/mcp_tool_groups/distribution_analysis_tools.py +169 -0
- tools/mcp_tool_groups/machine_learning_tools.py +422 -0
- tools/mcp_tool_groups/microecon_tools.py +325 -0
- tools/mcp_tool_groups/missing_data_tools.py +117 -0
- tools/mcp_tool_groups/nonparametric_tools.py +225 -0
- tools/mcp_tool_groups/spatial_econometrics_tools.py +323 -0
- tools/mcp_tool_groups/statistical_inference_tools.py +131 -0
- tools/mcp_tools_registry.py +13 -3
- tools/microecon_adapter.py +412 -0
- tools/missing_data_adapter.py +73 -0
- tools/nonparametric_adapter.py +190 -0
- tools/spatial_econometrics_adapter.py +318 -0
- tools/statistical_inference_adapter.py +90 -0
- tools/survival_analysis_adapter.py +46 -0
- aigroup_econ_mcp-1.4.3.dist-info/METADATA +0 -710
- aigroup_econ_mcp-1.4.3.dist-info/RECORD +0 -92
- {aigroup_econ_mcp-1.4.3.dist-info → aigroup_econ_mcp-2.0.1.dist-info}/WHEEL +0 -0
- {aigroup_econ_mcp-1.4.3.dist-info → aigroup_econ_mcp-2.0.1.dist-info}/entry_points.txt +0 -0
- {aigroup_econ_mcp-1.4.3.dist-info → aigroup_econ_mcp-2.0.1.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
"""
|
|
2
|
+
微观离散与受限数据模型模块
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
# 离散选择模型
|
|
6
|
+
from .discrete_choice_models import (
|
|
7
|
+
LogitModel,
|
|
8
|
+
ProbitModel,
|
|
9
|
+
MultinomialLogit,
|
|
10
|
+
OrderedLogit,
|
|
11
|
+
ConditionalLogit
|
|
12
|
+
)
|
|
13
|
+
|
|
14
|
+
# 受限因变量模型
|
|
15
|
+
from .limited_dependent_variable_models import (
|
|
16
|
+
TobitModel,
|
|
17
|
+
HeckmanModel
|
|
18
|
+
)
|
|
19
|
+
|
|
20
|
+
# 计数数据模型
|
|
21
|
+
from .count_data_models import (
|
|
22
|
+
PoissonModel,
|
|
23
|
+
NegativeBinomialModel,
|
|
24
|
+
ZeroInflatedPoissonModel,
|
|
25
|
+
ZeroInflatedNegativeBinomialModel
|
|
26
|
+
)
|
|
27
|
+
|
|
28
|
+
__all__ = [
|
|
29
|
+
'LogitModel',
|
|
30
|
+
'ProbitModel',
|
|
31
|
+
'MultinomialLogit',
|
|
32
|
+
'OrderedLogit',
|
|
33
|
+
'ConditionalLogit',
|
|
34
|
+
'TobitModel',
|
|
35
|
+
'HeckmanModel',
|
|
36
|
+
'PoissonModel',
|
|
37
|
+
'NegativeBinomialModel',
|
|
38
|
+
'ZeroInflatedPoissonModel',
|
|
39
|
+
'ZeroInflatedNegativeBinomialModel'
|
|
40
|
+
]
|
|
@@ -0,0 +1,311 @@
|
|
|
1
|
+
"""
|
|
2
|
+
计数数据模型模块
|
|
3
|
+
基于statsmodels等现有库实现
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
import numpy as np
|
|
7
|
+
import pandas as pd
|
|
8
|
+
import math
|
|
9
|
+
from scipy import stats
|
|
10
|
+
try:
|
|
11
|
+
import statsmodels.api as sm
|
|
12
|
+
from statsmodels.discrete.discrete_model import Poisson, NegativeBinomial
|
|
13
|
+
from statsmodels.discrete.count_model import ZeroInflatedPoisson, ZeroInflatedNegativeBinomialP
|
|
14
|
+
HAS_STATSMODELS = True
|
|
15
|
+
except ImportError:
|
|
16
|
+
HAS_STATSMODELS = False
|
|
17
|
+
Poisson = NegativeBinomial = ZeroInflatedPoisson = ZeroInflatedNegativeBinomialP = None
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class _PlaceholderModel:
    """Stand-in used in place of the count models when statsmodels is missing.

    Construction raises ``ImportError`` whenever the module-level
    ``HAS_STATSMODELS`` flag is false, so the missing dependency is reported
    at the point where a model is instantiated.
    """

    def __init__(self, *args, **kwargs):
        # Guard clause: nothing to do when the dependency imported cleanly.
        if HAS_STATSMODELS:
            return
        raise ImportError("需要安装statsmodels库: pip install statsmodels")

    def fit(self, *args, **kwargs):
        """Accept any arguments, do nothing, and return ``None``."""
        return None
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class PoissonModel:
    """Poisson regression for count outcomes, backed by statsmodels.

    An intercept column is handled internally: ``fit`` lets
    ``sm.add_constant`` decide whether one has to be added, remembers that
    decision, and the prediction methods replay it. Re-running
    ``add_constant`` on prediction data is unsafe because its default
    ``has_constant='skip'`` drops the intercept whenever a column of the
    batch is constant (always true for a single row).
    """

    def __init__(self):
        if not HAS_STATSMODELS:
            raise ImportError("需要安装statsmodels库: pip install statsmodels")
        self.model_ = None        # statsmodels model, set by fit()
        self.results_ = None      # statsmodels results, set by fit()
        self.fitted_ = False
        self._const_added_ = True  # whether fit() prepended an intercept

    def _design_matrix(self, X):
        """Return ``X`` as a 2-D array, with the intercept column fit() used."""
        features = np.asarray(X)
        if features.ndim == 1:
            # Mirror add_constant: a 1-D input is one feature, many rows.
            features = features[:, None]
        if getattr(self, "_const_added_", True):
            return np.column_stack([np.ones(features.shape[0]), features])
        return features

    def fit(self, X, y):
        """Fit the model.

        Args:
            X: Feature matrix (array-like).
            y: Non-negative integer counts.

        Returns:
            self

        Raises:
            ValueError: If ``y`` contains negative or non-integer values.
        """
        X = np.array(X)
        y = np.array(y)

        if np.any(y < 0) or np.any(y != np.floor(y)):
            raise ValueError("因变量必须是非负整数")

        X_with_const = sm.add_constant(X)
        # add_constant skips the intercept if X already has a constant
        # column; record what happened so prediction stays consistent.
        n_features = 1 if X.ndim == 1 else X.shape[1]
        self._const_added_ = X_with_const.shape[1] > n_features

        self.model_ = Poisson(y, X_with_const)
        self.results_ = self.model_.fit(disp=0)
        self.fitted_ = True
        return self

    def predict(self, X):
        """Return the expected count for each row of ``X``.

        Raises:
            ValueError: If the model has not been fitted.
        """
        if not self.fitted_:
            raise ValueError("模型尚未拟合")
        return self.results_.predict(self._design_matrix(X))

    def predict_proba(self, X, max_count=20):
        """Return P(Y = k) for k = 0..max_count, one row per observation.

        Raises:
            ValueError: If the model has not been fitted.
        """
        if not self.fitted_:
            raise ValueError("模型尚未拟合")
        mu = np.atleast_1d(
            np.asarray(self.results_.predict(self._design_matrix(X)), dtype=float)
        )
        counts = np.arange(max_count + 1)
        # scipy evaluates the pmf in log space, so large k (k! beyond float
        # range) and large mu**k no longer overflow.
        return stats.poisson.pmf(counts, mu[:, None])

    def summary(self):
        """Return the statsmodels summary of the fitted model.

        Raises:
            ValueError: If the model has not been fitted.
        """
        if not self.fitted_:
            raise ValueError("模型尚未拟合")
        return self.results_.summary()
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
class NegativeBinomialModel:
    """Negative binomial regression for over-dispersed counts (statsmodels).

    The intercept decision made by ``sm.add_constant`` during ``fit`` is
    remembered and replayed at prediction time, because re-running
    ``add_constant`` on a prediction batch can silently skip the intercept
    (e.g. for a single row).
    """

    def __init__(self, distr='nb2'):
        """Create an unfitted model.

        Args:
            distr: Passed to statsmodels as ``loglike_method``; 'nb1' or
                'nb2' (default).
        """
        if not HAS_STATSMODELS:
            raise ImportError("需要安装statsmodels库: pip install statsmodels")
        self.distr = distr
        self.model_ = None        # statsmodels model, set by fit()
        self.results_ = None      # statsmodels results, set by fit()
        self.fitted_ = False
        self._const_added_ = True  # whether fit() prepended an intercept

    def _design_matrix(self, X):
        """Return ``X`` as a 2-D array, with the intercept column fit() used."""
        features = np.asarray(X)
        if features.ndim == 1:
            features = features[:, None]
        if getattr(self, "_const_added_", True):
            return np.column_stack([np.ones(features.shape[0]), features])
        return features

    def fit(self, X, y):
        """Fit the model.

        Args:
            X: Feature matrix (array-like).
            y: Non-negative integer counts.

        Returns:
            self

        Raises:
            ValueError: If ``y`` contains negative or non-integer values.
        """
        X = np.array(X)
        y = np.array(y)

        if np.any(y < 0) or np.any(y != np.floor(y)):
            raise ValueError("因变量必须是非负整数")

        X_with_const = sm.add_constant(X)
        n_features = 1 if X.ndim == 1 else X.shape[1]
        self._const_added_ = X_with_const.shape[1] > n_features

        self.model_ = NegativeBinomial(y, X_with_const, loglike_method=self.distr)
        self.results_ = self.model_.fit(disp=0)
        self.fitted_ = True
        return self

    def predict(self, X):
        """Return the expected count for each row of ``X``.

        Raises:
            ValueError: If the model has not been fitted.
        """
        if not self.fitted_:
            raise ValueError("模型尚未拟合")
        return self.results_.predict(self._design_matrix(X))

    def predict_proba(self, X, max_count=20):
        """Return P(Y = k) for k = 0..max_count, one row per observation.

        Raises:
            ValueError: If the model has not been fitted.
        """
        if not self.fitted_:
            raise ValueError("模型尚未拟合")
        mu = np.atleast_1d(
            np.asarray(self.results_.predict(self._design_matrix(X)), dtype=float)
        )

        if self.distr == 'geometric':
            # Geometric is the negative binomial with size 1; it has no
            # dispersion parameter in the fitted params.
            size = np.ones_like(mu)
        else:
            # statsmodels converts ln(alpha) back to alpha before returning
            # results, so the last parameter is used as-is (no np.exp).
            alpha = np.asarray(self.results_.params)[-1]
            # NB-P variance is mu + alpha * mu**p; size = mu**(2 - p) / alpha.
            exponent = 1 if self.distr == 'nb1' else 0
            size = mu ** exponent / alpha
        prob = size / (size + mu)

        counts = np.arange(max_count + 1)
        return stats.nbinom.pmf(counts, size[:, None], prob[:, None])

    def summary(self):
        """Return the statsmodels summary of the fitted model.

        Raises:
            ValueError: If the model has not been fitted.
        """
        if not self.fitted_:
            raise ValueError("模型尚未拟合")
        return self.results_.summary()
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
class ZeroInflatedPoissonModel:
    """Zero-inflated Poisson model with a logit inflation part (statsmodels).

    Intercept handling chosen by ``sm.add_constant`` during ``fit`` is
    remembered for both the count and the inflation design and replayed at
    prediction time. When the inflation part shares the count regressors
    (``exog_infl=None`` at construction) the prediction design is also
    forwarded as ``exog_infl`` so predictions on new data use matching rows.
    """

    def __init__(self, exog_infl=None):
        """Create an unfitted model.

        Args:
            exog_infl: Regressors for the zero-inflation part, aligned with
                the training rows. ``None`` (default) reuses the count
                regressors.
        """
        if not HAS_STATSMODELS:
            raise ImportError("需要安装statsmodels库: pip install statsmodels")
        self.exog_infl = exog_infl
        self.model_ = None             # statsmodels model, set by fit()
        self.results_ = None           # statsmodels results, set by fit()
        self.fitted_ = False
        self._const_added_ = True       # intercept added to count design
        self._infl_const_added_ = True  # intercept added to inflation design

    @staticmethod
    def _fit_design(data):
        """Run add_constant and report whether it actually added a column."""
        raw = np.array(data)
        design = sm.add_constant(raw)
        n_cols = 1 if raw.ndim == 1 else raw.shape[1]
        return design, design.shape[1] > n_cols

    @staticmethod
    def _prepend_intercept(data, add_const):
        """Return ``data`` as 2-D, prepending a column of ones if requested."""
        arr = np.asarray(data)
        if arr.ndim == 1:
            arr = arr[:, None]
        if not add_const:
            return arr
        return np.column_stack([np.ones(arr.shape[0]), arr])

    def _predict_with(self, X, exog_infl, **options):
        """Build prediction designs and delegate to the fitted results."""
        if not self.fitted_:
            raise ValueError("模型尚未拟合")
        design = self._prepend_intercept(X, getattr(self, "_const_added_", True))
        if exog_infl is not None:
            options["exog_infl"] = self._prepend_intercept(
                exog_infl, getattr(self, "_infl_const_added_", True)
            )
        elif self.exog_infl is None:
            # Inflation part was fitted on the count design; reuse it.
            options["exog_infl"] = design
        # Otherwise custom inflation regressors were used at fit time and
        # none were supplied now: leave the choice to statsmodels.
        return self.results_.predict(design, **options)

    def fit(self, X, y):
        """Fit the model.

        Args:
            X: Feature matrix for the count part (array-like).
            y: Non-negative integer counts.

        Returns:
            self

        Raises:
            ValueError: If ``y`` contains negative or non-integer values.
        """
        X = np.array(X)
        y = np.array(y)

        if np.any(y < 0) or np.any(y != np.floor(y)):
            raise ValueError("因变量必须是非负整数")

        X_with_const, self._const_added_ = self._fit_design(X)

        if self.exog_infl is not None:
            exog_infl, self._infl_const_added_ = self._fit_design(self.exog_infl)
        else:
            exog_infl = X_with_const
            self._infl_const_added_ = self._const_added_

        self.model_ = ZeroInflatedPoisson(
            endog=y,
            exog=X_with_const,
            exog_infl=exog_infl,
            inflation='logit',
        )
        self.results_ = self.model_.fit(disp=0)
        self.fitted_ = True
        return self

    def predict(self, X, exog_infl=None):
        """Return the expected count for each row of ``X``.

        Args:
            X: Feature matrix for the count part.
            exog_infl: Optional inflation regressors for the same rows;
                needed when custom inflation regressors were used in fit.

        Raises:
            ValueError: If the model has not been fitted.
        """
        return self._predict_with(X, exog_infl)

    def predict_proba(self, X, exog_infl=None):
        """Return the predicted count distribution (statsmodels ``which='prob'``).

        Args:
            X: Feature matrix for the count part.
            exog_infl: Optional inflation regressors for the same rows.

        Raises:
            ValueError: If the model has not been fitted.
        """
        return self._predict_with(X, exog_infl, which='prob')

    def summary(self):
        """Return the statsmodels summary of the fitted model.

        Raises:
            ValueError: If the model has not been fitted.
        """
        if not self.fitted_:
            raise ValueError("模型尚未拟合")
        return self.results_.summary()
|
|
231
|
+
|
|
232
|
+
|
|
233
|
+
class ZeroInflatedNegativeBinomialModel:
    """Zero-inflated negative binomial model, logit inflation (statsmodels).

    ``distr`` is translated to the ``p`` argument that
    ``ZeroInflatedNegativeBinomialP`` actually accepts ('nb1' -> 1,
    'nb2' -> 2). Intercept handling chosen by ``sm.add_constant`` during
    ``fit`` is remembered and replayed at prediction time, and the
    prediction design is forwarded as ``exog_infl`` when the inflation part
    shares the count regressors.
    """

    # Maps the public ``distr`` name to the NB-P power parameter.
    _P_BY_DISTR = {'nb1': 1, 'nb2': 2}

    def __init__(self, exog_infl=None, distr='nb2'):
        """Create an unfitted model.

        Args:
            exog_infl: Regressors for the zero-inflation part, aligned with
                the training rows. ``None`` (default) reuses the count
                regressors.
            distr: 'nb1' or 'nb2' (default).
        """
        if not HAS_STATSMODELS:
            raise ImportError("需要安装statsmodels库: pip install statsmodels")
        self.exog_infl = exog_infl
        self.distr = distr
        self.model_ = None             # statsmodels model, set by fit()
        self.results_ = None           # statsmodels results, set by fit()
        self.fitted_ = False
        self._const_added_ = True       # intercept added to count design
        self._infl_const_added_ = True  # intercept added to inflation design

    def _power_parameter(self):
        """Return the NB-P ``p`` for ``self.distr``; ValueError if unknown."""
        try:
            return self._P_BY_DISTR[self.distr]
        except (KeyError, TypeError):
            raise ValueError(
                f"distr 必须是 'nb1' 或 'nb2',收到: {self.distr!r}"
            ) from None

    @staticmethod
    def _fit_design(data):
        """Run add_constant and report whether it actually added a column."""
        raw = np.array(data)
        design = sm.add_constant(raw)
        n_cols = 1 if raw.ndim == 1 else raw.shape[1]
        return design, design.shape[1] > n_cols

    @staticmethod
    def _prepend_intercept(data, add_const):
        """Return ``data`` as 2-D, prepending a column of ones if requested."""
        arr = np.asarray(data)
        if arr.ndim == 1:
            arr = arr[:, None]
        if not add_const:
            return arr
        return np.column_stack([np.ones(arr.shape[0]), arr])

    def _predict_with(self, X, exog_infl, **options):
        """Build prediction designs and delegate to the fitted results."""
        if not self.fitted_:
            raise ValueError("模型尚未拟合")
        design = self._prepend_intercept(X, getattr(self, "_const_added_", True))
        if exog_infl is not None:
            options["exog_infl"] = self._prepend_intercept(
                exog_infl, getattr(self, "_infl_const_added_", True)
            )
        elif self.exog_infl is None:
            # Inflation part was fitted on the count design; reuse it.
            options["exog_infl"] = design
        return self.results_.predict(design, **options)

    def fit(self, X, y):
        """Fit the model.

        Args:
            X: Feature matrix for the count part (array-like).
            y: Non-negative integer counts.

        Returns:
            self

        Raises:
            ValueError: If ``y`` contains negative or non-integer values, or
                ``distr`` is not 'nb1' / 'nb2'.
        """
        X = np.array(X)
        y = np.array(y)

        if np.any(y < 0) or np.any(y != np.floor(y)):
            raise ValueError("因变量必须是非负整数")
        power = self._power_parameter()

        X_with_const, self._const_added_ = self._fit_design(X)

        if self.exog_infl is not None:
            exog_infl, self._infl_const_added_ = self._fit_design(self.exog_infl)
        else:
            exog_infl = X_with_const
            self._infl_const_added_ = self._const_added_

        # ZeroInflatedNegativeBinomialP has no ``loglike_method`` argument;
        # the parameterisation is selected through ``p``.
        self.model_ = ZeroInflatedNegativeBinomialP(
            endog=y,
            exog=X_with_const,
            exog_infl=exog_infl,
            inflation='logit',
            p=power,
        )
        self.results_ = self.model_.fit(disp=0)
        self.fitted_ = True
        return self

    def predict(self, X, exog_infl=None):
        """Return the expected count for each row of ``X``.

        Args:
            X: Feature matrix for the count part.
            exog_infl: Optional inflation regressors for the same rows;
                needed when custom inflation regressors were used in fit.

        Raises:
            ValueError: If the model has not been fitted.
        """
        return self._predict_with(X, exog_infl)

    def predict_proba(self, X, exog_infl=None):
        """Return the predicted count distribution (statsmodels ``which='prob'``).

        Args:
            X: Feature matrix for the count part.
            exog_infl: Optional inflation regressors for the same rows.

        Raises:
            ValueError: If the model has not been fitted.
        """
        return self._predict_with(X, exog_infl, which='prob')

    def summary(self):
        """Return the statsmodels summary of the fitted model.

        Raises:
            ValueError: If the model has not been fitted.
        """
        if not self.fitted_:
            raise ValueError("模型尚未拟合")
        return self.results_.summary()
|
|
304
|
+
|
|
305
|
+
|
|
306
|
+
# Without statsmodels, every public count-model name resolves to the
# placeholder class, whose constructor raises ImportError.
if not HAS_STATSMODELS:
    PoissonModel = NegativeBinomialModel = _PlaceholderModel
    ZeroInflatedPoissonModel = ZeroInflatedNegativeBinomialModel = _PlaceholderModel
|
|
@@ -0,0 +1,294 @@
|
|
|
1
|
+
"""
|
|
2
|
+
离散选择模型模块
|
|
3
|
+
基于statsmodels等现有库实现
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
import numpy as np
|
|
7
|
+
import pandas as pd
|
|
8
|
+
try:
|
|
9
|
+
import statsmodels.api as sm
|
|
10
|
+
from statsmodels.discrete.discrete_model import Logit, Probit, MNLogit
|
|
11
|
+
from statsmodels.miscmodels.ordinal_model import OrderedModel
|
|
12
|
+
# 注意: statsmodels目前没有内置ConditionalLogit,需要自定义或使用其他库
|
|
13
|
+
HAS_STATSMODELS = True
|
|
14
|
+
except ImportError:
|
|
15
|
+
HAS_STATSMODELS = False
|
|
16
|
+
Logit = Probit = MNLogit = OrderedModel = None
|
|
17
|
+
|
|
18
|
+
# Placeholder exported instead of the real models if statsmodels is absent.
class _PlaceholderModel:
    """Stub model whose constructor reports the missing statsmodels package."""

    def __init__(self, *args, **kwargs):
        # Arguments are accepted and ignored; only the dependency flag matters.
        if HAS_STATSMODELS:
            return
        raise ImportError("需要安装statsmodels库: pip install statsmodels")

    def fit(self, *args, **kwargs):
        """No-op that accepts any arguments and returns ``None``."""
        return None
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class LogitModel:
    """Binary logistic regression backed by statsmodels ``Logit``.

    ``fit`` lets ``sm.add_constant`` decide whether an intercept column is
    needed and remembers the outcome; prediction replays that decision.
    Calling ``add_constant`` again on prediction data is unsafe because its
    default ``has_constant='skip'`` omits the intercept whenever a column of
    the batch is constant (always true for a single row).
    """

    def __init__(self):
        self.model_ = None        # statsmodels model, set by fit()
        self.results_ = None      # statsmodels results, set by fit()
        self.fitted_ = False
        self._const_added_ = True  # whether fit() prepended an intercept

    def _design_matrix(self, X):
        """Return ``X`` as a 2-D array, with the intercept column fit() used."""
        features = np.asarray(X)
        if features.ndim == 1:
            # Mirror add_constant: a 1-D input is one feature, many rows.
            features = features[:, None]
        if getattr(self, "_const_added_", True):
            return np.column_stack([np.ones(features.shape[0]), features])
        return features

    def fit(self, X, y):
        """Fit the model.

        Args:
            X: Feature matrix (array-like).
            y: Binary outcome.

        Returns:
            self

        Raises:
            ImportError: If statsmodels is not installed.
        """
        if not HAS_STATSMODELS:
            raise ImportError("需要安装statsmodels库: pip install statsmodels")

        X = np.array(X)
        y = np.array(y)

        X_with_const = sm.add_constant(X)
        n_features = 1 if X.ndim == 1 else X.shape[1]
        self._const_added_ = X_with_const.shape[1] > n_features

        self.model_ = Logit(y, X_with_const)
        self.results_ = self.model_.fit(disp=0)
        self.fitted_ = True
        return self

    def predict_proba(self, X):
        """Return the fitted results' prediction for each row of ``X``.

        Raises:
            ValueError: If the model has not been fitted.
        """
        if not self.fitted_:
            raise ValueError("模型尚未拟合")
        return self.results_.predict(self._design_matrix(X))

    def predict(self, X, threshold=0.5):
        """Return 1 where the predicted probability is >= ``threshold``, else 0."""
        proba = self.predict_proba(X)
        return (proba >= threshold).astype(int)

    def summary(self):
        """Return the statsmodels summary of the fitted model.

        Raises:
            ValueError: If the model has not been fitted.
        """
        if not self.fitted_:
            raise ValueError("模型尚未拟合")
        return self.results_.summary()
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
class ProbitModel:
    """Binary probit regression backed by statsmodels ``Probit``.

    The intercept decision made by ``sm.add_constant`` during ``fit`` is
    stored and replayed at prediction time, since re-running
    ``add_constant`` on a prediction batch can silently skip the intercept
    (for example when predicting a single row).
    """

    def __init__(self):
        self.model_ = None        # statsmodels model, set by fit()
        self.results_ = None      # statsmodels results, set by fit()
        self.fitted_ = False
        self._const_added_ = True  # whether fit() prepended an intercept

    def _design_matrix(self, X):
        """Return ``X`` as a 2-D array, with the intercept column fit() used."""
        features = np.asarray(X)
        if features.ndim == 1:
            features = features[:, None]
        if getattr(self, "_const_added_", True):
            return np.column_stack([np.ones(features.shape[0]), features])
        return features

    def fit(self, X, y):
        """Fit the model.

        Args:
            X: Feature matrix (array-like).
            y: Binary outcome.

        Returns:
            self

        Raises:
            ImportError: If statsmodels is not installed.
        """
        if not HAS_STATSMODELS:
            raise ImportError("需要安装statsmodels库: pip install statsmodels")

        X = np.array(X)
        y = np.array(y)

        X_with_const = sm.add_constant(X)
        n_features = 1 if X.ndim == 1 else X.shape[1]
        self._const_added_ = X_with_const.shape[1] > n_features

        self.model_ = Probit(y, X_with_const)
        self.results_ = self.model_.fit(disp=0)
        self.fitted_ = True
        return self

    def predict_proba(self, X):
        """Return the fitted results' prediction for each row of ``X``.

        Raises:
            ValueError: If the model has not been fitted.
        """
        if not self.fitted_:
            raise ValueError("模型尚未拟合")
        return self.results_.predict(self._design_matrix(X))

    def predict(self, X, threshold=0.5):
        """Return 1 where the predicted probability is >= ``threshold``, else 0."""
        proba = self.predict_proba(X)
        return (proba >= threshold).astype(int)

    def summary(self):
        """Return the statsmodels summary of the fitted model.

        Raises:
            ValueError: If the model has not been fitted.
        """
        if not self.fitted_:
            raise ValueError("模型尚未拟合")
        return self.results_.summary()
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
class MultinomialLogit:
    """Multinomial logit classifier backed by statsmodels ``MNLogit``.

    The intercept decision made by ``sm.add_constant`` during ``fit`` is
    stored and replayed at prediction time, since re-running
    ``add_constant`` on a prediction batch can silently skip the intercept
    (for example when predicting a single row).
    """

    def __init__(self):
        self.model_ = None        # statsmodels model, set by fit()
        self.results_ = None      # statsmodels results, set by fit()
        self.fitted_ = False
        self.classes_ = None      # sorted unique labels seen in fit()
        self._const_added_ = True  # whether fit() prepended an intercept

    def _design_matrix(self, X):
        """Return ``X`` as a 2-D array, with the intercept column fit() used."""
        features = np.asarray(X)
        if features.ndim == 1:
            features = features[:, None]
        if getattr(self, "_const_added_", True):
            return np.column_stack([np.ones(features.shape[0]), features])
        return features

    def fit(self, X, y):
        """Fit the model.

        Args:
            X: Feature matrix (array-like).
            y: Class labels.

        Returns:
            self

        Raises:
            ImportError: If statsmodels is not installed.
        """
        if not HAS_STATSMODELS:
            raise ImportError("需要安装statsmodels库: pip install statsmodels")

        X = np.array(X)
        y = np.array(y)

        self.classes_ = np.unique(y)

        X_with_const = sm.add_constant(X)
        n_features = 1 if X.ndim == 1 else X.shape[1]
        self._const_added_ = X_with_const.shape[1] > n_features

        self.model_ = MNLogit(y, X_with_const)
        self.results_ = self.model_.fit(disp=0)
        self.fitted_ = True
        return self

    def predict_proba(self, X):
        """Return the per-class probabilities predicted for each row of ``X``.

        Raises:
            ValueError: If the model has not been fitted.
        """
        if not self.fitted_:
            raise ValueError("模型尚未拟合")
        return self.results_.predict(self._design_matrix(X))

    def predict(self, X):
        """Return, for each row, the label in ``classes_`` with the highest probability."""
        proba = self.predict_proba(X)
        return self.classes_[np.argmax(proba, axis=1)]

    def summary(self):
        """Return the statsmodels summary of the fitted model.

        Raises:
            ValueError: If the model has not been fitted.
        """
        if not self.fitted_:
            raise ValueError("模型尚未拟合")
        return self.results_.summary()
|
|
170
|
+
|
|
171
|
+
|
|
172
|
+
class OrderedLogit:
    """Ordered logit model built on statsmodels ``OrderedModel``.

    No intercept column is added to the features, neither when fitting nor
    when predicting.
    """

    def __init__(self):
        self.classes_ = None
        self.fitted_ = False
        self.results_ = None
        self.model_ = None

    def _require_fitted(self):
        """Raise ValueError unless fit() has completed."""
        if self.fitted_:
            return
        raise ValueError("模型尚未拟合")

    def fit(self, X, y):
        """Fit the ordered logit model with BFGS and return ``self``.

        Raises:
            ImportError: If statsmodels is not installed.
        """
        if not HAS_STATSMODELS:
            raise ImportError("需要安装statsmodels库: pip install statsmodels")

        features = np.array(X)
        labels = np.array(y)
        self.classes_ = np.unique(labels)

        # The features are handed over as-is, with no constant column.
        ordered = OrderedModel(labels, features, distr='logit')
        self.model_ = ordered
        self.results_ = ordered.fit(method='bfgs', disp=0)
        self.fitted_ = True
        return self

    def predict_proba(self, X):
        """Return the per-class probabilities predicted for ``X``.

        Raises:
            ValueError: If the model has not been fitted.
        """
        self._require_fitted()
        features = np.array(X)
        # As in fit(), no constant column is added.
        return self.results_.predict(features)

    def predict(self, X):
        """Return, for each row, the label in ``classes_`` with the highest probability."""
        best_column = np.argmax(self.predict_proba(X), axis=1)
        return self.classes_[best_column]

    def summary(self):
        """Return the statsmodels summary of the fitted model.

        Raises:
            ValueError: If the model has not been fitted.
        """
        self._require_fitted()
        return self.results_.summary()
|
|
218
|
+
|
|
219
|
+
|
|
220
|
+
class ConditionalLogit:
    """Simplified conditional logit fitted by maximum likelihood with scipy.

    Each group is one choice set; probabilities are a softmax of ``X @ beta``
    taken within the group. Only numpy and scipy are required.
    """

    def __init__(self):
        self.params_ = None   # estimated coefficient vector, set by fit()
        self.fitted_ = False

    @staticmethod
    def _index_groups(groups, n_rows):
        """Map group labels to 0..n_groups-1 and validate their length."""
        labels = np.asarray(groups)
        if labels.shape[0] != n_rows:
            raise ValueError("groups must have one entry per row of X")
        _, group_index = np.unique(labels, return_inverse=True)
        group_index = group_index.ravel()
        n_groups = int(group_index.max()) + 1 if group_index.size else 0
        return group_index, n_groups

    @staticmethod
    def _group_log_softmax(scores, group_index, n_groups):
        """Return log-softmax of ``scores`` computed separately per group."""
        # Subtract each group's maximum first so np.exp cannot overflow.
        peak = np.full(n_groups, -np.inf)
        np.maximum.at(peak, group_index, scores)
        shifted = scores - peak[group_index]
        denom = np.bincount(group_index, weights=np.exp(shifted), minlength=n_groups)
        return shifted - np.log(denom)[group_index]

    def fit(self, X, y, groups):
        """Estimate the coefficients.

        Args:
            X: 2-D feature matrix, one row per alternative.
            y: Choice indicator per row (1 for the chosen alternative).
            groups: Choice-set identifier per row.

        Returns:
            self

        Raises:
            ValueError: If ``X`` is not 2-D or the input lengths disagree.
            RuntimeError: If the optimiser reports failure.
        """
        from scipy.optimize import minimize

        X = np.asarray(X, dtype=float)
        y = np.asarray(y, dtype=float)
        if X.ndim != 2:
            raise ValueError("X must be a 2-D array")
        n_samples, n_features = X.shape
        if y.shape[0] != n_samples:
            raise ValueError("y must have one entry per row of X")

        # Group bookkeeping is invariant across optimiser iterations.
        group_index, n_groups = self._index_groups(groups, n_samples)
        chosen_per_group = np.bincount(group_index, weights=y, minlength=n_groups)
        xty = X.T @ y

        def objective(beta):
            """Negative log-likelihood and its gradient at ``beta``."""
            log_probs = self._group_log_softmax(X @ beta, group_index, n_groups)
            expected = chosen_per_group[group_index] * np.exp(log_probs)
            return -np.dot(y, log_probs), -(xty - X.T @ expected)

        # The objective is convex, so a fixed zero start gives reproducible
        # estimates; the analytic gradient avoids finite-difference noise.
        result = minimize(objective, np.zeros(n_features), jac=True, method='BFGS')

        if not result.success:
            raise RuntimeError("模型优化失败")
        self.params_ = result.x
        self.fitted_ = True
        return self

    def predict_proba(self, X, groups=None):
        """Return choice probabilities for the rows of ``X``.

        Args:
            X: Feature matrix, one row per alternative.
            groups: Optional choice-set identifier per row. When omitted all
                rows are treated as a single choice set.

        Raises:
            ValueError: If the model has not been fitted.
        """
        if not self.fitted_:
            raise ValueError("模型尚未拟合")
        X = np.array(X)
        scores = np.dot(X, self.params_)
        if groups is None:
            exp_scores = np.exp(scores - np.max(scores))  # numerical stability
            return exp_scores / np.sum(exp_scores)
        scores = np.atleast_1d(np.asarray(scores, dtype=float))
        group_index, n_groups = self._index_groups(groups, scores.shape[0])
        return np.exp(self._group_log_softmax(scores, group_index, n_groups))

    def predict(self, X, groups=None):
        """Return 0/1 choice predictions.

        Without ``groups`` a row is marked 1 when its probability is >= 0.5.
        With ``groups`` exactly one row per choice set is marked 1: the one
        with the highest probability (first row wins ties).
        """
        proba = self.predict_proba(X, groups)
        if groups is None:
            return (proba >= 0.5).astype(int)

        chosen = np.zeros(proba.shape[0], dtype=int)
        if proba.size == 0:
            return chosen
        group_index, _ = self._index_groups(groups, proba.shape[0])
        # Sort by group, then by descending probability; lexsort is stable,
        # so the first row of each run is that group's best alternative.
        order = np.lexsort((-proba, group_index))
        sorted_groups = group_index[order]
        starts = np.r_[True, sorted_groups[1:] != sorted_groups[:-1]]
        chosen[order[starts]] = 1
        return chosen
|
|
286
|
+
|
|
287
|
+
|
|
288
|
+
# Without statsmodels, replace every model that is built on it with the
# placeholder class, whose constructor raises ImportError.
# ConditionalLogit is intentionally NOT replaced: it is implemented with
# numpy and scipy only, so it remains usable without statsmodels.
if not HAS_STATSMODELS:
    LogitModel = _PlaceholderModel
    ProbitModel = _PlaceholderModel
    MultinomialLogit = _PlaceholderModel
    OrderedLogit = _PlaceholderModel
|