aigroup-econ-mcp 1.3.3__py3-none-any.whl → 2.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- .gitignore +253 -0
- PKG-INFO +732 -0
- README.md +687 -0
- __init__.py +14 -0
- aigroup_econ_mcp-2.0.1.dist-info/METADATA +732 -0
- aigroup_econ_mcp-2.0.1.dist-info/RECORD +170 -0
- aigroup_econ_mcp-2.0.1.dist-info/entry_points.txt +2 -0
- aigroup_econ_mcp-2.0.1.dist-info/licenses/LICENSE +21 -0
- cli.py +32 -0
- econometrics/README.md +18 -0
- econometrics/__init__.py +191 -0
- econometrics/advanced_methods/modern_computing_machine_learning/__init__.py +30 -0
- econometrics/advanced_methods/modern_computing_machine_learning/causal_forest.py +253 -0
- econometrics/advanced_methods/modern_computing_machine_learning/double_ml.py +268 -0
- econometrics/advanced_methods/modern_computing_machine_learning/gradient_boosting.py +249 -0
- econometrics/advanced_methods/modern_computing_machine_learning/hierarchical_clustering.py +243 -0
- econometrics/advanced_methods/modern_computing_machine_learning/kmeans_clustering.py +293 -0
- econometrics/advanced_methods/modern_computing_machine_learning/neural_network.py +264 -0
- econometrics/advanced_methods/modern_computing_machine_learning/random_forest.py +195 -0
- econometrics/advanced_methods/modern_computing_machine_learning/support_vector_machine.py +226 -0
- econometrics/advanced_methods/modern_computing_machine_learning/test_all_modules.py +329 -0
- econometrics/advanced_methods/modern_computing_machine_learning/test_report.md +107 -0
- econometrics/basic_parametric_estimation/__init__.py +31 -0
- econometrics/basic_parametric_estimation/gmm/__init__.py +13 -0
- econometrics/basic_parametric_estimation/gmm/gmm_model.py +256 -0
- econometrics/basic_parametric_estimation/mle/__init__.py +13 -0
- econometrics/basic_parametric_estimation/mle/mle_model.py +241 -0
- econometrics/basic_parametric_estimation/ols/__init__.py +13 -0
- econometrics/basic_parametric_estimation/ols/ols_model.py +141 -0
- econometrics/causal_inference/__init__.py +66 -0
- econometrics/causal_inference/causal_identification_strategy/__init__.py +104 -0
- econometrics/causal_inference/causal_identification_strategy/control_function.py +112 -0
- econometrics/causal_inference/causal_identification_strategy/difference_in_differences.py +107 -0
- econometrics/causal_inference/causal_identification_strategy/event_study.py +119 -0
- econometrics/causal_inference/causal_identification_strategy/first_difference.py +89 -0
- econometrics/causal_inference/causal_identification_strategy/fixed_effects.py +103 -0
- econometrics/causal_inference/causal_identification_strategy/hausman_test.py +69 -0
- econometrics/causal_inference/causal_identification_strategy/instrumental_variables.py +145 -0
- econometrics/causal_inference/causal_identification_strategy/mediation_analysis.py +121 -0
- econometrics/causal_inference/causal_identification_strategy/moderation_analysis.py +109 -0
- econometrics/causal_inference/causal_identification_strategy/propensity_score_matching.py +140 -0
- econometrics/causal_inference/causal_identification_strategy/random_effects.py +100 -0
- econometrics/causal_inference/causal_identification_strategy/regression_discontinuity.py +98 -0
- econometrics/causal_inference/causal_identification_strategy/synthetic_control.py +111 -0
- econometrics/causal_inference/causal_identification_strategy/triple_difference.py +86 -0
- econometrics/distribution_analysis/__init__.py +28 -0
- econometrics/distribution_analysis/oaxaca_blinder.py +184 -0
- econometrics/distribution_analysis/time_series_decomposition.py +152 -0
- econometrics/distribution_analysis/variance_decomposition.py +179 -0
- econometrics/missing_data/__init__.py +18 -0
- econometrics/missing_data/imputation_methods.py +219 -0
- econometrics/missing_data/missing_data_measurement_error/__init__.py +0 -0
- econometrics/model_specification_diagnostics_robust_inference/README.md +173 -0
- econometrics/model_specification_diagnostics_robust_inference/__init__.py +78 -0
- econometrics/model_specification_diagnostics_robust_inference/diagnostic_tests/__init__.py +20 -0
- econometrics/model_specification_diagnostics_robust_inference/diagnostic_tests/diagnostic_tests_model.py +149 -0
- econometrics/model_specification_diagnostics_robust_inference/generalized_least_squares/__init__.py +15 -0
- econometrics/model_specification_diagnostics_robust_inference/generalized_least_squares/gls_model.py +130 -0
- econometrics/model_specification_diagnostics_robust_inference/model_selection/__init__.py +18 -0
- econometrics/model_specification_diagnostics_robust_inference/model_selection/model_selection_model.py +286 -0
- econometrics/model_specification_diagnostics_robust_inference/regularization/__init__.py +15 -0
- econometrics/model_specification_diagnostics_robust_inference/regularization/regularization_model.py +177 -0
- econometrics/model_specification_diagnostics_robust_inference/robust_errors/__init__.py +15 -0
- econometrics/model_specification_diagnostics_robust_inference/robust_errors/robust_errors_model.py +122 -0
- econometrics/model_specification_diagnostics_robust_inference/simultaneous_equations/__init__.py +15 -0
- econometrics/model_specification_diagnostics_robust_inference/simultaneous_equations/simultaneous_equations_model.py +246 -0
- econometrics/model_specification_diagnostics_robust_inference/weighted_least_squares/__init__.py +15 -0
- econometrics/model_specification_diagnostics_robust_inference/weighted_least_squares/wls_model.py +127 -0
- econometrics/nonparametric/__init__.py +35 -0
- econometrics/nonparametric/gam_model.py +117 -0
- econometrics/nonparametric/kernel_regression.py +161 -0
- econometrics/nonparametric/nonparametric_semiparametric_methods/__init__.py +0 -0
- econometrics/nonparametric/quantile_regression.py +249 -0
- econometrics/nonparametric/spline_regression.py +100 -0
- econometrics/spatial_econometrics/__init__.py +68 -0
- econometrics/spatial_econometrics/geographically_weighted_regression.py +211 -0
- econometrics/spatial_econometrics/gwr_simple.py +154 -0
- econometrics/spatial_econometrics/spatial_autocorrelation.py +356 -0
- econometrics/spatial_econometrics/spatial_durbin_model.py +177 -0
- econometrics/spatial_econometrics/spatial_econometrics_new/__init__.py +0 -0
- econometrics/spatial_econometrics/spatial_regression.py +315 -0
- econometrics/spatial_econometrics/spatial_weights.py +226 -0
- econometrics/specific_data_modeling/micro_discrete_limited_data/README.md +164 -0
- econometrics/specific_data_modeling/micro_discrete_limited_data/__init__.py +40 -0
- econometrics/specific_data_modeling/micro_discrete_limited_data/count_data_models.py +311 -0
- econometrics/specific_data_modeling/micro_discrete_limited_data/discrete_choice_models.py +294 -0
- econometrics/specific_data_modeling/micro_discrete_limited_data/limited_dependent_variable_models.py +282 -0
- econometrics/specific_data_modeling/survival_duration_data/__init__.py +0 -0
- econometrics/specific_data_modeling/time_series_panel_data/__init__.py +143 -0
- econometrics/specific_data_modeling/time_series_panel_data/arima_model.py +104 -0
- econometrics/specific_data_modeling/time_series_panel_data/cointegration_vecm.py +334 -0
- econometrics/specific_data_modeling/time_series_panel_data/dynamic_panel_models.py +653 -0
- econometrics/specific_data_modeling/time_series_panel_data/exponential_smoothing.py +176 -0
- econometrics/specific_data_modeling/time_series_panel_data/garch_model.py +198 -0
- econometrics/specific_data_modeling/time_series_panel_data/panel_diagnostics.py +125 -0
- econometrics/specific_data_modeling/time_series_panel_data/panel_var.py +60 -0
- econometrics/specific_data_modeling/time_series_panel_data/structural_break_tests.py +87 -0
- econometrics/specific_data_modeling/time_series_panel_data/time_varying_parameter_models.py +106 -0
- econometrics/specific_data_modeling/time_series_panel_data/unit_root_tests.py +204 -0
- econometrics/specific_data_modeling/time_series_panel_data/var_svar_model.py +372 -0
- econometrics/statistical_inference/__init__.py +21 -0
- econometrics/statistical_inference/bootstrap_methods.py +162 -0
- econometrics/statistical_inference/permutation_test.py +177 -0
- econometrics/statistical_inference/statistical_inference_techniques/__init__.py +0 -0
- econometrics/statistics/distribution_decomposition_methods/__init__.py +0 -0
- econometrics/survival_analysis/__init__.py +18 -0
- econometrics/survival_analysis/survival_models.py +259 -0
- econometrics/tests/basic_parametric_estimation_tests/__init__.py +3 -0
- econometrics/tests/basic_parametric_estimation_tests/test_gmm.py +128 -0
- econometrics/tests/basic_parametric_estimation_tests/test_mle.py +127 -0
- econometrics/tests/basic_parametric_estimation_tests/test_ols.py +100 -0
- econometrics/tests/causal_inference_tests/__init__.py +3 -0
- econometrics/tests/causal_inference_tests/detailed_test.py +441 -0
- econometrics/tests/causal_inference_tests/test_all_methods.py +418 -0
- econometrics/tests/causal_inference_tests/test_causal_identification_strategy.py +202 -0
- econometrics/tests/causal_inference_tests/test_difference_in_differences.py +53 -0
- econometrics/tests/causal_inference_tests/test_instrumental_variables.py +44 -0
- econometrics/tests/model_specification_diagnostics_tests/__init__.py +3 -0
- econometrics/tests/model_specification_diagnostics_tests/test_diagnostic_tests.py +86 -0
- econometrics/tests/model_specification_diagnostics_tests/test_robust_errors.py +89 -0
- econometrics/tests/specific_data_modeling_tests/__init__.py +3 -0
- econometrics/tests/specific_data_modeling_tests/test_arima.py +98 -0
- econometrics/tests/specific_data_modeling_tests/test_dynamic_panel.py +198 -0
- econometrics/tests/specific_data_modeling_tests/test_exponential_smoothing.py +105 -0
- econometrics/tests/specific_data_modeling_tests/test_garch.py +118 -0
- econometrics/tests/specific_data_modeling_tests/test_micro_discrete_limited_data.py +189 -0
- econometrics/tests/specific_data_modeling_tests/test_unit_root.py +156 -0
- econometrics/tests/specific_data_modeling_tests/test_var.py +124 -0
- econometrics//321/206/320/254/320/272/321/205/342/225/235/320/220/321/205/320/237/320/241/321/205/320/264/320/267/321/207/342/226/222/342/225/227/321/204/342/225/235/320/250/321/205/320/225/320/230/321/207/342/225/221/320/267/321/205/320/230/320/226/321/206/320/256/320/240.md +544 -0
- prompts/__init__.py +0 -0
- prompts/analysis_guides.py +43 -0
- pyproject.toml +85 -0
- resources/MCP_MASTER_GUIDE.md +422 -0
- resources/MCP_TOOLS_DATA_FORMAT_GUIDE.md +185 -0
- resources/__init__.py +0 -0
- server.py +97 -0
- tools/README.md +88 -0
- tools/__init__.py +119 -0
- tools/causal_inference_adapter.py +658 -0
- tools/data_loader.py +213 -0
- tools/decorators.py +38 -0
- tools/distribution_analysis_adapter.py +121 -0
- tools/econometrics_adapter.py +286 -0
- tools/gwr_simple_adapter.py +54 -0
- tools/machine_learning_adapter.py +567 -0
- tools/mcp_tool_groups/__init__.py +15 -0
- tools/mcp_tool_groups/basic_parametric_tools.py +173 -0
- tools/mcp_tool_groups/causal_inference_tools.py +643 -0
- tools/mcp_tool_groups/distribution_analysis_tools.py +169 -0
- tools/mcp_tool_groups/machine_learning_tools.py +422 -0
- tools/mcp_tool_groups/microecon_tools.py +325 -0
- tools/mcp_tool_groups/missing_data_tools.py +117 -0
- tools/mcp_tool_groups/model_specification_tools.py +402 -0
- tools/mcp_tool_groups/nonparametric_tools.py +225 -0
- tools/mcp_tool_groups/spatial_econometrics_tools.py +323 -0
- tools/mcp_tool_groups/statistical_inference_tools.py +131 -0
- tools/mcp_tool_groups/time_series_tools.py +494 -0
- tools/mcp_tools_registry.py +124 -0
- tools/microecon_adapter.py +412 -0
- tools/missing_data_adapter.py +73 -0
- tools/model_specification_adapter.py +369 -0
- tools/nonparametric_adapter.py +190 -0
- tools/output_formatter.py +563 -0
- tools/spatial_econometrics_adapter.py +318 -0
- tools/statistical_inference_adapter.py +90 -0
- tools/survival_analysis_adapter.py +46 -0
- tools/time_series_panel_data_adapter.py +858 -0
- tools/time_series_panel_data_tools.py +65 -0
- aigroup_econ_mcp/__init__.py +0 -19
- aigroup_econ_mcp/cli.py +0 -82
- aigroup_econ_mcp/config.py +0 -561
- aigroup_econ_mcp/server.py +0 -452
- aigroup_econ_mcp/tools/__init__.py +0 -19
- aigroup_econ_mcp/tools/base.py +0 -470
- aigroup_econ_mcp/tools/cache.py +0 -533
- aigroup_econ_mcp/tools/data_loader.py +0 -195
- aigroup_econ_mcp/tools/file_parser.py +0 -1027
- aigroup_econ_mcp/tools/machine_learning.py +0 -60
- aigroup_econ_mcp/tools/ml_ensemble.py +0 -210
- aigroup_econ_mcp/tools/ml_evaluation.py +0 -272
- aigroup_econ_mcp/tools/ml_models.py +0 -54
- aigroup_econ_mcp/tools/ml_regularization.py +0 -186
- aigroup_econ_mcp/tools/monitoring.py +0 -555
- aigroup_econ_mcp/tools/optimized_example.py +0 -229
- aigroup_econ_mcp/tools/panel_data.py +0 -619
- aigroup_econ_mcp/tools/regression.py +0 -214
- aigroup_econ_mcp/tools/statistics.py +0 -154
- aigroup_econ_mcp/tools/time_series.py +0 -698
- aigroup_econ_mcp/tools/timeout.py +0 -283
- aigroup_econ_mcp/tools/tool_descriptions.py +0 -410
- aigroup_econ_mcp/tools/tool_handlers.py +0 -1016
- aigroup_econ_mcp/tools/tool_registry.py +0 -478
- aigroup_econ_mcp/tools/validation.py +0 -482
- aigroup_econ_mcp-1.3.3.dist-info/METADATA +0 -525
- aigroup_econ_mcp-1.3.3.dist-info/RECORD +0 -30
- aigroup_econ_mcp-1.3.3.dist-info/entry_points.txt +0 -2
- /aigroup_econ_mcp-1.3.3.dist-info/licenses/LICENSE → /LICENSE +0 -0
- {aigroup_econ_mcp-1.3.3.dist-info → aigroup_econ_mcp-2.0.1.dist-info}/WHEEL +0 -0
econometrics/specific_data_modeling/micro_discrete_limited_data/limited_dependent_variable_models.py
ADDED
@@ -0,0 +1,282 @@
"""
Limited dependent variable models.
Implemented on top of existing libraries such as statsmodels.
"""

import numpy as np
import pandas as pd
from scipy import stats

try:
    import statsmodels.api as sm
    from statsmodels.regression.linear_model import OLS
    HAS_STATSMODELS = True
except ImportError:
    HAS_STATSMODELS = False
    OLS = None

try:
    from statsmodels.base.model import GenericLikelihoodModel
    HAS_GENERIC_MODEL = True
except ImportError:
    HAS_GENERIC_MODEL = False


class _PlaceholderModel:
    def __init__(self, *args, **kwargs):
        if not HAS_STATSMODELS:
            raise ImportError("statsmodels is required: pip install statsmodels")

    def fit(self, *args, **kwargs):
        pass


class TobitModel:
    """
    Tobit model (censored regression).
    statsmodels has no built-in Tobit estimator, so this implementation is based on
    GenericLikelihoodModel.
    """

    def __init__(self, lower_bound=0, upper_bound=None):
        """
        Initialize the Tobit model.

        Parameters:
            lower_bound: lower censoring threshold, default 0
            upper_bound: upper censoring threshold, default None (no upper bound)
        """
        if not HAS_STATSMODELS:
            raise ImportError("statsmodels is required: pip install statsmodels")

        if not HAS_GENERIC_MODEL:
            raise ImportError("GenericLikelihoodModel from statsmodels is required")

        self.lower_bound = lower_bound
        self.upper_bound = upper_bound
        self.model_ = None
        self.results_ = None
        self.fitted_ = False

    def fit(self, X, y):
        """Fit the Tobit model."""
        X = np.array(X)
        y = np.array(y)

        # Add a constant term
        X_with_const = sm.add_constant(X)

        # Define the Tobit likelihood
        class TobitLikelihoodModel(GenericLikelihoodModel):
            def __init__(self, endog, exog, lower_bound=0, upper_bound=None, **kwds):
                self.lower_bound = lower_bound
                self.upper_bound = upper_bound
                # Register the extra scale parameter so parameter names and summaries line up
                kwds.setdefault("extra_params_names", ["sigma"])
                super(TobitLikelihoodModel, self).__init__(endog, exog, **kwds)

            def loglikeobs(self, params):
                # Split the parameter vector into coefficients and sigma
                beta = params[:-1]
                sigma = params[-1]

                if sigma <= 0:
                    return np.full_like(self.endog, -np.inf, dtype=float)

                # Linear prediction
                xb = np.dot(self.exog, beta)
                z = (self.endog - xb) / sigma

                # Compute the log-likelihood
                if self.upper_bound is None:
                    # Censoring from below only
                    censored = self.endog <= self.lower_bound
                    uncensored = ~censored

                    ll = np.zeros_like(self.endog, dtype=float)
                    # Log-likelihood of censored observations
                    ll[censored] = stats.norm.logcdf((self.lower_bound - xb[censored]) / sigma)
                    # Log-likelihood of uncensored observations
                    ll[uncensored] = -0.5 * np.log(2 * np.pi * sigma**2) - 0.5 * z[uncensored]**2
                else:
                    # Censoring on both sides
                    left_censored = self.endog <= self.lower_bound
                    right_censored = self.endog >= self.upper_bound
                    uncensored = ~(left_censored | right_censored)

                    ll = np.zeros_like(self.endog, dtype=float)
                    # Log-likelihood of left-censored observations
                    ll[left_censored] = stats.norm.logcdf((self.lower_bound - xb[left_censored]) / sigma)
                    # Log-likelihood of right-censored observations
                    ll[right_censored] = stats.norm.logsf((self.upper_bound - xb[right_censored]) / sigma)
                    # Log-likelihood of uncensored observations
                    ll[uncensored] = -0.5 * np.log(2 * np.pi * sigma**2) - 0.5 * z[uncensored]**2

                return ll

        # Create and fit the model
        self.model_ = TobitLikelihoodModel(
            endog=y,
            exog=X_with_const,
            lower_bound=self.lower_bound,
            upper_bound=self.upper_bound
        )

        # Starting values
        n_features = X_with_const.shape[1]
        initial_params = np.concatenate([
            np.zeros(n_features),  # beta
            [np.std(y[y > self.lower_bound]) if self.upper_bound is None else np.std(y)]  # sigma
        ])

        self.results_ = self.model_.fit(start_params=initial_params, method='bfgs', disp=0)
        self.fitted_ = True
        return self

    def predict(self, X):
        """Predict the expected value."""
        if not self.fitted_:
            raise ValueError("Model has not been fitted yet")
        X = np.array(X)
        X_with_const = sm.add_constant(X)

        # Compute predictions manually
        beta = self.results_.params[:-1]  # exclude the sigma parameter
        sigma = self.results_.params[-1]

        xb = np.dot(X_with_const, beta)

        if self.upper_bound is None:
            # Censoring from below only
            z = (self.lower_bound - xb) / sigma
            lambda_val = stats.norm.pdf(z) / np.clip(1 - stats.norm.cdf(z), 1e-10, 1)
            return xb + sigma * lambda_val
        else:
            # Censoring on both sides
            z_lower = (self.lower_bound - xb) / sigma
            z_upper = (self.upper_bound - xb) / sigma

            lambda_lower = stats.norm.pdf(z_lower) / np.clip(stats.norm.cdf(z_upper) - stats.norm.cdf(z_lower), 1e-10, 1)
            lambda_upper = stats.norm.pdf(z_upper) / np.clip(stats.norm.cdf(z_upper) - stats.norm.cdf(z_lower), 1e-10, 1)

            return xb + sigma * (lambda_lower - lambda_upper)

    def predict_linear(self, X):
        """Return the linear predictor."""
        if not self.fitted_:
            raise ValueError("Model has not been fitted yet")
        X = np.array(X)
        X_with_const = sm.add_constant(X)
        xb = np.dot(X_with_const, self.results_.params[:-1])  # exclude the sigma parameter
        return xb

    def summary(self):
        """Return the model summary."""
        if not self.fitted_:
            raise ValueError("Model has not been fitted yet")
        return self.results_.summary()


class HeckmanModel:
    """
    Heckman two-step selection model (implemented with statsmodels).
    """

    def __init__(self):
        self.selection_model_ = None
        self.selection_results_ = None
        self.outcome_model_ = None
        self.outcome_results_ = None
        self.fitted_ = False

    def fit(self, X_select, Z, y, s):
        """
        Fit the Heckman model.

        Parameters:
            X_select: regressor matrix of the selection equation
            Z: regressor matrix of the outcome equation
            y: outcome vector (observed only for selected observations)
            s: selection indicator vector (1 = selected, 0 = not selected)
        """
        if not HAS_STATSMODELS:
            raise ImportError("statsmodels is required: pip install statsmodels")

        X_select = np.array(X_select)
        Z = np.array(Z)
        y = np.array(y)
        s = np.array(s)

        # First stage: estimate the selection equation with a probit model
        X_select_with_const = sm.add_constant(X_select)
        self.selection_model_ = sm.Probit(s, X_select_with_const)
        self.selection_results_ = self.selection_model_.fit(disp=0)

        # Compute the inverse Mills ratio
        X_select_linpred = np.dot(X_select_with_const, self.selection_results_.params)
        mills_ratio = stats.norm.pdf(X_select_linpred) / np.clip(stats.norm.cdf(X_select_linpred), 1e-10, 1 - 1e-10)
        # Set the Mills ratio to 0 for non-selected observations
        mills_ratio = mills_ratio * s

        # Second stage: OLS on the outcome equation augmented with the inverse Mills ratio
        Z_with_mills = np.column_stack([Z, mills_ratio])
        Z_with_mills_const = sm.add_constant(Z_with_mills)

        # Regress on the selected observations only
        selected_mask = s == 1
        Z_selected = Z_with_mills_const[selected_mask]
        y_selected = y[selected_mask]

        self.outcome_model_ = OLS(y_selected, Z_selected)
        self.outcome_results_ = self.outcome_model_.fit()

        self.fitted_ = True
        return self

    def predict(self, X_select, Z):
        """Predict the outcome."""
        if not self.fitted_:
            raise ValueError("Model has not been fitted yet")

        X_select = np.array(X_select)
        Z = np.array(Z)

        # Add a constant term
        X_select_with_const = sm.add_constant(X_select)

        # Compute the inverse Mills ratio
        X_select_linpred = np.dot(X_select_with_const, self.selection_results_.params)
        mills_ratio = stats.norm.pdf(X_select_linpred) / np.clip(stats.norm.cdf(X_select_linpred), 1e-10, 1 - 1e-10)

        # Build the prediction matrix: Z + inverse Mills ratio + constant
        Z_with_mills = np.column_stack([Z, mills_ratio])
        Z_with_mills_const = sm.add_constant(Z_with_mills)

        # Outcome equation predictions
        outcome_pred = self.outcome_results_.predict(Z_with_mills_const)

        return outcome_pred

    def summary(self):
        """Return the model summaries."""
        if not self.fitted_:
            raise ValueError("Model has not been fitted yet")
        return {
            'selection_summary': self.selection_results_.summary(),
            'outcome_summary': self.outcome_results_.summary()
        }


# Fall back to the placeholder when statsmodels is unavailable
if not HAS_STATSMODELS:
    TobitModel = _PlaceholderModel
    HeckmanModel = _PlaceholderModel


def multinomial_logit():
    """
    Placeholder for a multinomial logit model.
    """
    pass


def nested_logit():
    """
    Placeholder for a nested logit model.
    """
    pass
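For orientation, a minimal usage sketch of the two estimators added in this file. It is not part of the published diff: the import path is assumed from the file list above, and the data are synthetic.

# Illustrative sketch only (not part of the published diff): the import path is assumed
# from the file list above and the data are synthetic.
import numpy as np
from econometrics.specific_data_modeling.micro_discrete_limited_data.limited_dependent_variable_models import (
    TobitModel,
    HeckmanModel,
)

rng = np.random.default_rng(0)
n = 500
X = rng.normal(size=(n, 2))
y_star = 0.5 + X @ np.array([1.0, -0.5]) + rng.normal(size=n)   # latent outcome
y_cens = np.clip(y_star, 0, None)                               # observed outcome, left-censored at 0

tobit = TobitModel(lower_bound=0).fit(X, y_cens)
print(tobit.summary())
print(tobit.predict(X[:5]))          # model-implied expected outcomes for the first five rows

# Heckman: selection depends on the outcome regressors plus one extra instrument
W = np.column_stack([X, rng.normal(size=n)])
s = (0.2 + W @ np.array([0.8, 0.3, 1.0]) + rng.normal(size=n) > 0).astype(int)
y_obs = np.where(s == 1, y_star, 0.0)                           # outcome observed only when s == 1

heckman = HeckmanModel().fit(X_select=W, Z=X, y=y_obs, s=s)
print(heckman.summary()["outcome_summary"])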
econometrics/specific_data_modeling/survival_duration_data/__init__.py
File without changes
econometrics/specific_data_modeling/time_series_panel_data/__init__.py
ADDED
@@ -0,0 +1,143 @@
"""
Time series and panel data module.
"""

# ARIMA model
from .arima_model import (
    ARIMAResult,
    arima_model
)

# Exponential smoothing
from .exponential_smoothing import (
    ExponentialSmoothingResult,
    exponential_smoothing_model
)

# VAR/SVAR models
from .var_svar_model import (
    VARResult,
    var_model,
    svar_model
)

# GARCH model
from .garch_model import (
    GARCHResult,
    garch_model
)

# Cointegration analysis / VECM
from .cointegration_vecm import (
    CointegrationResult,
    VECMResult,
    engle_granger_cointegration_test,
    johansen_cointegration_test,
    vecm_model
)

# Panel VAR
from .panel_var import (
    PanelVARResult,
    panel_var_model
)

# Unit root tests
from .unit_root_tests import (
    UnitRootTestResult,
    adf_test,
    pp_test,
    kpss_test
)

# Dynamic panel models
from .dynamic_panel_models import (
    DynamicPanelResult,
    diff_gmm_model,
    sys_gmm_model
)

# Structural break tests
from .structural_break_tests import (
    StructuralBreakResult,
    chow_test,
    quandt_andrews_test,
    bai_perron_test
)

# Panel data diagnostics
from .panel_diagnostics import (
    PanelDiagnosticResult,
    hausman_test,
    pooling_f_test,
    lm_test,
    within_correlation_test
)

# Time-varying parameter models
from .time_varying_parameter_models import (
    TimeVaryingParameterResult,
    tar_model,
    star_model,
    markov_switching_model
)

__all__ = [
    # ARIMA model
    "ARIMAResult",
    "arima_model",

    # Exponential smoothing
    "ExponentialSmoothingResult",
    "exponential_smoothing_model",

    # VAR/SVAR models
    "VARResult",
    "var_model",
    "svar_model",

    # GARCH model
    "GARCHResult",
    "garch_model",

    # Cointegration analysis / VECM
    "CointegrationResult",
    "VECMResult",
    "engle_granger_cointegration_test",
    "johansen_cointegration_test",
    "vecm_model",

    # Panel VAR
    "PanelVARResult",
    "panel_var_model",

    # Unit root tests
    "UnitRootTestResult",
    "adf_test",
    "pp_test",
    "kpss_test",

    # Dynamic panel models
    "DynamicPanelResult",
    "diff_gmm_model",
    "sys_gmm_model",

    # Structural break tests
    "StructuralBreakResult",
    "chow_test",
    "quandt_andrews_test",
    "bai_perron_test",

    # Panel data diagnostics
    "PanelDiagnosticResult",
    "hausman_test",
    "pooling_f_test",
    "lm_test",
    "within_correlation_test",

    # Time-varying parameter models
    "TimeVaryingParameterResult",
    "tar_model",
    "star_model",
    "markov_switching_model"
]
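This __init__.py only aggregates re-exports, so callers can import every estimator from one namespace instead of individual submodules. A minimal sketch of that usage follows; the import path is assumed from the package layout in this diff and the series is synthetic.

# Illustrative sketch only (not part of the published diff): import path assumed from
# the package layout above; the data are synthetic.
import numpy as np
from econometrics.specific_data_modeling.time_series_panel_data import ARIMAResult, arima_model

series = np.cumsum(np.random.default_rng(1).normal(size=120)).tolist()
res = arima_model(series, order=(0, 1, 1), forecast_steps=3)
assert isinstance(res, ARIMAResult)
print(res.forecast)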
econometrics/specific_data_modeling/time_series_panel_data/arima_model.py
ADDED
@@ -0,0 +1,104 @@
"""
ARIMA model implementation.
"""

from typing import List, Optional
from pydantic import BaseModel, Field
import numpy as np


class ARIMAResult(BaseModel):
    """ARIMA model results"""
    model_type: str = Field(..., description="Model type")
    order: tuple = Field(..., description="Model order (p, d, q)")
    coefficients: List[float] = Field(..., description="Estimated coefficients")
    std_errors: Optional[List[float]] = Field(None, description="Coefficient standard errors")
    t_values: Optional[List[float]] = Field(None, description="t statistics")
    p_values: Optional[List[float]] = Field(None, description="p-values")
    conf_int_lower: Optional[List[float]] = Field(None, description="Lower confidence bounds")
    conf_int_upper: Optional[List[float]] = Field(None, description="Upper confidence bounds")
    aic: Optional[float] = Field(None, description="Akaike information criterion")
    bic: Optional[float] = Field(None, description="Bayesian information criterion")
    hqic: Optional[float] = Field(None, description="Hannan-Quinn information criterion")
    r_squared: Optional[float] = Field(None, description="R-squared")
    adj_r_squared: Optional[float] = Field(None, description="Adjusted R-squared")
    n_obs: int = Field(..., description="Number of observations")
    forecast: Optional[List[float]] = Field(None, description="Forecast values")


def arima_model(
    data: List[float],
    order: tuple = (1, 1, 1),
    forecast_steps: int = 1
) -> ARIMAResult:
    """
    Fit an ARIMA model.

    Args:
        data: time series data
        order: (p, d, q) specification
        forecast_steps: number of forecast steps

    Returns:
        ARIMAResult: ARIMA model results
    """
    try:
        # Import the statsmodels ARIMA model
        from statsmodels.tsa.arima.model import ARIMA as StatsARIMA

        # Fit the ARIMA model
        model = StatsARIMA(data, order=order)
        fitted_model = model.fit()

        # Extract the parameter estimates
        params = fitted_model.params.tolist()
        std_errors = fitted_model.bse.tolist() if fitted_model.bse is not None else None
        t_values = fitted_model.tvalues.tolist() if fitted_model.tvalues is not None else None
        p_values = fitted_model.pvalues.tolist() if fitted_model.pvalues is not None else None

        # Confidence intervals
        conf_int = fitted_model.conf_int()
        if conf_int is not None:
            conf_int = np.asarray(conf_int)
            conf_int_lower = conf_int[:, 0].tolist()
            conf_int_upper = conf_int[:, 1].tolist()
        else:
            conf_int_lower = None
            conf_int_upper = None

        # Forecast
        forecast_result = fitted_model.forecast(steps=forecast_steps)
        forecast = np.asarray(forecast_result).tolist()

        # Model fit statistics
        aic = float(fitted_model.aic) if hasattr(fitted_model, 'aic') else None
        bic = float(fitted_model.bic) if hasattr(fitted_model, 'bic') else None
        hqic = float(fitted_model.hqic) if hasattr(fitted_model, 'hqic') else None

        # ARIMA results usually do not report an R-squared, since the model is judged by
        # forecast error rather than explained variance; report it only if available.
        r_squared = float(fitted_model.rsquared) if hasattr(fitted_model, 'rsquared') else None
        adj_r_squared = float(fitted_model.rsquared_adj) if hasattr(fitted_model, 'rsquared_adj') else None

        p, d, q = order

        return ARIMAResult(
            model_type=f"ARIMA({p},{d},{q})",
            order=order,
            coefficients=params,
            std_errors=std_errors,
            t_values=t_values,
            p_values=p_values,
            conf_int_lower=conf_int_lower,
            conf_int_upper=conf_int_upper,
            aic=aic,
            bic=bic,
            hqic=hqic,
            r_squared=r_squared,
            adj_r_squared=adj_r_squared,
            n_obs=len(data),
            forecast=forecast
        )
    except Exception as e:
        # Re-raise any failure with context
        raise ValueError(f"ARIMA model fitting failed: {str(e)}")
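A minimal usage sketch of the arima_model helper defined above. It is not part of the published diff: the import path is assumed from the package layout, and the series is a synthetic random walk with drift, so the default differencing order d=1 is a sensible choice.

# Illustrative sketch only (not part of the published diff): import path assumed from
# the package layout above; the series is synthetic.
import numpy as np
from econometrics.specific_data_modeling.time_series_panel_data.arima_model import arima_model

rng = np.random.default_rng(0)
series = np.cumsum(0.3 + rng.normal(size=200)).tolist()

result = arima_model(series, order=(1, 1, 1), forecast_steps=5)
print(result.model_type)            # "ARIMA(1,1,1)"
print(result.aic, result.bic)       # information criteria for model comparison
print(result.forecast)              # 5-step-ahead point forecasts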