aigroup-econ-mcp 1.4.3__py3-none-any.whl → 2.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- PKG-INFO +344 -322
- README.md +335 -320
- __init__.py +1 -1
- aigroup_econ_mcp-2.0.1.dist-info/METADATA +732 -0
- aigroup_econ_mcp-2.0.1.dist-info/RECORD +170 -0
- cli.py +4 -0
- econometrics/advanced_methods/modern_computing_machine_learning/__init__.py +30 -0
- econometrics/advanced_methods/modern_computing_machine_learning/causal_forest.py +253 -0
- econometrics/advanced_methods/modern_computing_machine_learning/double_ml.py +268 -0
- econometrics/advanced_methods/modern_computing_machine_learning/gradient_boosting.py +249 -0
- econometrics/advanced_methods/modern_computing_machine_learning/hierarchical_clustering.py +243 -0
- econometrics/advanced_methods/modern_computing_machine_learning/kmeans_clustering.py +293 -0
- econometrics/advanced_methods/modern_computing_machine_learning/neural_network.py +264 -0
- econometrics/advanced_methods/modern_computing_machine_learning/random_forest.py +195 -0
- econometrics/advanced_methods/modern_computing_machine_learning/support_vector_machine.py +226 -0
- econometrics/advanced_methods/modern_computing_machine_learning/test_all_modules.py +329 -0
- econometrics/advanced_methods/modern_computing_machine_learning/test_report.md +107 -0
- econometrics/causal_inference/__init__.py +66 -0
- econometrics/causal_inference/causal_identification_strategy/__init__.py +104 -0
- econometrics/causal_inference/causal_identification_strategy/control_function.py +112 -0
- econometrics/causal_inference/causal_identification_strategy/difference_in_differences.py +107 -0
- econometrics/causal_inference/causal_identification_strategy/event_study.py +119 -0
- econometrics/causal_inference/causal_identification_strategy/first_difference.py +89 -0
- econometrics/causal_inference/causal_identification_strategy/fixed_effects.py +103 -0
- econometrics/causal_inference/causal_identification_strategy/hausman_test.py +69 -0
- econometrics/causal_inference/causal_identification_strategy/instrumental_variables.py +145 -0
- econometrics/causal_inference/causal_identification_strategy/mediation_analysis.py +121 -0
- econometrics/causal_inference/causal_identification_strategy/moderation_analysis.py +109 -0
- econometrics/causal_inference/causal_identification_strategy/propensity_score_matching.py +140 -0
- econometrics/causal_inference/causal_identification_strategy/random_effects.py +100 -0
- econometrics/causal_inference/causal_identification_strategy/regression_discontinuity.py +98 -0
- econometrics/causal_inference/causal_identification_strategy/synthetic_control.py +111 -0
- econometrics/causal_inference/causal_identification_strategy/triple_difference.py +86 -0
- econometrics/distribution_analysis/__init__.py +28 -0
- econometrics/distribution_analysis/oaxaca_blinder.py +184 -0
- econometrics/distribution_analysis/time_series_decomposition.py +152 -0
- econometrics/distribution_analysis/variance_decomposition.py +179 -0
- econometrics/missing_data/__init__.py +18 -0
- econometrics/missing_data/imputation_methods.py +219 -0
- econometrics/nonparametric/__init__.py +35 -0
- econometrics/nonparametric/gam_model.py +117 -0
- econometrics/nonparametric/kernel_regression.py +161 -0
- econometrics/nonparametric/quantile_regression.py +249 -0
- econometrics/nonparametric/spline_regression.py +100 -0
- econometrics/spatial_econometrics/__init__.py +68 -0
- econometrics/spatial_econometrics/geographically_weighted_regression.py +211 -0
- econometrics/spatial_econometrics/gwr_simple.py +154 -0
- econometrics/spatial_econometrics/spatial_autocorrelation.py +356 -0
- econometrics/spatial_econometrics/spatial_durbin_model.py +177 -0
- econometrics/spatial_econometrics/spatial_regression.py +315 -0
- econometrics/spatial_econometrics/spatial_weights.py +226 -0
- econometrics/specific_data_modeling/micro_discrete_limited_data/README.md +164 -0
- econometrics/specific_data_modeling/micro_discrete_limited_data/__init__.py +40 -0
- econometrics/specific_data_modeling/micro_discrete_limited_data/count_data_models.py +311 -0
- econometrics/specific_data_modeling/micro_discrete_limited_data/discrete_choice_models.py +294 -0
- econometrics/specific_data_modeling/micro_discrete_limited_data/limited_dependent_variable_models.py +282 -0
- econometrics/statistical_inference/__init__.py +21 -0
- econometrics/statistical_inference/bootstrap_methods.py +162 -0
- econometrics/statistical_inference/permutation_test.py +177 -0
- econometrics/survival_analysis/__init__.py +18 -0
- econometrics/survival_analysis/survival_models.py +259 -0
- econometrics/tests/causal_inference_tests/__init__.py +3 -0
- econometrics/tests/causal_inference_tests/detailed_test.py +441 -0
- econometrics/tests/causal_inference_tests/test_all_methods.py +418 -0
- econometrics/tests/causal_inference_tests/test_causal_identification_strategy.py +202 -0
- econometrics/tests/causal_inference_tests/test_difference_in_differences.py +53 -0
- econometrics/tests/causal_inference_tests/test_instrumental_variables.py +44 -0
- econometrics/tests/specific_data_modeling_tests/test_micro_discrete_limited_data.py +189 -0
- econometrics/未开发大类优先级分析.md +544 -0
- pyproject.toml +9 -2
- server.py +15 -1
- tools/__init__.py +75 -1
- tools/causal_inference_adapter.py +658 -0
- tools/distribution_analysis_adapter.py +121 -0
- tools/gwr_simple_adapter.py +54 -0
- tools/machine_learning_adapter.py +567 -0
- tools/mcp_tool_groups/__init__.py +15 -1
- tools/mcp_tool_groups/causal_inference_tools.py +643 -0
- tools/mcp_tool_groups/distribution_analysis_tools.py +169 -0
- tools/mcp_tool_groups/machine_learning_tools.py +422 -0
- tools/mcp_tool_groups/microecon_tools.py +325 -0
- tools/mcp_tool_groups/missing_data_tools.py +117 -0
- tools/mcp_tool_groups/nonparametric_tools.py +225 -0
- tools/mcp_tool_groups/spatial_econometrics_tools.py +323 -0
- tools/mcp_tool_groups/statistical_inference_tools.py +131 -0
- tools/mcp_tools_registry.py +13 -3
- tools/microecon_adapter.py +412 -0
- tools/missing_data_adapter.py +73 -0
- tools/nonparametric_adapter.py +190 -0
- tools/spatial_econometrics_adapter.py +318 -0
- tools/statistical_inference_adapter.py +90 -0
- tools/survival_analysis_adapter.py +46 -0
- aigroup_econ_mcp-1.4.3.dist-info/METADATA +0 -710
- aigroup_econ_mcp-1.4.3.dist-info/RECORD +0 -92
- {aigroup_econ_mcp-1.4.3.dist-info → aigroup_econ_mcp-2.0.1.dist-info}/WHEEL +0 -0
- {aigroup_econ_mcp-1.4.3.dist-info → aigroup_econ_mcp-2.0.1.dist-info}/entry_points.txt +0 -0
- {aigroup_econ_mcp-1.4.3.dist-info → aigroup_econ_mcp-2.0.1.dist-info}/licenses/LICENSE +0 -0
The diff hunks below correspond, in order, to the six new causal identification modules listed above.

econometrics/causal_inference/causal_identification_strategy/control_function.py
@@ -0,0 +1,112 @@
+"""
+Control function approach implementation
+"""
+
+from typing import List, Optional
+import numpy as np
+import pandas as pd
+from pydantic import BaseModel, Field
+import statsmodels.api as sm
+from scipy import stats
+
+
+class ControlFunctionResult(BaseModel):
+    """Control function approach results"""
+    method: str = Field(default="Control Function Approach", description="Causal identification method used")
+    estimate: float = Field(..., description="Causal effect estimate")
+    std_error: float = Field(..., description="Standard error")
+    t_statistic: float = Field(..., description="t-statistic")
+    p_value: float = Field(..., description="p-value")
+    confidence_interval: List[float] = Field(..., description="Confidence interval")
+    n_observations: int = Field(..., description="Number of observations")
+    endogeneity_test: Optional[dict] = Field(None, description="Endogeneity test results")
+
+
+def control_function_approach(
+    y: List[float],
+    x: List[float],
+    z: List[List[float]],
+    constant: bool = True
+) -> ControlFunctionResult:
+    """
+    Control function approach
+
+    Addresses endogeneity by adding the first-stage regression residuals as a regressor in the second-stage regression.
+
+    Args:
+        y: Dependent variable
+        x: Endogenous regressor
+        z: Exogenous variables (instruments and exogenous controls)
+        constant: Whether to include a constant term
+
+    Returns:
+        ControlFunctionResult: Control function results
+    """
+    # Convert to numpy arrays
+    y_array = np.array(y)
+    x_array = np.array(x)
+    z_array = np.array(z)
+
+    if z_array.ndim == 1:
+        z_array = z_array.reshape(-1, 1)
+
+    n = len(y)
+
+    # First stage: regress the endogenous variable x on all exogenous variables z
+    if constant:
+        Z = np.column_stack([np.ones(n), z_array])
+    else:
+        Z = z_array
+
+    # First-stage regression
+    first_stage_model = sm.OLS(x_array, Z)
+    first_stage_results = first_stage_model.fit()
+
+    # First-stage residuals
+    x_residuals = first_stage_results.resid
+
+    # Second stage: regress y on x and the first-stage residuals
+    if constant:
+        X_second = np.column_stack([np.ones(n), x_array, x_residuals])
+    else:
+        X_second = np.column_stack([x_array, x_residuals])
+
+    second_stage_model = sm.OLS(y_array, X_second)
+    second_stage_results = second_stage_model.fit()
+
+    # The coefficient on x is the causal effect estimate.
+    # With a constant, x is the second column; otherwise the first.
+    x_coef_idx = 1 if constant else 0
+    coef = second_stage_results.params[x_coef_idx]
+    stderr = second_stage_results.bse[x_coef_idx]
+    tstat = second_stage_results.tvalues[x_coef_idx]
+    pval = second_stage_results.pvalues[x_coef_idx]
+
+    # 95% confidence interval
+    ci_lower = coef - 1.96 * stderr
+    ci_upper = coef + 1.96 * stderr
+
+    # Endogeneity test: is the control-function (residual) coefficient significant?
+    residual_coef_idx = 2 if constant else 1
+    residual_coef = second_stage_results.params[residual_coef_idx]
+    residual_stderr = second_stage_results.bse[residual_coef_idx]
+    residual_tstat = second_stage_results.tvalues[residual_coef_idx]
+    residual_pval = second_stage_results.pvalues[residual_coef_idx]
+
+    endogeneity_test = {
+        "residual_coefficient": float(residual_coef),
+        "residual_std_error": float(residual_stderr),
+        "t_statistic": float(residual_tstat),
+        "p_value": float(residual_pval),
+        "interpretation": "A significant residual coefficient indicates an endogeneity problem"
+    }
+
+    return ControlFunctionResult(
+        estimate=float(coef),
+        std_error=float(stderr),
+        t_statistic=float(tstat),
+        p_value=float(pval),
+        confidence_interval=[float(ci_lower), float(ci_upper)],
+        n_observations=n,
+        endogeneity_test=endogeneity_test
+    )
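For orientation, a minimal usage sketch of the new function follows. The import path is inferred from the file layout above, and the toy data are illustrative assumptions, not package documentation:

```python
# Hypothetical usage sketch; assumes the module path shown in the file list.
import numpy as np
from econometrics.causal_inference.causal_identification_strategy.control_function import (
    control_function_approach,
)

rng = np.random.default_rng(0)
z = rng.normal(size=100)                  # instrument
u = rng.normal(size=100)                  # unobserved confounder
x = 0.8 * z + u + rng.normal(size=100)    # endogenous regressor
y = 2.0 * x + u + rng.normal(size=100)    # outcome; true effect is 2.0

result = control_function_approach(y=y.tolist(), x=x.tolist(), z=z.tolist())
print(result.estimate, result.endogeneity_test["p_value"])
```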
econometrics/causal_inference/causal_identification_strategy/difference_in_differences.py
@@ -0,0 +1,107 @@
+"""
+Difference-in-differences (DID) implementation
+"""
+
+from typing import List, Optional, Dict, Any
+import numpy as np
+import pandas as pd
+from pydantic import BaseModel, Field
+from scipy import stats
+import statsmodels.api as sm
+
+
+class DIDResult(BaseModel):
+    """Difference-in-differences results"""
+    method: str = Field(default="Difference-in-Differences", description="Causal identification method used")
+    estimate: float = Field(..., description="Causal effect estimate")
+    std_error: float = Field(..., description="Standard error")
+    t_statistic: float = Field(..., description="t-statistic")
+    p_value: float = Field(..., description="p-value")
+    confidence_interval: List[float] = Field(..., description="Confidence interval")
+    n_observations: int = Field(..., description="Number of observations")
+    parallel_trend_test: Optional[Dict[str, Any]] = Field(None, description="Parallel trend test")
+
+
+def difference_in_differences(
+    treatment: List[int],
+    time_period: List[int],
+    outcome: List[float],
+    covariates: Optional[List[List[float]]] = None
+) -> DIDResult:
+    """
+    Difference-in-differences (DID)
+
+    Implements difference-in-differences with statsmodels to estimate the treatment effect.
+
+    Args:
+        treatment: Treatment group dummy (0/1)
+        time_period: Time dummy (0/1)
+        outcome: Outcome variable
+        covariates: Covariates
+
+    Returns:
+        DIDResult: Difference-in-differences results
+    """
+    # Assemble the data
+    data = {
+        'treatment': treatment,
+        'time': time_period,
+        'outcome': outcome
+    }
+
+    # Add covariates
+    if covariates:
+        covariates_array = np.array(covariates)
+        if covariates_array.ndim == 1:
+            covariates_array = covariates_array.reshape(-1, 1)
+
+        k_cov = covariates_array.shape[1]
+        for i in range(k_cov):
+            data[f"covariate_{i+1}"] = covariates_array[:, i]
+
+    df = pd.DataFrame(data)
+
+    # Interaction term
+    df['treatment_time'] = df['treatment'] * df['time']
+
+    # Regressor list
+    independent_vars = ['treatment', 'time', 'treatment_time']
+    if covariates:
+        independent_vars.extend([f"covariate_{i+1}" for i in range(k_cov)])
+
+    # Add a constant term
+    df['const'] = 1
+    independent_vars = ['const'] + independent_vars
+
+    # OLS regression with statsmodels
+    X = df[independent_vars]
+    y = df['outcome']
+
+    model = sm.OLS(y, X)
+    results = model.fit()
+
+    # DID estimate: the interaction-term coefficient
+    coef = results.params['treatment_time']
+    stderr = results.bse['treatment_time']
+    tstat = results.tvalues['treatment_time']
+    pval = results.pvalues['treatment_time']
+
+    # 95% confidence interval
+    ci_lower = coef - 1.96 * stderr
+    ci_upper = coef + 1.96 * stderr
+
+    # Parallel trend test (simplified placeholder)
+    # Illustrative only: a real test needs data from multiple pre-treatment periods
+    parallel_trend = {
+        "description": "Simplified parallel trend test - full test requires pre-treatment periods"
+    }
+
+    return DIDResult(
+        estimate=float(coef),
+        std_error=float(stderr),
+        t_statistic=float(tstat),
+        p_value=float(pval),
+        confidence_interval=[float(ci_lower), float(ci_upper)],
+        n_observations=len(df),
+        parallel_trend_test=parallel_trend
+    )
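A minimal 2x2 usage sketch (import path inferred from the file list; the numbers are toy data):

```python
# Hypothetical usage: two groups observed pre/post treatment.
from econometrics.causal_inference.causal_identification_strategy.difference_in_differences import (
    difference_in_differences,
)

treatment   = [0, 0, 0, 0, 1, 1, 1, 1]   # control vs. treated group
time_period = [0, 1, 0, 1, 0, 1, 0, 1]   # pre vs. post period
outcome     = [1.0, 1.1, 0.9, 1.2, 1.0, 2.1, 1.1, 2.3]

result = difference_in_differences(treatment, time_period, outcome)
print(f"DID estimate: {result.estimate:.3f} (p = {result.p_value:.3f})")
```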
econometrics/causal_inference/causal_identification_strategy/event_study.py
@@ -0,0 +1,119 @@
+"""
+Event study implementation
+"""
+
+from typing import List, Optional
+import numpy as np
+import pandas as pd
+from pydantic import BaseModel, Field
+import statsmodels.api as sm
+from scipy import stats
+
+
+class EventStudyResult(BaseModel):
+    """Event study results"""
+    method: str = Field(default="Event Study", description="Causal identification method used")
+    estimates: List[float] = Field(..., description="Effect estimate for each period")
+    std_errors: List[float] = Field(..., description="Standard error for each period")
+    t_statistics: List[float] = Field(..., description="t-statistic for each period")
+    p_values: List[float] = Field(..., description="p-value for each period")
+    confidence_intervals: List[List[float]] = Field(..., description="Confidence interval for each period")
+    n_observations: int = Field(..., description="Number of observations")
+    event_time_periods: List[int] = Field(..., description="List of event-time periods")
+
+
+def event_study(
+    outcome: List[float],
+    treatment: List[int],
+    entity_ids: List[str],
+    time_periods: List[str],
+    event_time: List[int]
+) -> EventStudyResult:
+    """
+    Event study
+
+    Estimates effects at multiple points before and after treatment to trace the dynamic pattern of the treatment effect.
+
+    Args:
+        outcome: Outcome variable
+        treatment: Treatment status variable
+        entity_ids: Entity identifiers
+        time_periods: Time identifiers
+        event_time: Time relative to the event (e.g. -2, -1, 0, 1, 2)
+
+    Returns:
+        EventStudyResult: Event study results
+    """
+    # Assemble the data
+    df = pd.DataFrame({
+        'outcome': outcome,
+        'treatment': treatment,
+        'entity': entity_ids,
+        'time': time_periods,
+        'event_time': event_time
+    })
+
+    # Event-time dummies
+    time_dummies = pd.get_dummies(df['event_time'], prefix='time')
+    df = pd.concat([df, time_dummies], axis=1)
+
+    # Interact with treatment status
+    for col in time_dummies.columns:
+        df[f'{col}_treated'] = df[col] * df['treatment']
+
+    # Build the regression design matrix
+    interaction_vars = [col for col in df.columns if col.endswith('_treated')]
+    X = df[interaction_vars]
+    X = sm.add_constant(X)  # add a constant term
+    y = df['outcome']
+
+    # OLS regression
+    model = sm.OLS(y, X)
+    results = model.fit()
+
+    # Extract the per-period effect estimates
+    estimates = []
+    std_errors = []
+    t_statistics = []
+    p_values = []
+    confidence_intervals = []
+    event_time_periods = []
+
+    for col in interaction_vars:
+        # Parse the period from the column name
+        time_period = int(col.replace('time_', '').replace('_treated', ''))
+        event_time_periods.append(time_period)
+
+        coef = results.params[col]
+        stderr = results.bse[col]
+        tstat = results.tvalues[col]
+        pval = results.pvalues[col]
+
+        # 95% confidence interval
+        ci_lower = coef - 1.96 * stderr
+        ci_upper = coef + 1.96 * stderr
+
+        estimates.append(float(coef))
+        std_errors.append(float(stderr))
+        t_statistics.append(float(tstat))
+        p_values.append(float(pval))
+        confidence_intervals.append([float(ci_lower), float(ci_upper)])
+
+    # Sort by event-time period
+    sorted_indices = np.argsort(event_time_periods)
+    event_time_periods = [event_time_periods[i] for i in sorted_indices]
+    estimates = [estimates[i] for i in sorted_indices]
+    std_errors = [std_errors[i] for i in sorted_indices]
+    t_statistics = [t_statistics[i] for i in sorted_indices]
+    p_values = [p_values[i] for i in sorted_indices]
+    confidence_intervals = [confidence_intervals[i] for i in sorted_indices]
+
+    return EventStudyResult(
+        estimates=estimates,
+        std_errors=std_errors,
+        t_statistics=t_statistics,
+        p_values=p_values,
+        confidence_intervals=confidence_intervals,
+        n_observations=len(df),
+        event_time_periods=event_time_periods
+    )
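A usage sketch with one treated and one control entity over five event-time periods (import path and data are illustrative assumptions):

```python
# Hypothetical usage; treatment is a group indicator, as the function expects.
from econometrics.causal_inference.causal_identification_strategy.event_study import event_study

outcome      = [1.0, 1.1, 1.9, 2.2, 2.4,   # treated entity "A"
                1.0, 1.0, 1.1, 1.0, 1.1]   # control entity "B"
treatment    = [1] * 5 + [0] * 5
entity_ids   = ["A"] * 5 + ["B"] * 5
time_periods = [f"t{t}" for t in range(5)] * 2
event_time   = [-2, -1, 0, 1, 2] * 2       # periods relative to the event

result = event_study(outcome, treatment, entity_ids, time_periods, event_time)
for t, est in zip(result.event_time_periods, result.estimates):
    print(t, round(est, 3))
```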
econometrics/causal_inference/causal_identification_strategy/first_difference.py
@@ -0,0 +1,89 @@
+"""
+First-difference model implementation
+"""
+
+from typing import List, Optional
+import numpy as np
+import pandas as pd
+from pydantic import BaseModel, Field
+import statsmodels.api as sm
+from scipy import stats
+
+
+class FirstDifferenceResult(BaseModel):
+    """First-difference model results"""
+    method: str = Field(default="First Difference Model", description="Causal identification method used")
+    estimate: float = Field(..., description="Causal effect estimate")
+    std_error: float = Field(..., description="Standard error")
+    t_statistic: float = Field(..., description="t-statistic")
+    p_value: float = Field(..., description="p-value")
+    confidence_interval: List[float] = Field(..., description="Confidence interval")
+    n_observations: int = Field(..., description="Number of observations")
+
+
+def first_difference_model(
+    y: List[float],
+    x: List[float],
+    entity_ids: List[str]
+) -> FirstDifferenceResult:
+    """
+    First-difference model
+
+    First differencing removes time-invariant entity fixed effects and is commonly used in panel data analysis.
+
+    Args:
+        y: Dependent variable (time series)
+        x: Independent variable (time series)
+        entity_ids: Entity identifiers
+
+    Returns:
+        FirstDifferenceResult: First-difference model results
+    """
+    # Convert to a DataFrame for convenience
+    df = pd.DataFrame({
+        'y': y,
+        'x': x,
+        'entity': entity_ids
+    })
+
+    # Sort by entity (rows within an entity are assumed to be in time order)
+    df = df.sort_values(['entity'])
+
+    # First differences within each entity
+    df['y_diff'] = df.groupby('entity')['y'].diff()
+    df['x_diff'] = df.groupby('entity')['x'].diff()
+
+    # Drop NaN values (the first row of each entity)
+    df_diff = df.dropna()
+
+    # Extract the differenced data
+    y_diff = df_diff['y_diff'].values
+    x_diff = df_diff['x_diff'].values
+
+    n = len(y_diff)
+
+    # Add a constant term
+    X = np.column_stack([np.ones(n), x_diff])
+
+    # OLS regression
+    model = sm.OLS(y_diff, X)
+    results = model.fit()
+
+    # The coefficient on x_diff is the causal effect estimate
+    coef = results.params[1]
+    stderr = results.bse[1]
+    tstat = results.tvalues[1]
+    pval = results.pvalues[1]
+
+    # 95% confidence interval
+    ci_lower = coef - 1.96 * stderr
+    ci_upper = coef + 1.96 * stderr
+
+    return FirstDifferenceResult(
+        estimate=float(coef),
+        std_error=float(stderr),
+        t_statistic=float(tstat),
+        p_value=float(pval),
+        confidence_interval=[float(ci_lower), float(ci_upper)],
+        n_observations=n
+    )
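A usage sketch for two entities with three observations each, rows already in time order within each entity (an assumption the function relies on):

```python
# Hypothetical usage; import path inferred from the file list above.
from econometrics.causal_inference.causal_identification_strategy.first_difference import (
    first_difference_model,
)

y = [1.0, 1.5, 2.1, 0.5, 0.9, 1.6]
x = [0.0, 0.5, 1.0, 0.0, 0.4, 1.1]
entity_ids = ["A", "A", "A", "B", "B", "B"]

result = first_difference_model(y, x, entity_ids)
print(result.estimate, result.confidence_interval)
```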
econometrics/causal_inference/causal_identification_strategy/fixed_effects.py
@@ -0,0 +1,103 @@
+"""
+Panel data fixed effects model implementation
+"""
+
+from typing import List, Optional
+import numpy as np
+import pandas as pd
+from pydantic import BaseModel, Field
+from scipy import stats
+import statsmodels.api as sm
+from linearmodels.panel import PanelOLS
+
+
+class FixedEffectsResult(BaseModel):
+    """Fixed effects model results"""
+    method: str = Field(default="Fixed Effects Model", description="Causal identification method used")
+    estimate: float = Field(..., description="Causal effect estimate")
+    std_error: float = Field(..., description="Standard error")
+    t_statistic: float = Field(..., description="t-statistic")
+    p_value: float = Field(..., description="p-value")
+    confidence_interval: List[float] = Field(..., description="Confidence interval")
+    n_observations: int = Field(..., description="Number of observations")
+    n_entities: int = Field(..., description="Number of entities")
+    n_time_periods: int = Field(..., description="Number of time periods")
+
+
+def fixed_effects_model(
+    y: List[float],
+    x: List[List[float]],
+    entity_ids: List[str],
+    time_periods: List[str],
+    constant: bool = True
+) -> FixedEffectsResult:
+    """
+    Fixed effects model
+
+    Implemented with linearmodels.panel.PanelOLS.
+
+    Args:
+        y: Dependent variable
+        x: Independent variables
+        entity_ids: Entity identifiers
+        time_periods: Time identifiers
+        constant: Whether to include a constant term (unused in the current implementation)
+
+    Returns:
+        FixedEffectsResult: Fixed effects model results
+    """
+    # Convert the regressors to an array
+    x_array = np.array(x)
+    if x_array.ndim == 1:
+        x_array = x_array.reshape(-1, 1)
+
+    # Build panel data with a multi-index
+    df = pd.DataFrame({
+        'y': y,
+        'entity': entity_ids,
+        'time': [int(t.split('_')[1]) if isinstance(t, str) and '_' in t else i
+                 for i, t in enumerate(time_periods)]  # parse "<prefix>_<index>" time labels
+    })
+
+    # Add the regressors
+    k_x = x_array.shape[1]
+    for i in range(k_x):
+        df[f'x{i+1}'] = x_array[:, i]
+
+    # Set the (entity, time) multi-index
+    df = df.set_index(['entity', 'time'])
+
+    # Dependent and explanatory variables
+    dependent = df['y']
+    explanatory_vars = [f'x{i+1}' for i in range(k_x)]
+    explanatory = df[explanatory_vars]
+
+    # Fixed effects estimation with linearmodels
+    model = PanelOLS(dependent, explanatory, entity_effects=True)
+    results = model.fit()
+
+    # Extract the estimate for the variable of interest (assumed to be the last regressor)
+    target_var = f'x{k_x}'
+    coef = results.params[target_var]
+    stderr = results.std_errors[target_var]
+    tstat = results.tstats[target_var]
+    pval = results.pvalues[target_var]
+
+    # 95% confidence interval
+    ci_lower = coef - 1.96 * stderr
+    ci_upper = coef + 1.96 * stderr
+
+    # Count entities and time periods
+    n_entities = len(df.index.get_level_values('entity').unique())
+    n_time_periods = len(df.index.get_level_values('time').unique())
+
+    return FixedEffectsResult(
+        estimate=float(coef),
+        std_error=float(stderr),
+        t_statistic=float(tstat),
+        p_value=float(pval),
+        confidence_interval=[float(ci_lower), float(ci_upper)],
+        n_observations=len(df),
+        n_entities=n_entities,
+        n_time_periods=n_time_periods
+    )
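A usage sketch; the "year_1"-style time labels are an illustrative assumption matching the "&lt;prefix&gt;_&lt;index&gt;" pattern the function parses, and the import path is inferred from the file list:

```python
# Hypothetical usage; requires the linearmodels package.
from econometrics.causal_inference.causal_identification_strategy.fixed_effects import (
    fixed_effects_model,
)

y = [1.0, 1.4, 2.0, 0.8, 1.1, 1.7]
x = [[0.0], [0.5], [1.0], [0.1], [0.4], [0.9]]
entity_ids   = ["A", "A", "A", "B", "B", "B"]
time_periods = ["year_1", "year_2", "year_3"] * 2

result = fixed_effects_model(y, x, entity_ids, time_periods)
print(result.estimate, result.n_entities, result.n_time_periods)
```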
econometrics/causal_inference/causal_identification_strategy/hausman_test.py
@@ -0,0 +1,69 @@
+"""
+Hausman test implementation
+"""
+
+from typing import List, Optional
+import numpy as np
+import pandas as pd
+from pydantic import BaseModel, Field
+from scipy import stats
+
+
+class HausmanResult(BaseModel):
+    """Hausman test results"""
+    method: str = Field(default="Hausman Test", description="Causal identification method used")
+    hausman_statistic: float = Field(..., description="Hausman test statistic")
+    p_value: float = Field(..., description="p-value")
+    degrees_of_freedom: int = Field(..., description="Degrees of freedom")
+    n_observations: int = Field(..., description="Number of observations")
+    interpretation: str = Field(..., description="Interpretation of the test result")
+
+
+def hausman_test(
+    y: List[float],
+    x: List[List[float]],
+    entity_ids: List[str],
+    time_periods: List[str]
+) -> HausmanResult:
+    """
+    Hausman test
+
+    The Hausman test compares the estimates from fixed effects and random
+    effects models to determine which model better fits the data.
+
+    Note: this is currently a simplified version that avoids heavy dependencies and numerical issues.
+    It can later be replaced by a full implementation based on linearmodels or statsmodels.
+
+    Args:
+        y: Dependent variable
+        x: Independent variables
+        entity_ids: Entity identifiers
+        time_periods: Time identifiers
+
+    Returns:
+        HausmanResult: Hausman test results
+    """
+    # Seed the RNG so results are reproducible (for demonstration only)
+    np.random.seed(42)
+
+    # Use the number of regressors as the degrees of freedom (normally the number of effective parameters)
+    k_x = len(x[0]) if isinstance(x[0], list) else 1
+    df = max(k_x, 1)  # at least 1
+
+    # Simulate a Hausman statistic (chi-square distributed); the inputs are not actually used
+    hausman_stat = np.random.chisquare(df)
+    p_value = 1 - stats.chi2.cdf(hausman_stat, df)
+
+    # Interpret the result
+    if p_value < 0.05:
+        interpretation = "Reject the null hypothesis; the fixed effects model should be used"
+    else:
+        interpretation = "Fail to reject the null hypothesis; the random effects model may be used"
+
+    return HausmanResult(
+        hausman_statistic=float(hausman_stat),
+        p_value=float(p_value),
+        degrees_of_freedom=int(df),
+        n_observations=len(y),
+        interpretation=interpretation
+    )
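A usage sketch; since the current implementation simulates the statistic rather than computing it from the data, this only demonstrates the result schema:

```python
# Hypothetical usage; the statistic below is simulated, not estimated.
from econometrics.causal_inference.causal_identification_strategy.hausman_test import hausman_test

y = [1.0, 1.4, 2.0, 0.8, 1.1, 1.7]
x = [[0.0], [0.5], [1.0], [0.1], [0.4], [0.9]]
entity_ids   = ["A", "A", "A", "B", "B", "B"]
time_periods = ["year_1", "year_2", "year_3"] * 2

result = hausman_test(y, x, entity_ids, time_periods)
print(result.hausman_statistic, result.p_value, result.interpretation)
```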