aigroup-econ-mcp 1.3.3__py3-none-any.whl → 2.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (198)
  1. .gitignore +253 -0
  2. PKG-INFO +732 -0
  3. README.md +687 -0
  4. __init__.py +14 -0
  5. aigroup_econ_mcp-2.0.1.dist-info/METADATA +732 -0
  6. aigroup_econ_mcp-2.0.1.dist-info/RECORD +170 -0
  7. aigroup_econ_mcp-2.0.1.dist-info/entry_points.txt +2 -0
  8. aigroup_econ_mcp-2.0.1.dist-info/licenses/LICENSE +21 -0
  9. cli.py +32 -0
  10. econometrics/README.md +18 -0
  11. econometrics/__init__.py +191 -0
  12. econometrics/advanced_methods/modern_computing_machine_learning/__init__.py +30 -0
  13. econometrics/advanced_methods/modern_computing_machine_learning/causal_forest.py +253 -0
  14. econometrics/advanced_methods/modern_computing_machine_learning/double_ml.py +268 -0
  15. econometrics/advanced_methods/modern_computing_machine_learning/gradient_boosting.py +249 -0
  16. econometrics/advanced_methods/modern_computing_machine_learning/hierarchical_clustering.py +243 -0
  17. econometrics/advanced_methods/modern_computing_machine_learning/kmeans_clustering.py +293 -0
  18. econometrics/advanced_methods/modern_computing_machine_learning/neural_network.py +264 -0
  19. econometrics/advanced_methods/modern_computing_machine_learning/random_forest.py +195 -0
  20. econometrics/advanced_methods/modern_computing_machine_learning/support_vector_machine.py +226 -0
  21. econometrics/advanced_methods/modern_computing_machine_learning/test_all_modules.py +329 -0
  22. econometrics/advanced_methods/modern_computing_machine_learning/test_report.md +107 -0
  23. econometrics/basic_parametric_estimation/__init__.py +31 -0
  24. econometrics/basic_parametric_estimation/gmm/__init__.py +13 -0
  25. econometrics/basic_parametric_estimation/gmm/gmm_model.py +256 -0
  26. econometrics/basic_parametric_estimation/mle/__init__.py +13 -0
  27. econometrics/basic_parametric_estimation/mle/mle_model.py +241 -0
  28. econometrics/basic_parametric_estimation/ols/__init__.py +13 -0
  29. econometrics/basic_parametric_estimation/ols/ols_model.py +141 -0
  30. econometrics/causal_inference/__init__.py +66 -0
  31. econometrics/causal_inference/causal_identification_strategy/__init__.py +104 -0
  32. econometrics/causal_inference/causal_identification_strategy/control_function.py +112 -0
  33. econometrics/causal_inference/causal_identification_strategy/difference_in_differences.py +107 -0
  34. econometrics/causal_inference/causal_identification_strategy/event_study.py +119 -0
  35. econometrics/causal_inference/causal_identification_strategy/first_difference.py +89 -0
  36. econometrics/causal_inference/causal_identification_strategy/fixed_effects.py +103 -0
  37. econometrics/causal_inference/causal_identification_strategy/hausman_test.py +69 -0
  38. econometrics/causal_inference/causal_identification_strategy/instrumental_variables.py +145 -0
  39. econometrics/causal_inference/causal_identification_strategy/mediation_analysis.py +121 -0
  40. econometrics/causal_inference/causal_identification_strategy/moderation_analysis.py +109 -0
  41. econometrics/causal_inference/causal_identification_strategy/propensity_score_matching.py +140 -0
  42. econometrics/causal_inference/causal_identification_strategy/random_effects.py +100 -0
  43. econometrics/causal_inference/causal_identification_strategy/regression_discontinuity.py +98 -0
  44. econometrics/causal_inference/causal_identification_strategy/synthetic_control.py +111 -0
  45. econometrics/causal_inference/causal_identification_strategy/triple_difference.py +86 -0
  46. econometrics/distribution_analysis/__init__.py +28 -0
  47. econometrics/distribution_analysis/oaxaca_blinder.py +184 -0
  48. econometrics/distribution_analysis/time_series_decomposition.py +152 -0
  49. econometrics/distribution_analysis/variance_decomposition.py +179 -0
  50. econometrics/missing_data/__init__.py +18 -0
  51. econometrics/missing_data/imputation_methods.py +219 -0
  52. econometrics/missing_data/missing_data_measurement_error/__init__.py +0 -0
  53. econometrics/model_specification_diagnostics_robust_inference/README.md +173 -0
  54. econometrics/model_specification_diagnostics_robust_inference/__init__.py +78 -0
  55. econometrics/model_specification_diagnostics_robust_inference/diagnostic_tests/__init__.py +20 -0
  56. econometrics/model_specification_diagnostics_robust_inference/diagnostic_tests/diagnostic_tests_model.py +149 -0
  57. econometrics/model_specification_diagnostics_robust_inference/generalized_least_squares/__init__.py +15 -0
  58. econometrics/model_specification_diagnostics_robust_inference/generalized_least_squares/gls_model.py +130 -0
  59. econometrics/model_specification_diagnostics_robust_inference/model_selection/__init__.py +18 -0
  60. econometrics/model_specification_diagnostics_robust_inference/model_selection/model_selection_model.py +286 -0
  61. econometrics/model_specification_diagnostics_robust_inference/regularization/__init__.py +15 -0
  62. econometrics/model_specification_diagnostics_robust_inference/regularization/regularization_model.py +177 -0
  63. econometrics/model_specification_diagnostics_robust_inference/robust_errors/__init__.py +15 -0
  64. econometrics/model_specification_diagnostics_robust_inference/robust_errors/robust_errors_model.py +122 -0
  65. econometrics/model_specification_diagnostics_robust_inference/simultaneous_equations/__init__.py +15 -0
  66. econometrics/model_specification_diagnostics_robust_inference/simultaneous_equations/simultaneous_equations_model.py +246 -0
  67. econometrics/model_specification_diagnostics_robust_inference/weighted_least_squares/__init__.py +15 -0
  68. econometrics/model_specification_diagnostics_robust_inference/weighted_least_squares/wls_model.py +127 -0
  69. econometrics/nonparametric/__init__.py +35 -0
  70. econometrics/nonparametric/gam_model.py +117 -0
  71. econometrics/nonparametric/kernel_regression.py +161 -0
  72. econometrics/nonparametric/nonparametric_semiparametric_methods/__init__.py +0 -0
  73. econometrics/nonparametric/quantile_regression.py +249 -0
  74. econometrics/nonparametric/spline_regression.py +100 -0
  75. econometrics/spatial_econometrics/__init__.py +68 -0
  76. econometrics/spatial_econometrics/geographically_weighted_regression.py +211 -0
  77. econometrics/spatial_econometrics/gwr_simple.py +154 -0
  78. econometrics/spatial_econometrics/spatial_autocorrelation.py +356 -0
  79. econometrics/spatial_econometrics/spatial_durbin_model.py +177 -0
  80. econometrics/spatial_econometrics/spatial_econometrics_new/__init__.py +0 -0
  81. econometrics/spatial_econometrics/spatial_regression.py +315 -0
  82. econometrics/spatial_econometrics/spatial_weights.py +226 -0
  83. econometrics/specific_data_modeling/micro_discrete_limited_data/README.md +164 -0
  84. econometrics/specific_data_modeling/micro_discrete_limited_data/__init__.py +40 -0
  85. econometrics/specific_data_modeling/micro_discrete_limited_data/count_data_models.py +311 -0
  86. econometrics/specific_data_modeling/micro_discrete_limited_data/discrete_choice_models.py +294 -0
  87. econometrics/specific_data_modeling/micro_discrete_limited_data/limited_dependent_variable_models.py +282 -0
  88. econometrics/specific_data_modeling/survival_duration_data/__init__.py +0 -0
  89. econometrics/specific_data_modeling/time_series_panel_data/__init__.py +143 -0
  90. econometrics/specific_data_modeling/time_series_panel_data/arima_model.py +104 -0
  91. econometrics/specific_data_modeling/time_series_panel_data/cointegration_vecm.py +334 -0
  92. econometrics/specific_data_modeling/time_series_panel_data/dynamic_panel_models.py +653 -0
  93. econometrics/specific_data_modeling/time_series_panel_data/exponential_smoothing.py +176 -0
  94. econometrics/specific_data_modeling/time_series_panel_data/garch_model.py +198 -0
  95. econometrics/specific_data_modeling/time_series_panel_data/panel_diagnostics.py +125 -0
  96. econometrics/specific_data_modeling/time_series_panel_data/panel_var.py +60 -0
  97. econometrics/specific_data_modeling/time_series_panel_data/structural_break_tests.py +87 -0
  98. econometrics/specific_data_modeling/time_series_panel_data/time_varying_parameter_models.py +106 -0
  99. econometrics/specific_data_modeling/time_series_panel_data/unit_root_tests.py +204 -0
  100. econometrics/specific_data_modeling/time_series_panel_data/var_svar_model.py +372 -0
  101. econometrics/statistical_inference/__init__.py +21 -0
  102. econometrics/statistical_inference/bootstrap_methods.py +162 -0
  103. econometrics/statistical_inference/permutation_test.py +177 -0
  104. econometrics/statistical_inference/statistical_inference_techniques/__init__.py +0 -0
  105. econometrics/statistics/distribution_decomposition_methods/__init__.py +0 -0
  106. econometrics/survival_analysis/__init__.py +18 -0
  107. econometrics/survival_analysis/survival_models.py +259 -0
  108. econometrics/tests/basic_parametric_estimation_tests/__init__.py +3 -0
  109. econometrics/tests/basic_parametric_estimation_tests/test_gmm.py +128 -0
  110. econometrics/tests/basic_parametric_estimation_tests/test_mle.py +127 -0
  111. econometrics/tests/basic_parametric_estimation_tests/test_ols.py +100 -0
  112. econometrics/tests/causal_inference_tests/__init__.py +3 -0
  113. econometrics/tests/causal_inference_tests/detailed_test.py +441 -0
  114. econometrics/tests/causal_inference_tests/test_all_methods.py +418 -0
  115. econometrics/tests/causal_inference_tests/test_causal_identification_strategy.py +202 -0
  116. econometrics/tests/causal_inference_tests/test_difference_in_differences.py +53 -0
  117. econometrics/tests/causal_inference_tests/test_instrumental_variables.py +44 -0
  118. econometrics/tests/model_specification_diagnostics_tests/__init__.py +3 -0
  119. econometrics/tests/model_specification_diagnostics_tests/test_diagnostic_tests.py +86 -0
  120. econometrics/tests/model_specification_diagnostics_tests/test_robust_errors.py +89 -0
  121. econometrics/tests/specific_data_modeling_tests/__init__.py +3 -0
  122. econometrics/tests/specific_data_modeling_tests/test_arima.py +98 -0
  123. econometrics/tests/specific_data_modeling_tests/test_dynamic_panel.py +198 -0
  124. econometrics/tests/specific_data_modeling_tests/test_exponential_smoothing.py +105 -0
  125. econometrics/tests/specific_data_modeling_tests/test_garch.py +118 -0
  126. econometrics/tests/specific_data_modeling_tests/test_micro_discrete_limited_data.py +189 -0
  127. econometrics/tests/specific_data_modeling_tests/test_unit_root.py +156 -0
  128. econometrics/tests/specific_data_modeling_tests/test_var.py +124 -0
  129. econometrics//321/206/320/254/320/272/321/205/342/225/235/320/220/321/205/320/237/320/241/321/205/320/264/320/267/321/207/342/226/222/342/225/227/321/204/342/225/235/320/250/321/205/320/225/320/230/321/207/342/225/221/320/267/321/205/320/230/320/226/321/206/320/256/320/240.md +544 -0
  130. prompts/__init__.py +0 -0
  131. prompts/analysis_guides.py +43 -0
  132. pyproject.toml +85 -0
  133. resources/MCP_MASTER_GUIDE.md +422 -0
  134. resources/MCP_TOOLS_DATA_FORMAT_GUIDE.md +185 -0
  135. resources/__init__.py +0 -0
  136. server.py +97 -0
  137. tools/README.md +88 -0
  138. tools/__init__.py +119 -0
  139. tools/causal_inference_adapter.py +658 -0
  140. tools/data_loader.py +213 -0
  141. tools/decorators.py +38 -0
  142. tools/distribution_analysis_adapter.py +121 -0
  143. tools/econometrics_adapter.py +286 -0
  144. tools/gwr_simple_adapter.py +54 -0
  145. tools/machine_learning_adapter.py +567 -0
  146. tools/mcp_tool_groups/__init__.py +15 -0
  147. tools/mcp_tool_groups/basic_parametric_tools.py +173 -0
  148. tools/mcp_tool_groups/causal_inference_tools.py +643 -0
  149. tools/mcp_tool_groups/distribution_analysis_tools.py +169 -0
  150. tools/mcp_tool_groups/machine_learning_tools.py +422 -0
  151. tools/mcp_tool_groups/microecon_tools.py +325 -0
  152. tools/mcp_tool_groups/missing_data_tools.py +117 -0
  153. tools/mcp_tool_groups/model_specification_tools.py +402 -0
  154. tools/mcp_tool_groups/nonparametric_tools.py +225 -0
  155. tools/mcp_tool_groups/spatial_econometrics_tools.py +323 -0
  156. tools/mcp_tool_groups/statistical_inference_tools.py +131 -0
  157. tools/mcp_tool_groups/time_series_tools.py +494 -0
  158. tools/mcp_tools_registry.py +124 -0
  159. tools/microecon_adapter.py +412 -0
  160. tools/missing_data_adapter.py +73 -0
  161. tools/model_specification_adapter.py +369 -0
  162. tools/nonparametric_adapter.py +190 -0
  163. tools/output_formatter.py +563 -0
  164. tools/spatial_econometrics_adapter.py +318 -0
  165. tools/statistical_inference_adapter.py +90 -0
  166. tools/survival_analysis_adapter.py +46 -0
  167. tools/time_series_panel_data_adapter.py +858 -0
  168. tools/time_series_panel_data_tools.py +65 -0
  169. aigroup_econ_mcp/__init__.py +0 -19
  170. aigroup_econ_mcp/cli.py +0 -82
  171. aigroup_econ_mcp/config.py +0 -561
  172. aigroup_econ_mcp/server.py +0 -452
  173. aigroup_econ_mcp/tools/__init__.py +0 -19
  174. aigroup_econ_mcp/tools/base.py +0 -470
  175. aigroup_econ_mcp/tools/cache.py +0 -533
  176. aigroup_econ_mcp/tools/data_loader.py +0 -195
  177. aigroup_econ_mcp/tools/file_parser.py +0 -1027
  178. aigroup_econ_mcp/tools/machine_learning.py +0 -60
  179. aigroup_econ_mcp/tools/ml_ensemble.py +0 -210
  180. aigroup_econ_mcp/tools/ml_evaluation.py +0 -272
  181. aigroup_econ_mcp/tools/ml_models.py +0 -54
  182. aigroup_econ_mcp/tools/ml_regularization.py +0 -186
  183. aigroup_econ_mcp/tools/monitoring.py +0 -555
  184. aigroup_econ_mcp/tools/optimized_example.py +0 -229
  185. aigroup_econ_mcp/tools/panel_data.py +0 -619
  186. aigroup_econ_mcp/tools/regression.py +0 -214
  187. aigroup_econ_mcp/tools/statistics.py +0 -154
  188. aigroup_econ_mcp/tools/time_series.py +0 -698
  189. aigroup_econ_mcp/tools/timeout.py +0 -283
  190. aigroup_econ_mcp/tools/tool_descriptions.py +0 -410
  191. aigroup_econ_mcp/tools/tool_handlers.py +0 -1016
  192. aigroup_econ_mcp/tools/tool_registry.py +0 -478
  193. aigroup_econ_mcp/tools/validation.py +0 -482
  194. aigroup_econ_mcp-1.3.3.dist-info/METADATA +0 -525
  195. aigroup_econ_mcp-1.3.3.dist-info/RECORD +0 -30
  196. aigroup_econ_mcp-1.3.3.dist-info/entry_points.txt +0 -2
  197. /aigroup_econ_mcp-1.3.3.dist-info/licenses/LICENSE → /LICENSE +0 -0
  198. {aigroup_econ_mcp-1.3.3.dist-info → aigroup_econ_mcp-2.0.1.dist-info}/WHEEL +0 -0
@@ -0,0 +1,66 @@
1
+ """
2
+ 因果推断模块
3
+ """
4
+
5
+ # 从因果识别策略模块导入
6
+ from .causal_identification_strategy import (
7
+ instrumental_variables_2sls,
8
+ difference_in_differences,
9
+ regression_discontinuity,
10
+ fixed_effects_model,
11
+ random_effects_model,
12
+ control_function_approach,
13
+ first_difference_model,
14
+ triple_difference,
15
+ event_study,
16
+ synthetic_control_method,
17
+ propensity_score_matching,
18
+ mediation_analysis,
19
+ moderation_analysis,
20
+ hausman_test,
21
+ IVResult,
22
+ DIDResult,
23
+ RDDResult,
24
+ FixedEffectsResult,
25
+ RandomEffectsResult,
26
+ ControlFunctionResult,
27
+ FirstDifferenceResult,
28
+ TripeDifferenceResult,
29
+ EventStudyResult,
30
+ SyntheticControlResult,
31
+ PSMMatchResult,
32
+ MediationResult,
33
+ ModerationResult,
34
+ HausmanResult
35
+ )
36
+
37
+ __all__ = [
38
+ "instrumental_variables_2sls",
39
+ "difference_in_differences",
40
+ "regression_discontinuity",
41
+ "fixed_effects_model",
42
+ "random_effects_model",
43
+ "control_function_approach",
44
+ "first_difference_model",
45
+ "triple_difference",
46
+ "event_study",
47
+ "synthetic_control_method",
48
+ "propensity_score_matching",
49
+ "mediation_analysis",
50
+ "moderation_analysis",
51
+ "hausman_test",
52
+ "IVResult",
53
+ "DIDResult",
54
+ "RDDResult",
55
+ "FixedEffectsResult",
56
+ "RandomEffectsResult",
57
+ "ControlFunctionResult",
58
+ "FirstDifferenceResult",
59
+ "TripeDifferenceResult",
60
+ "EventStudyResult",
61
+ "SyntheticControlResult",
62
+ "PSMMatchResult",
63
+ "MediationResult",
64
+ "ModerationResult",
65
+ "HausmanResult"
66
+ ]
@@ -0,0 +1,104 @@
1
+ """
2
+ 因果识别策略模块
3
+ """
4
+
5
+ from .instrumental_variables import (
6
+ instrumental_variables_2sls,
7
+ IVResult
8
+ )
9
+
10
+ from .difference_in_differences import (
11
+ difference_in_differences,
12
+ DIDResult
13
+ )
14
+
15
+ from .regression_discontinuity import (
16
+ regression_discontinuity,
17
+ RDDResult
18
+ )
19
+
20
+ from .fixed_effects import (
21
+ fixed_effects_model,
22
+ FixedEffectsResult
23
+ )
24
+
25
+ from .random_effects import (
26
+ random_effects_model,
27
+ RandomEffectsResult
28
+ )
29
+
30
+ from .control_function import (
31
+ control_function_approach,
32
+ ControlFunctionResult
33
+ )
34
+
35
+ from .first_difference import (
36
+ first_difference_model,
37
+ FirstDifferenceResult
38
+ )
39
+
40
+ from .triple_difference import (
41
+ triple_difference,
42
+ TripeDifferenceResult
43
+ )
44
+
45
+ from .event_study import (
46
+ event_study,
47
+ EventStudyResult
48
+ )
49
+
50
+ from .synthetic_control import (
51
+ synthetic_control_method,
52
+ SyntheticControlResult
53
+ )
54
+
55
+ from .propensity_score_matching import (
56
+ propensity_score_matching,
57
+ PSMMatchResult
58
+ )
59
+
60
+ from .mediation_analysis import (
61
+ mediation_analysis,
62
+ MediationResult
63
+ )
64
+
65
+ from .moderation_analysis import (
66
+ moderation_analysis,
67
+ ModerationResult
68
+ )
69
+
70
+ from .hausman_test import (
71
+ hausman_test,
72
+ HausmanResult
73
+ )
74
+
75
+ __all__ = [
76
+ "instrumental_variables_2sls",
77
+ "difference_in_differences",
78
+ "regression_discontinuity",
79
+ "fixed_effects_model",
80
+ "random_effects_model",
81
+ "control_function_approach",
82
+ "first_difference_model",
83
+ "triple_difference",
84
+ "event_study",
85
+ "synthetic_control_method",
86
+ "propensity_score_matching",
87
+ "mediation_analysis",
88
+ "moderation_analysis",
89
+ "hausman_test",
90
+ "IVResult",
91
+ "DIDResult",
92
+ "RDDResult",
93
+ "FixedEffectsResult",
94
+ "RandomEffectsResult",
95
+ "ControlFunctionResult",
96
+ "FirstDifferenceResult",
97
+ "TripeDifferenceResult",
98
+ "EventStudyResult",
99
+ "SyntheticControlResult",
100
+ "PSMMatchResult",
101
+ "MediationResult",
102
+ "ModerationResult",
103
+ "HausmanResult"
104
+ ]
@@ -0,0 +1,112 @@
1
+ """
2
+ 控制函数法实现
3
+ """
4
+
5
+ from typing import List, Optional
6
+ import numpy as np
7
+ import pandas as pd
8
+ from pydantic import BaseModel, Field
9
+ import statsmodels.api as sm
10
+ from scipy import stats
11
+
12
+
13
+ class ControlFunctionResult(BaseModel):
14
+ """控制函数法结果"""
15
+ method: str = Field(default="Control Function Approach", description="使用的因果识别方法")
16
+ estimate: float = Field(..., description="因果效应估计值")
17
+ std_error: float = Field(..., description="标准误")
18
+ t_statistic: float = Field(..., description="t统计量")
19
+ p_value: float = Field(..., description="p值")
20
+ confidence_interval: List[float] = Field(..., description="置信区间")
21
+ n_observations: int = Field(..., description="观测数量")
22
+ endogeneity_test: Optional[dict] = Field(None, description="内生性检验结果")
23
+
24
+
25
+ def control_function_approach(
26
+ y: List[float],
27
+ x: List[float],
28
+ z: List[List[float]],
29
+ constant: bool = True
30
+ ) -> ControlFunctionResult:
31
+ """
32
+ 控制函数法
33
+
34
+ 控制函数法是一种解决内生性问题的方法,通过在第二阶段回归中加入第一阶段回归的残差来控制内生性。
35
+
36
+ Args:
37
+ y: 因变量
38
+ x: 内生自变量
39
+ z: 外生变量(包括工具变量和外生控制变量)
40
+ constant: 是否包含常数项
41
+
42
+ Returns:
43
+ ControlFunctionResult: 控制函数法结果
44
+ """
45
+ # 转换为numpy数组
46
+ y_array = np.array(y)
47
+ x_array = np.array(x)
48
+ z_array = np.array(z)
49
+
50
+ if z_array.ndim == 1:
51
+ z_array = z_array.reshape(-1, 1)
52
+
53
+ n = len(y)
54
+
55
+ # 第一阶段:将内生变量x对所有外生变量z回归
56
+ if constant:
57
+ Z = np.column_stack([np.ones(n), z_array])
58
+ else:
59
+ Z = z_array
60
+
61
+ # 第一阶段回归
62
+ first_stage_model = sm.OLS(x_array, Z)
63
+ first_stage_results = first_stage_model.fit()
64
+
65
+ # 获取第一阶段残差
66
+ x_residuals = first_stage_results.resid
67
+
68
+ # 第二阶段:将y对x和第一阶段残差回归
69
+ if constant:
70
+ X_second = np.column_stack([np.ones(n), x_array, x_residuals])
71
+ else:
72
+ X_second = np.column_stack([x_array, x_residuals])
73
+
74
+ second_stage_model = sm.OLS(y_array, X_second)
75
+ second_stage_results = second_stage_model.fit()
76
+
77
+ # 提取x的系数作为因果效应估计
78
+ # 如果有常数项,x是第2列;否则是第1列
79
+ x_coef_idx = 1 if constant else 0
80
+ coef = second_stage_results.params[x_coef_idx]
81
+ stderr = second_stage_results.bse[x_coef_idx]
82
+ tstat = second_stage_results.tvalues[x_coef_idx]
83
+ pval = second_stage_results.pvalues[x_coef_idx]
84
+
85
+ # 计算置信区间
86
+ ci_lower = coef - 1.96 * stderr
87
+ ci_upper = coef + 1.96 * stderr
88
+
89
+ # 内生性检验(检验控制函数/残差项的系数是否显著)
90
+ residual_coef_idx = 2 if constant else 1
91
+ residual_coef = second_stage_results.params[residual_coef_idx]
92
+ residual_stderr = second_stage_results.bse[residual_coef_idx]
93
+ residual_tstat = second_stage_results.tvalues[residual_coef_idx]
94
+ residual_pval = second_stage_results.pvalues[residual_coef_idx]
95
+
96
+ endogeneity_test = {
97
+ "residual_coefficient": float(residual_coef),
98
+ "residual_std_error": float(residual_stderr),
99
+ "t_statistic": float(residual_tstat),
100
+ "p_value": float(residual_pval),
101
+ "interpretation": "如果残差项系数显著,表明存在内生性问题"
102
+ }
103
+
104
+ return ControlFunctionResult(
105
+ estimate=float(coef),
106
+ std_error=float(stderr),
107
+ t_statistic=float(tstat),
108
+ p_value=float(pval),
109
+ confidence_interval=[float(ci_lower), float(ci_upper)],
110
+ n_observations=n,
111
+ endogeneity_test=endogeneity_test
112
+ )
@@ -0,0 +1,107 @@
1
+ """
2
+ 双重差分法 (DID) 实现
3
+ """
4
+
5
+ from typing import List, Optional, Dict, Any
6
+ import numpy as np
7
+ import pandas as pd
8
+ from pydantic import BaseModel, Field
9
+ from scipy import stats
10
+ import statsmodels.api as sm
11
+
12
+
13
+ class DIDResult(BaseModel):
14
+ """双重差分法结果"""
15
+ method: str = Field(default="Difference-in-Differences", description="使用的因果识别方法")
16
+ estimate: float = Field(..., description="因果效应估计值")
17
+ std_error: float = Field(..., description="标准误")
18
+ t_statistic: float = Field(..., description="t统计量")
19
+ p_value: float = Field(..., description="p值")
20
+ confidence_interval: List[float] = Field(..., description="置信区间")
21
+ n_observations: int = Field(..., description="观测数量")
22
+ parallel_trend_test: Optional[Dict[str, Any]] = Field(None, description="平行趋势检验")
23
+
24
+
25
+ def difference_in_differences(
26
+ treatment: List[int],
27
+ time_period: List[int],
28
+ outcome: List[float],
29
+ covariates: Optional[List[List[float]]] = None
30
+ ) -> DIDResult:
31
+ """
32
+ 双重差分法 (DID)
33
+
34
+ 使用statsmodels实现双重差分法,评估处理效应。
35
+
36
+ Args:
37
+ treatment: 处理组虚拟变量 (0/1)
38
+ time_period: 时间虚拟变量 (0/1)
39
+ outcome: 结果变量
40
+ covariates: 协变量
41
+
42
+ Returns:
43
+ DIDResult: 双重差分法结果
44
+ """
45
+ # 构建数据
46
+ data = {
47
+ 'treatment': treatment,
48
+ 'time': time_period,
49
+ 'outcome': outcome
50
+ }
51
+
52
+ # 添加协变量
53
+ if covariates:
54
+ covariates_array = np.array(covariates)
55
+ if covariates_array.ndim == 1:
56
+ covariates_array = covariates_array.reshape(-1, 1)
57
+
58
+ k_cov = covariates_array.shape[1]
59
+ for i in range(k_cov):
60
+ data[f"covariate_{i+1}"] = covariates_array[:, i]
61
+
62
+ df = pd.DataFrame(data)
63
+
64
+ # 构建交互项
65
+ df['treatment_time'] = df['treatment'] * df['time']
66
+
67
+ # 构建回归公式
68
+ independent_vars = ['treatment', 'time', 'treatment_time']
69
+ if covariates:
70
+ independent_vars.extend([f"covariate_{i+1}" for i in range(k_cov)])
71
+
72
+ # 添加常数项
73
+ df['const'] = 1
74
+ independent_vars = ['const'] + independent_vars
75
+
76
+ # 使用statsmodels进行OLS回归
77
+ X = df[independent_vars]
78
+ y = df['outcome']
79
+
80
+ model = sm.OLS(y, X)
81
+ results = model.fit()
82
+
83
+ # 提取DID估计结果(交互项系数)
84
+ coef = results.params['treatment_time']
85
+ stderr = results.bse['treatment_time']
86
+ tstat = results.tvalues['treatment_time']
87
+ pval = results.pvalues['treatment_time']
88
+
89
+ # 计算置信区间
90
+ ci_lower = coef - 1.96 * stderr
91
+ ci_upper = coef + 1.96 * stderr
92
+
93
+ # 平行趋势检验(简化处理)
94
+ # 这里只是一个示例,实际的平行趋势检验需要更多的前期数据
95
+ parallel_trend = {
96
+ "description": "Simplified parallel trend test - full test requires pre-treatment periods"
97
+ }
98
+
99
+ return DIDResult(
100
+ estimate=float(coef),
101
+ std_error=float(stderr),
102
+ t_statistic=float(tstat),
103
+ p_value=float(pval),
104
+ confidence_interval=[float(ci_lower), float(ci_upper)],
105
+ n_observations=len(df),
106
+ parallel_trend_test=parallel_trend
107
+ )
@@ -0,0 +1,119 @@
1
+ """
2
+ 事件研究法 (Event Study) 实现
3
+ """
4
+
5
+ from typing import List, Optional
6
+ import numpy as np
7
+ import pandas as pd
8
+ from pydantic import BaseModel, Field
9
+ import statsmodels.api as sm
10
+ from scipy import stats
11
+
12
+
13
+ class EventStudyResult(BaseModel):
14
+ """事件研究法结果"""
15
+ method: str = Field(default="Event Study", description="使用的因果识别方法")
16
+ estimates: List[float] = Field(..., description="各期效应估计值")
17
+ std_errors: List[float] = Field(..., description="各期效应标准误")
18
+ t_statistics: List[float] = Field(..., description="各期效应t统计量")
19
+ p_values: List[float] = Field(..., description="各期效应p值")
20
+ confidence_intervals: List[List[float]] = Field(..., description="各期效应置信区间")
21
+ n_observations: int = Field(..., description="观测数量")
22
+ event_time_periods: List[int] = Field(..., description="事件时间期列表")
23
+
24
+
25
+ def event_study(
26
+ outcome: List[float],
27
+ treatment: List[int],
28
+ entity_ids: List[str],
29
+ time_periods: List[str],
30
+ event_time: List[int]
31
+ ) -> EventStudyResult:
32
+ """
33
+ 事件研究法 (Event Study)
34
+
35
+ 事件研究法通过分析处理前后多个时间点的效应,验证处理效应的动态变化模式。
36
+
37
+ Args:
38
+ outcome: 结果变量
39
+ treatment: 处理状态变量
40
+ entity_ids: 个体标识符
41
+ time_periods: 时间标识符
42
+ event_time: 相对于事件发生时间的时间标识(如-2, -1, 0, 1, 2)
43
+
44
+ Returns:
45
+ EventStudyResult: 事件研究法结果
46
+ """
47
+ # 构建数据
48
+ df = pd.DataFrame({
49
+ 'outcome': outcome,
50
+ 'treatment': treatment,
51
+ 'entity': entity_ids,
52
+ 'time': time_periods,
53
+ 'event_time': event_time
54
+ })
55
+
56
+ # 创建时间虚拟变量
57
+ time_dummies = pd.get_dummies(df['event_time'], prefix='time')
58
+ df = pd.concat([df, time_dummies], axis=1)
59
+
60
+ # 与处理状态交互
61
+ for col in time_dummies.columns:
62
+ df[f'{col}_treated'] = df[col] * df['treatment']
63
+
64
+ # 构建回归设计矩阵
65
+ interaction_vars = [col for col in df.columns if col.endswith('_treated')]
66
+ X = df[interaction_vars]
67
+ X = sm.add_constant(X) # 添加常数项
68
+ y = df['outcome']
69
+
70
+ # OLS回归
71
+ model = sm.OLS(y, X)
72
+ results = model.fit()
73
+
74
+ # 提取各期效应估计结果
75
+ estimates = []
76
+ std_errors = []
77
+ t_statistics = []
78
+ p_values = []
79
+ confidence_intervals = []
80
+ event_time_periods = []
81
+
82
+ for col in interaction_vars:
83
+ # 从列名中提取时间期数
84
+ time_period = int(col.replace('time_', '').replace('_treated', ''))
85
+ event_time_periods.append(time_period)
86
+
87
+ coef = results.params[col]
88
+ stderr = results.bse[col]
89
+ tstat = results.tvalues[col]
90
+ pval = results.pvalues[col]
91
+
92
+ # 计算置信区间
93
+ ci_lower = coef - 1.96 * stderr
94
+ ci_upper = coef + 1.96 * stderr
95
+
96
+ estimates.append(float(coef))
97
+ std_errors.append(float(stderr))
98
+ t_statistics.append(float(tstat))
99
+ p_values.append(float(pval))
100
+ confidence_intervals.append([float(ci_lower), float(ci_upper)])
101
+
102
+ # 按时间期排序
103
+ sorted_indices = np.argsort(event_time_periods)
104
+ event_time_periods = [event_time_periods[i] for i in sorted_indices]
105
+ estimates = [estimates[i] for i in sorted_indices]
106
+ std_errors = [std_errors[i] for i in sorted_indices]
107
+ t_statistics = [t_statistics[i] for i in sorted_indices]
108
+ p_values = [p_values[i] for i in sorted_indices]
109
+ confidence_intervals = [confidence_intervals[i] for i in sorted_indices]
110
+
111
+ return EventStudyResult(
112
+ estimates=estimates,
113
+ std_errors=std_errors,
114
+ t_statistics=t_statistics,
115
+ p_values=p_values,
116
+ confidence_intervals=confidence_intervals,
117
+ n_observations=len(df),
118
+ event_time_periods=event_time_periods
119
+ )
@@ -0,0 +1,89 @@
1
+ """
2
+ 一阶差分模型实现
3
+ """
4
+
5
+ from typing import List, Optional
6
+ import numpy as np
7
+ import pandas as pd
8
+ from pydantic import BaseModel, Field
9
+ import statsmodels.api as sm
10
+ from scipy import stats
11
+
12
+
13
+ class FirstDifferenceResult(BaseModel):
14
+ """一阶差分模型结果"""
15
+ method: str = Field(default="First Difference Model", description="使用的因果识别方法")
16
+ estimate: float = Field(..., description="因果效应估计值")
17
+ std_error: float = Field(..., description="标准误")
18
+ t_statistic: float = Field(..., description="t统计量")
19
+ p_value: float = Field(..., description="p值")
20
+ confidence_interval: List[float] = Field(..., description="置信区间")
21
+ n_observations: int = Field(..., description="观测数量")
22
+
23
+
24
+ def first_difference_model(
25
+ y: List[float],
26
+ x: List[float],
27
+ entity_ids: List[str]
28
+ ) -> FirstDifferenceResult:
29
+ """
30
+ 一阶差分模型
31
+
32
+ 一阶差分法通过差分操作消除不随时间变化的个体固定效应,常用于面板数据分析。
33
+
34
+ Args:
35
+ y: 因变量(时间序列)
36
+ x: 自变量(时间序列)
37
+ entity_ids: 个体标识符
38
+
39
+ Returns:
40
+ FirstDifferenceResult: 一阶差分模型结果
41
+ """
42
+ # 转换为DataFrame便于处理
43
+ df = pd.DataFrame({
44
+ 'y': y,
45
+ 'x': x,
46
+ 'entity': entity_ids
47
+ })
48
+
49
+ # 按个体排序
50
+ df = df.sort_values(['entity'])
51
+
52
+ # 计算一阶差分
53
+ df['y_diff'] = df.groupby('entity')['y'].diff()
54
+ df['x_diff'] = df.groupby('entity')['x'].diff()
55
+
56
+ # 删除NaN值(每组的第一行)
57
+ df_diff = df.dropna()
58
+
59
+ # 提取差分后的数据
60
+ y_diff = df_diff['y_diff'].values
61
+ x_diff = df_diff['x_diff'].values
62
+
63
+ n = len(y_diff)
64
+
65
+ # 添加常数项
66
+ X = np.column_stack([np.ones(n), x_diff])
67
+
68
+ # OLS回归
69
+ model = sm.OLS(y_diff, X)
70
+ results = model.fit()
71
+
72
+ # 提取x_diff的系数作为因果效应估计
73
+ coef = results.params[1]
74
+ stderr = results.bse[1]
75
+ tstat = results.tvalues[1]
76
+ pval = results.pvalues[1]
77
+
78
+ # 计算置信区间
79
+ ci_lower = coef - 1.96 * stderr
80
+ ci_upper = coef + 1.96 * stderr
81
+
82
+ return FirstDifferenceResult(
83
+ estimate=float(coef),
84
+ std_error=float(stderr),
85
+ t_statistic=float(tstat),
86
+ p_value=float(pval),
87
+ confidence_interval=[float(ci_lower), float(ci_upper)],
88
+ n_observations=n
89
+ )
@@ -0,0 +1,103 @@
1
+ """
2
+ 面板数据固定效应模型实现
3
+ """
4
+
5
+ from typing import List, Optional
6
+ import numpy as np
7
+ import pandas as pd
8
+ from pydantic import BaseModel, Field
9
+ from scipy import stats
10
+ import statsmodels.api as sm
11
+ from linearmodels.panel import PanelOLS
12
+
13
+
14
+ class FixedEffectsResult(BaseModel):
15
+ """固定效应模型结果"""
16
+ method: str = Field(default="Fixed Effects Model", description="使用的因果识别方法")
17
+ estimate: float = Field(..., description="因果效应估计值")
18
+ std_error: float = Field(..., description="标准误")
19
+ t_statistic: float = Field(..., description="t统计量")
20
+ p_value: float = Field(..., description="p值")
21
+ confidence_interval: List[float] = Field(..., description="置信区间")
22
+ n_observations: int = Field(..., description="观测数量")
23
+ n_entities: int = Field(..., description="个体数量")
24
+ n_time_periods: int = Field(..., description="时间期数")
25
+
26
+
27
+ def fixed_effects_model(
28
+ y: List[float],
29
+ x: List[List[float]],
30
+ entity_ids: List[str],
31
+ time_periods: List[str],
32
+ constant: bool = True
33
+ ) -> FixedEffectsResult:
34
+ """
35
+ 固定效应模型
36
+
37
+ 使用linearmodels.panel.PanelOLS实现固定效应模型。
38
+
39
+ Args:
40
+ y: 因变量
41
+ x: 自变量
42
+ entity_ids: 个体标识符
43
+ time_periods: 时间标识符
44
+ constant: 是否包含常数项
45
+
46
+ Returns:
47
+ FixedEffectsResult: 固定效应模型结果
48
+ """
49
+ # 转换为DataFrame
50
+ x_array = np.array(x)
51
+ if x_array.ndim == 1:
52
+ x_array = x_array.reshape(-1, 1)
53
+
54
+ # 创建多重索引面板数据
55
+ df = pd.DataFrame({
56
+ 'y': y,
57
+ 'entity': entity_ids,
58
+ 'time': [int(t.split('_')[1]) if isinstance(t, str) and '_' in t else i
59
+ for i, t in enumerate(time_periods)] # 处理字符串格式的时间
60
+ })
61
+
62
+ # 添加自变量
63
+ k_x = x_array.shape[1]
64
+ for i in range(k_x):
65
+ df[f'x{i+1}'] = x_array[:, i]
66
+
67
+ # 设置多重索引
68
+ df = df.set_index(['entity', 'time'])
69
+
70
+ # 定义因变量和自变量
71
+ dependent = df['y']
72
+ explanatory_vars = [f'x{i+1}' for i in range(k_x)]
73
+ explanatory = df[explanatory_vars]
74
+
75
+ # 使用linearmodels进行固定效应估计
76
+ model = PanelOLS(dependent, explanatory, entity_effects=True)
77
+ results = model.fit()
78
+
79
+ # 提取主要变量的估计结果(假设关注最后一个变量)
80
+ target_var = f'x{k_x}'
81
+ coef = results.params[target_var]
82
+ stderr = results.std_errors[target_var]
83
+ tstat = results.tstats[target_var]
84
+ pval = results.pvalues[target_var]
85
+
86
+ # 计算置信区间
87
+ ci_lower = coef - 1.96 * stderr
88
+ ci_upper = coef + 1.96 * stderr
89
+
90
+ # 计算实体和时间期数
91
+ n_entities = len(df.index.get_level_values('entity').unique())
92
+ n_time_periods = len(df.index.get_level_values('time').unique())
93
+
94
+ return FixedEffectsResult(
95
+ estimate=float(coef),
96
+ std_error=float(stderr),
97
+ t_statistic=float(tstat),
98
+ p_value=float(pval),
99
+ confidence_interval=[float(ci_lower), float(ci_upper)],
100
+ n_observations=len(df),
101
+ n_entities=n_entities,
102
+ n_time_periods=n_time_periods
103
+ )