aigroup-econ-mcp 1.3.3__py3-none-any.whl → 2.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (198) hide show
  1. .gitignore +253 -0
  2. PKG-INFO +732 -0
  3. README.md +687 -0
  4. __init__.py +14 -0
  5. aigroup_econ_mcp-2.0.1.dist-info/METADATA +732 -0
  6. aigroup_econ_mcp-2.0.1.dist-info/RECORD +170 -0
  7. aigroup_econ_mcp-2.0.1.dist-info/entry_points.txt +2 -0
  8. aigroup_econ_mcp-2.0.1.dist-info/licenses/LICENSE +21 -0
  9. cli.py +32 -0
  10. econometrics/README.md +18 -0
  11. econometrics/__init__.py +191 -0
  12. econometrics/advanced_methods/modern_computing_machine_learning/__init__.py +30 -0
  13. econometrics/advanced_methods/modern_computing_machine_learning/causal_forest.py +253 -0
  14. econometrics/advanced_methods/modern_computing_machine_learning/double_ml.py +268 -0
  15. econometrics/advanced_methods/modern_computing_machine_learning/gradient_boosting.py +249 -0
  16. econometrics/advanced_methods/modern_computing_machine_learning/hierarchical_clustering.py +243 -0
  17. econometrics/advanced_methods/modern_computing_machine_learning/kmeans_clustering.py +293 -0
  18. econometrics/advanced_methods/modern_computing_machine_learning/neural_network.py +264 -0
  19. econometrics/advanced_methods/modern_computing_machine_learning/random_forest.py +195 -0
  20. econometrics/advanced_methods/modern_computing_machine_learning/support_vector_machine.py +226 -0
  21. econometrics/advanced_methods/modern_computing_machine_learning/test_all_modules.py +329 -0
  22. econometrics/advanced_methods/modern_computing_machine_learning/test_report.md +107 -0
  23. econometrics/basic_parametric_estimation/__init__.py +31 -0
  24. econometrics/basic_parametric_estimation/gmm/__init__.py +13 -0
  25. econometrics/basic_parametric_estimation/gmm/gmm_model.py +256 -0
  26. econometrics/basic_parametric_estimation/mle/__init__.py +13 -0
  27. econometrics/basic_parametric_estimation/mle/mle_model.py +241 -0
  28. econometrics/basic_parametric_estimation/ols/__init__.py +13 -0
  29. econometrics/basic_parametric_estimation/ols/ols_model.py +141 -0
  30. econometrics/causal_inference/__init__.py +66 -0
  31. econometrics/causal_inference/causal_identification_strategy/__init__.py +104 -0
  32. econometrics/causal_inference/causal_identification_strategy/control_function.py +112 -0
  33. econometrics/causal_inference/causal_identification_strategy/difference_in_differences.py +107 -0
  34. econometrics/causal_inference/causal_identification_strategy/event_study.py +119 -0
  35. econometrics/causal_inference/causal_identification_strategy/first_difference.py +89 -0
  36. econometrics/causal_inference/causal_identification_strategy/fixed_effects.py +103 -0
  37. econometrics/causal_inference/causal_identification_strategy/hausman_test.py +69 -0
  38. econometrics/causal_inference/causal_identification_strategy/instrumental_variables.py +145 -0
  39. econometrics/causal_inference/causal_identification_strategy/mediation_analysis.py +121 -0
  40. econometrics/causal_inference/causal_identification_strategy/moderation_analysis.py +109 -0
  41. econometrics/causal_inference/causal_identification_strategy/propensity_score_matching.py +140 -0
  42. econometrics/causal_inference/causal_identification_strategy/random_effects.py +100 -0
  43. econometrics/causal_inference/causal_identification_strategy/regression_discontinuity.py +98 -0
  44. econometrics/causal_inference/causal_identification_strategy/synthetic_control.py +111 -0
  45. econometrics/causal_inference/causal_identification_strategy/triple_difference.py +86 -0
  46. econometrics/distribution_analysis/__init__.py +28 -0
  47. econometrics/distribution_analysis/oaxaca_blinder.py +184 -0
  48. econometrics/distribution_analysis/time_series_decomposition.py +152 -0
  49. econometrics/distribution_analysis/variance_decomposition.py +179 -0
  50. econometrics/missing_data/__init__.py +18 -0
  51. econometrics/missing_data/imputation_methods.py +219 -0
  52. econometrics/missing_data/missing_data_measurement_error/__init__.py +0 -0
  53. econometrics/model_specification_diagnostics_robust_inference/README.md +173 -0
  54. econometrics/model_specification_diagnostics_robust_inference/__init__.py +78 -0
  55. econometrics/model_specification_diagnostics_robust_inference/diagnostic_tests/__init__.py +20 -0
  56. econometrics/model_specification_diagnostics_robust_inference/diagnostic_tests/diagnostic_tests_model.py +149 -0
  57. econometrics/model_specification_diagnostics_robust_inference/generalized_least_squares/__init__.py +15 -0
  58. econometrics/model_specification_diagnostics_robust_inference/generalized_least_squares/gls_model.py +130 -0
  59. econometrics/model_specification_diagnostics_robust_inference/model_selection/__init__.py +18 -0
  60. econometrics/model_specification_diagnostics_robust_inference/model_selection/model_selection_model.py +286 -0
  61. econometrics/model_specification_diagnostics_robust_inference/regularization/__init__.py +15 -0
  62. econometrics/model_specification_diagnostics_robust_inference/regularization/regularization_model.py +177 -0
  63. econometrics/model_specification_diagnostics_robust_inference/robust_errors/__init__.py +15 -0
  64. econometrics/model_specification_diagnostics_robust_inference/robust_errors/robust_errors_model.py +122 -0
  65. econometrics/model_specification_diagnostics_robust_inference/simultaneous_equations/__init__.py +15 -0
  66. econometrics/model_specification_diagnostics_robust_inference/simultaneous_equations/simultaneous_equations_model.py +246 -0
  67. econometrics/model_specification_diagnostics_robust_inference/weighted_least_squares/__init__.py +15 -0
  68. econometrics/model_specification_diagnostics_robust_inference/weighted_least_squares/wls_model.py +127 -0
  69. econometrics/nonparametric/__init__.py +35 -0
  70. econometrics/nonparametric/gam_model.py +117 -0
  71. econometrics/nonparametric/kernel_regression.py +161 -0
  72. econometrics/nonparametric/nonparametric_semiparametric_methods/__init__.py +0 -0
  73. econometrics/nonparametric/quantile_regression.py +249 -0
  74. econometrics/nonparametric/spline_regression.py +100 -0
  75. econometrics/spatial_econometrics/__init__.py +68 -0
  76. econometrics/spatial_econometrics/geographically_weighted_regression.py +211 -0
  77. econometrics/spatial_econometrics/gwr_simple.py +154 -0
  78. econometrics/spatial_econometrics/spatial_autocorrelation.py +356 -0
  79. econometrics/spatial_econometrics/spatial_durbin_model.py +177 -0
  80. econometrics/spatial_econometrics/spatial_econometrics_new/__init__.py +0 -0
  81. econometrics/spatial_econometrics/spatial_regression.py +315 -0
  82. econometrics/spatial_econometrics/spatial_weights.py +226 -0
  83. econometrics/specific_data_modeling/micro_discrete_limited_data/README.md +164 -0
  84. econometrics/specific_data_modeling/micro_discrete_limited_data/__init__.py +40 -0
  85. econometrics/specific_data_modeling/micro_discrete_limited_data/count_data_models.py +311 -0
  86. econometrics/specific_data_modeling/micro_discrete_limited_data/discrete_choice_models.py +294 -0
  87. econometrics/specific_data_modeling/micro_discrete_limited_data/limited_dependent_variable_models.py +282 -0
  88. econometrics/specific_data_modeling/survival_duration_data/__init__.py +0 -0
  89. econometrics/specific_data_modeling/time_series_panel_data/__init__.py +143 -0
  90. econometrics/specific_data_modeling/time_series_panel_data/arima_model.py +104 -0
  91. econometrics/specific_data_modeling/time_series_panel_data/cointegration_vecm.py +334 -0
  92. econometrics/specific_data_modeling/time_series_panel_data/dynamic_panel_models.py +653 -0
  93. econometrics/specific_data_modeling/time_series_panel_data/exponential_smoothing.py +176 -0
  94. econometrics/specific_data_modeling/time_series_panel_data/garch_model.py +198 -0
  95. econometrics/specific_data_modeling/time_series_panel_data/panel_diagnostics.py +125 -0
  96. econometrics/specific_data_modeling/time_series_panel_data/panel_var.py +60 -0
  97. econometrics/specific_data_modeling/time_series_panel_data/structural_break_tests.py +87 -0
  98. econometrics/specific_data_modeling/time_series_panel_data/time_varying_parameter_models.py +106 -0
  99. econometrics/specific_data_modeling/time_series_panel_data/unit_root_tests.py +204 -0
  100. econometrics/specific_data_modeling/time_series_panel_data/var_svar_model.py +372 -0
  101. econometrics/statistical_inference/__init__.py +21 -0
  102. econometrics/statistical_inference/bootstrap_methods.py +162 -0
  103. econometrics/statistical_inference/permutation_test.py +177 -0
  104. econometrics/statistical_inference/statistical_inference_techniques/__init__.py +0 -0
  105. econometrics/statistics/distribution_decomposition_methods/__init__.py +0 -0
  106. econometrics/survival_analysis/__init__.py +18 -0
  107. econometrics/survival_analysis/survival_models.py +259 -0
  108. econometrics/tests/basic_parametric_estimation_tests/__init__.py +3 -0
  109. econometrics/tests/basic_parametric_estimation_tests/test_gmm.py +128 -0
  110. econometrics/tests/basic_parametric_estimation_tests/test_mle.py +127 -0
  111. econometrics/tests/basic_parametric_estimation_tests/test_ols.py +100 -0
  112. econometrics/tests/causal_inference_tests/__init__.py +3 -0
  113. econometrics/tests/causal_inference_tests/detailed_test.py +441 -0
  114. econometrics/tests/causal_inference_tests/test_all_methods.py +418 -0
  115. econometrics/tests/causal_inference_tests/test_causal_identification_strategy.py +202 -0
  116. econometrics/tests/causal_inference_tests/test_difference_in_differences.py +53 -0
  117. econometrics/tests/causal_inference_tests/test_instrumental_variables.py +44 -0
  118. econometrics/tests/model_specification_diagnostics_tests/__init__.py +3 -0
  119. econometrics/tests/model_specification_diagnostics_tests/test_diagnostic_tests.py +86 -0
  120. econometrics/tests/model_specification_diagnostics_tests/test_robust_errors.py +89 -0
  121. econometrics/tests/specific_data_modeling_tests/__init__.py +3 -0
  122. econometrics/tests/specific_data_modeling_tests/test_arima.py +98 -0
  123. econometrics/tests/specific_data_modeling_tests/test_dynamic_panel.py +198 -0
  124. econometrics/tests/specific_data_modeling_tests/test_exponential_smoothing.py +105 -0
  125. econometrics/tests/specific_data_modeling_tests/test_garch.py +118 -0
  126. econometrics/tests/specific_data_modeling_tests/test_micro_discrete_limited_data.py +189 -0
  127. econometrics/tests/specific_data_modeling_tests/test_unit_root.py +156 -0
  128. econometrics/tests/specific_data_modeling_tests/test_var.py +124 -0
  129. econometrics//321/206/320/254/320/272/321/205/342/225/235/320/220/321/205/320/237/320/241/321/205/320/264/320/267/321/207/342/226/222/342/225/227/321/204/342/225/235/320/250/321/205/320/225/320/230/321/207/342/225/221/320/267/321/205/320/230/320/226/321/206/320/256/320/240.md +544 -0
  130. prompts/__init__.py +0 -0
  131. prompts/analysis_guides.py +43 -0
  132. pyproject.toml +85 -0
  133. resources/MCP_MASTER_GUIDE.md +422 -0
  134. resources/MCP_TOOLS_DATA_FORMAT_GUIDE.md +185 -0
  135. resources/__init__.py +0 -0
  136. server.py +97 -0
  137. tools/README.md +88 -0
  138. tools/__init__.py +119 -0
  139. tools/causal_inference_adapter.py +658 -0
  140. tools/data_loader.py +213 -0
  141. tools/decorators.py +38 -0
  142. tools/distribution_analysis_adapter.py +121 -0
  143. tools/econometrics_adapter.py +286 -0
  144. tools/gwr_simple_adapter.py +54 -0
  145. tools/machine_learning_adapter.py +567 -0
  146. tools/mcp_tool_groups/__init__.py +15 -0
  147. tools/mcp_tool_groups/basic_parametric_tools.py +173 -0
  148. tools/mcp_tool_groups/causal_inference_tools.py +643 -0
  149. tools/mcp_tool_groups/distribution_analysis_tools.py +169 -0
  150. tools/mcp_tool_groups/machine_learning_tools.py +422 -0
  151. tools/mcp_tool_groups/microecon_tools.py +325 -0
  152. tools/mcp_tool_groups/missing_data_tools.py +117 -0
  153. tools/mcp_tool_groups/model_specification_tools.py +402 -0
  154. tools/mcp_tool_groups/nonparametric_tools.py +225 -0
  155. tools/mcp_tool_groups/spatial_econometrics_tools.py +323 -0
  156. tools/mcp_tool_groups/statistical_inference_tools.py +131 -0
  157. tools/mcp_tool_groups/time_series_tools.py +494 -0
  158. tools/mcp_tools_registry.py +124 -0
  159. tools/microecon_adapter.py +412 -0
  160. tools/missing_data_adapter.py +73 -0
  161. tools/model_specification_adapter.py +369 -0
  162. tools/nonparametric_adapter.py +190 -0
  163. tools/output_formatter.py +563 -0
  164. tools/spatial_econometrics_adapter.py +318 -0
  165. tools/statistical_inference_adapter.py +90 -0
  166. tools/survival_analysis_adapter.py +46 -0
  167. tools/time_series_panel_data_adapter.py +858 -0
  168. tools/time_series_panel_data_tools.py +65 -0
  169. aigroup_econ_mcp/__init__.py +0 -19
  170. aigroup_econ_mcp/cli.py +0 -82
  171. aigroup_econ_mcp/config.py +0 -561
  172. aigroup_econ_mcp/server.py +0 -452
  173. aigroup_econ_mcp/tools/__init__.py +0 -19
  174. aigroup_econ_mcp/tools/base.py +0 -470
  175. aigroup_econ_mcp/tools/cache.py +0 -533
  176. aigroup_econ_mcp/tools/data_loader.py +0 -195
  177. aigroup_econ_mcp/tools/file_parser.py +0 -1027
  178. aigroup_econ_mcp/tools/machine_learning.py +0 -60
  179. aigroup_econ_mcp/tools/ml_ensemble.py +0 -210
  180. aigroup_econ_mcp/tools/ml_evaluation.py +0 -272
  181. aigroup_econ_mcp/tools/ml_models.py +0 -54
  182. aigroup_econ_mcp/tools/ml_regularization.py +0 -186
  183. aigroup_econ_mcp/tools/monitoring.py +0 -555
  184. aigroup_econ_mcp/tools/optimized_example.py +0 -229
  185. aigroup_econ_mcp/tools/panel_data.py +0 -619
  186. aigroup_econ_mcp/tools/regression.py +0 -214
  187. aigroup_econ_mcp/tools/statistics.py +0 -154
  188. aigroup_econ_mcp/tools/time_series.py +0 -698
  189. aigroup_econ_mcp/tools/timeout.py +0 -283
  190. aigroup_econ_mcp/tools/tool_descriptions.py +0 -410
  191. aigroup_econ_mcp/tools/tool_handlers.py +0 -1016
  192. aigroup_econ_mcp/tools/tool_registry.py +0 -478
  193. aigroup_econ_mcp/tools/validation.py +0 -482
  194. aigroup_econ_mcp-1.3.3.dist-info/METADATA +0 -525
  195. aigroup_econ_mcp-1.3.3.dist-info/RECORD +0 -30
  196. aigroup_econ_mcp-1.3.3.dist-info/entry_points.txt +0 -2
  197. /aigroup_econ_mcp-1.3.3.dist-info/licenses/LICENSE → /LICENSE +0 -0
  198. {aigroup_econ_mcp-1.3.3.dist-info → aigroup_econ_mcp-2.0.1.dist-info}/WHEEL +0 -0
@@ -0,0 +1,179 @@
1
+ """
2
+ 方差分解 (Variance Decomposition / ANOVA)
3
+ 基于 scipy 和 statsmodels 实现
4
+ """
5
+
6
+ from typing import List, Optional, Dict
7
+ from pydantic import BaseModel, Field
8
+ import numpy as np
9
+
10
+ try:
11
+ from scipy import stats
12
+ import statsmodels.api as sm
13
+ from statsmodels.formula.api import ols
14
+ SCIPY_AVAILABLE = True
15
+ except ImportError:
16
+ SCIPY_AVAILABLE = False
17
+ stats = None
18
+ sm = None
19
+
20
+
21
class VarianceDecompositionResult(BaseModel):
    """Result container for a one-way variance decomposition (ANOVA).

    Produced by ``variance_decomposition``. Dict-valued fields are keyed by
    the string form of each group label (``str(group_id)``).
    """
    # Variance components; between + within are reported on the same (N-1)
    # denominator as the total, so they sum to total_variance.
    total_variance: float = Field(..., description="总方差")
    between_group_variance: float = Field(..., description="组间方差")
    within_group_variance: float = Field(..., description="组内方差")
    # One-way ANOVA F test of equal group means (scipy.stats.f_oneway).
    f_statistic: float = Field(..., description="F统计量")
    p_value: float = Field(..., description="P值")
    # Effect sizes: eta² = SS_between / SS_total; omega² is the less biased variant.
    eta_squared: float = Field(..., description="Eta平方(效应量)")
    omega_squared: float = Field(..., description="Omega平方(偏效应量)")
    # Per-group descriptive statistics, keyed by str(group label).
    group_means: Dict[str, float] = Field(..., description="各组均值")
    group_variances: Dict[str, float] = Field(..., description="各组方差")
    group_sizes: Dict[str, int] = Field(..., description="各组样本量")
    n_groups: int = Field(..., description="组数")
    total_n: int = Field(..., description="总样本量")
    # Human-readable report assembled by variance_decomposition.
    summary: str = Field(..., description="摘要信息")
36
+
37
+
38
def variance_decomposition(
    values: List[float],
    groups: List[str],
    group_names: Optional[List[str]] = None
) -> VarianceDecompositionResult:
    """Perform a one-way variance decomposition (one-way ANOVA).

    Splits the total sample variance of ``values`` into between-group and
    within-group components, runs the one-way ANOVA F test and reports the
    eta² / omega² effect sizes plus per-group descriptive statistics.

    Args:
        values: Observed values.
        groups: Group label for each observation (same length as ``values``).
        group_names: Optional group-name mapping; currently unused, kept for
            interface compatibility.

    Returns:
        VarianceDecompositionResult: Variance components, F statistic,
        p-value, effect sizes and per-group statistics.

    Raises:
        ImportError: scipy/statsmodels are not installed.
        ValueError: Empty input, length mismatch, or fewer than 2 groups.
    """
    if not SCIPY_AVAILABLE:
        raise ImportError("scipy和statsmodels库未安装。请运行: pip install scipy statsmodels")

    # Input validation
    if not values or not groups:
        raise ValueError("values和groups不能为空")

    if len(values) != len(groups):
        raise ValueError(f"values长度({len(values)})与groups长度({len(groups)})不一致")

    # Data preparation
    y = np.array(values, dtype=np.float64)
    g = np.array(groups)

    unique_groups = np.unique(g)
    n_groups = len(unique_groups)

    if n_groups < 2:
        raise ValueError("至少需要2个组进行方差分解")

    # Overall statistics
    grand_mean = y.mean()
    total_variance = y.var(ddof=1)
    total_n = len(y)

    group_means = {}
    group_variances = {}
    group_sizes = {}

    groups_data = []
    # SS_within is accumulated directly from squared deviations so that a
    # singleton group (n=1) contributes 0. Previously it was computed as
    # Σ(nᵢ-1)sᵢ² with sᵢ² = var(ddof=1), which is NaN for n=1 and poisoned
    # ss_within, omega² and the summary.
    ss_within = 0.0
    for group_id in unique_groups:
        mask = g == group_id
        group_data = y[mask]
        groups_data.append(group_data)

        group_key = str(group_id)
        group_mean = float(group_data.mean())
        group_means[group_key] = group_mean
        # Unbiased sample variance; defined as 0.0 for a single observation
        # instead of NaN.
        if len(group_data) > 1:
            group_variances[group_key] = float(group_data.var(ddof=1))
        else:
            group_variances[group_key] = 0.0
        group_sizes[group_key] = int(len(group_data))
        ss_within += float(((group_data - group_mean) ** 2).sum())

    # One-way ANOVA F test
    f_stat, p_value = stats.f_oneway(*groups_data)

    # SS_between = Σ nᵢ (ȳᵢ - ȳ)²
    ss_between = sum(
        group_sizes[str(gid)] * (group_means[str(gid)] - grand_mean) ** 2
        for gid in unique_groups
    )

    # SS_total = (N - 1) * total sample variance
    ss_total = (total_n - 1) * total_variance

    # Degrees of freedom
    df_between = n_groups - 1
    df_within = total_n - n_groups

    # Within-group mean square; guard df_within == 0 (every group a singleton)
    # instead of raising ZeroDivisionError.
    ms_within = ss_within / df_within if df_within > 0 else float("nan")

    # Variance components on the (N - 1) denominator so they sum to the total.
    between_group_var = ss_between / (total_n - 1)
    within_group_var = ss_within / (total_n - 1)

    # Effect sizes.
    # Eta² = SS_between / SS_total
    eta_squared = ss_between / ss_total if ss_total > 0 else 0.0
    # Omega² = (SS_between - df_between * MS_within) / (SS_total + MS_within)
    omega_squared = (ss_between - df_between * ms_within) / (ss_total + ms_within)
    omega_squared = max(0.0, omega_squared)  # clamp to non-negative

    # Human-readable report (Chinese text is part of the public output).
    summary = f"""方差分解 (ANOVA) 分析:
- 总样本量: {total_n}
- 组数: {n_groups}
- 总方差: {total_variance:.4f}

方差分解:
- 组间方差: {between_group_var:.4f} ({eta_squared*100:.1f}%)
- 组内方差: {within_group_var:.4f} ({(1-eta_squared)*100:.1f}%)

F检验:
- F统计量: {f_stat:.4f}
- P值: {p_value:.4f}
- 结论: {'组间差异显著' if p_value < 0.05 else '组间差异不显著'}

效应量:
- Eta²: {eta_squared:.4f}
- Omega²: {omega_squared:.4f}

各组均值:
"""
    for gid in unique_groups:
        gkey = str(gid)
        summary += f"  {gkey}: {group_means[gkey]:.4f} (n={group_sizes[gkey]}, s²={group_variances[gkey]:.4f})\n"

    return VarianceDecompositionResult(
        total_variance=float(total_variance),
        between_group_variance=float(between_group_var),
        within_group_variance=float(within_group_var),
        f_statistic=float(f_stat),
        p_value=float(p_value),
        eta_squared=float(eta_squared),
        omega_squared=float(omega_squared),
        group_means=group_means,
        group_variances=group_variances,
        group_sizes=group_sizes,
        n_groups=n_groups,
        total_n=total_n,
        summary=summary
    )
@@ -0,0 +1,18 @@
1
+ """
2
+ 缺失数据处理模块
3
+ 提供多种插补和处理缺失数据的方法
4
+ """
5
+
6
+ from .imputation_methods import (
7
+ simple_imputation,
8
+ multiple_imputation,
9
+ SimpleImputationResult,
10
+ MultipleImputationResult
11
+ )
12
+
13
+ __all__ = [
14
+ 'simple_imputation',
15
+ 'multiple_imputation',
16
+ 'SimpleImputationResult',
17
+ 'MultipleImputationResult'
18
+ ]
@@ -0,0 +1,219 @@
1
+ """
2
+ 缺失数据插补方法
3
+ 基于 sklearn.impute 实现
4
+ """
5
+
6
+ from typing import List, Optional, Dict
7
+ from pydantic import BaseModel, Field
8
+ import numpy as np
9
+
10
try:
    # The experimental-feature enabler MUST be imported before
    # IterativeImputer: sklearn.impute only exposes IterativeImputer after
    # `sklearn.experimental.enable_iterative_imputer` has run. The previous
    # order raised ImportError even with scikit-learn installed, leaving
    # SKLEARN_AVAILABLE permanently False.
    from sklearn.experimental import enable_iterative_imputer  # noqa: F401
    from sklearn.impute import SimpleImputer, IterativeImputer
    SKLEARN_AVAILABLE = True
except ImportError:
    # scikit-learn genuinely missing: callers check SKLEARN_AVAILABLE and
    # raise a descriptive ImportError themselves.
    SKLEARN_AVAILABLE = False
    SimpleImputer = None
    IterativeImputer = None
18
+
19
+
20
class SimpleImputationResult(BaseModel):
    """Result container for a single (simple) imputation run.

    Produced by ``simple_imputation``. Matrices are nested lists so the
    model serializes cleanly to JSON.
    """
    imputed_data: List[List[float]] = Field(..., description="插补后的数据")
    # True where the original input was NaN.
    missing_mask: List[List[bool]] = Field(..., description="缺失值掩码")
    n_missing: int = Field(..., description="缺失值总数")
    # Fraction of missing cells over n_observations * n_features.
    missing_rate: float = Field(..., description="缺失率")
    # The strategy passed to SimpleImputer ("mean", "median", ...).
    imputation_method: str = Field(..., description="插补方法")
    # Per-column statistic used to fill missing entries (imputer.statistics_).
    fill_values: List[float] = Field(..., description="填充值(每列)")
    n_observations: int = Field(..., description="观测数量")
    n_features: int = Field(..., description="特征数量")
    # Human-readable report assembled by simple_imputation.
    summary: str = Field(..., description="摘要信息")
31
+
32
+
33
class MultipleImputationResult(BaseModel):
    """Result container for multiple imputation (MICE).

    Produced by ``multiple_imputation``; holds every completed dataset from
    the repeated IterativeImputer runs, serialized as nested lists.
    """
    imputed_datasets: List[List[List[float]]] = Field(..., description="多个插补数据集")
    n_imputations: int = Field(..., description="插补次数")
    # True where the original input was NaN.
    missing_mask: List[List[bool]] = Field(..., description="缺失值掩码")
    n_missing: int = Field(..., description="缺失值总数")
    missing_rate: float = Field(..., description="缺失率")
    # Per-run iteration counts and early-stop flags ("iterations"/"converged").
    convergence_info: Dict = Field(..., description="收敛信息")
    n_observations: int = Field(..., description="观测数量")
    n_features: int = Field(..., description="特征数量")
    # Human-readable report assembled by multiple_imputation.
    summary: str = Field(..., description="摘要信息")
44
+
45
+
46
def simple_imputation(
    data: List[List[float]],
    strategy: str = "mean",
    fill_value: Optional[float] = None
) -> SimpleImputationResult:
    """Impute missing values (NaN) with scikit-learn's SimpleImputer.

    Args:
        data: 2-D data with NaN marking missing entries; a 1-D list is
            treated as a single column.
        strategy: Imputation strategy — "mean", "median", "most_frequent"
            or "constant".
        fill_value: Fill value used when ``strategy == "constant"``
            (defaults to 0.0).

    Returns:
        SimpleImputationResult: Imputed matrix, missing-value mask and
        statistics, and the per-column fill values.

    Raises:
        ImportError: scikit-learn is not installed.
        ValueError: Empty data, unknown strategy, or a column that is
            entirely NaN. (SimpleImputer silently drops all-NaN columns,
            which would make the output column count disagree with
            ``n_features`` and misalign the per-column report.)
    """
    if not SKLEARN_AVAILABLE:
        raise ImportError("sklearn库未安装。请运行: pip install scikit-learn")

    # Input validation
    if not data:
        raise ValueError("data不能为空")

    # Fail fast with a clear message instead of a deep sklearn error.
    valid_strategies = {"mean", "median", "most_frequent", "constant"}
    if strategy not in valid_strategies:
        raise ValueError(f"未知的插补策略: {strategy}")

    # Convert to a 2-D float array (NaN encodes "missing").
    X = np.array(data, dtype=np.float64)
    if X.ndim == 1:
        X = X.reshape(-1, 1)

    n, k = X.shape

    # Missing-value bookkeeping
    missing_mask = np.isnan(X)
    n_missing = int(missing_mask.sum())
    missing_rate = float(n_missing / (n * k))

    # An all-NaN column has no statistic to impute from; SimpleImputer would
    # silently drop it, so the result shape would no longer match n_features.
    if missing_mask.all(axis=0).any():
        raise ValueError("存在整列缺失的特征,无法插补")

    # Build the imputer
    if strategy == "constant":
        imputer = SimpleImputer(
            strategy=strategy,
            fill_value=0.0 if fill_value is None else fill_value
        )
    else:
        imputer = SimpleImputer(strategy=strategy)

    # Run the imputation
    X_imputed = imputer.fit_transform(X)

    # Per-column fill statistic chosen by the imputer
    fill_values = imputer.statistics_.tolist()

    # Human-readable report (Chinese text is part of the public output).
    summary = f"""简单插补:
- 观测数量: {n}
- 特征数量: {k}
- 缺失值数量: {n_missing}
- 缺失率: {missing_rate*100:.2f}%
- 插补策略: {strategy}

各列填充值:
"""
    for i, val in enumerate(fill_values):
        col_missing = int(missing_mask[:, i].sum())
        summary += f"  列{i+1}: {val:.4f} (缺失{col_missing}个)\n"

    return SimpleImputationResult(
        imputed_data=X_imputed.tolist(),
        missing_mask=missing_mask.tolist(),
        n_missing=n_missing,
        missing_rate=missing_rate,
        imputation_method=strategy,
        fill_values=fill_values,
        n_observations=n,
        n_features=k,
        summary=summary
    )
126
+
127
+
128
def multiple_imputation(
    data: List[List[float]],
    n_imputations: int = 5,
    max_iter: int = 10,
    random_state: Optional[int] = None
) -> MultipleImputationResult:
    """Multiple imputation via chained equations (MICE).

    Runs scikit-learn's IterativeImputer ``n_imputations`` times with
    distinct seeds and returns every completed dataset, so downstream
    estimates can be pooled with Rubin's rules.

    Args:
        data: 2-D data with NaN marking missing entries; a 1-D list is
            treated as a single column.
        n_imputations: Number of completed datasets to generate (>= 1).
        max_iter: Maximum MICE rounds per imputation (>= 1).
        random_state: Base seed; imputation ``i`` uses ``random_state + i``
            so runs differ but remain reproducible.

    Returns:
        MultipleImputationResult: The imputed datasets, missing-value
        statistics and per-run convergence info.

    Raises:
        ImportError: scikit-learn is not installed.
        ValueError: Empty data or non-positive n_imputations/max_iter.
    """
    if not SKLEARN_AVAILABLE:
        raise ImportError("sklearn库未安装")

    # Input validation
    if not data:
        raise ValueError("data不能为空")

    # Guard against silently producing zero datasets and a NaN average
    # iteration count (np.mean of an empty list) further below.
    if n_imputations < 1:
        raise ValueError("n_imputations必须大于等于1")
    if max_iter < 1:
        raise ValueError("max_iter必须大于等于1")

    X = np.array(data, dtype=np.float64)
    if X.ndim == 1:
        X = X.reshape(-1, 1)

    n, k = X.shape

    # Missing-value bookkeeping
    missing_mask = np.isnan(X)
    n_missing = int(missing_mask.sum())
    missing_rate = float(n_missing / (n * k))

    # Run the imputations
    imputed_datasets = []
    convergence_info = {"iterations": [], "converged": []}

    for i in range(n_imputations):
        # Distinct, reproducible seed per imputation when a base seed is given.
        seed = random_state + i if random_state is not None else None

        imputer = IterativeImputer(
            max_iter=max_iter,
            random_state=seed,
            verbose=0
        )

        X_imputed = imputer.fit_transform(X)
        imputed_datasets.append(X_imputed.tolist())

        # n_iter_ < max_iter means the tolerance criterion stopped the run early.
        convergence_info["iterations"].append(imputer.n_iter_)
        convergence_info["converged"].append(imputer.n_iter_ < max_iter)

    # Aggregate convergence diagnostics
    avg_iter = np.mean(convergence_info["iterations"])
    n_converged = sum(convergence_info["converged"])

    # Human-readable report (Chinese text is part of the public output).
    summary = f"""多重插补 (MICE):
- 观测数量: {n}
- 特征数量: {k}
- 缺失值数量: {n_missing}
- 缺失率: {missing_rate*100:.2f}%
- 插补次数: {n_imputations}
- 最大迭代: {max_iter}

收敛信息:
- 平均迭代数: {avg_iter:.1f}
- 收敛数据集: {n_converged}/{n_imputations}

说明: 生成{n_imputations}个完整的插补数据集,
可用于后续分析并合并结果(Rubin规则)
"""

    return MultipleImputationResult(
        imputed_datasets=imputed_datasets,
        n_imputations=n_imputations,
        missing_mask=missing_mask.tolist(),
        n_missing=n_missing,
        missing_rate=missing_rate,
        convergence_info=convergence_info,
        n_observations=n,
        n_features=k,
        summary=summary
    )
@@ -0,0 +1,173 @@
1
+ # 模型规范、诊断和稳健推断工具
2
+
3
+ 本模块提供了完整的模型规范检验、诊断测试和稳健推断方法工具集。
4
+
5
+ ## 工具列表
6
+
7
+ ### 1. 模型诊断检验 (Model Diagnostic Tests)
8
+ **工具名称**: `model_diagnostic_tests`
9
+
10
+ **功能**: 执行综合的模型诊断测试,包括:
11
+ - 异方差检验(Breusch-Pagan、White检验)
12
+ - 自相关检验(Durbin-Watson检验)
13
+ - 正态性检验(Jarque-Bera检验)
14
+ - 多重共线性诊断(方差膨胀因子VIF)
15
+
16
+ **使用场景**:
17
+ - OLS回归后的模型验证
18
+ - 检测模型假设是否满足
19
+ - 识别数据质量问题
20
+
21
+ ### 2. 广义最小二乘法 (Generalized Least Squares - GLS)
22
+ **工具名称**: `generalized_least_squares`
23
+
24
+ **功能**: 处理异方差性和自相关的回归方法
25
+
26
+ **主要特点**:
27
+ - 可指定误差项协方差矩阵
28
+ - 在满足GLS假设时比OLS更有效
29
+ - 适用于存在异方差或自相关的数据
30
+
31
+ **使用场景**:
32
+ - 时间序列数据回归
33
+ - 存在已知异方差模式的数据
34
+
35
+ ### 3. 加权最小二乘法 (Weighted Least Squares - WLS)
36
+ **工具名称**: `weighted_least_squares`
37
+
38
+ **功能**: 使用权重处理已知异方差性的回归方法
39
+
40
+ **主要特点**:
41
+ - 需要提供观测值权重
42
+ - 权重通常为方差的倒数
43
+ - 适用于分组数据或调查数据
44
+
45
+ **使用场景**:
46
+ - 调查数据分析
47
+ - 分组数据回归
48
+ - 已知误差方差的数据
49
+
50
+ ### 4. 稳健标准误回归 (Robust Standard Errors)
51
+ **工具名称**: `robust_errors_regression`
52
+
53
+ **功能**: 计算异方差稳健的标准误
54
+
55
+ **主要特点**:
56
+ - 支持多种协方差矩阵类型(HC0、HC1、HC2、HC3)
57
+ - 不改变系数估计,只调整标准误
58
+ - 在存在异方差时提供有效推断
59
+
60
+ **使用场景**:
61
+ - 横截面数据分析
62
+ - 异方差问题明显但形式未知
63
+ - 需要稳健推断的场景
64
+
65
+ ### 5. 模型选择准则 (Model Selection Criteria)
66
+ **工具名称**: `model_selection_criteria`
67
+
68
+ **功能**: 计算多种模型选择信息准则
69
+
70
+ **提供指标**:
71
+ - AIC(赤池信息准则)
72
+ - BIC(贝叶斯信息准则)
73
+ - HQIC(汉南-奎因信息准则)
74
+ - 交叉验证得分(可选)
75
+
76
+ **使用场景**:
77
+ - 比较不同模型规格
78
+ - 变量选择
79
+ - 确定最优模型
80
+
81
+ ### 6. 正则化回归 (Regularized Regression)
82
+ **工具名称**: `regularized_regression`
83
+
84
+ **功能**: 处理多重共线性和高维数据的正则化方法
85
+
86
+ **支持方法**:
87
+ - 岭回归(Ridge):L2惩罚
88
+ - LASSO:L1惩罚,可进行变量选择
89
+ - 弹性网络(Elastic Net):L1和L2的组合
90
+
91
+ **使用场景**:
92
+ - 高维数据回归
93
+ - 变量选择
94
+ - 处理多重共线性
95
+
96
+ ### 7. 联立方程模型 (Simultaneous Equations Model)
97
+ **工具名称**: `simultaneous_equations_model`
98
+
99
+ **功能**: 两阶段最小二乘法(2SLS)处理联立方程系统
100
+
101
+ **主要特点**:
102
+ - 处理内生性问题
103
+ - 需要有效的工具变量
104
+ - 支持多方程系统
105
+
106
+ **使用场景**:
107
+ - 供需模型
108
+ - 宏观经济模型
109
+ - 存在双向因果关系的模型
110
+
111
+ ## 使用示例
112
+
113
+ ### 诊断检验示例
114
+ ```python
115
+ # 使用MCP工具
116
+ {
117
+ "y_data": [1.0, 2.0, 3.0, 4.0, 5.0],
118
+ "x_data": [[1.0, 2.0], [2.0, 3.0], [3.0, 4.0], [4.0, 5.0], [5.0, 6.0]],
119
+ "feature_names": ["x1", "x2"],
120
+ "constant": true
121
+ }
122
+ ```
123
+
124
+ ### 稳健标准误回归示例
125
+ ```json
126
+ {
127
+ "y_data": [1.0, 2.0, 3.0, 4.0, 5.0],
128
+ "x_data": [[1.0, 2.0], [2.0, 3.0], [3.0, 4.0], [4.0, 5.0], [5.0, 6.0]],
129
+ "cov_type": "HC1",
130
+ "confidence_level": 0.95
131
+ }
132
+ ```
133
+
134
+ ### 正则化回归示例
135
+ ```json
136
+ {
137
+ "y_data": [1.0, 2.0, 3.0, 4.0, 5.0],
138
+ "x_data": [[1.0, 2.0], [2.0, 3.0], [3.0, 4.0], [4.0, 5.0], [5.0, 6.0]],
139
+ "method": "ridge",
140
+ "alpha": 1.0
141
+ }
142
+ ```
143
+
144
+ ## 技术细节
145
+
146
+ ### 实现架构
147
+ - **核心算法**: 位于各子模块的 `*_model.py` 文件
148
+ - **MCP适配器**: `tools/model_specification_adapter.py`
149
+ - **工具注册**: `tools/mcp_tool_groups/model_specification_tools.py`
150
+
151
+ ### 依赖库
152
+ - `statsmodels`: 用于统计模型和诊断检验
153
+ - `scikit-learn`: 用于正则化方法
154
+ - `linearmodels`: 用于联立方程模型
155
+ - `numpy`, `pandas`: 基础数据处理
156
+
157
+ ### 数据格式支持
158
+ - **输入**: JSON、CSV、Excel、TXT
159
+ - **输出**: JSON、Markdown、HTML
160
+
161
+ ## 注意事项
162
+
163
+ 1. **诊断检验**: 应在OLS回归后使用,检验模型假设
164
+ 2. **GLS/WLS**: 需要正确指定协方差矩阵或权重
165
+ 3. **稳健标准误**: 不改变系数估计,仅影响推断
166
+ 4. **正则化**: alpha参数需要通过交叉验证选择
167
+ 5. **联立方程**: 需要有效且足够数量的工具变量
168
+
169
+ ## 贡献者
170
+ AIGroup Economics Team
171
+
172
+ ## 许可证
173
+ MIT License
@@ -0,0 +1,78 @@
1
+ """
2
+ 模型设定、诊断与稳健推断 (Model Specification, Diagnostics and Robust Inference)
3
+
4
+ 当基础模型的理想假设不成立时,修正模型或调整推断;对模型进行诊断和选择。
5
+
6
+ 主要方法包括:
7
+ - 稳健标准误(处理异方差/自相关)
8
+ - 广义最小二乘法 (GLS)
9
+ - 加权最小二乘法 (WLS)
10
+ - 岭回归/LASSO/弹性网络(处理多重共线性/高维数据)
11
+ - 联立方程模型(处理双向因果关系)
12
+
13
+ 模型诊断:
14
+ - 异方差检验(White、Breusch-Pagan)
15
+ - 自相关检验(Durbin-Watson、Ljung-Box)
16
+ - 正态性检验(Jarque-Bera)
17
+ - 多重共线性诊断(VIF)
18
+ - 内生性检验(Durbin-Wu-Hausman)
19
+ - 残差诊断、影响点分析
20
+
21
+ 模型选择:
22
+ - 信息准则(AIC/BIC/HQIC)
23
+ - 交叉验证(K折、留一法)
24
+ - 格兰杰因果检验
25
+ """
26
+
27
+ # 导入子模块
28
+ from .robust_errors import (
29
+ RobustErrorsResult,
30
+ robust_errors_regression
31
+ )
32
+
33
+ from .diagnostic_tests import (
34
+ DiagnosticTestsResult,
35
+ diagnostic_tests
36
+ )
37
+
38
+ from .model_selection import (
39
+ ModelSelectionResult,
40
+ model_selection_criteria
41
+ )
42
+
43
+ from .generalized_least_squares import (
44
+ GLSResult,
45
+ gls_regression
46
+ )
47
+
48
+ from .weighted_least_squares import (
49
+ WLSResult,
50
+ wls_regression
51
+ )
52
+
53
+ from .regularization import (
54
+ RegularizationResult,
55
+ regularized_regression
56
+ )
57
+
58
+ from .simultaneous_equations import (
59
+ SimultaneousEquationsResult,
60
+ two_stage_least_squares
61
+ )
62
+
63
+ __all__ = [
64
+ "RobustErrorsResult",
65
+ "robust_errors_regression",
66
+ "DiagnosticTestsResult",
67
+ "diagnostic_tests",
68
+ "ModelSelectionResult",
69
+ "model_selection_criteria",
70
+ "GLSResult",
71
+ "gls_regression",
72
+ "WLSResult",
73
+ "wls_regression",
74
+ "RegularizationResult",
75
+ "regularized_regression",
76
+ "SimultaneousEquationsResult",
77
+ "two_stage_least_squares"
78
+ ]
@@ -0,0 +1,20 @@
1
+ """
2
+ 模型诊断测试 (Diagnostic Tests) 模块
3
+
4
+ 包括各种统计检验方法:
5
+ - 异方差检验(White、Breusch-Pagan)
6
+ - 自相关检验(Durbin-Watson、Ljung-Box)
7
+ - 正态性检验(Jarque-Bera)
8
+ - 多重共线性诊断(VIF)
9
+ - 内生性检验(Durbin-Wu-Hausman)
10
+ """
11
+
12
+ from .diagnostic_tests_model import (
13
+ DiagnosticTestsResult,
14
+ diagnostic_tests
15
+ )
16
+
17
+ __all__ = [
18
+ "DiagnosticTestsResult",
19
+ "diagnostic_tests"
20
+ ]