aigroup-econ-mcp 1.3.3__py3-none-any.whl → 2.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (198) hide show
  1. .gitignore +253 -0
  2. PKG-INFO +732 -0
  3. README.md +687 -0
  4. __init__.py +14 -0
  5. aigroup_econ_mcp-2.0.1.dist-info/METADATA +732 -0
  6. aigroup_econ_mcp-2.0.1.dist-info/RECORD +170 -0
  7. aigroup_econ_mcp-2.0.1.dist-info/entry_points.txt +2 -0
  8. aigroup_econ_mcp-2.0.1.dist-info/licenses/LICENSE +21 -0
  9. cli.py +32 -0
  10. econometrics/README.md +18 -0
  11. econometrics/__init__.py +191 -0
  12. econometrics/advanced_methods/modern_computing_machine_learning/__init__.py +30 -0
  13. econometrics/advanced_methods/modern_computing_machine_learning/causal_forest.py +253 -0
  14. econometrics/advanced_methods/modern_computing_machine_learning/double_ml.py +268 -0
  15. econometrics/advanced_methods/modern_computing_machine_learning/gradient_boosting.py +249 -0
  16. econometrics/advanced_methods/modern_computing_machine_learning/hierarchical_clustering.py +243 -0
  17. econometrics/advanced_methods/modern_computing_machine_learning/kmeans_clustering.py +293 -0
  18. econometrics/advanced_methods/modern_computing_machine_learning/neural_network.py +264 -0
  19. econometrics/advanced_methods/modern_computing_machine_learning/random_forest.py +195 -0
  20. econometrics/advanced_methods/modern_computing_machine_learning/support_vector_machine.py +226 -0
  21. econometrics/advanced_methods/modern_computing_machine_learning/test_all_modules.py +329 -0
  22. econometrics/advanced_methods/modern_computing_machine_learning/test_report.md +107 -0
  23. econometrics/basic_parametric_estimation/__init__.py +31 -0
  24. econometrics/basic_parametric_estimation/gmm/__init__.py +13 -0
  25. econometrics/basic_parametric_estimation/gmm/gmm_model.py +256 -0
  26. econometrics/basic_parametric_estimation/mle/__init__.py +13 -0
  27. econometrics/basic_parametric_estimation/mle/mle_model.py +241 -0
  28. econometrics/basic_parametric_estimation/ols/__init__.py +13 -0
  29. econometrics/basic_parametric_estimation/ols/ols_model.py +141 -0
  30. econometrics/causal_inference/__init__.py +66 -0
  31. econometrics/causal_inference/causal_identification_strategy/__init__.py +104 -0
  32. econometrics/causal_inference/causal_identification_strategy/control_function.py +112 -0
  33. econometrics/causal_inference/causal_identification_strategy/difference_in_differences.py +107 -0
  34. econometrics/causal_inference/causal_identification_strategy/event_study.py +119 -0
  35. econometrics/causal_inference/causal_identification_strategy/first_difference.py +89 -0
  36. econometrics/causal_inference/causal_identification_strategy/fixed_effects.py +103 -0
  37. econometrics/causal_inference/causal_identification_strategy/hausman_test.py +69 -0
  38. econometrics/causal_inference/causal_identification_strategy/instrumental_variables.py +145 -0
  39. econometrics/causal_inference/causal_identification_strategy/mediation_analysis.py +121 -0
  40. econometrics/causal_inference/causal_identification_strategy/moderation_analysis.py +109 -0
  41. econometrics/causal_inference/causal_identification_strategy/propensity_score_matching.py +140 -0
  42. econometrics/causal_inference/causal_identification_strategy/random_effects.py +100 -0
  43. econometrics/causal_inference/causal_identification_strategy/regression_discontinuity.py +98 -0
  44. econometrics/causal_inference/causal_identification_strategy/synthetic_control.py +111 -0
  45. econometrics/causal_inference/causal_identification_strategy/triple_difference.py +86 -0
  46. econometrics/distribution_analysis/__init__.py +28 -0
  47. econometrics/distribution_analysis/oaxaca_blinder.py +184 -0
  48. econometrics/distribution_analysis/time_series_decomposition.py +152 -0
  49. econometrics/distribution_analysis/variance_decomposition.py +179 -0
  50. econometrics/missing_data/__init__.py +18 -0
  51. econometrics/missing_data/imputation_methods.py +219 -0
  52. econometrics/missing_data/missing_data_measurement_error/__init__.py +0 -0
  53. econometrics/model_specification_diagnostics_robust_inference/README.md +173 -0
  54. econometrics/model_specification_diagnostics_robust_inference/__init__.py +78 -0
  55. econometrics/model_specification_diagnostics_robust_inference/diagnostic_tests/__init__.py +20 -0
  56. econometrics/model_specification_diagnostics_robust_inference/diagnostic_tests/diagnostic_tests_model.py +149 -0
  57. econometrics/model_specification_diagnostics_robust_inference/generalized_least_squares/__init__.py +15 -0
  58. econometrics/model_specification_diagnostics_robust_inference/generalized_least_squares/gls_model.py +130 -0
  59. econometrics/model_specification_diagnostics_robust_inference/model_selection/__init__.py +18 -0
  60. econometrics/model_specification_diagnostics_robust_inference/model_selection/model_selection_model.py +286 -0
  61. econometrics/model_specification_diagnostics_robust_inference/regularization/__init__.py +15 -0
  62. econometrics/model_specification_diagnostics_robust_inference/regularization/regularization_model.py +177 -0
  63. econometrics/model_specification_diagnostics_robust_inference/robust_errors/__init__.py +15 -0
  64. econometrics/model_specification_diagnostics_robust_inference/robust_errors/robust_errors_model.py +122 -0
  65. econometrics/model_specification_diagnostics_robust_inference/simultaneous_equations/__init__.py +15 -0
  66. econometrics/model_specification_diagnostics_robust_inference/simultaneous_equations/simultaneous_equations_model.py +246 -0
  67. econometrics/model_specification_diagnostics_robust_inference/weighted_least_squares/__init__.py +15 -0
  68. econometrics/model_specification_diagnostics_robust_inference/weighted_least_squares/wls_model.py +127 -0
  69. econometrics/nonparametric/__init__.py +35 -0
  70. econometrics/nonparametric/gam_model.py +117 -0
  71. econometrics/nonparametric/kernel_regression.py +161 -0
  72. econometrics/nonparametric/nonparametric_semiparametric_methods/__init__.py +0 -0
  73. econometrics/nonparametric/quantile_regression.py +249 -0
  74. econometrics/nonparametric/spline_regression.py +100 -0
  75. econometrics/spatial_econometrics/__init__.py +68 -0
  76. econometrics/spatial_econometrics/geographically_weighted_regression.py +211 -0
  77. econometrics/spatial_econometrics/gwr_simple.py +154 -0
  78. econometrics/spatial_econometrics/spatial_autocorrelation.py +356 -0
  79. econometrics/spatial_econometrics/spatial_durbin_model.py +177 -0
  80. econometrics/spatial_econometrics/spatial_econometrics_new/__init__.py +0 -0
  81. econometrics/spatial_econometrics/spatial_regression.py +315 -0
  82. econometrics/spatial_econometrics/spatial_weights.py +226 -0
  83. econometrics/specific_data_modeling/micro_discrete_limited_data/README.md +164 -0
  84. econometrics/specific_data_modeling/micro_discrete_limited_data/__init__.py +40 -0
  85. econometrics/specific_data_modeling/micro_discrete_limited_data/count_data_models.py +311 -0
  86. econometrics/specific_data_modeling/micro_discrete_limited_data/discrete_choice_models.py +294 -0
  87. econometrics/specific_data_modeling/micro_discrete_limited_data/limited_dependent_variable_models.py +282 -0
  88. econometrics/specific_data_modeling/survival_duration_data/__init__.py +0 -0
  89. econometrics/specific_data_modeling/time_series_panel_data/__init__.py +143 -0
  90. econometrics/specific_data_modeling/time_series_panel_data/arima_model.py +104 -0
  91. econometrics/specific_data_modeling/time_series_panel_data/cointegration_vecm.py +334 -0
  92. econometrics/specific_data_modeling/time_series_panel_data/dynamic_panel_models.py +653 -0
  93. econometrics/specific_data_modeling/time_series_panel_data/exponential_smoothing.py +176 -0
  94. econometrics/specific_data_modeling/time_series_panel_data/garch_model.py +198 -0
  95. econometrics/specific_data_modeling/time_series_panel_data/panel_diagnostics.py +125 -0
  96. econometrics/specific_data_modeling/time_series_panel_data/panel_var.py +60 -0
  97. econometrics/specific_data_modeling/time_series_panel_data/structural_break_tests.py +87 -0
  98. econometrics/specific_data_modeling/time_series_panel_data/time_varying_parameter_models.py +106 -0
  99. econometrics/specific_data_modeling/time_series_panel_data/unit_root_tests.py +204 -0
  100. econometrics/specific_data_modeling/time_series_panel_data/var_svar_model.py +372 -0
  101. econometrics/statistical_inference/__init__.py +21 -0
  102. econometrics/statistical_inference/bootstrap_methods.py +162 -0
  103. econometrics/statistical_inference/permutation_test.py +177 -0
  104. econometrics/statistical_inference/statistical_inference_techniques/__init__.py +0 -0
  105. econometrics/statistics/distribution_decomposition_methods/__init__.py +0 -0
  106. econometrics/survival_analysis/__init__.py +18 -0
  107. econometrics/survival_analysis/survival_models.py +259 -0
  108. econometrics/tests/basic_parametric_estimation_tests/__init__.py +3 -0
  109. econometrics/tests/basic_parametric_estimation_tests/test_gmm.py +128 -0
  110. econometrics/tests/basic_parametric_estimation_tests/test_mle.py +127 -0
  111. econometrics/tests/basic_parametric_estimation_tests/test_ols.py +100 -0
  112. econometrics/tests/causal_inference_tests/__init__.py +3 -0
  113. econometrics/tests/causal_inference_tests/detailed_test.py +441 -0
  114. econometrics/tests/causal_inference_tests/test_all_methods.py +418 -0
  115. econometrics/tests/causal_inference_tests/test_causal_identification_strategy.py +202 -0
  116. econometrics/tests/causal_inference_tests/test_difference_in_differences.py +53 -0
  117. econometrics/tests/causal_inference_tests/test_instrumental_variables.py +44 -0
  118. econometrics/tests/model_specification_diagnostics_tests/__init__.py +3 -0
  119. econometrics/tests/model_specification_diagnostics_tests/test_diagnostic_tests.py +86 -0
  120. econometrics/tests/model_specification_diagnostics_tests/test_robust_errors.py +89 -0
  121. econometrics/tests/specific_data_modeling_tests/__init__.py +3 -0
  122. econometrics/tests/specific_data_modeling_tests/test_arima.py +98 -0
  123. econometrics/tests/specific_data_modeling_tests/test_dynamic_panel.py +198 -0
  124. econometrics/tests/specific_data_modeling_tests/test_exponential_smoothing.py +105 -0
  125. econometrics/tests/specific_data_modeling_tests/test_garch.py +118 -0
  126. econometrics/tests/specific_data_modeling_tests/test_micro_discrete_limited_data.py +189 -0
  127. econometrics/tests/specific_data_modeling_tests/test_unit_root.py +156 -0
  128. econometrics/tests/specific_data_modeling_tests/test_var.py +124 -0
  129. econometrics//321/206/320/254/320/272/321/205/342/225/235/320/220/321/205/320/237/320/241/321/205/320/264/320/267/321/207/342/226/222/342/225/227/321/204/342/225/235/320/250/321/205/320/225/320/230/321/207/342/225/221/320/267/321/205/320/230/320/226/321/206/320/256/320/240.md +544 -0
  130. prompts/__init__.py +0 -0
  131. prompts/analysis_guides.py +43 -0
  132. pyproject.toml +85 -0
  133. resources/MCP_MASTER_GUIDE.md +422 -0
  134. resources/MCP_TOOLS_DATA_FORMAT_GUIDE.md +185 -0
  135. resources/__init__.py +0 -0
  136. server.py +97 -0
  137. tools/README.md +88 -0
  138. tools/__init__.py +119 -0
  139. tools/causal_inference_adapter.py +658 -0
  140. tools/data_loader.py +213 -0
  141. tools/decorators.py +38 -0
  142. tools/distribution_analysis_adapter.py +121 -0
  143. tools/econometrics_adapter.py +286 -0
  144. tools/gwr_simple_adapter.py +54 -0
  145. tools/machine_learning_adapter.py +567 -0
  146. tools/mcp_tool_groups/__init__.py +15 -0
  147. tools/mcp_tool_groups/basic_parametric_tools.py +173 -0
  148. tools/mcp_tool_groups/causal_inference_tools.py +643 -0
  149. tools/mcp_tool_groups/distribution_analysis_tools.py +169 -0
  150. tools/mcp_tool_groups/machine_learning_tools.py +422 -0
  151. tools/mcp_tool_groups/microecon_tools.py +325 -0
  152. tools/mcp_tool_groups/missing_data_tools.py +117 -0
  153. tools/mcp_tool_groups/model_specification_tools.py +402 -0
  154. tools/mcp_tool_groups/nonparametric_tools.py +225 -0
  155. tools/mcp_tool_groups/spatial_econometrics_tools.py +323 -0
  156. tools/mcp_tool_groups/statistical_inference_tools.py +131 -0
  157. tools/mcp_tool_groups/time_series_tools.py +494 -0
  158. tools/mcp_tools_registry.py +124 -0
  159. tools/microecon_adapter.py +412 -0
  160. tools/missing_data_adapter.py +73 -0
  161. tools/model_specification_adapter.py +369 -0
  162. tools/nonparametric_adapter.py +190 -0
  163. tools/output_formatter.py +563 -0
  164. tools/spatial_econometrics_adapter.py +318 -0
  165. tools/statistical_inference_adapter.py +90 -0
  166. tools/survival_analysis_adapter.py +46 -0
  167. tools/time_series_panel_data_adapter.py +858 -0
  168. tools/time_series_panel_data_tools.py +65 -0
  169. aigroup_econ_mcp/__init__.py +0 -19
  170. aigroup_econ_mcp/cli.py +0 -82
  171. aigroup_econ_mcp/config.py +0 -561
  172. aigroup_econ_mcp/server.py +0 -452
  173. aigroup_econ_mcp/tools/__init__.py +0 -19
  174. aigroup_econ_mcp/tools/base.py +0 -470
  175. aigroup_econ_mcp/tools/cache.py +0 -533
  176. aigroup_econ_mcp/tools/data_loader.py +0 -195
  177. aigroup_econ_mcp/tools/file_parser.py +0 -1027
  178. aigroup_econ_mcp/tools/machine_learning.py +0 -60
  179. aigroup_econ_mcp/tools/ml_ensemble.py +0 -210
  180. aigroup_econ_mcp/tools/ml_evaluation.py +0 -272
  181. aigroup_econ_mcp/tools/ml_models.py +0 -54
  182. aigroup_econ_mcp/tools/ml_regularization.py +0 -186
  183. aigroup_econ_mcp/tools/monitoring.py +0 -555
  184. aigroup_econ_mcp/tools/optimized_example.py +0 -229
  185. aigroup_econ_mcp/tools/panel_data.py +0 -619
  186. aigroup_econ_mcp/tools/regression.py +0 -214
  187. aigroup_econ_mcp/tools/statistics.py +0 -154
  188. aigroup_econ_mcp/tools/time_series.py +0 -698
  189. aigroup_econ_mcp/tools/timeout.py +0 -283
  190. aigroup_econ_mcp/tools/tool_descriptions.py +0 -410
  191. aigroup_econ_mcp/tools/tool_handlers.py +0 -1016
  192. aigroup_econ_mcp/tools/tool_registry.py +0 -478
  193. aigroup_econ_mcp/tools/validation.py +0 -482
  194. aigroup_econ_mcp-1.3.3.dist-info/METADATA +0 -525
  195. aigroup_econ_mcp-1.3.3.dist-info/RECORD +0 -30
  196. aigroup_econ_mcp-1.3.3.dist-info/entry_points.txt +0 -2
  197. /aigroup_econ_mcp-1.3.3.dist-info/licenses/LICENSE → /LICENSE +0 -0
  198. {aigroup_econ_mcp-1.3.3.dist-info → aigroup_econ_mcp-2.0.1.dist-info}/WHEEL +0 -0
@@ -0,0 +1,127 @@
1
+ """
2
+ 加权最小二乘法 (Weighted Least Squares, WLS) 模型实现
3
+ """
4
+
5
+ from typing import List, Dict, Any, Optional
6
+ from dataclasses import dataclass
7
+ from pydantic import BaseModel, Field
8
+ import numpy as np
9
+ import pandas as pd
10
+ from scipy import stats
11
+ import statsmodels.api as sm
12
+
13
+ from tools.decorators import with_file_support_decorator as econometric_tool, validate_input
14
+
15
+
16
class WLSResult(BaseModel):
    """Result of a weighted least squares (WLS) regression."""
    # Estimated regression coefficients (intercept first when one is fitted).
    coefficients: List[float] = Field(..., description="回归系数")
    # Standard errors of the coefficient estimates.
    std_errors: List[float] = Field(..., description="系数标准误")
    # t statistics for the individual coefficients.
    t_values: List[float] = Field(..., description="t统计量")
    # Two-sided p-values of the t statistics.
    p_values: List[float] = Field(..., description="p值")
    # Lower bounds of the coefficient confidence intervals.
    conf_int_lower: List[float] = Field(..., description="置信区间下界")
    # Upper bounds of the coefficient confidence intervals.
    conf_int_upper: List[float] = Field(..., description="置信区间上界")
    # Coefficient of determination.
    r_squared: float = Field(..., description="R方")
    # R-squared adjusted for the number of regressors.
    adj_r_squared: float = Field(..., description="调整R方")
    # Overall F statistic of the regression.
    f_statistic: float = Field(..., description="F统计量")
    # p-value of the overall F statistic.
    f_p_value: float = Field(..., description="F统计量p值")
    # Number of observations used in the fit.
    n_obs: int = Field(..., description="观测数量")
    # Regressor names, aligned index-by-index with `coefficients`.
    feature_names: List[str] = Field(..., description="特征名称")
    # Observation weights that were supplied to the fit.
    weights: List[float] = Field(..., description="使用的权重")
31
+
32
+
33
@econometric_tool("wls_regression")
@validate_input(data_type="econometric")
def wls_regression(
    y_data: List[float],
    x_data: List[List[float]],
    weights: List[float],
    feature_names: Optional[List[str]] = None,
    constant: bool = True,
    confidence_level: float = 0.95
) -> WLSResult:
    """
    Weighted least squares (WLS) regression.

    Args:
        y_data: Dependent-variable observations.
        x_data: Regressor matrix (one row per observation).
        weights: One strictly positive weight per observation.
        feature_names: Optional regressor names; "const" is prepended when an
            intercept is fitted.
        constant: Whether to prepend an intercept column.
        confidence_level: Confidence level for the coefficient intervals.

    Returns:
        WLSResult: Estimates, inference statistics and fit diagnostics.

    Raises:
        ValueError: On inconsistent dimensions, non-positive weights, or a
            failed model fit.
    """
    # Convert inputs to float64 arrays.
    y = np.asarray(y_data, dtype=np.float64)
    X = np.asarray(x_data, dtype=np.float64)
    w = np.asarray(weights, dtype=np.float64)

    # Dimension checks: weights and y must align with the rows of X.
    if len(w) != len(y):
        raise ValueError("权重数量必须与观测值数量相同")
    if X.shape[0] != len(y):
        # Previously unchecked: a mismatch only surfaced as a cryptic
        # statsmodels error inside the fit.
        raise ValueError(f"因变量长度({len(y)})与自变量行数({X.shape[0]})不一致")

    # WLS requires strictly positive weights.
    if np.any(w <= 0):
        raise ValueError("所有权重必须为正数")

    # Optionally prepend an intercept and build the feature-name list.
    if constant:
        X = sm.add_constant(X)
        if feature_names:
            feature_names = ["const"] + feature_names
        else:
            feature_names = [f"x{i}" for i in range(X.shape[1])]
    else:
        if not feature_names:
            feature_names = [f"x{i}" for i in range(X.shape[1])]
    # Previously unchecked: wrong-length names silently misaligned the output.
    if len(feature_names) != X.shape[1]:
        raise ValueError(f"特征名称数量({len(feature_names)})与变量数量({X.shape[1]})不一致")

    # Need more observations than parameters for the fit to be identified.
    n, k = X.shape
    if n <= k:
        raise ValueError(f"观测数量({n})必须大于变量数量({k})")

    # Fit the WLS model via statsmodels.
    try:
        results = sm.WLS(y, X, weights=w).fit()
    except Exception as e:
        raise ValueError(f"无法拟合WLS模型: {str(e)}")

    # Point estimates and per-coefficient inference.
    coefficients = results.params.tolist()
    std_errors = results.bse.tolist()
    t_values = results.tvalues.tolist()
    p_values = results.pvalues.tolist()

    # Confidence intervals at the requested level. np.asarray accepts both the
    # ndarray and DataFrame forms conf_int can return.
    alpha = 1 - confidence_level
    conf_int = np.asarray(results.conf_int(alpha=alpha))
    conf_int_lower = conf_int[:, 0].tolist()
    conf_int_upper = conf_int[:, 1].tolist()

    # Goodness-of-fit measures.
    r_squared = float(results.rsquared)
    adj_r_squared = float(results.rsquared_adj)

    # The F statistic can be NaN (e.g. intercept-only models); substitute
    # neutral values so the result model stays valid.
    f_statistic = float(results.fvalue) if not np.isnan(results.fvalue) else 0.0
    f_p_value = float(results.f_pvalue) if not np.isnan(results.f_pvalue) else 1.0

    return WLSResult(
        coefficients=coefficients,
        std_errors=std_errors,
        t_values=t_values,
        p_values=p_values,
        conf_int_lower=conf_int_lower,
        conf_int_upper=conf_int_upper,
        r_squared=r_squared,
        adj_r_squared=adj_r_squared,
        f_statistic=f_statistic,
        f_p_value=f_p_value,
        n_obs=int(results.nobs),
        feature_names=feature_names,
        weights=weights
    )
@@ -0,0 +1,35 @@
1
"""
Nonparametric and semiparametric methods.

These estimators relax the linear / parametric functional-form assumptions
of classical regression models.
"""

from .kernel_regression import (
    kernel_regression,
    KernelRegressionResult
)

from .quantile_regression import (
    quantile_regression,
    QuantileRegressionResult
)

from .spline_regression import (
    spline_regression,
    SplineRegressionResult
)

from .gam_model import (
    gam_model,
    GAMResult
)

# Public API of the nonparametric subpackage.
# NOTE(review): quantile_regression.py also defines multi_quantile_regression,
# which is not re-exported here — confirm whether that is intentional.
__all__ = [
    'kernel_regression',
    'KernelRegressionResult',
    'quantile_regression',
    'QuantileRegressionResult',
    'spline_regression',
    'SplineRegressionResult',
    'gam_model',
    'GAMResult'
]
@@ -0,0 +1,117 @@
1
+ """
2
+ 广义可加模型 (Generalized Additive Model - GAM)
3
+ 基于 pygam 库实现
4
+ """
5
+
6
+ from typing import List, Optional
7
+ from pydantic import BaseModel, Field
8
+ import numpy as np
9
+
10
+ try:
11
+ from pygam import LinearGAM, LogisticGAM, s, f
12
+ PYGAM_AVAILABLE = True
13
+ except ImportError:
14
+ PYGAM_AVAILABLE = False
15
+ LinearGAM = None
16
+
17
+
18
class GAMResult(BaseModel):
    """Result of a generalized additive model (GAM) fit."""
    # In-sample fitted values.
    fitted_values: List[float] = Field(..., description="拟合值")
    # Residuals (observed minus fitted).
    residuals: List[float] = Field(..., description="残差")
    # Model deviance reported by pygam.
    deviance: float = Field(..., description="偏差")
    # Akaike information criterion.
    aic: float = Field(..., description="AIC信息准则")
    # Small-sample corrected AIC.
    aicc: float = Field(..., description="AICc信息准则")
    # Pseudo R² (explained deviance).
    r_squared: float = Field(..., description="伪R²")
    # Number of spline basis functions per feature.
    n_splines: List[int] = Field(..., description="每个特征的样条数")
    # "regression" or "classification".
    problem_type: str = Field(..., description="问题类型")
    # Number of observations used in the fit.
    n_observations: int = Field(..., description="观测数量")
    # Human-readable summary of the fit.
    summary: str = Field(..., description="摘要信息")
30
+
31
+
32
def gam_model(
    y_data: List[float],
    x_data: List[List[float]],
    problem_type: str = "regression",
    n_splines: int = 10,
    lam: float = 0.6
) -> GAMResult:
    """
    Generalized additive model (GAM) via pygam.

    Args:
        y_data: Dependent variable.
        x_data: Regressors as a 2-D list (rows = observations).
        problem_type: "regression" (LinearGAM) or "classification"
            (LogisticGAM).
        n_splines: Number of spline basis functions per feature.
        lam: Smoothing parameter (lambda) applied to every term.

    Returns:
        GAMResult: Fitted values, residuals and fit statistics.

    Raises:
        ImportError: pygam is not installed.
        ValueError: Unsupported problem_type.
    """
    if not PYGAM_AVAILABLE:
        raise ImportError("pygam库未安装。请运行: pip install pygam")

    # Data preparation.
    y = np.array(y_data, dtype=np.float64)
    X = np.array(x_data, dtype=np.float64)

    if X.ndim == 1:
        X = X.reshape(-1, 1)

    n, k = X.shape

    # Build one smooth term per feature and combine them additively.
    # BUG FIX: the previous code re-created the model inside the loop
    # (gam = LinearGAM(s(i, ...))), so with k > 1 only the LAST feature's
    # spline was ever fitted; the classification branch only ever used s(0).
    terms = s(0, n_splines=n_splines, lam=lam)
    for i in range(1, k):
        terms = terms + s(i, n_splines=n_splines, lam=lam)

    if problem_type == "regression":
        gam = LinearGAM(terms)
    elif problem_type == "classification":
        gam = LogisticGAM(terms)
    else:
        raise ValueError(f"不支持的问题类型: {problem_type}")

    # Fit and compute in-sample predictions.
    gam.fit(X, y)
    y_pred = gam.predict(X)
    residuals = y - y_pred

    # Fit statistics reported by pygam.
    deviance = float(gam.statistics_['deviance'])
    aic = float(gam.statistics_['AIC'])
    aicc = float(gam.statistics_['AICc'])

    # Pseudo R² as explained deviance.
    r_squared = float(gam.statistics_['pseudo_r2']['explained_deviance'])

    # Every feature uses the same number of splines.
    n_splines_list = [n_splines] * k

    summary = f"""广义可加模型 (GAM):
- 观测数量: {n}
- 特征数量: {k}
- 问题类型: {problem_type}
- 样条数: {n_splines}
- 平滑参数: {lam}
- 偏差: {deviance:.4f}
- AIC: {aic:.2f}
- AICc: {aicc:.2f}
- 伪R²: {r_squared:.4f}
"""

    return GAMResult(
        fitted_values=y_pred.tolist(),
        residuals=residuals.tolist(),
        deviance=deviance,
        aic=aic,
        aicc=aicc,
        r_squared=r_squared,
        n_splines=n_splines_list,
        problem_type=problem_type,
        n_observations=n,
        summary=summary
    )
@@ -0,0 +1,161 @@
1
+ """
2
+ 核回归 (Kernel Regression)
3
+ 基于 statsmodels.nonparametric 库实现
4
+ """
5
+
6
+ from typing import List, Optional, Tuple
7
+ from pydantic import BaseModel, Field
8
+ import numpy as np
9
+
10
+ try:
11
+ from statsmodels.nonparametric.kernel_regression import KernelReg
12
+ STATSMODELS_AVAILABLE = True
13
+ except ImportError:
14
+ STATSMODELS_AVAILABLE = False
15
+ KernelReg = None
16
+
17
+
18
class KernelRegressionResult(BaseModel):
    """Result of a kernel regression fit."""
    # In-sample fitted values.
    fitted_values: List[float] = Field(..., description="拟合值")
    # Residuals (observed minus fitted).
    residuals: List[float] = Field(..., description="残差")
    # Bandwidth used for each regressor.
    bandwidth: List[float] = Field(..., description="带宽参数")
    # Kernel label echoed from the call (informational).
    kernel_type: str = Field(..., description="核函数类型")
    # Number of observations used in the fit.
    n_observations: int = Field(..., description="观测数量")
    # Number of regressors.
    n_predictors: int = Field(..., description="预测变量数量")
    # R² computed from residual vs. total sum of squares.
    r_squared: float = Field(..., description="R²统计量")
    # Approximate AIC; None when it cannot be computed.
    aic: Optional[float] = Field(None, description="AIC信息准则")
    # Human-readable summary of the fit.
    summary: str = Field(..., description="摘要信息")
29
+
30
+
31
def kernel_regression(
    y_data: List[float],
    x_data: List[List[float]],
    kernel_type: str = "gaussian",
    bandwidth: Optional[List[float]] = None,
    bandwidth_method: str = "cv_ls",
    variable_type: Optional[str] = None
) -> KernelRegressionResult:
    """
    Kernel (local-linear) regression via statsmodels' KernelReg.

    Args:
        y_data: Dependent variable.
        x_data: Regressors as a 2-D list (rows = observations).
        kernel_type: Requested kernel label. NOTE(review): this value is only
            echoed into the result; KernelReg is constructed with its default
            kernels regardless — confirm before relying on it.
        bandwidth: One bandwidth per regressor; if None it is selected
            automatically by `bandwidth_method`.
        bandwidth_method: Bandwidth selection rule — "cv_ls" (least-squares
            cross-validation), "aic", or "normal_reference".
        variable_type: Per-variable type string ('c' continuous, 'u' unordered
            categorical, 'o' ordered categorical), e.g. "cco" for three
            variables; None means all continuous.

    Returns:
        KernelRegressionResult: Fitted values, residuals, bandwidths and fit
        statistics.

    Raises:
        ImportError: statsmodels is not installed.
        ValueError: Invalid input data or failed model construction.
    """
    if not STATSMODELS_AVAILABLE:
        raise ImportError(
            "statsmodels库未安装。请运行: pip install statsmodels"
        )

    # Basic input validation.
    if not y_data or not x_data:
        raise ValueError("y_data和x_data不能为空")

    # Data preparation.
    y = np.array(y_data, dtype=np.float64)
    X = np.array(x_data, dtype=np.float64)

    # Promote a single regressor to a column matrix.
    if X.ndim == 1:
        X = X.reshape(-1, 1)

    n = len(y)
    k = X.shape[1]

    if len(y) != X.shape[0]:
        raise ValueError(f"因变量长度({len(y)})与自变量长度({X.shape[0]})不一致")

    # Variable-type string: default is all-continuous.
    if variable_type is None:
        var_type = 'c' * k
    else:
        var_type = variable_type
        if len(var_type) != k:
            raise ValueError(f"variable_type长度({len(var_type)})与自变量数量({k})不一致")

    # Build the local-linear kernel regression, with automatic or fixed
    # bandwidths.
    try:
        if bandwidth is None:
            kr = KernelReg(
                endog=y,
                exog=X,
                var_type=var_type,
                reg_type='ll',  # local-linear regression
                bw=bandwidth_method
            )
        else:
            if len(bandwidth) != k:
                raise ValueError(f"bandwidth长度({len(bandwidth)})与自变量数量({k})不一致")
            kr = KernelReg(
                endog=y,
                exog=X,
                var_type=var_type,
                reg_type='ll',
                bw=np.array(bandwidth)
            )
    except Exception as e:
        raise ValueError(f"核回归模型构建失败: {str(e)}")

    # In-sample fitted values (KernelReg.fit also returns marginal effects,
    # which are discarded here).
    fitted_values, _ = kr.fit(X)
    fitted_values = fitted_values.flatten()

    residuals = y - fitted_values

    # Selected/supplied bandwidths as a plain list.
    bw = kr.bw.tolist() if hasattr(kr.bw, 'tolist') else [float(kr.bw)]

    # R² from residual vs. total sum of squares.
    ss_res = np.sum(residuals ** 2)
    ss_tot = np.sum((y - np.mean(y)) ** 2)
    r_squared = float(1 - ss_res / ss_tot) if ss_tot > 0 else 0.0

    # Approximate AIC from the Gaussian log-likelihood. Undefined for a
    # perfect fit (ss_res == 0 would take log(0)); previously this was
    # wrapped in a bare `except:` that could also mask unrelated errors.
    if ss_res > 0:
        log_likelihood = -0.5 * n * (np.log(2 * np.pi) + np.log(ss_res / n) + 1)
        aic = float(2 * k - 2 * log_likelihood)
    else:
        aic = None

    summary = f"""核回归分析:
- 观测数量: {n}
- 预测变量: {k}
- 核函数: {kernel_type}
- 带宽: {[f'{b:.4f}' for b in bw]}
- 带宽方法: {bandwidth_method}
- R²: {r_squared:.4f}
"""
    if aic is not None:
        summary += f"- AIC: {aic:.2f}\n"

    return KernelRegressionResult(
        fitted_values=fitted_values.tolist(),
        residuals=residuals.tolist(),
        bandwidth=bw,
        kernel_type=kernel_type,
        n_observations=n,
        n_predictors=k,
        r_squared=r_squared,
        aic=aic,
        summary=summary
    )
@@ -0,0 +1,249 @@
1
+ """
2
+ 分位数回归 (Quantile Regression)
3
+ 基于 statsmodels.regression.quantile_regression 库实现
4
+ """
5
+
6
+ from typing import List, Optional, Dict
7
+ from pydantic import BaseModel, Field
8
+ import numpy as np
9
+
10
+ try:
11
+ import statsmodels.api as sm
12
+ from statsmodels.regression.quantile_regression import QuantReg
13
+ STATSMODELS_AVAILABLE = True
14
+ except ImportError:
15
+ STATSMODELS_AVAILABLE = False
16
+ QuantReg = None
17
+
18
+
19
class QuantileRegressionResult(BaseModel):
    """Result of a single-quantile regression fit."""
    # Quantile level tau in (0, 1) that was estimated.
    quantile: float = Field(..., description="分位数水平")
    # Estimated coefficients (intercept first).
    coefficients: List[float] = Field(..., description="回归系数")
    # Standard errors of the coefficients.
    std_errors: List[float] = Field(..., description="标准误")
    # t statistics for the individual coefficients.
    t_values: List[float] = Field(..., description="t统计量")
    # Two-sided p-values of the t statistics.
    p_values: List[float] = Field(..., description="p值")
    # Lower bounds of the coefficient confidence intervals.
    conf_int_lower: List[float] = Field(..., description="置信区间下界")
    # Upper bounds of the coefficient confidence intervals.
    conf_int_upper: List[float] = Field(..., description="置信区间上界")
    # Names aligned with `coefficients` ("const" plus regressor names).
    feature_names: List[str] = Field(..., description="特征名称")
    # Koenker-Machado pseudo R².
    pseudo_r_squared: float = Field(..., description="伪R²")
    # Number of observations used in the fit.
    n_observations: int = Field(..., description="观测数量")
    # Human-readable summary of the fit.
    summary: str = Field(..., description="摘要信息")
32
+
33
+
34
class MultiQuantileResult(BaseModel):
    """Result of estimating the same regression at several quantile levels."""
    # Quantile levels that were estimated.
    quantiles: List[float] = Field(..., description="分位数水平列表")
    # Coefficient vectors keyed by quantile label (e.g. "τ=0.5").
    coefficients_by_quantile: Dict[str, List[float]] = Field(..., description="各分位数的系数")
    # Names aligned with each coefficient vector ("const" first).
    feature_names: List[str] = Field(..., description="特征名称")
    # Number of observations used in the fits.
    n_observations: int = Field(..., description="观测数量")
    # Human-readable summary of the fits.
    summary: str = Field(..., description="摘要信息")
41
+
42
+
43
def quantile_regression(
    y_data: List[float],
    x_data: List[List[float]],
    quantile: float = 0.5,
    feature_names: Optional[List[str]] = None,
    confidence_level: float = 0.95
) -> QuantileRegressionResult:
    """
    Quantile regression via statsmodels' QuantReg.

    Args:
        y_data: Dependent variable.
        x_data: Regressors as a 2-D list (rows = observations).
        quantile: Quantile level in (0, 1); 0.5 gives median regression.
        feature_names: Optional regressor names ("const" is prepended).
        confidence_level: Confidence level for the coefficient intervals.

    Returns:
        QuantileRegressionResult: Coefficients, inference and fit statistics.

    Raises:
        ImportError: statsmodels is not installed.
        ValueError: Invalid input data or a failed fit.
    """
    if not STATSMODELS_AVAILABLE:
        raise ImportError(
            "statsmodels库未安装。请运行: pip install statsmodels"
        )

    # Input validation.
    if not y_data or not x_data:
        raise ValueError("y_data和x_data不能为空")

    if not 0 < quantile < 1:
        raise ValueError("quantile必须在0和1之间")

    # Data preparation.
    y = np.array(y_data, dtype=np.float64)
    X = np.array(x_data, dtype=np.float64)

    # Promote a single regressor to a column matrix.
    if X.ndim == 1:
        X = X.reshape(-1, 1)

    n = len(y)
    k = X.shape[1]

    if len(y) != X.shape[0]:
        raise ValueError(f"因变量长度({len(y)})与自变量长度({X.shape[0]})不一致")

    # Prepend an intercept column.
    X_with_const = sm.add_constant(X)

    # Feature names, with "const" first to match the coefficient order.
    if feature_names is None:
        feature_names = [f"X{i+1}" for i in range(k)]
    all_feature_names = ["const"] + feature_names

    # Build and fit the quantile regression model.
    try:
        model = QuantReg(y, X_with_const)
        results = model.fit(q=quantile)
    except Exception as e:
        raise ValueError(f"分位数回归拟合失败: {str(e)}")

    coefficients = results.params.tolist()

    # Standard errors; fall back to zeros if statsmodels cannot compute them.
    try:
        std_errors = results.bse.tolist()
    except Exception:
        std_errors = [0.0] * len(coefficients)

    # t statistics and p-values, with neutral fallbacks.
    try:
        t_values = results.tvalues.tolist()
        p_values = results.pvalues.tolist()
    except Exception:
        t_values = [0.0] * len(coefficients)
        p_values = [1.0] * len(coefficients)

    # Confidence intervals. np.asarray handles both the ndarray and DataFrame
    # forms conf_int can return — the previous `.iloc` access raised on the
    # ndarray form (plain-array exog) and silently fell into the fallback.
    try:
        alpha = 1 - confidence_level
        conf_int = np.asarray(results.conf_int(alpha=alpha))
        conf_int_lower = conf_int[:, 0].tolist()
        conf_int_upper = conf_int[:, 1].tolist()
    except Exception:
        # Normal-approximation fallback at the REQUESTED level. Previously
        # this hard-coded 1.96 (i.e. 95%) and ignored confidence_level.
        from scipy.stats import norm
        z = float(norm.ppf(1 - (1 - confidence_level) / 2))
        conf_int_lower = [c - z * se for c, se in zip(coefficients, std_errors)]
        conf_int_upper = [c + z * se for c, se in zip(coefficients, std_errors)]

    # Koenker-Machado pseudo R², with a neutral fallback.
    try:
        pseudo_r_squared = float(results.prsquared)
    except Exception:
        pseudo_r_squared = 0.0

    # Build the human-readable summary.
    summary = f"""分位数回归分析:
- 分位数τ: {quantile}
- 观测数量: {n}
- 协变量数: {k}
- 伪R²: {pseudo_r_squared:.4f}

系数估计:
"""
    for name, coef, se, t, p in zip(
        all_feature_names, coefficients, std_errors, t_values, p_values
    ):
        # Conventional significance stars.
        sig = "***" if p < 0.01 else "**" if p < 0.05 else "*" if p < 0.10 else ""
        summary += f"  {name}: {coef:.4f} (SE: {se:.4f}, t={t:.2f}, p={p:.4f}){sig}\n"

    return QuantileRegressionResult(
        quantile=quantile,
        coefficients=coefficients,
        std_errors=std_errors,
        t_values=t_values,
        p_values=p_values,
        conf_int_lower=conf_int_lower,
        conf_int_upper=conf_int_upper,
        feature_names=all_feature_names,
        pseudo_r_squared=pseudo_r_squared,
        n_observations=n,
        summary=summary
    )
172
+
173
+
174
def multi_quantile_regression(
    y_data: List[float],
    x_data: List[List[float]],
    quantiles: Optional[List[float]] = None,
    feature_names: Optional[List[str]] = None
) -> MultiQuantileResult:
    """
    Estimate the same regression at several quantile levels.

    Args:
        y_data: Dependent variable.
        x_data: Regressors.
        quantiles: Quantile levels; defaults to [0.1, 0.25, 0.5, 0.75, 0.9].
        feature_names: Optional regressor names ("const" is prepended).

    Returns:
        MultiQuantileResult: Coefficient vectors keyed by quantile level.

    Raises:
        ImportError: statsmodels is not installed.
        ValueError: Empty input data.
    """
    if not STATSMODELS_AVAILABLE:
        raise ImportError("statsmodels库未安装")

    # Input validation.
    if not y_data or not x_data:
        raise ValueError("y_data和x_data不能为空")

    # Avoid the mutable-default-argument pitfall: the previous signature used
    # a shared list literal as the default.
    if quantiles is None:
        quantiles = [0.1, 0.25, 0.5, 0.75, 0.9]

    # Data preparation.
    y = np.array(y_data, dtype=np.float64)
    X = np.array(x_data, dtype=np.float64)

    if X.ndim == 1:
        X = X.reshape(-1, 1)

    n = len(y)
    k = X.shape[1]

    # Prepend an intercept column.
    X_with_const = sm.add_constant(X)

    # Feature names, with "const" first to match the coefficient order.
    if feature_names is None:
        feature_names = [f"X{i+1}" for i in range(k)]
    all_feature_names = ["const"] + feature_names

    # Fit one QuantReg per level; a failed fit contributes NaN coefficients
    # so the remaining quantiles are still reported.
    coefficients_by_quantile = {}
    for q in quantiles:
        try:
            results = QuantReg(y, X_with_const).fit(q=q)
            coefficients_by_quantile[f"τ={q}"] = results.params.tolist()
        except Exception:
            coefficients_by_quantile[f"τ={q}"] = [np.nan] * (k + 1)

    # Build the human-readable summary.
    summary = f"""多分位数回归分析:
- 观测数量: {n}
- 协变量数: {k}
- 分位数: {quantiles}

各分位数的系数估计:
"""
    for name_idx, name in enumerate(all_feature_names):
        summary += f"\n{name}:\n"
        for q in quantiles:
            coef = coefficients_by_quantile[f"τ={q}"][name_idx]
            summary += f"  τ={q}: {coef:.4f}\n"

    return MultiQuantileResult(
        quantiles=quantiles,
        coefficients_by_quantile=coefficients_by_quantile,
        feature_names=all_feature_names,
        n_observations=n,
        summary=summary
    )