aigroup-econ-mcp 1.3.3__py3-none-any.whl → 1.4.3__py3-none-any.whl

This diff represents the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their respective public registries.
Files changed (120)
  1. .gitignore +253 -0
  2. PKG-INFO +710 -0
  3. README.md +672 -0
  4. __init__.py +14 -0
  5. aigroup_econ_mcp-1.4.3.dist-info/METADATA +710 -0
  6. aigroup_econ_mcp-1.4.3.dist-info/RECORD +92 -0
  7. aigroup_econ_mcp-1.4.3.dist-info/entry_points.txt +2 -0
  8. aigroup_econ_mcp-1.4.3.dist-info/licenses/LICENSE +21 -0
  9. cli.py +28 -0
  10. econometrics/README.md +18 -0
  11. econometrics/__init__.py +191 -0
  12. econometrics/advanced_methods/modern_computing_machine_learning/__init__.py +0 -0
  13. econometrics/basic_parametric_estimation/__init__.py +31 -0
  14. econometrics/basic_parametric_estimation/gmm/__init__.py +13 -0
  15. econometrics/basic_parametric_estimation/gmm/gmm_model.py +256 -0
  16. econometrics/basic_parametric_estimation/mle/__init__.py +13 -0
  17. econometrics/basic_parametric_estimation/mle/mle_model.py +241 -0
  18. econometrics/basic_parametric_estimation/ols/__init__.py +13 -0
  19. econometrics/basic_parametric_estimation/ols/ols_model.py +141 -0
  20. econometrics/causal_inference/causal_identification_strategy/__init__.py +0 -0
  21. econometrics/missing_data/missing_data_measurement_error/__init__.py +0 -0
  22. econometrics/model_specification_diagnostics_robust_inference/README.md +173 -0
  23. econometrics/model_specification_diagnostics_robust_inference/__init__.py +78 -0
  24. econometrics/model_specification_diagnostics_robust_inference/diagnostic_tests/__init__.py +20 -0
  25. econometrics/model_specification_diagnostics_robust_inference/diagnostic_tests/diagnostic_tests_model.py +149 -0
  26. econometrics/model_specification_diagnostics_robust_inference/generalized_least_squares/__init__.py +15 -0
  27. econometrics/model_specification_diagnostics_robust_inference/generalized_least_squares/gls_model.py +130 -0
  28. econometrics/model_specification_diagnostics_robust_inference/model_selection/__init__.py +18 -0
  29. econometrics/model_specification_diagnostics_robust_inference/model_selection/model_selection_model.py +286 -0
  30. econometrics/model_specification_diagnostics_robust_inference/regularization/__init__.py +15 -0
  31. econometrics/model_specification_diagnostics_robust_inference/regularization/regularization_model.py +177 -0
  32. econometrics/model_specification_diagnostics_robust_inference/robust_errors/__init__.py +15 -0
  33. econometrics/model_specification_diagnostics_robust_inference/robust_errors/robust_errors_model.py +122 -0
  34. econometrics/model_specification_diagnostics_robust_inference/simultaneous_equations/__init__.py +15 -0
  35. econometrics/model_specification_diagnostics_robust_inference/simultaneous_equations/simultaneous_equations_model.py +246 -0
  36. econometrics/model_specification_diagnostics_robust_inference/weighted_least_squares/__init__.py +15 -0
  37. econometrics/model_specification_diagnostics_robust_inference/weighted_least_squares/wls_model.py +127 -0
  38. econometrics/nonparametric/nonparametric_semiparametric_methods/__init__.py +0 -0
  39. econometrics/spatial_econometrics/spatial_econometrics_new/__init__.py +0 -0
  40. econometrics/specific_data_modeling/micro_discrete_limited_data/__init__.py +0 -0
  41. econometrics/specific_data_modeling/survival_duration_data/__init__.py +0 -0
  42. econometrics/specific_data_modeling/time_series_panel_data/__init__.py +143 -0
  43. econometrics/specific_data_modeling/time_series_panel_data/arima_model.py +104 -0
  44. econometrics/specific_data_modeling/time_series_panel_data/cointegration_vecm.py +334 -0
  45. econometrics/specific_data_modeling/time_series_panel_data/dynamic_panel_models.py +653 -0
  46. econometrics/specific_data_modeling/time_series_panel_data/exponential_smoothing.py +176 -0
  47. econometrics/specific_data_modeling/time_series_panel_data/garch_model.py +198 -0
  48. econometrics/specific_data_modeling/time_series_panel_data/panel_diagnostics.py +125 -0
  49. econometrics/specific_data_modeling/time_series_panel_data/panel_var.py +60 -0
  50. econometrics/specific_data_modeling/time_series_panel_data/structural_break_tests.py +87 -0
  51. econometrics/specific_data_modeling/time_series_panel_data/time_varying_parameter_models.py +106 -0
  52. econometrics/specific_data_modeling/time_series_panel_data/unit_root_tests.py +204 -0
  53. econometrics/specific_data_modeling/time_series_panel_data/var_svar_model.py +372 -0
  54. econometrics/statistical_inference/statistical_inference_techniques/__init__.py +0 -0
  55. econometrics/statistics/distribution_decomposition_methods/__init__.py +0 -0
  56. econometrics/tests/basic_parametric_estimation_tests/__init__.py +3 -0
  57. econometrics/tests/basic_parametric_estimation_tests/test_gmm.py +128 -0
  58. econometrics/tests/basic_parametric_estimation_tests/test_mle.py +127 -0
  59. econometrics/tests/basic_parametric_estimation_tests/test_ols.py +100 -0
  60. econometrics/tests/model_specification_diagnostics_tests/__init__.py +3 -0
  61. econometrics/tests/model_specification_diagnostics_tests/test_diagnostic_tests.py +86 -0
  62. econometrics/tests/model_specification_diagnostics_tests/test_robust_errors.py +89 -0
  63. econometrics/tests/specific_data_modeling_tests/__init__.py +3 -0
  64. econometrics/tests/specific_data_modeling_tests/test_arima.py +98 -0
  65. econometrics/tests/specific_data_modeling_tests/test_dynamic_panel.py +198 -0
  66. econometrics/tests/specific_data_modeling_tests/test_exponential_smoothing.py +105 -0
  67. econometrics/tests/specific_data_modeling_tests/test_garch.py +118 -0
  68. econometrics/tests/specific_data_modeling_tests/test_unit_root.py +156 -0
  69. econometrics/tests/specific_data_modeling_tests/test_var.py +124 -0
  70. prompts/__init__.py +0 -0
  71. prompts/analysis_guides.py +43 -0
  72. pyproject.toml +78 -0
  73. resources/MCP_MASTER_GUIDE.md +422 -0
  74. resources/MCP_TOOLS_DATA_FORMAT_GUIDE.md +185 -0
  75. resources/__init__.py +0 -0
  76. server.py +83 -0
  77. tools/README.md +88 -0
  78. tools/__init__.py +45 -0
  79. tools/data_loader.py +213 -0
  80. tools/decorators.py +38 -0
  81. tools/econometrics_adapter.py +286 -0
  82. tools/mcp_tool_groups/__init__.py +1 -0
  83. tools/mcp_tool_groups/basic_parametric_tools.py +173 -0
  84. tools/mcp_tool_groups/model_specification_tools.py +402 -0
  85. tools/mcp_tool_groups/time_series_tools.py +494 -0
  86. tools/mcp_tools_registry.py +114 -0
  87. tools/model_specification_adapter.py +369 -0
  88. tools/output_formatter.py +563 -0
  89. tools/time_series_panel_data_adapter.py +858 -0
  90. tools/time_series_panel_data_tools.py +65 -0
  91. aigroup_econ_mcp/__init__.py +0 -19
  92. aigroup_econ_mcp/cli.py +0 -82
  93. aigroup_econ_mcp/config.py +0 -561
  94. aigroup_econ_mcp/server.py +0 -452
  95. aigroup_econ_mcp/tools/__init__.py +0 -19
  96. aigroup_econ_mcp/tools/base.py +0 -470
  97. aigroup_econ_mcp/tools/cache.py +0 -533
  98. aigroup_econ_mcp/tools/data_loader.py +0 -195
  99. aigroup_econ_mcp/tools/file_parser.py +0 -1027
  100. aigroup_econ_mcp/tools/machine_learning.py +0 -60
  101. aigroup_econ_mcp/tools/ml_ensemble.py +0 -210
  102. aigroup_econ_mcp/tools/ml_evaluation.py +0 -272
  103. aigroup_econ_mcp/tools/ml_models.py +0 -54
  104. aigroup_econ_mcp/tools/ml_regularization.py +0 -186
  105. aigroup_econ_mcp/tools/monitoring.py +0 -555
  106. aigroup_econ_mcp/tools/optimized_example.py +0 -229
  107. aigroup_econ_mcp/tools/panel_data.py +0 -619
  108. aigroup_econ_mcp/tools/regression.py +0 -214
  109. aigroup_econ_mcp/tools/statistics.py +0 -154
  110. aigroup_econ_mcp/tools/time_series.py +0 -698
  111. aigroup_econ_mcp/tools/timeout.py +0 -283
  112. aigroup_econ_mcp/tools/tool_descriptions.py +0 -410
  113. aigroup_econ_mcp/tools/tool_handlers.py +0 -1016
  114. aigroup_econ_mcp/tools/tool_registry.py +0 -478
  115. aigroup_econ_mcp/tools/validation.py +0 -482
  116. aigroup_econ_mcp-1.3.3.dist-info/METADATA +0 -525
  117. aigroup_econ_mcp-1.3.3.dist-info/RECORD +0 -30
  118. aigroup_econ_mcp-1.3.3.dist-info/entry_points.txt +0 -2
  119. /aigroup_econ_mcp-1.3.3.dist-info/licenses/LICENSE → /LICENSE +0 -0
  120. {aigroup_econ_mcp-1.3.3.dist-info → aigroup_econ_mcp-1.4.3.dist-info}/WHEEL +0 -0
@@ -1,1016 +0,0 @@
- """
- Tool handlers module
- Centralizes the core business logic for all tools
- """
-
- import pandas as pd
- import numpy as np
- import statsmodels.api as sm
- from statsmodels.tsa import stattools
- from scipy import stats
- from typing import Dict, List, Any, Optional
- from mcp.types import CallToolResult, TextContent
-
- from .statistics import calculate_descriptive_stats, calculate_correlation_matrix, perform_hypothesis_test
- from .regression import perform_ols_regression
- from .panel_data import fixed_effects_model, random_effects_model, hausman_test, panel_unit_root_test
- from .time_series import var_model, vecm_model, garch_model, state_space_model, variance_decomposition
- from .machine_learning import (
-     random_forest_regression, gradient_boosting_regression,
-     lasso_regression, ridge_regression, cross_validation, feature_importance_analysis
- )
- from .timeout import with_timeout, TimeoutError
-
-
- async def handle_descriptive_statistics(ctx, data: Dict[str, List[float]], **kwargs) -> CallToolResult:
-     """Handle descriptive statistics"""
-     if not data:
-         raise ValueError("Data must not be empty")
-
-     df = pd.DataFrame(data)
-
-     # Compute summary statistics
-     result_data = {
-         "count": len(df),
-         "mean": float(df.mean().mean()),
-         "std": float(df.std().mean()),
-         "min": float(df.min().min()),
-         "max": float(df.max().max()),
-         "median": float(df.median().mean()),
-         "skewness": float(df.skew().mean()),
-         "kurtosis": float(df.kurtosis().mean())
-     }
-
-     correlation_matrix = df.corr().round(4)
-
-     return CallToolResult(
-         content=[
-             TextContent(
-                 type="text",
-                 text=f"Descriptive statistics:\n"
-                      f"Mean: {result_data['mean']:.4f}\n"
-                      f"Std dev: {result_data['std']:.4f}\n"
-                      f"Min: {result_data['min']:.4f}\n"
-                      f"Max: {result_data['max']:.4f}\n"
-                      f"Median: {result_data['median']:.4f}\n"
-                      f"Skewness: {result_data['skewness']:.4f}\n"
-                      f"Kurtosis: {result_data['kurtosis']:.4f}\n\n"
-                      f"Correlation matrix:\n{correlation_matrix.to_string()}"
-             )
-         ],
-         structuredContent=result_data
-     )
-
-
- async def handle_ols_regression(ctx, y_data: List[float], x_data: List[List[float]],
-                                 feature_names: Optional[List[str]] = None, **kwargs) -> CallToolResult:
-     """Handle OLS regression"""
-     if not y_data or not x_data:
-         raise ValueError("Dependent and independent variable data must not be empty")
-
-     X = np.array(x_data)
-     y = np.array(y_data)
-     X_with_const = sm.add_constant(X)
-     model = sm.OLS(y, X_with_const).fit()
-
-     if feature_names is None:
-         feature_names = [f"x{i+1}" for i in range(X.shape[1])]
-
-     conf_int = model.conf_int()
-     coefficients = {}
-
-     for i, coef in enumerate(model.params):
-         var_name = "const" if i == 0 else feature_names[i-1]
-         coefficients[var_name] = {
-             "coef": float(coef),
-             "std_err": float(model.bse[i]),
-             "t_value": float(model.tvalues[i]),
-             "p_value": float(model.pvalues[i]),
-             "ci_lower": float(conf_int[i][0]),
-             "ci_upper": float(conf_int[i][1])
-         }
-
-     result_data = {
-         "rsquared": float(model.rsquared),
-         "rsquared_adj": float(model.rsquared_adj),
-         "f_statistic": float(model.fvalue),
-         "f_pvalue": float(model.f_pvalue),
-         "aic": float(model.aic),
-         "bic": float(model.bic),
-         "coefficients": coefficients
-     }
-
-     return CallToolResult(
-         content=[
-             TextContent(
-                 type="text",
-                 text=f"OLS regression results:\n"
-                      f"R² = {result_data['rsquared']:.4f}\n"
-                      f"Adjusted R² = {result_data['rsquared_adj']:.4f}\n"
-                      f"F-statistic = {result_data['f_statistic']:.4f} (p = {result_data['f_pvalue']:.4f})\n"
-                      f"AIC = {result_data['aic']:.2f}, BIC = {result_data['bic']:.2f}\n\n"
-                      f"Coefficients:\n{model.summary().tables[1]}"
-             )
-         ],
-         structuredContent=result_data
-     )
-
-
- async def handle_hypothesis_testing(ctx, data1: List[float], data2: Optional[List[float]] = None,
-                                     test_type: str = "t_test", **kwargs) -> CallToolResult:
-     """Handle hypothesis testing"""
-     if test_type == "t_test":
-         if data2 is None:
-             result = stats.ttest_1samp(data1, 0)
-             ci = stats.t.interval(0.95, len(data1)-1, loc=np.mean(data1), scale=stats.sem(data1))
-         else:
-             result = stats.ttest_ind(data1, data2)
-             ci = None
-
-         test_result = {
-             "test_type": test_type,
-             "statistic": float(result.statistic),
-             "p_value": float(result.pvalue),
-             "significant": bool(result.pvalue < 0.05),
-             "confidence_interval": list(ci) if ci else None
-         }
-     elif test_type == "adf":
-         result = stattools.adfuller(data1)
-         test_result = {
-             "test_type": "adf",
-             "statistic": float(result[0]),
-             "p_value": float(result[1]),
-             "significant": bool(result[1] < 0.05),
-             "confidence_interval": None
-         }
-     else:
-         raise ValueError(f"Unsupported test type: {test_type}")
-
-     ci_text = ""
-     if test_result['confidence_interval']:
-         ci_lower = test_result['confidence_interval'][0]
-         ci_upper = test_result['confidence_interval'][1]
-         ci_text = f"95% confidence interval: [{ci_lower:.4f}, {ci_upper:.4f}]"
-
-     return CallToolResult(
-         content=[
-             TextContent(
-                 type="text",
-                 text=f"{test_type.upper()} test results:\n"
-                      f"Test statistic = {test_result['statistic']:.4f}\n"
-                      f"p-value = {test_result['p_value']:.4f}\n"
-                      f"{'Significant' if test_result['significant'] else 'Not significant'} (5% level)\n"
-                      f"{ci_text}"
-             )
-         ],
-         structuredContent=test_result
-     )
-
-
- async def handle_time_series_analysis(ctx, data: List[float], **kwargs) -> CallToolResult:
-     """Handle time series analysis - optimized version (timeout protection and performance tuning)"""
-     if not data or len(data) < 5:
-         raise ValueError("Time series data requires at least 5 observations")
-
-     # Check data length and optimize
-     original_length = len(data)
-     max_data_points = 1000  # cap the number of data points to avoid timeouts
-
-     # Downsample if the series is too long
-     if original_length > max_data_points:
-         # Evenly spaced sampling preserves the data's characteristics
-         step = original_length // max_data_points
-         data = data[::step]
-
-     series = pd.Series(data)
-
-     # Basic statistics (fast to compute)
-     try:
-         basic_stats = {
-             "count": original_length,  # use the original length
-             "mean": float(series.mean()),
-             "std": float(series.std()),
-             "min": float(series.min()),
-             "max": float(series.max()),
-             "median": float(series.median()),
-             "skewness": float(series.skew()),
-             "kurtosis": float(series.kurtosis()),
-             "variance": float(series.var()),
-             "range": float(series.max() - series.min()),
-             "cv": float(series.std() / series.mean()) if series.mean() != 0 else 0
-         }
-     except Exception as e:
-         raise ValueError(f"Failed to compute basic statistics: {str(e)}")
-
-     # Stationarity tests (with timeout protection)
-     try:
-         adf_result = stattools.adfuller(data, maxlag=min(12, len(data)//5))
-     except Exception as e:
-         # If the test fails, fall back to placeholder values
-         adf_result = (0.0, 0.5, 0, len(data)-1, {}, 0.0)
-
-     try:
-         kpss_result = stattools.kpss(data, regression='c', nlags=min(12, len(data)//5))
-     except Exception as e:
-         # If the test fails, fall back to placeholder values
-         kpss_result = (0.0, 0.5, 0, {})
-
-     # Autocorrelation analysis (tuned lag order)
-     max_nlags = min(15, len(data) // 3, 40)  # reduce the maximum lag order
-     if max_nlags < 1:
-         max_nlags = 1
-
-     try:
-         acf_values = stattools.acf(data, nlags=max_nlags, fft=True)  # FFT for speed
-         pacf_values = stattools.pacf(data, nlags=max_nlags, method='ywm')  # a faster method
-     except Exception as e:
-         # If the computation fails, use defaults
-         acf_values = np.zeros(max_nlags + 1)
-         pacf_values = np.zeros(max_nlags + 1)
-         acf_values[0] = pacf_values[0] = 1.0
-
-     # Trend strength (fast to compute)
-     try:
-         if len(data) > 1:
-             trend_strength = abs(np.corrcoef(range(len(data)), data)[0, 1])
-         else:
-             trend_strength = 0.0
-     except:
-         trend_strength = 0.0
-
-     # Seasonality detection (simplified; only for medium-length series)
-     seasonal_pattern = False
-     if 12 <= len(data) <= 500:  # only test medium-length series for seasonality
-         try:
-             seasonal_lag = min(12, len(data)//3)
-             seasonal_acf = stattools.acf(data, nlags=seasonal_lag, fft=True)
-             seasonal_pattern = any(abs(x) > 0.3 for x in seasonal_acf[1:])
-         except:
-             seasonal_pattern = False
-
-     # Build the detailed result text
-     sampling_notice = ""
-     if original_length > max_data_points:
-         sampling_notice = f"\n⚡ Performance note: large series ({original_length} observations) automatically downsampled to {len(data)} points for analysis\n"
-
-     result_text = f"""📊 Time Series Analysis Results{sampling_notice}
-
- 🔍 Basic statistics:
- - Observations = {basic_stats['count']}
- - Mean = {basic_stats['mean']:.4f}
- - Std dev = {basic_stats['std']:.4f}
- - Variance = {basic_stats['variance']:.4f}
- - Min = {basic_stats['min']:.4f}
- - Max = {basic_stats['max']:.4f}
- - Range = {basic_stats['range']:.4f}
- - Median = {basic_stats['median']:.4f}
- - Skewness = {basic_stats['skewness']:.4f}
- - Kurtosis = {basic_stats['kurtosis']:.4f}
- - Coefficient of variation = {basic_stats['cv']:.4f}
-
- 📈 Stationarity tests:
- - ADF test statistic = {adf_result[0]:.4f}
- - ADF p-value = {adf_result[1]:.4f}
- - KPSS test statistic = {kpss_result[0]:.4f}
- - KPSS p-value = {kpss_result[1]:.4f}
- - Stationarity verdict = {'stationary' if adf_result[1] < 0.05 and kpss_result[1] > 0.05 else 'non-stationary'}
-
- 🔬 Autocorrelation analysis:
- - First 5 ACF lags: {[f'{x:.4f}' for x in acf_values[:min(5, len(acf_values))]]}
- - First 5 PACF lags: {[f'{x:.4f}' for x in pacf_values[:min(5, len(pacf_values))]]}
- - Max autocorrelation: {max(abs(acf_values[1:])) if len(acf_values) > 1 else 0:.4f}
- - Max partial autocorrelation: {max(abs(pacf_values[1:])) if len(pacf_values) > 1 else 0:.4f}
-
- 📊 Diagnostic statistics:
- - Trend strength: {trend_strength:.4f}
- - Seasonal pattern: {'present' if seasonal_pattern else 'not detected'}
- - Volatility: {'high' if basic_stats['cv'] > 0.5 else 'medium' if basic_stats['cv'] > 0.2 else 'low'}
- - Distribution shape: {'right-skewed' if basic_stats['skewness'] > 0.5 else 'left-skewed' if basic_stats['skewness'] < -0.5 else 'roughly symmetric'}
- - Kurtosis type: {'leptokurtic' if basic_stats['kurtosis'] > 3 else 'platykurtic' if basic_stats['kurtosis'] < 3 else 'mesokurtic'}"""
-
-     # Detailed model suggestions
-     result_text += f"\n\n💡 Detailed model suggestions:"
-
-     if adf_result[1] < 0.05:  # stationary series
-         result_text += f"\n- The series is stationary and can be modeled directly"
-
-         # Detailed suggestions based on the ACF/PACF pattern
-         acf_decay = abs(acf_values[1]) > 0.5
-         pacf_cutoff = abs(pacf_values[1]) > 0.5 and all(abs(x) < 0.3 for x in pacf_values[2:5])
-
-         if acf_decay and pacf_cutoff:
-             result_text += f"\n- ACF decays slowly and PACF cuts off at lag 1; try an AR(1) model"
-             result_text += f"\n- ARMA(1,1) is a reasonable alternative"
-         elif not acf_decay and pacf_cutoff:
-             result_text += f"\n- ACF decays quickly and PACF cuts off; try an MA model"
-         elif acf_decay and not pacf_cutoff:
-             result_text += f"\n- ACF decays slowly and PACF does not cut off; try an AR model"
-         else:
-             result_text += f"\n- Both ACF and PACF decay slowly; try an ARMA model"
-
-         # Extra suggestions based on data characteristics
-         if seasonal_pattern:
-             result_text += f"\n- Seasonal pattern detected; consider a SARIMA model"
-         if trend_strength > 0.7:
-             result_text += f"\n- Strong trend; consider a model with a trend term"
-
-     else:  # non-stationary series
-         result_text += f"\n- The series is non-stationary; differencing is recommended"
-         result_text += f"\n- Try an ARIMA(p,d,q) model, where d is the differencing order"
-
-         # Suggest a differencing order based on trend strength
-         if trend_strength > 0.8:
-             result_text += f"\n- Strong trend; try first or second differencing"
-         elif trend_strength > 0.5:
-             result_text += f"\n- Moderate trend; try first differencing"
-         else:
-             result_text += f"\n- Weak trend; first differencing may suffice"
-
-         if seasonal_pattern:
-             result_text += f"\n- Seasonal pattern detected; consider a SARIMA model"
-
-     # Suggestions based on series length
-     if len(data) < 30:
-         result_text += f"\n- Small sample ({len(data)} observations); interpret results with caution"
-     elif len(data) < 100:
-         result_text += f"\n- Moderate sample ({len(data)} observations); suitable for most time series models"
-     else:
-         result_text += f"\n- Ample sample ({len(data)} observations); more complex models are feasible"
-
-     result_text += f"\n\n⚠️ Modeling caveats:"
-     result_text += f"\n- Stationarity is a key prerequisite for time series modeling"
-     result_text += f"\n- ACF and PACF patterns help identify appropriate model orders"
-     result_text += f"\n- Use information criteria (AIC/BIC) for model selection"
-     result_text += f"\n- Model diagnostics: check residual autocorrelation and normality"
-     result_text += f"\n- Model validation: validate forecasts on out-of-sample data"
-     result_text += f"\n- Parameter stability: ensure parameters are stable over the full sample period"
-
-     result_data = {
-         "basic_statistics": basic_stats,
-         "adf_statistic": float(adf_result[0]),
-         "adf_pvalue": float(adf_result[1]),
-         "kpss_statistic": float(kpss_result[0]),
-         "kpss_pvalue": float(kpss_result[1]),
-         "stationary": bool(adf_result[1] < 0.05 and kpss_result[1] > 0.05),
-         "acf": [float(x) for x in acf_values.tolist()],
-         "pacf": [float(x) for x in pacf_values.tolist()],
-         "diagnostic_stats": {
-             "trend_strength": trend_strength,
-             "seasonal_pattern": seasonal_pattern,
-             "volatility_level": "high" if basic_stats['cv'] > 0.5 else "medium" if basic_stats['cv'] > 0.2 else "low",
-             "distribution_shape": "right_skewed" if basic_stats['skewness'] > 0.5 else "left_skewed" if basic_stats['skewness'] < -0.5 else "symmetric",
-             "kurtosis_type": "leptokurtic" if basic_stats['kurtosis'] > 3 else "platykurtic" if basic_stats['kurtosis'] < 3 else "mesokurtic"
-         },
-         "model_suggestions": {
-             "is_stationary": adf_result[1] < 0.05,
-             "suggested_models": ["ARMA", "ARIMA"] if adf_result[1] < 0.05 else ["ARIMA", "SARIMA"],
-             "data_sufficiency": "low" if len(data) < 30 else "medium" if len(data) < 100 else "high",
-             "trend_recommendation": "strong_diff" if trend_strength > 0.8 else "moderate_diff" if trend_strength > 0.5 else "weak_diff",
-             "seasonal_recommendation": "consider_seasonal" if seasonal_pattern else "no_seasonal"
-         }
-     }
-
-     return CallToolResult(
-         content=[TextContent(type="text", text=result_text)],
-         structuredContent=result_data
-     )
-
-
- async def handle_correlation_analysis(ctx, data: Dict[str, List[float]],
-                                       method: str = "pearson", **kwargs) -> CallToolResult:
-     """Handle correlation analysis"""
-     if not data or len(data) < 2:
-         raise ValueError("Correlation analysis requires at least 2 variables")
-
-     df = pd.DataFrame(data)
-     correlation_matrix = df.corr(method=method)
-
-     return CallToolResult(
-         content=[
-             TextContent(
-                 type="text",
-                 text=f"{method.title()} correlation matrix:\n{correlation_matrix.round(4).to_string()}"
-             )
-         ]
-     )
-
-
- # Panel data handlers
- async def handle_panel_fixed_effects(ctx, y_data, x_data, entity_ids, time_periods,
-                                      feature_names=None, entity_effects=True, time_effects=False, **kwargs):
-     """Handle fixed effects model - unified output format"""
-     result = fixed_effects_model(y_data, x_data, entity_ids, time_periods, feature_names, entity_effects, time_effects)
-
-     # Build the detailed result text
-     result_text = f"""📊 Fixed Effects Model Results
-
- 🔍 Model fit:
- - R² = {result.rsquared:.4f}
- - Adjusted R² = {result.rsquared_adj:.4f}
- - F-statistic = {result.f_statistic:.4f} (p = {result.f_pvalue:.4f})
- - AIC = {result.aic:.2f}, BIC = {result.bic:.2f}
- - Observations = {result.n_obs}
- - Entity effects = {'yes' if result.entity_effects else 'no'}
- - Time effects = {'yes' if result.time_effects else 'no'}
-
- 📈 Coefficient details:"""
-
-     # Append coefficient information
-     for var_name, coef_info in result.coefficients.items():
-         significance = "***" if coef_info["p_value"] < 0.01 else "**" if coef_info["p_value"] < 0.05 else "*" if coef_info["p_value"] < 0.1 else ""
-         result_text += f"\n- {var_name}: {coef_info['coef']:.4f}{significance} (se={coef_info['std_err']:.4f}, p={coef_info['p_value']:.4f})"
-
-     result_text += "\n\n💡 Model notes: the fixed effects model removes time-invariant individual differences via the within transformation, suited to settings where entities have unobserved fixed characteristics."
-
-     return CallToolResult(
-         content=[TextContent(type="text", text=result_text)],
-         structuredContent=result.model_dump()
-     )
-
-
- async def handle_panel_random_effects(ctx, y_data, x_data, entity_ids, time_periods,
-                                       feature_names=None, entity_effects=True, time_effects=False, **kwargs):
-     """Handle random effects model - unified output format"""
-     result = random_effects_model(y_data, x_data, entity_ids, time_periods, feature_names, entity_effects, time_effects)
-
-     # Build the detailed result text
-     result_text = f"""📊 Random Effects Model Results
-
- 🔍 Model fit:
- - R² = {result.rsquared:.4f}
- - Adjusted R² = {result.rsquared_adj:.4f}
- - F-statistic = {result.f_statistic:.4f} (p = {result.f_pvalue:.4f})
- - AIC = {result.aic:.2f}, BIC = {result.bic:.2f}
- - Observations = {result.n_obs}
- - Entity effects = {'yes' if result.entity_effects else 'no'}
- - Time effects = {'yes' if result.time_effects else 'no'}
-
- 📈 Coefficient details:"""
-
-     # Append coefficient information
-     for var_name, coef_info in result.coefficients.items():
-         significance = "***" if coef_info["p_value"] < 0.01 else "**" if coef_info["p_value"] < 0.05 else "*" if coef_info["p_value"] < 0.1 else ""
-         result_text += f"\n- {var_name}: {coef_info['coef']:.4f}{significance} (se={coef_info['std_err']:.4f}, p={coef_info['p_value']:.4f})"
-
-     result_text += "\n\n💡 Model notes: the random effects model assumes individual differences are random; it is more efficient than fixed effects but requires the individual effects to be uncorrelated with the regressors."
-
-     return CallToolResult(
-         content=[TextContent(type="text", text=result_text)],
-         structuredContent=result.model_dump()
-     )
-
-
- async def handle_panel_hausman_test(ctx, y_data, x_data, entity_ids, time_periods, feature_names=None, **kwargs):
-     """Handle Hausman test - unified output format"""
-     result = hausman_test(y_data, x_data, entity_ids, time_periods, feature_names)
-
-     result_text = f"""📊 Hausman Test Results
-
- 🔍 Test information:
- - Test statistic = {result.statistic:.4f}
- - p-value = {result.p_value:.4f}
- - Significant = {'yes' if result.significant else 'no'} (5% level)
-
- 💡 Model selection recommendation:
- {result.recommendation}
-
- 📋 Decision rule:
- - p < 0.05: reject the null; choose the fixed effects model
- - p >= 0.05: fail to reject the null; choose the random effects model
-
- 🔬 Rationale: the Hausman test checks whether individual effects are correlated with the regressors. The null hypothesis is that the random effects estimator is consistent."""
-
-     return CallToolResult(
-         content=[TextContent(type="text", text=result_text)],
-         structuredContent=result.model_dump()
-     )
-
-
- async def handle_panel_unit_root_test(ctx, **kwargs):
-     """
-     Handle panel unit root test - unified output format
-
-     panel_unit_root_test expects: data, entity_ids, time_periods,
-     but the panel decorator passes in: y_data, x_data, entity_ids, time_periods
-     """
-     # Extract arguments
-     data = kwargs.get('data')
-     y_data = kwargs.get('y_data')
-     entity_ids = kwargs.get('entity_ids')
-     time_periods = kwargs.get('time_periods')
-     test_type = kwargs.get('test_type', 'levinlin')
-
-     # If data is absent but y_data is present, use y_data (from the panel decorator)
-     if data is None and y_data is not None:
-         data = y_data
-
-     if data is None:
-         raise ValueError("Data required (data or y_data)")
-
-     if entity_ids is None or time_periods is None:
-         raise ValueError("entity_ids and time_periods are required")
-
-     # Pass only the arguments panel_unit_root_test needs
-     result = panel_unit_root_test(data, entity_ids, time_periods, test_type)
-
-     # Build the detailed result text
-     result_text = f"""📊 Panel Unit Root Test Results
-
- 🔍 Test information:
- - Test method = {test_type.upper()}
- - Number of entities = {len(set(entity_ids))}
- - Number of time periods = {len(set(time_periods))}
- - Test statistic = {result.statistic:.4f}
- - p-value = {result.p_value:.4f}
- - Stationarity = {'stationary' if result.stationary else 'non-stationary'} (5% level)
-
- 📈 Test details:"""
-
-     # Append test detail information
-     if hasattr(result, 'critical_values'):
-         result_text += f"\n- Critical values: {result.critical_values}"
-     if hasattr(result, 'lags_used'):
-         result_text += f"\n- Lags used: {result.lags_used}"
-     if hasattr(result, 'test_statistic'):
-         result_text += f"\n- Test statistic: {result.test_statistic:.4f}"
-
-     result_text += f"\n\n💡 Notes: the panel unit root test checks whether panel data are stationary, a key preliminary step in panel data analysis."
-     result_text += f"\n\n⚠️ Caveat: if the data are non-stationary, apply differencing or use panel cointegration tests."
-
-     return CallToolResult(
-         content=[TextContent(type="text", text=result_text)],
-         structuredContent=result.model_dump()
-     )
-
-
- # Time series handlers
- @with_timeout(seconds=60)
- async def handle_var_model(ctx, data, max_lags=5, ic="aic", **kwargs):
-     """Handle VAR model analysis - unified output format"""
-     try:
-         result = var_model(data, max_lags=max_lags, ic=ic)
-     except TimeoutError:
-         raise TimeoutError("VAR analysis timed out (60s); try fewer variables or a lower lag order")
-
-     # Build the detailed result text
-     result_text = f"""📊 VAR Model Analysis Results
-
- 🔍 Model information:
- - Optimal lag order = {result.order}
- - Number of variables = {len(result.variables) if hasattr(result, 'variables') else 'unknown'}
- - Information criterion = {ic.upper()}
- - AIC = {result.aic:.2f}
- - BIC = {getattr(result, 'bic', 'N/A')}
- - HQIC = {getattr(result, 'hqic', 'N/A')}
-
- 📈 Model diagnostics:"""
-
-     # Append model diagnostics
-     if hasattr(result, 'residuals_normality'):
-         result_text += f"\n- Residual normality test: {result.residuals_normality}"
-     if hasattr(result, 'serial_correlation'):
-         result_text += f"\n- Serial correlation test: {result.serial_correlation}"
-     if hasattr(result, 'stability'):
-         result_text += f"\n- Model stability: {result.stability}"
-
-     # Append variable information
-     if hasattr(result, 'variables'):
-         result_text += f"\n\n🔬 Variables analyzed:"
-         for var in result.variables:
-             result_text += f"\n- {var}"
-
-     result_text += f"\n\n💡 Model notes: VAR models capture the dynamic relationships among multiple time series, including mutual influences and lagged effects."
-     result_text += f"\n\n⚠️ Caveat: VAR treats all variables as endogenous and suits the analysis of dynamic interactions among them."
-
-     return CallToolResult(
-         content=[TextContent(type="text", text=result_text)],
-         structuredContent=result.model_dump()
-     )
-
-
- @with_timeout(seconds=60)
- async def handle_vecm_model(ctx, data, coint_rank=1, deterministic="co", max_lags=5, **kwargs):
-     """Handle VECM model analysis - unified output format"""
-     try:
-         result = vecm_model(data, coint_rank=coint_rank, deterministic=deterministic, max_lags=max_lags)
-     except TimeoutError:
-         raise TimeoutError("VECM analysis timed out (60s); try fewer variables or a lower lag order")
-
-     # Build the detailed result text
-     result_text = f"""📊 VECM Model Analysis Results
-
- 🔍 Model information:
- - Cointegration rank = {result.coint_rank}
- - Deterministic term = {deterministic}
- - Maximum lag order = {max_lags}
- - AIC = {result.aic:.2f}
- - BIC = {getattr(result, 'bic', 'N/A')}
- - HQIC = {getattr(result, 'hqic', 'N/A')}
-
- 📈 Cointegration analysis:"""
-
-     # Append cointegration information
-     if hasattr(result, 'coint_relations'):
-         result_text += f"\n- Number of cointegrating relations: {len(result.coint_relations)}"
-         for i, relation in enumerate(result.coint_relations[:3], 1):  # show the first 3 relations
-             result_text += f"\n- Relation {i}: {relation}"
-         if len(result.coint_relations) > 3:
-             result_text += f"\n- ... {len(result.coint_relations) - 3} more cointegrating relations"
-
-     # Append error correction information
-     if hasattr(result, 'error_correction'):
-         result_text += f"\n\n🔧 Error correction mechanism:"
-         result_text += f"\n- Error correction term significance: {result.error_correction}"
-
-     result_text += f"\n\n💡 Model notes: VECM captures long-run equilibrium relationships among non-stationary series, with an error correction mechanism reflecting short-run adjustment."
-     result_text += f"\n\n⚠️ Caveat: VECM requires cointegration among the variables; it suits analysis of long-run equilibria and short-run dynamic adjustment of economic variables."
-
-     return CallToolResult(
-         content=[TextContent(type="text", text=result_text)],
-         structuredContent=result.model_dump()
-     )
-
-
- @with_timeout(seconds=30)
- async def handle_garch_model(ctx, data, order=(1, 1), dist="normal", **kwargs):
-     """Handle GARCH model analysis - unified output format"""
-     try:
-         result = garch_model(data, order=order, dist=dist)
-     except TimeoutError:
-         raise TimeoutError("GARCH analysis timed out (30s); try less data or a lower model order")
-
-     # Build the detailed result text
-     result_text = f"""📊 GARCH Model Analysis Results
-
- 🔍 Model information:
- - GARCH order = ({order[0]}, {order[1]})
- - Error distribution = {dist}
- - Persistence = {result.persistence:.4f}
- - AIC = {result.aic:.2f}
- - BIC = {getattr(result, 'bic', 'N/A')}
-
- 📈 Volatility characteristics:"""
-
-     # Append volatility characteristics
-     if hasattr(result, 'volatility_persistence'):
-         result_text += f"\n- Volatility persistence: {result.volatility_persistence:.4f}"
-     if hasattr(result, 'unconditional_variance'):
-         result_text += f"\n- Unconditional variance: {result.unconditional_variance:.4f}"
-     if hasattr(result, 'leverage_effect'):
-         result_text += f"\n- Leverage effect: {result.leverage_effect}"
-
-     # Append model diagnostics
-     if hasattr(result, 'residuals_test'):
-         result_text += f"\n\n🔧 Model diagnostics:"
-         result_text += f"\n- Residual tests: {result.residuals_test}"
-
-     result_text += f"\n\n💡 Model notes: GARCH models volatility clustering in financial time series and captures conditional heteroskedasticity."
-     result_text += f"\n\n⚠️ Caveat: GARCH suits volatility modeling of financial data; the choice of order affects how well volatility persistence is captured."
-
-     return CallToolResult(
-         content=[TextContent(type="text", text=result_text)],
-         structuredContent=result.model_dump()
-     )
-
-
- @with_timeout(seconds=45)
- async def handle_state_space_model(ctx, data, state_dim=1, observation_dim=1,
-                                    trend=True, seasonal=False, period=12, **kwargs):
-     """Handle state space model analysis - unified output format"""
-     try:
-         result = state_space_model(data, state_dim, observation_dim, trend, seasonal, period)
-     except TimeoutError:
-         raise TimeoutError("State space analysis timed out (45s); try a smaller state dimension or less data")
-
-     # Build the detailed result text
-     result_text = f"""📊 State Space Model Analysis Results
-
- 🔍 Model structure:
- - State dimension = {state_dim}
- - Observation dimension = {observation_dim}
- - Trend = {'included' if trend else 'excluded'}
- - Seasonal term = {'included' if seasonal else 'excluded'}
- - Seasonal period = {period if seasonal else 'N/A'}
- - AIC = {result.aic:.2f}
- - BIC = {result.bic:.2f}
- - Log-likelihood = {result.log_likelihood:.2f}
-
- 📈 State analysis:"""
-
-     # Append state information
-     if result.state_names:
-         result_text += f"\n- State variables: {', '.join(result.state_names)}"
-     if result.observation_names:
-         result_text += f"\n- Observed variables: {', '.join(result.observation_names)}"
-
-     # Append state estimate information
-     if result.filtered_state:
-         result_text += f"\n- Filtered state estimates: computed"
-     if result.smoothed_state:
-         result_text += f"\n- Smoothed state estimates: computed"
-
-     result_text += f"\n\n💡 Model notes: state space models relate latent states to observations and can handle complex dynamic systems; they are especially suited to series with unobservable states."
-     result_text += f"\n\n⚠️ Caveat: parameter estimation can be sensitive to initial values; try multiple initializations to obtain stable results."
-
-     return CallToolResult(
-         content=[TextContent(type="text", text=result_text)],
-         structuredContent=result.model_dump()
-     )
-
-
- @with_timeout(seconds=30)
- async def handle_variance_decomposition(ctx, data, periods=10, max_lags=5, **kwargs):
-     """Handle variance decomposition analysis - unified output format"""
-     try:
-         result = variance_decomposition(data, periods=periods, max_lags=max_lags)
-     except TimeoutError:
-         raise TimeoutError("Variance decomposition timed out (30s); try fewer periods or a lower lag order")
-
-     # Build the detailed result text
-     result_text = f"""📊 Variance Decomposition Results
-
- 🔍 Analysis settings:
- - Decomposition horizon = {periods}
- - Maximum lag order = {max_lags}
- - Number of variables = {len(data) if data else 'unknown'}
-
- 📈 Variance decomposition:"""
-
-     # Append variance decomposition results
-     if isinstance(result, dict) and "variance_decomposition" in result:
-         variance_decomp = result["variance_decomposition"]
-         horizon = result.get("horizon", periods)
-
-         result_text += f"\n- Horizon: {horizon} periods"
-
-         for var_name, decomposition in variance_decomp.items():
-             result_text += f"\n\n🔬 Variance sources for '{var_name}':"
-             if isinstance(decomposition, dict):
-                 for source, percentages in decomposition.items():
-                     if isinstance(percentages, list) and len(percentages) > 0:
-                         # Show the final-period contribution
-                         final_percentage = percentages[-1] * 100 if isinstance(percentages[-1], (int, float)) else 0
-                         result_text += f"\n- {source}: {final_percentage:.1f}%"
-                     else:
-                         result_text += f"\n- {source}: {percentages:.1f}%"
-             else:
-                 result_text += f"\n- Total variance: {decomposition:.1f}%"
-     else:
-         result_text += f"\n- Unexpected result format; could not parse the variance decomposition"
-
-     result_text += f"\n\n💡 Notes: variance decomposition measures each variable's contribution to the forecast error variance in a multivariate system, reflecting dynamic influences among variables."
-     result_text += f"\n\n⚠️ Caveat: results depend on the VAR lag order; decompositions at different horizons reflect short- and long-run effects."
-
-     return CallToolResult(
-         content=[TextContent(type="text", text=result_text)],
-         structuredContent=result
-     )
-
-
- # Machine learning handlers
- async def handle_random_forest(ctx, y_data, x_data, feature_names=None, n_estimators=100, max_depth=None, **kwargs):
-     """Handle random forest regression - unified output format"""
-     result = random_forest_regression(y_data, x_data, feature_names, n_estimators, max_depth)
-
-     # Check for negative R²
-     r2_warning = ""
-     if result.r2_score < 0:
-         r2_warning = f"\n⚠️ Warning: R² is negative ({result.r2_score:.4f}), meaning the model performs worse than predicting the mean. Suggestions: 1) check data quality 2) add more samples 3) tune model parameters"
-
-     # Build the detailed result text
-     result_text = f"""📊 Random Forest Regression Results
-
- 🔍 Model fit:
- - R² = {result.r2_score:.4f}
- - Mean squared error (MSE) = {result.mse:.4f}
- - Mean absolute error (MAE) = {result.mae:.4f}
- - Sample size = {result.n_obs}
- - Number of trees = {result.n_estimators}
- - Max depth = {result.max_depth if result.max_depth else 'unlimited'}
- - OOB score = {f"{result.oob_score:.4f}" if result.oob_score else 'not computed'}
- {r2_warning}
-
- 📈 Feature importance (top 10):"""
-
-     # Append feature importances, sorted by importance
-     if result.feature_importance:
-         sorted_features = sorted(result.feature_importance.items(), key=lambda x: x[1], reverse=True)
-         for i, (feature, importance) in enumerate(sorted_features[:10]):
-             result_text += f"\n- {feature}: {importance:.4f}"
-         if len(sorted_features) > 10:
-             result_text += f"\n- ... {len(sorted_features) - 10} more features"
-     else:
-         result_text += "\n- Feature importance not computed"
-
-     result_text += f"\n\n💡 Model notes: a random forest builds many decision trees and ensembles their results; it handles nonlinear relationships and feature interactions, is robust to outliers, and resists overfitting."
-     result_text += f"\n\n⚠️ Caveat: random forests are black-box models with limited interpretability, but predictive performance is usually strong."
-
-     return CallToolResult(
-         content=[TextContent(type="text", text=result_text)],
-         structuredContent=result.model_dump()
-     )
-
-
- async def handle_gradient_boosting(ctx, y_data, x_data, feature_names=None,
-                                    n_estimators=100, learning_rate=0.1, max_depth=3, **kwargs):
-     """Handle gradient boosting regression - unified output format"""
-     result = gradient_boosting_regression(y_data, x_data, feature_names, n_estimators, learning_rate, max_depth)
-
-     # Check for negative R²
-     r2_warning = ""
-     if result.r2_score < 0:
-         r2_warning = f"\n⚠️ Warning: R² is negative ({result.r2_score:.4f}), meaning the model performs worse than predicting the mean. Suggestions: 1) check data quality 2) add more samples 3) tune model parameters"
-
-     # Build the detailed result text
-     result_text = f"""📊 Gradient Boosting Regression Results
-
- 🔍 Model fit:
- - R² = {result.r2_score:.4f}
- - Mean squared error (MSE) = {result.mse:.4f}
- - Mean absolute error (MAE) = {result.mae:.4f}
- - Sample size = {result.n_obs}
- - Number of trees = {result.n_estimators}
- - Learning rate = {result.learning_rate}
- - Max depth = {result.max_depth}
- {r2_warning}
-
- 📈 Feature importance (top 10):"""
-
-     # Append feature importances, sorted by importance
-     if result.feature_importance:
-         sorted_features = sorted(result.feature_importance.items(), key=lambda x: x[1], reverse=True)
-         for i, (feature, importance) in enumerate(sorted_features[:10]):
-             result_text += f"\n- {feature}: {importance:.4f}"
-         if len(sorted_features) > 10:
-             result_text += f"\n- ... {len(sorted_features) - 10} more features"
-     else:
-         result_text += "\n- Feature importance not computed"
-
-     result_text += f"\n\n💡 Model notes: gradient boosting builds decision trees sequentially, each correcting the errors of the previous one; it handles complex nonlinear relationships and usually achieves high predictive accuracy."
-     result_text += f"\n\n⚠️ Caveat: gradient boosting is sensitive to its parameters and needs careful tuning; training is slow but predictive performance is excellent."
-
-     return CallToolResult(
-         content=[TextContent(type="text", text=result_text)],
-         structuredContent=result.model_dump()
-     )
-
-
- async def handle_lasso_regression(ctx, y_data, x_data, feature_names=None, alpha=1.0, **kwargs):
-     """Handle Lasso regression - unified output format"""
-     result = lasso_regression(y_data, x_data, feature_names, alpha)
-
-     # Check for negative R²
-     r2_warning = ""
-     if result.r2_score < 0:
-         r2_warning = f"\n⚠️ Warning: R² is negative ({result.r2_score:.4f}), meaning the model performs worse than predicting the mean. Suggestions: 1) check data quality 2) try a smaller alpha 3) add more samples"
-
-     # Check whether all coefficients are zero
-     coef_warning = ""
-     if all(abs(coef) < 1e-10 for coef in result.coefficients.values()):
-         coef_warning = f"\n⚠️ Warning: all coefficients were shrunk to 0; the regularization parameter alpha={alpha} may be too large. Consider reducing alpha"
-
-     # Build the detailed result text
-     result_text = f"""📊 Lasso Regression Results
-
- 🔍 Model fit:
- - R² = {result.r2_score:.4f}
- - Mean squared error (MSE) = {result.mse:.4f}
- - Mean absolute error (MAE) = {result.mae:.4f}
- - Sample size = {result.n_obs}
- - Regularization parameter (alpha) = {result.alpha}
- {r2_warning}{coef_warning}
-
- 📈 Coefficient details:"""
-
-     # Append coefficients, sorted by absolute value
-     sorted_coefficients = sorted(result.coefficients.items(), key=lambda x: abs(x[1]), reverse=True)
-     for var_name, coef in sorted_coefficients:
-         if abs(coef) > 1e-10:  # show only nonzero coefficients
-             result_text += f"\n- {var_name}: {coef:.4f}"
-         else:
-             result_text += f"\n- {var_name}: 0.0000 (shrunk to zero)"
-
-     result_text += f"\n\n💡 Model notes: Lasso uses L1 regularization for feature selection, automatically shrinking unimportant coefficients to 0; it suits high-dimensional data and feature selection."
-     result_text += f"\n\n⚠️ Caveat: because the data are standardized, interpret coefficient magnitudes with care."
-
-     return CallToolResult(
-         content=[TextContent(type="text", text=result_text)],
-         structuredContent=result.model_dump()
-     )
-
-
- async def handle_ridge_regression(ctx, y_data, x_data, feature_names=None, alpha=1.0, **kwargs):
-     """Handle Ridge regression - unified output format"""
-     result = ridge_regression(y_data, x_data, feature_names, alpha)
-
-     # Check for negative R²
-     r2_warning = ""
-     if result.r2_score < 0:
-         r2_warning = f"\n⚠️ Warning: R² is negative ({result.r2_score:.4f}), meaning the model performs worse than predicting the mean. Suggestions: 1) check data quality 2) try a smaller alpha 3) add more samples"
-
-     # Build the detailed result text
-     result_text = f"""📊 Ridge Regression Results
-
- 🔍 Model fit:
- - R² = {result.r2_score:.4f}
- - Mean squared error (MSE) = {result.mse:.4f}
- - Mean absolute error (MAE) = {result.mae:.4f}
- - Sample size = {result.n_obs}
- - Regularization parameter (alpha) = {result.alpha}
- {r2_warning}
-
- 📈 Coefficient details:"""
-
-     # Append coefficients, sorted by absolute value
-     sorted_coefficients = sorted(result.coefficients.items(), key=lambda x: abs(x[1]), reverse=True)
-     for var_name, coef in sorted_coefficients:
-         result_text += f"\n- {var_name}: {coef:.4f}"
-
-     result_text += f"\n\n💡 Model notes: Ridge uses L2 regularization to handle multicollinearity, shrinking all coefficients without performing feature selection; it suits settings that need stable estimates."
-     result_text += f"\n\n⚠️ Caveat: because the data are standardized, interpret coefficient magnitudes with care."
-
-     return CallToolResult(
-         content=[TextContent(type="text", text=result_text)],
-         structuredContent=result.model_dump()
-     )
-
-
- async def handle_cross_validation(ctx, y_data, x_data, model_type="random_forest", cv_folds=5, scoring="r2", **kwargs):
-     """Handle cross-validation - unified output format"""
-     result = cross_validation(y_data, x_data, model_type, cv_folds, scoring)
-
-     # Build the detailed result text
-     result_text = f"""📊 Cross-Validation Results
-
- 🔍 Validation information:
- - Model type = {result.model_type}
- - Number of folds = {result.n_splits}
- - Scoring metric = {scoring}
- - Mean score = {result.mean_score:.4f}
- - Score std dev = {result.std_score:.4f}
- - Coefficient of variation = {(result.std_score / abs(result.mean_score)) * 100 if result.mean_score != 0 else 0:.2f}%
-
- 📈 Per-fold scores:"""
-
-     # Append per-fold scores
-     for i, score in enumerate(result.cv_scores, 1):
-         result_text += f"\n- Fold {i}: {score:.4f}"
-
-     # Assess model stability
-     stability_assessment = ""
-     cv_threshold = 0.1  # 10% coefficient-of-variation threshold
-     cv_value = (result.std_score / abs(result.mean_score)) if result.mean_score != 0 else 0
-
-     if cv_value < cv_threshold:
-         stability_assessment = f"\n\n✅ Model stability: excellent (coefficient of variation {cv_value*100:.2f}% < {cv_threshold*100:.0f}%)"
-     elif cv_value < cv_threshold * 2:
-         stability_assessment = f"\n\n⚠️ Model stability: fair (coefficient of variation {cv_value*100:.2f}% between {cv_threshold*100:.0f}% and {cv_threshold*2*100:.0f}%)"
-     else:
-         stability_assessment = f"\n\n❌ Model stability: poor (coefficient of variation {cv_value*100:.2f}% > {cv_threshold*2*100:.0f}%)"
-
-     result_text += stability_assessment
-     result_text += f"\n\n💡 Notes: cross-validation splits the data into multiple subsets for training and testing to assess a model's generalization and stability."
-     result_text += f"\n\n⚠️ Caveat: a smaller coefficient of variation indicates a more stable model; prefer models with a coefficient of variation below 10%."
-
-     return CallToolResult(
-         content=[TextContent(type="text", text=result_text)],
-         structuredContent=result.model_dump()
-     )
-
-
- async def handle_feature_importance(ctx, y_data, x_data, feature_names=None, method="random_forest", top_k=5, **kwargs):
-     """Handle feature importance analysis - unified output format"""
-     result = feature_importance_analysis(y_data, x_data, feature_names, method, top_k)
-
-     # Build the detailed result text
-     result_text = f"""📊 Feature Importance Analysis Results
-
- 🔍 Analysis information:
- - Method = {method}
- - Top features shown = {top_k}
- - Total features = {len(result.feature_importance)}
-
- 📈 Feature importance ranking:"""
-
-     # Append feature importances
-     for i, (feature, importance) in enumerate(result.sorted_features[:top_k], 1):
-         percentage = (importance / sum(result.feature_importance.values())) * 100 if sum(result.feature_importance.values()) > 0 else 0
-         result_text += f"\n{i}. {feature}: {importance:.4f} ({percentage:.1f}%)"
-
-     # Append importance distribution
-     if len(result.sorted_features) > 0:
-         top_k_importance = sum(imp for _, imp in result.sorted_features[:top_k])
-         total_importance = sum(result.feature_importance.values())
-         top_k_percentage = (top_k_importance / total_importance) * 100 if total_importance > 0 else 0
-
-         result_text += f"\n\n📊 Importance distribution:"
-         result_text += f"\n- Cumulative importance of top {top_k} features: {top_k_percentage:.1f}%"
-         result_text += f"\n- Importance of remaining features: {100 - top_k_percentage:.1f}%"
-
-     result_text += f"\n\n💡 Notes: feature importance analysis identifies the variables that matter most for predicting the target and supports feature selection and model interpretation."
-     result_text += f"\n\n⚠️ Caveat: different methods can yield different importances; interpret them together with domain knowledge."
-
-     return CallToolResult(
-         content=[TextContent(type="text", text=result_text)],
-         structuredContent=result.model_dump()
-     )