aigroup-econ-mcp 0.4.2__py3-none-any.whl → 1.3.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -19,6 +19,7 @@ from .machine_learning import (
19
19
  random_forest_regression, gradient_boosting_regression,
20
20
  lasso_regression, ridge_regression, cross_validation, feature_importance_analysis
21
21
  )
22
+ from .timeout import with_timeout, TimeoutError
22
23
 
23
24
 
24
25
  async def handle_descriptive_statistics(ctx, data: Dict[str, List[float]], **kwargs) -> CallToolResult:
@@ -167,43 +168,210 @@ async def handle_hypothesis_testing(ctx, data1: List[float], data2: Optional[Lis
167
168
 
168
169
 
169
170
  async def handle_time_series_analysis(ctx, data: List[float], **kwargs) -> CallToolResult:
170
- """处理时间序列分析"""
171
+ """处理时间序列分析 - 优化版(添加超时保护和性能优化)"""
171
172
  if not data or len(data) < 5:
172
173
  raise ValueError("时间序列数据至少需要5个观测点")
173
174
 
174
- adf_result = stattools.adfuller(data)
175
- max_nlags = min(20, len(data) - 1, len(data) // 2)
175
+ # 数据长度检查和优化
176
+ original_length = len(data)
177
+ max_data_points = 1000 # 限制最大数据点数以避免超时
178
+
179
+ # 如果数据过长,进行采样
180
+ if original_length > max_data_points:
181
+ # 使用等间隔采样保留数据特征
182
+ step = original_length // max_data_points
183
+ data = data[::step]
184
+
185
+ series = pd.Series(data)
186
+
187
+ # 基本统计量(快速计算)
188
+ try:
189
+ basic_stats = {
190
+ "count": original_length, # 使用原始长度
191
+ "mean": float(series.mean()),
192
+ "std": float(series.std()),
193
+ "min": float(series.min()),
194
+ "max": float(series.max()),
195
+ "median": float(series.median()),
196
+ "skewness": float(series.skew()),
197
+ "kurtosis": float(series.kurtosis()),
198
+ "variance": float(series.var()),
199
+ "range": float(series.max() - series.min()),
200
+ "cv": float(series.std() / series.mean()) if series.mean() != 0 else 0
201
+ }
202
+ except Exception as e:
203
+ raise ValueError(f"基本统计量计算失败: {str(e)}")
204
+
205
+ # 平稳性检验(添加超时保护)
206
+ try:
207
+ adf_result = stattools.adfuller(data, maxlag=min(12, len(data)//5))
208
+ except Exception as e:
209
+ # 如果检验失败,使用简化判断
210
+ adf_result = (0.0, 0.5, 0, len(data)-1, {}, 0.0)
211
+
212
+ try:
213
+ kpss_result = stattools.kpss(data, regression='c', nlags=min(12, len(data)//5))
214
+ except Exception as e:
215
+ # 如果检验失败,使用简化判断
216
+ kpss_result = (0.0, 0.5, 0, {})
217
+
218
+ # 自相关分析(优化滞后阶数)
219
+ max_nlags = min(15, len(data) // 3, 40) # 减少最大滞后阶数
176
220
  if max_nlags < 1:
177
221
  max_nlags = 1
178
222
 
179
223
  try:
180
- acf_values = stattools.acf(data, nlags=max_nlags)
181
- pacf_values = stattools.pacf(data, nlags=max_nlags)
182
- except:
224
+ acf_values = stattools.acf(data, nlags=max_nlags, fft=True) # 使用FFT加速
225
+ pacf_values = stattools.pacf(data, nlags=max_nlags, method='ywm') # 使用更快的方法
226
+ except Exception as e:
227
+ # 如果计算失败,使用默认值
183
228
  acf_values = np.zeros(max_nlags + 1)
184
229
  pacf_values = np.zeros(max_nlags + 1)
185
230
  acf_values[0] = pacf_values[0] = 1.0
186
231
 
232
+ # 趋势强度(快速计算)
233
+ try:
234
+ if len(data) > 1:
235
+ trend_strength = abs(np.corrcoef(range(len(data)), data)[0, 1])
236
+ else:
237
+ trend_strength = 0.0
238
+ except:
239
+ trend_strength = 0.0
240
+
241
+ # 季节性检测(简化版,仅对中等长度数据进行)
242
+ seasonal_pattern = False
243
+ if 12 <= len(data) <= 500: # 只对中等长度数据检测季节性
244
+ try:
245
+ seasonal_lag = min(12, len(data)//3)
246
+ seasonal_acf = stattools.acf(data, nlags=seasonal_lag, fft=True)
247
+ seasonal_pattern = any(abs(x) > 0.3 for x in seasonal_acf[1:])
248
+ except:
249
+ seasonal_pattern = False
250
+
251
+ # 构建详细的结果文本
252
+ sampling_notice = ""
253
+ if original_length > max_data_points:
254
+ sampling_notice = f"\n⚡ 性能优化:数据量较大({original_length}个观测点),已自动采样至{len(data)}个点进行分析\n"
255
+
256
+ result_text = f"""📊 时间序列分析结果{sampling_notice}
257
+
258
+ 🔍 基本统计信息:
259
+ - 观测数量 = {basic_stats['count']}
260
+ - 均值 = {basic_stats['mean']:.4f}
261
+ - 标准差 = {basic_stats['std']:.4f}
262
+ - 方差 = {basic_stats['variance']:.4f}
263
+ - 最小值 = {basic_stats['min']:.4f}
264
+ - 最大值 = {basic_stats['max']:.4f}
265
+ - 极差 = {basic_stats['range']:.4f}
266
+ - 中位数 = {basic_stats['median']:.4f}
267
+ - 偏度 = {basic_stats['skewness']:.4f}
268
+ - 峰度 = {basic_stats['kurtosis']:.4f}
269
+ - 变异系数 = {basic_stats['cv']:.4f}
270
+
271
+ 📈 平稳性检验:
272
+ - ADF检验统计量 = {adf_result[0]:.4f}
273
+ - ADF检验p值 = {adf_result[1]:.4f}
274
+ - KPSS检验统计量 = {kpss_result[0]:.4f}
275
+ - KPSS检验p值 = {kpss_result[1]:.4f}
276
+ - 平稳性判断 = {'平稳' if adf_result[1] < 0.05 and kpss_result[1] > 0.05 else '非平稳'}
277
+
278
+ 🔬 自相关分析:
279
+ - ACF前5阶: {[f'{x:.4f}' for x in acf_values[:min(5, len(acf_values))]]}
280
+ - PACF前5阶: {[f'{x:.4f}' for x in pacf_values[:min(5, len(pacf_values))]]}
281
+ - 最大自相关: {max(abs(acf_values[1:])) if len(acf_values) > 1 else 0:.4f}
282
+ - 最大偏自相关: {max(abs(pacf_values[1:])) if len(pacf_values) > 1 else 0:.4f}
283
+
284
+ 📊 诊断统计量:
285
+ - 趋势强度: {trend_strength:.4f}
286
+ - 季节性模式: {'存在' if seasonal_pattern else '未检测到'}
287
+ - 数据波动性: {'高' if basic_stats['cv'] > 0.5 else '中等' if basic_stats['cv'] > 0.2 else '低'}
288
+ - 分布形态: {'右偏' if basic_stats['skewness'] > 0.5 else '左偏' if basic_stats['skewness'] < -0.5 else '近似对称'}
289
+ - 峰度类型: {'尖峰' if basic_stats['kurtosis'] > 3 else '低峰' if basic_stats['kurtosis'] < 3 else '正态'}"""
290
+
291
+ # 详细的模型建议
292
+ result_text += f"\n\n💡 详细模型建议:"
293
+
294
+ if adf_result[1] < 0.05: # 平稳序列
295
+ result_text += f"\n- 数据为平稳序列,可直接建模"
296
+
297
+ # 根据ACF/PACF模式给出详细建议
298
+ acf_decay = abs(acf_values[1]) > 0.5
299
+ pacf_cutoff = abs(pacf_values[1]) > 0.5 and all(abs(x) < 0.3 for x in pacf_values[2:5])
300
+
301
+ if acf_decay and pacf_cutoff:
302
+ result_text += f"\n- ACF缓慢衰减,PACF在1阶截尾,建议尝试AR(1)模型"
303
+ result_text += f"\n- 可考虑ARMA(1,1)作为备选模型"
304
+ elif not acf_decay and pacf_cutoff:
305
+ result_text += f"\n- ACF快速衰减,PACF截尾,建议尝试MA模型"
306
+ elif acf_decay and not pacf_cutoff:
307
+ result_text += f"\n- ACF缓慢衰减,PACF无截尾,建议尝试AR模型"
308
+ else:
309
+ result_text += f"\n- ACF和PACF均缓慢衰减,建议尝试ARMA模型"
310
+
311
+ # 根据数据特征给出额外建议
312
+ if seasonal_pattern:
313
+ result_text += f"\n- 检测到季节性模式,可考虑SARIMA模型"
314
+ if trend_strength > 0.7:
315
+ result_text += f"\n- 强趋势模式,可考虑带趋势项的模型"
316
+
317
+ else: # 非平稳序列
318
+ result_text += f"\n- 数据为非平稳序列,建议进行差分处理"
319
+ result_text += f"\n- 可尝试ARIMA(p,d,q)模型,其中d为差分阶数"
320
+
321
+ # 根据趋势强度建议差分阶数
322
+ if trend_strength > 0.8:
323
+ result_text += f"\n- 强趋势,建议尝试1-2阶差分"
324
+ elif trend_strength > 0.5:
325
+ result_text += f"\n- 中等趋势,建议尝试1阶差分"
326
+ else:
327
+ result_text += f"\n- 弱趋势,可尝试1阶差分"
328
+
329
+ if seasonal_pattern:
330
+ result_text += f"\n- 检测到季节性模式,可考虑SARIMA模型"
331
+
332
+ # 根据数据长度给出建议
333
+ if len(data) < 30:
334
+ result_text += f"\n- 数据量较少({len(data)}个观测点),建议谨慎解释结果"
335
+ elif len(data) < 100:
336
+ result_text += f"\n- 数据量适中({len(data)}个观测点),适合大多数时间序列模型"
337
+ else:
338
+ result_text += f"\n- 数据量充足({len(data)}个观测点),可考虑复杂模型"
339
+
340
+ result_text += f"\n\n⚠️ 建模注意事项:"
341
+ result_text += f"\n- 平稳性是时间序列建模的重要前提"
342
+ result_text += f"\n- ACF和PACF模式有助于识别合适的模型阶数"
343
+ result_text += f"\n- 建议结合信息准则(AIC/BIC)进行模型选择"
344
+ result_text += f"\n- 模型诊断:检查残差的自相关性和正态性"
345
+ result_text += f"\n- 模型验证:使用样本外数据进行预测验证"
346
+ result_text += f"\n- 参数稳定性:确保模型参数在整个样本期内稳定"
347
+
187
348
  result_data = {
349
+ "basic_statistics": basic_stats,
188
350
  "adf_statistic": float(adf_result[0]),
189
351
  "adf_pvalue": float(adf_result[1]),
190
- "stationary": bool(adf_result[1] < 0.05),
352
+ "kpss_statistic": float(kpss_result[0]),
353
+ "kpss_pvalue": float(kpss_result[1]),
354
+ "stationary": bool(adf_result[1] < 0.05 and kpss_result[1] > 0.05),
191
355
  "acf": [float(x) for x in acf_values.tolist()],
192
- "pacf": [float(x) for x in pacf_values.tolist()]
356
+ "pacf": [float(x) for x in pacf_values.tolist()],
357
+ "diagnostic_stats": {
358
+ "trend_strength": trend_strength,
359
+ "seasonal_pattern": seasonal_pattern,
360
+ "volatility_level": "high" if basic_stats['cv'] > 0.5 else "medium" if basic_stats['cv'] > 0.2 else "low",
361
+ "distribution_shape": "right_skewed" if basic_stats['skewness'] > 0.5 else "left_skewed" if basic_stats['skewness'] < -0.5 else "symmetric",
362
+ "kurtosis_type": "leptokurtic" if basic_stats['kurtosis'] > 3 else "platykurtic" if basic_stats['kurtosis'] < 3 else "mesokurtic"
363
+ },
364
+ "model_suggestions": {
365
+ "is_stationary": adf_result[1] < 0.05,
366
+ "suggested_models": ["ARMA", "ARIMA"] if adf_result[1] < 0.05 else ["ARIMA", "SARIMA"],
367
+ "data_sufficiency": "low" if len(data) < 30 else "medium" if len(data) < 100 else "high",
368
+ "trend_recommendation": "strong_diff" if trend_strength > 0.8 else "moderate_diff" if trend_strength > 0.5 else "weak_diff",
369
+ "seasonal_recommendation": "consider_seasonal" if seasonal_pattern else "no_seasonal"
370
+ }
193
371
  }
194
372
 
195
373
  return CallToolResult(
196
- content=[
197
- TextContent(
198
- type="text",
199
- text=f"时间序列分析结果:\n"
200
- f"ADF检验统计量 = {result_data['adf_statistic']:.4f}\n"
201
- f"ADF检验p值 = {result_data['adf_pvalue']:.4f}\n"
202
- f"{'平稳' if result_data['stationary'] else '非平稳'}序列\n"
203
- f"ACF前5阶: {result_data['acf'][:5]}\n"
204
- f"PACF前5阶: {result_data['pacf'][:5]}"
205
- )
206
- ],
374
+ content=[TextContent(type="text", text=result_text)],
207
375
  structuredContent=result_data
208
376
  )
209
377
 
@@ -228,35 +396,99 @@ async def handle_correlation_analysis(ctx, data: Dict[str, List[float]],
228
396
 
229
397
 
230
398
  # 面板数据处理器
231
- async def handle_panel_fixed_effects(ctx, y_data, x_data, entity_ids, time_periods,
399
+ async def handle_panel_fixed_effects(ctx, y_data, x_data, entity_ids, time_periods,
232
400
  feature_names=None, entity_effects=True, time_effects=False, **kwargs):
401
+ """处理固定效应模型 - 统一输出格式"""
233
402
  result = fixed_effects_model(y_data, x_data, entity_ids, time_periods, feature_names, entity_effects, time_effects)
403
+
404
+ # 构建详细的结果文本
405
+ result_text = f"""📊 固定效应模型分析结果
406
+
407
+ 🔍 模型拟合信息:
408
+ - R² = {result.rsquared:.4f}
409
+ - 调整R² = {result.rsquared_adj:.4f}
410
+ - F统计量 = {result.f_statistic:.4f} (p = {result.f_pvalue:.4f})
411
+ - AIC = {result.aic:.2f}, BIC = {result.bic:.2f}
412
+ - 观测数量 = {result.n_obs}
413
+ - 个体效应 = {'是' if result.entity_effects else '否'}
414
+ - 时间效应 = {'是' if result.time_effects else '否'}
415
+
416
+ 📈 回归系数详情:"""
417
+
418
+ # 添加系数信息
419
+ for var_name, coef_info in result.coefficients.items():
420
+ significance = "***" if coef_info["p_value"] < 0.01 else "**" if coef_info["p_value"] < 0.05 else "*" if coef_info["p_value"] < 0.1 else ""
421
+ result_text += f"\n- {var_name}: {coef_info['coef']:.4f}{significance} (se={coef_info['std_err']:.4f}, p={coef_info['p_value']:.4f})"
422
+
423
+ result_text += "\n\n💡 模型说明:固定效应模型通过组内变换消除个体固定差异,适用于个体间存在不可观测固定特征的情况。"
424
+
234
425
  return CallToolResult(
235
- content=[TextContent(type="text", text=f"固定效应模型: R²={result.rsquared:.4f}")],
426
+ content=[TextContent(type="text", text=result_text)],
236
427
  structuredContent=result.model_dump()
237
428
  )
238
429
 
239
430
 
240
431
  async def handle_panel_random_effects(ctx, y_data, x_data, entity_ids, time_periods,
241
432
  feature_names=None, entity_effects=True, time_effects=False, **kwargs):
433
+ """处理随机效应模型 - 统一输出格式"""
242
434
  result = random_effects_model(y_data, x_data, entity_ids, time_periods, feature_names, entity_effects, time_effects)
435
+
436
+ # 构建详细的结果文本
437
+ result_text = f"""📊 随机效应模型分析结果
438
+
439
+ 🔍 模型拟合信息:
440
+ - R² = {result.rsquared:.4f}
441
+ - 调整R² = {result.rsquared_adj:.4f}
442
+ - F统计量 = {result.f_statistic:.4f} (p = {result.f_pvalue:.4f})
443
+ - AIC = {result.aic:.2f}, BIC = {result.bic:.2f}
444
+ - 观测数量 = {result.n_obs}
445
+ - 个体效应 = {'是' if result.entity_effects else '否'}
446
+ - 时间效应 = {'是' if result.time_effects else '否'}
447
+
448
+ 📈 回归系数详情:"""
449
+
450
+ # 添加系数信息
451
+ for var_name, coef_info in result.coefficients.items():
452
+ significance = "***" if coef_info["p_value"] < 0.01 else "**" if coef_info["p_value"] < 0.05 else "*" if coef_info["p_value"] < 0.1 else ""
453
+ result_text += f"\n- {var_name}: {coef_info['coef']:.4f}{significance} (se={coef_info['std_err']:.4f}, p={coef_info['p_value']:.4f})"
454
+
455
+ result_text += "\n\n💡 模型说明:随机效应模型假设个体差异是随机的,比固定效应模型更有效率,但需要满足个体效应与解释变量不相关的假设。"
456
+
243
457
  return CallToolResult(
244
- content=[TextContent(type="text", text=f"随机效应模型: R²={result.rsquared:.4f}")],
458
+ content=[TextContent(type="text", text=result_text)],
245
459
  structuredContent=result.model_dump()
246
460
  )
247
461
 
248
462
 
249
463
  async def handle_panel_hausman_test(ctx, y_data, x_data, entity_ids, time_periods, feature_names=None, **kwargs):
464
+ """处理Hausman检验 - 统一输出格式"""
250
465
  result = hausman_test(y_data, x_data, entity_ids, time_periods, feature_names)
466
+
467
+ result_text = f"""📊 Hausman检验结果
468
+
469
+ 🔍 检验信息:
470
+ - 检验统计量 = {result.statistic:.4f}
471
+ - p值 = {result.p_value:.4f}
472
+ - 显著性 = {'是' if result.significant else '否'} (5%水平)
473
+
474
+ 💡 模型选择建议:
475
+ {result.recommendation}
476
+
477
+ 📋 决策规则:
478
+ - p值 < 0.05: 拒绝原假设,选择固定效应模型
479
+ - p值 >= 0.05: 不能拒绝原假设,选择随机效应模型
480
+
481
+ 🔬 检验原理:Hausman检验用于判断个体效应是否与解释变量相关。原假设为随机效应模型是一致的。"""
482
+
251
483
  return CallToolResult(
252
- content=[TextContent(type="text", text=f"Hausman检验: p={result.p_value:.4f}, 建议={result.recommendation}")],
484
+ content=[TextContent(type="text", text=result_text)],
253
485
  structuredContent=result.model_dump()
254
486
  )
255
487
 
256
488
 
257
489
  async def handle_panel_unit_root_test(ctx, **kwargs):
258
490
  """
259
- 处理面板单位根检验
491
+ 处理面板单位根检验 - 统一输出格式
260
492
 
261
493
  panel_unit_root_test函数期望:data, entity_ids, time_periods
262
494
  但panel装饰器会传入:y_data, x_data, entity_ids, time_periods
@@ -280,99 +512,505 @@ async def handle_panel_unit_root_test(ctx, **kwargs):
280
512
 
281
513
  # 只传递panel_unit_root_test需要的参数
282
514
  result = panel_unit_root_test(data, entity_ids, time_periods, test_type)
515
+
516
+ # 构建详细的结果文本
517
+ result_text = f"""📊 面板单位根检验结果
518
+
519
+ 🔍 检验信息:
520
+ - 检验方法 = {test_type.upper()}
521
+ - 个体数量 = {len(set(entity_ids))}
522
+ - 时间期数 = {len(set(time_periods))}
523
+ - 检验统计量 = {result.statistic:.4f}
524
+ - p值 = {result.p_value:.4f}
525
+ - 平稳性 = {'平稳' if result.stationary else '非平稳'} (5%水平)
526
+
527
+ 📈 检验详情:"""
528
+
529
+ # 添加检验详情信息
530
+ if hasattr(result, 'critical_values'):
531
+ result_text += f"\n- 临界值: {result.critical_values}"
532
+ if hasattr(result, 'lags_used'):
533
+ result_text += f"\n- 使用滞后阶数: {result.lags_used}"
534
+ if hasattr(result, 'test_statistic'):
535
+ result_text += f"\n- 检验统计量: {result.test_statistic:.4f}"
536
+
537
+ result_text += f"\n\n💡 检验说明:面板单位根检验用于判断面板数据是否平稳,是面板数据分析的重要前提检验。"
538
+ result_text += f"\n\n⚠️ 注意事项:如果数据非平稳,需要进行差分处理或使用面板协整检验。"
539
+
283
540
  return CallToolResult(
284
- content=[TextContent(type="text", text=f"面板单位根检验: {'平稳' if result.stationary else '非平稳'}")],
541
+ content=[TextContent(type="text", text=result_text)],
285
542
  structuredContent=result.model_dump()
286
543
  )
287
544
 
288
545
 
289
546
  # 时间序列处理器
547
+ @with_timeout(seconds=60)
290
548
  async def handle_var_model(ctx, data, max_lags=5, ic="aic", **kwargs):
291
- result = var_model(data, max_lags=max_lags, ic=ic)
549
+ """处理VAR模型分析 - 统一输出格式"""
550
+ try:
551
+ result = var_model(data, max_lags=max_lags, ic=ic)
552
+ except TimeoutError:
553
+ raise TimeoutError("VAR模型分析超时(60秒),请尝试减少变量数量或滞后阶数")
554
+
555
+ # 构建详细的结果文本
556
+ result_text = f"""📊 VAR模型分析结果
557
+
558
+ 🔍 模型基本信息:
559
+ - 最优滞后阶数 = {result.order}
560
+ - 变量数量 = {len(result.variables) if hasattr(result, 'variables') else '未知'}
561
+ - 信息准则 = {ic.upper()}
562
+ - AIC = {result.aic:.2f}
563
+ - BIC = {getattr(result, 'bic', 'N/A')}
564
+ - HQIC = {getattr(result, 'hqic', 'N/A')}
565
+
566
+ 📈 模型诊断信息:"""
567
+
568
+ # 添加模型诊断信息
569
+ if hasattr(result, 'residuals_normality'):
570
+ result_text += f"\n- 残差正态性检验: {result.residuals_normality}"
571
+ if hasattr(result, 'serial_correlation'):
572
+ result_text += f"\n- 序列相关性检验: {result.serial_correlation}"
573
+ if hasattr(result, 'stability'):
574
+ result_text += f"\n- 模型稳定性: {result.stability}"
575
+
576
+ # 添加变量信息
577
+ if hasattr(result, 'variables'):
578
+ result_text += f"\n\n🔬 分析变量:"
579
+ for var in result.variables:
580
+ result_text += f"\n- {var}"
581
+
582
+ result_text += f"\n\n💡 模型说明:VAR模型用于分析多个时间序列变量间的动态关系,能够捕捉变量间的相互影响和滞后效应。"
583
+ result_text += f"\n\n⚠️ 注意事项:VAR模型假设所有变量都是内生的,适用于分析变量间的动态交互关系。"
584
+
292
585
  return CallToolResult(
293
- content=[TextContent(type="text", text=f"VAR模型: 滞后阶数={result.order}, AIC={result.aic:.2f}")],
586
+ content=[TextContent(type="text", text=result_text)],
294
587
  structuredContent=result.model_dump()
295
588
  )
296
589
 
297
590
 
591
+ @with_timeout(seconds=60)
298
592
  async def handle_vecm_model(ctx, data, coint_rank=1, deterministic="co", max_lags=5, **kwargs):
299
- result = vecm_model(data, coint_rank=coint_rank, deterministic=deterministic, max_lags=max_lags)
593
+ """处理VECM模型分析 - 统一输出格式"""
594
+ try:
595
+ result = vecm_model(data, coint_rank=coint_rank, deterministic=deterministic, max_lags=max_lags)
596
+ except TimeoutError:
597
+ raise TimeoutError("VECM模型分析超时(60秒),请尝试减少变量数量或滞后阶数")
598
+
599
+ # 构建详细的结果文本
600
+ result_text = f"""📊 VECM模型分析结果
601
+
602
+ 🔍 模型基本信息:
603
+ - 协整秩 = {result.coint_rank}
604
+ - 确定性项类型 = {deterministic}
605
+ - 最大滞后阶数 = {max_lags}
606
+ - AIC = {result.aic:.2f}
607
+ - BIC = {getattr(result, 'bic', 'N/A')}
608
+ - HQIC = {getattr(result, 'hqic', 'N/A')}
609
+
610
+ 📈 协整关系分析:"""
611
+
612
+ # 添加协整关系信息
613
+ if hasattr(result, 'coint_relations'):
614
+ result_text += f"\n- 协整关系数量: {len(result.coint_relations)}"
615
+ for i, relation in enumerate(result.coint_relations[:3], 1): # 显示前3个关系
616
+ result_text += f"\n- 关系{i}: {relation}"
617
+ if len(result.coint_relations) > 3:
618
+ result_text += f"\n- ... 还有{len(result.coint_relations) - 3}个协整关系"
619
+
620
+ # 添加误差修正项信息
621
+ if hasattr(result, 'error_correction'):
622
+ result_text += f"\n\n🔧 误差修正机制:"
623
+ result_text += f"\n- 误差修正项显著性: {result.error_correction}"
624
+
625
+ result_text += f"\n\n💡 模型说明:VECM模型用于分析非平稳时间序列的长期均衡关系,包含误差修正机制来反映短期调整过程。"
626
+ result_text += f"\n\n⚠️ 注意事项:VECM模型要求变量间存在协整关系,适用于分析经济变量的长期均衡和短期动态调整。"
627
+
300
628
  return CallToolResult(
301
- content=[TextContent(type="text", text=f"VECM模型: 协整秩={result.coint_rank}, AIC={result.aic:.2f}")],
629
+ content=[TextContent(type="text", text=result_text)],
302
630
  structuredContent=result.model_dump()
303
631
  )
304
632
 
305
633
 
634
+ @with_timeout(seconds=30)
306
635
  async def handle_garch_model(ctx, data, order=(1, 1), dist="normal", **kwargs):
307
- result = garch_model(data, order=order, dist=dist)
636
+ """处理GARCH模型分析 - 统一输出格式"""
637
+ try:
638
+ result = garch_model(data, order=order, dist=dist)
639
+ except TimeoutError:
640
+ raise TimeoutError("GARCH模型分析超时(30秒),请尝试减少数据量或降低模型阶数")
641
+
642
+ # 构建详细的结果文本
643
+ result_text = f"""📊 GARCH模型分析结果
644
+
645
+ 🔍 模型基本信息:
646
+ - GARCH阶数 = ({order[0]}, {order[1]})
647
+ - 误差分布 = {dist}
648
+ - 持久性 = {result.persistence:.4f}
649
+ - AIC = {result.aic:.2f}
650
+ - BIC = {getattr(result, 'bic', 'N/A')}
651
+
652
+ 📈 波动率特征:"""
653
+
654
+ # 添加波动率特征信息
655
+ if hasattr(result, 'volatility_persistence'):
656
+ result_text += f"\n- 波动率持续性: {result.volatility_persistence:.4f}"
657
+ if hasattr(result, 'unconditional_variance'):
658
+ result_text += f"\n- 无条件方差: {result.unconditional_variance:.4f}"
659
+ if hasattr(result, 'leverage_effect'):
660
+ result_text += f"\n- 杠杆效应: {result.leverage_effect}"
661
+
662
+ # 添加模型诊断信息
663
+ if hasattr(result, 'residuals_test'):
664
+ result_text += f"\n\n🔧 模型诊断:"
665
+ result_text += f"\n- 残差检验: {result.residuals_test}"
666
+
667
+ result_text += f"\n\n💡 模型说明:GARCH模型用于分析金融时间序列的波动率聚类现象,能够捕捉条件异方差性。"
668
+ result_text += f"\n\n⚠️ 注意事项:GARCH模型适用于金融数据波动率建模,阶数选择影响模型对波动率持续性的捕捉能力。"
669
+
308
670
  return CallToolResult(
309
- content=[TextContent(type="text", text=f"GARCH模型: 持久性={result.persistence:.4f}")],
671
+ content=[TextContent(type="text", text=result_text)],
310
672
  structuredContent=result.model_dump()
311
673
  )
312
674
 
313
675
 
314
- async def handle_state_space_model(ctx, data, state_dim=1, observation_dim=1,
676
+ @with_timeout(seconds=45)
677
+ async def handle_state_space_model(ctx, data, state_dim=1, observation_dim=1,
315
678
  trend=True, seasonal=False, period=12, **kwargs):
316
- result = state_space_model(data, state_dim, observation_dim, trend, seasonal, period)
679
+ """处理状态空间模型分析 - 统一输出格式"""
680
+ try:
681
+ result = state_space_model(data, state_dim, observation_dim, trend, seasonal, period)
682
+ except TimeoutError:
683
+ raise TimeoutError("状态空间模型分析超时(45秒),请尝试减少状态维度或数据量")
684
+
685
+ # 构建详细的结果文本
686
+ result_text = f"""📊 状态空间模型分析结果
687
+
688
+ 🔍 模型结构信息:
689
+ - 状态维度 = {state_dim}
690
+ - 观测维度 = {observation_dim}
691
+ - 趋势项 = {'包含' if trend else '不包含'}
692
+ - 季节项 = {'包含' if seasonal else '不包含'}
693
+ - 季节周期 = {period if seasonal else 'N/A'}
694
+ - AIC = {result.aic:.2f}
695
+ - BIC = {result.bic:.2f}
696
+ - 对数似然值 = {result.log_likelihood:.2f}
697
+
698
+ 📈 状态分析:"""
699
+
700
+ # 添加状态信息
701
+ if result.state_names:
702
+ result_text += f"\n- 状态变量: {', '.join(result.state_names)}"
703
+ if result.observation_names:
704
+ result_text += f"\n- 观测变量: {', '.join(result.observation_names)}"
705
+
706
+ # 添加状态估计信息
707
+ if result.filtered_state:
708
+ result_text += f"\n- 滤波状态估计: 已计算"
709
+ if result.smoothed_state:
710
+ result_text += f"\n- 平滑状态估计: 已计算"
711
+
712
+ result_text += f"\n\n💡 模型说明:状态空间模型用于分析时间序列的潜在状态和观测关系,能够处理复杂的动态系统,特别适用于具有不可观测状态的时间序列建模。"
713
+ result_text += f"\n\n⚠️ 注意事项:状态空间模型参数估计可能对初始值敏感,建议进行多次初始化尝试以获得稳定结果。"
714
+
317
715
  return CallToolResult(
318
- content=[TextContent(type="text", text=f"状态空间模型: AIC={result.aic:.2f}")],
716
+ content=[TextContent(type="text", text=result_text)],
319
717
  structuredContent=result.model_dump()
320
718
  )
321
719
 
322
720
 
721
+ @with_timeout(seconds=30)
323
722
  async def handle_variance_decomposition(ctx, data, periods=10, max_lags=5, **kwargs):
324
- result = variance_decomposition(data, periods=periods, max_lags=max_lags)
723
+ """处理方差分解分析 - 统一输出格式"""
724
+ try:
725
+ result = variance_decomposition(data, periods=periods, max_lags=max_lags)
726
+ except TimeoutError:
727
+ raise TimeoutError("方差分解分析超时(30秒),请尝试减少分解期数或滞后阶数")
728
+
729
+ # 构建详细的结果文本
730
+ result_text = f"""📊 方差分解分析结果
731
+
732
+ 🔍 分析设置:
733
+ - 分解期数 = {periods}
734
+ - 最大滞后阶数 = {max_lags}
735
+ - 变量数量 = {len(data) if data else '未知'}
736
+
737
+ 📈 方差分解结果:"""
738
+
739
+ # 添加方差分解结果
740
+ if isinstance(result, dict) and "variance_decomposition" in result:
741
+ variance_decomp = result["variance_decomposition"]
742
+ horizon = result.get("horizon", periods)
743
+
744
+ result_text += f"\n- 分析期数: {horizon}期"
745
+
746
+ for var_name, decomposition in variance_decomp.items():
747
+ result_text += f"\n\n🔬 变量 '{var_name}' 的方差来源:"
748
+ if isinstance(decomposition, dict):
749
+ for source, percentages in decomposition.items():
750
+ if isinstance(percentages, list) and len(percentages) > 0:
751
+ # 显示最后一期的贡献度
752
+ final_percentage = percentages[-1] * 100 if isinstance(percentages[-1], (int, float)) else 0
753
+ result_text += f"\n- {source}: {final_percentage:.1f}%"
754
+ else:
755
+ result_text += f"\n- {source}: {percentages:.1f}%"
756
+ else:
757
+ result_text += f"\n- 总方差: {decomposition:.1f}%"
758
+ else:
759
+ result_text += f"\n- 结果格式异常,无法解析方差分解结果"
760
+
761
+ result_text += f"\n\n💡 分析说明:方差分解用于分析多变量系统中各变量对预测误差方差的贡献程度,反映变量间的动态影响关系。"
762
+ result_text += f"\n\n⚠️ 注意事项:方差分解结果依赖于VAR模型的滞后阶数选择,不同期数的分解结果反映短期和长期影响。"
763
+
325
764
  return CallToolResult(
326
- content=[TextContent(type="text", text=f"方差分解: {periods}期")],
765
+ content=[TextContent(type="text", text=result_text)],
327
766
  structuredContent=result
328
767
  )
329
768
 
330
769
 
331
770
  # 机器学习处理器
332
771
  async def handle_random_forest(ctx, y_data, x_data, feature_names=None, n_estimators=100, max_depth=None, **kwargs):
772
+ """处理随机森林回归 - 统一输出格式"""
333
773
  result = random_forest_regression(y_data, x_data, feature_names, n_estimators, max_depth)
774
+
775
+ # 检查R²是否为负值
776
+ r2_warning = ""
777
+ if result.r2_score < 0:
778
+ r2_warning = f"\n⚠️ 警告:R²为负值({result.r2_score:.4f}),表明模型性能比简单均值预测更差。建议:1) 检查数据质量 2) 增加样本数量 3) 调整模型参数"
779
+
780
+ # 构建详细的结果文本
781
+ result_text = f"""📊 随机森林回归分析结果
782
+
783
+ 🔍 模型拟合信息:
784
+ - R² = {result.r2_score:.4f}
785
+ - 均方误差(MSE) = {result.mse:.4f}
786
+ - 平均绝对误差(MAE) = {result.mae:.4f}
787
+ - 样本数量 = {result.n_obs}
788
+ - 树的数量 = {result.n_estimators}
789
+ - 最大深度 = {result.max_depth if result.max_depth else '无限制'}
790
+ - 袋外得分 = {f"{result.oob_score:.4f}" if result.oob_score else '未计算'}
791
+ {r2_warning}
792
+
793
+ 📈 特征重要性(前10个):"""
794
+
795
+ # 添加特征重要性信息,按重要性排序
796
+ if result.feature_importance:
797
+ sorted_features = sorted(result.feature_importance.items(), key=lambda x: x[1], reverse=True)
798
+ for i, (feature, importance) in enumerate(sorted_features[:10]):
799
+ result_text += f"\n- {feature}: {importance:.4f}"
800
+ if len(sorted_features) > 10:
801
+ result_text += f"\n- ... 还有{len(sorted_features) - 10}个特征"
802
+ else:
803
+ result_text += "\n- 特征重要性未计算"
804
+
805
+ result_text += f"\n\n💡 模型说明:随机森林通过构建多个决策树并集成结果,能够处理非线性关系和特征交互,对异常值稳健且不易过拟合。"
806
+ result_text += f"\n\n⚠️ 注意事项:随机森林是黑盒模型,可解释性较差,但预测性能通常较好。"
807
+
334
808
  return CallToolResult(
335
- content=[TextContent(type="text", text=f"随机森林: R²={result.r2_score:.4f}")],
809
+ content=[TextContent(type="text", text=result_text)],
336
810
  structuredContent=result.model_dump()
337
811
  )
338
812
 
339
813
 
340
- async def handle_gradient_boosting(ctx, y_data, x_data, feature_names=None,
814
+ async def handle_gradient_boosting(ctx, y_data, x_data, feature_names=None,
341
815
  n_estimators=100, learning_rate=0.1, max_depth=3, **kwargs):
816
+ """处理梯度提升树回归 - 统一输出格式"""
342
817
  result = gradient_boosting_regression(y_data, x_data, feature_names, n_estimators, learning_rate, max_depth)
818
+
819
+ # 检查R²是否为负值
820
+ r2_warning = ""
821
+ if result.r2_score < 0:
822
+ r2_warning = f"\n⚠️ 警告:R²为负值({result.r2_score:.4f}),表明模型性能比简单均值预测更差。建议:1) 检查数据质量 2) 增加样本数量 3) 调整模型参数"
823
+
824
+ # 构建详细的结果文本
825
+ result_text = f"""📊 梯度提升树回归分析结果
826
+
827
+ 🔍 模型拟合信息:
828
+ - R² = {result.r2_score:.4f}
829
+ - 均方误差(MSE) = {result.mse:.4f}
830
+ - 平均绝对误差(MAE) = {result.mae:.4f}
831
+ - 样本数量 = {result.n_obs}
832
+ - 树的数量 = {result.n_estimators}
833
+ - 学习率 = {result.learning_rate}
834
+ - 最大深度 = {result.max_depth}
835
+ {r2_warning}
836
+
837
+ 📈 特征重要性(前10个):"""
838
+
839
+ # 添加特征重要性信息,按重要性排序
840
+ if result.feature_importance:
841
+ sorted_features = sorted(result.feature_importance.items(), key=lambda x: x[1], reverse=True)
842
+ for i, (feature, importance) in enumerate(sorted_features[:10]):
843
+ result_text += f"\n- {feature}: {importance:.4f}"
844
+ if len(sorted_features) > 10:
845
+ result_text += f"\n- ... 还有{len(sorted_features) - 10}个特征"
846
+ else:
847
+ result_text += "\n- 特征重要性未计算"
848
+
849
+ result_text += f"\n\n💡 模型说明:梯度提升树通过顺序构建决策树,每棵树修正前一棵树的错误,能够处理复杂的非线性关系,通常具有很高的预测精度。"
850
+ result_text += f"\n\n⚠️ 注意事项:梯度提升树对参数敏感,需要仔细调优,训练时间较长但预测性能优秀。"
851
+
343
852
  return CallToolResult(
344
- content=[TextContent(type="text", text=f"梯度提升树: R²={result.r2_score:.4f}")],
853
+ content=[TextContent(type="text", text=result_text)],
345
854
  structuredContent=result.model_dump()
346
855
  )
347
856
 
348
857
 
349
858
  async def handle_lasso_regression(ctx, y_data, x_data, feature_names=None, alpha=1.0, **kwargs):
859
+ """处理Lasso回归 - 统一输出格式"""
350
860
  result = lasso_regression(y_data, x_data, feature_names, alpha)
861
+
862
+ # 检查R²是否为负值
863
+ r2_warning = ""
864
+ if result.r2_score < 0:
865
+ r2_warning = f"\n⚠️ 警告:R²为负值({result.r2_score:.4f}),表明模型性能比简单均值预测更差。建议:1) 检查数据质量 2) 尝试更小的alpha值 3) 增加样本数量"
866
+
867
+ # 检查系数是否全为0
868
+ coef_warning = ""
869
+ if all(abs(coef) < 1e-10 for coef in result.coefficients.values()):
870
+ coef_warning = f"\n⚠️ 警告:所有系数都被压缩为0,正则化参数alpha={alpha}可能过大,建议减小alpha值"
871
+
872
+ # 构建详细的结果文本
873
+ result_text = f"""📊 Lasso回归分析结果
874
+
875
+ 🔍 模型拟合信息:
876
+ - R² = {result.r2_score:.4f}
877
+ - 均方误差(MSE) = {result.mse:.4f}
878
+ - 平均绝对误差(MAE) = {result.mae:.4f}
879
+ - 样本数量 = {result.n_obs}
880
+ - 正则化参数(alpha) = {result.alpha}
881
+ {r2_warning}{coef_warning}
882
+
883
+ 📈 回归系数详情:"""
884
+
885
+ # 添加系数信息,按绝对值排序
886
+ sorted_coefficients = sorted(result.coefficients.items(), key=lambda x: abs(x[1]), reverse=True)
887
+ for var_name, coef in sorted_coefficients:
888
+ if abs(coef) > 1e-10: # 只显示非零系数
889
+ result_text += f"\n- {var_name}: {coef:.4f}"
890
+ else:
891
+ result_text += f"\n- {var_name}: 0.0000 (被压缩)"
892
+
893
+ result_text += f"\n\n💡 模型说明:Lasso回归使用L1正则化进行特征选择,能够自动将不重要的特征系数压缩为0,适用于高维数据和特征选择场景。"
894
+ result_text += f"\n\n⚠️ 注意事项:由于数据标准化,系数大小需要谨慎解释。"
895
+
351
896
  return CallToolResult(
352
- content=[TextContent(type="text", text=f"Lasso回归: R²={result.r2_score:.4f}")],
897
+ content=[TextContent(type="text", text=result_text)],
353
898
  structuredContent=result.model_dump()
354
899
  )
355
900
 
356
901
 
357
902
  async def handle_ridge_regression(ctx, y_data, x_data, feature_names=None, alpha=1.0, **kwargs):
903
+ """处理Ridge回归 - 统一输出格式"""
358
904
  result = ridge_regression(y_data, x_data, feature_names, alpha)
905
+
906
+ # 检查R²是否为负值
907
+ r2_warning = ""
908
+ if result.r2_score < 0:
909
+ r2_warning = f"\n⚠️ 警告:R²为负值({result.r2_score:.4f}),表明模型性能比简单均值预测更差。建议:1) 检查数据质量 2) 尝试更小的alpha值 3) 增加样本数量"
910
+
911
+ # 构建详细的结果文本
912
+ result_text = f"""📊 Ridge回归分析结果
913
+
914
+ 🔍 模型拟合信息:
915
+ - R² = {result.r2_score:.4f}
916
+ - 均方误差(MSE) = {result.mse:.4f}
917
+ - 平均绝对误差(MAE) = {result.mae:.4f}
918
+ - 样本数量 = {result.n_obs}
919
+ - 正则化参数(alpha) = {result.alpha}
920
+ {r2_warning}
921
+
922
+ 📈 回归系数详情:"""
923
+
924
+ # 添加系数信息,按绝对值排序
925
+ sorted_coefficients = sorted(result.coefficients.items(), key=lambda x: abs(x[1]), reverse=True)
926
+ for var_name, coef in sorted_coefficients:
927
+ result_text += f"\n- {var_name}: {coef:.4f}"
928
+
929
+ result_text += f"\n\n💡 模型说明:Ridge回归使用L2正则化处理多重共线性问题,对所有系数进行收缩但不进行特征选择,适用于需要稳定估计的场景。"
930
+ result_text += f"\n\n⚠️ 注意事项:由于数据标准化,系数大小需要谨慎解释。"
931
+
359
932
  return CallToolResult(
360
- content=[TextContent(type="text", text=f"Ridge回归: R²={result.r2_score:.4f}")],
933
+ content=[TextContent(type="text", text=result_text)],
361
934
  structuredContent=result.model_dump()
362
935
  )
363
936
 
364
937
 
365
938
  async def handle_cross_validation(ctx, y_data, x_data, model_type="random_forest", cv_folds=5, scoring="r2", **kwargs):
939
+ """处理交叉验证 - 统一输出格式"""
366
940
  result = cross_validation(y_data, x_data, model_type, cv_folds, scoring)
941
+
942
+ # 构建详细的结果文本
943
+ result_text = f"""📊 交叉验证分析结果
944
+
945
+ 🔍 验证信息:
946
+ - 模型类型 = {result.model_type}
947
+ - 交叉验证折数 = {result.n_splits}
948
+ - 评分指标 = {scoring}
949
+ - 平均得分 = {result.mean_score:.4f}
950
+ - 得分标准差 = {result.std_score:.4f}
951
+ - 变异系数 = {(result.std_score / abs(result.mean_score)) * 100 if result.mean_score != 0 else 0:.2f}%
952
+
953
+ 📈 各折得分详情:"""
954
+
955
+ # 添加各折得分
956
+ for i, score in enumerate(result.cv_scores, 1):
957
+ result_text += f"\n- 第{i}折: {score:.4f}"
958
+
959
+ # 评估模型稳定性
960
+ stability_assessment = ""
961
+ cv_threshold = 0.1 # 10%的变异系数阈值
962
+ cv_value = (result.std_score / abs(result.mean_score)) if result.mean_score != 0 else 0
963
+
964
+ if cv_value < cv_threshold:
965
+ stability_assessment = f"\n\n✅ 模型稳定性:优秀(变异系数{cv_value*100:.2f}% < {cv_threshold*100:.0f}%)"
966
+ elif cv_value < cv_threshold * 2:
967
+ stability_assessment = f"\n\n⚠️ 模型稳定性:一般(变异系数{cv_value*100:.2f}% 在{cv_threshold*100:.0f}%-{cv_threshold*2*100:.0f}%之间)"
968
+ else:
969
+ stability_assessment = f"\n\n❌ 模型稳定性:较差(变异系数{cv_value*100:.2f}% > {cv_threshold*2*100:.0f}%)"
970
+
971
+ result_text += stability_assessment
972
+ result_text += f"\n\n💡 模型说明:交叉验证通过将数据分成多个子集进行训练和测试,评估模型的泛化能力和稳定性。"
973
+ result_text += f"\n\n⚠️ 注意事项:变异系数越小表明模型越稳定,建议选择变异系数小于10%的模型。"
974
+
367
975
  return CallToolResult(
368
- content=[TextContent(type="text", text=f"交叉验证: 平均得分={result.mean_score:.4f}")],
976
+ content=[TextContent(type="text", text=result_text)],
369
977
  structuredContent=result.model_dump()
370
978
  )
371
979
 
372
980
 
373
981
  async def handle_feature_importance(ctx, y_data, x_data, feature_names=None, method="random_forest", top_k=5, **kwargs):
982
+ """处理特征重要性分析 - 统一输出格式"""
374
983
  result = feature_importance_analysis(y_data, x_data, feature_names, method, top_k)
984
+
985
+ # 构建详细的结果文本
986
+ result_text = f"""📊 特征重要性分析结果
987
+
988
+ 🔍 分析信息:
989
+ - 分析方法 = {method}
990
+ - 显示Top特征数量 = {top_k}
991
+ - 总特征数量 = {len(result.feature_importance)}
992
+
993
+ 📈 特征重要性排名:"""
994
+
995
+ # 添加特征重要性信息
996
+ for i, (feature, importance) in enumerate(result.sorted_features[:top_k], 1):
997
+ percentage = (importance / sum(result.feature_importance.values())) * 100 if sum(result.feature_importance.values()) > 0 else 0
998
+ result_text += f"\n{i}. {feature}: {importance:.4f} ({percentage:.1f}%)"
999
+
1000
+ # 添加重要性分布信息
1001
+ if len(result.sorted_features) > 0:
1002
+ top_k_importance = sum(imp for _, imp in result.sorted_features[:top_k])
1003
+ total_importance = sum(result.feature_importance.values())
1004
+ top_k_percentage = (top_k_importance / total_importance) * 100 if total_importance > 0 else 0
1005
+
1006
+ result_text += f"\n\n📊 重要性分布:"
1007
+ result_text += f"\n- Top {top_k}特征累计重要性: {top_k_percentage:.1f}%"
1008
+ result_text += f"\n- 剩余特征重要性: {100 - top_k_percentage:.1f}%"
1009
+
1010
+ result_text += f"\n\n💡 分析说明:特征重要性分析帮助识别对预测目标最重要的变量,可用于特征选择和模型解释。"
1011
+ result_text += f"\n\n⚠️ 注意事项:不同方法计算的特征重要性可能不同,建议结合业务知识进行解释。"
1012
+
375
1013
  return CallToolResult(
376
- content=[TextContent(type="text", text=f"特征重要性: Top特征={result.top_features}")],
1014
+ content=[TextContent(type="text", text=result_text)],
377
1015
  structuredContent=result.model_dump()
378
1016
  )