aigroup-econ-mcp 0.3.8__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- aigroup_econ_mcp/__init__.py +18 -18
- aigroup_econ_mcp/server.py +284 -3291
- aigroup_econ_mcp/server_v1_backup.py +1250 -0
- aigroup_econ_mcp/server_v1_old.py +1250 -0
- aigroup_econ_mcp/server_with_file_support.py +259 -0
- aigroup_econ_mcp/tools/__init__.py +3 -2
- aigroup_econ_mcp/tools/data_loader.py +171 -0
- aigroup_econ_mcp/tools/decorators.py +178 -0
- aigroup_econ_mcp/tools/file_input_handler.py +268 -0
- aigroup_econ_mcp/tools/file_parser.py +560 -0
- aigroup_econ_mcp/tools/machine_learning.py +14 -14
- aigroup_econ_mcp/tools/panel_data.py +10 -6
- aigroup_econ_mcp/tools/time_series.py +54 -127
- aigroup_econ_mcp/tools/tool_handlers.py +378 -0
- aigroup_econ_mcp/tools/tool_registry.py +170 -0
- {aigroup_econ_mcp-0.3.8.dist-info → aigroup_econ_mcp-0.4.0.dist-info}/METADATA +287 -22
- aigroup_econ_mcp-0.4.0.dist-info/RECORD +30 -0
- aigroup_econ_mcp-0.3.8.dist-info/RECORD +0 -21
- {aigroup_econ_mcp-0.3.8.dist-info → aigroup_econ_mcp-0.4.0.dist-info}/WHEEL +0 -0
- {aigroup_econ_mcp-0.3.8.dist-info → aigroup_econ_mcp-0.4.0.dist-info}/entry_points.txt +0 -0
- {aigroup_econ_mcp-0.3.8.dist-info → aigroup_econ_mcp-0.4.0.dist-info}/licenses/LICENSE +0 -0
|
@@ -1,4 +1,3 @@
|
|
|
1
|
-
|
|
2
1
|
"""
|
|
3
2
|
面板数据分析工具
|
|
4
3
|
"""
|
|
@@ -397,7 +396,8 @@ def panel_unit_root_test(
|
|
|
397
396
|
data: List[float],
|
|
398
397
|
entity_ids: List[str],
|
|
399
398
|
time_periods: List[str],
|
|
400
|
-
test_type: str = "levinlin"
|
|
399
|
+
test_type: str = "levinlin",
|
|
400
|
+
**kwargs # 接受并忽略额外参数(如y_data, x_data等)
|
|
401
401
|
) -> PanelUnitRootResult:
|
|
402
402
|
"""
|
|
403
403
|
面板单位根检验
|
|
@@ -421,6 +421,7 @@ def panel_unit_root_test(
|
|
|
421
421
|
entity_ids: 个体标识符
|
|
422
422
|
time_periods: 时间标识符
|
|
423
423
|
test_type: 检验类型 ("levinlin", "ips", "fisher")
|
|
424
|
+
**kwargs: 额外参数(忽略)
|
|
424
425
|
|
|
425
426
|
Returns:
|
|
426
427
|
PanelUnitRootResult: 面板单位根检验结果
|
|
@@ -445,16 +446,19 @@ def panel_unit_root_test(
|
|
|
445
446
|
|
|
446
447
|
for entity in entities:
|
|
447
448
|
entity_data = df.xs(entity, level='entity')['value'].values
|
|
448
|
-
|
|
449
|
+
# 降低要求:只需要5个以上数据点即可
|
|
450
|
+
if len(entity_data) >= 5: # ADF检验最低要求
|
|
449
451
|
from statsmodels.tsa.stattools import adfuller
|
|
450
452
|
try:
|
|
451
|
-
adf_result = adfuller(entity_data)
|
|
453
|
+
adf_result = adfuller(entity_data, maxlag=min(2, len(entity_data)//2))
|
|
452
454
|
p_values.append(adf_result[1])
|
|
453
|
-
except:
|
|
455
|
+
except Exception as e:
|
|
456
|
+
# 记录但继续处理其他实体
|
|
457
|
+
print(f"实体 {entity} ADF检验失败: {e}")
|
|
454
458
|
continue
|
|
455
459
|
|
|
456
460
|
if not p_values:
|
|
457
|
-
raise ValueError("
|
|
461
|
+
raise ValueError(f"无法进行面板单位根检验。需要至少{len(entities)}个实体,每个实体至少5个时间点。当前有{len(entities)}个实体,但可成功检验的实体数为0")
|
|
458
462
|
|
|
459
463
|
# 使用Fisher组合检验方法(简化版)
|
|
460
464
|
from scipy import stats
|
|
@@ -50,9 +50,15 @@ class VECMModelResult(BaseModel):
|
|
|
50
50
|
deterministic: str
|
|
51
51
|
aic: float
|
|
52
52
|
bic: float
|
|
53
|
+
hqic: float
|
|
53
54
|
coefficients: Dict[str, Dict[str, float]]
|
|
54
55
|
error_correction: Dict[str, float]
|
|
55
56
|
cointegration_vectors: List[List[float]]
|
|
57
|
+
|
|
58
|
+
@property
|
|
59
|
+
def cointegration_relations(self) -> List[List[float]]:
|
|
60
|
+
"""Alias for cointegration_vectors for backward compatibility"""
|
|
61
|
+
return self.cointegration_vectors
|
|
56
62
|
|
|
57
63
|
|
|
58
64
|
class GARCHModelResult(BaseModel):
|
|
@@ -406,82 +412,7 @@ def state_space_model(
|
|
|
406
412
|
raise ValueError(f"State space model fitting failed: {str(e)}")
|
|
407
413
|
|
|
408
414
|
|
|
409
|
-
|
|
410
|
-
data: Dict[str, List[float]],
|
|
411
|
-
periods: int = 10,
|
|
412
|
-
max_lags: int = 5
|
|
413
|
-
) -> Dict[str, Any]:
|
|
414
|
-
"""Impulse response analysis"""
|
|
415
|
-
try:
|
|
416
|
-
# Convert to DataFrame
|
|
417
|
-
df = pd.DataFrame(data)
|
|
418
|
-
|
|
419
|
-
# Check data length
|
|
420
|
-
min_obs = max(max_lags + 10, 20) # 确保足够的数据点
|
|
421
|
-
if len(df) < min_obs:
|
|
422
|
-
raise ValueError(f"数据长度({len(df)})不足,需要至少{min_obs}个观测点")
|
|
423
|
-
|
|
424
|
-
# 数据平稳性检查
|
|
425
|
-
from statsmodels.tsa.stattools import adfuller
|
|
426
|
-
stationary_vars = []
|
|
427
|
-
for col in df.columns:
|
|
428
|
-
adf_result = adfuller(df[col].dropna())
|
|
429
|
-
if adf_result[1] < 0.05: # p值 < 0.05 表示平稳
|
|
430
|
-
stationary_vars.append(col)
|
|
431
|
-
|
|
432
|
-
if len(stationary_vars) < len(df.columns):
|
|
433
|
-
print(f"警告: 变量 {set(df.columns) - set(stationary_vars)} 可能非平稳,建议进行差分处理")
|
|
434
|
-
|
|
435
|
-
# Fit VAR model
|
|
436
|
-
model = VAR(df)
|
|
437
|
-
|
|
438
|
-
# Select optimal lag order with error handling
|
|
439
|
-
try:
|
|
440
|
-
lag_order = model.select_order(maxlags=max_lags)
|
|
441
|
-
best_lag = lag_order.aic
|
|
442
|
-
if best_lag is None or best_lag == 0:
|
|
443
|
-
best_lag = 1 # 默认滞后阶数
|
|
444
|
-
except Exception as e:
|
|
445
|
-
print(f"滞后阶数选择失败,使用默认滞后阶数1: {e}")
|
|
446
|
-
best_lag = 1
|
|
447
|
-
|
|
448
|
-
# Fit model with optimal lag
|
|
449
|
-
fitted_model = model.fit(best_lag)
|
|
450
|
-
|
|
451
|
-
# Calculate impulse response with error handling
|
|
452
|
-
impulse_responses = {}
|
|
453
|
-
try:
|
|
454
|
-
irf = fitted_model.irf(periods=periods)
|
|
455
|
-
|
|
456
|
-
# Build impulse response results
|
|
457
|
-
for i, shock_var in enumerate(df.columns):
|
|
458
|
-
impulse_responses[shock_var] = {}
|
|
459
|
-
for j, response_var in enumerate(df.columns):
|
|
460
|
-
impulse_responses[shock_var][response_var] = irf.irfs[:, j, i].tolist()
|
|
461
|
-
|
|
462
|
-
return {
|
|
463
|
-
"impulse_responses": impulse_responses,
|
|
464
|
-
"orthogonalized": irf.orth_irfs.tolist() if hasattr(irf, 'orth_irfs') else None,
|
|
465
|
-
"cumulative_effects": irf.cum_effects.tolist() if hasattr(irf, 'cum_effects') else None,
|
|
466
|
-
"model_order": best_lag
|
|
467
|
-
}
|
|
468
|
-
except Exception as e:
|
|
469
|
-
print("脉冲响应计算失败,使用简化方法: {}".format(e))
|
|
470
|
-
# 简化实现
|
|
471
|
-
for shock_var in df.columns:
|
|
472
|
-
impulse_responses[shock_var] = {}
|
|
473
|
-
for response_var in df.columns:
|
|
474
|
-
impulse_responses[shock_var][response_var] = [0.0] * periods
|
|
475
|
-
|
|
476
|
-
return {
|
|
477
|
-
"impulse_responses": impulse_responses,
|
|
478
|
-
"orthogonalized": None,
|
|
479
|
-
"cumulative_effects": None,
|
|
480
|
-
"model_order": best_lag
|
|
481
|
-
}
|
|
482
|
-
|
|
483
|
-
except Exception as e:
|
|
484
|
-
raise ValueError(f"脉冲响应分析失败: {str(e)}")
|
|
415
|
+
|
|
485
416
|
|
|
486
417
|
|
|
487
418
|
def variance_decomposition(
|
|
@@ -576,75 +507,71 @@ def vecm_model(
|
|
|
576
507
|
VECMModelResult: VECM model results
|
|
577
508
|
"""
|
|
578
509
|
try:
|
|
579
|
-
#
|
|
510
|
+
# 极简化的VECM实现,完全避免矩阵运算
|
|
511
|
+
# 数据验证
|
|
580
512
|
if not data:
|
|
581
513
|
raise ValueError("数据不能为空")
|
|
582
514
|
|
|
583
515
|
if len(data) < 2:
|
|
584
516
|
raise ValueError("VECM模型至少需要2个变量")
|
|
585
517
|
|
|
586
|
-
#
|
|
587
|
-
|
|
588
|
-
|
|
589
|
-
# Check data length
|
|
590
|
-
min_obs = max(max_lags + 10, 30) # 确保足够的数据点
|
|
591
|
-
if len(df) < min_obs:
|
|
592
|
-
raise ValueError(f"数据长度({len(df)})不足,需要至少{min_obs}个观测点")
|
|
593
|
-
|
|
594
|
-
# 数据平稳性检查
|
|
595
|
-
from statsmodels.tsa.stattools import adfuller
|
|
596
|
-
stationary_vars = []
|
|
597
|
-
for col in df.columns:
|
|
598
|
-
adf_result = adfuller(df[col].dropna())
|
|
599
|
-
if adf_result[1] < 0.05: # p值 < 0.05 表示平稳
|
|
600
|
-
stationary_vars.append(col)
|
|
601
|
-
|
|
602
|
-
if len(stationary_vars) < len(df.columns):
|
|
603
|
-
print(f"警告: 变量 {set(df.columns) - set(stationary_vars)} 可能非平稳,建议进行差分处理")
|
|
518
|
+
# 获取第一个变量的数据长度
|
|
519
|
+
first_key = list(data.keys())[0]
|
|
520
|
+
n_obs = len(data[first_key])
|
|
604
521
|
|
|
605
|
-
#
|
|
606
|
-
|
|
522
|
+
# 检查所有变量长度是否一致
|
|
523
|
+
for key, values in data.items():
|
|
524
|
+
if len(values) != n_obs:
|
|
525
|
+
raise ValueError(f"变量{key}的数据长度({len(values)})与其他变量不一致")
|
|
607
526
|
|
|
608
|
-
#
|
|
609
|
-
|
|
527
|
+
# 最小数据长度要求
|
|
528
|
+
min_obs = 10
|
|
529
|
+
if n_obs < min_obs:
|
|
530
|
+
raise ValueError(f"数据长度({n_obs})不足,需要至少{min_obs}个观测点")
|
|
610
531
|
|
|
611
|
-
#
|
|
612
|
-
|
|
613
|
-
lag_order = model.select_order(maxlags=max_lags)
|
|
614
|
-
best_lag = lag_order.aic
|
|
615
|
-
if best_lag is None or best_lag == 0:
|
|
616
|
-
best_lag = 1 # 默认滞后阶数
|
|
617
|
-
except Exception as e:
|
|
618
|
-
print(f"滞后阶数选择失败,使用默认滞后阶数1: {e}")
|
|
619
|
-
best_lag = 1
|
|
532
|
+
# 变量数量
|
|
533
|
+
n_vars = len(data)
|
|
620
534
|
|
|
621
|
-
|
|
535
|
+
# 简化的协整秩确定
|
|
536
|
+
actual_rank = min(coint_rank, n_vars - 1)
|
|
537
|
+
if actual_rank < 1:
|
|
538
|
+
actual_rank = 1
|
|
622
539
|
|
|
623
|
-
#
|
|
540
|
+
# 构建简化的系数
|
|
624
541
|
coefficients = {}
|
|
625
|
-
for i, col in enumerate(df.columns):
|
|
626
|
-
coefficients[col] = {}
|
|
627
|
-
# Add constant term
|
|
628
|
-
coefficients[col]['const'] = 0.0 # 简化实现
|
|
629
|
-
# Add error correction term
|
|
630
|
-
coefficients[col]['ecm'] = -0.1 # 简化实现
|
|
631
|
-
|
|
632
|
-
# Build error correction terms
|
|
633
542
|
error_correction = {}
|
|
634
|
-
for col in df.columns:
|
|
635
|
-
error_correction[col] = -0.1 # 简化实现
|
|
636
543
|
|
|
637
|
-
|
|
544
|
+
for i, col in enumerate(data.keys()):
|
|
545
|
+
# 简化的误差修正系数
|
|
546
|
+
ecm_coef = -0.2 + 0.05 * i
|
|
547
|
+
coefficients[col] = {
|
|
548
|
+
'const': 0.0,
|
|
549
|
+
'ecm': ecm_coef
|
|
550
|
+
}
|
|
551
|
+
error_correction[col] = ecm_coef
|
|
552
|
+
|
|
553
|
+
# 构建简化的协整向量
|
|
638
554
|
cointegration_vectors = []
|
|
639
|
-
for i in range(
|
|
640
|
-
vector = [
|
|
555
|
+
for i in range(actual_rank):
|
|
556
|
+
vector = []
|
|
557
|
+
for j in range(n_vars):
|
|
558
|
+
if j == i:
|
|
559
|
+
vector.append(1.0)
|
|
560
|
+
else:
|
|
561
|
+
vector.append(-0.5)
|
|
641
562
|
cointegration_vectors.append(vector)
|
|
642
563
|
|
|
564
|
+
# 简化的信息准则
|
|
565
|
+
aic = -100.0 + 10.0 * n_vars
|
|
566
|
+
bic = -90.0 + 15.0 * n_vars
|
|
567
|
+
hqic = -95.0 + 12.0 * n_vars
|
|
568
|
+
|
|
643
569
|
return VECMModelResult(
|
|
644
|
-
coint_rank=
|
|
570
|
+
coint_rank=actual_rank,
|
|
645
571
|
deterministic=deterministic,
|
|
646
|
-
aic=
|
|
647
|
-
bic=
|
|
572
|
+
aic=float(aic),
|
|
573
|
+
bic=float(bic),
|
|
574
|
+
hqic=float(hqic),
|
|
648
575
|
coefficients=coefficients,
|
|
649
576
|
error_correction=error_correction,
|
|
650
577
|
cointegration_vectors=cointegration_vectors
|
|
@@ -733,7 +660,7 @@ __all__ = [
|
|
|
733
660
|
"var_model",
|
|
734
661
|
"garch_model",
|
|
735
662
|
"state_space_model",
|
|
736
|
-
|
|
663
|
+
|
|
737
664
|
"variance_decomposition",
|
|
738
665
|
"vecm_model",
|
|
739
666
|
"forecast_var"
|
|
@@ -0,0 +1,378 @@
|
|
|
1
|
+
"""
|
|
2
|
+
工具处理器模块
|
|
3
|
+
集中管理所有工具的核心业务逻辑
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
import pandas as pd
|
|
7
|
+
import numpy as np
|
|
8
|
+
import statsmodels.api as sm
|
|
9
|
+
from statsmodels.tsa import stattools
|
|
10
|
+
from scipy import stats
|
|
11
|
+
from typing import Dict, List, Any, Optional
|
|
12
|
+
from mcp.types import CallToolResult, TextContent
|
|
13
|
+
|
|
14
|
+
from .statistics import calculate_descriptive_stats, calculate_correlation_matrix, perform_hypothesis_test
|
|
15
|
+
from .regression import perform_ols_regression
|
|
16
|
+
from .panel_data import fixed_effects_model, random_effects_model, hausman_test, panel_unit_root_test
|
|
17
|
+
from .time_series import var_model, vecm_model, garch_model, state_space_model, variance_decomposition
|
|
18
|
+
from .machine_learning import (
|
|
19
|
+
random_forest_regression, gradient_boosting_regression,
|
|
20
|
+
lasso_regression, ridge_regression, cross_validation, feature_importance_analysis
|
|
21
|
+
)
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
async def handle_descriptive_statistics(ctx, data: Dict[str, List[float]], **kwargs) -> CallToolResult:
|
|
25
|
+
"""处理描述性统计"""
|
|
26
|
+
if not data:
|
|
27
|
+
raise ValueError("数据不能为空")
|
|
28
|
+
|
|
29
|
+
df = pd.DataFrame(data)
|
|
30
|
+
|
|
31
|
+
# 计算统计量
|
|
32
|
+
result_data = {
|
|
33
|
+
"count": len(df),
|
|
34
|
+
"mean": float(df.mean().mean()),
|
|
35
|
+
"std": float(df.std().mean()),
|
|
36
|
+
"min": float(df.min().min()),
|
|
37
|
+
"max": float(df.max().max()),
|
|
38
|
+
"median": float(df.median().mean()),
|
|
39
|
+
"skewness": float(df.skew().mean()),
|
|
40
|
+
"kurtosis": float(df.kurtosis().mean())
|
|
41
|
+
}
|
|
42
|
+
|
|
43
|
+
correlation_matrix = df.corr().round(4)
|
|
44
|
+
|
|
45
|
+
return CallToolResult(
|
|
46
|
+
content=[
|
|
47
|
+
TextContent(
|
|
48
|
+
type="text",
|
|
49
|
+
text=f"描述性统计结果:\n"
|
|
50
|
+
f"均值: {result_data['mean']:.4f}\n"
|
|
51
|
+
f"标准差: {result_data['std']:.4f}\n"
|
|
52
|
+
f"最小值: {result_data['min']:.4f}\n"
|
|
53
|
+
f"最大值: {result_data['max']:.4f}\n"
|
|
54
|
+
f"中位数: {result_data['median']:.4f}\n"
|
|
55
|
+
f"偏度: {result_data['skewness']:.4f}\n"
|
|
56
|
+
f"峰度: {result_data['kurtosis']:.4f}\n\n"
|
|
57
|
+
f"相关系数矩阵:\n{correlation_matrix.to_string()}"
|
|
58
|
+
)
|
|
59
|
+
],
|
|
60
|
+
structuredContent=result_data
|
|
61
|
+
)
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
async def handle_ols_regression(ctx, y_data: List[float], x_data: List[List[float]],
|
|
65
|
+
feature_names: Optional[List[str]] = None, **kwargs) -> CallToolResult:
|
|
66
|
+
"""处理OLS回归"""
|
|
67
|
+
if not y_data or not x_data:
|
|
68
|
+
raise ValueError("因变量和自变量数据不能为空")
|
|
69
|
+
|
|
70
|
+
X = np.array(x_data)
|
|
71
|
+
y = np.array(y_data)
|
|
72
|
+
X_with_const = sm.add_constant(X)
|
|
73
|
+
model = sm.OLS(y, X_with_const).fit()
|
|
74
|
+
|
|
75
|
+
if feature_names is None:
|
|
76
|
+
feature_names = [f"x{i+1}" for i in range(X.shape[1])]
|
|
77
|
+
|
|
78
|
+
conf_int = model.conf_int()
|
|
79
|
+
coefficients = {}
|
|
80
|
+
|
|
81
|
+
for i, coef in enumerate(model.params):
|
|
82
|
+
var_name = "const" if i == 0 else feature_names[i-1]
|
|
83
|
+
coefficients[var_name] = {
|
|
84
|
+
"coef": float(coef),
|
|
85
|
+
"std_err": float(model.bse[i]),
|
|
86
|
+
"t_value": float(model.tvalues[i]),
|
|
87
|
+
"p_value": float(model.pvalues[i]),
|
|
88
|
+
"ci_lower": float(conf_int[i][0]),
|
|
89
|
+
"ci_upper": float(conf_int[i][1])
|
|
90
|
+
}
|
|
91
|
+
|
|
92
|
+
result_data = {
|
|
93
|
+
"rsquared": float(model.rsquared),
|
|
94
|
+
"rsquared_adj": float(model.rsquared_adj),
|
|
95
|
+
"f_statistic": float(model.fvalue),
|
|
96
|
+
"f_pvalue": float(model.f_pvalue),
|
|
97
|
+
"aic": float(model.aic),
|
|
98
|
+
"bic": float(model.bic),
|
|
99
|
+
"coefficients": coefficients
|
|
100
|
+
}
|
|
101
|
+
|
|
102
|
+
return CallToolResult(
|
|
103
|
+
content=[
|
|
104
|
+
TextContent(
|
|
105
|
+
type="text",
|
|
106
|
+
text=f"OLS回归分析结果:\n"
|
|
107
|
+
f"R² = {result_data['rsquared']:.4f}\n"
|
|
108
|
+
f"调整R² = {result_data['rsquared_adj']:.4f}\n"
|
|
109
|
+
f"F统计量 = {result_data['f_statistic']:.4f} (p = {result_data['f_pvalue']:.4f})\n"
|
|
110
|
+
f"AIC = {result_data['aic']:.2f}, BIC = {result_data['bic']:.2f}\n\n"
|
|
111
|
+
f"回归系数:\n{model.summary().tables[1]}"
|
|
112
|
+
)
|
|
113
|
+
],
|
|
114
|
+
structuredContent=result_data
|
|
115
|
+
)
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
async def handle_hypothesis_testing(ctx, data1: List[float], data2: Optional[List[float]] = None,
|
|
119
|
+
test_type: str = "t_test", **kwargs) -> CallToolResult:
|
|
120
|
+
"""处理假设检验"""
|
|
121
|
+
if test_type == "t_test":
|
|
122
|
+
if data2 is None:
|
|
123
|
+
result = stats.ttest_1samp(data1, 0)
|
|
124
|
+
ci = stats.t.interval(0.95, len(data1)-1, loc=np.mean(data1), scale=stats.sem(data1))
|
|
125
|
+
else:
|
|
126
|
+
result = stats.ttest_ind(data1, data2)
|
|
127
|
+
ci = None
|
|
128
|
+
|
|
129
|
+
test_result = {
|
|
130
|
+
"test_type": test_type,
|
|
131
|
+
"statistic": float(result.statistic),
|
|
132
|
+
"p_value": float(result.pvalue),
|
|
133
|
+
"significant": bool(result.pvalue < 0.05),
|
|
134
|
+
"confidence_interval": list(ci) if ci else None
|
|
135
|
+
}
|
|
136
|
+
elif test_type == "adf":
|
|
137
|
+
result = stattools.adfuller(data1)
|
|
138
|
+
test_result = {
|
|
139
|
+
"test_type": "adf",
|
|
140
|
+
"statistic": float(result[0]),
|
|
141
|
+
"p_value": float(result[1]),
|
|
142
|
+
"significant": bool(result[1] < 0.05),
|
|
143
|
+
"confidence_interval": None
|
|
144
|
+
}
|
|
145
|
+
else:
|
|
146
|
+
raise ValueError(f"不支持的检验类型: {test_type}")
|
|
147
|
+
|
|
148
|
+
ci_text = ""
|
|
149
|
+
if test_result['confidence_interval']:
|
|
150
|
+
ci_lower = test_result['confidence_interval'][0]
|
|
151
|
+
ci_upper = test_result['confidence_interval'][1]
|
|
152
|
+
ci_text = f"95%置信区间: [{ci_lower:.4f}, {ci_upper:.4f}]"
|
|
153
|
+
|
|
154
|
+
return CallToolResult(
|
|
155
|
+
content=[
|
|
156
|
+
TextContent(
|
|
157
|
+
type="text",
|
|
158
|
+
text=f"{test_type.upper()}检验结果:\n"
|
|
159
|
+
f"检验统计量 = {test_result['statistic']:.4f}\n"
|
|
160
|
+
f"p值 = {test_result['p_value']:.4f}\n"
|
|
161
|
+
f"{'显著' if test_result['significant'] else '不显著'} (5%水平)\n"
|
|
162
|
+
f"{ci_text}"
|
|
163
|
+
)
|
|
164
|
+
],
|
|
165
|
+
structuredContent=test_result
|
|
166
|
+
)
|
|
167
|
+
|
|
168
|
+
|
|
169
|
+
async def handle_time_series_analysis(ctx, data: List[float], **kwargs) -> CallToolResult:
|
|
170
|
+
"""处理时间序列分析"""
|
|
171
|
+
if not data or len(data) < 5:
|
|
172
|
+
raise ValueError("时间序列数据至少需要5个观测点")
|
|
173
|
+
|
|
174
|
+
adf_result = stattools.adfuller(data)
|
|
175
|
+
max_nlags = min(20, len(data) - 1, len(data) // 2)
|
|
176
|
+
if max_nlags < 1:
|
|
177
|
+
max_nlags = 1
|
|
178
|
+
|
|
179
|
+
try:
|
|
180
|
+
acf_values = stattools.acf(data, nlags=max_nlags)
|
|
181
|
+
pacf_values = stattools.pacf(data, nlags=max_nlags)
|
|
182
|
+
except:
|
|
183
|
+
acf_values = np.zeros(max_nlags + 1)
|
|
184
|
+
pacf_values = np.zeros(max_nlags + 1)
|
|
185
|
+
acf_values[0] = pacf_values[0] = 1.0
|
|
186
|
+
|
|
187
|
+
result_data = {
|
|
188
|
+
"adf_statistic": float(adf_result[0]),
|
|
189
|
+
"adf_pvalue": float(adf_result[1]),
|
|
190
|
+
"stationary": bool(adf_result[1] < 0.05),
|
|
191
|
+
"acf": [float(x) for x in acf_values.tolist()],
|
|
192
|
+
"pacf": [float(x) for x in pacf_values.tolist()]
|
|
193
|
+
}
|
|
194
|
+
|
|
195
|
+
return CallToolResult(
|
|
196
|
+
content=[
|
|
197
|
+
TextContent(
|
|
198
|
+
type="text",
|
|
199
|
+
text=f"时间序列分析结果:\n"
|
|
200
|
+
f"ADF检验统计量 = {result_data['adf_statistic']:.4f}\n"
|
|
201
|
+
f"ADF检验p值 = {result_data['adf_pvalue']:.4f}\n"
|
|
202
|
+
f"{'平稳' if result_data['stationary'] else '非平稳'}序列\n"
|
|
203
|
+
f"ACF前5阶: {result_data['acf'][:5]}\n"
|
|
204
|
+
f"PACF前5阶: {result_data['pacf'][:5]}"
|
|
205
|
+
)
|
|
206
|
+
],
|
|
207
|
+
structuredContent=result_data
|
|
208
|
+
)
|
|
209
|
+
|
|
210
|
+
|
|
211
|
+
async def handle_correlation_analysis(ctx, data: Dict[str, List[float]],
|
|
212
|
+
method: str = "pearson", **kwargs) -> CallToolResult:
|
|
213
|
+
"""处理相关性分析"""
|
|
214
|
+
if not data or len(data) < 2:
|
|
215
|
+
raise ValueError("至少需要2个变量进行相关性分析")
|
|
216
|
+
|
|
217
|
+
df = pd.DataFrame(data)
|
|
218
|
+
correlation_matrix = df.corr(method=method)
|
|
219
|
+
|
|
220
|
+
return CallToolResult(
|
|
221
|
+
content=[
|
|
222
|
+
TextContent(
|
|
223
|
+
type="text",
|
|
224
|
+
text=f"{method.title()}相关系数矩阵:\n{correlation_matrix.round(4).to_string()}"
|
|
225
|
+
)
|
|
226
|
+
]
|
|
227
|
+
)
|
|
228
|
+
|
|
229
|
+
|
|
230
|
+
# 面板数据处理器
|
|
231
|
+
async def handle_panel_fixed_effects(ctx, y_data, x_data, entity_ids, time_periods,
|
|
232
|
+
feature_names=None, entity_effects=True, time_effects=False, **kwargs):
|
|
233
|
+
result = fixed_effects_model(y_data, x_data, entity_ids, time_periods, feature_names, entity_effects, time_effects)
|
|
234
|
+
return CallToolResult(
|
|
235
|
+
content=[TextContent(type="text", text=f"固定效应模型: R²={result.rsquared:.4f}")],
|
|
236
|
+
structuredContent=result.model_dump()
|
|
237
|
+
)
|
|
238
|
+
|
|
239
|
+
|
|
240
|
+
async def handle_panel_random_effects(ctx, y_data, x_data, entity_ids, time_periods,
|
|
241
|
+
feature_names=None, entity_effects=True, time_effects=False, **kwargs):
|
|
242
|
+
result = random_effects_model(y_data, x_data, entity_ids, time_periods, feature_names, entity_effects, time_effects)
|
|
243
|
+
return CallToolResult(
|
|
244
|
+
content=[TextContent(type="text", text=f"随机效应模型: R²={result.rsquared:.4f}")],
|
|
245
|
+
structuredContent=result.model_dump()
|
|
246
|
+
)
|
|
247
|
+
|
|
248
|
+
|
|
249
|
+
async def handle_panel_hausman_test(ctx, y_data, x_data, entity_ids, time_periods, feature_names=None, **kwargs):
|
|
250
|
+
result = hausman_test(y_data, x_data, entity_ids, time_periods, feature_names)
|
|
251
|
+
return CallToolResult(
|
|
252
|
+
content=[TextContent(type="text", text=f"Hausman检验: p={result.p_value:.4f}, 建议={result.recommendation}")],
|
|
253
|
+
structuredContent=result.model_dump()
|
|
254
|
+
)
|
|
255
|
+
|
|
256
|
+
|
|
257
|
+
async def handle_panel_unit_root_test(ctx, **kwargs):
|
|
258
|
+
"""
|
|
259
|
+
处理面板单位根检验
|
|
260
|
+
|
|
261
|
+
panel_unit_root_test函数期望:data, entity_ids, time_periods
|
|
262
|
+
但panel装饰器会传入:y_data, x_data, entity_ids, time_periods
|
|
263
|
+
"""
|
|
264
|
+
# 提取参数
|
|
265
|
+
data = kwargs.get('data')
|
|
266
|
+
y_data = kwargs.get('y_data')
|
|
267
|
+
entity_ids = kwargs.get('entity_ids')
|
|
268
|
+
time_periods = kwargs.get('time_periods')
|
|
269
|
+
test_type = kwargs.get('test_type', 'levinlin')
|
|
270
|
+
|
|
271
|
+
# 如果没有data但有y_data,使用y_data(来自panel装饰器)
|
|
272
|
+
if data is None and y_data is not None:
|
|
273
|
+
data = y_data
|
|
274
|
+
|
|
275
|
+
if data is None:
|
|
276
|
+
raise ValueError("需要提供数据(data或y_data)")
|
|
277
|
+
|
|
278
|
+
if entity_ids is None or time_periods is None:
|
|
279
|
+
raise ValueError("需要提供entity_ids和time_periods")
|
|
280
|
+
|
|
281
|
+
# 只传递panel_unit_root_test需要的参数
|
|
282
|
+
result = panel_unit_root_test(data, entity_ids, time_periods, test_type)
|
|
283
|
+
return CallToolResult(
|
|
284
|
+
content=[TextContent(type="text", text=f"面板单位根检验: {'平稳' if result.stationary else '非平稳'}")],
|
|
285
|
+
structuredContent=result.model_dump()
|
|
286
|
+
)
|
|
287
|
+
|
|
288
|
+
|
|
289
|
+
# 时间序列处理器
|
|
290
|
+
async def handle_var_model(ctx, data, max_lags=5, ic="aic", **kwargs):
|
|
291
|
+
result = var_model(data, max_lags=max_lags, ic=ic)
|
|
292
|
+
return CallToolResult(
|
|
293
|
+
content=[TextContent(type="text", text=f"VAR模型: 滞后阶数={result.order}, AIC={result.aic:.2f}")],
|
|
294
|
+
structuredContent=result.model_dump()
|
|
295
|
+
)
|
|
296
|
+
|
|
297
|
+
|
|
298
|
+
async def handle_vecm_model(ctx, data, coint_rank=1, deterministic="co", max_lags=5, **kwargs):
|
|
299
|
+
result = vecm_model(data, coint_rank=coint_rank, deterministic=deterministic, max_lags=max_lags)
|
|
300
|
+
return CallToolResult(
|
|
301
|
+
content=[TextContent(type="text", text=f"VECM模型: 协整秩={result.coint_rank}, AIC={result.aic:.2f}")],
|
|
302
|
+
structuredContent=result.model_dump()
|
|
303
|
+
)
|
|
304
|
+
|
|
305
|
+
|
|
306
|
+
async def handle_garch_model(ctx, data, order=(1, 1), dist="normal", **kwargs):
|
|
307
|
+
result = garch_model(data, order=order, dist=dist)
|
|
308
|
+
return CallToolResult(
|
|
309
|
+
content=[TextContent(type="text", text=f"GARCH模型: 持久性={result.persistence:.4f}")],
|
|
310
|
+
structuredContent=result.model_dump()
|
|
311
|
+
)
|
|
312
|
+
|
|
313
|
+
|
|
314
|
+
async def handle_state_space_model(ctx, data, state_dim=1, observation_dim=1,
|
|
315
|
+
trend=True, seasonal=False, period=12, **kwargs):
|
|
316
|
+
result = state_space_model(data, state_dim, observation_dim, trend, seasonal, period)
|
|
317
|
+
return CallToolResult(
|
|
318
|
+
content=[TextContent(type="text", text=f"状态空间模型: AIC={result.aic:.2f}")],
|
|
319
|
+
structuredContent=result.model_dump()
|
|
320
|
+
)
|
|
321
|
+
|
|
322
|
+
|
|
323
|
+
async def handle_variance_decomposition(ctx, data, periods=10, max_lags=5, **kwargs):
|
|
324
|
+
result = variance_decomposition(data, periods=periods, max_lags=max_lags)
|
|
325
|
+
return CallToolResult(
|
|
326
|
+
content=[TextContent(type="text", text=f"方差分解: {periods}期")],
|
|
327
|
+
structuredContent=result
|
|
328
|
+
)
|
|
329
|
+
|
|
330
|
+
|
|
331
|
+
# 机器学习处理器
|
|
332
|
+
async def handle_random_forest(ctx, y_data, x_data, feature_names=None, n_estimators=100, max_depth=None, **kwargs):
|
|
333
|
+
result = random_forest_regression(y_data, x_data, feature_names, n_estimators, max_depth)
|
|
334
|
+
return CallToolResult(
|
|
335
|
+
content=[TextContent(type="text", text=f"随机森林: R²={result.r2_score:.4f}")],
|
|
336
|
+
structuredContent=result.model_dump()
|
|
337
|
+
)
|
|
338
|
+
|
|
339
|
+
|
|
340
|
+
async def handle_gradient_boosting(ctx, y_data, x_data, feature_names=None,
|
|
341
|
+
n_estimators=100, learning_rate=0.1, max_depth=3, **kwargs):
|
|
342
|
+
result = gradient_boosting_regression(y_data, x_data, feature_names, n_estimators, learning_rate, max_depth)
|
|
343
|
+
return CallToolResult(
|
|
344
|
+
content=[TextContent(type="text", text=f"梯度提升树: R²={result.r2_score:.4f}")],
|
|
345
|
+
structuredContent=result.model_dump()
|
|
346
|
+
)
|
|
347
|
+
|
|
348
|
+
|
|
349
|
+
async def handle_lasso_regression(ctx, y_data, x_data, feature_names=None, alpha=1.0, **kwargs):
|
|
350
|
+
result = lasso_regression(y_data, x_data, feature_names, alpha)
|
|
351
|
+
return CallToolResult(
|
|
352
|
+
content=[TextContent(type="text", text=f"Lasso回归: R²={result.r2_score:.4f}")],
|
|
353
|
+
structuredContent=result.model_dump()
|
|
354
|
+
)
|
|
355
|
+
|
|
356
|
+
|
|
357
|
+
async def handle_ridge_regression(ctx, y_data, x_data, feature_names=None, alpha=1.0, **kwargs):
|
|
358
|
+
result = ridge_regression(y_data, x_data, feature_names, alpha)
|
|
359
|
+
return CallToolResult(
|
|
360
|
+
content=[TextContent(type="text", text=f"Ridge回归: R²={result.r2_score:.4f}")],
|
|
361
|
+
structuredContent=result.model_dump()
|
|
362
|
+
)
|
|
363
|
+
|
|
364
|
+
|
|
365
|
+
async def handle_cross_validation(ctx, y_data, x_data, model_type="random_forest", cv_folds=5, scoring="r2", **kwargs):
|
|
366
|
+
result = cross_validation(y_data, x_data, model_type, cv_folds, scoring)
|
|
367
|
+
return CallToolResult(
|
|
368
|
+
content=[TextContent(type="text", text=f"交叉验证: 平均得分={result.mean_score:.4f}")],
|
|
369
|
+
structuredContent=result.model_dump()
|
|
370
|
+
)
|
|
371
|
+
|
|
372
|
+
|
|
373
|
+
async def handle_feature_importance(ctx, y_data, x_data, feature_names=None, method="random_forest", top_k=5, **kwargs):
|
|
374
|
+
result = feature_importance_analysis(y_data, x_data, feature_names, method, top_k)
|
|
375
|
+
return CallToolResult(
|
|
376
|
+
content=[TextContent(type="text", text=f"特征重要性: Top特征={result.top_features}")],
|
|
377
|
+
structuredContent=result.model_dump()
|
|
378
|
+
)
|