aigroup-econ-mcp 0.4.2__py3-none-any.whl → 1.4.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- .gitignore +253 -0
- PKG-INFO +710 -0
- README.md +672 -0
- __init__.py +14 -0
- aigroup_econ_mcp-1.4.3.dist-info/METADATA +710 -0
- aigroup_econ_mcp-1.4.3.dist-info/RECORD +92 -0
- aigroup_econ_mcp-1.4.3.dist-info/entry_points.txt +2 -0
- aigroup_econ_mcp-1.4.3.dist-info/licenses/LICENSE +21 -0
- cli.py +28 -0
- econometrics/README.md +18 -0
- econometrics/__init__.py +191 -0
- econometrics/advanced_methods/modern_computing_machine_learning/__init__.py +0 -0
- econometrics/basic_parametric_estimation/__init__.py +31 -0
- econometrics/basic_parametric_estimation/gmm/__init__.py +13 -0
- econometrics/basic_parametric_estimation/gmm/gmm_model.py +256 -0
- econometrics/basic_parametric_estimation/mle/__init__.py +13 -0
- econometrics/basic_parametric_estimation/mle/mle_model.py +241 -0
- econometrics/basic_parametric_estimation/ols/__init__.py +13 -0
- econometrics/basic_parametric_estimation/ols/ols_model.py +141 -0
- econometrics/causal_inference/causal_identification_strategy/__init__.py +0 -0
- econometrics/missing_data/missing_data_measurement_error/__init__.py +0 -0
- econometrics/model_specification_diagnostics_robust_inference/README.md +173 -0
- econometrics/model_specification_diagnostics_robust_inference/__init__.py +78 -0
- econometrics/model_specification_diagnostics_robust_inference/diagnostic_tests/__init__.py +20 -0
- econometrics/model_specification_diagnostics_robust_inference/diagnostic_tests/diagnostic_tests_model.py +149 -0
- econometrics/model_specification_diagnostics_robust_inference/generalized_least_squares/__init__.py +15 -0
- econometrics/model_specification_diagnostics_robust_inference/generalized_least_squares/gls_model.py +130 -0
- econometrics/model_specification_diagnostics_robust_inference/model_selection/__init__.py +18 -0
- econometrics/model_specification_diagnostics_robust_inference/model_selection/model_selection_model.py +286 -0
- econometrics/model_specification_diagnostics_robust_inference/regularization/__init__.py +15 -0
- econometrics/model_specification_diagnostics_robust_inference/regularization/regularization_model.py +177 -0
- econometrics/model_specification_diagnostics_robust_inference/robust_errors/__init__.py +15 -0
- econometrics/model_specification_diagnostics_robust_inference/robust_errors/robust_errors_model.py +122 -0
- econometrics/model_specification_diagnostics_robust_inference/simultaneous_equations/__init__.py +15 -0
- econometrics/model_specification_diagnostics_robust_inference/simultaneous_equations/simultaneous_equations_model.py +246 -0
- econometrics/model_specification_diagnostics_robust_inference/weighted_least_squares/__init__.py +15 -0
- econometrics/model_specification_diagnostics_robust_inference/weighted_least_squares/wls_model.py +127 -0
- econometrics/nonparametric/nonparametric_semiparametric_methods/__init__.py +0 -0
- econometrics/spatial_econometrics/spatial_econometrics_new/__init__.py +0 -0
- econometrics/specific_data_modeling/micro_discrete_limited_data/__init__.py +0 -0
- econometrics/specific_data_modeling/survival_duration_data/__init__.py +0 -0
- econometrics/specific_data_modeling/time_series_panel_data/__init__.py +143 -0
- econometrics/specific_data_modeling/time_series_panel_data/arima_model.py +104 -0
- econometrics/specific_data_modeling/time_series_panel_data/cointegration_vecm.py +334 -0
- econometrics/specific_data_modeling/time_series_panel_data/dynamic_panel_models.py +653 -0
- econometrics/specific_data_modeling/time_series_panel_data/exponential_smoothing.py +176 -0
- econometrics/specific_data_modeling/time_series_panel_data/garch_model.py +198 -0
- econometrics/specific_data_modeling/time_series_panel_data/panel_diagnostics.py +125 -0
- econometrics/specific_data_modeling/time_series_panel_data/panel_var.py +60 -0
- econometrics/specific_data_modeling/time_series_panel_data/structural_break_tests.py +87 -0
- econometrics/specific_data_modeling/time_series_panel_data/time_varying_parameter_models.py +106 -0
- econometrics/specific_data_modeling/time_series_panel_data/unit_root_tests.py +204 -0
- econometrics/specific_data_modeling/time_series_panel_data/var_svar_model.py +372 -0
- econometrics/statistical_inference/statistical_inference_techniques/__init__.py +0 -0
- econometrics/statistics/distribution_decomposition_methods/__init__.py +0 -0
- econometrics/tests/basic_parametric_estimation_tests/__init__.py +3 -0
- econometrics/tests/basic_parametric_estimation_tests/test_gmm.py +128 -0
- econometrics/tests/basic_parametric_estimation_tests/test_mle.py +127 -0
- econometrics/tests/basic_parametric_estimation_tests/test_ols.py +100 -0
- econometrics/tests/model_specification_diagnostics_tests/__init__.py +3 -0
- econometrics/tests/model_specification_diagnostics_tests/test_diagnostic_tests.py +86 -0
- econometrics/tests/model_specification_diagnostics_tests/test_robust_errors.py +89 -0
- econometrics/tests/specific_data_modeling_tests/__init__.py +3 -0
- econometrics/tests/specific_data_modeling_tests/test_arima.py +98 -0
- econometrics/tests/specific_data_modeling_tests/test_dynamic_panel.py +198 -0
- econometrics/tests/specific_data_modeling_tests/test_exponential_smoothing.py +105 -0
- econometrics/tests/specific_data_modeling_tests/test_garch.py +118 -0
- econometrics/tests/specific_data_modeling_tests/test_unit_root.py +156 -0
- econometrics/tests/specific_data_modeling_tests/test_var.py +124 -0
- prompts/__init__.py +0 -0
- prompts/analysis_guides.py +43 -0
- pyproject.toml +78 -0
- resources/MCP_MASTER_GUIDE.md +422 -0
- resources/MCP_TOOLS_DATA_FORMAT_GUIDE.md +185 -0
- resources/__init__.py +0 -0
- server.py +83 -0
- tools/README.md +88 -0
- tools/__init__.py +45 -0
- tools/data_loader.py +213 -0
- tools/decorators.py +38 -0
- tools/econometrics_adapter.py +286 -0
- tools/mcp_tool_groups/__init__.py +1 -0
- tools/mcp_tool_groups/basic_parametric_tools.py +173 -0
- tools/mcp_tool_groups/model_specification_tools.py +402 -0
- tools/mcp_tool_groups/time_series_tools.py +494 -0
- tools/mcp_tools_registry.py +114 -0
- tools/model_specification_adapter.py +369 -0
- tools/output_formatter.py +563 -0
- tools/time_series_panel_data_adapter.py +858 -0
- tools/time_series_panel_data_tools.py +65 -0
- aigroup_econ_mcp/__init__.py +0 -19
- aigroup_econ_mcp/cli.py +0 -82
- aigroup_econ_mcp/config.py +0 -561
- aigroup_econ_mcp/server.py +0 -452
- aigroup_econ_mcp/tools/__init__.py +0 -18
- aigroup_econ_mcp/tools/base.py +0 -470
- aigroup_econ_mcp/tools/cache.py +0 -533
- aigroup_econ_mcp/tools/data_loader.py +0 -171
- aigroup_econ_mcp/tools/file_parser.py +0 -829
- aigroup_econ_mcp/tools/machine_learning.py +0 -60
- aigroup_econ_mcp/tools/ml_ensemble.py +0 -210
- aigroup_econ_mcp/tools/ml_evaluation.py +0 -272
- aigroup_econ_mcp/tools/ml_models.py +0 -54
- aigroup_econ_mcp/tools/ml_regularization.py +0 -172
- aigroup_econ_mcp/tools/monitoring.py +0 -555
- aigroup_econ_mcp/tools/optimized_example.py +0 -229
- aigroup_econ_mcp/tools/panel_data.py +0 -553
- aigroup_econ_mcp/tools/regression.py +0 -214
- aigroup_econ_mcp/tools/statistics.py +0 -154
- aigroup_econ_mcp/tools/time_series.py +0 -667
- aigroup_econ_mcp/tools/timeout.py +0 -283
- aigroup_econ_mcp/tools/tool_handlers.py +0 -378
- aigroup_econ_mcp/tools/tool_registry.py +0 -170
- aigroup_econ_mcp/tools/validation.py +0 -482
- aigroup_econ_mcp-0.4.2.dist-info/METADATA +0 -360
- aigroup_econ_mcp-0.4.2.dist-info/RECORD +0 -29
- aigroup_econ_mcp-0.4.2.dist-info/entry_points.txt +0 -2
- /aigroup_econ_mcp-0.4.2.dist-info/licenses/LICENSE → /LICENSE +0 -0
- {aigroup_econ_mcp-0.4.2.dist-info → aigroup_econ_mcp-1.4.3.dist-info}/WHEEL +0 -0
|
@@ -1,283 +0,0 @@
|
|
|
1
|
-
"""
|
|
2
|
-
超时控制模块
|
|
3
|
-
为复杂计算任务提供超时控制和资源管理
|
|
4
|
-
"""
|
|
5
|
-
|
|
6
|
-
import asyncio
|
|
7
|
-
import signal
|
|
8
|
-
import threading
|
|
9
|
-
import time
|
|
10
|
-
from typing import Any, Callable, Optional, TypeVar, Union
|
|
11
|
-
from functools import wraps
|
|
12
|
-
from contextlib import contextmanager
|
|
13
|
-
import warnings
|
|
14
|
-
|
|
15
|
-
T = TypeVar('T')
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
class TimeoutError(Exception):
    """Raised when an operation does not finish within its time limit.

    NOTE: inside this module the name shadows the built-in ``TimeoutError``;
    this class derives directly from ``Exception``.
    """
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
class TimeoutManager:
    """Timeout manager providing synchronous and asynchronous timeout control.

    Per-function timeouts (in seconds) can be configured with
    ``set_timeout_config``; functions without an explicit entry fall back to
    prefix-based defaults (see ``get_timeout_for_function``).
    """

    def __init__(self):
        # Maps a function name to its configured timeout in seconds.
        self._timeout_config = {}

    def set_timeout_config(self, config: dict) -> None:
        """Replace the whole per-function timeout configuration.

        Args:
            config: mapping of function name to timeout. The mapping is
                stored by reference, not copied.
        """
        self._timeout_config = config

    def get_timeout_for_function(self, function_name: str, default: int = 60) -> int:
        """Return the timeout to use for ``function_name``.

        An explicit entry in the configuration wins. Otherwise the name is
        matched against known prefixes; ``default`` is returned only when no
        prefix matches.
        """
        if function_name in self._timeout_config:
            return self._timeout_config[function_name]

        # Prefix groups are checked in order; the first match decides.
        prefix_defaults = (
            (('descriptive_', 'correlation_'), 30),
            (('ols_', 'hypothesis_'), 60),
            (('time_series_', 'panel_'), 120),
            (('var_', 'vecm_', 'garch_'), 180),
            (('random_forest_', 'gradient_boosting_'), 300),
        )
        for prefixes, limit in prefix_defaults:
            if function_name.startswith(prefixes):
                return limit
        return default

    async def execute_with_timeout(self, model_name: str, timeout_seconds: int, func: callable, *args, **kwargs):
        """Run ``func(*args, **kwargs)`` with a time limit and return its result.

        Coroutine functions are awaited under ``asyncio.wait_for``. Plain
        callables are run in the loop's default executor through
        ``_execute_sync_with_timeout``.

        Raises:
            TimeoutError: when the time limit is exceeded.
        """
        try:
            if asyncio.iscoroutinefunction(func):
                return await asyncio.wait_for(
                    func(*args, **kwargs),
                    timeout=timeout_seconds
                )

            # We are inside a coroutine, so a running loop always exists;
            # get_running_loop() avoids the deprecated implicit loop lookup.
            loop = asyncio.get_running_loop()
            return await loop.run_in_executor(
                None,
                lambda: self._execute_sync_with_timeout(func, timeout_seconds, *args, **kwargs)
            )
        except asyncio.TimeoutError:
            raise TimeoutError(f"模型 '{model_name}' 执行超时 ({timeout_seconds}秒)")

    def _execute_sync_with_timeout(self, func: callable, timeout_seconds: int, *args, **kwargs):
        """Run a synchronous callable in a daemon thread, waiting at most ``timeout_seconds``.

        Returns the callable's result, or re-raises whatever it raised.

        Raises:
            TimeoutError: when the worker is still running after the wait.
                The worker thread is not stopped; it keeps running as a
                daemon thread.
        """
        import queue  # not imported at module level

        # Exactly one (succeeded, payload) pair is posted by the worker.
        outcome = queue.Queue(maxsize=1)

        def worker():
            try:
                outcome.put((True, func(*args, **kwargs)))
            except BaseException as exc:
                # Catch BaseException too: if the worker died without posting
                # anything, the caller would wait on an empty queue forever.
                outcome.put((False, exc))

        thread = threading.Thread(target=worker)
        thread.daemon = True
        thread.start()
        thread.join(timeout_seconds)

        if thread.is_alive():
            raise TimeoutError(f"同步函数执行超时 ({timeout_seconds}秒)")

        # The worker has finished, so the pair is guaranteed to be present.
        succeeded, payload = outcome.get_nowait()
        if not succeeded:
            raise payload
        return payload

    @contextmanager
    def timeout_context(self, seconds: int):
        """Synchronous timeout context manager based on ``SIGALRM``.

        Only usable on Unix systems and, because it installs a signal
        handler, only from the main thread.

        Args:
            seconds: time limit in seconds.

        Raises:
            TimeoutError: raised from the signal handler when the alarm fires.
        """
        def timeout_handler(signum, frame):
            raise TimeoutError(f"操作超时 ({seconds}秒)")

        # Install our handler, remembering the previous one for restoration.
        original_handler = signal.signal(signal.SIGALRM, timeout_handler)
        signal.alarm(seconds)

        try:
            yield
        finally:
            # Cancel the pending alarm and restore the previous handler.
            signal.alarm(0)
            signal.signal(signal.SIGALRM, original_handler)

    async def async_timeout_context(self, seconds: int):
        """Await a zero-length sleep under a ``seconds`` time limit.

        NOTE(review): despite its name this coroutine is not a context
        manager and does not bound any caller code; it only awaits
        ``asyncio.sleep(0)``. Confirm the intended use before relying on it.

        Args:
            seconds: time limit in seconds.
        """
        try:
            await asyncio.wait_for(asyncio.sleep(0), timeout=seconds)
        except asyncio.TimeoutError:
            raise TimeoutError(f"异步操作超时 ({seconds}秒)")
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
def timeout(seconds: int = 60):
    """Decorator enforcing a time limit on a synchronous function.

    When ``SIGALRM`` is available and the call happens on the main thread,
    the limit is enforced with ``TimeoutManager.timeout_context``. Otherwise
    (Windows, or any non-main thread, where signal handlers cannot be
    installed) the function runs in a daemon thread that is joined for at
    most ``seconds``.

    Args:
        seconds: time limit in seconds.

    Raises:
        TimeoutError: from the wrapped call when the limit is exceeded. In
            the thread-based path the worker is not stopped; it keeps
            running as a daemon thread.
    """
    def decorator(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            # signal.signal() raises ValueError outside the main thread, so
            # the alarm-based path is only safe there.
            can_use_alarm = (
                hasattr(signal, 'SIGALRM')
                and threading.current_thread() is threading.main_thread()
            )
            if can_use_alarm:
                with TimeoutManager().timeout_context(seconds):
                    return func(*args, **kwargs)

            # Thread-based fallback; the worker records either a value or
            # the exception it raised.
            outcome = {}

            def target():
                try:
                    outcome['value'] = func(*args, **kwargs)
                except BaseException as exc:
                    # BaseException as well, so a failure is never reported
                    # as a silent ``None`` result.
                    outcome['error'] = exc

            thread = threading.Thread(target=target)
            thread.daemon = True
            thread.start()
            thread.join(seconds)

            if thread.is_alive():
                raise TimeoutError(f"函数 {func.__name__} 执行超时 ({seconds}秒)")

            # Test for presence, not truthiness of the exception object.
            if 'error' in outcome:
                raise outcome['error']

            return outcome['value']

        return wrapper
    return decorator
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
def async_timeout(seconds: int = 60):
    """Decorator enforcing a time limit on a coroutine function.

    Args:
        seconds: time limit in seconds, passed to ``asyncio.wait_for``.

    Raises:
        TimeoutError: from the wrapped coroutine when ``asyncio.wait_for``
            reports a timeout.
    """
    def decorator(func):
        @wraps(func)
        async def wrapper(*args, **kwargs):
            try:
                pending = func(*args, **kwargs)
                outcome = await asyncio.wait_for(pending, timeout=seconds)
            except asyncio.TimeoutError:
                raise TimeoutError(f"异步函数 {func.__name__} 执行超时 ({seconds}秒)")
            return outcome

        return wrapper
    return decorator
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
class ResourceMonitor:
    """Resource monitor reporting memory growth and elapsed time of a code region."""

    def __init__(self):
        # Wall-clock start of the most recent monitored region.
        self._start_time = None
        # Initialised here; not updated anywhere in this class.
        self._peak_memory = 0

    @contextmanager
    def monitor_resources(self):
        """Context manager that warns when the wrapped code grows RSS by more than 100 MB.

        The resident set size of the current process is sampled before and
        after the ``with`` body; the warning also reports the elapsed time.
        Requires ``psutil``.
        """
        import psutil
        import os

        proc = psutil.Process(os.getpid())
        self._start_time = time.time()
        rss_before = proc.memory_info().rss

        try:
            yield
        finally:
            rss_after = proc.memory_info().rss
            growth_mb = (rss_after - rss_before) / 1024 / 1024  # bytes -> MB
            elapsed = time.time() - self._start_time

            # Only report growth above 100 MB.
            if growth_mb > 100:
                warnings.warn(
                    f"高内存使用警告: 内存增加 {growth_mb:.2f}MB, "
                    f"执行时间: {elapsed:.2f}秒"
                )
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
# 全局超时管理器实例
|
|
242
|
-
global_timeout_manager = TimeoutManager()
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
# 便捷装饰器
|
|
246
|
-
def with_timeout(seconds: int = 60):
    """Convenience timeout decorator that picks the sync or async variant.

    Coroutine functions are wrapped with ``async_timeout``; every other
    callable is wrapped with ``timeout``.
    """
    def decorator(func):
        factory = async_timeout if asyncio.iscoroutinefunction(func) else timeout
        return factory(seconds)(func)
    return decorator
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
def econometric_timeout(function_name: str = None):
    """Timeout decorator that picks its limit from ``global_timeout_manager``.

    The limit is looked up once, at decoration time, using ``function_name``
    or, when that is not given, the decorated function's ``__name__``.
    Coroutine functions are wrapped with ``async_timeout``, other callables
    with ``timeout``.
    """
    def decorator(func):
        lookup_name = function_name if function_name else func.__name__
        limit = global_timeout_manager.get_timeout_for_function(lookup_name)

        factory = async_timeout if asyncio.iscoroutinefunction(func) else timeout
        return factory(limit)(func)

    return decorator
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
# 导出主要类和函数
|
|
274
|
-
__all__ = [
|
|
275
|
-
"TimeoutError",
|
|
276
|
-
"TimeoutManager",
|
|
277
|
-
"timeout",
|
|
278
|
-
"async_timeout",
|
|
279
|
-
"with_timeout",
|
|
280
|
-
"econometric_timeout",
|
|
281
|
-
"ResourceMonitor",
|
|
282
|
-
"global_timeout_manager"
|
|
283
|
-
]
|
|
@@ -1,378 +0,0 @@
|
|
|
1
|
-
"""
|
|
2
|
-
工具处理器模块
|
|
3
|
-
集中管理所有工具的核心业务逻辑
|
|
4
|
-
"""
|
|
5
|
-
|
|
6
|
-
import pandas as pd
|
|
7
|
-
import numpy as np
|
|
8
|
-
import statsmodels.api as sm
|
|
9
|
-
from statsmodels.tsa import stattools
|
|
10
|
-
from scipy import stats
|
|
11
|
-
from typing import Dict, List, Any, Optional
|
|
12
|
-
from mcp.types import CallToolResult, TextContent
|
|
13
|
-
|
|
14
|
-
from .statistics import calculate_descriptive_stats, calculate_correlation_matrix, perform_hypothesis_test
|
|
15
|
-
from .regression import perform_ols_regression
|
|
16
|
-
from .panel_data import fixed_effects_model, random_effects_model, hausman_test, panel_unit_root_test
|
|
17
|
-
from .time_series import var_model, vecm_model, garch_model, state_space_model, variance_decomposition
|
|
18
|
-
from .machine_learning import (
|
|
19
|
-
random_forest_regression, gradient_boosting_regression,
|
|
20
|
-
lasso_regression, ridge_regression, cross_validation, feature_importance_analysis
|
|
21
|
-
)
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
async def handle_descriptive_statistics(ctx, data: Dict[str, List[float]], **kwargs) -> CallToolResult:
    """Summarise a column-oriented dataset and report its correlation matrix.

    Statistics are computed per column and then collapsed to one number:
    mean, std, median, skewness and kurtosis are averaged across columns,
    while min and max are the extremes over all columns. ``count`` is the
    number of rows.

    Raises:
        ValueError: if ``data`` is empty.
    """
    if not data:
        raise ValueError("数据不能为空")

    frame = pd.DataFrame(data)

    # Build the summary one statistic at a time, in output order.
    result_data = {"count": len(frame)}
    result_data["mean"] = float(frame.mean().mean())
    result_data["std"] = float(frame.std().mean())
    result_data["min"] = float(frame.min().min())
    result_data["max"] = float(frame.max().max())
    result_data["median"] = float(frame.median().mean())
    result_data["skewness"] = float(frame.skew().mean())
    result_data["kurtosis"] = float(frame.kurtosis().mean())

    correlation_matrix = frame.corr().round(4)

    report = (
        f"描述性统计结果:\n"
        f"均值: {result_data['mean']:.4f}\n"
        f"标准差: {result_data['std']:.4f}\n"
        f"最小值: {result_data['min']:.4f}\n"
        f"最大值: {result_data['max']:.4f}\n"
        f"中位数: {result_data['median']:.4f}\n"
        f"偏度: {result_data['skewness']:.4f}\n"
        f"峰度: {result_data['kurtosis']:.4f}\n\n"
        f"相关系数矩阵:\n{correlation_matrix.to_string()}"
    )

    return CallToolResult(
        content=[TextContent(type="text", text=report)],
        structuredContent=result_data
    )
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
async def handle_ols_regression(ctx, y_data: List[float], x_data: List[List[float]],
                                feature_names: Optional[List[str]] = None, **kwargs) -> CallToolResult:
    """Fit an OLS regression with an intercept and report fit statistics.

    Args:
        y_data: dependent-variable observations.
        x_data: regressor observations, one row per observation. A flat list
            is treated as a single regressor.
        feature_names: optional regressor names; defaults to ``x1``, ``x2``, ...

    Raises:
        ValueError: if either data argument is empty, or if the number of
            feature names does not match the number of regressors.
    """
    if not y_data or not x_data:
        raise ValueError("因变量和自变量数据不能为空")

    X = np.array(x_data)
    y = np.array(y_data)
    if X.ndim == 1:
        # A flat list is a single regressor; make it a column so that
        # X.shape[1] below is defined.
        X = X.reshape(-1, 1)

    if feature_names is None:
        feature_names = [f"x{i+1}" for i in range(X.shape[1])]
    elif len(feature_names) != X.shape[1]:
        # Fail early instead of hitting an IndexError while labelling.
        raise ValueError(
            f"feature_names 的数量 ({len(feature_names)}) 与自变量个数 ({X.shape[1]}) 不一致"
        )

    X_with_const = sm.add_constant(X)
    model = sm.OLS(y, X_with_const).fit()

    # add_constant() leaves X unchanged when it already contains a constant
    # column; in that case no extra "const" parameter exists to label.
    if X_with_const.shape[1] == X.shape[1]:
        param_names = list(feature_names)
    else:
        param_names = ["const", *feature_names]

    conf_int = model.conf_int()
    coefficients = {}
    for var_name, coef, std_err, t_value, p_value, bounds in zip(
        param_names, model.params, model.bse, model.tvalues, model.pvalues, conf_int
    ):
        coefficients[var_name] = {
            "coef": float(coef),
            "std_err": float(std_err),
            "t_value": float(t_value),
            "p_value": float(p_value),
            "ci_lower": float(bounds[0]),
            "ci_upper": float(bounds[1])
        }

    result_data = {
        "rsquared": float(model.rsquared),
        "rsquared_adj": float(model.rsquared_adj),
        "f_statistic": float(model.fvalue),
        "f_pvalue": float(model.f_pvalue),
        "aic": float(model.aic),
        "bic": float(model.bic),
        "coefficients": coefficients
    }

    return CallToolResult(
        content=[
            TextContent(
                type="text",
                text=f"OLS回归分析结果:\n"
                     f"R² = {result_data['rsquared']:.4f}\n"
                     f"调整R² = {result_data['rsquared_adj']:.4f}\n"
                     f"F统计量 = {result_data['f_statistic']:.4f} (p = {result_data['f_pvalue']:.4f})\n"
                     f"AIC = {result_data['aic']:.2f}, BIC = {result_data['bic']:.2f}\n\n"
                     f"回归系数:\n{model.summary().tables[1]}"
            )
        ],
        structuredContent=result_data
    )
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
async def handle_hypothesis_testing(ctx, data1: List[float], data2: Optional[List[float]] = None,
                                    test_type: str = "t_test", **kwargs) -> CallToolResult:
    """Run a t-test or an ADF test and report the outcome.

    ``t_test`` is a one-sample test against 0 (with a 95% confidence
    interval) when ``data2`` is omitted, otherwise an independent two-sample
    test. ``adf`` runs ``adfuller`` on ``data1``. Significance is judged at
    the 5% level.

    Raises:
        ValueError: for an unsupported ``test_type``.
    """
    if test_type == "t_test":
        if data2 is not None:
            outcome = stats.ttest_ind(data1, data2)
            ci = None
        else:
            outcome = stats.ttest_1samp(data1, 0)
            ci = stats.t.interval(0.95, len(data1)-1, loc=np.mean(data1), scale=stats.sem(data1))

        test_result = {
            "test_type": test_type,
            "statistic": float(outcome.statistic),
            "p_value": float(outcome.pvalue),
            "significant": bool(outcome.pvalue < 0.05),
            "confidence_interval": list(ci) if ci else None
        }
    elif test_type == "adf":
        outcome = stattools.adfuller(data1)
        test_result = {
            "test_type": "adf",
            "statistic": float(outcome[0]),
            "p_value": float(outcome[1]),
            "significant": bool(outcome[1] < 0.05),
            "confidence_interval": None
        }
    else:
        raise ValueError(f"不支持的检验类型: {test_type}")

    # The interval line is only present for the one-sample t-test.
    ci_text = ""
    bounds = test_result['confidence_interval']
    if bounds:
        ci_lower, ci_upper = bounds[0], bounds[1]
        ci_text = f"95%置信区间: [{ci_lower:.4f}, {ci_upper:.4f}]"

    report = (
        f"{test_type.upper()}检验结果:\n"
        f"检验统计量 = {test_result['statistic']:.4f}\n"
        f"p值 = {test_result['p_value']:.4f}\n"
        f"{'显著' if test_result['significant'] else '不显著'} (5%水平)\n"
        f"{ci_text}"
    )

    return CallToolResult(
        content=[TextContent(type="text", text=report)],
        structuredContent=test_result
    )
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
async def handle_time_series_analysis(ctx, data: List[float], **kwargs) -> CallToolResult:
    """Run an ADF stationarity test and compute ACF/PACF for a series.

    The number of lags is ``min(20, len(data) - 1, len(data) // 2)``, with a
    floor of 1. If ACF or PACF cannot be computed, that function alone falls
    back to a placeholder that is 1.0 at lag 0 and 0 elsewhere.

    Raises:
        ValueError: if ``data`` has fewer than 5 observations.
    """
    if not data or len(data) < 5:
        raise ValueError("时间序列数据至少需要5个观测点")

    adf_result = stattools.adfuller(data)
    max_nlags = max(1, min(20, len(data) - 1, len(data) // 2))

    def placeholder():
        # Lag 0 correlation is 1 by definition; the remaining lags are unknown.
        values = np.zeros(max_nlags + 1)
        values[0] = 1.0
        return values

    # Each function has its own fallback so a PACF failure does not discard
    # a valid ACF. ``except Exception`` replaces the bare ``except`` so
    # KeyboardInterrupt/SystemExit are not swallowed.
    try:
        acf_values = stattools.acf(data, nlags=max_nlags)
    except Exception:
        acf_values = placeholder()

    try:
        pacf_values = stattools.pacf(data, nlags=max_nlags)
    except Exception:
        pacf_values = placeholder()

    result_data = {
        "adf_statistic": float(adf_result[0]),
        "adf_pvalue": float(adf_result[1]),
        "stationary": bool(adf_result[1] < 0.05),
        "acf": [float(x) for x in acf_values.tolist()],
        "pacf": [float(x) for x in pacf_values.tolist()]
    }

    return CallToolResult(
        content=[
            TextContent(
                type="text",
                text=f"时间序列分析结果:\n"
                     f"ADF检验统计量 = {result_data['adf_statistic']:.4f}\n"
                     f"ADF检验p值 = {result_data['adf_pvalue']:.4f}\n"
                     f"{'平稳' if result_data['stationary'] else '非平稳'}序列\n"
                     f"ACF前5阶: {result_data['acf'][:5]}\n"
                     f"PACF前5阶: {result_data['pacf'][:5]}"
            )
        ],
        structuredContent=result_data
    )
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
async def handle_correlation_analysis(ctx, data: Dict[str, List[float]],
                                      method: str = "pearson", **kwargs) -> CallToolResult:
    """Compute the correlation matrix of the given variables.

    Args:
        data: mapping of variable name to observations; needs at least two
            entries.
        method: correlation method passed to ``DataFrame.corr``.

    Raises:
        ValueError: if fewer than two variables are supplied.
    """
    if not data or len(data) < 2:
        raise ValueError("至少需要2个变量进行相关性分析")

    correlation_matrix = pd.DataFrame(data).corr(method=method)
    report = f"{method.title()}相关系数矩阵:\n{correlation_matrix.round(4).to_string()}"

    return CallToolResult(content=[TextContent(type="text", text=report)])
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
# 面板数据处理器
|
|
231
|
-
async def handle_panel_fixed_effects(ctx, y_data, x_data, entity_ids, time_periods,
                                     feature_names=None, entity_effects=True, time_effects=False, **kwargs):
    """Run ``fixed_effects_model`` and wrap its result as a tool result.

    The text content reports R²; the structured content is the result's
    ``model_dump()``.
    """
    fitted = fixed_effects_model(
        y_data, x_data, entity_ids, time_periods,
        feature_names, entity_effects, time_effects,
    )
    summary = TextContent(type="text", text=f"固定效应模型: R²={fitted.rsquared:.4f}")
    return CallToolResult(content=[summary], structuredContent=fitted.model_dump())
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
async def handle_panel_random_effects(ctx, y_data, x_data, entity_ids, time_periods,
                                      feature_names=None, entity_effects=True, time_effects=False, **kwargs):
    """Run ``random_effects_model`` and wrap its result as a tool result.

    The text content reports R²; the structured content is the result's
    ``model_dump()``.
    """
    fitted = random_effects_model(
        y_data, x_data, entity_ids, time_periods,
        feature_names, entity_effects, time_effects,
    )
    summary = TextContent(type="text", text=f"随机效应模型: R²={fitted.rsquared:.4f}")
    return CallToolResult(content=[summary], structuredContent=fitted.model_dump())
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
async def handle_panel_hausman_test(ctx, y_data, x_data, entity_ids, time_periods, feature_names=None, **kwargs):
    """Run ``hausman_test`` and wrap its result as a tool result.

    The text content reports the p-value and the recommendation; the
    structured content is the result's ``model_dump()``.
    """
    verdict = hausman_test(y_data, x_data, entity_ids, time_periods, feature_names)
    summary = TextContent(
        type="text",
        text=f"Hausman检验: p={verdict.p_value:.4f}, 建议={verdict.recommendation}",
    )
    return CallToolResult(content=[summary], structuredContent=verdict.model_dump())
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
async def handle_panel_unit_root_test(ctx, **kwargs):
    """Run a panel unit-root test and wrap its result as a tool result.

    ``panel_unit_root_test`` is called with ``data, entity_ids,
    time_periods, test_type``. According to the original author's note, the
    panel decorator passes ``y_data, x_data, entity_ids, time_periods``
    instead, so ``y_data`` is used when ``data`` is absent.

    Raises:
        ValueError: if neither ``data`` nor ``y_data`` is given, or if
            ``entity_ids`` or ``time_periods`` is missing.
    """
    # Prefer ``data``; fall back to ``y_data`` (supplied by the panel decorator).
    series = kwargs.get('data')
    if series is None:
        series = kwargs.get('y_data')
    if series is None:
        raise ValueError("需要提供数据(data或y_data)")

    entity_ids = kwargs.get('entity_ids')
    time_periods = kwargs.get('time_periods')
    if entity_ids is None or time_periods is None:
        raise ValueError("需要提供entity_ids和time_periods")

    test_type = kwargs.get('test_type', 'levinlin')

    # Forward only the arguments panel_unit_root_test accepts.
    outcome = panel_unit_root_test(series, entity_ids, time_periods, test_type)
    summary = TextContent(
        type="text",
        text=f"面板单位根检验: {'平稳' if outcome.stationary else '非平稳'}",
    )
    return CallToolResult(content=[summary], structuredContent=outcome.model_dump())
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
# 时间序列处理器
|
|
290
|
-
async def handle_var_model(ctx, data, max_lags=5, ic="aic", **kwargs):
    """Run ``var_model`` and wrap its result as a tool result.

    The text content reports the selected lag order and AIC; the structured
    content is the result's ``model_dump()``.
    """
    fitted = var_model(data, max_lags=max_lags, ic=ic)
    summary = TextContent(
        type="text",
        text=f"VAR模型: 滞后阶数={fitted.order}, AIC={fitted.aic:.2f}",
    )
    return CallToolResult(content=[summary], structuredContent=fitted.model_dump())
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
async def handle_vecm_model(ctx, data, coint_rank=1, deterministic="co", max_lags=5, **kwargs):
    """Run ``vecm_model`` and wrap its result as a tool result.

    The text content reports the cointegration rank and AIC; the structured
    content is the result's ``model_dump()``.
    """
    fitted = vecm_model(
        data,
        coint_rank=coint_rank,
        deterministic=deterministic,
        max_lags=max_lags,
    )
    summary = TextContent(
        type="text",
        text=f"VECM模型: 协整秩={fitted.coint_rank}, AIC={fitted.aic:.2f}",
    )
    return CallToolResult(content=[summary], structuredContent=fitted.model_dump())
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
async def handle_garch_model(ctx, data, order=(1, 1), dist="normal", **kwargs):
    """Run ``garch_model`` and wrap its result as a tool result.

    The text content reports the persistence; the structured content is the
    result's ``model_dump()``.
    """
    fitted = garch_model(data, order=order, dist=dist)
    summary = TextContent(type="text", text=f"GARCH模型: 持久性={fitted.persistence:.4f}")
    return CallToolResult(content=[summary], structuredContent=fitted.model_dump())
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
async def handle_state_space_model(ctx, data, state_dim=1, observation_dim=1,
                                   trend=True, seasonal=False, period=12, **kwargs):
    """Run ``state_space_model`` and wrap its result as a tool result.

    The text content reports the AIC; the structured content is the result's
    ``model_dump()``.
    """
    fitted = state_space_model(
        data, state_dim, observation_dim, trend, seasonal, period,
    )
    summary = TextContent(type="text", text=f"状态空间模型: AIC={fitted.aic:.2f}")
    return CallToolResult(content=[summary], structuredContent=fitted.model_dump())
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
async def handle_variance_decomposition(ctx, data, periods=10, max_lags=5, **kwargs):
    """Run ``variance_decomposition`` and wrap its result as a tool result.

    The value returned by ``variance_decomposition`` is passed through
    unchanged as the structured content; the text content states the number
    of periods.
    """
    decomposition = variance_decomposition(data, periods=periods, max_lags=max_lags)
    summary = TextContent(type="text", text=f"方差分解: {periods}期")
    return CallToolResult(content=[summary], structuredContent=decomposition)
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
# 机器学习处理器
|
|
332
|
-
async def handle_random_forest(ctx, y_data, x_data, feature_names=None, n_estimators=100, max_depth=None, **kwargs):
    """Run ``random_forest_regression`` and wrap its result as a tool result.

    The text content reports R²; the structured content is the result's
    ``model_dump()``.
    """
    fitted = random_forest_regression(
        y_data, x_data, feature_names, n_estimators, max_depth,
    )
    summary = TextContent(type="text", text=f"随机森林: R²={fitted.r2_score:.4f}")
    return CallToolResult(content=[summary], structuredContent=fitted.model_dump())
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
async def handle_gradient_boosting(ctx, y_data, x_data, feature_names=None,
                                   n_estimators=100, learning_rate=0.1, max_depth=3, **kwargs):
    """Run ``gradient_boosting_regression`` and wrap its result as a tool result.

    The text content reports R²; the structured content is the result's
    ``model_dump()``.
    """
    fitted = gradient_boosting_regression(
        y_data, x_data, feature_names, n_estimators, learning_rate, max_depth,
    )
    summary = TextContent(type="text", text=f"梯度提升树: R²={fitted.r2_score:.4f}")
    return CallToolResult(content=[summary], structuredContent=fitted.model_dump())
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
async def handle_lasso_regression(ctx, y_data, x_data, feature_names=None, alpha=1.0, **kwargs):
    """Run ``lasso_regression`` and wrap its result as a tool result.

    The text content reports R²; the structured content is the result's
    ``model_dump()``.
    """
    fitted = lasso_regression(y_data, x_data, feature_names, alpha)
    summary = TextContent(type="text", text=f"Lasso回归: R²={fitted.r2_score:.4f}")
    return CallToolResult(content=[summary], structuredContent=fitted.model_dump())
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
async def handle_ridge_regression(ctx, y_data, x_data, feature_names=None, alpha=1.0, **kwargs):
    """Run ``ridge_regression`` and wrap its result as a tool result.

    The text content reports R²; the structured content is the result's
    ``model_dump()``.
    """
    fitted = ridge_regression(y_data, x_data, feature_names, alpha)
    summary = TextContent(type="text", text=f"Ridge回归: R²={fitted.r2_score:.4f}")
    return CallToolResult(content=[summary], structuredContent=fitted.model_dump())
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
async def handle_cross_validation(ctx, y_data, x_data, model_type="random_forest", cv_folds=5, scoring="r2", **kwargs):
    """Run ``cross_validation`` and wrap its result as a tool result.

    The text content reports the mean score; the structured content is the
    result's ``model_dump()``.
    """
    scores = cross_validation(y_data, x_data, model_type, cv_folds, scoring)
    summary = TextContent(type="text", text=f"交叉验证: 平均得分={scores.mean_score:.4f}")
    return CallToolResult(content=[summary], structuredContent=scores.model_dump())
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
async def handle_feature_importance(ctx, y_data, x_data, feature_names=None, method="random_forest", top_k=5, **kwargs):
    """Run ``feature_importance_analysis`` and wrap its result as a tool result.

    The text content lists the top features; the structured content is the
    result's ``model_dump()``.
    """
    ranking = feature_importance_analysis(y_data, x_data, feature_names, method, top_k)
    summary = TextContent(type="text", text=f"特征重要性: Top特征={ranking.top_features}")
    return CallToolResult(content=[summary], structuredContent=ranking.model_dump())
|