aigroup-econ-mcp 0.4.2__py3-none-any.whl → 1.4.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (119) hide show
  1. .gitignore +253 -0
  2. PKG-INFO +710 -0
  3. README.md +672 -0
  4. __init__.py +14 -0
  5. aigroup_econ_mcp-1.4.3.dist-info/METADATA +710 -0
  6. aigroup_econ_mcp-1.4.3.dist-info/RECORD +92 -0
  7. aigroup_econ_mcp-1.4.3.dist-info/entry_points.txt +2 -0
  8. aigroup_econ_mcp-1.4.3.dist-info/licenses/LICENSE +21 -0
  9. cli.py +28 -0
  10. econometrics/README.md +18 -0
  11. econometrics/__init__.py +191 -0
  12. econometrics/advanced_methods/modern_computing_machine_learning/__init__.py +0 -0
  13. econometrics/basic_parametric_estimation/__init__.py +31 -0
  14. econometrics/basic_parametric_estimation/gmm/__init__.py +13 -0
  15. econometrics/basic_parametric_estimation/gmm/gmm_model.py +256 -0
  16. econometrics/basic_parametric_estimation/mle/__init__.py +13 -0
  17. econometrics/basic_parametric_estimation/mle/mle_model.py +241 -0
  18. econometrics/basic_parametric_estimation/ols/__init__.py +13 -0
  19. econometrics/basic_parametric_estimation/ols/ols_model.py +141 -0
  20. econometrics/causal_inference/causal_identification_strategy/__init__.py +0 -0
  21. econometrics/missing_data/missing_data_measurement_error/__init__.py +0 -0
  22. econometrics/model_specification_diagnostics_robust_inference/README.md +173 -0
  23. econometrics/model_specification_diagnostics_robust_inference/__init__.py +78 -0
  24. econometrics/model_specification_diagnostics_robust_inference/diagnostic_tests/__init__.py +20 -0
  25. econometrics/model_specification_diagnostics_robust_inference/diagnostic_tests/diagnostic_tests_model.py +149 -0
  26. econometrics/model_specification_diagnostics_robust_inference/generalized_least_squares/__init__.py +15 -0
  27. econometrics/model_specification_diagnostics_robust_inference/generalized_least_squares/gls_model.py +130 -0
  28. econometrics/model_specification_diagnostics_robust_inference/model_selection/__init__.py +18 -0
  29. econometrics/model_specification_diagnostics_robust_inference/model_selection/model_selection_model.py +286 -0
  30. econometrics/model_specification_diagnostics_robust_inference/regularization/__init__.py +15 -0
  31. econometrics/model_specification_diagnostics_robust_inference/regularization/regularization_model.py +177 -0
  32. econometrics/model_specification_diagnostics_robust_inference/robust_errors/__init__.py +15 -0
  33. econometrics/model_specification_diagnostics_robust_inference/robust_errors/robust_errors_model.py +122 -0
  34. econometrics/model_specification_diagnostics_robust_inference/simultaneous_equations/__init__.py +15 -0
  35. econometrics/model_specification_diagnostics_robust_inference/simultaneous_equations/simultaneous_equations_model.py +246 -0
  36. econometrics/model_specification_diagnostics_robust_inference/weighted_least_squares/__init__.py +15 -0
  37. econometrics/model_specification_diagnostics_robust_inference/weighted_least_squares/wls_model.py +127 -0
  38. econometrics/nonparametric/nonparametric_semiparametric_methods/__init__.py +0 -0
  39. econometrics/spatial_econometrics/spatial_econometrics_new/__init__.py +0 -0
  40. econometrics/specific_data_modeling/micro_discrete_limited_data/__init__.py +0 -0
  41. econometrics/specific_data_modeling/survival_duration_data/__init__.py +0 -0
  42. econometrics/specific_data_modeling/time_series_panel_data/__init__.py +143 -0
  43. econometrics/specific_data_modeling/time_series_panel_data/arima_model.py +104 -0
  44. econometrics/specific_data_modeling/time_series_panel_data/cointegration_vecm.py +334 -0
  45. econometrics/specific_data_modeling/time_series_panel_data/dynamic_panel_models.py +653 -0
  46. econometrics/specific_data_modeling/time_series_panel_data/exponential_smoothing.py +176 -0
  47. econometrics/specific_data_modeling/time_series_panel_data/garch_model.py +198 -0
  48. econometrics/specific_data_modeling/time_series_panel_data/panel_diagnostics.py +125 -0
  49. econometrics/specific_data_modeling/time_series_panel_data/panel_var.py +60 -0
  50. econometrics/specific_data_modeling/time_series_panel_data/structural_break_tests.py +87 -0
  51. econometrics/specific_data_modeling/time_series_panel_data/time_varying_parameter_models.py +106 -0
  52. econometrics/specific_data_modeling/time_series_panel_data/unit_root_tests.py +204 -0
  53. econometrics/specific_data_modeling/time_series_panel_data/var_svar_model.py +372 -0
  54. econometrics/statistical_inference/statistical_inference_techniques/__init__.py +0 -0
  55. econometrics/statistics/distribution_decomposition_methods/__init__.py +0 -0
  56. econometrics/tests/basic_parametric_estimation_tests/__init__.py +3 -0
  57. econometrics/tests/basic_parametric_estimation_tests/test_gmm.py +128 -0
  58. econometrics/tests/basic_parametric_estimation_tests/test_mle.py +127 -0
  59. econometrics/tests/basic_parametric_estimation_tests/test_ols.py +100 -0
  60. econometrics/tests/model_specification_diagnostics_tests/__init__.py +3 -0
  61. econometrics/tests/model_specification_diagnostics_tests/test_diagnostic_tests.py +86 -0
  62. econometrics/tests/model_specification_diagnostics_tests/test_robust_errors.py +89 -0
  63. econometrics/tests/specific_data_modeling_tests/__init__.py +3 -0
  64. econometrics/tests/specific_data_modeling_tests/test_arima.py +98 -0
  65. econometrics/tests/specific_data_modeling_tests/test_dynamic_panel.py +198 -0
  66. econometrics/tests/specific_data_modeling_tests/test_exponential_smoothing.py +105 -0
  67. econometrics/tests/specific_data_modeling_tests/test_garch.py +118 -0
  68. econometrics/tests/specific_data_modeling_tests/test_unit_root.py +156 -0
  69. econometrics/tests/specific_data_modeling_tests/test_var.py +124 -0
  70. prompts/__init__.py +0 -0
  71. prompts/analysis_guides.py +43 -0
  72. pyproject.toml +78 -0
  73. resources/MCP_MASTER_GUIDE.md +422 -0
  74. resources/MCP_TOOLS_DATA_FORMAT_GUIDE.md +185 -0
  75. resources/__init__.py +0 -0
  76. server.py +83 -0
  77. tools/README.md +88 -0
  78. tools/__init__.py +45 -0
  79. tools/data_loader.py +213 -0
  80. tools/decorators.py +38 -0
  81. tools/econometrics_adapter.py +286 -0
  82. tools/mcp_tool_groups/__init__.py +1 -0
  83. tools/mcp_tool_groups/basic_parametric_tools.py +173 -0
  84. tools/mcp_tool_groups/model_specification_tools.py +402 -0
  85. tools/mcp_tool_groups/time_series_tools.py +494 -0
  86. tools/mcp_tools_registry.py +114 -0
  87. tools/model_specification_adapter.py +369 -0
  88. tools/output_formatter.py +563 -0
  89. tools/time_series_panel_data_adapter.py +858 -0
  90. tools/time_series_panel_data_tools.py +65 -0
  91. aigroup_econ_mcp/__init__.py +0 -19
  92. aigroup_econ_mcp/cli.py +0 -82
  93. aigroup_econ_mcp/config.py +0 -561
  94. aigroup_econ_mcp/server.py +0 -452
  95. aigroup_econ_mcp/tools/__init__.py +0 -18
  96. aigroup_econ_mcp/tools/base.py +0 -470
  97. aigroup_econ_mcp/tools/cache.py +0 -533
  98. aigroup_econ_mcp/tools/data_loader.py +0 -171
  99. aigroup_econ_mcp/tools/file_parser.py +0 -829
  100. aigroup_econ_mcp/tools/machine_learning.py +0 -60
  101. aigroup_econ_mcp/tools/ml_ensemble.py +0 -210
  102. aigroup_econ_mcp/tools/ml_evaluation.py +0 -272
  103. aigroup_econ_mcp/tools/ml_models.py +0 -54
  104. aigroup_econ_mcp/tools/ml_regularization.py +0 -172
  105. aigroup_econ_mcp/tools/monitoring.py +0 -555
  106. aigroup_econ_mcp/tools/optimized_example.py +0 -229
  107. aigroup_econ_mcp/tools/panel_data.py +0 -553
  108. aigroup_econ_mcp/tools/regression.py +0 -214
  109. aigroup_econ_mcp/tools/statistics.py +0 -154
  110. aigroup_econ_mcp/tools/time_series.py +0 -667
  111. aigroup_econ_mcp/tools/timeout.py +0 -283
  112. aigroup_econ_mcp/tools/tool_handlers.py +0 -378
  113. aigroup_econ_mcp/tools/tool_registry.py +0 -170
  114. aigroup_econ_mcp/tools/validation.py +0 -482
  115. aigroup_econ_mcp-0.4.2.dist-info/METADATA +0 -360
  116. aigroup_econ_mcp-0.4.2.dist-info/RECORD +0 -29
  117. aigroup_econ_mcp-0.4.2.dist-info/entry_points.txt +0 -2
  118. /aigroup_econ_mcp-0.4.2.dist-info/licenses/LICENSE → /LICENSE +0 -0
  119. {aigroup_econ_mcp-0.4.2.dist-info → aigroup_econ_mcp-1.4.3.dist-info}/WHEEL +0 -0
@@ -0,0 +1,185 @@
1
+ # MCP工具数据格式指南
2
+
3
+ ## 概述
4
+ 本文档详细说明各个MCP工具的数据输入格式要求,帮助大模型正确调用工具。
5
+
6
+ ## 1. VAR/SVAR模型工具
7
+
8
+ ### 数据格式
9
+ ```python
10
+ # 多元时间序列数据格式
11
+ data = [
12
+ [var1_t1, var2_t1, var3_t1], # 时间点1的所有变量值
13
+ [var1_t2, var2_t2, var3_t2], # 时间点2的所有变量值
14
+ [var1_t3, var2_t3, var3_t3], # 时间点3的所有变量值
15
+ # ...
16
+ ]
17
+
18
+ # 变量名称(可选)
19
+ variables = ["GDP", "Inflation", "Interest"]
20
+ ```
21
+
22
+ ### 示例调用
23
+ ```python
24
+ {
25
+ "data": [[1.0, 2.5, 1.8], [1.2, 2.7, 2.0], [1.4, 2.9, 2.2]],
26
+ "model_type": "var",
27
+ "lags": 1,
28
+ "variables": ["GDP", "Inflation", "Interest"],
29
+ "output_format": "json"
30
+ }
31
+ ```
32
+
33
+ ## 2. 联立方程模型工具
34
+
35
+ ### 数据格式说明
36
+ 联立方程模型需要三个主要数据组件:
37
+
38
+ #### 因变量数据 (y_data)
39
+ - **格式**: 二维列表,每个子列表代表一个方程的因变量时间序列
40
+ - **要求**: 所有方程的观测数量必须相同
41
+
42
+ ```python
43
+ # 两个方程的示例
44
+ y_data = [
45
+ [1.0, 1.2, 1.4, 1.6], # 方程1的因变量
46
+ [2.0, 2.2, 2.4, 2.6] # 方程2的因变量
47
+ ]
48
+ ```
49
+
50
+ #### 自变量数据 (x_data)
51
+ - **格式**: 二维列表,每个子列表代表一个观测的所有自变量值
52
+ - **要求**: 观测数量必须与因变量相同
53
+
54
+ ```python
55
+ # 4个观测,每个观测有2个自变量
56
+ x_data = [
57
+ [1.5, 2.5], # 观测1的自变量
58
+ [1.7, 2.7], # 观测2的自变量
59
+ [1.9, 2.9], # 观测3的自变量
60
+ [2.1, 3.1] # 观测4的自变量
61
+ ]
62
+ ```
63
+
64
+ #### 工具变量数据 (instruments)
65
+ - **格式**: 二维列表,每个子列表代表一个观测的所有工具变量值
66
+ - **要求**: 观测数量必须与其他变量相同
67
+
68
+ ```python
69
+ # 4个观测,每个观测有2个工具变量
70
+ instruments = [
71
+ [1.8, 2.8], # 观测1的工具变量
72
+ [2.0, 3.0], # 观测2的工具变量
73
+ [2.2, 3.2], # 观测3的工具变量
74
+ [2.4, 3.4] # 观测4的工具变量
75
+ ]
76
+ ```
77
+
78
+ ### 示例调用
79
+ ```json
80
+ {
81
+ "y_data": [[1.0, 1.2, 1.4, 1.6], [2.0, 2.2, 2.4, 2.6]],
82
+ "x_data": [[1.5, 2.5], [1.7, 2.7], [1.9, 2.9], [2.1, 3.1]],
83
+ "instruments": [[1.8, 2.8], [2.0, 3.0], [2.2, 3.2], [2.4, 3.4]],
84
+ "equation_names": ["Demand", "Supply"],
85
+ "instrument_names": ["Income", "Price"],
86
+ "constant": true,
87
+ "output_format": "json"
88
+ }
89
+ ```
90
+
91
+ ## 3. 动态面板数据模型工具
92
+
93
+ ### 数据格式说明
94
+ 动态面板数据需要四个主要数据组件:
95
+
96
+ #### 因变量数据 (y_data)
97
+ - **格式**: 一维列表,所有个体的因变量时间序列按个体依次堆叠(顺序须与 entity_ids、time_periods 一一对应)
98
+
99
+ ```python
100
+ y_data = [1.0, 1.2, 1.4, 1.6, 1.8, 2.0, 2.2, 2.4, 2.6, 2.8]
101
+ ```
102
+
103
+ #### 自变量数据 (x_data)
104
+ - **格式**: 二维列表,每个子列表代表一个自变量的时间序列
105
+ - **重要**: 每个自变量的观测数量必须与因变量相同
106
+
107
+ ```python
108
+ # 1个自变量,10个观测
109
+ x_data = [[1.5, 1.7, 1.9, 2.1, 2.3, 2.5, 2.7, 2.9, 3.1, 3.3]]
110
+
111
+ # 2个自变量,10个观测
112
+ x_data = [
113
+ [1.5, 1.7, 1.9, 2.1, 2.3, 2.5, 2.7, 2.9, 3.1, 3.3], # 自变量1
114
+ [2.5, 2.7, 2.9, 3.1, 3.3, 3.5, 3.7, 3.9, 4.1, 4.3] # 自变量2
115
+ ]
116
+ ```
117
+
118
+ #### 个体标识符 (entity_ids)
119
+ - **格式**: 一维列表,标识每个观测属于哪个个体
120
+
121
+ ```python
122
+ # 2个个体,每个个体5个时间点
123
+ entity_ids = [1, 1, 1, 1, 1, 2, 2, 2, 2, 2]
124
+ ```
125
+
126
+ #### 时间标识符 (time_periods)
127
+ - **格式**: 一维列表,标识每个观测的时间点
128
+
129
+ ```python
130
+ # 10个观测,时间从1到5重复两次
131
+ time_periods = [1, 2, 3, 4, 5, 1, 2, 3, 4, 5]
132
+ ```
133
+
134
+ ### 示例调用
135
+ ```python
136
+ {
137
+ "y_data": [1.0, 1.2, 1.4, 1.6, 1.8, 2.0, 2.2, 2.4, 2.6, 2.8],
138
+ "x_data": [[1.5, 1.7, 1.9, 2.1, 2.3, 2.5, 2.7, 2.9, 3.1, 3.3]],
139
+ "entity_ids": [1, 1, 1, 1, 1, 2, 2, 2, 2, 2],
140
+ "time_periods": [1, 2, 3, 4, 5, 1, 2, 3, 4, 5],
141
+ "model_type": "diff_gmm",
142
+ "lags": 1,
143
+ "output_format": "json"
144
+ }
145
+ ```
146
+
147
+ ## 4. 数据格式验证要点
148
+
149
+ ### 通用规则
150
+ 1. **数据维度一致性**: 所有相关数据的观测数量必须相同
151
+ 2. **数据类型**: 必须为数值类型
152
+ 3. **数据完整性**: 不能有空值或缺失值
153
+
154
+ ### 联立方程模型特殊要求
155
+ - 因变量数据是二维列表,每个子列表代表一个方程
156
+ - 自变量和工具变量是二维列表,每个子列表代表一个观测
157
+ - 所有数据的观测数量必须严格一致
158
+
159
+ ### 动态面板数据特殊要求
160
+ - 自变量数据是二维列表,每个子列表代表一个自变量的完整时间序列
161
+ - 个体和时间标识符必须与因变量观测数量一致
162
+ - 面板数据必须平衡(每个个体有相同数量的时间点)
163
+
164
+ ## 5. 常见错误及解决方案
165
+
166
+ ### 错误: "自变量的观测数量必须与因变量相同"
167
+ **原因**: 数据维度不匹配
168
+ **解决方案**: 检查所有数据的观测数量是否一致
169
+
170
+ ### 错误: "所有数据序列的长度必须一致"
171
+ **原因**: 动态面板数据中某个自变量的观测数量与因变量不同
172
+ **解决方案**: 确保每个自变量的时间序列长度与因变量相同
173
+
174
+ ### 错误: "输入数据不能为空"
175
+ **原因**: 数据为空或格式错误
176
+ **解决方案**: 提供有效的数据或检查文件路径
177
+
178
+ ## 6. 最佳实践
179
+
180
+ 1. **使用文件输入**: 对于复杂数据,建议使用CSV文件格式
181
+ 2. **数据预处理**: 在调用工具前确保数据格式正确
182
+ 3. **逐步测试**: 先用小样本数据测试,确认格式正确后再使用完整数据
183
+ 4. **检查维度**: 始终验证所有相关数据的观测数量是否一致
184
+
185
+ 通过遵循这些指南,可以确保MCP工具的正确调用和稳定运行。
resources/__init__.py ADDED
File without changes
server.py ADDED
@@ -0,0 +1,83 @@
1
+ """
2
+ AIGroup 计量经济学 MCP 服务器 - 简化修复版
3
+ 直接注册工具,避免复杂的包装器
4
+ """
5
+
6
+ import sys
7
+ import os
8
+ from typing import List, Optional, Union
9
+ from mcp.server.fastmcp import FastMCP, Context
10
+ from mcp.server.session import ServerSession
11
+
12
+ # 设置Windows控制台编码
13
if sys.platform == "win32":
    try:
        # Switch the console streams to UTF-8 so the non-ASCII status
        # messages printed by this module can be encoded.
        sys.stdout.reconfigure(encoding='utf-8')
        sys.stderr.reconfigure(encoding='utf-8')
    except (AttributeError, OSError, ValueError):
        # Best effort only: the stream may be missing, replaced by an object
        # without ``reconfigure``, or refuse reconfiguration. Keep the default
        # encoding in that case, but let KeyboardInterrupt/SystemExit through
        # (the previous bare ``except`` swallowed those as well).
        pass
21
+
22
+ # 导入工具注册中心
23
+ from tools.mcp_tools_registry import registry
24
+
25
+ # 创建 FastMCP 服务器实例
26
+ mcp = FastMCP("aigroup-econ-mcp")
27
+
28
+ # 自动发现并注册所有工具组
29
# NOTE: progress messages go to stderr. The server is started with the stdio
# transport, where stdout carries protocol messages; human-readable text
# written there would be interleaved with the protocol stream.
print("正在自动发现工具组...", file=sys.stderr)
registry.auto_discover_groups()

# Register every discovered tool directly with FastMCP (no wrapper layer).
print("正在注册工具...", file=sys.stderr)
for tool_name, tool_info in registry.get_all_tools().items():
    # The unmodified handler supplied by the registry.
    original_handler = tool_info["handler"]

    # ``mcp.tool(...)`` returns a decorator; apply it to the handler as-is.
    mcp.tool(name=tool_name, description=tool_info["description"])(original_handler)

    print(f" - 已注册: {tool_name}", file=sys.stderr)
42
+
43
@mcp.resource("guide://econometrics")
def get_econometrics_guide() -> str:
    """Get complete econometrics tools guide"""
    # Returns the text of resources/MCP_MASTER_GUIDE.md, or a fallback message
    # when the file cannot be found.
    #
    # Look next to this module first so the guide is found regardless of the
    # directory the server was launched from; keep the original
    # working-directory-relative path as a fallback for existing setups.
    module_dir = os.path.dirname(os.path.abspath(__file__))
    candidates = (
        os.path.join(module_dir, "resources", "MCP_MASTER_GUIDE.md"),
        "resources/MCP_MASTER_GUIDE.md",
    )
    for candidate in candidates:
        try:
            with open(candidate, "r", encoding="utf-8") as f:
                return f.read()
        except FileNotFoundError:
            continue
    return "完整使用指南文件未找到,请检查 resources/MCP_MASTER_GUIDE.md 文件是否存在。"
51
+
52
+
53
def main():
    """Print a startup banner and run the FastMCP server over stdio.

    The banner is written to stderr rather than stdout: the server runs with
    the stdio transport, where stdout carries protocol messages, so
    human-readable output must not be mixed into it.
    """
    def log(message: str = "") -> None:
        # Single place that decides where diagnostic text goes.
        print(message, file=sys.stderr)

    log("=" * 60)
    log("AIGroup Econometrics MCP Server - SIMPLE FIXED")
    log("=" * 60)
    log("\n架构: 简化修复版")
    log("\n已注册工具组:")

    # List each tool group with the number of registered tools whose
    # "group" entry matches the group's name.
    for group in registry.tool_groups:
        tools_in_group = [name for name, info in registry.tools.items() if info["group"] == group.name]
        log(f" - {group.name} ({len(tools_in_group)} tools)")

    log(f"\n总工具数: {len(registry.tools)}")
    log("\n支持格式:")
    log(" 输入: txt/json/csv/excel (.xlsx, .xls)")
    log(" 输出: json/markdown/txt")

    log("\n优势:")
    log(" * 简化工具注册")
    log(" * 避免包装器问题")
    log(" * 直接使用原始处理器")

    log("\n启动服务器...")
    log("=" * 60)

    mcp.run(transport="stdio")


if __name__ == "__main__":
    main()
tools/README.md ADDED
@@ -0,0 +1,88 @@
1
+ # Tools Directory
2
+
3
+ ## 当前架构 (v2.0 - Adapter Pattern)
4
+
5
+ ### 活跃文件
6
+
7
+ 1. **econometrics_adapter.py** - 核心适配器
8
+ - 将 econometrics/ 核心算法适配为 MCP 工具
9
+ - 支持文件输入和多种输出格式
10
+ - 减少 84% 代码重复
11
+
12
+ 2. **data_loader.py** - 数据加载组件
13
+ - 支持 txt/json/csv/excel 格式
14
+ - DataLoader: OLS/GMM 数据
15
+ - MLEDataLoader: MLE 数据
16
+
17
+ 3. **output_formatter.py** - 输出格式化组件
18
+ - MarkdownFormatter: Markdown 格式
19
+ - TextFormatter: 纯文本格式
20
+ - 结果保存功能
21
+
22
+ ### 架构优势
23
+
24
+ ```
25
+ MCP Server (server.py)
26
+
27
+ Adapter Layer (econometrics_adapter.py) - 90 lines
28
+
29
+ Core Algorithms (econometrics/basic_parametric_estimation/)
30
+ ├── ols/ols_model.py - OLS 核心算法
31
+ ├── mle/mle_model.py - MLE 核心算法
32
+ └── gmm/gmm_model.py - GMM 核心算法 (已修复 j_p_value bug)
33
+ ```
34
+
35
+ **优点**:
36
+ - ✅ 代码复用:单一真相源
37
+ - ✅ DRY 原则:不重复自己
38
+ - ✅ 易于维护:Bug 只需修复一次
39
+ - ✅ 清晰分层:职责明确
40
+ - ✅ 易于扩展:新算法只需写适配器
41
+
42
+ ### 备份文件
43
+
44
+ `bak_old_implementation/` - 旧的独立实现(已废弃)
45
+ - ols_tool.py (155 lines)
46
+ - mle_tool.py (219 lines)
47
+ - gmm_tool.py (190 lines)
48
+
49
+ 这些文件已被 econometrics_adapter.py (90 lines) 替代
50
+ 节省代码:474 lines (84%)
51
+
52
+ ## 使用示例
53
+
54
+ ```python
55
+ from tools.econometrics_adapter import (
56
+ ols_adapter,
57
+ mle_adapter,
58
+ gmm_adapter
59
+ )
60
+
61
+ # 直接数据输入
62
+ result = ols_adapter(
63
+ y_data=[1,2,3,4,5],
64
+ x_data=[[1],[2],[3],[4],[5]],
65
+ output_format="json"
66
+ )
67
+
68
+ # 文件输入
69
+ result = mle_adapter(
70
+ file_path="data/sample.csv",
71
+ distribution="normal",
72
+ output_format="markdown",
73
+ save_path="results/mle.md"
74
+ )
75
+ ```
76
+
77
+ ## 迁移记录
78
+
79
+ - 2025-11-04: 切换到适配器模式
80
+ - 修复核心 GMM bug (j_p_value)
81
+ - 移动旧实现到 bak_old_implementation/
82
+ - 减少 84% 重复代码
83
+
84
+ ## 相关文档
85
+
86
+ - [架构分析](../ARCHITECTURE_ANALYSIS.md)
87
+ - [服务器代码](../server.py)
88
+ - [核心算法](../econometrics/basic_parametric_estimation/)
tools/__init__.py ADDED
@@ -0,0 +1,45 @@
1
+ """
2
+ 工具模块初始化文件
3
+ """
4
+
5
+ from .data_loader import DataLoader
6
+ from .output_formatter import OutputFormatter
7
+ from .econometrics_adapter import EconometricsAdapter
8
+
9
+ # 时间序列和面板数据工具
10
+ from .time_series_panel_data_adapter import TimeSeriesPanelDataAdapter
11
+ from .time_series_panel_data_tools import (
12
+ arima_model,
13
+ exponential_smoothing_model,
14
+ garch_model,
15
+ unit_root_tests,
16
+ var_svar_model,
17
+ cointegration_analysis,
18
+ dynamic_panel_model
19
+ )
20
+
21
# Keep backward compatibility: expose the adapter's static entry points under
# the older function-style names.
ols_adapter = EconometricsAdapter.ols_regression
mle_adapter = EconometricsAdapter.mle_estimation
gmm_adapter = EconometricsAdapter.gmm_estimation

# Names exported by ``from tools import *``.
__all__ = [
    "DataLoader",
    "OutputFormatter",
    "EconometricsAdapter",
    "TimeSeriesPanelDataAdapter",

    # Basic tools
    "ols_adapter",
    "mle_adapter",
    "gmm_adapter",

    # Time-series and panel-data tools
    "arima_model",
    "exponential_smoothing_model",
    "garch_model",
    "unit_root_tests",
    "var_svar_model",
    "cointegration_analysis",
    "dynamic_panel_model"
]
tools/data_loader.py ADDED
@@ -0,0 +1,213 @@
1
+ """
2
+ 数据加载组件 - 支持多种文件格式
3
+ 支持txt、json、csv、excel文件的读取和解析
4
+ """
5
+
6
+ import json
7
+ from pathlib import Path
8
+ from typing import Any, Dict, List, Union
9
+ import pandas as pd
10
+
11
+
12
class DataLoader:
    """Load regression-style data (dependent variable plus regressors) from a file.

    Supported formats are ``.txt`` (whitespace-separated), ``.json``, ``.csv``
    and Excel (``.xlsx`` / ``.xls``). Every loader returns a dict with the keys
    ``y_data``, ``x_data`` and ``feature_names``.
    """

    @staticmethod
    def load_from_file(file_path: str) -> Dict[str, Any]:
        """
        Load data from a file, choosing the parser by file extension.

        Args:
            file_path: Path of the file to read.

        Returns:
            Dict containing ``y_data``, ``x_data`` and ``feature_names``.

        Raises:
            FileNotFoundError: The file does not exist.
            ValueError: Unsupported file extension or malformed data.
        """
        path = Path(file_path)

        if not path.exists():
            raise FileNotFoundError(f"文件不存在: {file_path}")

        # Extension matching is case-insensitive.
        suffix = path.suffix.lower()

        if suffix == '.txt':
            return DataLoader._load_txt(path)
        elif suffix == '.json':
            return DataLoader._load_json(path)
        elif suffix == '.csv':
            return DataLoader._load_csv(path)
        elif suffix in ['.xlsx', '.xls']:
            return DataLoader._load_excel(path)
        else:
            raise ValueError(f"不支持的文件格式: {suffix}")

    @staticmethod
    def _load_txt(path: Path) -> Dict[str, Any]:
        """Load a text file whose columns are separated by spaces or tabs.

        Blank lines and lines starting with ``#`` are ignored.
        """
        with open(path, 'r', encoding='utf-8') as f:
            lines = f.readlines()

        # Drop blank lines and comment lines.
        data_lines = [line.strip() for line in lines if line.strip() and not line.strip().startswith('#')]

        if not data_lines:
            raise ValueError("txt文件为空或没有有效数据")

        # ``str.split()`` without arguments splits on any run of whitespace,
        # which covers both space- and tab-separated files.
        data = [[float(x) for x in line.split()] for line in data_lines]

        return DataLoader._parse_data_matrix(data)

    @staticmethod
    def _load_json(path: Path) -> Dict[str, Any]:
        """Load a JSON file.

        Two layouts are accepted:

        1. ``{"y_data": [...], "x_data": [[...], ...]}`` (optionally with
           ``feature_names``)
        2. ``{"data": [[y, x1, x2, ...], ...]}``
        """
        with open(path, 'r', encoding='utf-8') as f:
            data = json.load(f)

        # A top-level list, string or number cannot carry the expected keys.
        # Reject it up front: ``in`` on a string does substring matching and
        # on a number raises TypeError instead of the documented ValueError.
        if not isinstance(data, dict):
            raise ValueError("JSON格式错误:需要包含'y_data'和'x_data'或'data'字段")

        if "y_data" in data and "x_data" in data:
            return {
                "y_data": data["y_data"],
                "x_data": data["x_data"],
                "feature_names": data.get("feature_names"),
            }
        elif "data" in data:
            return DataLoader._parse_data_matrix(data["data"])
        else:
            raise ValueError("JSON格式错误:需要包含'y_data'和'x_data'或'data'字段")

    @staticmethod
    def _load_csv(path: Path) -> Dict[str, Any]:
        """Load a CSV file via ``pandas.read_csv``."""
        df = pd.read_csv(path)
        return DataLoader._parse_dataframe(df)

    @staticmethod
    def _load_excel(path: Path) -> Dict[str, Any]:
        """Load an Excel file via ``pandas.read_excel``."""
        df = pd.read_excel(path)
        return DataLoader._parse_dataframe(df)

    @staticmethod
    def _parse_dataframe(df: pd.DataFrame) -> Dict[str, Any]:
        """Split a DataFrame into y (first column) and x (remaining columns).

        Raises:
            ValueError: The frame is empty or has fewer than two columns.
        """
        if df.empty:
            raise ValueError("数据框为空")

        if df.shape[1] < 2:
            raise ValueError("数据至少需要包含因变量和一个自变量")

        # First column is y, the remaining columns are x.
        return {
            "y_data": df.iloc[:, 0].tolist(),
            "x_data": df.iloc[:, 1:].values.tolist(),
            "feature_names": df.columns[1:].tolist(),
        }

    @staticmethod
    def _parse_data_matrix(data: List[List[float]]) -> Dict[str, Any]:
        """Split a row-major matrix into y (first column) and x (remaining columns).

        Feature names are generated as ``X1``, ``X2``, ...

        Raises:
            ValueError: The matrix is empty, a row is not a list/tuple, the rows
                have differing lengths, or there are fewer than two columns.
        """
        if not data:
            raise ValueError("数据矩阵为空")

        # Each row must be a sequence; e.g. {"data": [1, 2, 3]} is rejected
        # here with a ValueError instead of failing later with a TypeError.
        for idx, row in enumerate(data):
            if not isinstance(row, (list, tuple)):
                raise ValueError(f"数据矩阵第{idx + 1}行不是列表")

        n_cols = len(data[0])
        if n_cols < 2:
            raise ValueError("数据至少需要包含因变量和一个自变量")

        # Ragged rows would otherwise yield observations with differing
        # numbers of regressors (or an IndexError on an empty row).
        for idx, row in enumerate(data):
            if len(row) != n_cols:
                raise ValueError(
                    f"数据矩阵第{idx + 1}行的列数({len(row)})与第一行({n_cols})不一致"
                )

        return {
            "y_data": [row[0] for row in data],
            "x_data": [row[1:] for row in data],
            "feature_names": [f"X{i+1}" for i in range(n_cols - 1)],
        }
142
+
143
+
144
class MLEDataLoader:
    """Loader for the single data series consumed by the MLE tools."""

    @staticmethod
    def load_from_file(file_path: str) -> Dict[str, Any]:
        """
        Read a single-column data series from a file.

        Args:
            file_path: Path of the file to read.

        Returns:
            Dict with one key, ``data``, holding the loaded values.

        Raises:
            FileNotFoundError: The file does not exist.
            ValueError: The file extension is not supported.
        """
        source = Path(file_path)
        if not source.exists():
            raise FileNotFoundError(f"文件不存在: {file_path}")

        extension = source.suffix.lower()
        readers = {
            '.txt': MLEDataLoader._load_txt,
            '.json': MLEDataLoader._load_json,
            '.csv': MLEDataLoader._load_csv,
            '.xlsx': MLEDataLoader._load_excel,
            '.xls': MLEDataLoader._load_excel,
        }
        reader = readers.get(extension)
        if reader is None:
            raise ValueError(f"不支持的文件格式: {extension}")
        return reader(source)

    @staticmethod
    def _load_txt(path: Path) -> Dict[str, Any]:
        """Read a text file, taking the first whitespace-separated token of each line.

        Blank lines and lines starting with ``#`` are skipped.
        """
        with open(path, 'r', encoding='utf-8') as f:
            raw_lines = f.readlines()

        stripped = (raw.strip() for raw in raw_lines)
        values = [
            float(text.split()[0])
            for text in stripped
            if text and not text.startswith('#')
        ]
        return {"data": values}

    @staticmethod
    def _load_json(path: Path) -> Dict[str, Any]:
        """Read a JSON file that is either a bare list or an object with a ``data`` key."""
        with open(path, 'r', encoding='utf-8') as f:
            payload = json.load(f)

        if isinstance(payload, list):
            return {"data": payload}
        if isinstance(payload, dict) and "data" in payload:
            return {"data": payload["data"]}
        raise ValueError("JSON格式错误")

    @staticmethod
    def _load_csv(path: Path) -> Dict[str, Any]:
        """Read a CSV file and return its first column."""
        frame = pd.read_csv(path)
        first_column = frame.iloc[:, 0]
        return {"data": first_column.tolist()}

    @staticmethod
    def _load_excel(path: Path) -> Dict[str, Any]:
        """Read an Excel file and return its first column."""
        frame = pd.read_excel(path)
        first_column = frame.iloc[:, 0]
        return {"data": first_column.tolist()}
tools/decorators.py ADDED
@@ -0,0 +1,38 @@
1
+ """
2
+ 工具装饰器模块
3
+ """
4
+
5
+ from functools import wraps
6
+ from typing import Callable, Any
7
+
8
+
9
def with_file_support_decorator(tool_name: str):
    """
    Build a decorator meant to add file-input support to a tool.

    The current implementation is a simplified pass-through: the decorated
    function is called with its arguments unchanged, and ``tool_name`` is
    accepted but not used.

    Args:
        tool_name: Name of the tool being decorated.
    """
    def apply(target: Callable) -> Callable:
        @wraps(target)
        def passthrough(*call_args, **call_kwargs):
            # Simplified implementation - delegate straight to the original.
            outcome = target(*call_args, **call_kwargs)
            return outcome
        return passthrough
    return apply
23
+
24
+
25
def validate_input(data_type: str = "econometric"):
    """
    Build an input-validation decorator.

    The current implementation is a simplified pass-through: no validation is
    performed, the decorated function is called with its arguments unchanged,
    and ``data_type`` is accepted but not used.

    Args:
        data_type: Kind of data the decorated tool expects.
    """
    def apply(target: Callable) -> Callable:
        @wraps(target)
        def passthrough(*call_args, **call_kwargs):
            # Simplified implementation - delegate straight to the original.
            outcome = target(*call_args, **call_kwargs)
            return outcome
        return passthrough
    return apply