aigroup-econ-mcp 0.4.2__py3-none-any.whl → 1.4.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (119)
  1. .gitignore +253 -0
  2. PKG-INFO +710 -0
  3. README.md +672 -0
  4. __init__.py +14 -0
  5. aigroup_econ_mcp-1.4.3.dist-info/METADATA +710 -0
  6. aigroup_econ_mcp-1.4.3.dist-info/RECORD +92 -0
  7. aigroup_econ_mcp-1.4.3.dist-info/entry_points.txt +2 -0
  8. aigroup_econ_mcp-1.4.3.dist-info/licenses/LICENSE +21 -0
  9. cli.py +28 -0
  10. econometrics/README.md +18 -0
  11. econometrics/__init__.py +191 -0
  12. econometrics/advanced_methods/modern_computing_machine_learning/__init__.py +0 -0
  13. econometrics/basic_parametric_estimation/__init__.py +31 -0
  14. econometrics/basic_parametric_estimation/gmm/__init__.py +13 -0
  15. econometrics/basic_parametric_estimation/gmm/gmm_model.py +256 -0
  16. econometrics/basic_parametric_estimation/mle/__init__.py +13 -0
  17. econometrics/basic_parametric_estimation/mle/mle_model.py +241 -0
  18. econometrics/basic_parametric_estimation/ols/__init__.py +13 -0
  19. econometrics/basic_parametric_estimation/ols/ols_model.py +141 -0
  20. econometrics/causal_inference/causal_identification_strategy/__init__.py +0 -0
  21. econometrics/missing_data/missing_data_measurement_error/__init__.py +0 -0
  22. econometrics/model_specification_diagnostics_robust_inference/README.md +173 -0
  23. econometrics/model_specification_diagnostics_robust_inference/__init__.py +78 -0
  24. econometrics/model_specification_diagnostics_robust_inference/diagnostic_tests/__init__.py +20 -0
  25. econometrics/model_specification_diagnostics_robust_inference/diagnostic_tests/diagnostic_tests_model.py +149 -0
  26. econometrics/model_specification_diagnostics_robust_inference/generalized_least_squares/__init__.py +15 -0
  27. econometrics/model_specification_diagnostics_robust_inference/generalized_least_squares/gls_model.py +130 -0
  28. econometrics/model_specification_diagnostics_robust_inference/model_selection/__init__.py +18 -0
  29. econometrics/model_specification_diagnostics_robust_inference/model_selection/model_selection_model.py +286 -0
  30. econometrics/model_specification_diagnostics_robust_inference/regularization/__init__.py +15 -0
  31. econometrics/model_specification_diagnostics_robust_inference/regularization/regularization_model.py +177 -0
  32. econometrics/model_specification_diagnostics_robust_inference/robust_errors/__init__.py +15 -0
  33. econometrics/model_specification_diagnostics_robust_inference/robust_errors/robust_errors_model.py +122 -0
  34. econometrics/model_specification_diagnostics_robust_inference/simultaneous_equations/__init__.py +15 -0
  35. econometrics/model_specification_diagnostics_robust_inference/simultaneous_equations/simultaneous_equations_model.py +246 -0
  36. econometrics/model_specification_diagnostics_robust_inference/weighted_least_squares/__init__.py +15 -0
  37. econometrics/model_specification_diagnostics_robust_inference/weighted_least_squares/wls_model.py +127 -0
  38. econometrics/nonparametric/nonparametric_semiparametric_methods/__init__.py +0 -0
  39. econometrics/spatial_econometrics/spatial_econometrics_new/__init__.py +0 -0
  40. econometrics/specific_data_modeling/micro_discrete_limited_data/__init__.py +0 -0
  41. econometrics/specific_data_modeling/survival_duration_data/__init__.py +0 -0
  42. econometrics/specific_data_modeling/time_series_panel_data/__init__.py +143 -0
  43. econometrics/specific_data_modeling/time_series_panel_data/arima_model.py +104 -0
  44. econometrics/specific_data_modeling/time_series_panel_data/cointegration_vecm.py +334 -0
  45. econometrics/specific_data_modeling/time_series_panel_data/dynamic_panel_models.py +653 -0
  46. econometrics/specific_data_modeling/time_series_panel_data/exponential_smoothing.py +176 -0
  47. econometrics/specific_data_modeling/time_series_panel_data/garch_model.py +198 -0
  48. econometrics/specific_data_modeling/time_series_panel_data/panel_diagnostics.py +125 -0
  49. econometrics/specific_data_modeling/time_series_panel_data/panel_var.py +60 -0
  50. econometrics/specific_data_modeling/time_series_panel_data/structural_break_tests.py +87 -0
  51. econometrics/specific_data_modeling/time_series_panel_data/time_varying_parameter_models.py +106 -0
  52. econometrics/specific_data_modeling/time_series_panel_data/unit_root_tests.py +204 -0
  53. econometrics/specific_data_modeling/time_series_panel_data/var_svar_model.py +372 -0
  54. econometrics/statistical_inference/statistical_inference_techniques/__init__.py +0 -0
  55. econometrics/statistics/distribution_decomposition_methods/__init__.py +0 -0
  56. econometrics/tests/basic_parametric_estimation_tests/__init__.py +3 -0
  57. econometrics/tests/basic_parametric_estimation_tests/test_gmm.py +128 -0
  58. econometrics/tests/basic_parametric_estimation_tests/test_mle.py +127 -0
  59. econometrics/tests/basic_parametric_estimation_tests/test_ols.py +100 -0
  60. econometrics/tests/model_specification_diagnostics_tests/__init__.py +3 -0
  61. econometrics/tests/model_specification_diagnostics_tests/test_diagnostic_tests.py +86 -0
  62. econometrics/tests/model_specification_diagnostics_tests/test_robust_errors.py +89 -0
  63. econometrics/tests/specific_data_modeling_tests/__init__.py +3 -0
  64. econometrics/tests/specific_data_modeling_tests/test_arima.py +98 -0
  65. econometrics/tests/specific_data_modeling_tests/test_dynamic_panel.py +198 -0
  66. econometrics/tests/specific_data_modeling_tests/test_exponential_smoothing.py +105 -0
  67. econometrics/tests/specific_data_modeling_tests/test_garch.py +118 -0
  68. econometrics/tests/specific_data_modeling_tests/test_unit_root.py +156 -0
  69. econometrics/tests/specific_data_modeling_tests/test_var.py +124 -0
  70. prompts/__init__.py +0 -0
  71. prompts/analysis_guides.py +43 -0
  72. pyproject.toml +78 -0
  73. resources/MCP_MASTER_GUIDE.md +422 -0
  74. resources/MCP_TOOLS_DATA_FORMAT_GUIDE.md +185 -0
  75. resources/__init__.py +0 -0
  76. server.py +83 -0
  77. tools/README.md +88 -0
  78. tools/__init__.py +45 -0
  79. tools/data_loader.py +213 -0
  80. tools/decorators.py +38 -0
  81. tools/econometrics_adapter.py +286 -0
  82. tools/mcp_tool_groups/__init__.py +1 -0
  83. tools/mcp_tool_groups/basic_parametric_tools.py +173 -0
  84. tools/mcp_tool_groups/model_specification_tools.py +402 -0
  85. tools/mcp_tool_groups/time_series_tools.py +494 -0
  86. tools/mcp_tools_registry.py +114 -0
  87. tools/model_specification_adapter.py +369 -0
  88. tools/output_formatter.py +563 -0
  89. tools/time_series_panel_data_adapter.py +858 -0
  90. tools/time_series_panel_data_tools.py +65 -0
  91. aigroup_econ_mcp/__init__.py +0 -19
  92. aigroup_econ_mcp/cli.py +0 -82
  93. aigroup_econ_mcp/config.py +0 -561
  94. aigroup_econ_mcp/server.py +0 -452
  95. aigroup_econ_mcp/tools/__init__.py +0 -18
  96. aigroup_econ_mcp/tools/base.py +0 -470
  97. aigroup_econ_mcp/tools/cache.py +0 -533
  98. aigroup_econ_mcp/tools/data_loader.py +0 -171
  99. aigroup_econ_mcp/tools/file_parser.py +0 -829
  100. aigroup_econ_mcp/tools/machine_learning.py +0 -60
  101. aigroup_econ_mcp/tools/ml_ensemble.py +0 -210
  102. aigroup_econ_mcp/tools/ml_evaluation.py +0 -272
  103. aigroup_econ_mcp/tools/ml_models.py +0 -54
  104. aigroup_econ_mcp/tools/ml_regularization.py +0 -172
  105. aigroup_econ_mcp/tools/monitoring.py +0 -555
  106. aigroup_econ_mcp/tools/optimized_example.py +0 -229
  107. aigroup_econ_mcp/tools/panel_data.py +0 -553
  108. aigroup_econ_mcp/tools/regression.py +0 -214
  109. aigroup_econ_mcp/tools/statistics.py +0 -154
  110. aigroup_econ_mcp/tools/time_series.py +0 -667
  111. aigroup_econ_mcp/tools/timeout.py +0 -283
  112. aigroup_econ_mcp/tools/tool_handlers.py +0 -378
  113. aigroup_econ_mcp/tools/tool_registry.py +0 -170
  114. aigroup_econ_mcp/tools/validation.py +0 -482
  115. aigroup_econ_mcp-0.4.2.dist-info/METADATA +0 -360
  116. aigroup_econ_mcp-0.4.2.dist-info/RECORD +0 -29
  117. aigroup_econ_mcp-0.4.2.dist-info/entry_points.txt +0 -2
  118. /aigroup_econ_mcp-0.4.2.dist-info/licenses/LICENSE → /LICENSE +0 -0
  119. {aigroup_econ_mcp-0.4.2.dist-info → aigroup_econ_mcp-1.4.3.dist-info}/WHEEL +0 -0
@@ -1,170 +0,0 @@ aigroup_econ_mcp/tools/tool_registry.py (file removed)
1
- """
2
- 工具注册器模块
3
- 简化MCP工具的注册和管理
4
- """
5
-
6
- from typing import Dict, Any, Optional, List, Callable
7
- from pydantic import Field
8
- from typing import Annotated
9
-
10
- from .base import with_file_support_decorator as econometric_tool
11
-
12
-
13
- # Standard file-input parameters (file_content, file_format); get_tool_params merges these into every tool's parameter set.
14
- FILE_INPUT_PARAMS = {
15
- "file_content": Annotated[
16
- Optional[str],
17
- Field(
18
- default=None,
19
- description="""CSV或JSON文件内容
20
-
21
- 📁 支持格式:
22
- - CSV: 带表头的列数据,自动检测分隔符
23
- - JSON: {"变量名": [数据], ...} 或 [{"变量1": 值, ...}, ...]
24
-
25
- 💡 使用方式:
26
- - 提供文件内容字符串
27
- - 系统会自动解析并识别变量
28
- - 优先使用file_content,如果提供则忽略其他数据参数"""
29
- )
30
- ],
31
- "file_format": Annotated[
32
- str,
33
- Field(
34
- default="auto",
35
- description="""文件格式
36
-
37
- 可选值:
38
- - "auto": 自动检测(默认)
39
- - "csv": CSV格式
40
- - "json": JSON格式"""
41
- )
42
- ]
43
- }
44
-
45
-
46
class ToolConfig:
    """Plain holder for the settings that describe one tool.

    Attributes:
        name: Name assigned to the generated wrapper function.
        impl_func: Callable that implements the tool.
        tool_type: Tool category string handed to the tool decorator.
        description: Text used as the generated wrapper's docstring.
        extra_params: Additional parameter definitions; always a dict.
    """

    def __init__(
        self,
        name: str,
        impl_func: Callable,
        tool_type: str,
        description: str = "",
        extra_params: Dict[str, Any] = None,
    ):
        self.name, self.impl_func, self.tool_type = name, impl_func, tool_type
        self.description = description
        # A missing or empty mapping is replaced by a fresh dict so the
        # attribute can always be used as a mapping.
        self.extra_params = extra_params if extra_params else {}
62
-
63
-
64
def create_tool_wrapper(config: ToolConfig):
    """Build the async entry point for a configured tool.

    The inner coroutine forwards ``ctx`` and all keyword arguments unchanged
    to ``config.impl_func`` and is decorated with
    ``econometric_tool(config.tool_type)``.
    NOTE(review): the decorator is imported from ``.base`` as
    ``with_file_support_decorator`` and presumably adds file-input handling;
    its behavior is not visible here.

    Args:
        config: Tool configuration object.

    Returns:
        The decorated wrapper, with ``__name__`` and ``__doc__`` overwritten
        from ``config.name`` and ``config.description``.
    """
    decorate = econometric_tool(config.tool_type)

    async def tool_wrapper(ctx, **kwargs):
        """Dynamically generated tool wrapper."""
        # Delegate to the concrete implementation.
        outcome = await config.impl_func(ctx, **kwargs)
        return outcome

    wrapped = decorate(tool_wrapper)

    # Expose the configured name and description on the returned callable.
    wrapped.__name__ = config.name
    wrapped.__doc__ = config.description
    return wrapped
85
-
86
-
87
def _optional_param(value_type, description):
    """Return ``Annotated[Optional[value_type], Field(default=None, ...)]``.

    A fresh ``Field`` is created on every call, so no field object is shared
    between entries of the mapping below.
    """
    return Annotated[
        Optional[value_type],
        Field(default=None, description=description),
    ]


# Mapping from tool type to the data-input parameters that type accepts.
TOOL_TYPE_PARAMS = {
    "multi_var_dict": {
        "data": _optional_param(
            Dict[str, List[float]], "数据字典,格式为 {变量名: [数值列表]}"
        ),
    },
    "regression": {
        "y_data": _optional_param(List[float], "因变量数据"),
        "x_data": _optional_param(List[List[float]], "自变量数据"),
        "feature_names": _optional_param(List[str], "特征名称"),
    },
    "single_var": {
        "data": _optional_param(List[float], "时间序列数据"),
    },
    "panel": {
        "y_data": _optional_param(List[float], "因变量数据"),
        "x_data": _optional_param(List[List[float]], "自变量数据"),
        "entity_ids": _optional_param(List[str], "个体标识符"),
        "time_periods": _optional_param(List[str], "时间标识符"),
        "feature_names": _optional_param(List[str], "特征名称"),
    },
    "time_series": {
        "data": _optional_param(Dict[str, List[float]], "多变量时间序列数据"),
    },
}
144
-
145
-
146
def get_tool_params(tool_type: str, extra_params: Dict[str, Any] = None) -> Dict[str, Any]:
    """Assemble the full parameter definition for a tool.

    Three layers are merged into a new dict, later layers overriding earlier
    keys: the entries of ``TOOL_TYPE_PARAMS`` for ``tool_type`` (none when the
    type is unknown), then ``FILE_INPUT_PARAMS``, then ``extra_params``.

    Args:
        tool_type: Tool type key looked up in ``TOOL_TYPE_PARAMS``.
        extra_params: Additional parameter definitions; ignored when falsy.

    Returns:
        A new dict holding the merged parameter definitions.
    """
    layers = (
        TOOL_TYPE_PARAMS.get(tool_type, {}),  # base parameters for the type
        FILE_INPUT_PARAMS,                    # file-input parameters
        extra_params or {},                   # caller-supplied additions
    )

    merged = {}
    for layer in layers:
        merged.update(layer)
    return merged
@@ -1,482 +0,0 @@ aigroup_econ_mcp/tools/validation.py (file removed)
1
- """
2
- 统一的参数验证系统
3
- 提供类型检查、数据验证和参数验证功能
4
- """
5
-
6
- import re
7
- import numpy as np
8
- import pandas as pd
9
- from typing import Any, Dict, List, Optional, Union, Tuple, Callable
10
- from pydantic import BaseModel, Field, validator
11
- from enum import Enum
12
- import warnings
13
-
14
-
15
class ValidationError(Exception):
    """Raised when a parameter fails validation.

    Attributes:
        message: Human-readable description; also the exception's only arg.
        field: Name of the offending field, or ``None``.
        value: The rejected value, or ``None``.
    """

    def __init__(self, message: str, field: str = None, value: Any = None):
        super().__init__(message)
        self.message, self.field, self.value = message, field, value
22
-
23
-
24
class DataType(Enum):
    """Kinds of data a validation rule can require."""

    # Scalars
    NUMERIC = "numeric"
    INTEGER = "integer"
    FLOAT = "float"
    STRING = "string"
    BOOLEAN = "boolean"

    # Containers
    LIST = "list"
    DICT = "dict"

    # pandas objects
    DATAFRAME = "dataframe"
    SERIES = "series"
35
-
36
-
37
class ValidationRule(BaseModel):
    """Declarative description of the checks applied to one parameter.

    Only ``data_type`` is required; every other constraint defaults to
    ``None`` (not checked), and ``required`` defaults to ``True``.
    """

    # Presence and type
    required: bool = Field(True, description="是否必需")
    data_type: DataType = Field(..., description="数据类型")

    # Numeric bounds
    min_value: Optional[float] = Field(None, description="最小值")
    max_value: Optional[float] = Field(None, description="最大值")

    # Length bounds
    min_length: Optional[int] = Field(None, description="最小长度")
    max_length: Optional[int] = Field(None, description="最大长度")

    # Content constraints
    pattern: Optional[str] = Field(None, description="正则表达式模式")
    allowed_values: Optional[List[Any]] = Field(None, description="允许的值")
    custom_validator: Optional[Callable] = Field(None, description="自定义验证函数")
48
-
49
-
50
class ParameterValidator:
    """Rule-driven validator for named parameters.

    Rules are registered per field name with :meth:`add_rule`. A single value
    can be checked with :meth:`validate_parameter`; a whole parameter dict is
    checked and type-converted with :meth:`validate_all`.

    Type-check semantics:
        * ``bool`` values are rejected for NUMERIC, INTEGER and FLOAT rules.
          ``bool`` is a subclass of ``int``, so without the explicit
          exclusion ``True`` would silently pass as the integer 1.
        * FLOAT rules accept integers as well as floats; :meth:`validate_all`
          converts the accepted value to ``float``.
    """

    # data type -> (accepted types, label used in the error message,
    #               whether bool must be rejected explicitly)
    _TYPE_CHECKS = {
        DataType.NUMERIC: ((int, float, np.number), "数值", True),
        DataType.INTEGER: ((int, np.integer), "整数", True),
        DataType.FLOAT: ((int, float, np.integer, np.floating), "浮点数", True),
        DataType.STRING: ((str,), "字符串", False),
        DataType.BOOLEAN: ((bool,), "布尔", False),
        DataType.LIST: ((list, tuple, np.ndarray), "列表", False),
        DataType.DICT: ((dict,), "字典", False),
        DataType.DATAFRAME: ((pd.DataFrame,), "DataFrame", False),
        DataType.SERIES: ((pd.Series,), "Series", False),
    }

    def __init__(self, strict_mode: bool = True):
        """Create a validator with no rules.

        Args:
            strict_mode: When True, :meth:`validate_all` raises
                ``ValidationError`` on the first failure; when False,
                failures are collected and reported as a single warning.
        """
        self.strict_mode = strict_mode
        self._validation_rules: Dict[str, ValidationRule] = {}

    def add_rule(self, field_name: str, rule: ValidationRule):
        """Register (or replace) the rule for ``field_name``.

        Args:
            field_name: Name of the parameter the rule applies to.
            rule: Validation rule for that parameter.
        """
        self._validation_rules[field_name] = rule

    def validate_parameter(self, field_name: str, value: Any) -> Tuple[bool, Optional[str]]:
        """Validate a single value against the rule of ``field_name``.

        Checks run in this order and stop at the first failure: presence,
        data type, numeric range, length, regex pattern, allowed values,
        custom validator. A field without a registered rule always passes.

        Args:
            field_name: Name of the parameter.
            value: Value to check.

        Returns:
            ``(True, None)`` on success, ``(False, message)`` on failure.
        """
        if field_name not in self._validation_rules:
            return True, None

        rule = self._validation_rules[field_name]

        # Presence: a missing value fails only when the rule requires one.
        if value is None:
            if rule.required:
                return False, f"参数 '{field_name}' 是必需的"
            return True, None

        ok, error = self._validate_data_type(value, rule.data_type)
        if not ok:
            return False, error

        if rule.min_value is not None or rule.max_value is not None:
            ok, error = self._validate_numeric_range(value, rule.min_value, rule.max_value)
            if not ok:
                return False, error

        if rule.min_length is not None or rule.max_length is not None:
            ok, error = self._validate_length(value, rule.min_length, rule.max_length)
            if not ok:
                return False, error

        if rule.pattern is not None:
            ok, error = self._validate_pattern(value, rule.pattern)
            if not ok:
                return False, error

        if rule.allowed_values is not None:
            ok, error = self._validate_allowed_values(value, rule.allowed_values)
            if not ok:
                return False, error

        if rule.custom_validator is not None:
            # A custom validator may raise; that is reported as a failure
            # rather than propagated.
            try:
                passed = rule.custom_validator(value)
            except Exception as e:
                return False, f"参数 '{field_name}' 自定义验证失败: {str(e)}"
            if not passed:
                return False, f"参数 '{field_name}' 未通过自定义验证"

        return True, None

    def _validate_data_type(self, value: Any, data_type: DataType) -> Tuple[bool, Optional[str]]:
        """Check that ``value`` is an instance of the types for ``data_type``.

        A ``data_type`` with no entry in ``_TYPE_CHECKS`` is not checked and
        passes.
        """
        check = self._TYPE_CHECKS.get(data_type)
        if check is None:
            return True, None

        accepted, label, reject_bool = check
        is_rejected_bool = reject_bool and isinstance(value, bool)
        if is_rejected_bool or not isinstance(value, accepted):
            return False, f"期望{label}类型,实际类型: {type(value).__name__}"
        return True, None

    def _validate_numeric_range(self, value: Any, min_val: Optional[float], max_val: Optional[float]) -> Tuple[bool, Optional[str]]:
        """Check ``min_val <= float(value) <= max_val`` for the bounds given."""
        try:
            numeric_value = float(value)
        except (ValueError, TypeError):
            return False, "无法转换为数值进行范围验证"

        if min_val is not None and numeric_value < min_val:
            return False, f"数值 {numeric_value} 小于最小值 {min_val}"

        if max_val is not None and numeric_value > max_val:
            return False, f"数值 {numeric_value} 大于最大值 {max_val}"

        return True, None

    def _validate_length(self, value: Any, min_len: Optional[int], max_len: Optional[int]) -> Tuple[bool, Optional[str]]:
        """Check ``min_len <= len(value) <= max_len`` for the bounds given."""
        try:
            length = len(value)
        except TypeError:
            return False, "无法获取长度信息"

        if min_len is not None and length < min_len:
            return False, f"长度 {length} 小于最小长度 {min_len}"

        if max_len is not None and length > max_len:
            return False, f"长度 {length} 大于最大长度 {max_len}"

        return True, None

    def _validate_pattern(self, value: Any, pattern: str) -> Tuple[bool, Optional[str]]:
        """Check that the string ``value`` matches ``pattern`` from its start.

        ``re.match`` is used, so the pattern is anchored at the beginning of
        the string only.
        """
        if not isinstance(value, str):
            return False, "模式验证仅适用于字符串类型"

        try:
            matched = re.match(pattern, value)
        except re.error as e:
            return False, f"正则表达式模式错误: {str(e)}"

        if not matched:
            return False, f"字符串 '{value}' 不匹配模式 '{pattern}'"
        return True, None

    def _validate_allowed_values(self, value: Any, allowed_values: List[Any]) -> Tuple[bool, Optional[str]]:
        """Check that ``value`` is a member of ``allowed_values``."""
        if value not in allowed_values:
            return False, f"值 '{value}' 不在允许的值列表中: {allowed_values}"

        return True, None

    def validate_all(self, parameters: Dict[str, Any]) -> Dict[str, Any]:
        """Validate and convert every parameter that has a registered rule.

        Only fields with a rule appear in the result; other keys of
        ``parameters`` are ignored. A field that is absent (or ``None``) and
        passes validation is returned as ``None``.

        Args:
            parameters: Mapping of parameter name to raw value.

        Returns:
            Mapping of field name to validated, type-converted value. In
            non-strict mode, fields that failed are omitted.

        Raises:
            ValidationError: In strict mode, on the first failing field.
        """
        validated_params = {}
        errors = []

        for field_name, rule in self._validation_rules.items():
            value = parameters.get(field_name)

            is_valid, error_msg = self.validate_parameter(field_name, value)
            if not is_valid:
                if self.strict_mode:
                    raise ValidationError(error_msg, field_name, value)
                errors.append(f"{field_name}: {error_msg}")
                continue

            if value is None or not rule.data_type:
                validated_params[field_name] = value
                continue

            try:
                validated_params[field_name] = self._convert_type(value, rule.data_type)
            except Exception as e:
                error_msg = f"参数 '{field_name}' 类型转换失败: {str(e)}"
                if self.strict_mode:
                    raise ValidationError(error_msg, field_name, value) from e
                errors.append(error_msg)

        # Errors are only ever collected in non-strict mode.
        if errors:
            warnings.warn(f"参数验证警告: {', '.join(errors)}")

        return validated_params

    def _convert_type(self, value: Any, data_type: DataType) -> Any:
        """Convert an already-validated value to the canonical Python type.

        INTEGER, FLOAT, STRING and BOOLEAN go through the matching builtin;
        LIST turns tuples and arrays into lists; everything else is returned
        unchanged.
        """
        if data_type == DataType.INTEGER:
            return int(value)
        if data_type == DataType.FLOAT:
            return float(value)
        if data_type == DataType.STRING:
            return str(value)
        if data_type == DataType.BOOLEAN:
            return bool(value)
        if data_type == DataType.LIST and isinstance(value, (tuple, np.ndarray)):
            return list(value)
        return value
300
-
301
-
302
# Factories for pre-configured validators.
class EconometricValidator:
    """Factories for validators used by the econometric tools."""

    @staticmethod
    def create_data_validator() -> ParameterValidator:
        """Create a validator for the ``data``, ``y_data`` and ``x_data`` inputs.

        All three fields are required:
            * ``data``: non-empty dict whose values are lists or arrays.
            * ``y_data``: list-like of at least 10 real numbers.
            * ``x_data``: non-empty list-like of non-empty rows.

        The ``y_data`` element check accepts NumPy integer and floating
        scalars in addition to ``int`` and ``float``. The LIST rule admits
        ``np.ndarray`` inputs, and iterating such an array yields NumPy
        scalars (for example ``np.int64``) that are not ``int`` instances.
        """
        validator = ParameterValidator()

        def _columns_are_sequences(mapping):
            return all(isinstance(col, (list, np.ndarray)) for col in mapping.values())

        def _all_real_numbers(values):
            return all(
                isinstance(v, (int, float, np.integer, np.floating)) for v in values
            )

        def _rows_are_non_empty(rows):
            return all(
                isinstance(row, (list, np.ndarray)) and len(row) > 0 for row in rows
            )

        validator.add_rule("data", ValidationRule(
            required=True,
            data_type=DataType.DICT,
            min_length=1,
            custom_validator=_columns_are_sequences,
        ))

        validator.add_rule("y_data", ValidationRule(
            required=True,
            data_type=DataType.LIST,
            min_length=10,
            custom_validator=_all_real_numbers,
        ))

        validator.add_rule("x_data", ValidationRule(
            required=True,
            data_type=DataType.LIST,
            min_length=1,
            custom_validator=_rows_are_non_empty,
        ))

        return validator

    @staticmethod
    def create_model_validator() -> ParameterValidator:
        """Create a validator for optional model hyper-parameters.

        Every rule is optional and bounded (inclusive):
        ``n_estimators`` in [1, 10000], ``max_depth`` in [1, 100],
        ``learning_rate`` in [0.001, 1.0], ``alpha`` in [0.0, 100.0].
        """
        validator = ParameterValidator()

        # (field name, data type, lower bound, upper bound)
        bounded_rules = (
            ("n_estimators", DataType.INTEGER, 1, 10000),
            ("max_depth", DataType.INTEGER, 1, 100),
            ("learning_rate", DataType.FLOAT, 0.001, 1.0),
            ("alpha", DataType.FLOAT, 0.0, 100.0),
        )
        for name, kind, lowest, highest in bounded_rules:
            validator.add_rule(name, ValidationRule(
                required=False,
                data_type=kind,
                min_value=lowest,
                max_value=highest,
            ))

        return validator
370
-
371
-
372
# Convenience validation functions.
def validate_econometric_data(data: Dict[str, Any]) -> Dict[str, Any]:
    """Validate a ``{variable name: values}`` data dictionary.

    The input must be a non-empty dict whose every value is a list or a
    NumPy array. Validation uses a fresh ``ParameterValidator`` in its
    default (strict) mode, so a failure raises ``ValidationError``.

    Args:
        data: Input data, formatted as ``{variable name: [values]}``.

    Returns:
        The validated data dictionary.
    """
    def _columns_are_sequences(mapping):
        return all(isinstance(v, (list, np.ndarray)) for v in mapping.values())

    data_rule = ValidationRule(
        required=True,
        data_type=DataType.DICT,
        min_length=1,
        custom_validator=_columns_are_sequences,
    )

    checker = ParameterValidator()
    checker.add_rule("data", data_rule)

    # Wrap the payload under the "data" key the rule is registered for.
    return checker.validate_all({"data": data})["data"]
397
-
398
-
399
def validate_model_parameters(parameters: Dict[str, Any]) -> Dict[str, Any]:
    """Validate model parameters with the pre-configured model validator.

    Args:
        parameters: Model parameters keyed by name.

    Returns:
        The result of ``validate_all`` on the validator built by
        ``EconometricValidator.create_model_validator``.
    """
    return EconometricValidator.create_model_validator().validate_all(parameters)
411
-
412
-
413
def validate_time_series_data(data: List[float], min_length: int = 10) -> List[float]:
    """Validate a univariate time series.

    The series must be a list or NumPy array holding at least ``min_length``
    real numbers, none of them missing. NumPy integer and floating scalars
    count as numbers: arrays are accepted as containers, and iterating an
    array yields NumPy scalars (for example ``np.int64`` or ``np.float32``)
    that are not instances of ``int`` or ``float``.

    Args:
        data: Time series observations.
        min_length: Minimum number of observations required.

    Returns:
        The observations as a list.

    Raises:
        ValidationError: If the container type, the length, an element type
            or a missing value makes the series invalid.
    """
    if not isinstance(data, (list, np.ndarray)):
        raise ValidationError("时间序列数据必须是列表或数组")

    if len(data) < min_length:
        raise ValidationError(f"时间序列数据长度不足,至少需要 {min_length} 个观测点")

    numeric_types = (int, float, np.integer, np.floating)
    if not all(isinstance(x, numeric_types) for x in data):
        raise ValidationError("时间序列数据必须全部为数值")

    # Missing values (NaN) are numeric by type, so they are checked separately.
    if any(pd.isna(x) for x in data):
        raise ValidationError("时间序列数据包含缺失值")

    return list(data)
441
-
442
def validate_numeric_data(data: List[float], data_name: str = "数据") -> bool:
    """Validate that ``data`` is a non-empty sequence of real numbers.

    NumPy integer and floating scalars count as numbers: arrays are accepted
    as containers, and iterating an array yields NumPy scalars (for example
    ``np.int64`` or ``np.float32``) that are not instances of ``int`` or
    ``float``.

    Args:
        data: List or NumPy array of numeric values.
        data_name: Label for the data, used in error messages.

    Returns:
        ``True`` when validation passes.

    Raises:
        ValidationError: If the container type is wrong, the data is empty,
            an element is not numeric, or a value is missing.
    """
    if not isinstance(data, (list, np.ndarray)):
        raise ValidationError(f"{data_name} 必须是列表或数组")

    if len(data) == 0:
        raise ValidationError(f"{data_name} 不能为空")

    numeric_types = (int, float, np.integer, np.floating)
    if not all(isinstance(x, numeric_types) for x in data):
        raise ValidationError(f"{data_name} 必须全部为数值")

    # Missing values (NaN) are numeric by type, so they are checked separately.
    if any(pd.isna(x) for x in data):
        raise ValidationError(f"{data_name} 包含缺失值")

    return True
470
-
471
# Public API of this module.
__all__ = [
    # Exception, type enumeration and rule model
    "ValidationError",
    "DataType",
    "ValidationRule",
    # Validators
    "ParameterValidator",
    "EconometricValidator",
    # Convenience functions
    "validate_econometric_data",
    "validate_model_parameters",
    "validate_time_series_data",
    "validate_numeric_data",
]