aigroup-econ-mcp 1.3.3__py3-none-any.whl → 2.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (198) hide show
  1. .gitignore +253 -0
  2. PKG-INFO +732 -0
  3. README.md +687 -0
  4. __init__.py +14 -0
  5. aigroup_econ_mcp-2.0.1.dist-info/METADATA +732 -0
  6. aigroup_econ_mcp-2.0.1.dist-info/RECORD +170 -0
  7. aigroup_econ_mcp-2.0.1.dist-info/entry_points.txt +2 -0
  8. aigroup_econ_mcp-2.0.1.dist-info/licenses/LICENSE +21 -0
  9. cli.py +32 -0
  10. econometrics/README.md +18 -0
  11. econometrics/__init__.py +191 -0
  12. econometrics/advanced_methods/modern_computing_machine_learning/__init__.py +30 -0
  13. econometrics/advanced_methods/modern_computing_machine_learning/causal_forest.py +253 -0
  14. econometrics/advanced_methods/modern_computing_machine_learning/double_ml.py +268 -0
  15. econometrics/advanced_methods/modern_computing_machine_learning/gradient_boosting.py +249 -0
  16. econometrics/advanced_methods/modern_computing_machine_learning/hierarchical_clustering.py +243 -0
  17. econometrics/advanced_methods/modern_computing_machine_learning/kmeans_clustering.py +293 -0
  18. econometrics/advanced_methods/modern_computing_machine_learning/neural_network.py +264 -0
  19. econometrics/advanced_methods/modern_computing_machine_learning/random_forest.py +195 -0
  20. econometrics/advanced_methods/modern_computing_machine_learning/support_vector_machine.py +226 -0
  21. econometrics/advanced_methods/modern_computing_machine_learning/test_all_modules.py +329 -0
  22. econometrics/advanced_methods/modern_computing_machine_learning/test_report.md +107 -0
  23. econometrics/basic_parametric_estimation/__init__.py +31 -0
  24. econometrics/basic_parametric_estimation/gmm/__init__.py +13 -0
  25. econometrics/basic_parametric_estimation/gmm/gmm_model.py +256 -0
  26. econometrics/basic_parametric_estimation/mle/__init__.py +13 -0
  27. econometrics/basic_parametric_estimation/mle/mle_model.py +241 -0
  28. econometrics/basic_parametric_estimation/ols/__init__.py +13 -0
  29. econometrics/basic_parametric_estimation/ols/ols_model.py +141 -0
  30. econometrics/causal_inference/__init__.py +66 -0
  31. econometrics/causal_inference/causal_identification_strategy/__init__.py +104 -0
  32. econometrics/causal_inference/causal_identification_strategy/control_function.py +112 -0
  33. econometrics/causal_inference/causal_identification_strategy/difference_in_differences.py +107 -0
  34. econometrics/causal_inference/causal_identification_strategy/event_study.py +119 -0
  35. econometrics/causal_inference/causal_identification_strategy/first_difference.py +89 -0
  36. econometrics/causal_inference/causal_identification_strategy/fixed_effects.py +103 -0
  37. econometrics/causal_inference/causal_identification_strategy/hausman_test.py +69 -0
  38. econometrics/causal_inference/causal_identification_strategy/instrumental_variables.py +145 -0
  39. econometrics/causal_inference/causal_identification_strategy/mediation_analysis.py +121 -0
  40. econometrics/causal_inference/causal_identification_strategy/moderation_analysis.py +109 -0
  41. econometrics/causal_inference/causal_identification_strategy/propensity_score_matching.py +140 -0
  42. econometrics/causal_inference/causal_identification_strategy/random_effects.py +100 -0
  43. econometrics/causal_inference/causal_identification_strategy/regression_discontinuity.py +98 -0
  44. econometrics/causal_inference/causal_identification_strategy/synthetic_control.py +111 -0
  45. econometrics/causal_inference/causal_identification_strategy/triple_difference.py +86 -0
  46. econometrics/distribution_analysis/__init__.py +28 -0
  47. econometrics/distribution_analysis/oaxaca_blinder.py +184 -0
  48. econometrics/distribution_analysis/time_series_decomposition.py +152 -0
  49. econometrics/distribution_analysis/variance_decomposition.py +179 -0
  50. econometrics/missing_data/__init__.py +18 -0
  51. econometrics/missing_data/imputation_methods.py +219 -0
  52. econometrics/missing_data/missing_data_measurement_error/__init__.py +0 -0
  53. econometrics/model_specification_diagnostics_robust_inference/README.md +173 -0
  54. econometrics/model_specification_diagnostics_robust_inference/__init__.py +78 -0
  55. econometrics/model_specification_diagnostics_robust_inference/diagnostic_tests/__init__.py +20 -0
  56. econometrics/model_specification_diagnostics_robust_inference/diagnostic_tests/diagnostic_tests_model.py +149 -0
  57. econometrics/model_specification_diagnostics_robust_inference/generalized_least_squares/__init__.py +15 -0
  58. econometrics/model_specification_diagnostics_robust_inference/generalized_least_squares/gls_model.py +130 -0
  59. econometrics/model_specification_diagnostics_robust_inference/model_selection/__init__.py +18 -0
  60. econometrics/model_specification_diagnostics_robust_inference/model_selection/model_selection_model.py +286 -0
  61. econometrics/model_specification_diagnostics_robust_inference/regularization/__init__.py +15 -0
  62. econometrics/model_specification_diagnostics_robust_inference/regularization/regularization_model.py +177 -0
  63. econometrics/model_specification_diagnostics_robust_inference/robust_errors/__init__.py +15 -0
  64. econometrics/model_specification_diagnostics_robust_inference/robust_errors/robust_errors_model.py +122 -0
  65. econometrics/model_specification_diagnostics_robust_inference/simultaneous_equations/__init__.py +15 -0
  66. econometrics/model_specification_diagnostics_robust_inference/simultaneous_equations/simultaneous_equations_model.py +246 -0
  67. econometrics/model_specification_diagnostics_robust_inference/weighted_least_squares/__init__.py +15 -0
  68. econometrics/model_specification_diagnostics_robust_inference/weighted_least_squares/wls_model.py +127 -0
  69. econometrics/nonparametric/__init__.py +35 -0
  70. econometrics/nonparametric/gam_model.py +117 -0
  71. econometrics/nonparametric/kernel_regression.py +161 -0
  72. econometrics/nonparametric/nonparametric_semiparametric_methods/__init__.py +0 -0
  73. econometrics/nonparametric/quantile_regression.py +249 -0
  74. econometrics/nonparametric/spline_regression.py +100 -0
  75. econometrics/spatial_econometrics/__init__.py +68 -0
  76. econometrics/spatial_econometrics/geographically_weighted_regression.py +211 -0
  77. econometrics/spatial_econometrics/gwr_simple.py +154 -0
  78. econometrics/spatial_econometrics/spatial_autocorrelation.py +356 -0
  79. econometrics/spatial_econometrics/spatial_durbin_model.py +177 -0
  80. econometrics/spatial_econometrics/spatial_econometrics_new/__init__.py +0 -0
  81. econometrics/spatial_econometrics/spatial_regression.py +315 -0
  82. econometrics/spatial_econometrics/spatial_weights.py +226 -0
  83. econometrics/specific_data_modeling/micro_discrete_limited_data/README.md +164 -0
  84. econometrics/specific_data_modeling/micro_discrete_limited_data/__init__.py +40 -0
  85. econometrics/specific_data_modeling/micro_discrete_limited_data/count_data_models.py +311 -0
  86. econometrics/specific_data_modeling/micro_discrete_limited_data/discrete_choice_models.py +294 -0
  87. econometrics/specific_data_modeling/micro_discrete_limited_data/limited_dependent_variable_models.py +282 -0
  88. econometrics/specific_data_modeling/survival_duration_data/__init__.py +0 -0
  89. econometrics/specific_data_modeling/time_series_panel_data/__init__.py +143 -0
  90. econometrics/specific_data_modeling/time_series_panel_data/arima_model.py +104 -0
  91. econometrics/specific_data_modeling/time_series_panel_data/cointegration_vecm.py +334 -0
  92. econometrics/specific_data_modeling/time_series_panel_data/dynamic_panel_models.py +653 -0
  93. econometrics/specific_data_modeling/time_series_panel_data/exponential_smoothing.py +176 -0
  94. econometrics/specific_data_modeling/time_series_panel_data/garch_model.py +198 -0
  95. econometrics/specific_data_modeling/time_series_panel_data/panel_diagnostics.py +125 -0
  96. econometrics/specific_data_modeling/time_series_panel_data/panel_var.py +60 -0
  97. econometrics/specific_data_modeling/time_series_panel_data/structural_break_tests.py +87 -0
  98. econometrics/specific_data_modeling/time_series_panel_data/time_varying_parameter_models.py +106 -0
  99. econometrics/specific_data_modeling/time_series_panel_data/unit_root_tests.py +204 -0
  100. econometrics/specific_data_modeling/time_series_panel_data/var_svar_model.py +372 -0
  101. econometrics/statistical_inference/__init__.py +21 -0
  102. econometrics/statistical_inference/bootstrap_methods.py +162 -0
  103. econometrics/statistical_inference/permutation_test.py +177 -0
  104. econometrics/statistical_inference/statistical_inference_techniques/__init__.py +0 -0
  105. econometrics/statistics/distribution_decomposition_methods/__init__.py +0 -0
  106. econometrics/survival_analysis/__init__.py +18 -0
  107. econometrics/survival_analysis/survival_models.py +259 -0
  108. econometrics/tests/basic_parametric_estimation_tests/__init__.py +3 -0
  109. econometrics/tests/basic_parametric_estimation_tests/test_gmm.py +128 -0
  110. econometrics/tests/basic_parametric_estimation_tests/test_mle.py +127 -0
  111. econometrics/tests/basic_parametric_estimation_tests/test_ols.py +100 -0
  112. econometrics/tests/causal_inference_tests/__init__.py +3 -0
  113. econometrics/tests/causal_inference_tests/detailed_test.py +441 -0
  114. econometrics/tests/causal_inference_tests/test_all_methods.py +418 -0
  115. econometrics/tests/causal_inference_tests/test_causal_identification_strategy.py +202 -0
  116. econometrics/tests/causal_inference_tests/test_difference_in_differences.py +53 -0
  117. econometrics/tests/causal_inference_tests/test_instrumental_variables.py +44 -0
  118. econometrics/tests/model_specification_diagnostics_tests/__init__.py +3 -0
  119. econometrics/tests/model_specification_diagnostics_tests/test_diagnostic_tests.py +86 -0
  120. econometrics/tests/model_specification_diagnostics_tests/test_robust_errors.py +89 -0
  121. econometrics/tests/specific_data_modeling_tests/__init__.py +3 -0
  122. econometrics/tests/specific_data_modeling_tests/test_arima.py +98 -0
  123. econometrics/tests/specific_data_modeling_tests/test_dynamic_panel.py +198 -0
  124. econometrics/tests/specific_data_modeling_tests/test_exponential_smoothing.py +105 -0
  125. econometrics/tests/specific_data_modeling_tests/test_garch.py +118 -0
  126. econometrics/tests/specific_data_modeling_tests/test_micro_discrete_limited_data.py +189 -0
  127. econometrics/tests/specific_data_modeling_tests/test_unit_root.py +156 -0
  128. econometrics/tests/specific_data_modeling_tests/test_var.py +124 -0
  129. econometrics//321/206/320/254/320/272/321/205/342/225/235/320/220/321/205/320/237/320/241/321/205/320/264/320/267/321/207/342/226/222/342/225/227/321/204/342/225/235/320/250/321/205/320/225/320/230/321/207/342/225/221/320/267/321/205/320/230/320/226/321/206/320/256/320/240.md +544 -0
  130. prompts/__init__.py +0 -0
  131. prompts/analysis_guides.py +43 -0
  132. pyproject.toml +85 -0
  133. resources/MCP_MASTER_GUIDE.md +422 -0
  134. resources/MCP_TOOLS_DATA_FORMAT_GUIDE.md +185 -0
  135. resources/__init__.py +0 -0
  136. server.py +97 -0
  137. tools/README.md +88 -0
  138. tools/__init__.py +119 -0
  139. tools/causal_inference_adapter.py +658 -0
  140. tools/data_loader.py +213 -0
  141. tools/decorators.py +38 -0
  142. tools/distribution_analysis_adapter.py +121 -0
  143. tools/econometrics_adapter.py +286 -0
  144. tools/gwr_simple_adapter.py +54 -0
  145. tools/machine_learning_adapter.py +567 -0
  146. tools/mcp_tool_groups/__init__.py +15 -0
  147. tools/mcp_tool_groups/basic_parametric_tools.py +173 -0
  148. tools/mcp_tool_groups/causal_inference_tools.py +643 -0
  149. tools/mcp_tool_groups/distribution_analysis_tools.py +169 -0
  150. tools/mcp_tool_groups/machine_learning_tools.py +422 -0
  151. tools/mcp_tool_groups/microecon_tools.py +325 -0
  152. tools/mcp_tool_groups/missing_data_tools.py +117 -0
  153. tools/mcp_tool_groups/model_specification_tools.py +402 -0
  154. tools/mcp_tool_groups/nonparametric_tools.py +225 -0
  155. tools/mcp_tool_groups/spatial_econometrics_tools.py +323 -0
  156. tools/mcp_tool_groups/statistical_inference_tools.py +131 -0
  157. tools/mcp_tool_groups/time_series_tools.py +494 -0
  158. tools/mcp_tools_registry.py +124 -0
  159. tools/microecon_adapter.py +412 -0
  160. tools/missing_data_adapter.py +73 -0
  161. tools/model_specification_adapter.py +369 -0
  162. tools/nonparametric_adapter.py +190 -0
  163. tools/output_formatter.py +563 -0
  164. tools/spatial_econometrics_adapter.py +318 -0
  165. tools/statistical_inference_adapter.py +90 -0
  166. tools/survival_analysis_adapter.py +46 -0
  167. tools/time_series_panel_data_adapter.py +858 -0
  168. tools/time_series_panel_data_tools.py +65 -0
  169. aigroup_econ_mcp/__init__.py +0 -19
  170. aigroup_econ_mcp/cli.py +0 -82
  171. aigroup_econ_mcp/config.py +0 -561
  172. aigroup_econ_mcp/server.py +0 -452
  173. aigroup_econ_mcp/tools/__init__.py +0 -19
  174. aigroup_econ_mcp/tools/base.py +0 -470
  175. aigroup_econ_mcp/tools/cache.py +0 -533
  176. aigroup_econ_mcp/tools/data_loader.py +0 -195
  177. aigroup_econ_mcp/tools/file_parser.py +0 -1027
  178. aigroup_econ_mcp/tools/machine_learning.py +0 -60
  179. aigroup_econ_mcp/tools/ml_ensemble.py +0 -210
  180. aigroup_econ_mcp/tools/ml_evaluation.py +0 -272
  181. aigroup_econ_mcp/tools/ml_models.py +0 -54
  182. aigroup_econ_mcp/tools/ml_regularization.py +0 -186
  183. aigroup_econ_mcp/tools/monitoring.py +0 -555
  184. aigroup_econ_mcp/tools/optimized_example.py +0 -229
  185. aigroup_econ_mcp/tools/panel_data.py +0 -619
  186. aigroup_econ_mcp/tools/regression.py +0 -214
  187. aigroup_econ_mcp/tools/statistics.py +0 -154
  188. aigroup_econ_mcp/tools/time_series.py +0 -698
  189. aigroup_econ_mcp/tools/timeout.py +0 -283
  190. aigroup_econ_mcp/tools/tool_descriptions.py +0 -410
  191. aigroup_econ_mcp/tools/tool_handlers.py +0 -1016
  192. aigroup_econ_mcp/tools/tool_registry.py +0 -478
  193. aigroup_econ_mcp/tools/validation.py +0 -482
  194. aigroup_econ_mcp-1.3.3.dist-info/METADATA +0 -525
  195. aigroup_econ_mcp-1.3.3.dist-info/RECORD +0 -30
  196. aigroup_econ_mcp-1.3.3.dist-info/entry_points.txt +0 -2
  197. /aigroup_econ_mcp-1.3.3.dist-info/licenses/LICENSE → /LICENSE +0 -0
  198. {aigroup_econ_mcp-1.3.3.dist-info → aigroup_econ_mcp-2.0.1.dist-info}/WHEEL +0 -0
@@ -1,482 +0,0 @@
1
- """
2
- 统一的参数验证系统
3
- 提供类型检查、数据验证和参数验证功能
4
- """
5
-
6
- import re
7
- import numpy as np
8
- import pandas as pd
9
- from typing import Any, Dict, List, Optional, Union, Tuple, Callable
10
- from pydantic import BaseModel, Field, validator
11
- from enum import Enum
12
- import warnings
13
-
14
-
15
class ValidationError(Exception):
    """Raised when a parameter fails validation.

    Attributes:
        message: Human-readable description of the failure; also used as the
            exception's single positional argument.
        field: Name of the offending parameter, or None when not supplied.
        value: The rejected value, or None when not supplied.
    """

    def __init__(self, message: str, field: Optional[str] = None, value: Any = None):
        # `field` defaults to None, so it is annotated Optional explicitly
        # instead of relying on the deprecated implicit-Optional convention.
        self.message = message
        self.field = field
        self.value = value
        super().__init__(self.message)
22
-
23
-
24
class DataType(Enum):
    """Closed set of value kinds that a validation rule can require."""

    # Scalar kinds.
    NUMERIC = "numeric"
    INTEGER = "integer"
    FLOAT = "float"
    STRING = "string"
    BOOLEAN = "boolean"

    # Built-in container kinds.
    LIST = "list"
    DICT = "dict"

    # pandas container kinds.
    DATAFRAME = "dataframe"
    SERIES = "series"
35
-
36
-
37
class ValidationRule(BaseModel):
    """Declarative description of the checks applied to one parameter."""

    # Whether a None value is rejected.
    required: bool = Field(True, description="是否必需")
    # Expected kind of the value; the only field without a default.
    data_type: DataType = Field(..., description="数据类型")
    # Inclusive numeric bounds; None disables the corresponding bound.
    min_value: Optional[float] = Field(None, description="最小值")
    max_value: Optional[float] = Field(None, description="最大值")
    # Inclusive len() bounds; None disables the corresponding bound.
    min_length: Optional[int] = Field(None, description="最小长度")
    max_length: Optional[int] = Field(None, description="最大长度")
    # Regular expression the value must match.
    pattern: Optional[str] = Field(None, description="正则表达式模式")
    # Whitelist of accepted values.
    allowed_values: Optional[List[Any]] = Field(None, description="允许的值")
    # Extra predicate called with the value; a falsy result means failure.
    custom_validator: Optional[Callable] = Field(None, description="自定义验证函数")
48
-
49
-
50
class ParameterValidator:
    """Rule-driven validator for named parameters.

    Rules are registered per field name with ``add_rule``. A single value is
    checked with ``validate_parameter``; a whole parameter dict is checked and
    converted with ``validate_all``.
    """

    def __init__(self, strict_mode: bool = True):
        """Create a validator with no rules.

        Args:
            strict_mode: When True, ``validate_all`` raises ValidationError on
                the first failure. When False, failures are collected and
                reported through one warning, and the failing fields are left
                out of the result.
        """
        self.strict_mode = strict_mode
        self._validation_rules: Dict[str, ValidationRule] = {}

    def add_rule(self, field_name: str, rule: ValidationRule):
        """Register the rule for ``field_name``, replacing any earlier one.

        Args:
            field_name: Name of the parameter the rule applies to.
            rule: The validation rule.
        """
        self._validation_rules[field_name] = rule

    def validate_parameter(self, field_name: str, value: Any) -> Tuple[bool, Optional[str]]:
        """Validate one value against the rule registered for its field.

        Checks run in a fixed order (required, type, numeric range, length,
        pattern, allowed values, custom validator) and stop at the first
        failure.

        Args:
            field_name: Name of the parameter.
            value: Value to check.

        Returns:
            ``(True, None)`` on success or when no rule is registered for the
            field, otherwise ``(False, error_message)``.
        """
        if field_name not in self._validation_rules:
            return True, None

        rule = self._validation_rules[field_name]

        if value is None:
            # A missing value is only an error for required parameters.
            if rule.required:
                return False, f"参数 '{field_name}' 是必需的"
            return True, None

        ok, error = self._validate_data_type(value, rule.data_type)
        if not ok:
            return False, error

        if rule.min_value is not None or rule.max_value is not None:
            ok, error = self._validate_numeric_range(value, rule.min_value, rule.max_value)
            if not ok:
                return False, error

        if rule.min_length is not None or rule.max_length is not None:
            ok, error = self._validate_length(value, rule.min_length, rule.max_length)
            if not ok:
                return False, error

        if rule.pattern is not None:
            ok, error = self._validate_pattern(value, rule.pattern)
            if not ok:
                return False, error

        if rule.allowed_values is not None:
            ok, error = self._validate_allowed_values(value, rule.allowed_values)
            if not ok:
                return False, error

        if rule.custom_validator is not None:
            # The custom validator is caller-supplied code, so any exception
            # it raises is reported as a validation failure, not propagated.
            try:
                passed = rule.custom_validator(value)
            except Exception as e:
                return False, f"参数 '{field_name}' 自定义验证失败: {str(e)}"
            if not passed:
                return False, f"参数 '{field_name}' 未通过自定义验证"

        return True, None

    def _validate_data_type(self, value: Any, data_type: DataType) -> Tuple[bool, Optional[str]]:
        """Check that ``value`` is an instance of the kind named by ``data_type``.

        Returns:
            ``(True, None)`` when the type matches or ``data_type`` is not a
            known kind, otherwise ``(False, error_message)``.
        """
        # Accepted Python/NumPy/pandas types and the label used in the error
        # message, keyed by data kind.
        expectations = {
            DataType.NUMERIC: ((int, float, np.number), "数值"),
            DataType.INTEGER: ((int, np.integer), "整数"),
            DataType.FLOAT: ((float, np.floating), "浮点数"),
            DataType.STRING: (str, "字符串"),
            DataType.BOOLEAN: (bool, "布尔"),
            DataType.LIST: ((list, tuple, np.ndarray), "列表"),
            DataType.DICT: (dict, "字典"),
            DataType.DATAFRAME: (pd.DataFrame, "DataFrame"),
            DataType.SERIES: (pd.Series, "Series"),
        }

        expected = expectations.get(data_type)
        if expected is None:
            return True, None

        accepted_types, label = expected
        type_ok = isinstance(value, accepted_types)
        # bool is a subclass of int, so True/False would otherwise pass as a
        # number even though BOOLEAN is a separate kind.
        if type_ok and data_type in (DataType.NUMERIC, DataType.INTEGER) and isinstance(value, bool):
            type_ok = False

        if not type_ok:
            return False, f"期望{label}类型,实际类型: {type(value).__name__}"
        return True, None

    def _validate_numeric_range(self, value: Any, min_val: Optional[float], max_val: Optional[float]) -> Tuple[bool, Optional[str]]:
        """Check that ``value`` lies within the inclusive ``[min_val, max_val]`` range.

        A bound of None is not enforced. Values that cannot be converted to
        float, and NaN, are rejected.
        """
        try:
            numeric_value = float(value)
        except (ValueError, TypeError):
            return False, "无法转换为数值进行范围验证"

        # Every ordering comparison with NaN is False, so without this guard
        # NaN would pass any bound.
        if np.isnan(numeric_value):
            return False, f"数值 {numeric_value} 不是有效数字,无法进行范围验证"

        if min_val is not None and numeric_value < min_val:
            return False, f"数值 {numeric_value} 小于最小值 {min_val}"

        if max_val is not None and numeric_value > max_val:
            return False, f"数值 {numeric_value} 大于最大值 {max_val}"

        return True, None

    def _validate_length(self, value: Any, min_len: Optional[int], max_len: Optional[int]) -> Tuple[bool, Optional[str]]:
        """Check that ``len(value)`` lies within the inclusive ``[min_len, max_len]`` range.

        A bound of None is not enforced. Values without a length are rejected.
        """
        try:
            length = len(value)
        except TypeError:
            return False, "无法获取长度信息"

        if min_len is not None and length < min_len:
            return False, f"长度 {length} 小于最小长度 {min_len}"

        if max_len is not None and length > max_len:
            return False, f"长度 {length} 大于最大长度 {max_len}"

        return True, None

    def _validate_pattern(self, value: Any, pattern: str) -> Tuple[bool, Optional[str]]:
        """Check that the string ``value`` matches ``pattern`` from its start.

        Matching uses ``re.match``, so the pattern is anchored at the start of
        the string only. Non-string values and invalid patterns are reported
        as failures.
        """
        if not isinstance(value, str):
            return False, "模式验证仅适用于字符串类型"

        try:
            matched = re.match(pattern, value)
        except re.error as e:
            return False, f"正则表达式模式错误: {str(e)}"

        if not matched:
            return False, f"字符串 '{value}' 不匹配模式 '{pattern}'"
        return True, None

    def _validate_allowed_values(self, value: Any, allowed_values: List[Any]) -> Tuple[bool, Optional[str]]:
        """Check that ``value`` is one of ``allowed_values``."""
        if value not in allowed_values:
            return False, f"值 '{value}' 不在允许的值列表中: {allowed_values}"

        return True, None

    def validate_all(self, parameters: Dict[str, Any]) -> Dict[str, Any]:
        """Validate every field that has a registered rule.

        Only fields with a rule appear in the result; fields whose value is
        None are returned as None.

        Args:
            parameters: Mapping of parameter name to raw value.

        Returns:
            Mapping of parameter name to validated, type-converted value.

        Raises:
            ValidationError: In strict mode, on the first validation or
                conversion failure.
        """
        validated_params = {}
        errors = []

        for field_name, rule in self._validation_rules.items():
            value = parameters.get(field_name)

            is_valid, error_msg = self.validate_parameter(field_name, value)
            if not is_valid:
                if self.strict_mode:
                    raise ValidationError(error_msg, field_name, value)
                errors.append(f"{field_name}: {error_msg}")
                continue

            if value is None or not rule.data_type:
                validated_params[field_name] = value
                continue

            try:
                validated_params[field_name] = self._convert_type(value, rule.data_type)
            except Exception as e:
                error_msg = f"参数 '{field_name}' 类型转换失败: {str(e)}"
                if self.strict_mode:
                    raise ValidationError(error_msg, field_name, value) from e
                errors.append(error_msg)

        # Non-strict mode reports all collected failures through one warning.
        if errors and not self.strict_mode:
            warnings.warn(f"参数验证警告: {', '.join(errors)}")

        return validated_params

    def _convert_type(self, value: Any, data_type: DataType) -> Any:
        """Convert an already validated value to the built-in type for its kind.

        INTEGER, FLOAT, STRING and BOOLEAN values go through the matching
        built-in constructor. LIST values that are tuples or NumPy arrays
        become lists. Every other value is returned unchanged.
        """
        if data_type == DataType.INTEGER:
            return int(value)
        if data_type == DataType.FLOAT:
            return float(value)
        if data_type == DataType.STRING:
            return str(value)
        if data_type == DataType.BOOLEAN:
            return bool(value)
        if data_type == DataType.LIST and isinstance(value, (tuple, np.ndarray)):
            return list(value)
        return value
300
-
301
-
302
- # 预定义的验证器实例
303
class EconometricValidator:
    """Factory for ParameterValidator instances preloaded with econometric rules."""

    @staticmethod
    def create_data_validator() -> ParameterValidator:
        """Build a validator for the ``data``, ``y_data`` and ``x_data`` inputs.

        All three fields are required:
            data: non-empty dict whose values are all lists or NumPy arrays.
            y_data: sequence of at least 10 numeric values.
            x_data: non-empty sequence of non-empty lists or NumPy arrays.

        Returns:
            A strict-mode ParameterValidator holding the three rules.
        """
        # Named `data_validator` so the local does not shadow the `validator`
        # name imported from pydantic at module level.
        data_validator = ParameterValidator()

        data_validator.add_rule("data", ValidationRule(
            required=True,
            data_type=DataType.DICT,
            min_length=1,
            custom_validator=lambda x: all(isinstance(v, (list, np.ndarray)) for v in x.values()),
        ))

        # The LIST kind also admits NumPy arrays, whose integer elements are
        # np.integer scalars, not Python ints, so NumPy scalar types are
        # accepted explicitly.
        data_validator.add_rule("y_data", ValidationRule(
            required=True,
            data_type=DataType.LIST,
            min_length=10,
            custom_validator=lambda x: all(
                isinstance(v, (int, float, np.integer, np.floating)) for v in x
            ),
        ))

        data_validator.add_rule("x_data", ValidationRule(
            required=True,
            data_type=DataType.LIST,
            min_length=1,
            custom_validator=lambda x: all(
                isinstance(row, (list, np.ndarray)) and len(row) > 0 for row in x
            ),
        ))

        return data_validator

    @staticmethod
    def create_model_validator() -> ParameterValidator:
        """Build a validator for optional model hyper-parameters.

        Rules (all optional, bounds inclusive):
            n_estimators: integer in [1, 10000].
            max_depth: integer in [1, 100].
            learning_rate: float in [0.001, 1.0].
            alpha: float in [0.0, 100.0].

        Returns:
            A strict-mode ParameterValidator holding the four rules.
        """
        model_validator = ParameterValidator()

        model_validator.add_rule("n_estimators", ValidationRule(
            required=False,
            data_type=DataType.INTEGER,
            min_value=1,
            max_value=10000,
        ))

        model_validator.add_rule("max_depth", ValidationRule(
            required=False,
            data_type=DataType.INTEGER,
            min_value=1,
            max_value=100,
        ))

        model_validator.add_rule("learning_rate", ValidationRule(
            required=False,
            data_type=DataType.FLOAT,
            min_value=0.001,
            max_value=1.0,
        ))

        model_validator.add_rule("alpha", ValidationRule(
            required=False,
            data_type=DataType.FLOAT,
            min_value=0.0,
            max_value=100.0,
        ))

        return model_validator
370
-
371
-
372
- # 便捷验证函数
373
def validate_econometric_data(data: Dict[str, Any]) -> Dict[str, Any]:
    """Check that ``data`` is a non-empty mapping of variable name to value sequence.

    Args:
        data: Input data in the form ``{variable_name: [values]}``. Every
            value must be a list or a NumPy array.

    Returns:
        The validated data dict.

    Raises:
        ValidationError: When ``data`` is None, not a dict, empty, or has a
            value that is neither a list nor a NumPy array.
    """
    def _columns_are_sequences(columns):
        return all(isinstance(column, (list, np.ndarray)) for column in columns.values())

    data_rule = ValidationRule(
        required=True,
        data_type=DataType.DICT,
        min_length=1,
        custom_validator=_columns_are_sequences,
    )

    # A dedicated single-rule validator in its default strict mode.
    checker = ParameterValidator()
    checker.add_rule("data", data_rule)

    return checker.validate_all({"data": data})["data"]
397
-
398
-
399
def validate_model_parameters(parameters: Dict[str, Any]) -> Dict[str, Any]:
    """Validate model hyper-parameters against the predefined model rules.

    Args:
        parameters: Mapping of hyper-parameter name to raw value.

    Returns:
        The dict produced by the model validator's ``validate_all``.
    """
    model_rules = EconometricValidator.create_model_validator()
    checked = model_rules.validate_all(parameters)
    return checked
411
-
412
-
413
def validate_time_series_data(data: List[float], min_length: int = 10) -> List[float]:
    """Validate a time series and return it as a list.

    Args:
        data: The observations, as a list or a NumPy array.
        min_length: Minimum number of observations required.

    Returns:
        The observations as a list.

    Raises:
        ValidationError: When ``data`` is not a list or array, is shorter than
            ``min_length``, contains a non-numeric element, or contains a
            missing value.
    """
    if not isinstance(data, (list, np.ndarray)):
        raise ValidationError("时间序列数据必须是列表或数组")

    if len(data) < min_length:
        raise ValidationError(f"时间序列数据长度不足,至少需要 {min_length} 个观测点")

    # NumPy arrays are accepted above, and their integer elements are
    # np.integer scalars that do not subclass int, so NumPy scalar types are
    # listed explicitly.
    numeric_types = (int, float, np.integer, np.floating)
    if not all(isinstance(x, numeric_types) for x in data):
        raise ValidationError("时间序列数据必须全部为数值")

    # Reject missing values (NaN).
    if any(pd.isna(x) for x in data):
        raise ValidationError("时间序列数据包含缺失值")

    return list(data)
441
-
442
def validate_numeric_data(data: List[float], data_name: str = "数据") -> bool:
    """Validate that ``data`` is a non-empty, fully numeric sequence.

    Args:
        data: The values, as a list or a NumPy array.
        data_name: Label for the data, used in error messages.

    Returns:
        True when validation passes.

    Raises:
        ValidationError: When ``data`` is not a list or array, is empty,
            contains a non-numeric element, or contains a missing value.
    """
    if not isinstance(data, (list, np.ndarray)):
        raise ValidationError(f"{data_name} 必须是列表或数组")

    if len(data) == 0:
        raise ValidationError(f"{data_name} 不能为空")

    # NumPy arrays are accepted above, and their integer elements are
    # np.integer scalars that do not subclass int, so NumPy scalar types are
    # listed explicitly.
    numeric_types = (int, float, np.integer, np.floating)
    if not all(isinstance(x, numeric_types) for x in data):
        raise ValidationError(f"{data_name} 必须全部为数值")

    # Reject missing values (NaN).
    if any(pd.isna(x) for x in data):
        raise ValidationError(f"{data_name} 包含缺失值")

    return True
470
-
471
- # 导出主要类和函数
472
__all__ = [
    # Exception and rule model
    "ValidationError",
    "DataType",
    "ValidationRule",
    # Validators
    "ParameterValidator",
    "EconometricValidator",
    # Convenience functions
    "validate_econometric_data",
    "validate_model_parameters",
    "validate_time_series_data",
    "validate_numeric_data",
]