aigroup-econ-mcp 1.4.3__py3-none-any.whl → 2.0.1__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the package contents as they appear in the public registry.
- PKG-INFO +344 -322
- README.md +335 -320
- __init__.py +1 -1
- aigroup_econ_mcp-2.0.1.dist-info/METADATA +732 -0
- aigroup_econ_mcp-2.0.1.dist-info/RECORD +170 -0
- cli.py +4 -0
- econometrics/advanced_methods/modern_computing_machine_learning/__init__.py +30 -0
- econometrics/advanced_methods/modern_computing_machine_learning/causal_forest.py +253 -0
- econometrics/advanced_methods/modern_computing_machine_learning/double_ml.py +268 -0
- econometrics/advanced_methods/modern_computing_machine_learning/gradient_boosting.py +249 -0
- econometrics/advanced_methods/modern_computing_machine_learning/hierarchical_clustering.py +243 -0
- econometrics/advanced_methods/modern_computing_machine_learning/kmeans_clustering.py +293 -0
- econometrics/advanced_methods/modern_computing_machine_learning/neural_network.py +264 -0
- econometrics/advanced_methods/modern_computing_machine_learning/random_forest.py +195 -0
- econometrics/advanced_methods/modern_computing_machine_learning/support_vector_machine.py +226 -0
- econometrics/advanced_methods/modern_computing_machine_learning/test_all_modules.py +329 -0
- econometrics/advanced_methods/modern_computing_machine_learning/test_report.md +107 -0
- econometrics/causal_inference/__init__.py +66 -0
- econometrics/causal_inference/causal_identification_strategy/__init__.py +104 -0
- econometrics/causal_inference/causal_identification_strategy/control_function.py +112 -0
- econometrics/causal_inference/causal_identification_strategy/difference_in_differences.py +107 -0
- econometrics/causal_inference/causal_identification_strategy/event_study.py +119 -0
- econometrics/causal_inference/causal_identification_strategy/first_difference.py +89 -0
- econometrics/causal_inference/causal_identification_strategy/fixed_effects.py +103 -0
- econometrics/causal_inference/causal_identification_strategy/hausman_test.py +69 -0
- econometrics/causal_inference/causal_identification_strategy/instrumental_variables.py +145 -0
- econometrics/causal_inference/causal_identification_strategy/mediation_analysis.py +121 -0
- econometrics/causal_inference/causal_identification_strategy/moderation_analysis.py +109 -0
- econometrics/causal_inference/causal_identification_strategy/propensity_score_matching.py +140 -0
- econometrics/causal_inference/causal_identification_strategy/random_effects.py +100 -0
- econometrics/causal_inference/causal_identification_strategy/regression_discontinuity.py +98 -0
- econometrics/causal_inference/causal_identification_strategy/synthetic_control.py +111 -0
- econometrics/causal_inference/causal_identification_strategy/triple_difference.py +86 -0
- econometrics/distribution_analysis/__init__.py +28 -0
- econometrics/distribution_analysis/oaxaca_blinder.py +184 -0
- econometrics/distribution_analysis/time_series_decomposition.py +152 -0
- econometrics/distribution_analysis/variance_decomposition.py +179 -0
- econometrics/missing_data/__init__.py +18 -0
- econometrics/missing_data/imputation_methods.py +219 -0
- econometrics/nonparametric/__init__.py +35 -0
- econometrics/nonparametric/gam_model.py +117 -0
- econometrics/nonparametric/kernel_regression.py +161 -0
- econometrics/nonparametric/quantile_regression.py +249 -0
- econometrics/nonparametric/spline_regression.py +100 -0
- econometrics/spatial_econometrics/__init__.py +68 -0
- econometrics/spatial_econometrics/geographically_weighted_regression.py +211 -0
- econometrics/spatial_econometrics/gwr_simple.py +154 -0
- econometrics/spatial_econometrics/spatial_autocorrelation.py +356 -0
- econometrics/spatial_econometrics/spatial_durbin_model.py +177 -0
- econometrics/spatial_econometrics/spatial_regression.py +315 -0
- econometrics/spatial_econometrics/spatial_weights.py +226 -0
- econometrics/specific_data_modeling/micro_discrete_limited_data/README.md +164 -0
- econometrics/specific_data_modeling/micro_discrete_limited_data/__init__.py +40 -0
- econometrics/specific_data_modeling/micro_discrete_limited_data/count_data_models.py +311 -0
- econometrics/specific_data_modeling/micro_discrete_limited_data/discrete_choice_models.py +294 -0
- econometrics/specific_data_modeling/micro_discrete_limited_data/limited_dependent_variable_models.py +282 -0
- econometrics/statistical_inference/__init__.py +21 -0
- econometrics/statistical_inference/bootstrap_methods.py +162 -0
- econometrics/statistical_inference/permutation_test.py +177 -0
- econometrics/survival_analysis/__init__.py +18 -0
- econometrics/survival_analysis/survival_models.py +259 -0
- econometrics/tests/causal_inference_tests/__init__.py +3 -0
- econometrics/tests/causal_inference_tests/detailed_test.py +441 -0
- econometrics/tests/causal_inference_tests/test_all_methods.py +418 -0
- econometrics/tests/causal_inference_tests/test_causal_identification_strategy.py +202 -0
- econometrics/tests/causal_inference_tests/test_difference_in_differences.py +53 -0
- econometrics/tests/causal_inference_tests/test_instrumental_variables.py +44 -0
- econometrics/tests/specific_data_modeling_tests/test_micro_discrete_limited_data.py +189 -0
- econometrics//321/206/320/254/320/272/321/205/342/225/235/320/220/321/205/320/237/320/241/321/205/320/264/320/267/321/207/342/226/222/342/225/227/321/204/342/225/235/320/250/321/205/320/225/320/230/321/207/342/225/221/320/267/321/205/320/230/320/226/321/206/320/256/320/240.md +544 -0
- pyproject.toml +9 -2
- server.py +15 -1
- tools/__init__.py +75 -1
- tools/causal_inference_adapter.py +658 -0
- tools/distribution_analysis_adapter.py +121 -0
- tools/gwr_simple_adapter.py +54 -0
- tools/machine_learning_adapter.py +567 -0
- tools/mcp_tool_groups/__init__.py +15 -1
- tools/mcp_tool_groups/causal_inference_tools.py +643 -0
- tools/mcp_tool_groups/distribution_analysis_tools.py +169 -0
- tools/mcp_tool_groups/machine_learning_tools.py +422 -0
- tools/mcp_tool_groups/microecon_tools.py +325 -0
- tools/mcp_tool_groups/missing_data_tools.py +117 -0
- tools/mcp_tool_groups/nonparametric_tools.py +225 -0
- tools/mcp_tool_groups/spatial_econometrics_tools.py +323 -0
- tools/mcp_tool_groups/statistical_inference_tools.py +131 -0
- tools/mcp_tools_registry.py +13 -3
- tools/microecon_adapter.py +412 -0
- tools/missing_data_adapter.py +73 -0
- tools/nonparametric_adapter.py +190 -0
- tools/spatial_econometrics_adapter.py +318 -0
- tools/statistical_inference_adapter.py +90 -0
- tools/survival_analysis_adapter.py +46 -0
- aigroup_econ_mcp-1.4.3.dist-info/METADATA +0 -710
- aigroup_econ_mcp-1.4.3.dist-info/RECORD +0 -92
- {aigroup_econ_mcp-1.4.3.dist-info → aigroup_econ_mcp-2.0.1.dist-info}/WHEEL +0 -0
- {aigroup_econ_mcp-1.4.3.dist-info → aigroup_econ_mcp-2.0.1.dist-info}/entry_points.txt +0 -0
- {aigroup_econ_mcp-1.4.3.dist-info → aigroup_econ_mcp-2.0.1.dist-info}/licenses/LICENSE +0 -0
tools/machine_learning_adapter.py (new file)
@@ -0,0 +1,567 @@
+"""
+Machine Learning Adapter for Econometrics MCP Tools
+Provides unified interfaces for 8 machine learning models
+"""
+
+import numpy as np
+import pandas as pd
+from typing import Union, Optional, Dict, Any, List
+import json
+import logging
+
+# Import econometrics machine learning modules
+from econometrics.advanced_methods.modern_computing_machine_learning import (
+    random_forest_analysis,
+    gradient_boosting_analysis,
+    svm_analysis,
+    neural_network_analysis,
+    kmeans_analysis,
+    hierarchical_clustering_analysis,
+    double_ml_analysis,
+    causal_forest_analysis
+)
+
+from tools.data_loader import DataLoader
+from tools.output_formatter import OutputFormatter
+
+# Set up logging
+logger = logging.getLogger(__name__)
+
+
+def convert_to_serializable(obj: Any) -> Any:
+    """
+    Recursively convert numpy arrays and other non-serializable objects into a JSON-serializable format
+    """
+    if isinstance(obj, np.ndarray):
+        return obj.tolist()
+    elif isinstance(obj, dict):
+        return {key: convert_to_serializable(value) for key, value in obj.items()}
+    elif isinstance(obj, (list, tuple)):
+        return [convert_to_serializable(item) for item in obj]
+    elif isinstance(obj, (np.integer, np.floating)):
+        return float(obj)
+    else:
+        return obj
+
+
+def format_output(results: Dict[str, Any], output_format: str = 'json', save_path: Optional[str] = None) -> str:
+    """
+    Unified output formatting function.
+    Follows the implementation approach of the OLS adapter.
+
+    Args:
+        results: result dictionary
+        output_format: output format ('json', 'markdown', 'text')
+        save_path: save path (optional)
+
+    Returns:
+        formatted string result
+    """
+    # Convert all numpy arrays to a serializable format
+    serializable_results = convert_to_serializable(results)
+
+    if output_format == 'json':
+        json_result = json.dumps(serializable_results, ensure_ascii=False, indent=2)
+        if save_path:
+            OutputFormatter.save_to_file(json_result, save_path)
+            return f"Analysis complete! Results saved to: {save_path}\n\n{json_result}"
+        return json_result
+    else:
+        # For non-JSON formats, return JSON directly (ML results do not yet support Markdown formatting)
+        json_result = json.dumps(serializable_results, ensure_ascii=False, indent=2)
+        if save_path:
+            OutputFormatter.save_to_file(json_result, save_path)
+            return f"Analysis complete! Results saved to: {save_path}\n\n{json_result}"
+        return json_result
+
+
+def random_forest_adapter(
+    X_data: Optional[Union[List[float], List[List[float]]]] = None,
+    y_data: Optional[List[float]] = None,
+    file_path: Optional[str] = None,
+    feature_names: Optional[List[str]] = None,
+    problem_type: str = 'regression',
+    test_size: float = 0.2,
+    n_estimators: int = 100,
+    max_depth: Optional[int] = None,
+    random_state: int = 42,
+    output_format: str = 'json',
+    save_path: Optional[str] = None
+) -> str:
+    """Random Forest analysis adapter"""
+    try:
+        if file_path:
+            data = DataLoader.load_from_file(file_path)
+            if isinstance(data, dict):
+                X_data = data.get('X', data.get('features'))
+                y_data = data.get('y', data.get('target'))
+                if feature_names is None:
+                    feature_names = data.get('feature_names')
+
+        if X_data is None or y_data is None:
+            raise ValueError("X_data and y_data must be provided or loaded from file")
+
+        X = np.array(X_data)
+        y = np.array(y_data)
+
+        if X.ndim == 1:
+            X = X.reshape(-1, 1)
+
+        results = random_forest_analysis(
+            X=X, y=y, problem_type=problem_type, test_size=test_size,
+            n_estimators=n_estimators, max_depth=max_depth, random_state=random_state
+        )
+
+        formatted_results = {
+            'model_type': 'random_forest',
+            'problem_type': problem_type,
+            'train_results': results['train_results'],
+            'test_results': results['test_results'],
+            'feature_importances': results['feature_importances'].tolist(),
+            'feature_names': feature_names,
+            'model_parameters': {
+                'n_estimators': n_estimators,
+                'max_depth': max_depth,
+                'test_size': test_size,
+                'random_state': random_state
+            }
+        }
+
+        return format_output(formatted_results, output_format, save_path)
+
+    except Exception as e:
+        logger.error(f"Random Forest failed: {str(e)}")
+        return format_output({'error': str(e)}, output_format)
+
+
+def gradient_boosting_adapter(
+    X_data: Optional[Union[List[float], List[List[float]]]] = None,
+    y_data: Optional[List[float]] = None,
+    file_path: Optional[str] = None,
+    feature_names: Optional[List[str]] = None,
+    algorithm: str = 'sklearn',
+    problem_type: str = 'regression',
+    test_size: float = 0.2,
+    n_estimators: int = 100,
+    learning_rate: float = 0.1,
+    max_depth: int = 3,
+    random_state: int = 42,
+    output_format: str = 'json',
+    save_path: Optional[str] = None
+) -> str:
+    """Gradient Boosting analysis adapter"""
+    try:
+        if file_path:
+            data = DataLoader.load_from_file(file_path)
+            if isinstance(data, dict):
+                X_data = data.get('X', data.get('features'))
+                y_data = data.get('y', data.get('target'))
+                if feature_names is None:
+                    feature_names = data.get('feature_names')
+
+        if X_data is None or y_data is None:
+            raise ValueError("X_data and y_data must be provided")
+
+        X = np.array(X_data)
+        y = np.array(y_data)
+
+        if X.ndim == 1:
+            X = X.reshape(-1, 1)
+
+        results = gradient_boosting_analysis(
+            X=X, y=y, algorithm=algorithm, problem_type=problem_type,
+            test_size=test_size, n_estimators=n_estimators,
+            learning_rate=learning_rate, max_depth=max_depth, random_state=random_state
+        )
+
+        formatted_results = {
+            'model_type': 'gradient_boosting',
+            'algorithm': algorithm,
+            'problem_type': problem_type,
+            'train_results': results['train_results'],
+            'test_results': results['test_results'],
+            'feature_importances': results['feature_importances'].tolist() if hasattr(results['feature_importances'], 'tolist') else results['feature_importances'],
+            'feature_names': feature_names,
+            'model_parameters': {
+                'n_estimators': n_estimators,
+                'learning_rate': learning_rate,
+                'max_depth': max_depth,
+                'test_size': test_size
+            }
+        }
+
+        return format_output(formatted_results, output_format, save_path)
+
+    except Exception as e:
+        logger.error(f"Gradient Boosting failed: {str(e)}")
+        return format_output({'error': str(e)}, output_format)
+
+
+def svm_adapter(
+    X_data: Optional[Union[List[float], List[List[float]]]] = None,
+    y_data: Optional[List[float]] = None,
+    file_path: Optional[str] = None,
+    feature_names: Optional[List[str]] = None,
+    problem_type: str = 'regression',
+    kernel: str = 'rbf',
+    test_size: float = 0.2,
+    C: float = 1.0,
+    gamma: str = 'scale',
+    random_state: int = 42,
+    output_format: str = 'json',
+    save_path: Optional[str] = None
+) -> str:
+    """SVM analysis adapter"""
+    try:
+        if file_path:
+            data = DataLoader.load_from_file(file_path)
+            if isinstance(data, dict):
+                X_data = data.get('X', data.get('features'))
+                y_data = data.get('y', data.get('target'))
+                if feature_names is None:
+                    feature_names = data.get('feature_names')
+
+        if X_data is None or y_data is None:
+            raise ValueError("X_data and y_data must be provided")
+
+        X = np.array(X_data)
+        y = np.array(y_data)
+
+        if X.ndim == 1:
+            X = X.reshape(-1, 1)
+
+        results = svm_analysis(
+            X=X, y=y, problem_type=problem_type, kernel=kernel,
+            test_size=test_size, C=C, gamma=gamma, random_state=random_state
+        )
+
+        formatted_results = {
+            'model_type': 'svm',
+            'problem_type': problem_type,
+            'kernel': kernel,
+            'train_results': results['train_results'],
+            'test_results': results['test_results'],
+            'feature_names': feature_names,
+            'model_parameters': {
+                'C': C,
+                'gamma': gamma,
+                'test_size': test_size
+            }
+        }
+
+        if problem_type == 'classification':
+            formatted_results['train_proba_shape'] = results['train_proba'].shape if results['train_proba'] is not None else None
+            formatted_results['test_proba_shape'] = results['test_proba'].shape if results['test_proba'] is not None else None
+
+        return format_output(formatted_results, output_format, save_path)
+
+    except Exception as e:
+        logger.error(f"SVM failed: {str(e)}")
+        return format_output({'error': str(e)}, output_format)
+
+
+def neural_network_adapter(
+    X_data: Optional[Union[List[float], List[List[float]]]] = None,
+    y_data: Optional[List[float]] = None,
+    file_path: Optional[str] = None,
+    feature_names: Optional[List[str]] = None,
+    problem_type: str = 'regression',
+    hidden_layer_sizes: tuple = (100,),
+    activation: str = 'relu',
+    solver: str = 'adam',
+    test_size: float = 0.2,
+    alpha: float = 0.0001,
+    learning_rate: str = 'constant',
+    learning_rate_init: float = 0.001,
+    max_iter: int = 200,
+    random_state: int = 42,
+    output_format: str = 'json',
+    save_path: Optional[str] = None
+) -> str:
+    """Neural Network analysis adapter"""
+    try:
+        if file_path:
+            data = DataLoader.load_from_file(file_path)
+            if isinstance(data, dict):
+                X_data = data.get('X', data.get('features'))
+                y_data = data.get('y', data.get('target'))
+                if feature_names is None:
+                    feature_names = data.get('feature_names')
+
+        if X_data is None or y_data is None:
+            raise ValueError("X_data and y_data must be provided")
+
+        X = np.array(X_data)
+        y = np.array(y_data)
+
+        if X.ndim == 1:
+            X = X.reshape(-1, 1)
+
+        results = neural_network_analysis(
+            X=X, y=y, problem_type=problem_type, hidden_layer_sizes=hidden_layer_sizes,
+            activation=activation, solver=solver, test_size=test_size, alpha=alpha,
+            learning_rate=learning_rate, learning_rate_init=learning_rate_init,
+            max_iter=max_iter, random_state=random_state
+        )
+
+        formatted_results = {
+            'model_type': 'neural_network',
+            'problem_type': problem_type,
+            'train_results': results['train_results'],
+            'test_results': results['test_results'],
+            'feature_names': feature_names,
+            'model_parameters': {
+                'hidden_layer_sizes': hidden_layer_sizes,
+                'activation': activation,
+                'solver': solver,
+                'alpha': alpha,
+                'learning_rate': learning_rate,
+                'learning_rate_init': learning_rate_init,
+                'max_iter': max_iter
+            }
+        }
+
+        if problem_type == 'classification':
+            formatted_results['train_proba_shape'] = results['train_proba'].shape if results['train_proba'] is not None else None
+            formatted_results['test_proba_shape'] = results['test_proba'].shape if results['test_proba'] is not None else None
+
+        return format_output(formatted_results, output_format, save_path)
+
+    except Exception as e:
+        logger.error(f"Neural Network failed: {str(e)}")
+        return format_output({'error': str(e)}, output_format)
+
+
+def kmeans_clustering_adapter(
+    X_data: Optional[Union[List[float], List[List[float]]]] = None,
+    file_path: Optional[str] = None,
+    feature_names: Optional[List[str]] = None,
+    n_clusters: int = 8,
+    init: str = 'k-means++',
+    n_init: int = 10,
+    max_iter: int = 300,
+    random_state: int = 42,
+    algorithm: str = 'lloyd',
+    use_minibatch: bool = False,
+    batch_size: int = 1000,
+    output_format: str = 'json',
+    save_path: Optional[str] = None
+) -> str:
+    """K-Means Clustering analysis adapter"""
+    try:
+        if file_path:
+            data = DataLoader.load_from_file(file_path)
+            if isinstance(data, dict):
+                X_data = data.get('X', data.get('features'))
+                if feature_names is None:
+                    feature_names = data.get('feature_names')
+
+        if X_data is None:
+            raise ValueError("X_data must be provided")
+
+        X = np.array(X_data)
+
+        if X.ndim == 1:
+            X = X.reshape(-1, 1)
+
+        results = kmeans_analysis(
+            X=X, n_clusters=n_clusters, init=init, n_init=n_init,
+            max_iter=max_iter, random_state=random_state, algorithm=algorithm,
+            use_minibatch=use_minibatch, batch_size=batch_size
+        )
+
+        formatted_results = {
+            'model_type': 'kmeans_clustering',
+            'labels': results['labels'].tolist(),
+            'cluster_centers': results['cluster_centers'].tolist(),
+            'metrics': results['metrics'],
+            'feature_names': feature_names,
+            'model_parameters': {
+                'n_clusters': n_clusters,
+                'init': init,
+                'n_init': n_init,
+                'max_iter': max_iter,
+                'algorithm': algorithm,
+                'use_minibatch': use_minibatch
+            }
+        }
+
+        return format_output(formatted_results, output_format, save_path)
+
+    except Exception as e:
+        logger.error(f"K-Means Clustering failed: {str(e)}")
+        return format_output({'error': str(e)}, output_format)
+
+
+def hierarchical_clustering_adapter(
+    X_data: Optional[Union[List[float], List[List[float]]]] = None,
+    file_path: Optional[str] = None,
+    feature_names: Optional[List[str]] = None,
+    n_clusters: int = 2,
+    linkage: str = 'ward',
+    metric: str = 'euclidean',
+    output_format: str = 'json',
+    save_path: Optional[str] = None
+) -> str:
+    """Hierarchical Clustering analysis adapter"""
+    try:
+        if file_path:
+            data = DataLoader.load_from_file(file_path)
+            if isinstance(data, dict):
+                X_data = data.get('X', data.get('features'))
+                if feature_names is None:
+                    feature_names = data.get('feature_names')
+
+        if X_data is None:
+            raise ValueError("X_data must be provided")
+
+        X = np.array(X_data)
+
+        if X.ndim == 1:
+            X = X.reshape(-1, 1)
+
+        results = hierarchical_clustering_analysis(
+            X=X, n_clusters=n_clusters, linkage=linkage, metric=metric
+        )
+
+        formatted_results = {
+            'model_type': 'hierarchical_clustering',
+            'labels': results['labels'].tolist(),
+            'metrics': results['metrics'],
+            'feature_names': feature_names,
+            'model_parameters': {
+                'n_clusters': n_clusters,
+                'linkage': linkage,
+                'metric': metric
+            }
+        }
+
+        return format_output(formatted_results, output_format, save_path)
+
+    except Exception as e:
+        logger.error(f"Hierarchical Clustering failed: {str(e)}")
+        return format_output({'error': str(e)}, output_format)
+
+
+def double_ml_adapter(
+    X_data: Optional[Union[List[float], List[List[float]]]] = None,
+    y_data: Optional[List[float]] = None,
+    d_data: Optional[List[float]] = None,
+    file_path: Optional[str] = None,
+    feature_names: Optional[List[str]] = None,
+    treatment_type: str = 'continuous',
+    n_folds: int = 5,
+    random_state: int = 42,
+    output_format: str = 'json',
+    save_path: Optional[str] = None
+) -> str:
+    """Double Machine Learning analysis adapter"""
+    try:
+        if file_path:
+            data = DataLoader.load_from_file(file_path)
+            if isinstance(data, dict):
+                X_data = data.get('X', data.get('features'))
+                y_data = data.get('y', data.get('outcome'))
+                d_data = data.get('d', data.get('treatment'))
+                if feature_names is None:
+                    feature_names = data.get('feature_names')
+
+        if X_data is None or y_data is None or d_data is None:
+            raise ValueError("X_data, y_data, and d_data must be provided")
+
+        X = np.array(X_data)
+        y = np.array(y_data)
+        d = np.array(d_data)
+
+        if X.ndim == 1:
+            X = X.reshape(-1, 1)
+
+        results = double_ml_analysis(
+            X=X, y=y, d=d, treatment_type=treatment_type,
+            n_folds=n_folds, random_state=random_state
+        )
+
+        formatted_results = {
+            'model_type': 'double_ml',
+            'treatment_type': treatment_type,
+            'effect': float(results['effect']),
+            'se': float(results['se']),
+            'ci': results['ci'],
+            'pval': float(results['pval']),
+            'feature_names': feature_names,
+            'model_parameters': {
+                'treatment_type': treatment_type,
+                'n_folds': n_folds,
+                'random_state': random_state
+            }
+        }
+
+        return format_output(formatted_results, output_format, save_path)
+
+    except Exception as e:
+        logger.error(f"Double ML failed: {str(e)}")
+        return format_output({'error': str(e)}, output_format)
+
+
+def causal_forest_adapter(
+    X_data: Optional[Union[List[float], List[List[float]]]] = None,
+    y_data: Optional[List[float]] = None,
+    w_data: Optional[List[float]] = None,
+    file_path: Optional[str] = None,
+    feature_names: Optional[List[str]] = None,
+    n_estimators: int = 100,
+    min_samples_leaf: int = 5,
+    max_depth: Optional[int] = None,
+    random_state: int = 42,
+    honest: bool = True,
+    output_format: str = 'json',
+    save_path: Optional[str] = None
+) -> str:
+    """Causal Forest analysis adapter"""
+    try:
+        if file_path:
+            data = DataLoader.load_from_file(file_path)
+            if isinstance(data, dict):
+                X_data = data.get('X', data.get('features'))
+                y_data = data.get('y', data.get('outcome'))
+                w_data = data.get('w', data.get('treatment'))
+                if feature_names is None:
+                    feature_names = data.get('feature_names')
+
+        if X_data is None or y_data is None or w_data is None:
+            raise ValueError("X_data, y_data, and w_data must be provided")
+
+        X = np.array(X_data)
+        y = np.array(y_data)
+        w = np.array(w_data)
+
+        if X.ndim == 1:
+            X = X.reshape(-1, 1)
+
+        results = causal_forest_analysis(
+            X=X, y=y, w=w, n_estimators=n_estimators,
+            min_samples_leaf=min_samples_leaf, max_depth=max_depth,
+            random_state=random_state, honest=honest
+        )
+
+        te_results = results['treatment_effects']
+        formatted_results = {
+            'model_type': 'causal_forest',
+            'cate': te_results['cate'].tolist(),
+            'ate': float(te_results['ate']),
+            'cate_se': float(te_results['cate_se']),
+            'feature_names': feature_names,
+            'model_parameters': {
+                'n_estimators': n_estimators,
+                'min_samples_leaf': min_samples_leaf,
+                'max_depth': max_depth,
+                'honest': honest,
+                'random_state': random_state
+            }
+        }
+
+        return format_output(formatted_results, output_format, save_path)
+
+    except Exception as e:
+        logger.error(f"Causal Forest failed: {str(e)}")
+        return format_output({'error': str(e)}, output_format)
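For orientation, the following is a minimal usage sketch (not part of the diff) of calling one of the new adapters directly. It assumes the wheel exposes the top-level tools package as its own imports suggest, that the underlying random_forest_analysis returns the keys the adapter reads, and it uses made-up synthetic data purely for illustration.

    # Hypothetical example; import path and data are assumptions, not from the package docs.
    import json

    from tools.machine_learning_adapter import random_forest_adapter

    # Tiny synthetic regression dataset: two features, continuous target.
    X = [[1.0, 2.0], [2.0, 1.0], [3.0, 4.0], [4.0, 3.0], [5.0, 6.0],
         [6.0, 5.0], [7.0, 8.0], [8.0, 7.0], [9.0, 10.0], [10.0, 9.0]]
    y = [3.1, 2.9, 7.2, 6.8, 11.1, 10.9, 15.2, 14.8, 19.1, 18.9]

    result_json = random_forest_adapter(
        X_data=X,
        y_data=y,
        feature_names=["x1", "x2"],
        problem_type="regression",
        n_estimators=50,
        test_size=0.2,
        output_format="json",
    )

    result = json.loads(result_json)      # adapters return a JSON string
    print(result["model_type"])           # expected: "random_forest"
    print(result["model_parameters"])

All eight adapters follow the same pattern: pass inline lists or a file_path, receive a JSON string, and optionally persist it via save_path.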
tools/mcp_tool_groups/__init__.py
@@ -1 +1,15 @@
-"""MCP tool group package"""
+"""MCP tool group package"""
+
+from .basic_parametric_tools import BasicParametricTools
+from .model_specification_tools import ModelSpecificationTools
+from .time_series_tools import TimeSeriesTools
+from .causal_inference_tools import CausalInferenceTools
+from .machine_learning_tools import MachineLearningTools
+
+__all__ = [
+    "BasicParametricTools",
+    "ModelSpecificationTools",
+    "TimeSeriesTools",
+    "CausalInferenceTools",
+    "MachineLearningTools"
+]