aigroup-econ-mcp 1.4.3__py3-none-any.whl → 2.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (97)
  1. PKG-INFO +344 -322
  2. README.md +335 -320
  3. __init__.py +1 -1
  4. aigroup_econ_mcp-2.0.1.dist-info/METADATA +732 -0
  5. aigroup_econ_mcp-2.0.1.dist-info/RECORD +170 -0
  6. cli.py +4 -0
  7. econometrics/advanced_methods/modern_computing_machine_learning/__init__.py +30 -0
  8. econometrics/advanced_methods/modern_computing_machine_learning/causal_forest.py +253 -0
  9. econometrics/advanced_methods/modern_computing_machine_learning/double_ml.py +268 -0
  10. econometrics/advanced_methods/modern_computing_machine_learning/gradient_boosting.py +249 -0
  11. econometrics/advanced_methods/modern_computing_machine_learning/hierarchical_clustering.py +243 -0
  12. econometrics/advanced_methods/modern_computing_machine_learning/kmeans_clustering.py +293 -0
  13. econometrics/advanced_methods/modern_computing_machine_learning/neural_network.py +264 -0
  14. econometrics/advanced_methods/modern_computing_machine_learning/random_forest.py +195 -0
  15. econometrics/advanced_methods/modern_computing_machine_learning/support_vector_machine.py +226 -0
  16. econometrics/advanced_methods/modern_computing_machine_learning/test_all_modules.py +329 -0
  17. econometrics/advanced_methods/modern_computing_machine_learning/test_report.md +107 -0
  18. econometrics/causal_inference/__init__.py +66 -0
  19. econometrics/causal_inference/causal_identification_strategy/__init__.py +104 -0
  20. econometrics/causal_inference/causal_identification_strategy/control_function.py +112 -0
  21. econometrics/causal_inference/causal_identification_strategy/difference_in_differences.py +107 -0
  22. econometrics/causal_inference/causal_identification_strategy/event_study.py +119 -0
  23. econometrics/causal_inference/causal_identification_strategy/first_difference.py +89 -0
  24. econometrics/causal_inference/causal_identification_strategy/fixed_effects.py +103 -0
  25. econometrics/causal_inference/causal_identification_strategy/hausman_test.py +69 -0
  26. econometrics/causal_inference/causal_identification_strategy/instrumental_variables.py +145 -0
  27. econometrics/causal_inference/causal_identification_strategy/mediation_analysis.py +121 -0
  28. econometrics/causal_inference/causal_identification_strategy/moderation_analysis.py +109 -0
  29. econometrics/causal_inference/causal_identification_strategy/propensity_score_matching.py +140 -0
  30. econometrics/causal_inference/causal_identification_strategy/random_effects.py +100 -0
  31. econometrics/causal_inference/causal_identification_strategy/regression_discontinuity.py +98 -0
  32. econometrics/causal_inference/causal_identification_strategy/synthetic_control.py +111 -0
  33. econometrics/causal_inference/causal_identification_strategy/triple_difference.py +86 -0
  34. econometrics/distribution_analysis/__init__.py +28 -0
  35. econometrics/distribution_analysis/oaxaca_blinder.py +184 -0
  36. econometrics/distribution_analysis/time_series_decomposition.py +152 -0
  37. econometrics/distribution_analysis/variance_decomposition.py +179 -0
  38. econometrics/missing_data/__init__.py +18 -0
  39. econometrics/missing_data/imputation_methods.py +219 -0
  40. econometrics/nonparametric/__init__.py +35 -0
  41. econometrics/nonparametric/gam_model.py +117 -0
  42. econometrics/nonparametric/kernel_regression.py +161 -0
  43. econometrics/nonparametric/quantile_regression.py +249 -0
  44. econometrics/nonparametric/spline_regression.py +100 -0
  45. econometrics/spatial_econometrics/__init__.py +68 -0
  46. econometrics/spatial_econometrics/geographically_weighted_regression.py +211 -0
  47. econometrics/spatial_econometrics/gwr_simple.py +154 -0
  48. econometrics/spatial_econometrics/spatial_autocorrelation.py +356 -0
  49. econometrics/spatial_econometrics/spatial_durbin_model.py +177 -0
  50. econometrics/spatial_econometrics/spatial_regression.py +315 -0
  51. econometrics/spatial_econometrics/spatial_weights.py +226 -0
  52. econometrics/specific_data_modeling/micro_discrete_limited_data/README.md +164 -0
  53. econometrics/specific_data_modeling/micro_discrete_limited_data/__init__.py +40 -0
  54. econometrics/specific_data_modeling/micro_discrete_limited_data/count_data_models.py +311 -0
  55. econometrics/specific_data_modeling/micro_discrete_limited_data/discrete_choice_models.py +294 -0
  56. econometrics/specific_data_modeling/micro_discrete_limited_data/limited_dependent_variable_models.py +282 -0
  57. econometrics/statistical_inference/__init__.py +21 -0
  58. econometrics/statistical_inference/bootstrap_methods.py +162 -0
  59. econometrics/statistical_inference/permutation_test.py +177 -0
  60. econometrics/survival_analysis/__init__.py +18 -0
  61. econometrics/survival_analysis/survival_models.py +259 -0
  62. econometrics/tests/causal_inference_tests/__init__.py +3 -0
  63. econometrics/tests/causal_inference_tests/detailed_test.py +441 -0
  64. econometrics/tests/causal_inference_tests/test_all_methods.py +418 -0
  65. econometrics/tests/causal_inference_tests/test_causal_identification_strategy.py +202 -0
  66. econometrics/tests/causal_inference_tests/test_difference_in_differences.py +53 -0
  67. econometrics/tests/causal_inference_tests/test_instrumental_variables.py +44 -0
  68. econometrics/tests/specific_data_modeling_tests/test_micro_discrete_limited_data.py +189 -0
  69. econometrics/未开发大类优先级分析.md (priority analysis of not-yet-implemented model categories) +544 -0
  70. pyproject.toml +9 -2
  71. server.py +15 -1
  72. tools/__init__.py +75 -1
  73. tools/causal_inference_adapter.py +658 -0
  74. tools/distribution_analysis_adapter.py +121 -0
  75. tools/gwr_simple_adapter.py +54 -0
  76. tools/machine_learning_adapter.py +567 -0
  77. tools/mcp_tool_groups/__init__.py +15 -1
  78. tools/mcp_tool_groups/causal_inference_tools.py +643 -0
  79. tools/mcp_tool_groups/distribution_analysis_tools.py +169 -0
  80. tools/mcp_tool_groups/machine_learning_tools.py +422 -0
  81. tools/mcp_tool_groups/microecon_tools.py +325 -0
  82. tools/mcp_tool_groups/missing_data_tools.py +117 -0
  83. tools/mcp_tool_groups/nonparametric_tools.py +225 -0
  84. tools/mcp_tool_groups/spatial_econometrics_tools.py +323 -0
  85. tools/mcp_tool_groups/statistical_inference_tools.py +131 -0
  86. tools/mcp_tools_registry.py +13 -3
  87. tools/microecon_adapter.py +412 -0
  88. tools/missing_data_adapter.py +73 -0
  89. tools/nonparametric_adapter.py +190 -0
  90. tools/spatial_econometrics_adapter.py +318 -0
  91. tools/statistical_inference_adapter.py +90 -0
  92. tools/survival_analysis_adapter.py +46 -0
  93. aigroup_econ_mcp-1.4.3.dist-info/METADATA +0 -710
  94. aigroup_econ_mcp-1.4.3.dist-info/RECORD +0 -92
  95. {aigroup_econ_mcp-1.4.3.dist-info → aigroup_econ_mcp-2.0.1.dist-info}/WHEEL +0 -0
  96. {aigroup_econ_mcp-1.4.3.dist-info → aigroup_econ_mcp-2.0.1.dist-info}/entry_points.txt +0 -0
  97. {aigroup_econ_mcp-1.4.3.dist-info → aigroup_econ_mcp-2.0.1.dist-info}/licenses/LICENSE +0 -0
tools/machine_learning_adapter.py
@@ -0,0 +1,567 @@
+ """
+ Machine Learning Adapter for Econometrics MCP Tools
+ Provides unified interfaces for 8 machine learning models
+ """
+
+ import numpy as np
+ import pandas as pd
+ from typing import Union, Optional, Dict, Any, List
+ import json
+ import logging
+
+ # Import econometrics machine learning modules
+ from econometrics.advanced_methods.modern_computing_machine_learning import (
+     random_forest_analysis,
+     gradient_boosting_analysis,
+     svm_analysis,
+     neural_network_analysis,
+     kmeans_analysis,
+     hierarchical_clustering_analysis,
+     double_ml_analysis,
+     causal_forest_analysis
+ )
+
+ from tools.data_loader import DataLoader
+ from tools.output_formatter import OutputFormatter
+
+ # Set up logging
+ logger = logging.getLogger(__name__)
+
+
+ def convert_to_serializable(obj: Any) -> Any:
+     """
+     Recursively convert numpy arrays and other non-serializable objects into a JSON-serializable form
+     """
+     if isinstance(obj, np.ndarray):
+         return obj.tolist()
+     elif isinstance(obj, dict):
+         return {key: convert_to_serializable(value) for key, value in obj.items()}
+     elif isinstance(obj, (list, tuple)):
+         return [convert_to_serializable(item) for item in obj]
+     elif isinstance(obj, (np.integer, np.floating)):
+         return float(obj)
+     else:
+         return obj
+
+
+ def format_output(results: Dict[str, Any], output_format: str = 'json', save_path: Optional[str] = None) -> str:
+     """
+     Unified output formatting function
+     Follows the approach of the OLS adapter
+
+     Args:
+         results: result dictionary
+         output_format: output format ('json', 'markdown', 'text')
+         save_path: save path (optional)
+
+     Returns:
+         Formatted string result
+     """
+     # Convert all numpy arrays to a serializable form
+     serializable_results = convert_to_serializable(results)
+
+     if output_format == 'json':
+         json_result = json.dumps(serializable_results, ensure_ascii=False, indent=2)
+         if save_path:
+             OutputFormatter.save_to_file(json_result, save_path)
+             return f"Analysis complete! Results saved to: {save_path}\n\n{json_result}"
+         return json_result
+     else:
+         # For non-JSON formats, return JSON directly (Markdown formatting is not yet supported for machine learning results)
+         json_result = json.dumps(serializable_results, ensure_ascii=False, indent=2)
+         if save_path:
+             OutputFormatter.save_to_file(json_result, save_path)
+             return f"Analysis complete! Results saved to: {save_path}\n\n{json_result}"
+         return json_result
+
+
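For orientation only (this example is not part of the package), the serialization path above behaves roughly as follows; the toy dictionary is hypothetical:

    import numpy as np
    raw = {'coef': np.array([0.5, 1.2]), 'n_obs': np.int64(100)}
    clean = convert_to_serializable(raw)
    # clean == {'coef': [0.5, 1.2], 'n_obs': 100.0}  (np.integer values are cast to float)
    print(format_output(clean))  # pretty-printed JSON string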
+ def random_forest_adapter(
+     X_data: Optional[Union[List[float], List[List[float]]]] = None,
+     y_data: Optional[List[float]] = None,
+     file_path: Optional[str] = None,
+     feature_names: Optional[List[str]] = None,
+     problem_type: str = 'regression',
+     test_size: float = 0.2,
+     n_estimators: int = 100,
+     max_depth: Optional[int] = None,
+     random_state: int = 42,
+     output_format: str = 'json',
+     save_path: Optional[str] = None
+ ) -> str:
+     """Random Forest analysis adapter"""
+     try:
+         if file_path:
+             data = DataLoader.load_from_file(file_path)
+             if isinstance(data, dict):
+                 X_data = data.get('X', data.get('features'))
+                 y_data = data.get('y', data.get('target'))
+                 if feature_names is None:
+                     feature_names = data.get('feature_names')
+
+         if X_data is None or y_data is None:
+             raise ValueError("X_data and y_data must be provided or loaded from file")
+
+         X = np.array(X_data)
+         y = np.array(y_data)
+
+         if X.ndim == 1:
+             X = X.reshape(-1, 1)
+
+         results = random_forest_analysis(
+             X=X, y=y, problem_type=problem_type, test_size=test_size,
+             n_estimators=n_estimators, max_depth=max_depth, random_state=random_state
+         )
+
+         formatted_results = {
+             'model_type': 'random_forest',
+             'problem_type': problem_type,
+             'train_results': results['train_results'],
+             'test_results': results['test_results'],
+             'feature_importances': results['feature_importances'].tolist(),
+             'feature_names': feature_names,
+             'model_parameters': {
+                 'n_estimators': n_estimators,
+                 'max_depth': max_depth,
+                 'test_size': test_size,
+                 'random_state': random_state
+             }
+         }
+
+         return format_output(formatted_results, output_format, save_path)
+
+     except Exception as e:
+         logger.error(f"Random Forest failed: {str(e)}")
+         return format_output({'error': str(e)}, output_format)
+
+
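A minimal call sketch for the adapter defined above (toy data, purely illustrative; the function returns a JSON string):

    result_json = random_forest_adapter(
        X_data=[[1.0, 2.0], [2.0, 1.0], [3.0, 4.0], [4.0, 3.0], [5.0, 5.0]],
        y_data=[1.5, 1.8, 3.9, 4.1, 5.2],
        feature_names=['x1', 'x2'],
        problem_type='regression',
        n_estimators=50
    )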
+ def gradient_boosting_adapter(
+     X_data: Optional[Union[List[float], List[List[float]]]] = None,
+     y_data: Optional[List[float]] = None,
+     file_path: Optional[str] = None,
+     feature_names: Optional[List[str]] = None,
+     algorithm: str = 'sklearn',
+     problem_type: str = 'regression',
+     test_size: float = 0.2,
+     n_estimators: int = 100,
+     learning_rate: float = 0.1,
+     max_depth: int = 3,
+     random_state: int = 42,
+     output_format: str = 'json',
+     save_path: Optional[str] = None
+ ) -> str:
+     """Gradient Boosting analysis adapter"""
+     try:
+         if file_path:
+             data = DataLoader.load_from_file(file_path)
+             if isinstance(data, dict):
+                 X_data = data.get('X', data.get('features'))
+                 y_data = data.get('y', data.get('target'))
+                 if feature_names is None:
+                     feature_names = data.get('feature_names')
+
+         if X_data is None or y_data is None:
+             raise ValueError("X_data and y_data must be provided")
+
+         X = np.array(X_data)
+         y = np.array(y_data)
+
+         if X.ndim == 1:
+             X = X.reshape(-1, 1)
+
+         results = gradient_boosting_analysis(
+             X=X, y=y, algorithm=algorithm, problem_type=problem_type,
+             test_size=test_size, n_estimators=n_estimators,
+             learning_rate=learning_rate, max_depth=max_depth, random_state=random_state
+         )
+
+         formatted_results = {
+             'model_type': 'gradient_boosting',
+             'algorithm': algorithm,
+             'problem_type': problem_type,
+             'train_results': results['train_results'],
+             'test_results': results['test_results'],
+             'feature_importances': results['feature_importances'].tolist() if hasattr(results['feature_importances'], 'tolist') else results['feature_importances'],
+             'feature_names': feature_names,
+             'model_parameters': {
+                 'n_estimators': n_estimators,
+                 'learning_rate': learning_rate,
+                 'max_depth': max_depth,
+                 'test_size': test_size
+             }
+         }
+
+         return format_output(formatted_results, output_format, save_path)
+
+     except Exception as e:
+         logger.error(f"Gradient Boosting failed: {str(e)}")
+         return format_output({'error': str(e)}, output_format)
+
+
+ def svm_adapter(
+     X_data: Optional[Union[List[float], List[List[float]]]] = None,
+     y_data: Optional[List[float]] = None,
+     file_path: Optional[str] = None,
+     feature_names: Optional[List[str]] = None,
+     problem_type: str = 'regression',
+     kernel: str = 'rbf',
+     test_size: float = 0.2,
+     C: float = 1.0,
+     gamma: str = 'scale',
+     random_state: int = 42,
+     output_format: str = 'json',
+     save_path: Optional[str] = None
+ ) -> str:
+     """SVM analysis adapter"""
+     try:
+         if file_path:
+             data = DataLoader.load_from_file(file_path)
+             if isinstance(data, dict):
+                 X_data = data.get('X', data.get('features'))
+                 y_data = data.get('y', data.get('target'))
+                 if feature_names is None:
+                     feature_names = data.get('feature_names')
+
+         if X_data is None or y_data is None:
+             raise ValueError("X_data and y_data must be provided")
+
+         X = np.array(X_data)
+         y = np.array(y_data)
+
+         if X.ndim == 1:
+             X = X.reshape(-1, 1)
+
+         results = svm_analysis(
+             X=X, y=y, problem_type=problem_type, kernel=kernel,
+             test_size=test_size, C=C, gamma=gamma, random_state=random_state
+         )
+
+         formatted_results = {
+             'model_type': 'svm',
+             'problem_type': problem_type,
+             'kernel': kernel,
+             'train_results': results['train_results'],
+             'test_results': results['test_results'],
+             'feature_names': feature_names,
+             'model_parameters': {
+                 'C': C,
+                 'gamma': gamma,
+                 'test_size': test_size
+             }
+         }
+
+         if problem_type == 'classification':
+             formatted_results['train_proba_shape'] = results['train_proba'].shape if results['train_proba'] is not None else None
+             formatted_results['test_proba_shape'] = results['test_proba'].shape if results['test_proba'] is not None else None
+
+         return format_output(formatted_results, output_format, save_path)
+
+     except Exception as e:
+         logger.error(f"SVM failed: {str(e)}")
+         return format_output({'error': str(e)}, output_format)
+
+
+ def neural_network_adapter(
+     X_data: Optional[Union[List[float], List[List[float]]]] = None,
+     y_data: Optional[List[float]] = None,
+     file_path: Optional[str] = None,
+     feature_names: Optional[List[str]] = None,
+     problem_type: str = 'regression',
+     hidden_layer_sizes: tuple = (100,),
+     activation: str = 'relu',
+     solver: str = 'adam',
+     test_size: float = 0.2,
+     alpha: float = 0.0001,
+     learning_rate: str = 'constant',
+     learning_rate_init: float = 0.001,
+     max_iter: int = 200,
+     random_state: int = 42,
+     output_format: str = 'json',
+     save_path: Optional[str] = None
+ ) -> str:
+     """Neural Network analysis adapter"""
+     try:
+         if file_path:
+             data = DataLoader.load_from_file(file_path)
+             if isinstance(data, dict):
+                 X_data = data.get('X', data.get('features'))
+                 y_data = data.get('y', data.get('target'))
+                 if feature_names is None:
+                     feature_names = data.get('feature_names')
+
+         if X_data is None or y_data is None:
+             raise ValueError("X_data and y_data must be provided")
+
+         X = np.array(X_data)
+         y = np.array(y_data)
+
+         if X.ndim == 1:
+             X = X.reshape(-1, 1)
+
+         results = neural_network_analysis(
+             X=X, y=y, problem_type=problem_type, hidden_layer_sizes=hidden_layer_sizes,
+             activation=activation, solver=solver, test_size=test_size, alpha=alpha,
+             learning_rate=learning_rate, learning_rate_init=learning_rate_init,
+             max_iter=max_iter, random_state=random_state
+         )
+
+         formatted_results = {
+             'model_type': 'neural_network',
+             'problem_type': problem_type,
+             'train_results': results['train_results'],
+             'test_results': results['test_results'],
+             'feature_names': feature_names,
+             'model_parameters': {
+                 'hidden_layer_sizes': hidden_layer_sizes,
+                 'activation': activation,
+                 'solver': solver,
+                 'alpha': alpha,
+                 'learning_rate': learning_rate,
+                 'learning_rate_init': learning_rate_init,
+                 'max_iter': max_iter
+             }
+         }
+
+         if problem_type == 'classification':
+             formatted_results['train_proba_shape'] = results['train_proba'].shape if results['train_proba'] is not None else None
+             formatted_results['test_proba_shape'] = results['test_proba'].shape if results['test_proba'] is not None else None
+
+         return format_output(formatted_results, output_format, save_path)
+
+     except Exception as e:
+         logger.error(f"Neural Network failed: {str(e)}")
+         return format_output({'error': str(e)}, output_format)
+
+
+ def kmeans_clustering_adapter(
+     X_data: Optional[Union[List[float], List[List[float]]]] = None,
+     file_path: Optional[str] = None,
+     feature_names: Optional[List[str]] = None,
+     n_clusters: int = 8,
+     init: str = 'k-means++',
+     n_init: int = 10,
+     max_iter: int = 300,
+     random_state: int = 42,
+     algorithm: str = 'lloyd',
+     use_minibatch: bool = False,
+     batch_size: int = 1000,
+     output_format: str = 'json',
+     save_path: Optional[str] = None
+ ) -> str:
+     """K-Means Clustering analysis adapter"""
+     try:
+         if file_path:
+             data = DataLoader.load_from_file(file_path)
+             if isinstance(data, dict):
+                 X_data = data.get('X', data.get('features'))
+                 if feature_names is None:
+                     feature_names = data.get('feature_names')
+
+         if X_data is None:
+             raise ValueError("X_data must be provided")
+
+         X = np.array(X_data)
+
+         if X.ndim == 1:
+             X = X.reshape(-1, 1)
+
+         results = kmeans_analysis(
+             X=X, n_clusters=n_clusters, init=init, n_init=n_init,
+             max_iter=max_iter, random_state=random_state, algorithm=algorithm,
+             use_minibatch=use_minibatch, batch_size=batch_size
+         )
+
+         formatted_results = {
+             'model_type': 'kmeans_clustering',
+             'labels': results['labels'].tolist(),
+             'cluster_centers': results['cluster_centers'].tolist(),
+             'metrics': results['metrics'],
+             'feature_names': feature_names,
+             'model_parameters': {
+                 'n_clusters': n_clusters,
+                 'init': init,
+                 'n_init': n_init,
+                 'max_iter': max_iter,
+                 'algorithm': algorithm,
+                 'use_minibatch': use_minibatch
+             }
+         }
+
+         return format_output(formatted_results, output_format, save_path)
+
+     except Exception as e:
+         logger.error(f"K-Means Clustering failed: {str(e)}")
+         return format_output({'error': str(e)}, output_format)
+
+
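The clustering adapters take only X_data (no target). A toy sketch, illustrative only:

    result_json = kmeans_clustering_adapter(
        X_data=[[0.0, 0.1], [0.2, 0.0], [0.1, 0.2], [5.0, 5.1], [5.2, 4.9], [4.8, 5.0]],
        feature_names=['x1', 'x2'],
        n_clusters=2
    )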
+ def hierarchical_clustering_adapter(
+     X_data: Optional[Union[List[float], List[List[float]]]] = None,
+     file_path: Optional[str] = None,
+     feature_names: Optional[List[str]] = None,
+     n_clusters: int = 2,
+     linkage: str = 'ward',
+     metric: str = 'euclidean',
+     output_format: str = 'json',
+     save_path: Optional[str] = None
+ ) -> str:
+     """Hierarchical Clustering analysis adapter"""
+     try:
+         if file_path:
+             data = DataLoader.load_from_file(file_path)
+             if isinstance(data, dict):
+                 X_data = data.get('X', data.get('features'))
+                 if feature_names is None:
+                     feature_names = data.get('feature_names')
+
+         if X_data is None:
+             raise ValueError("X_data must be provided")
+
+         X = np.array(X_data)
+
+         if X.ndim == 1:
+             X = X.reshape(-1, 1)
+
+         results = hierarchical_clustering_analysis(
+             X=X, n_clusters=n_clusters, linkage=linkage, metric=metric
+         )
+
+         formatted_results = {
+             'model_type': 'hierarchical_clustering',
+             'labels': results['labels'].tolist(),
+             'metrics': results['metrics'],
+             'feature_names': feature_names,
+             'model_parameters': {
+                 'n_clusters': n_clusters,
+                 'linkage': linkage,
+                 'metric': metric
+             }
+         }
+
+         return format_output(formatted_results, output_format, save_path)
+
+     except Exception as e:
+         logger.error(f"Hierarchical Clustering failed: {str(e)}")
+         return format_output({'error': str(e)}, output_format)
+
+
+ def double_ml_adapter(
+     X_data: Optional[Union[List[float], List[List[float]]]] = None,
+     y_data: Optional[List[float]] = None,
+     d_data: Optional[List[float]] = None,
+     file_path: Optional[str] = None,
+     feature_names: Optional[List[str]] = None,
+     treatment_type: str = 'continuous',
+     n_folds: int = 5,
+     random_state: int = 42,
+     output_format: str = 'json',
+     save_path: Optional[str] = None
+ ) -> str:
+     """Double Machine Learning analysis adapter"""
+     try:
+         if file_path:
+             data = DataLoader.load_from_file(file_path)
+             if isinstance(data, dict):
+                 X_data = data.get('X', data.get('features'))
+                 y_data = data.get('y', data.get('outcome'))
+                 d_data = data.get('d', data.get('treatment'))
+                 if feature_names is None:
+                     feature_names = data.get('feature_names')
+
+         if X_data is None or y_data is None or d_data is None:
+             raise ValueError("X_data, y_data, and d_data must be provided")
+
+         X = np.array(X_data)
+         y = np.array(y_data)
+         d = np.array(d_data)
+
+         if X.ndim == 1:
+             X = X.reshape(-1, 1)
+
+         results = double_ml_analysis(
+             X=X, y=y, d=d, treatment_type=treatment_type,
+             n_folds=n_folds, random_state=random_state
+         )
+
+         formatted_results = {
+             'model_type': 'double_ml',
+             'treatment_type': treatment_type,
+             'effect': float(results['effect']),
+             'se': float(results['se']),
+             'ci': results['ci'],
+             'pval': float(results['pval']),
+             'feature_names': feature_names,
+             'model_parameters': {
+                 'treatment_type': treatment_type,
+                 'n_folds': n_folds,
+                 'random_state': random_state
+             }
+         }
+
+         return format_output(formatted_results, output_format, save_path)
+
+     except Exception as e:
+         logger.error(f"Double ML failed: {str(e)}")
+         return format_output({'error': str(e)}, output_format)
+
+
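The Double ML adapter additionally requires a treatment variable d_data and reports the estimated treatment effect with its standard error, confidence interval, and p-value (the 'effect', 'se', 'ci', and 'pval' keys above). A toy sketch, illustrative only; real use needs far more observations:

    import json
    parsed = json.loads(double_ml_adapter(
        X_data=[[1.0, 0.5], [0.3, 2.0], [1.5, 1.0], [0.8, 0.2], [2.0, 1.7], [0.4, 0.9]],
        y_data=[2.1, 3.4, 2.9, 1.7, 4.0, 2.2],
        d_data=[0.5, 1.2, 0.9, 0.3, 1.5, 0.6],
        n_folds=2
    ))
    print(parsed['effect'], parsed['se'])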
+ def causal_forest_adapter(
+     X_data: Optional[Union[List[float], List[List[float]]]] = None,
+     y_data: Optional[List[float]] = None,
+     w_data: Optional[List[float]] = None,
+     file_path: Optional[str] = None,
+     feature_names: Optional[List[str]] = None,
+     n_estimators: int = 100,
+     min_samples_leaf: int = 5,
+     max_depth: Optional[int] = None,
+     random_state: int = 42,
+     honest: bool = True,
+     output_format: str = 'json',
+     save_path: Optional[str] = None
+ ) -> str:
+     """Causal Forest analysis adapter"""
+     try:
+         if file_path:
+             data = DataLoader.load_from_file(file_path)
+             if isinstance(data, dict):
+                 X_data = data.get('X', data.get('features'))
+                 y_data = data.get('y', data.get('outcome'))
+                 w_data = data.get('w', data.get('treatment'))
+                 if feature_names is None:
+                     feature_names = data.get('feature_names')
+
+         if X_data is None or y_data is None or w_data is None:
+             raise ValueError("X_data, y_data, and w_data must be provided")
+
+         X = np.array(X_data)
+         y = np.array(y_data)
+         w = np.array(w_data)
+
+         if X.ndim == 1:
+             X = X.reshape(-1, 1)
+
+         results = causal_forest_analysis(
+             X=X, y=y, w=w, n_estimators=n_estimators,
+             min_samples_leaf=min_samples_leaf, max_depth=max_depth,
+             random_state=random_state, honest=honest
+         )
+
+         te_results = results['treatment_effects']
+         formatted_results = {
+             'model_type': 'causal_forest',
+             'cate': te_results['cate'].tolist(),
+             'ate': float(te_results['ate']),
+             'cate_se': float(te_results['cate_se']),
+             'feature_names': feature_names,
+             'model_parameters': {
+                 'n_estimators': n_estimators,
+                 'min_samples_leaf': min_samples_leaf,
+                 'max_depth': max_depth,
+                 'honest': honest,
+                 'random_state': random_state
+             }
+         }
+
+         return format_output(formatted_results, output_format, save_path)
+
+     except Exception as e:
+         logger.error(f"Causal Forest failed: {str(e)}")
+         return format_output({'error': str(e)}, output_format)
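Every adapter in this module returns the JSON string produced by format_output, so callers can recover a plain dict with json.loads. A toy sketch for the causal forest entry point, illustrative only:

    import json
    parsed = json.loads(causal_forest_adapter(
        X_data=[[0.1], [0.4], [0.9], [1.3], [1.8], [2.2]],
        y_data=[1.0, 1.2, 2.5, 2.9, 4.0, 4.4],
        w_data=[0, 0, 1, 0, 1, 1],
        n_estimators=20,
        min_samples_leaf=1
    ))
    # Top-level keys per formatted_results above:
    # 'model_type', 'cate', 'ate', 'cate_se', 'feature_names', 'model_parameters'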
tools/mcp_tool_groups/__init__.py
@@ -1 +1,15 @@
- """MCP tool group package"""
+ """MCP tool group package"""
+
+ from .basic_parametric_tools import BasicParametricTools
+ from .model_specification_tools import ModelSpecificationTools
+ from .time_series_tools import TimeSeriesTools
+ from .causal_inference_tools import CausalInferenceTools
+ from .machine_learning_tools import MachineLearningTools
+
+ __all__ = [
+     "BasicParametricTools",
+     "ModelSpecificationTools",
+     "TimeSeriesTools",
+     "CausalInferenceTools",
+     "MachineLearningTools"
+ ]
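With these exports, the tool-group classes can be imported directly from the subpackage; how they are instantiated and registered is handled in tools/mcp_tools_registry.py (also changed in this release) and is not shown in this hunk:

    from tools.mcp_tool_groups import MachineLearningTools, CausalInferenceTools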