mcp-automl 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
mcp_automl/__init__.py ADDED
File without changes
mcp_automl/__main__.py ADDED
@@ -0,0 +1,4 @@
+ from mcp_automl.server import main
+
+ if __name__ == "__main__":
+     main()
mcp_automl/server.py ADDED
@@ -0,0 +1,946 @@
+ import numpy as np
+ import pandas as pd
+ import uuid
+ import os
+ import json
+ import asyncio
+ import duckdb
+ import logging
+ import argparse
+ from pathlib import Path
+ from mcp.server.fastmcp import FastMCP, Context
+ from pycaret.classification import setup as setup_clf, compare_models as compare_models_clf, pull as pull_clf, save_model as save_model_clf, load_model as load_model_clf, predict_model as predict_model_clf, get_config as get_config_clf
+ from pycaret.regression import setup as setup_reg, compare_models as compare_models_reg, pull as pull_reg, save_model as save_model_reg, load_model as load_model_reg, predict_model as predict_model_reg, get_config as get_config_reg
+
+ # Configure logging
+ logging.basicConfig(
+     level=logging.INFO,
+     format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
+ )
+ logger = logging.getLogger(__name__)
+
+ # Module-level configuration (set via argparse in main())
+ EXPERIMENT_DIR = "~/.mcp-automl/experiments"
+ DEFAULT_SESSION_ID = 42
+ QUERY_RESULT_LIMIT = 100
+ SUPPORTED_FILE_FORMATS = ('.csv', '.parquet', '.json')
+
+ mcp = FastMCP("mcp-automl")
+
+ class PandasJSONEncoder(json.JSONEncoder):
+     """Custom JSON encoder that handles pandas NA types and numpy types."""
+
+     def default(self, obj):
+         # Handle numpy arrays first: pd.isna() is ambiguous for arrays
+         if isinstance(obj, np.ndarray):
+             return obj.tolist()
+         # Handle numpy integer types
+         if isinstance(obj, np.integer):
+             return int(obj)
+         # Handle numpy floating types (NaN becomes null)
+         if isinstance(obj, np.floating):
+             return None if np.isnan(obj) else float(obj)
+         # Handle numpy boolean
+         if isinstance(obj, np.bool_):
+             return bool(obj)
+         # Handle pandas NA scalars (pd.NA, pd.NaT)
+         if pd.isna(obj):
+             return None
+         # Let the base class raise the TypeError
+         return super().default(obj)
+
+ def _load_dataframe_fast(data_path: str, sample_size: int | None = None,
+                          sample_method: str = 'reservoir') -> pd.DataFrame:
+     """
+     Unified data loader using DuckDB with optional sampling.
+
+     This function provides:
+     - Fast I/O using DuckDB for all file formats (CSV, Parquet, JSON)
+     - Smart sampling for large files using reservoir sampling
+     - Consistent dtype inference by always returning a pandas DataFrame
+     - Flexible loading: full data for training, sampled data for inspection
+
+     Args:
+         data_path: Absolute path to the data file (CSV, Parquet, or JSON).
+         sample_size: If provided, returns a random sample of this size.
+             If None, loads the entire dataset.
+         sample_method: Sampling method to use (default: 'reservoir').
+             Currently only 'reservoir' is supported.
+
+     Returns:
+         pandas DataFrame with data loaded from the file.
+
+     Raises:
+         ValueError: If the file format is not supported or sample_method is invalid.
+     """
+     # Validate file format
+     if not any(data_path.endswith(ext) for ext in SUPPORTED_FILE_FORMATS):
+         supported = ', '.join(SUPPORTED_FILE_FORMATS)
+         raise ValueError(f"Unsupported file format: {data_path}. Supported formats: {supported}")
+
+     # Connect to DuckDB (in-memory)
+     con = duckdb.connect(database=':memory:')
+
+     # Full load for training (no sampling)
+     if sample_size is None:
+         logger.debug(f"Loading full dataset from {data_path}")
+         return con.execute(f"SELECT * FROM '{data_path}'").df()
+
+     # Check total row count to determine if sampling is needed
+     total_rows = con.execute(f"SELECT COUNT(*) FROM '{data_path}'").fetchone()[0]
+     logger.debug(f"File has {total_rows} total rows, sample_size={sample_size}")
+
+     if total_rows <= sample_size:
+         # File is small enough, just load everything
+         logger.debug(f"File is small ({total_rows} <= {sample_size}), loading all rows")
+         return con.execute(f"SELECT * FROM '{data_path}'").df()
+
+     # Apply sampling for large files
+     if sample_method == 'reservoir':
+         logger.info(f"Applying reservoir sampling: {sample_size} rows from {total_rows} total")
+         # Reservoir sampling gives a truly random sample
+         return con.execute(f"""
+             SELECT * FROM '{data_path}'
+             USING SAMPLE reservoir({sample_size} ROWS)
+         """).df()
+     else:
+         raise ValueError(f"Unknown sample_method: {sample_method}. Only 'reservoir' is supported.")
+
+ def _get_feature_info(get_config_func, target_column: str) -> dict:
+     """Extracts feature information from PyCaret config."""
+     try:
+         X_train = get_config_func('X_train')
+         dataset = get_config_func('dataset')
+
+         used_features = list(X_train.columns)
+         all_cols = list(dataset.columns)
+
+         # Deduce ignored features: in dataset but not in X_train and not the target
+         ignored_features = [c for c in all_cols if c != target_column and c not in used_features]
+
+         numeric_features = list(X_train.select_dtypes(include=np.number).columns)
+         # Objects and categories are treated as categorical
+         categorical_features = list(X_train.select_dtypes(include=['object', 'category']).columns)
+
+         return {
+             "used_features": used_features,
+             "ignored_features": ignored_features,
+             "actual_numeric_features": numeric_features,
+             "actual_categorical_features": categorical_features
+         }
+     except Exception as e:
+         logger.error(f"Error extracting feature info: {e}", exc_info=True)
+         return {}
+
+ def _get_feature_importances(model, get_config_func) -> dict:
+     """Extracts feature importances from the model if available.
+
+     Supports tree-based models (feature_importances_) and linear models (coef_).
+     Returns a dict of {feature_name: importance} sorted by absolute importance, descending.
+     """
+     try:
+         X_train = get_config_func('X_train')
+         feature_names = list(X_train.columns)
+
+         # Try tree-based models first (RF, XGBoost, LightGBM, etc.)
+         if hasattr(model, 'feature_importances_'):
+             importances = model.feature_importances_
+             importance_dict = dict(zip(feature_names, [float(x) for x in importances]))
+             # Sort by absolute importance, descending
+             return dict(sorted(importance_dict.items(), key=lambda x: abs(x[1]), reverse=True))
+
+         # Try linear models (LogisticRegression, Ridge, Lasso, etc.)
+         if hasattr(model, 'coef_'):
+             coef = model.coef_
+             # For multi-class, coef_ has shape (n_classes, n_features); take the mean absolute value
+             if len(coef.shape) > 1:
+                 importances = np.abs(coef).mean(axis=0)
+             else:
+                 importances = np.abs(coef)
+             importance_dict = dict(zip(feature_names, [float(x) for x in importances]))
+             return dict(sorted(importance_dict.items(), key=lambda x: abs(x[1]), reverse=True))
+
+         return {}
+     except Exception as e:
+         logger.warning(f"Could not extract feature importances: {e}")
+         return {}
+
+ def _save_results(run_id: str, model, results: pd.DataFrame, save_model_func, metadata: dict, test_results: pd.DataFrame = None, feature_importances: dict = None) -> str:
+     """Helper to save the model, metrics, and an HTML report.
+
+     Args:
+         run_id: Unique run identifier.
+         model: The trained model object.
+         results: DataFrame containing CV results (from pull()).
+         save_model_func: Function to save the model.
+         metadata: Dictionary of run configuration.
+         test_results: DataFrame containing test/holdout results (from pull() after predict_model()).
+         feature_importances: Optional dict of {feature_name: importance} for the best model.
+     """
+     # Create directory
+     run_dir = os.path.join(EXPERIMENT_DIR, run_id)
+     os.makedirs(run_dir, exist_ok=True)
+
+     # Save model
+     model_path = os.path.join(run_dir, "model")
+     save_model_func(model, model_path)
+
+     # Save CV metrics (best model)
+     if not results.empty:
+         metrics = results.iloc[0].to_dict()
+     else:
+         metrics = {}
+
+     # Save test/holdout metrics. test_results is the output of pull() after
+     # predict_model(), which holds the holdout metrics in its first row.
+     test_metrics = {}
+     if test_results is not None and not test_results.empty:
+         try:
+             test_metrics = test_results.to_dict(orient='records')[0]
+         except (KeyError, IndexError) as e:
+             logger.warning(f"Could not extract test metrics: {e}")
+             test_metrics = {}
+
+     # Merge metadata into metrics for saving
+     full_metadata = {**metadata, "cv_metrics": metrics, "test_metrics": test_metrics}
+
+     metadata_path = os.path.join(run_dir, "metadata.json")
+     with open(metadata_path, "w") as f:
+         json.dump(full_metadata, f, indent=2)
+
+     # Generate HTML report
+
+     # Format lists for HTML
+     def fmt_list(items):
+         return ", ".join(items) if items else "None"
+
+     # Separate configuration from metrics
+     config_metadata = {k: v for k, v in metadata.items() if k not in metrics}
+
+     # Generate config rows
+     config_rows = ""
+     for k, v in config_metadata.items():
+         # Clean up keys for display
+         display_key = k.replace("_", " ").title()
+
+         # Format values
+         if isinstance(v, list):
+             display_val = fmt_list(v)
+         else:
+             display_val = str(v)
+
+         config_rows += f'<div class="metadata-item"><strong>{display_key}:</strong> {display_val}</div>\n'
+
+     html_content = f"""
+     <html>
+     <head>
+         <title>Training Result - {run_id}</title>
+         <style>
+             body {{ font-family: monospace, sans-serif; margin: 20px; }}
+             h1, h2 {{ color: #333; }}
+             table {{ border-collapse: collapse; width: 100%; margin-bottom: 20px; }}
+             th, td {{ border: 1px solid #ddd; padding: 8px; text-align: left; }}
+             th {{ background-color: #f2f2f2; }}
+             .metadata-item {{ margin-bottom: 5px; }}
+         </style>
+     </head>
+     <body>
+         <h1>Training Run: {run_id}</h1>
+
+         <h2>Configuration</h2>
+         <div class="metadata">
+             {config_rows}
+         </div>
+
+         <h2>Model Metrics (CV)</h2>
+         {results.to_html(classes='table', index=False) if not results.empty else "<p>No results available</p>"}
+
+         <h2>Test/Holdout Metrics</h2>
+         {test_results.to_html(classes='table', index=False) if test_results is not None and not test_results.empty else "<p>No test results available</p>"}
+
+     </body>
+     </html>
+     """
+
+     html_path = os.path.join(run_dir, "result.html")
+     with open(html_path, "w") as f:
+         f.write(html_content)
+
+     return json.dumps({
+         "run_id": run_id,
+         "model_path": model_path + ".pkl",  # PyCaret appends .pkl
+         "data_path": metadata.get("data_path"),
+         "test_data_path": metadata.get("test_data_path"),
+         "metadata": metrics,
+         "test_metrics": test_metrics,
+         "feature_importances": feature_importances if feature_importances else {},
+         "report_path": html_path
+     }, indent=2)
+
+ def _train_classifier_sync(run_id: str, data_path: str, target_column: str, ignore_features: list[str], numeric_features: list[str], categorical_features: list[str],
+                            ordinal_features: dict[str, list[str]], date_features: list[str], text_features: list[str], keep_features: list[str],
+                            imputation_type: str, numeric_imputation: str, categorical_imputation: str,
+                            fix_imbalance: bool, remove_outliers: bool, normalize: bool, normalize_method: str,
+                            transformation: bool, transformation_method: str,
+                            polynomial_features: bool, interaction_features: list[str], bin_numeric_features: list[str],
+                            feature_selection: bool, feature_selection_method: str, n_features_to_select: float,
+                            fold_strategy: str, fold: int, n_jobs: int, test_data_path: str = None, optimize: str = None,
+                            include_models: list[str] = None, exclude_models: list[str] = None) -> str:
+     """Synchronous helper for classifier training."""
+     # Use unified loader for consistent dtypes
+     data = _load_dataframe_fast(data_path)
+
+     session_id = DEFAULT_SESSION_ID
+
+     # Handle test data
+     test_data = None
+     if test_data_path:
+         test_data = _load_dataframe_fast(test_data_path)
+
+         # Ensure unique indices across train and test
+         data.reset_index(drop=True, inplace=True)
+         test_data.reset_index(drop=True, inplace=True)
+         test_data.index = test_data.index + len(data)
+
+     # Build setup kwargs; None values are dropped below so PyCaret defaults take over
+     setup_params = {
+         "data": data,
+         "test_data": test_data,
+         "target": target_column,
+         "session_id": session_id,
+         "verbose": False,
+         "html": False,
+         "ignore_features": ignore_features,
+         "numeric_features": numeric_features,
+         "categorical_features": categorical_features,
+         "ordinal_features": ordinal_features,
+         "date_features": date_features,
+         "text_features": text_features,
+         "keep_features": keep_features,
+         "imputation_type": imputation_type,
+         "numeric_imputation": numeric_imputation,
+         "categorical_imputation": categorical_imputation,
+         "fix_imbalance": fix_imbalance,
+         "remove_outliers": remove_outliers,
+         "normalize": normalize,
+         "normalize_method": normalize_method,
+         "transformation": transformation,
+         "transformation_method": transformation_method,
+         "polynomial_features": polynomial_features,
+         "interaction_features": interaction_features,
+         "bin_numeric_features": bin_numeric_features,
+         "feature_selection": feature_selection,
+         "feature_selection_method": feature_selection_method,
+         "n_features_to_select": n_features_to_select,
+         "fold_strategy": fold_strategy,
+         "fold": fold,
+         "n_jobs": n_jobs
+     }
+     # Remove None values
+     setup_params = {k: v for k, v in setup_params.items() if v is not None}
+
+     setup_clf(**setup_params)
+
+     feature_info = _get_feature_info(get_config_clf, target_column)
+
+     # Only pass sort/include/exclude if specified
+     compare_kwargs = {"n_select": 1, "verbose": False}
+     if optimize is not None:
+         compare_kwargs["sort"] = optimize
+     if include_models is not None:
+         compare_kwargs["include"] = include_models
+     if exclude_models is not None:
+         compare_kwargs["exclude"] = exclude_models
+
+     best_model = compare_models_clf(**compare_kwargs)
+     if isinstance(best_model, list):
+         if not best_model:
+             raise ValueError("compare_models returned an empty list. Try relaxing constraints or collecting more data.")
+         best_model = best_model[0]
+     results = pull_clf()
+
+     # Extract feature importances
+     feature_importances = _get_feature_importances(best_model, get_config_clf)
+
+     # Evaluate on the holdout (test_data or internal split)
+     predict_model_clf(best_model)
+     test_results = pull_clf()
+
+     metadata = {
+         "data_path": data_path,
+         "test_data_path": test_data_path,
+         "target_column": target_column,
+         "session_id": session_id,
+         "task": "classification",
+         "include_models": include_models,
+         "exclude_models": exclude_models,
+         **setup_params,  # Include all setup params in metadata
+         **feature_info
+     }
+     # Remove DataFrames that came in via setup_params; they are not JSON-serializable
+     metadata.pop("data", None)
+     metadata.pop("test_data", None)
+
+     return _save_results(run_id, best_model, results, save_model_clf, metadata, test_results, feature_importances)
+
+ @mcp.tool()
+ async def train_classifier(data_path: str, target_column: str, ctx: Context,
+                            ignore_features: list[str] = None, numeric_features: list[str] = None, categorical_features: list[str] = None,
+                            ordinal_features: dict[str, list[str]] = None, date_features: list[str] = None, text_features: list[str] = None, keep_features: list[str] = None,
+                            imputation_type: str = "simple", numeric_imputation: str = "mean", categorical_imputation: str = "mode",
+                            fix_imbalance: bool = False, remove_outliers: bool = False, normalize: bool = False, normalize_method: str = "zscore",
+                            transformation: bool = False, transformation_method: str = "yeo-johnson",
+                            polynomial_features: bool = False, interaction_features: list[str] = None, bin_numeric_features: list[str] = None,
+                            feature_selection: bool = False, feature_selection_method: str = "classic", n_features_to_select: float = 0.2,
+                            fold_strategy: str = "kfold", fold: int = 10, n_jobs: int = -1, test_data_path: str = None, optimize: str = None,
+                            include_models: list[str] = None, exclude_models: list[str] = None) -> str:
+     """
+     Train a classification model using PyCaret with advanced configuration.
+
+     - NOTE: Please use absolute paths for data_path and test_data_path to avoid path resolution errors.
+
+     Args:
+         data_path: Path to dataset (csv/parquet/json).
+         target_column: Name of target column.
+         test_data_path: Optional path to a specific test dataset. If provided, it is used for evaluation/holdout.
+         optimize: Metric to optimize for (e.g., 'Accuracy', 'AUC', 'Recall', 'Precision', 'F1', 'Kappa', 'MCC'). Default is 'Accuracy'.
+         include_models: List of model IDs to include in comparison (e.g., ['lr', 'dt', 'rf']). If None, all models are compared.
+         exclude_models: List of model IDs to exclude from comparison (e.g., ['catboost']). If None, no models are excluded.
+         ignore_features: Features to ignore.
+         numeric_features: Features to treat as numeric.
+         categorical_features: Features to treat as categorical.
+         ordinal_features: Dictionary of ordinal features and their order (e.g., {'grade': ['low', 'medium', 'high']}).
+         date_features: Features to treat as dates.
+         text_features: Features to treat as text (for TF-IDF, etc.).
+         keep_features: Features to ensure are kept.
+         imputation_type: 'simple' or 'iterative' (default: 'simple').
+         numeric_imputation: 'mean', 'median', 'mode', or an int/float (default: 'mean').
+         categorical_imputation: 'mode' or a constant string (default: 'mode').
+         fix_imbalance: If True, fix class imbalance in the training data (default: False).
+         remove_outliers: If True, remove outliers from the training data (default: False).
+         normalize: If True, scale features (default: False). Recommended for linear models.
+         normalize_method: 'zscore', 'minmax', 'maxabs', or 'robust' (default: 'zscore').
+         transformation: If True, apply a power transformation to make the data more Gaussian (default: False).
+         transformation_method: 'yeo-johnson' or 'quantile' (default: 'yeo-johnson').
+         polynomial_features: If True, create polynomial features (default: False).
+         interaction_features: List of features to create interactions for.
+         bin_numeric_features: List of numeric features to bin into categories.
+         feature_selection: If True, select the best features (default: False).
+         feature_selection_method: 'classic', 'univariate', or 'sequential' (default: 'classic').
+         n_features_to_select: Fraction (0.0-1.0) or number of features to select (default: 0.2).
+         fold_strategy: 'kfold', 'stratifiedkfold', 'groupkfold', or 'timeseries' (default: 'kfold').
+         fold: Number of folds (default: 10).
+         n_jobs: Number of jobs to run in parallel (-1 for all cores).
+
+     Returns:
+         JSON string with run_id, model_path, metrics, feature_importances, and report_path.
+     """
+     try:
+         run_id = str(uuid.uuid4())
+         await ctx.report_progress(0, 100)
+         await ctx.info(f"Starting advanced classification training run {run_id}")
+
+         result = await asyncio.to_thread(
+             _train_classifier_sync,
+             run_id, data_path, target_column, ignore_features, numeric_features, categorical_features,
+             ordinal_features, date_features, text_features, keep_features,
+             imputation_type, numeric_imputation, categorical_imputation,
+             fix_imbalance, remove_outliers, normalize, normalize_method,
+             transformation, transformation_method,
+             polynomial_features, interaction_features, bin_numeric_features,
+             feature_selection, feature_selection_method, n_features_to_select,
+             fold_strategy, fold, n_jobs, test_data_path, optimize,
+             include_models, exclude_models
+         )
+
+         await ctx.report_progress(100, 100)
+         await ctx.info(f"Finished classification training run {run_id}")
+         return result
+     except Exception as e:
+         logger.error(f"Error training classifier: {e}", exc_info=True)
+         return f"Error training classifier: {str(e)}"
+
+ def _train_regressor_sync(run_id: str, data_path: str, target_column: str, ignore_features: list[str], numeric_features: list[str], categorical_features: list[str],
+                           ordinal_features: dict[str, list[str]], date_features: list[str], text_features: list[str], keep_features: list[str],
+                           imputation_type: str, numeric_imputation: str, categorical_imputation: str,
+                           remove_outliers: bool, normalize: bool, normalize_method: str,
+                           transformation: bool, transformation_method: str,
+                           polynomial_features: bool, interaction_features: list[str], bin_numeric_features: list[str],
+                           feature_selection: bool, feature_selection_method: str, n_features_to_select: float,
+                           fold_strategy: str, fold: int, n_jobs: int, test_data_path: str = None, optimize: str = "R2",
+                           include_models: list[str] = None, exclude_models: list[str] = None) -> str:
+     """Synchronous helper for regressor training."""
+     # Use unified loader for consistent dtypes
+     data = _load_dataframe_fast(data_path)
+
+     session_id = DEFAULT_SESSION_ID
+
+     # Handle test data
+     test_data = None
+     if test_data_path:
+         test_data = _load_dataframe_fast(test_data_path)
+
+         # Ensure unique indices across train and test
+         data.reset_index(drop=True, inplace=True)
+         test_data.reset_index(drop=True, inplace=True)
+         test_data.index = test_data.index + len(data)
+
+     # Build setup kwargs; None values are dropped below so PyCaret defaults take over
+     setup_params = {
+         "data": data,
+         "test_data": test_data,
+         "target": target_column,
+         "session_id": session_id,
+         "verbose": False,
+         "html": False,
+         "ignore_features": ignore_features,
+         "numeric_features": numeric_features,
+         "categorical_features": categorical_features,
+         "ordinal_features": ordinal_features,
+         "date_features": date_features,
+         "text_features": text_features,
+         "keep_features": keep_features,
+         "imputation_type": imputation_type,
+         "numeric_imputation": numeric_imputation,
+         "categorical_imputation": categorical_imputation,
+         "remove_outliers": remove_outliers,
+         "normalize": normalize,
+         "normalize_method": normalize_method,
+         "transformation": transformation,
+         "transformation_method": transformation_method,
+         "polynomial_features": polynomial_features,
+         "interaction_features": interaction_features,
+         "bin_numeric_features": bin_numeric_features,
+         "feature_selection": feature_selection,
+         "feature_selection_method": feature_selection_method,
+         "n_features_to_select": n_features_to_select,
+         "fold_strategy": fold_strategy,
+         "fold": fold,
+         "n_jobs": n_jobs
+     }
+     # Remove None values
+     setup_params = {k: v for k, v in setup_params.items() if v is not None}
+
+     setup_reg(**setup_params)
+
+     feature_info = _get_feature_info(get_config_reg, target_column)
+
+     # Only pass sort/include/exclude if specified
+     compare_kwargs = {"n_select": 1, "verbose": False}
+     if optimize is not None:
+         compare_kwargs["sort"] = optimize
+     if include_models is not None:
+         compare_kwargs["include"] = include_models
+     if exclude_models is not None:
+         compare_kwargs["exclude"] = exclude_models
+
+     best_model = compare_models_reg(**compare_kwargs)
+     if isinstance(best_model, list):
+         if not best_model:
+             raise ValueError("compare_models returned an empty list. Try relaxing constraints or collecting more data.")
+         best_model = best_model[0]
+     results = pull_reg()
+
+     # Extract feature importances
+     feature_importances = _get_feature_importances(best_model, get_config_reg)
+
+     # Evaluate on the holdout (test_data or internal split)
+     predict_model_reg(best_model)
+     test_results = pull_reg()
+
+     metadata = {
+         "data_path": data_path,
+         "test_data_path": test_data_path,
+         "target_column": target_column,
+         "session_id": session_id,
+         "task": "regression",
+         "include_models": include_models,
+         "exclude_models": exclude_models,
+         **setup_params,  # Include all setup params in metadata
+         **feature_info
+     }
+     # Remove DataFrames that came in via setup_params; they are not JSON-serializable
+     metadata.pop("data", None)
+     metadata.pop("test_data", None)
+
+     return _save_results(run_id, best_model, results, save_model_reg, metadata, test_results, feature_importances)
+
+ @mcp.tool()
+ async def train_regressor(data_path: str, target_column: str, ctx: Context,
+                           ignore_features: list[str] = None, numeric_features: list[str] = None, categorical_features: list[str] = None,
+                           ordinal_features: dict[str, list[str]] = None, date_features: list[str] = None, text_features: list[str] = None, keep_features: list[str] = None,
+                           imputation_type: str = "simple", numeric_imputation: str = "mean", categorical_imputation: str = "mode",
+                           remove_outliers: bool = False, normalize: bool = False, normalize_method: str = "zscore",
+                           transformation: bool = False, transformation_method: str = "yeo-johnson",
+                           polynomial_features: bool = False, interaction_features: list[str] = None, bin_numeric_features: list[str] = None,
+                           feature_selection: bool = False, feature_selection_method: str = "classic", n_features_to_select: float = 0.2,
+                           fold_strategy: str = "kfold", fold: int = 10, n_jobs: int = -1, test_data_path: str = None, optimize: str = "R2",
+                           include_models: list[str] = None, exclude_models: list[str] = None) -> str:
+     """
+     Train a regression model using PyCaret with advanced configuration.
+
+     - NOTE: Please use absolute paths for data_path and test_data_path to avoid path resolution errors.
+
+     Args:
+         data_path: Path to dataset (csv/parquet/json).
+         target_column: Name of target column.
+         test_data_path: Optional path to a specific test dataset. If provided, it is used for evaluation/holdout.
+         optimize: Metric to optimize for (e.g., 'R2', 'RMSE', 'MAE', 'MSE', 'RMSLE', 'MAPE'). Default is 'R2'.
+         include_models: List of model IDs to include in comparison (e.g., ['lr', 'dt', 'rf']). If None, all models are compared.
+         exclude_models: List of model IDs to exclude from comparison (e.g., ['catboost']). If None, no models are excluded.
+         ignore_features: Features to ignore.
+         numeric_features: Features to treat as numeric.
+         categorical_features: Features to treat as categorical.
+         ordinal_features: Dictionary of ordinal features and their order.
+         date_features: Features to treat as dates.
+         text_features: Features to treat as text (for TF-IDF, etc.).
+         keep_features: Features to ensure are kept.
+         imputation_type: 'simple' or 'iterative' (default: 'simple').
+         numeric_imputation: 'mean', 'median', 'mode', or an int/float (default: 'mean').
+         categorical_imputation: 'mode' or a constant string (default: 'mode').
+         remove_outliers: If True, remove outliers from the training data (default: False).
+         normalize: If True, scale features (default: False). Recommended for linear models.
+         normalize_method: 'zscore', 'minmax', 'maxabs', or 'robust' (default: 'zscore').
+         transformation: If True, apply a power transformation to make the data more Gaussian (default: False).
+         transformation_method: 'yeo-johnson' or 'quantile' (default: 'yeo-johnson').
+         polynomial_features: If True, create polynomial features (default: False).
+         interaction_features: List of features to create interactions for.
+         bin_numeric_features: List of numeric features to bin into categories.
+         feature_selection: If True, select the best features (default: False).
+         feature_selection_method: 'classic', 'univariate', or 'sequential' (default: 'classic').
+         n_features_to_select: Fraction (0.0-1.0) or number of features to select (default: 0.2).
+         fold_strategy: 'kfold', 'stratifiedkfold', 'groupkfold', or 'timeseries' (default: 'kfold').
+         fold: Number of folds (default: 10).
+         n_jobs: Number of jobs to run in parallel (-1 for all cores).
+
+     Returns:
+         JSON string with run_id, model_path, metrics, feature_importances, and report_path.
+     """
+     try:
+         run_id = str(uuid.uuid4())
+         await ctx.report_progress(0, 100)
+         await ctx.info(f"Starting advanced regression training run {run_id}")
+
+         result = await asyncio.to_thread(
+             _train_regressor_sync,
+             run_id, data_path, target_column, ignore_features, numeric_features, categorical_features,
+             ordinal_features, date_features, text_features, keep_features,
+             imputation_type, numeric_imputation, categorical_imputation,
+             remove_outliers, normalize, normalize_method,
+             transformation, transformation_method,
+             polynomial_features, interaction_features, bin_numeric_features,
+             feature_selection, feature_selection_method, n_features_to_select,
+             fold_strategy, fold, n_jobs, test_data_path, optimize,
+             include_models, exclude_models
+         )
+
+         await ctx.report_progress(100, 100)
+         await ctx.info(f"Finished regression training run {run_id}")
+         return result
+     except Exception as e:
+         logger.error(f"Error training regressor: {e}", exc_info=True)
+         return f"Error training regressor: {str(e)}"
+
+ def _predict_sync(run_id: str, data_path: str) -> str:
+     """Synchronous helper for predictions."""
+     run_dir = os.path.join(EXPERIMENT_DIR, run_id)
+     metadata_path = os.path.join(run_dir, "metadata.json")
+
+     if not os.path.exists(metadata_path):
+         return f"Error: Run ID {run_id} not found."
+
+     with open(metadata_path, "r") as f:
+         metadata = json.load(f)
+
+     task = metadata.get("task")
+     model_path = os.path.join(run_dir, "model")
+
+     # Load data using the unified loader
+     if not os.path.exists(data_path):
+         return f"Error: Data file not found at {data_path}"
+
+     try:
+         input_data = _load_dataframe_fast(data_path)
+     except Exception as e:
+         return f"Error loading data file: {str(e)}"
+
+     if task == "classification":
+         model = load_model_clf(model_path)
+         predictions = predict_model_clf(model, data=input_data)
+     elif task == "regression":
+         model = load_model_reg(model_path)
+         predictions = predict_model_reg(model, data=input_data)
+     else:
+         return f"Error: Unknown task type '{task}' in metadata."
+
+     # Save predictions
+     predictions_dir = os.path.join(run_dir, "predictions")
+     os.makedirs(predictions_dir, exist_ok=True)
+
+     prediction_id = str(uuid.uuid4())
+     prediction_file = f"prediction_{prediction_id}.json"
+     prediction_path = os.path.join(predictions_dir, prediction_file)
+
+     predictions.to_json(prediction_path, orient="records", indent=2)
+
+     return prediction_path
+
+ @mcp.tool()
+ async def predict(run_id: str, data_path: str, ctx: Context = None) -> str:
+     """
+     Make predictions using a trained model.
+
+     - NOTE: Please use absolute paths for data_path to avoid path resolution errors.
+
+     Args:
+         run_id: The ID of the training run (returned by train_classifier or train_regressor).
+         data_path: The path to the CSV, Parquet, or JSON file containing the input data.
+
+     Returns:
+         The absolute path to the JSON file containing the predictions.
+     """
+     try:
+         if ctx:
+             await ctx.report_progress(0, 100)
+             await ctx.info("Loading model and making predictions...")
+
+         result = await asyncio.to_thread(_predict_sync, run_id, data_path)
+
+         if ctx:
+             await ctx.report_progress(100, 100)
+             await ctx.info("Prediction complete")
+
+         return result
+     except Exception as e:
+         logger.error(f"Error making predictions: {e}", exc_info=True)
+         return f"Error making predictions: {str(e)}"
+
+ def _inspect_data_sync(data_path: str, n_rows: int = 5) -> str:
+     """Synchronous helper for data inspection using the unified loader."""
+     # Sample size of 10,000 rows for statistics computation on large files
+     SAMPLE_SIZE = 10000
+
+     # Get the total row count first
+     con = duckdb.connect(database=':memory:')
+     total_rows = con.execute(f"SELECT COUNT(*) FROM '{data_path}'").fetchone()[0]
+
+     # Load data using the unified loader (with sampling if needed)
+     data = _load_dataframe_fast(data_path, sample_size=SAMPLE_SIZE)
+
+     # Structure
+     structure = {
+         "rows": total_rows,  # Report the actual total
+         "columns": len(data.columns),
+         "column_names": list(data.columns),
+         "dtypes": data.dtypes.astype(str).to_dict()
+     }
+
+     # Statistics (computed on the sample if the file is large)
+     stats = {
+         "missing_values": data.isnull().sum().to_dict(),
+         "missing_ratio": (data.isnull().sum() / len(data)).to_dict(),
+         "unique_values": data.nunique().to_dict()
+     }
+
+     # Add a note if sampling was used
+     if total_rows > SAMPLE_SIZE:
+         stats["⚠️ note"] = f"Statistics computed on {SAMPLE_SIZE} row sample from {total_rows} total rows"
+
+     # Previews (column-oriented): always take head/tail from the original file
+     preview_df = con.execute(f"SELECT * FROM '{data_path}' LIMIT {n_rows}").df()
+     tail_df = con.execute(f"""
+         SELECT * FROM '{data_path}'
+         OFFSET {max(0, total_rows - n_rows)}
+     """).df()
+
+     seed = 42
+     previews = {
+         "head": preview_df.to_dict(orient="list"),
+         "tail": tail_df.to_dict(orient="list"),
+         "sample": data.sample(min(n_rows, len(data)), random_state=seed).to_dict(orient="list")
+     }
+
+     return json.dumps({
+         "structure": structure,
+         "statistics": stats,
+         "previews": previews
+     }, indent=2, cls=PandasJSONEncoder)
+
+ @mcp.tool()
+ async def inspect_data(data_path: str, n_rows: int = 5, ctx: Context = None) -> str:
+     """
+     Get comprehensive statistics and a preview of the dataset to understand its quality and structure.
+     Use this to check for missing values, unique counts, and basic data types.
+
+     - NOTE: Please use absolute paths for data_path to avoid path resolution errors.
+     - NOTE: For files larger than 10,000 rows, statistics are computed on a random sample for performance.
+
+     Args:
+         data_path: Path to the CSV, Parquet, or JSON file.
+         n_rows: Number of rows to show in head/tail/sample previews (default: 5).
+
+     Returns:
+         JSON string containing structure, statistics, and previews.
+     """
+     try:
+         if ctx:
+             await ctx.report_progress(0, 100)
+             await ctx.info(f"Inspecting data from {data_path}")
+
+         result = await asyncio.to_thread(_inspect_data_sync, data_path, n_rows)
+
+         if ctx:
+             await ctx.report_progress(100, 100)
+             await ctx.info("Data inspection complete")
+
+         return result
+     except Exception as e:
+         logger.error(f"Error inspecting data: {e}", exc_info=True)
+         return f"Error inspecting data: {str(e)}"
+
+ def _query_data_sync(query: str) -> str:
+     """Synchronous helper for DuckDB queries."""
+     con = duckdb.connect(database=':memory:')
+
+     # Rather than parsing SQL to enforce a LIMIT clause, simply truncate the result frame.
+     df = con.execute(query).df()
+
+     if len(df) > QUERY_RESULT_LIMIT:
+         df = df.head(QUERY_RESULT_LIMIT)
+
+     return df.to_json(orient="records", date_format="iso")
+
+ @mcp.tool()
+ async def query_data(query: str, ctx: Context = None) -> str:
+     """
+     Execute a DuckDB SQL query on data files (CSV, Parquet, JSON) to gain deeper insights.
+
+     CRITICAL: This is your PRIMARY tool for advanced data exploration.
+     - Use this to aggregate data (GROUP BY), join multiple files, calculate derived metrics, or inspect specific subsets.
+     - Prefer this over 'inspect_data' when you need to answer specific questions about the data distribution or relationships.
+     - You can query files directly in the FROM clause, e.g., "SELECT category, AVG(price) FROM 'data.csv' GROUP BY category".
+
+     - NOTE: Please use absolute paths for files in your FROM clause to avoid path resolution errors.
+
+     Args:
+         query: Standard DuckDB SQL query.
+
+     Returns:
+         JSON string containing the query results (limited to 100 rows).
+     """
+     try:
+         if ctx:
+             await ctx.report_progress(0, 100)
+             await ctx.info("Executing query...")
+
+         result = await asyncio.to_thread(_query_data_sync, query)
+
+         if ctx:
+             await ctx.report_progress(100, 100)
+             await ctx.info("Query complete")
+
+         return result
+     except Exception as e:
+         logger.error(f"Error executing query: {e}", exc_info=True)
+         return f"Error executing query: {str(e)}"
+
+ def _process_data_sync(query: str, output_path: str) -> str:
+     """Synchronous helper for process_data."""
+     try:
+         con = duckdb.connect(database=':memory:')
+
+         df = con.execute(query).df()
+
+         if output_path.endswith(".csv"):
+             df.to_csv(output_path, index=False)
+         elif output_path.endswith(".parquet"):
+             df.to_parquet(output_path, index=False)
+         elif output_path.endswith(".json"):
+             df.to_json(output_path, orient="records", indent=2)
+         else:
+             return "Error: Output path must end with .csv, .parquet, or .json"
+
+         return f"Successfully processed data and saved to {output_path}. Rows: {len(df)}"
+     except Exception as e:
+         return f"Error processing data: {str(e)}"
+
+ @mcp.tool()
+ async def process_data(query: str, output_path: str, ctx: Context) -> str:
+     """
+     Execute a DuckDB SQL query to transform data and save it to a new file.
+
+     CRITICAL: This is your PRIMARY tool for feature engineering and data cleaning.
+     - Use this to create new features, clean dirty data, handle missing values (COALESCE), or join datasets.
+     - You MUST use this tool to prepare the data before training if feature engineering is needed.
+     - Example: "SELECT *, price/sqft as price_per_sqft, COALESCE(garage, 0) as garage_clean FROM 'train.csv'"
+
+     IMPORTANT: It is strongly RECOMMENDED to use the '.parquet' extension for output_path (e.g., 'clean_data.parquet').
+     - Parquet preserves data types (int, float, string, date) much better than CSV.
+     - CSV often loses type information (everything becomes a string or is inferred incorrectly).
+
+     - NOTE: Please use absolute paths for files in your query and for output_path to avoid path resolution errors.
+
+     Args:
+         query: Standard DuckDB SQL query.
+         output_path: Absolute path to save the result (must be .csv, .parquet, or .json).
+
+     Returns:
+         Confirmation message with the output path.
+     """
+     try:
+         await ctx.report_progress(0, 100)
+         await ctx.info("Starting data processing task...")
+
+         result = await asyncio.to_thread(_process_data_sync, query, output_path)
+
+         await ctx.report_progress(100, 100)
+         await ctx.info("Finished data processing task.")
+         return result
+     except Exception as e:
+         return f"Error in process_data: {str(e)}"
+
+ def main():
+     """Main entry point with argument parsing."""
+     global EXPERIMENT_DIR, DEFAULT_SESSION_ID
+
+     parser = argparse.ArgumentParser(
+         description='MCP PyCaret Server - AutoML service using PyCaret',
+         formatter_class=argparse.ArgumentDefaultsHelpFormatter
+     )
+     parser.add_argument(
+         '--experiment-dir',
+         type=str,
+         default=EXPERIMENT_DIR,
+         help='Directory to store experiment results and trained models'
+     )
+     parser.add_argument(
+         '--session-id',
+         type=int,
+         default=DEFAULT_SESSION_ID,
+         help='Random seed for reproducibility'
+     )
+
+     args = parser.parse_args()
+
+     # Update module-level configuration
+     EXPERIMENT_DIR = os.path.expanduser(args.experiment_dir)
+     DEFAULT_SESSION_ID = args.session_id
+
+     # Ensure the experiment directory exists
+     Path(EXPERIMENT_DIR).mkdir(parents=True, exist_ok=True)
+
+     logger.info("Starting MCP PyCaret Server")
+     logger.info(f"Experiment directory: {EXPERIMENT_DIR}")
+     logger.info(f"Session ID: {DEFAULT_SESSION_ID}")
+
+     mcp.run()
+
+ if __name__ == "__main__":
+     main()
mcp_automl-0.1.0.dist-info/METADATA ADDED
@@ -0,0 +1,90 @@
+ Metadata-Version: 2.4
+ Name: mcp-automl
+ Version: 0.1.0
+ Summary: MCP server for end-to-end machine learning
+ Requires-Python: >=3.11
+ Requires-Dist: duckdb>=1.4.3
+ Requires-Dist: joblib<1.4
+ Requires-Dist: mcp>=1.21.2
+ Requires-Dist: pandas<2.2.0
+ Requires-Dist: pycaret>=3.0.0
+ Requires-Dist: scikit-learn<1.4
+ Requires-Dist: tabulate>=0.9.0
+ Description-Content-Type: text/markdown
+
+ # MCP AutoML
+
+ MCP AutoML is a server that enables AI agents to perform end-to-end machine learning workflows, including data inspection, data processing, and model training. With MCP AutoML, AI agents can do more than a typical AutoML framework allows: they can identify the target, set a baseline, and create features by themselves.
+
+ MCP AutoML separates tools from workflows, allowing you to create your own workflow.
+
+ ## Features
+
+ - **Data Inspection**: Analyze datasets with comprehensive statistics, data types, and previews
+ - **SQL-based Data Processing**: Transform and engineer features using DuckDB SQL queries
+ - **AutoML Training**: Train classification and regression models with automatic model comparison using PyCaret
+ - **Prediction**: Make predictions using trained models
+ - **Multi-format Support**: Works with CSV, Parquet, and JSON files
+
+ ## Usage
+
+ ### Configure MCP Server
+
+ Add to your MCP client configuration (e.g., Claude Desktop, Gemini CLI, Cursor, Antigravity):
+
+ ```json
+ {
+   "mcpServers": {
+     "mcp-automl": {
+       "command": "uvx",
+       "args": ["--from", "git+https://github.com/idea7766/mcp-automl", "mcp-automl"]
+     }
+   }
+ }
+ ```
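+
+ The server entry point also accepts optional command-line flags (defined in `main()` in `mcp_automl/server.py`). A minimal sketch of a standalone run, assuming `uvx` is available on your PATH (the directory and seed below are examples):
+
+ ```bash
+ uvx --from git+https://github.com/idea7766/mcp-automl mcp-automl \
+   --experiment-dir ~/my-experiments \
+   --session-id 123
+ ```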
+
+ ### Available Tools
+
+ | Tool | Description |
+ |------|-------------|
+ | `inspect_data` | Get comprehensive statistics and a preview of a dataset |
+ | `query_data` | Execute DuckDB SQL queries on data files |
+ | `process_data` | Transform data using SQL and save to a new file |
+ | `train_classifier` | Train a classification model with AutoML |
+ | `train_regressor` | Train a regression model with AutoML |
+ | `predict` | Make predictions using a trained model |
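+
+ Beyond chat-driven use, any MCP client can call these tools over stdio. A minimal sketch using the official `mcp` Python SDK (the data path and target column below are placeholders):
+
+ ```python
+ import asyncio
+
+ from mcp import ClientSession, StdioServerParameters
+ from mcp.client.stdio import stdio_client
+
+ async def main():
+     params = StdioServerParameters(
+         command="uvx",
+         args=["--from", "git+https://github.com/idea7766/mcp-automl", "mcp-automl"],
+     )
+     async with stdio_client(params) as (read, write):
+         async with ClientSession(read, write) as session:
+             await session.initialize()
+             # Inspect the dataset first, then train a classifier on it
+             await session.call_tool("inspect_data", {"data_path": "/abs/path/train.csv"})
+             result = await session.call_tool("train_classifier", {
+                 "data_path": "/abs/path/train.csv",  # placeholder path
+                 "target_column": "label",            # placeholder column
+             })
+             print(result.content)
+
+ asyncio.run(main())
+ ```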
+
+ ## Agent Skill
+
+ MCP AutoML includes a **data science workflow** skill that guides AI agents through best practices for machine learning projects. This skill teaches agents to:
+
+ - Identify targets and establish baselines
+ - Perform exploratory data analysis
+ - Engineer domain-specific features
+ - Train and evaluate models systematically
+
+ ### Installing the Skill
+
+ Copy the skill directory to your agent's skill folder:
+
+ ```bash
+ # For Gemini Code Assist
+ cp -r skill/data-science-workflow ~/.gemini/skills/
+
+ # For Claude Code
+ cp -r skill/data-science-workflow ~/.claude/skills/
+
+ # For other agents, copy to their respective skill directories
+ ```
+
+ The skill file is located at `skill/data-science-workflow/SKILL.md`.
+
+ ## Configuration
+
+ Models and experiments are saved to `~/.mcp-automl/experiments/` by default.
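+
+ Each run is stored in a subdirectory named by its `run_id`, containing the model (`model.pkl`), `metadata.json`, and a `result.html` report. Because models are saved with PyCaret's `save_model`, they can also be loaded outside the server; a minimal sketch, assuming a finished classification run (`<run_id>` is a placeholder):
+
+ ```python
+ import os
+
+ import pandas as pd
+ from pycaret.classification import load_model, predict_model
+
+ run_dir = os.path.expanduser("~/.mcp-automl/experiments/<run_id>")
+ # PyCaret's load_model expects the model path without the .pkl extension
+ model = load_model(os.path.join(run_dir, "model"))
+ predictions = predict_model(model, data=pd.read_csv("new_data.csv"))  # placeholder file
+ ```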
+
+ ## Dependencies
+
+ - [PyCaret](https://pycaret.org/) - AutoML library
+ - [DuckDB](https://duckdb.org/) - Fast SQL analytics
+ - [MCP](https://github.com/modelcontextprotocol/python-sdk) - Model Context Protocol SDK
mcp_automl-0.1.0.dist-info/RECORD ADDED
@@ -0,0 +1,7 @@
+ mcp_automl/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ mcp_automl/__main__.py,sha256=ncTRY5zgcNOS7JcLnCVhPd9KsxjyREC245P2eo33BuI,74
+ mcp_automl/server.py,sha256=rk8mQFSm-Y-p5-6DqvPkdiUN6WQrC7jXGXTb4Byedgw,42435
+ mcp_automl-0.1.0.dist-info/METADATA,sha256=HEkgZj9ePTMKUGsnWYeQrldrE5KKptv4wz_fV43sBPA,3006
+ mcp_automl-0.1.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
+ mcp_automl-0.1.0.dist-info/entry_points.txt,sha256=7QuAE_HatGpFE7Ul7hqNHmpaMf0Ug86aFkaCXofjhLg,54
+ mcp_automl-0.1.0.dist-info/RECORD,,
mcp_automl-0.1.0.dist-info/WHEEL ADDED
@@ -0,0 +1,4 @@
+ Wheel-Version: 1.0
+ Generator: hatchling 1.28.0
+ Root-Is-Purelib: true
+ Tag: py3-none-any
mcp_automl-0.1.0.dist-info/entry_points.txt ADDED
@@ -0,0 +1,2 @@
+ [console_scripts]
+ mcp-automl = mcp_automl.server:main