mcp-automl 0.1.5__tar.gz → 0.1.7__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {mcp_automl-0.1.5 → mcp_automl-0.1.7}/PKG-INFO +3 -2
- {mcp_automl-0.1.5 → mcp_automl-0.1.7}/pyproject.toml +3 -3
- {mcp_automl-0.1.5 → mcp_automl-0.1.7}/src/mcp_automl/server.py +45 -8
- mcp_automl-0.1.7/tests/test_formats.py +451 -0
- {mcp_automl-0.1.5 → mcp_automl-0.1.7}/uv.lock +47 -8
- {mcp_automl-0.1.5 → mcp_automl-0.1.7}/.dockerignore +0 -0
- {mcp_automl-0.1.5 → mcp_automl-0.1.7}/.github/workflows/docker-publish.yml +0 -0
- {mcp_automl-0.1.5 → mcp_automl-0.1.7}/.github/workflows/publish.yml +0 -0
- {mcp_automl-0.1.5 → mcp_automl-0.1.7}/.github/workflows/test.yml +0 -0
- {mcp_automl-0.1.5 → mcp_automl-0.1.7}/.gitignore +0 -0
- {mcp_automl-0.1.5 → mcp_automl-0.1.7}/.python-version +0 -0
- {mcp_automl-0.1.5 → mcp_automl-0.1.7}/Dockerfile +0 -0
- {mcp_automl-0.1.5 → mcp_automl-0.1.7}/LICENSE +0 -0
- {mcp_automl-0.1.5 → mcp_automl-0.1.7}/README.md +0 -0
- {mcp_automl-0.1.5 → mcp_automl-0.1.7}/skill/data-science-workflow/SKILL.md +0 -0
- {mcp_automl-0.1.5 → mcp_automl-0.1.7}/src/mcp_automl/__init__.py +0 -0
- {mcp_automl-0.1.5 → mcp_automl-0.1.7}/src/mcp_automl/__main__.py +0 -0
- {mcp_automl-0.1.5 → mcp_automl-0.1.7}/tests/test_server.py +0 -0

{mcp_automl-0.1.5 → mcp_automl-0.1.7}/PKG-INFO

@@ -1,14 +1,15 @@
 Metadata-Version: 2.4
 Name: mcp-automl
-Version: 0.1.5
+Version: 0.1.7
 Summary: MCP server for end-to-end machine learning
 Author-email: ke <idea7766@gmail.com>
 License-File: LICENSE
 Requires-Python: <3.12,>=3.10
-Requires-Dist: duckdb>=1.4.3
+Requires-Dist: duckdb[all]>=1.4.3
 Requires-Dist: joblib<1.4
 Requires-Dist: mcp>=1.21.2
 Requires-Dist: pandas<2.2.0
+Requires-Dist: pyarrow>=23.0.0
 Requires-Dist: pycaret>=3.0.0
 Requires-Dist: scikit-learn<1.4
 Requires-Dist: tabulate>=0.9.0

{mcp_automl-0.1.5 → mcp_automl-0.1.7}/pyproject.toml

@@ -1,14 +1,15 @@
 [project]
 name = "mcp-automl"
-version = "0.1.5"
+version = "0.1.7"
 description = "MCP server for end-to-end machine learning"
 readme = "README.md"
 requires-python = ">=3.10,<3.12"
 dependencies = [
-    "duckdb>=1.4.3",
+    "duckdb[all]>=1.4.3",
     "joblib<1.4",
     "mcp>=1.21.2",
     "pandas<2.2.0",
+    "pyarrow>=23.0.0",
     "pycaret>=3.0.0",
     "scikit-learn<1.4",
     "tabulate>=0.9.0",

@@ -30,5 +31,4 @@ package = true
 [dependency-groups]
 dev = [
     "pytest-asyncio>=1.3.0",
-    "pyarrow>=14.0.0",
 ]
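
The dependency changes above promote pyarrow from the dev group to a runtime requirement and widen duckdb to its [all] extra. One plausible reading (an inference, not stated in the diff) is that Parquet handling in pandas needs an engine such as pyarrow at runtime, which the new format tests below exercise. A minimal sketch of that dependency, not code from the package:

import pandas as pd

# pandas delegates Parquet I/O to an engine (pyarrow here); without one,
# to_parquet()/read_parquet() raise ImportError at runtime.
df = pd.DataFrame({"int_col": [1, 2, 3], "target": [0, 1, 0]})
df.to_parquet("data.parquet", index=False)
print(pd.read_parquet("data.parquet").shape)  # (3, 2)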

{mcp_automl-0.1.5 → mcp_automl-0.1.7}/src/mcp_automl/server.py

@@ -10,8 +10,8 @@ import argparse
 from pathlib import Path
 from mcp.server.fastmcp import FastMCP, Context
 from mcp.types import PromptMessage, TextContent
-from pycaret.classification import setup as setup_clf, compare_models as compare_models_clf, pull as pull_clf, save_model as save_model_clf, load_model as load_model_clf, predict_model as predict_model_clf, get_config as get_config_clf
-from pycaret.regression import setup as setup_reg, compare_models as compare_models_reg, pull as pull_reg, save_model as save_model_reg, load_model as load_model_reg, predict_model as predict_model_reg, get_config as get_config_reg
+from pycaret.classification import setup as setup_clf, compare_models as compare_models_clf, pull as pull_clf, save_model as save_model_clf, load_model as load_model_clf, predict_model as predict_model_clf, get_config as get_config_clf, tune_model as tune_model_clf, finalize_model as finalize_model_clf
+from pycaret.regression import setup as setup_reg, compare_models as compare_models_reg, pull as pull_reg, save_model as save_model_reg, load_model as load_model_reg, predict_model as predict_model_reg, get_config as get_config_reg, tune_model as tune_model_reg, finalize_model as finalize_model_reg

 # Configure logging
 logging.basicConfig(

@@ -363,12 +363,31 @@ def _train_classifier_sync(run_id: str, data_path: str, target_column: str, igno
         best_model = best_model[0]
     results = pull_clf()

-    #
+    # Tune Model
+    logger.info("Tuning best model with Optuna...")
+    try:
+        best_model = tune_model_clf(best_model, optimize=optimize, search_library="optuna", n_trials=10)
+        results = pull_clf()
+    except Exception as e:
+        logger.warning(f"Tuning failed: {e}. Proceeding with untuned model.")
+
+
+    # Extract feature importances (from the potentially tuned model)
     feature_importances = _get_feature_importances(best_model, get_config_clf)

-    # Evaluate on holdout (
-
-
+    # Evaluate on holdout (test_data_path)
+    test_results = None
+    if test_data_path:
+        logger.info("Evaluating on provided test data...")
+        predict_model_clf(best_model)
+        test_results = pull_clf()
+
+    # Finalize Model
+    logger.info("Finalizing model on all data...")
+    try:
+        best_model = finalize_model_clf(best_model)
+    except Exception as e:
+        logger.warning(f"Finalization failed: {e}. Saving non-finalized model.")

     metadata = {
         "data_path": data_path,

@@ -543,12 +562,30 @@ def _train_regressor_sync(run_id: str, data_path: str, target_column: str, ignor
         best_model = best_model[0]
     results = pull_reg()

+    # Tune Model
+    logger.info("Tuning best model with Optuna...")
+    try:
+        best_model = tune_model_reg(best_model, optimize=optimize, search_library="optuna", n_trials=10)
+        results = pull_reg()
+    except Exception as e:
+        logger.warning(f"Tuning failed: {e}. Proceeding with untuned model.")
+
     # Extract feature importances
     feature_importances = _get_feature_importances(best_model, get_config_reg)

     # Evaluate on holdout
-
-
+    test_results = None
+    if test_data_path:
+        logger.info("Evaluating on provided test data...")
+        predict_model_reg(best_model)
+        test_results = pull_reg()
+
+    # Finalize Model
+    logger.info("Finalizing model on all data...")
+    try:
+        best_model = finalize_model_reg(best_model)
+    except Exception as e:
+        logger.warning(f"Finalization failed: {e}. Saving non-finalized model.")

     metadata = {
         "data_path": data_path,
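
Both training paths now share the same post-compare sequence: tune the selected model, evaluate on held-out data when a test set is supplied, then finalize before saving. For orientation, here is a minimal self-contained sketch of that PyCaret 3.x flow; the data file, target column, and save step are illustrative assumptions, not the server's exact code:

import pandas as pd
from pycaret.classification import (
    setup, compare_models, tune_model, finalize_model, save_model, pull,
)

df = pd.read_csv("train.csv")                     # hypothetical training data
setup(data=df, target="target", session_id=42)    # preprocessing + CV setup

best = compare_models(n_select=1)                 # cross-validated model selection
try:
    best = tune_model(best, optimize="Accuracy")  # hyperparameter search; can fail for some estimators
except Exception:
    pass                                          # keep the untuned model, mirroring the server's fallback
print(pull())                                     # metrics table from the last step

final = finalize_model(best)                      # refit on the full dataset
save_model(final, "best_pipeline")                # writes best_pipeline.pkl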

mcp_automl-0.1.7/tests/test_formats.py

@@ -0,0 +1,451 @@
+"""
+Tests for verifying supported document formats (CSV, Parquet, JSON).
+
+These tests ensure that all supported file formats work correctly with
+exposed MCP tools:
+- inspect_data() - data inspection tool
+- query_data() - SQL query tool
+"""
+
+import pytest
+import pandas as pd
+import numpy as np
+import json
+import asyncio
+from pathlib import Path
+
+from mcp_automl.server import (
+    inspect_data,
+    query_data,
+)
+
+
+# =============================================================================
+# Fixtures for creating test data in different formats
+# =============================================================================
+
+@pytest.fixture
+def sample_dataframe():
+    """Create a sample DataFrame for testing."""
+    return pd.DataFrame({
+        'int_col': [1, 2, 3, 4, 5],
+        'float_col': [1.1, 2.2, 3.3, 4.4, 5.5],
+        'str_col': ['a', 'b', 'c', 'd', 'e'],
+        'bool_col': [True, False, True, False, True],
+        'target': [0, 1, 0, 1, 0]
+    })
+
+
+@pytest.fixture
+def sample_csv_file(tmp_path, sample_dataframe):
+    """Create a sample CSV file."""
+    file_path = tmp_path / "data.csv"
+    sample_dataframe.to_csv(file_path, index=False)
+    return str(file_path)
+
+
+@pytest.fixture
+def sample_parquet_file(tmp_path, sample_dataframe):
+    """Create a sample Parquet file."""
+    file_path = tmp_path / "data.parquet"
+    sample_dataframe.to_parquet(file_path, index=False)
+    return str(file_path)
+
+
+@pytest.fixture
+def sample_json_file(tmp_path, sample_dataframe):
+    """Create a sample JSON file (records orient)."""
+    file_path = tmp_path / "data.json"
+    sample_dataframe.to_json(file_path, orient='records')
+    return str(file_path)
+
+
+@pytest.fixture
+def all_format_files(sample_csv_file, sample_parquet_file, sample_json_file):
+    """Return all format files as a dict."""
+    return {
+        'csv': sample_csv_file,
+        'parquet': sample_parquet_file,
+        'json': sample_json_file
+    }
+
+
+# =============================================================================
+# Test inspect_data() with different formats
+# =============================================================================
+
+class TestInspectDataFormats:
+    """Tests for inspect_data() with different file formats."""
+
+    def test_inspect_csv(self, sample_csv_file):
+        """Test inspect_data with CSV file."""
+        result = asyncio.run(inspect_data(sample_csv_file, n_rows=3))
+
+        data = json.loads(result)
+
+        assert "structure" in data
+        assert "statistics" in data
+        assert "previews" in data
+        assert data["structure"]["rows"] == 5
+        assert data["structure"]["columns"] == 5
+
+    def test_inspect_parquet(self, sample_parquet_file):
+        """Test inspect_data with Parquet file."""
+        result = asyncio.run(inspect_data(sample_parquet_file, n_rows=3))
+
+        data = json.loads(result)
+
+        assert "structure" in data
+        assert data["structure"]["rows"] == 5
+        assert data["structure"]["columns"] == 5
+        assert "int_col" in data["structure"]["column_names"]
+
+    def test_inspect_json(self, sample_json_file):
+        """Test inspect_data with JSON file."""
+        result = asyncio.run(inspect_data(sample_json_file, n_rows=3))
+
+        data = json.loads(result)
+
+        assert "structure" in data
+        assert data["structure"]["rows"] == 5
+        assert data["structure"]["columns"] == 5
+
+    def test_inspect_all_formats_consistent(self, all_format_files):
+        """Test that all formats return consistent structure info."""
+        results = {}
+        for fmt, path in all_format_files.items():
+            result = asyncio.run(inspect_data(path, n_rows=3))
+            results[fmt] = json.loads(result)
+
+        # All formats should report same row/column counts
+        for fmt in ['parquet', 'json']:
+            assert results[fmt]["structure"]["rows"] == results['csv']["structure"]["rows"]
+            assert results[fmt]["structure"]["columns"] == results['csv']["structure"]["columns"]
+            assert set(results[fmt]["structure"]["column_names"]) == set(results['csv']["structure"]["column_names"])
+
+    def test_inspect_unsupported_format_returns_error(self, tmp_path):
+        """Test that unsupported format returns error message."""
+        txt_file = tmp_path / "data.txt"
+        txt_file.write_text("some data")
+
+        result = asyncio.run(inspect_data(str(txt_file)))
+
+        assert "Error" in result
+
+
+# =============================================================================
+# Test query_data() with different formats
+# =============================================================================
+
+class TestQueryDataFormats:
+    """Tests for query_data() with different file formats."""
+
+    def test_query_csv(self, sample_csv_file):
+        """Test query_data with CSV file."""
+        query = f"SELECT COUNT(*) as cnt FROM '{sample_csv_file}'"
+        result = asyncio.run(query_data(query))
+
+        data = json.loads(result)
+        assert data[0]["cnt"] == 5
+
+    def test_query_parquet(self, sample_parquet_file):
+        """Test query_data with Parquet file."""
+        query = f"SELECT COUNT(*) as cnt FROM '{sample_parquet_file}'"
+        result = asyncio.run(query_data(query))
+
+        data = json.loads(result)
+        assert data[0]["cnt"] == 5
+
+    def test_query_json(self, sample_json_file):
+        """Test query_data with JSON file."""
+        query = f"SELECT COUNT(*) as cnt FROM '{sample_json_file}'"
+        result = asyncio.run(query_data(query))
+
+        data = json.loads(result)
+        assert data[0]["cnt"] == 5
+
+    def test_query_aggregation_all_formats(self, all_format_files):
+        """Test aggregation query works on all formats."""
+        for fmt, path in all_format_files.items():
+            query = f"SELECT SUM(int_col) as total FROM '{path}'"
+            result = asyncio.run(query_data(query))
+
+            data = json.loads(result)
+            assert data[0]["total"] == 15  # 1+2+3+4+5
+
+    def test_query_filter_all_formats(self, all_format_files):
+        """Test filter query works on all formats."""
+        for fmt, path in all_format_files.items():
+            query = f"SELECT * FROM '{path}' WHERE int_col > 3"
+            result = asyncio.run(query_data(query))
+
+            data = json.loads(result)
+            assert len(data) == 2  # int_col 4 and 5
+
+    def test_query_join_csv_parquet(self, sample_csv_file, sample_parquet_file):
+        """Test joining CSV and Parquet files in a single query."""
+        query = f"""
+            SELECT c.int_col, p.float_col
+            FROM '{sample_csv_file}' c
+            JOIN '{sample_parquet_file}' p ON c.int_col = p.int_col
+            WHERE c.int_col <= 3
+        """
+        result = asyncio.run(query_data(query))
+
+        data = json.loads(result)
+        assert len(data) == 3
+
+
+# =============================================================================
+# Test special cases and edge cases
+# =============================================================================
+
+class TestFormatEdgeCases:
+    """Tests for edge cases in format handling via exposed tools."""
+
+    def test_csv_with_special_characters(self, tmp_path):
+        """Test CSV with special characters in data."""
+        df = pd.DataFrame({
+            'text': ['hello, world', 'foo "bar"', "line1\nline2", 'tab\there'],
+            'target': [0, 1, 0, 1]
+        })
+        file_path = tmp_path / "special.csv"
+        df.to_csv(file_path, index=False)
+
+        result = asyncio.run(inspect_data(str(file_path)))
+        data = json.loads(result)
+        assert data["structure"]["rows"] == 4
+        assert "text" in data["structure"]["column_names"]
+
+    def test_json_nested_to_flat(self, tmp_path):
+        """Test JSON with records orientation (flat structure)."""
+        records = [
+            {"a": 1, "b": "x"},
+            {"a": 2, "b": "y"},
+            {"a": 3, "b": "z"}
+        ]
+        file_path = tmp_path / "flat.json"
+        with open(file_path, 'w') as f:
+            json.dump(records, f)
+
+        result = asyncio.run(inspect_data(str(file_path)))
+        data = json.loads(result)
+        assert data["structure"]["rows"] == 3
+        assert set(data["structure"]["column_names"]) == {'a', 'b'}
+
+    def test_parquet_with_nullable_types(self, tmp_path):
+        """Test Parquet with nullable/arrow types."""
+        df = pd.DataFrame({
+            'nullable_int': pd.array([1, 2, None, 4, 5], dtype="Int64"),
+            'nullable_float': pd.array([1.0, None, 3.0, 4.0, 5.0], dtype="Float64"),
+            'nullable_str': pd.array(['a', 'b', None, 'd', 'e'], dtype="string"),
+            'target': [0, 1, 0, 1, 0]
+        })
+        file_path = tmp_path / "nullable.parquet"
+        df.to_parquet(file_path, index=False)
+
+        result = asyncio.run(inspect_data(str(file_path)))
+        data = json.loads(result)
+        assert data["structure"]["rows"] == 5
+        # Check that nulls are counted correctly
+        assert data["statistics"]["missing_values"]["nullable_int"] == 1
+        assert data["statistics"]["missing_values"]["nullable_float"] == 1
+        assert data["statistics"]["missing_values"]["nullable_str"] == 1
+
+    def test_csv_empty_file(self, tmp_path):
+        """Test handling of empty CSV file with headers only."""
+        file_path = tmp_path / "empty.csv"
+        file_path.write_text("col1,col2,col3\n")
+
+        result = asyncio.run(inspect_data(str(file_path)))
+        data = json.loads(result)
+        assert data["structure"]["rows"] == 0
+        assert set(data["structure"]["column_names"]) == {'col1', 'col2', 'col3'}
+
+    def test_parquet_empty_dataframe(self, tmp_path):
+        """Test handling of empty Parquet file."""
+        df = pd.DataFrame({'col1': [], 'col2': [], 'col3': []})
+        file_path = tmp_path / "empty.parquet"
+        df.to_parquet(file_path, index=False)
+
+        result = asyncio.run(inspect_data(str(file_path)))
+        data = json.loads(result)
+        assert data["structure"]["rows"] == 0
+        assert set(data["structure"]["column_names"]) == {'col1', 'col2', 'col3'}
+
+    def test_json_empty_array(self, tmp_path):
+        """Test handling of empty JSON array."""
+        file_path = tmp_path / "empty.json"
+        file_path.write_text("[]")
+
+        result = asyncio.run(inspect_data(str(file_path)))
+        data = json.loads(result)
+        assert data["structure"]["rows"] == 0
+
+    def test_csv_with_unicode(self, tmp_path):
+        """Test CSV with Unicode characters."""
+        df = pd.DataFrame({
+            'text': ['日本語', 'العربية', 'emoji: 🎉🚀', 'ñoño'],
+            'target': [0, 1, 0, 1]
+        })
+        file_path = tmp_path / "unicode.csv"
+        df.to_csv(file_path, index=False)
+
+        result = asyncio.run(inspect_data(str(file_path)))
+        data = json.loads(result)
+        assert data["structure"]["rows"] == 4
+        # Verify via query
+        query = f"SELECT * FROM '{file_path}' WHERE text = '日本語'"
+        query_result = asyncio.run(query_data(query))
+        query_data_list = json.loads(query_result)
+        assert len(query_data_list) == 1
+
+    def test_large_csv_file(self, tmp_path):
+        """Test loading a larger CSV file."""
+        n_rows = 50000
+        df = pd.DataFrame({
+            'id': range(n_rows),
+            'value': np.random.randn(n_rows),
+            'category': [f'cat_{i % 10}' for i in range(n_rows)]
+        })
+        file_path = tmp_path / "large.csv"
+        df.to_csv(file_path, index=False)
+
+        result = asyncio.run(inspect_data(str(file_path)))
+        data = json.loads(result)
+        assert data["structure"]["rows"] == n_rows
+
+        # Verify aggregation query works
+        query = f"SELECT COUNT(*) as cnt FROM '{file_path}'"
+        query_result = asyncio.run(query_data(query))
+        query_data_list = json.loads(query_result)
+        assert query_data_list[0]["cnt"] == n_rows
+
+    def test_parquet_with_multiple_types(self, tmp_path):
+        """Test Parquet with diverse column types."""
+        df = pd.DataFrame({
+            'int8_col': np.array([1, 2, 3], dtype=np.int8),
+            'int16_col': np.array([100, 200, 300], dtype=np.int16),
+            'int32_col': np.array([1000, 2000, 3000], dtype=np.int32),
+            'int64_col': np.array([10000, 20000, 30000], dtype=np.int64),
+            'float32_col': np.array([1.1, 2.2, 3.3], dtype=np.float32),
+            'float64_col': np.array([1.11, 2.22, 3.33], dtype=np.float64),
+            'bool_col': [True, False, True],
+            'str_col': ['a', 'b', 'c'],
+            'target': [0, 1, 0]
+        })
+        file_path = tmp_path / "multitypes.parquet"
+        df.to_parquet(file_path, index=False)
+
+        result = asyncio.run(inspect_data(str(file_path)))
+        data = json.loads(result)
+        assert data["structure"]["rows"] == 3
+        assert data["structure"]["columns"] == 9
+
+
+# =============================================================================
+# Test file extension validation via exposed tools
+# =============================================================================
+
+class TestFileExtensionValidation:
+    """Tests for file extension handling via exposed tools."""
+
+    def test_unsupported_txt_extension(self, tmp_path):
+        """Test that .txt extension is not supported."""
+        txt_file = tmp_path / "data.txt"
+        txt_file.write_text("some data")
+
+        result = asyncio.run(inspect_data(str(txt_file)))
+        assert "Error" in result
+
+    def test_unsupported_xlsx_extension(self, tmp_path):
+        """Test that .xlsx extension is not supported."""
+        xlsx_file = tmp_path / "data.xlsx"
+        xlsx_file.write_bytes(b"fake xlsx content")
+
+        result = asyncio.run(inspect_data(str(xlsx_file)))
+        assert "Error" in result
+
+    def test_double_extension_csv(self, tmp_path, sample_dataframe):
+        """Test file with double extension like .tar.csv works."""
+        file_path = tmp_path / "data.tar.csv"
+        sample_dataframe.to_csv(file_path, index=False)
+
+        result = asyncio.run(inspect_data(str(file_path)))
+        data = json.loads(result)
+        assert data["structure"]["rows"] == 5
+
+
+# =============================================================================
+# Test format-specific inspect_data behavior
+# =============================================================================
+
+class TestInspectDataFormatDetails:
+    """Tests for format-specific behavior in inspect_data."""
+
+    def test_inspect_csv_dtypes(self, tmp_path):
+        """Test that CSV dtypes are correctly inferred."""
+        df = pd.DataFrame({
+            'int_col': [1, 2, 3],
+            'float_col': [1.5, 2.5, 3.5],
+            'str_col': ['a', 'b', 'c']
+        })
+        file_path = tmp_path / "typed.csv"
+        df.to_csv(file_path, index=False)
+
+        result = asyncio.run(inspect_data(str(file_path)))
+        data = json.loads(result)
+
+        dtypes = data["structure"]["dtypes"]
+        # DuckDB may infer these differently, just check keys exist
+        assert "int_col" in dtypes
+        assert "float_col" in dtypes
+        assert "str_col" in dtypes
+
+    def test_inspect_parquet_preserves_dtypes(self, tmp_path):
+        """Test that Parquet preserves exact dtypes."""
+        df = pd.DataFrame({
+            'int32_col': np.array([1, 2, 3], dtype=np.int32),
+            'float32_col': np.array([1.5, 2.5, 3.5], dtype=np.float32),
+        })
+        file_path = tmp_path / "typed.parquet"
+        df.to_parquet(file_path, index=False)
+
+        result = asyncio.run(inspect_data(str(file_path)))
+        data = json.loads(result)
+
+        dtypes = data["structure"]["dtypes"]
+        assert "int32_col" in dtypes
+        assert "float32_col" in dtypes
+
+    def test_inspect_counts_missing_values_csv(self, tmp_path):
+        """Test that missing values are correctly counted in CSV."""
+        # CSV doesn't preserve NA types well, use empty strings which become NaN
+        file_path = tmp_path / "missing.csv"
+        file_path.write_text("a,b,c\n1,2,3\n,5,\n7,,9\n")
+
+        result = asyncio.run(inspect_data(str(file_path)))
+        data = json.loads(result)
+
+        missing = data["statistics"]["missing_values"]
+        assert missing["a"] == 1
+        assert missing["b"] == 1
+        assert missing["c"] == 1
+
+    def test_inspect_counts_missing_values_parquet(self, tmp_path):
+        """Test that missing values are correctly counted in Parquet."""
+        df = pd.DataFrame({
+            'a': [1, None, 3],
+            'b': [None, 2, None],
+            'c': [1, 2, 3]
+        })
+        file_path = tmp_path / "missing.parquet"
+        df.to_parquet(file_path, index=False)
+
+        result = asyncio.run(inspect_data(str(file_path)))
+        data = json.loads(result)
+
+        missing = data["statistics"]["missing_values"]
+        assert missing["a"] == 1
+        assert missing["b"] == 2
+        assert missing["c"] == 0
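
The tests above lean on DuckDB's ability to query CSV, Parquet, and JSON files directly by path, which is presumably also why the lockfile below grows the duckdb[all] extras (an inference, not stated in the diff). A standalone sketch of that query pattern, assuming local data files rather than anything shipped with the package:

import duckdb

# The same SQL shape works for .csv, .parquet, and .json inputs; DuckDB
# picks the reader from the file extension.
for path in ("data.csv", "data.parquet", "data.json"):
    count = duckdb.sql(f"SELECT COUNT(*) AS cnt FROM '{path}'").fetchone()[0]
    print(path, count)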

{mcp_automl-0.1.5 → mcp_automl-0.1.7}/uv.lock

@@ -6,6 +6,27 @@ resolution-markers = [
     "python_full_version < '3.11'",
 ]

+[[package]]
+name = "adbc-driver-manager"
+version = "1.10.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "typing-extensions" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/9e/77/b6ffd112a67d133810d0027e9de4408a6e63e0e1c438f5866cc28eb3c213/adbc_driver_manager-1.10.0.tar.gz", hash = "sha256:f04407cf2f99bfde13dea0e136d87219c8a16678d43e322744dbd84cdd8eaac2", size = 208204, upload-time = "2026-01-09T07:13:45.803Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/8a/0e/95eae266a8d97f2f222e6db9047dc4c1fab6a3e1d5e6bd9c8efb29881ec4/adbc_driver_manager-1.10.0-cp310-cp310-macosx_10_15_x86_64.whl", hash = "sha256:b82d7ffab5ad4c892e2f3201cc3781db3f87ef0c5ce1938715fb39a5dc6671b0", size = 532926, upload-time = "2026-01-09T07:11:52.672Z" },
+    { url = "https://files.pythonhosted.org/packages/bc/7c/c7234fe0e25ccd0fe23d8fa1e3f2682d407f49916e845e15869d262fc648/adbc_driver_manager-1.10.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:e124ad209bc7112d0c0778fcc2e727c4fdf733188403129a82c10e563e89252b", size = 513090, upload-time = "2026-01-09T07:11:54.807Z" },
+    { url = "https://files.pythonhosted.org/packages/8d/81/6fb0075c67d1039e82960ab9d039da00ef3149b872a067d2e83ea9bb9956/adbc_driver_manager-1.10.0-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:0abafd6b7d8ef5ba9c33fa92a1c5c329bfb89a68fb12e88ca62a4e32d822f257", size = 3039894, upload-time = "2026-01-09T07:11:56.892Z" },
+    { url = "https://files.pythonhosted.org/packages/8a/43/10e2abe7c600545fcf5b684b04073b36c87ed879a4bbc8fcd4f6f329c302/adbc_driver_manager-1.10.0-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:ceca0800974137d2373cfb3aa4862af4b9361a2e5b94808b52df63c3f34a14eb", size = 3053785, upload-time = "2026-01-09T07:11:59.051Z" },
+    { url = "https://files.pythonhosted.org/packages/ee/dd/8f0fe60d49fe0b7bd9eb0b76268d662f95b31a8c623fc7cef40ad9488d0f/adbc_driver_manager-1.10.0-cp310-cp310-win_amd64.whl", hash = "sha256:23504672daeafabe03d4e07038754910f55f6845ef260f2249d9d8942ab16866", size = 714987, upload-time = "2026-01-09T07:12:00.771Z" },
+    { url = "https://files.pythonhosted.org/packages/bd/23/eaea050e76a1f65749be243a68514d67e13ab896c47cbf9e652da0ba9c10/adbc_driver_manager-1.10.0-cp311-cp311-macosx_10_15_x86_64.whl", hash = "sha256:715a33d750af09e1c03fde1783490c816e08a786f151ac79269659da1d2cc4e0", size = 533268, upload-time = "2026-01-09T07:12:02.401Z" },
+    { url = "https://files.pythonhosted.org/packages/4b/37/b81d64da4b1a032df0798bbf8c2e3abf875f9dd319598308d2efebe06523/adbc_driver_manager-1.10.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:4bd40c9b20be54c55b3ce64cabd5f35f29a61886574d990a1d5b5bdd7f81a7b6", size = 513190, upload-time = "2026-01-09T07:12:04.025Z" },
+    { url = "https://files.pythonhosted.org/packages/2b/2a/a03cd7d4eb81c478566a38e6a657b83171e61e84f6aa0c0f9b49ae9d498c/adbc_driver_manager-1.10.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:595ab4a8ec2ddb338c70f3c31481a41830ad9e2d8c1a1884184023303098bc92", size = 3111408, upload-time = "2026-01-09T07:12:06.421Z" },
+    { url = "https://files.pythonhosted.org/packages/97/67/b9309e5351d4ff02720719c6ca01716ded33075fa486157db409bc5f47be/adbc_driver_manager-1.10.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:92fdf3247aef506583e79b3b583c1bf93f28c70e771281a41843aba63c61f732", size = 3124914, upload-time = "2026-01-09T07:12:08.274Z" },
+    { url = "https://files.pythonhosted.org/packages/41/1d/228041cc7ee30e51556d991d5f30981bfbf0c2d2a91c83f34ace2a2a9d2c/adbc_driver_manager-1.10.0-cp311-cp311-win_amd64.whl", hash = "sha256:7c5becb5a81fae563a10d82b570c4e1c7a8994c5b110ddaaae6afa9fd52a17b6", size = 716182, upload-time = "2026-01-09T07:12:09.766Z" },
+]
+
 [[package]]
 name = "annotated-types"
 version = "0.7.0"

@@ -450,6 +471,17 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/b0/83/9d8fc3413f854effa680dcad1781f68f3ada8679863c0c94ba3b36bae6ff/duckdb-1.4.3-cp311-cp311-win_arm64.whl", hash = "sha256:fbc63ffdd03835f660155b37a1b6db2005bcd46e5ad398b8cac141eb305d2a3d", size = 13070898, upload-time = "2025-12-09T10:58:14.301Z" },
 ]

+[package.optional-dependencies]
+all = [
+    { name = "adbc-driver-manager" },
+    { name = "fsspec" },
+    { name = "ipython", version = "8.38.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" },
+    { name = "ipython", version = "9.7.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" },
+    { name = "numpy" },
+    { name = "pandas" },
+    { name = "pyarrow" },
+]
+
 [[package]]
 name = "exceptiongroup"
 version = "1.3.1"

@@ -522,6 +554,15 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/c7/93/0dd45cd283c32dea1545151d8c3637b4b8c53cdb3a625aeb2885b184d74d/fonttools-4.60.1-py3-none-any.whl", hash = "sha256:906306ac7afe2156fcf0042173d6ebbb05416af70f6b370967b47f8f00103bbb", size = 1143175, upload-time = "2025-09-29T21:13:24.134Z" },
 ]

+[[package]]
+name = "fsspec"
+version = "2026.1.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/d5/7d/5df2650c57d47c57232af5ef4b4fdbff182070421e405e0d62c6cdbfaa87/fsspec-2026.1.0.tar.gz", hash = "sha256:e987cb0496a0d81bba3a9d1cee62922fb395e7d4c3b575e57f547953334fe07b", size = 310496, upload-time = "2026-01-09T15:21:35.562Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/01/c9/97cc5aae1648dcb851958a3ddf73ccd7dbe5650d95203ecb4d7720b4cdbf/fsspec-2026.1.0-py3-none-any.whl", hash = "sha256:cb76aa913c2285a3b49bdd5fc55b1d7c708d7208126b60f2eb8194fe1b4cbdcc", size = 201838, upload-time = "2026-01-09T15:21:34.041Z" },
+]
+
 [[package]]
 name = "h11"
 version = "0.16.0"

@@ -993,13 +1034,14 @@ wheels = [

 [[package]]
 name = "mcp-automl"
-version = "0.1.
+version = "0.1.3"
 source = { editable = "." }
 dependencies = [
-    { name = "duckdb" },
+    { name = "duckdb", extra = ["all"] },
     { name = "joblib" },
     { name = "mcp" },
     { name = "pandas" },
+    { name = "pyarrow" },
     { name = "pycaret" },
     { name = "scikit-learn" },
     { name = "tabulate" },

@@ -1007,26 +1049,23 @@ dependencies = [

 [package.dev-dependencies]
 dev = [
-    { name = "pyarrow" },
     { name = "pytest-asyncio" },
 ]

 [package.metadata]
 requires-dist = [
-    { name = "duckdb", specifier = ">=1.4.3" },
+    { name = "duckdb", extras = ["all"], specifier = ">=1.4.3" },
     { name = "joblib", specifier = "<1.4" },
     { name = "mcp", specifier = ">=1.21.2" },
     { name = "pandas", specifier = "<2.2.0" },
+    { name = "pyarrow", specifier = ">=23.0.0" },
     { name = "pycaret", specifier = ">=3.0.0" },
     { name = "scikit-learn", specifier = "<1.4" },
     { name = "tabulate", specifier = ">=0.9.0" },
 ]

 [package.metadata.requires-dev]
-dev = [
-    { name = "pyarrow", specifier = ">=14.0.0" },
-    { name = "pytest-asyncio", specifier = ">=1.3.0" },
-]
+dev = [{ name = "pytest-asyncio", specifier = ">=1.3.0" }]

 [[package]]
 name = "narwhals"