db-connect-mcp 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of db-connect-mcp might be problematic; details are provided below.

@@ -0,0 +1,239 @@
1
+ """Safe query execution with validation."""
2
+
3
+ import re
4
+ import time
5
+ from typing import TYPE_CHECKING, Any, Optional
6
+
7
+ from sqlalchemy import text
8
+
9
+ from db_connect_mcp.core.connection import DatabaseConnection
10
+ from db_connect_mcp.models.query import ExplainPlan, QueryResult
11
+
12
+ if TYPE_CHECKING:
13
+ from db_connect_mcp.adapters.base import BaseAdapter
14
+
15
+
16
class QueryExecutor:
    """Safe query execution with validation and limits.

    Only read-only statements are executed: a query must start with one of
    ``ALLOWED_QUERY_TYPES`` and must not contain mutating/DDL keywords
    anywhere in its body. Comments and quoted literals are stripped before
    keyword scanning so hidden or quoted keywords are handled correctly.
    """

    # Allowed leading keywords (read-only operations).
    ALLOWED_QUERY_TYPES = {"SELECT", "WITH", "SHOW", "DESCRIBE", "EXPLAIN"}

    # Mutating / DDL keywords rejected anywhere in a query body.
    DANGEROUS_KEYWORDS = frozenset(
        {
            "DROP",
            "DELETE",
            "INSERT",
            "UPDATE",
            "TRUNCATE",
            "ALTER",
            "CREATE",
            "GRANT",
            "REVOKE",
        }
    )

    def __init__(self, connection: "DatabaseConnection", adapter: "BaseAdapter"):
        """
        Initialize query executor.

        Args:
            connection: Database connection manager
            adapter: Database-specific adapter
        """
        self.connection = connection
        self.adapter = adapter

    async def execute_query(
        self,
        query: str,
        params: Optional[dict[str, Any]] = None,
        limit: Optional[int] = 1000,
    ) -> "QueryResult":
        """
        Execute a SELECT/WITH query safely.

        Args:
            query: SQL query to execute
            params: Query parameters for parameterized queries
            limit: Maximum number of rows to return (None for no limit)

        Returns:
            Query result with rows and metadata

        Raises:
            ValueError: If query is not a safe read-only query
        """
        self._validate_query(query)

        # Append a LIMIT clause unless limiting is disabled or the query
        # already carries one.
        modified_query = query
        if limit is not None and not self._has_limit(query):
            modified_query = self._add_limit(query, limit)

        start_time = time.time()

        async with self.connection.get_connection() as conn:
            result = await conn.execute(text(modified_query), params or {})
            rows_data = result.fetchall()

            # Convert to list of dicts keyed by column name.
            columns = list(result.keys())
            rows = [dict(zip(columns, row)) for row in rows_data]

        execution_time = (time.time() - start_time) * 1000  # Convert to ms

        # Heuristic: exactly `limit` rows back suggests more were cut off.
        truncated = limit is not None and len(rows) == limit

        return QueryResult(
            query=modified_query,
            rows=rows,
            row_count=len(rows),
            columns=columns,
            execution_time_ms=execution_time,
            truncated=truncated,
            warning="Results truncated to limit" if truncated else None,
        )

    async def sample_data(
        self,
        table_name: str,
        schema: Optional[str] = None,
        limit: int = 100,
    ) -> "QueryResult":
        """
        Sample data from a table efficiently.

        Args:
            table_name: Table name
            schema: Schema name
            limit: Number of rows to sample

        Returns:
            Sample data query result
        """
        # The adapter knows the most efficient sampling syntax per database.
        query = await self.adapter.get_sample_query(table_name, schema, limit)

        return await self.execute_query(query, limit=limit)

    async def explain_query(self, query: str, analyze: bool = False) -> "ExplainPlan":
        """
        Get query execution plan.

        Args:
            query: SQL query to explain
            analyze: Whether to actually execute the query (EXPLAIN ANALYZE)

        Returns:
            Execution plan information

        Raises:
            ValueError: If query is not safe or EXPLAIN not supported
        """
        if not self.adapter.capabilities.explain_plans:
            raise ValueError(
                f"EXPLAIN not supported for {self.connection.dialect} database"
            )

        self._validate_query(query)

        # Database-specific EXPLAIN syntax comes from the adapter.
        explain_query = await self.adapter.get_explain_query(query, analyze)

        async with self.connection.get_connection() as conn:
            result = await conn.execute(text(explain_query))
            rows = result.fetchall()

        # Different databases return EXPLAIN output in different shapes;
        # the first column of each row carries the plan text.
        plan_text = "\n".join(str(row[0]) for row in rows)

        # Parse plan (adapter-specific).
        plan_info = await self.adapter.parse_explain_plan(plan_text, analyze)

        return ExplainPlan(
            query=query,
            plan=plan_text,
            plan_json=plan_info.get("json"),
            estimated_cost=plan_info.get("estimated_cost"),
            estimated_rows=plan_info.get("estimated_rows"),
            actual_time_ms=plan_info.get("actual_time_ms"),
            actual_rows=plan_info.get("actual_rows"),
            warnings=plan_info.get("warnings", []),
            recommendations=plan_info.get("recommendations", []),
        )

    def _validate_query(self, query: str) -> None:
        """
        Validate that query is safe (read-only).

        Args:
            query: SQL query to validate

        Raises:
            ValueError: If query is not allowed
        """
        # Normalize query for case-insensitive keyword matching.
        normalized = query.strip().upper()

        # Strip comments so keywords inside them are not matched and the
        # leading keyword is found even after a leading comment.
        normalized = re.sub(r"--[^\n]*", "", normalized)
        normalized = re.sub(r"/\*.*?\*/", "", normalized, flags=re.DOTALL)

        # Strip quoted literals and quoted identifiers so data values such
        # as SELECT 'DROP TABLE x' do not trigger false positives.
        normalized = re.sub(r"'[^']*'", "''", normalized)
        normalized = re.sub(r'"[^"]*"', '""', normalized)

        tokens = normalized.split()
        first_keyword = tokens[0] if tokens else ""

        if first_keyword not in self.ALLOWED_QUERY_TYPES:
            raise ValueError(
                f"Only {', '.join(self.ALLOWED_QUERY_TYPES)} queries are allowed. "
                f"Got: {first_keyword}"
            )

        for keyword in self.DANGEROUS_KEYWORDS:
            # Word boundaries avoid false positives (e.g. "DESCRIBE").
            if re.search(rf"\b{keyword}\b", normalized):
                raise ValueError(
                    f"Query contains dangerous keyword: {keyword}. "
                    f"Only read-only queries are allowed."
                )

    def _has_limit(self, query: str) -> bool:
        """Check if query already has a LIMIT clause."""
        normalized = query.strip().upper()
        return bool(re.search(r"\bLIMIT\s+\d+", normalized))

    def _add_limit(self, query: str, limit: int) -> str:
        """Append a LIMIT clause to a query that lacks one."""
        # Drop a trailing semicolon so the appended clause still parses.
        query = query.rstrip().rstrip(";")

        return f"{query} LIMIT {limit}"

    async def test_query_syntax(self, query: str) -> tuple[bool, Optional[str]]:
        """
        Test if query has valid syntax without executing it.

        Args:
            query: SQL query to test

        Returns:
            Tuple of (is_valid, error_message)
        """
        try:
            self._validate_query(query)

            # EXPLAIN checks syntax without running the statement.  Avoid
            # double-prefixing queries that are already EXPLAIN statements.
            probe = query.strip()
            if not probe.upper().startswith("EXPLAIN"):
                probe = f"EXPLAIN {probe}"

            async with self.connection.get_connection() as conn:
                await conn.execute(text(probe))

            return (True, None)
        except Exception as e:
            # Broad catch is intentional: any driver error means "invalid".
            return (False, str(e))
@@ -0,0 +1,345 @@
1
+ """Metadata inspection using SQLAlchemy reflection."""
2
+
3
+ from typing import TYPE_CHECKING, Any, Optional, cast
4
+
5
+ from sqlalchemy import inspect as sa_inspect
6
+
7
+ from db_connect_mcp.core.connection import DatabaseConnection
8
+ from db_connect_mcp.models.database import SchemaInfo
9
+ from db_connect_mcp.models.table import (
10
+ ColumnInfo,
11
+ ConstraintInfo,
12
+ IndexInfo,
13
+ RelationshipInfo,
14
+ TableInfo,
15
+ )
16
+
17
+ if TYPE_CHECKING:
18
+ from db_connect_mcp.adapters.base import BaseAdapter
19
+
20
+
21
class MetadataInspector:
    """Database metadata inspection using SQLAlchemy Inspector.

    SQLAlchemy reflection is synchronous, so each public method funnels the
    Inspector calls through ``conn.run_sync`` on the async connection and
    then lets the adapter enrich the results with database-specific details.
    """

    def __init__(self, connection: "DatabaseConnection", adapter: "BaseAdapter"):
        """
        Initialize metadata inspector.

        Args:
            connection: Database connection manager
            adapter: Database-specific adapter for extended functionality
        """
        self.connection = connection
        self.adapter = adapter

    async def get_schemas(self) -> list["SchemaInfo"]:
        """
        List all schemas in the database.

        Returns:
            List of schema information objects (system schemas excluded)
        """
        async with self.connection.get_connection() as conn:
            # Use run_sync to execute synchronous reflection methods.
            def get_schema_data(sync_conn):
                inspector = sa_inspect(sync_conn)
                all_schemas = inspector.get_schema_names()

                schema_data = []
                for schema in all_schemas:
                    if self._is_system_schema(schema):
                        continue

                    table_count = len(inspector.get_table_names(schema=schema))
                    view_count = None
                    if self.adapter.capabilities.views:
                        view_count = len(inspector.get_view_names(schema=schema))

                    schema_data.append(
                        {
                            "name": schema,
                            "table_count": table_count,
                            "view_count": view_count,
                        }
                    )
                return schema_data

            schemas_data = await conn.run_sync(get_schema_data)
            result = []

            for data in schemas_data:
                schema_info = SchemaInfo(
                    name=data["name"],
                    owner=None,  # Will be filled by adapter if available
                    table_count=data["table_count"],
                    view_count=data["view_count"],
                )

                # Let adapter enrich with database-specific info.
                schema_info = await self.adapter.enrich_schema_info(conn, schema_info)
                result.append(schema_info)

            return result

    async def get_tables(
        self, schema: Optional[str] = None, include_views: bool = True
    ) -> list["TableInfo"]:
        """
        List tables in a schema.

        Args:
            schema: Schema name (None for default schema)
            include_views: Whether to include views

        Returns:
            List of basic table information
        """
        async with self.connection.get_connection() as conn:
            # Use run_sync to execute synchronous reflection methods.
            def get_table_data(sync_conn):
                inspector = sa_inspect(sync_conn)

                table_data = [
                    {"name": table_name, "type": "BASE TABLE"}
                    for table_name in inspector.get_table_names(schema=schema)
                ]

                # Get views if requested and supported.
                if include_views and self.adapter.capabilities.views:
                    for view_name in inspector.get_view_names(schema=schema):
                        table_data.append({"name": view_name, "type": "VIEW"})

                return table_data

            tables_data = await conn.run_sync(get_table_data)
            tables = []

            for data in tables_data:
                table_info = TableInfo(
                    name=data["name"],
                    schema=schema,
                    table_type=data["type"],
                )
                # Let adapter provide size and row count efficiently.
                table_info = await self.adapter.enrich_table_info(conn, table_info)
                tables.append(table_info)

            return tables

    async def describe_table(
        self, table_name: str, schema: Optional[str] = None
    ) -> "TableInfo":
        """
        Get comprehensive table description.

        Args:
            table_name: Table name
            schema: Schema name (None for default)

        Returns:
            Comprehensive table information (columns, PK, indexes,
            foreign keys, unique and check constraints)
        """
        async with self.connection.get_connection() as conn:
            # Use run_sync to execute all synchronous reflection methods.
            def get_table_details(sync_conn):
                inspector = sa_inspect(sync_conn)

                # Gather all table metadata in one sync pass.
                result = {
                    "columns": inspector.get_columns(table_name, schema=schema),
                    "pk_constraint": inspector.get_pk_constraint(
                        table_name, schema=schema
                    ),
                    "indexes": [],
                    "foreign_keys": [],
                    "unique_constraints": inspector.get_unique_constraints(
                        table_name, schema=schema
                    ),
                    "check_constraints": [],
                }

                # Get indexes if supported.
                if self.adapter.capabilities.indexes:
                    result["indexes"] = inspector.get_indexes(table_name, schema=schema)

                # Get foreign keys if supported.
                if self.adapter.capabilities.foreign_keys:
                    result["foreign_keys"] = inspector.get_foreign_keys(
                        table_name, schema=schema
                    )

                # Check constraints are optional in some dialects.
                try:
                    result["check_constraints"] = inspector.get_check_constraints(
                        table_name, schema=schema
                    )
                except NotImplementedError:
                    pass

                return result

            table_data = await conn.run_sync(get_table_details)

            # Basic info.
            table_info = TableInfo(
                name=table_name,
                schema=schema,
                table_type="BASE TABLE",  # Will be updated if it's a view
            )

            # Columns.
            table_info.columns = [
                self._column_from_sa(cast(dict[str, Any], col_data))
                for col_data in table_data["columns"]
            ]

            # Primary key: flag the member columns.
            pk_constraint = table_data["pk_constraint"]
            if pk_constraint and pk_constraint.get("constrained_columns"):
                pk_cols = pk_constraint["constrained_columns"]
                for col in table_info.columns:
                    if col.name in pk_cols:
                        col.primary_key = True

            # Indexes: record them and flag the indexed columns.
            for idx_data in table_data["indexes"]:
                index = self._index_from_sa(cast(dict[str, Any], idx_data))
                table_info.indexes.append(index)

                for col_name in index.columns:
                    col = table_info.get_column(col_name)
                    if col:
                        col.indexed = True

            # Foreign keys: record constraints and annotate FK columns.
            for fk in table_data["foreign_keys"]:
                constraint = self._fk_constraint_from_sa(cast(dict[str, Any], fk))
                table_info.constraints.append(constraint)

                for col_name in constraint.columns:
                    col = table_info.get_column(col_name)
                    if col and constraint.referenced_table:
                        ref_cols = ",".join(constraint.referenced_columns or [])
                        col.foreign_key = f"{constraint.referenced_table}.{ref_cols}"

            # Unique constraints: record them and flag the unique columns.
            for uniq in table_data["unique_constraints"]:
                constraint = ConstraintInfo(
                    name=uniq["name"],
                    constraint_type="UNIQUE",
                    columns=uniq["column_names"],
                )
                table_info.constraints.append(constraint)

                for col_name in constraint.columns:
                    col = table_info.get_column(col_name)
                    if col:
                        col.unique = True

            # Check constraints.
            for check in table_data["check_constraints"]:
                constraint = ConstraintInfo(
                    name=check["name"],
                    constraint_type="CHECK",
                    columns=[],  # Check constraints don't always map to specific columns
                    definition=check.get("sqltext"),
                )
                table_info.constraints.append(constraint)

            # Let adapter enrich with database-specific info.
            table_info = await self.adapter.enrich_table_info(conn, table_info)

            return table_info

    async def get_relationships(
        self, table_name: str, schema: Optional[str] = None
    ) -> list["RelationshipInfo"]:
        """
        Get foreign key relationships for a table.

        Args:
            table_name: Table name
            schema: Schema name

        Returns:
            List of relationship information (empty if the database does
            not support foreign keys)
        """
        if not self.adapter.capabilities.foreign_keys:
            return []

        async with self.connection.get_connection() as conn:
            # Use run_sync to execute synchronous reflection methods.
            def get_fk_data(sync_conn):
                inspector = sa_inspect(sync_conn)
                return inspector.get_foreign_keys(table_name, schema=schema)

            fk_data = await conn.run_sync(get_fk_data)
            relationships = []

            for fk in fk_data:
                fk_dict = cast(dict[str, Any], fk)
                constraint_name = fk_dict.get("name") or f"fk_{table_name}_auto"
                # "options" may be absent or explicitly None in SQLAlchemy
                # reflection output; normalize to a dict before lookup.
                options = fk_dict.get("options") or {}
                rel = RelationshipInfo(
                    from_table=table_name,
                    from_schema=schema,
                    from_columns=fk_dict["constrained_columns"],
                    to_table=fk_dict["referred_table"],
                    to_schema=fk_dict.get("referred_schema"),
                    to_columns=fk_dict["referred_columns"],
                    constraint_name=constraint_name,
                    on_delete=options.get("ondelete"),
                    on_update=options.get("onupdate"),
                )
                relationships.append(rel)

            return relationships

    def _column_from_sa(self, col_data: dict) -> "ColumnInfo":
        """Convert SQLAlchemy column data to ColumnInfo."""
        # Compare against None (not truthiness) so falsy-looking defaults
        # are preserved rather than silently dropped.
        default = col_data.get("default")
        return ColumnInfo(
            name=col_data["name"],
            data_type=str(col_data["type"]),
            nullable=col_data["nullable"],
            default=str(default) if default is not None else None,
            primary_key=False,  # Will be set later
            foreign_key=None,  # Will be set later
            unique=False,  # Will be set later
            indexed=False,  # Will be set later
            comment=col_data.get("comment"),
        )

    def _index_from_sa(self, idx_data: dict) -> "IndexInfo":
        """Convert SQLAlchemy index data to IndexInfo."""
        return IndexInfo(
            name=idx_data["name"],
            columns=idx_data["column_names"],
            unique=idx_data.get("unique", False),
            index_type=idx_data.get("type"),
        )

    def _fk_constraint_from_sa(self, fk_data: dict) -> "ConstraintInfo":
        """Convert SQLAlchemy FK data to ConstraintInfo."""
        return ConstraintInfo(
            name=fk_data["name"],
            constraint_type="FOREIGN KEY",
            columns=fk_data["constrained_columns"],
            referenced_table=fk_data["referred_table"],
            referenced_columns=fk_data["referred_columns"],
        )

    def _is_system_schema(self, schema: str) -> bool:
        """Check if schema is a system schema to skip."""
        system_schemas = {
            "postgresql": {"information_schema", "pg_catalog", "pg_toast"},
            "mysql": {"information_schema", "mysql", "performance_schema", "sys"},
            "clickhouse": {"information_schema", "INFORMATION_SCHEMA", "system"},
        }

        # Unknown dialects have no registered system schemas.
        dialect = self.connection.dialect
        return schema in system_schemas.get(dialect, set())
@@ -0,0 +1,23 @@
1
+ """Pydantic models for database metadata and results."""
2
+
3
+ from .capabilities import DatabaseCapabilities
4
+ from .config import DatabaseConfig
5
+ from .database import DatabaseInfo, SchemaInfo
6
+ from .query import ExplainPlan, QueryResult
7
+ from .statistics import ColumnStats, Distribution
8
+ from .table import ColumnInfo, ConstraintInfo, IndexInfo, TableInfo
9
+
10
+ __all__ = [
11
+ "DatabaseCapabilities",
12
+ "DatabaseConfig",
13
+ "DatabaseInfo",
14
+ "SchemaInfo",
15
+ "TableInfo",
16
+ "ColumnInfo",
17
+ "IndexInfo",
18
+ "ConstraintInfo",
19
+ "QueryResult",
20
+ "ExplainPlan",
21
+ "ColumnStats",
22
+ "Distribution",
23
+ ]
@@ -0,0 +1,98 @@
1
+ """Database capabilities model."""
2
+
3
+ from pydantic import BaseModel, Field
4
+
5
+
6
class DatabaseCapabilities(BaseModel):
    """Feature flags describing what a given database engine supports."""

    model_config = {
        "json_schema_extra": {
            "examples": [
                {
                    "foreign_keys": True,
                    "indexes": True,
                    "views": True,
                    "materialized_views": True,
                    "partitions": True,
                    "advanced_stats": True,
                    "explain_plans": True,
                    "profiling": True,
                    "comments": True,
                    "schemas": True,
                    "transactions": True,
                    "stored_procedures": True,
                    "triggers": True,
                }
            ]
        }
    }

    foreign_keys: bool = Field(
        default=False,
        description="Database supports foreign key constraints",
    )
    indexes: bool = Field(
        default=True,
        description="Database supports indexes",
    )
    views: bool = Field(
        default=True,
        description="Database supports views",
    )
    materialized_views: bool = Field(
        default=False,
        description="Database supports materialized views",
    )
    partitions: bool = Field(
        default=False,
        description="Database supports table partitioning",
    )
    advanced_stats: bool = Field(
        default=False,
        description="Database supports advanced statistics (percentiles, distributions)",
    )
    explain_plans: bool = Field(
        default=True,
        description="Database supports EXPLAIN for query plans",
    )
    profiling: bool = Field(
        default=False,
        description="Database supports profiling and performance metrics",
    )
    comments: bool = Field(
        default=False,
        description="Database supports table/column comments",
    )
    schemas: bool = Field(
        default=True,
        description="Database supports schemas/namespaces",
    )
    transactions: bool = Field(
        default=True,
        description="Database supports transactions",
    )
    stored_procedures: bool = Field(
        default=False,
        description="Database supports stored procedures",
    )
    triggers: bool = Field(
        default=False,
        description="Database supports triggers",
    )

    def get_supported_features(self) -> list[str]:
        """Return the names of all features this database supports."""
        flags = self.model_dump()
        return [name for name in flags if flags[name] is True]

    def get_unsupported_features(self) -> list[str]:
        """Return the names of all features this database lacks."""
        flags = self.model_dump()
        return [name for name in flags if flags[name] is False]