db-connect-mcp 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of db-connect-mcp might be problematic; details are provided below.

@@ -0,0 +1,239 @@
1
+ """Safe query execution with validation."""
2
+
3
+ import re
4
+ import time
5
+ from typing import TYPE_CHECKING, Any, Optional
6
+
7
+ from sqlalchemy import text
8
+
9
+ from db_connect_mcp.core.connection import DatabaseConnection
10
+ from db_connect_mcp.models.query import ExplainPlan, QueryResult
11
+
12
+ if TYPE_CHECKING:
13
+ from db_connect_mcp.adapters.base import BaseAdapter
14
+
15
+
16
class QueryExecutor:
    """Safe query execution with validation and limits.

    Only read-only statements are executed: a query must start with one of
    ``ALLOWED_QUERY_TYPES`` and must not contain mutating/DDL keywords
    anywhere in its body. Comments and quoted literals are stripped before
    keyword scanning so hidden or quoted keywords are handled correctly.
    """

    # Allowed leading keywords (read-only operations).
    ALLOWED_QUERY_TYPES = {"SELECT", "WITH", "SHOW", "DESCRIBE", "EXPLAIN"}

    # Mutating / DDL keywords rejected anywhere in a query body.
    DANGEROUS_KEYWORDS = frozenset(
        {
            "DROP",
            "DELETE",
            "INSERT",
            "UPDATE",
            "TRUNCATE",
            "ALTER",
            "CREATE",
            "GRANT",
            "REVOKE",
        }
    )

    def __init__(self, connection: "DatabaseConnection", adapter: "BaseAdapter"):
        """
        Initialize query executor.

        Args:
            connection: Database connection manager
            adapter: Database-specific adapter
        """
        self.connection = connection
        self.adapter = adapter

    async def execute_query(
        self,
        query: str,
        params: Optional[dict[str, Any]] = None,
        limit: Optional[int] = 1000,
    ) -> "QueryResult":
        """
        Execute a SELECT/WITH query safely.

        Args:
            query: SQL query to execute
            params: Query parameters for parameterized queries
            limit: Maximum number of rows to return (None for no limit)

        Returns:
            Query result with rows and metadata

        Raises:
            ValueError: If query is not a safe read-only query
        """
        self._validate_query(query)

        # Append a LIMIT clause unless limiting is disabled or the query
        # already carries one.
        modified_query = query
        if limit is not None and not self._has_limit(query):
            modified_query = self._add_limit(query, limit)

        start_time = time.time()

        async with self.connection.get_connection() as conn:
            result = await conn.execute(text(modified_query), params or {})
            rows_data = result.fetchall()

            # Convert to list of dicts keyed by column name.
            columns = list(result.keys())
            rows = [dict(zip(columns, row)) for row in rows_data]

        execution_time = (time.time() - start_time) * 1000  # Convert to ms

        # Heuristic: exactly `limit` rows back suggests more were cut off.
        truncated = limit is not None and len(rows) == limit

        return QueryResult(
            query=modified_query,
            rows=rows,
            row_count=len(rows),
            columns=columns,
            execution_time_ms=execution_time,
            truncated=truncated,
            warning="Results truncated to limit" if truncated else None,
        )

    async def sample_data(
        self,
        table_name: str,
        schema: Optional[str] = None,
        limit: int = 100,
    ) -> "QueryResult":
        """
        Sample data from a table efficiently.

        Args:
            table_name: Table name
            schema: Schema name
            limit: Number of rows to sample

        Returns:
            Sample data query result
        """
        # The adapter knows the most efficient sampling syntax per database.
        query = await self.adapter.get_sample_query(table_name, schema, limit)

        return await self.execute_query(query, limit=limit)

    async def explain_query(self, query: str, analyze: bool = False) -> "ExplainPlan":
        """
        Get query execution plan.

        Args:
            query: SQL query to explain
            analyze: Whether to actually execute the query (EXPLAIN ANALYZE)

        Returns:
            Execution plan information

        Raises:
            ValueError: If query is not safe or EXPLAIN not supported
        """
        if not self.adapter.capabilities.explain_plans:
            raise ValueError(
                f"EXPLAIN not supported for {self.connection.dialect} database"
            )

        self._validate_query(query)

        # Database-specific EXPLAIN syntax comes from the adapter.
        explain_query = await self.adapter.get_explain_query(query, analyze)

        async with self.connection.get_connection() as conn:
            result = await conn.execute(text(explain_query))
            rows = result.fetchall()

        # Different databases return EXPLAIN output in different shapes;
        # the first column of each row carries the plan text.
        plan_text = "\n".join(str(row[0]) for row in rows)

        # Parse plan (adapter-specific).
        plan_info = await self.adapter.parse_explain_plan(plan_text, analyze)

        return ExplainPlan(
            query=query,
            plan=plan_text,
            plan_json=plan_info.get("json"),
            estimated_cost=plan_info.get("estimated_cost"),
            estimated_rows=plan_info.get("estimated_rows"),
            actual_time_ms=plan_info.get("actual_time_ms"),
            actual_rows=plan_info.get("actual_rows"),
            warnings=plan_info.get("warnings", []),
            recommendations=plan_info.get("recommendations", []),
        )

    def _validate_query(self, query: str) -> None:
        """
        Validate that query is safe (read-only).

        Args:
            query: SQL query to validate

        Raises:
            ValueError: If query is not allowed
        """
        # Normalize query for case-insensitive keyword matching.
        normalized = query.strip().upper()

        # Strip comments so keywords inside them are not matched and the
        # leading keyword is found even after a leading comment.
        normalized = re.sub(r"--[^\n]*", "", normalized)
        normalized = re.sub(r"/\*.*?\*/", "", normalized, flags=re.DOTALL)

        # Strip quoted literals and quoted identifiers so data values such
        # as SELECT 'DROP TABLE x' do not trigger false positives.
        normalized = re.sub(r"'[^']*'", "''", normalized)
        normalized = re.sub(r'"[^"]*"', '""', normalized)

        tokens = normalized.split()
        first_keyword = tokens[0] if tokens else ""

        if first_keyword not in self.ALLOWED_QUERY_TYPES:
            raise ValueError(
                f"Only {', '.join(self.ALLOWED_QUERY_TYPES)} queries are allowed. "
                f"Got: {first_keyword}"
            )

        for keyword in self.DANGEROUS_KEYWORDS:
            # Word boundaries avoid false positives (e.g. "DESCRIBE").
            if re.search(rf"\b{keyword}\b", normalized):
                raise ValueError(
                    f"Query contains dangerous keyword: {keyword}. "
                    f"Only read-only queries are allowed."
                )

    def _has_limit(self, query: str) -> bool:
        """Check if query already has a LIMIT clause."""
        normalized = query.strip().upper()
        return bool(re.search(r"\bLIMIT\s+\d+", normalized))

    def _add_limit(self, query: str, limit: int) -> str:
        """Append a LIMIT clause to a query that lacks one."""
        # Drop a trailing semicolon so the appended clause still parses.
        query = query.rstrip().rstrip(";")

        return f"{query} LIMIT {limit}"

    async def test_query_syntax(self, query: str) -> tuple[bool, Optional[str]]:
        """
        Test if query has valid syntax without executing it.

        Args:
            query: SQL query to test

        Returns:
            Tuple of (is_valid, error_message)
        """
        try:
            self._validate_query(query)

            # EXPLAIN checks syntax without running the statement.  Avoid
            # double-prefixing queries that are already EXPLAIN statements.
            probe = query.strip()
            if not probe.upper().startswith("EXPLAIN"):
                probe = f"EXPLAIN {probe}"

            async with self.connection.get_connection() as conn:
                await conn.execute(text(probe))

            return (True, None)
        except Exception as e:
            # Broad catch is intentional: any driver error means "invalid".
            return (False, str(e))
@@ -0,0 +1,345 @@
1
+ """Metadata inspection using SQLAlchemy reflection."""
2
+
3
+ from typing import TYPE_CHECKING, Any, Optional, cast
4
+
5
+ from sqlalchemy import inspect as sa_inspect
6
+
7
+ from db_connect_mcp.core.connection import DatabaseConnection
8
+ from db_connect_mcp.models.database import SchemaInfo
9
+ from db_connect_mcp.models.table import (
10
+ ColumnInfo,
11
+ ConstraintInfo,
12
+ IndexInfo,
13
+ RelationshipInfo,
14
+ TableInfo,
15
+ )
16
+
17
+ if TYPE_CHECKING:
18
+ from db_connect_mcp.adapters.base import BaseAdapter
19
+
20
+
21
class MetadataInspector:
    """Database metadata inspection using SQLAlchemy Inspector.

    SQLAlchemy reflection is synchronous, so each public method funnels the
    Inspector calls through ``conn.run_sync`` on the async connection and
    then lets the adapter enrich the results with database-specific details.
    """

    def __init__(self, connection: "DatabaseConnection", adapter: "BaseAdapter"):
        """
        Initialize metadata inspector.

        Args:
            connection: Database connection manager
            adapter: Database-specific adapter for extended functionality
        """
        self.connection = connection
        self.adapter = adapter

    async def get_schemas(self) -> list["SchemaInfo"]:
        """
        List all schemas in the database.

        Returns:
            List of schema information objects (system schemas excluded)
        """
        async with self.connection.get_connection() as conn:
            # Use run_sync to execute synchronous reflection methods.
            def get_schema_data(sync_conn):
                inspector = sa_inspect(sync_conn)
                all_schemas = inspector.get_schema_names()

                schema_data = []
                for schema in all_schemas:
                    if self._is_system_schema(schema):
                        continue

                    table_count = len(inspector.get_table_names(schema=schema))
                    view_count = None
                    if self.adapter.capabilities.views:
                        view_count = len(inspector.get_view_names(schema=schema))

                    schema_data.append(
                        {
                            "name": schema,
                            "table_count": table_count,
                            "view_count": view_count,
                        }
                    )
                return schema_data

            schemas_data = await conn.run_sync(get_schema_data)
            result = []

            for data in schemas_data:
                schema_info = SchemaInfo(
                    name=data["name"],
                    owner=None,  # Will be filled by adapter if available
                    table_count=data["table_count"],
                    view_count=data["view_count"],
                )

                # Let adapter enrich with database-specific info.
                schema_info = await self.adapter.enrich_schema_info(conn, schema_info)
                result.append(schema_info)

            return result

    async def get_tables(
        self, schema: Optional[str] = None, include_views: bool = True
    ) -> list["TableInfo"]:
        """
        List tables in a schema.

        Args:
            schema: Schema name (None for default schema)
            include_views: Whether to include views

        Returns:
            List of basic table information
        """
        async with self.connection.get_connection() as conn:
            # Use run_sync to execute synchronous reflection methods.
            def get_table_data(sync_conn):
                inspector = sa_inspect(sync_conn)

                table_data = [
                    {"name": table_name, "type": "BASE TABLE"}
                    for table_name in inspector.get_table_names(schema=schema)
                ]

                # Get views if requested and supported.
                if include_views and self.adapter.capabilities.views:
                    for view_name in inspector.get_view_names(schema=schema):
                        table_data.append({"name": view_name, "type": "VIEW"})

                return table_data

            tables_data = await conn.run_sync(get_table_data)
            tables = []

            for data in tables_data:
                table_info = TableInfo(
                    name=data["name"],
                    schema=schema,
                    table_type=data["type"],
                )
                # Let adapter provide size and row count efficiently.
                table_info = await self.adapter.enrich_table_info(conn, table_info)
                tables.append(table_info)

            return tables

    async def describe_table(
        self, table_name: str, schema: Optional[str] = None
    ) -> "TableInfo":
        """
        Get comprehensive table description.

        Args:
            table_name: Table name
            schema: Schema name (None for default)

        Returns:
            Comprehensive table information (columns, PK, indexes,
            foreign keys, unique and check constraints)
        """
        async with self.connection.get_connection() as conn:
            # Use run_sync to execute all synchronous reflection methods.
            def get_table_details(sync_conn):
                inspector = sa_inspect(sync_conn)

                # Gather all table metadata in one sync pass.
                result = {
                    "columns": inspector.get_columns(table_name, schema=schema),
                    "pk_constraint": inspector.get_pk_constraint(
                        table_name, schema=schema
                    ),
                    "indexes": [],
                    "foreign_keys": [],
                    "unique_constraints": inspector.get_unique_constraints(
                        table_name, schema=schema
                    ),
                    "check_constraints": [],
                }

                # Get indexes if supported.
                if self.adapter.capabilities.indexes:
                    result["indexes"] = inspector.get_indexes(table_name, schema=schema)

                # Get foreign keys if supported.
                if self.adapter.capabilities.foreign_keys:
                    result["foreign_keys"] = inspector.get_foreign_keys(
                        table_name, schema=schema
                    )

                # Check constraints are optional in some dialects.
                try:
                    result["check_constraints"] = inspector.get_check_constraints(
                        table_name, schema=schema
                    )
                except NotImplementedError:
                    pass

                return result

            table_data = await conn.run_sync(get_table_details)

            # Basic info.
            table_info = TableInfo(
                name=table_name,
                schema=schema,
                table_type="BASE TABLE",  # Will be updated if it's a view
            )

            # Columns.
            table_info.columns = [
                self._column_from_sa(cast(dict[str, Any], col_data))
                for col_data in table_data["columns"]
            ]

            # Primary key: flag the member columns.
            pk_constraint = table_data["pk_constraint"]
            if pk_constraint and pk_constraint.get("constrained_columns"):
                pk_cols = pk_constraint["constrained_columns"]
                for col in table_info.columns:
                    if col.name in pk_cols:
                        col.primary_key = True

            # Indexes: record them and flag the indexed columns.
            for idx_data in table_data["indexes"]:
                index = self._index_from_sa(cast(dict[str, Any], idx_data))
                table_info.indexes.append(index)

                for col_name in index.columns:
                    col = table_info.get_column(col_name)
                    if col:
                        col.indexed = True

            # Foreign keys: record constraints and annotate FK columns.
            for fk in table_data["foreign_keys"]:
                constraint = self._fk_constraint_from_sa(cast(dict[str, Any], fk))
                table_info.constraints.append(constraint)

                for col_name in constraint.columns:
                    col = table_info.get_column(col_name)
                    if col and constraint.referenced_table:
                        ref_cols = ",".join(constraint.referenced_columns or [])
                        col.foreign_key = f"{constraint.referenced_table}.{ref_cols}"

            # Unique constraints: record them and flag the unique columns.
            for uniq in table_data["unique_constraints"]:
                constraint = ConstraintInfo(
                    name=uniq["name"],
                    constraint_type="UNIQUE",
                    columns=uniq["column_names"],
                )
                table_info.constraints.append(constraint)

                for col_name in constraint.columns:
                    col = table_info.get_column(col_name)
                    if col:
                        col.unique = True

            # Check constraints.
            for check in table_data["check_constraints"]:
                constraint = ConstraintInfo(
                    name=check["name"],
                    constraint_type="CHECK",
                    columns=[],  # Check constraints don't always map to specific columns
                    definition=check.get("sqltext"),
                )
                table_info.constraints.append(constraint)

            # Let adapter enrich with database-specific info.
            table_info = await self.adapter.enrich_table_info(conn, table_info)

            return table_info

    async def get_relationships(
        self, table_name: str, schema: Optional[str] = None
    ) -> list["RelationshipInfo"]:
        """
        Get foreign key relationships for a table.

        Args:
            table_name: Table name
            schema: Schema name

        Returns:
            List of relationship information (empty if the database does
            not support foreign keys)
        """
        if not self.adapter.capabilities.foreign_keys:
            return []

        async with self.connection.get_connection() as conn:
            # Use run_sync to execute synchronous reflection methods.
            def get_fk_data(sync_conn):
                inspector = sa_inspect(sync_conn)
                return inspector.get_foreign_keys(table_name, schema=schema)

            fk_data = await conn.run_sync(get_fk_data)
            relationships = []

            for fk in fk_data:
                fk_dict = cast(dict[str, Any], fk)
                constraint_name = fk_dict.get("name") or f"fk_{table_name}_auto"
                # "options" may be absent or explicitly None in SQLAlchemy
                # reflection output; normalize to a dict before lookup.
                options = fk_dict.get("options") or {}
                rel = RelationshipInfo(
                    from_table=table_name,
                    from_schema=schema,
                    from_columns=fk_dict["constrained_columns"],
                    to_table=fk_dict["referred_table"],
                    to_schema=fk_dict.get("referred_schema"),
                    to_columns=fk_dict["referred_columns"],
                    constraint_name=constraint_name,
                    on_delete=options.get("ondelete"),
                    on_update=options.get("onupdate"),
                )
                relationships.append(rel)

            return relationships

    def _column_from_sa(self, col_data: dict) -> "ColumnInfo":
        """Convert SQLAlchemy column data to ColumnInfo."""
        # Compare against None (not truthiness) so falsy-looking defaults
        # are preserved rather than silently dropped.
        default = col_data.get("default")
        return ColumnInfo(
            name=col_data["name"],
            data_type=str(col_data["type"]),
            nullable=col_data["nullable"],
            default=str(default) if default is not None else None,
            primary_key=False,  # Will be set later
            foreign_key=None,  # Will be set later
            unique=False,  # Will be set later
            indexed=False,  # Will be set later
            comment=col_data.get("comment"),
        )

    def _index_from_sa(self, idx_data: dict) -> "IndexInfo":
        """Convert SQLAlchemy index data to IndexInfo."""
        return IndexInfo(
            name=idx_data["name"],
            columns=idx_data["column_names"],
            unique=idx_data.get("unique", False),
            index_type=idx_data.get("type"),
        )

    def _fk_constraint_from_sa(self, fk_data: dict) -> "ConstraintInfo":
        """Convert SQLAlchemy FK data to ConstraintInfo."""
        return ConstraintInfo(
            name=fk_data["name"],
            constraint_type="FOREIGN KEY",
            columns=fk_data["constrained_columns"],
            referenced_table=fk_data["referred_table"],
            referenced_columns=fk_data["referred_columns"],
        )

    def _is_system_schema(self, schema: str) -> bool:
        """Check if schema is a system schema to skip."""
        system_schemas = {
            "postgresql": {"information_schema", "pg_catalog", "pg_toast"},
            "mysql": {"information_schema", "mysql", "performance_schema", "sys"},
            "clickhouse": {"information_schema", "INFORMATION_SCHEMA", "system"},
        }

        # Unknown dialects have no registered system schemas.
        dialect = self.connection.dialect
        return schema in system_schemas.get(dialect, set())
@@ -0,0 +1,23 @@
1
+ """Pydantic models for database metadata and results."""
2
+
3
+ from .capabilities import DatabaseCapabilities
4
+ from .config import DatabaseConfig
5
+ from .database import DatabaseInfo, SchemaInfo
6
+ from .query import ExplainPlan, QueryResult
7
+ from .statistics import ColumnStats, Distribution
8
+ from .table import ColumnInfo, ConstraintInfo, IndexInfo, TableInfo
9
+
10
+ __all__ = [
11
+ "DatabaseCapabilities",
12
+ "DatabaseConfig",
13
+ "DatabaseInfo",
14
+ "SchemaInfo",
15
+ "TableInfo",
16
+ "ColumnInfo",
17
+ "IndexInfo",
18
+ "ConstraintInfo",
19
+ "QueryResult",
20
+ "ExplainPlan",
21
+ "ColumnStats",
22
+ "Distribution",
23
+ ]
@@ -0,0 +1,98 @@
1
+ """Database capabilities model."""
2
+
3
+ from pydantic import BaseModel, Field
4
+
5
+
6
class DatabaseCapabilities(BaseModel):
    """Feature flags describing what a given database engine supports."""

    model_config = {
        "json_schema_extra": {
            "examples": [
                {
                    "foreign_keys": True,
                    "indexes": True,
                    "views": True,
                    "materialized_views": True,
                    "partitions": True,
                    "advanced_stats": True,
                    "explain_plans": True,
                    "profiling": True,
                    "comments": True,
                    "schemas": True,
                    "transactions": True,
                    "stored_procedures": True,
                    "triggers": True,
                }
            ]
        }
    }

    foreign_keys: bool = Field(
        default=False,
        description="Database supports foreign key constraints",
    )
    indexes: bool = Field(
        default=True,
        description="Database supports indexes",
    )
    views: bool = Field(
        default=True,
        description="Database supports views",
    )
    materialized_views: bool = Field(
        default=False,
        description="Database supports materialized views",
    )
    partitions: bool = Field(
        default=False,
        description="Database supports table partitioning",
    )
    advanced_stats: bool = Field(
        default=False,
        description="Database supports advanced statistics (percentiles, distributions)",
    )
    explain_plans: bool = Field(
        default=True,
        description="Database supports EXPLAIN for query plans",
    )
    profiling: bool = Field(
        default=False,
        description="Database supports profiling and performance metrics",
    )
    comments: bool = Field(
        default=False,
        description="Database supports table/column comments",
    )
    schemas: bool = Field(
        default=True,
        description="Database supports schemas/namespaces",
    )
    transactions: bool = Field(
        default=True,
        description="Database supports transactions",
    )
    stored_procedures: bool = Field(
        default=False,
        description="Database supports stored procedures",
    )
    triggers: bool = Field(
        default=False,
        description="Database supports triggers",
    )

    def get_supported_features(self) -> list[str]:
        """Return the names of all features this database supports."""
        flags = self.model_dump()
        return [name for name in flags if flags[name] is True]

    def get_unsupported_features(self) -> list[str]:
        """Return the names of all features this database lacks."""
        flags = self.model_dump()
        return [name for name in flags if flags[name] is False]