db_connect_mcp-0.1.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of db-connect-mcp might be problematic.

@@ -0,0 +1,288 @@
+ """MySQL adapter with good feature support."""
+
+ import json
+ from typing import Any, Optional
+
+ from sqlalchemy import text
+ from sqlalchemy.ext.asyncio import AsyncConnection
+
+ from db_connect_mcp.adapters.base import BaseAdapter
+ from db_connect_mcp.models.capabilities import DatabaseCapabilities
+ from db_connect_mcp.models.database import SchemaInfo
+ from db_connect_mcp.models.statistics import ColumnStats, Distribution
+ from db_connect_mcp.models.table import TableInfo
+
+
+ class MySQLAdapter(BaseAdapter):
+     """MySQL adapter with good feature support."""
+
+     @property
+     def capabilities(self) -> DatabaseCapabilities:
+         """MySQL has good but not comprehensive support."""
+         return DatabaseCapabilities(
+             foreign_keys=True,
+             indexes=True,
+             views=True,
+             materialized_views=False,  # MySQL doesn't have native materialized views
+             partitions=True,
+             advanced_stats=False,  # No percentile functions in MySQL
+             explain_plans=True,
+             profiling=False,  # Basic profiling only
+             comments=True,
+             schemas=True,  # MySQL calls them databases
+             transactions=True,
+             stored_procedures=True,
+             triggers=True,
+         )
+
+     async def enrich_schema_info(
+         self, conn: AsyncConnection, schema_info: SchemaInfo
+     ) -> SchemaInfo:
+         """Add MySQL-specific schema metadata."""
+         query = text("""
+             SELECT
+                 SUM(data_length + index_length) as size_bytes
+             FROM information_schema.TABLES
+             WHERE table_schema = :schema_name
+         """)
+
+         result = await conn.execute(query, {"schema_name": schema_info.name})
+         row = result.fetchone()
+
+         if row and row[0]:
+             schema_info.size_bytes = int(row[0])
+
+         return schema_info
+
+     async def enrich_table_info(
+         self, conn: AsyncConnection, table_info: TableInfo
+     ) -> TableInfo:
+         """Add MySQL-specific table metadata."""
+         query = text("""
+             SELECT
+                 engine,
+                 table_rows,
+                 data_length,
+                 index_length,
+                 table_comment,
+                 create_time,
+                 update_time
+             FROM information_schema.TABLES
+             WHERE table_schema = DATABASE()
+                 AND table_name = :table_name
+         """)
+
+         result = await conn.execute(query, {"table_name": table_info.name})
+         row = result.fetchone()
+
+         if row:
+             table_info.row_count = int(row[1]) if row[1] else None
+             table_info.size_bytes = int(row[2]) if row[2] else None
+             table_info.index_size_bytes = int(row[3]) if row[3] else None
+             table_info.comment = row[4] if row[4] else None
+             table_info.created_at = str(row[5]) if row[5] else None
+             table_info.updated_at = str(row[6]) if row[6] else None
+
+             # MySQL-specific: storage engine
+             table_info.extra_info["engine"] = row[0]
+
+         return table_info
+
+     async def get_column_statistics(
+         self,
+         conn: AsyncConnection,
+         table_name: str,
+         column_name: str,
+         schema: Optional[str],
+     ) -> ColumnStats:
+         """Get MySQL column statistics (basic stats only)."""
+         table_ref = self._build_table_reference(table_name, schema)
+
+         # MySQL doesn't support percentile functions, so we get basic stats only
+         query = text(f"""
+             SELECT
+                 COUNT(*) as total_rows,
+                 COUNT(*) - COUNT(`{column_name}`) as null_count,
+                 COUNT(DISTINCT `{column_name}`) as distinct_count,
+                 MIN(`{column_name}`) as min_val,
+                 MAX(`{column_name}`) as max_val,
+                 AVG(`{column_name}`) as avg_val,
+                 STD(`{column_name}`) as stddev_val
+             FROM {table_ref}
+         """)
+
+         try:
+             result = await conn.execute(query)
+             row = result.fetchone()
+
+             if not row:
+                 return ColumnStats(
+                     column=column_name,
+                     data_type="unknown",
+                     total_rows=0,
+                     null_count=0,
+                     sample_size=0,
+                     warning="No data found",
+                 )
+
+             # Get most common values
+             mcv_query = text(f"""
+                 SELECT `{column_name}` as value, COUNT(*) as count
+                 FROM {table_ref}
+                 WHERE `{column_name}` IS NOT NULL
+                 GROUP BY `{column_name}`
+                 ORDER BY count DESC
+                 LIMIT 10
+             """)
+
+             mcv_result = await conn.execute(mcv_query)
+             mcv_rows = mcv_result.fetchall()
+             most_common = [{"value": str(r[0]), "count": int(r[1])} for r in mcv_rows]
+
+             # Get data type from information_schema
+             type_query = text("""
+                 SELECT data_type
+                 FROM information_schema.COLUMNS
+                 WHERE table_schema = DATABASE()
+                     AND table_name = :table_name
+                     AND column_name = :column_name
+             """)
+
+             type_result = await conn.execute(
+                 type_query, {"table_name": table_name, "column_name": column_name}
+             )
+             type_row = type_result.fetchone()
+             data_type = type_row[0] if type_row else "unknown"
+
+             return ColumnStats(
+                 column=column_name,
+                 data_type=data_type,
+                 total_rows=int(row[0]),
+                 null_count=int(row[1]),
+                 distinct_count=int(row[2]) if row[2] else None,
+                 min_value=row[3],
+                 max_value=row[4],
+                 avg_value=float(row[5]) if row[5] is not None else None,
+                 stddev_value=float(row[6]) if row[6] is not None else None,
+                 most_common_values=most_common,
+                 sample_size=int(row[0]),
+                 warning="Advanced statistics (percentiles) not available in MySQL",
+             )
+
+         except Exception as e:
+             return ColumnStats(
+                 column=column_name,
+                 data_type="unknown",
+                 total_rows=0,
+                 null_count=0,
+                 sample_size=0,
+                 warning=f"Statistics unavailable: {str(e)}",
+             )
+
+     async def get_value_distribution(
+         self,
+         conn: AsyncConnection,
+         table_name: str,
+         column_name: str,
+         schema: Optional[str],
+         limit: int,
+     ) -> Distribution:
+         """Get value distribution for MySQL."""
+         table_ref = self._build_table_reference(table_name, schema)
+
+         stats_query = text(f"""
+             SELECT
+                 COUNT(*) as total_rows,
+                 COUNT(DISTINCT `{column_name}`) as unique_values,
+                 COUNT(*) - COUNT(`{column_name}`) as null_count
+             FROM {table_ref}
+         """)
+
+         stats_result = await conn.execute(stats_query)
+         stats_row = stats_result.fetchone()
+
+         top_query = text(f"""
+             SELECT `{column_name}` as value, COUNT(*) as count
+             FROM {table_ref}
+             WHERE `{column_name}` IS NOT NULL
+             GROUP BY `{column_name}`
+             ORDER BY count DESC
+             LIMIT :limit
+         """)
+
+         top_result = await conn.execute(top_query, {"limit": limit})
+         top_rows = top_result.fetchall()
+
+         top_values = [{"value": str(r[0]), "count": int(r[1])} for r in top_rows]
+
+         if not stats_row:
+             return Distribution(
+                 column=column_name,
+                 total_rows=0,
+                 unique_values=0,
+                 null_count=0,
+                 top_values=[],
+                 sample_size=0,
+             )
+
+         return Distribution(
+             column=column_name,
+             total_rows=int(stats_row[0]),
+             unique_values=int(stats_row[1]),
+             null_count=int(stats_row[2]),
+             top_values=top_values,
+             sample_size=int(stats_row[0]),
+         )
+
+     async def get_sample_query(
+         self, table_name: str, schema: Optional[str], limit: int
+     ) -> str:
+         """Generate MySQL sampling query."""
+         table_ref = self._build_table_reference(table_name, schema)
+         return f"SELECT * FROM {table_ref} LIMIT {limit}"
+
+     async def get_explain_query(self, query: str, analyze: bool) -> str:
+         """Generate MySQL EXPLAIN query."""
+         if analyze:
+             return f"EXPLAIN ANALYZE {query}"
+         return f"EXPLAIN FORMAT=JSON {query}"
+
+     async def parse_explain_plan(
+         self, plan_text: str, analyzed: bool
+     ) -> dict[str, Any]:
+         """Parse MySQL EXPLAIN output."""
+         try:
+             plan_data = json.loads(plan_text)
+
+             result: dict[str, Any] = {
+                 "json": plan_data,
+                 "warnings": [],
+                 "recommendations": [],
+             }
+
+             # MySQL EXPLAIN has different structure
+             if "query_block" in plan_data:
+                 query_block = plan_data["query_block"]
+
+                 # Extract cost if available
+                 if "cost_info" in query_block:
+                     cost_info = query_block["cost_info"]
+                     result["estimated_cost"] = float(cost_info.get("query_cost", 0))
+
+                 # Check for table scans
+                 if "table" in query_block:
+                     table = query_block["table"]
+                     if table.get("access_type") == "ALL":
+                         result["warnings"].append("Full table scan detected")
+                         result["recommendations"].append("Consider adding indexes")
+
+             return result
+
+         except (json.JSONDecodeError, KeyError):
+             pass
+
+         return {
+             "json": None,
+             "warnings": [],
+             "recommendations": [],
+         }
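
For reference, a minimal standalone sketch of the cost and table-scan checks implemented in parse_explain_plan above, run against a hand-written EXPLAIN FORMAT=JSON document (the sample plan and its numbers are illustrative, not output captured from a real server):

# Minimal sketch of the checks in MySQLAdapter.parse_explain_plan.
# The plan document below is hand-written for illustration only.
import json

sample_plan = json.dumps({
    "query_block": {
        "cost_info": {"query_cost": "42.15"},
        "table": {"table_name": "users", "access_type": "ALL"},
    }
})

plan_data = json.loads(sample_plan)
result = {"json": plan_data, "warnings": [], "recommendations": []}

query_block = plan_data.get("query_block", {})
if "cost_info" in query_block:
    result["estimated_cost"] = float(query_block["cost_info"].get("query_cost", 0))
if query_block.get("table", {}).get("access_type") == "ALL":
    result["warnings"].append("Full table scan detected")
    result["recommendations"].append("Consider adding indexes")

print(result["estimated_cost"], result["warnings"])
# 42.15 ['Full table scan detected']
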
@@ -0,0 +1,351 @@
+ """PostgreSQL adapter with full feature support."""
+
+ import json
+ from typing import Any, Optional
+
+ from sqlalchemy import text
+ from sqlalchemy.ext.asyncio import AsyncConnection
+
+ from db_connect_mcp.adapters.base import BaseAdapter
+ from db_connect_mcp.models.capabilities import DatabaseCapabilities
+ from db_connect_mcp.models.database import SchemaInfo
+ from db_connect_mcp.models.statistics import ColumnStats, Distribution
+ from db_connect_mcp.models.table import TableInfo
+
+
+ class PostgresAdapter(BaseAdapter):
+     """PostgreSQL adapter with comprehensive feature support."""
+
+     @property
+     def capabilities(self) -> DatabaseCapabilities:
+         """PostgreSQL supports all features."""
+         return DatabaseCapabilities(
+             foreign_keys=True,
+             indexes=True,
+             views=True,
+             materialized_views=True,
+             partitions=True,
+             advanced_stats=True,
+             explain_plans=True,
+             profiling=True,
+             comments=True,
+             schemas=True,
+             transactions=True,
+             stored_procedures=True,
+             triggers=True,
+         )
+
+     async def enrich_schema_info(
+         self, conn: AsyncConnection, schema_info: SchemaInfo
+     ) -> SchemaInfo:
+         """Add PostgreSQL-specific schema metadata."""
+         query = text("""
+             SELECT
+                 pg_catalog.pg_get_userbyid(n.nspowner) as owner,
+                 pg_catalog.obj_description(n.oid, 'pg_namespace') as comment
+             FROM pg_catalog.pg_namespace n
+             WHERE n.nspname = :schema_name
+         """)
+
+         result = await conn.execute(query, {"schema_name": schema_info.name})
+         row = result.fetchone()
+
+         if row:
+             schema_info.owner = row[0]
+             schema_info.comment = row[1]
+
+         # Get schema size
+         size_query = text("""
+             SELECT SUM(pg_total_relation_size(quote_ident(schemaname) || '.' || quote_ident(tablename)))::bigint
+             FROM pg_tables
+             WHERE schemaname = :schema_name
+         """)
+
+         result = await conn.execute(size_query, {"schema_name": schema_info.name})
+         row = result.fetchone()
+         if row and row[0]:
+             schema_info.size_bytes = int(row[0])
+
+         return schema_info
+
+     async def enrich_table_info(
+         self, conn: AsyncConnection, table_info: TableInfo
+     ) -> TableInfo:
+         """Add PostgreSQL-specific table metadata."""
+         table_ref = self._build_table_reference(table_info.name, table_info.schema)
+
+         query = text("""
+             SELECT
+                 pg_total_relation_size(:table_ref::regclass)::bigint as total_size,
+                 pg_relation_size(:table_ref::regclass)::bigint as table_size,
+                 pg_indexes_size(:table_ref::regclass)::bigint as indexes_size,
+                 (SELECT reltuples::bigint FROM pg_class WHERE oid = :table_ref::regclass::oid) as row_count,
+                 obj_description(:table_ref::regclass, 'pg_class') as comment
+         """)
+
+         try:
+             result = await conn.execute(query, {"table_ref": table_ref})
+             row = result.fetchone()
+
+             if row:
+                 table_info.size_bytes = int(row[1]) if row[1] else None
+                 table_info.index_size_bytes = int(row[2]) if row[2] else None
+                 table_info.row_count = int(row[3]) if row[3] else None
+                 table_info.comment = row[4]
+
+             # Add PostgreSQL-specific extras
+             extras_query = text("""
+                 SELECT
+                     c.relkind as table_kind,
+                     c.relpersistence as persistence,
+                     c.relispartition as is_partition
+                 FROM pg_class c
+                 JOIN pg_namespace n ON n.oid = c.relnamespace
+                 WHERE c.relname = :table_name
+                     AND n.nspname = COALESCE(:schema_name, 'public')
+             """)
+
+             result = await conn.execute(
+                 extras_query,
+                 {"table_name": table_info.name, "schema_name": table_info.schema},
+             )
+             row = result.fetchone()
+
+             if row:
+                 table_info.extra_info["relkind"] = row[0]
+                 table_info.extra_info["persistence"] = row[1]
+                 table_info.extra_info["is_partition"] = row[2]
+
+         except Exception:
+             # If enrichment fails, return basic info
+             pass
+
+         return table_info
+
+     async def get_column_statistics(
+         self,
+         conn: AsyncConnection,
+         table_name: str,
+         column_name: str,
+         schema: Optional[str],
+     ) -> ColumnStats:
+         """Get comprehensive PostgreSQL column statistics."""
+         table_ref = self._build_table_reference(table_name, schema)
+
+         # Basic stats query with PostgreSQL-specific functions
+         query = text(f"""
+             WITH stats AS (
+                 SELECT
+                     COUNT(*) as total_rows,
+                     COUNT("{column_name}") as non_null_count,
+                     COUNT(*) - COUNT("{column_name}") as null_count,
+                     COUNT(DISTINCT "{column_name}") as distinct_count,
+                     MIN("{column_name}") as min_val,
+                     MAX("{column_name}") as max_val,
+                     pg_typeof("{column_name}")::text as data_type
+                 FROM {table_ref}
+             ),
+             numeric_stats AS (
+                 SELECT
+                     AVG("{column_name}")::float as avg_val,
+                     STDDEV("{column_name}")::float as stddev_val,
+                     PERCENTILE_CONT(0.25) WITHIN GROUP (ORDER BY "{column_name}") as p25,
+                     PERCENTILE_CONT(0.50) WITHIN GROUP (ORDER BY "{column_name}") as p50,
+                     PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY "{column_name}") as p75,
+                     PERCENTILE_CONT(0.95) WITHIN GROUP (ORDER BY "{column_name}") as p95,
+                     PERCENTILE_CONT(0.99) WITHIN GROUP (ORDER BY "{column_name}") as p99
+                 FROM {table_ref}
+                 WHERE "{column_name}" IS NOT NULL
+             )
+             SELECT
+                 s.total_rows,
+                 s.null_count,
+                 s.distinct_count,
+                 s.min_val,
+                 s.max_val,
+                 s.data_type,
+                 n.avg_val,
+                 n.stddev_val,
+                 n.p25,
+                 n.p50,
+                 n.p75,
+                 n.p95,
+                 n.p99
+             FROM stats s
+             LEFT JOIN numeric_stats n ON true
+         """)
+
+         try:
+             result = await conn.execute(query)
+             row = result.fetchone()
+
+             if not row:
+                 return ColumnStats(
+                     column=column_name,
+                     data_type="unknown",
+                     total_rows=0,
+                     null_count=0,
+                     sample_size=0,
+                     warning="No data found",
+                 )
+
+             # Get most common values
+             mcv_query = text(f"""
+                 SELECT "{column_name}" as value, COUNT(*) as count
+                 FROM {table_ref}
+                 WHERE "{column_name}" IS NOT NULL
+                 GROUP BY "{column_name}"
+                 ORDER BY count DESC
+                 LIMIT 10
+             """)
+
+             mcv_result = await conn.execute(mcv_query)
+             mcv_rows = mcv_result.fetchall()
+             most_common = [{"value": str(r[0]), "count": int(r[1])} for r in mcv_rows]
+
+             return ColumnStats(
+                 column=column_name,
+                 data_type=str(row[5]),
+                 total_rows=int(row[0]),
+                 null_count=int(row[1]),
+                 distinct_count=int(row[2]) if row[2] else None,
+                 min_value=row[3],
+                 max_value=row[4],
+                 avg_value=float(row[6]) if row[6] is not None else None,
+                 stddev_value=float(row[7]) if row[7] is not None else None,
+                 percentile_25=row[8],
+                 median_value=row[9],
+                 percentile_75=row[10],
+                 percentile_95=row[11],
+                 percentile_99=row[12],
+                 most_common_values=most_common,
+                 sample_size=int(row[0]),
+             )
+
+         except Exception as e:
+             return ColumnStats(
+                 column=column_name,
+                 data_type="unknown",
+                 total_rows=0,
+                 null_count=0,
+                 sample_size=0,
+                 warning=f"Statistics unavailable: {str(e)}",
+             )
+
+     async def get_value_distribution(
+         self,
+         conn: AsyncConnection,
+         table_name: str,
+         column_name: str,
+         schema: Optional[str],
+         limit: int,
+     ) -> Distribution:
+         """Get value distribution for PostgreSQL."""
+         table_ref = self._build_table_reference(table_name, schema)
+
+         query = text(f"""
+             WITH stats AS (
+                 SELECT
+                     COUNT(*) as total_rows,
+                     COUNT(DISTINCT "{column_name}") as unique_values,
+                     COUNT(*) - COUNT("{column_name}") as null_count
+                 FROM {table_ref}
+             ),
+             top_values AS (
+                 SELECT "{column_name}" as value, COUNT(*) as count
+                 FROM {table_ref}
+                 WHERE "{column_name}" IS NOT NULL
+                 GROUP BY "{column_name}"
+                 ORDER BY count DESC
+                 LIMIT :limit
+             )
+             SELECT
+                 s.total_rows,
+                 s.unique_values,
+                 s.null_count,
+                 json_agg(json_build_object('value', t.value::text, 'count', t.count)) as top_values
+             FROM stats s
+             LEFT JOIN top_values t ON true
+             GROUP BY s.total_rows, s.unique_values, s.null_count
+         """)
+
+         result = await conn.execute(query, {"limit": limit})
+         row = result.fetchone()
+
+         if not row:
+             return Distribution(
+                 column=column_name,
+                 total_rows=0,
+                 unique_values=0,
+                 null_count=0,
+                 top_values=[],
+                 sample_size=0,
+             )
+
+         top_values_data = json.loads(row[3]) if row[3] else []
+
+         return Distribution(
+             column=column_name,
+             total_rows=int(row[0]),
+             unique_values=int(row[1]),
+             null_count=int(row[2]),
+             top_values=top_values_data,
+             sample_size=int(row[0]),
+         )
+
+     async def get_sample_query(
+         self, table_name: str, schema: Optional[str], limit: int
+     ) -> str:
+         """Generate a PostgreSQL sampling query."""
+         table_ref = self._build_table_reference(table_name, schema)
+         # A plain LIMIT is used here; TABLESAMPLE could be substituted for very large tables
+         return f"SELECT * FROM {table_ref} LIMIT {limit}"
+
+     async def get_explain_query(self, query: str, analyze: bool) -> str:
+         """Generate PostgreSQL EXPLAIN query."""
+         if analyze:
+             return f"EXPLAIN (ANALYZE, BUFFERS, FORMAT JSON) {query}"
+         return f"EXPLAIN (FORMAT JSON) {query}"
+
+     async def parse_explain_plan(
+         self, plan_text: str, analyzed: bool
+     ) -> dict[str, Any]:
+         """Parse PostgreSQL EXPLAIN JSON output."""
+         try:
+             plan_data = json.loads(plan_text)
+
+             if isinstance(plan_data, list) and len(plan_data) > 0:
+                 plan = plan_data[0].get("Plan", {})
+
+                 result: dict[str, Any] = {
+                     "json": plan,
+                     "estimated_cost": plan.get("Total Cost"),
+                     "estimated_rows": plan.get("Plan Rows"),
+                     "warnings": [],
+                     "recommendations": [],
+                 }
+
+                 if analyzed:
+                     result["actual_time_ms"] = plan.get("Actual Total Time")
+                     result["actual_rows"] = plan.get("Actual Rows")
+
+                 # Add warnings based on plan analysis
+                 if "Seq Scan" in str(plan):
+                     result["warnings"].append(
+                         "Sequential scan detected - may be slow on large tables"
+                     )
+                     result["recommendations"].append(
+                         "Consider adding appropriate indexes"
+                     )
+
+                 return result
+
+         except (json.JSONDecodeError, KeyError):
+             pass
+
+         # Fallback for non-JSON format
+         return {
+             "json": None,
+             "warnings": [],
+             "recommendations": [],
+         }
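
The PostgreSQL EXPLAIN handling can likewise be exercised in isolation; a minimal sketch against a hand-written EXPLAIN (FORMAT JSON) document (the node type and numbers are illustrative, not real planner output):

# Minimal sketch of the checks in PostgresAdapter.parse_explain_plan.
# The plan list below mimics EXPLAIN (FORMAT JSON) output and is hand-written.
import json

plan_text = json.dumps([
    {
        "Plan": {
            "Node Type": "Seq Scan",
            "Relation Name": "users",
            "Total Cost": 155.0,
            "Plan Rows": 10000,
        }
    }
])

plan_data = json.loads(plan_text)
plan = plan_data[0].get("Plan", {}) if isinstance(plan_data, list) and plan_data else {}

result = {
    "estimated_cost": plan.get("Total Cost"),
    "estimated_rows": plan.get("Plan Rows"),
    "warnings": [],
    "recommendations": [],
}
if "Seq Scan" in str(plan):
    result["warnings"].append("Sequential scan detected - may be slow on large tables")
    result["recommendations"].append("Consider adding appropriate indexes")

print(result["estimated_cost"], result["estimated_rows"], result["warnings"])
# 155.0 10000 ['Sequential scan detected - may be slow on large tables']
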
@@ -0,0 +1,13 @@
+ """Core database operations layer."""
+
+ from .analyzer import StatisticsAnalyzer
+ from .connection import DatabaseConnection
+ from .executor import QueryExecutor
+ from .inspector import MetadataInspector
+
+ __all__ = [
+     "DatabaseConnection",
+     "MetadataInspector",
+     "QueryExecutor",
+     "StatisticsAnalyzer",
+ ]