db-connect-mcp 0.1.0 (db_connect_mcp-0.1.0-py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of db-connect-mcp might be problematic.

db_connect_mcp/__init__.py
@@ -0,0 +1,30 @@
+ """
+ db_mcp - Multi-database MCP server for Claude Code
+
+ A Model Context Protocol (MCP) server that provides database analysis and querying
+ capabilities for PostgreSQL, MySQL, and ClickHouse databases.
+ """
+
+ __version__ = "2.0.0"
+
+ from .models.config import DatabaseConfig
+ from .models.capabilities import DatabaseCapabilities
+ from .models.database import DatabaseInfo, SchemaInfo
+ from .models.table import TableInfo, ColumnInfo, IndexInfo, ConstraintInfo
+ from .models.query import QueryResult, ExplainPlan
+ from .models.statistics import ColumnStats, Distribution
+
+ __all__ = [
+     "DatabaseConfig",
+     "DatabaseCapabilities",
+     "DatabaseInfo",
+     "SchemaInfo",
+     "TableInfo",
+     "ColumnInfo",
+     "IndexInfo",
+     "ConstraintInfo",
+     "QueryResult",
+     "ExplainPlan",
+     "ColumnStats",
+     "Distribution",
+ ]
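
For orientation, a minimal smoke test of the public surface this file re-exports (a sketch assuming the wheel installs an importable db_connect_mcp package; nothing below touches a database):

    import db_connect_mcp

    # Note the mismatch: the wheel is versioned 0.1.0, but the package
    # reports __version__ = "2.0.0".
    print(db_connect_mcp.__version__)
    print(sorted(db_connect_mcp.__all__))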
db_connect_mcp/__main__.py
@@ -0,0 +1,13 @@
+ """Entry point for running db_connect_mcp as a module."""
+
+ import asyncio
+ import sys
+
+ from db_connect_mcp.server import main
+
+ if __name__ == "__main__":
+     # Windows-specific event loop policy
+     if sys.platform == "win32":
+         asyncio.set_event_loop_policy(asyncio.WindowsProactorEventLoopPolicy())  # type: ignore[attr-defined]
+
+     asyncio.run(main())
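
The explicit Proactor policy is defensive: pipe and subprocess transports, which stdio-based MCP servers rely on, only work on Windows under the proactor event loop (the default there since Python 3.8). A self-contained sketch of the same launch pattern, with a hypothetical stand-in for db_connect_mcp.server.main:

    import asyncio
    import sys

    async def main() -> None:
        # Hypothetical stand-in for db_connect_mcp.server.main.
        await asyncio.sleep(0)

    if __name__ == "__main__":
        if sys.platform == "win32":
            # Pipe/subprocess transports need the proactor loop on Windows.
            asyncio.set_event_loop_policy(asyncio.WindowsProactorEventLoopPolicy())
        asyncio.run(main())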
db_connect_mcp/adapters/__init__.py
@@ -0,0 +1,72 @@
+ """Database adapters for specific database implementations."""
+
+ from sqlalchemy.engine.url import make_url
+
+ from .base import BaseAdapter
+ from .clickhouse import ClickHouseAdapter
+ from .mysql import MySQLAdapter
+ from .postgresql import PostgresAdapter
+ from ..models.config import DatabaseConfig
+
+ __all__ = [
+     "BaseAdapter",
+     "PostgresAdapter",
+     "MySQLAdapter",
+     "ClickHouseAdapter",
+     "create_adapter",
+     "detect_dialect",
+ ]
+
+
+ def detect_dialect(url: str) -> str:
+     """
+     Detect database dialect from connection URL.
+
+     Args:
+         url: Database connection URL
+
+     Returns:
+         Dialect name (postgresql, mysql, clickhouse)
+
+     Raises:
+         ValueError: If dialect cannot be detected
+     """
+     try:
+         parsed_url = make_url(url)
+         # Extract base dialect (e.g., "postgresql" from "postgresql+asyncpg")
+         dialect = parsed_url.drivername.split("+")[0]
+         return dialect
+     except Exception as e:
+         raise ValueError(f"Failed to detect dialect from URL: {e}")
+
+
+ def create_adapter(config: DatabaseConfig) -> BaseAdapter:
+     """
+     Factory function to create appropriate database adapter.
+
+     Args:
+         config: Database configuration
+
+     Returns:
+         Database adapter instance
+
+     Raises:
+         ValueError: If database type is not supported
+     """
+     dialect = config.dialect
+
+     adapters = {
+         "postgresql": PostgresAdapter,
+         "mysql": MySQLAdapter,
+         "clickhouse": ClickHouseAdapter,
+     }
+
+     adapter_class = adapters.get(dialect)
+
+     if adapter_class is None:
+         raise ValueError(
+             f"Unsupported database dialect: {dialect}. "
+             f"Supported dialects: {', '.join(adapters.keys())}"
+         )
+
+     return adapter_class()
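
A short sketch of the factory flow above. DatabaseConfig's constructor is not shown in this diff, so a duck-typed stub stands in for it; create_adapter only reads the .dialect attribute:

    from db_connect_mcp.adapters import create_adapter, detect_dialect

    # Example URL only; the driver suffix after "+" is stripped.
    print(detect_dialect("postgresql+asyncpg://user:secret@localhost:5432/app"))
    # -> "postgresql"

    class _StubConfig:
        # Hypothetical stand-in for models.config.DatabaseConfig.
        dialect = "clickhouse"

    adapter = create_adapter(_StubConfig())
    print(type(adapter).__name__)  # -> "ClickHouseAdapter"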
db_connect_mcp/adapters/base.py
@@ -0,0 +1,152 @@
+ """Base adapter abstract class for database-specific implementations."""
+
+ from abc import ABC, abstractmethod
+ from typing import Any, Optional
+
+ from sqlalchemy.ext.asyncio import AsyncConnection
+
+ from db_connect_mcp.models.capabilities import DatabaseCapabilities
+ from db_connect_mcp.models.database import SchemaInfo
+ from db_connect_mcp.models.statistics import ColumnStats, Distribution
+ from db_connect_mcp.models.table import TableInfo
+
+
+ class BaseAdapter(ABC):
+     """Base adapter defining database-specific interface."""
+
+     @property
+     @abstractmethod
+     def capabilities(self) -> DatabaseCapabilities:
+         """Get capabilities for this database type."""
+         ...
+
+     @abstractmethod
+     async def enrich_schema_info(
+         self, conn: AsyncConnection, schema_info: SchemaInfo
+     ) -> SchemaInfo:
+         """
+         Enrich schema info with database-specific metadata.
+
+         Args:
+             conn: Database connection
+             schema_info: Basic schema information
+
+         Returns:
+             Enriched schema information
+         """
+         ...
+
+     @abstractmethod
+     async def enrich_table_info(
+         self, conn: AsyncConnection, table_info: TableInfo
+     ) -> TableInfo:
+         """
+         Enrich table info with database-specific metadata.
+
+         Args:
+             conn: Database connection
+             table_info: Basic table information
+
+         Returns:
+             Enriched table information with sizes, row counts, etc.
+         """
+         ...
+
+     @abstractmethod
+     async def get_column_statistics(
+         self,
+         conn: AsyncConnection,
+         table_name: str,
+         column_name: str,
+         schema: Optional[str],
+     ) -> ColumnStats:
+         """
+         Get column statistics using database-specific queries.
+
+         Args:
+             conn: Database connection
+             table_name: Table name
+             column_name: Column name
+             schema: Schema name
+
+         Returns:
+             Column statistics
+         """
+         ...
+
+     @abstractmethod
+     async def get_value_distribution(
+         self,
+         conn: AsyncConnection,
+         table_name: str,
+         column_name: str,
+         schema: Optional[str],
+         limit: int,
+     ) -> Distribution:
+         """
+         Get value distribution for a column.
+
+         Args:
+             conn: Database connection
+             table_name: Table name
+             column_name: Column name
+             schema: Schema name
+             limit: Number of top values
+
+         Returns:
+             Value distribution
+         """
+         ...
+
+     @abstractmethod
+     async def get_sample_query(
+         self, table_name: str, schema: Optional[str], limit: int
+     ) -> str:
+         """
+         Generate database-specific efficient sampling query.
+
+         Args:
+             table_name: Table name
+             schema: Schema name
+             limit: Number of rows to sample
+
+         Returns:
+             SQL query for sampling
+         """
+         ...
+
+     @abstractmethod
+     async def get_explain_query(self, query: str, analyze: bool) -> str:
+         """
+         Generate database-specific EXPLAIN query.
+
+         Args:
+             query: Query to explain
+             analyze: Whether to use EXPLAIN ANALYZE
+
+         Returns:
+             EXPLAIN query string
+         """
+         ...
+
+     @abstractmethod
+     async def parse_explain_plan(
+         self, plan_text: str, analyzed: bool
+     ) -> dict[str, Any]:
+         """
+         Parse EXPLAIN output into structured format.
+
+         Args:
+             plan_text: Raw EXPLAIN output
+             analyzed: Whether this was EXPLAIN ANALYZE
+
+         Returns:
+             Dictionary with parsed plan information
+         """
+         ...
+
+     def _build_table_reference(self, table_name: str, schema: Optional[str]) -> str:
+         """Build qualified table reference."""
+         if schema:
+             return f"{schema}.{table_name}"
+         return table_name
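
To see the contract end to end, a hedged sketch of how a caller might drive any adapter through this interface. The connection URL, table, and column names are invented, and it assumes the caller has both a config object and a URL on hand (how the server wires these together is not shown in this diff):

    import asyncio

    from sqlalchemy.ext.asyncio import create_async_engine

    from db_connect_mcp.adapters import create_adapter

    async def profile_column(config, url: str) -> None:
        adapter = create_adapter(config)  # dispatches on config.dialect
        engine = create_async_engine(url)
        try:
            async with engine.connect() as conn:
                stats = await adapter.get_column_statistics(
                    conn, table_name="events", column_name="user_id", schema=None
                )
                print(stats)
        finally:
            await engine.dispose()

    # e.g. asyncio.run(profile_column(cfg, "clickhouse+asynch://localhost/analytics"))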
db_connect_mcp/adapters/clickhouse.py
@@ -0,0 +1,298 @@
+ """ClickHouse adapter optimized for analytics workloads."""
+
+ from typing import Any, Optional
+
+ from sqlalchemy import text
+ from sqlalchemy.ext.asyncio import AsyncConnection
+
+ from db_connect_mcp.adapters.base import BaseAdapter
+ from db_connect_mcp.models.capabilities import DatabaseCapabilities
+ from db_connect_mcp.models.database import SchemaInfo
+ from db_connect_mcp.models.statistics import ColumnStats, Distribution
+ from db_connect_mcp.models.table import TableInfo
+
+
+ class ClickHouseAdapter(BaseAdapter):
+     """ClickHouse adapter optimized for analytical queries."""
+
+     @property
+     def capabilities(self) -> DatabaseCapabilities:
+         """ClickHouse analytics-focused capabilities."""
+         return DatabaseCapabilities(
+             foreign_keys=False,  # ClickHouse doesn't enforce FK constraints
+             indexes=True,  # Has specialized indexes
+             views=True,
+             materialized_views=True,
+             partitions=True,  # Advanced partitioning
+             advanced_stats=True,  # Excellent columnar statistics
+             explain_plans=True,
+             profiling=True,
+             comments=True,
+             schemas=True,  # Called databases in ClickHouse
+             transactions=False,  # No traditional transactions
+             stored_procedures=False,
+             triggers=False,
+         )
+
+     async def enrich_schema_info(
+         self, conn: AsyncConnection, schema_info: SchemaInfo
+     ) -> SchemaInfo:
+         """Add ClickHouse-specific schema metadata."""
+         try:
+             query = text("""
+                 SELECT
+                     sum(bytes) as size_bytes
+                 FROM system.parts
+                 WHERE database = :schema_name
+                     AND active = 1
+             """)
+
+             result = await conn.execute(query, {"schema_name": schema_info.name})
+             row = result.fetchone()
+
+             if row and row[0]:
+                 schema_info.size_bytes = int(row[0])
+         except Exception:
+             # Permission denied or table not available
+             # This is common for readonly users, just skip enrichment
+             pass
+
+         return schema_info
+
+     async def enrich_table_info(
+         self, conn: AsyncConnection, table_info: TableInfo
+     ) -> TableInfo:
+         """Add ClickHouse-specific table metadata."""
+         # Get table engine and metadata
+         query = text("""
+             SELECT
+                 engine,
+                 total_rows,
+                 total_bytes,
+                 partition_key,
+                 sorting_key,
+                 primary_key,
+                 sampling_key
+             FROM system.tables
+             WHERE database = currentDatabase()
+                 AND name = :table_name
+         """)
+
+         result = await conn.execute(query, {"table_name": table_info.name})
+         row = result.fetchone()
+
+         if row:
+             table_info.row_count = int(row[1]) if row[1] else None
+             table_info.size_bytes = int(row[2]) if row[2] else None
+
+             # ClickHouse-specific metadata
+             table_info.extra_info["engine"] = row[0]
+             table_info.extra_info["partition_key"] = row[3]
+             table_info.extra_info["sorting_key"] = row[4]
+             table_info.extra_info["primary_key"] = row[5]
+             table_info.extra_info["sampling_key"] = row[6]
+
+         # Get compression info (may fail due to permissions)
+         try:
+             compression_query = text("""
+                 SELECT
+                     sum(data_compressed_bytes) as compressed,
+                     sum(data_uncompressed_bytes) as uncompressed
+                 FROM system.parts
+                 WHERE database = currentDatabase()
+                     AND table = :table_name
+                     AND active = 1
+             """)
+
+             result = await conn.execute(
+                 compression_query, {"table_name": table_info.name}
+             )
+             row = result.fetchone()
+
+             if row and row[0]:
+                 table_info.extra_info["compressed_bytes"] = int(row[0])
+                 table_info.extra_info["uncompressed_bytes"] = int(row[1])
+                 if row[1] and row[1] > 0:
+                     ratio = float(row[0]) / float(row[1])
+                     table_info.extra_info["compression_ratio"] = round(ratio, 2)
+         except Exception:
+             # Permission denied or table not available
+             # This is common for readonly users, just skip compression info
+             pass
+
+         return table_info
+
+     async def get_column_statistics(
+         self,
+         conn: AsyncConnection,
+         table_name: str,
+         column_name: str,
+         schema: Optional[str],
+     ) -> ColumnStats:
+         """Get ClickHouse column statistics with columnar optimizations."""
+         table_ref = self._build_table_reference(table_name, schema)
+
+         # ClickHouse has excellent support for quantiles
+         query = text(f"""
+             SELECT
+                 count() as total_rows,
+                 countIf(`{column_name}` IS NULL) as null_count,
+                 uniq(`{column_name}`) as distinct_count,
+                 min(`{column_name}`) as min_val,
+                 max(`{column_name}`) as max_val,
+                 avg(`{column_name}`) as avg_val,
+                 stddevPop(`{column_name}`) as stddev_val,
+                 quantile(0.25)(`{column_name}`) as p25,
+                 quantile(0.50)(`{column_name}`) as p50,
+                 quantile(0.75)(`{column_name}`) as p75,
+                 quantile(0.95)(`{column_name}`) as p95,
+                 quantile(0.99)(`{column_name}`) as p99,
+                 toTypeName(`{column_name}`) as data_type
+             FROM {table_ref}
+         """)
+
+         try:
+             result = await conn.execute(query)
+             row = result.fetchone()
+
+             if not row:
+                 return ColumnStats(
+                     column=column_name,
+                     data_type="unknown",
+                     total_rows=0,
+                     null_count=0,
+                     sample_size=0,
+                     warning="No data found",
+                 )
+
+             # Get most common values
+             mcv_query = text(f"""
+                 SELECT `{column_name}` as value, count() as count
+                 FROM {table_ref}
+                 WHERE `{column_name}` IS NOT NULL
+                 GROUP BY `{column_name}`
+                 ORDER BY count DESC
+                 LIMIT 10
+             """)
+
+             mcv_result = await conn.execute(mcv_query)
+             mcv_rows = mcv_result.fetchall()
+             most_common = [{"value": str(r[0]), "count": int(r[1])} for r in mcv_rows]
+
+             return ColumnStats(
+                 column=column_name,
+                 data_type=str(row[12]),
+                 total_rows=int(row[0]),
+                 null_count=int(row[1]),
+                 distinct_count=int(row[2]) if row[2] else None,
+                 min_value=row[3],
+                 max_value=row[4],
+                 avg_value=float(row[5]) if row[5] is not None else None,
+                 stddev_value=float(row[6]) if row[6] is not None else None,
+                 percentile_25=row[7],
+                 median_value=row[8],
+                 percentile_75=row[9],
+                 percentile_95=row[10],
+                 percentile_99=row[11],
+                 most_common_values=most_common,
+                 sample_size=int(row[0]),
+             )
+
+         except Exception as e:
+             return ColumnStats(
+                 column=column_name,
+                 data_type="unknown",
+                 total_rows=0,
+                 null_count=0,
+                 sample_size=0,
+                 warning=f"Statistics unavailable: {str(e)}",
+             )
+
+     async def get_value_distribution(
+         self,
+         conn: AsyncConnection,
+         table_name: str,
+         column_name: str,
+         schema: Optional[str],
+         limit: int,
+     ) -> Distribution:
+         """Get value distribution for ClickHouse."""
+         table_ref = self._build_table_reference(table_name, schema)
+
+         stats_query = text(f"""
+             SELECT
+                 count() as total_rows,
+                 uniq(`{column_name}`) as unique_values,
+                 countIf(`{column_name}` IS NULL) as null_count
+             FROM {table_ref}
+         """)
+
+         stats_result = await conn.execute(stats_query)
+         stats_row = stats_result.fetchone()
+
+         top_query = text(f"""
+             SELECT `{column_name}` as value, count() as count
+             FROM {table_ref}
+             WHERE `{column_name}` IS NOT NULL
+             GROUP BY `{column_name}`
+             ORDER BY count DESC
+             LIMIT :limit
+         """)
+
+         top_result = await conn.execute(top_query, {"limit": limit})
+         top_rows = top_result.fetchall()
+
+         top_values = [{"value": str(r[0]), "count": int(r[1])} for r in top_rows]
+
+         if not stats_row:
+             return Distribution(
+                 column=column_name,
+                 total_rows=0,
+                 unique_values=0,
+                 null_count=0,
+                 top_values=[],
+                 sample_size=0,
+             )
+
+         return Distribution(
+             column=column_name,
+             total_rows=int(stats_row[0]),
+             unique_values=int(stats_row[1]),
+             null_count=int(stats_row[2]),
+             top_values=top_values,
+             sample_size=int(stats_row[0]),
+         )
+
+     async def get_sample_query(
+         self, table_name: str, schema: Optional[str], limit: int
+     ) -> str:
+         """Generate ClickHouse sampling query with SAMPLE clause."""
+         table_ref = self._build_table_reference(table_name, schema)
+         # ClickHouse SAMPLE clause for efficient sampling on large datasets
+         return f"SELECT * FROM {table_ref} SAMPLE 0.01 LIMIT {limit}"
+
+     async def get_explain_query(self, query: str, analyze: bool) -> str:
+         """Generate ClickHouse EXPLAIN query."""
+         if analyze:
+             return f"EXPLAIN PIPELINE {query}"
+         return f"EXPLAIN {query}"
+
+     async def parse_explain_plan(
+         self, plan_text: str, analyzed: bool
+     ) -> dict[str, Any]:
+         """Parse ClickHouse EXPLAIN output."""
+         result: dict[str, Any] = {
+             "json": None,
+             "warnings": [],
+             "recommendations": [],
+         }
+
+         # ClickHouse EXPLAIN is text-based
+         # Look for common patterns
+         if "FULL" in plan_text.upper() and "SCAN" in plan_text.upper():
+             result["warnings"].append("Full table scan detected")
+             result["recommendations"].append(
+                 "Consider using appropriate indexes or sampling"
+             )
+
+         return result
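
The two EXPLAIN helpers are pure string handling, so they can be exercised without a ClickHouse server; the plan text below is an invented example:

    import asyncio

    from db_connect_mcp.adapters.clickhouse import ClickHouseAdapter

    adapter = ClickHouseAdapter()

    print(asyncio.run(adapter.get_explain_query("SELECT count() FROM hits", analyze=True)))
    # -> "EXPLAIN PIPELINE SELECT count() FROM hits"

    plan = asyncio.run(adapter.parse_explain_plan("ReadFromMergeTree (full scan)", analyzed=False))
    print(plan["warnings"])  # -> ["Full table scan detected"]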