db-connect-mcp 0.1.0 (db_connect_mcp-0.1.0-py3-none-any.whl)
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of db-connect-mcp might be problematic.
- db_connect_mcp/__init__.py +30 -0
- db_connect_mcp/__main__.py +13 -0
- db_connect_mcp/adapters/__init__.py +72 -0
- db_connect_mcp/adapters/base.py +152 -0
- db_connect_mcp/adapters/clickhouse.py +298 -0
- db_connect_mcp/adapters/mysql.py +288 -0
- db_connect_mcp/adapters/postgresql.py +351 -0
- db_connect_mcp/core/__init__.py +13 -0
- db_connect_mcp/core/analyzer.py +114 -0
- db_connect_mcp/core/connection.py +371 -0
- db_connect_mcp/core/executor.py +239 -0
- db_connect_mcp/core/inspector.py +345 -0
- db_connect_mcp/models/__init__.py +23 -0
- db_connect_mcp/models/capabilities.py +98 -0
- db_connect_mcp/models/config.py +401 -0
- db_connect_mcp/models/database.py +112 -0
- db_connect_mcp/models/query.py +119 -0
- db_connect_mcp/models/statistics.py +176 -0
- db_connect_mcp/models/table.py +230 -0
- db_connect_mcp/server.py +496 -0
- db_connect_mcp-0.1.0.dist-info/METADATA +565 -0
- db_connect_mcp-0.1.0.dist-info/RECORD +25 -0
- db_connect_mcp-0.1.0.dist-info/WHEEL +4 -0
- db_connect_mcp-0.1.0.dist-info/entry_points.txt +2 -0
- db_connect_mcp-0.1.0.dist-info/licenses/LICENSE +21 -0
db_connect_mcp/__init__.py
@@ -0,0 +1,30 @@
+"""
+db_mcp - Multi-database MCP server for Claude Code
+
+A Model Context Protocol (MCP) server that provides database analysis and querying
+capabilities for PostgreSQL, MySQL, and ClickHouse databases.
+"""
+
+__version__ = "2.0.0"
+
+from .models.config import DatabaseConfig
+from .models.capabilities import DatabaseCapabilities
+from .models.database import DatabaseInfo, SchemaInfo
+from .models.table import TableInfo, ColumnInfo, IndexInfo, ConstraintInfo
+from .models.query import QueryResult, ExplainPlan
+from .models.statistics import ColumnStats, Distribution
+
+__all__ = [
+    "DatabaseConfig",
+    "DatabaseCapabilities",
+    "DatabaseInfo",
+    "SchemaInfo",
+    "TableInfo",
+    "ColumnInfo",
+    "IndexInfo",
+    "ConstraintInfo",
+    "QueryResult",
+    "ExplainPlan",
+    "ColumnStats",
+    "Distribution",
+]
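The package root re-exports the model classes listed in __all__, so downstream code can import them without reaching into db_connect_mcp.models. A small sketch of what that enables (not part of the package itself):

# The root-level names are the same objects as the ones defined under db_connect_mcp.models.*
from db_connect_mcp import DatabaseConfig, QueryResult
from db_connect_mcp.models.config import DatabaseConfig as DirectDatabaseConfig

assert DatabaseConfig is DirectDatabaseConfig  # re-export, not a copy
print(DatabaseConfig.__module__, QueryResult.__module__)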
db_connect_mcp/__main__.py
@@ -0,0 +1,13 @@
+"""Entry point for running db_connect_mcp as a module."""
+
+import asyncio
+import sys
+
+from db_connect_mcp.server import main
+
+if __name__ == "__main__":
+    # Windows-specific event loop policy
+    if sys.platform == "win32":
+        asyncio.set_event_loop_policy(asyncio.WindowsProactorEventLoopPolicy())  # type: ignore[attr-defined]
+
+    asyncio.run(main())
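The module guard above makes the server runnable with `python -m db_connect_mcp`, switching to the proactor event loop policy on Windows first. A rough stand-alone equivalent, assuming server.main is a coroutine function as the asyncio.run call implies:

import asyncio

from db_connect_mcp.server import main

# Same effect as `python -m db_connect_mcp` on non-Windows platforms.
asyncio.run(main())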
db_connect_mcp/adapters/__init__.py
@@ -0,0 +1,72 @@
+"""Database adapters for specific database implementations."""
+
+from sqlalchemy.engine.url import make_url
+
+from .base import BaseAdapter
+from .clickhouse import ClickHouseAdapter
+from .mysql import MySQLAdapter
+from .postgresql import PostgresAdapter
+from ..models.config import DatabaseConfig
+
+__all__ = [
+    "BaseAdapter",
+    "PostgresAdapter",
+    "MySQLAdapter",
+    "ClickHouseAdapter",
+    "create_adapter",
+    "detect_dialect",
+]
+
+
+def detect_dialect(url: str) -> str:
+    """
+    Detect database dialect from connection URL.
+
+    Args:
+        url: Database connection URL
+
+    Returns:
+        Dialect name (postgresql, mysql, clickhouse)
+
+    Raises:
+        ValueError: If dialect cannot be detected
+    """
+    try:
+        parsed_url = make_url(url)
+        # Extract base dialect (e.g., "postgresql" from "postgresql+asyncpg")
+        dialect = parsed_url.drivername.split("+")[0]
+        return dialect
+    except Exception as e:
+        raise ValueError(f"Failed to detect dialect from URL: {e}")
+
+
+def create_adapter(config: DatabaseConfig) -> BaseAdapter:
+    """
+    Factory function to create appropriate database adapter.
+
+    Args:
+        config: Database configuration
+
+    Returns:
+        Database adapter instance
+
+    Raises:
+        ValueError: If database type is not supported
+    """
+    dialect = config.dialect
+
+    adapters = {
+        "postgresql": PostgresAdapter,
+        "mysql": MySQLAdapter,
+        "clickhouse": ClickHouseAdapter,
+    }
+
+    adapter_class = adapters.get(dialect)
+
+    if adapter_class is None:
+        raise ValueError(
+            f"Unsupported database dialect: {dialect}. "
+            f"Supported dialects: {', '.join(adapters.keys())}"
+        )
+
+    return adapter_class()
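A short usage sketch for the two factory functions above (not from the package); only the dialect attribute of DatabaseConfig is visible in this diff, so the constructor call and the URL below are assumptions for illustration:

from db_connect_mcp.adapters import create_adapter, detect_dialect
from db_connect_mcp.models.config import DatabaseConfig

url = "postgresql+asyncpg://readonly:secret@localhost:5432/appdb"
print(detect_dialect(url))  # "postgresql" (the "+asyncpg" driver suffix is stripped)

config = DatabaseConfig(url=url)  # hypothetical constructor call
adapter = create_adapter(config)  # PostgresAdapter instance for this dialect
print(type(adapter).__name__)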
@@ -0,0 +1,152 @@
|
|
|
1
|
+
"""Base adapter abstract class for database-specific implementations."""
|
|
2
|
+
|
|
3
|
+
from abc import ABC, abstractmethod
|
|
4
|
+
from typing import Any, Optional
|
|
5
|
+
|
|
6
|
+
from sqlalchemy.ext.asyncio import AsyncConnection
|
|
7
|
+
|
|
8
|
+
from db_connect_mcp.models.capabilities import DatabaseCapabilities
|
|
9
|
+
from db_connect_mcp.models.database import SchemaInfo
|
|
10
|
+
from db_connect_mcp.models.statistics import ColumnStats, Distribution
|
|
11
|
+
from db_connect_mcp.models.table import TableInfo
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class BaseAdapter(ABC):
|
|
15
|
+
"""Base adapter defining database-specific interface."""
|
|
16
|
+
|
|
17
|
+
@property
|
|
18
|
+
@abstractmethod
|
|
19
|
+
def capabilities(self) -> DatabaseCapabilities:
|
|
20
|
+
"""Get capabilities for this database type."""
|
|
21
|
+
...
|
|
22
|
+
|
|
23
|
+
@abstractmethod
|
|
24
|
+
async def enrich_schema_info(
|
|
25
|
+
self, conn: AsyncConnection, schema_info: SchemaInfo
|
|
26
|
+
) -> SchemaInfo:
|
|
27
|
+
"""
|
|
28
|
+
Enrich schema info with database-specific metadata.
|
|
29
|
+
|
|
30
|
+
Args:
|
|
31
|
+
conn: Database connection
|
|
32
|
+
schema_info: Basic schema information
|
|
33
|
+
|
|
34
|
+
Returns:
|
|
35
|
+
Enriched schema information
|
|
36
|
+
"""
|
|
37
|
+
...
|
|
38
|
+
|
|
39
|
+
@abstractmethod
|
|
40
|
+
async def enrich_table_info(
|
|
41
|
+
self, conn: AsyncConnection, table_info: TableInfo
|
|
42
|
+
) -> TableInfo:
|
|
43
|
+
"""
|
|
44
|
+
Enrich table info with database-specific metadata.
|
|
45
|
+
|
|
46
|
+
Args:
|
|
47
|
+
conn: Database connection
|
|
48
|
+
table_info: Basic table information
|
|
49
|
+
|
|
50
|
+
Returns:
|
|
51
|
+
Enriched table information with sizes, row counts, etc.
|
|
52
|
+
"""
|
|
53
|
+
...
|
|
54
|
+
|
|
55
|
+
@abstractmethod
|
|
56
|
+
async def get_column_statistics(
|
|
57
|
+
self,
|
|
58
|
+
conn: AsyncConnection,
|
|
59
|
+
table_name: str,
|
|
60
|
+
column_name: str,
|
|
61
|
+
schema: Optional[str],
|
|
62
|
+
) -> ColumnStats:
|
|
63
|
+
"""
|
|
64
|
+
Get column statistics using database-specific queries.
|
|
65
|
+
|
|
66
|
+
Args:
|
|
67
|
+
conn: Database connection
|
|
68
|
+
table_name: Table name
|
|
69
|
+
column_name: Column name
|
|
70
|
+
schema: Schema name
|
|
71
|
+
|
|
72
|
+
Returns:
|
|
73
|
+
Column statistics
|
|
74
|
+
"""
|
|
75
|
+
...
|
|
76
|
+
|
|
77
|
+
@abstractmethod
|
|
78
|
+
async def get_value_distribution(
|
|
79
|
+
self,
|
|
80
|
+
conn: AsyncConnection,
|
|
81
|
+
table_name: str,
|
|
82
|
+
column_name: str,
|
|
83
|
+
schema: Optional[str],
|
|
84
|
+
limit: int,
|
|
85
|
+
) -> Distribution:
|
|
86
|
+
"""
|
|
87
|
+
Get value distribution for a column.
|
|
88
|
+
|
|
89
|
+
Args:
|
|
90
|
+
conn: Database connection
|
|
91
|
+
table_name: Table name
|
|
92
|
+
column_name: Column name
|
|
93
|
+
schema: Schema name
|
|
94
|
+
limit: Number of top values
|
|
95
|
+
|
|
96
|
+
Returns:
|
|
97
|
+
Value distribution
|
|
98
|
+
"""
|
|
99
|
+
...
|
|
100
|
+
|
|
101
|
+
@abstractmethod
|
|
102
|
+
async def get_sample_query(
|
|
103
|
+
self, table_name: str, schema: Optional[str], limit: int
|
|
104
|
+
) -> str:
|
|
105
|
+
"""
|
|
106
|
+
Generate database-specific efficient sampling query.
|
|
107
|
+
|
|
108
|
+
Args:
|
|
109
|
+
table_name: Table name
|
|
110
|
+
schema: Schema name
|
|
111
|
+
limit: Number of rows to sample
|
|
112
|
+
|
|
113
|
+
Returns:
|
|
114
|
+
SQL query for sampling
|
|
115
|
+
"""
|
|
116
|
+
...
|
|
117
|
+
|
|
118
|
+
@abstractmethod
|
|
119
|
+
async def get_explain_query(self, query: str, analyze: bool) -> str:
|
|
120
|
+
"""
|
|
121
|
+
Generate database-specific EXPLAIN query.
|
|
122
|
+
|
|
123
|
+
Args:
|
|
124
|
+
query: Query to explain
|
|
125
|
+
analyze: Whether to use EXPLAIN ANALYZE
|
|
126
|
+
|
|
127
|
+
Returns:
|
|
128
|
+
EXPLAIN query string
|
|
129
|
+
"""
|
|
130
|
+
...
|
|
131
|
+
|
|
132
|
+
@abstractmethod
|
|
133
|
+
async def parse_explain_plan(
|
|
134
|
+
self, plan_text: str, analyzed: bool
|
|
135
|
+
) -> dict[str, Any]:
|
|
136
|
+
"""
|
|
137
|
+
Parse EXPLAIN output into structured format.
|
|
138
|
+
|
|
139
|
+
Args:
|
|
140
|
+
plan_text: Raw EXPLAIN output
|
|
141
|
+
analyzed: Whether this was EXPLAIN ANALYZE
|
|
142
|
+
|
|
143
|
+
Returns:
|
|
144
|
+
Dictionary with parsed plan information
|
|
145
|
+
"""
|
|
146
|
+
...
|
|
147
|
+
|
|
148
|
+
def _build_table_reference(self, table_name: str, schema: Optional[str]) -> str:
|
|
149
|
+
"""Build qualified table reference."""
|
|
150
|
+
if schema:
|
|
151
|
+
return f"{schema}.{table_name}"
|
|
152
|
+
return table_name
|
|
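BaseAdapter is the extension point per dialect: a subclass supplies capability flags, metadata enrichment, column statistics, value distributions, sampling, and EXPLAIN handling. A minimal illustrative subclass, not part of the package, that stubs every abstract method; the keyword arguments mirror those visible in the ClickHouse adapter below and are otherwise assumptions:

from typing import Any, Optional

from sqlalchemy.ext.asyncio import AsyncConnection

from db_connect_mcp.adapters.base import BaseAdapter
from db_connect_mcp.models.capabilities import DatabaseCapabilities
from db_connect_mcp.models.database import SchemaInfo
from db_connect_mcp.models.statistics import ColumnStats, Distribution
from db_connect_mcp.models.table import TableInfo


class NoOpAdapter(BaseAdapter):
    """Illustrative only: satisfies the interface without doing dialect-specific work."""

    @property
    def capabilities(self) -> DatabaseCapabilities:
        # Same keyword set used by ClickHouseAdapter.capabilities below.
        return DatabaseCapabilities(
            foreign_keys=False, indexes=False, views=False,
            materialized_views=False, partitions=False, advanced_stats=False,
            explain_plans=False, profiling=False, comments=False,
            schemas=False, transactions=False, stored_procedures=False,
            triggers=False,
        )

    async def enrich_schema_info(
        self, conn: AsyncConnection, schema_info: SchemaInfo
    ) -> SchemaInfo:
        return schema_info

    async def enrich_table_info(
        self, conn: AsyncConnection, table_info: TableInfo
    ) -> TableInfo:
        return table_info

    async def get_column_statistics(
        self, conn: AsyncConnection, table_name: str, column_name: str,
        schema: Optional[str],
    ) -> ColumnStats:
        return ColumnStats(
            column=column_name, data_type="unknown",
            total_rows=0, null_count=0, sample_size=0,
        )

    async def get_value_distribution(
        self, conn: AsyncConnection, table_name: str, column_name: str,
        schema: Optional[str], limit: int,
    ) -> Distribution:
        return Distribution(
            column=column_name, total_rows=0, unique_values=0,
            null_count=0, top_values=[], sample_size=0,
        )

    async def get_sample_query(
        self, table_name: str, schema: Optional[str], limit: int
    ) -> str:
        return f"SELECT * FROM {self._build_table_reference(table_name, schema)} LIMIT {limit}"

    async def get_explain_query(self, query: str, analyze: bool) -> str:
        return f"EXPLAIN {query}"

    async def parse_explain_plan(
        self, plan_text: str, analyzed: bool
    ) -> dict[str, Any]:
        return {"json": None, "warnings": [], "recommendations": []}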
db_connect_mcp/adapters/clickhouse.py
@@ -0,0 +1,298 @@
+"""ClickHouse adapter optimized for analytics workloads."""
+
+from typing import Any, Optional
+
+from sqlalchemy import text
+from sqlalchemy.ext.asyncio import AsyncConnection
+
+from db_connect_mcp.adapters.base import BaseAdapter
+from db_connect_mcp.models.capabilities import DatabaseCapabilities
+from db_connect_mcp.models.database import SchemaInfo
+from db_connect_mcp.models.statistics import ColumnStats, Distribution
+from db_connect_mcp.models.table import TableInfo
+
+
+class ClickHouseAdapter(BaseAdapter):
+    """ClickHouse adapter optimized for analytical queries."""
+
+    @property
+    def capabilities(self) -> DatabaseCapabilities:
+        """ClickHouse analytics-focused capabilities."""
+        return DatabaseCapabilities(
+            foreign_keys=False,  # ClickHouse doesn't enforce FK constraints
+            indexes=True,  # Has specialized indexes
+            views=True,
+            materialized_views=True,
+            partitions=True,  # Advanced partitioning
+            advanced_stats=True,  # Excellent columnar statistics
+            explain_plans=True,
+            profiling=True,
+            comments=True,
+            schemas=True,  # Called databases in ClickHouse
+            transactions=False,  # No traditional transactions
+            stored_procedures=False,
+            triggers=False,
+        )
+
+    async def enrich_schema_info(
+        self, conn: AsyncConnection, schema_info: SchemaInfo
+    ) -> SchemaInfo:
+        """Add ClickHouse-specific schema metadata."""
+        try:
+            query = text("""
+                SELECT
+                    sum(bytes) as size_bytes
+                FROM system.parts
+                WHERE database = :schema_name
+                    AND active = 1
+            """)
+
+            result = await conn.execute(query, {"schema_name": schema_info.name})
+            row = result.fetchone()
+
+            if row and row[0]:
+                schema_info.size_bytes = int(row[0])
+        except Exception:
+            # Permission denied or table not available
+            # This is common for readonly users, just skip enrichment
+            pass
+
+        return schema_info
+
+    async def enrich_table_info(
+        self, conn: AsyncConnection, table_info: TableInfo
+    ) -> TableInfo:
+        """Add ClickHouse-specific table metadata."""
+        # Get table engine and metadata
+        query = text("""
+            SELECT
+                engine,
+                total_rows,
+                total_bytes,
+                partition_key,
+                sorting_key,
+                primary_key,
+                sampling_key
+            FROM system.tables
+            WHERE database = currentDatabase()
+                AND name = :table_name
+        """)
+
+        result = await conn.execute(query, {"table_name": table_info.name})
+        row = result.fetchone()
+
+        if row:
+            table_info.row_count = int(row[1]) if row[1] else None
+            table_info.size_bytes = int(row[2]) if row[2] else None
+
+            # ClickHouse-specific metadata
+            table_info.extra_info["engine"] = row[0]
+            table_info.extra_info["partition_key"] = row[3]
+            table_info.extra_info["sorting_key"] = row[4]
+            table_info.extra_info["primary_key"] = row[5]
+            table_info.extra_info["sampling_key"] = row[6]
+
+        # Get compression info (may fail due to permissions)
+        try:
+            compression_query = text("""
+                SELECT
+                    sum(data_compressed_bytes) as compressed,
+                    sum(data_uncompressed_bytes) as uncompressed
+                FROM system.parts
+                WHERE database = currentDatabase()
+                    AND table = :table_name
+                    AND active = 1
+            """)
+
+            result = await conn.execute(
+                compression_query, {"table_name": table_info.name}
+            )
+            row = result.fetchone()
+
+            if row and row[0]:
+                table_info.extra_info["compressed_bytes"] = int(row[0])
+                table_info.extra_info["uncompressed_bytes"] = int(row[1])
+                if row[1] and row[1] > 0:
+                    ratio = float(row[0]) / float(row[1])
+                    table_info.extra_info["compression_ratio"] = round(ratio, 2)
+        except Exception:
+            # Permission denied or table not available
+            # This is common for readonly users, just skip compression info
+            pass
+
+        return table_info
+
+    async def get_column_statistics(
+        self,
+        conn: AsyncConnection,
+        table_name: str,
+        column_name: str,
+        schema: Optional[str],
+    ) -> ColumnStats:
+        """Get ClickHouse column statistics with columnar optimizations."""
+        table_ref = self._build_table_reference(table_name, schema)
+
+        # ClickHouse has excellent support for quantiles
+        query = text(f"""
+            SELECT
+                count() as total_rows,
+                countIf(`{column_name}` IS NULL) as null_count,
+                uniq(`{column_name}`) as distinct_count,
+                min(`{column_name}`) as min_val,
+                max(`{column_name}`) as max_val,
+                avg(`{column_name}`) as avg_val,
+                stddevPop(`{column_name}`) as stddev_val,
+                quantile(0.25)(`{column_name}`) as p25,
+                quantile(0.50)(`{column_name}`) as p50,
+                quantile(0.75)(`{column_name}`) as p75,
+                quantile(0.95)(`{column_name}`) as p95,
+                quantile(0.99)(`{column_name}`) as p99,
+                toTypeName(`{column_name}`) as data_type
+            FROM {table_ref}
+        """)
+
+        try:
+            result = await conn.execute(query)
+            row = result.fetchone()
+
+            if not row:
+                return ColumnStats(
+                    column=column_name,
+                    data_type="unknown",
+                    total_rows=0,
+                    null_count=0,
+                    sample_size=0,
+                    warning="No data found",
+                )
+
+            # Get most common values
+            mcv_query = text(f"""
+                SELECT `{column_name}` as value, count() as count
+                FROM {table_ref}
+                WHERE `{column_name}` IS NOT NULL
+                GROUP BY `{column_name}`
+                ORDER BY count DESC
+                LIMIT 10
+            """)
+
+            mcv_result = await conn.execute(mcv_query)
+            mcv_rows = mcv_result.fetchall()
+            most_common = [{"value": str(r[0]), "count": int(r[1])} for r in mcv_rows]
+
+            return ColumnStats(
+                column=column_name,
+                data_type=str(row[12]),
+                total_rows=int(row[0]),
+                null_count=int(row[1]),
+                distinct_count=int(row[2]) if row[2] else None,
+                min_value=row[3],
+                max_value=row[4],
+                avg_value=float(row[5]) if row[5] is not None else None,
+                stddev_value=float(row[6]) if row[6] is not None else None,
+                percentile_25=row[7],
+                median_value=row[8],
+                percentile_75=row[9],
+                percentile_95=row[10],
+                percentile_99=row[11],
+                most_common_values=most_common,
+                sample_size=int(row[0]),
+            )
+
+        except Exception as e:
+            return ColumnStats(
+                column=column_name,
+                data_type="unknown",
+                total_rows=0,
+                null_count=0,
+                sample_size=0,
+                warning=f"Statistics unavailable: {str(e)}",
+            )
+
+    async def get_value_distribution(
+        self,
+        conn: AsyncConnection,
+        table_name: str,
+        column_name: str,
+        schema: Optional[str],
+        limit: int,
+    ) -> Distribution:
+        """Get value distribution for ClickHouse."""
+        table_ref = self._build_table_reference(table_name, schema)
+
+        stats_query = text(f"""
+            SELECT
+                count() as total_rows,
+                uniq(`{column_name}`) as unique_values,
+                countIf(`{column_name}` IS NULL) as null_count
+            FROM {table_ref}
+        """)
+
+        stats_result = await conn.execute(stats_query)
+        stats_row = stats_result.fetchone()
+
+        top_query = text(f"""
+            SELECT `{column_name}` as value, count() as count
+            FROM {table_ref}
+            WHERE `{column_name}` IS NOT NULL
+            GROUP BY `{column_name}`
+            ORDER BY count DESC
+            LIMIT :limit
+        """)
+
+        top_result = await conn.execute(top_query, {"limit": limit})
+        top_rows = top_result.fetchall()
+
+        top_values = [{"value": str(r[0]), "count": int(r[1])} for r in top_rows]
+
+        if not stats_row:
+            return Distribution(
+                column=column_name,
+                total_rows=0,
+                unique_values=0,
+                null_count=0,
+                top_values=[],
+                sample_size=0,
+            )
+
+        return Distribution(
+            column=column_name,
+            total_rows=int(stats_row[0]),
+            unique_values=int(stats_row[1]),
+            null_count=int(stats_row[2]),
+            top_values=top_values,
+            sample_size=int(stats_row[0]),
+        )
+
+    async def get_sample_query(
+        self, table_name: str, schema: Optional[str], limit: int
+    ) -> str:
+        """Generate ClickHouse sampling query with SAMPLE clause."""
+        table_ref = self._build_table_reference(table_name, schema)
+        # ClickHouse SAMPLE clause for efficient sampling on large datasets
+        return f"SELECT * FROM {table_ref} SAMPLE 0.01 LIMIT {limit}"
+
+    async def get_explain_query(self, query: str, analyze: bool) -> str:
+        """Generate ClickHouse EXPLAIN query."""
+        if analyze:
+            return f"EXPLAIN PIPELINE {query}"
+        return f"EXPLAIN {query}"
+
+    async def parse_explain_plan(
+        self, plan_text: str, analyzed: bool
+    ) -> dict[str, Any]:
+        """Parse ClickHouse EXPLAIN output."""
+        result: dict[str, Any] = {
+            "json": None,
+            "warnings": [],
+            "recommendations": [],
+        }
+
+        # ClickHouse EXPLAIN is text-based
+        # Look for common patterns
+        if "FULL" in plan_text.upper() and "SCAN" in plan_text.upper():
+            result["warnings"].append("Full table scan detected")
+            result["recommendations"].append(
+                "Consider using appropriate indexes or sampling"
+            )
+
+        return result