db-connect-mcp 0.1.0 (db_connect_mcp-0.1.0-py3-none-any.whl)
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of db-connect-mcp might be problematic.
- db_connect_mcp/__init__.py +30 -0
- db_connect_mcp/__main__.py +13 -0
- db_connect_mcp/adapters/__init__.py +72 -0
- db_connect_mcp/adapters/base.py +152 -0
- db_connect_mcp/adapters/clickhouse.py +298 -0
- db_connect_mcp/adapters/mysql.py +288 -0
- db_connect_mcp/adapters/postgresql.py +351 -0
- db_connect_mcp/core/__init__.py +13 -0
- db_connect_mcp/core/analyzer.py +114 -0
- db_connect_mcp/core/connection.py +371 -0
- db_connect_mcp/core/executor.py +239 -0
- db_connect_mcp/core/inspector.py +345 -0
- db_connect_mcp/models/__init__.py +23 -0
- db_connect_mcp/models/capabilities.py +98 -0
- db_connect_mcp/models/config.py +401 -0
- db_connect_mcp/models/database.py +112 -0
- db_connect_mcp/models/query.py +119 -0
- db_connect_mcp/models/statistics.py +176 -0
- db_connect_mcp/models/table.py +230 -0
- db_connect_mcp/server.py +496 -0
- db_connect_mcp-0.1.0.dist-info/METADATA +565 -0
- db_connect_mcp-0.1.0.dist-info/RECORD +25 -0
- db_connect_mcp-0.1.0.dist-info/WHEEL +4 -0
- db_connect_mcp-0.1.0.dist-info/entry_points.txt +2 -0
- db_connect_mcp-0.1.0.dist-info/licenses/LICENSE +21 -0
db_connect_mcp/adapters/mysql.py
@@ -0,0 +1,288 @@
+"""MySQL adapter with good feature support."""
+
+import json
+from typing import Any, Optional
+
+from sqlalchemy import text
+from sqlalchemy.ext.asyncio import AsyncConnection
+
+from db_connect_mcp.adapters.base import BaseAdapter
+from db_connect_mcp.models.capabilities import DatabaseCapabilities
+from db_connect_mcp.models.database import SchemaInfo
+from db_connect_mcp.models.statistics import ColumnStats, Distribution
+from db_connect_mcp.models.table import TableInfo
+
+
+class MySQLAdapter(BaseAdapter):
+    """MySQL adapter with good feature support."""
+
+    @property
+    def capabilities(self) -> DatabaseCapabilities:
+        """MySQL has good but not comprehensive support."""
+        return DatabaseCapabilities(
+            foreign_keys=True,
+            indexes=True,
+            views=True,
+            materialized_views=False,  # MySQL doesn't have native materialized views
+            partitions=True,
+            advanced_stats=False,  # No percentile functions in MySQL
+            explain_plans=True,
+            profiling=False,  # Basic profiling only
+            comments=True,
+            schemas=True,  # MySQL calls them databases
+            transactions=True,
+            stored_procedures=True,
+            triggers=True,
+        )
+
+    async def enrich_schema_info(
+        self, conn: AsyncConnection, schema_info: SchemaInfo
+    ) -> SchemaInfo:
+        """Add MySQL-specific schema metadata."""
+        query = text("""
+            SELECT
+                SUM(data_length + index_length) as size_bytes
+            FROM information_schema.TABLES
+            WHERE table_schema = :schema_name
+        """)
+
+        result = await conn.execute(query, {"schema_name": schema_info.name})
+        row = result.fetchone()
+
+        if row and row[0]:
+            schema_info.size_bytes = int(row[0])
+
+        return schema_info
+
+    async def enrich_table_info(
+        self, conn: AsyncConnection, table_info: TableInfo
+    ) -> TableInfo:
+        """Add MySQL-specific table metadata."""
+        query = text("""
+            SELECT
+                engine,
+                table_rows,
+                data_length,
+                index_length,
+                table_comment,
+                create_time,
+                update_time
+            FROM information_schema.TABLES
+            WHERE table_schema = DATABASE()
+                AND table_name = :table_name
+        """)
+
+        result = await conn.execute(query, {"table_name": table_info.name})
+        row = result.fetchone()
+
+        if row:
+            table_info.row_count = int(row[1]) if row[1] else None
+            table_info.size_bytes = int(row[2]) if row[2] else None
+            table_info.index_size_bytes = int(row[3]) if row[3] else None
+            table_info.comment = row[4] if row[4] else None
+            table_info.created_at = str(row[5]) if row[5] else None
+            table_info.updated_at = str(row[6]) if row[6] else None
+
+            # MySQL-specific: storage engine
+            table_info.extra_info["engine"] = row[0]
+
+        return table_info
+
+    async def get_column_statistics(
+        self,
+        conn: AsyncConnection,
+        table_name: str,
+        column_name: str,
+        schema: Optional[str],
+    ) -> ColumnStats:
+        """Get MySQL column statistics (basic stats only)."""
+        table_ref = self._build_table_reference(table_name, schema)
+
+        # MySQL doesn't support percentile functions, so we get basic stats only
+        query = text(f"""
+            SELECT
+                COUNT(*) as total_rows,
+                COUNT(*) - COUNT(`{column_name}`) as null_count,
+                COUNT(DISTINCT `{column_name}`) as distinct_count,
+                MIN(`{column_name}`) as min_val,
+                MAX(`{column_name}`) as max_val,
+                AVG(`{column_name}`) as avg_val,
+                STD(`{column_name}`) as stddev_val
+            FROM {table_ref}
+        """)
+
+        try:
+            result = await conn.execute(query)
+            row = result.fetchone()
+
+            if not row:
+                return ColumnStats(
+                    column=column_name,
+                    data_type="unknown",
+                    total_rows=0,
+                    null_count=0,
+                    sample_size=0,
+                    warning="No data found",
+                )
+
+            # Get most common values
+            mcv_query = text(f"""
+                SELECT `{column_name}` as value, COUNT(*) as count
+                FROM {table_ref}
+                WHERE `{column_name}` IS NOT NULL
+                GROUP BY `{column_name}`
+                ORDER BY count DESC
+                LIMIT 10
+            """)
+
+            mcv_result = await conn.execute(mcv_query)
+            mcv_rows = mcv_result.fetchall()
+            most_common = [{"value": str(r[0]), "count": int(r[1])} for r in mcv_rows]
+
+            # Get data type from information_schema
+            type_query = text("""
+                SELECT data_type
+                FROM information_schema.COLUMNS
+                WHERE table_schema = DATABASE()
+                    AND table_name = :table_name
+                    AND column_name = :column_name
+            """)
+
+            type_result = await conn.execute(
+                type_query, {"table_name": table_name, "column_name": column_name}
+            )
+            type_row = type_result.fetchone()
+            data_type = type_row[0] if type_row else "unknown"
+
+            return ColumnStats(
+                column=column_name,
+                data_type=data_type,
+                total_rows=int(row[0]),
+                null_count=int(row[1]),
+                distinct_count=int(row[2]) if row[2] else None,
+                min_value=row[3],
+                max_value=row[4],
+                avg_value=float(row[5]) if row[5] is not None else None,
+                stddev_value=float(row[6]) if row[6] is not None else None,
+                most_common_values=most_common,
+                sample_size=int(row[0]),
+                warning="Advanced statistics (percentiles) not available in MySQL",
+            )
+
+        except Exception as e:
+            return ColumnStats(
+                column=column_name,
+                data_type="unknown",
+                total_rows=0,
+                null_count=0,
+                sample_size=0,
+                warning=f"Statistics unavailable: {str(e)}",
+            )
+
+    async def get_value_distribution(
+        self,
+        conn: AsyncConnection,
+        table_name: str,
+        column_name: str,
+        schema: Optional[str],
+        limit: int,
+    ) -> Distribution:
+        """Get value distribution for MySQL."""
+        table_ref = self._build_table_reference(table_name, schema)
+
+        stats_query = text(f"""
+            SELECT
+                COUNT(*) as total_rows,
+                COUNT(DISTINCT `{column_name}`) as unique_values,
+                COUNT(*) - COUNT(`{column_name}`) as null_count
+            FROM {table_ref}
+        """)
+
+        stats_result = await conn.execute(stats_query)
+        stats_row = stats_result.fetchone()
+
+        top_query = text(f"""
+            SELECT `{column_name}` as value, COUNT(*) as count
+            FROM {table_ref}
+            WHERE `{column_name}` IS NOT NULL
+            GROUP BY `{column_name}`
+            ORDER BY count DESC
+            LIMIT :limit
+        """)
+
+        top_result = await conn.execute(top_query, {"limit": limit})
+        top_rows = top_result.fetchall()
+
+        top_values = [{"value": str(r[0]), "count": int(r[1])} for r in top_rows]
+
+        if not stats_row:
+            return Distribution(
+                column=column_name,
+                total_rows=0,
+                unique_values=0,
+                null_count=0,
+                top_values=[],
+                sample_size=0,
+            )
+
+        return Distribution(
+            column=column_name,
+            total_rows=int(stats_row[0]),
+            unique_values=int(stats_row[1]),
+            null_count=int(stats_row[2]),
+            top_values=top_values,
+            sample_size=int(stats_row[0]),
+        )
+
+    async def get_sample_query(
+        self, table_name: str, schema: Optional[str], limit: int
+    ) -> str:
+        """Generate MySQL sampling query."""
+        table_ref = self._build_table_reference(table_name, schema)
+        return f"SELECT * FROM {table_ref} LIMIT {limit}"
+
+    async def get_explain_query(self, query: str, analyze: bool) -> str:
+        """Generate MySQL EXPLAIN query."""
+        if analyze:
+            return f"EXPLAIN ANALYZE {query}"
+        return f"EXPLAIN FORMAT=JSON {query}"
+
+    async def parse_explain_plan(
+        self, plan_text: str, analyzed: bool
+    ) -> dict[str, Any]:
+        """Parse MySQL EXPLAIN output."""
+        try:
+            plan_data = json.loads(plan_text)
+
+            result: dict[str, Any] = {
+                "json": plan_data,
+                "warnings": [],
+                "recommendations": [],
+            }
+
+            # MySQL EXPLAIN has different structure
+            if "query_block" in plan_data:
+                query_block = plan_data["query_block"]
+
+                # Extract cost if available
+                if "cost_info" in query_block:
+                    cost_info = query_block["cost_info"]
+                    result["estimated_cost"] = float(cost_info.get("query_cost", 0))
+
+                # Check for table scans
+                if "table" in query_block:
+                    table = query_block["table"]
+                    if table.get("access_type") == "ALL":
+                        result["warnings"].append("Full table scan detected")
+                        result["recommendations"].append("Consider adding indexes")
+
+            return result
+
+        except (json.JSONDecodeError, KeyError):
+            pass
+
+        return {
+            "json": None,
+            "warnings": [],
+            "recommendations": [],
+        }
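A minimal sketch of how the EXPLAIN helpers above compose, assuming MySQLAdapter can be constructed with no arguments (BaseAdapter's constructor is not part of this diff); the plan JSON is hand-written for illustration:

import asyncio
import json

from db_connect_mcp.adapters.mysql import MySQLAdapter

# Hand-written stand-in for MySQL's EXPLAIN FORMAT=JSON output (illustrative only).
PLAN_TEXT = json.dumps({
    "query_block": {
        "cost_info": {"query_cost": "212.40"},
        "table": {"table_name": "orders", "access_type": "ALL"},
    }
})

async def main() -> None:
    adapter = MySQLAdapter()  # assumption: no-argument constructor
    plan = await adapter.parse_explain_plan(PLAN_TEXT, analyzed=False)
    print(plan["estimated_cost"])   # 212.4
    print(plan["warnings"])         # ['Full table scan detected']
    print(plan["recommendations"])  # ['Consider adding indexes']

asyncio.run(main())

Note that parse_explain_plan swallows JSON decode errors and falls back to an empty result, so callers cannot distinguish a malformed plan from a plan with no findings.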
db_connect_mcp/adapters/postgresql.py
@@ -0,0 +1,351 @@
+"""PostgreSQL adapter with full feature support."""
+
+import json
+from typing import Any, Optional
+
+from sqlalchemy import text
+from sqlalchemy.ext.asyncio import AsyncConnection
+
+from db_connect_mcp.adapters.base import BaseAdapter
+from db_connect_mcp.models.capabilities import DatabaseCapabilities
+from db_connect_mcp.models.database import SchemaInfo
+from db_connect_mcp.models.statistics import ColumnStats, Distribution
+from db_connect_mcp.models.table import TableInfo
+
+
+class PostgresAdapter(BaseAdapter):
+    """PostgreSQL adapter with comprehensive feature support."""
+
+    @property
+    def capabilities(self) -> DatabaseCapabilities:
+        """PostgreSQL supports all features."""
+        return DatabaseCapabilities(
+            foreign_keys=True,
+            indexes=True,
+            views=True,
+            materialized_views=True,
+            partitions=True,
+            advanced_stats=True,
+            explain_plans=True,
+            profiling=True,
+            comments=True,
+            schemas=True,
+            transactions=True,
+            stored_procedures=True,
+            triggers=True,
+        )
+
+    async def enrich_schema_info(
+        self, conn: AsyncConnection, schema_info: SchemaInfo
+    ) -> SchemaInfo:
+        """Add PostgreSQL-specific schema metadata."""
+        query = text("""
+            SELECT
+                pg_catalog.pg_get_userbyid(n.nspowner) as owner,
+                pg_catalog.obj_description(n.oid, 'pg_namespace') as comment
+            FROM pg_catalog.pg_namespace n
+            WHERE n.nspname = :schema_name
+        """)
+
+        result = await conn.execute(query, {"schema_name": schema_info.name})
+        row = result.fetchone()
+
+        if row:
+            schema_info.owner = row[0]
+            schema_info.comment = row[1]
+
+        # Get schema size
+        size_query = text("""
+            SELECT SUM(pg_total_relation_size(quote_ident(schemaname) || '.' || quote_ident(tablename)))::bigint
+            FROM pg_tables
+            WHERE schemaname = :schema_name
+        """)
+
+        result = await conn.execute(size_query, {"schema_name": schema_info.name})
+        row = result.fetchone()
+        if row and row[0]:
+            schema_info.size_bytes = int(row[0])
+
+        return schema_info
+
+    async def enrich_table_info(
+        self, conn: AsyncConnection, table_info: TableInfo
+    ) -> TableInfo:
+        """Add PostgreSQL-specific table metadata."""
+        table_ref = self._build_table_reference(table_info.name, table_info.schema)
+
+        query = text("""
+            SELECT
+                pg_total_relation_size(:table_ref::regclass)::bigint as total_size,
+                pg_relation_size(:table_ref::regclass)::bigint as table_size,
+                pg_indexes_size(:table_ref::regclass)::bigint as indexes_size,
+                (SELECT reltuples::bigint FROM pg_class WHERE oid = :table_ref::regclass::oid) as row_count,
+                obj_description(:table_ref::regclass, 'pg_class') as comment
+        """)
+
+        try:
+            result = await conn.execute(query, {"table_ref": table_ref})
+            row = result.fetchone()
+
+            if row:
+                table_info.size_bytes = int(row[1]) if row[1] else None
+                table_info.index_size_bytes = int(row[2]) if row[2] else None
+                table_info.row_count = int(row[3]) if row[3] else None
+                table_info.comment = row[4]
+
+            # Add PostgreSQL-specific extras
+            extras_query = text("""
+                SELECT
+                    c.relkind as table_kind,
+                    c.relpersistence as persistence,
+                    c.relispartition as is_partition
+                FROM pg_class c
+                JOIN pg_namespace n ON n.oid = c.relnamespace
+                WHERE c.relname = :table_name
+                    AND n.nspname = COALESCE(:schema_name, 'public')
+            """)
+
+            result = await conn.execute(
+                extras_query,
+                {"table_name": table_info.name, "schema_name": table_info.schema},
+            )
+            row = result.fetchone()
+
+            if row:
+                table_info.extra_info["relkind"] = row[0]
+                table_info.extra_info["persistence"] = row[1]
+                table_info.extra_info["is_partition"] = row[2]
+
+        except Exception:
+            # If enrichment fails, return basic info
+            pass
+
+        return table_info
+
+    async def get_column_statistics(
+        self,
+        conn: AsyncConnection,
+        table_name: str,
+        column_name: str,
+        schema: Optional[str],
+    ) -> ColumnStats:
+        """Get comprehensive PostgreSQL column statistics."""
+        table_ref = self._build_table_reference(table_name, schema)
+
+        # Basic stats query with PostgreSQL-specific functions
+        query = text(f"""
+            WITH stats AS (
+                SELECT
+                    COUNT(*) as total_rows,
+                    COUNT("{column_name}") as non_null_count,
+                    COUNT(*) - COUNT("{column_name}") as null_count,
+                    COUNT(DISTINCT "{column_name}") as distinct_count,
+                    MIN("{column_name}") as min_val,
+                    MAX("{column_name}") as max_val,
+                    pg_typeof("{column_name}")::text as data_type
+                FROM {table_ref}
+            ),
+            numeric_stats AS (
+                SELECT
+                    AVG("{column_name}")::float as avg_val,
+                    STDDEV("{column_name}")::float as stddev_val,
+                    PERCENTILE_CONT(0.25) WITHIN GROUP (ORDER BY "{column_name}") as p25,
+                    PERCENTILE_CONT(0.50) WITHIN GROUP (ORDER BY "{column_name}") as p50,
+                    PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY "{column_name}") as p75,
+                    PERCENTILE_CONT(0.95) WITHIN GROUP (ORDER BY "{column_name}") as p95,
+                    PERCENTILE_CONT(0.99) WITHIN GROUP (ORDER BY "{column_name}") as p99
+                FROM {table_ref}
+                WHERE "{column_name}" IS NOT NULL
+            )
+            SELECT
+                s.total_rows,
+                s.null_count,
+                s.distinct_count,
+                s.min_val,
+                s.max_val,
+                s.data_type,
+                n.avg_val,
+                n.stddev_val,
+                n.p25,
+                n.p50,
+                n.p75,
+                n.p95,
+                n.p99
+            FROM stats s
+            LEFT JOIN numeric_stats n ON true
+        """)
+
+        try:
+            result = await conn.execute(query)
+            row = result.fetchone()
+
+            if not row:
+                return ColumnStats(
+                    column=column_name,
+                    data_type="unknown",
+                    total_rows=0,
+                    null_count=0,
+                    sample_size=0,
+                    warning="No data found",
+                )
+
+            # Get most common values
+            mcv_query = text(f"""
+                SELECT "{column_name}" as value, COUNT(*) as count
+                FROM {table_ref}
+                WHERE "{column_name}" IS NOT NULL
+                GROUP BY "{column_name}"
+                ORDER BY count DESC
+                LIMIT 10
+            """)
+
+            mcv_result = await conn.execute(mcv_query)
+            mcv_rows = mcv_result.fetchall()
+            most_common = [{"value": str(r[0]), "count": int(r[1])} for r in mcv_rows]
+
+            return ColumnStats(
+                column=column_name,
+                data_type=str(row[5]),
+                total_rows=int(row[0]),
+                null_count=int(row[1]),
+                distinct_count=int(row[2]) if row[2] else None,
+                min_value=row[3],
+                max_value=row[4],
+                avg_value=float(row[6]) if row[6] is not None else None,
+                stddev_value=float(row[7]) if row[7] is not None else None,
+                percentile_25=row[8],
+                median_value=row[9],
+                percentile_75=row[10],
+                percentile_95=row[11],
+                percentile_99=row[12],
+                most_common_values=most_common,
+                sample_size=int(row[0]),
+            )
+
+        except Exception as e:
+            return ColumnStats(
+                column=column_name,
+                data_type="unknown",
+                total_rows=0,
+                null_count=0,
+                sample_size=0,
+                warning=f"Statistics unavailable: {str(e)}",
+            )
+
+    async def get_value_distribution(
+        self,
+        conn: AsyncConnection,
+        table_name: str,
+        column_name: str,
+        schema: Optional[str],
+        limit: int,
+    ) -> Distribution:
+        """Get value distribution for PostgreSQL."""
+        table_ref = self._build_table_reference(table_name, schema)
+
+        query = text(f"""
+            WITH stats AS (
+                SELECT
+                    COUNT(*) as total_rows,
+                    COUNT(DISTINCT "{column_name}") as unique_values,
+                    COUNT(*) - COUNT("{column_name}") as null_count
+                FROM {table_ref}
+            ),
+            top_values AS (
+                SELECT "{column_name}" as value, COUNT(*) as count
+                FROM {table_ref}
+                WHERE "{column_name}" IS NOT NULL
+                GROUP BY "{column_name}"
+                ORDER BY count DESC
+                LIMIT :limit
+            )
+            SELECT
+                s.total_rows,
+                s.unique_values,
+                s.null_count,
+                json_agg(json_build_object('value', t.value::text, 'count', t.count)) as top_values
+            FROM stats s
+            LEFT JOIN top_values t ON true
+            GROUP BY s.total_rows, s.unique_values, s.null_count
+        """)
+
+        result = await conn.execute(query, {"limit": limit})
+        row = result.fetchone()
+
+        if not row:
+            return Distribution(
+                column=column_name,
+                total_rows=0,
+                unique_values=0,
+                null_count=0,
+                top_values=[],
+                sample_size=0,
+            )
+
+        top_values_data = json.loads(row[3]) if row[3] else []
+
+        return Distribution(
+            column=column_name,
+            total_rows=int(row[0]),
+            unique_values=int(row[1]),
+            null_count=int(row[2]),
+            top_values=top_values_data,
+            sample_size=int(row[0]),
+        )
+
+    async def get_sample_query(
+        self, table_name: str, schema: Optional[str], limit: int
+    ) -> str:
+        """Generate PostgreSQL sampling query with TABLESAMPLE."""
+        table_ref = self._build_table_reference(table_name, schema)
+        # Use simple LIMIT for smaller limits, TABLESAMPLE for larger datasets
+        return f"SELECT * FROM {table_ref} LIMIT {limit}"
+
+    async def get_explain_query(self, query: str, analyze: bool) -> str:
+        """Generate PostgreSQL EXPLAIN query."""
+        if analyze:
+            return f"EXPLAIN (ANALYZE, BUFFERS, FORMAT JSON) {query}"
+        return f"EXPLAIN (FORMAT JSON) {query}"
+
+    async def parse_explain_plan(
+        self, plan_text: str, analyzed: bool
+    ) -> dict[str, Any]:
+        """Parse PostgreSQL EXPLAIN JSON output."""
+        try:
+            plan_data = json.loads(plan_text)
+
+            if isinstance(plan_data, list) and len(plan_data) > 0:
+                plan = plan_data[0].get("Plan", {})
+
+                result: dict[str, Any] = {
+                    "json": plan,
+                    "estimated_cost": plan.get("Total Cost"),
+                    "estimated_rows": plan.get("Plan Rows"),
+                    "warnings": [],
+                    "recommendations": [],
+                }
+
+                if analyzed:
+                    result["actual_time_ms"] = plan.get("Actual Total Time")
+                    result["actual_rows"] = plan.get("Actual Rows")
+
+                # Add warnings based on plan analysis
+                if "Seq Scan" in str(plan):
+                    result["warnings"].append(
+                        "Sequential scan detected - may be slow on large tables"
+                    )
+                    result["recommendations"].append(
+                        "Consider adding appropriate indexes"
+                    )
+
+                return result
+
+        except (json.JSONDecodeError, KeyError):
+            pass
+
+        # Fallback for non-JSON format
+        return {
+            "json": None,
+            "warnings": [],
+            "recommendations": [],
+        }
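Both adapters interpolate table and column names into SQL with f-strings; the quoting (backticks in MySQL, double quotes in PostgreSQL) does not escape quote characters embedded in the identifier itself. When identifiers can come from untrusted input, a caller-side guard along these lines (a hypothetical helper, not part of this package) is a cheap mitigation:

import re

# Hypothetical caller-side guard; not part of db-connect-mcp.
_IDENTIFIER_RE = re.compile(r"^[A-Za-z_][A-Za-z0-9_]*$")

def checked_identifier(name: str) -> str:
    """Reject anything but a plain identifier before it reaches an f-string query."""
    if not _IDENTIFIER_RE.match(name):
        raise ValueError(f"unsafe SQL identifier: {name!r}")
    return name

# e.g. adapter.get_column_statistics(conn, checked_identifier(table), checked_identifier(column), None)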
db_connect_mcp/core/__init__.py
@@ -0,0 +1,13 @@
+"""Core database operations layer."""
+
+from .analyzer import StatisticsAnalyzer
+from .connection import DatabaseConnection
+from .executor import QueryExecutor
+from .inspector import MetadataInspector
+
+__all__ = [
+    "DatabaseConnection",
+    "MetadataInspector",
+    "QueryExecutor",
+    "StatisticsAnalyzer",
+]
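Given the re-exports above, the core classes can be imported from the package root of db_connect_mcp.core rather than from the individual submodules; a trivial sketch (constructor signatures are not shown in this diff):

# Equivalent to importing from .connection, .inspector, .executor, .analyzer directly.
from db_connect_mcp.core import (
    DatabaseConnection,
    MetadataInspector,
    QueryExecutor,
    StatisticsAnalyzer,
)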