db-connect-mcp 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of db-connect-mcp might be problematic.
- db_connect_mcp/__init__.py +30 -0
- db_connect_mcp/__main__.py +13 -0
- db_connect_mcp/adapters/__init__.py +72 -0
- db_connect_mcp/adapters/base.py +152 -0
- db_connect_mcp/adapters/clickhouse.py +298 -0
- db_connect_mcp/adapters/mysql.py +288 -0
- db_connect_mcp/adapters/postgresql.py +351 -0
- db_connect_mcp/core/__init__.py +13 -0
- db_connect_mcp/core/analyzer.py +114 -0
- db_connect_mcp/core/connection.py +371 -0
- db_connect_mcp/core/executor.py +239 -0
- db_connect_mcp/core/inspector.py +345 -0
- db_connect_mcp/models/__init__.py +23 -0
- db_connect_mcp/models/capabilities.py +98 -0
- db_connect_mcp/models/config.py +401 -0
- db_connect_mcp/models/database.py +112 -0
- db_connect_mcp/models/query.py +119 -0
- db_connect_mcp/models/statistics.py +176 -0
- db_connect_mcp/models/table.py +230 -0
- db_connect_mcp/server.py +496 -0
- db_connect_mcp-0.1.0.dist-info/METADATA +565 -0
- db_connect_mcp-0.1.0.dist-info/RECORD +25 -0
- db_connect_mcp-0.1.0.dist-info/WHEEL +4 -0
- db_connect_mcp-0.1.0.dist-info/entry_points.txt +2 -0
- db_connect_mcp-0.1.0.dist-info/licenses/LICENSE +21 -0
db_connect_mcp/models/statistics.py
@@ -0,0 +1,176 @@
"""Column statistics and distribution models."""

from typing import Any, Optional

from pydantic import BaseModel, Field


class Distribution(BaseModel):
    """Value distribution for a column."""

    column: str = Field(..., description="Column name")
    total_rows: int = Field(..., description="Total rows analyzed")
    unique_values: int = Field(..., description="Number of unique values")
    null_count: int = Field(..., description="Number of NULL values")
    top_values: list[dict[str, Any]] = Field(
        ..., description="Top N most frequent values with counts"
    )
    sample_size: int = Field(..., description="Number of rows sampled")

    @property
    def null_percentage(self) -> float:
        """Percentage of NULL values."""
        if self.total_rows == 0:
            return 0.0
        return (self.null_count / self.total_rows) * 100

    @property
    def cardinality(self) -> float:
        """Cardinality (unique values / total rows)."""
        if self.total_rows == 0:
            return 0.0
        return self.unique_values / self.total_rows

    @property
    def is_high_cardinality(self) -> bool:
        """Check if column has high cardinality (>0.9)."""
        return self.cardinality > 0.9

    @property
    def is_low_cardinality(self) -> bool:
        """Check if column has low cardinality (<0.1)."""
        return self.cardinality < 0.1


class ColumnStats(BaseModel):
    """Statistical information about a column."""

    column: str = Field(..., description="Column name")
    data_type: str = Field(..., description="Column data type")
    total_rows: int = Field(..., description="Total rows in table")
    null_count: int = Field(..., description="Number of NULL values")
    distinct_count: Optional[int] = Field(None, description="Number of distinct values")
    min_value: Optional[Any] = Field(None, description="Minimum value")
    max_value: Optional[Any] = Field(None, description="Maximum value")
    avg_value: Optional[float] = Field(
        None, description="Average value (numeric columns)"
    )
    median_value: Optional[Any] = Field(None, description="Median value")
    stddev_value: Optional[float] = Field(None, description="Standard deviation")
    percentile_25: Optional[Any] = Field(None, description="25th percentile")
    percentile_75: Optional[Any] = Field(None, description="75th percentile")
    percentile_95: Optional[Any] = Field(None, description="95th percentile")
    percentile_99: Optional[Any] = Field(None, description="99th percentile")
    most_common_values: list[dict[str, Any]] = Field(
        default_factory=list, description="Most common values with frequencies"
    )
    sample_size: int = Field(..., description="Number of rows sampled for statistics")
    warning: Optional[str] = Field(
        None, description="Warning message if stats unavailable"
    )

    @property
    def null_percentage(self) -> float:
        """Percentage of NULL values."""
        if self.total_rows == 0:
            return 0.0
        return (self.null_count / self.total_rows) * 100

    @property
    def completeness(self) -> float:
        """Data completeness (1 - null percentage as decimal)."""
        return 1.0 - (self.null_percentage / 100.0)

    @property
    def cardinality(self) -> Optional[float]:
        """Cardinality ratio (distinct / total)."""
        if self.distinct_count is None or self.total_rows == 0:
            return None
        return self.distinct_count / self.total_rows

    @property
    def has_advanced_stats(self) -> bool:
        """Check if advanced statistics are available."""
        return any(
            [
                self.median_value is not None,
                self.percentile_25 is not None,
                self.stddev_value is not None,
            ]
        )

    @property
    def is_numeric(self) -> bool:
        """Check if column appears to be numeric based on available stats."""
        return self.avg_value is not None or self.stddev_value is not None

    @property
    def range_value(self) -> Optional[Any]:
        """Calculate range (max - min) for numeric columns."""
        if self.min_value is not None and self.max_value is not None:
            try:
                return self.max_value - self.min_value
            except (TypeError, ValueError):
                return None
        return None

    def get_quality_score(self) -> float:
        """
        Calculate data quality score (0-100).
        Based on completeness, cardinality, and availability of stats.
        """
        score = 0.0

        # Completeness (0-40 points)
        score += self.completeness * 40

        # Has distinct count (10 points)
        if self.distinct_count is not None:
            score += 10

        # Has min/max (10 points)
        if self.min_value is not None and self.max_value is not None:
            score += 10

        # Has advanced stats (20 points)
        if self.has_advanced_stats:
            score += 20

        # Has most common values (10 points)
        if self.most_common_values:
            score += 10

        # Has average (10 points if numeric)
        if self.avg_value is not None:
            score += 10

        return min(score, 100.0)

    model_config = {
        "json_schema_extra": {
            "examples": [
                {
                    "column": "age",
                    "data_type": "integer",
                    "total_rows": 10000,
                    "null_count": 50,
                    "distinct_count": 95,
                    "min_value": 18,
                    "max_value": 95,
                    "avg_value": 42.5,
                    "median_value": 41,
                    "stddev_value": 15.2,
                    "percentile_25": 30,
                    "percentile_75": 55,
                    "percentile_95": 72,
                    "percentile_99": 85,
                    "most_common_values": [
                        {"value": 35, "count": 250},
                        {"value": 42, "count": 230},
                    ],
                    "sample_size": 10000,
                    "warning": None,
                }
            ]
        }
    }
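
A minimal usage sketch of the statistics models above. The import path follows the file listing (db_connect_mcp/models/statistics.py); the field values are illustrative and not taken from the package.

from db_connect_mcp.models.statistics import ColumnStats

# Illustrative values only; in practice the adapters/analyzer would populate these.
stats = ColumnStats(
    column="age",
    data_type="integer",
    total_rows=10_000,
    null_count=50,
    distinct_count=95,
    min_value=18,
    max_value=95,
    avg_value=42.5,
    sample_size=10_000,
)

print(stats.null_percentage)      # 0.5  (50 / 10_000 * 100)
print(stats.completeness)         # 0.995
print(stats.cardinality)          # 0.0095
print(stats.has_advanced_stats)   # False (no median/percentiles/stddev set)
print(stats.get_quality_score())  # 69.8 = 0.995*40 + 10 + 10 + 10

The score breakdown follows the point scheme in get_quality_score: 40 points scaled by completeness, plus 10 each for distinct count, min/max, and average, with the 20-point advanced-stats bonus skipped here.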
db_connect_mcp/models/table.py
@@ -0,0 +1,230 @@
"""Table, column, index, and constraint information models."""

import warnings
from typing import Any, Optional

from pydantic import BaseModel, ConfigDict, Field

# Suppress the specific warning about field 'schema' shadowing
warnings.filterwarnings(
    "ignore",
    message='Field name "schema" in "TableInfo" shadows an attribute in parent',
    category=UserWarning,
)


class ColumnInfo(BaseModel):
    """Information about a table column."""

    name: str = Field(..., description="Column name")
    data_type: str = Field(..., description="Column data type")
    nullable: bool = Field(..., description="Whether column allows NULL")
    default: Optional[str] = Field(None, description="Default value expression")
    primary_key: bool = Field(
        default=False, description="Whether column is part of primary key"
    )
    foreign_key: Optional[str] = Field(
        None, description="Foreign key reference (table.column)"
    )
    unique: bool = Field(
        default=False, description="Whether column has UNIQUE constraint"
    )
    indexed: bool = Field(default=False, description="Whether column is indexed")
    comment: Optional[str] = Field(None, description="Column comment/description")
    max_length: Optional[int] = Field(
        None, description="Maximum length for string types"
    )
    numeric_precision: Optional[int] = Field(
        None, description="Precision for numeric types"
    )
    numeric_scale: Optional[int] = Field(None, description="Scale for numeric types")
    extra_info: dict[str, Any] = Field(
        default_factory=dict,
        description="Database-specific additional information",
    )


class IndexInfo(BaseModel):
    """Information about a table index."""

    name: str = Field(..., description="Index name")
    columns: list[str] = Field(..., description="Indexed column names")
    unique: bool = Field(default=False, description="Whether index enforces uniqueness")
    primary: bool = Field(
        default=False, description="Whether this is the primary key index"
    )
    index_type: Optional[str] = Field(
        None, description="Index type (btree, hash, etc.)"
    )
    size_bytes: Optional[int] = Field(None, description="Index size in bytes")
    comment: Optional[str] = Field(None, description="Index comment")
    extra_info: dict[str, Any] = Field(
        default_factory=dict,
        description="Database-specific additional information",
    )

    @property
    def size_human(self) -> Optional[str]:
        """Human-readable size."""
        if self.size_bytes is None:
            return None

        size = float(self.size_bytes)
        for unit in ["B", "KB", "MB", "GB"]:
            if size < 1024.0:
                return f"{size:.2f} {unit}"
            size /= 1024.0
        return f"{size:.2f} TB"


class ConstraintInfo(BaseModel):
    """Information about a table constraint."""

    name: str = Field(..., description="Constraint name")
    constraint_type: str = Field(
        ..., description="Constraint type (PRIMARY KEY, FOREIGN KEY, UNIQUE, CHECK)"
    )
    columns: list[str] = Field(..., description="Constrained column names")
    referenced_table: Optional[str] = Field(None, description="Referenced table for FK")
    referenced_columns: Optional[list[str]] = Field(
        None, description="Referenced columns for FK"
    )
    definition: Optional[str] = Field(None, description="Constraint definition SQL")
    deferrable: Optional[bool] = Field(
        None, description="Whether constraint is deferrable"
    )
    initially_deferred: Optional[bool] = Field(
        None, description="Whether initially deferred"
    )
    extra_info: dict[str, Any] = Field(
        default_factory=dict,
        description="Database-specific additional information",
    )


class RelationshipInfo(BaseModel):
    """Information about a foreign key relationship between tables."""

    from_table: str = Field(..., description="Source table name")
    from_schema: Optional[str] = Field(None, description="Source schema name")
    from_columns: list[str] = Field(..., description="Source column names")
    to_table: str = Field(..., description="Target table name")
    to_schema: Optional[str] = Field(None, description="Target schema name")
    to_columns: list[str] = Field(..., description="Target column names")
    constraint_name: str = Field(..., description="Foreign key constraint name")
    on_delete: Optional[str] = Field(None, description="ON DELETE action")
    on_update: Optional[str] = Field(None, description="ON UPDATE action")


class TableInfo(BaseModel):
    """Comprehensive information about a table."""

    model_config = ConfigDict(arbitrary_types_allowed=True)

    name: str = Field(..., description="Table name")
    schema: Optional[str] = Field(default=None, description="Schema name")
    table_type: str = Field(
        default="BASE TABLE", description="Type (BASE TABLE, VIEW, etc.)"
    )
    row_count: Optional[int] = Field(None, description="Approximate row count")
    size_bytes: Optional[int] = Field(None, description="Table size in bytes")
    index_size_bytes: Optional[int] = Field(
        None, description="Total index size in bytes"
    )
    columns: list[ColumnInfo] = Field(
        default_factory=list, description="Column information"
    )
    indexes: list[IndexInfo] = Field(
        default_factory=list, description="Index information"
    )
    constraints: list[ConstraintInfo] = Field(
        default_factory=list, description="Constraint information"
    )
    comment: Optional[str] = Field(None, description="Table comment/description")
    created_at: Optional[str] = Field(None, description="Creation timestamp")
    updated_at: Optional[str] = Field(None, description="Last update timestamp")
    owner: Optional[str] = Field(None, description="Table owner")
    extra_info: dict[str, Any] = Field(
        default_factory=dict,
        description="Database-specific additional information (engine, partitions, etc.)",
    )

    @property
    def size_human(self) -> Optional[str]:
        """Human-readable table size."""
        if self.size_bytes is None:
            return None

        size = float(self.size_bytes)
        for unit in ["B", "KB", "MB", "GB", "TB"]:
            if size < 1024.0:
                return f"{size:.2f} {unit}"
            size /= 1024.0
        return f"{size:.2f} PB"

    @property
    def total_size_bytes(self) -> Optional[int]:
        """Total size including indexes."""
        if self.size_bytes is None:
            return None
        return self.size_bytes + (self.index_size_bytes or 0)

    @property
    def total_size_human(self) -> Optional[str]:
        """Human-readable total size."""
        total = self.total_size_bytes
        if total is None:
            return None

        size = float(total)
        for unit in ["B", "KB", "MB", "GB", "TB"]:
            if size < 1024.0:
                return f"{size:.2f} {unit}"
            size /= 1024.0
        return f"{size:.2f} PB"

    @property
    def primary_key_columns(self) -> list[str]:
        """Get primary key column names."""
        return [col.name for col in self.columns if col.primary_key]

    @property
    def foreign_key_columns(self) -> list[str]:
        """Get foreign key column names."""
        return [col.name for col in self.columns if col.foreign_key is not None]

    @property
    def column_count(self) -> int:
        """Get number of columns."""
        return len(self.columns)

    @property
    def index_count(self) -> int:
        """Get number of indexes."""
        return len(self.indexes)

    @property
    def constraint_count(self) -> int:
        """Get number of constraints."""
        return len(self.constraints)

    def get_column(self, name: str) -> Optional[ColumnInfo]:
        """Get column by name."""
        for col in self.columns:
            if col.name == name:
                return col
        return None

    def get_index(self, name: str) -> Optional[IndexInfo]:
        """Get index by name."""
        for idx in self.indexes:
            if idx.name == name:
                return idx
        return None

    def get_constraint(self, name: str) -> Optional[ConstraintInfo]:
        """Get constraint by name."""
        for constraint in self.constraints:
            if constraint.name == name:
                return constraint
        return None
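
A minimal usage sketch of the table metadata models. The import path follows the file listing (db_connect_mcp/models/table.py); the table, column, and size values are illustrative and not taken from the package. In practice these objects would be built by the inspector from live catalog queries.

from db_connect_mcp.models.table import ColumnInfo, IndexInfo, TableInfo

# Hypothetical "users" table description, hand-built for illustration.
table = TableInfo(
    name="users",
    schema="public",
    row_count=10_000,
    size_bytes=5 * 1024 * 1024,        # 5 MB of table data
    index_size_bytes=1024 * 1024,      # 1 MB of indexes
    columns=[
        ColumnInfo(name="id", data_type="integer", nullable=False, primary_key=True),
        ColumnInfo(name="email", data_type="text", nullable=False, unique=True),
    ],
    indexes=[
        IndexInfo(name="users_pkey", columns=["id"], unique=True, primary=True),
    ],
)

print(table.primary_key_columns)         # ['id']
print(table.column_count)                # 2
print(table.size_human)                  # '5.00 MB'
print(table.total_size_human)            # '6.00 MB' (table + index bytes)
print(table.get_column("email").unique)  # True

Note that the module-level warnings.filterwarnings call exists because TableInfo declares a field named schema, which shadows the deprecated BaseModel.schema attribute in Pydantic; the filter silences that specific UserWarning at import time.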