db-connect-mcp 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of db-connect-mcp might be problematic. Click here for more details.

@@ -0,0 +1,176 @@
1
+ """Column statistics and distribution models."""
2
+
3
+ from typing import Any, Optional
4
+
5
+ from pydantic import BaseModel, Field
6
+
7
+
8
class Distribution(BaseModel):
    """Frequency distribution of the values observed in one column.

    Holds raw counts (rows, unique values, NULLs) plus the most frequent
    values, and derives a few ratios from those counts on demand.
    """

    column: str = Field(..., description="Column name")
    total_rows: int = Field(..., description="Total rows analyzed")
    unique_values: int = Field(..., description="Number of unique values")
    null_count: int = Field(..., description="Number of NULL values")
    top_values: list[dict[str, Any]] = Field(
        ...,
        description="Top N most frequent values with counts",
    )
    sample_size: int = Field(..., description="Number of rows sampled")

    @property
    def null_percentage(self) -> float:
        """Return NULL values as a percentage of ``total_rows``.

        Yields ``0.0`` when no rows were analyzed, avoiding a division by zero.
        """
        rows = self.total_rows
        return 0.0 if rows == 0 else (self.null_count / rows) * 100

    @property
    def cardinality(self) -> float:
        """Return the ratio ``unique_values / total_rows``.

        Yields ``0.0`` when no rows were analyzed, avoiding a division by zero.
        """
        rows = self.total_rows
        return 0.0 if rows == 0 else self.unique_values / rows

    @property
    def is_high_cardinality(self) -> bool:
        """Return True when the cardinality ratio is strictly above 0.9."""
        ratio = self.cardinality
        return ratio > 0.9

    @property
    def is_low_cardinality(self) -> bool:
        """Return True when the cardinality ratio is strictly below 0.1."""
        ratio = self.cardinality
        return ratio < 0.1
43
+
44
+
45
class ColumnStats(BaseModel):
    """Statistical information about a column.

    Only ``column``, ``data_type``, ``total_rows``, ``null_count`` and
    ``sample_size`` are required; every other statistic defaults to ``None``
    (or an empty list) when it is not supplied.
    """

    column: str = Field(..., description="Column name")
    data_type: str = Field(..., description="Column data type")
    total_rows: int = Field(..., description="Total rows in table")
    null_count: int = Field(..., description="Number of NULL values")
    distinct_count: Optional[int] = Field(None, description="Number of distinct values")
    min_value: Optional[Any] = Field(None, description="Minimum value")
    max_value: Optional[Any] = Field(None, description="Maximum value")
    avg_value: Optional[float] = Field(
        None, description="Average value (numeric columns)"
    )
    median_value: Optional[Any] = Field(None, description="Median value")
    stddev_value: Optional[float] = Field(None, description="Standard deviation")
    percentile_25: Optional[Any] = Field(None, description="25th percentile")
    percentile_75: Optional[Any] = Field(None, description="75th percentile")
    percentile_95: Optional[Any] = Field(None, description="95th percentile")
    percentile_99: Optional[Any] = Field(None, description="99th percentile")
    most_common_values: list[dict[str, Any]] = Field(
        default_factory=list, description="Most common values with frequencies"
    )
    sample_size: int = Field(..., description="Number of rows sampled for statistics")
    warning: Optional[str] = Field(
        None, description="Warning message if stats unavailable"
    )

    @property
    def null_percentage(self) -> float:
        """Return NULL values as a percentage of ``total_rows``.

        Returns ``0.0`` when ``total_rows`` is zero to avoid dividing by zero.
        """
        if self.total_rows == 0:
            return 0.0
        return (self.null_count / self.total_rows) * 100

    @property
    def completeness(self) -> float:
        """Return data completeness as ``1 - null fraction``.

        The fields are not cross-validated, so the result can drop below 0
        if ``null_count`` exceeds ``total_rows``.
        """
        return 1.0 - (self.null_percentage / 100.0)

    @property
    def cardinality(self) -> Optional[float]:
        """Return ``distinct_count / total_rows``.

        Returns ``None`` when the distinct count is unknown or the table has
        no rows.
        """
        if self.distinct_count is None or self.total_rows == 0:
            return None
        return self.distinct_count / self.total_rows

    @property
    def has_advanced_stats(self) -> bool:
        """Return True if median, 25th percentile or stddev is available."""
        return any(
            value is not None
            for value in (self.median_value, self.percentile_25, self.stddev_value)
        )

    @property
    def is_numeric(self) -> bool:
        """Return True if an average or standard deviation is present.

        This is a heuristic based on which statistics are populated, not on
        ``data_type``.
        """
        return self.avg_value is not None or self.stddev_value is not None

    @property
    def range_value(self) -> Optional[Any]:
        """Return ``max_value - min_value``.

        Returns ``None`` when either bound is missing or when the two values
        do not support subtraction (for example strings).
        """
        if self.min_value is None or self.max_value is None:
            return None
        try:
            return self.max_value - self.min_value
        except (TypeError, ValueError):
            return None

    def get_quality_score(self) -> float:
        """Calculate a data quality score in the range 0-100.

        Points are awarded as follows:

        * completeness: up to 40 points
        * distinct count present: 10 points
        * both min and max present: 10 points
        * any advanced statistic present: 20 points
        * most common values present: 10 points
        * average present: 10 points

        Returns:
            The total score, clamped to ``[0.0, 100.0]``.
        """
        score = self.completeness * 40

        if self.distinct_count is not None:
            score += 10

        if self.min_value is not None and self.max_value is not None:
            score += 10

        if self.has_advanced_stats:
            score += 20

        if self.most_common_values:
            score += 10

        if self.avg_value is not None:
            score += 10

        # Completeness can be negative when null_count > total_rows, so the
        # lower bound must be enforced as well as the upper one.
        return max(0.0, min(score, 100.0))

    model_config = {
        "json_schema_extra": {
            "examples": [
                {
                    "column": "age",
                    "data_type": "integer",
                    "total_rows": 10000,
                    "null_count": 50,
                    "distinct_count": 95,
                    "min_value": 18,
                    "max_value": 95,
                    "avg_value": 42.5,
                    "median_value": 41,
                    "stddev_value": 15.2,
                    "percentile_25": 30,
                    "percentile_75": 55,
                    "percentile_95": 72,
                    "percentile_99": 85,
                    "most_common_values": [
                        {"value": 35, "count": 250},
                        {"value": 42, "count": 230},
                    ],
                    "sample_size": 10000,
                    "warning": None,
                }
            ]
        }
    }
@@ -0,0 +1,230 @@
1
+ """Table, column, index, and constraint information models."""
2
+
3
+ import warnings
4
+ from typing import Any, Optional
5
+
6
+ from pydantic import BaseModel, ConfigDict, Field
7
+
8
# TableInfo declares a field named "schema", which triggers a UserWarning
# about shadowing an attribute of its parent class. Silence only that message.
warnings.filterwarnings(
    action="ignore",
    category=UserWarning,
    message='Field name "schema" in "TableInfo" shadows an attribute in parent',
)
14
+
15
+
16
class ColumnInfo(BaseModel):
    """Metadata describing a single column of a table.

    ``name``, ``data_type`` and ``nullable`` are required; the remaining
    attributes default to ``None``, ``False`` or an empty dict.
    """

    name: str = Field(..., description="Column name")
    data_type: str = Field(..., description="Column data type")
    nullable: bool = Field(..., description="Whether column allows NULL")
    default: Optional[str] = Field(
        default=None,
        description="Default value expression",
    )
    primary_key: bool = Field(
        False,
        description="Whether column is part of primary key",
    )
    foreign_key: Optional[str] = Field(
        default=None,
        description="Foreign key reference (table.column)",
    )
    unique: bool = Field(
        False,
        description="Whether column has UNIQUE constraint",
    )
    indexed: bool = Field(False, description="Whether column is indexed")
    comment: Optional[str] = Field(
        default=None,
        description="Column comment/description",
    )
    max_length: Optional[int] = Field(
        default=None,
        description="Maximum length for string types",
    )
    numeric_precision: Optional[int] = Field(
        default=None,
        description="Precision for numeric types",
    )
    numeric_scale: Optional[int] = Field(
        default=None,
        description="Scale for numeric types",
    )
    extra_info: dict[str, Any] = Field(
        description="Database-specific additional information",
        default_factory=dict,
    )
45
+
46
+
47
class IndexInfo(BaseModel):
    """Information about a table index.

    ``name`` and ``columns`` are required; all other attributes are optional
    and default to ``False``, ``None`` or an empty dict.
    """

    name: str = Field(..., description="Index name")
    columns: list[str] = Field(..., description="Indexed column names")
    unique: bool = Field(default=False, description="Whether index enforces uniqueness")
    primary: bool = Field(
        default=False, description="Whether this is the primary key index"
    )
    index_type: Optional[str] = Field(
        None, description="Index type (btree, hash, etc.)"
    )
    size_bytes: Optional[int] = Field(None, description="Index size in bytes")
    comment: Optional[str] = Field(None, description="Index comment")
    extra_info: dict[str, Any] = Field(
        default_factory=dict,
        description="Database-specific additional information",
    )

    @property
    def size_human(self) -> Optional[str]:
        """Return the index size as a human-readable string.

        The size is divided by 1024 until it drops below 1024 and is rendered
        with two decimals and a unit from B up to TB; anything larger is
        reported in PB. This is the same unit ladder used by
        ``TableInfo.size_human``, so index and table sizes format alike.

        Returns:
            The formatted size (for example ``"1.50 MB"``), or ``None`` when
            ``size_bytes`` is unknown.
        """
        if self.size_bytes is None:
            return None

        size = float(self.size_bytes)
        for unit in ("B", "KB", "MB", "GB", "TB"):
            if size < 1024.0:
                return f"{size:.2f} {unit}"
            size /= 1024.0
        return f"{size:.2f} PB"
78
+
79
+
80
class ConstraintInfo(BaseModel):
    """Metadata describing a single constraint defined on a table.

    ``name``, ``constraint_type`` and ``columns`` are required; the foreign-key
    and deferral attributes default to ``None``.
    """

    name: str = Field(..., description="Constraint name")
    constraint_type: str = Field(
        ...,
        description="Constraint type (PRIMARY KEY, FOREIGN KEY, UNIQUE, CHECK)",
    )
    columns: list[str] = Field(..., description="Constrained column names")
    referenced_table: Optional[str] = Field(
        default=None,
        description="Referenced table for FK",
    )
    referenced_columns: Optional[list[str]] = Field(
        default=None,
        description="Referenced columns for FK",
    )
    definition: Optional[str] = Field(
        default=None,
        description="Constraint definition SQL",
    )
    deferrable: Optional[bool] = Field(
        default=None,
        description="Whether constraint is deferrable",
    )
    initially_deferred: Optional[bool] = Field(
        default=None,
        description="Whether initially deferred",
    )
    extra_info: dict[str, Any] = Field(
        description="Database-specific additional information",
        default_factory=dict,
    )
103
+
104
+
105
class RelationshipInfo(BaseModel):
    """Description of one foreign key link from a source table to a target table.

    Table names, column lists and the constraint name are required; schema
    names and the ON DELETE / ON UPDATE actions default to ``None``.
    """

    from_table: str = Field(..., description="Source table name")
    from_schema: Optional[str] = Field(
        default=None,
        description="Source schema name",
    )
    from_columns: list[str] = Field(..., description="Source column names")
    to_table: str = Field(..., description="Target table name")
    to_schema: Optional[str] = Field(
        default=None,
        description="Target schema name",
    )
    to_columns: list[str] = Field(..., description="Target column names")
    constraint_name: str = Field(..., description="Foreign key constraint name")
    on_delete: Optional[str] = Field(default=None, description="ON DELETE action")
    on_update: Optional[str] = Field(default=None, description="ON UPDATE action")
117
+
118
+
119
class TableInfo(BaseModel):
    """Comprehensive information about a table.

    Only ``name`` is required. Columns, indexes and constraints default to
    empty lists, and the size, row-count and ownership attributes default to
    ``None``.
    """

    model_config = ConfigDict(arbitrary_types_allowed=True)

    name: str = Field(..., description="Table name")
    schema: Optional[str] = Field(default=None, description="Schema name")
    table_type: str = Field(
        default="BASE TABLE", description="Type (BASE TABLE, VIEW, etc.)"
    )
    row_count: Optional[int] = Field(None, description="Approximate row count")
    size_bytes: Optional[int] = Field(None, description="Table size in bytes")
    index_size_bytes: Optional[int] = Field(
        None, description="Total index size in bytes"
    )
    columns: list[ColumnInfo] = Field(
        default_factory=list, description="Column information"
    )
    indexes: list[IndexInfo] = Field(
        default_factory=list, description="Index information"
    )
    constraints: list[ConstraintInfo] = Field(
        default_factory=list, description="Constraint information"
    )
    comment: Optional[str] = Field(None, description="Table comment/description")
    created_at: Optional[str] = Field(None, description="Creation timestamp")
    updated_at: Optional[str] = Field(None, description="Last update timestamp")
    owner: Optional[str] = Field(None, description="Table owner")
    extra_info: dict[str, Any] = Field(
        default_factory=dict,
        description="Database-specific additional information (engine, partitions, etc.)",
    )

    @staticmethod
    def _format_size(num_bytes: Optional[int]) -> Optional[str]:
        """Format a byte count as a two-decimal string with a B..PB unit.

        The value is divided by 1024 until it drops below 1024; anything that
        is still too large after TB is reported in PB. Returns ``None`` when
        ``num_bytes`` is ``None``.
        """
        if num_bytes is None:
            return None

        size = float(num_bytes)
        for unit in ("B", "KB", "MB", "GB", "TB"):
            if size < 1024.0:
                return f"{size:.2f} {unit}"
            size /= 1024.0
        return f"{size:.2f} PB"

    @property
    def size_human(self) -> Optional[str]:
        """Return the table size as a human-readable string, or ``None`` if unknown."""
        return self._format_size(self.size_bytes)

    @property
    def total_size_bytes(self) -> Optional[int]:
        """Return table size plus index size in bytes.

        Returns ``None`` when the table size is unknown; a missing index size
        is counted as zero.
        """
        if self.size_bytes is None:
            return None
        return self.size_bytes + (self.index_size_bytes or 0)

    @property
    def total_size_human(self) -> Optional[str]:
        """Return the total size (table plus indexes) as a human-readable string."""
        return self._format_size(self.total_size_bytes)

    @property
    def primary_key_columns(self) -> list[str]:
        """Return the names of columns flagged as part of the primary key."""
        return [col.name for col in self.columns if col.primary_key]

    @property
    def foreign_key_columns(self) -> list[str]:
        """Return the names of columns that carry a foreign key reference."""
        return [col.name for col in self.columns if col.foreign_key is not None]

    @property
    def column_count(self) -> int:
        """Return the number of columns."""
        return len(self.columns)

    @property
    def index_count(self) -> int:
        """Return the number of indexes."""
        return len(self.indexes)

    @property
    def constraint_count(self) -> int:
        """Return the number of constraints."""
        return len(self.constraints)

    def get_column(self, name: str) -> Optional[ColumnInfo]:
        """Return the first column whose name equals ``name``, or ``None``."""
        return next((col for col in self.columns if col.name == name), None)

    def get_index(self, name: str) -> Optional[IndexInfo]:
        """Return the first index whose name equals ``name``, or ``None``."""
        return next((idx for idx in self.indexes if idx.name == name), None)

    def get_constraint(self, name: str) -> Optional[ConstraintInfo]:
        """Return the first constraint whose name equals ``name``, or ``None``."""
        return next(
            (constraint for constraint in self.constraints if constraint.name == name),
            None,
        )