db_connect_mcp-0.1.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of db-connect-mcp might be problematic.

@@ -0,0 +1,401 @@
+ """Database configuration model."""
+ 
+ import logging
+ from typing import Optional
+ from urllib.parse import parse_qs, urlencode, urlparse, urlunparse
+ 
+ from pydantic import BaseModel, Field, field_validator
+ from sqlalchemy.engine.url import make_url
+ 
+ 
+ logger = logging.getLogger(__name__)
+ 
+ 
+ class DatabaseConfig(BaseModel):
+     """Configuration for database connection and pooling."""
+ 
+     url: str = Field(
+         ...,
+         description="Database connection URL (e.g., postgresql://user:pass@host:5432/db)",
+     )
+     pool_size: int = Field(
+         default=5,
+         ge=1,
+         le=50,
+         description="Connection pool size",
+     )
+     max_overflow: int = Field(
+         default=10,
+         ge=0,
+         le=100,
+         description="Maximum overflow connections",
+     )
+     pool_timeout: int = Field(
+         default=30,
+         ge=1,
+         le=300,
+         description="Pool checkout timeout in seconds",
+     )
+     read_only: bool = Field(
+         default=True,
+         description="Enforce read-only connections",
+     )
+     statement_timeout: Optional[int] = Field(
+         default=30,
+         ge=1,
+         le=3600,
+         description="Statement execution timeout in seconds",
+     )
+     echo_sql: bool = Field(
+         default=False,
+         description="Echo SQL statements to stdout",
+     )
+ 
+     @field_validator("url")
+     @classmethod
+     def validate_url(cls, v: str) -> str:
+         """Validate and normalize database URL format."""
+         try:
+             # Handle JDBC URLs by stripping the jdbc: prefix
+             # JDBC is a Java-specific format, Python drivers don't use it
+             if v.lower().startswith("jdbc:"):
+                 v = v[5:]  # Remove "jdbc:" prefix
+                 logger.info(
+                     "Converted JDBC URL to Python format (removed 'jdbc:' prefix)"
+                 )
+ 
+             # Some JDBC URLs might have jdbc:driver:// format (e.g., jdbc:postgresql://)
+             # These are already handled by removing the prefix
+ 
+             # Handle special case: JDBC ClickHouse URLs with query parameters
+             # Format: jdbc:clickhouse://host:port?user=X&password=Y&database=Z
+             if v.lower().startswith("clickhouse://"):
+                 import re
+                 # 'quote' is used below to re-encode credentials when rebuilding the URL
+                 from urllib.parse import quote, unquote
+ 
+                 # Parse JDBC-style ClickHouse URL
+                 match = re.match(r"clickhouse://([^:/]+):?(\d+)?\?(.+)", v)
+                 if match:
+                     host = match.group(1)
+                     port = match.group(2) or "9000"
+                     params_str = match.group(3)
+ 
+                     # Parse parameters carefully
+                     user = "default"
+                     password = ""
+                     database = "default"
+ 
+                     # Extract user
+                     if "user=" in params_str:
+                         user_match = re.search(r"user=([^&]+)", params_str)
+                         if user_match:
+                             user = unquote(user_match.group(1))
+ 
+                     # Extract password (handle special characters)
+                     if "password=" in params_str:
+                         pwd_start = params_str.find("password=") + len("password=")
+                         # Find the next known parameter or end of string; "&user=" is
+                         # included so a user parameter that follows the password is
+                         # not absorbed into the password value
+                         next_param = len(params_str)
+                         for known_param in ["&user=", "&ssl=", "&database=", "&secure="]:
+                             pos = params_str.find(known_param, pwd_start)
+                             if pos != -1 and pos < next_param:
+                                 next_param = pos
+                         password = unquote(params_str[pwd_start:next_param])
+ 
+                     # Extract database
+                     if "database=" in params_str:
+                         db_match = re.search(r"database=([^&]+)", params_str)
+                         if db_match:
+                             database = unquote(db_match.group(1))
+ 
+                     # Check for SSL/secure
+                     secure = "ssl=true" in params_str or "secure=true" in params_str
+ 
+                     # Build a proper SQLAlchemy URL, re-quoting the credentials so
+                     # special characters survive the urlparse/make_url round trip
+                     v = (
+                         f"clickhousedb://{quote(user, safe='')}:"
+                         f"{quote(password, safe='')}@{host}:{port}/{database}"
+                     )
+                     if secure:
+                         v += "?secure=True"
+ 
+                     logger.info(
+                         "Converted JDBC ClickHouse URL to SQLAlchemy format"
+                     )
+ 
+             # Parse the URL to handle query parameters
+             parsed = urlparse(v)
+ 
+             # Parse query parameters
+             query_params = parse_qs(parsed.query)
+ 
+             # Extract base dialect and normalize variations
+             temp_url = make_url(v)
+             original_dialect = temp_url.drivername.split("+")[0].lower()
+ 
+             # Map common dialect variations to standard names
+             dialect_variations = {
+                 # PostgreSQL variations
+                 "postgresql": "postgresql",
+                 "postgres": "postgresql",
+                 "psql": "postgresql",
+                 "pg": "postgresql",
+                 "pgsql": "postgresql",
+                 # MySQL variations
+                 "mysql": "mysql",
+                 "mariadb": "mysql",  # MariaDB is MySQL-compatible
+                 "maria": "mysql",
+                 # ClickHouse variations
+                 "clickhouse": "clickhouse",
+                 "clickhousedb": "clickhouse",  # clickhouse-connect uses this
+                 "ch": "clickhouse",
+                 "click": "clickhouse",
+             }
+ 
+             # Normalize the dialect
+             dialect = dialect_variations.get(original_dialect)
+ 
+             if not dialect:
+                 supported = set(dialect_variations.keys())
+                 raise ValueError(
+                     f"Unsupported database dialect: '{original_dialect}'. "
+                     f"Supported: {', '.join(sorted(supported))}"
+                 )
+ 
+             # If dialect was normalized, update the URL
+             if original_dialect != dialect:
+                 driver_part = ""
+                 if "+" in temp_url.drivername:
+                     driver_part = "+" + temp_url.drivername.split("+")[1]
+                 temp_url = temp_url.set(drivername=dialect + driver_part)
+                 logger.info(
+                     f"Normalized database dialect from '{original_dialect}' to '{dialect}'"
+                 )
+                 # Rebuild the URL with normalized dialect
+                 v = temp_url.render_as_string(hide_password=False)
+                 # Re-parse the normalized URL
+                 parsed = urlparse(v)
+                 query_params = parse_qs(parsed.query)
+ 
+             # Define allowed parameters for each database type
+             # These are parameters that are actually useful and safe for async drivers
+             allowed_params = {
+                 "postgresql": {
+                     # Connection identification and monitoring
+                     "application_name",  # Shows up in pg_stat_activity
+                     # Timeouts
+                     "connect_timeout",  # Connection timeout in seconds
+                     "command_timeout",  # Default timeout for operations
+                     # Server settings
+                     "server_settings",  # Server settings dictionary
+                     "options",  # Command-line options to send to the server
+                     # SSL settings (essential for cloud databases)
+                     # Note: Only basic SSL params that asyncpg handles well
+                     "ssl",  # Enable SSL (e.g., 'require', 'prefer')
+                     "sslmode",  # SSL mode for connection
+                     "direct_tls",  # Use direct TLS connection
+                     "ssl_min_protocol_version",  # Minimum SSL/TLS version
+                     "ssl_max_protocol_version",  # Maximum SSL/TLS version
+                     # We DON'T include cert/key file paths as they can cause issues
+                     # Performance tuning
+                     "prepared_statement_cache_size",  # Cache size for prepared statements
+                     "prepared_statement_name_func",  # Function for prepared statement names
+                     "max_cached_statement_lifetime",  # Max lifetime for cached statements
+                     "max_cacheable_statement_size",  # Max size for cacheable statements
+                 },
+                 "mysql": {
+                     # Character encoding - CRITICAL for proper data handling
+                     "charset",  # Character set (e.g., utf8mb4)
+                     "use_unicode",  # Whether to use unicode
+                     # Timeouts
+                     "connect_timeout",  # Connection timeout
+                     "read_timeout",  # Read timeout
+                     "write_timeout",  # Write timeout
+                     # Transaction control
+                     "autocommit",  # Autocommit mode
+                     "init_command",  # Initial SQL command to run
+                     # Other useful settings
+                     "sql_mode",  # SQL mode settings
+                     "time_zone",  # Time zone setting
+                 },
+                 "clickhouse": {
+                     # Database selection
+                     "database",  # Default database
+                     # Timeouts
+                     "timeout",  # Query timeout
+                     "connect_timeout",  # Connection timeout
+                     "send_receive_timeout",  # Network timeout
+                     "sync_request_timeout",  # Sync request timeout
+                     # Compression
+                     "compress",  # Whether to use compression
+                     "compression",  # Compression type
+                     # Performance
+                     "max_block_size",  # Max block size for reading
+                     "max_threads",  # Max threads for query execution
+                 },
+             }
+ 
+             # Get the allowed parameters for this dialect
+             dialect_params = allowed_params.get(dialect, set())
+ 
+             # Filter to only allowed parameters
+             filtered_params = {
+                 k: v for k, v in query_params.items() if k.lower() in dialect_params
+             }
+ 
+             # Special handling for PostgreSQL SSL parameters
+             # Convert PostgreSQL standard SSL params to asyncpg format
+             if dialect == "postgresql" and filtered_params:
+                 # asyncpg uses 'sslmode' parameter, not 'ssl'
+                 # Convert 'ssl' parameter to 'sslmode' for asyncpg compatibility
+                 if "ssl" in filtered_params:
+                     ssl_value = (
+                         filtered_params["ssl"][0]
+                         if isinstance(filtered_params["ssl"], list)
+                         else filtered_params["ssl"]
+                     )
+ 
+                     # Map common ssl values to sslmode values
+                     ssl_to_sslmode_map = {
+                         "require": "require",
+                         "required": "require",
+                         "true": "require",
+                         "1": "require",
+                         "prefer": "prefer",
+                         "preferred": "prefer",
+                         "allow": "allow",
+                         "disable": "disable",
+                         "disabled": "disable",
+                         "false": "disable",
+                         "0": "disable",
+                     }
+ 
+                     sslmode_value = ssl_to_sslmode_map.get(str(ssl_value).lower())
+                     if sslmode_value:
+                         # Replace ssl with sslmode
+                         del filtered_params["ssl"]
+                         filtered_params["sslmode"] = [sslmode_value]
+                         logger.info(
+                             f"Converted ssl={ssl_value} to sslmode={sslmode_value} for asyncpg"
+                         )
+                     else:
+                         # Unknown ssl value, remove it
+                         del filtered_params["ssl"]
+                         logger.info(f"Removed unknown ssl value: {ssl_value}")
+ 
+                 # Validate sslmode parameter values if present
+                 if "sslmode" in filtered_params:
+                     valid_sslmodes = {
+                         "disable",
+                         "allow",
+                         "prefer",
+                         "require",
+                         "verify-ca",
+                         "verify-full",
+                     }
+                     sslmode_value = (
+                         filtered_params["sslmode"][0]
+                         if isinstance(filtered_params["sslmode"], list)
+                         else filtered_params["sslmode"]
+                     )
+                     if sslmode_value not in valid_sslmodes:
+                         # Invalid sslmode value, default to require for safety
+                         filtered_params["sslmode"] = ["require"]
+                         logger.info(
+                             f"Invalid sslmode={sslmode_value}, defaulting to sslmode=require"
+                         )
+ 
+             # Log what we kept and what we removed
+             removed_params = set(query_params.keys()) - set(filtered_params.keys())
+             if removed_params:
+                 logger.info(
+                     f"Removed unsupported parameters for {dialect}: {removed_params}"
+                 )
+             if filtered_params:
+                 logger.info(
+                     f"Keeping supported parameters for {dialect}: {set(filtered_params.keys())}"
+                 )
+ 
+             # Rebuild URL with only allowed parameters
+             new_query = urlencode(filtered_params, doseq=True)
+             clean_url = urlunparse(
+                 (
+                     parsed.scheme,
+                     parsed.netloc,
+                     parsed.path,
+                     parsed.params,
+                     new_query,
+                     parsed.fragment,
+                 )
+             )
+ 
+             # Now parse with SQLAlchemy
+             url = make_url(clean_url)
+ 
+             # No need to check supported dialects again as we already normalized and validated above
+ 
+             # Handle ClickHouse special case - clickhouse-connect uses 'clickhousedb' as dialect
+             if dialect == "clickhouse":
+                 # clickhouse-connect uses 'clickhousedb' as the SQLAlchemy dialect name
+                 # Don't add a driver suffix - just use 'clickhousedb'
+                 url = url.set(drivername="clickhousedb")
+                 logger.info(
+                     "Using clickhousedb dialect for ClickHouse (clickhouse-connect)"
+                 )
+             elif "+" not in url.drivername:
+                 # Map other dialects to their default async drivers
+                 async_drivers = {
+                     "postgresql": "asyncpg",
+                     "mysql": "aiomysql",
+                 }
+ 
+                 driver = async_drivers.get(dialect)
+                 if driver:
+                     # Rebuild the URL with the async driver
+                     new_drivername = f"{dialect}+{driver}"
+                     url = url.set(drivername=new_drivername)
+                     logger.info(f"Automatically added async driver: {new_drivername}")
+ 
+             # IMPORTANT: Use render_as_string to preserve the actual password
+             # str(url) masks the password as ***, which breaks authentication!
+             clean_url = url.render_as_string(hide_password=False)
+ 
+             return clean_url
+         except Exception as e:
+             raise ValueError(f"Invalid database URL: {e}") from e
+ 
+     @property
+     def dialect(self) -> str:
+         """Extract database dialect from URL."""
+         dialect = make_url(self.url).drivername.split("+")[0]
+         # Normalize clickhousedb back to clickhouse for consistency
+         if dialect == "clickhousedb":
+             return "clickhouse"
+         return dialect
+ 
+     @property
+     def driver(self) -> str:
+         """Extract driver name from URL."""
+         drivername = make_url(self.url).drivername
+         # clickhousedb is the whole driver name, no + separator
+         if drivername == "clickhousedb":
+             return "connect"  # Indicate we're using clickhouse-connect
+         parts = drivername.split("+")
+         return parts[1] if len(parts) > 1 else ""
+ 
+     @property
+     def database(self) -> Optional[str]:
+         """Extract database name from URL."""
+         url = make_url(self.url)
+         return url.database
+ 
+     model_config = {
+         "json_schema_extra": {
+             "examples": [
+                 {
+                     "url": "postgresql://user:password@localhost:5432/mydb",
+                     "pool_size": 5,
+                     "max_overflow": 10,
+                     "read_only": True,
+                     "statement_timeout": 30,
+                 }
+             ]
+         }
+     }
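
For reference, a minimal usage sketch of the validator above. The module path is an assumption for illustration (only the db_connect_mcp.models package is visible elsewhere in this diff); the expected outputs follow from the normalization steps in validate_url: the jdbc: prefix is stripped, the "postgres" alias maps to "postgresql", ssl=require becomes sslmode=require, and the asyncpg driver is appended.

    from db_connect_mcp.models.config import DatabaseConfig  # hypothetical path

    cfg = DatabaseConfig(url="jdbc:postgres://user:pass@localhost:5432/mydb?ssl=require")
    print(cfg.url)      # expected: postgresql+asyncpg://user:pass@localhost:5432/mydb?sslmode=require
    print(cfg.dialect)  # expected: postgresql
    print(cfg.driver)   # expected: asyncpg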
@@ -0,0 +1,112 @@
+ """Database and schema information models."""
+ 
+ from typing import Optional
+ 
+ from pydantic import BaseModel, Field
+ 
+ from db_connect_mcp.models.capabilities import DatabaseCapabilities
+ 
+ 
+ class SchemaInfo(BaseModel):
+     """Information about a database schema/namespace."""
+ 
+     name: str = Field(..., description="Schema name")
+     owner: Optional[str] = Field(None, description="Schema owner")
+     table_count: Optional[int] = Field(None, description="Number of tables in schema")
+     view_count: Optional[int] = Field(None, description="Number of views in schema")
+     size_bytes: Optional[int] = Field(None, description="Schema size in bytes")
+     comment: Optional[str] = Field(None, description="Schema comment/description")
+ 
+     @property
+     def size_human(self) -> Optional[str]:
+         """Human-readable size."""
+         if self.size_bytes is None:
+             return None
+ 
+         size = float(self.size_bytes)
+         for unit in ["B", "KB", "MB", "GB", "TB"]:
+             if size < 1024.0:
+                 return f"{size:.2f} {unit}"
+             size /= 1024.0
+         return f"{size:.2f} PB"
+ 
+ 
+ class DatabaseInfo(BaseModel):
+     """Information about the database instance."""
+ 
+     name: str = Field(..., description="Database name")
+     dialect: str = Field(
+         ..., description="Database dialect (postgresql, mysql, clickhouse)"
+     )
+     version: str = Field(..., description="Database version string")
+     size_bytes: Optional[int] = Field(None, description="Total database size in bytes")
+     schema_count: Optional[int] = Field(None, description="Number of schemas")
+     table_count: Optional[int] = Field(None, description="Total number of tables")
+     capabilities: DatabaseCapabilities = Field(..., description="Database capabilities")
+     server_encoding: Optional[str] = Field(
+         None, description="Server character encoding"
+     )
+     collation: Optional[str] = Field(None, description="Default collation")
+     connection_url: str = Field(..., description="Sanitized connection URL")
+     read_only: bool = Field(
+         default=True, description="Whether connections are read-only"
+     )
+     extra_info: dict[str, str | int | float | bool] = Field(
+         default_factory=dict,
+         description="Database-specific additional information",
+     )
+ 
+     @property
+     def size_human(self) -> Optional[str]:
+         """Human-readable size."""
+         if self.size_bytes is None:
+             return None
+ 
+         size = float(self.size_bytes)
+         for unit in ["B", "KB", "MB", "GB", "TB"]:
+             if size < 1024.0:
+                 return f"{size:.2f} {unit}"
+             size /= 1024.0
+         return f"{size:.2f} PB"
+ 
+     def get_feature_summary(self) -> str:
+         """Get a summary of supported features."""
+         supported = self.capabilities.get_supported_features()
+         return f"{len(supported)} features supported: {', '.join(supported[:5])}" + (
+             "..." if len(supported) > 5 else ""
+         )
+ 
+     model_config = {
+         "json_schema_extra": {
+             "examples": [
+                 {
+                     "name": "mydb",
+                     "dialect": "postgresql",
+                     "version": "PostgreSQL 15.3",
+                     "size_bytes": 1073741824,
+                     "schema_count": 3,
+                     "table_count": 42,
+                     "capabilities": {
+                         "foreign_keys": True,
+                         "indexes": True,
+                         "views": True,
+                         "materialized_views": True,
+                         "partitions": True,
+                         "advanced_stats": True,
+                         "explain_plans": True,
+                         "profiling": True,
+                         "comments": True,
+                         "schemas": True,
+                         "transactions": True,
+                         "stored_procedures": True,
+                         "triggers": True,
+                     },
+                     "server_encoding": "UTF8",
+                     "collation": "en_US.UTF-8",
+                     "connection_url": "postgresql+asyncpg://localhost:5432/mydb",
+                     "read_only": True,
+                     "extra_info": {},
+                 }
+             ]
+         }
+     }
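
A quick sketch of the size_human helper defined on both models above (the loop divides by 1024 until the value drops below one unit); the module path is again an assumption, not shown in this diff:

    from db_connect_mcp.models.database import SchemaInfo  # hypothetical path

    schema = SchemaInfo(name="public", table_count=42, size_bytes=5_368_709_120)
    print(schema.size_human)  # expected: "5.00 GB" (5 * 1024**3 bytes)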
@@ -0,0 +1,119 @@
+ """Query execution and explain plan models."""
+ 
+ from typing import Any, Optional
+ 
+ from pydantic import BaseModel, Field
+ 
+ 
+ class QueryResult(BaseModel):
+     """Result of a query execution."""
+ 
+     query: str = Field(..., description="Executed SQL query")
+     rows: list[dict[str, Any]] = Field(..., description="Result rows as dictionaries")
+     row_count: int = Field(..., description="Number of rows returned")
+     columns: list[str] = Field(..., description="Column names in order")
+     execution_time_ms: Optional[float] = Field(
+         None, description="Execution time in milliseconds"
+     )
+     truncated: bool = Field(
+         default=False, description="Whether results were truncated due to limits"
+     )
+     warning: Optional[str] = Field(None, description="Warning message if applicable")
+ 
+     @property
+     def is_empty(self) -> bool:
+         """Check if result set is empty."""
+         return self.row_count == 0
+ 
+     @property
+     def column_count(self) -> int:
+         """Get number of columns."""
+         return len(self.columns)
+ 
+     def get_column_values(self, column: str) -> list[Any]:
+         """Extract all values for a specific column."""
+         return [row.get(column) for row in self.rows]
+ 
+     def to_table_string(self, max_rows: int = 10) -> str:
+         """Format result as a simple table string."""
+         if self.is_empty:
+             return "No rows returned"
+ 
+         # Header
+         result_lines = [" | ".join(self.columns)]
+         result_lines.append("-" * len(result_lines[0]))
+ 
+         # Rows (truncated if needed)
+         display_rows = self.rows[:max_rows]
+         for row in display_rows:
+             # Show SQL NULLs explicitly; dict.get's default only covers missing
+             # keys, not keys whose value is None
+             values = ["NULL" if row.get(col) is None else str(row[col]) for col in self.columns]
50
+ result_lines.append(" | ".join(values))
51
+
52
+ if len(self.rows) > max_rows:
53
+ result_lines.append(f"... ({self.row_count - max_rows} more rows)")
54
+
55
+ return "\n".join(result_lines)
56
+
57
+
58
+ class ExplainPlan(BaseModel):
59
+ """Query execution plan from EXPLAIN."""
60
+
61
+ query: str = Field(..., description="Analyzed SQL query")
62
+ plan: str = Field(..., description="Execution plan as formatted string")
63
+ plan_json: Optional[dict[str, Any]] = Field(
64
+ None, description="Execution plan as JSON (if supported)"
65
+ )
66
+ estimated_cost: Optional[float] = Field(None, description="Estimated query cost")
67
+ estimated_rows: Optional[int] = Field(None, description="Estimated rows to process")
68
+ actual_time_ms: Optional[float] = Field(
69
+ None, description="Actual execution time if ANALYZE"
70
+ )
71
+ actual_rows: Optional[int] = Field(
72
+ None, description="Actual rows processed if ANALYZE"
73
+ )
74
+ warnings: list[str] = Field(
75
+ default_factory=list, description="Performance warnings"
76
+ )
77
+ recommendations: list[str] = Field(
78
+ default_factory=list, description="Optimization recommendations"
79
+ )
80
+
81
+ @property
82
+ def has_actual_stats(self) -> bool:
83
+ """Check if this is an EXPLAIN ANALYZE with actual statistics."""
84
+ return self.actual_time_ms is not None
85
+
86
+ @property
87
+ def cost_per_row(self) -> Optional[float]:
88
+ """Calculate estimated cost per row."""
89
+ if self.estimated_cost and self.estimated_rows and self.estimated_rows > 0:
90
+ return self.estimated_cost / self.estimated_rows
91
+ return None
92
+
93
+ def add_warning(self, warning: str) -> None:
94
+ """Add a performance warning."""
95
+ if warning not in self.warnings:
96
+ self.warnings.append(warning)
97
+
98
+ def add_recommendation(self, recommendation: str) -> None:
99
+ """Add an optimization recommendation."""
100
+ if recommendation not in self.recommendations:
101
+ self.recommendations.append(recommendation)
102
+
103
+ model_config = {
104
+ "json_schema_extra": {
105
+ "examples": [
106
+ {
107
+ "query": "SELECT * FROM users WHERE email = 'test@example.com'",
108
+ "plan": "Seq Scan on users (cost=0.00..25.88 rows=1 width=100)",
109
+ "plan_json": None,
110
+ "estimated_cost": 25.88,
111
+ "estimated_rows": 1,
112
+ "actual_time_ms": None,
113
+ "actual_rows": None,
114
+ "warnings": ["Sequential scan on large table"],
115
+ "recommendations": ["Consider adding index on email column"],
116
+ }
117
+ ]
118
+ }
119
+ }
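
Finally, a usage sketch for QueryResult.to_table_string (module path assumed for illustration); note that None values render as NULL in the formatted table:

    from db_connect_mcp.models.query import QueryResult  # hypothetical path

    result = QueryResult(
        query="SELECT id, email FROM users LIMIT 2",
        rows=[{"id": 1, "email": "a@example.com"}, {"id": 2, "email": None}],
        row_count=2,
        columns=["id", "email"],
    )
    print(result.to_table_string())
    # expected output:
    # id | email
    # ----------
    # 1 | a@example.com
    # 2 | NULL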