db-connect-mcp 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of db-connect-mcp has been flagged as potentially problematic; see the registry's security advisory for details.

@@ -0,0 +1,114 @@
1
+ """Column statistics and distribution analysis."""
2
+
3
+ from typing import TYPE_CHECKING, Optional
4
+
5
+
6
+ from db_connect_mcp.core.connection import DatabaseConnection
7
+ from db_connect_mcp.models.statistics import ColumnStats, Distribution
8
+
9
+ if TYPE_CHECKING:
10
+ from db_connect_mcp.adapters.base import BaseAdapter
11
+
12
+
13
class StatisticsAnalyzer:
    """Column statistics and value distribution analysis.

    Thin orchestration layer: each public method checks out a pooled
    connection and delegates the actual SQL to the database-specific
    adapter.
    """

    def __init__(self, connection: DatabaseConnection, adapter: "BaseAdapter"):
        """
        Initialize statistics analyzer.

        Args:
            connection: Database connection manager
            adapter: Database-specific adapter for statistics queries
        """
        self.connection = connection
        self.adapter = adapter

    async def analyze_column(
        self,
        table_name: str,
        column_name: str,
        schema: Optional[str] = None,
    ) -> ColumnStats:
        """
        Perform comprehensive column statistical analysis.

        Args:
            table_name: Table name
            column_name: Column name
            schema: Schema name

        Returns:
            Column statistics with all available metrics
        """
        # The adapter owns the dialect-specific statistics query.
        async with self.connection.get_connection() as conn:
            return await self.adapter.get_column_statistics(
                conn, table_name, column_name, schema
            )

    async def get_value_distribution(
        self,
        table_name: str,
        column_name: str,
        schema: Optional[str] = None,
        limit: int = 20,
    ) -> Distribution:
        """
        Get value distribution (top N most frequent values).

        Args:
            table_name: Table name
            column_name: Column name
            schema: Schema name
            limit: Number of top values to return

        Returns:
            Value distribution with frequencies
        """
        # The adapter owns the dialect-specific top-N frequency query.
        async with self.connection.get_connection() as conn:
            return await self.adapter.get_value_distribution(
                conn, table_name, column_name, schema, limit
            )

    async def analyze_multiple_columns(
        self,
        table_name: str,
        column_names: list[str],
        schema: Optional[str] = None,
    ) -> list[ColumnStats]:
        """
        Analyze multiple columns efficiently (batch operation).

        Args:
            table_name: Table name
            column_names: List of column names
            schema: Schema name

        Returns:
            List of column statistics
        """
        collected: list[ColumnStats] = []

        for name in column_names:
            try:
                collected.append(
                    await self.analyze_column(table_name, name, schema)
                )
            except Exception as exc:
                # Best-effort batch: a failed column yields a placeholder
                # entry carrying the error instead of aborting the run.
                collected.append(
                    ColumnStats(
                        column=name,
                        data_type="unknown",
                        total_rows=0,
                        null_count=0,
                        sample_size=0,
                        warning=f"Failed to analyze: {str(exc)}",
                    )
                )

        return collected
@@ -0,0 +1,371 @@
1
+ """Database connection management with SQLAlchemy."""
2
+
3
+ import asyncio
4
+ from contextlib import asynccontextmanager
5
+ from typing import AsyncGenerator, Optional, Union
6
+
7
+ from sqlalchemy import text, create_engine, Engine, Connection
8
+ from sqlalchemy.ext.asyncio import AsyncConnection, AsyncEngine, create_async_engine
9
+
10
+ from db_connect_mcp.models.config import DatabaseConfig
11
+
12
+
13
class SyncConnectionWrapper:
    """Wrapper for sync connections that transparently wraps raw SQL strings in text().

    Presents the same execute() surface as a SQLAlchemy Connection while
    copying the attributes that SQLAlchemy inspection utilities read
    directly; everything else is forwarded via __getattr__.
    """

    def __init__(self, sync_conn: Connection):
        """Initialize with a sync connection.

        Args:
            sync_conn: The underlying synchronous SQLAlchemy connection.
        """
        self.sync_conn = sync_conn
        # Copy attributes SQLAlchemy inspection code accesses directly.
        self.dialect = sync_conn.dialect
        self.engine = sync_conn.engine
        self.connection = (
            sync_conn.connection if hasattr(sync_conn, "connection") else sync_conn
        )
        self.info = sync_conn.info if hasattr(sync_conn, "info") else {}

    def execute(self, statement, parameters=None):
        """Execute a statement, wrapping plain strings in text().

        Args:
            statement: SQL statement (str or a SQLAlchemy executable).
            parameters: Optional bind parameters.

        Returns:
            The result from the wrapped connection's execute().
        """
        # `text` is imported at module level; the original re-imported it
        # here on every call, which was redundant.
        if isinstance(statement, str):
            statement = text(statement)

        if parameters:
            return self.sync_conn.execute(statement, parameters)
        return self.sync_conn.execute(statement)

    def __getattr__(self, name):
        """Forward any other attribute access to the wrapped connection."""
        return getattr(self.sync_conn, name)
43
+
44
+
45
class AsyncConnectionWrapper:
    """Wrapper to make sync connections usable from async code.

    Every blocking call is offloaded to a thread-pool executor so the
    event loop is never blocked by the synchronous driver.
    """

    def __init__(self, sync_conn: Connection):
        """Initialize with a sync connection.

        Args:
            sync_conn: The underlying synchronous SQLAlchemy connection.
        """
        self.sync_conn = sync_conn
        # None selects the event loop's default ThreadPoolExecutor.
        self._executor = None

    async def execute(self, statement, parameters=None):
        """Execute a statement in the thread pool, wrapping strings in text().

        Args:
            statement: SQL statement (str or a SQLAlchemy executable).
            parameters: Optional bind parameters.

        Returns:
            The result from the wrapped connection's execute().
        """
        # Wrap string statements in text() for proper execution.
        if isinstance(statement, str):
            statement = text(statement)

        # All callers are coroutines, so get_running_loop() is correct
        # (get_event_loop() in a coroutine is deprecated since 3.10).
        loop = asyncio.get_running_loop()
        if parameters:
            return await loop.run_in_executor(
                self._executor, self.sync_conn.execute, statement, parameters
            )
        return await loop.run_in_executor(
            self._executor, self.sync_conn.execute, statement
        )

    async def run_sync(self, fn, *args, **kwargs):
        """Run a synchronous function in the thread pool.

        This mimics the SQLAlchemy AsyncConnection.run_sync method.
        Inspection-style callables receive the raw connection; everything
        else receives a SyncConnectionWrapper so raw SQL strings work.
        """
        from functools import partial

        loop = asyncio.get_running_loop()
        fn_name = fn.__name__ if hasattr(fn, "__name__") else str(fn)

        # Heuristic: inspection-like helpers are identified by name and
        # handed the real Connection object rather than the wrapper.
        if (
            "inspect" in fn_name.lower()
            or "get_schema" in fn_name.lower()
            or "get_table" in fn_name.lower()
            or "describe" in fn_name.lower()
        ):
            target_conn = self.sync_conn
        else:
            # For other operations, use the wrapper so raw SQL strings
            # are still wrapped in text().
            target_conn = SyncConnectionWrapper(self.sync_conn)

        # BUG FIX: run_in_executor() accepts positional args only; the
        # original forwarded **kwargs to it directly, which raises
        # TypeError whenever kwargs are supplied. Bind them with partial().
        return await loop.run_in_executor(
            self._executor, partial(fn, target_conn, *args, **kwargs)
        )

    async def commit(self):
        """Commit the transaction in the thread pool."""
        loop = asyncio.get_running_loop()
        await loop.run_in_executor(self._executor, self.sync_conn.commit)

    async def rollback(self):
        """Roll back the transaction in the thread pool."""
        loop = asyncio.get_running_loop()
        await loop.run_in_executor(self._executor, self.sync_conn.rollback)

    def close(self):
        """Close the underlying sync connection."""
        self.sync_conn.close()
115
+
116
+
117
class DatabaseConnection:
    """Manages SQLAlchemy async engine and connection pool.

    Maintains either an AsyncEngine (PostgreSQL/MySQL) or a plain sync
    Engine (ClickHouse, whose driver is sync-only) and hands out
    connections through a single get_connection() context manager.
    """

    def __init__(self, config: DatabaseConfig):
        """
        Initialize database connection.

        Args:
            config: Database configuration with connection URL and pool settings
        """
        self.config = config
        # Exactly one of these is populated by initialize().
        self.engine: Optional[AsyncEngine] = None
        self.sync_engine: Optional[Engine] = None
        self._dialect = config.dialect
        self._driver = config.driver
        # Check if this is ClickHouse (sync only)
        # clickhouse-connect uses 'clickhousedb' as the dialect name in SQLAlchemy
        # NOTE(review): the third clause is subsumed by the first
        # (dialect == "clickhouse" already matches regardless of driver).
        self._is_sync_only = (
            self._dialect == "clickhouse"
            or self._dialect == "clickhousedb"
            or (self._dialect == "clickhouse" and self._driver == "connect")
        )

    async def initialize(self) -> None:
        """Initialize the async or sync engine based on driver requirements.

        Idempotent: a second call returns immediately. May rewrite
        self.config.url for asyncpg (SSL query params are moved into
        connect_args).
        """
        if self.engine is not None or self.sync_engine is not None:
            return  # Already initialized

        # Handle ClickHouse with sync-only driver
        if self._is_sync_only:
            # Create synchronous engine for ClickHouse
            self.sync_engine = create_engine(
                self.config.url,
                pool_size=self.config.pool_size,
                max_overflow=self.config.max_overflow,
                pool_timeout=self.config.pool_timeout,
                pool_pre_ping=True,
                echo=self.config.echo_sql,
            )
            return

        # Extract SSL configuration from URL for asyncpg
        connect_args = {}
        if self._dialect == "postgresql" and self._driver == "asyncpg":
            from sqlalchemy.engine.url import make_url

            url_obj = make_url(self.config.url)

            # Check for SSL-related query parameters
            if url_obj.query:
                # asyncpg expects 'ssl' parameter in connect_args, not in URL
                if "sslmode" in url_obj.query:
                    sslmode = url_obj.query["sslmode"]
                    # Map sslmode to asyncpg's ssl parameter
                    if sslmode in ["require", "prefer", "allow"]:
                        connect_args["ssl"] = sslmode
                    elif sslmode == "disable":
                        connect_args["ssl"] = False
                    # Remove sslmode from URL query to avoid "unexpected keyword" error
                    # (difference_update_query returns a NEW URL object).
                    url_obj = url_obj.difference_update_query(["sslmode"])
                    self.config.url = url_obj.render_as_string(hide_password=False)
                elif "ssl" in url_obj.query:
                    ssl_value = url_obj.query["ssl"]
                    if ssl_value in ["require", "true", "1"]:
                        connect_args["ssl"] = "require"
                    elif ssl_value in ["false", "0", "disable"]:
                        connect_args["ssl"] = False
                    # Remove ssl from URL query
                    url_obj = url_obj.difference_update_query(["ssl"])
                    self.config.url = url_obj.render_as_string(hide_password=False)

        # Create async engine for PostgreSQL and MySQL
        self.engine = create_async_engine(
            self.config.url,
            pool_size=self.config.pool_size,
            max_overflow=self.config.max_overflow,
            pool_timeout=self.config.pool_timeout,
            pool_pre_ping=True,  # Verify connections before using
            echo=self.config.echo_sql,
            connect_args=connect_args if connect_args else {},
        )

    async def dispose(self) -> None:
        """Dispose of the connection pool and cleanup resources."""
        if self.engine is not None:
            await self.engine.dispose()
            self.engine = None
        if self.sync_engine is not None:
            self.sync_engine.dispose()
            self.sync_engine = None

    @asynccontextmanager
    async def get_connection(
        self,
    ) -> AsyncGenerator[Union[AsyncConnection, AsyncConnectionWrapper], None]:
        """
        Get a connection from the pool as an async context manager.

        Yields:
            AsyncConnection or AsyncConnectionWrapper for executing queries

        Raises:
            RuntimeError: If engine not initialized
        """
        # Handle sync engine for ClickHouse
        if self._is_sync_only:
            if self.sync_engine is None:
                raise RuntimeError(
                    "DatabaseConnection not initialized. Call initialize() first."
                )

            # Get sync connection
            sync_conn = self.sync_engine.connect()

            # Monkey-patch the execute method to handle raw SQL strings
            # (callers may pass plain str; SQLAlchemy 2.x requires text()).
            original_execute = sync_conn.execute

            def patched_execute(statement, *args, **kwargs):
                from sqlalchemy import text

                if isinstance(statement, str):
                    statement = text(statement)
                return original_execute(statement, *args, **kwargs)

            sync_conn.execute = patched_execute

            # Wrap it for async
            wrapper = AsyncConnectionWrapper(sync_conn)

            try:
                # Set read-only mode if configured (sync)
                if self.config.read_only:
                    await self._set_readonly_wrapper(wrapper)

                # Set statement timeout if configured (sync)
                if self.config.statement_timeout:
                    await self._set_timeout_wrapper(
                        wrapper, self.config.statement_timeout
                    )

                yield wrapper
            finally:
                # Always return the connection, even if the caller raised.
                wrapper.close()
        else:
            # Handle async engine for PostgreSQL and MySQL
            if self.engine is None:
                raise RuntimeError(
                    "DatabaseConnection not initialized. Call initialize() first."
                )

            async with self.engine.connect() as conn:
                # Set read-only mode if configured
                if self.config.read_only:
                    await self._set_readonly(conn)

                # Set statement timeout if configured
                if self.config.statement_timeout:
                    await self._set_timeout(conn, self.config.statement_timeout)

                yield conn

    async def _set_readonly(self, conn: AsyncConnection) -> None:
        """Set connection to read-only mode based on database dialect."""
        if self._dialect == "postgresql":
            await conn.execute(
                text("SET SESSION CHARACTERISTICS AS TRANSACTION READ ONLY")
            )
        elif self._dialect == "mysql":
            await conn.execute(text("SET SESSION TRANSACTION READ ONLY"))
        elif self._dialect == "clickhouse":
            # ClickHouse doesn't have traditional read-only mode
            # Read-only is enforced at user/permission level
            pass

    async def _set_readonly_wrapper(self, wrapper: AsyncConnectionWrapper) -> None:
        """Set connection to read-only mode for wrapped sync connections.

        Currently a no-op: the only sync-only dialect is ClickHouse, whose
        read-only enforcement happens at the user/permission level.
        """
        if self._dialect == "clickhouse":
            # ClickHouse doesn't have traditional read-only mode
            pass

    async def _set_timeout(self, conn: AsyncConnection, timeout: int) -> None:
        """Set statement timeout based on database dialect.

        Args:
            conn: Live async connection.
            timeout: Timeout in seconds (converted to ms where required).
        """
        timeout_ms = timeout * 1000

        if self._dialect == "postgresql":
            await conn.execute(text(f"SET statement_timeout = {timeout_ms}"))
        elif self._dialect == "mysql":
            await conn.execute(text(f"SET SESSION max_execution_time = {timeout_ms}"))
        elif self._dialect == "clickhouse":
            # ClickHouse takes seconds, not milliseconds.
            await conn.execute(text(f"SET max_execution_time = {timeout}"))

    async def _set_timeout_wrapper(
        self, wrapper: AsyncConnectionWrapper, timeout: int
    ) -> None:
        """Set statement timeout for wrapped sync connections (ClickHouse only)."""
        if self._dialect == "clickhouse":
            await wrapper.execute(text(f"SET max_execution_time = {timeout}"))

    @property
    def dialect(self) -> str:
        """Get database dialect name."""
        return self._dialect

    @property
    def driver(self) -> str:
        """Get database driver name."""
        return self._driver

    @property
    def is_initialized(self) -> bool:
        """Check if engine is initialized."""
        return self.engine is not None or self.sync_engine is not None

    async def test_connection(self) -> bool:
        """
        Test database connectivity.

        Returns:
            True if connection successful, False otherwise
        """
        try:
            async with self.get_connection() as conn:
                await conn.execute(text("SELECT 1"))
                return True
        except Exception:
            # Any failure (pool, network, auth) is reported as False.
            return False

    async def get_version(self) -> str:
        """
        Get database version string.

        Returns:
            Database version string
        """
        version_query = {
            "postgresql": "SELECT version()",
            "mysql": "SELECT VERSION()",
            "clickhouse": "SELECT version()",
        }

        # Unknown dialects fall back to the common "SELECT version()".
        query = version_query.get(self._dialect, "SELECT version()")

        async with self.get_connection() as conn:
            result = await conn.execute(text(query))
            row = result.fetchone()
            return str(row[0]) if row else "Unknown"

    async def __aenter__(self) -> "DatabaseConnection":
        """Async context manager entry."""
        await self.initialize()
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb) -> None:
        """Async context manager exit."""
        await self.dispose()