mcp-server-motherduck 0.7.2__py3-none-any.whl → 1.0.0__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package, as published to their public registry. It is provided for informational purposes only.
- mcp_server_motherduck/__init__.py +198 -133
- mcp_server_motherduck/assets/duck_feet_square.png +0 -0
- mcp_server_motherduck/configs.py +1 -1
- mcp_server_motherduck/database.py +292 -53
- mcp_server_motherduck/instructions.py +187 -0
- mcp_server_motherduck/server.py +208 -115
- mcp_server_motherduck/tools/__init__.py +19 -0
- mcp_server_motherduck/tools/execute_query.py +21 -0
- mcp_server_motherduck/tools/list_columns.py +99 -0
- mcp_server_motherduck/tools/list_databases.py +52 -0
- mcp_server_motherduck/tools/list_tables.py +91 -0
- mcp_server_motherduck/tools/switch_database_connection.py +130 -0
- mcp_server_motherduck-1.0.0.dist-info/METADATA +225 -0
- mcp_server_motherduck-1.0.0.dist-info/RECORD +17 -0
- {mcp_server_motherduck-0.7.2.dist-info → mcp_server_motherduck-1.0.0.dist-info}/WHEEL +1 -1
- mcp_server_motherduck/prompt.py +0 -195
- mcp_server_motherduck-0.7.2.dist-info/METADATA +0 -458
- mcp_server_motherduck-0.7.2.dist-info/RECORD +0 -10
- {mcp_server_motherduck-0.7.2.dist-info → mcp_server_motherduck-1.0.0.dist-info}/entry_points.txt +0 -0
- {mcp_server_motherduck-0.7.2.dist-info → mcp_server_motherduck-1.0.0.dist-info}/licenses/LICENSE +0 -0
mcp_server_motherduck/database.py

```diff
@@ -1,15 +1,39 @@
+import json
+import logging
 import os
+import re
+import threading
+from typing import Any, Literal, Optional
+
 import duckdb
-
-import io
-from contextlib import redirect_stdout
-from tabulate import tabulate
-import logging
+
 from .configs import SERVER_VERSION
 
 logger = logging.getLogger("mcp_server_motherduck")
 
 
+def _is_read_scaling_connection(conn: duckdb.DuckDBPyConnection) -> bool:
+    """
+    Check if a MotherDuck connection is using read-scaling.
+
+    Read-scaling connections have a duckling ID ending with .rs.{number}
+    e.g., "my_database.rs.3", "app_db.rs.0"
+
+    Read-write connections end with .rw
+    e.g., "my_database.rw", "app_db.rw"
+    """
+    try:
+        # __md_duckling_id() is a table function, must use FROM clause
+        result = conn.execute("SELECT * FROM __md_duckling_id()").fetchone()
+        if result and result[0]:
+            duckling_id = result[0]
+            # Check if duckling ID ends with .rs.{number}
+            return bool(re.search(r"\.rs\.\d+$", duckling_id))
+        return False
+    except Exception:
+        return False
+
+
 class DatabaseClient:
     def __init__(
         self,
```
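The new `_is_read_scaling_connection` helper boils down to a suffix check on the duckling ID. A minimal sketch of that classification using made-up duckling IDs (the real values come from `SELECT * FROM __md_duckling_id()` on a live MotherDuck connection):

```python
import re

# Hypothetical duckling IDs, for illustration only.
for duckling_id in ["my_database.rs.3", "app_db.rs.0", "my_database.rw"]:
    is_read_scaling = bool(re.search(r"\.rs\.\d+$", duckling_id))
    print(f"{duckling_id}: read-scaling={is_read_scaling}")
# my_database.rs.3: read-scaling=True
# app_db.rs.0: read-scaling=True
# my_database.rw: read-scaling=False
```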
```diff
@@ -18,8 +42,18 @@ class DatabaseClient:
         home_dir: str | None = None,
         saas_mode: bool = False,
         read_only: bool = False,
+        ephemeral_connections: bool = True,
+        max_rows: int = 1024,
+        max_chars: int = 50000,
+        query_timeout: int = -1,
+        init_sql: str | None = None,
     ):
         self._read_only = read_only
+        self._ephemeral_connections = ephemeral_connections
+        self._max_rows = max_rows
+        self._max_chars = max_chars
+        self._query_timeout = query_timeout
+        self._init_sql = init_sql
         self.db_path, self.db_type = self._resolve_db_path_type(
             db_path, motherduck_token, saas_mode
         )
```
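The constructor grows result-size and connection-lifecycle knobs. A hypothetical instantiation showing what each new parameter controls; the argument names come from the diff, but the values here are illustrative:

```python
from mcp_server_motherduck.database import DatabaseClient

client = DatabaseClient(
    db_path="analytics.duckdb",    # hypothetical local file
    read_only=True,
    ephemeral_connections=True,    # reopen per query so other processes can read the file
    max_rows=1024,                 # cap on rows returned per query
    max_chars=50_000,              # cap on the serialized JSON result size
    query_timeout=30,              # seconds; -1 (the default) disables the timeout
    init_sql="SET threads TO 4;",  # inline SQL, or a path to a .sql file
)
```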
```diff
@@ -36,23 +70,30 @@ class DatabaseClient:
 
         logger.info(f"🔌 Connecting to {self.db_type} database")
 
-        #
-
-            raise ValueError("Read-only mode is not supported for S3 databases")
+        # Read-only handling for local DuckDB files (not in-memory)
+        is_local_file = self.db_type == "duckdb" and self.db_path != ":memory:"
 
-        if
-            #
+        if is_local_file and self._read_only:
+            # For read-only local DuckDB files, use short-lived connections by default
+            # to allow concurrent access from other processes
             try:
                 conn = duckdb.connect(
                     self.db_path,
-                    config={
-                        "custom_user_agent": f"mcp-server-motherduck/{SERVER_VERSION}"
-                    },
+                    config={"custom_user_agent": f"mcp-server-motherduck/{SERVER_VERSION}"},
                     read_only=self._read_only,
                 )
                 conn.execute("SELECT 1")
-
-
+
+                if self._ephemeral_connections:
+                    # Default: close connection for concurrent access
+                    conn.close()
+                    return None
+                else:
+                    # User requested persistent connection via --no-ephemeral-connections
+                    logger.info("Using persistent read-only connection")
+                    # Execute init SQL
+                    self._execute_init_sql(conn)
+                    return conn
             except Exception as e:
                 logger.error(f"❌ Read-only check failed: {e}")
                 raise
```
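The ephemeral default exists because DuckDB lets multiple processes open the same database file only when every connection is read-only, and even a read-only handle holds the file while open. A sketch of the open-query-close pattern the server now defaults to, assuming a local file exists at the hypothetical path:

```python
import duckdb

def run_readonly(path: str, sql: str):
    # Open, query, close: the file is free for other processes between calls.
    conn = duckdb.connect(path, read_only=True)
    try:
        return conn.execute(sql).fetchall()
    finally:
        conn.close()

rows = run_readonly("analytics.duckdb", "SELECT 42")  # placeholder path and query
```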
```diff
@@ -60,27 +101,27 @@ class DatabaseClient:
 
         # Check if this is an S3 path
         if self.db_type == "s3":
             # For S3, we need to create an in-memory connection and attach the S3 database
-            conn = duckdb.connect(
-
+            conn = duckdb.connect(":memory:")
+
             # Install and load the httpfs extension for S3 support
             import io
-            from contextlib import
-
+            from contextlib import redirect_stderr, redirect_stdout
+
             null_file = io.StringIO()
             with redirect_stdout(null_file), redirect_stderr(null_file):
                 try:
                     conn.execute("INSTALL httpfs;")
-                except:
+                except Exception:
                     pass  # Extension might already be installed
                 conn.execute("LOAD httpfs;")
-
+
             # Configure S3 credentials from environment variables using CREATE SECRET
-            aws_access_key = os.environ.get(
-            aws_secret_key = os.environ.get(
-
-
-
-            if aws_access_key and aws_secret_key:
+            aws_access_key = os.environ.get("AWS_ACCESS_KEY_ID")
+            aws_secret_key = os.environ.get("AWS_SECRET_ACCESS_KEY")
+            aws_session_token = os.environ.get("AWS_SESSION_TOKEN")
+            aws_region = os.environ.get("AWS_DEFAULT_REGION", "us-east-1")
+
+            if aws_access_key and aws_secret_key and not aws_session_token:
                 # Use CREATE SECRET for better credential management
                 conn.execute(f"""
                     CREATE SECRET IF NOT EXISTS s3_secret (
```
```diff
@@ -90,7 +131,17 @@ class DatabaseClient:
                         REGION '{aws_region}'
                     );
                 """)
-
+            elif aws_session_token:
+                # Use credential_chain provider to automatically fetch credentials
+                # This supports IAM roles, SSO, instance profiles, etc.
+                conn.execute(f"""
+                    CREATE SECRET IF NOT EXISTS s3_secret (
+                        TYPE S3,
+                        PROVIDER credential_chain,
+                        REGION '{aws_region}'
+                    );
+                """)
+
             # Attach the S3 database
             try:
                 # For S3, we always attach as READ_ONLY since S3 storage is typically read-only
```
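The new `elif aws_session_token` branch avoids baking temporary credentials into the secret: when a session token is present, DuckDB's `credential_chain` provider resolves credentials through the standard AWS chain (env vars, SSO, instance profiles) instead. A standalone sketch mirroring the diff; the region value is illustrative:

```python
import duckdb

conn = duckdb.connect(":memory:")
conn.execute("INSTALL httpfs;")
conn.execute("LOAD httpfs;")
# Defer credential lookup to the AWS provider chain instead of inlining keys.
conn.execute("""
    CREATE SECRET IF NOT EXISTS s3_secret (
        TYPE S3,
        PROVIDER credential_chain,
        REGION 'us-east-1'
    );
""")
```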
```diff
@@ -98,7 +149,9 @@ class DatabaseClient:
                 conn.execute(f"ATTACH '{self.db_path}' AS s3db (READ_ONLY);")
                 # Use the attached database
                 conn.execute("USE s3db;")
-                logger.info(
+                logger.info(
+                    f"✅ Successfully connected to {self.db_type} database (attached as read-only)"
+                )
             except Exception as e:
                 logger.error(f"Failed to attach S3 database: {e}")
                 # If the database doesn't exist and we're not in read-only mode, try to create it
```
```diff
@@ -114,19 +167,62 @@ class DatabaseClient:
                     raise
                 else:
                     raise
-
+
+            # Execute init SQL
+            self._execute_init_sql(conn)
             return conn
 
+        # For MotherDuck, pass read_only flag; for in-memory it's not applicable
+        read_only_flag = self._read_only if self.db_type == "motherduck" else False
+
         conn = duckdb.connect(
             self.db_path,
             config={"custom_user_agent": f"mcp-server-motherduck/{SERVER_VERSION}"},
-            read_only=
+            read_only=read_only_flag,
         )
 
         logger.info(f"✅ Successfully connected to {self.db_type} database")
 
+        # For MotherDuck with --read-only flag, verify it's a read-scaling connection
+        if self.db_type == "motherduck" and self._read_only:
+            if not _is_read_scaling_connection(conn):
+                conn.close()
+                raise ValueError(
+                    "The --read-only flag with MotherDuck requires a read-scaling token. "
+                    "You appear to be using a read/write token. Please use a read-scaling token instead. "
+                    "See: https://motherduck.com/docs/key-tasks/authenticating-and-connecting-to-motherduck/"
+                )
+            logger.info("Verified read-scaling connection for --read-only mode")
+
+        # Execute init SQL
+        self._execute_init_sql(conn)
+
         return conn
 
+    def _execute_init_sql(self, conn: duckdb.DuckDBPyConnection) -> None:
+        """Execute initialization SQL if provided."""
+        if not self._init_sql:
+            return
+
+        try:
+            # Check if init_sql is a file path
+            if os.path.isfile(self._init_sql):
+                logger.info(f"Loading init SQL from file: {self._init_sql}")
+                with open(self._init_sql) as f:
+                    sql_content = f.read()
+            else:
+                # Treat as raw SQL string
+                logger.info("Executing init SQL string")
+                sql_content = self._init_sql
+
+            # Execute the SQL
+            conn.execute(sql_content)
+            logger.info("Init SQL executed successfully")
+
+        except Exception as e:
+            logger.error(f"Failed to execute init SQL: {e}")
+            raise ValueError(f"Init SQL execution failed: {e}") from e
+
     def _resolve_db_path_type(
         self, db_path: str, motherduck_token: str | None = None, saas_mode: bool = False
     ) -> tuple[str, Literal["duckdb", "motherduck", "s3"]]:
```
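`_execute_init_sql` decides between the two forms with `os.path.isfile()`: an existing file is read and executed, anything else is executed verbatim as SQL. Hypothetical values for both forms; note the fallback means a mistyped path would be run as (failing) SQL:

```python
from mcp_server_motherduck.database import DatabaseClient

# Inline SQL, executed when the connection is initialized
DatabaseClient(db_path=":memory:", init_sql="INSTALL spatial; LOAD spatial;")

# File form: taken only if the path exists (hypothetical path)
DatabaseClient(db_path=":memory:", init_sql="/etc/duckdb/init.sql")
```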
```diff
@@ -134,7 +230,7 @@ class DatabaseClient:
         # Handle S3 paths
         if db_path.startswith("s3://"):
             return db_path, "s3"
-
+
         # Handle MotherDuck paths
         if db_path.startswith("md:"):
             if motherduck_token:
```
```diff
@@ -150,17 +246,16 @@ class DatabaseClient:
                 f"{db_path}?motherduck_token={motherduck_token}",
                 "motherduck",
             )
-        elif os.getenv("motherduck_token"):
-
-
-            )
+        elif os.getenv("motherduck_token") or os.getenv("MOTHERDUCK_TOKEN"):
+            token = os.getenv("motherduck_token") or os.getenv("MOTHERDUCK_TOKEN")
+            logger.info("Using MotherDuck token from env to connect to database `md:`")
             return (
-                f"{db_path}?motherduck_token={
+                f"{db_path}?motherduck_token={token}",
                 "motherduck",
             )
         else:
             raise ValueError(
-                "Please set the `motherduck_token` as an environment variable or pass it as an argument with `--motherduck-token` when using `md:` as db_path."
+                "Please set the `motherduck_token` or `MOTHERDUCK_TOKEN` as an environment variable or pass it as an argument with `--motherduck-token` when using `md:` as db_path."
             )
 
         if db_path == ":memory:":
```
```diff
@@ -168,32 +263,176 @@ class DatabaseClient:
 
         return db_path, "duckdb"
 
-    def _execute(self, query: str) -> str:
+    def _execute(self, query: str) -> dict[str, Any]:
+        """Execute query and return JSON-serializable result."""
+        # Get connection to use
         if self.conn is None:
-            # open short lived readonly connection for local DuckDB, run query, close connection, return result
             conn = duckdb.connect(
                 self.db_path,
                 config={"custom_user_agent": f"mcp-server-motherduck/{SERVER_VERSION}"},
                 read_only=self._read_only,
            )
-            q = conn.execute(query)
         else:
-
+            conn = self.conn
 
-
-
-
-
-
+        try:
+            # Execute with or without timeout
+            if self._query_timeout > 0:
+                columns, column_types, rows, has_more_rows = self._execute_with_timeout(conn, query)
+            else:
+                columns, column_types, rows, has_more_rows = self._execute_direct(conn, query)
 
-
-
+            # Build result object
+            result: dict[str, Any] = {
+                "success": True,
+                "columns": columns,
+                "columnTypes": column_types,
+                "rows": rows,
+                "rowCount": len(rows),
+            }
+
+            # Add row truncation warning
+            if has_more_rows:
+                result["truncated"] = True
+                result["warning"] = (
+                    f"Results limited to {self._max_rows:,} rows. Query returned more data."
+                )
+
+            # Check character limit on JSON output
+            json_output = json.dumps(result, default=str)
+            if len(json_output) > self._max_chars:
+                # Progressively reduce rows until under limit
+                while rows and len(json_output) > self._max_chars:
+                    # Remove ~10% of rows each iteration
+                    remove_count = max(1, len(rows) // 10)
+                    rows = rows[:-remove_count]
+                    result["rows"] = rows
+                    result["rowCount"] = len(rows)
+                    result["truncated"] = True
+                    result["warning"] = (
+                        f"Results limited to {len(rows):,} rows due to "
+                        f"{self._max_chars // 1000}KB output size limit."
+                    )
+                    json_output = json.dumps(result, default=str)
+
+            return result
+
+        finally:
+            # Close connection if it was temporary
+            if self.conn is None:
+                conn.close()
+
+    def _execute_direct(
+        self, conn: duckdb.DuckDBPyConnection, query: str
+    ) -> tuple[list[str], list[str], list[list[Any]], bool]:
+        """Execute query without timeout - returns columns, types, rows, has_more."""
+        q = conn.execute(query)
 
-
+        # Get column metadata
+        columns = [d[0] for d in q.description] if q.description else []
+        column_types = [str(d[1]) for d in q.description] if q.description else []
+
+        # Fetch rows (max_rows + 1 to detect truncation)
+        raw_rows = q.fetchmany(self._max_rows + 1)
+        has_more_rows = len(raw_rows) > self._max_rows
+        if has_more_rows:
+            raw_rows = raw_rows[: self._max_rows]
+
+        # Convert rows to JSON-serializable lists
+        rows = [list(row) for row in raw_rows]
+
+        return columns, column_types, rows, has_more_rows
+
+    def _execute_with_timeout(
+        self, conn: duckdb.DuckDBPyConnection, query: str
+    ) -> tuple[list[str], list[str], list[list[Any]], bool]:
+        """Execute query with timeout using threading.Timer and conn.interrupt()."""
+        timer = threading.Timer(self._query_timeout, conn.interrupt)
+        timer.start()
 
-    def query(self, query: str) -> str:
         try:
-            return self.
+            return self._execute_direct(conn, query)
+        except duckdb.InterruptException:
+            raise ValueError(
+                f"Query execution timed out after {self._query_timeout} seconds. "
+                "Increase timeout with --query-timeout argument when starting the mcp server."
+            )
+        finally:
+            timer.cancel()
 
+    def query(self, query: str) -> dict[str, Any]:
+        """Execute a SQL query and return JSON-serializable result."""
+        try:
+            return self._execute(query)
+        except ValueError:
+            # Re-raise ValueError (timeout, etc.) as-is
+            raise
         except Exception as e:
-
+            # Return error as structured response
+            return {
+                "success": False,
+                "error": str(e),
+                "errorType": type(e).__name__,
+            }
+
+    def execute_raw(self, query: str) -> tuple[list[str], list[str], list[list[Any]]]:
+        """
+        Execute a query and return raw results (columns, types, rows).
+        Used by catalog tools that need custom result formatting.
+        """
+        if self.conn is None:
+            conn = duckdb.connect(
+                self.db_path,
+                config={"custom_user_agent": f"mcp-server-motherduck/{SERVER_VERSION}"},
+                read_only=self._read_only,
+            )
+        else:
+            conn = self.conn
+
+        try:
+            q = conn.execute(query)
+            columns = [d[0] for d in q.description] if q.description else []
+            column_types = [str(d[1]) for d in q.description] if q.description else []
+            rows = [list(row) for row in q.fetchall()]
+            return columns, column_types, rows
+        finally:
+            if self.conn is None:
+                conn.close()
+
+    def switch_database(self, path: str, read_only: bool = True) -> None:
+        """
+        Switch to a different primary database.
+
+        Closes any existing connection and updates the database path.
+        The next query will connect to the new database.
+
+        Args:
+            path: New database path (local file, :memory:, or md:database_name)
+            read_only: Whether to connect in read-only mode
+        """
+        # Close existing connection if any
+        if self.conn is not None:
+            try:
+                self.conn.close()
+            except Exception:
+                pass  # Ignore close errors
+            self.conn = None
+
+        # Update database configuration
+        self.db_path = path
+        self._read_only = read_only
+
+        # Determine new database type
+        if path.startswith("md:") or path.startswith("motherduck:"):
+            self.db_type = "motherduck"
+        elif path.startswith("s3://"):
+            self.db_type = "s3"
+        elif path == ":memory:":
+            self.db_type = "memory"
+        else:
+            self.db_type = "duckdb"
+
+        # Re-initialize connection (will be None for read-only local DuckDB)
+        self.conn = self._initialize_connection()
+
+        logger.info(f"Switched to database: {path} (read_only={read_only})")
```
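Two implementation details here are worth calling out. `_execute_direct` fetches `max_rows + 1` rows so a single surplus row signals truncation without materializing the full result, and `_execute_with_timeout` arms a `threading.Timer` that fires `conn.interrupt()`, which surfaces in the querying thread as `duckdb.InterruptException`. A self-contained sketch of the timeout pattern:

```python
import threading

import duckdb

def query_with_timeout(conn: duckdb.DuckDBPyConnection, sql: str, seconds: float):
    # If the query outlives the timer, interrupt() aborts it mid-flight.
    timer = threading.Timer(seconds, conn.interrupt)
    timer.start()
    try:
        return conn.execute(sql).fetchall()
    except duckdb.InterruptException:
        raise TimeoutError(f"query exceeded {seconds}s") from None
    finally:
        timer.cancel()  # stop the timer if the query finished in time

conn = duckdb.connect(":memory:")
print(query_with_timeout(conn, "SELECT 1", seconds=5.0))  # [(1,)]
```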
mcp_server_motherduck/instructions.py (new file)

````diff
@@ -0,0 +1,187 @@
+"""
+Server instructions for DuckDB/MotherDuck MCP Server.
+
+These instructions are sent to the client during initialization
+to provide context about how to use the server's capabilities.
+"""
+
+INSTRUCTIONS_BASE = """Execute SQL queries against DuckDB and MotherDuck databases using DuckDB SQL syntax.
+
+## Available Tools
+
+- `execute_query`: Execute SQL queries (DuckDB SQL dialect)
+- `list_databases`: List all available databases
+- `list_tables`: List tables and views in a database
+- `list_columns`: List columns of a table or view
+
+## DuckDB SQL Quick Reference
+
+**Name Qualification:**
+- Format: `database.schema.table` or just `schema.table` or `table`
+- Default schema is `main`: `db.table` = `db.main.table`
+- Use fully qualified names when joining tables across different databases
+
+**Identifiers and Literals:**
+- Use double quotes (`"`) for identifiers with spaces/special characters or case-sensitivity
+- Use single quotes (`'`) for string literals
+
+**Flexible Query Structure:**
+- Queries can start with `FROM`: `FROM my_table WHERE condition;`
+- `SELECT` without `FROM` for expressions: `SELECT 1 + 1 AS result;`
+- Support for `CREATE TABLE AS` (CTAS): `CREATE TABLE new_table AS SELECT * FROM old_table;`
+
+**Advanced Column Selection:**
+- Exclude columns: `SELECT * EXCLUDE (sensitive_data) FROM users;`
+- Replace columns: `SELECT * REPLACE (UPPER(name) AS name) FROM users;`
+- Pattern matching: `SELECT COLUMNS('sales_.*') FROM sales_data;`
+
+**Grouping and Ordering Shortcuts:**
+- Group by all non-aggregated columns: `SELECT category, SUM(sales) FROM sales_data GROUP BY ALL;`
+- Order by all columns: `SELECT * FROM my_table ORDER BY ALL;`
+
+**Complex Data Types:**
+- Lists: `SELECT [1, 2, 3] AS my_list;`
+- Structs: `SELECT {'a': 1, 'b': 'text'} AS my_struct;`
+- Maps: `SELECT MAP([1,2],['one','two']) AS my_map;`
+- JSON: `json_col->>'key'` (returns text) or `data->'$.user.id'` (returns JSON)
+
+**Date/Time Operations:**
+- String to timestamp: `strptime('2023-07-23', '%Y-%m-%d')::TIMESTAMP`
+- Format timestamp: `strftime(NOW(), '%Y-%m-%d')`
+- Extract parts: `EXTRACT(YEAR FROM DATE '2023-07-23')`
+
+### Schema Exploration
+
+```sql
+-- List all databases
+SELECT database_name, type FROM duckdb_databases();
+
+-- For MotherDuck: List all databases (including shared)
+SELECT alias as database_name, type FROM MD_ALL_DATABASES();
+
+-- List tables in a database
+SELECT schema_name, table_name FROM duckdb_tables()
+WHERE database_name = 'your_db';
+
+-- Get column info
+SELECT column_name, data_type FROM duckdb_columns()
+WHERE database_name = 'your_db' AND table_name = 'your_table';
+
+-- Quick preview with statistics
+SUMMARIZE your_table;
+```
+
+### Query Best Practices
+
+- Filter early to reduce data volume before blocking operations
+- Use CTEs to break complex queries into manageable parts
+- Avoid unnecessary `ORDER BY` on intermediate results
+- Use `arg_max()` and `arg_min()` for "most recent" queries
+- Use `QUALIFY` for filtering window function results
+
+```sql
+-- Get top 2 products by sales in each category
+SELECT category, product_name, sales_amount
+FROM products
+QUALIFY ROW_NUMBER() OVER (PARTITION BY category ORDER BY sales_amount DESC) <= 2;
+```
+
+### Persisting In-Memory Data to File
+
+To save an in-memory database to a persistent file:
+
+```sql
+-- Attach a new file-based database
+ATTACH '/path/to/my_database.db' AS my_db;
+
+-- Copy all data from memory to the file
+COPY FROM DATABASE memory TO my_db;
+
+-- Optionally detach when done
+DETACH my_db;
+```
+"""
+
+
+def get_instructions(
+    read_only: bool = False,
+    saas_mode: bool = False,
+    db_path: str = ":memory:",
+    allow_switch_databases: bool = False,
+) -> str:
+    """
+    Get server instructions with connection context.
+
+    Args:
+        read_only: Whether the server is in read-only mode
+        saas_mode: Whether MotherDuck is in SaaS mode
+        db_path: The database path being used
+        allow_switch_databases: Whether database switching is enabled
+
+    Returns:
+        Instructions string with context header
+    """
+    context_lines = []
+
+    # Database info
+    if db_path == ":memory:":
+        context_lines.append("- **Database**: In-memory (data will not persist after session ends)")
+    elif db_path.startswith("md:"):
+        context_lines.append(f"- **Database**: MotherDuck cloud database (`{db_path}`)")
+    elif db_path.startswith("s3://"):
+        context_lines.append(
+            f"- **Database**: S3-hosted DuckDB file (`{db_path}`) - always read-only"
+        )
+    else:
+        context_lines.append(f"- **Database**: Local DuckDB file (`{db_path}`)")
+
+    # Access mode
+    if read_only:
+        context_lines.append(
+            "- **Access mode**: Read-only - CREATE, INSERT, UPDATE, DELETE, and DROP operations are disabled"
+        )
+    else:
+        context_lines.append("- **Access mode**: Read-write - all SQL operations are allowed")
+
+    # Security modes
+    if saas_mode:
+        context_lines.append(
+            "- **SaaS mode**: Enabled - local filesystem access is restricted for security"
+        )
+
+    # Available tools
+    tools = ["execute_query", "list_databases", "list_tables", "list_columns"]
+    if allow_switch_databases:
+        tools.append("switch_database_connection")
+    context_lines.append(f"- **Available tools**: {', '.join(tools)}")
+
+    # Implications for the agent
+    context_lines.append("")
+    context_lines.append("### Important Implications")
+
+    if db_path == ":memory:" and not read_only:
+        context_lines.append("- Data created in this session will be lost when the session ends")
+        context_lines.append(
+            "- To persist data, use ATTACH and COPY FROM DATABASE (see 'Persisting In-Memory Data to File' below)"
+        )
+
+    if read_only:
+        context_lines.append(
+            "- You can only query existing data; any attempt to modify data will fail"
+        )
+        context_lines.append("- Use this mode for safe data exploration and analysis")
+
+    if allow_switch_databases and not read_only:
+        context_lines.append(
+            "- You can switch to different databases using switch_database_connection"
+        )
+        context_lines.append(
+            "- To create a new database file, use create_if_not_exists=True (only in read-write mode)"
+        )
+    elif allow_switch_databases and read_only:
+        context_lines.append(
+            "- You can switch to different existing databases, but cannot create new ones"
+        )
+
+    context = "## Server Configuration\n\n" + "\n".join(context_lines) + "\n\n"
+    return context + INSTRUCTIONS_BASE
````
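For context, the module composes a per-server configuration header on top of the static reference text. A sketch of how a caller might render it (module path taken from the file list above):

```python
from mcp_server_motherduck.instructions import get_instructions

text = get_instructions(read_only=True, db_path="md:my_db")
print(text.splitlines()[0])  # -> "## Server Configuration"
```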