capybara-db-mcp 1.1.0 → 1.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +23 -16
- package/dist/chunk-POWQBIVO.js +118 -0
- package/dist/index.js +2092 -412
- package/dist/public/assets/index--kkEKYvw.js +142 -0
- package/dist/public/assets/index-BwZw7IYg.css +1 -0
- package/dist/public/index.html +2 -2
- package/dist/{registry-LZG55S65.js → registry-4H4F4NIB.js} +1 -1
- package/package.json +1 -1
- package/dist/chunk-LRURCPHJ.js +0 -2186
- package/dist/public/assets/index-BJ-1UrcV.css +0 -1
- package/dist/public/assets/index-DBYlgGks.js +0 -147
package/README.md
CHANGED
|
@@ -11,7 +11,7 @@ This project is intended for development, sandbox, or formally reviewed environm
|
|
|
11
11
|
|
|
12
12
|
This project is designed to reduce the likelihood of exposing query results to LLMs, but it does not replace enterprise security controls and should not be used to bypass governance processes.
|
|
13
13
|
|
|
14
|
-
**capybara-db-mcp** is a community fork of [DBHub](https://github.com/bytebase/dbhub) by [Bytebase](https://www.bytebase.com/). The key difference: **DBHub sends query results (rows, columns, counts) directly to the LLM**, which can expose sensitive data. capybara-db-mcp is designed to reduce the likelihood of exposing query results to LLMs by writing results to local files, opening them in the editor, and returning status-oriented metadata to the MCP client instead of result sets. It
|
|
14
|
+
**capybara-db-mcp** is a community fork of [DBHub](https://github.com/bytebase/dbhub) by [Bytebase](https://www.bytebase.com/). The key difference: **DBHub sends query results (rows, columns, counts) directly to the LLM**, which can expose sensitive data. capybara-db-mcp is designed to reduce the likelihood of exposing query results to LLMs by writing results to local files, opening them in the editor, and returning status-oriented metadata to the MCP client instead of result sets. It uses connector-level read-only connections (PostgreSQL, SQLite), keeps the same internal names (e.g. `dbhub.toml`) for easy merging from upstream, and adds **default-schema support** for PostgreSQL and multi-database setups.
|
|
15
15
|
|
|
16
16
|
- **Original project:** [github.com/bytebase/dbhub](https://github.com/bytebase/dbhub)
|
|
17
17
|
- **This fork:** [github.com/ajgreyling/capybara-db-mcp](https://github.com/ajgreyling/capybara-db-mcp)
|
|
@@ -34,12 +34,12 @@ git remote set-url origin https://github.com/ajgreyling/capybara-db-mcp.git
|
|
|
34
34
|
|
|
35
35
|
```mermaid
|
|
36
36
|
flowchart LR
|
|
37
|
-
subgraph clients["MCP Clients"]
|
|
37
|
+
subgraph clients["MCP Clients - Supported"]
|
|
38
38
|
A[Claude Desktop]
|
|
39
39
|
B[Claude Code]
|
|
40
40
|
C[Cursor]
|
|
41
|
-
D[
|
|
42
|
-
E[
|
|
41
|
+
D[Codex]
|
|
42
|
+
E[Gemini]
|
|
43
43
|
end
|
|
44
44
|
|
|
45
45
|
subgraph server["MCP Server"]
|
|
@@ -67,19 +67,27 @@ flowchart LR
|
|
|
67
67
|
M --> Ma
|
|
68
68
|
```
|
|
69
69
|
|
|
70
|
+
### Unsupported: VS Code / GitHub Copilot
|
|
71
|
+
|
|
72
|
+
**VS Code and GitHub Copilot are not supported** for capybara-db-mcp for security reasons. There is no project-level ignore file (such as `.cursorignore` or `.aiexclude`) that Copilot consistently reads to exclude `.safe-sql-results/` from AI context. Query result files may therefore be exposed to the LLM when using VS Code/Copilot, undermining the PII isolation design.
|
|
73
|
+
|
|
74
|
+
**Use of capybara-db-mcp in VS Code/Copilot is not recommended.** For PII-safe database workflows, use one of the supported editors that provide ignore mechanisms: **Cursor**, **Claude Code**, **Codex**, or **Gemini**.
|
|
75
|
+
|
|
70
76
|
## Security Model Overview
|
|
71
77
|
|
|
72
78
|
capybara-db-mcp is designed to reduce the likelihood of transmitting query result data to an LLM by isolating result sets to the local filesystem and returning status-oriented metadata to the MCP client.
|
|
73
79
|
|
|
74
80
|
- **1) LLM generates SQL**: The MCP client sends an `execute_sql` request containing SQL text.
|
|
75
|
-
- **2)
|
|
76
|
-
- **3) Query executes against the database**: The
|
|
77
|
-
- **4) Results are written locally**: Result sets are written to `.safe-sql-results/` and opened in the editor (
|
|
81
|
+
- **2) Connector is read-only**: Database connections are opened in read-only mode (PostgreSQL: `default_transaction_read_only`; SQLite: readonly file mode). Write attempts fail at the database level.
|
|
82
|
+
- **3) Query executes against the database**: The query runs using the configured connector.
|
|
83
|
+
- **4) Results are written locally**: Result sets are written to `.safe-sql-results/` and opened in the editor when running in a supported AI editor (Cursor, Claude Code, Codex, Gemini).
|
|
78
84
|
- **5) LLM receives metadata only**: The MCP tool response is formatted to avoid including raw query results in the response payload.
|
|
79
85
|
- **6) Logging remains local**: Operational logs and diagnostic details are written locally.
|
|
80
86
|
|
|
81
87
|
This design reduces the likelihood of transmitting result data to an LLM, but it does not eliminate operational, environment, or governance risks. Database-level controls (RBAC, network segmentation, auditing) and approved operating procedures remain required.
|
|
82
88
|
|
|
89
|
+
For detailed PII safety mechanisms (result isolation, generic errors, log redaction, search_objects names-only, request telemetry redaction, HTTP hardening), see [ARCHITECTURE.md](ARCHITECTURE.md).
|
|
90
|
+
|
|
83
91
|
### Result handling and LLM exposure minimization
|
|
84
92
|
|
|
85
93
|
Query results are written to local files and opened in the editor; the MCP tool response is formatted to return success/failure metadata rather than result sets:
|
|
@@ -115,16 +123,16 @@ flowchart TB
|
|
|
115
123
|
|
|
116
124
|
capybara-db-mcp is a zero-dependency, token-efficient MCP server implementing the Model Context Protocol (MCP). It supports the same features as DBHub, plus a default schema.
|
|
117
125
|
|
|
118
|
-
**Read-only enforcement**:
|
|
126
|
+
**Read-only enforcement**: Database connections are opened in read-only mode (PostgreSQL: `default_transaction_read_only`; SQLite: readonly file mode). UPDATE, DELETE, INSERT, and other write operations fail at the connection level. This reduces the risk of accidental writes but does not replace database-level RBAC or permissions configuration.
|
|
119
127
|
|
|
120
|
-
**Output isolation controls**: By default, query results are written to local files (`.safe-sql-results/`) and opened in the editor; tool responses are formatted to avoid returning result sets. Error
|
|
128
|
+
**Output isolation controls**: By default, query results are written to local files (`.safe-sql-results/`) and opened in the editor when running in a supported client (Cursor, Claude Code, Codex, Gemini); tool responses are formatted to avoid returning result sets. Error responses return generic messages only (e.g. "Execution failed. See server logs for details."); no SQL, parameter values, or database error text are returned. Logs never include SQL or parameter values. These mechanisms are designed to reduce LLM data exposure risk when used appropriately, and do not constitute regulatory compliance or replace enterprise data governance and DLP controls.
|
|
121
129
|
|
|
122
130
|
- **Local Development First**: Zero-dependency and token-efficient, with just two MCP tools to maximize the available context window
|
|
123
131
|
- **Multi-Database**: PostgreSQL, MySQL, MariaDB, SQL Server, and SQLite through a single interface
|
|
124
132
|
- **Multi-Connection**: Connect to multiple databases simultaneously with TOML configuration
|
|
125
133
|
- **Default schema**: Use `--schema` (or TOML `schema = "..."`) so PostgreSQL uses that schema for `execute_sql` and `search_objects` is restricted to it (see below)
|
|
126
|
-
- **Guardrails**:
|
|
127
|
-
- **Designed to reduce LLM data exposure**: Results are written to `.safe-sql-results/` and opened in
|
|
134
|
+
- **Guardrails**: Connector-level read-only connections, row limiting, and a default 60-second query timeout (overridable per source via `query_timeout` in `dbhub.toml`) to reduce runaway operations
|
|
135
|
+
- **Designed to reduce LLM data exposure**: Results are written to `.safe-sql-results/` and opened only in supported editors (Cursor, Claude Code, Codex, Gemini); tool responses return only success/failure metadata (no file path, row data, row counts, or column names). Error responses use generic messages only; no SQL, parameter values, or database error text reach the client. Logs are redacted to avoid SQL and parameter values.
|
|
128
136
|
- **Secure Access**: SSH tunneling and SSL/TLS encryption
|
|
129
137
|
|
|
130
138
|
## Why Capybara?
|
|
@@ -138,8 +146,7 @@ PostgreSQL, MySQL, SQL Server, MariaDB, and SQLite.
|
|
|
138
146
|
## MCP Tools
|
|
139
147
|
|
|
140
148
|
- **[execute_sql](https://dbhub.ai/tools/execute-sql)**: Execute SQL queries with transaction support and safety controls
|
|
141
|
-
- **[search_objects](https://dbhub.ai/tools/search-objects)**: Search and explore database schemas, tables, columns, indexes, and procedures
|
|
142
|
-
- **[Custom Tools](https://dbhub.ai/tools/custom-tools)**: Define reusable, parameterized SQL operations in your `dbhub.toml` configuration file
|
|
149
|
+
- **[search_objects](https://dbhub.ai/tools/search-objects)**: Search and explore database schemas, tables, columns, indexes, and procedures (names only; summary/full metadata disabled for PII safety)
|
|
143
150
|
|
|
144
151
|
## Default schema (`--schema`)
|
|
145
152
|
|
|
@@ -186,13 +193,13 @@ Full DBHub docs (including TOML and command-line options) apply; see [dbhub.ai](
|
|
|
186
193
|
|
|
187
194
|
### Output isolation (designed to reduce LLM exposure)
|
|
188
195
|
|
|
189
|
-
By default, `execute_sql`
|
|
196
|
+
By default, `execute_sql` writes query results to `.safe-sql-results/` in your project directory and opens them in the editor when running in a supported AI editor (Cursor, Claude Code, Codex, Gemini). The MCP tool response sent back to the MCP client is formatted to return success/failure metadata rather than result sets. This reduces the likelihood of transmitting result data to an LLM, but it does not eliminate data handling risk and does not by itself satisfy regulatory or compliance requirements.
|
|
190
197
|
|
|
191
|
-
To reduce exfiltration risk via dynamic SQL (e.g. `SELECT secret AS "password_is_hunter2"`), tool responses are formatted to avoid including file paths, row data, row counts, or column names. Error responses
|
|
198
|
+
To reduce exfiltration risk via dynamic SQL (e.g. `SELECT secret AS "password_is_hunter2"`), tool responses are formatted to avoid including file paths, row data, row counts, or column names. Error responses return generic messages only (e.g. "Execution failed. See server logs for details."); no SQL, parameter values, or database error text are returned. Logs never include SQL or parameter values.
|
|
192
199
|
|
|
193
200
|
### Read-only enforcement
|
|
194
201
|
|
|
195
|
-
|
|
202
|
+
Database connections are opened in read-only mode (PostgreSQL: `default_transaction_read_only`; SQLite: readonly file mode). UPDATE, DELETE, INSERT, and other write operations fail at the connection level. This is a guardrail and does not substitute for database-level RBAC, permissions, or audit controls.
|
|
196
203
|
|
|
197
204
|
## Workbench
|
|
198
205
|
|
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
// src/tools/builtin-tools.ts
|
|
2
|
+
// NOTE(review): `var` is kept on purpose; this looks like bundler output
// (dist chunk), where top-level `var` avoids temporal-dead-zone problems
// between chunks. Confirm before switching to `const`.

/** Name of the built-in tool that executes SQL. */
var BUILTIN_TOOL_EXECUTE_SQL = "execute_sql";

/** Name of the built-in tool that searches database objects. */
var BUILTIN_TOOL_SEARCH_OBJECTS = "search_objects";

/**
 * Names of all built-in tools, in the order they are assigned to a source
 * that has no explicit tool configuration.
 */
var BUILTIN_TOOLS = [
  BUILTIN_TOOL_EXECUTE_SQL,
  BUILTIN_TOOL_SEARCH_OBJECTS
];
|
|
8
|
+
|
|
9
|
+
// src/tools/registry.ts
|
|
10
|
+
/**
 * Registry that maps each source id to the tool configurations enabled for it.
 *
 * The mapping is built once, in the constructor, from a configuration object
 * and is afterwards only read through the query methods below.
 */
var ToolRegistry = class {
  /**
   * @param {object} config - Configuration object.
   * @param {Array<{name: string, source: string}>} [config.tools] - Explicit
   *   tool entries; each must name a built-in tool.
   * @param {Array<{id: string}>} [config.sources] - Configured sources.
   * @throws {Error} If any entry in `config.tools` is not a built-in tool.
   */
  constructor(config) {
    this.toolsBySource = this.buildRegistry(config);
  }

  /**
   * Check if a tool name is a built-in tool.
   *
   * @param {string} toolName - Tool name to test.
   * @returns {boolean} True when `toolName` is listed in `BUILTIN_TOOLS`.
   */
  isBuiltinTool(toolName) {
    return BUILTIN_TOOLS.includes(toolName);
  }

  /**
   * Build the internal registry mapping sources to their enabled tools.
   *
   * Explicit tool entries are grouped by their `source`. Every source that
   * has no explicit entry receives one default config per built-in tool.
   *
   * @param {object} config - Same object that is passed to the constructor.
   * @returns {Map<string, Array<object>>} Source id -> enabled tool configs.
   * @throws {Error} If an explicit tool entry is not a built-in tool.
   */
  buildRegistry(config) {
    const registry = /* @__PURE__ */ new Map();
    for (const tool of config.tools || []) {
      if (!this.isBuiltinTool(tool.name)) {
        throw new Error(
          `Unknown tool '${tool.name}'. Valid tools: ${BUILTIN_TOOLS.join(", ")}. Custom tools are not supported.`
        );
      }
      const existing = registry.get(tool.source) || [];
      existing.push(tool);
      registry.set(tool.source, existing);
    }
    // Guard `sources` the same way as `tools`, so a config without sources
    // yields an empty registry instead of a "not iterable" TypeError.
    for (const source of config.sources || []) {
      if (!registry.has(source.id)) {
        // One default entry per built-in; deriving it from the name keeps
        // this correct if another built-in is ever added to BUILTIN_TOOLS.
        const defaultTools = BUILTIN_TOOLS.map((name) => ({
          name,
          source: source.id
        }));
        registry.set(source.id, defaultTools);
      }
    }
    return registry;
  }

  /**
   * Get all enabled tool configs for a specific source.
   *
   * @param {string} sourceId - Source id to look up.
   * @returns {Array<object>} The registered configs, or an empty array when
   *   the source is unknown.
   */
  getEnabledToolConfigs(sourceId) {
    return this.toolsBySource.get(sourceId) || [];
  }

  /**
   * Get built-in tool configuration for a specific source.
   *
   * @param {string} toolName - Tool name to look up.
   * @param {string} sourceId - Source id to look up.
   * @returns {object|undefined} The first matching config, or undefined if
   *   the tool is not enabled for the source or is not a built-in.
   */
  getBuiltinToolConfig(toolName, sourceId) {
    if (!this.isBuiltinTool(toolName)) {
      return void 0;
    }
    const tools = this.getEnabledToolConfigs(sourceId);
    return tools.find((t) => t.name === toolName);
  }

  /**
   * Get all unique tools across all sources (for tools/list response).
   *
   * Uniqueness is by tool name; the first config seen for each name (in
   * registry insertion order) is the one returned.
   *
   * @returns {Array<object>} The union of all enabled tools.
   */
  getAllTools() {
    const seen = /* @__PURE__ */ new Set();
    const result = [];
    for (const tools of this.toolsBySource.values()) {
      for (const tool of tools) {
        if (!seen.has(tool.name)) {
          seen.add(tool.name);
          result.push(tool);
        }
      }
    }
    return result;
  }

  /**
   * Get all built-in tool names that are enabled across any source.
   *
   * @returns {Array<string>} Distinct built-in tool names, in first-seen order.
   */
  getEnabledBuiltinToolNames() {
    const enabledBuiltins = /* @__PURE__ */ new Set();
    for (const tools of this.toolsBySource.values()) {
      for (const tool of tools) {
        if (this.isBuiltinTool(tool.name)) {
          enabledBuiltins.add(tool.name);
        }
      }
    }
    return Array.from(enabledBuiltins);
  }
};
|
|
98
|
+
/**
 * Module-level registry instance; null until initializeToolRegistry() runs.
 * NOTE(review): `var` looks like bundler output and is kept as-is.
 */
var globalRegistry = null;

/**
 * Create a ToolRegistry from `config` and store it as the module-level
 * instance, replacing any instance stored earlier.
 *
 * @param {object} config - Passed unchanged to the ToolRegistry constructor.
 */
function initializeToolRegistry(config) {
  globalRegistry = new ToolRegistry(config);
}

/**
 * Return the module-level ToolRegistry instance.
 *
 * @returns {object} The instance created by initializeToolRegistry().
 * @throws {Error} If initializeToolRegistry() has not been called yet.
 */
function getToolRegistry() {
  if (!globalRegistry) {
    throw new Error(
      "Tool registry not initialized. Call initializeToolRegistry first."
    );
  }
  return globalRegistry;
}
|
|
110
|
+
|
|
111
|
+
export {
|
|
112
|
+
BUILTIN_TOOL_EXECUTE_SQL,
|
|
113
|
+
BUILTIN_TOOL_SEARCH_OBJECTS,
|
|
114
|
+
BUILTIN_TOOLS,
|
|
115
|
+
ToolRegistry,
|
|
116
|
+
initializeToolRegistry,
|
|
117
|
+
getToolRegistry
|
|
118
|
+
};
|