woolsocks-bigquery-mcp 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +86 -0
- package/cache.js +119 -0
- package/config.js +43 -0
- package/index.js +146 -0
- package/package.json +33 -0
- package/proxy-client.js +303 -0
- package/secrets.js +85 -0
- package/tools/index.js +15 -0
- package/tools/query/execute-query.js +115 -0
- package/tools/query/get-status.js +71 -0
- package/tools/query/index.js +11 -0
- package/tools/query/validate-query.js +83 -0
- package/tools/schema/get-table-schema.js +102 -0
- package/tools/schema/index.js +13 -0
- package/tools/schema/list-datasets.js +69 -0
- package/tools/schema/list-tables.js +72 -0
- package/tools/schema/preview-table.js +65 -0
package/README.md
ADDED
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
# BigQuery MCP Server
|
|
2
|
+
|
|
3
|
+
MCP server for BigQuery schema exploration and query execution via the woolsocks-ai-proxy.
|
|
4
|
+
|
|
5
|
+
## Architecture
|
|
6
|
+
|
|
7
|
+
```
|
|
8
|
+
Claude Code → BigQuery MCP (local) → woolsocks-ai-proxy (Cloud Run) → BigQuery
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
### Why Local MCP + Cloud Proxy?
|
|
12
|
+
|
|
13
|
+
This setup deliberately splits responsibilities:
|
|
14
|
+
|
|
15
|
+
| Component | Location | Responsibility |
|
|
16
|
+
|-----------|----------|----------------|
|
|
17
|
+
| **BigQuery MCP** | Local (your Mac) | Caching, retry logic, MCP protocol |
|
|
18
|
+
| **woolsocks-ai-proxy** | Cloud Run | Cost limits, auth, audit logging, BigQuery access |
|
|
19
|
+
|
|
20
|
+
**Why not put everything in the proxy (like Zendesk)?**
|
|
21
|
+
|
|
22
|
+
BigQuery is different from Zendesk/Sentry because:
|
|
23
|
+
|
|
24
|
+
1. **Interactive exploration** - You browse datasets → tables → schemas → write query. Local caching makes this fluid without network latency.
|
|
25
|
+
2. **Schema stability** - Table schemas rarely change, so 15-min cache is very effective.
|
|
26
|
+
3. **Cost sensitivity** - Every BigQuery metadata call costs money. Local caching reduces API calls significantly.
|
|
27
|
+
4. **MCP tool semantics** - Structured tools (`list-datasets`, `get-schema`) that Claude can chain intelligently.
|
|
28
|
+
|
|
29
|
+
For comparison, Zendesk uses a skill → proxy pattern because ticket data changes constantly (so caching brings no benefit) and its queries are "fetch and analyze" rather than iterative exploration.
|
|
30
|
+
|
|
31
|
+
## Tools (7)
|
|
32
|
+
|
|
33
|
+
| Tool | Description |
|
|
34
|
+
|------|-------------|
|
|
35
|
+
| `bigquery__list-datasets` | List all accessible datasets (cached 5 min) |
|
|
36
|
+
| `bigquery__list-tables` | List tables in dataset with sizes (cached 5 min) |
|
|
37
|
+
| `bigquery__get-table-schema` | Get columns, types, partitioning (cached 15 min) |
|
|
38
|
+
| `bigquery__preview-table` | Sample up to 50 rows (NOT cached) |
|
|
39
|
+
| `bigquery__execute-query` | Run validated SQL query (max 1,000 rows) |
|
|
40
|
+
| `bigquery__validate-query` | Dry-run cost estimation |
|
|
41
|
+
| `bigquery__get-status` | Check usage limits |
|
|
42
|
+
|
|
43
|
+
## Authentication
|
|
44
|
+
|
|
45
|
+
API key is loaded from **Google Secret Manager** at startup:
|
|
46
|
+
- Secret: `ai-proxy-api-keys` in project `woolsocks-marketing-ai`
|
|
47
|
+
- Uses Application Default Credentials (your `gcloud auth`)
|
|
48
|
+
- Fallback: `BIGQUERY_API_KEY` env var for local dev
|
|
49
|
+
|
|
50
|
+
## Cost Limits (enforced by proxy)
|
|
51
|
+
|
|
52
|
+
| Limit | Value |
|
|
53
|
+
|-------|-------|
|
|
54
|
+
| Per query | 10 GB |
|
|
55
|
+
| Daily | 50 GB |
|
|
56
|
+
| Monthly | €20 |
|
|
57
|
+
|
|
58
|
+
## Development
|
|
59
|
+
|
|
60
|
+
```bash
|
|
61
|
+
# Test locally
|
|
62
|
+
npm run inspect # Opens MCP Inspector
|
|
63
|
+
|
|
64
|
+
# Check syntax
|
|
65
|
+
node --check index.js
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
## Files
|
|
69
|
+
|
|
70
|
+
```
|
|
71
|
+
bigquery-mcp/
|
|
72
|
+
├── index.js # MCP server entry point
|
|
73
|
+
├── config.js # Proxy URL, timeouts, cache TTLs
|
|
74
|
+
├── secrets.js # Secret Manager integration
|
|
75
|
+
├── proxy-client.js # HTTP client with retry/circuit breaker
|
|
76
|
+
├── cache.js # In-memory schema cache
|
|
77
|
+
└── tools/
|
|
78
|
+
├── schema/ # 4 schema discovery tools
|
|
79
|
+
└── query/ # 3 query execution tools
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
## Related
|
|
83
|
+
|
|
84
|
+
- Proxy: `~/projects/woolsocks-ai-proxy` (schema endpoints in `src/bigquery-schema.js`)
|
|
85
|
+
- Config: `~/.mcp.json` (bigquery server entry)
|
|
86
|
+
- Documentation: `~/.claude/context/claude-code-setup.md`
|
package/cache.js
ADDED
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
// Simple in-memory cache with TTL
|
|
2
|
+
|
|
3
|
+
import { CACHE_TTLS } from './config.js';
|
|
4
|
+
|
|
5
|
+
/**
 * A cached value paired with the absolute time at which it stops being valid.
 */
class CacheEntry {
  /**
   * @param {any} value - Payload to store.
   * @param {number} ttlMs - Lifetime in milliseconds, counted from construction.
   */
  constructor(value, ttlMs) {
    this.value = value;
    this.expiresAt = Date.now() + ttlMs;
  }

  /**
   * @returns {boolean} true once the deadline has been reached, or when the
   *   deadline is not a usable number.
   */
  isExpired() {
    // Phrased as "not still fresh" instead of "now > deadline": a NaN deadline
    // (ttlMs missing or non-numeric) compares false in both directions, and
    // such an entry must count as expired rather than be served forever.
    // This form also makes a zero TTL expire immediately.
    return !(Date.now() < this.expiresAt);
  }
}
|
|
15
|
+
|
|
16
|
+
/**
 * In-memory key/value store. Each stored entry decides for itself whether it
 * has expired (via its `isExpired()` method).
 */
class SchemaCache {
  constructor() {
    this.cache = new Map();
  }

  /**
   * Look up a key.
   * @param {string} key
   * @returns {any|null} The stored value, or null when the key is absent or
   *   its entry has expired (an expired entry is removed on the way out).
   */
  get(key) {
    const hit = this.cache.get(key);
    if (!hit) {
      return null;
    }
    if (!hit.isExpired()) {
      return hit.value;
    }
    this.cache.delete(key);
    return null;
  }

  /**
   * Store a value under a key for `ttlMs` milliseconds, replacing any
   * previous entry for that key.
   */
  set(key, value, ttlMs) {
    const entry = new CacheEntry(value, ttlMs);
    this.cache.set(key, entry);
  }

  /**
   * Remove every entry whose key begins with the given prefix.
   * @param {string} pattern - Key prefix (e.g. 'tables:' removes all table entries)
   */
  invalidate(pattern) {
    const doomedKeys = [...this.cache.keys()].filter((key) => key.startsWith(pattern));
    doomedKeys.forEach((key) => this.cache.delete(key));
  }

  /**
   * Remove all entries.
   */
  clear() {
    this.cache.clear();
  }

  /**
   * Count entries by freshness. Expired entries are counted, not removed.
   * @returns {{totalEntries: number, validEntries: number, expiredEntries: number}}
   */
  getStats() {
    const entries = [...this.cache.values()];
    const expiredEntries = entries.filter((entry) => entry.isExpired()).length;
    const validEntries = entries.length - expiredEntries;

    return {
      totalEntries: this.cache.size,
      validEntries,
      expiredEntries,
    };
  }
}

// One shared instance for the whole process
export const schemaCache = new SchemaCache();
|
|
88
|
+
|
|
89
|
+
// Typed accessors over the shared cache. Key layout:
//   'datasets'                      -> dataset list
//   'tables:<datasetId>'            -> table list of one dataset
//   'schema:<datasetId>.<tableId>'  -> schema of one table

/** @returns {any|null} Whatever the cache holds for the dataset list, or null. */
export function getCachedDatasets() {
  const key = 'datasets';
  return schemaCache.get(key);
}

/** Store the dataset list with the `datasets` TTL. */
export function setCachedDatasets(datasets) {
  const key = 'datasets';
  schemaCache.set(key, datasets, CACHE_TTLS.datasets);
}

/** @returns {any|null} Cached table list for `datasetId`, or null. */
export function getCachedTables(datasetId) {
  const key = `tables:${datasetId}`;
  return schemaCache.get(key);
}

/** Store the table list of `datasetId` with the `tables` TTL. */
export function setCachedTables(datasetId, tables) {
  const key = `tables:${datasetId}`;
  schemaCache.set(key, tables, CACHE_TTLS.tables);
}

/** @returns {any|null} Cached schema for `datasetId`.`tableId`, or null. */
export function getCachedSchema(datasetId, tableId) {
  const key = `schema:${datasetId}.${tableId}`;
  return schemaCache.get(key);
}

/** Store the schema of `datasetId`.`tableId` with the `schema` TTL. */
export function setCachedSchema(datasetId, tableId, schema) {
  const key = `schema:${datasetId}.${tableId}`;
  schemaCache.set(key, schema, CACHE_TTLS.schema);
}
|
|
113
|
+
|
|
114
|
+
/**
 * Drop everything cached for one dataset: its table list and the schemas of
 * its tables. Other datasets are left untouched.
 * @param {string} datasetId
 */
export function invalidateDatasetCache(datasetId) {
  // The table list lives under one exact key. Prefix matching here would also
  // wipe datasets whose id merely starts with this one (invalidating 'sales'
  // would take 'tables:sales_eu' with it), so delete that single key.
  schemaCache.cache.delete(`tables:${datasetId}`);
  // Schema keys are 'schema:<dataset>.<table>'; the trailing dot terminates
  // the dataset id, so a prefix match stays inside this dataset.
  schemaCache.invalidate(`schema:${datasetId}.`);
}
|
|
118
|
+
|
|
119
|
+
export default schemaCache;
|
package/config.js
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
// BigQuery MCP Configuration
|
|
2
|
+
|
|
3
|
+
import { getAuthTokens } from './secrets.js';
|
|
4
|
+
|
|
5
|
+
// Proxy URL - Cloud Run deployment.
// A non-empty BIGQUERY_PROXY_URL environment variable takes precedence over
// the built-in default.
const DEFAULT_PROXY_URL = 'https://woolsocks-ai-proxy-1009713156898.europe-west1.run.app';
const proxyUrlOverride = process.env.BIGQUERY_PROXY_URL;

export const PROXY_URL = proxyUrlOverride ? proxyUrlOverride : DEFAULT_PROXY_URL;
|
|
8
|
+
|
|
9
|
+
// Auth tokens for the current gcloud user (cached 45 min)
|
|
10
|
+
// Use getAuthTokens() to retrieve { identityToken, accessToken }
|
|
11
|
+
export { getAuthTokens };
|
|
12
|
+
|
|
13
|
+
// Time units, so the durations below read as what they mean
const SECOND_MS = 1000;
const MINUTE_MS = 60 * SECOND_MS;

// Request configuration
export const REQUEST_TIMEOUT_MS = 30 * SECOND_MS;
export const RATE_LIMIT_MS = 200; // Min time between requests

// Retry configuration
export const RETRY_CONFIG = {
  maxAttempts: 3,
  baseDelayMs: 1 * SECOND_MS,
  maxDelayMs: 10 * SECOND_MS,
  retryableStatuses: [408, 429, 500, 502, 503, 504],
};

// Circuit breaker configuration
export const CIRCUIT_BREAKER_CONFIG = {
  failureThreshold: 5, // Open after 5 consecutive failures
  cooldownMs: 30 * SECOND_MS, // Wait before trying again
};

// Cache TTLs (in milliseconds)
export const CACHE_TTLS = {
  datasets: 5 * MINUTE_MS,
  tables: 5 * MINUTE_MS,
  schema: 15 * MINUTE_MS,
};

// Result limits
// Kept low intentionally — colleagues new to BigQuery should use LIMIT in their SQL
// rather than relying on the MCP to truncate. This also keeps response sizes manageable.
export const DEFAULT_PREVIEW_ROWS = 5;
export const MAX_PREVIEW_ROWS = 50;
export const MAX_QUERY_ROWS = 1000;
|
package/index.js
ADDED
|
@@ -0,0 +1,146 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
|
|
3
|
+
// BigQuery MCP Server
|
|
4
|
+
// Provides BigQuery access via woolsocks-ai-proxy for Claude Code
|
|
5
|
+
|
|
6
|
+
import { Server } from '@modelcontextprotocol/sdk/server/index.js';
|
|
7
|
+
import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
|
|
8
|
+
import {
|
|
9
|
+
CallToolRequestSchema,
|
|
10
|
+
ListToolsRequestSchema
|
|
11
|
+
} from '@modelcontextprotocol/sdk/types.js';
|
|
12
|
+
|
|
13
|
+
import { TOOLS, TOOL_MAP } from './tools/index.js';
|
|
14
|
+
import { getClient } from './proxy-client.js';
|
|
15
|
+
|
|
16
|
+
// Server info passed to the MCP Server constructor.
// `version` must track the "version" field in package.json; it previously
// still said 1.0.0 while the package was published as 2.0.0.
const SERVER_INFO = {
  name: 'bigquery-mcp',
  version: '2.0.0',
  description: 'MCP server for BigQuery - schema exploration and query execution via woolsocks-ai-proxy'
};
|
|
22
|
+
|
|
23
|
+
// Create the server instance; the only capability declared is tools
const SERVER_OPTIONS = { capabilities: { tools: {} } };
const server = new Server(SERVER_INFO, SERVER_OPTIONS);

// Tool listing: expose only name, description and input schema of each
// registered tool.
server.setRequestHandler(ListToolsRequestSchema, async () => {
  const tools = TOOLS.map(({ name, description, inputSchema }) => ({
    name,
    description,
    inputSchema,
  }));
  return { tools };
});
|
|
40
|
+
|
|
41
|
+
// Per-status overrides for errors that carry a `status` property.
// `message`, when present, replaces the thrown error's own message;
// `hint` is advice returned alongside it. Statuses not listed here keep the
// original message and get a null hint.
const PROXY_ERROR_DETAILS = new Map([
  [401, {
    message: 'Authentication failed.',
    hint: 'Check that BIGQUERY_API_KEY is set correctly in ~/.zshrc',
  }],
  [402, { hint: 'Query was blocked by cost controls. Try a smaller query or add date filters.' }],
  [403, { hint: 'Access denied. Only EU-region tables are accessible.' }],
  [404, {
    message: 'Resource not found.',
    hint: 'Check dataset/table names are correct.',
  }],
  [408, {
    message: 'Request timeout.',
    hint: 'The query may be too complex. Try simplifying or adding LIMIT.',
  }],
  [429, {
    message: 'Rate limited.',
    hint: 'Please wait a moment and try again.',
  }],
  [503, { hint: 'Proxy may be unavailable. Circuit breaker is active.' }],
]);

/**
 * Wrap a payload in the tool-result envelope used for failures: a single
 * pretty-printed JSON text item with `isError: true`.
 * @param {object} payload
 */
function toErrorResult(payload) {
  return {
    content: [{
      type: 'text',
      text: JSON.stringify(payload, null, 2),
    }],
    isError: true,
  };
}

// Handle tool execution: look the tool up by name, run its handler, and turn
// anything it throws into an error result instead of letting it propagate.
server.setRequestHandler(CallToolRequestSchema, async (request) => {
  const { name, arguments: args } = request.params;

  const tool = TOOL_MAP.get(name);
  if (!tool) {
    return toErrorResult({
      error: `Unknown tool: ${name}`,
      availableTools: TOOLS.map((t) => t.name),
    });
  }

  try {
    return await tool.handler(args || {});
  } catch (error) {
    // `throw null` and `throw 'text'` are legal, so read the thrown value
    // defensively; the catch block must never throw itself.
    const status = error?.status;
    const thrownMessage = error?.message ?? String(error);

    // Errors carrying a status get status-specific messages and hints
    if (status !== undefined) {
      const details = PROXY_ERROR_DETAILS.get(status);
      return toErrorResult({
        error: details?.message ?? thrownMessage,
        hint: details?.hint ?? null,
        status,
        endpoint: error.endpoint,
      });
    }

    // Everything else
    return toErrorResult({
      error: thrownMessage,
      hint: 'Check that BIGQUERY_API_KEY is set in ~/.zshrc and the proxy is accessible.',
    });
  }
});
|
|
124
|
+
|
|
125
|
+
/**
 * Start the server: build the proxy client first (exiting with code 1 if
 * that throws), then attach the server to a stdio transport.
 */
async function main() {
  // Fail fast before accepting any MCP traffic.
  // NOTE(review): this only catches synchronous failures of getClient() — confirm it is not async.
  try {
    getClient();
  } catch (startupError) {
    const startupLines = [
      `[BigQuery MCP] Startup error: ${startupError.message}`,
      '[BigQuery MCP] Ensure BIGQUERY_API_KEY is exported in ~/.zshrc',
    ];
    for (const line of startupLines) {
      console.error(line);
    }
    process.exit(1);
  }

  await server.connect(new StdioServerTransport());

  // stdout carries the MCP protocol, so status output goes to stderr
  const toolCount = TOOLS.length;
  console.error(`[BigQuery MCP] Server started (${toolCount} tools available)`);
}

// Any rejection from main() is fatal: report it and exit non-zero
const exitOnStartupFailure = (error) => {
  console.error('[BigQuery MCP] Failed to start:', error);
  process.exit(1);
};

main().catch(exitOnStartupFailure);
|
package/package.json
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "woolsocks-bigquery-mcp",
|
|
3
|
+
"version": "2.0.0",
|
|
4
|
+
"description": "Query Woolsocks BigQuery data with built-in cost guardrails and a complete data catalog covering all datasets, tables, and conventions.",
|
|
5
|
+
"main": "index.js",
|
|
6
|
+
"type": "module",
|
|
7
|
+
"bin": {
|
|
8
|
+
"woolsocks-bigquery-mcp": "index.js"
|
|
9
|
+
},
|
|
10
|
+
"publishConfig": {
|
|
11
|
+
"access": "public"
|
|
12
|
+
},
|
|
13
|
+
"scripts": {
|
|
14
|
+
"start": "node index.js",
|
|
15
|
+
"inspect": "npx @modelcontextprotocol/inspector node index.js"
|
|
16
|
+
},
|
|
17
|
+
"dependencies": {
|
|
18
|
+
"@modelcontextprotocol/sdk": "^1.1.0"
|
|
19
|
+
},
|
|
20
|
+
"devDependencies": {
|
|
21
|
+
"@modelcontextprotocol/inspector": "^0.10.2"
|
|
22
|
+
},
|
|
23
|
+
"author": {
|
|
24
|
+
"name": "Woolsocks Product Team"
|
|
25
|
+
},
|
|
26
|
+
"license": "MIT",
|
|
27
|
+
"keywords": [
|
|
28
|
+
"mcp",
|
|
29
|
+
"bigquery",
|
|
30
|
+
"woolsocks",
|
|
31
|
+
"data-analysis"
|
|
32
|
+
]
|
|
33
|
+
}
|