woolsocks-bigquery-mcp 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md ADDED
@@ -0,0 +1,86 @@
1
+ # BigQuery MCP Server
2
+
3
+ MCP server for BigQuery schema exploration and query execution via the woolsocks-ai-proxy.
4
+
5
+ ## Architecture
6
+
7
+ ```
8
+ Claude Code → BigQuery MCP (local) → woolsocks-ai-proxy (Cloud Run) → BigQuery
9
+ ```
10
+
11
+ ### Why Local MCP + Cloud Proxy?
12
+
13
+ This setup deliberately splits responsibilities:
14
+
15
+ | Component | Location | Responsibility |
16
+ |-----------|----------|----------------|
17
+ | **BigQuery MCP** | Local (your Mac) | Caching, retry logic, MCP protocol |
18
+ | **woolsocks-ai-proxy** | Cloud Run | Cost limits, auth, audit logging, BigQuery access |
19
+
20
+ **Why not put everything in the proxy (like Zendesk)?**
21
+
22
+ BigQuery is different from Zendesk/Sentry because:
23
+
24
+ 1. **Interactive exploration** - You browse datasets → tables → schemas → write query. Local caching makes this fluid without network latency.
25
+ 2. **Schema stability** - Table schemas rarely change, so 15-min cache is very effective.
26
+ 3. **Cost sensitivity** - Every BigQuery metadata call costs money. Local caching reduces API calls significantly.
27
+ 4. **MCP tool semantics** - Structured tools (`list-datasets`, `get-schema`) that Claude can chain intelligently.
28
+
29
+ For comparison, Zendesk uses a skill → proxy pattern because ticket data changes constantly (no caching benefit) and queries are more "fetch and analyze" rather than iterative exploration.
30
+
31
+ ## Tools (7)
32
+
33
+ | Tool | Description |
34
+ |------|-------------|
35
+ | `bigquery__list-datasets` | List all accessible datasets (cached 5 min) |
36
+ | `bigquery__list-tables` | List tables in dataset with sizes (cached 5 min) |
37
+ | `bigquery__get-table-schema` | Get columns, types, partitioning (cached 15 min) |
38
+ | `bigquery__preview-table` | Sample up to 50 rows (NOT cached) |
39
+ | `bigquery__execute-query` | Run validated SQL query (max 1,000 rows) |
40
+ | `bigquery__validate-query` | Dry-run cost estimation |
41
+ | `bigquery__get-status` | Check usage limits |
42
+
43
+ ## Authentication
44
+
45
+ API key is loaded from **Google Secret Manager** at startup:
46
+ - Secret: `ai-proxy-api-keys` in project `woolsocks-marketing-ai`
47
+ - Uses Application Default Credentials (your `gcloud auth`)
48
+ - Fallback: `BIGQUERY_API_KEY` env var for local dev
49
+
50
+ ## Cost Limits (enforced by proxy)
51
+
52
+ | Limit | Value |
53
+ |-------|-------|
54
+ | Per query | 10 GB |
55
+ | Daily | 50 GB |
56
+ | Monthly | €20 |
57
+
58
+ ## Development
59
+
60
+ ```bash
61
+ # Test locally
62
+ npm run inspect # Opens MCP Inspector
63
+
64
+ # Check syntax
65
+ node --check index.js
66
+ ```
67
+
68
+ ## Files
69
+
70
+ ```
71
+ bigquery-mcp/
72
+ ├── index.js # MCP server entry point
73
+ ├── config.js # Proxy URL, timeouts, cache TTLs
74
+ ├── secrets.js # Secret Manager integration
75
+ ├── proxy-client.js # HTTP client with retry/circuit breaker
76
+ ├── cache.js # In-memory schema cache
77
+ └── tools/
78
+ ├── schema/ # 4 schema discovery tools
79
+ └── query/ # 3 query execution tools
80
+ ```
81
+
82
+ ## Related
83
+
84
+ - Proxy: `~/projects/woolsocks-ai-proxy` (schema endpoints in `src/bigquery-schema.js`)
85
+ - Config: `~/.mcp.json` (bigquery server entry)
86
+ - Documentation: `~/.claude/context/claude-code-setup.md`
package/cache.js ADDED
@@ -0,0 +1,119 @@
1
+ // Simple in-memory cache with TTL
2
+
3
+ import { CACHE_TTLS } from './config.js';
4
+
5
// A single cached value paired with its absolute expiry timestamp.
class CacheEntry {
  /**
   * @param {any} value - Value to cache
   * @param {number} ttlMs - Time-to-live in milliseconds
   */
  constructor(value, ttlMs) {
    this.value = value;
    this.expiresAt = Date.now() + ttlMs;
  }

  /** @returns {boolean} true once the TTL has elapsed */
  isExpired() {
    return this.expiresAt < Date.now();
  }
}

/**
 * TTL-based in-memory cache for BigQuery schema metadata.
 * Expired entries are evicted lazily on read; getStats() only counts them.
 */
class SchemaCache {
  constructor() {
    this.cache = new Map();
  }

  /**
   * Look up a key, deleting it first if its TTL has passed.
   * @param {string} key
   * @returns {any|null} The cached value or null if not found/expired
   */
  get(key) {
    const entry = this.cache.get(key);
    if (entry === undefined) {
      return null;
    }
    if (entry.isExpired()) {
      this.cache.delete(key);
      return null;
    }
    return entry.value;
  }

  /**
   * Store a value under a key with the given TTL.
   * @param {string} key
   * @param {any} value
   * @param {number} ttlMs
   */
  set(key, value, ttlMs) {
    this.cache.set(key, new CacheEntry(value, ttlMs));
  }

  /**
   * Drop every entry whose key starts with the given prefix.
   * @param {string} pattern - Key prefix (e.g. 'tables:' drops all table entries)
   */
  invalidate(pattern) {
    const doomed = [...this.cache.keys()].filter((key) => key.startsWith(pattern));
    for (const key of doomed) {
      this.cache.delete(key);
    }
  }

  /** Remove every entry. */
  clear() {
    this.cache.clear();
  }

  /**
   * Count live vs. expired entries. Does not evict — eviction only
   * happens on get().
   * @returns {{totalEntries: number, validEntries: number, expiredEntries: number}}
   */
  getStats() {
    const entries = [...this.cache.values()];
    const expiredEntries = entries.filter((entry) => entry.isExpired()).length;
    return {
      totalEntries: this.cache.size,
      validEntries: entries.length - expiredEntries,
      expiredEntries,
    };
  }
}
85
+
86
// Singleton cache shared by every tool module.
export const schemaCache = new SchemaCache();

// Key builders — keep the cache-key formats in one place.
const tablesKey = (datasetId) => `tables:${datasetId}`;
const schemaKey = (datasetId, tableId) => `schema:${datasetId}.${tableId}`;

/** @returns {any|null} cached dataset list, or null */
export function getCachedDatasets() {
  return schemaCache.get('datasets');
}

/** Cache the dataset list for CACHE_TTLS.datasets ms. */
export function setCachedDatasets(datasets) {
  schemaCache.set('datasets', datasets, CACHE_TTLS.datasets);
}

/** @returns {any|null} cached table list for the dataset, or null */
export function getCachedTables(datasetId) {
  return schemaCache.get(tablesKey(datasetId));
}

/** Cache a dataset's table list for CACHE_TTLS.tables ms. */
export function setCachedTables(datasetId, tables) {
  schemaCache.set(tablesKey(datasetId), tables, CACHE_TTLS.tables);
}

/** @returns {any|null} cached table schema, or null */
export function getCachedSchema(datasetId, tableId) {
  return schemaCache.get(schemaKey(datasetId, tableId));
}

/** Cache a table schema for CACHE_TTLS.schema ms. */
export function setCachedSchema(datasetId, tableId, schema) {
  schemaCache.set(schemaKey(datasetId, tableId), schema, CACHE_TTLS.schema);
}

/**
 * Drop all cached tables and schemas for one dataset.
 * The trailing '.' on the schema prefix keeps 'ds' from matching 'ds2'.
 * NOTE(review): the tables prefix has no such terminator, so invalidating
 * 'sales' also drops 'tables:sales_eu' — over-invalidation only (a re-fetch),
 * never stale data; confirm whether tighter matching is worth it.
 */
export function invalidateDatasetCache(datasetId) {
  schemaCache.invalidate(tablesKey(datasetId));
  schemaCache.invalidate(`schema:${datasetId}.`);
}

export default schemaCache;
package/config.js ADDED
@@ -0,0 +1,43 @@
1
// BigQuery MCP configuration — proxy endpoint, retry/cache tuning, result limits.

import { getAuthTokens } from './secrets.js';

// woolsocks-ai-proxy base URL (Cloud Run). BIGQUERY_PROXY_URL overrides it,
// e.g. to point at a locally running proxy.
export const PROXY_URL = process.env.BIGQUERY_PROXY_URL ||
  'https://woolsocks-ai-proxy-1009713156898.europe-west1.run.app';

// Re-exported for callers that need { identityToken, accessToken } for the
// current gcloud user (cached ~45 min — see secrets.js).
export { getAuthTokens };

// HTTP request tuning.
export const REQUEST_TIMEOUT_MS = 30000; // abort requests after 30 s
export const RATE_LIMIT_MS = 200;        // minimum spacing between requests

// Exponential-backoff retry policy for transient proxy failures.
export const RETRY_CONFIG = {
  maxAttempts: 3,
  baseDelayMs: 1000,
  maxDelayMs: 10000,
  retryableStatuses: [408, 429, 500, 502, 503, 504],
};

// Circuit breaker: stop hammering the proxy after repeated failures.
export const CIRCUIT_BREAKER_CONFIG = {
  failureThreshold: 5, // open the circuit after 5 consecutive failures
  cooldownMs: 30000,   // wait 30 s before probing again
};

// Cache TTLs in milliseconds. Table schemas change rarely, so they live longest.
export const CACHE_TTLS = {
  datasets: 5 * 60 * 1000,  // 5 min
  tables: 5 * 60 * 1000,    // 5 min
  schema: 15 * 60 * 1000,   // 15 min
};

// Result limits — deliberately low so colleagues new to BigQuery add LIMIT in
// their SQL rather than relying on the MCP to truncate; also keeps response
// sizes manageable. Keep the README tool table in sync with these values.
export const MAX_PREVIEW_ROWS = 50;
export const MAX_QUERY_ROWS = 1000;
export const DEFAULT_PREVIEW_ROWS = 5;
package/index.js ADDED
@@ -0,0 +1,146 @@
1
+ #!/usr/bin/env node
2
+
3
+ // BigQuery MCP Server
4
+ // Provides BigQuery access via woolsocks-ai-proxy for Claude Code
5
+
6
+ import { Server } from '@modelcontextprotocol/sdk/server/index.js';
7
+ import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
8
+ import {
9
+ CallToolRequestSchema,
10
+ ListToolsRequestSchema
11
+ } from '@modelcontextprotocol/sdk/types.js';
12
+
13
+ import { TOOLS, TOOL_MAP } from './tools/index.js';
14
+ import { getClient } from './proxy-client.js';
15
+
16
// Server identity advertised to MCP clients during the handshake.
// Version is kept in lockstep with package.json ("version": "2.0.0") —
// the original said '1.0.0', which made client logs disagree with the
// published package version.
const SERVER_INFO = {
  name: 'bigquery-mcp',
  version: '2.0.0',
  description: 'MCP server for BigQuery - schema exploration and query execution via woolsocks-ai-proxy'
};
22
+
23
// MCP server instance; only the `tools` capability is exposed.
const server = new Server(SERVER_INFO, {
  capabilities: { tools: {} },
});
29
+
30
// tools/list: advertise each tool's name, description, and input schema.
server.setRequestHandler(ListToolsRequestSchema, async () => ({
  tools: TOOLS.map(({ name, description, inputSchema }) => ({
    name,
    description,
    inputSchema,
  })),
}));
40
+
41
// Status-specific guidance for proxy errors: an optional replacement message
// plus a human-actionable hint, keyed by HTTP status.
const ERROR_GUIDANCE = {
  401: { message: 'Authentication failed.', hint: 'Check that BIGQUERY_API_KEY is set correctly in ~/.zshrc' },
  402: { hint: 'Query was blocked by cost controls. Try a smaller query or add date filters.' },
  403: { hint: 'Access denied. Only EU-region tables are accessible.' },
  404: { message: 'Resource not found.', hint: 'Check dataset/table names are correct.' },
  408: { message: 'Request timeout.', hint: 'The query may be too complex. Try simplifying or adding LIMIT.' },
  429: { message: 'Rate limited.', hint: 'Please wait a moment and try again.' },
  503: { hint: 'Proxy may be unavailable. Circuit breaker is active.' },
};

// Wrap a payload in the MCP error-content envelope.
const errorResult = (payload) => ({
  content: [{ type: 'text', text: JSON.stringify(payload, null, 2) }],
  isError: true,
});

// tools/call: dispatch to the named tool and translate failures into
// structured error payloads with actionable hints.
server.setRequestHandler(CallToolRequestSchema, async (request) => {
  const { name, arguments: args } = request.params;

  const tool = TOOL_MAP.get(name);
  if (!tool) {
    return errorResult({
      error: `Unknown tool: ${name}`,
      availableTools: TOOLS.map((t) => t.name),
    });
  }

  try {
    return await tool.handler(args || {});
  } catch (error) {
    // Errors carrying a `status` originate from the proxy client.
    if (error.status !== undefined) {
      const guidance = ERROR_GUIDANCE[error.status] ?? {};
      return errorResult({
        error: guidance.message ?? error.message,
        hint: guidance.hint ?? null,
        status: error.status,
        endpoint: error.endpoint,
      });
    }

    // Anything else: network failure, a bug in a tool handler, etc.
    return errorResult({
      error: error.message,
      hint: 'Check that BIGQUERY_API_KEY is set in ~/.zshrc and the proxy is accessible.',
    });
  }
});
124
+
125
// Start the server on stdio. Fails fast when the proxy client cannot be
// constructed (e.g. missing credentials), so misconfiguration surfaces at
// startup rather than on the first tool call.
async function main() {
  try {
    getClient(); // throws when the API key / auth material is missing
  } catch (error) {
    console.error(`[BigQuery MCP] Startup error: ${error.message}`);
    console.error('[BigQuery MCP] Ensure BIGQUERY_API_KEY is exported in ~/.zshrc');
    process.exit(1);
  }

  await server.connect(new StdioServerTransport());

  // stdout is reserved for the MCP protocol — all logging goes to stderr.
  console.error(`[BigQuery MCP] Server started (${TOOLS.length} tools available)`);
}

main().catch((error) => {
  console.error('[BigQuery MCP] Failed to start:', error);
  process.exit(1);
});
package/package.json ADDED
@@ -0,0 +1,33 @@
1
+ {
2
+ "name": "woolsocks-bigquery-mcp",
3
+ "version": "2.0.0",
4
+ "description": "Query Woolsocks BigQuery data with built-in cost guardrails and a complete data catalog covering all datasets, tables, and conventions.",
5
+ "main": "index.js",
6
+ "type": "module",
7
+ "bin": {
8
+ "woolsocks-bigquery-mcp": "index.js"
9
+ },
10
+ "publishConfig": {
11
+ "access": "public"
12
+ },
13
+ "scripts": {
14
+ "start": "node index.js",
15
+ "inspect": "npx @modelcontextprotocol/inspector node index.js"
16
+ },
17
+ "dependencies": {
18
+ "@modelcontextprotocol/sdk": "^1.1.0"
19
+ },
20
+ "devDependencies": {
21
+ "@modelcontextprotocol/inspector": "^0.10.2"
22
+ },
23
+ "author": {
24
+ "name": "Woolsocks Product Team"
25
+ },
26
+ "license": "MIT",
27
+ "keywords": [
28
+ "mcp",
29
+ "bigquery",
30
+ "woolsocks",
31
+ "data-analysis"
32
+ ]
33
+ }