woolsocks-bigquery-mcp 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +86 -0
- package/cache.js +119 -0
- package/config.js +43 -0
- package/index.js +146 -0
- package/package.json +33 -0
- package/proxy-client.js +303 -0
- package/secrets.js +85 -0
- package/tools/index.js +15 -0
- package/tools/query/execute-query.js +115 -0
- package/tools/query/get-status.js +71 -0
- package/tools/query/index.js +11 -0
- package/tools/query/validate-query.js +83 -0
- package/tools/schema/get-table-schema.js +102 -0
- package/tools/schema/index.js +13 -0
- package/tools/schema/list-datasets.js +69 -0
- package/tools/schema/list-tables.js +72 -0
- package/tools/schema/preview-table.js +65 -0
package/proxy-client.js
ADDED
|
@@ -0,0 +1,303 @@
|
|
|
1
|
+
// HTTP client for woolsocks-ai-proxy BigQuery endpoints
|
|
2
|
+
// Includes retry logic, circuit breaker, and rate limiting
|
|
3
|
+
|
|
4
|
+
import {
|
|
5
|
+
PROXY_URL,
|
|
6
|
+
getAuthTokens,
|
|
7
|
+
REQUEST_TIMEOUT_MS,
|
|
8
|
+
RATE_LIMIT_MS,
|
|
9
|
+
RETRY_CONFIG,
|
|
10
|
+
CIRCUIT_BREAKER_CONFIG
|
|
11
|
+
} from './config.js';
|
|
12
|
+
|
|
13
|
+
// Promise-based delay: settles (with no value) once `ms` milliseconds have elapsed.
const sleep = (ms) => new Promise((done) => {
  setTimeout(done, ms);
});
|
|
14
|
+
|
|
15
|
+
/**
 * Error type used for every failed proxy call.
 *
 * The message is formatted as `Proxy API error (<status>): <message>`; the
 * status code and the endpoint that was called are kept as own properties.
 */
export class ProxyApiError extends Error {
  constructor(status, message, endpoint) {
    const text = `Proxy API error (${status}): ${message}`;
    super(text);
    Object.assign(this, { status, endpoint, name: 'ProxyApiError' });
  }
}
|
|
23
|
+
|
|
24
|
+
/**
 * Small three-state circuit breaker (CLOSED | OPEN | HALF_OPEN).
 *
 * The breaker opens once `failureThreshold` failures have been recorded
 * without an intervening success, and starts letting requests through again
 * after `cooldownMs` milliseconds have passed since the last failure.
 */
class CircuitBreaker {
  constructor(config) {
    const { failureThreshold, cooldownMs } = config;
    this.failureThreshold = failureThreshold;
    this.cooldownMs = cooldownMs;
    this.failures = 0;
    this.state = 'CLOSED'; // CLOSED | OPEN | HALF_OPEN
    this.lastFailureTime = null;
  }

  /** Whether a request may be sent right now. May move OPEN -> HALF_OPEN. */
  canRequest() {
    switch (this.state) {
      case 'OPEN': {
        // Only leave OPEN once the cooldown since the last failure is over
        const elapsedMs = Date.now() - this.lastFailureTime;
        const cooledDown = elapsedMs >= this.cooldownMs;
        if (!cooledDown) {
          return false;
        }
        this.state = 'HALF_OPEN';
        return true;
      }
      default:
        // CLOSED, or HALF_OPEN (probe requests are let through)
        return true;
    }
  }

  /** Reset the failure count and close the circuit. */
  recordSuccess() {
    this.state = 'CLOSED';
    this.failures = 0;
  }

  /** Count one failure and open the circuit when the threshold is reached. */
  recordFailure() {
    this.failures += 1;
    this.lastFailureTime = Date.now();

    const tripped = this.failures >= this.failureThreshold;
    if (!tripped) {
      return;
    }
    this.state = 'OPEN';
    console.error(`[CircuitBreaker] Circuit OPEN after ${this.failures} failures`);
  }

  /** Snapshot of the breaker's current state, for diagnostics. */
  getState() {
    const { state, failures, lastFailureTime } = this;
    return { state, failures, lastFailureTime };
  }
}
|
|
72
|
+
|
|
73
|
+
/**
 * HTTP client for the woolsocks-ai-proxy BigQuery endpoints.
 *
 * Every endpoint helper goes through `request()`, which applies client-side
 * rate limiting, a per-attempt timeout, retries with exponential backoff and
 * a circuit breaker.
 */
export class ProxyClient {
  /**
   * @param {{ identityToken: string, accessToken: string }} tokens
   * @throws {Error} When either token is missing or empty.
   */
  constructor({ identityToken, accessToken }) {
    if (!identityToken || !accessToken) {
      throw new Error(
        'GCP tokens are required. Run `gcloud auth login` and `gcloud auth application-default login`.'
      );
    }
    this.baseUrl = PROXY_URL;
    this.identityToken = identityToken; // Authenticates to proxy (Cloud Run IAM)
    this.accessToken = accessToken; // Passed through to BigQuery (your own IAM access)
    this.lastRequestTime = 0;
    this.circuitBreaker = new CircuitBreaker(CIRCUIT_BREAKER_CONFIG);
    // Only clients built by create() re-read tokens; explicitly supplied tokens are kept as-is.
    this.refreshTokensOnRequest = false;
  }

  /**
   * Create a new ProxyClient with async initialization.
   * Fetches the tokens via getAuthTokens() and keeps them fresh on later requests.
   *
   * @returns {Promise<ProxyClient>}
   */
  static async create() {
    const instance = new ProxyClient(await getAuthTokens());
    instance.refreshTokensOnRequest = true;
    return instance;
  }

  /**
   * Re-read the tokens from getAuthTokens() so a long-lived client does not
   * keep sending tokens that have expired since it was created.
   * NOTE(review): assumes getAuthTokens() caches its result so this is cheap
   * on most calls — confirm against the implementation exported by config.js.
   */
  async syncTokens() {
    if (!this.refreshTokensOnRequest) {
      return;
    }
    const { identityToken, accessToken } = await getAuthTokens();
    if (identityToken && accessToken) {
      this.identityToken = identityToken;
      this.accessToken = accessToken;
    }
  }

  /**
   * Rate limit requests: wait until at least RATE_LIMIT_MS has passed since
   * the previous request was started.
   */
  async rateLimit() {
    const now = Date.now();
    const timeSinceLastRequest = now - this.lastRequestTime;
    if (timeSinceLastRequest < RATE_LIMIT_MS) {
      await sleep(RATE_LIMIT_MS - timeSinceLastRequest);
    }
    this.lastRequestTime = Date.now();
  }

  /**
   * Calculate retry delay with exponential backoff.
   *
   * @param {number} attempt Zero-based attempt index.
   * @returns {number} Delay in milliseconds, capped at maxDelayMs plus jitter.
   */
  getRetryDelay(attempt) {
    const delay = Math.min(
      RETRY_CONFIG.baseDelayMs * Math.pow(2, attempt),
      RETRY_CONFIG.maxDelayMs
    );
    // Add jitter (0-25% of delay)
    return delay + Math.random() * delay * 0.25;
  }

  /**
   * Check if status code is retryable according to RETRY_CONFIG.
   */
  isRetryable(status) {
    return RETRY_CONFIG.retryableStatuses.includes(status);
  }

  /**
   * Perform a single fetch attempt bounded by REQUEST_TIMEOUT_MS.
   * The timer is always cleared, including when fetch rejects.
   */
  async fetchOnce(endpoint, options) {
    const controller = new AbortController();
    const timeoutId = setTimeout(() => controller.abort(), REQUEST_TIMEOUT_MS);
    try {
      return await fetch(`${this.baseUrl}${endpoint}`, {
        ...options,
        signal: controller.signal,
        headers: {
          'Content-Type': 'application/json',
          // Identity token: proves who you are to the proxy (Cloud Run IAM auth)
          'Authorization': `Bearer ${this.identityToken}`,
          // Access token: proxy passes this to BigQuery so queries run as your GCP identity
          'X-BQ-Access-Token': this.accessToken,
          ...options.headers
        }
      });
    } finally {
      clearTimeout(timeoutId);
    }
  }

  /**
   * Build a ProxyApiError from a non-2xx response. Falls back to the HTTP
   * status text when the body is not JSON or carries no error/message field.
   */
  async toApiError(response, endpoint) {
    const body = await response.json().catch(() => null);
    const detail = body?.error || body?.message || response.statusText;
    const message = typeof detail === 'string' ? detail : JSON.stringify(detail);
    return new ProxyApiError(response.status, message, endpoint);
  }

  /**
   * Make HTTP request with retry and circuit breaker.
   *
   * - Transport failures and timeouts are retried until attempts run out.
   * - HTTP statuses listed in RETRY_CONFIG.retryableStatuses are retried.
   * - Other 4xx/3xx responses are thrown immediately and do not count
   *   against the circuit breaker (the proxy itself is reachable).
   * - A request that ultimately fails records exactly one breaker failure.
   *
   * @param {string} endpoint Path appended to the proxy base URL.
   * @param {object} [options] Extra fetch options (method, body, headers).
   * @returns {Promise<any>} Parsed JSON body of the successful response.
   * @throws {ProxyApiError} On open circuit, HTTP error, timeout (408) or
   *   network failure (status 0).
   */
  async request(endpoint, options = {}) {
    // Check circuit breaker
    if (!this.circuitBreaker.canRequest()) {
      const cooldownMs = this.circuitBreaker.cooldownMs;
      const cooldownSeconds = Number.isFinite(cooldownMs) ? Math.round(cooldownMs / 1000) : 30;
      throw new ProxyApiError(
        503,
        `Circuit breaker is OPEN. Proxy may be unavailable. Will retry in ${cooldownSeconds}s.`,
        endpoint
      );
    }

    await this.syncTokens();

    const finalAttempt = RETRY_CONFIG.maxAttempts - 1;
    let lastError;

    for (let attempt = 0; attempt <= finalAttempt; attempt++) {
      await this.rateLimit();

      let response;
      try {
        response = await this.fetchOnce(endpoint, options);
      } catch (error) {
        // Only transport-level problems land here; HTTP errors are handled below
        lastError = error?.name === 'AbortError'
          ? new ProxyApiError(408, 'Request timeout', endpoint)
          : new ProxyApiError(0, error?.message, endpoint);
        lastError.cause = error;

        if (attempt < finalAttempt) {
          const delay = this.getRetryDelay(attempt);
          console.error(`[ProxyClient] Network error, retrying in ${Math.round(delay)}ms: ${error?.message}`);
          await sleep(delay);
          continue;
        }

        this.circuitBreaker.recordFailure();
        throw lastError;
      }

      if (response.ok) {
        this.circuitBreaker.recordSuccess();
        return response.json();
      }

      const error = await this.toApiError(response, endpoint);
      lastError = error;

      // Don't retry client errors (4xx) except rate limits
      if (response.status < 500 && response.status !== 429) {
        this.circuitBreaker.recordSuccess(); // Not a server issue
        throw error;
      }

      if (this.isRetryable(response.status) && attempt < finalAttempt) {
        const delay = this.getRetryDelay(attempt);
        console.error(`[ProxyClient] Retrying in ${Math.round(delay)}ms (attempt ${attempt + 1}/${RETRY_CONFIG.maxAttempts})`);
        await sleep(delay);
        continue;
      }

      this.circuitBreaker.recordFailure();
      throw error;
    }

    // Only reachable when RETRY_CONFIG.maxAttempts is less than 1
    throw lastError ?? new ProxyApiError(0, 'No request attempts were made', endpoint);
  }

  // =========================================================================
  // Schema Discovery Methods
  // =========================================================================

  async listDatasets() {
    return this.request('/v1/bigquery/datasets');
  }

  async listTables(datasetId) {
    return this.request(`/v1/bigquery/datasets/${encodeURIComponent(datasetId)}/tables`);
  }

  async getTableSchema(datasetId, tableId) {
    return this.request(
      `/v1/bigquery/datasets/${encodeURIComponent(datasetId)}/tables/${encodeURIComponent(tableId)}`
    );
  }

  async previewTable(datasetId, tableId, maxRows = 10) {
    return this.request(
      `/v1/bigquery/datasets/${encodeURIComponent(datasetId)}/tables/${encodeURIComponent(tableId)}/preview?maxRows=${encodeURIComponent(maxRows)}`
    );
  }

  // =========================================================================
  // Query Execution Methods
  // =========================================================================

  async executeQuery(sql) {
    return this.request('/v1/bigquery/query', {
      method: 'POST',
      body: JSON.stringify({ sql })
    });
  }

  async validateQuery(sql) {
    return this.request('/v1/bigquery/validate', {
      method: 'POST',
      body: JSON.stringify({ sql })
    });
  }

  async getStatus() {
    return this.request('/v1/bigquery/status');
  }

  // =========================================================================
  // Utility Methods
  // =========================================================================

  getCircuitBreakerState() {
    return this.circuitBreaker.getState();
  }
}
|
|
278
|
+
|
|
279
|
+
// Singleton instance, plus the in-flight initialization promise (if any)
let client = null;
let clientPromise = null;

/**
 * Get or create the ProxyClient singleton.
 * Initializes async on first call via ProxyClient.create(), which fetches
 * the current user's GCP tokens.
 *
 * Concurrent callers share one initialization. A failed initialization is
 * not cached: the next call tries again, so the user can fix their
 * credentials without restarting the process.
 *
 * @returns {Promise<ProxyClient>}
 * @throws Whatever ProxyClient.create() rejects with.
 */
export async function getClient() {
  if (client) {
    return client;
  }

  // Prevent multiple simultaneous initializations
  if (!clientPromise) {
    clientPromise = ProxyClient.create()
      .then((created) => {
        client = created;
        return created;
      })
      .catch((error) => {
        // Forget the failed attempt so a later call can retry
        clientPromise = null;
        throw error;
      });
  }

  return clientPromise;
}
|
|
302
|
+
|
|
303
|
+
export default ProxyClient;
|
package/secrets.js
ADDED
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
// GCP token auth for BigQuery MCP
|
|
2
|
+
// Uses the colleague's own gcloud credentials instead of a shared API key.
|
|
3
|
+
// This means BigQuery access is controlled per-person by infra via GCP IAM.
|
|
4
|
+
//
|
|
5
|
+
// Two tokens are fetched:
|
|
6
|
+
// identityToken → authenticates to woolsocks-ai-proxy (proves who you are)
|
|
7
|
+
// accessToken → passed through to BigQuery so queries run as YOUR GCP identity
|
|
8
|
+
//
|
|
9
|
+
// Prerequisites:
|
|
10
|
+
// gcloud auth login
|
|
11
|
+
// gcloud auth application-default login
|
|
12
|
+
// gcloud must be in PATH
|
|
13
|
+
|
|
14
|
+
import { exec } from 'child_process';
|
|
15
|
+
import { promisify } from 'util';
|
|
16
|
+
|
|
17
|
+
const execAsync = promisify(exec);
|
|
18
|
+
|
|
19
|
+
const PROXY_URL = process.env.BIGQUERY_PROXY_URL ||
|
|
20
|
+
'https://woolsocks-ai-proxy-1009713156898.europe-west1.run.app';
|
|
21
|
+
|
|
22
|
+
// Cache tokens for 45 minutes (gcloud tokens expire after 60 min)
|
|
23
|
+
let cachedTokens = null;
|
|
24
|
+
let cacheExpiry = 0;
|
|
25
|
+
const TOKEN_TTL_MS = 45 * 60 * 1000;
|
|
26
|
+
|
|
27
|
+
/**
 * Get GCP auth tokens for the current gcloud user.
 * Returns { identityToken, accessToken }.
 *
 * Tokens are cached in module state for TOKEN_TTL_MS. Empty gcloud output is
 * treated as a failure and is never cached.
 *
 * Env var overrides for testing:
 * BIGQUERY_IDENTITY_TOKEN, BIGQUERY_ACCESS_TOKEN
 *
 * @returns {Promise<{ identityToken: string, accessToken: string }>}
 * @throws {Error} With setup instructions when gcloud fails or prints an
 *   empty token; the underlying error is attached as `cause`.
 */
export async function getAuthTokens() {
  const now = Date.now();

  // Return cached tokens if still valid
  if (cachedTokens && now < cacheExpiry) {
    return cachedTokens;
  }

  // Env var override for local testing / CI
  if (process.env.BIGQUERY_IDENTITY_TOKEN && process.env.BIGQUERY_ACCESS_TOKEN) {
    console.error('[auth] Using tokens from environment variables');
    cachedTokens = {
      identityToken: process.env.BIGQUERY_IDENTITY_TOKEN,
      accessToken: process.env.BIGQUERY_ACCESS_TOKEN,
    };
    cacheExpiry = now + TOKEN_TTL_MS;
    return cachedTokens;
  }

  try {
    // Identity token: authenticates to the proxy (Cloud Run IAM)
    // NOTE(review): PROXY_URL can come from BIGQUERY_PROXY_URL and is interpolated
    // into a shell command line — confirm that variable is always trusted.
    const { stdout: identityRaw } = await execAsync(
      `gcloud auth print-identity-token --audiences=${PROXY_URL}`
    );

    // Access token: passed through to BigQuery so queries run as your own identity
    const { stdout: accessRaw } = await execAsync(
      'gcloud auth print-access-token'
    );

    const identityToken = identityRaw.trim();
    const accessToken = accessRaw.trim();

    // Never cache blanks: they would be handed out until the cache expires
    if (!identityToken || !accessToken) {
      throw new Error('gcloud returned an empty token');
    }

    cachedTokens = { identityToken, accessToken };
    cacheExpiry = now + TOKEN_TTL_MS;

    console.error('[auth] GCP tokens loaded from gcloud');
    return cachedTokens;

  } catch (error) {
    console.error('[auth] Failed to get gcloud tokens:', error.message);
    throw new Error(
      'Could not get GCP credentials.\n' +
      'Run the following and try again:\n' +
      ' gcloud auth login\n' +
      ' gcloud auth application-default login\n\n' +
      'No BigQuery access yet? Ask infra via a Jira PERM ticket:\n' +
      'https://woolsocks.atlassian.net/jira/software/projects/PERM/boards\n' +
      '→ Request: "BigQuery read access for Claude Code"',
      { cause: error }
    );
  }
}
|
package/tools/index.js
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
// Tool registry - combines all tools from all modules
|
|
2
|
+
|
|
3
|
+
import schemaTools from './schema/index.js';
|
|
4
|
+
import queryTools from './query/index.js';
|
|
5
|
+
|
|
6
|
+
// Combine all tools into a single array
|
|
7
|
+
// Flat list of every registered tool: schema tools first, then query tools
export const TOOLS = [...schemaTools, ...queryTools];

// Index of the same tools keyed by tool name, for quick lookup
export const TOOL_MAP = new Map();
for (const tool of TOOLS) {
  TOOL_MAP.set(tool.name, tool);
}

export default TOOLS;
|
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
// Execute a BigQuery query
|
|
2
|
+
|
|
3
|
+
import { getClient } from '../../proxy-client.js';
|
|
4
|
+
import { MAX_QUERY_ROWS } from '../../config.js';
|
|
5
|
+
|
|
6
|
+
// Text fragments that indicate the failure is an authorization problem
const AUTH_ERROR_PATTERN = /401|403|permission|access denied/i;

// Wrap a payload as an MCP text result; error results carry isError: true
function toToolResult(payload, isError = false) {
  const result = {
    content: [{
      type: 'text',
      text: JSON.stringify(payload, null, 2)
    }]
  };
  if (isError) {
    result.isError = true;
  }
  return result;
}

// True when the proxy's error/message fields look like a 401/403/permission failure.
// Non-string values are stringified first so an object-shaped error cannot crash the check.
function looksLikeAuthError(result) {
  return [result.error, result.message]
    .filter((value) => value != null)
    .map((value) => (typeof value === 'string' ? value : JSON.stringify(value)))
    .some((text) => AUTH_ERROR_PATTERN.test(text));
}

/**
 * MCP tool: execute a BigQuery SQL query through the proxy.
 *
 * handler({ sql }) returns a text result whose JSON payload is one of:
 * - an argument error (sql missing or not a string),
 * - a "blocked" report when the proxy refused the query,
 * - a failure report with a hint (access-request hint for auth failures),
 * - the rows, truncated to MAX_QUERY_ROWS, with usage figures.
 * Errors thrown by the proxy client are not caught here.
 */
const executeQueryTool = {
  name: 'bigquery__execute-query',
  description: `Execute a BigQuery SQL query. IMPORTANT RULES — follow these every time:
1. ALWAYS call bigquery__validate-query FIRST to check cost before executing.
2. ALWAYS include a LIMIT clause (max 1000 rows for exploration).
3. ALWAYS add a date/time filter on any table larger than 1 GB (use WHERE created_at, _PARTITIONTIME, or similar).
4. NEVER use SELECT * on large tables — select only the columns you need.
5. If you get a 401/403 error, the user does not have BigQuery access — tell them to request it via a Jira ticket in the PERM project (see error hint).
Guardrails enforced server-side: 10 GB per-query scan limit, 50 GB daily limit, EU region only. Max ${MAX_QUERY_ROWS} rows returned.`,
  inputSchema: {
    type: 'object',
    properties: {
      sql: {
        type: 'string',
        description: 'The SQL query to execute. Use fully qualified table names (dataset.table). Always add date/partition filters.'
      }
    },
    required: ['sql']
  },

  async handler(args) {
    const { sql } = args;

    if (!sql || typeof sql !== 'string') {
      return toToolResult({
        error: 'sql parameter is required and must be a string',
        hint: 'Use bigquery__get-table-schema to understand table structure before querying'
      }, true);
    }

    const client = await getClient();
    const result = await client.executeQuery(sql);

    // Handle blocked queries
    if (result.blocked) {
      return toToolResult({
        blocked: true,
        reason: result.reason,
        message: result.message,
        details: {
          bytesScanned: result.bytesScanned,
          estimatedGB: result.estimatedGB,
          scanLimitGB: result.scanLimitGB,
          dailyUsedGB: result.dailyUsedGB,
          dailyLimitGB: result.dailyLimitGB,
        },
        hint: 'Reduce query cost by: (1) adding a date filter e.g. WHERE created_at >= DATE_SUB(CURRENT_DATE(), INTERVAL 30 DAY), (2) selecting specific columns instead of *, (3) adding LIMIT 1000.'
      }, true);
    }

    // Handle execution failures
    if (!result.success) {
      return toToolResult({
        success: false,
        error: result.error,
        message: result.message,
        hint: looksLikeAuthError(result)
          ? 'You do not have BigQuery access. Ask your infra manager to grant access by creating a ticket in the Jira PERM project: https://woolsocks.atlassian.net/jira/software/projects/PERM/boards — request "BigQuery read access for Claude Code".'
          : 'Check SQL syntax and table names. Use bigquery__get-table-schema to verify column names.'
      }, true);
    }

    // A successful response without a rows array is treated as an empty result
    const allRows = Array.isArray(result.rows) ? result.rows : [];

    // Truncate results if too many rows
    const truncated = allRows.length > MAX_QUERY_ROWS;
    const rows = truncated ? allRows.slice(0, MAX_QUERY_ROWS) : allRows;

    return toToolResult({
      success: true,
      rowCount: result.rowCount,
      returnedRows: rows.length,
      truncated,
      bytesScanned: result.bytesScanned,
      estimatedGB: result.estimatedGB,
      dailyUsedGB: result.dailyUsedGB,
      dailyLimitGB: result.dailyLimitGB,
      costNote: `Scanned ${result.estimatedGB}GB. Daily usage: ${result.dailyUsedGB}GB / ${result.dailyLimitGB}GB`,
      rows,
      ...(truncated && { warning: `Results truncated to ${MAX_QUERY_ROWS} rows. Add LIMIT to your query to control output size.` })
    });
  }
};

export default executeQueryTool;
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
// Get BigQuery usage status
|
|
2
|
+
|
|
3
|
+
import { getClient } from '../../proxy-client.js';
|
|
4
|
+
import { schemaCache } from '../../cache.js';
|
|
5
|
+
|
|
6
|
+
// Render a value for the human-readable summary, tolerating missing fields
function showValue(value) {
  return value ?? 'n/a';
}

// Pick the warning text. CRITICAL messages come before WARNING ones so a daily
// warning can no longer hide a monthly critical; null when nothing applies.
function buildUsageWarning(dailyPercent, monthlyPercent) {
  const critical = [];
  const elevated = [];

  if (dailyPercent >= 90) {
    critical.push('CRITICAL: Daily limit nearly exhausted! Wait until tomorrow for large queries.');
  } else if (dailyPercent >= 80) {
    elevated.push('WARNING: Daily limit at 80%. Use queries sparingly.');
  }

  if (monthlyPercent >= 90) {
    critical.push('CRITICAL: Monthly budget nearly exhausted!');
  } else if (monthlyPercent >= 80) {
    elevated.push('WARNING: Monthly budget at 80%.');
  }

  return [...critical, ...elevated].join(' ') || null;
}

/**
 * MCP tool: report BigQuery usage as returned by the proxy's status endpoint,
 * together with the client's circuit breaker state and schema cache stats.
 *
 * The handler tolerates a status object that lacks `daily`, `monthly` or
 * `limits` (for example a not-ready status that only carries `error`).
 * Errors thrown by the proxy client are not caught here.
 */
const getStatusTool = {
  name: 'bigquery__get-status',
  description: 'Get current BigQuery usage status including daily/monthly limits, scan counts, and remaining budget. Also shows circuit breaker state and cache stats. Check this before running large queries.',
  inputSchema: {
    type: 'object',
    properties: {},
    required: []
  },

  async handler(args) {
    const client = await getClient();
    const status = await client.getStatus();

    // Get client state
    const circuitState = client.getCircuitBreakerState();
    const cacheStats = schemaCache.getStats();

    // Sections may be absent; fall back to empty objects for formatting only
    const daily = status.daily ?? {};
    const monthly = status.monthly ?? {};
    const limits = status.limits ?? {};

    // Build status message
    const lines = [
      'BigQuery Status:',
      ` Ready: ${status.ready ? 'Yes' : 'No'}`,
      '',
      'Daily Usage:',
      ` Scanned: ${showValue(daily.usedGB)} GB / ${showValue(daily.limitGB)} GB (${showValue(daily.percentUsed)}%)`,
      ` Queries: ${showValue(daily.queryCount)}`,
      '',
      'Monthly Usage:',
      ` Cost: EUR ${showValue(monthly.spentEUR)} / EUR ${showValue(monthly.limitEUR)} (${showValue(monthly.percentUsed)}%)`,
      ` Queries: ${showValue(monthly.queryCount)}`,
      '',
      'Limits:',
      ` Per query: ${showValue(limits.perQueryGB)} GB`,
      ` Daily: ${showValue(limits.dailyGB)} GB`,
      ` Monthly: EUR ${showValue(limits.monthlyEUR)}`,
    ];

    // Add warnings if close to limits
    const warning = buildUsageWarning(daily.percentUsed, monthly.percentUsed);

    return {
      content: [{
        type: 'text',
        text: JSON.stringify({
          ready: status.ready,
          error: status.error,
          limits: status.limits,
          daily: status.daily,
          monthly: status.monthly,
          warning,
          statusMessage: lines.join('\n'),
          circuitBreaker: circuitState,
          cache: cacheStats,
        }, null, 2)
      }]
    };
  }
};

export default getStatusTool;
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
// Validate a query without executing (dry-run)
|
|
2
|
+
|
|
3
|
+
import { getClient } from '../../proxy-client.js';
|
|
4
|
+
|
|
5
|
+
/**
 * MCP tool: dry-run a query through the proxy's validate endpoint.
 *
 * handler({ sql }) returns a text result whose JSON payload is either an
 * argument error, a rejection report (with an access-request hint when the
 * rejection looks like an authorization problem, otherwise a cost-reduction
 * hint), or an approval with the estimated scan size and remaining daily quota.
 */
export default {
  name: 'bigquery__validate-query',
  description: 'Validate a BigQuery query without executing it. Returns estimated bytes to scan and checks against cost limits. ALWAYS call this before bigquery__execute-query — it is free (no data scanned) and prevents surprise cost overruns. If validation fails, revise the query (add date filter, LIMIT, or fewer columns) before executing.',
  inputSchema: {
    type: 'object',
    properties: {
      sql: {
        type: 'string',
        description: 'The SQL query to validate'
      }
    },
    required: ['sql']
  },

  async handler(args) {
    // Serialize a payload into the single-text-item result shape
    const asText = (payload) => [{ type: 'text', text: JSON.stringify(payload, null, 2) }];

    const { sql } = args;
    const sqlIsUsable = Boolean(sql) && typeof sql === 'string';
    if (!sqlIsUsable) {
      return {
        content: asText({
          error: 'sql parameter is required and must be a string',
          hint: 'Provide a valid SQL query to validate'
        }),
        isError: true
      };
    }

    const proxy = await getClient();
    const validation = await proxy.validateQuery(sql);

    if (validation.approved) {
      const approvedMessage = `Query approved. Will scan ~${validation.estimatedGB}GB (limit: ${validation.scanLimitGB}GB per query).`;
      return {
        content: asText({
          approved: true,
          estimatedGB: validation.estimatedGB,
          scanLimitGB: validation.scanLimitGB,
          dailyUsedGB: validation.dailyUsedGB,
          dailyLimitGB: validation.dailyLimitGB,
          remainingDailyGB: validation.dailyLimitGB - validation.dailyUsedGB,
          tables: validation.tables,
          message: approvedMessage,
          hint: 'Safe to execute with bigquery__execute-query'
        })
      };
    }

    // Rejected: decide whether this is an access problem or a cost problem.
    // The message is only inspected when the reason did not already decide it.
    const messageMentions = (needle) => validation.message?.toLowerCase().includes(needle);
    const isAuthError = ['unauthorized', 'forbidden'].includes(validation.reason) ||
      messageMentions('permission') ||
      messageMentions('access denied');

    let hint;
    if (isAuthError) {
      hint = 'You do not have BigQuery access. Ask your infra manager to grant access by creating a ticket in the Jira PERM project: https://woolsocks.atlassian.net/jira/software/projects/PERM/boards — request "BigQuery read access for Claude Code".';
    } else {
      hint = 'Reduce query cost: (1) add a date filter e.g. WHERE created_at >= DATE_SUB(CURRENT_DATE(), INTERVAL 30 DAY), (2) select specific columns instead of *, (3) add LIMIT 1000.';
    }

    return {
      content: asText({
        approved: false,
        reason: validation.reason,
        message: validation.message,
        details: {
          bytesScanned: validation.bytesScanned,
          estimatedGB: validation.estimatedGB,
          scanLimitGB: validation.scanLimitGB,
          dailyUsedGB: validation.dailyUsedGB,
          dailyLimitGB: validation.dailyLimitGB,
          tables: validation.tables,
        },
        hint
      }),
      isError: true
    };
  }
};
|