woolsocks-bigquery-mcp 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +86 -0
- package/cache.js +119 -0
- package/config.js +43 -0
- package/index.js +146 -0
- package/package.json +33 -0
- package/proxy-client.js +303 -0
- package/secrets.js +85 -0
- package/tools/index.js +15 -0
- package/tools/query/execute-query.js +115 -0
- package/tools/query/get-status.js +71 -0
- package/tools/query/index.js +11 -0
- package/tools/query/validate-query.js +83 -0
- package/tools/schema/get-table-schema.js +102 -0
- package/tools/schema/index.js +13 -0
- package/tools/schema/list-datasets.js +69 -0
- package/tools/schema/list-tables.js +72 -0
- package/tools/schema/preview-table.js +65 -0
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
// Get detailed table schema
|
|
2
|
+
|
|
3
|
+
import { getClient } from '../../proxy-client.js';
|
|
4
|
+
import { getCachedSchema, setCachedSchema } from '../../cache.js';
|
|
5
|
+
|
|
6
|
+
/**
 * MCP tool definition for `bigquery__get-table-schema`.
 *
 * The handler validates its arguments, answers from the local schema cache
 * when `getCachedSchema` returns a truthy entry, and otherwise fetches the
 * schema through the proxy client and stores it with `setCachedSchema`
 * before formatting the response.
 */
export default {
  name: 'bigquery__get-table-schema',
  description: 'Get detailed schema for a BigQuery table including all columns, data types, partitioning, and clustering info. Essential before writing queries. Results are cached for 15 minutes.',
  inputSchema: {
    type: 'object',
    properties: {
      dataset_id: {
        type: 'string',
        description: 'The dataset ID containing the table'
      },
      table_id: {
        type: 'string',
        description: 'The table ID to get schema for'
      }
    },
    required: ['dataset_id', 'table_id']
  },

  /**
   * Look up the schema of one table.
   *
   * @param {{dataset_id?: string, table_id?: string}} [args] - Tool arguments.
   * @returns {Promise<object>} The formatted schema response, or a response
   *   flagged with `isError: true` when either identifier is missing.
   *   Rejections from `getClient()` or `client.getTableSchema()` are not
   *   caught here and propagate to the caller.
   */
  async handler(args) {
    // A call without an arguments object must reach the validation branch
    // below instead of failing with a TypeError while destructuring.
    const { dataset_id, table_id } = args ?? {};

    if (!dataset_id || !table_id) {
      return {
        content: [{
          type: 'text',
          text: JSON.stringify({
            error: 'Both dataset_id and table_id are required',
            hint: 'Use bigquery__list-tables to see available tables'
          }, null, 2)
        }],
        isError: true
      };
    }

    // Check cache first
    const cached = getCachedSchema(dataset_id, table_id);
    if (cached) {
      return formatSchemaResponse(cached, true);
    }

    const client = await getClient();
    const schema = await client.getTableSchema(dataset_id, table_id);

    // Cache the result
    setCachedSchema(dataset_id, table_id, schema);

    return formatSchemaResponse(schema, false);
  }
};
|
|
55
|
+
|
|
56
|
+
/**
 * Build the tool response for a table-schema lookup.
 *
 * @param {object} schema - Table metadata. The fields read here are
 *   `datasetId`, `id`, `type`, `location`, `numRows`, `sizeGB`,
 *   `lastModified`, `partitioning`, `clustering`, `description` and
 *   `schema` (the list of column definitions).
 * @param {boolean} cached - Echoed in the payload as `cached`.
 * @returns {{content: Array<{type: string, text: string}>}} A response whose
 *   single text item is the pretty-printed JSON payload.
 */
function formatSchemaResponse(schema, cached) {
  // Render one line per column; nested columns follow their parent on their
  // own lines, indented one extra space per nesting level.
  const formatFields = (fields, indent = 0) => {
    const prefix = ' '.repeat(indent);
    return fields.map((f) => {
      let line = `${prefix}${f.name}: ${f.type}`;
      if (f.mode === 'REQUIRED') line += ' (required)';
      if (f.mode === 'REPEATED') line += ' (array)';
      if (f.description) line += ` -- ${f.description}`;

      // Only recurse into a non-empty list: an empty `fields` array is truthy
      // and would otherwise append a newline with nothing after it.
      if (Array.isArray(f.fields) && f.fields.length > 0) {
        line += `\n${formatFields(f.fields, indent + 1)}`;
      }
      return line;
    }).join('\n');
  };

  // Metadata without a column list yields an empty summary instead of a
  // TypeError from calling `.map` on undefined.
  const schemaText = formatFields(Array.isArray(schema.schema) ? schema.schema : []);

  // Build query hints
  let queryHint = 'Use bigquery__validate-query to check query cost before running.';
  if (schema.partitioning) {
    // When no partition column is named, the hint refers to _PARTITIONTIME.
    const partField = schema.partitioning.field || '_PARTITIONTIME';
    queryHint = `This table is partitioned on ${partField}. ALWAYS include a date filter like WHERE ${partField} >= 'YYYY-MM-DD' to avoid expensive full scans.`;
  }

  return {
    content: [{
      type: 'text',
      text: JSON.stringify({
        table: `${schema.datasetId}.${schema.id}`,
        type: schema.type,
        location: schema.location,
        numRows: schema.numRows,
        sizeGB: schema.sizeGB,
        lastModified: schema.lastModified,
        partitioning: schema.partitioning,
        clustering: schema.clustering,
        description: schema.description,
        schemaFormatted: schemaText,
        schema: schema.schema,
        cached,
        queryHint
      }, null, 2)
    }]
  };
}
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
// Schema discovery tools
|
|
2
|
+
|
|
3
|
+
import listDatasets from './list-datasets.js';
|
|
4
|
+
import listTables from './list-tables.js';
|
|
5
|
+
import getTableSchema from './get-table-schema.js';
|
|
6
|
+
import previewTable from './preview-table.js';
|
|
7
|
+
|
|
8
|
+
// Schema discovery tool definitions, exported together as one array.
const schemaTools = [listDatasets, listTables, getTableSchema, previewTable];

export default schemaTools;
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
// List all BigQuery datasets
|
|
2
|
+
|
|
3
|
+
import { getClient } from '../../proxy-client.js';
|
|
4
|
+
import { getCachedDatasets, setCachedDatasets } from '../../cache.js';
|
|
5
|
+
|
|
6
|
+
/**
 * MCP tool definition for `bigquery__list-datasets`.
 *
 * The handler returns the cached listing when `getCachedDatasets` yields one;
 * otherwise it asks the proxy client, caches the result with
 * `setCachedDatasets` and returns it. A failure of `client.listDatasets()` is
 * turned into an `isError` response carrying a hint.
 */
export default {
  name: 'bigquery__list-datasets',
  description: 'List all BigQuery datasets accessible to Woolsocks. Returns dataset names, locations, and basic metadata. Use this first to discover available data. Results are cached for 5 minutes.',
  inputSchema: {
    type: 'object',
    properties: {},
    required: []
  },

  /**
   * @param {object} [args] - Unused; the tool takes no parameters.
   * @returns {Promise<object>} Response whose single text item is the
   *   pretty-printed JSON payload.
   */
  async handler(args) {
    // Wrap a payload as the single pretty-printed text item of a response.
    const asContent = (payload) => [{
      type: 'text',
      text: JSON.stringify(payload, null, 2)
    }];

    // Success payload shared by the cached and the freshly fetched paths.
    const listingResponse = (listing, fromCache) => ({
      content: asContent({
        datasets: listing.datasets,
        count: listing.count,
        cached: fromCache,
        hint: 'Use bigquery__list-tables with a dataset_id to see tables in a dataset.'
      })
    });

    const cachedListing = getCachedDatasets();
    if (cachedListing) {
      return listingResponse(cachedListing, true);
    }

    const client = await getClient();

    let listing;
    try {
      listing = await client.listDatasets();
    } catch (err) {
      // The message is only inspected when the status code alone does not
      // already identify an authorization failure.
      const messageMentions = (word) => err.message?.toLowerCase().includes(word);
      const isAuthError = [401, 403].includes(err.status) ||
        messageMentions('permission') ||
        messageMentions('unauthorized');

      let hint;
      if (isAuthError) {
        hint = 'You do not have BigQuery access. Ask your infra manager to grant access by creating a ticket in the Jira PERM project: https://woolsocks.atlassian.net/jira/software/projects/PERM/boards — request "BigQuery read access for Claude Code".';
      } else {
        hint = 'Failed to list datasets. Check that the BigQuery proxy is reachable.';
      }

      return {
        content: asContent({ error: err.message, hint }),
        isError: true
      };
    }

    // Remember the fresh listing for later calls.
    setCachedDatasets(listing);

    return listingResponse(listing, false);
  }
};
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
// List tables in a BigQuery dataset
|
|
2
|
+
|
|
3
|
+
import { getClient } from '../../proxy-client.js';
|
|
4
|
+
import { getCachedTables, setCachedTables } from '../../cache.js';
|
|
5
|
+
|
|
6
|
+
/**
 * MCP tool definition for `bigquery__list-tables`.
 *
 * The handler validates `dataset_id`, answers from the cache when
 * `getCachedTables` returns a truthy entry, and otherwise fetches the listing
 * through the proxy client and stores it with `setCachedTables`.
 */
export default {
  name: 'bigquery__list-tables',
  description: 'List all tables in a BigQuery dataset with size (GB), row count, and type (TABLE/VIEW). Tables are sorted by size descending. Results are cached for 5 minutes.',
  inputSchema: {
    type: 'object',
    properties: {
      dataset_id: {
        type: 'string',
        description: 'The dataset ID to list tables from (e.g., "analytics", "raw_data")'
      }
    },
    required: ['dataset_id']
  },

  /**
   * List the tables of one dataset.
   *
   * @param {{dataset_id?: string}} [args] - Tool arguments.
   * @returns {Promise<object>} The table listing response, or a response
   *   flagged with `isError: true` when `dataset_id` is missing. Rejections
   *   from `getClient()` or `client.listTables()` are not caught here and
   *   propagate to the caller.
   */
  async handler(args) {
    // A call without an arguments object must reach the validation branch
    // below instead of failing with a TypeError while destructuring.
    const { dataset_id } = args ?? {};

    if (!dataset_id) {
      return {
        content: [{
          type: 'text',
          text: JSON.stringify({
            error: 'dataset_id is required',
            hint: 'Use bigquery__list-datasets first to see available datasets'
          }, null, 2)
        }],
        isError: true
      };
    }

    // One payload shape for both the cached and the freshly fetched listing.
    const listingResponse = (listing, fromCache) => ({
      content: [{
        type: 'text',
        text: JSON.stringify({
          datasetId: listing.datasetId,
          tables: listing.tables,
          count: listing.count,
          cached: fromCache,
          hint: 'Use bigquery__get-table-schema to see column details for a specific table.'
        }, null, 2)
      }]
    });

    // Check cache first
    const cached = getCachedTables(dataset_id);
    if (cached) {
      return listingResponse(cached, true);
    }

    const client = await getClient();
    const result = await client.listTables(dataset_id);

    // Cache the result
    setCachedTables(dataset_id, result);

    return listingResponse(result, false);
  }
};
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
// Preview sample rows from a table
|
|
2
|
+
|
|
3
|
+
import { getClient } from '../../proxy-client.js';
|
|
4
|
+
import { DEFAULT_PREVIEW_ROWS, MAX_PREVIEW_ROWS } from '../../config.js';
|
|
5
|
+
|
|
6
|
+
/**
 * MCP tool definition for `bigquery__preview-table`.
 *
 * The handler validates the table identifiers, normalizes the requested row
 * count and asks the proxy client for a sample of rows. Nothing is cached
 * in this block.
 */
export default {
  name: 'bigquery__preview-table',
  description: `Get sample rows from a BigQuery table to understand data format. Limited to ${MAX_PREVIEW_ROWS} rows max. Only works on EU-region tables. Results are NOT cached.`,
  inputSchema: {
    type: 'object',
    properties: {
      dataset_id: {
        type: 'string',
        description: 'The dataset ID containing the table'
      },
      table_id: {
        type: 'string',
        description: 'The table ID to preview'
      },
      max_rows: {
        type: 'number',
        description: `Number of rows to preview (default: ${DEFAULT_PREVIEW_ROWS}, max: ${MAX_PREVIEW_ROWS})`,
        minimum: 1,
        maximum: MAX_PREVIEW_ROWS
      }
    },
    required: ['dataset_id', 'table_id']
  },

  /**
   * Fetch sample rows from one table.
   *
   * @param {{dataset_id?: string, table_id?: string, max_rows?: number}} [args]
   *   Tool arguments. A `max_rows` that is missing, zero or not numeric falls
   *   back to `DEFAULT_PREVIEW_ROWS`; any other value is truncated to an
   *   integer and clamped to the range 1..`MAX_PREVIEW_ROWS`.
   * @returns {Promise<object>} The preview response, or a response flagged
   *   with `isError: true` when either identifier is missing. Rejections from
   *   `getClient()` or `client.previewTable()` are not caught here and
   *   propagate to the caller.
   */
  async handler(args) {
    // A call without an arguments object must reach the validation branch
    // below instead of failing with a TypeError while destructuring.
    const { dataset_id, table_id, max_rows } = args ?? {};

    if (!dataset_id || !table_id) {
      return {
        content: [{
          type: 'text',
          text: JSON.stringify({
            error: 'Both dataset_id and table_id are required',
            hint: 'Use bigquery__list-tables to see available tables'
          }, null, 2)
        }],
        isError: true
      };
    }

    const client = await getClient();

    // Enforce the bounds the input schema declares: negative, fractional or
    // non-numeric requests must not be forwarded to the proxy unchanged.
    const requestedRows = Number(max_rows) || DEFAULT_PREVIEW_ROWS;
    const maxRows = Math.min(Math.max(Math.trunc(requestedRows), 1), MAX_PREVIEW_ROWS);

    const preview = await client.previewTable(dataset_id, table_id, maxRows);

    return {
      content: [{
        type: 'text',
        text: JSON.stringify({
          table: `${preview.datasetId}.${preview.tableId}`,
          location: preview.location,
          rowCount: preview.rowCount,
          maxRowsRequested: preview.maxRowsRequested,
          totalRowsInTable: preview.totalRowsInTable,
          rows: preview.rows,
          hint: 'This is a sample of actual data. Use bigquery__execute-query for filtered/aggregated queries.'
        }, null, 2)
      }]
    };
  }
};
|