woolsocks-bigquery-mcp 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,102 @@
1
+ // Get detailed table schema
2
+
3
+ import { getClient } from '../../proxy-client.js';
4
+ import { getCachedSchema, setCachedSchema } from '../../cache.js';
5
+
6
/**
 * MCP tool definition: detailed schema lookup for a single BigQuery table.
 *
 * The handler consults the schema cache before asking the proxy client, and
 * stores every freshly fetched schema back into the cache. Cache lifetime is
 * owned by the cache module, not by this file.
 */
export default {
  name: 'bigquery__get-table-schema',
  description: 'Get detailed schema for a BigQuery table including all columns, data types, partitioning, and clustering info. Essential before writing queries. Results are cached for 15 minutes.',
  inputSchema: {
    type: 'object',
    properties: {
      dataset_id: {
        type: 'string',
        description: 'The dataset ID containing the table'
      },
      table_id: {
        type: 'string',
        description: 'The table ID to get schema for'
      }
    },
    required: ['dataset_id', 'table_id']
  },

  /**
   * Resolve the schema for `dataset_id.table_id`.
   *
   * @param {{dataset_id?: string, table_id?: string}} [args] - Tool arguments.
   * @returns {Promise<object>} Tool result; `isError: true` is set when a
   *   required identifier is missing.
   * @throws Whatever `getClient()` or `client.getTableSchema()` rejects with;
   *   those errors are intentionally left to the caller.
   */
  async handler(args) {
    // A missing arguments object must produce the validation error below,
    // not a TypeError from destructuring undefined/null.
    const { dataset_id, table_id } = args ?? {};

    if (!dataset_id || !table_id) {
      const problem = {
        error: 'Both dataset_id and table_id are required',
        hint: 'Use bigquery__list-tables to see available tables'
      };
      return {
        content: [{ type: 'text', text: JSON.stringify(problem, null, 2) }],
        isError: true
      };
    }

    // Serve from cache when possible to avoid a round trip to the proxy.
    const cachedSchema = getCachedSchema(dataset_id, table_id);
    if (cachedSchema) {
      return formatSchemaResponse(cachedSchema, true);
    }

    const client = await getClient();
    const freshSchema = await client.getTableSchema(dataset_id, table_id);

    // Remember the result for subsequent lookups of the same table.
    setCachedSchema(dataset_id, table_id, freshSchema);

    return formatSchemaResponse(freshSchema, false);
  }
};
55
+
56
/**
 * Build the MCP tool result for a table schema lookup.
 *
 * The result carries a single text item whose body is pretty-printed JSON
 * containing the table metadata, the raw field list, a human-readable
 * rendering of the fields, and a hint on how to query the table.
 *
 * @param {object} schema - Table metadata object. Reads `datasetId`, `id`,
 *   `type`, `location`, `numRows`, `sizeGB`, `lastModified`, `partitioning`,
 *   `clustering`, `description` and `schema` (the field list).
 * @param {boolean} cached - Whether `schema` was served from the cache;
 *   echoed verbatim in the response body.
 * @returns {{content: Array<{type: string, text: string}>}} Tool result.
 */
function formatSchemaResponse(schema, cached) {
  // Render one line per field; nested fields follow their parent, indented
  // one extra space per nesting level.
  const formatFields = (fields, indent = 0) => {
    // A table without a field list renders as an empty string instead of
    // throwing on `undefined.map`.
    const list = Array.isArray(fields) ? fields : [];
    const prefix = ' '.repeat(indent);

    return list.map((f) => {
      let line = `${prefix}${f.name}: ${f.type}`;
      if (f.mode === 'REQUIRED') line += ' (required)';
      if (f.mode === 'REPEATED') line += ' (array)';
      if (f.description) line += ` -- ${f.description}`;

      // Only recurse into non-empty nested lists; an empty `fields: []`
      // would otherwise append a dangling newline.
      if (Array.isArray(f.fields) && f.fields.length > 0) {
        line += `\n${formatFields(f.fields, indent + 1)}`;
      }
      return line;
    }).join('\n');
  };

  const schemaText = formatFields(schema.schema);

  // Default hint; replaced by a partition-filter reminder for partitioned tables.
  let queryHint = 'Use bigquery__validate-query to check query cost before running.';
  if (schema.partitioning) {
    // No explicit partition column: fall back to the _PARTITIONTIME name.
    const partField = schema.partitioning.field || '_PARTITIONTIME';
    queryHint = `This table is partitioned on ${partField}. ALWAYS include a date filter like WHERE ${partField} >= 'YYYY-MM-DD' to avoid expensive full scans.`;
  }

  const body = {
    table: `${schema.datasetId}.${schema.id}`,
    type: schema.type,
    location: schema.location,
    numRows: schema.numRows,
    sizeGB: schema.sizeGB,
    lastModified: schema.lastModified,
    partitioning: schema.partitioning,
    clustering: schema.clustering,
    description: schema.description,
    schemaFormatted: schemaText,
    schema: schema.schema,
    cached,
    queryHint
  };

  return {
    content: [{ type: 'text', text: JSON.stringify(body, null, 2) }]
  };
}
@@ -0,0 +1,13 @@
1
+ // Schema discovery tools
2
+
3
+ import listDatasets from './list-datasets.js';
4
+ import listTables from './list-tables.js';
5
+ import getTableSchema from './get-table-schema.js';
6
+ import previewTable from './preview-table.js';
7
+
8
// The schema discovery tool definitions, exported as one ordered list:
// dataset listing, table listing, table schema lookup, table preview.
const schemaDiscoveryTools = [listDatasets, listTables, getTableSchema, previewTable];

export default schemaDiscoveryTools;
@@ -0,0 +1,69 @@
1
+ // List all BigQuery datasets
2
+
3
+ import { getClient } from '../../proxy-client.js';
4
+ import { getCachedDatasets, setCachedDatasets } from '../../cache.js';
5
+
6
/**
 * MCP tool definition: list the BigQuery datasets visible through the proxy.
 *
 * The handler answers from the dataset cache when it holds a value; otherwise
 * it queries the proxy client, caches the result and returns it. Failures are
 * reported as an `isError` tool result with a hint, never thrown.
 */
export default {
  name: 'bigquery__list-datasets',
  description: 'List all BigQuery datasets accessible to Woolsocks. Returns dataset names, locations, and basic metadata. Use this first to discover available data. Results are cached for 5 minutes.',
  inputSchema: {
    type: 'object',
    properties: {},
    required: []
  },

  /**
   * @param {object} [args] - Unused; the tool takes no arguments.
   * @returns {Promise<object>} Tool result with the dataset list, or an
   *   `isError: true` result describing why the listing failed.
   */
  async handler(args) {
    // Single place that shapes the success payload for both the cached and
    // the freshly fetched listing.
    const respond = (listing, fromCache) => ({
      content: [{
        type: 'text',
        text: JSON.stringify({
          datasets: listing.datasets,
          count: listing.count,
          cached: fromCache,
          hint: 'Use bigquery__list-tables with a dataset_id to see tables in a dataset.'
        }, null, 2)
      }]
    });

    const cachedListing = getCachedDatasets();
    if (cachedListing) {
      return respond(cachedListing, true);
    }

    let result;
    try {
      // Client creation lives inside the try so that a failure to reach or
      // authenticate against the proxy gets the same actionable hint as a
      // failed listing call, instead of escaping as a raw rejection.
      const client = await getClient();
      result = await client.listDatasets();
    } catch (err) {
      const message = err.message?.toLowerCase();
      const isAuthError = err.status === 401 || err.status === 403 ||
        message?.includes('permission') ||
        message?.includes('unauthorized');

      return {
        content: [{
          type: 'text',
          text: JSON.stringify({
            error: err.message,
            hint: isAuthError
              ? 'You do not have BigQuery access. Ask your infra manager to grant access by creating a ticket in the Jira PERM project: https://woolsocks.atlassian.net/jira/software/projects/PERM/boards — request "BigQuery read access for Claude Code".'
              : 'Failed to list datasets. Check that the BigQuery proxy is reachable.'
          }, null, 2)
        }],
        isError: true
      };
    }

    // Remember the listing for subsequent calls.
    setCachedDatasets(result);

    return respond(result, false);
  }
};
@@ -0,0 +1,72 @@
1
+ // List tables in a BigQuery dataset
2
+
3
+ import { getClient } from '../../proxy-client.js';
4
+ import { getCachedTables, setCachedTables } from '../../cache.js';
5
+
6
/**
 * MCP tool definition: list the tables of one BigQuery dataset.
 *
 * The handler answers from the per-dataset table cache when it holds a value;
 * otherwise it queries the proxy client, caches the result and returns it.
 */
export default {
  name: 'bigquery__list-tables',
  description: 'List all tables in a BigQuery dataset with size (GB), row count, and type (TABLE/VIEW). Tables are sorted by size descending. Results are cached for 5 minutes.',
  inputSchema: {
    type: 'object',
    properties: {
      dataset_id: {
        type: 'string',
        description: 'The dataset ID to list tables from (e.g., "analytics", "raw_data")'
      }
    },
    required: ['dataset_id']
  },

  /**
   * @param {{dataset_id?: string}} [args] - Tool arguments.
   * @returns {Promise<object>} Tool result; `isError: true` is set when
   *   `dataset_id` is missing.
   * @throws Whatever `getClient()` or `client.listTables()` rejects with;
   *   those errors are intentionally left to the caller.
   */
  async handler(args) {
    // A missing arguments object must produce the validation error below,
    // not a TypeError from destructuring undefined/null.
    const { dataset_id } = args ?? {};

    if (!dataset_id) {
      const problem = {
        error: 'dataset_id is required',
        hint: 'Use bigquery__list-datasets first to see available datasets'
      };
      return {
        content: [{ type: 'text', text: JSON.stringify(problem, null, 2) }],
        isError: true
      };
    }

    // Single place that shapes the success payload for both the cached and
    // the freshly fetched listing.
    const respond = (listing, fromCache) => ({
      content: [{
        type: 'text',
        text: JSON.stringify({
          datasetId: listing.datasetId,
          tables: listing.tables,
          count: listing.count,
          cached: fromCache,
          hint: 'Use bigquery__get-table-schema to see column details for a specific table.'
        }, null, 2)
      }]
    });

    const cachedListing = getCachedTables(dataset_id);
    if (cachedListing) {
      return respond(cachedListing, true);
    }

    const client = await getClient();
    const result = await client.listTables(dataset_id);

    // Remember the listing for subsequent calls on the same dataset.
    setCachedTables(dataset_id, result);

    return respond(result, false);
  }
};
@@ -0,0 +1,65 @@
1
+ // Preview sample rows from a table
2
+
3
+ import { getClient } from '../../proxy-client.js';
4
+ import { DEFAULT_PREVIEW_ROWS, MAX_PREVIEW_ROWS } from '../../config.js';
5
+
6
/**
 * MCP tool definition: fetch a small sample of rows from a BigQuery table.
 *
 * Unlike the other schema discovery tools in this package, this handler does
 * not consult or populate any cache; every call goes to the proxy client.
 */
export default {
  name: 'bigquery__preview-table',
  description: `Get sample rows from a BigQuery table to understand data format. Limited to ${MAX_PREVIEW_ROWS} rows max. Only works on EU-region tables. Results are NOT cached.`,
  inputSchema: {
    type: 'object',
    properties: {
      dataset_id: {
        type: 'string',
        description: 'The dataset ID containing the table'
      },
      table_id: {
        type: 'string',
        description: 'The table ID to preview'
      },
      max_rows: {
        type: 'number',
        description: `Number of rows to preview (default: ${DEFAULT_PREVIEW_ROWS}, max: ${MAX_PREVIEW_ROWS})`,
        minimum: 1,
        maximum: MAX_PREVIEW_ROWS
      }
    },
    required: ['dataset_id', 'table_id']
  },

  /**
   * @param {{dataset_id?: string, table_id?: string, max_rows?: number}} [args]
   *   Tool arguments.
   * @returns {Promise<object>} Tool result; `isError: true` is set when a
   *   required identifier is missing.
   * @throws Whatever `getClient()` or `client.previewTable()` rejects with;
   *   those errors are intentionally left to the caller.
   */
  async handler(args) {
    // A missing arguments object must produce the validation error below,
    // not a TypeError from destructuring undefined/null.
    const { dataset_id, table_id, max_rows } = args ?? {};

    if (!dataset_id || !table_id) {
      const problem = {
        error: 'Both dataset_id and table_id are required',
        hint: 'Use bigquery__list-tables to see available tables'
      };
      return {
        content: [{ type: 'text', text: JSON.stringify(problem, null, 2) }],
        isError: true
      };
    }

    // The input schema advertises an integer-like count in [1, MAX], but the
    // handler cannot rely on the caller having validated it. Fractions are
    // floored; anything that is not a number >= 1 (missing, 0, negative,
    // NaN) falls back to the default; the upper bound is always enforced.
    const requested = Math.floor(Number(max_rows));
    const maxRows = Math.min(
      requested >= 1 ? requested : DEFAULT_PREVIEW_ROWS,
      MAX_PREVIEW_ROWS
    );

    const client = await getClient();
    const preview = await client.previewTable(dataset_id, table_id, maxRows);

    const body = {
      table: `${preview.datasetId}.${preview.tableId}`,
      location: preview.location,
      rowCount: preview.rowCount,
      maxRowsRequested: preview.maxRowsRequested,
      totalRowsInTable: preview.totalRowsInTable,
      rows: preview.rows,
      hint: 'This is a sample of actual data. Use bigquery__execute-query for filtered/aggregated queries.'
    };

    return {
      content: [{ type: 'text', text: JSON.stringify(body, null, 2) }]
    };
  }
};