gcs-google-mcp-server 0.1.8 → 0.1.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +35 -11
- package/build/healthcheck.js +37 -0
- package/build/index.js +5 -12
- package/package.json +1 -1
- package/shared/gcs-client/gcs-client.d.ts +8 -0
- package/shared/gcs-client/gcs-client.integration-mock.js +12 -0
- package/shared/gcs-client/gcs-client.js +8 -0
- package/shared/index.d.ts +1 -1
- package/shared/tools/download-object.d.ts +52 -0
- package/shared/tools/download-object.js +82 -0
- package/shared/tools/download-prefix.d.ts +67 -0
- package/shared/tools/download-prefix.js +169 -0
- package/shared/tools.js +4 -0
package/README.md
CHANGED
|
@@ -20,17 +20,41 @@ MCP server for Google Cloud Storage operations with fine-grained tool access con
|
|
|
20
20
|
|
|
21
21
|
### Tools
|
|
22
22
|
|
|
23
|
-
| Tool
|
|
24
|
-
|
|
|
25
|
-
| `list_buckets`
|
|
26
|
-
| `list_objects`
|
|
27
|
-
| `get_object`
|
|
28
|
-
| `
|
|
29
|
-
| `
|
|
30
|
-
| `
|
|
31
|
-
| `
|
|
32
|
-
| `
|
|
33
|
-
| `
|
|
23
|
+
| Tool | Group | Description |
|
|
24
|
+
| ----------------- | --------- | ---------------------------------------------------------------- |
|
|
25
|
+
| `list_buckets` | readonly | List all GCS buckets in the Google Cloud project |
|
|
26
|
+
| `list_objects` | readonly | List objects in a bucket with prefix and pagination |
|
|
27
|
+
| `get_object` | readonly | Get object contents as text |
|
|
28
|
+
| `download_object` | readonly | Download a single object to a local file (binary-safe) |
|
|
29
|
+
| `download_prefix` | readonly | Recursively download a prefix to a local directory (binary-safe) |
|
|
30
|
+
| `head_bucket` | readonly | Check if a bucket exists and is accessible |
|
|
31
|
+
| `put_object` | readwrite | Upload or update an object |
|
|
32
|
+
| `copy_object` | readwrite | Copy an object within or across buckets |
|
|
33
|
+
| `create_bucket` | readwrite | Create a new GCS bucket |
|
|
34
|
+
| `delete_object` | delete | Delete an object from a bucket |
|
|
35
|
+
| `delete_bucket` | delete | Delete an empty GCS bucket |
|
|
36
|
+
|
|
37
|
+
### Downloading to Local Disk
|
|
38
|
+
|
|
39
|
+
`get_object` returns an object's contents inline as UTF-8 text, which is lossy for binary data and impractical for large files or many files at once. For local data-wrangling, use the download tools instead — both stream **raw bytes** to disk and belong to the `readonly` toolgroup:
|
|
40
|
+
|
|
41
|
+
- **`download_object`** — download a single object to a local file path. Defaults to a unique file under the OS temp directory.
|
|
42
|
+
- **`download_prefix`** — recursively download every object under a prefix to a local directory, preserving the key path structure as subdirectories. Paginates through the full listing, skips directory-placeholder objects (keys ending in `/`), collects per-object errors without aborting the batch, and returns a manifest:
|
|
43
|
+
|
|
44
|
+
```json
|
|
45
|
+
{
|
|
46
|
+
"destinationDir": "/tmp/gcs-download-my-bucket-1700000000000",
|
|
47
|
+
"objectCount": 1234,
|
|
48
|
+
"totalBytes": 5678901,
|
|
49
|
+
"files": [
|
|
50
|
+
{ "key": "logs/2024/01/data.json", "localPath": "/tmp/.../01/data.json", "size": 1234 }
|
|
51
|
+
],
|
|
52
|
+
"filesTruncated": true,
|
|
53
|
+
"errors": []
|
|
54
|
+
}
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
The inline `files` list is capped (`maxInlineEntries`, default 100), but `objectCount` and `totalBytes` always reflect the full download. When `destinationDir` is omitted it defaults to a unique folder under the OS temp directory.
|
|
34
58
|
|
|
35
59
|
### Resources
|
|
36
60
|
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
import { logInfo } from '../shared/logging.js';
|
|
2
|
+
/**
 * Thrown when a constrained bucket cannot be reached (does not exist or the
 * service account lacks access to it).
 *
 * The bucket name is exposed as the `bucket` property so callers can react to
 * the failure programmatically instead of parsing the message text.
 */
export class BucketNotAccessibleError extends Error {
    /**
     * @param {string} bucket - Name of the bucket that could not be reached.
     */
    constructor(bucket) {
        super(`Constrained bucket "${bucket}" does not exist or is not accessible`);
        this.name = 'BucketNotAccessibleError';
        this.bucket = bucket;
    }
}
|
|
12
|
+
/**
 * Check that the configured GCS credentials can actually reach storage.
 *
 * Two modes:
 * - No bucket constraint: list buckets. This needs project-level access
 *   (`storage.buckets.list`), which is expected for an unconstrained server.
 * - Constrained to one bucket (`GCS_BUCKET`): probe only that bucket with
 *   `headBucket`, which needs just bucket-level permission
 *   (`storage.buckets.get`). `listBuckets()` is deliberately NOT called here,
 *   because a least-privilege, bucket-scoped service account does not hold
 *   the project-level list permission.
 *
 * @param client - GCS client exposing `listBuckets()` and `headBucket(name)`.
 * @param constrainedBucket - Bucket the server is limited to, if any.
 * @throws {BucketNotAccessibleError} when the constrained bucket probe
 *   reports the bucket as missing/inaccessible. Errors raised by the client
 *   itself propagate unchanged; callers decide how to surface them.
 */
export async function validateGcsCredentials(client, constrainedBucket) {
    if (!constrainedBucket) {
        // Unconstrained: a successful listing is the credential check.
        await client.listBuckets();
        logInfo('healthcheck', 'GCS credentials validated successfully');
        return;
    }
    const reachable = await client.headBucket(constrainedBucket);
    if (!reachable) {
        throw new BucketNotAccessibleError(constrainedBucket);
    }
    logInfo('healthcheck', `Constrained bucket "${constrainedBucket}" verified`);
}
|
package/build/index.js
CHANGED
|
@@ -5,6 +5,7 @@ import { fileURLToPath } from 'url';
|
|
|
5
5
|
import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
|
|
6
6
|
import { createMCPServer, GoogleCloudStorageClient } from '../shared/index.js';
|
|
7
7
|
import { logServerStart, logError, logWarning, logInfo } from '../shared/logging.js';
|
|
8
|
+
import { validateGcsCredentials } from './healthcheck.js';
|
|
8
9
|
// Read version from package.json
|
|
9
10
|
const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
10
11
|
const packageJsonPath = join(__dirname, '..', 'package.json');
|
|
@@ -108,18 +109,10 @@ async function performHealthChecks() {
|
|
|
108
109
|
keyFilePath,
|
|
109
110
|
keyFileContents,
|
|
110
111
|
});
|
|
111
|
-
//
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
if (constrainedBucket) {
|
|
116
|
-
const bucketExists = await client.headBucket(constrainedBucket);
|
|
117
|
-
if (!bucketExists) {
|
|
118
|
-
logError('healthcheck', `Constrained bucket "${constrainedBucket}" does not exist or is not accessible`);
|
|
119
|
-
process.exit(1);
|
|
120
|
-
}
|
|
121
|
-
logInfo('healthcheck', `Constrained bucket "${constrainedBucket}" verified`);
|
|
122
|
-
}
|
|
112
|
+
// Validate credentials. When constrained to a single bucket, this probes
|
|
113
|
+
// only that bucket and never calls listBuckets(), so a least-privilege,
|
|
114
|
+
// bucket-scoped service account (without storage.buckets.list) can start.
|
|
115
|
+
await validateGcsCredentials(client, constrainedBucket);
|
|
123
116
|
}
|
|
124
117
|
catch (error) {
|
|
125
118
|
const message = error instanceof Error ? error.message : String(error);
|
package/package.json
CHANGED
|
@@ -35,6 +35,12 @@ export interface GetObjectResult {
|
|
|
35
35
|
etag?: string;
|
|
36
36
|
metadata?: Record<string, string>;
|
|
37
37
|
}
|
|
38
|
+
/** Result of fetching an object's raw bytes via `getObjectBytes`. */
export interface GetObjectBytesResult {
    /** Raw object bytes. Unlike getObject, this is NOT decoded to UTF-8, so it is binary-safe. */
    content: Buffer;
    /** Content type of the object. Optional: callers must not rely on it being set. */
    contentType?: string;
    /** Length of `content` in bytes, when provided. */
    contentLength?: number;
}
|
|
38
44
|
export interface PutObjectOptions {
|
|
39
45
|
contentType?: string;
|
|
40
46
|
metadata?: Record<string, string>;
|
|
@@ -51,6 +57,7 @@ export interface IGCSClient {
|
|
|
51
57
|
listBuckets(): Promise<ListBucketsResult>;
|
|
52
58
|
listObjects(bucket: string, options?: ListObjectsOptions): Promise<ListObjectsResult>;
|
|
53
59
|
getObject(bucket: string, key: string): Promise<GetObjectResult>;
|
|
60
|
+
getObjectBytes(bucket: string, key: string): Promise<GetObjectBytesResult>;
|
|
54
61
|
putObject(bucket: string, key: string, content: string, options?: PutObjectOptions): Promise<PutObjectResult>;
|
|
55
62
|
deleteObject(bucket: string, key: string): Promise<void>;
|
|
56
63
|
createBucket(bucket: string, location?: string): Promise<void>;
|
|
@@ -64,6 +71,7 @@ export declare class GoogleCloudStorageClient implements IGCSClient {
|
|
|
64
71
|
listBuckets(): Promise<ListBucketsResult>;
|
|
65
72
|
listObjects(bucket: string, options?: ListObjectsOptions): Promise<ListObjectsResult>;
|
|
66
73
|
getObject(bucket: string, key: string): Promise<GetObjectResult>;
|
|
74
|
+
getObjectBytes(bucket: string, key: string): Promise<GetObjectBytesResult>;
|
|
67
75
|
putObject(bucket: string, key: string, content: string, options?: PutObjectOptions): Promise<PutObjectResult>;
|
|
68
76
|
deleteObject(bucket: string, key: string): Promise<void>;
|
|
69
77
|
createBucket(bucket: string, location?: string): Promise<void>;
|
|
@@ -65,6 +65,18 @@ export function createIntegrationMockGCSClient(mockData = {}) {
|
|
|
65
65
|
metadata: obj.metadata || {},
|
|
66
66
|
};
|
|
67
67
|
},
|
|
68
|
+
async getObjectBytes(bucket, key) {
|
|
69
|
+
const bucketObjects = objects?.[bucket];
|
|
70
|
+
if (!bucketObjects || !bucketObjects[key]) {
|
|
71
|
+
throw new Error(`Object not found: ${bucket}/${key}`);
|
|
72
|
+
}
|
|
73
|
+
const content = Buffer.from(bucketObjects[key].content);
|
|
74
|
+
return {
|
|
75
|
+
content,
|
|
76
|
+
contentType: bucketObjects[key].contentType || 'text/plain',
|
|
77
|
+
contentLength: content.length,
|
|
78
|
+
};
|
|
79
|
+
},
|
|
68
80
|
async putObject(bucket, key, content, putOptions = {}) {
|
|
69
81
|
if (!objects) {
|
|
70
82
|
throw new Error('Mock data not initialized');
|
|
@@ -66,6 +66,14 @@ export class GoogleCloudStorageClient {
|
|
|
66
66
|
metadata: metadata.metadata || {},
|
|
67
67
|
};
|
|
68
68
|
}
|
|
69
|
+
async getObjectBytes(bucket, key) {
|
|
70
|
+
const file = this.storage.bucket(bucket).file(key);
|
|
71
|
+
const [content] = await file.download();
|
|
72
|
+
return {
|
|
73
|
+
content,
|
|
74
|
+
contentLength: content.length,
|
|
75
|
+
};
|
|
76
|
+
}
|
|
69
77
|
async putObject(bucket, key, content, options = {}) {
|
|
70
78
|
const file = this.storage.bucket(bucket).file(key);
|
|
71
79
|
await file.save(content, {
|
package/shared/index.d.ts
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
export { registerResources } from './resources.js';
|
|
2
2
|
export { createRegisterTools, type ToolGroup, parseEnabledToolGroups, parseToolFilters, getAllToolNames, } from './tools.js';
|
|
3
3
|
export { createMCPServer, type CreateMCPServerOptions, type GCSClientFactory, type IGCSClient, type GCSClientConfig, GoogleCloudStorageClient, } from './server.js';
|
|
4
|
-
export { type ListBucketsResult, type ListObjectsOptions, type ListObjectsResult, type GetObjectResult, type PutObjectOptions, type PutObjectResult, type CopyObjectResult, } from './gcs-client/gcs-client.js';
|
|
4
|
+
export { type ListBucketsResult, type ListObjectsOptions, type ListObjectsResult, type GetObjectResult, type GetObjectBytesResult, type PutObjectOptions, type PutObjectResult, type CopyObjectResult, } from './gcs-client/gcs-client.js';
|
|
5
5
|
export { createIntegrationMockGCSClient, type MockGCSData, } from './gcs-client/gcs-client.integration-mock.js';
|
|
6
6
|
export { getSelectedResourceId, hasSelectedResource, isResourceLocked, getServerState, setSelectedResourceId, clearSelectedResource, initializeStateFromEnvironment, resetState, } from './state.js';
|
|
7
7
|
export { logServerStart, logError, logWarning, logInfo, logDebug } from './logging.js';
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
import { z } from 'zod';
|
|
2
|
+
import type { Server } from '@modelcontextprotocol/sdk/server/index.js';
|
|
3
|
+
import type { GCSClientFactory } from '../server.js';
|
|
4
|
+
/**
 * Zod schema for `download_object` arguments: `bucket` and `key` are required
 * strings; `destinationPath` is an optional string.
 */
export declare const DownloadObjectSchema: z.ZodObject<{
    bucket: z.ZodString;
    key: z.ZodString;
    destinationPath: z.ZodOptional<z.ZodString>;
}, "strip", z.ZodTypeAny, {
    bucket: string;
    key: string;
    destinationPath?: string | undefined;
}, {
    bucket: string;
    key: string;
    destinationPath?: string | undefined;
}>;
|
|
17
|
+
/**
 * Build the `download_object` tool definition.
 *
 * @param _server - MCP server instance (underscore-prefixed; presumably unused — confirm in the implementation).
 * @param clientFactory - Factory used to obtain a GCS client.
 * @returns The tool's name, description, JSON input schema, and an async
 *   handler. The handler resolves to text content, with `isError` set on the
 *   failure variant of the result.
 */
export declare function downloadObjectTool(_server: Server, clientFactory: GCSClientFactory): {
    name: string;
    description: string;
    inputSchema: {
        type: "object";
        properties: {
            bucket: {
                type: string;
                description: "The name of the GCS bucket (e.g., \"my-app-data\")";
            };
            key: {
                type: string;
                description: "The object key (path) within the bucket (e.g., \"exports/archive.tar.gz\")";
            };
            destinationPath: {
                type: string;
                description: "Absolute or relative local file path to write the object to. Defaults to a unique file under the OS temp directory, preserving the key path (e.g., \"/tmp/gcs-download-<bucket>-<timestamp>/<key>\").";
            };
        };
        required: string[];
    };
    handler: (args: unknown) => Promise<{
        content: {
            type: string;
            text: string;
        }[];
        isError?: undefined;
    } | {
        content: {
            type: string;
            text: string;
        }[];
        isError: boolean;
    }>;
};
|
|
52
|
+
//# sourceMappingURL=download-object.d.ts.map
|
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
import { z } from 'zod';
|
|
2
|
+
import { mkdir, writeFile } from 'fs/promises';
|
|
3
|
+
import os from 'os';
|
|
4
|
+
import path from 'path';
|
|
5
|
+
// Parameter descriptions kept in one place so the zod schema and the JSON
// input schema advertised by the tool stay in sync.
const PARAM_DESCRIPTIONS = {
    bucket: 'The name of the GCS bucket (e.g., "my-app-data")',
    key: 'The object key (path) within the bucket (e.g., "exports/archive.tar.gz")',
    destinationPath: 'Absolute or relative local file path to write the object to. Defaults to a unique file under the OS temp directory, preserving the key path (e.g., "/tmp/gcs-download-<bucket>-<timestamp>/<key>").',
};
/**
 * Runtime validation for `download_object` arguments: `bucket` and `key` must
 * be non-empty strings; `destinationPath`, when given, must be non-empty too.
 */
export const DownloadObjectSchema = z.object({
    bucket: z.string().min(1).describe(PARAM_DESCRIPTIONS.bucket),
    key: z.string().min(1).describe(PARAM_DESCRIPTIONS.key),
    destinationPath: z.string().min(1).optional().describe(PARAM_DESCRIPTIONS.destinationPath),
});
|
|
15
|
+
/**
 * Reduce a bucket name to a single, separator-free path segment.
 *
 * The bucket name is caller-supplied and only validated as a non-empty string,
 * so a value such as "../../etc" would otherwise move the default download
 * root outside the OS temp directory. Characters that are valid in GCS bucket
 * names (letters, digits, ".", "_", "-") are kept, so real bucket names map to
 * themselves.
 */
function bucketPathSegment(bucket) {
    return bucket.replace(/[^A-Za-z0-9._-]/g, '_');
}
/**
 * Build the `download_object` tool definition.
 *
 * The handler validates its arguments, fetches the object's raw bytes through
 * the client's `getObjectBytes`, creates the parent directory, writes the
 * bytes to disk, and reports the local path and byte count as JSON text.
 *
 * Destination rules:
 * - An explicit `destinationPath` is resolved and used as-is.
 * - Otherwise the file goes under `<tmpdir>/gcs-download-<bucket>-<timestamp>/`
 *   following the object key; keys that resolve outside that root are rejected.
 *
 * @param _server - MCP server instance (unused by this tool).
 * @param clientFactory - Returns a GCS client exposing `getObjectBytes`.
 * @returns Tool definition with `name`, `description`, `inputSchema`, `handler`.
 *   The handler never throws: failures come back as `{ isError: true }`.
 */
export function downloadObjectTool(_server, clientFactory) {
    return {
        name: 'download_object',
        description: `Download a single object to a local file path, writing raw bytes (binary-safe).

Unlike get_object (which returns content inline as UTF-8 text and is lossy for binary or large files), this tool streams the object to disk and returns the local path.

Example response:
{
"bucket": "my-bucket",
"key": "exports/archive.tar.gz",
"localPath": "/tmp/gcs-download-my-bucket-1700000000000/exports/archive.tar.gz",
"size": 1048576
}`,
        inputSchema: {
            type: 'object',
            properties: {
                bucket: { type: 'string', description: PARAM_DESCRIPTIONS.bucket },
                key: { type: 'string', description: PARAM_DESCRIPTIONS.key },
                destinationPath: { type: 'string', description: PARAM_DESCRIPTIONS.destinationPath },
            },
            required: ['bucket', 'key'],
        },
        handler: async (args) => {
            try {
                const validated = DownloadObjectSchema.parse(args);
                const client = clientFactory();
                let localPath;
                if (validated.destinationPath) {
                    // The caller chose an explicit path; writing there is their intent.
                    localPath = path.resolve(validated.destinationPath);
                }
                else {
                    // Both the bucket name and the object key are untrusted. The bucket is
                    // reduced to one safe segment so the root itself stays inside the temp
                    // directory; the key is then confined to that root to prevent
                    // path-traversal escapes.
                    const root = path.join(path.resolve(os.tmpdir()), `gcs-download-${bucketPathSegment(validated.bucket)}-${Date.now()}`);
                    localPath = path.resolve(root, validated.key);
                    if (localPath !== root && !localPath.startsWith(root + path.sep)) {
                        throw new Error(`Object key "${validated.key}" resolves outside the destination directory`);
                    }
                }
                // Fetch first so a failed download leaves no directories behind.
                const { content } = await client.getObjectBytes(validated.bucket, validated.key);
                await mkdir(path.dirname(localPath), { recursive: true });
                await writeFile(localPath, content);
                return {
                    content: [
                        {
                            type: 'text',
                            text: JSON.stringify({
                                bucket: validated.bucket,
                                key: validated.key,
                                localPath,
                                size: content.length,
                            }, null, 2),
                        },
                    ],
                };
            }
            catch (error) {
                const message = error instanceof Error ? error.message : String(error);
                return {
                    content: [{ type: 'text', text: `Error downloading object: ${message}` }],
                    isError: true,
                };
            }
        },
    };
}
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
import { z } from 'zod';
|
|
2
|
+
import type { Server } from '@modelcontextprotocol/sdk/server/index.js';
|
|
3
|
+
import type { GCSClientFactory } from '../server.js';
|
|
4
|
+
/**
 * Zod schema for `download_prefix` arguments: `bucket` and `prefix` are
 * required strings; `destinationDir` (string) and `maxInlineEntries` (number)
 * are optional.
 */
export declare const DownloadPrefixSchema: z.ZodObject<{
    bucket: z.ZodString;
    prefix: z.ZodString;
    destinationDir: z.ZodOptional<z.ZodString>;
    maxInlineEntries: z.ZodOptional<z.ZodNumber>;
}, "strip", z.ZodTypeAny, {
    prefix: string;
    bucket: string;
    destinationDir?: string | undefined;
    maxInlineEntries?: number | undefined;
}, {
    prefix: string;
    bucket: string;
    destinationDir?: string | undefined;
    maxInlineEntries?: number | undefined;
}>;
|
|
20
|
+
/**
 * Compute the path of an object key relative to the downloaded prefix, so the
 * directory structure under the prefix is preserved on disk.
 *
 * Example: prefix="logs/2024/", key="logs/2024/01/data.json" -> "01/data.json"
 *
 * @param key - Full object key.
 * @param prefix - Prefix that was requested for download.
 * @returns The key's path relative to the prefix.
 */
export declare function relativeKeyForPrefix(key: string, prefix: string): string;
|
|
27
|
+
/**
 * Build the `download_prefix` tool definition.
 *
 * @param _server - MCP server instance (underscore-prefixed; presumably unused — confirm in the implementation).
 * @param clientFactory - Factory used to obtain a GCS client.
 * @returns The tool's name, description, JSON input schema, and an async
 *   handler. The handler resolves to text content, with `isError` set on the
 *   failure variant of the result.
 */
export declare function downloadPrefixTool(_server: Server, clientFactory: GCSClientFactory): {
    name: string;
    description: string;
    inputSchema: {
        type: "object";
        properties: {
            bucket: {
                type: string;
                description: "The name of the GCS bucket (e.g., \"my-app-data\")";
            };
            prefix: {
                type: string;
                description: "The key prefix to download recursively (e.g., \"logs/2024/\"). All objects under this prefix are downloaded. Use an empty string to download the entire bucket.";
            };
            destinationDir: {
                type: string;
                description: "Local directory to download into. Created if it does not exist. Defaults to a unique folder under the OS temp directory (e.g., \"/tmp/gcs-download-<bucket>-<timestamp>\").";
            };
            maxInlineEntries: {
                type: string;
                minimum: number;
                description: "Maximum number of file entries to include inline in the response manifest (default: 100). Counts and totals always reflect the full set regardless of this cap.";
            };
        };
        required: string[];
    };
    handler: (args: unknown) => Promise<{
        content: {
            type: string;
            text: string;
        }[];
        isError?: undefined;
    } | {
        content: {
            type: string;
            text: string;
        }[];
        isError: boolean;
    }>;
};
|
|
67
|
+
//# sourceMappingURL=download-prefix.d.ts.map
|
|
@@ -0,0 +1,169 @@
|
|
|
1
|
+
import { z } from 'zod';
|
|
2
|
+
import { mkdir, writeFile } from 'fs/promises';
|
|
3
|
+
import os from 'os';
|
|
4
|
+
import path from 'path';
|
|
5
|
+
// Parameter descriptions kept in one place so the zod schema and the JSON
// input schema advertised by the tool stay in sync.
const PARAM_DESCRIPTIONS = {
    bucket: 'The name of the GCS bucket (e.g., "my-app-data")',
    prefix: 'The key prefix to download recursively (e.g., "logs/2024/"). All objects under this prefix are downloaded. Use an empty string to download the entire bucket.',
    destinationDir: 'Local directory to download into. Created if it does not exist. Defaults to a unique folder under the OS temp directory (e.g., "/tmp/gcs-download-<bucket>-<timestamp>").',
    maxInlineEntries: 'Maximum number of file entries to include inline in the response manifest (default: 100). Counts and totals always reflect the full set regardless of this cap.',
};
// Cap how many object paths are echoed back inline so that downloading thousands
// of objects does not produce an enormous tool response.
const DEFAULT_MAX_INLINE_ENTRIES = 100;
// Safety guard against a misbehaving paginator returning the same token forever.
const MAX_LIST_PAGES = 100_000;
/**
 * Runtime validation for `download_prefix` arguments. `prefix` may be an empty
 * string (the whole bucket); `bucket` and `destinationDir` must be non-empty
 * when present; `maxInlineEntries` must be a non-negative integer.
 */
export const DownloadPrefixSchema = z.object({
    bucket: z.string().min(1).describe(PARAM_DESCRIPTIONS.bucket),
    prefix: z.string().describe(PARAM_DESCRIPTIONS.prefix),
    destinationDir: z.string().min(1).optional().describe(PARAM_DESCRIPTIONS.destinationDir),
    maxInlineEntries: z
        .number()
        .int()
        .min(0)
        .optional()
        .describe(PARAM_DESCRIPTIONS.maxInlineEntries),
});
|
|
27
|
+
/**
 * Map an object key to the path it should occupy beneath the download root,
 * keeping the folder layout that exists below the requested prefix.
 *
 * Example: prefix="logs/2024/", key="logs/2024/01/data.json" -> "01/data.json"
 *
 * @param {string} key - Full object key.
 * @param {string} prefix - Prefix that was requested (may be empty).
 * @returns {string} Relative path for the object; never starts with "/".
 */
export function relativeKeyForPrefix(key, prefix) {
    // Drop the prefix only when the key really begins with it.
    const tail = prefix && key.startsWith(prefix) ? key.slice(prefix.length) : key;
    // A prefix without a trailing "/" leaves the separator at the front.
    const trimmed = tail.replace(/^\/+/, '');
    if (trimmed !== '') {
        return trimmed;
    }
    // The prefix named the object itself, so nothing is left: use its file name.
    return path.posix.basename(key);
}
|
|
44
|
+
/**
 * Tell whether a key is a "directory placeholder", i.e. its last character is
 * a slash. An empty key is not a placeholder.
 */
function isDirectoryPlaceholder(key) {
    const lastChar = key.charAt(key.length - 1);
    return lastChar === '/';
}
|
|
48
|
+
/**
 * Reduce a bucket name to a single, separator-free path segment.
 *
 * The bucket name is caller-supplied and only validated as a non-empty string,
 * so a value such as "../../etc" would otherwise place the default download
 * directory outside the OS temp directory — and that directory is created
 * before any GCS request could reject the name. Characters that are valid in
 * GCS bucket names (letters, digits, ".", "_", "-") are kept, so real bucket
 * names map to themselves.
 */
function tempDirSegmentForBucket(bucket) {
    return bucket.replace(/[^A-Za-z0-9._-]/g, '_');
}
/**
 * Build the `download_prefix` tool definition.
 *
 * The handler:
 * 1. validates its arguments and creates the destination directory (an
 *    explicit `destinationDir`, or `<tmpdir>/gcs-download-<bucket>-<timestamp>`);
 * 2. lists every key under the prefix page by page, skipping keys that end in
 *    "/", and stops if a page token repeats or the page cap is reached;
 * 3. downloads the objects one at a time via the client's `getObjectBytes`,
 *    writing each under the destination root at its prefix-relative path;
 * 4. returns a JSON manifest with counts, total bytes, a capped file list,
 *    and any per-object errors.
 *
 * @param _server - MCP server instance (unused by this tool).
 * @param clientFactory - Returns a GCS client exposing `listObjects` and
 *   `getObjectBytes`.
 * @returns Tool definition with `name`, `description`, `inputSchema`, `handler`.
 *   The handler never throws: failures come back as `{ isError: true }`, and
 *   per-object failures are collected in the manifest's `errors` list.
 */
export function downloadPrefixTool(_server, clientFactory) {
    return {
        name: 'download_prefix',
        description: `Recursively download every object under a prefix to a local directory, preserving the key path structure as subdirectories.

Unlike get_object (which returns a single object inline as UTF-8 text), this tool streams raw bytes to disk and is binary-safe. It is the right tool for bulk-fetching a sub-prefix so you can run local data-wrangling over the files.

Returns a manifest:
{
"destinationDir": "/tmp/gcs-download-my-bucket-1700000000000",
"objectCount": 1234,
"totalBytes": 5678901,
"files": [
{ "key": "logs/2024/01/data.json", "localPath": "/tmp/.../01/data.json", "size": 1234 }
],
"filesTruncated": true,
"errors": []
}

Notes:
- Directory placeholder objects (keys ending in "/") are skipped.
- Per-object download failures are collected in "errors" without aborting the whole batch.
- The "files" list is capped (see maxInlineEntries); "objectCount" and "totalBytes" always reflect the full download.`,
        inputSchema: {
            type: 'object',
            properties: {
                bucket: { type: 'string', description: PARAM_DESCRIPTIONS.bucket },
                prefix: { type: 'string', description: PARAM_DESCRIPTIONS.prefix },
                destinationDir: { type: 'string', description: PARAM_DESCRIPTIONS.destinationDir },
                maxInlineEntries: {
                    type: 'number',
                    minimum: 0,
                    description: PARAM_DESCRIPTIONS.maxInlineEntries,
                },
            },
            required: ['bucket', 'prefix'],
        },
        handler: async (args) => {
            try {
                const validated = DownloadPrefixSchema.parse(args);
                const client = clientFactory();
                const maxInlineEntries = validated.maxInlineEntries ?? DEFAULT_MAX_INLINE_ENTRIES;
                // The default directory name embeds the untrusted bucket name, so it is
                // reduced to one safe segment to keep the directory inside the temp dir.
                const destinationDir = validated.destinationDir ??
                    path.join(os.tmpdir(), `gcs-download-${tempDirSegmentForBucket(validated.bucket)}-${Date.now()}`);
                const resolvedRoot = path.resolve(destinationDir);
                await mkdir(resolvedRoot, { recursive: true });
                // Collect every object key under the prefix, paginating until exhausted.
                // No delimiter is passed so the listing recurses into all sub-prefixes.
                const keys = [];
                let pageToken;
                const seenTokens = new Set();
                let pages = 0;
                do {
                    const page = await client.listObjects(validated.bucket, {
                        prefix: validated.prefix,
                        maxResults: 1000,
                        pageToken,
                    });
                    for (const obj of page.objects) {
                        if (!isDirectoryPlaceholder(obj.key)) {
                            keys.push(obj.key);
                        }
                    }
                    pageToken = page.isTruncated ? page.nextPageToken : undefined;
                    if (pageToken && seenTokens.has(pageToken)) {
                        // Paginator is looping on the same token; stop to avoid an infinite loop.
                        break;
                    }
                    if (pageToken) {
                        seenTokens.add(pageToken);
                    }
                } while (pageToken && ++pages < MAX_LIST_PAGES);
                const files = [];
                const errors = [];
                let totalBytes = 0;
                for (const key of keys) {
                    const rel = relativeKeyForPrefix(key, validated.prefix);
                    const localPath = path.resolve(resolvedRoot, rel);
                    // Defense in depth: never write outside the destination root, even if a
                    // key contains ".." segments.
                    if (localPath !== resolvedRoot && !localPath.startsWith(resolvedRoot + path.sep)) {
                        errors.push({ key, error: 'Resolved path escapes destination directory; skipped' });
                        continue;
                    }
                    try {
                        const { content } = await client.getObjectBytes(validated.bucket, key);
                        await mkdir(path.dirname(localPath), { recursive: true });
                        await writeFile(localPath, content);
                        totalBytes += content.length;
                        files.push({ key, localPath, size: content.length });
                    }
                    catch (error) {
                        // One bad object must not abort the batch; record it and move on.
                        const message = error instanceof Error ? error.message : String(error);
                        errors.push({ key, error: message });
                    }
                }
                const manifest = {
                    destinationDir: resolvedRoot,
                    objectCount: files.length,
                    totalBytes,
                    files: files.slice(0, maxInlineEntries),
                    filesTruncated: files.length > maxInlineEntries,
                    errors,
                };
                return {
                    content: [
                        {
                            type: 'text',
                            text: JSON.stringify(manifest, null, 2),
                        },
                    ],
                };
            }
            catch (error) {
                const message = error instanceof Error ? error.message : String(error);
                return {
                    content: [{ type: 'text', text: `Error downloading prefix: ${message}` }],
                    isError: true,
                };
            }
        },
    };
}
|
package/shared/tools.js
CHANGED
|
@@ -2,6 +2,8 @@ import { ListToolsRequestSchema, CallToolRequestSchema } from '@modelcontextprot
|
|
|
2
2
|
import { listBucketsTool } from './tools/list-buckets.js';
|
|
3
3
|
import { listObjectsTool } from './tools/list-objects.js';
|
|
4
4
|
import { getObjectTool } from './tools/get-object.js';
|
|
5
|
+
import { downloadPrefixTool } from './tools/download-prefix.js';
|
|
6
|
+
import { downloadObjectTool } from './tools/download-object.js';
|
|
5
7
|
import { putObjectTool } from './tools/put-object.js';
|
|
6
8
|
import { deleteObjectTool } from './tools/delete-object.js';
|
|
7
9
|
import { createBucketTool } from './tools/create-bucket.js';
|
|
@@ -45,6 +47,8 @@ const ALL_TOOLS = [
|
|
|
45
47
|
{ factory: listBucketsTool, groups: ['readonly'], bucketLevelOnly: true },
|
|
46
48
|
{ factory: listObjectsTool, groups: ['readonly'], bucketParams: ['bucket'] },
|
|
47
49
|
{ factory: getObjectTool, groups: ['readonly'], bucketParams: ['bucket'] },
|
|
50
|
+
{ factory: downloadObjectTool, groups: ['readonly'], bucketParams: ['bucket'] },
|
|
51
|
+
{ factory: downloadPrefixTool, groups: ['readonly'], bucketParams: ['bucket'] },
|
|
48
52
|
{ factory: headBucketTool, groups: ['readonly'], bucketLevelOnly: true },
|
|
49
53
|
// Write operations (non-destructive)
|
|
50
54
|
{ factory: putObjectTool, groups: ['readwrite'], bucketParams: ['bucket'] },
|