@echofiles/echo-pdf 0.3.0 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -88,6 +88,33 @@ echo-pdf mcp tools
88
88
  echo-pdf mcp call --tool file_ops --args '{"op":"list"}'
89
89
  ```
90
90
 
91
+ ### 3.1.1 纯 MCP 场景推荐流程(本地 PDF)
92
+
93
+ 远端 MCP server 无法直接读取你本机文件路径。推荐两步:
94
+
95
+ 1. 先通过 HTTP 上传本地 PDF,拿到 `fileId`
96
+ 2. 再用 MCP 工具传 `fileId` 调用
97
+
98
+ 示例:
99
+
100
+ ```bash
101
+ curl -sS -X POST https://echo-pdf.echofilesai.workers.dev/api/files/upload \
102
+ -F 'file=@./input.pdf'
103
+
104
+ echo-pdf mcp call --tool pdf_extract_pages --args '{"fileId":"<FILE_ID>","pages":[1]}'
105
+ ```
106
+
107
+ ### 3.1.2 不上传文件的 URL ingest 流程
108
+
109
+ 如果 PDF 已经在公网可访问,直接传 `url`:
110
+
111
+ ```bash
112
+ echo-pdf mcp call --tool pdf_extract_pages --args '{
113
+ "url":"https://example.com/sample.pdf",
114
+ "pages":[1]
115
+ }'
116
+ ```
117
+
91
118
  ### 3.2 给客户端生成 MCP 配置片段
92
119
 
93
120
  ```bash
@@ -107,6 +134,12 @@ echo-pdf setup add json
107
134
  - `pdf_tables_to_latex`
108
135
  - `file_ops`
109
136
 
137
+ MCP 输出策略:
138
+
139
+ - `pdf_extract_pages` 在 MCP 下默认 `returnMode=url`(不传 `returnMode` 时生效)
140
+ - MCP `text` 会对大字段做去二进制/截断,避免把大段 base64 塞进上下文
141
+ - 二进制结果请优先使用 `resource_link` 中的下载地址
142
+
110
143
  ## 4. Web UI 使用
111
144
 
112
145
  打开:
@@ -183,6 +216,7 @@ curl -sS -X POST https://echo-pdf.echofilesai.workers.dev/tools/call \
183
216
  - `agent.defaultProvider`
184
217
  - `agent.defaultModel`
185
218
  - `service.publicBaseUrl`
219
+ - `service.fileGet.cacheTtlSeconds`
186
220
  - `service.maxPdfBytes`
187
221
  - `service.storage.maxFileBytes`
188
222
  - `service.storage.maxTotalBytes`
@@ -205,6 +239,8 @@ curl -sS -X POST https://echo-pdf.echofilesai.workers.dev/tools/call \
205
239
  - `ECHO_PDF_DEFAULT_PROVIDER`
206
240
  - `ECHO_PDF_DEFAULT_MODEL`
207
241
  - `ECHO_PDF_PUBLIC_BASE_URL`(可选,强制 artifacts 生成外部可访问绝对 URL)
242
+ - `ECHO_PDF_FILE_GET_CACHE_TTL_SECONDS`(可选,`/api/files/get` 缓存秒数,`0` 表示 `no-store`)
243
+ - `ECHO_PDF_FILE_GET_AUTH_HEADER` + `ECHO_PDF_FILE_GET_AUTH_ENV`(可选,启用下载端点 header 鉴权)
208
244
  - `ECHO_PDF_MCP_KEY`(可选,启用 MCP 鉴权)
209
245
  - `ECHO_PDF_WORKER_NAME`(CLI 默认 URL 推导)
210
246
 
@@ -263,3 +299,12 @@ curl -sS -X POST https://echo-pdf.echofilesai.workers.dev/tools/call \
263
299
  "arguments":{"fileId":"<FILE_ID>","pages":[1],"returnMode":"url"}
264
300
  }'
265
301
  ```
302
+
303
+ ### 8.4 错误码语义
304
+
305
+ - 客户端输入错误返回稳定 `4xx + code`,例如:
306
+ - `PAGES_REQUIRED`(400)
307
+ - `PAGE_OUT_OF_RANGE`(400)
308
+ - `MISSING_FILE_INPUT`(400)
309
+ - `FILE_NOT_FOUND`(404)
310
+ - 服务端故障返回 `5xx`。
@@ -2,6 +2,9 @@
2
2
  "service": {
3
3
  "name": "echo-pdf",
4
4
  "publicBaseUrl": "https://echo-pdf.echofilesai.workers.dev",
5
+ "fileGet": {
6
+ "cacheTtlSeconds": 300
7
+ },
5
8
  "maxPdfBytes": 10000000,
6
9
  "maxPagesPerRequest": 20,
7
10
  "defaultRenderScale": 2,
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@echofiles/echo-pdf",
3
3
  "description": "MCP-first PDF agent on Cloudflare Workers with CLI and web demo.",
4
- "version": "0.3.0",
4
+ "version": "0.3.1",
5
5
  "type": "module",
6
6
  "publishConfig": {
7
7
  "access": "public"
@@ -0,0 +1,21 @@
1
/**
 * Error that carries an HTTP status code, a stable machine-readable code and
 * optional structured details alongside the human-readable message.
 */
export class HttpError extends Error {
  /** HTTP status code associated with this error (e.g. 400, 404, 422). */
  readonly status: number
  /** Stable machine-readable error code (e.g. "PAGES_REQUIRED"). */
  readonly code: string
  /** Optional structured context attached by the thrower. */
  readonly details?: unknown

  /**
   * @param status HTTP status code to associate with the error.
   * @param code Stable machine-readable error code.
   * @param message Human-readable description, forwarded to `Error`.
   * @param details Optional structured context.
   */
  constructor(status: number, code: string, message: string, details?: unknown) {
    super(message)
    // Without this the error reports itself as a generic "Error" in logs and String(err).
    this.name = "HttpError"
    this.status = status
    this.code = code
    this.details = details
  }
}

/** Builds an {@link HttpError} with status 400. */
export const badRequest = (code: string, message: string, details?: unknown): HttpError =>
  new HttpError(400, code, message, details)

/** Builds an {@link HttpError} with status 404. */
export const notFound = (code: string, message: string, details?: unknown): HttpError =>
  new HttpError(404, code, message, details)

/** Builds an {@link HttpError} with status 422. */
export const unprocessable = (code: string, message: string, details?: unknown): HttpError =>
  new HttpError(422, code, message, details)
package/src/index.ts CHANGED
@@ -61,6 +61,21 @@ const asObj = (value: unknown): JsonObject =>
61
61
  const resolvePublicBaseUrl = (request: Request, configured?: string): string =>
62
62
  typeof configured === "string" && configured.length > 0 ? configured : request.url
63
63
 
64
/**
 * Produces a filename that is safe to embed inside the quoted `filename="..."`
 * parameter of a `Content-Disposition` header.
 *
 * Everything outside printable ASCII (which includes CR/LF and other control
 * characters) is removed, as are double quotes and backslashes: a backslash is
 * the escape character inside a quoted-string, so a trailing one would escape
 * the closing quote.
 *
 * @param filename Stored filename, possibly containing arbitrary characters.
 * @returns The cleaned, trimmed name, or `"download.bin"` when nothing is left.
 */
const sanitizeDownloadFilename = (filename: string): string => {
  const cleaned = filename
    .replace(/[^\x20-\x7E]+/g, "")
    .replace(/["\\]/g, "")
    .trim()
  return cleaned.length > 0 ? cleaned : "download.bin"
}
71
+
72
/**
 * Compares two strings by scanning the full length of the longer one instead
 * of stopping at the first mismatch, so the comparison does not exit early the
 * way `===` may. Best-effort only: JavaScript gives no hard timing guarantees.
 */
const constantTimeEquals = (left: string, right: string): boolean => {
  let diff = left.length ^ right.length
  const span = Math.max(left.length, right.length)
  for (let i = 0; i < span; i++) {
    // charCodeAt yields NaN past the end of a string; the bitwise XOR coerces NaN to 0.
    diff |= left.charCodeAt(i) ^ right.charCodeAt(i)
  }
  return diff === 0
}

/**
 * Decides whether a request may use the file download endpoint.
 *
 * Access is open (returns `true`) when either `authHeader` or `authEnv` is not
 * configured, or when the env entry named by `authEnv` is not a non-empty
 * string. Otherwise the request header named by `authHeader` must equal that
 * env value exactly.
 *
 * NOTE(review): a configured `authEnv` whose secret is missing or empty leaves
 * the endpoint open rather than closed; confirm this fail-open is intended.
 *
 * @param request Incoming request whose headers are inspected.
 * @param env Environment bindings, indexed by the configured `authEnv` name.
 * @param config Header name and env variable name for the check.
 * @returns `true` when the request is allowed.
 */
const isFileGetAuthorized = (request: Request, env: Env, config: { authHeader?: string; authEnv?: string }): boolean => {
  if (!config.authHeader || !config.authEnv) return true
  const required = env[config.authEnv]
  if (typeof required !== "string" || required.length === 0) return true
  const presented = request.headers.get(config.authHeader)
  return presented !== null && constantTimeEquals(presented, required)
}
78
+
64
79
  const sseResponse = (stream: ReadableStream<Uint8Array>): Response =>
65
80
  new Response(stream, {
66
81
  headers: {
@@ -147,6 +162,10 @@ export default {
147
162
  version: config.mcp.version,
148
163
  authHeader: config.mcp.authHeader ?? null,
149
164
  },
165
+ fileGet: {
166
+ authHeader: config.service.fileGet?.authHeader ?? null,
167
+ cacheTtlSeconds: config.service.fileGet?.cacheTtlSeconds ?? 300,
168
+ },
150
169
  })
151
170
  }
152
171
 
@@ -304,6 +323,10 @@ export default {
304
323
  }
305
324
 
306
325
  if (request.method === "GET" && url.pathname === "/api/files/get") {
326
+ const fileGetConfig = config.service.fileGet ?? {}
327
+ if (!isFileGetAuthorized(request, env, fileGetConfig)) {
328
+ return json({ error: "Unauthorized", code: "UNAUTHORIZED" }, 401)
329
+ }
307
330
  const fileId = url.searchParams.get("fileId") || ""
308
331
  if (!fileId) return json({ error: "Missing fileId" }, 400)
309
332
  const file = await fileStore.get(fileId)
@@ -311,9 +334,13 @@ export default {
311
334
  const download = url.searchParams.get("download") === "1"
312
335
  const headers = new Headers()
313
336
  headers.set("Content-Type", file.mimeType)
314
- headers.set("Cache-Control", "no-store")
337
+ const cacheTtl = Number(fileGetConfig.cacheTtlSeconds ?? 300)
338
+ const cacheControl = cacheTtl > 0
339
+ ? `public, max-age=${Math.floor(cacheTtl)}, s-maxage=${Math.floor(cacheTtl)}`
340
+ : "no-store"
341
+ headers.set("Cache-Control", cacheControl)
315
342
  if (download) {
316
- headers.set("Content-Disposition", `attachment; filename=\"${file.filename.replace(/\"/g, "")}\"`)
343
+ headers.set("Content-Disposition", `attachment; filename=\"${sanitizeDownloadFilename(file.filename)}\"`)
317
344
  }
318
345
  return new Response(file.bytes, { status: 200, headers })
319
346
  }
package/src/mcp-server.ts CHANGED
@@ -48,6 +48,16 @@ const maybeAuthorized = (request: Request, env: Env, config: EchoPdfConfig): boo
48
48
  const resolvePublicBaseUrl = (request: Request, configured?: string): string =>
49
49
  typeof configured === "string" && configured.length > 0 ? configured : request.url
50
50
 
51
/**
 * Applies MCP-specific defaults to tool arguments before dispatch.
 *
 * For `pdf_extract_pages`, a missing, empty or non-string `returnMode` is
 * replaced by `"url"` on a shallow copy of the arguments. Every other case
 * returns the `args` object unchanged.
 *
 * @param toolName Name of the tool being called.
 * @param args Arguments supplied by the caller.
 * @returns The arguments to pass to the tool.
 */
const prepareMcpToolArgs = (toolName: string, args: Record<string, unknown>): Record<string, unknown> => {
  if (toolName !== "pdf_extract_pages") return args
  const { returnMode } = args
  const hasExplicitMode = typeof returnMode === "string" && returnMode.length > 0
  return hasExplicitMode ? args : { ...args, returnMode: "url" }
}
60
+
51
61
  export const handleMcpRequest = async (
52
62
  request: Request,
53
63
  env: Env,
@@ -103,7 +113,7 @@ export const handleMcpRequest = async (
103
113
  }
104
114
 
105
115
  const toolName = typeof params.name === "string" ? params.name : ""
106
- const args = asObj(params.arguments)
116
+ const args = prepareMcpToolArgs(toolName, asObj(params.arguments))
107
117
  if (!toolName) {
108
118
  return err(id, -32602, "Invalid params: name is required", {
109
119
  code: "INVALID_PARAMS",
package/src/pdf-agent.ts CHANGED
@@ -2,6 +2,7 @@ import type { Env, FileStore, ReturnMode } from "./types"
2
2
  import type { AgentTraceEvent, EchoPdfConfig, PdfOperationRequest } from "./pdf-types"
3
3
  import { resolveModelForProvider, resolveProviderAlias } from "./agent-defaults"
4
4
  import { fromBase64, normalizeReturnMode, toDataUrl } from "./file-utils"
5
+ import { badRequest, notFound, unprocessable } from "./http-error"
5
6
  import { extractPdfPageText, getPdfPageCount, renderPdfPageToPng, toBytes } from "./pdfium-engine"
6
7
  import { visionRecognize } from "./provider-client"
7
8
 
@@ -22,11 +23,20 @@ const traceStep = (
22
23
  }
23
24
 
24
25
  const ensurePages = (pages: ReadonlyArray<number>, pageCount: number, maxPages: number): number[] => {
25
- if (pages.length === 0) throw new Error("At least one page is required")
26
- if (pages.length > maxPages) throw new Error(`Page count exceeds maxPagesPerRequest (${maxPages})`)
26
+ if (pages.length === 0) throw badRequest("PAGES_REQUIRED", "At least one page is required")
27
+ if (pages.length > maxPages) {
28
+ throw badRequest("TOO_MANY_PAGES", `Page count exceeds maxPagesPerRequest (${maxPages})`, {
29
+ maxPagesPerRequest: maxPages,
30
+ providedPages: pages.length,
31
+ })
32
+ }
27
33
  for (const page of pages) {
28
34
  if (!Number.isInteger(page) || page < 1 || page > pageCount) {
29
- throw new Error(`Page ${page} out of range 1..${pageCount}`)
35
+ throw badRequest("PAGE_OUT_OF_RANGE", `Page ${page} out of range 1..${pageCount}`, {
36
+ page,
37
+ min: 1,
38
+ max: pageCount,
39
+ })
30
40
  }
31
41
  }
32
42
  return [...new Set(pages)].sort((a, b) => a - b)
@@ -45,7 +55,7 @@ export const ingestPdfFromPayload = async (
45
55
  if (input.fileId) {
46
56
  const existing = await opts.fileStore.get(input.fileId)
47
57
  if (!existing) {
48
- throw new Error(`File not found: ${input.fileId}`)
58
+ throw notFound("FILE_NOT_FOUND", `File not found: ${input.fileId}`, { fileId: input.fileId })
49
59
  }
50
60
  return {
51
61
  id: existing.id,
@@ -59,7 +69,11 @@ export const ingestPdfFromPayload = async (
59
69
 
60
70
  if (input.url) {
61
71
  traceStep(opts, "start", "file.fetch.url", { url: input.url })
62
- bytes = await toBytes(input.url)
72
+ try {
73
+ bytes = await toBytes(input.url)
74
+ } catch (error) {
75
+ throw badRequest("URL_FETCH_FAILED", `Unable to fetch PDF from url: ${error instanceof Error ? error.message : String(error)}`)
76
+ }
63
77
  try {
64
78
  const u = new URL(input.url)
65
79
  filename = decodeURIComponent(u.pathname.split("/").pop() || filename)
@@ -74,10 +88,13 @@ export const ingestPdfFromPayload = async (
74
88
  }
75
89
 
76
90
  if (!bytes) {
77
- throw new Error("Missing file input. Provide fileId, url or base64")
91
+ throw badRequest("MISSING_FILE_INPUT", "Missing file input. Provide fileId, url or base64")
78
92
  }
79
93
  if (bytes.byteLength > config.service.maxPdfBytes) {
80
- throw new Error(`PDF exceeds max size (${config.service.maxPdfBytes} bytes)`)
94
+ throw badRequest("PDF_TOO_LARGE", `PDF exceeds max size (${config.service.maxPdfBytes} bytes)`, {
95
+ maxPdfBytes: config.service.maxPdfBytes,
96
+ sizeBytes: bytes.byteLength,
97
+ })
81
98
  }
82
99
 
83
100
  const meta = await opts.fileStore.put({
@@ -164,7 +181,7 @@ export const runPdfAgent = async (
164
181
  const providerAlias = resolveProviderAlias(config, request.provider)
165
182
  const model = resolveModelForProvider(config, providerAlias, request.model)
166
183
  if (!model) {
167
- throw new Error("model is required for OCR or table extraction; set agent.defaultModel")
184
+ throw badRequest("MODEL_REQUIRED", "model is required for OCR or table extraction; set agent.defaultModel")
168
185
  }
169
186
 
170
187
  if (request.operation === "ocr_pages") {
@@ -216,7 +233,9 @@ export const runPdfAgent = async (
216
233
  })
217
234
  const latex = extractTabularLatex(rawLatex)
218
235
  if (!latex) {
219
- throw new Error(`table extraction did not return valid LaTeX tabular for page ${page}`)
236
+ throw unprocessable("TABLE_LATEX_MISSING", `table extraction did not return valid LaTeX tabular for page ${page}`, {
237
+ page,
238
+ })
220
239
  }
221
240
  tables.push({ page, latex })
222
241
  traceStep(opts, "end", "table.page", { page, chars: latex.length })
package/src/pdf-config.ts CHANGED
@@ -37,6 +37,9 @@ const validateConfig = (config: EchoPdfConfig): EchoPdfConfig => {
37
37
  ) {
38
38
  throw new Error("service.publicBaseUrl must start with http:// or https://")
39
39
  }
40
+ if (typeof config.service.fileGet?.cacheTtlSeconds === "number" && config.service.fileGet.cacheTtlSeconds < 0) {
41
+ throw new Error("service.fileGet.cacheTtlSeconds must be >= 0")
42
+ }
40
43
  if (!Number.isFinite(config.service.storage.maxFileBytes) || config.service.storage.maxFileBytes <= 0) {
41
44
  throw new Error("service.storage.maxFileBytes must be positive")
42
45
  }
@@ -73,6 +76,9 @@ export const loadEchoPdfConfig = (env: Env): EchoPdfConfig => {
73
76
  const providerOverride = env.ECHO_PDF_DEFAULT_PROVIDER
74
77
  const modelOverride = env.ECHO_PDF_DEFAULT_MODEL
75
78
  const publicBaseUrlOverride = env.ECHO_PDF_PUBLIC_BASE_URL
79
+ const fileGetAuthHeaderOverride = env.ECHO_PDF_FILE_GET_AUTH_HEADER
80
+ const fileGetAuthEnvOverride = env.ECHO_PDF_FILE_GET_AUTH_ENV
81
+ const fileGetCacheTtlOverride = env.ECHO_PDF_FILE_GET_CACHE_TTL_SECONDS
76
82
  const withOverrides: EchoPdfConfig = {
77
83
  ...resolved,
78
84
  service: {
@@ -81,6 +87,23 @@ export const loadEchoPdfConfig = (env: Env): EchoPdfConfig => {
81
87
  typeof publicBaseUrlOverride === "string" && publicBaseUrlOverride.trim().length > 0
82
88
  ? publicBaseUrlOverride.trim()
83
89
  : resolved.service.publicBaseUrl,
90
+ fileGet: {
91
+ authHeader:
92
+ typeof fileGetAuthHeaderOverride === "string" && fileGetAuthHeaderOverride.trim().length > 0
93
+ ? fileGetAuthHeaderOverride.trim()
94
+ : resolved.service.fileGet?.authHeader,
95
+ authEnv:
96
+ typeof fileGetAuthEnvOverride === "string" && fileGetAuthEnvOverride.trim().length > 0
97
+ ? fileGetAuthEnvOverride.trim()
98
+ : resolved.service.fileGet?.authEnv,
99
+ cacheTtlSeconds: (() => {
100
+ if (typeof fileGetCacheTtlOverride === "string" && fileGetCacheTtlOverride.trim().length > 0) {
101
+ const value = Number(fileGetCacheTtlOverride)
102
+ return Number.isFinite(value) && value >= 0 ? Math.floor(value) : resolved.service.fileGet?.cacheTtlSeconds
103
+ }
104
+ return resolved.service.fileGet?.cacheTtlSeconds
105
+ })(),
106
+ },
84
107
  },
85
108
  agent: {
86
109
  ...resolved.agent,
package/src/pdf-types.ts CHANGED
@@ -23,6 +23,11 @@ export interface EchoPdfConfig {
23
23
  readonly service: {
24
24
  readonly name: string
25
25
  readonly publicBaseUrl?: string
26
+ readonly fileGet?: {
27
+ readonly authHeader?: string
28
+ readonly authEnv?: string
29
+ readonly cacheTtlSeconds?: number
30
+ }
26
31
  readonly maxPdfBytes: number
27
32
  readonly maxPagesPerRequest: number
28
33
  readonly defaultRenderScale: number
@@ -79,20 +79,7 @@ export class R2FileStore implements FileStore {
79
79
  }
80
80
 
81
81
  async list(): Promise<ReadonlyArray<StoredFileMeta>> {
82
- const listed = await this.bucket.list({ prefix: PREFIX, limit: 1000 })
83
- return listed.objects.map((obj) => {
84
- const meta = (obj.customMetadata ?? {}) as MetaFields
85
- const createdAt = parseCreatedAt(meta.createdAt, obj.uploaded)
86
- const filename = meta.filename ?? toId(obj.key)
87
- const mimeType = meta.mimeType ?? obj.httpMetadata?.contentType ?? "application/octet-stream"
88
- return {
89
- id: toId(obj.key),
90
- filename,
91
- mimeType,
92
- sizeBytes: obj.size,
93
- createdAt,
94
- }
95
- })
82
+ return await this.listAllFiles()
96
83
  }
97
84
 
98
85
  async delete(fileId: string): Promise<boolean> {
@@ -101,7 +88,7 @@ export class R2FileStore implements FileStore {
101
88
  }
102
89
 
103
90
  async stats(): Promise<unknown> {
104
- const files = await this.list()
91
+ const files = await this.listAllFiles()
105
92
  const totalBytes = files.reduce((sum, file) => sum + file.sizeBytes, 0)
106
93
  return {
107
94
  backend: "r2",
@@ -114,17 +101,24 @@ export class R2FileStore implements FileStore {
114
101
  }
115
102
 
116
103
  async cleanup(): Promise<unknown> {
117
- const before = await this.list()
118
- const deletedExpired = await this.deleteExpired(before)
119
- const afterExpired = await this.list()
120
- const deletedEvicted = await this.evictIfNeeded(afterExpired, 0)
121
- const after = await this.list()
104
+ const files = await this.listAllFiles()
105
+ const expired = files.filter((f) => isExpired(f.createdAt, this.policy.ttlHours))
106
+ const active = files.filter((f) => !isExpired(f.createdAt, this.policy.ttlHours))
107
+ if (expired.length > 0) {
108
+ await this.bucket.delete(expired.map((f) => toKey(f.id)))
109
+ }
110
+ const evict = this.pickEvictions(active, 0)
111
+ if (evict.length > 0) {
112
+ await this.bucket.delete(evict.map((f) => toKey(f.id)))
113
+ }
114
+ const evictIds = new Set(evict.map((f) => f.id))
115
+ const after = active.filter((f) => !evictIds.has(f.id))
122
116
  const totalBytes = after.reduce((sum, file) => sum + file.sizeBytes, 0)
123
117
  return {
124
118
  backend: "r2",
125
119
  policy: this.policy,
126
- deletedExpired,
127
- deletedEvicted,
120
+ deletedExpired: expired.length,
121
+ deletedEvicted: evict.length,
128
122
  stats: {
129
123
  fileCount: after.length,
130
124
  totalBytes,
@@ -133,12 +127,19 @@ export class R2FileStore implements FileStore {
133
127
  }
134
128
 
135
129
  private async cleanupInternal(incomingBytes: number): Promise<void> {
136
- const files = await this.list()
137
- await this.deleteExpired(files)
138
- const afterExpired = await this.list()
139
- await this.evictIfNeeded(afterExpired, incomingBytes)
140
- const finalFiles = await this.list()
141
- const finalTotal = finalFiles.reduce((sum, file) => sum + file.sizeBytes, 0)
130
+ const files = await this.listAllFiles()
131
+ const expired = files.filter((f) => isExpired(f.createdAt, this.policy.ttlHours))
132
+ const active = files.filter((f) => !isExpired(f.createdAt, this.policy.ttlHours))
133
+ if (expired.length > 0) {
134
+ await this.bucket.delete(expired.map((f) => toKey(f.id)))
135
+ }
136
+ const evict = this.pickEvictions(active, incomingBytes)
137
+ if (evict.length > 0) {
138
+ await this.bucket.delete(evict.map((f) => toKey(f.id)))
139
+ }
140
+ const evictIds = new Set(evict.map((f) => f.id))
141
+ const remaining = active.filter((f) => !evictIds.has(f.id))
142
+ const finalTotal = remaining.reduce((sum, file) => sum + file.sizeBytes, 0)
142
143
  if (finalTotal + incomingBytes > this.policy.maxTotalBytes) {
143
144
  const err = new Error(
144
145
  `storage quota exceeded: total ${finalTotal} + incoming ${incomingBytes} > maxTotalBytes ${this.policy.maxTotalBytes}`
@@ -150,17 +151,10 @@ export class R2FileStore implements FileStore {
150
151
  }
151
152
  }
152
153
 
153
- private async deleteExpired(files: ReadonlyArray<StoredFileMeta>): Promise<number> {
154
- const expired = files.filter((f) => isExpired(f.createdAt, this.policy.ttlHours))
155
- if (expired.length === 0) return 0
156
- await this.bucket.delete(expired.map((f) => toKey(f.id)))
157
- return expired.length
158
- }
159
-
160
- private async evictIfNeeded(files: ReadonlyArray<StoredFileMeta>, incomingBytes: number): Promise<number> {
154
+ private pickEvictions(files: ReadonlyArray<StoredFileMeta>, incomingBytes: number): StoredFileMeta[] {
161
155
  const totalBytes = files.reduce((sum, f) => sum + f.sizeBytes, 0)
162
156
  const projected = totalBytes + incomingBytes
163
- if (projected <= this.policy.maxTotalBytes) return 0
157
+ if (projected <= this.policy.maxTotalBytes) return []
164
158
 
165
159
  const needFree = projected - this.policy.maxTotalBytes
166
160
  const candidates = [...files].sort((a, b) => Date.parse(a.createdAt) - Date.parse(b.createdAt))
@@ -172,9 +166,30 @@ export class R2FileStore implements FileStore {
172
166
  if (freed >= needFree) break
173
167
  if (evict.length >= this.policy.cleanupBatchSize) break
174
168
  }
175
- if (evict.length === 0) return 0
176
- await this.bucket.delete(evict.map((f) => toKey(f.id)))
177
- return evict.length
169
+ return evict
178
170
  }
179
- }
180
171
 
172
/**
 * Collects metadata for every object under `PREFIX`, requesting up to 1000
 * objects per `bucket.list` call and following the returned cursor until the
 * listing is no longer truncated (or no cursor is provided).
 *
 * Missing custom metadata falls back to the object id for the filename and to
 * the HTTP content type, then `application/octet-stream`, for the MIME type.
 *
 * @returns Metadata entries in the order the bucket returned them.
 */
private async listAllFiles(): Promise<StoredFileMeta[]> {
  const collected: StoredFileMeta[] = []
  let nextCursor: string | undefined
  for (;;) {
    const page = await this.bucket.list({ prefix: PREFIX, limit: 1000, cursor: nextCursor })
    for (const entry of page.objects) {
      const custom = (entry.customMetadata ?? {}) as MetaFields
      const createdAt = parseCreatedAt(custom.createdAt, entry.uploaded)
      const filename = custom.filename ?? toId(entry.key)
      const mimeType = custom.mimeType ?? entry.httpMetadata?.contentType ?? "application/octet-stream"
      collected.push({ id: toId(entry.key), filename, mimeType, sizeBytes: entry.size, createdAt })
    }
    // Stop once the bucket reports a complete listing or gives no cursor to continue from.
    if (page.truncated !== true || !page.cursor) break
    nextCursor = page.cursor
  }
  return collected
}
195
+ }
@@ -15,6 +15,10 @@ export interface ToolOutputEnvelope {
15
15
  readonly artifacts: ToolArtifact[]
16
16
  }
17
17
 
18
/** Longest string (in characters) kept verbatim in the MCP text rendering before truncation. */
const MAX_TEXT_STRING = 1200

/** Maximum number of array items kept in the MCP text rendering; the rest are summarized. */
const MAX_TEXT_ARRAY = 40

/** Nesting depth at which the MCP text rendering stops descending. */
const MAX_TEXT_DEPTH = 8
21
+
18
22
  const asObj = (value: unknown): JsonObject =>
19
23
  typeof value === "object" && value !== null && !Array.isArray(value)
20
24
  ? (value as JsonObject)
@@ -113,6 +117,38 @@ const summarizeData = (data: unknown): string => {
113
117
  return "Tool executed successfully."
114
118
  }
115
119
 
120
/**
 * Shrinks a string for inclusion in the MCP text rendering.
 *
 * - `data:` URLs keep only the part before the first comma, followed by
 *   `,<omitted>`. The kept head is capped so that a `data:` string with no
 *   comma, or an oversized prefix, cannot pass through at full size.
 * - Strings of 300+ characters drawn only from the base64 alphabet are
 *   replaced by a length placeholder.
 * - Any other string longer than `MAX_TEXT_STRING` is cut and annotated with
 *   the number of dropped characters.
 *
 * @param value String to sanitize.
 * @returns The original string, or a shortened stand-in.
 */
const sanitizeString = (value: string): string => {
  if (value.startsWith("data:")) {
    // Upper bound for the "data:<mediatype>[;params]" head that is echoed back.
    const maxHeadLength = 128
    const commaAt = value.indexOf(",")
    const head = commaAt >= 0 ? value.slice(0, commaAt) : value
    return head.length <= maxHeadLength
      ? `${head},<omitted>`
      : `${head.slice(0, maxHeadLength)}...,<omitted>`
  }
  if (/^[A-Za-z0-9+/=]{300,}$/.test(value)) {
    return `<base64 omitted len=${value.length}>`
  }
  if (value.length > MAX_TEXT_STRING) {
    return `${value.slice(0, MAX_TEXT_STRING)}...(truncated ${value.length - MAX_TEXT_STRING} chars)`
  }
  return value
}
133
+
134
/**
 * Builds a size-bounded copy of a value for the MCP text rendering.
 *
 * Strings go through `sanitizeString`; arrays keep their first
 * `MAX_TEXT_ARRAY` items plus a marker describing how many were dropped;
 * objects are copied key by key; other primitives and `null` are returned
 * as-is. Once `depth` reaches `MAX_TEXT_DEPTH` the value is replaced by
 * `"<max-depth>"`.
 *
 * @param value Value to sanitize.
 * @param depth Current nesting depth (callers normally omit it).
 * @returns The sanitized copy.
 */
const sanitizeForText = (value: unknown, depth = 0): unknown => {
  if (depth >= MAX_TEXT_DEPTH) return "<max-depth>"
  if (typeof value === "string") return sanitizeString(value)
  if (value === null || typeof value !== "object") return value

  const descend = (child: unknown): unknown => sanitizeForText(child, depth + 1)

  if (Array.isArray(value)) {
    const kept = value.slice(0, MAX_TEXT_ARRAY).map((child) => descend(child))
    const dropped = value.length - MAX_TEXT_ARRAY
    if (dropped > 0) {
      kept.push(`<truncated ${dropped} items>`)
    }
    return kept
  }

  return Object.entries(value).reduce<Record<string, unknown>>((copy, [key, child]) => {
    copy[key] = descend(child)
    return copy
  }, {})
}
151
+
116
152
  export const buildMcpContent = (envelope: ToolOutputEnvelope): Array<Record<string, unknown>> => {
117
153
  const lines: string[] = [summarizeData(envelope.data)]
118
154
  if (envelope.artifacts.length > 0) {
@@ -130,7 +166,7 @@ export const buildMcpContent = (envelope: ToolOutputEnvelope): Array<Record<stri
130
166
  }
131
167
  }
132
168
  lines.push("")
133
- lines.push(JSON.stringify(envelope, null, 2))
169
+ lines.push(JSON.stringify(sanitizeForText(envelope), null, 2))
134
170
 
135
171
  const content: Array<Record<string, unknown>> = [{ type: "text", text: lines.join("\n") }]
136
172
  for (const artifact of envelope.artifacts) {