@echofiles/echo-pdf 0.2.0 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -27,6 +27,13 @@
27
27
  - MCP: `https://echo-pdf.echofilesai.workers.dev/mcp`
28
28
  - HTTP API 根路径: `https://echo-pdf.echofilesai.workers.dev`
29
29
 
30
+ ## 1.1 API 兼容性说明
31
+
32
+ - 从 `v0.3.0` 开始,`POST /tools/call` 返回结构改为:
33
+ - `{"ok": true, "data": ..., "artifacts": [...]}`
34
+ - 老格式 `{"name":"...","output":...}` 已移除。
35
+ - MCP `tools/call` 仍保留 `type:"text"`,并新增 `type:"resource_link"` 供下载二进制结果。
36
+
30
37
  ## 2. 快速开始(CLI)
31
38
 
32
39
  安装:
@@ -81,6 +88,33 @@ echo-pdf mcp tools
81
88
  echo-pdf mcp call --tool file_ops --args '{"op":"list"}'
82
89
  ```
83
90
 
91
+ ### 3.1.1 纯 MCP 场景推荐流程(本地 PDF)
92
+
93
+ 远端 MCP server 无法直接读取你本机文件路径。推荐两步:
94
+
95
+ 1. 先通过 HTTP 上传本地 PDF,拿到 `fileId`
96
+ 2. 再用 MCP 工具传 `fileId` 调用
97
+
98
+ 示例:
99
+
100
+ ```bash
101
+ curl -sS -X POST https://echo-pdf.echofilesai.workers.dev/api/files/upload \
102
+ -F 'file=@./input.pdf'
103
+
104
+ echo-pdf mcp call --tool pdf_extract_pages --args '{"fileId":"<FILE_ID>","pages":[1]}'
105
+ ```
106
+
107
+ ### 3.1.2 不上传文件的 URL ingest 流程
108
+
109
+ 如果 PDF 已经在公网可访问,直接传 `url`:
110
+
111
+ ```bash
112
+ echo-pdf mcp call --tool pdf_extract_pages --args '{
113
+ "url":"https://example.com/sample.pdf",
114
+ "pages":[1]
115
+ }'
116
+ ```
117
+
84
118
  ### 3.2 给客户端生成 MCP 配置片段
85
119
 
86
120
  ```bash
@@ -100,6 +134,12 @@ echo-pdf setup add json
100
134
  - `pdf_tables_to_latex`
101
135
  - `file_ops`
102
136
 
137
+ MCP 输出策略:
138
+
139
+ - `pdf_extract_pages` 在 MCP 下默认 `returnMode=url`(不传 `returnMode` 时生效)
140
+ - MCP `text` 会对大字段做去二进制/截断,避免把大段 base64 塞进上下文
141
+ - 二进制结果请优先使用 `resource_link` 中的下载地址
142
+
103
143
  ## 4. Web UI 使用
104
144
 
105
145
  打开:
@@ -117,7 +157,8 @@ echo-pdf setup add json
117
157
  说明:
118
158
 
119
159
  - UI 中输入的 key 属于当前会话,不落库到服务端。
120
- - `returnMode` 目前仅支持 `inline` 和 `file_id`(`url` 尚未实现)。
160
+ - `returnMode` 支持 `inline`、`file_id`、`url`。
161
+ - `tools/call` 返回统一结构:`{ ok, data, artifacts }`,其中 `artifacts[*].url` 可直接下载。
121
162
  - 表格工具返回值会校验并要求包含合法 `tabular`,否则报错。
122
163
 
123
164
  ## 5. HTTP API 使用
@@ -174,6 +215,8 @@ curl -sS -X POST https://echo-pdf.echofilesai.workers.dev/tools/call \
174
215
 
175
216
  - `agent.defaultProvider`
176
217
  - `agent.defaultModel`
218
+ - `service.publicBaseUrl`
219
+ - `service.fileGet.cacheTtlSeconds`
177
220
  - `service.maxPdfBytes`
178
221
  - `service.storage.maxFileBytes`
179
222
  - `service.storage.maxTotalBytes`
@@ -184,7 +227,9 @@ curl -sS -X POST https://echo-pdf.echofilesai.workers.dev/tools/call \
184
227
  - `service.maxPdfBytes`:允许处理的 PDF 最大字节数。
185
228
  - `service.storage.maxFileBytes`:文件存储单文件上限(上传 PDF、`url/base64` ingest、以及 `file_id` 结果都会落到存储层)。
186
229
  - 当前项目要求 `service.storage.maxFileBytes >= service.maxPdfBytes`,否则配置无效并在启动时报错。
187
- - 默认配置下两者都是 `1200000`(约 1.2MB)。
230
+ - 当前默认配置下两者都是 `10000000`(约 10MB)。
231
+ - 当未绑定 R2、使用 DO 存储时,`service.storage.maxFileBytes` 必须 `<= 1200000`,否则启动会报错。
232
+ - 生产建议始终绑定 R2,并让 DO 只负责协调/元数据,不承载大文件数据。
188
233
 
189
234
  常用环境变量:
190
235
 
@@ -193,6 +238,9 @@ curl -sS -X POST https://echo-pdf.echofilesai.workers.dev/tools/call \
193
238
  - `VERCEL_AI_GATEWAY_API_KEY` / `VERCEL_AI_GATEWAY_KEY`
194
239
  - `ECHO_PDF_DEFAULT_PROVIDER`
195
240
  - `ECHO_PDF_DEFAULT_MODEL`
241
+ - `ECHO_PDF_PUBLIC_BASE_URL`(可选,强制 artifacts 生成外部可访问绝对 URL)
242
+ - `ECHO_PDF_FILE_GET_CACHE_TTL_SECONDS`(可选,`/api/files/get` 缓存秒数,`0` 表示 `no-store`)
243
+ - `ECHO_PDF_FILE_GET_AUTH_HEADER` + `ECHO_PDF_FILE_GET_AUTH_ENV`(可选,启用下载端点 header 鉴权)
196
244
  - `ECHO_PDF_MCP_KEY`(可选,启用 MCP 鉴权)
197
245
  - `ECHO_PDF_WORKER_NAME`(CLI 默认 URL 推导)
198
246
 
@@ -235,6 +283,28 @@ INPUT_PDF=./fixtures/input.pdf ./scripts/export-fixtures.sh
235
283
 
236
284
  当前实现要求模型输出中必须包含合法 `\\begin{tabular}...\\end{tabular}`。如果模型返回解释性文本或超时,会直接报错。
237
285
 
238
- ### 8.3 `returnMode=url` 为什么不可用
286
+ ### 8.3 `returnMode=url` 如何使用
287
+
288
+ `url` 模式会把结果落到存储层,并返回一个可直接 `GET` 的下载地址:
289
+
290
+ - `GET /api/files/get?fileId=<id>`
291
+
292
+ 示例(提取页面并返回 URL):
293
+
294
+ ```bash
295
+ curl -sS -X POST https://echo-pdf.echofilesai.workers.dev/tools/call \
296
+ -H 'content-type: application/json' \
297
+ -d '{
298
+ "name":"pdf_extract_pages",
299
+ "arguments":{"fileId":"<FILE_ID>","pages":[1],"returnMode":"url"}
300
+ }'
301
+ ```
302
+
303
+ ### 8.4 错误码语义
239
304
 
240
- 当前版本没有对外文件下载路由或签名 URL 能力,因此 `url` 模式未实现。请使用 `inline` 或 `file_id`。
305
+ - 客户端输入错误会返回稳定的 `4xx` 状态码与稳定的错误 `code`,例如:
306
+ - `PAGES_REQUIRED`(400)
307
+ - `PAGE_OUT_OF_RANGE`(400)
308
+ - `MISSING_FILE_INPUT`(400)
309
+ - `FILE_NOT_FOUND`(404)
310
+ - 服务端故障返回 `5xx`。
@@ -1,6 +1,10 @@
1
1
  {
2
2
  "service": {
3
3
  "name": "echo-pdf",
4
+ "publicBaseUrl": "https://echo-pdf.echofilesai.workers.dev",
5
+ "fileGet": {
6
+ "cacheTtlSeconds": 300
7
+ },
4
8
  "maxPdfBytes": 10000000,
5
9
  "maxPagesPerRequest": 20,
6
10
  "defaultRenderScale": 2,
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@echofiles/echo-pdf",
3
3
  "description": "MCP-first PDF agent on Cloudflare Workers with CLI and web demo.",
4
- "version": "0.2.0",
4
+ "version": "0.3.1",
5
5
  "type": "module",
6
6
  "publishConfig": {
7
7
  "access": "public"
@@ -41,12 +41,12 @@ run_json() {
41
41
 
42
42
  validate_ocr_json() {
43
43
  local json_file="$1"
44
- node -e 'const fs=require("fs");const j=JSON.parse(fs.readFileSync(process.argv[1],"utf8"));const pages=j?.output?.pages;if(!Array.isArray(pages)||pages.length===0)process.exit(1);const t=String(pages[0]?.text||"").trim();if(t.length===0)process.exit(1);' "$json_file"
44
+ node -e 'const fs=require("fs");const j=JSON.parse(fs.readFileSync(process.argv[1],"utf8"));const pages=j?.data?.pages;if(!Array.isArray(pages)||pages.length===0)process.exit(1);const t=String(pages[0]?.text||"").trim();if(t.length===0)process.exit(1);' "$json_file"
45
45
  }
46
46
 
47
47
  validate_tables_json() {
48
48
  local json_file="$1"
49
- node -e 'const fs=require("fs");const j=JSON.parse(fs.readFileSync(process.argv[1],"utf8"));const pages=j?.output?.pages;if(!Array.isArray(pages)||pages.length===0)process.exit(1);const t=String(pages[0]?.latex||"").trim();if(t.length===0)process.exit(1);' "$json_file"
49
+ node -e 'const fs=require("fs");const j=JSON.parse(fs.readFileSync(process.argv[1],"utf8"));const pages=j?.data?.pages;if(!Array.isArray(pages)||pages.length===0)process.exit(1);const t=String(pages[0]?.latex||"").trim();if(t.length===0)process.exit(1);' "$json_file"
50
50
  }
51
51
 
52
52
  # 1) Save test logs locally (do not block artifact export on transient network failure)
@@ -142,7 +142,7 @@ if [[ -n "${PROVIDER}" ]]; then
142
142
  else
143
143
  run_json "cli-extract-pages" cli call --tool pdf_extract_pages --args "{\"fileId\":\"${FILE_ID}\",\"pages\":[1],\"returnMode\":\"inline\"}"
144
144
  fi
145
- node -e 'const fs=require("fs");const p=process.argv[1];const out=process.argv[2];const j=JSON.parse(fs.readFileSync(p,"utf8"));const d=j.output?.images?.[0]?.data||"";if(!d.startsWith("data:image/"))process.exit(1);fs.writeFileSync(out, Buffer.from(d.split(",")[1]||"","base64"));' "${OUT_DIR}/cli-extract-pages.json" "${OUT_DIR}/page-1-cli.png"
145
+ node -e 'const fs=require("fs");const p=process.argv[1];const out=process.argv[2];const j=JSON.parse(fs.readFileSync(p,"utf8"));const d=j.data?.images?.[0]?.data||"";if(!d.startsWith("data:image/"))process.exit(1);fs.writeFileSync(out, Buffer.from(d.split(",")[1]||"","base64"));' "${OUT_DIR}/cli-extract-pages.json" "${OUT_DIR}/page-1-cli.png"
146
146
 
147
147
  # 6) MCP tool calls
148
148
  run_json "mcp-initialize" cli mcp initialize
package/src/file-ops.ts CHANGED
@@ -26,10 +26,8 @@ export const runFileOp = async (
26
26
  bytes,
27
27
  })
28
28
  const returnMode = normalizeReturnMode(input.returnMode)
29
- if (returnMode === "url") {
30
- throw new Error("returnMode=url is not implemented; use inline or file_id")
31
- }
32
29
  if (returnMode === "file_id") return { returnMode, file: meta }
30
+ if (returnMode === "url") return { returnMode, file: meta, url: `/api/files/get?fileId=${encodeURIComponent(meta.id)}` }
33
31
  const stored = await fileStore.get(meta.id)
34
32
  if (!stored) throw new Error(`File not found after put: ${meta.id}`)
35
33
  return {
@@ -276,7 +276,16 @@ export class DurableObjectFileStore {
276
276
  })
277
277
  const payload = (await response.json()) as { file?: StoredFileMeta; error?: string }
278
278
  if (!response.ok || !payload.file) {
279
- throw new Error(payload.error ?? "DO put failed")
279
+ const details = payload as { error?: string; code?: string; policy?: unknown; stats?: unknown }
280
+ const error = new Error(payload.error ?? "DO put failed") as Error & {
281
+ status?: number
282
+ code?: string
283
+ details?: unknown
284
+ }
285
+ error.status = response.status
286
+ error.code = typeof details.code === "string" ? details.code : undefined
287
+ error.details = { policy: details.policy, stats: details.stats }
288
+ throw error
280
289
  }
281
290
  return payload.file
282
291
  }
@@ -0,0 +1,21 @@
1
+ export class HttpError extends Error {
2
+ readonly status: number
3
+ readonly code: string
4
+ readonly details?: unknown
5
+
6
+ constructor(status: number, code: string, message: string, details?: unknown) {
7
+ super(message)
8
+ this.status = status
9
+ this.code = code
10
+ this.details = details
11
+ }
12
+ }
13
+
14
+ export const badRequest = (code: string, message: string, details?: unknown): HttpError =>
15
+ new HttpError(400, code, message, details)
16
+
17
+ export const notFound = (code: string, message: string, details?: unknown): HttpError =>
18
+ new HttpError(404, code, message, details)
19
+
20
+ export const unprocessable = (code: string, message: string, details?: unknown): HttpError =>
21
+ new HttpError(422, code, message, details)
package/src/index.ts CHANGED
@@ -5,6 +5,7 @@ import { handleMcpRequest } from "./mcp-server"
5
5
  import { loadEchoPdfConfig } from "./pdf-config"
6
6
  import { getRuntimeFileStore } from "./pdf-storage"
7
7
  import { listProviderModels } from "./provider-client"
8
+ import { buildToolOutputEnvelope } from "./response-schema"
8
9
  import { callTool, listToolSchemas } from "./tool-registry"
9
10
  import type { AgentTraceEvent, PdfOperationRequest } from "./pdf-types"
10
11
  import type { Env, JsonObject } from "./types"
@@ -21,6 +22,25 @@ const json = (data: unknown, status = 200): Response =>
21
22
  const toError = (error: unknown): string =>
22
23
  error instanceof Error ? error.message : String(error)
23
24
 
25
+ const errorStatus = (error: unknown): number | null => {
26
+ const status = (error as { status?: unknown })?.status
27
+ return typeof status === "number" && Number.isFinite(status) ? status : null
28
+ }
29
+
30
+ const errorCode = (error: unknown): string | null => {
31
+ const code = (error as { code?: unknown })?.code
32
+ return typeof code === "string" && code.length > 0 ? code : null
33
+ }
34
+
35
+ const errorDetails = (error: unknown): unknown => (error as { details?: unknown })?.details
36
+
37
+ const jsonError = (error: unknown, fallbackStatus = 500): Response => {
38
+ const status = errorStatus(error) ?? fallbackStatus
39
+ const code = errorCode(error)
40
+ const details = errorDetails(error)
41
+ return json({ error: toError(error), code, details }, status)
42
+ }
43
+
24
44
  const readJson = async (request: Request): Promise<Record<string, unknown>> => {
25
45
  try {
26
46
  const body = await request.json()
@@ -38,6 +58,24 @@ const asObj = (value: unknown): JsonObject =>
38
58
  ? (value as JsonObject)
39
59
  : {}
40
60
 
61
+ const resolvePublicBaseUrl = (request: Request, configured?: string): string =>
62
+ typeof configured === "string" && configured.length > 0 ? configured : request.url
63
+
64
+ const sanitizeDownloadFilename = (filename: string): string => {
65
+ const cleaned = filename
66
+ .replace(/[\r\n"]/g, "")
67
+ .replace(/[^\x20-\x7E]+/g, "")
68
+ .trim()
69
+ return cleaned.length > 0 ? cleaned : "download.bin"
70
+ }
71
+
72
+ const isFileGetAuthorized = (request: Request, env: Env, config: { authHeader?: string; authEnv?: string }): boolean => {
73
+ if (!config.authHeader || !config.authEnv) return true
74
+ const required = env[config.authEnv]
75
+ if (typeof required !== "string" || required.length === 0) return true
76
+ return request.headers.get(config.authHeader) === required
77
+ }
78
+
41
79
  const sseResponse = (stream: ReadableStream<Uint8Array>): Response =>
42
80
  new Response(stream, {
43
81
  headers: {
@@ -117,13 +155,17 @@ export default {
117
155
  fileUploadEndpoint: "/api/files/upload",
118
156
  fileStatsEndpoint: "/api/files/stats",
119
157
  fileCleanupEndpoint: "/api/files/cleanup",
120
- supportedReturnModes: ["inline", "file_id"],
158
+ supportedReturnModes: ["inline", "file_id", "url"],
121
159
  },
122
160
  mcp: {
123
161
  serverName: config.mcp.serverName,
124
162
  version: config.mcp.version,
125
163
  authHeader: config.mcp.authHeader ?? null,
126
164
  },
165
+ fileGet: {
166
+ authHeader: config.service.fileGet?.authHeader ?? null,
167
+ cacheTtlSeconds: config.service.fileGet?.cacheTtlSeconds ?? 300,
168
+ },
127
169
  })
128
170
  }
129
171
 
@@ -163,9 +205,9 @@ export default {
163
205
  ? (body.providerApiKeys as Record<string, string>)
164
206
  : undefined,
165
207
  })
166
- return json({ name, output: result })
208
+ return json(buildToolOutputEnvelope(result, resolvePublicBaseUrl(request, config.service.publicBaseUrl)))
167
209
  } catch (error) {
168
- return json({ error: toError(error) }, 500)
210
+ return jsonError(error, 500)
169
211
  }
170
212
  }
171
213
 
@@ -198,7 +240,7 @@ export default {
198
240
  })
199
241
  return json(result)
200
242
  } catch (error) {
201
- return json({ error: toError(error) }, 500)
243
+ return jsonError(error, 500)
202
244
  }
203
245
  }
204
246
 
@@ -253,7 +295,7 @@ export default {
253
295
  })
254
296
  return json(result)
255
297
  } catch (error) {
256
- return json({ error: toError(error) }, 500)
298
+ return jsonError(error, 500)
257
299
  }
258
300
  }
259
301
 
@@ -276,8 +318,31 @@ export default {
276
318
  })
277
319
  return json({ file: stored }, 200)
278
320
  } catch (error) {
279
- return json({ error: toError(error) }, 500)
321
+ return jsonError(error, 500)
322
+ }
323
+ }
324
+
325
+ if (request.method === "GET" && url.pathname === "/api/files/get") {
326
+ const fileGetConfig = config.service.fileGet ?? {}
327
+ if (!isFileGetAuthorized(request, env, fileGetConfig)) {
328
+ return json({ error: "Unauthorized", code: "UNAUTHORIZED" }, 401)
329
+ }
330
+ const fileId = url.searchParams.get("fileId") || ""
331
+ if (!fileId) return json({ error: "Missing fileId" }, 400)
332
+ const file = await fileStore.get(fileId)
333
+ if (!file) return json({ error: "File not found" }, 404)
334
+ const download = url.searchParams.get("download") === "1"
335
+ const headers = new Headers()
336
+ headers.set("Content-Type", file.mimeType)
337
+ const cacheTtl = Number(fileGetConfig.cacheTtlSeconds ?? 300)
338
+ const cacheControl = cacheTtl > 0
339
+ ? `public, max-age=${Math.floor(cacheTtl)}, s-maxage=${Math.floor(cacheTtl)}`
340
+ : "no-store"
341
+ headers.set("Cache-Control", cacheControl)
342
+ if (download) {
343
+ headers.set("Content-Disposition", `attachment; filename=\"${sanitizeDownloadFilename(file.filename)}\"`)
280
344
  }
345
+ return new Response(file.bytes, { status: 200, headers })
281
346
  }
282
347
 
283
348
  if (request.method === "GET" && url.pathname === "/api/files/stats") {
@@ -321,6 +386,7 @@ export default {
321
386
  stream: "POST /api/agent/stream",
322
387
  files: "POST /api/files/op",
323
388
  fileUpload: "POST /api/files/upload",
389
+ fileGet: "GET /api/files/get?fileId=<id>",
324
390
  fileStats: "GET /api/files/stats",
325
391
  fileCleanup: "POST /api/files/cleanup",
326
392
  mcp: "POST /mcp",
package/src/mcp-server.ts CHANGED
@@ -1,5 +1,6 @@
1
1
  import type { Env, FileStore } from "./types"
2
2
  import type { EchoPdfConfig } from "./pdf-types"
3
+ import { buildMcpContent, buildToolOutputEnvelope } from "./response-schema"
3
4
  import { callTool, listToolSchemas } from "./tool-registry"
4
5
 
5
6
  interface JsonRpcRequest {
@@ -19,12 +20,17 @@ const ok = (id: JsonRpcRequest["id"], result: unknown): Response =>
19
20
  { headers: { "Content-Type": "application/json" } }
20
21
  )
21
22
 
22
- const err = (id: JsonRpcRequest["id"], code: number, message: string): Response =>
23
+ const err = (
24
+ id: JsonRpcRequest["id"],
25
+ code: number,
26
+ message: string,
27
+ data?: Record<string, unknown>
28
+ ): Response =>
23
29
  new Response(
24
30
  JSON.stringify({
25
31
  jsonrpc: "2.0",
26
32
  id: id ?? null,
27
- error: { code, message },
33
+ error: data ? { code, message, data } : { code, message },
28
34
  }),
29
35
  { status: 400, headers: { "Content-Type": "application/json" } }
30
36
  )
@@ -39,6 +45,19 @@ const maybeAuthorized = (request: Request, env: Env, config: EchoPdfConfig): boo
39
45
  return request.headers.get(config.mcp.authHeader) === required
40
46
  }
41
47
 
48
+ const resolvePublicBaseUrl = (request: Request, configured?: string): string =>
49
+ typeof configured === "string" && configured.length > 0 ? configured : request.url
50
+
51
+ const prepareMcpToolArgs = (toolName: string, args: Record<string, unknown>): Record<string, unknown> => {
52
+ if (toolName === "pdf_extract_pages") {
53
+ const mode = typeof args.returnMode === "string" ? args.returnMode : ""
54
+ if (!mode) {
55
+ return { ...args, returnMode: "url" }
56
+ }
57
+ }
58
+ return args
59
+ }
60
+
42
61
  export const handleMcpRequest = async (
43
62
  request: Request,
44
63
  env: Env,
@@ -94,7 +113,13 @@ export const handleMcpRequest = async (
94
113
  }
95
114
 
96
115
  const toolName = typeof params.name === "string" ? params.name : ""
97
- const args = asObj(params.arguments)
116
+ const args = prepareMcpToolArgs(toolName, asObj(params.arguments))
117
+ if (!toolName) {
118
+ return err(id, -32602, "Invalid params: name is required", {
119
+ code: "INVALID_PARAMS",
120
+ status: 400,
121
+ })
122
+ }
98
123
 
99
124
  try {
100
125
  const result = await callTool(toolName, args, {
@@ -102,8 +127,32 @@ export const handleMcpRequest = async (
102
127
  env,
103
128
  fileStore,
104
129
  })
105
- return ok(id, { content: [{ type: "text", text: JSON.stringify(result) }] })
130
+ const envelope = buildToolOutputEnvelope(result, resolvePublicBaseUrl(request, config.service.publicBaseUrl))
131
+ return ok(id, { content: buildMcpContent(envelope) })
106
132
  } catch (error) {
107
- return err(id, -32000, error instanceof Error ? error.message : String(error))
133
+ const message = error instanceof Error ? error.message : String(error)
134
+ const status = (error as { status?: unknown })?.status
135
+ const stableStatus = typeof status === "number" && Number.isFinite(status) ? status : 500
136
+ const code = (error as { code?: unknown })?.code
137
+ const details = (error as { details?: unknown })?.details
138
+ if (message.startsWith("Unknown tool:")) {
139
+ return err(id, -32601, message, {
140
+ code: typeof code === "string" ? code : "TOOL_NOT_FOUND",
141
+ status: 404,
142
+ details,
143
+ })
144
+ }
145
+ if (stableStatus >= 400 && stableStatus < 500) {
146
+ return err(id, -32602, message, {
147
+ code: typeof code === "string" ? code : "INVALID_PARAMS",
148
+ status: stableStatus,
149
+ details,
150
+ })
151
+ }
152
+ return err(id, -32000, message, {
153
+ code: typeof code === "string" ? code : "INTERNAL_ERROR",
154
+ status: stableStatus,
155
+ details,
156
+ })
108
157
  }
109
158
  }
package/src/pdf-agent.ts CHANGED
@@ -2,6 +2,7 @@ import type { Env, FileStore, ReturnMode } from "./types"
2
2
  import type { AgentTraceEvent, EchoPdfConfig, PdfOperationRequest } from "./pdf-types"
3
3
  import { resolveModelForProvider, resolveProviderAlias } from "./agent-defaults"
4
4
  import { fromBase64, normalizeReturnMode, toDataUrl } from "./file-utils"
5
+ import { badRequest, notFound, unprocessable } from "./http-error"
5
6
  import { extractPdfPageText, getPdfPageCount, renderPdfPageToPng, toBytes } from "./pdfium-engine"
6
7
  import { visionRecognize } from "./provider-client"
7
8
 
@@ -22,11 +23,20 @@ const traceStep = (
22
23
  }
23
24
 
24
25
  const ensurePages = (pages: ReadonlyArray<number>, pageCount: number, maxPages: number): number[] => {
25
- if (pages.length === 0) throw new Error("At least one page is required")
26
- if (pages.length > maxPages) throw new Error(`Page count exceeds maxPagesPerRequest (${maxPages})`)
26
+ if (pages.length === 0) throw badRequest("PAGES_REQUIRED", "At least one page is required")
27
+ if (pages.length > maxPages) {
28
+ throw badRequest("TOO_MANY_PAGES", `Page count exceeds maxPagesPerRequest (${maxPages})`, {
29
+ maxPagesPerRequest: maxPages,
30
+ providedPages: pages.length,
31
+ })
32
+ }
27
33
  for (const page of pages) {
28
34
  if (!Number.isInteger(page) || page < 1 || page > pageCount) {
29
- throw new Error(`Page ${page} out of range 1..${pageCount}`)
35
+ throw badRequest("PAGE_OUT_OF_RANGE", `Page ${page} out of range 1..${pageCount}`, {
36
+ page,
37
+ min: 1,
38
+ max: pageCount,
39
+ })
30
40
  }
31
41
  }
32
42
  return [...new Set(pages)].sort((a, b) => a - b)
@@ -45,7 +55,7 @@ export const ingestPdfFromPayload = async (
45
55
  if (input.fileId) {
46
56
  const existing = await opts.fileStore.get(input.fileId)
47
57
  if (!existing) {
48
- throw new Error(`File not found: ${input.fileId}`)
58
+ throw notFound("FILE_NOT_FOUND", `File not found: ${input.fileId}`, { fileId: input.fileId })
49
59
  }
50
60
  return {
51
61
  id: existing.id,
@@ -59,7 +69,11 @@ export const ingestPdfFromPayload = async (
59
69
 
60
70
  if (input.url) {
61
71
  traceStep(opts, "start", "file.fetch.url", { url: input.url })
62
- bytes = await toBytes(input.url)
72
+ try {
73
+ bytes = await toBytes(input.url)
74
+ } catch (error) {
75
+ throw badRequest("URL_FETCH_FAILED", `Unable to fetch PDF from url: ${error instanceof Error ? error.message : String(error)}`)
76
+ }
63
77
  try {
64
78
  const u = new URL(input.url)
65
79
  filename = decodeURIComponent(u.pathname.split("/").pop() || filename)
@@ -74,10 +88,13 @@ export const ingestPdfFromPayload = async (
74
88
  }
75
89
 
76
90
  if (!bytes) {
77
- throw new Error("Missing file input. Provide fileId, url or base64")
91
+ throw badRequest("MISSING_FILE_INPUT", "Missing file input. Provide fileId, url or base64")
78
92
  }
79
93
  if (bytes.byteLength > config.service.maxPdfBytes) {
80
- throw new Error(`PDF exceeds max size (${config.service.maxPdfBytes} bytes)`)
94
+ throw badRequest("PDF_TOO_LARGE", `PDF exceeds max size (${config.service.maxPdfBytes} bytes)`, {
95
+ maxPdfBytes: config.service.maxPdfBytes,
96
+ sizeBytes: bytes.byteLength,
97
+ })
81
98
  }
82
99
 
83
100
  const meta = await opts.fileStore.put({
@@ -122,9 +139,6 @@ export const runPdfAgent = async (
122
139
  const pages = ensurePages(request.pages, pageCount, config.service.maxPagesPerRequest)
123
140
  const scale = request.renderScale ?? config.service.defaultRenderScale
124
141
  const returnMode = resolveReturnMode(request.returnMode)
125
- if (returnMode === "url") {
126
- throw new Error("returnMode=url is not implemented; use inline or file_id")
127
- }
128
142
 
129
143
  if (request.operation === "extract_pages") {
130
144
  const images: Array<{ page: number; mimeType: string; data?: string; fileId?: string; url?: string | null }> = []
@@ -138,6 +152,18 @@ export const runPdfAgent = async (
138
152
  bytes: rendered.png,
139
153
  })
140
154
  images.push({ page, mimeType: "image/png", fileId: stored.id })
155
+ } else if (returnMode === "url") {
156
+ const stored = await opts.fileStore.put({
157
+ filename: `${file.filename}-p${page}.png`,
158
+ mimeType: "image/png",
159
+ bytes: rendered.png,
160
+ })
161
+ images.push({
162
+ page,
163
+ mimeType: "image/png",
164
+ fileId: stored.id,
165
+ url: `/api/files/get?fileId=${encodeURIComponent(stored.id)}`,
166
+ })
141
167
  } else {
142
168
  images.push({
143
169
  page,
@@ -155,7 +181,7 @@ export const runPdfAgent = async (
155
181
  const providerAlias = resolveProviderAlias(config, request.provider)
156
182
  const model = resolveModelForProvider(config, providerAlias, request.model)
157
183
  if (!model) {
158
- throw new Error("model is required for OCR or table extraction; set agent.defaultModel")
184
+ throw badRequest("MODEL_REQUIRED", "model is required for OCR or table extraction; set agent.defaultModel")
159
185
  }
160
186
 
161
187
  if (request.operation === "ocr_pages") {
@@ -207,7 +233,9 @@ export const runPdfAgent = async (
207
233
  })
208
234
  const latex = extractTabularLatex(rawLatex)
209
235
  if (!latex) {
210
- throw new Error(`table extraction did not return valid LaTeX tabular for page ${page}`)
236
+ throw unprocessable("TABLE_LATEX_MISSING", `table extraction did not return valid LaTeX tabular for page ${page}`, {
237
+ page,
238
+ })
211
239
  }
212
240
  tables.push({ page, latex })
213
241
  traceStep(opts, "end", "table.page", { page, chars: latex.length })
package/src/pdf-config.ts CHANGED
@@ -30,6 +30,16 @@ const validateConfig = (config: EchoPdfConfig): EchoPdfConfig => {
30
30
  if (!config.service?.name) throw new Error("service.name is required")
31
31
  if (!config.pdfium?.wasmUrl) throw new Error("pdfium.wasmUrl is required")
32
32
  if (!config.service?.storage) throw new Error("service.storage is required")
33
+ if (
34
+ typeof config.service.publicBaseUrl === "string" &&
35
+ config.service.publicBaseUrl.length > 0 &&
36
+ !/^https?:\/\//.test(config.service.publicBaseUrl)
37
+ ) {
38
+ throw new Error("service.publicBaseUrl must start with http:// or https://")
39
+ }
40
+ if (typeof config.service.fileGet?.cacheTtlSeconds === "number" && config.service.fileGet.cacheTtlSeconds < 0) {
41
+ throw new Error("service.fileGet.cacheTtlSeconds must be >= 0")
42
+ }
33
43
  if (!Number.isFinite(config.service.storage.maxFileBytes) || config.service.storage.maxFileBytes <= 0) {
34
44
  throw new Error("service.storage.maxFileBytes must be positive")
35
45
  }
@@ -65,8 +75,36 @@ export const loadEchoPdfConfig = (env: Env): EchoPdfConfig => {
65
75
 
66
76
  const providerOverride = env.ECHO_PDF_DEFAULT_PROVIDER
67
77
  const modelOverride = env.ECHO_PDF_DEFAULT_MODEL
78
+ const publicBaseUrlOverride = env.ECHO_PDF_PUBLIC_BASE_URL
79
+ const fileGetAuthHeaderOverride = env.ECHO_PDF_FILE_GET_AUTH_HEADER
80
+ const fileGetAuthEnvOverride = env.ECHO_PDF_FILE_GET_AUTH_ENV
81
+ const fileGetCacheTtlOverride = env.ECHO_PDF_FILE_GET_CACHE_TTL_SECONDS
68
82
  const withOverrides: EchoPdfConfig = {
69
83
  ...resolved,
84
+ service: {
85
+ ...resolved.service,
86
+ publicBaseUrl:
87
+ typeof publicBaseUrlOverride === "string" && publicBaseUrlOverride.trim().length > 0
88
+ ? publicBaseUrlOverride.trim()
89
+ : resolved.service.publicBaseUrl,
90
+ fileGet: {
91
+ authHeader:
92
+ typeof fileGetAuthHeaderOverride === "string" && fileGetAuthHeaderOverride.trim().length > 0
93
+ ? fileGetAuthHeaderOverride.trim()
94
+ : resolved.service.fileGet?.authHeader,
95
+ authEnv:
96
+ typeof fileGetAuthEnvOverride === "string" && fileGetAuthEnvOverride.trim().length > 0
97
+ ? fileGetAuthEnvOverride.trim()
98
+ : resolved.service.fileGet?.authEnv,
99
+ cacheTtlSeconds: (() => {
100
+ if (typeof fileGetCacheTtlOverride === "string" && fileGetCacheTtlOverride.trim().length > 0) {
101
+ const value = Number(fileGetCacheTtlOverride)
102
+ return Number.isFinite(value) && value >= 0 ? Math.floor(value) : resolved.service.fileGet?.cacheTtlSeconds
103
+ }
104
+ return resolved.service.fileGet?.cacheTtlSeconds
105
+ })(),
106
+ },
107
+ },
70
108
  agent: {
71
109
  ...resolved.agent,
72
110
  defaultProvider:
@@ -1,4 +1,5 @@
1
1
  import { DurableObjectFileStore } from "./file-store-do"
2
+ import { R2FileStore } from "./r2-file-store"
2
3
  import type { EchoPdfConfig } from "./pdf-types"
3
4
  import type { Env, FileStore, StoredFileMeta, StoredFileRecord } from "./types"
4
5
 
@@ -47,6 +48,7 @@ class InMemoryFileStore implements FileStore {
47
48
  }
48
49
 
49
50
  const fallbackStore = new InMemoryFileStore()
51
+ const DO_SAFE_MAX_FILE_BYTES = 1_200_000
50
52
 
51
53
  export interface RuntimeFileStoreBundle {
52
54
  readonly store: FileStore
@@ -55,7 +57,20 @@ export interface RuntimeFileStoreBundle {
55
57
  }
56
58
 
57
59
  export const getRuntimeFileStore = (env: Env, config: EchoPdfConfig): RuntimeFileStoreBundle => {
60
+ if (env.FILE_STORE_BUCKET) {
61
+ const store = new R2FileStore(env.FILE_STORE_BUCKET, config.service.storage)
62
+ return {
63
+ store,
64
+ stats: async () => store.stats(),
65
+ cleanup: async () => store.cleanup(),
66
+ }
67
+ }
58
68
  if (env.FILE_STORE_DO) {
69
+ if (config.service.storage.maxFileBytes > DO_SAFE_MAX_FILE_BYTES) {
70
+ throw new Error(
71
+ `service.storage.maxFileBytes=${config.service.storage.maxFileBytes} exceeds DO backend limit ${DO_SAFE_MAX_FILE_BYTES}; bind FILE_STORE_BUCKET (R2) or reduce maxFileBytes`
72
+ )
73
+ }
59
74
  const store = new DurableObjectFileStore(env.FILE_STORE_DO, config.service.storage)
60
75
  return {
61
76
  store,
package/src/pdf-types.ts CHANGED
@@ -22,6 +22,12 @@ export interface StoragePolicy {
22
22
  export interface EchoPdfConfig {
23
23
  readonly service: {
24
24
  readonly name: string
25
+ readonly publicBaseUrl?: string
26
+ readonly fileGet?: {
27
+ readonly authHeader?: string
28
+ readonly authEnv?: string
29
+ readonly cacheTtlSeconds?: number
30
+ }
25
31
  readonly maxPdfBytes: number
26
32
  readonly maxPagesPerRequest: number
27
33
  readonly defaultRenderScale: number
@@ -0,0 +1,195 @@
1
+ import type { StoragePolicy } from "./pdf-types"
2
+ import type { FileStore, StoredFileMeta, StoredFileRecord } from "./types"
3
+
4
+ const PREFIX = "file/"
5
+
6
+ type MetaFields = {
7
+ filename?: string
8
+ mimeType?: string
9
+ createdAt?: string
10
+ }
11
+
12
+ const toId = (key: string): string => key.startsWith(PREFIX) ? key.slice(PREFIX.length) : key
13
+ const toKey = (id: string): string => `${PREFIX}${id}`
14
+
15
+ const parseCreatedAt = (value: string | undefined, fallback: Date): string => {
16
+ if (typeof value === "string" && value.trim().length > 0) {
17
+ const ms = Date.parse(value)
18
+ if (Number.isFinite(ms)) return new Date(ms).toISOString()
19
+ }
20
+ return fallback.toISOString()
21
+ }
22
+
23
+ const isExpired = (createdAtIso: string, ttlHours: number): boolean => {
24
+ const ms = Date.parse(createdAtIso)
25
+ if (!Number.isFinite(ms)) return false
26
+ return Date.now() - ms > ttlHours * 60 * 60 * 1000
27
+ }
28
+
29
+ export class R2FileStore implements FileStore {
30
+ constructor(
31
+ private readonly bucket: R2Bucket,
32
+ private readonly policy: StoragePolicy
33
+ ) {}
34
+
35
+ async put(input: { readonly filename: string; readonly mimeType: string; readonly bytes: Uint8Array }): Promise<StoredFileMeta> {
36
+ const sizeBytes = input.bytes.byteLength
37
+ if (sizeBytes > this.policy.maxFileBytes) {
38
+ const err = new Error(`file too large: ${sizeBytes} bytes exceeds maxFileBytes ${this.policy.maxFileBytes}`)
39
+ ;(err as { status?: number; code?: string; details?: unknown }).status = 413
40
+ ;(err as { status?: number; code?: string; details?: unknown }).code = "FILE_TOO_LARGE"
41
+ ;(err as { status?: number; code?: string; details?: unknown }).details = { policy: this.policy, sizeBytes }
42
+ throw err
43
+ }
44
+
45
+ await this.cleanupInternal(sizeBytes)
46
+
47
+ const id = crypto.randomUUID()
48
+ const createdAt = new Date().toISOString()
49
+ await this.bucket.put(toKey(id), input.bytes, {
50
+ httpMetadata: {
51
+ contentType: input.mimeType,
52
+ },
53
+ customMetadata: {
54
+ filename: input.filename,
55
+ mimeType: input.mimeType,
56
+ createdAt,
57
+ },
58
+ })
59
+
60
+ return { id, filename: input.filename, mimeType: input.mimeType, sizeBytes, createdAt }
61
+ }
62
+
63
+ async get(fileId: string): Promise<StoredFileRecord | null> {
64
+ const obj = await this.bucket.get(toKey(fileId))
65
+ if (!obj) return null
66
+ const meta = (obj.customMetadata ?? {}) as MetaFields
67
+ const createdAt = parseCreatedAt(meta.createdAt, obj.uploaded)
68
+ const filename = meta.filename ?? fileId
69
+ const mimeType = meta.mimeType ?? obj.httpMetadata?.contentType ?? "application/octet-stream"
70
+ const bytes = new Uint8Array(await obj.arrayBuffer())
71
+ return {
72
+ id: fileId,
73
+ filename,
74
+ mimeType,
75
+ sizeBytes: bytes.byteLength,
76
+ createdAt,
77
+ bytes,
78
+ }
79
+ }
80
+
81
+ async list(): Promise<ReadonlyArray<StoredFileMeta>> {
82
+ return await this.listAllFiles()
83
+ }
84
+
85
+ async delete(fileId: string): Promise<boolean> {
86
+ await this.bucket.delete(toKey(fileId))
87
+ return true
88
+ }
89
+
90
+ async stats(): Promise<unknown> {
91
+ const files = await this.listAllFiles()
92
+ const totalBytes = files.reduce((sum, file) => sum + file.sizeBytes, 0)
93
+ return {
94
+ backend: "r2",
95
+ policy: this.policy,
96
+ stats: {
97
+ fileCount: files.length,
98
+ totalBytes,
99
+ },
100
+ }
101
+ }
102
+
103
+ async cleanup(): Promise<unknown> {
104
+ const files = await this.listAllFiles()
105
+ const expired = files.filter((f) => isExpired(f.createdAt, this.policy.ttlHours))
106
+ const active = files.filter((f) => !isExpired(f.createdAt, this.policy.ttlHours))
107
+ if (expired.length > 0) {
108
+ await this.bucket.delete(expired.map((f) => toKey(f.id)))
109
+ }
110
+ const evict = this.pickEvictions(active, 0)
111
+ if (evict.length > 0) {
112
+ await this.bucket.delete(evict.map((f) => toKey(f.id)))
113
+ }
114
+ const evictIds = new Set(evict.map((f) => f.id))
115
+ const after = active.filter((f) => !evictIds.has(f.id))
116
+ const totalBytes = after.reduce((sum, file) => sum + file.sizeBytes, 0)
117
+ return {
118
+ backend: "r2",
119
+ policy: this.policy,
120
+ deletedExpired: expired.length,
121
+ deletedEvicted: evict.length,
122
+ stats: {
123
+ fileCount: after.length,
124
+ totalBytes,
125
+ },
126
+ }
127
+ }
128
+
129
+ private async cleanupInternal(incomingBytes: number): Promise<void> {
130
+ const files = await this.listAllFiles()
131
+ const expired = files.filter((f) => isExpired(f.createdAt, this.policy.ttlHours))
132
+ const active = files.filter((f) => !isExpired(f.createdAt, this.policy.ttlHours))
133
+ if (expired.length > 0) {
134
+ await this.bucket.delete(expired.map((f) => toKey(f.id)))
135
+ }
136
+ const evict = this.pickEvictions(active, incomingBytes)
137
+ if (evict.length > 0) {
138
+ await this.bucket.delete(evict.map((f) => toKey(f.id)))
139
+ }
140
+ const evictIds = new Set(evict.map((f) => f.id))
141
+ const remaining = active.filter((f) => !evictIds.has(f.id))
142
+ const finalTotal = remaining.reduce((sum, file) => sum + file.sizeBytes, 0)
143
+ if (finalTotal + incomingBytes > this.policy.maxTotalBytes) {
144
+ const err = new Error(
145
+ `storage quota exceeded: total ${finalTotal} + incoming ${incomingBytes} > maxTotalBytes ${this.policy.maxTotalBytes}`
146
+ )
147
+ ;(err as { status?: number; code?: string; details?: unknown }).status = 507
148
+ ;(err as { status?: number; code?: string; details?: unknown }).code = "STORAGE_QUOTA_EXCEEDED"
149
+ ;(err as { status?: number; code?: string; details?: unknown }).details = { policy: this.policy, totalBytes: finalTotal, incomingBytes }
150
+ throw err
151
+ }
152
+ }
153
+
154
+ private pickEvictions(files: ReadonlyArray<StoredFileMeta>, incomingBytes: number): StoredFileMeta[] {
155
+ const totalBytes = files.reduce((sum, f) => sum + f.sizeBytes, 0)
156
+ const projected = totalBytes + incomingBytes
157
+ if (projected <= this.policy.maxTotalBytes) return []
158
+
159
+ const needFree = projected - this.policy.maxTotalBytes
160
+ const candidates = [...files].sort((a, b) => Date.parse(a.createdAt) - Date.parse(b.createdAt))
161
+ const evict: StoredFileMeta[] = []
162
+ let freed = 0
163
+ for (const file of candidates) {
164
+ evict.push(file)
165
+ freed += file.sizeBytes
166
+ if (freed >= needFree) break
167
+ if (evict.length >= this.policy.cleanupBatchSize) break
168
+ }
169
+ return evict
170
+ }
171
+
172
+ private async listAllFiles(): Promise<StoredFileMeta[]> {
173
+ const files: StoredFileMeta[] = []
174
+ let cursor: string | undefined
175
+ while (true) {
176
+ const listed = await this.bucket.list({ prefix: PREFIX, limit: 1000, cursor })
177
+ for (const obj of listed.objects) {
178
+ const meta = (obj.customMetadata ?? {}) as MetaFields
179
+ const createdAt = parseCreatedAt(meta.createdAt, obj.uploaded)
180
+ const filename = meta.filename ?? toId(obj.key)
181
+ const mimeType = meta.mimeType ?? obj.httpMetadata?.contentType ?? "application/octet-stream"
182
+ files.push({
183
+ id: toId(obj.key),
184
+ filename,
185
+ mimeType,
186
+ sizeBytes: obj.size,
187
+ createdAt,
188
+ })
189
+ }
190
+ if (listed.truncated !== true || !listed.cursor) break
191
+ cursor = listed.cursor
192
+ }
193
+ return files
194
+ }
195
+ }
@@ -0,0 +1,182 @@
1
+ import type { JsonObject } from "./types"
2
+
3
+ export interface ToolArtifact {
4
+ readonly id?: string
5
+ readonly kind: "image" | "pdf" | "file" | "json" | "text"
6
+ readonly mimeType?: string
7
+ readonly filename?: string
8
+ readonly sizeBytes?: number
9
+ readonly url?: string
10
+ }
11
+
12
+ export interface ToolOutputEnvelope {
13
+ readonly ok: true
14
+ readonly data: unknown
15
+ readonly artifacts: ToolArtifact[]
16
+ }
17
+
18
+ const MAX_TEXT_STRING = 1200
19
+ const MAX_TEXT_ARRAY = 40
20
+ const MAX_TEXT_DEPTH = 8
21
+
22
+ const asObj = (value: unknown): JsonObject =>
23
+ typeof value === "object" && value !== null && !Array.isArray(value)
24
+ ? (value as JsonObject)
25
+ : {}
26
+
27
+ const inferKind = (mimeType?: string): ToolArtifact["kind"] => {
28
+ const mime = (mimeType || "").toLowerCase()
29
+ if (mime.startsWith("image/")) return "image"
30
+ if (mime === "application/pdf") return "pdf"
31
+ if (mime.includes("json")) return "json"
32
+ if (mime.startsWith("text/")) return "text"
33
+ return "file"
34
+ }
35
+
36
+ const toAbsoluteUrl = (value: string, baseUrl: string): string => {
37
+ try {
38
+ return new URL(value, baseUrl).toString()
39
+ } catch {
40
+ return value
41
+ }
42
+ }
43
+
44
+ const addArtifact = (artifacts: ToolArtifact[], artifact: ToolArtifact): void => {
45
+ if (!artifact.id && !artifact.url && !artifact.filename) return
46
+ artifacts.push(artifact)
47
+ }
48
+
49
+ export const buildToolOutputEnvelope = (
50
+ result: unknown,
51
+ baseUrl: string
52
+ ): ToolOutputEnvelope => {
53
+ const root = asObj(result)
54
+ const artifacts: ToolArtifact[] = []
55
+
56
+ const fileMeta = asObj(root.file)
57
+ if (typeof fileMeta.id === "string") {
58
+ addArtifact(artifacts, {
59
+ id: fileMeta.id,
60
+ kind: inferKind(typeof fileMeta.mimeType === "string" ? fileMeta.mimeType : undefined),
61
+ mimeType: typeof fileMeta.mimeType === "string" ? fileMeta.mimeType : undefined,
62
+ filename: typeof fileMeta.filename === "string" ? fileMeta.filename : undefined,
63
+ sizeBytes: typeof fileMeta.sizeBytes === "number" ? fileMeta.sizeBytes : undefined,
64
+ url: typeof root.url === "string" ? toAbsoluteUrl(root.url, baseUrl) : undefined,
65
+ })
66
+ }
67
+
68
+ const images = Array.isArray(root.images) ? root.images : []
69
+ for (const item of images) {
70
+ const image = asObj(item)
71
+ const fileId = typeof image.fileId === "string" ? image.fileId : undefined
72
+ const rawUrl = typeof image.url === "string" ? image.url : undefined
73
+ if (!fileId && !rawUrl) continue
74
+ addArtifact(artifacts, {
75
+ id: fileId,
76
+ kind: "image",
77
+ mimeType: typeof image.mimeType === "string" ? image.mimeType : "image/png",
78
+ filename: fileId ? `artifact-${fileId}.png` : undefined,
79
+ url: rawUrl ? toAbsoluteUrl(rawUrl, baseUrl) : undefined,
80
+ })
81
+ }
82
+
83
+ const files = Array.isArray(root.files) ? root.files : []
84
+ for (const item of files) {
85
+ const meta = asObj(item)
86
+ if (typeof meta.id !== "string") continue
87
+ addArtifact(artifacts, {
88
+ id: meta.id,
89
+ kind: inferKind(typeof meta.mimeType === "string" ? meta.mimeType : undefined),
90
+ mimeType: typeof meta.mimeType === "string" ? meta.mimeType : undefined,
91
+ filename: typeof meta.filename === "string" ? meta.filename : undefined,
92
+ sizeBytes: typeof meta.sizeBytes === "number" ? meta.sizeBytes : undefined,
93
+ })
94
+ }
95
+
96
+ return {
97
+ ok: true,
98
+ data: result,
99
+ artifacts,
100
+ }
101
+ }
102
+
103
+ const summarizeData = (data: unknown): string => {
104
+ const root = asObj(data)
105
+ if (typeof root.returnMode === "string" && Array.isArray(root.images)) {
106
+ return `Extracted ${root.images.length} page image(s) in returnMode=${root.returnMode}.`
107
+ }
108
+ if (Array.isArray(root.pages)) {
109
+ return `Processed ${root.pages.length} page(s).`
110
+ }
111
+ if (Array.isArray(root.files)) {
112
+ return `Listed ${root.files.length} file(s).`
113
+ }
114
+ if (typeof root.deleted === "boolean") {
115
+ return root.deleted ? "File deleted." : "File not found."
116
+ }
117
+ return "Tool executed successfully."
118
+ }
119
+
120
+ const sanitizeString = (value: string): string => {
121
+ if (value.startsWith("data:")) {
122
+ const [head] = value.split(",", 1)
123
+ return `${head},<omitted>`
124
+ }
125
+ if (/^[A-Za-z0-9+/=]{300,}$/.test(value)) {
126
+ return `<base64 omitted len=${value.length}>`
127
+ }
128
+ if (value.length > MAX_TEXT_STRING) {
129
+ return `${value.slice(0, MAX_TEXT_STRING)}...(truncated ${value.length - MAX_TEXT_STRING} chars)`
130
+ }
131
+ return value
132
+ }
133
+
134
+ const sanitizeForText = (value: unknown, depth = 0): unknown => {
135
+ if (depth >= MAX_TEXT_DEPTH) return "<max-depth>"
136
+ if (typeof value === "string") return sanitizeString(value)
137
+ if (typeof value !== "object" || value === null) return value
138
+ if (Array.isArray(value)) {
139
+ const items = value.slice(0, MAX_TEXT_ARRAY).map((item) => sanitizeForText(item, depth + 1))
140
+ if (value.length > MAX_TEXT_ARRAY) {
141
+ items.push(`<truncated ${value.length - MAX_TEXT_ARRAY} items>`)
142
+ }
143
+ return items
144
+ }
145
+ const out: Record<string, unknown> = {}
146
+ for (const [key, nested] of Object.entries(value)) {
147
+ out[key] = sanitizeForText(nested, depth + 1)
148
+ }
149
+ return out
150
+ }
151
+
152
+ export const buildMcpContent = (envelope: ToolOutputEnvelope): Array<Record<string, unknown>> => {
153
+ const lines: string[] = [summarizeData(envelope.data)]
154
+ if (envelope.artifacts.length > 0) {
155
+ lines.push("Artifacts:")
156
+ for (const artifact of envelope.artifacts) {
157
+ const descriptor = [
158
+ artifact.kind,
159
+ artifact.filename ?? artifact.id ?? "artifact",
160
+ artifact.mimeType ?? "",
161
+ artifact.url ?? "",
162
+ ]
163
+ .filter((v) => v.length > 0)
164
+ .join(" | ")
165
+ lines.push(`- ${descriptor}`)
166
+ }
167
+ }
168
+ lines.push("")
169
+ lines.push(JSON.stringify(sanitizeForText(envelope), null, 2))
170
+
171
+ const content: Array<Record<string, unknown>> = [{ type: "text", text: lines.join("\n") }]
172
+ for (const artifact of envelope.artifacts) {
173
+ if (!artifact.url) continue
174
+ content.push({
175
+ type: "resource_link",
176
+ name: artifact.filename ?? artifact.id ?? "artifact",
177
+ uri: artifact.url,
178
+ mimeType: artifact.mimeType ?? "application/octet-stream",
179
+ })
180
+ }
181
+ return content
182
+ }
@@ -44,7 +44,7 @@ const toolDefinitions: ReadonlyArray<ToolDefinition> = [
44
44
  filename: { type: "string" },
45
45
  pages: { type: "array", items: { type: "integer" } },
46
46
  renderScale: { type: "number" },
47
- returnMode: { type: "string", enum: ["inline", "file_id"] },
47
+ returnMode: { type: "string", enum: ["inline", "file_id", "url"] },
48
48
  },
49
49
  required: ["pages"],
50
50
  },
@@ -168,7 +168,7 @@ const toolDefinitions: ReadonlyArray<ToolDefinition> = [
168
168
  filename: { type: "string" },
169
169
  mimeType: { type: "string" },
170
170
  base64: { type: "string" },
171
- returnMode: { type: "string", enum: ["inline", "file_id"] },
171
+ returnMode: { type: "string", enum: ["inline", "file_id", "url"] },
172
172
  },
173
173
  required: ["op"],
174
174
  },
package/src/types.ts CHANGED
@@ -11,8 +11,9 @@ export type ReturnMode = "inline" | "file_id" | "url"
11
11
  export interface Env {
12
12
  readonly ECHO_PDF_CONFIG_JSON?: string
13
13
  readonly ASSETS?: Fetcher
14
+ readonly FILE_STORE_BUCKET?: R2Bucket
14
15
  readonly FILE_STORE_DO?: DurableObjectNamespace
15
- readonly [key: string]: string | Fetcher | DurableObjectNamespace | undefined
16
+ readonly [key: string]: string | Fetcher | DurableObjectNamespace | R2Bucket | undefined
16
17
  }
17
18
 
18
19
  export interface StoredFileMeta {
package/wrangler.toml CHANGED
@@ -6,6 +6,10 @@ compatibility_date = "2026-03-06"
6
6
  directory = "./assets"
7
7
  binding = "ASSETS"
8
8
 
9
+ [[r2_buckets]]
10
+ binding = "FILE_STORE_BUCKET"
11
+ bucket_name = "echo-pdf-files"
12
+
9
13
  [[durable_objects.bindings]]
10
14
  name = "FILE_STORE_DO"
11
15
  class_name = "FileStoreDO"