@echofiles/echo-pdf 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -27,6 +27,13 @@
27
27
  - MCP: `https://echo-pdf.echofilesai.workers.dev/mcp`
28
28
  - HTTP API 根路径: `https://echo-pdf.echofilesai.workers.dev`
29
29
 
30
+ ## 1.1 API 兼容性说明
31
+
32
+ - 从 `v0.3.0` 开始,`POST /tools/call` 返回结构改为:
33
+ - `{"ok": true, "data": ..., "artifacts": [...]}`
34
+ - 老格式 `{"name":"...","output":...}` 已移除。
35
+ - MCP `tools/call` 仍保留 `type:"text"`,并新增 `type:"resource_link"` 供下载二进制结果。
36
+
30
37
  ## 2. 快速开始(CLI)
31
38
 
32
39
  安装:
@@ -117,7 +124,8 @@ echo-pdf setup add json
117
124
  说明:
118
125
 
119
126
  - UI 中输入的 key 属于当前会话,不落库到服务端。
120
- - `returnMode` 目前仅支持 `inline` 和 `file_id`(`url` 尚未实现)。
127
+ - `returnMode` 支持 `inline`、`file_id`、`url`。
128
+ - `tools/call` 返回统一结构:`{ ok, data, artifacts }`;当 `returnMode=url` 时,`artifacts[*].url` 为可直接 `GET` 下载的地址(`inline` / `file_id` 模式下不包含 `url` 字段)。
121
129
  - 表格工具返回值会校验并要求包含合法 `tabular`,否则报错。
122
130
 
123
131
  ## 5. HTTP API 使用
@@ -174,6 +182,7 @@ curl -sS -X POST https://echo-pdf.echofilesai.workers.dev/tools/call \
174
182
 
175
183
  - `agent.defaultProvider`
176
184
  - `agent.defaultModel`
185
+ - `service.publicBaseUrl`
177
186
  - `service.maxPdfBytes`
178
187
  - `service.storage.maxFileBytes`
179
188
  - `service.storage.maxTotalBytes`
@@ -184,7 +193,9 @@ curl -sS -X POST https://echo-pdf.echofilesai.workers.dev/tools/call \
184
193
  - `service.maxPdfBytes`:允许处理的 PDF 最大字节数。
185
194
  - `service.storage.maxFileBytes`:文件存储单文件上限(上传 PDF、`url/base64` ingest、以及 `file_id` 结果都会落到存储层)。
186
195
  - 当前项目要求 `service.storage.maxFileBytes >= service.maxPdfBytes`,否则配置无效并在启动时报错。
187
- - 默认配置下两者都是 `1200000`(约 1.2MB)。
196
+ - 当前默认配置下两者都是 `10000000`(10MB)。
197
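+ - 当未绑定 R2、使用 DO 存储时,`service.storage.maxFileBytes` 必须 `<= 1200000`,否则启动会报错;又因为要求 `service.storage.maxFileBytes >= service.maxPdfBytes`,此时 `service.maxPdfBytes` 也必须同步调低到 `1200000` 以内(默认的 `10000000` 只适用于已绑定 R2 的部署)。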
198
+ - 生产建议始终绑定 R2,并让 DO 只负责协调/元数据,不承载大文件数据。
188
199
 
189
200
  常用环境变量:
190
201
 
@@ -193,6 +204,7 @@ curl -sS -X POST https://echo-pdf.echofilesai.workers.dev/tools/call \
193
204
  - `VERCEL_AI_GATEWAY_API_KEY` / `VERCEL_AI_GATEWAY_KEY`
194
205
  - `ECHO_PDF_DEFAULT_PROVIDER`
195
206
  - `ECHO_PDF_DEFAULT_MODEL`
207
+ - `ECHO_PDF_PUBLIC_BASE_URL`(可选,强制 artifacts 生成外部可访问绝对 URL)
196
208
  - `ECHO_PDF_MCP_KEY`(可选,启用 MCP 鉴权)
197
209
  - `ECHO_PDF_WORKER_NAME`(CLI 默认 URL 推导)
198
210
 
@@ -235,6 +247,19 @@ INPUT_PDF=./fixtures/input.pdf ./scripts/export-fixtures.sh
235
247
 
236
248
  当前实现要求模型输出中必须包含合法 `\\begin{tabular}...\\end{tabular}`。如果模型返回解释性文本或超时,会直接报错。
237
249
 
238
- ### 8.3 `returnMode=url` 为什么不可用
250
+ ### 8.3 `returnMode=url` 如何使用
251
+
252
+ `url` 模式会把结果落到存储层,并返回一个可直接 `GET` 的下载地址:
239
253
 
240
- 当前版本没有对外文件下载路由或签名 URL 能力,因此 `url` 模式未实现。请使用 `inline` 或 `file_id`。
254
+ - `GET /api/files/get?fileId=<id>`
255
+
256
+ 示例(提取页面并返回 URL):
257
+
258
+ ```bash
259
+ curl -sS -X POST https://echo-pdf.echofilesai.workers.dev/tools/call \
260
+ -H 'content-type: application/json' \
261
+ -d '{
262
+ "name":"pdf_extract_pages",
263
+ "arguments":{"fileId":"<FILE_ID>","pages":[1],"returnMode":"url"}
264
+ }'
265
+ ```
@@ -1,6 +1,7 @@
1
1
  {
2
2
  "service": {
3
3
  "name": "echo-pdf",
4
+ "publicBaseUrl": "https://echo-pdf.echofilesai.workers.dev",
4
5
  "maxPdfBytes": 10000000,
5
6
  "maxPagesPerRequest": 20,
6
7
  "defaultRenderScale": 2,
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@echofiles/echo-pdf",
3
3
  "description": "MCP-first PDF agent on Cloudflare Workers with CLI and web demo.",
4
- "version": "0.2.0",
4
+ "version": "0.3.0",
5
5
  "type": "module",
6
6
  "publishConfig": {
7
7
  "access": "public"
@@ -41,12 +41,12 @@ run_json() {
41
41
 
42
42
  validate_ocr_json() {
43
43
  local json_file="$1"
44
- node -e 'const fs=require("fs");const j=JSON.parse(fs.readFileSync(process.argv[1],"utf8"));const pages=j?.output?.pages;if(!Array.isArray(pages)||pages.length===0)process.exit(1);const t=String(pages[0]?.text||"").trim();if(t.length===0)process.exit(1);' "$json_file"
44
+ node -e 'const fs=require("fs");const j=JSON.parse(fs.readFileSync(process.argv[1],"utf8"));const pages=j?.data?.pages;if(!Array.isArray(pages)||pages.length===0)process.exit(1);const t=String(pages[0]?.text||"").trim();if(t.length===0)process.exit(1);' "$json_file"
45
45
  }
46
46
 
47
47
  validate_tables_json() {
48
48
  local json_file="$1"
49
- node -e 'const fs=require("fs");const j=JSON.parse(fs.readFileSync(process.argv[1],"utf8"));const pages=j?.output?.pages;if(!Array.isArray(pages)||pages.length===0)process.exit(1);const t=String(pages[0]?.latex||"").trim();if(t.length===0)process.exit(1);' "$json_file"
49
+ node -e 'const fs=require("fs");const j=JSON.parse(fs.readFileSync(process.argv[1],"utf8"));const pages=j?.data?.pages;if(!Array.isArray(pages)||pages.length===0)process.exit(1);const t=String(pages[0]?.latex||"").trim();if(t.length===0)process.exit(1);' "$json_file"
50
50
  }
51
51
 
52
52
  # 1) Save test logs locally (do not block artifact export on transient network failure)
@@ -142,7 +142,7 @@ if [[ -n "${PROVIDER}" ]]; then
142
142
  else
143
143
  run_json "cli-extract-pages" cli call --tool pdf_extract_pages --args "{\"fileId\":\"${FILE_ID}\",\"pages\":[1],\"returnMode\":\"inline\"}"
144
144
  fi
145
- node -e 'const fs=require("fs");const p=process.argv[1];const out=process.argv[2];const j=JSON.parse(fs.readFileSync(p,"utf8"));const d=j.output?.images?.[0]?.data||"";if(!d.startsWith("data:image/"))process.exit(1);fs.writeFileSync(out, Buffer.from(d.split(",")[1]||"","base64"));' "${OUT_DIR}/cli-extract-pages.json" "${OUT_DIR}/page-1-cli.png"
145
+ node -e 'const fs=require("fs");const p=process.argv[1];const out=process.argv[2];const j=JSON.parse(fs.readFileSync(p,"utf8"));const d=j.data?.images?.[0]?.data||"";if(!d.startsWith("data:image/"))process.exit(1);fs.writeFileSync(out, Buffer.from(d.split(",")[1]||"","base64"));' "${OUT_DIR}/cli-extract-pages.json" "${OUT_DIR}/page-1-cli.png"
146
146
 
147
147
  # 6) MCP tool calls
148
148
  run_json "mcp-initialize" cli mcp initialize
package/src/file-ops.ts CHANGED
@@ -26,10 +26,8 @@ export const runFileOp = async (
26
26
  bytes,
27
27
  })
28
28
  const returnMode = normalizeReturnMode(input.returnMode)
29
- if (returnMode === "url") {
30
- throw new Error("returnMode=url is not implemented; use inline or file_id")
31
- }
32
29
  if (returnMode === "file_id") return { returnMode, file: meta }
30
+ if (returnMode === "url") return { returnMode, file: meta, url: `/api/files/get?fileId=${encodeURIComponent(meta.id)}` }
33
31
  const stored = await fileStore.get(meta.id)
34
32
  if (!stored) throw new Error(`File not found after put: ${meta.id}`)
35
33
  return {
@@ -276,7 +276,16 @@ export class DurableObjectFileStore {
276
276
  })
277
277
  const payload = (await response.json()) as { file?: StoredFileMeta; error?: string }
278
278
  if (!response.ok || !payload.file) {
279
- throw new Error(payload.error ?? "DO put failed")
279
+ const details = payload as { error?: string; code?: string; policy?: unknown; stats?: unknown }
280
+ const error = new Error(payload.error ?? "DO put failed") as Error & {
281
+ status?: number
282
+ code?: string
283
+ details?: unknown
284
+ }
285
+ error.status = response.status
286
+ error.code = typeof details.code === "string" ? details.code : undefined
287
+ error.details = { policy: details.policy, stats: details.stats }
288
+ throw error
280
289
  }
281
290
  return payload.file
282
291
  }
package/src/index.ts CHANGED
@@ -5,6 +5,7 @@ import { handleMcpRequest } from "./mcp-server"
5
5
  import { loadEchoPdfConfig } from "./pdf-config"
6
6
  import { getRuntimeFileStore } from "./pdf-storage"
7
7
  import { listProviderModels } from "./provider-client"
8
+ import { buildToolOutputEnvelope } from "./response-schema"
8
9
  import { callTool, listToolSchemas } from "./tool-registry"
9
10
  import type { AgentTraceEvent, PdfOperationRequest } from "./pdf-types"
10
11
  import type { Env, JsonObject } from "./types"
@@ -21,6 +22,25 @@ const json = (data: unknown, status = 200): Response =>
21
22
  const toError = (error: unknown): string =>
22
23
  error instanceof Error ? error.message : String(error)
23
24
 
25
/**
 * Extracts an HTTP status code attached to a thrown value.
 *
 * Only integer statuses in the 4xx/5xx range are accepted. Anything else
 * (missing, non-numeric, fractional, a 2xx/3xx value copied from an upstream
 * response, or a value outside the range `Response` allows) yields `null` so
 * the caller falls back to its own default instead of emitting an error body
 * with a success status or crashing while building the response.
 *
 * @param error - Any thrown value; it may or may not carry a `status` field.
 * @returns The error status, or `null` when none is usable.
 */
const errorStatus = (error: unknown): number | null => {
  const status = (error as { status?: unknown } | null | undefined)?.status
  if (typeof status !== "number" || !Number.isInteger(status)) return null
  return status >= 400 && status <= 599 ? status : null
}
29
+
30
+ const errorCode = (error: unknown): string | null => {
31
+ const code = (error as { code?: unknown })?.code
32
+ return typeof code === "string" && code.length > 0 ? code : null
33
+ }
34
+
35
+ const errorDetails = (error: unknown): unknown => (error as { details?: unknown })?.details
36
+
37
+ const jsonError = (error: unknown, fallbackStatus = 500): Response => {
38
+ const status = errorStatus(error) ?? fallbackStatus
39
+ const code = errorCode(error)
40
+ const details = errorDetails(error)
41
+ return json({ error: toError(error), code, details }, status)
42
+ }
43
+
24
44
  const readJson = async (request: Request): Promise<Record<string, unknown>> => {
25
45
  try {
26
46
  const body = await request.json()
@@ -38,6 +58,9 @@ const asObj = (value: unknown): JsonObject =>
38
58
  ? (value as JsonObject)
39
59
  : {}
40
60
 
61
+ const resolvePublicBaseUrl = (request: Request, configured?: string): string =>
62
+ typeof configured === "string" && configured.length > 0 ? configured : request.url
63
+
41
64
  const sseResponse = (stream: ReadableStream<Uint8Array>): Response =>
42
65
  new Response(stream, {
43
66
  headers: {
@@ -117,7 +140,7 @@ export default {
117
140
  fileUploadEndpoint: "/api/files/upload",
118
141
  fileStatsEndpoint: "/api/files/stats",
119
142
  fileCleanupEndpoint: "/api/files/cleanup",
120
- supportedReturnModes: ["inline", "file_id"],
143
+ supportedReturnModes: ["inline", "file_id", "url"],
121
144
  },
122
145
  mcp: {
123
146
  serverName: config.mcp.serverName,
@@ -163,9 +186,9 @@ export default {
163
186
  ? (body.providerApiKeys as Record<string, string>)
164
187
  : undefined,
165
188
  })
166
- return json({ name, output: result })
189
+ return json(buildToolOutputEnvelope(result, resolvePublicBaseUrl(request, config.service.publicBaseUrl)))
167
190
  } catch (error) {
168
- return json({ error: toError(error) }, 500)
191
+ return jsonError(error, 500)
169
192
  }
170
193
  }
171
194
 
@@ -198,7 +221,7 @@ export default {
198
221
  })
199
222
  return json(result)
200
223
  } catch (error) {
201
- return json({ error: toError(error) }, 500)
224
+ return jsonError(error, 500)
202
225
  }
203
226
  }
204
227
 
@@ -253,7 +276,7 @@ export default {
253
276
  })
254
277
  return json(result)
255
278
  } catch (error) {
256
- return json({ error: toError(error) }, 500)
279
+ return jsonError(error, 500)
257
280
  }
258
281
  }
259
282
 
@@ -276,8 +299,23 @@ export default {
276
299
  })
277
300
  return json({ file: stored }, 200)
278
301
  } catch (error) {
279
- return json({ error: toError(error) }, 500)
302
+ return jsonError(error, 500)
303
+ }
304
+ }
305
+
306
+ if (request.method === "GET" && url.pathname === "/api/files/get") {
307
+ const fileId = url.searchParams.get("fileId") || ""
308
+ if (!fileId) return json({ error: "Missing fileId" }, 400)
309
+ const file = await fileStore.get(fileId)
310
+ if (!file) return json({ error: "File not found" }, 404)
311
+ const download = url.searchParams.get("download") === "1"
312
+ const headers = new Headers()
313
+ headers.set("Content-Type", file.mimeType)
314
+ headers.set("Cache-Control", "no-store")
315
+ if (download) {
316
+ headers.set("Content-Disposition", `attachment; filename=\"${file.filename.replace(/\"/g, "")}\"`)
280
317
  }
318
+ return new Response(file.bytes, { status: 200, headers })
281
319
  }
282
320
 
283
321
  if (request.method === "GET" && url.pathname === "/api/files/stats") {
@@ -321,6 +359,7 @@ export default {
321
359
  stream: "POST /api/agent/stream",
322
360
  files: "POST /api/files/op",
323
361
  fileUpload: "POST /api/files/upload",
362
+ fileGet: "GET /api/files/get?fileId=<id>",
324
363
  fileStats: "GET /api/files/stats",
325
364
  fileCleanup: "POST /api/files/cleanup",
326
365
  mcp: "POST /mcp",
package/src/mcp-server.ts CHANGED
@@ -1,5 +1,6 @@
1
1
  import type { Env, FileStore } from "./types"
2
2
  import type { EchoPdfConfig } from "./pdf-types"
3
+ import { buildMcpContent, buildToolOutputEnvelope } from "./response-schema"
3
4
  import { callTool, listToolSchemas } from "./tool-registry"
4
5
 
5
6
  interface JsonRpcRequest {
@@ -19,12 +20,17 @@ const ok = (id: JsonRpcRequest["id"], result: unknown): Response =>
19
20
  { headers: { "Content-Type": "application/json" } }
20
21
  )
21
22
 
22
- const err = (id: JsonRpcRequest["id"], code: number, message: string): Response =>
23
+ const err = (
24
+ id: JsonRpcRequest["id"],
25
+ code: number,
26
+ message: string,
27
+ data?: Record<string, unknown>
28
+ ): Response =>
23
29
  new Response(
24
30
  JSON.stringify({
25
31
  jsonrpc: "2.0",
26
32
  id: id ?? null,
27
- error: { code, message },
33
+ error: data ? { code, message, data } : { code, message },
28
34
  }),
29
35
  { status: 400, headers: { "Content-Type": "application/json" } }
30
36
  )
@@ -39,6 +45,9 @@ const maybeAuthorized = (request: Request, env: Env, config: EchoPdfConfig): boo
39
45
  return request.headers.get(config.mcp.authHeader) === required
40
46
  }
41
47
 
48
+ const resolvePublicBaseUrl = (request: Request, configured?: string): string =>
49
+ typeof configured === "string" && configured.length > 0 ? configured : request.url
50
+
42
51
  export const handleMcpRequest = async (
43
52
  request: Request,
44
53
  env: Env,
@@ -95,6 +104,12 @@ export const handleMcpRequest = async (
95
104
 
96
105
  const toolName = typeof params.name === "string" ? params.name : ""
97
106
  const args = asObj(params.arguments)
107
+ if (!toolName) {
108
+ return err(id, -32602, "Invalid params: name is required", {
109
+ code: "INVALID_PARAMS",
110
+ status: 400,
111
+ })
112
+ }
98
113
 
99
114
  try {
100
115
  const result = await callTool(toolName, args, {
@@ -102,8 +117,32 @@ export const handleMcpRequest = async (
102
117
  env,
103
118
  fileStore,
104
119
  })
105
- return ok(id, { content: [{ type: "text", text: JSON.stringify(result) }] })
120
+ const envelope = buildToolOutputEnvelope(result, resolvePublicBaseUrl(request, config.service.publicBaseUrl))
121
+ return ok(id, { content: buildMcpContent(envelope) })
106
122
  } catch (error) {
107
- return err(id, -32000, error instanceof Error ? error.message : String(error))
123
+ const message = error instanceof Error ? error.message : String(error)
124
+ const status = (error as { status?: unknown })?.status
125
+ const stableStatus = typeof status === "number" && Number.isFinite(status) ? status : 500
126
+ const code = (error as { code?: unknown })?.code
127
+ const details = (error as { details?: unknown })?.details
128
+ if (message.startsWith("Unknown tool:")) {
129
+ return err(id, -32601, message, {
130
+ code: typeof code === "string" ? code : "TOOL_NOT_FOUND",
131
+ status: 404,
132
+ details,
133
+ })
134
+ }
135
+ if (stableStatus >= 400 && stableStatus < 500) {
136
+ return err(id, -32602, message, {
137
+ code: typeof code === "string" ? code : "INVALID_PARAMS",
138
+ status: stableStatus,
139
+ details,
140
+ })
141
+ }
142
+ return err(id, -32000, message, {
143
+ code: typeof code === "string" ? code : "INTERNAL_ERROR",
144
+ status: stableStatus,
145
+ details,
146
+ })
108
147
  }
109
148
  }
package/src/pdf-agent.ts CHANGED
@@ -122,9 +122,6 @@ export const runPdfAgent = async (
122
122
  const pages = ensurePages(request.pages, pageCount, config.service.maxPagesPerRequest)
123
123
  const scale = request.renderScale ?? config.service.defaultRenderScale
124
124
  const returnMode = resolveReturnMode(request.returnMode)
125
- if (returnMode === "url") {
126
- throw new Error("returnMode=url is not implemented; use inline or file_id")
127
- }
128
125
 
129
126
  if (request.operation === "extract_pages") {
130
127
  const images: Array<{ page: number; mimeType: string; data?: string; fileId?: string; url?: string | null }> = []
@@ -138,6 +135,18 @@ export const runPdfAgent = async (
138
135
  bytes: rendered.png,
139
136
  })
140
137
  images.push({ page, mimeType: "image/png", fileId: stored.id })
138
+ } else if (returnMode === "url") {
139
+ const stored = await opts.fileStore.put({
140
+ filename: `${file.filename}-p${page}.png`,
141
+ mimeType: "image/png",
142
+ bytes: rendered.png,
143
+ })
144
+ images.push({
145
+ page,
146
+ mimeType: "image/png",
147
+ fileId: stored.id,
148
+ url: `/api/files/get?fileId=${encodeURIComponent(stored.id)}`,
149
+ })
141
150
  } else {
142
151
  images.push({
143
152
  page,
package/src/pdf-config.ts CHANGED
@@ -30,6 +30,13 @@ const validateConfig = (config: EchoPdfConfig): EchoPdfConfig => {
30
30
  if (!config.service?.name) throw new Error("service.name is required")
31
31
  if (!config.pdfium?.wasmUrl) throw new Error("pdfium.wasmUrl is required")
32
32
  if (!config.service?.storage) throw new Error("service.storage is required")
33
+ if (
34
+ typeof config.service.publicBaseUrl === "string" &&
35
+ config.service.publicBaseUrl.length > 0 &&
36
+ !/^https?:\/\//.test(config.service.publicBaseUrl)
37
+ ) {
38
+ throw new Error("service.publicBaseUrl must start with http:// or https://")
39
+ }
33
40
  if (!Number.isFinite(config.service.storage.maxFileBytes) || config.service.storage.maxFileBytes <= 0) {
34
41
  throw new Error("service.storage.maxFileBytes must be positive")
35
42
  }
@@ -65,8 +72,16 @@ export const loadEchoPdfConfig = (env: Env): EchoPdfConfig => {
65
72
 
66
73
  const providerOverride = env.ECHO_PDF_DEFAULT_PROVIDER
67
74
  const modelOverride = env.ECHO_PDF_DEFAULT_MODEL
75
+ const publicBaseUrlOverride = env.ECHO_PDF_PUBLIC_BASE_URL
68
76
  const withOverrides: EchoPdfConfig = {
69
77
  ...resolved,
78
+ service: {
79
+ ...resolved.service,
80
+ publicBaseUrl:
81
+ typeof publicBaseUrlOverride === "string" && publicBaseUrlOverride.trim().length > 0
82
+ ? publicBaseUrlOverride.trim()
83
+ : resolved.service.publicBaseUrl,
84
+ },
70
85
  agent: {
71
86
  ...resolved.agent,
72
87
  defaultProvider:
@@ -1,4 +1,5 @@
1
1
  import { DurableObjectFileStore } from "./file-store-do"
2
+ import { R2FileStore } from "./r2-file-store"
2
3
  import type { EchoPdfConfig } from "./pdf-types"
3
4
  import type { Env, FileStore, StoredFileMeta, StoredFileRecord } from "./types"
4
5
 
@@ -47,6 +48,7 @@ class InMemoryFileStore implements FileStore {
47
48
  }
48
49
 
49
50
  const fallbackStore = new InMemoryFileStore()
51
+ const DO_SAFE_MAX_FILE_BYTES = 1_200_000
50
52
 
51
53
  export interface RuntimeFileStoreBundle {
52
54
  readonly store: FileStore
@@ -55,7 +57,20 @@ export interface RuntimeFileStoreBundle {
55
57
  }
56
58
 
57
59
  export const getRuntimeFileStore = (env: Env, config: EchoPdfConfig): RuntimeFileStoreBundle => {
60
+ if (env.FILE_STORE_BUCKET) {
61
+ const store = new R2FileStore(env.FILE_STORE_BUCKET, config.service.storage)
62
+ return {
63
+ store,
64
+ stats: async () => store.stats(),
65
+ cleanup: async () => store.cleanup(),
66
+ }
67
+ }
58
68
  if (env.FILE_STORE_DO) {
69
+ if (config.service.storage.maxFileBytes > DO_SAFE_MAX_FILE_BYTES) {
70
+ throw new Error(
71
+ `service.storage.maxFileBytes=${config.service.storage.maxFileBytes} exceeds DO backend limit ${DO_SAFE_MAX_FILE_BYTES}; bind FILE_STORE_BUCKET (R2) or reduce maxFileBytes`
72
+ )
73
+ }
59
74
  const store = new DurableObjectFileStore(env.FILE_STORE_DO, config.service.storage)
60
75
  return {
61
76
  store,
package/src/pdf-types.ts CHANGED
@@ -22,6 +22,7 @@ export interface StoragePolicy {
22
22
  export interface EchoPdfConfig {
23
23
  readonly service: {
24
24
  readonly name: string
25
+ readonly publicBaseUrl?: string
25
26
  readonly maxPdfBytes: number
26
27
  readonly maxPagesPerRequest: number
27
28
  readonly defaultRenderScale: number
@@ -0,0 +1,180 @@
1
+ import type { StoragePolicy } from "./pdf-types"
2
+ import type { FileStore, StoredFileMeta, StoredFileRecord } from "./types"
3
+
4
/** Key prefix under which every stored file lives in the bucket. */
const PREFIX = "file/"

/** R2 caps both a single `list` page and a single bulk `delete` call at 1000 keys. */
const R2_BATCH_LIMIT = 1000

/** Custom metadata written next to each object by `put`. */
type MetaFields = {
  filename?: string
  mimeType?: string
  createdAt?: string
}

/** Error carrying the HTTP-ish fields that the request handlers surface to clients. */
type StoreError = Error & { status?: number; code?: string; details?: unknown }

/** Converts a bucket key back into a file id (keys without the prefix pass through). */
const toId = (key: string): string => (key.startsWith(PREFIX) ? key.slice(PREFIX.length) : key)

/** Converts a file id into its bucket key. */
const toKey = (id: string): string => `${PREFIX}${id}`

/**
 * Normalizes a stored `createdAt` string to ISO-8601.
 * Falls back to `fallback` when the value is missing, blank or unparseable.
 */
const parseCreatedAt = (value: string | undefined, fallback: Date): string => {
  if (typeof value === "string" && value.trim().length > 0) {
    const ms = Date.parse(value)
    if (Number.isFinite(ms)) return new Date(ms).toISOString()
  }
  return fallback.toISOString()
}

/** True when `createdAtIso` is older than `ttlHours`; unparseable timestamps never expire. */
const isExpired = (createdAtIso: string, ttlHours: number): boolean => {
  const ms = Date.parse(createdAtIso)
  if (!Number.isFinite(ms)) return false
  return Date.now() - ms > ttlHours * 60 * 60 * 1000
}

/** Builds an `Error` annotated with `status`, `code` and `details`. */
const storeError = (message: string, status: number, code: string, details: unknown): StoreError =>
  Object.assign(new Error(message), { status, code, details })

/** Total size in bytes of the given files. */
const sumBytes = (files: ReadonlyArray<StoredFileMeta>): number =>
  files.reduce((sum, file) => sum + file.sizeBytes, 0)

/** Returns `files` minus every entry whose id appears in `removed`. */
const without = (
  files: ReadonlyArray<StoredFileMeta>,
  removed: ReadonlyArray<StoredFileMeta>
): ReadonlyArray<StoredFileMeta> => {
  if (removed.length === 0) return files
  const removedIds = new Set(removed.map((f) => f.id))
  return files.filter((f) => !removedIds.has(f.id))
}

/**
 * `FileStore` backed by an R2 bucket.
 *
 * Objects are stored under `file/<uuid>`; filename, MIME type and creation time
 * travel as custom metadata. The storage policy (max file size, max total size,
 * TTL, eviction batch size) is enforced on every `put` and on explicit `cleanup`.
 */
export class R2FileStore implements FileStore {
  constructor(
    private readonly bucket: R2Bucket,
    private readonly policy: StoragePolicy
  ) {}

  /**
   * Stores a file after enforcing the size and quota policy.
   *
   * @throws Error with `status: 413`, `code: "FILE_TOO_LARGE"` when the file exceeds `maxFileBytes`.
   * @throws Error with `status: 507`, `code: "STORAGE_QUOTA_EXCEEDED"` when space cannot be freed.
   */
  async put(input: { readonly filename: string; readonly mimeType: string; readonly bytes: Uint8Array }): Promise<StoredFileMeta> {
    const sizeBytes = input.bytes.byteLength
    if (sizeBytes > this.policy.maxFileBytes) {
      throw storeError(
        `file too large: ${sizeBytes} bytes exceeds maxFileBytes ${this.policy.maxFileBytes}`,
        413,
        "FILE_TOO_LARGE",
        { policy: this.policy, sizeBytes }
      )
    }

    await this.cleanupInternal(sizeBytes)

    const id = crypto.randomUUID()
    const createdAt = new Date().toISOString()
    await this.bucket.put(toKey(id), input.bytes, {
      httpMetadata: {
        contentType: input.mimeType,
      },
      customMetadata: {
        filename: input.filename,
        mimeType: input.mimeType,
        createdAt,
      },
    })

    return { id, filename: input.filename, mimeType: input.mimeType, sizeBytes, createdAt }
  }

  /** Loads a file with its bytes, or `null` when the id is unknown. */
  async get(fileId: string): Promise<StoredFileRecord | null> {
    const obj = await this.bucket.get(toKey(fileId))
    if (!obj) return null
    const meta = (obj.customMetadata ?? {}) as MetaFields
    const createdAt = parseCreatedAt(meta.createdAt, obj.uploaded)
    const filename = meta.filename ?? fileId
    const mimeType = meta.mimeType ?? obj.httpMetadata?.contentType ?? "application/octet-stream"
    const bytes = new Uint8Array(await obj.arrayBuffer())
    return {
      id: fileId,
      filename,
      mimeType,
      sizeBytes: bytes.byteLength,
      createdAt,
      bytes,
    }
  }

  /**
   * Lists metadata for every stored file.
   *
   * Follows the listing cursor until the bucket reports no further pages, so
   * stats, TTL cleanup and quota checks see all objects rather than only the
   * first page. Metadata is requested explicitly because listings may omit it
   * unless asked for via `include`.
   */
  async list(): Promise<ReadonlyArray<StoredFileMeta>> {
    const files: StoredFileMeta[] = []
    let cursor: string | undefined
    do {
      const page = await this.bucket.list({
        prefix: PREFIX,
        limit: R2_BATCH_LIMIT,
        cursor,
        include: ["httpMetadata", "customMetadata"],
      })
      for (const obj of page.objects) {
        const meta = (obj.customMetadata ?? {}) as MetaFields
        files.push({
          id: toId(obj.key),
          filename: meta.filename ?? toId(obj.key),
          mimeType: meta.mimeType ?? obj.httpMetadata?.contentType ?? "application/octet-stream",
          sizeBytes: obj.size,
          createdAt: parseCreatedAt(meta.createdAt, obj.uploaded),
        })
      }
      // A page may hold fewer than `limit` entries; only `truncated` says whether more follow.
      cursor = page.truncated ? page.cursor : undefined
    } while (cursor)
    return files
  }

  /**
   * Deletes a file.
   *
   * @returns `true` when the file existed and was removed, `false` when the id was unknown.
   */
  async delete(fileId: string): Promise<boolean> {
    const key = toKey(fileId)
    const existed = (await this.bucket.head(key)) !== null
    if (existed) await this.bucket.delete(key)
    return existed
  }

  /** Reports the active policy together with the current file count and total size. */
  async stats(): Promise<unknown> {
    const files = await this.list()
    return {
      backend: "r2",
      policy: this.policy,
      stats: {
        fileCount: files.length,
        totalBytes: sumBytes(files),
      },
    }
  }

  /** Removes expired files, evicts oldest files if over quota, and reports what was done. */
  async cleanup(): Promise<unknown> {
    const before = await this.list()
    const expired = await this.deleteExpired(before)
    const live = without(before, expired)
    const evicted = await this.evictIfNeeded(live, 0)
    const after = without(live, evicted)
    return {
      backend: "r2",
      policy: this.policy,
      deletedExpired: expired.length,
      deletedEvicted: evicted.length,
      stats: {
        fileCount: after.length,
        totalBytes: sumBytes(after),
      },
    }
  }

  /**
   * Makes room for `incomingBytes` before a write: drops expired files, then
   * evicts the oldest ones. Works from a single listing and tracks removals
   * locally instead of re-listing the bucket after each step.
   *
   * @throws Error with `status: 507` when the quota is still exceeded afterwards.
   */
  private async cleanupInternal(incomingBytes: number): Promise<void> {
    const files = await this.list()
    const live = without(files, await this.deleteExpired(files))
    const remaining = without(live, await this.evictIfNeeded(live, incomingBytes))
    const finalTotal = sumBytes(remaining)
    if (finalTotal + incomingBytes > this.policy.maxTotalBytes) {
      throw storeError(
        `storage quota exceeded: total ${finalTotal} + incoming ${incomingBytes} > maxTotalBytes ${this.policy.maxTotalBytes}`,
        507,
        "STORAGE_QUOTA_EXCEEDED",
        { policy: this.policy, totalBytes: finalTotal, incomingBytes }
      )
    }
  }

  /** Deletes the given files in chunks that respect the bulk-delete key limit. */
  private async deleteFiles(files: ReadonlyArray<StoredFileMeta>): Promise<void> {
    for (let start = 0; start < files.length; start += R2_BATCH_LIMIT) {
      const chunk = files.slice(start, start + R2_BATCH_LIMIT)
      await this.bucket.delete(chunk.map((f) => toKey(f.id)))
    }
  }

  /** Deletes every file older than the TTL and returns the files that were removed. */
  private async deleteExpired(files: ReadonlyArray<StoredFileMeta>): Promise<ReadonlyArray<StoredFileMeta>> {
    const expired = files.filter((f) => isExpired(f.createdAt, this.policy.ttlHours))
    await this.deleteFiles(expired)
    return expired
  }

  /**
   * Evicts oldest-first until `incomingBytes` fits under `maxTotalBytes` or the
   * batch limit is reached, and returns the files that were removed.
   */
  private async evictIfNeeded(
    files: ReadonlyArray<StoredFileMeta>,
    incomingBytes: number
  ): Promise<ReadonlyArray<StoredFileMeta>> {
    const projected = sumBytes(files) + incomingBytes
    if (projected <= this.policy.maxTotalBytes) return []

    const needFree = projected - this.policy.maxTotalBytes
    const candidates = [...files].sort((a, b) => Date.parse(a.createdAt) - Date.parse(b.createdAt))
    const evict: StoredFileMeta[] = []
    let freed = 0
    for (const file of candidates) {
      evict.push(file)
      freed += file.sizeBytes
      if (freed >= needFree) break
      if (evict.length >= this.policy.cleanupBatchSize) break
    }
    await this.deleteFiles(evict)
    return evict
  }
}
180
+
@@ -0,0 +1,146 @@
1
+ import type { JsonObject } from "./types"
2
+
3
+ export interface ToolArtifact {
4
+ readonly id?: string
5
+ readonly kind: "image" | "pdf" | "file" | "json" | "text"
6
+ readonly mimeType?: string
7
+ readonly filename?: string
8
+ readonly sizeBytes?: number
9
+ readonly url?: string
10
+ }
11
+
12
+ export interface ToolOutputEnvelope {
13
+ readonly ok: true
14
+ readonly data: unknown
15
+ readonly artifacts: ToolArtifact[]
16
+ }
17
+
18
+ const asObj = (value: unknown): JsonObject =>
19
+ typeof value === "object" && value !== null && !Array.isArray(value)
20
+ ? (value as JsonObject)
21
+ : {}
22
+
23
/**
 * Maps a MIME type to the coarse artifact kind used in tool output.
 *
 * Only the part before the first `;` is considered, trimmed and lower-cased,
 * so values such as `application/pdf; charset=binary` are classified the same
 * as their bare form.
 *
 * @param mimeType - MIME type of the artifact, if known.
 * @returns `"image"`, `"pdf"`, `"json"` or `"text"` when recognised, otherwise `"file"`.
 */
const inferKind = (mimeType?: string): ToolArtifact["kind"] => {
  const [essence = ""] = (mimeType ?? "").split(";", 1)
  const mime = essence.trim().toLowerCase()
  if (mime.startsWith("image/")) return "image"
  if (mime === "application/pdf") return "pdf"
  if (mime.includes("json")) return "json"
  if (mime.startsWith("text/")) return "text"
  return "file"
}
31
+
32
+ const toAbsoluteUrl = (value: string, baseUrl: string): string => {
33
+ try {
34
+ return new URL(value, baseUrl).toString()
35
+ } catch {
36
+ return value
37
+ }
38
+ }
39
+
40
+ const addArtifact = (artifacts: ToolArtifact[], artifact: ToolArtifact): void => {
41
+ if (!artifact.id && !artifact.url && !artifact.filename) return
42
+ artifacts.push(artifact)
43
+ }
44
+
45
+ export const buildToolOutputEnvelope = (
46
+ result: unknown,
47
+ baseUrl: string
48
+ ): ToolOutputEnvelope => {
49
+ const root = asObj(result)
50
+ const artifacts: ToolArtifact[] = []
51
+
52
+ const fileMeta = asObj(root.file)
53
+ if (typeof fileMeta.id === "string") {
54
+ addArtifact(artifacts, {
55
+ id: fileMeta.id,
56
+ kind: inferKind(typeof fileMeta.mimeType === "string" ? fileMeta.mimeType : undefined),
57
+ mimeType: typeof fileMeta.mimeType === "string" ? fileMeta.mimeType : undefined,
58
+ filename: typeof fileMeta.filename === "string" ? fileMeta.filename : undefined,
59
+ sizeBytes: typeof fileMeta.sizeBytes === "number" ? fileMeta.sizeBytes : undefined,
60
+ url: typeof root.url === "string" ? toAbsoluteUrl(root.url, baseUrl) : undefined,
61
+ })
62
+ }
63
+
64
+ const images = Array.isArray(root.images) ? root.images : []
65
+ for (const item of images) {
66
+ const image = asObj(item)
67
+ const fileId = typeof image.fileId === "string" ? image.fileId : undefined
68
+ const rawUrl = typeof image.url === "string" ? image.url : undefined
69
+ if (!fileId && !rawUrl) continue
70
+ addArtifact(artifacts, {
71
+ id: fileId,
72
+ kind: "image",
73
+ mimeType: typeof image.mimeType === "string" ? image.mimeType : "image/png",
74
+ filename: fileId ? `artifact-${fileId}.png` : undefined,
75
+ url: rawUrl ? toAbsoluteUrl(rawUrl, baseUrl) : undefined,
76
+ })
77
+ }
78
+
79
+ const files = Array.isArray(root.files) ? root.files : []
80
+ for (const item of files) {
81
+ const meta = asObj(item)
82
+ if (typeof meta.id !== "string") continue
83
+ addArtifact(artifacts, {
84
+ id: meta.id,
85
+ kind: inferKind(typeof meta.mimeType === "string" ? meta.mimeType : undefined),
86
+ mimeType: typeof meta.mimeType === "string" ? meta.mimeType : undefined,
87
+ filename: typeof meta.filename === "string" ? meta.filename : undefined,
88
+ sizeBytes: typeof meta.sizeBytes === "number" ? meta.sizeBytes : undefined,
89
+ })
90
+ }
91
+
92
+ return {
93
+ ok: true,
94
+ data: result,
95
+ artifacts,
96
+ }
97
+ }
98
+
99
+ const summarizeData = (data: unknown): string => {
100
+ const root = asObj(data)
101
+ if (typeof root.returnMode === "string" && Array.isArray(root.images)) {
102
+ return `Extracted ${root.images.length} page image(s) in returnMode=${root.returnMode}.`
103
+ }
104
+ if (Array.isArray(root.pages)) {
105
+ return `Processed ${root.pages.length} page(s).`
106
+ }
107
+ if (Array.isArray(root.files)) {
108
+ return `Listed ${root.files.length} file(s).`
109
+ }
110
+ if (typeof root.deleted === "boolean") {
111
+ return root.deleted ? "File deleted." : "File not found."
112
+ }
113
+ return "Tool executed successfully."
114
+ }
115
+
116
+ export const buildMcpContent = (envelope: ToolOutputEnvelope): Array<Record<string, unknown>> => {
117
+ const lines: string[] = [summarizeData(envelope.data)]
118
+ if (envelope.artifacts.length > 0) {
119
+ lines.push("Artifacts:")
120
+ for (const artifact of envelope.artifacts) {
121
+ const descriptor = [
122
+ artifact.kind,
123
+ artifact.filename ?? artifact.id ?? "artifact",
124
+ artifact.mimeType ?? "",
125
+ artifact.url ?? "",
126
+ ]
127
+ .filter((v) => v.length > 0)
128
+ .join(" | ")
129
+ lines.push(`- ${descriptor}`)
130
+ }
131
+ }
132
+ lines.push("")
133
+ lines.push(JSON.stringify(envelope, null, 2))
134
+
135
+ const content: Array<Record<string, unknown>> = [{ type: "text", text: lines.join("\n") }]
136
+ for (const artifact of envelope.artifacts) {
137
+ if (!artifact.url) continue
138
+ content.push({
139
+ type: "resource_link",
140
+ name: artifact.filename ?? artifact.id ?? "artifact",
141
+ uri: artifact.url,
142
+ mimeType: artifact.mimeType ?? "application/octet-stream",
143
+ })
144
+ }
145
+ return content
146
+ }
@@ -44,7 +44,7 @@ const toolDefinitions: ReadonlyArray<ToolDefinition> = [
44
44
  filename: { type: "string" },
45
45
  pages: { type: "array", items: { type: "integer" } },
46
46
  renderScale: { type: "number" },
47
- returnMode: { type: "string", enum: ["inline", "file_id"] },
47
+ returnMode: { type: "string", enum: ["inline", "file_id", "url"] },
48
48
  },
49
49
  required: ["pages"],
50
50
  },
@@ -168,7 +168,7 @@ const toolDefinitions: ReadonlyArray<ToolDefinition> = [
168
168
  filename: { type: "string" },
169
169
  mimeType: { type: "string" },
170
170
  base64: { type: "string" },
171
- returnMode: { type: "string", enum: ["inline", "file_id"] },
171
+ returnMode: { type: "string", enum: ["inline", "file_id", "url"] },
172
172
  },
173
173
  required: ["op"],
174
174
  },
package/src/types.ts CHANGED
@@ -11,8 +11,9 @@ export type ReturnMode = "inline" | "file_id" | "url"
11
11
  export interface Env {
12
12
  readonly ECHO_PDF_CONFIG_JSON?: string
13
13
  readonly ASSETS?: Fetcher
14
+ readonly FILE_STORE_BUCKET?: R2Bucket
14
15
  readonly FILE_STORE_DO?: DurableObjectNamespace
15
- readonly [key: string]: string | Fetcher | DurableObjectNamespace | undefined
16
+ readonly [key: string]: string | Fetcher | DurableObjectNamespace | R2Bucket | undefined
16
17
  }
17
18
 
18
19
  export interface StoredFileMeta {
package/wrangler.toml CHANGED
@@ -6,6 +6,10 @@ compatibility_date = "2026-03-06"
6
6
  directory = "./assets"
7
7
  binding = "ASSETS"
8
8
 
9
+ [[r2_buckets]]
10
+ binding = "FILE_STORE_BUCKET"
11
+ bucket_name = "echo-pdf-files"
12
+
9
13
  [[durable_objects.bindings]]
10
14
  name = "FILE_STORE_DO"
11
15
  class_name = "FileStoreDO"