@echofiles/echo-pdf 0.3.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -48,6 +48,13 @@ npm i -g @echofiles/echo-pdf
48
48
  echo-pdf init --service-url https://echo-pdf.echofilesai.workers.dev
49
49
  ```
50
50
 
51
+ 本地一键启动服务(daemon):
52
+
53
+ ```bash
54
+ echo-pdf dev --port 8788
55
+ echo-pdf init --service-url http://127.0.0.1:8788
56
+ ```
57
+
51
58
  配置 API Key(仅保存在本机 CLI 配置,不会上报到服务端存储):
52
59
 
53
60
  ```bash
@@ -88,6 +95,47 @@ echo-pdf mcp tools
88
95
  echo-pdf mcp call --tool file_ops --args '{"op":"list"}'
89
96
  ```
90
97
 
98
+ ### 3.1.1 纯 MCP 场景推荐流程(本地 PDF)
99
+
100
+ 远端 MCP server 无法直接读取你本机文件路径。推荐两步:
101
+
102
+ 1. 先通过 HTTP 上传本地 PDF,拿到 `fileId`
103
+ 2. 再用 MCP 工具传 `fileId` 调用
104
+
105
+ 示例:
106
+
107
+ ```bash
108
+ curl -sS -X POST https://echo-pdf.echofilesai.workers.dev/api/files/upload \
109
+ -F 'file=@./input.pdf'
110
+
111
+ echo-pdf mcp call --tool pdf_extract_pages --args '{"fileId":"<FILE_ID>","pages":[1]}'
112
+ ```
113
+
114
+ ### 3.1.2 不上传文件的 URL ingest 流程
115
+
116
+ 如果 PDF 已经在公网可访问,直接传 `url`:
117
+
118
+ ```bash
119
+ echo-pdf mcp call --tool pdf_extract_pages --args '{
120
+ "url":"https://example.com/sample.pdf",
121
+ "pages":[1]
122
+ }'
123
+ ```
124
+
125
+ ### 3.1.3 stdio MCP(支持本地文件路径)
126
+
127
+ stdio 模式会把本地 `path/filePath` 自动上传为 `fileId` 后再调用远端工具。
128
+
129
+ ```bash
130
+ echo-pdf mcp stdio
131
+ ```
132
+
133
+ 生成 Claude Desktop/Cursor 等可用的 stdio 配置片段:
134
+
135
+ ```bash
136
+ echo-pdf setup add claude-desktop --mode stdio
137
+ ```
138
+
91
139
  ### 3.2 给客户端生成 MCP 配置片段
92
140
 
93
141
  ```bash
@@ -107,6 +155,12 @@ echo-pdf setup add json
107
155
  - `pdf_tables_to_latex`
108
156
  - `file_ops`
109
157
 
158
+ MCP 输出策略:
159
+
160
+ - `pdf_extract_pages` 在 MCP 下默认 `returnMode=url`(不传 `returnMode` 时生效)
161
+ - MCP `text` 会对大字段做去二进制/截断,避免把大段 base64 塞进上下文
162
+ - 二进制结果请优先使用 `resource_link` 中的下载地址
163
+
110
164
  ## 4. Web UI 使用
111
165
 
112
166
  打开:
@@ -139,6 +193,12 @@ curl -sS -X POST https://echo-pdf.echofilesai.workers.dev/api/files/upload \
139
193
 
140
194
  返回中会拿到 `file.id`。
141
195
 
196
+ CLI 等价命令:
197
+
198
+ ```bash
199
+ echo-pdf file upload ./sample.pdf
200
+ ```
201
+
142
202
  ### 5.2 提取页面图片
143
203
 
144
204
  ```bash
@@ -152,6 +212,18 @@ curl -sS -X POST https://echo-pdf.echofilesai.workers.dev/tools/call \
152
212
  }'
153
213
  ```
154
214
 
215
+ CLI(支持直接传本地路径):
216
+
217
+ ```bash
218
+ echo-pdf call --tool pdf_extract_pages --args '{"path":"./sample.pdf","pages":[1],"returnMode":"url"}'
219
+ ```
220
+
221
+ 下载产物:
222
+
223
+ ```bash
224
+ echo-pdf file get --file-id <FILE_ID> --out ./output.bin
225
+ ```
226
+
155
227
  ### 5.3 OCR
156
228
 
157
229
  ```bash
@@ -183,6 +255,7 @@ curl -sS -X POST https://echo-pdf.echofilesai.workers.dev/tools/call \
183
255
  - `agent.defaultProvider`
184
256
  - `agent.defaultModel`
185
257
  - `service.publicBaseUrl`
258
+ - `service.fileGet.cacheTtlSeconds`
186
259
  - `service.maxPdfBytes`
187
260
  - `service.storage.maxFileBytes`
188
261
  - `service.storage.maxTotalBytes`
@@ -205,6 +278,8 @@ curl -sS -X POST https://echo-pdf.echofilesai.workers.dev/tools/call \
205
278
  - `ECHO_PDF_DEFAULT_PROVIDER`
206
279
  - `ECHO_PDF_DEFAULT_MODEL`
207
280
  - `ECHO_PDF_PUBLIC_BASE_URL`(可选,强制 artifacts 生成外部可访问绝对 URL)
281
+ - `ECHO_PDF_FILE_GET_CACHE_TTL_SECONDS`(可选,`/api/files/get` 缓存秒数,`0` 表示 `no-store`)
282
+ - `ECHO_PDF_FILE_GET_AUTH_HEADER` + `ECHO_PDF_FILE_GET_AUTH_ENV`(可选,启用下载端点 header 鉴权;两者必须同时设置,且 `ECHO_PDF_FILE_GET_AUTH_ENV` 所指向的环境变量需为非空字符串,否则不做鉴权)
208
283
  - `ECHO_PDF_MCP_KEY`(可选,启用 MCP 鉴权)
209
284
  - `ECHO_PDF_WORKER_NAME`(CLI 默认 URL 推导)
210
285
 
@@ -263,3 +338,12 @@ curl -sS -X POST https://echo-pdf.echofilesai.workers.dev/tools/call \
263
338
  "arguments":{"fileId":"<FILE_ID>","pages":[1],"returnMode":"url"}
264
339
  }'
265
340
  ```
341
+
342
+ ### 8.4 错误码语义
343
+
344
+ - 客户端输入错误返回稳定 `4xx + code`,例如:
345
+ - `PAGES_REQUIRED`(400)
346
+ - `PAGE_OUT_OF_RANGE`(400)
347
+ - `MISSING_FILE_INPUT`(400)
348
+ - `FILE_NOT_FOUND`(404)
349
+ - 服务端故障返回 `5xx`。
package/bin/echo-pdf.js CHANGED
@@ -1,4 +1,5 @@
1
1
  #!/usr/bin/env node
2
+ import { spawn } from "node:child_process"
2
3
  import fs from "node:fs"
3
4
  import os from "node:os"
4
5
  import path from "node:path"
@@ -208,6 +209,170 @@ const buildMcpRequest = (id, method, params = {}) => ({
208
209
  params,
209
210
  })
210
211
 
212
/**
 * Upload a local file to the service as multipart form data.
 *
 * The file is read synchronously from disk and sent as the `file` field of a
 * POST to `${serviceUrl}/api/files/upload`.
 *
 * @param serviceUrl Base URL of the echo-pdf service (no trailing slash expected).
 * @param filePath Path to the local file; resolved against the current working directory.
 * @returns The parsed JSON response, or `{ raw: <text> }` when the body is not valid JSON.
 * @throws Error with `"<status> <json body>"` when the response status is not 2xx.
 */
const uploadFile = async (serviceUrl, filePath) => {
  const resolvedPath = path.resolve(process.cwd(), filePath)
  const contents = fs.readFileSync(resolvedPath)

  const formData = new FormData()
  formData.append("file", new Blob([contents]), path.basename(resolvedPath))

  const res = await fetch(`${serviceUrl}/api/files/upload`, { method: "POST", body: formData })
  const rawBody = await res.text()

  // Non-JSON bodies (e.g. an HTML error page) are surfaced verbatim under `raw`.
  const decode = () => {
    try {
      return JSON.parse(rawBody)
    } catch {
      return { raw: rawBody }
    }
  }
  const payload = decode()

  if (res.ok) return payload
  throw new Error(`${res.status} ${JSON.stringify(payload)}`)
}
231
+
232
/**
 * Download a stored file from the service and write it to disk.
 *
 * Issues a GET to `/api/files/get` with `download=1`, creates the parent
 * directory of the target if needed, and writes the response bytes.
 *
 * @param serviceUrl Base URL of the echo-pdf service.
 * @param fileId Identifier of the stored file; URL-encoded before use.
 * @param outputPath Destination path; resolved against the current working directory.
 * @returns The absolute path the file was written to.
 * @throws Error with `"<status> <body text>"` when the response status is not 2xx.
 */
const downloadFile = async (serviceUrl, fileId, outputPath) => {
  const query = `fileId=${encodeURIComponent(fileId)}&download=1`
  const res = await fetch(`${serviceUrl}/api/files/get?${query}`)
  if (!res.ok) {
    const reason = await res.text()
    throw new Error(`${res.status} ${reason}`)
  }

  const payload = Buffer.from(await res.arrayBuffer())
  const target = path.resolve(process.cwd(), outputPath)
  fs.mkdirSync(path.dirname(target), { recursive: true })
  fs.writeFileSync(target, payload)
  return target
}
244
+
245
/**
 * Turn a local `path`/`filePath` tool argument into an uploaded `fileId`.
 *
 * Only tools whose name starts with `pdf_` are considered, and only when the
 * arguments do not already carry a `fileId`, `url` or `base64` source. In that
 * case the local file is uploaded, `fileId` is set from the upload response,
 * and `path`/`filePath` are removed. The input object is never mutated.
 *
 * @param serviceUrl Base URL of the echo-pdf service used for the upload.
 * @param tool Tool name being called.
 * @param args Tool arguments; a falsy value is treated as an empty object.
 * @returns A shallow copy of `args`, rewritten as described above.
 * @throws Error when the upload response carries no `file.id`.
 */
const withUploadedLocalFile = async (serviceUrl, tool, args) => {
  const prepared = { ...(args || {}) }
  if (!tool.startsWith("pdf_")) return prepared

  // `path` wins over `filePath` whenever it is a string (even an empty one).
  let candidate = ""
  if (typeof prepared.path === "string") {
    candidate = prepared.path
  } else if (typeof prepared.filePath === "string") {
    candidate = prepared.filePath
  }

  const alreadyHasSource = Boolean(prepared.fileId || prepared.url || prepared.base64)
  if (!candidate || alreadyHasSource) return prepared

  const uploaded = await uploadFile(serviceUrl, candidate)
  const uploadedId = uploaded?.file?.id
  if (!uploadedId) throw new Error(`upload failed for local path: ${candidate}`)

  prepared.fileId = uploadedId
  delete prepared.path
  delete prepared.filePath
  return prepared
}
262
+
263
/**
 * Start a local `wrangler dev` server as a child process and mirror its exit.
 *
 * Uses the `wrangler` binary from this package's `node_modules/.bin` when it
 * exists, otherwise falls back to `npx -y wrangler`. The child inherits stdio,
 * environment and working directory of the current process.
 *
 * @param port TCP port passed to `wrangler dev --port`.
 * @param host Address passed to `wrangler dev --ip`.
 */
const runDevServer = (port, host) => {
  // NOTE(review): `__dirname` is not defined by default in ES modules; presumably
  // it is derived from `import.meta.url` elsewhere in this file — confirm.
  const wranglerBin = path.resolve(__dirname, "../node_modules/.bin/wrangler")
  const wranglerArgs = ["dev", "--port", String(port), "--ip", host]
  const hasLocalWrangler = fs.existsSync(wranglerBin)
  const cmd = hasLocalWrangler ? wranglerBin : "npx"
  const args = hasLocalWrangler ? wranglerArgs : ["-y", "wrangler", ...wranglerArgs]

  const child = spawn(cmd, args, {
    stdio: "inherit",
    env: process.env,
    cwd: process.cwd(),
  })

  // Without this listener a failed spawn (e.g. `npx` not on PATH) surfaces as an
  // unhandled 'error' event and crashes with a raw stack trace.
  child.on("error", (error) => {
    process.stderr.write(`failed to start wrangler: ${error instanceof Error ? error.message : String(error)}\n`)
    process.exit(1)
  })

  child.on("exit", (code, signal) => {
    if (signal) {
      // Re-raise the child's signal on ourselves and stop here: calling
      // process.exit() right after would race the signal and report status 0.
      process.kill(process.pid, signal)
      return
    }
    process.exit(code ?? 0)
  })
}
278
+
279
+ const mcpReadLoop = (onMessage, onError) => {
280
+ let buffer = Buffer.alloc(0)
281
+ let expectedLength = null
282
+ process.stdin.on("data", (chunk) => {
283
+ buffer = Buffer.concat([buffer, chunk])
284
+ while (true) {
285
+ if (expectedLength === null) {
286
+ const headerEnd = buffer.indexOf("\r\n\r\n")
287
+ if (headerEnd === -1) break
288
+ const headerRaw = buffer.slice(0, headerEnd).toString("utf-8")
289
+ const lines = headerRaw.split("\r\n")
290
+ const cl = lines.find((line) => line.toLowerCase().startsWith("content-length:"))
291
+ if (!cl) {
292
+ onError(new Error("Missing Content-Length"))
293
+ buffer = buffer.slice(headerEnd + 4)
294
+ continue
295
+ }
296
+ expectedLength = Number(cl.split(":")[1]?.trim() || "0")
297
+ buffer = buffer.slice(headerEnd + 4)
298
+ }
299
+ if (!Number.isFinite(expectedLength) || expectedLength < 0) {
300
+ onError(new Error("Invalid Content-Length"))
301
+ expectedLength = null
302
+ continue
303
+ }
304
+ if (buffer.length < expectedLength) break
305
+ const body = buffer.slice(0, expectedLength).toString("utf-8")
306
+ buffer = buffer.slice(expectedLength)
307
+ expectedLength = null
308
+ try {
309
+ const maybePromise = onMessage(JSON.parse(body))
310
+ if (maybePromise && typeof maybePromise.then === "function") {
311
+ maybePromise.catch(onError)
312
+ }
313
+ } catch (error) {
314
+ onError(error)
315
+ }
316
+ }
317
+ })
318
+ }
319
+
320
+ const mcpWrite = (obj) => {
321
+ const body = Buffer.from(JSON.stringify(obj))
322
+ const header = Buffer.from(`Content-Length: ${body.length}\r\n\r\n`)
323
+ process.stdout.write(header)
324
+ process.stdout.write(body)
325
+ }
326
+
327
/**
 * Bridge MCP over stdio to the remote HTTP `/mcp` endpoint.
 *
 * Each JSON-RPC message read from stdin is forwarded to `${serviceUrl}/mcp`
 * and the response is written back with `mcpWrite`. For `tools/call`, local
 * `path`/`filePath` arguments are first uploaded and replaced by a `fileId`
 * (see `withUploadedLocalFile`).
 *
 * Reply rules:
 *  - malformed requests get a `-32600 Invalid Request` error;
 *  - `notifications/initialized` is consumed locally and not forwarded;
 *  - any failure while serving a request that carries an `id` is answered with
 *    a `-32603` error so the client is never left waiting;
 *  - messages without an `id` (notifications) never produce a reply; their
 *    failures are only logged to stderr.
 *
 * NOTE(review): the stdio setup snippet passes `ECHO_PDF_SERVICE_URL` in `env`;
 * confirm `loadConfig()` honors it, since only `config.serviceUrl` is read here.
 */
const runMcpStdio = async () => {
  const config = loadConfig()
  const serviceUrl = config.serviceUrl
  const headers = buildMcpHeaders()

  // Build the payload for a tools/call request with local files already uploaded.
  const prepareToolCall = async (msg) => {
    const tool = String(msg?.params?.name || "")
    const args = (msg?.params?.arguments && typeof msg.params.arguments === "object")
      ? msg.params.arguments
      : {}
    const preparedArgs = await withUploadedLocalFile(serviceUrl, tool, args)
    return {
      ...msg,
      params: {
        ...(msg.params || {}),
        arguments: preparedArgs,
      },
    }
  }

  mcpReadLoop(async (msg) => {
    const method = msg?.method
    const hasId = Object.hasOwn(msg || {}, "id")
    const id = hasId ? msg.id : null
    if (msg?.jsonrpc !== "2.0" || typeof method !== "string") {
      mcpWrite({ jsonrpc: "2.0", id, error: { code: -32600, message: "Invalid Request" } })
      return
    }
    if (method === "notifications/initialized") return

    try {
      const payload = method === "tools/call" ? await prepareToolCall(msg) : msg
      const data = await postJson(`${serviceUrl}/mcp`, payload, headers)
      // JSON-RPC: a notification (no id) must not be answered.
      if (hasId) mcpWrite(data)
    } catch (error) {
      // Nothing to correlate a reply with; let the read loop log it to stderr.
      if (!hasId) throw error
      mcpWrite({
        jsonrpc: "2.0",
        id,
        error: { code: -32603, message: error instanceof Error ? error.message : String(error) },
      })
    }
  }, (error) => {
    process.stderr.write(`${error instanceof Error ? error.message : String(error)}\n`)
  })
}
375
+
211
376
  const parseConfigValue = (raw, type = "auto") => {
212
377
  if (type === "string") return String(raw)
213
378
  if (type === "number") {
@@ -302,6 +467,7 @@ const usage = () => {
302
467
  process.stdout.write(`echo-pdf CLI\n\n`)
303
468
  process.stdout.write(`Commands:\n`)
304
469
  process.stdout.write(` init [--service-url URL]\n`)
470
+ process.stdout.write(` dev [--port 8788] [--host 127.0.0.1]\n`)
305
471
  process.stdout.write(` provider set --provider <${PROVIDER_SET_NAMES.join("|")}> --api-key <KEY> [--profile name]\n`)
306
472
  process.stdout.write(` provider use --provider <${PROVIDER_ALIASES.join("|")}> [--profile name]\n`)
307
473
  process.stdout.write(` provider list [--profile name]\n`)
@@ -312,13 +478,29 @@ const usage = () => {
312
478
  process.stdout.write(` model list [--profile name]\n`)
313
479
  process.stdout.write(` tools\n`)
314
480
  process.stdout.write(` call --tool <name> --args '<json>' [--provider alias] [--model model] [--profile name]\n`)
481
+ process.stdout.write(` file upload <local.pdf>\n`)
482
+ process.stdout.write(` file get --file-id <id> --out <path>\n`)
315
483
  process.stdout.write(` mcp initialize\n`)
316
484
  process.stdout.write(` mcp tools\n`)
317
485
  process.stdout.write(` mcp call --tool <name> --args '<json>'\n`)
486
+ process.stdout.write(` mcp stdio\n`)
318
487
  process.stdout.write(` setup add <claude-desktop|claude-code|cursor|cline|windsurf|gemini|json>\n`)
319
488
  }
320
489
 
321
- const setupSnippet = (tool, serviceUrl) => {
490
+ const setupSnippet = (tool, serviceUrl, mode = "http") => {
491
+ if (mode === "stdio") {
492
+ return {
493
+ mcpServers: {
494
+ "echo-pdf": {
495
+ command: "echo-pdf",
496
+ args: ["mcp", "stdio"],
497
+ env: {
498
+ ECHO_PDF_SERVICE_URL: serviceUrl,
499
+ },
500
+ },
501
+ },
502
+ }
503
+ }
322
504
  const transport = {
323
505
  type: "streamable-http",
324
506
  url: `${serviceUrl}/mcp`,
@@ -405,6 +587,14 @@ const main = async () => {
405
587
  return
406
588
  }
407
589
 
590
+ if (command === "dev") {
591
+ const port = typeof flags.port === "string" ? Number(flags.port) : 8788
592
+ const host = typeof flags.host === "string" ? flags.host : "127.0.0.1"
593
+ if (!Number.isFinite(port) || port <= 0) throw new Error("dev --port must be positive number")
594
+ runDevServer(Math.floor(port), host)
595
+ return
596
+ }
597
+
408
598
  if (command === "provider" && subcommand === "set") {
409
599
  const providerAlias = resolveProviderAliasInput(flags.provider)
410
600
  const apiKey = flags["api-key"]
@@ -538,15 +728,42 @@ const main = async () => {
538
728
  const tool = flags.tool
539
729
  if (typeof tool !== "string") throw new Error("call requires --tool")
540
730
  const args = typeof flags.args === "string" ? JSON.parse(flags.args) : {}
731
+ const preparedArgs = await withUploadedLocalFile(config.serviceUrl, tool, args)
541
732
  const provider = resolveProviderAlias(profile, flags.provider)
542
733
  const model = typeof flags.model === "string" ? flags.model : resolveDefaultModel(profile, provider)
543
734
  const providerApiKeys = buildProviderApiKeys(config, profileName)
544
- const payload = buildToolCallRequest({ tool, args, provider, model, providerApiKeys })
735
+ const payload = buildToolCallRequest({ tool, args: preparedArgs, provider, model, providerApiKeys })
545
736
  const data = await postJson(`${config.serviceUrl}/tools/call`, payload)
546
737
  print(data)
547
738
  return
548
739
  }
549
740
 
741
+ if (command === "file") {
742
+ const action = rest[0] || ""
743
+ const config = loadConfig()
744
+ if (action === "upload") {
745
+ const filePath = rest[1]
746
+ if (!filePath) throw new Error("file upload requires a path")
747
+ const data = await uploadFile(config.serviceUrl, filePath)
748
+ print({
749
+ fileId: data?.file?.id || "",
750
+ filename: data?.file?.filename || path.basename(filePath),
751
+ sizeBytes: data?.file?.sizeBytes || 0,
752
+ file: data?.file || null,
753
+ })
754
+ return
755
+ }
756
+ if (action === "get") {
757
+ const fileId = typeof flags["file-id"] === "string" ? flags["file-id"] : ""
758
+ const out = typeof flags.out === "string" ? flags.out : ""
759
+ if (!fileId || !out) throw new Error("file get requires --file-id and --out")
760
+ const savedTo = await downloadFile(config.serviceUrl, fileId, out)
761
+ print({ ok: true, fileId, savedTo })
762
+ return
763
+ }
764
+ throw new Error("file command supports: upload|get")
765
+ }
766
+
550
767
  if (command === "mcp" && subcommand === "initialize") {
551
768
  const config = loadConfig()
552
769
  const data = await postJson(`${config.serviceUrl}/mcp`, buildMcpRequest(1, "initialize"), buildMcpHeaders())
@@ -575,11 +792,18 @@ const main = async () => {
575
792
  return
576
793
  }
577
794
 
795
+ if (command === "mcp" && subcommand === "stdio") {
796
+ await runMcpStdio()
797
+ return
798
+ }
799
+
578
800
  if (command === "setup" && subcommand === "add") {
579
801
  const tool = rest[0]
580
802
  if (!tool) throw new Error("setup add requires tool name")
581
803
  const config = loadConfig()
582
- print(setupSnippet(tool, config.serviceUrl))
804
+ const mode = typeof flags.mode === "string" ? flags.mode : "http"
805
+ if (!["http", "stdio"].includes(mode)) throw new Error("setup add --mode must be http|stdio")
806
+ print(setupSnippet(tool, config.serviceUrl, mode))
583
807
  return
584
808
  }
585
809
 
@@ -2,6 +2,9 @@
2
2
  "service": {
3
3
  "name": "echo-pdf",
4
4
  "publicBaseUrl": "https://echo-pdf.echofilesai.workers.dev",
5
+ "fileGet": {
6
+ "cacheTtlSeconds": 300
7
+ },
5
8
  "maxPdfBytes": 10000000,
6
9
  "maxPagesPerRequest": 20,
7
10
  "defaultRenderScale": 2,
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@echofiles/echo-pdf",
3
3
  "description": "MCP-first PDF agent on Cloudflare Workers with CLI and web demo.",
4
- "version": "0.3.0",
4
+ "version": "0.4.0",
5
5
  "type": "module",
6
6
  "publishConfig": {
7
7
  "access": "public"
@@ -0,0 +1,21 @@
1
/**
 * Error carrying an HTTP status and a stable machine-readable code, so that
 * handlers can map failures to responses without parsing messages.
 */
export class HttpError extends Error {
  /** HTTP status code to respond with. */
  readonly status: number
  /** Stable error identifier, e.g. `PAGES_REQUIRED`. */
  readonly code: string
  /** Optional structured context for the client. */
  readonly details?: unknown

  constructor(status: number, code: string, message: string, details?: unknown) {
    super(message)
    // Distinguish this class from a plain Error in logs and stack traces.
    this.name = "HttpError"
    this.status = status
    this.code = code
    this.details = details
  }
}

/** Create a 400 Bad Request error. */
export const badRequest = (code: string, message: string, details?: unknown): HttpError =>
  new HttpError(400, code, message, details)

/** Create a 404 Not Found error. */
export const notFound = (code: string, message: string, details?: unknown): HttpError =>
  new HttpError(404, code, message, details)

/** Create a 422 Unprocessable Entity error. */
export const unprocessable = (code: string, message: string, details?: unknown): HttpError =>
  new HttpError(422, code, message, details)
package/src/index.ts CHANGED
@@ -61,6 +61,21 @@ const asObj = (value: unknown): JsonObject =>
61
61
  const resolvePublicBaseUrl = (request: Request, configured?: string): string =>
62
62
  typeof configured === "string" && configured.length > 0 ? configured : request.url
63
63
 
64
/**
 * Reduce a stored filename to something safe to embed in a quoted
 * `Content-Disposition: attachment; filename="…"` value.
 *
 * Everything outside printable ASCII (including CR/LF) is dropped, as are the
 * two characters that are special inside a quoted string: `"` ends the value
 * and `\` escapes the next character, so a trailing backslash would otherwise
 * swallow the closing quote.
 *
 * @param filename Original filename, possibly user-supplied.
 * @returns The cleaned name, or `"download.bin"` when nothing usable remains.
 */
const sanitizeDownloadFilename = (filename: string): string => {
  const cleaned = filename
    .replace(/[^\x20-\x7E]+/g, "")
    .replace(/["\\]/g, "")
    .trim()
  return cleaned.length > 0 ? cleaned : "download.bin"
}
71
+
72
/**
 * Decide whether a request may use the file download endpoint.
 *
 * Header auth is enforced only when all of the following hold: `authHeader`
 * and `authEnv` are both configured, and `env[authEnv]` is a non-empty string.
 * In every other case the request is allowed. When enforced, the named request
 * header must equal the secret exactly.
 *
 * NOTE(review): a configured but unset/empty secret leaves the endpoint open —
 * confirm this fail-open behavior is intended.
 *
 * @param request Incoming request whose headers are checked.
 * @param env Worker environment holding the secret.
 * @param config Names of the header to read and the env entry holding the secret.
 */
const isFileGetAuthorized = (request: Request, env: Env, config: { authHeader?: string; authEnv?: string }): boolean => {
  const { authHeader, authEnv } = config
  if (!authHeader || !authEnv) return true

  const expected = env[authEnv]
  const enforced = typeof expected === "string" && expected.length > 0
  return !enforced || request.headers.get(authHeader) === expected
}
78
+
64
79
  const sseResponse = (stream: ReadableStream<Uint8Array>): Response =>
65
80
  new Response(stream, {
66
81
  headers: {
@@ -147,6 +162,10 @@ export default {
147
162
  version: config.mcp.version,
148
163
  authHeader: config.mcp.authHeader ?? null,
149
164
  },
165
+ fileGet: {
166
+ authHeader: config.service.fileGet?.authHeader ?? null,
167
+ cacheTtlSeconds: config.service.fileGet?.cacheTtlSeconds ?? 300,
168
+ },
150
169
  })
151
170
  }
152
171
 
@@ -304,6 +323,10 @@ export default {
304
323
  }
305
324
 
306
325
  if (request.method === "GET" && url.pathname === "/api/files/get") {
326
+ const fileGetConfig = config.service.fileGet ?? {}
327
+ if (!isFileGetAuthorized(request, env, fileGetConfig)) {
328
+ return json({ error: "Unauthorized", code: "UNAUTHORIZED" }, 401)
329
+ }
307
330
  const fileId = url.searchParams.get("fileId") || ""
308
331
  if (!fileId) return json({ error: "Missing fileId" }, 400)
309
332
  const file = await fileStore.get(fileId)
@@ -311,9 +334,13 @@ export default {
311
334
  const download = url.searchParams.get("download") === "1"
312
335
  const headers = new Headers()
313
336
  headers.set("Content-Type", file.mimeType)
314
- headers.set("Cache-Control", "no-store")
337
+ const cacheTtl = Number(fileGetConfig.cacheTtlSeconds ?? 300)
338
+ const cacheControl = cacheTtl > 0
339
+ ? `public, max-age=${Math.floor(cacheTtl)}, s-maxage=${Math.floor(cacheTtl)}`
340
+ : "no-store"
341
+ headers.set("Cache-Control", cacheControl)
315
342
  if (download) {
316
- headers.set("Content-Disposition", `attachment; filename=\"${file.filename.replace(/\"/g, "")}\"`)
343
+ headers.set("Content-Disposition", `attachment; filename=\"${sanitizeDownloadFilename(file.filename)}\"`)
317
344
  }
318
345
  return new Response(file.bytes, { status: 200, headers })
319
346
  }
package/src/mcp-server.ts CHANGED
@@ -48,6 +48,16 @@ const maybeAuthorized = (request: Request, env: Env, config: EchoPdfConfig): boo
48
48
  const resolvePublicBaseUrl = (request: Request, configured?: string): string =>
49
49
  typeof configured === "string" && configured.length > 0 ? configured : request.url
50
50
 
51
/**
 * Apply MCP-specific defaults to tool arguments.
 *
 * For `pdf_extract_pages`, a missing, empty or non-string `returnMode` is
 * replaced with `"url"`. All other tools, and calls that already specify a
 * mode, get the original object back untouched.
 *
 * @param toolName Name of the tool being called.
 * @param args Arguments supplied by the client; not mutated.
 * @returns `args` itself, or a shallow copy with `returnMode: "url"`.
 */
const prepareMcpToolArgs = (toolName: string, args: Record<string, unknown>): Record<string, unknown> => {
  if (toolName !== "pdf_extract_pages") return args

  const requested = args.returnMode
  const hasExplicitMode = typeof requested === "string" && requested.length > 0
  return hasExplicitMode ? args : { ...args, returnMode: "url" }
}
60
+
51
61
  export const handleMcpRequest = async (
52
62
  request: Request,
53
63
  env: Env,
@@ -103,7 +113,7 @@ export const handleMcpRequest = async (
103
113
  }
104
114
 
105
115
  const toolName = typeof params.name === "string" ? params.name : ""
106
- const args = asObj(params.arguments)
116
+ const args = prepareMcpToolArgs(toolName, asObj(params.arguments))
107
117
  if (!toolName) {
108
118
  return err(id, -32602, "Invalid params: name is required", {
109
119
  code: "INVALID_PARAMS",
package/src/pdf-agent.ts CHANGED
@@ -2,6 +2,7 @@ import type { Env, FileStore, ReturnMode } from "./types"
2
2
  import type { AgentTraceEvent, EchoPdfConfig, PdfOperationRequest } from "./pdf-types"
3
3
  import { resolveModelForProvider, resolveProviderAlias } from "./agent-defaults"
4
4
  import { fromBase64, normalizeReturnMode, toDataUrl } from "./file-utils"
5
+ import { badRequest, notFound, unprocessable } from "./http-error"
5
6
  import { extractPdfPageText, getPdfPageCount, renderPdfPageToPng, toBytes } from "./pdfium-engine"
6
7
  import { visionRecognize } from "./provider-client"
7
8
 
@@ -22,11 +23,20 @@ const traceStep = (
22
23
  }
23
24
 
24
25
  const ensurePages = (pages: ReadonlyArray<number>, pageCount: number, maxPages: number): number[] => {
25
- if (pages.length === 0) throw new Error("At least one page is required")
26
- if (pages.length > maxPages) throw new Error(`Page count exceeds maxPagesPerRequest (${maxPages})`)
26
+ if (pages.length === 0) throw badRequest("PAGES_REQUIRED", "At least one page is required")
27
+ if (pages.length > maxPages) {
28
+ throw badRequest("TOO_MANY_PAGES", `Page count exceeds maxPagesPerRequest (${maxPages})`, {
29
+ maxPagesPerRequest: maxPages,
30
+ providedPages: pages.length,
31
+ })
32
+ }
27
33
  for (const page of pages) {
28
34
  if (!Number.isInteger(page) || page < 1 || page > pageCount) {
29
- throw new Error(`Page ${page} out of range 1..${pageCount}`)
35
+ throw badRequest("PAGE_OUT_OF_RANGE", `Page ${page} out of range 1..${pageCount}`, {
36
+ page,
37
+ min: 1,
38
+ max: pageCount,
39
+ })
30
40
  }
31
41
  }
32
42
  return [...new Set(pages)].sort((a, b) => a - b)
@@ -45,7 +55,7 @@ export const ingestPdfFromPayload = async (
45
55
  if (input.fileId) {
46
56
  const existing = await opts.fileStore.get(input.fileId)
47
57
  if (!existing) {
48
- throw new Error(`File not found: ${input.fileId}`)
58
+ throw notFound("FILE_NOT_FOUND", `File not found: ${input.fileId}`, { fileId: input.fileId })
49
59
  }
50
60
  return {
51
61
  id: existing.id,
@@ -59,7 +69,11 @@ export const ingestPdfFromPayload = async (
59
69
 
60
70
  if (input.url) {
61
71
  traceStep(opts, "start", "file.fetch.url", { url: input.url })
62
- bytes = await toBytes(input.url)
72
+ try {
73
+ bytes = await toBytes(input.url)
74
+ } catch (error) {
75
+ throw badRequest("URL_FETCH_FAILED", `Unable to fetch PDF from url: ${error instanceof Error ? error.message : String(error)}`)
76
+ }
63
77
  try {
64
78
  const u = new URL(input.url)
65
79
  filename = decodeURIComponent(u.pathname.split("/").pop() || filename)
@@ -74,10 +88,13 @@ export const ingestPdfFromPayload = async (
74
88
  }
75
89
 
76
90
  if (!bytes) {
77
- throw new Error("Missing file input. Provide fileId, url or base64")
91
+ throw badRequest("MISSING_FILE_INPUT", "Missing file input. Provide fileId, url or base64")
78
92
  }
79
93
  if (bytes.byteLength > config.service.maxPdfBytes) {
80
- throw new Error(`PDF exceeds max size (${config.service.maxPdfBytes} bytes)`)
94
+ throw badRequest("PDF_TOO_LARGE", `PDF exceeds max size (${config.service.maxPdfBytes} bytes)`, {
95
+ maxPdfBytes: config.service.maxPdfBytes,
96
+ sizeBytes: bytes.byteLength,
97
+ })
81
98
  }
82
99
 
83
100
  const meta = await opts.fileStore.put({
@@ -164,7 +181,7 @@ export const runPdfAgent = async (
164
181
  const providerAlias = resolveProviderAlias(config, request.provider)
165
182
  const model = resolveModelForProvider(config, providerAlias, request.model)
166
183
  if (!model) {
167
- throw new Error("model is required for OCR or table extraction; set agent.defaultModel")
184
+ throw badRequest("MODEL_REQUIRED", "model is required for OCR or table extraction; set agent.defaultModel")
168
185
  }
169
186
 
170
187
  if (request.operation === "ocr_pages") {
@@ -216,7 +233,9 @@ export const runPdfAgent = async (
216
233
  })
217
234
  const latex = extractTabularLatex(rawLatex)
218
235
  if (!latex) {
219
- throw new Error(`table extraction did not return valid LaTeX tabular for page ${page}`)
236
+ throw unprocessable("TABLE_LATEX_MISSING", `table extraction did not return valid LaTeX tabular for page ${page}`, {
237
+ page,
238
+ })
220
239
  }
221
240
  tables.push({ page, latex })
222
241
  traceStep(opts, "end", "table.page", { page, chars: latex.length })
package/src/pdf-config.ts CHANGED
@@ -37,6 +37,9 @@ const validateConfig = (config: EchoPdfConfig): EchoPdfConfig => {
37
37
  ) {
38
38
  throw new Error("service.publicBaseUrl must start with http:// or https://")
39
39
  }
40
+ if (typeof config.service.fileGet?.cacheTtlSeconds === "number" && config.service.fileGet.cacheTtlSeconds < 0) {
41
+ throw new Error("service.fileGet.cacheTtlSeconds must be >= 0")
42
+ }
40
43
  if (!Number.isFinite(config.service.storage.maxFileBytes) || config.service.storage.maxFileBytes <= 0) {
41
44
  throw new Error("service.storage.maxFileBytes must be positive")
42
45
  }
@@ -73,6 +76,9 @@ export const loadEchoPdfConfig = (env: Env): EchoPdfConfig => {
73
76
  const providerOverride = env.ECHO_PDF_DEFAULT_PROVIDER
74
77
  const modelOverride = env.ECHO_PDF_DEFAULT_MODEL
75
78
  const publicBaseUrlOverride = env.ECHO_PDF_PUBLIC_BASE_URL
79
+ const fileGetAuthHeaderOverride = env.ECHO_PDF_FILE_GET_AUTH_HEADER
80
+ const fileGetAuthEnvOverride = env.ECHO_PDF_FILE_GET_AUTH_ENV
81
+ const fileGetCacheTtlOverride = env.ECHO_PDF_FILE_GET_CACHE_TTL_SECONDS
76
82
  const withOverrides: EchoPdfConfig = {
77
83
  ...resolved,
78
84
  service: {
@@ -81,6 +87,23 @@ export const loadEchoPdfConfig = (env: Env): EchoPdfConfig => {
81
87
  typeof publicBaseUrlOverride === "string" && publicBaseUrlOverride.trim().length > 0
82
88
  ? publicBaseUrlOverride.trim()
83
89
  : resolved.service.publicBaseUrl,
90
+ fileGet: {
91
+ authHeader:
92
+ typeof fileGetAuthHeaderOverride === "string" && fileGetAuthHeaderOverride.trim().length > 0
93
+ ? fileGetAuthHeaderOverride.trim()
94
+ : resolved.service.fileGet?.authHeader,
95
+ authEnv:
96
+ typeof fileGetAuthEnvOverride === "string" && fileGetAuthEnvOverride.trim().length > 0
97
+ ? fileGetAuthEnvOverride.trim()
98
+ : resolved.service.fileGet?.authEnv,
99
+ cacheTtlSeconds: (() => {
100
+ if (typeof fileGetCacheTtlOverride === "string" && fileGetCacheTtlOverride.trim().length > 0) {
101
+ const value = Number(fileGetCacheTtlOverride)
102
+ return Number.isFinite(value) && value >= 0 ? Math.floor(value) : resolved.service.fileGet?.cacheTtlSeconds
103
+ }
104
+ return resolved.service.fileGet?.cacheTtlSeconds
105
+ })(),
106
+ },
84
107
  },
85
108
  agent: {
86
109
  ...resolved.agent,
package/src/pdf-types.ts CHANGED
@@ -23,6 +23,11 @@ export interface EchoPdfConfig {
23
23
  readonly service: {
24
24
  readonly name: string
25
25
  readonly publicBaseUrl?: string
26
+ readonly fileGet?: {
27
+ readonly authHeader?: string
28
+ readonly authEnv?: string
29
+ readonly cacheTtlSeconds?: number
30
+ }
26
31
  readonly maxPdfBytes: number
27
32
  readonly maxPagesPerRequest: number
28
33
  readonly defaultRenderScale: number
@@ -79,20 +79,7 @@ export class R2FileStore implements FileStore {
79
79
  }
80
80
 
81
81
  async list(): Promise<ReadonlyArray<StoredFileMeta>> {
82
- const listed = await this.bucket.list({ prefix: PREFIX, limit: 1000 })
83
- return listed.objects.map((obj) => {
84
- const meta = (obj.customMetadata ?? {}) as MetaFields
85
- const createdAt = parseCreatedAt(meta.createdAt, obj.uploaded)
86
- const filename = meta.filename ?? toId(obj.key)
87
- const mimeType = meta.mimeType ?? obj.httpMetadata?.contentType ?? "application/octet-stream"
88
- return {
89
- id: toId(obj.key),
90
- filename,
91
- mimeType,
92
- sizeBytes: obj.size,
93
- createdAt,
94
- }
95
- })
82
+ return await this.listAllFiles()
96
83
  }
97
84
 
98
85
  async delete(fileId: string): Promise<boolean> {
@@ -101,7 +88,7 @@ export class R2FileStore implements FileStore {
101
88
  }
102
89
 
103
90
  async stats(): Promise<unknown> {
104
- const files = await this.list()
91
+ const files = await this.listAllFiles()
105
92
  const totalBytes = files.reduce((sum, file) => sum + file.sizeBytes, 0)
106
93
  return {
107
94
  backend: "r2",
@@ -114,17 +101,24 @@ export class R2FileStore implements FileStore {
114
101
  }
115
102
 
116
103
  async cleanup(): Promise<unknown> {
117
- const before = await this.list()
118
- const deletedExpired = await this.deleteExpired(before)
119
- const afterExpired = await this.list()
120
- const deletedEvicted = await this.evictIfNeeded(afterExpired, 0)
121
- const after = await this.list()
104
+ const files = await this.listAllFiles()
105
+ const expired = files.filter((f) => isExpired(f.createdAt, this.policy.ttlHours))
106
+ const active = files.filter((f) => !isExpired(f.createdAt, this.policy.ttlHours))
107
+ if (expired.length > 0) {
108
+ await this.bucket.delete(expired.map((f) => toKey(f.id)))
109
+ }
110
+ const evict = this.pickEvictions(active, 0)
111
+ if (evict.length > 0) {
112
+ await this.bucket.delete(evict.map((f) => toKey(f.id)))
113
+ }
114
+ const evictIds = new Set(evict.map((f) => f.id))
115
+ const after = active.filter((f) => !evictIds.has(f.id))
122
116
  const totalBytes = after.reduce((sum, file) => sum + file.sizeBytes, 0)
123
117
  return {
124
118
  backend: "r2",
125
119
  policy: this.policy,
126
- deletedExpired,
127
- deletedEvicted,
120
+ deletedExpired: expired.length,
121
+ deletedEvicted: evict.length,
128
122
  stats: {
129
123
  fileCount: after.length,
130
124
  totalBytes,
@@ -133,12 +127,19 @@ export class R2FileStore implements FileStore {
133
127
  }
134
128
 
135
129
  private async cleanupInternal(incomingBytes: number): Promise<void> {
136
- const files = await this.list()
137
- await this.deleteExpired(files)
138
- const afterExpired = await this.list()
139
- await this.evictIfNeeded(afterExpired, incomingBytes)
140
- const finalFiles = await this.list()
141
- const finalTotal = finalFiles.reduce((sum, file) => sum + file.sizeBytes, 0)
130
+ const files = await this.listAllFiles()
131
+ const expired = files.filter((f) => isExpired(f.createdAt, this.policy.ttlHours))
132
+ const active = files.filter((f) => !isExpired(f.createdAt, this.policy.ttlHours))
133
+ if (expired.length > 0) {
134
+ await this.bucket.delete(expired.map((f) => toKey(f.id)))
135
+ }
136
+ const evict = this.pickEvictions(active, incomingBytes)
137
+ if (evict.length > 0) {
138
+ await this.bucket.delete(evict.map((f) => toKey(f.id)))
139
+ }
140
+ const evictIds = new Set(evict.map((f) => f.id))
141
+ const remaining = active.filter((f) => !evictIds.has(f.id))
142
+ const finalTotal = remaining.reduce((sum, file) => sum + file.sizeBytes, 0)
142
143
  if (finalTotal + incomingBytes > this.policy.maxTotalBytes) {
143
144
  const err = new Error(
144
145
  `storage quota exceeded: total ${finalTotal} + incoming ${incomingBytes} > maxTotalBytes ${this.policy.maxTotalBytes}`
@@ -150,17 +151,10 @@ export class R2FileStore implements FileStore {
150
151
  }
151
152
  }
152
153
 
153
- private async deleteExpired(files: ReadonlyArray<StoredFileMeta>): Promise<number> {
154
- const expired = files.filter((f) => isExpired(f.createdAt, this.policy.ttlHours))
155
- if (expired.length === 0) return 0
156
- await this.bucket.delete(expired.map((f) => toKey(f.id)))
157
- return expired.length
158
- }
159
-
160
- private async evictIfNeeded(files: ReadonlyArray<StoredFileMeta>, incomingBytes: number): Promise<number> {
154
+ private pickEvictions(files: ReadonlyArray<StoredFileMeta>, incomingBytes: number): StoredFileMeta[] {
161
155
  const totalBytes = files.reduce((sum, f) => sum + f.sizeBytes, 0)
162
156
  const projected = totalBytes + incomingBytes
163
- if (projected <= this.policy.maxTotalBytes) return 0
157
+ if (projected <= this.policy.maxTotalBytes) return []
164
158
 
165
159
  const needFree = projected - this.policy.maxTotalBytes
166
160
  const candidates = [...files].sort((a, b) => Date.parse(a.createdAt) - Date.parse(b.createdAt))
@@ -172,9 +166,30 @@ export class R2FileStore implements FileStore {
172
166
  if (freed >= needFree) break
173
167
  if (evict.length >= this.policy.cleanupBatchSize) break
174
168
  }
175
- if (evict.length === 0) return 0
176
- await this.bucket.delete(evict.map((f) => toKey(f.id)))
177
- return evict.length
169
+ return evict
178
170
  }
179
- }
180
171
 
172
+ private async listAllFiles(): Promise<StoredFileMeta[]> {
173
+ const files: StoredFileMeta[] = []
174
+ let cursor: string | undefined
175
+ while (true) {
176
+ const listed = await this.bucket.list({ prefix: PREFIX, limit: 1000, cursor })
177
+ for (const obj of listed.objects) {
178
+ const meta = (obj.customMetadata ?? {}) as MetaFields
179
+ const createdAt = parseCreatedAt(meta.createdAt, obj.uploaded)
180
+ const filename = meta.filename ?? toId(obj.key)
181
+ const mimeType = meta.mimeType ?? obj.httpMetadata?.contentType ?? "application/octet-stream"
182
+ files.push({
183
+ id: toId(obj.key),
184
+ filename,
185
+ mimeType,
186
+ sizeBytes: obj.size,
187
+ createdAt,
188
+ })
189
+ }
190
+ if (listed.truncated !== true || !listed.cursor) break
191
+ cursor = listed.cursor
192
+ }
193
+ return files
194
+ }
195
+ }
@@ -15,6 +15,10 @@ export interface ToolOutputEnvelope {
15
15
  readonly artifacts: ToolArtifact[]
16
16
  }
17
17
 
18
+ const MAX_TEXT_STRING = 1200
19
+ const MAX_TEXT_ARRAY = 40
20
+ const MAX_TEXT_DEPTH = 8
21
+
18
22
  const asObj = (value: unknown): JsonObject =>
19
23
  typeof value === "object" && value !== null && !Array.isArray(value)
20
24
  ? (value as JsonObject)
@@ -113,6 +117,38 @@ const summarizeData = (data: unknown): string => {
113
117
  return "Tool executed successfully."
114
118
  }
115
119
 
120
+ const sanitizeString = (value: string): string => {
121
+ if (value.startsWith("data:")) {
122
+ const [head] = value.split(",", 1)
123
+ return `${head},<omitted>`
124
+ }
125
+ if (/^[A-Za-z0-9+/=]{300,}$/.test(value)) {
126
+ return `<base64 omitted len=${value.length}>`
127
+ }
128
+ if (value.length > MAX_TEXT_STRING) {
129
+ return `${value.slice(0, MAX_TEXT_STRING)}...(truncated ${value.length - MAX_TEXT_STRING} chars)`
130
+ }
131
+ return value
132
+ }
133
+
134
+ const sanitizeForText = (value: unknown, depth = 0): unknown => {
135
+ if (depth >= MAX_TEXT_DEPTH) return "<max-depth>"
136
+ if (typeof value === "string") return sanitizeString(value)
137
+ if (typeof value !== "object" || value === null) return value
138
+ if (Array.isArray(value)) {
139
+ const items = value.slice(0, MAX_TEXT_ARRAY).map((item) => sanitizeForText(item, depth + 1))
140
+ if (value.length > MAX_TEXT_ARRAY) {
141
+ items.push(`<truncated ${value.length - MAX_TEXT_ARRAY} items>`)
142
+ }
143
+ return items
144
+ }
145
+ const out: Record<string, unknown> = {}
146
+ for (const [key, nested] of Object.entries(value)) {
147
+ out[key] = sanitizeForText(nested, depth + 1)
148
+ }
149
+ return out
150
+ }
151
+
116
152
  export const buildMcpContent = (envelope: ToolOutputEnvelope): Array<Record<string, unknown>> => {
117
153
  const lines: string[] = [summarizeData(envelope.data)]
118
154
  if (envelope.artifacts.length > 0) {
@@ -130,7 +166,7 @@ export const buildMcpContent = (envelope: ToolOutputEnvelope): Array<Record<stri
130
166
  }
131
167
  }
132
168
  lines.push("")
133
- lines.push(JSON.stringify(envelope, null, 2))
169
+ lines.push(JSON.stringify(sanitizeForText(envelope), null, 2))
134
170
 
135
171
  const content: Array<Record<string, unknown>> = [{ type: "text", text: lines.join("\n") }]
136
172
  for (const artifact of envelope.artifacts) {