@echofiles/echo-pdf 0.3.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +84 -0
- package/bin/echo-pdf.js +227 -3
- package/echo-pdf.config.json +3 -0
- package/package.json +1 -1
- package/src/http-error.ts +21 -0
- package/src/index.ts +29 -2
- package/src/mcp-server.ts +11 -1
- package/src/pdf-agent.ts +28 -9
- package/src/pdf-config.ts +23 -0
- package/src/pdf-types.ts +5 -0
- package/src/r2-file-store.ts +56 -41
- package/src/response-schema.ts +37 -1
package/README.md
CHANGED
|
@@ -48,6 +48,13 @@ npm i -g @echofiles/echo-pdf
|
|
|
48
48
|
echo-pdf init --service-url https://echo-pdf.echofilesai.workers.dev
|
|
49
49
|
```
|
|
50
50
|
|
|
51
|
+
本地一键启动服务(daemon):
|
|
52
|
+
|
|
53
|
+
```bash
|
|
54
|
+
echo-pdf dev --port 8788
|
|
55
|
+
echo-pdf init --service-url http://127.0.0.1:8788
|
|
56
|
+
```
|
|
57
|
+
|
|
51
58
|
配置 API Key(仅保存在本机 CLI 配置,不会上报到服务端存储):
|
|
52
59
|
|
|
53
60
|
```bash
|
|
@@ -88,6 +95,47 @@ echo-pdf mcp tools
|
|
|
88
95
|
echo-pdf mcp call --tool file_ops --args '{"op":"list"}'
|
|
89
96
|
```
|
|
90
97
|
|
|
98
|
+
### 3.1.1 纯 MCP 场景推荐流程(本地 PDF)
|
|
99
|
+
|
|
100
|
+
远端 MCP server 无法直接读取你本机文件路径。推荐两步:
|
|
101
|
+
|
|
102
|
+
1. 先通过 HTTP 上传本地 PDF,拿到 `fileId`
|
|
103
|
+
2. 再用 MCP 工具传 `fileId` 调用
|
|
104
|
+
|
|
105
|
+
示例:
|
|
106
|
+
|
|
107
|
+
```bash
|
|
108
|
+
curl -sS -X POST https://echo-pdf.echofilesai.workers.dev/api/files/upload \
|
|
109
|
+
-F 'file=@./input.pdf'
|
|
110
|
+
|
|
111
|
+
echo-pdf mcp call --tool pdf_extract_pages --args '{"fileId":"<FILE_ID>","pages":[1]}'
|
|
112
|
+
```
|
|
113
|
+
|
|
114
|
+
### 3.1.2 不上传文件的 URL ingest 流程
|
|
115
|
+
|
|
116
|
+
如果 PDF 已经在公网可访问,直接传 `url`:
|
|
117
|
+
|
|
118
|
+
```bash
|
|
119
|
+
echo-pdf mcp call --tool pdf_extract_pages --args '{
|
|
120
|
+
"url":"https://example.com/sample.pdf",
|
|
121
|
+
"pages":[1]
|
|
122
|
+
}'
|
|
123
|
+
```
|
|
124
|
+
|
|
125
|
+
### 3.1.3 stdio MCP(支持本地文件路径)
|
|
126
|
+
|
|
127
|
+
stdio 模式会把本地 `path/filePath` 自动上传为 `fileId` 后再调用远端工具。
|
|
128
|
+
|
|
129
|
+
```bash
|
|
130
|
+
echo-pdf mcp stdio
|
|
131
|
+
```
|
|
132
|
+
|
|
133
|
+
生成 Claude Desktop/Cursor 等可用的 stdio 配置片段:
|
|
134
|
+
|
|
135
|
+
```bash
|
|
136
|
+
echo-pdf setup add claude-desktop --mode stdio
|
|
137
|
+
```
|
|
138
|
+
|
|
91
139
|
### 3.2 给客户端生成 MCP 配置片段
|
|
92
140
|
|
|
93
141
|
```bash
|
|
@@ -107,6 +155,12 @@ echo-pdf setup add json
|
|
|
107
155
|
- `pdf_tables_to_latex`
|
|
108
156
|
- `file_ops`
|
|
109
157
|
|
|
158
|
+
MCP 输出策略:
|
|
159
|
+
|
|
160
|
+
- `pdf_extract_pages` 在 MCP 下默认 `returnMode=url`(不传 `returnMode` 时生效)
|
|
161
|
+
- MCP `text` 会对大字段做去二进制/截断,避免把大段 base64 塞进上下文
|
|
162
|
+
- 二进制结果请优先使用 `resource_link` 中的下载地址
|
|
163
|
+
|
|
110
164
|
## 4. Web UI 使用
|
|
111
165
|
|
|
112
166
|
打开:
|
|
@@ -139,6 +193,12 @@ curl -sS -X POST https://echo-pdf.echofilesai.workers.dev/api/files/upload \
|
|
|
139
193
|
|
|
140
194
|
返回中会拿到 `file.id`。
|
|
141
195
|
|
|
196
|
+
CLI 等价命令:
|
|
197
|
+
|
|
198
|
+
```bash
|
|
199
|
+
echo-pdf file upload ./sample.pdf
|
|
200
|
+
```
|
|
201
|
+
|
|
142
202
|
### 5.2 提取页面图片
|
|
143
203
|
|
|
144
204
|
```bash
|
|
@@ -152,6 +212,18 @@ curl -sS -X POST https://echo-pdf.echofilesai.workers.dev/tools/call \
|
|
|
152
212
|
}'
|
|
153
213
|
```
|
|
154
214
|
|
|
215
|
+
CLI(支持直接传本地路径):
|
|
216
|
+
|
|
217
|
+
```bash
|
|
218
|
+
echo-pdf call --tool pdf_extract_pages --args '{"path":"./sample.pdf","pages":[1],"returnMode":"url"}'
|
|
219
|
+
```
|
|
220
|
+
|
|
221
|
+
下载产物:
|
|
222
|
+
|
|
223
|
+
```bash
|
|
224
|
+
echo-pdf file get --file-id <FILE_ID> --out ./output.bin
|
|
225
|
+
```
|
|
226
|
+
|
|
155
227
|
### 5.3 OCR
|
|
156
228
|
|
|
157
229
|
```bash
|
|
@@ -183,6 +255,7 @@ curl -sS -X POST https://echo-pdf.echofilesai.workers.dev/tools/call \
|
|
|
183
255
|
- `agent.defaultProvider`
|
|
184
256
|
- `agent.defaultModel`
|
|
185
257
|
- `service.publicBaseUrl`
|
|
258
|
+
- `service.fileGet.cacheTtlSeconds`
|
|
186
259
|
- `service.maxPdfBytes`
|
|
187
260
|
- `service.storage.maxFileBytes`
|
|
188
261
|
- `service.storage.maxTotalBytes`
|
|
@@ -205,6 +278,8 @@ curl -sS -X POST https://echo-pdf.echofilesai.workers.dev/tools/call \
|
|
|
205
278
|
- `ECHO_PDF_DEFAULT_PROVIDER`
|
|
206
279
|
- `ECHO_PDF_DEFAULT_MODEL`
|
|
207
280
|
- `ECHO_PDF_PUBLIC_BASE_URL`(可选,强制 artifacts 生成外部可访问绝对 URL)
|
|
281
|
+
- `ECHO_PDF_FILE_GET_CACHE_TTL_SECONDS`(可选,`/api/files/get` 缓存秒数,`0` 表示 `no-store`)
|
|
282
|
+
- `ECHO_PDF_FILE_GET_AUTH_HEADER` + `ECHO_PDF_FILE_GET_AUTH_ENV`(可选,启用下载端点 header 鉴权)
|
|
208
283
|
- `ECHO_PDF_MCP_KEY`(可选,启用 MCP 鉴权)
|
|
209
284
|
- `ECHO_PDF_WORKER_NAME`(CLI 默认 URL 推导)
|
|
210
285
|
|
|
@@ -263,3 +338,12 @@ curl -sS -X POST https://echo-pdf.echofilesai.workers.dev/tools/call \
|
|
|
263
338
|
"arguments":{"fileId":"<FILE_ID>","pages":[1],"returnMode":"url"}
|
|
264
339
|
}'
|
|
265
340
|
```
|
|
341
|
+
|
|
342
|
+
### 8.4 错误码语义
|
|
343
|
+
|
|
344
|
+
- 客户端输入错误返回稳定 `4xx + code`,例如:
|
|
345
|
+
- `PAGES_REQUIRED`(400)
|
|
346
|
+
- `PAGE_OUT_OF_RANGE`(400)
|
|
347
|
+
- `MISSING_FILE_INPUT`(400)
|
|
348
|
+
- `FILE_NOT_FOUND`(404)
|
|
349
|
+
- 服务端故障返回 `5xx`。
|
package/bin/echo-pdf.js
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
|
+
import { spawn } from "node:child_process"
|
|
2
3
|
import fs from "node:fs"
|
|
3
4
|
import os from "node:os"
|
|
4
5
|
import path from "node:path"
|
|
@@ -208,6 +209,170 @@ const buildMcpRequest = (id, method, params = {}) => ({
|
|
|
208
209
|
params,
|
|
209
210
|
})
|
|
210
211
|
|
|
212
|
+
/**
 * Upload a local file to the service's `/api/files/upload` endpoint as
 * multipart form data (field name `file`).
 *
 * @param {string} serviceUrl Base URL of the service; used verbatim as the URL prefix.
 * @param {string} filePath File to upload; relative paths resolve against the current working directory.
 * @returns {Promise<any>} The parsed JSON response body, or `{ raw }` holding the
 *   response text when the body is not valid JSON.
 * @throws {Error} With message `"<status> <json body>"` when the response is not ok.
 */
const uploadFile = async (serviceUrl, filePath) => {
  const resolvedPath = path.resolve(process.cwd(), filePath)
  const content = fs.readFileSync(resolvedPath)

  const payload = new FormData()
  payload.append("file", new Blob([content]), path.basename(resolvedPath))

  const res = await fetch(`${serviceUrl}/api/files/upload`, { method: "POST", body: payload })
  const rawBody = await res.text()

  // Error bodies are not guaranteed to be JSON, so keep the raw text as a fallback.
  const parseBody = () => {
    try {
      return JSON.parse(rawBody)
    } catch {
      return { raw: rawBody }
    }
  }
  const parsed = parseBody()

  if (res.ok) return parsed
  throw new Error(`${res.status} ${JSON.stringify(parsed)}`)
}
|
|
231
|
+
|
|
232
|
+
/**
 * Download a stored file through `/api/files/get?...&download=1` and write it to disk.
 *
 * @param {string} serviceUrl Base URL of the service; used verbatim as the URL prefix.
 * @param {string} fileId Identifier of the file to fetch (URL-encoded before use).
 * @param {string} outputPath Destination path; relative paths resolve against the current working directory.
 * @returns {Promise<string>} Absolute path the bytes were written to.
 * @throws {Error} With message `"<status> <body text>"` when the response is not ok.
 */
const downloadFile = async (serviceUrl, fileId, outputPath) => {
  const query = `fileId=${encodeURIComponent(fileId)}&download=1`
  const res = await fetch(`${serviceUrl}/api/files/get?${query}`)
  if (!res.ok) {
    throw new Error(`${res.status} ${await res.text()}`)
  }

  const content = Buffer.from(await res.arrayBuffer())
  const target = path.resolve(process.cwd(), outputPath)
  // Create missing parent directories so `--out nested/dir/file` works.
  fs.mkdirSync(path.dirname(target), { recursive: true })
  fs.writeFileSync(target, content)
  return target
}
|
|
244
|
+
|
|
245
|
+
/**
 * Replace a local `path` / `filePath` argument of a `pdf_*` tool call with an
 * uploaded `fileId`.
 *
 * The input object is never mutated; a shallow copy is returned. The upload only
 * happens when the tool name starts with `pdf_`, a string `path` (preferred) or
 * `filePath` is present, and none of `fileId`, `url`, `base64` is already set.
 *
 * @param {string} serviceUrl Base URL handed to `uploadFile`.
 * @param {string} tool Tool name.
 * @param {object} [args] Tool arguments; a falsy value is treated as `{}`.
 * @returns {Promise<object>} Arguments to send to the service.
 * @throws {Error} When the upload response carries no `file.id`.
 */
const withUploadedLocalFile = async (serviceUrl, tool, args) => {
  const prepared = { ...(args || {}) }
  if (!tool.startsWith("pdf_")) return prepared

  let localPath = ""
  if (typeof prepared.path === "string") {
    localPath = prepared.path
  } else if (typeof prepared.filePath === "string") {
    localPath = prepared.filePath
  }

  // An explicit remote source always wins over a local path.
  const hasOtherSource = Boolean(prepared.fileId || prepared.url || prepared.base64)
  if (!localPath || hasOtherSource) return prepared

  const uploaded = await uploadFile(serviceUrl, localPath)
  const uploadedId = uploaded?.file?.id
  if (!uploadedId) throw new Error(`upload failed for local path: ${localPath}`)

  prepared.fileId = uploadedId
  delete prepared.path
  delete prepared.filePath
  return prepared
}
|
|
262
|
+
|
|
263
|
+
/**
 * Start `wrangler dev` as a child process and mirror its termination.
 *
 * Uses the wrangler binary under `../node_modules/.bin` when it exists, otherwise
 * falls back to `npx -y wrangler`. The child inherits stdio, environment and the
 * current working directory.
 *
 * @param {number} port Port passed to `wrangler dev --port`.
 * @param {string} host Address passed to `wrangler dev --ip`.
 */
const runDevServer = (port, host) => {
  // NOTE(review): `__dirname` is not defined in ES modules and this file uses
  // `import` syntax — confirm it is defined at module scope elsewhere in the file.
  const wranglerBin = path.resolve(__dirname, "../node_modules/.bin/wrangler")
  const wranglerArgs = ["dev", "--port", String(port), "--ip", host]
  const hasLocalWrangler = fs.existsSync(wranglerBin)
  const cmd = hasLocalWrangler ? wranglerBin : "npx"
  const args = hasLocalWrangler ? wranglerArgs : ["-y", "wrangler", ...wranglerArgs]
  const child = spawn(cmd, args, {
    stdio: "inherit",
    env: process.env,
    cwd: process.cwd(),
  })
  // Without an "error" listener a failed spawn (e.g. ENOENT) surfaces as an
  // unhandled event and crashes with a stack trace instead of a clear message.
  child.on("error", (error) => {
    process.stderr.write(`failed to start ${cmd}: ${error instanceof Error ? error.message : String(error)}\n`)
    process.exit(1)
  })
  child.on("exit", (code, signal) => {
    // Re-raise the child's signal so the parent reports the same termination.
    if (signal) process.kill(process.pid, signal)
    // `code` is null after a signal; if the re-raised signal did not end this
    // process, do not report success.
    process.exit(code ?? (signal ? 1 : 0))
  })
}
|
|
278
|
+
|
|
279
|
+
const mcpReadLoop = (onMessage, onError) => {
|
|
280
|
+
let buffer = Buffer.alloc(0)
|
|
281
|
+
let expectedLength = null
|
|
282
|
+
process.stdin.on("data", (chunk) => {
|
|
283
|
+
buffer = Buffer.concat([buffer, chunk])
|
|
284
|
+
while (true) {
|
|
285
|
+
if (expectedLength === null) {
|
|
286
|
+
const headerEnd = buffer.indexOf("\r\n\r\n")
|
|
287
|
+
if (headerEnd === -1) break
|
|
288
|
+
const headerRaw = buffer.slice(0, headerEnd).toString("utf-8")
|
|
289
|
+
const lines = headerRaw.split("\r\n")
|
|
290
|
+
const cl = lines.find((line) => line.toLowerCase().startsWith("content-length:"))
|
|
291
|
+
if (!cl) {
|
|
292
|
+
onError(new Error("Missing Content-Length"))
|
|
293
|
+
buffer = buffer.slice(headerEnd + 4)
|
|
294
|
+
continue
|
|
295
|
+
}
|
|
296
|
+
expectedLength = Number(cl.split(":")[1]?.trim() || "0")
|
|
297
|
+
buffer = buffer.slice(headerEnd + 4)
|
|
298
|
+
}
|
|
299
|
+
if (!Number.isFinite(expectedLength) || expectedLength < 0) {
|
|
300
|
+
onError(new Error("Invalid Content-Length"))
|
|
301
|
+
expectedLength = null
|
|
302
|
+
continue
|
|
303
|
+
}
|
|
304
|
+
if (buffer.length < expectedLength) break
|
|
305
|
+
const body = buffer.slice(0, expectedLength).toString("utf-8")
|
|
306
|
+
buffer = buffer.slice(expectedLength)
|
|
307
|
+
expectedLength = null
|
|
308
|
+
try {
|
|
309
|
+
const maybePromise = onMessage(JSON.parse(body))
|
|
310
|
+
if (maybePromise && typeof maybePromise.then === "function") {
|
|
311
|
+
maybePromise.catch(onError)
|
|
312
|
+
}
|
|
313
|
+
} catch (error) {
|
|
314
|
+
onError(error)
|
|
315
|
+
}
|
|
316
|
+
}
|
|
317
|
+
})
|
|
318
|
+
}
|
|
319
|
+
|
|
320
|
+
const mcpWrite = (obj) => {
|
|
321
|
+
const body = Buffer.from(JSON.stringify(obj))
|
|
322
|
+
const header = Buffer.from(`Content-Length: ${body.length}\r\n\r\n`)
|
|
323
|
+
process.stdout.write(header)
|
|
324
|
+
process.stdout.write(body)
|
|
325
|
+
}
|
|
326
|
+
|
|
327
|
+
/**
 * Bridge MCP JSON-RPC messages between stdio and the service's `/mcp` HTTP endpoint.
 *
 * Every message read from stdin is POSTed to `<serviceUrl>/mcp` and the response is
 * written back to stdout, with these exceptions:
 * - messages that are not JSON-RPC 2.0 with a string `method` are answered locally
 *   with error -32600;
 * - `notifications/initialized` is dropped without forwarding;
 * - `tools/call` arguments first pass through `withUploadedLocalFile`, so a local
 *   `path` / `filePath` is uploaded and replaced by a `fileId`.
 *
 * A failure while handling a message that carries an `id` is answered with a
 * JSON-RPC -32603 error for that id, so the client is not left waiting. Failures for
 * messages without an `id` are reported on stderr only.
 *
 * NOTE(review): `loadConfig`, `buildMcpHeaders` and `postJson` are defined elsewhere
 * in this file; their exact behavior is assumed from how they are used here.
 */
const runMcpStdio = async () => {
  const config = loadConfig()
  const serviceUrl = config.serviceUrl
  const headers = buildMcpHeaders()
  const endpoint = `${serviceUrl}/mcp`
  const describeError = (error) => (error instanceof Error ? error.message : String(error))

  // Builds the payload to forward; only `tools/call` needs its arguments rewritten.
  const buildForwardPayload = async (msg) => {
    if (msg.method !== "tools/call") return msg
    const tool = String(msg?.params?.name || "")
    const args = (msg?.params?.arguments && typeof msg.params.arguments === "object")
      ? msg.params.arguments
      : {}
    const preparedArgs = await withUploadedLocalFile(serviceUrl, tool, args)
    return {
      ...msg,
      params: {
        ...(msg.params || {}),
        arguments: preparedArgs,
      },
    }
  }

  mcpReadLoop(async (msg) => {
    const method = msg?.method
    const hasId = Object.hasOwn(msg || {}, "id")
    const id = hasId ? msg.id : null
    if (msg?.jsonrpc !== "2.0" || typeof method !== "string") {
      mcpWrite({ jsonrpc: "2.0", id, error: { code: -32600, message: "Invalid Request" } })
      return
    }
    if (method === "notifications/initialized") return
    try {
      const payload = await buildForwardPayload(msg)
      const data = await postJson(endpoint, payload, headers)
      mcpWrite(data)
    } catch (error) {
      // Without an id there is no request to answer; let the read loop's error
      // handler report it on stderr.
      if (!hasId) throw error
      mcpWrite({
        jsonrpc: "2.0",
        id,
        error: { code: -32603, message: describeError(error) },
      })
    }
  }, (error) => {
    process.stderr.write(`${describeError(error)}\n`)
  })
}
|
|
375
|
+
|
|
211
376
|
const parseConfigValue = (raw, type = "auto") => {
|
|
212
377
|
if (type === "string") return String(raw)
|
|
213
378
|
if (type === "number") {
|
|
@@ -302,6 +467,7 @@ const usage = () => {
|
|
|
302
467
|
process.stdout.write(`echo-pdf CLI\n\n`)
|
|
303
468
|
process.stdout.write(`Commands:\n`)
|
|
304
469
|
process.stdout.write(` init [--service-url URL]\n`)
|
|
470
|
+
process.stdout.write(` dev [--port 8788] [--host 127.0.0.1]\n`)
|
|
305
471
|
process.stdout.write(` provider set --provider <${PROVIDER_SET_NAMES.join("|")}> --api-key <KEY> [--profile name]\n`)
|
|
306
472
|
process.stdout.write(` provider use --provider <${PROVIDER_ALIASES.join("|")}> [--profile name]\n`)
|
|
307
473
|
process.stdout.write(` provider list [--profile name]\n`)
|
|
@@ -312,13 +478,29 @@ const usage = () => {
|
|
|
312
478
|
process.stdout.write(` model list [--profile name]\n`)
|
|
313
479
|
process.stdout.write(` tools\n`)
|
|
314
480
|
process.stdout.write(` call --tool <name> --args '<json>' [--provider alias] [--model model] [--profile name]\n`)
|
|
481
|
+
process.stdout.write(` file upload <local.pdf>\n`)
|
|
482
|
+
process.stdout.write(` file get --file-id <id> --out <path>\n`)
|
|
315
483
|
process.stdout.write(` mcp initialize\n`)
|
|
316
484
|
process.stdout.write(` mcp tools\n`)
|
|
317
485
|
process.stdout.write(` mcp call --tool <name> --args '<json>'\n`)
|
|
486
|
+
process.stdout.write(` mcp stdio\n`)
|
|
318
487
|
process.stdout.write(` setup add <claude-desktop|claude-code|cursor|cline|windsurf|gemini|json>\n`)
|
|
319
488
|
}
|
|
320
489
|
|
|
321
|
-
const setupSnippet = (tool, serviceUrl) => {
|
|
490
|
+
const setupSnippet = (tool, serviceUrl, mode = "http") => {
|
|
491
|
+
if (mode === "stdio") {
|
|
492
|
+
return {
|
|
493
|
+
mcpServers: {
|
|
494
|
+
"echo-pdf": {
|
|
495
|
+
command: "echo-pdf",
|
|
496
|
+
args: ["mcp", "stdio"],
|
|
497
|
+
env: {
|
|
498
|
+
ECHO_PDF_SERVICE_URL: serviceUrl,
|
|
499
|
+
},
|
|
500
|
+
},
|
|
501
|
+
},
|
|
502
|
+
}
|
|
503
|
+
}
|
|
322
504
|
const transport = {
|
|
323
505
|
type: "streamable-http",
|
|
324
506
|
url: `${serviceUrl}/mcp`,
|
|
@@ -405,6 +587,14 @@ const main = async () => {
|
|
|
405
587
|
return
|
|
406
588
|
}
|
|
407
589
|
|
|
590
|
+
if (command === "dev") {
|
|
591
|
+
const port = typeof flags.port === "string" ? Number(flags.port) : 8788
|
|
592
|
+
const host = typeof flags.host === "string" ? flags.host : "127.0.0.1"
|
|
593
|
+
if (!Number.isFinite(port) || port <= 0) throw new Error("dev --port must be positive number")
|
|
594
|
+
runDevServer(Math.floor(port), host)
|
|
595
|
+
return
|
|
596
|
+
}
|
|
597
|
+
|
|
408
598
|
if (command === "provider" && subcommand === "set") {
|
|
409
599
|
const providerAlias = resolveProviderAliasInput(flags.provider)
|
|
410
600
|
const apiKey = flags["api-key"]
|
|
@@ -538,15 +728,42 @@ const main = async () => {
|
|
|
538
728
|
const tool = flags.tool
|
|
539
729
|
if (typeof tool !== "string") throw new Error("call requires --tool")
|
|
540
730
|
const args = typeof flags.args === "string" ? JSON.parse(flags.args) : {}
|
|
731
|
+
const preparedArgs = await withUploadedLocalFile(config.serviceUrl, tool, args)
|
|
541
732
|
const provider = resolveProviderAlias(profile, flags.provider)
|
|
542
733
|
const model = typeof flags.model === "string" ? flags.model : resolveDefaultModel(profile, provider)
|
|
543
734
|
const providerApiKeys = buildProviderApiKeys(config, profileName)
|
|
544
|
-
const payload = buildToolCallRequest({ tool, args, provider, model, providerApiKeys })
|
|
735
|
+
const payload = buildToolCallRequest({ tool, args: preparedArgs, provider, model, providerApiKeys })
|
|
545
736
|
const data = await postJson(`${config.serviceUrl}/tools/call`, payload)
|
|
546
737
|
print(data)
|
|
547
738
|
return
|
|
548
739
|
}
|
|
549
740
|
|
|
741
|
+
if (command === "file") {
|
|
742
|
+
const action = rest[0] || ""
|
|
743
|
+
const config = loadConfig()
|
|
744
|
+
if (action === "upload") {
|
|
745
|
+
const filePath = rest[1]
|
|
746
|
+
if (!filePath) throw new Error("file upload requires a path")
|
|
747
|
+
const data = await uploadFile(config.serviceUrl, filePath)
|
|
748
|
+
print({
|
|
749
|
+
fileId: data?.file?.id || "",
|
|
750
|
+
filename: data?.file?.filename || path.basename(filePath),
|
|
751
|
+
sizeBytes: data?.file?.sizeBytes || 0,
|
|
752
|
+
file: data?.file || null,
|
|
753
|
+
})
|
|
754
|
+
return
|
|
755
|
+
}
|
|
756
|
+
if (action === "get") {
|
|
757
|
+
const fileId = typeof flags["file-id"] === "string" ? flags["file-id"] : ""
|
|
758
|
+
const out = typeof flags.out === "string" ? flags.out : ""
|
|
759
|
+
if (!fileId || !out) throw new Error("file get requires --file-id and --out")
|
|
760
|
+
const savedTo = await downloadFile(config.serviceUrl, fileId, out)
|
|
761
|
+
print({ ok: true, fileId, savedTo })
|
|
762
|
+
return
|
|
763
|
+
}
|
|
764
|
+
throw new Error("file command supports: upload|get")
|
|
765
|
+
}
|
|
766
|
+
|
|
550
767
|
if (command === "mcp" && subcommand === "initialize") {
|
|
551
768
|
const config = loadConfig()
|
|
552
769
|
const data = await postJson(`${config.serviceUrl}/mcp`, buildMcpRequest(1, "initialize"), buildMcpHeaders())
|
|
@@ -575,11 +792,18 @@ const main = async () => {
|
|
|
575
792
|
return
|
|
576
793
|
}
|
|
577
794
|
|
|
795
|
+
if (command === "mcp" && subcommand === "stdio") {
|
|
796
|
+
await runMcpStdio()
|
|
797
|
+
return
|
|
798
|
+
}
|
|
799
|
+
|
|
578
800
|
if (command === "setup" && subcommand === "add") {
|
|
579
801
|
const tool = rest[0]
|
|
580
802
|
if (!tool) throw new Error("setup add requires tool name")
|
|
581
803
|
const config = loadConfig()
|
|
582
|
-
|
|
804
|
+
const mode = typeof flags.mode === "string" ? flags.mode : "http"
|
|
805
|
+
if (!["http", "stdio"].includes(mode)) throw new Error("setup add --mode must be http|stdio")
|
|
806
|
+
print(setupSnippet(tool, config.serviceUrl, mode))
|
|
583
807
|
return
|
|
584
808
|
}
|
|
585
809
|
|
package/echo-pdf.config.json
CHANGED
package/package.json
CHANGED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
/**
 * Error that carries an HTTP status code and a stable machine-readable `code`,
 * plus optional structured `details`.
 */
export class HttpError extends Error {
  /** HTTP status code associated with this error. */
  readonly status: number
  /** Stable machine-readable identifier, e.g. `PAGES_REQUIRED`. */
  readonly code: string
  /** Optional structured context about the failure. */
  readonly details?: unknown

  /**
   * @param status HTTP status code.
   * @param code Stable machine-readable error identifier.
   * @param message Human-readable description.
   * @param details Optional structured context.
   */
  constructor(status: number, code: string, message: string, details?: unknown) {
    super(message)
    // Without this, logs and serialised errors show the generic name "Error".
    this.name = "HttpError"
    this.status = status
    this.code = code
    this.details = details
  }
}

/** Build a 400 `HttpError`. */
export const badRequest = (code: string, message: string, details?: unknown): HttpError =>
  new HttpError(400, code, message, details)

/** Build a 404 `HttpError`. */
export const notFound = (code: string, message: string, details?: unknown): HttpError =>
  new HttpError(404, code, message, details)

/** Build a 422 `HttpError`. */
export const unprocessable = (code: string, message: string, details?: unknown): HttpError =>
  new HttpError(422, code, message, details)
|
package/src/index.ts
CHANGED
|
@@ -61,6 +61,21 @@ const asObj = (value: unknown): JsonObject =>
|
|
|
61
61
|
const resolvePublicBaseUrl = (request: Request, configured?: string): string =>
|
|
62
62
|
typeof configured === "string" && configured.length > 0 ? configured : request.url
|
|
63
63
|
|
|
64
|
+
/**
 * Reduce a stored filename to a value that is safe inside the quoted `filename="..."`
 * parameter of a `Content-Disposition` header.
 *
 * Removes everything outside printable ASCII (which covers CR/LF and other control
 * characters), double quotes, and backslashes, then trims surrounding whitespace.
 *
 * @param filename Filename as stored with the file.
 * @returns The cleaned name, or `"download.bin"` when nothing usable remains.
 */
const sanitizeDownloadFilename = (filename: string): string => {
  const cleaned = filename
    .replace(/[^\x20-\x7E]+/g, "")
    // A backslash starts an escape inside a quoted-string; a trailing one would
    // escape the closing quote and corrupt the header value.
    .replace(/["\\]/g, "")
    .trim()
  return cleaned.length > 0 ? cleaned : "download.bin"
}
|
|
71
|
+
|
|
72
|
+
/**
 * Decide whether a request may use the file download endpoint.
 *
 * Header auth is only enforced when both `authHeader` and `authEnv` are configured
 * and `env[authEnv]` is a non-empty string; in every other case the request is
 * allowed. When enforced, the named request header must equal that value exactly.
 *
 * @param request Incoming request whose headers are inspected.
 * @param env Environment bindings that hold the expected secret.
 * @param config Name of the header to read and name of the env entry holding the secret.
 * @returns `true` when the request is allowed.
 */
const isFileGetAuthorized = (request: Request, env: Env, config: { authHeader?: string; authEnv?: string }): boolean => {
  const { authHeader, authEnv } = config
  if (authHeader && authEnv) {
    const expected = env[authEnv]
    if (typeof expected === "string" && expected.length > 0) {
      return request.headers.get(authHeader) === expected
    }
  }
  // Auth is not configured, or the secret is unset: the endpoint stays open.
  return true
}
|
|
78
|
+
|
|
64
79
|
const sseResponse = (stream: ReadableStream<Uint8Array>): Response =>
|
|
65
80
|
new Response(stream, {
|
|
66
81
|
headers: {
|
|
@@ -147,6 +162,10 @@ export default {
|
|
|
147
162
|
version: config.mcp.version,
|
|
148
163
|
authHeader: config.mcp.authHeader ?? null,
|
|
149
164
|
},
|
|
165
|
+
fileGet: {
|
|
166
|
+
authHeader: config.service.fileGet?.authHeader ?? null,
|
|
167
|
+
cacheTtlSeconds: config.service.fileGet?.cacheTtlSeconds ?? 300,
|
|
168
|
+
},
|
|
150
169
|
})
|
|
151
170
|
}
|
|
152
171
|
|
|
@@ -304,6 +323,10 @@ export default {
|
|
|
304
323
|
}
|
|
305
324
|
|
|
306
325
|
if (request.method === "GET" && url.pathname === "/api/files/get") {
|
|
326
|
+
const fileGetConfig = config.service.fileGet ?? {}
|
|
327
|
+
if (!isFileGetAuthorized(request, env, fileGetConfig)) {
|
|
328
|
+
return json({ error: "Unauthorized", code: "UNAUTHORIZED" }, 401)
|
|
329
|
+
}
|
|
307
330
|
const fileId = url.searchParams.get("fileId") || ""
|
|
308
331
|
if (!fileId) return json({ error: "Missing fileId" }, 400)
|
|
309
332
|
const file = await fileStore.get(fileId)
|
|
@@ -311,9 +334,13 @@ export default {
|
|
|
311
334
|
const download = url.searchParams.get("download") === "1"
|
|
312
335
|
const headers = new Headers()
|
|
313
336
|
headers.set("Content-Type", file.mimeType)
|
|
314
|
-
|
|
337
|
+
const cacheTtl = Number(fileGetConfig.cacheTtlSeconds ?? 300)
|
|
338
|
+
const cacheControl = cacheTtl > 0
|
|
339
|
+
? `public, max-age=${Math.floor(cacheTtl)}, s-maxage=${Math.floor(cacheTtl)}`
|
|
340
|
+
: "no-store"
|
|
341
|
+
headers.set("Cache-Control", cacheControl)
|
|
315
342
|
if (download) {
|
|
316
|
-
headers.set("Content-Disposition", `attachment; filename=\"${file.filename
|
|
343
|
+
headers.set("Content-Disposition", `attachment; filename=\"${sanitizeDownloadFilename(file.filename)}\"`)
|
|
317
344
|
}
|
|
318
345
|
return new Response(file.bytes, { status: 200, headers })
|
|
319
346
|
}
|
package/src/mcp-server.ts
CHANGED
|
@@ -48,6 +48,16 @@ const maybeAuthorized = (request: Request, env: Env, config: EchoPdfConfig): boo
|
|
|
48
48
|
const resolvePublicBaseUrl = (request: Request, configured?: string): string =>
|
|
49
49
|
typeof configured === "string" && configured.length > 0 ? configured : request.url
|
|
50
50
|
|
|
51
|
+
/**
 * Apply MCP-specific defaults to tool arguments.
 *
 * For `pdf_extract_pages`, a missing, empty, or non-string `returnMode` is replaced
 * by `"url"` in a shallow copy. In all other cases the given object is returned
 * unchanged (same reference).
 *
 * @param toolName Name of the tool being called.
 * @param args Arguments supplied by the caller; never mutated.
 * @returns Arguments to pass to the tool.
 */
const prepareMcpToolArgs = (toolName: string, args: Record<string, unknown>): Record<string, unknown> => {
  if (toolName !== "pdf_extract_pages") return args
  const hasExplicitMode = typeof args.returnMode === "string" && args.returnMode !== ""
  return hasExplicitMode ? args : { ...args, returnMode: "url" }
}
|
|
60
|
+
|
|
51
61
|
export const handleMcpRequest = async (
|
|
52
62
|
request: Request,
|
|
53
63
|
env: Env,
|
|
@@ -103,7 +113,7 @@ export const handleMcpRequest = async (
|
|
|
103
113
|
}
|
|
104
114
|
|
|
105
115
|
const toolName = typeof params.name === "string" ? params.name : ""
|
|
106
|
-
const args = asObj(params.arguments)
|
|
116
|
+
const args = prepareMcpToolArgs(toolName, asObj(params.arguments))
|
|
107
117
|
if (!toolName) {
|
|
108
118
|
return err(id, -32602, "Invalid params: name is required", {
|
|
109
119
|
code: "INVALID_PARAMS",
|
package/src/pdf-agent.ts
CHANGED
|
@@ -2,6 +2,7 @@ import type { Env, FileStore, ReturnMode } from "./types"
|
|
|
2
2
|
import type { AgentTraceEvent, EchoPdfConfig, PdfOperationRequest } from "./pdf-types"
|
|
3
3
|
import { resolveModelForProvider, resolveProviderAlias } from "./agent-defaults"
|
|
4
4
|
import { fromBase64, normalizeReturnMode, toDataUrl } from "./file-utils"
|
|
5
|
+
import { badRequest, notFound, unprocessable } from "./http-error"
|
|
5
6
|
import { extractPdfPageText, getPdfPageCount, renderPdfPageToPng, toBytes } from "./pdfium-engine"
|
|
6
7
|
import { visionRecognize } from "./provider-client"
|
|
7
8
|
|
|
@@ -22,11 +23,20 @@ const traceStep = (
|
|
|
22
23
|
}
|
|
23
24
|
|
|
24
25
|
const ensurePages = (pages: ReadonlyArray<number>, pageCount: number, maxPages: number): number[] => {
|
|
25
|
-
if (pages.length === 0) throw
|
|
26
|
-
if (pages.length > maxPages)
|
|
26
|
+
if (pages.length === 0) throw badRequest("PAGES_REQUIRED", "At least one page is required")
|
|
27
|
+
if (pages.length > maxPages) {
|
|
28
|
+
throw badRequest("TOO_MANY_PAGES", `Page count exceeds maxPagesPerRequest (${maxPages})`, {
|
|
29
|
+
maxPagesPerRequest: maxPages,
|
|
30
|
+
providedPages: pages.length,
|
|
31
|
+
})
|
|
32
|
+
}
|
|
27
33
|
for (const page of pages) {
|
|
28
34
|
if (!Number.isInteger(page) || page < 1 || page > pageCount) {
|
|
29
|
-
throw
|
|
35
|
+
throw badRequest("PAGE_OUT_OF_RANGE", `Page ${page} out of range 1..${pageCount}`, {
|
|
36
|
+
page,
|
|
37
|
+
min: 1,
|
|
38
|
+
max: pageCount,
|
|
39
|
+
})
|
|
30
40
|
}
|
|
31
41
|
}
|
|
32
42
|
return [...new Set(pages)].sort((a, b) => a - b)
|
|
@@ -45,7 +55,7 @@ export const ingestPdfFromPayload = async (
|
|
|
45
55
|
if (input.fileId) {
|
|
46
56
|
const existing = await opts.fileStore.get(input.fileId)
|
|
47
57
|
if (!existing) {
|
|
48
|
-
throw
|
|
58
|
+
throw notFound("FILE_NOT_FOUND", `File not found: ${input.fileId}`, { fileId: input.fileId })
|
|
49
59
|
}
|
|
50
60
|
return {
|
|
51
61
|
id: existing.id,
|
|
@@ -59,7 +69,11 @@ export const ingestPdfFromPayload = async (
|
|
|
59
69
|
|
|
60
70
|
if (input.url) {
|
|
61
71
|
traceStep(opts, "start", "file.fetch.url", { url: input.url })
|
|
62
|
-
|
|
72
|
+
try {
|
|
73
|
+
bytes = await toBytes(input.url)
|
|
74
|
+
} catch (error) {
|
|
75
|
+
throw badRequest("URL_FETCH_FAILED", `Unable to fetch PDF from url: ${error instanceof Error ? error.message : String(error)}`)
|
|
76
|
+
}
|
|
63
77
|
try {
|
|
64
78
|
const u = new URL(input.url)
|
|
65
79
|
filename = decodeURIComponent(u.pathname.split("/").pop() || filename)
|
|
@@ -74,10 +88,13 @@ export const ingestPdfFromPayload = async (
|
|
|
74
88
|
}
|
|
75
89
|
|
|
76
90
|
if (!bytes) {
|
|
77
|
-
throw
|
|
91
|
+
throw badRequest("MISSING_FILE_INPUT", "Missing file input. Provide fileId, url or base64")
|
|
78
92
|
}
|
|
79
93
|
if (bytes.byteLength > config.service.maxPdfBytes) {
|
|
80
|
-
throw
|
|
94
|
+
throw badRequest("PDF_TOO_LARGE", `PDF exceeds max size (${config.service.maxPdfBytes} bytes)`, {
|
|
95
|
+
maxPdfBytes: config.service.maxPdfBytes,
|
|
96
|
+
sizeBytes: bytes.byteLength,
|
|
97
|
+
})
|
|
81
98
|
}
|
|
82
99
|
|
|
83
100
|
const meta = await opts.fileStore.put({
|
|
@@ -164,7 +181,7 @@ export const runPdfAgent = async (
|
|
|
164
181
|
const providerAlias = resolveProviderAlias(config, request.provider)
|
|
165
182
|
const model = resolveModelForProvider(config, providerAlias, request.model)
|
|
166
183
|
if (!model) {
|
|
167
|
-
throw
|
|
184
|
+
throw badRequest("MODEL_REQUIRED", "model is required for OCR or table extraction; set agent.defaultModel")
|
|
168
185
|
}
|
|
169
186
|
|
|
170
187
|
if (request.operation === "ocr_pages") {
|
|
@@ -216,7 +233,9 @@ export const runPdfAgent = async (
|
|
|
216
233
|
})
|
|
217
234
|
const latex = extractTabularLatex(rawLatex)
|
|
218
235
|
if (!latex) {
|
|
219
|
-
throw
|
|
236
|
+
throw unprocessable("TABLE_LATEX_MISSING", `table extraction did not return valid LaTeX tabular for page ${page}`, {
|
|
237
|
+
page,
|
|
238
|
+
})
|
|
220
239
|
}
|
|
221
240
|
tables.push({ page, latex })
|
|
222
241
|
traceStep(opts, "end", "table.page", { page, chars: latex.length })
|
package/src/pdf-config.ts
CHANGED
|
@@ -37,6 +37,9 @@ const validateConfig = (config: EchoPdfConfig): EchoPdfConfig => {
|
|
|
37
37
|
) {
|
|
38
38
|
throw new Error("service.publicBaseUrl must start with http:// or https://")
|
|
39
39
|
}
|
|
40
|
+
if (typeof config.service.fileGet?.cacheTtlSeconds === "number" && config.service.fileGet.cacheTtlSeconds < 0) {
|
|
41
|
+
throw new Error("service.fileGet.cacheTtlSeconds must be >= 0")
|
|
42
|
+
}
|
|
40
43
|
if (!Number.isFinite(config.service.storage.maxFileBytes) || config.service.storage.maxFileBytes <= 0) {
|
|
41
44
|
throw new Error("service.storage.maxFileBytes must be positive")
|
|
42
45
|
}
|
|
@@ -73,6 +76,9 @@ export const loadEchoPdfConfig = (env: Env): EchoPdfConfig => {
|
|
|
73
76
|
const providerOverride = env.ECHO_PDF_DEFAULT_PROVIDER
|
|
74
77
|
const modelOverride = env.ECHO_PDF_DEFAULT_MODEL
|
|
75
78
|
const publicBaseUrlOverride = env.ECHO_PDF_PUBLIC_BASE_URL
|
|
79
|
+
const fileGetAuthHeaderOverride = env.ECHO_PDF_FILE_GET_AUTH_HEADER
|
|
80
|
+
const fileGetAuthEnvOverride = env.ECHO_PDF_FILE_GET_AUTH_ENV
|
|
81
|
+
const fileGetCacheTtlOverride = env.ECHO_PDF_FILE_GET_CACHE_TTL_SECONDS
|
|
76
82
|
const withOverrides: EchoPdfConfig = {
|
|
77
83
|
...resolved,
|
|
78
84
|
service: {
|
|
@@ -81,6 +87,23 @@ export const loadEchoPdfConfig = (env: Env): EchoPdfConfig => {
|
|
|
81
87
|
typeof publicBaseUrlOverride === "string" && publicBaseUrlOverride.trim().length > 0
|
|
82
88
|
? publicBaseUrlOverride.trim()
|
|
83
89
|
: resolved.service.publicBaseUrl,
|
|
90
|
+
fileGet: {
|
|
91
|
+
authHeader:
|
|
92
|
+
typeof fileGetAuthHeaderOverride === "string" && fileGetAuthHeaderOverride.trim().length > 0
|
|
93
|
+
? fileGetAuthHeaderOverride.trim()
|
|
94
|
+
: resolved.service.fileGet?.authHeader,
|
|
95
|
+
authEnv:
|
|
96
|
+
typeof fileGetAuthEnvOverride === "string" && fileGetAuthEnvOverride.trim().length > 0
|
|
97
|
+
? fileGetAuthEnvOverride.trim()
|
|
98
|
+
: resolved.service.fileGet?.authEnv,
|
|
99
|
+
cacheTtlSeconds: (() => {
|
|
100
|
+
if (typeof fileGetCacheTtlOverride === "string" && fileGetCacheTtlOverride.trim().length > 0) {
|
|
101
|
+
const value = Number(fileGetCacheTtlOverride)
|
|
102
|
+
return Number.isFinite(value) && value >= 0 ? Math.floor(value) : resolved.service.fileGet?.cacheTtlSeconds
|
|
103
|
+
}
|
|
104
|
+
return resolved.service.fileGet?.cacheTtlSeconds
|
|
105
|
+
})(),
|
|
106
|
+
},
|
|
84
107
|
},
|
|
85
108
|
agent: {
|
|
86
109
|
...resolved.agent,
|
package/src/pdf-types.ts
CHANGED
|
@@ -23,6 +23,11 @@ export interface EchoPdfConfig {
|
|
|
23
23
|
readonly service: {
|
|
24
24
|
readonly name: string
|
|
25
25
|
readonly publicBaseUrl?: string
|
|
26
|
+
readonly fileGet?: {
|
|
27
|
+
readonly authHeader?: string
|
|
28
|
+
readonly authEnv?: string
|
|
29
|
+
readonly cacheTtlSeconds?: number
|
|
30
|
+
}
|
|
26
31
|
readonly maxPdfBytes: number
|
|
27
32
|
readonly maxPagesPerRequest: number
|
|
28
33
|
readonly defaultRenderScale: number
|
package/src/r2-file-store.ts
CHANGED
|
@@ -79,20 +79,7 @@ export class R2FileStore implements FileStore {
|
|
|
79
79
|
}
|
|
80
80
|
|
|
81
81
|
async list(): Promise<ReadonlyArray<StoredFileMeta>> {
|
|
82
|
-
|
|
83
|
-
return listed.objects.map((obj) => {
|
|
84
|
-
const meta = (obj.customMetadata ?? {}) as MetaFields
|
|
85
|
-
const createdAt = parseCreatedAt(meta.createdAt, obj.uploaded)
|
|
86
|
-
const filename = meta.filename ?? toId(obj.key)
|
|
87
|
-
const mimeType = meta.mimeType ?? obj.httpMetadata?.contentType ?? "application/octet-stream"
|
|
88
|
-
return {
|
|
89
|
-
id: toId(obj.key),
|
|
90
|
-
filename,
|
|
91
|
-
mimeType,
|
|
92
|
-
sizeBytes: obj.size,
|
|
93
|
-
createdAt,
|
|
94
|
-
}
|
|
95
|
-
})
|
|
82
|
+
return await this.listAllFiles()
|
|
96
83
|
}
|
|
97
84
|
|
|
98
85
|
async delete(fileId: string): Promise<boolean> {
|
|
@@ -101,7 +88,7 @@ export class R2FileStore implements FileStore {
|
|
|
101
88
|
}
|
|
102
89
|
|
|
103
90
|
async stats(): Promise<unknown> {
|
|
104
|
-
const files = await this.
|
|
91
|
+
const files = await this.listAllFiles()
|
|
105
92
|
const totalBytes = files.reduce((sum, file) => sum + file.sizeBytes, 0)
|
|
106
93
|
return {
|
|
107
94
|
backend: "r2",
|
|
@@ -114,17 +101,24 @@ export class R2FileStore implements FileStore {
|
|
|
114
101
|
}
|
|
115
102
|
|
|
116
103
|
async cleanup(): Promise<unknown> {
|
|
117
|
-
const
|
|
118
|
-
const
|
|
119
|
-
const
|
|
120
|
-
|
|
121
|
-
|
|
104
|
+
const files = await this.listAllFiles()
|
|
105
|
+
const expired = files.filter((f) => isExpired(f.createdAt, this.policy.ttlHours))
|
|
106
|
+
const active = files.filter((f) => !isExpired(f.createdAt, this.policy.ttlHours))
|
|
107
|
+
if (expired.length > 0) {
|
|
108
|
+
await this.bucket.delete(expired.map((f) => toKey(f.id)))
|
|
109
|
+
}
|
|
110
|
+
const evict = this.pickEvictions(active, 0)
|
|
111
|
+
if (evict.length > 0) {
|
|
112
|
+
await this.bucket.delete(evict.map((f) => toKey(f.id)))
|
|
113
|
+
}
|
|
114
|
+
const evictIds = new Set(evict.map((f) => f.id))
|
|
115
|
+
const after = active.filter((f) => !evictIds.has(f.id))
|
|
122
116
|
const totalBytes = after.reduce((sum, file) => sum + file.sizeBytes, 0)
|
|
123
117
|
return {
|
|
124
118
|
backend: "r2",
|
|
125
119
|
policy: this.policy,
|
|
126
|
-
deletedExpired,
|
|
127
|
-
deletedEvicted,
|
|
120
|
+
deletedExpired: expired.length,
|
|
121
|
+
deletedEvicted: evict.length,
|
|
128
122
|
stats: {
|
|
129
123
|
fileCount: after.length,
|
|
130
124
|
totalBytes,
|
|
@@ -133,12 +127,19 @@ export class R2FileStore implements FileStore {
|
|
|
133
127
|
}
|
|
134
128
|
|
|
135
129
|
private async cleanupInternal(incomingBytes: number): Promise<void> {
|
|
136
|
-
const files = await this.
|
|
137
|
-
|
|
138
|
-
const
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
130
|
+
const files = await this.listAllFiles()
|
|
131
|
+
const expired = files.filter((f) => isExpired(f.createdAt, this.policy.ttlHours))
|
|
132
|
+
const active = files.filter((f) => !isExpired(f.createdAt, this.policy.ttlHours))
|
|
133
|
+
if (expired.length > 0) {
|
|
134
|
+
await this.bucket.delete(expired.map((f) => toKey(f.id)))
|
|
135
|
+
}
|
|
136
|
+
const evict = this.pickEvictions(active, incomingBytes)
|
|
137
|
+
if (evict.length > 0) {
|
|
138
|
+
await this.bucket.delete(evict.map((f) => toKey(f.id)))
|
|
139
|
+
}
|
|
140
|
+
const evictIds = new Set(evict.map((f) => f.id))
|
|
141
|
+
const remaining = active.filter((f) => !evictIds.has(f.id))
|
|
142
|
+
const finalTotal = remaining.reduce((sum, file) => sum + file.sizeBytes, 0)
|
|
142
143
|
if (finalTotal + incomingBytes > this.policy.maxTotalBytes) {
|
|
143
144
|
const err = new Error(
|
|
144
145
|
`storage quota exceeded: total ${finalTotal} + incoming ${incomingBytes} > maxTotalBytes ${this.policy.maxTotalBytes}`
|
|
@@ -150,17 +151,10 @@ export class R2FileStore implements FileStore {
|
|
|
150
151
|
}
|
|
151
152
|
}
|
|
152
153
|
|
|
153
|
-
private
|
|
154
|
-
const expired = files.filter((f) => isExpired(f.createdAt, this.policy.ttlHours))
|
|
155
|
-
if (expired.length === 0) return 0
|
|
156
|
-
await this.bucket.delete(expired.map((f) => toKey(f.id)))
|
|
157
|
-
return expired.length
|
|
158
|
-
}
|
|
159
|
-
|
|
160
|
-
private async evictIfNeeded(files: ReadonlyArray<StoredFileMeta>, incomingBytes: number): Promise<number> {
|
|
154
|
+
private pickEvictions(files: ReadonlyArray<StoredFileMeta>, incomingBytes: number): StoredFileMeta[] {
|
|
161
155
|
const totalBytes = files.reduce((sum, f) => sum + f.sizeBytes, 0)
|
|
162
156
|
const projected = totalBytes + incomingBytes
|
|
163
|
-
if (projected <= this.policy.maxTotalBytes) return
|
|
157
|
+
if (projected <= this.policy.maxTotalBytes) return []
|
|
164
158
|
|
|
165
159
|
const needFree = projected - this.policy.maxTotalBytes
|
|
166
160
|
const candidates = [...files].sort((a, b) => Date.parse(a.createdAt) - Date.parse(b.createdAt))
|
|
@@ -172,9 +166,30 @@ export class R2FileStore implements FileStore {
|
|
|
172
166
|
if (freed >= needFree) break
|
|
173
167
|
if (evict.length >= this.policy.cleanupBatchSize) break
|
|
174
168
|
}
|
|
175
|
-
|
|
176
|
-
await this.bucket.delete(evict.map((f) => toKey(f.id)))
|
|
177
|
-
return evict.length
|
|
169
|
+
return evict
|
|
178
170
|
}
|
|
179
|
-
}
|
|
180
171
|
|
|
172
|
+
private async listAllFiles(): Promise<StoredFileMeta[]> {
|
|
173
|
+
const files: StoredFileMeta[] = []
|
|
174
|
+
let cursor: string | undefined
|
|
175
|
+
while (true) {
|
|
176
|
+
const listed = await this.bucket.list({ prefix: PREFIX, limit: 1000, cursor })
|
|
177
|
+
for (const obj of listed.objects) {
|
|
178
|
+
const meta = (obj.customMetadata ?? {}) as MetaFields
|
|
179
|
+
const createdAt = parseCreatedAt(meta.createdAt, obj.uploaded)
|
|
180
|
+
const filename = meta.filename ?? toId(obj.key)
|
|
181
|
+
const mimeType = meta.mimeType ?? obj.httpMetadata?.contentType ?? "application/octet-stream"
|
|
182
|
+
files.push({
|
|
183
|
+
id: toId(obj.key),
|
|
184
|
+
filename,
|
|
185
|
+
mimeType,
|
|
186
|
+
sizeBytes: obj.size,
|
|
187
|
+
createdAt,
|
|
188
|
+
})
|
|
189
|
+
}
|
|
190
|
+
if (listed.truncated !== true || !listed.cursor) break
|
|
191
|
+
cursor = listed.cursor
|
|
192
|
+
}
|
|
193
|
+
return files
|
|
194
|
+
}
|
|
195
|
+
}
|
package/src/response-schema.ts
CHANGED
|
@@ -15,6 +15,10 @@ export interface ToolOutputEnvelope {
|
|
|
15
15
|
readonly artifacts: ToolArtifact[]
|
|
16
16
|
}
|
|
17
17
|
|
|
18
|
+
const MAX_TEXT_STRING = 1200
|
|
19
|
+
const MAX_TEXT_ARRAY = 40
|
|
20
|
+
const MAX_TEXT_DEPTH = 8
|
|
21
|
+
|
|
18
22
|
const asObj = (value: unknown): JsonObject =>
|
|
19
23
|
typeof value === "object" && value !== null && !Array.isArray(value)
|
|
20
24
|
? (value as JsonObject)
|
|
@@ -113,6 +117,38 @@ const summarizeData = (data: unknown): string => {
|
|
|
113
117
|
return "Tool executed successfully."
|
|
114
118
|
}
|
|
115
119
|
|
|
120
|
+
const sanitizeString = (value: string): string => {
|
|
121
|
+
if (value.startsWith("data:")) {
|
|
122
|
+
const [head] = value.split(",", 1)
|
|
123
|
+
return `${head},<omitted>`
|
|
124
|
+
}
|
|
125
|
+
if (/^[A-Za-z0-9+/=]{300,}$/.test(value)) {
|
|
126
|
+
return `<base64 omitted len=${value.length}>`
|
|
127
|
+
}
|
|
128
|
+
if (value.length > MAX_TEXT_STRING) {
|
|
129
|
+
return `${value.slice(0, MAX_TEXT_STRING)}...(truncated ${value.length - MAX_TEXT_STRING} chars)`
|
|
130
|
+
}
|
|
131
|
+
return value
|
|
132
|
+
}
|
|
133
|
+
|
|
134
|
+
const sanitizeForText = (value: unknown, depth = 0): unknown => {
|
|
135
|
+
if (depth >= MAX_TEXT_DEPTH) return "<max-depth>"
|
|
136
|
+
if (typeof value === "string") return sanitizeString(value)
|
|
137
|
+
if (typeof value !== "object" || value === null) return value
|
|
138
|
+
if (Array.isArray(value)) {
|
|
139
|
+
const items = value.slice(0, MAX_TEXT_ARRAY).map((item) => sanitizeForText(item, depth + 1))
|
|
140
|
+
if (value.length > MAX_TEXT_ARRAY) {
|
|
141
|
+
items.push(`<truncated ${value.length - MAX_TEXT_ARRAY} items>`)
|
|
142
|
+
}
|
|
143
|
+
return items
|
|
144
|
+
}
|
|
145
|
+
const out: Record<string, unknown> = {}
|
|
146
|
+
for (const [key, nested] of Object.entries(value)) {
|
|
147
|
+
out[key] = sanitizeForText(nested, depth + 1)
|
|
148
|
+
}
|
|
149
|
+
return out
|
|
150
|
+
}
|
|
151
|
+
|
|
116
152
|
export const buildMcpContent = (envelope: ToolOutputEnvelope): Array<Record<string, unknown>> => {
|
|
117
153
|
const lines: string[] = [summarizeData(envelope.data)]
|
|
118
154
|
if (envelope.artifacts.length > 0) {
|
|
@@ -130,7 +166,7 @@ export const buildMcpContent = (envelope: ToolOutputEnvelope): Array<Record<stri
|
|
|
130
166
|
}
|
|
131
167
|
}
|
|
132
168
|
lines.push("")
|
|
133
|
-
lines.push(JSON.stringify(envelope, null, 2))
|
|
169
|
+
lines.push(JSON.stringify(sanitizeForText(envelope), null, 2))
|
|
134
170
|
|
|
135
171
|
const content: Array<Record<string, unknown>> = [{ type: "text", text: lines.join("\n") }]
|
|
136
172
|
for (const artifact of envelope.artifacts) {
|