@echofiles/echo-pdf 0.3.0 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +45 -0
- package/echo-pdf.config.json +3 -0
- package/package.json +1 -1
- package/src/http-error.ts +21 -0
- package/src/index.ts +29 -2
- package/src/mcp-server.ts +11 -1
- package/src/pdf-agent.ts +28 -9
- package/src/pdf-config.ts +23 -0
- package/src/pdf-types.ts +5 -0
- package/src/r2-file-store.ts +56 -41
- package/src/response-schema.ts +37 -1
package/README.md
CHANGED
|
@@ -88,6 +88,33 @@ echo-pdf mcp tools
|
|
|
88
88
|
echo-pdf mcp call --tool file_ops --args '{"op":"list"}'
|
|
89
89
|
```
|
|
90
90
|
|
|
91
|
+
### 3.1.1 纯 MCP 场景推荐流程(本地 PDF)
|
|
92
|
+
|
|
93
|
+
远端 MCP server 无法直接读取你本机文件路径。推荐两步:
|
|
94
|
+
|
|
95
|
+
1. 先通过 HTTP 上传本地 PDF,拿到 `fileId`
|
|
96
|
+
2. 再用 MCP 工具传 `fileId` 调用
|
|
97
|
+
|
|
98
|
+
示例:
|
|
99
|
+
|
|
100
|
+
```bash
|
|
101
|
+
curl -sS -X POST https://echo-pdf.echofilesai.workers.dev/api/files/upload \
|
|
102
|
+
-F 'file=@./input.pdf'
|
|
103
|
+
|
|
104
|
+
echo-pdf mcp call --tool pdf_extract_pages --args '{"fileId":"<FILE_ID>","pages":[1]}'
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
### 3.1.2 不上传文件的 URL ingest 流程
|
|
108
|
+
|
|
109
|
+
如果 PDF 已经在公网可访问,直接传 `url`:
|
|
110
|
+
|
|
111
|
+
```bash
|
|
112
|
+
echo-pdf mcp call --tool pdf_extract_pages --args '{
|
|
113
|
+
"url":"https://example.com/sample.pdf",
|
|
114
|
+
"pages":[1]
|
|
115
|
+
}'
|
|
116
|
+
```
|
|
117
|
+
|
|
91
118
|
### 3.2 给客户端生成 MCP 配置片段
|
|
92
119
|
|
|
93
120
|
```bash
|
|
@@ -107,6 +134,12 @@ echo-pdf setup add json
|
|
|
107
134
|
- `pdf_tables_to_latex`
|
|
108
135
|
- `file_ops`
|
|
109
136
|
|
|
137
|
+
MCP 输出策略:
|
|
138
|
+
|
|
139
|
+
- `pdf_extract_pages` 在 MCP 下默认 `returnMode=url`(不传 `returnMode` 时生效)
|
|
140
|
+
- MCP `text` 会对大字段做去二进制/截断,避免把大段 base64 塞进上下文
|
|
141
|
+
- 二进制结果请优先使用 `resource_link` 中的下载地址
|
|
142
|
+
|
|
110
143
|
## 4. Web UI 使用
|
|
111
144
|
|
|
112
145
|
打开:
|
|
@@ -183,6 +216,7 @@ curl -sS -X POST https://echo-pdf.echofilesai.workers.dev/tools/call \
|
|
|
183
216
|
- `agent.defaultProvider`
|
|
184
217
|
- `agent.defaultModel`
|
|
185
218
|
- `service.publicBaseUrl`
|
|
219
|
+
- `service.fileGet.cacheTtlSeconds`
|
|
186
220
|
- `service.maxPdfBytes`
|
|
187
221
|
- `service.storage.maxFileBytes`
|
|
188
222
|
- `service.storage.maxTotalBytes`
|
|
@@ -205,6 +239,8 @@ curl -sS -X POST https://echo-pdf.echofilesai.workers.dev/tools/call \
|
|
|
205
239
|
- `ECHO_PDF_DEFAULT_PROVIDER`
|
|
206
240
|
- `ECHO_PDF_DEFAULT_MODEL`
|
|
207
241
|
- `ECHO_PDF_PUBLIC_BASE_URL`(可选,强制 artifacts 生成外部可访问绝对 URL)
|
|
242
|
+
- `ECHO_PDF_FILE_GET_CACHE_TTL_SECONDS`(可选,`/api/files/get` 缓存秒数,`0` 表示 `no-store`)
|
|
243
|
+
- `ECHO_PDF_FILE_GET_AUTH_HEADER` + `ECHO_PDF_FILE_GET_AUTH_ENV`(可选,启用下载端点 header 鉴权)
|
|
208
244
|
- `ECHO_PDF_MCP_KEY`(可选,启用 MCP 鉴权)
|
|
209
245
|
- `ECHO_PDF_WORKER_NAME`(CLI 默认 URL 推导)
|
|
210
246
|
|
|
@@ -263,3 +299,12 @@ curl -sS -X POST https://echo-pdf.echofilesai.workers.dev/tools/call \
|
|
|
263
299
|
"arguments":{"fileId":"<FILE_ID>","pages":[1],"returnMode":"url"}
|
|
264
300
|
}'
|
|
265
301
|
```
|
|
302
|
+
|
|
303
|
+
### 8.4 错误码语义
|
|
304
|
+
|
|
305
|
+
- 客户端输入错误返回稳定 `4xx + code`,例如:
|
|
306
|
+
- `PAGES_REQUIRED`(400)
|
|
307
|
+
- `PAGE_OUT_OF_RANGE`(400)
|
|
308
|
+
- `MISSING_FILE_INPUT`(400)
|
|
309
|
+
- `FILE_NOT_FOUND`(404)
|
|
310
|
+
- 服务端故障返回 `5xx`。
|
package/echo-pdf.config.json
CHANGED
package/package.json
CHANGED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
export class HttpError extends Error {
|
|
2
|
+
readonly status: number
|
|
3
|
+
readonly code: string
|
|
4
|
+
readonly details?: unknown
|
|
5
|
+
|
|
6
|
+
constructor(status: number, code: string, message: string, details?: unknown) {
|
|
7
|
+
super(message)
|
|
8
|
+
this.status = status
|
|
9
|
+
this.code = code
|
|
10
|
+
this.details = details
|
|
11
|
+
}
|
|
12
|
+
}
|
|
13
|
+
|
|
14
|
+
export const badRequest = (code: string, message: string, details?: unknown): HttpError =>
|
|
15
|
+
new HttpError(400, code, message, details)
|
|
16
|
+
|
|
17
|
+
export const notFound = (code: string, message: string, details?: unknown): HttpError =>
|
|
18
|
+
new HttpError(404, code, message, details)
|
|
19
|
+
|
|
20
|
+
export const unprocessable = (code: string, message: string, details?: unknown): HttpError =>
|
|
21
|
+
new HttpError(422, code, message, details)
|
package/src/index.ts
CHANGED
|
@@ -61,6 +61,21 @@ const asObj = (value: unknown): JsonObject =>
|
|
|
61
61
|
const resolvePublicBaseUrl = (request: Request, configured?: string): string =>
|
|
62
62
|
typeof configured === "string" && configured.length > 0 ? configured : request.url
|
|
63
63
|
|
|
64
|
+
const sanitizeDownloadFilename = (filename: string): string => {
|
|
65
|
+
const cleaned = filename
|
|
66
|
+
.replace(/[\r\n"]/g, "")
|
|
67
|
+
.replace(/[^\x20-\x7E]+/g, "")
|
|
68
|
+
.trim()
|
|
69
|
+
return cleaned.length > 0 ? cleaned : "download.bin"
|
|
70
|
+
}
|
|
71
|
+
|
|
72
|
+
const isFileGetAuthorized = (request: Request, env: Env, config: { authHeader?: string; authEnv?: string }): boolean => {
|
|
73
|
+
if (!config.authHeader || !config.authEnv) return true
|
|
74
|
+
const required = env[config.authEnv]
|
|
75
|
+
if (typeof required !== "string" || required.length === 0) return true
|
|
76
|
+
return request.headers.get(config.authHeader) === required
|
|
77
|
+
}
|
|
78
|
+
|
|
64
79
|
const sseResponse = (stream: ReadableStream<Uint8Array>): Response =>
|
|
65
80
|
new Response(stream, {
|
|
66
81
|
headers: {
|
|
@@ -147,6 +162,10 @@ export default {
|
|
|
147
162
|
version: config.mcp.version,
|
|
148
163
|
authHeader: config.mcp.authHeader ?? null,
|
|
149
164
|
},
|
|
165
|
+
fileGet: {
|
|
166
|
+
authHeader: config.service.fileGet?.authHeader ?? null,
|
|
167
|
+
cacheTtlSeconds: config.service.fileGet?.cacheTtlSeconds ?? 300,
|
|
168
|
+
},
|
|
150
169
|
})
|
|
151
170
|
}
|
|
152
171
|
|
|
@@ -304,6 +323,10 @@ export default {
|
|
|
304
323
|
}
|
|
305
324
|
|
|
306
325
|
if (request.method === "GET" && url.pathname === "/api/files/get") {
|
|
326
|
+
const fileGetConfig = config.service.fileGet ?? {}
|
|
327
|
+
if (!isFileGetAuthorized(request, env, fileGetConfig)) {
|
|
328
|
+
return json({ error: "Unauthorized", code: "UNAUTHORIZED" }, 401)
|
|
329
|
+
}
|
|
307
330
|
const fileId = url.searchParams.get("fileId") || ""
|
|
308
331
|
if (!fileId) return json({ error: "Missing fileId" }, 400)
|
|
309
332
|
const file = await fileStore.get(fileId)
|
|
@@ -311,9 +334,13 @@ export default {
|
|
|
311
334
|
const download = url.searchParams.get("download") === "1"
|
|
312
335
|
const headers = new Headers()
|
|
313
336
|
headers.set("Content-Type", file.mimeType)
|
|
314
|
-
|
|
337
|
+
const cacheTtl = Number(fileGetConfig.cacheTtlSeconds ?? 300)
|
|
338
|
+
const cacheControl = cacheTtl > 0
|
|
339
|
+
? `public, max-age=${Math.floor(cacheTtl)}, s-maxage=${Math.floor(cacheTtl)}`
|
|
340
|
+
: "no-store"
|
|
341
|
+
headers.set("Cache-Control", cacheControl)
|
|
315
342
|
if (download) {
|
|
316
|
-
headers.set("Content-Disposition", `attachment; filename=\"${file.filename
|
|
343
|
+
headers.set("Content-Disposition", `attachment; filename=\"${sanitizeDownloadFilename(file.filename)}\"`)
|
|
317
344
|
}
|
|
318
345
|
return new Response(file.bytes, { status: 200, headers })
|
|
319
346
|
}
|
package/src/mcp-server.ts
CHANGED
|
@@ -48,6 +48,16 @@ const maybeAuthorized = (request: Request, env: Env, config: EchoPdfConfig): boo
|
|
|
48
48
|
const resolvePublicBaseUrl = (request: Request, configured?: string): string =>
|
|
49
49
|
typeof configured === "string" && configured.length > 0 ? configured : request.url
|
|
50
50
|
|
|
51
|
+
const prepareMcpToolArgs = (toolName: string, args: Record<string, unknown>): Record<string, unknown> => {
|
|
52
|
+
if (toolName === "pdf_extract_pages") {
|
|
53
|
+
const mode = typeof args.returnMode === "string" ? args.returnMode : ""
|
|
54
|
+
if (!mode) {
|
|
55
|
+
return { ...args, returnMode: "url" }
|
|
56
|
+
}
|
|
57
|
+
}
|
|
58
|
+
return args
|
|
59
|
+
}
|
|
60
|
+
|
|
51
61
|
export const handleMcpRequest = async (
|
|
52
62
|
request: Request,
|
|
53
63
|
env: Env,
|
|
@@ -103,7 +113,7 @@ export const handleMcpRequest = async (
|
|
|
103
113
|
}
|
|
104
114
|
|
|
105
115
|
const toolName = typeof params.name === "string" ? params.name : ""
|
|
106
|
-
const args = asObj(params.arguments)
|
|
116
|
+
const args = prepareMcpToolArgs(toolName, asObj(params.arguments))
|
|
107
117
|
if (!toolName) {
|
|
108
118
|
return err(id, -32602, "Invalid params: name is required", {
|
|
109
119
|
code: "INVALID_PARAMS",
|
package/src/pdf-agent.ts
CHANGED
|
@@ -2,6 +2,7 @@ import type { Env, FileStore, ReturnMode } from "./types"
|
|
|
2
2
|
import type { AgentTraceEvent, EchoPdfConfig, PdfOperationRequest } from "./pdf-types"
|
|
3
3
|
import { resolveModelForProvider, resolveProviderAlias } from "./agent-defaults"
|
|
4
4
|
import { fromBase64, normalizeReturnMode, toDataUrl } from "./file-utils"
|
|
5
|
+
import { badRequest, notFound, unprocessable } from "./http-error"
|
|
5
6
|
import { extractPdfPageText, getPdfPageCount, renderPdfPageToPng, toBytes } from "./pdfium-engine"
|
|
6
7
|
import { visionRecognize } from "./provider-client"
|
|
7
8
|
|
|
@@ -22,11 +23,20 @@ const traceStep = (
|
|
|
22
23
|
}
|
|
23
24
|
|
|
24
25
|
const ensurePages = (pages: ReadonlyArray<number>, pageCount: number, maxPages: number): number[] => {
|
|
25
|
-
if (pages.length === 0) throw
|
|
26
|
-
if (pages.length > maxPages)
|
|
26
|
+
if (pages.length === 0) throw badRequest("PAGES_REQUIRED", "At least one page is required")
|
|
27
|
+
if (pages.length > maxPages) {
|
|
28
|
+
throw badRequest("TOO_MANY_PAGES", `Page count exceeds maxPagesPerRequest (${maxPages})`, {
|
|
29
|
+
maxPagesPerRequest: maxPages,
|
|
30
|
+
providedPages: pages.length,
|
|
31
|
+
})
|
|
32
|
+
}
|
|
27
33
|
for (const page of pages) {
|
|
28
34
|
if (!Number.isInteger(page) || page < 1 || page > pageCount) {
|
|
29
|
-
throw
|
|
35
|
+
throw badRequest("PAGE_OUT_OF_RANGE", `Page ${page} out of range 1..${pageCount}`, {
|
|
36
|
+
page,
|
|
37
|
+
min: 1,
|
|
38
|
+
max: pageCount,
|
|
39
|
+
})
|
|
30
40
|
}
|
|
31
41
|
}
|
|
32
42
|
return [...new Set(pages)].sort((a, b) => a - b)
|
|
@@ -45,7 +55,7 @@ export const ingestPdfFromPayload = async (
|
|
|
45
55
|
if (input.fileId) {
|
|
46
56
|
const existing = await opts.fileStore.get(input.fileId)
|
|
47
57
|
if (!existing) {
|
|
48
|
-
throw
|
|
58
|
+
throw notFound("FILE_NOT_FOUND", `File not found: ${input.fileId}`, { fileId: input.fileId })
|
|
49
59
|
}
|
|
50
60
|
return {
|
|
51
61
|
id: existing.id,
|
|
@@ -59,7 +69,11 @@ export const ingestPdfFromPayload = async (
|
|
|
59
69
|
|
|
60
70
|
if (input.url) {
|
|
61
71
|
traceStep(opts, "start", "file.fetch.url", { url: input.url })
|
|
62
|
-
|
|
72
|
+
try {
|
|
73
|
+
bytes = await toBytes(input.url)
|
|
74
|
+
} catch (error) {
|
|
75
|
+
throw badRequest("URL_FETCH_FAILED", `Unable to fetch PDF from url: ${error instanceof Error ? error.message : String(error)}`)
|
|
76
|
+
}
|
|
63
77
|
try {
|
|
64
78
|
const u = new URL(input.url)
|
|
65
79
|
filename = decodeURIComponent(u.pathname.split("/").pop() || filename)
|
|
@@ -74,10 +88,13 @@ export const ingestPdfFromPayload = async (
|
|
|
74
88
|
}
|
|
75
89
|
|
|
76
90
|
if (!bytes) {
|
|
77
|
-
throw
|
|
91
|
+
throw badRequest("MISSING_FILE_INPUT", "Missing file input. Provide fileId, url or base64")
|
|
78
92
|
}
|
|
79
93
|
if (bytes.byteLength > config.service.maxPdfBytes) {
|
|
80
|
-
throw
|
|
94
|
+
throw badRequest("PDF_TOO_LARGE", `PDF exceeds max size (${config.service.maxPdfBytes} bytes)`, {
|
|
95
|
+
maxPdfBytes: config.service.maxPdfBytes,
|
|
96
|
+
sizeBytes: bytes.byteLength,
|
|
97
|
+
})
|
|
81
98
|
}
|
|
82
99
|
|
|
83
100
|
const meta = await opts.fileStore.put({
|
|
@@ -164,7 +181,7 @@ export const runPdfAgent = async (
|
|
|
164
181
|
const providerAlias = resolveProviderAlias(config, request.provider)
|
|
165
182
|
const model = resolveModelForProvider(config, providerAlias, request.model)
|
|
166
183
|
if (!model) {
|
|
167
|
-
throw
|
|
184
|
+
throw badRequest("MODEL_REQUIRED", "model is required for OCR or table extraction; set agent.defaultModel")
|
|
168
185
|
}
|
|
169
186
|
|
|
170
187
|
if (request.operation === "ocr_pages") {
|
|
@@ -216,7 +233,9 @@ export const runPdfAgent = async (
|
|
|
216
233
|
})
|
|
217
234
|
const latex = extractTabularLatex(rawLatex)
|
|
218
235
|
if (!latex) {
|
|
219
|
-
throw
|
|
236
|
+
throw unprocessable("TABLE_LATEX_MISSING", `table extraction did not return valid LaTeX tabular for page ${page}`, {
|
|
237
|
+
page,
|
|
238
|
+
})
|
|
220
239
|
}
|
|
221
240
|
tables.push({ page, latex })
|
|
222
241
|
traceStep(opts, "end", "table.page", { page, chars: latex.length })
|
package/src/pdf-config.ts
CHANGED
|
@@ -37,6 +37,9 @@ const validateConfig = (config: EchoPdfConfig): EchoPdfConfig => {
|
|
|
37
37
|
) {
|
|
38
38
|
throw new Error("service.publicBaseUrl must start with http:// or https://")
|
|
39
39
|
}
|
|
40
|
+
if (typeof config.service.fileGet?.cacheTtlSeconds === "number" && config.service.fileGet.cacheTtlSeconds < 0) {
|
|
41
|
+
throw new Error("service.fileGet.cacheTtlSeconds must be >= 0")
|
|
42
|
+
}
|
|
40
43
|
if (!Number.isFinite(config.service.storage.maxFileBytes) || config.service.storage.maxFileBytes <= 0) {
|
|
41
44
|
throw new Error("service.storage.maxFileBytes must be positive")
|
|
42
45
|
}
|
|
@@ -73,6 +76,9 @@ export const loadEchoPdfConfig = (env: Env): EchoPdfConfig => {
|
|
|
73
76
|
const providerOverride = env.ECHO_PDF_DEFAULT_PROVIDER
|
|
74
77
|
const modelOverride = env.ECHO_PDF_DEFAULT_MODEL
|
|
75
78
|
const publicBaseUrlOverride = env.ECHO_PDF_PUBLIC_BASE_URL
|
|
79
|
+
const fileGetAuthHeaderOverride = env.ECHO_PDF_FILE_GET_AUTH_HEADER
|
|
80
|
+
const fileGetAuthEnvOverride = env.ECHO_PDF_FILE_GET_AUTH_ENV
|
|
81
|
+
const fileGetCacheTtlOverride = env.ECHO_PDF_FILE_GET_CACHE_TTL_SECONDS
|
|
76
82
|
const withOverrides: EchoPdfConfig = {
|
|
77
83
|
...resolved,
|
|
78
84
|
service: {
|
|
@@ -81,6 +87,23 @@ export const loadEchoPdfConfig = (env: Env): EchoPdfConfig => {
|
|
|
81
87
|
typeof publicBaseUrlOverride === "string" && publicBaseUrlOverride.trim().length > 0
|
|
82
88
|
? publicBaseUrlOverride.trim()
|
|
83
89
|
: resolved.service.publicBaseUrl,
|
|
90
|
+
fileGet: {
|
|
91
|
+
authHeader:
|
|
92
|
+
typeof fileGetAuthHeaderOverride === "string" && fileGetAuthHeaderOverride.trim().length > 0
|
|
93
|
+
? fileGetAuthHeaderOverride.trim()
|
|
94
|
+
: resolved.service.fileGet?.authHeader,
|
|
95
|
+
authEnv:
|
|
96
|
+
typeof fileGetAuthEnvOverride === "string" && fileGetAuthEnvOverride.trim().length > 0
|
|
97
|
+
? fileGetAuthEnvOverride.trim()
|
|
98
|
+
: resolved.service.fileGet?.authEnv,
|
|
99
|
+
cacheTtlSeconds: (() => {
|
|
100
|
+
if (typeof fileGetCacheTtlOverride === "string" && fileGetCacheTtlOverride.trim().length > 0) {
|
|
101
|
+
const value = Number(fileGetCacheTtlOverride)
|
|
102
|
+
return Number.isFinite(value) && value >= 0 ? Math.floor(value) : resolved.service.fileGet?.cacheTtlSeconds
|
|
103
|
+
}
|
|
104
|
+
return resolved.service.fileGet?.cacheTtlSeconds
|
|
105
|
+
})(),
|
|
106
|
+
},
|
|
84
107
|
},
|
|
85
108
|
agent: {
|
|
86
109
|
...resolved.agent,
|
package/src/pdf-types.ts
CHANGED
|
@@ -23,6 +23,11 @@ export interface EchoPdfConfig {
|
|
|
23
23
|
readonly service: {
|
|
24
24
|
readonly name: string
|
|
25
25
|
readonly publicBaseUrl?: string
|
|
26
|
+
readonly fileGet?: {
|
|
27
|
+
readonly authHeader?: string
|
|
28
|
+
readonly authEnv?: string
|
|
29
|
+
readonly cacheTtlSeconds?: number
|
|
30
|
+
}
|
|
26
31
|
readonly maxPdfBytes: number
|
|
27
32
|
readonly maxPagesPerRequest: number
|
|
28
33
|
readonly defaultRenderScale: number
|
package/src/r2-file-store.ts
CHANGED
|
@@ -79,20 +79,7 @@ export class R2FileStore implements FileStore {
|
|
|
79
79
|
}
|
|
80
80
|
|
|
81
81
|
async list(): Promise<ReadonlyArray<StoredFileMeta>> {
|
|
82
|
-
|
|
83
|
-
return listed.objects.map((obj) => {
|
|
84
|
-
const meta = (obj.customMetadata ?? {}) as MetaFields
|
|
85
|
-
const createdAt = parseCreatedAt(meta.createdAt, obj.uploaded)
|
|
86
|
-
const filename = meta.filename ?? toId(obj.key)
|
|
87
|
-
const mimeType = meta.mimeType ?? obj.httpMetadata?.contentType ?? "application/octet-stream"
|
|
88
|
-
return {
|
|
89
|
-
id: toId(obj.key),
|
|
90
|
-
filename,
|
|
91
|
-
mimeType,
|
|
92
|
-
sizeBytes: obj.size,
|
|
93
|
-
createdAt,
|
|
94
|
-
}
|
|
95
|
-
})
|
|
82
|
+
return await this.listAllFiles()
|
|
96
83
|
}
|
|
97
84
|
|
|
98
85
|
async delete(fileId: string): Promise<boolean> {
|
|
@@ -101,7 +88,7 @@ export class R2FileStore implements FileStore {
|
|
|
101
88
|
}
|
|
102
89
|
|
|
103
90
|
async stats(): Promise<unknown> {
|
|
104
|
-
const files = await this.
|
|
91
|
+
const files = await this.listAllFiles()
|
|
105
92
|
const totalBytes = files.reduce((sum, file) => sum + file.sizeBytes, 0)
|
|
106
93
|
return {
|
|
107
94
|
backend: "r2",
|
|
@@ -114,17 +101,24 @@ export class R2FileStore implements FileStore {
|
|
|
114
101
|
}
|
|
115
102
|
|
|
116
103
|
async cleanup(): Promise<unknown> {
|
|
117
|
-
const
|
|
118
|
-
const
|
|
119
|
-
const
|
|
120
|
-
|
|
121
|
-
|
|
104
|
+
const files = await this.listAllFiles()
|
|
105
|
+
const expired = files.filter((f) => isExpired(f.createdAt, this.policy.ttlHours))
|
|
106
|
+
const active = files.filter((f) => !isExpired(f.createdAt, this.policy.ttlHours))
|
|
107
|
+
if (expired.length > 0) {
|
|
108
|
+
await this.bucket.delete(expired.map((f) => toKey(f.id)))
|
|
109
|
+
}
|
|
110
|
+
const evict = this.pickEvictions(active, 0)
|
|
111
|
+
if (evict.length > 0) {
|
|
112
|
+
await this.bucket.delete(evict.map((f) => toKey(f.id)))
|
|
113
|
+
}
|
|
114
|
+
const evictIds = new Set(evict.map((f) => f.id))
|
|
115
|
+
const after = active.filter((f) => !evictIds.has(f.id))
|
|
122
116
|
const totalBytes = after.reduce((sum, file) => sum + file.sizeBytes, 0)
|
|
123
117
|
return {
|
|
124
118
|
backend: "r2",
|
|
125
119
|
policy: this.policy,
|
|
126
|
-
deletedExpired,
|
|
127
|
-
deletedEvicted,
|
|
120
|
+
deletedExpired: expired.length,
|
|
121
|
+
deletedEvicted: evict.length,
|
|
128
122
|
stats: {
|
|
129
123
|
fileCount: after.length,
|
|
130
124
|
totalBytes,
|
|
@@ -133,12 +127,19 @@ export class R2FileStore implements FileStore {
|
|
|
133
127
|
}
|
|
134
128
|
|
|
135
129
|
private async cleanupInternal(incomingBytes: number): Promise<void> {
|
|
136
|
-
const files = await this.
|
|
137
|
-
|
|
138
|
-
const
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
130
|
+
const files = await this.listAllFiles()
|
|
131
|
+
const expired = files.filter((f) => isExpired(f.createdAt, this.policy.ttlHours))
|
|
132
|
+
const active = files.filter((f) => !isExpired(f.createdAt, this.policy.ttlHours))
|
|
133
|
+
if (expired.length > 0) {
|
|
134
|
+
await this.bucket.delete(expired.map((f) => toKey(f.id)))
|
|
135
|
+
}
|
|
136
|
+
const evict = this.pickEvictions(active, incomingBytes)
|
|
137
|
+
if (evict.length > 0) {
|
|
138
|
+
await this.bucket.delete(evict.map((f) => toKey(f.id)))
|
|
139
|
+
}
|
|
140
|
+
const evictIds = new Set(evict.map((f) => f.id))
|
|
141
|
+
const remaining = active.filter((f) => !evictIds.has(f.id))
|
|
142
|
+
const finalTotal = remaining.reduce((sum, file) => sum + file.sizeBytes, 0)
|
|
142
143
|
if (finalTotal + incomingBytes > this.policy.maxTotalBytes) {
|
|
143
144
|
const err = new Error(
|
|
144
145
|
`storage quota exceeded: total ${finalTotal} + incoming ${incomingBytes} > maxTotalBytes ${this.policy.maxTotalBytes}`
|
|
@@ -150,17 +151,10 @@ export class R2FileStore implements FileStore {
|
|
|
150
151
|
}
|
|
151
152
|
}
|
|
152
153
|
|
|
153
|
-
private
|
|
154
|
-
const expired = files.filter((f) => isExpired(f.createdAt, this.policy.ttlHours))
|
|
155
|
-
if (expired.length === 0) return 0
|
|
156
|
-
await this.bucket.delete(expired.map((f) => toKey(f.id)))
|
|
157
|
-
return expired.length
|
|
158
|
-
}
|
|
159
|
-
|
|
160
|
-
private async evictIfNeeded(files: ReadonlyArray<StoredFileMeta>, incomingBytes: number): Promise<number> {
|
|
154
|
+
private pickEvictions(files: ReadonlyArray<StoredFileMeta>, incomingBytes: number): StoredFileMeta[] {
|
|
161
155
|
const totalBytes = files.reduce((sum, f) => sum + f.sizeBytes, 0)
|
|
162
156
|
const projected = totalBytes + incomingBytes
|
|
163
|
-
if (projected <= this.policy.maxTotalBytes) return
|
|
157
|
+
if (projected <= this.policy.maxTotalBytes) return []
|
|
164
158
|
|
|
165
159
|
const needFree = projected - this.policy.maxTotalBytes
|
|
166
160
|
const candidates = [...files].sort((a, b) => Date.parse(a.createdAt) - Date.parse(b.createdAt))
|
|
@@ -172,9 +166,30 @@ export class R2FileStore implements FileStore {
|
|
|
172
166
|
if (freed >= needFree) break
|
|
173
167
|
if (evict.length >= this.policy.cleanupBatchSize) break
|
|
174
168
|
}
|
|
175
|
-
|
|
176
|
-
await this.bucket.delete(evict.map((f) => toKey(f.id)))
|
|
177
|
-
return evict.length
|
|
169
|
+
return evict
|
|
178
170
|
}
|
|
179
|
-
}
|
|
180
171
|
|
|
172
|
+
private async listAllFiles(): Promise<StoredFileMeta[]> {
|
|
173
|
+
const files: StoredFileMeta[] = []
|
|
174
|
+
let cursor: string | undefined
|
|
175
|
+
while (true) {
|
|
176
|
+
const listed = await this.bucket.list({ prefix: PREFIX, limit: 1000, cursor })
|
|
177
|
+
for (const obj of listed.objects) {
|
|
178
|
+
const meta = (obj.customMetadata ?? {}) as MetaFields
|
|
179
|
+
const createdAt = parseCreatedAt(meta.createdAt, obj.uploaded)
|
|
180
|
+
const filename = meta.filename ?? toId(obj.key)
|
|
181
|
+
const mimeType = meta.mimeType ?? obj.httpMetadata?.contentType ?? "application/octet-stream"
|
|
182
|
+
files.push({
|
|
183
|
+
id: toId(obj.key),
|
|
184
|
+
filename,
|
|
185
|
+
mimeType,
|
|
186
|
+
sizeBytes: obj.size,
|
|
187
|
+
createdAt,
|
|
188
|
+
})
|
|
189
|
+
}
|
|
190
|
+
if (listed.truncated !== true || !listed.cursor) break
|
|
191
|
+
cursor = listed.cursor
|
|
192
|
+
}
|
|
193
|
+
return files
|
|
194
|
+
}
|
|
195
|
+
}
|
package/src/response-schema.ts
CHANGED
|
@@ -15,6 +15,10 @@ export interface ToolOutputEnvelope {
|
|
|
15
15
|
readonly artifacts: ToolArtifact[]
|
|
16
16
|
}
|
|
17
17
|
|
|
18
|
+
const MAX_TEXT_STRING = 1200
|
|
19
|
+
const MAX_TEXT_ARRAY = 40
|
|
20
|
+
const MAX_TEXT_DEPTH = 8
|
|
21
|
+
|
|
18
22
|
const asObj = (value: unknown): JsonObject =>
|
|
19
23
|
typeof value === "object" && value !== null && !Array.isArray(value)
|
|
20
24
|
? (value as JsonObject)
|
|
@@ -113,6 +117,38 @@ const summarizeData = (data: unknown): string => {
|
|
|
113
117
|
return "Tool executed successfully."
|
|
114
118
|
}
|
|
115
119
|
|
|
120
|
+
const sanitizeString = (value: string): string => {
|
|
121
|
+
if (value.startsWith("data:")) {
|
|
122
|
+
const [head] = value.split(",", 1)
|
|
123
|
+
return `${head},<omitted>`
|
|
124
|
+
}
|
|
125
|
+
if (/^[A-Za-z0-9+/=]{300,}$/.test(value)) {
|
|
126
|
+
return `<base64 omitted len=${value.length}>`
|
|
127
|
+
}
|
|
128
|
+
if (value.length > MAX_TEXT_STRING) {
|
|
129
|
+
return `${value.slice(0, MAX_TEXT_STRING)}...(truncated ${value.length - MAX_TEXT_STRING} chars)`
|
|
130
|
+
}
|
|
131
|
+
return value
|
|
132
|
+
}
|
|
133
|
+
|
|
134
|
+
const sanitizeForText = (value: unknown, depth = 0): unknown => {
|
|
135
|
+
if (depth >= MAX_TEXT_DEPTH) return "<max-depth>"
|
|
136
|
+
if (typeof value === "string") return sanitizeString(value)
|
|
137
|
+
if (typeof value !== "object" || value === null) return value
|
|
138
|
+
if (Array.isArray(value)) {
|
|
139
|
+
const items = value.slice(0, MAX_TEXT_ARRAY).map((item) => sanitizeForText(item, depth + 1))
|
|
140
|
+
if (value.length > MAX_TEXT_ARRAY) {
|
|
141
|
+
items.push(`<truncated ${value.length - MAX_TEXT_ARRAY} items>`)
|
|
142
|
+
}
|
|
143
|
+
return items
|
|
144
|
+
}
|
|
145
|
+
const out: Record<string, unknown> = {}
|
|
146
|
+
for (const [key, nested] of Object.entries(value)) {
|
|
147
|
+
out[key] = sanitizeForText(nested, depth + 1)
|
|
148
|
+
}
|
|
149
|
+
return out
|
|
150
|
+
}
|
|
151
|
+
|
|
116
152
|
export const buildMcpContent = (envelope: ToolOutputEnvelope): Array<Record<string, unknown>> => {
|
|
117
153
|
const lines: string[] = [summarizeData(envelope.data)]
|
|
118
154
|
if (envelope.artifacts.length > 0) {
|
|
@@ -130,7 +166,7 @@ export const buildMcpContent = (envelope: ToolOutputEnvelope): Array<Record<stri
|
|
|
130
166
|
}
|
|
131
167
|
}
|
|
132
168
|
lines.push("")
|
|
133
|
-
lines.push(JSON.stringify(envelope, null, 2))
|
|
169
|
+
lines.push(JSON.stringify(sanitizeForText(envelope), null, 2))
|
|
134
170
|
|
|
135
171
|
const content: Array<Record<string, unknown>> = [{ type: "text", text: lines.join("\n") }]
|
|
136
172
|
for (const artifact of envelope.artifacts) {
|