@echofiles/echo-pdf 0.4.3 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +201 -0
- package/README.md +85 -562
- package/bin/echo-pdf.js +130 -525
- package/dist/file-utils.d.ts +0 -3
- package/dist/file-utils.js +0 -18
- package/dist/local/document.d.ts +10 -0
- package/dist/local/document.js +133 -0
- package/dist/local/index.d.ts +3 -135
- package/dist/local/index.js +2 -555
- package/dist/local/semantic.d.ts +2 -0
- package/dist/local/semantic.js +231 -0
- package/dist/local/shared.d.ts +50 -0
- package/dist/local/shared.js +173 -0
- package/dist/local/types.d.ts +183 -0
- package/dist/local/types.js +2 -0
- package/dist/node/pdfium-local.js +30 -6
- package/dist/pdf-config.js +2 -65
- package/dist/pdf-types.d.ts +1 -58
- package/dist/types.d.ts +1 -87
- package/echo-pdf.config.json +1 -21
- package/package.json +25 -22
- package/bin/lib/http.js +0 -97
- package/bin/lib/mcp-stdio.js +0 -99
- package/dist/auth.d.ts +0 -18
- package/dist/auth.js +0 -36
- package/dist/core/index.d.ts +0 -50
- package/dist/core/index.js +0 -7
- package/dist/file-ops.d.ts +0 -11
- package/dist/file-ops.js +0 -36
- package/dist/file-store-do.d.ts +0 -36
- package/dist/file-store-do.js +0 -298
- package/dist/http-error.d.ts +0 -9
- package/dist/http-error.js +0 -14
- package/dist/index.d.ts +0 -1
- package/dist/index.js +0 -1
- package/dist/mcp-server.d.ts +0 -3
- package/dist/mcp-server.js +0 -124
- package/dist/node/semantic-local.d.ts +0 -16
- package/dist/node/semantic-local.js +0 -113
- package/dist/pdf-agent.d.ts +0 -18
- package/dist/pdf-agent.js +0 -217
- package/dist/pdf-storage.d.ts +0 -8
- package/dist/pdf-storage.js +0 -86
- package/dist/pdfium-engine.d.ts +0 -9
- package/dist/pdfium-engine.js +0 -180
- package/dist/r2-file-store.d.ts +0 -20
- package/dist/r2-file-store.js +0 -176
- package/dist/response-schema.d.ts +0 -15
- package/dist/response-schema.js +0 -159
- package/dist/tool-registry.d.ts +0 -16
- package/dist/tool-registry.js +0 -175
- package/dist/worker.d.ts +0 -7
- package/dist/worker.js +0 -386
- package/scripts/export-fixtures.sh +0 -204
- package/wrangler.toml +0 -19
package/dist/worker.js
DELETED
|
@@ -1,386 +0,0 @@
|
|
|
1
|
-
import { normalizeReturnMode } from "./file-utils.js";
|
|
2
|
-
import { FileStoreDO } from "./file-store-do.js";
|
|
3
|
-
import { resolveModelForProvider, resolveProviderAlias } from "./agent-defaults.js";
|
|
4
|
-
import { checkHeaderAuth } from "./auth.js";
|
|
5
|
-
import { handleMcpRequest } from "./mcp-server.js";
|
|
6
|
-
import { loadEchoPdfConfig } from "./pdf-config.js";
|
|
7
|
-
import { getRuntimeFileStore } from "./pdf-storage.js";
|
|
8
|
-
import { listProviderModels } from "./provider-client.js";
|
|
9
|
-
import { buildToolOutputEnvelope } from "./response-schema.js";
|
|
10
|
-
import { callTool, listToolSchemas } from "./tool-registry.js";
|
|
11
|
-
const json = (data, status = 200) => new Response(JSON.stringify(data), {
|
|
12
|
-
status,
|
|
13
|
-
headers: {
|
|
14
|
-
"Content-Type": "application/json; charset=utf-8",
|
|
15
|
-
"Cache-Control": "no-store",
|
|
16
|
-
},
|
|
17
|
-
});
|
|
18
|
-
const toError = (error) => error instanceof Error ? error.message : String(error);
|
|
19
|
-
const errorStatus = (error) => {
|
|
20
|
-
const status = error?.status;
|
|
21
|
-
return typeof status === "number" && Number.isFinite(status) ? status : null;
|
|
22
|
-
};
|
|
23
|
-
const errorCode = (error) => {
|
|
24
|
-
const code = error?.code;
|
|
25
|
-
return typeof code === "string" && code.length > 0 ? code : null;
|
|
26
|
-
};
|
|
27
|
-
const errorDetails = (error) => error?.details;
|
|
28
|
-
const jsonError = (error, fallbackStatus = 500) => {
|
|
29
|
-
const status = errorStatus(error) ?? fallbackStatus;
|
|
30
|
-
const code = errorCode(error);
|
|
31
|
-
const details = errorDetails(error);
|
|
32
|
-
return json({ error: toError(error), code, details }, status);
|
|
33
|
-
};
|
|
34
|
-
const readJson = async (request) => {
|
|
35
|
-
try {
|
|
36
|
-
const body = await request.json();
|
|
37
|
-
if (typeof body === "object" && body !== null && !Array.isArray(body)) {
|
|
38
|
-
return body;
|
|
39
|
-
}
|
|
40
|
-
return {};
|
|
41
|
-
}
|
|
42
|
-
catch {
|
|
43
|
-
return {};
|
|
44
|
-
}
|
|
45
|
-
};
|
|
46
|
-
const asObj = (value) => typeof value === "object" && value !== null && !Array.isArray(value)
|
|
47
|
-
? value
|
|
48
|
-
: {};
|
|
49
|
-
const resolvePublicBaseUrl = (request, configured) => typeof configured === "string" && configured.length > 0 ? configured : request.url;
|
|
50
|
-
const sanitizeDownloadFilename = (filename) => {
|
|
51
|
-
const cleaned = filename
|
|
52
|
-
.replace(/[\r\n"]/g, "")
|
|
53
|
-
.replace(/[^\x20-\x7E]+/g, "")
|
|
54
|
-
.trim();
|
|
55
|
-
return cleaned.length > 0 ? cleaned : "download.bin";
|
|
56
|
-
};
|
|
57
|
-
const sseResponse = (stream) => new Response(stream, {
|
|
58
|
-
headers: {
|
|
59
|
-
"Content-Type": "text/event-stream; charset=utf-8",
|
|
60
|
-
"Cache-Control": "no-store",
|
|
61
|
-
Connection: "keep-alive",
|
|
62
|
-
},
|
|
63
|
-
});
|
|
64
|
-
const encodeSse = (event, data) => {
|
|
65
|
-
const encoder = new TextEncoder();
|
|
66
|
-
return encoder.encode(`event: ${event}\ndata: ${JSON.stringify(data)}\n\n`);
|
|
67
|
-
};
|
|
68
|
-
const isValidOperation = (value) => value === "extract_pages" || value === "ocr_pages" || value === "tables_to_latex";
|
|
69
|
-
const toPdfOperation = (input, defaultProvider) => ({
|
|
70
|
-
operation: isValidOperation(input.operation) ? input.operation : "extract_pages",
|
|
71
|
-
fileId: typeof input.fileId === "string" ? input.fileId : undefined,
|
|
72
|
-
url: typeof input.url === "string" ? input.url : undefined,
|
|
73
|
-
base64: typeof input.base64 === "string" ? input.base64 : undefined,
|
|
74
|
-
filename: typeof input.filename === "string" ? input.filename : undefined,
|
|
75
|
-
pages: Array.isArray(input.pages) ? input.pages.map((v) => Number(v)) : [],
|
|
76
|
-
renderScale: typeof input.renderScale === "number" ? input.renderScale : undefined,
|
|
77
|
-
provider: typeof input.provider === "string" ? input.provider : defaultProvider,
|
|
78
|
-
model: typeof input.model === "string" ? input.model : "",
|
|
79
|
-
providerApiKeys: typeof input.providerApiKeys === "object" && input.providerApiKeys !== null
|
|
80
|
-
? input.providerApiKeys
|
|
81
|
-
: undefined,
|
|
82
|
-
returnMode: normalizeReturnMode(input.returnMode),
|
|
83
|
-
prompt: typeof input.prompt === "string" ? input.prompt : undefined,
|
|
84
|
-
});
|
|
85
|
-
const toolNameByOperation = {
|
|
86
|
-
extract_pages: "pdf_extract_pages",
|
|
87
|
-
ocr_pages: "pdf_ocr_pages",
|
|
88
|
-
tables_to_latex: "pdf_tables_to_latex",
|
|
89
|
-
};
|
|
90
|
-
const operationArgsFromRequest = (request) => {
|
|
91
|
-
const args = {
|
|
92
|
-
pages: request.pages,
|
|
93
|
-
};
|
|
94
|
-
if (request.fileId)
|
|
95
|
-
args.fileId = request.fileId;
|
|
96
|
-
if (request.url)
|
|
97
|
-
args.url = request.url;
|
|
98
|
-
if (request.base64)
|
|
99
|
-
args.base64 = request.base64;
|
|
100
|
-
if (request.filename)
|
|
101
|
-
args.filename = request.filename;
|
|
102
|
-
if (typeof request.renderScale === "number")
|
|
103
|
-
args.renderScale = request.renderScale;
|
|
104
|
-
if (request.returnMode)
|
|
105
|
-
args.returnMode = request.returnMode;
|
|
106
|
-
if (request.provider)
|
|
107
|
-
args.provider = request.provider;
|
|
108
|
-
if (request.model)
|
|
109
|
-
args.model = request.model;
|
|
110
|
-
if (request.prompt)
|
|
111
|
-
args.prompt = request.prompt;
|
|
112
|
-
return args;
|
|
113
|
-
};
|
|
114
|
-
const checkComputeAuth = (request, env, config) => checkHeaderAuth(request, env, {
|
|
115
|
-
authHeader: config.service.computeAuth?.authHeader,
|
|
116
|
-
authEnv: config.service.computeAuth?.authEnv,
|
|
117
|
-
allowMissingSecret: false,
|
|
118
|
-
misconfiguredCode: "COMPUTE_AUTH_MISCONFIGURED",
|
|
119
|
-
unauthorizedCode: "UNAUTHORIZED",
|
|
120
|
-
contextName: "compute endpoint",
|
|
121
|
-
});
|
|
122
|
-
export default {
|
|
123
|
-
async fetch(request, env, ctx) {
|
|
124
|
-
const url = new URL(request.url);
|
|
125
|
-
const config = loadEchoPdfConfig(env);
|
|
126
|
-
const runtimeStore = getRuntimeFileStore(env, config);
|
|
127
|
-
const fileStore = runtimeStore.store;
|
|
128
|
-
if (request.method === "GET" && url.pathname === "/health") {
|
|
129
|
-
return json({ ok: true, service: config.service.name, now: new Date().toISOString() });
|
|
130
|
-
}
|
|
131
|
-
if (request.method === "GET" && url.pathname === "/config") {
|
|
132
|
-
return json({
|
|
133
|
-
service: config.service,
|
|
134
|
-
agent: config.agent,
|
|
135
|
-
providers: Object.entries(config.providers).map(([alias, provider]) => ({ alias, type: provider.type })),
|
|
136
|
-
capabilities: {
|
|
137
|
-
toolCatalogEndpoint: "/tools/catalog",
|
|
138
|
-
toolCallEndpoint: "/tools/call",
|
|
139
|
-
fileOpsEndpoint: "/api/files/op",
|
|
140
|
-
fileUploadEndpoint: "/api/files/upload",
|
|
141
|
-
fileStatsEndpoint: "/api/files/stats",
|
|
142
|
-
fileCleanupEndpoint: "/api/files/cleanup",
|
|
143
|
-
supportedReturnModes: ["inline", "file_id", "url"],
|
|
144
|
-
},
|
|
145
|
-
mcp: {
|
|
146
|
-
serverName: config.mcp.serverName,
|
|
147
|
-
version: config.mcp.version,
|
|
148
|
-
authHeader: config.mcp.authHeader ?? null,
|
|
149
|
-
},
|
|
150
|
-
fileGet: {
|
|
151
|
-
authHeader: config.service.fileGet?.authHeader ?? null,
|
|
152
|
-
cacheTtlSeconds: config.service.fileGet?.cacheTtlSeconds ?? 300,
|
|
153
|
-
},
|
|
154
|
-
});
|
|
155
|
-
}
|
|
156
|
-
if (request.method === "GET" && url.pathname === "/tools/catalog") {
|
|
157
|
-
return json({ tools: listToolSchemas() });
|
|
158
|
-
}
|
|
159
|
-
if (request.method === "POST" && url.pathname === "/tools/call") {
|
|
160
|
-
const auth = checkComputeAuth(request, env, config);
|
|
161
|
-
if (!auth.ok)
|
|
162
|
-
return json({ error: auth.message, code: auth.code }, auth.status);
|
|
163
|
-
const body = await readJson(request);
|
|
164
|
-
const name = typeof body.name === "string" ? body.name : "";
|
|
165
|
-
if (!name)
|
|
166
|
-
return json({ error: "Missing required field: name" }, 400);
|
|
167
|
-
try {
|
|
168
|
-
const args = asObj(body.arguments);
|
|
169
|
-
const preferredProvider = resolveProviderAlias(config, typeof body.provider === "string" ? body.provider : undefined);
|
|
170
|
-
const preferredModel = resolveModelForProvider(config, preferredProvider, typeof body.model === "string" ? body.model : undefined);
|
|
171
|
-
if (name === "pdf_ocr_pages" || name === "pdf_tables_to_latex") {
|
|
172
|
-
if (typeof args.provider !== "string" || args.provider.length === 0) {
|
|
173
|
-
args.provider = preferredProvider;
|
|
174
|
-
}
|
|
175
|
-
if (typeof args.model !== "string" || args.model.length === 0) {
|
|
176
|
-
args.model = preferredModel;
|
|
177
|
-
}
|
|
178
|
-
}
|
|
179
|
-
const result = await callTool(name, args, {
|
|
180
|
-
config,
|
|
181
|
-
env,
|
|
182
|
-
fileStore,
|
|
183
|
-
providerApiKeys: typeof body.providerApiKeys === "object" && body.providerApiKeys !== null
|
|
184
|
-
? body.providerApiKeys
|
|
185
|
-
: undefined,
|
|
186
|
-
});
|
|
187
|
-
return json(buildToolOutputEnvelope(result, resolvePublicBaseUrl(request, config.service.publicBaseUrl)));
|
|
188
|
-
}
|
|
189
|
-
catch (error) {
|
|
190
|
-
return jsonError(error, 500);
|
|
191
|
-
}
|
|
192
|
-
}
|
|
193
|
-
if (request.method === "POST" && url.pathname === "/providers/models") {
|
|
194
|
-
const auth = checkComputeAuth(request, env, config);
|
|
195
|
-
if (!auth.ok)
|
|
196
|
-
return json({ error: auth.message, code: auth.code }, auth.status);
|
|
197
|
-
const body = await readJson(request);
|
|
198
|
-
const provider = resolveProviderAlias(config, typeof body.provider === "string" ? body.provider : undefined);
|
|
199
|
-
const runtimeKeys = typeof body.providerApiKeys === "object" && body.providerApiKeys !== null
|
|
200
|
-
? body.providerApiKeys
|
|
201
|
-
: undefined;
|
|
202
|
-
try {
|
|
203
|
-
const models = await listProviderModels(config, env, provider, runtimeKeys);
|
|
204
|
-
return json({ provider, models });
|
|
205
|
-
}
|
|
206
|
-
catch (error) {
|
|
207
|
-
return jsonError(error, 500);
|
|
208
|
-
}
|
|
209
|
-
}
|
|
210
|
-
if (request.method === "POST" && url.pathname === "/api/agent/run") {
|
|
211
|
-
const auth = checkComputeAuth(request, env, config);
|
|
212
|
-
if (!auth.ok)
|
|
213
|
-
return json({ error: auth.message, code: auth.code }, auth.status);
|
|
214
|
-
const body = await readJson(request);
|
|
215
|
-
if (Object.hasOwn(body, "operation") && !isValidOperation(body.operation)) {
|
|
216
|
-
return json({ error: "Invalid operation. Must be one of: extract_pages, ocr_pages, tables_to_latex" }, 400);
|
|
217
|
-
}
|
|
218
|
-
const requestPayload = toPdfOperation(body, config.agent.defaultProvider);
|
|
219
|
-
try {
|
|
220
|
-
const result = await callTool(toolNameByOperation[requestPayload.operation], operationArgsFromRequest(requestPayload), {
|
|
221
|
-
config,
|
|
222
|
-
env,
|
|
223
|
-
fileStore,
|
|
224
|
-
providerApiKeys: requestPayload.providerApiKeys,
|
|
225
|
-
});
|
|
226
|
-
return json(result);
|
|
227
|
-
}
|
|
228
|
-
catch (error) {
|
|
229
|
-
return jsonError(error, 500);
|
|
230
|
-
}
|
|
231
|
-
}
|
|
232
|
-
if (request.method === "POST" && url.pathname === "/api/agent/stream") {
|
|
233
|
-
const auth = checkComputeAuth(request, env, config);
|
|
234
|
-
if (!auth.ok)
|
|
235
|
-
return json({ error: auth.message, code: auth.code }, auth.status);
|
|
236
|
-
const body = await readJson(request);
|
|
237
|
-
if (Object.hasOwn(body, "operation") && !isValidOperation(body.operation)) {
|
|
238
|
-
return json({ error: "Invalid operation. Must be one of: extract_pages, ocr_pages, tables_to_latex" }, 400);
|
|
239
|
-
}
|
|
240
|
-
const requestPayload = toPdfOperation(body, config.agent.defaultProvider);
|
|
241
|
-
const stream = new TransformStream();
|
|
242
|
-
const writer = stream.writable.getWriter();
|
|
243
|
-
let queue = Promise.resolve();
|
|
244
|
-
const send = (event, data) => {
|
|
245
|
-
queue = queue.then(() => writer.write(encodeSse(event, data))).catch(() => undefined);
|
|
246
|
-
};
|
|
247
|
-
const run = async () => {
|
|
248
|
-
try {
|
|
249
|
-
send("meta", { kind: "meta", startedAt: new Date().toISOString(), streaming: true });
|
|
250
|
-
send("io", { kind: "io", direction: "input", content: requestPayload });
|
|
251
|
-
const result = await callTool(toolNameByOperation[requestPayload.operation], operationArgsFromRequest(requestPayload), {
|
|
252
|
-
config,
|
|
253
|
-
env,
|
|
254
|
-
fileStore,
|
|
255
|
-
providerApiKeys: requestPayload.providerApiKeys,
|
|
256
|
-
trace: (event) => send("step", event),
|
|
257
|
-
});
|
|
258
|
-
send("io", { kind: "io", direction: "output", content: "operation completed" });
|
|
259
|
-
send("result", { kind: "result", output: result });
|
|
260
|
-
send("done", { ok: true });
|
|
261
|
-
}
|
|
262
|
-
catch (error) {
|
|
263
|
-
send("error", { kind: "error", message: toError(error) });
|
|
264
|
-
send("done", { ok: false });
|
|
265
|
-
}
|
|
266
|
-
finally {
|
|
267
|
-
await queue;
|
|
268
|
-
await writer.close();
|
|
269
|
-
}
|
|
270
|
-
};
|
|
271
|
-
ctx.waitUntil(run());
|
|
272
|
-
return sseResponse(stream.readable);
|
|
273
|
-
}
|
|
274
|
-
if (request.method === "POST" && url.pathname === "/api/files/op") {
|
|
275
|
-
const body = await readJson(request);
|
|
276
|
-
try {
|
|
277
|
-
const result = await callTool("file_ops", asObj(body), {
|
|
278
|
-
config,
|
|
279
|
-
env,
|
|
280
|
-
fileStore,
|
|
281
|
-
});
|
|
282
|
-
return json(result);
|
|
283
|
-
}
|
|
284
|
-
catch (error) {
|
|
285
|
-
return jsonError(error, 500);
|
|
286
|
-
}
|
|
287
|
-
}
|
|
288
|
-
if (request.method === "POST" && url.pathname === "/api/files/upload") {
|
|
289
|
-
try {
|
|
290
|
-
const formData = await request.formData();
|
|
291
|
-
const file = formData.get("file");
|
|
292
|
-
if (!file || typeof file.arrayBuffer !== "function") {
|
|
293
|
-
return json({ error: "Missing file field: file" }, 400);
|
|
294
|
-
}
|
|
295
|
-
const bytes = new Uint8Array(await file.arrayBuffer());
|
|
296
|
-
const stored = await fileStore.put({
|
|
297
|
-
filename: file.name || `upload-${Date.now()}.pdf`,
|
|
298
|
-
mimeType: file.type || "application/pdf",
|
|
299
|
-
bytes,
|
|
300
|
-
});
|
|
301
|
-
return json({ file: stored }, 200);
|
|
302
|
-
}
|
|
303
|
-
catch (error) {
|
|
304
|
-
return jsonError(error, 500);
|
|
305
|
-
}
|
|
306
|
-
}
|
|
307
|
-
if (request.method === "GET" && url.pathname === "/api/files/get") {
|
|
308
|
-
const fileGetConfig = config.service.fileGet ?? {};
|
|
309
|
-
const auth = checkHeaderAuth(request, env, {
|
|
310
|
-
authHeader: fileGetConfig.authHeader,
|
|
311
|
-
authEnv: fileGetConfig.authEnv,
|
|
312
|
-
allowMissingSecret: env.ECHO_PDF_ALLOW_MISSING_AUTH_SECRET === "1",
|
|
313
|
-
misconfiguredCode: "AUTH_MISCONFIGURED",
|
|
314
|
-
unauthorizedCode: "UNAUTHORIZED",
|
|
315
|
-
contextName: "file get",
|
|
316
|
-
});
|
|
317
|
-
if (!auth.ok) {
|
|
318
|
-
return json({ error: auth.message, code: auth.code }, auth.status);
|
|
319
|
-
}
|
|
320
|
-
const fileId = url.searchParams.get("fileId") || "";
|
|
321
|
-
if (!fileId)
|
|
322
|
-
return json({ error: "Missing fileId" }, 400);
|
|
323
|
-
const file = await fileStore.get(fileId);
|
|
324
|
-
if (!file)
|
|
325
|
-
return json({ error: "File not found" }, 404);
|
|
326
|
-
const download = url.searchParams.get("download") === "1";
|
|
327
|
-
const headers = new Headers();
|
|
328
|
-
headers.set("Content-Type", file.mimeType);
|
|
329
|
-
const cacheTtl = Number(fileGetConfig.cacheTtlSeconds ?? 300);
|
|
330
|
-
const cacheControl = cacheTtl > 0
|
|
331
|
-
? `public, max-age=${Math.floor(cacheTtl)}, s-maxage=${Math.floor(cacheTtl)}`
|
|
332
|
-
: "no-store";
|
|
333
|
-
headers.set("Cache-Control", cacheControl);
|
|
334
|
-
if (download) {
|
|
335
|
-
headers.set("Content-Disposition", `attachment; filename=\"${sanitizeDownloadFilename(file.filename)}\"`);
|
|
336
|
-
}
|
|
337
|
-
return new Response(file.bytes, { status: 200, headers });
|
|
338
|
-
}
|
|
339
|
-
if (request.method === "GET" && url.pathname === "/api/files/stats") {
|
|
340
|
-
try {
|
|
341
|
-
return json(await runtimeStore.stats(), 200);
|
|
342
|
-
}
|
|
343
|
-
catch (error) {
|
|
344
|
-
return json({ error: toError(error) }, 500);
|
|
345
|
-
}
|
|
346
|
-
}
|
|
347
|
-
if (request.method === "POST" && url.pathname === "/api/files/cleanup") {
|
|
348
|
-
try {
|
|
349
|
-
return json(await runtimeStore.cleanup(), 200);
|
|
350
|
-
}
|
|
351
|
-
catch (error) {
|
|
352
|
-
return json({ error: toError(error) }, 500);
|
|
353
|
-
}
|
|
354
|
-
}
|
|
355
|
-
if (request.method === "POST" && url.pathname === "/mcp") {
|
|
356
|
-
return await handleMcpRequest(request, env, config, fileStore);
|
|
357
|
-
}
|
|
358
|
-
if (request.method === "GET" && env.ASSETS) {
|
|
359
|
-
const assetReq = url.pathname === "/"
|
|
360
|
-
? new Request(new URL("/index.html", url), request)
|
|
361
|
-
: request;
|
|
362
|
-
const asset = await env.ASSETS.fetch(assetReq);
|
|
363
|
-
if (asset.status !== 404)
|
|
364
|
-
return asset;
|
|
365
|
-
}
|
|
366
|
-
return json({
|
|
367
|
-
error: "Not found",
|
|
368
|
-
routes: {
|
|
369
|
-
health: "GET /health",
|
|
370
|
-
config: "GET /config",
|
|
371
|
-
toolsCatalog: "GET /tools/catalog",
|
|
372
|
-
toolCall: "POST /tools/call",
|
|
373
|
-
models: "POST /providers/models",
|
|
374
|
-
run: "POST /api/agent/run",
|
|
375
|
-
stream: "POST /api/agent/stream",
|
|
376
|
-
files: "POST /api/files/op",
|
|
377
|
-
fileUpload: "POST /api/files/upload",
|
|
378
|
-
fileGet: "GET /api/files/get?fileId=<id>",
|
|
379
|
-
fileStats: "GET /api/files/stats",
|
|
380
|
-
fileCleanup: "POST /api/files/cleanup",
|
|
381
|
-
mcp: "POST /mcp",
|
|
382
|
-
},
|
|
383
|
-
}, 404);
|
|
384
|
-
},
|
|
385
|
-
};
|
|
386
|
-
export { FileStoreDO };
|
|
@@ -1,204 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env bash
|
|
2
|
-
set -euo pipefail
|
|
3
|
-
|
|
4
|
-
ROOT_DIR="$(cd "$(dirname "$0")/.." && pwd)"
|
|
5
|
-
OUT_DIR="${ROOT_DIR}/fixtures/output"
|
|
6
|
-
EXPORT_PORT="${EXPORT_PORT:-8798}"
|
|
7
|
-
BASE_URL="${BASE_URL:-http://127.0.0.1:${EXPORT_PORT}}"
|
|
8
|
-
INPUT_PDF="${INPUT_PDF:-${ROOT_DIR}/fixtures/input.pdf}"
|
|
9
|
-
START_LOCAL_DEV="${START_LOCAL_DEV:-1}"
|
|
10
|
-
RUN_TABLES="${RUN_TABLES:-1}"
|
|
11
|
-
REQUIRE_LLM_SUCCESS="${REQUIRE_LLM_SUCCESS:-1}"
|
|
12
|
-
|
|
13
|
-
mkdir -p "$OUT_DIR"
|
|
14
|
-
rm -rf "${OUT_DIR:?}/"*
|
|
15
|
-
|
|
16
|
-
if [[ -f "${ROOT_DIR}/../.env.local" ]]; then
|
|
17
|
-
set -a
|
|
18
|
-
# shellcheck source=/dev/null
|
|
19
|
-
source "${ROOT_DIR}/../.env.local"
|
|
20
|
-
set +a
|
|
21
|
-
fi
|
|
22
|
-
|
|
23
|
-
if [[ ! -f "${INPUT_PDF}" ]]; then
|
|
24
|
-
echo "missing input pdf: ${INPUT_PDF}" >&2
|
|
25
|
-
exit 1
|
|
26
|
-
fi
|
|
27
|
-
|
|
28
|
-
cli() {
|
|
29
|
-
node "${ROOT_DIR}/bin/echo-pdf.js" "$@"
|
|
30
|
-
}
|
|
31
|
-
|
|
32
|
-
run_json() {
|
|
33
|
-
local name="$1"
|
|
34
|
-
shift
|
|
35
|
-
if "$@" > "${OUT_DIR}/${name}.json" 2> "${OUT_DIR}/${name}.err"; then
|
|
36
|
-
rm -f "${OUT_DIR}/${name}.err"
|
|
37
|
-
else
|
|
38
|
-
printf '{"ok":false,"error_file":"%s.err"}\n' "$name" > "${OUT_DIR}/${name}.json"
|
|
39
|
-
fi
|
|
40
|
-
}
|
|
41
|
-
|
|
42
|
-
validate_ocr_json() {
|
|
43
|
-
local json_file="$1"
|
|
44
|
-
node -e 'const fs=require("fs");const j=JSON.parse(fs.readFileSync(process.argv[1],"utf8"));const pages=j?.data?.pages;if(!Array.isArray(pages)||pages.length===0)process.exit(1);const t=String(pages[0]?.text||"").trim();if(t.length===0)process.exit(1);' "$json_file"
|
|
45
|
-
}
|
|
46
|
-
|
|
47
|
-
validate_tables_json() {
|
|
48
|
-
local json_file="$1"
|
|
49
|
-
node -e 'const fs=require("fs");const j=JSON.parse(fs.readFileSync(process.argv[1],"utf8"));const pages=j?.data?.pages;if(!Array.isArray(pages)||pages.length===0)process.exit(1);const t=String(pages[0]?.latex||"").trim();if(t.length===0)process.exit(1);' "$json_file"
|
|
50
|
-
}
|
|
51
|
-
|
|
52
|
-
# 1) Save test logs locally (do not block artifact export on transient network failure)
|
|
53
|
-
set +e
|
|
54
|
-
{
|
|
55
|
-
echo "[typecheck]"
|
|
56
|
-
npm --prefix "$ROOT_DIR" run typecheck
|
|
57
|
-
TYPECHECK_CODE=$?
|
|
58
|
-
echo
|
|
59
|
-
echo "[test]"
|
|
60
|
-
npm --prefix "$ROOT_DIR" run test
|
|
61
|
-
TEST_CODE=$?
|
|
62
|
-
echo
|
|
63
|
-
echo "[smoke]"
|
|
64
|
-
npm --prefix "$ROOT_DIR" run smoke
|
|
65
|
-
SMOKE_CODE=$?
|
|
66
|
-
echo
|
|
67
|
-
echo "typecheck_exit=${TYPECHECK_CODE}"
|
|
68
|
-
echo "test_exit=${TEST_CODE}"
|
|
69
|
-
echo "smoke_exit=${SMOKE_CODE}"
|
|
70
|
-
} > "${OUT_DIR}/test.log" 2>&1
|
|
71
|
-
set -e
|
|
72
|
-
|
|
73
|
-
cat > "${OUT_DIR}/test-status.json" <<JSON
|
|
74
|
-
{"typecheck":${TYPECHECK_CODE:-1},"test":${TEST_CODE:-1},"smoke":${SMOKE_CODE:-1}}
|
|
75
|
-
JSON
|
|
76
|
-
|
|
77
|
-
DEV_PID=""
|
|
78
|
-
cleanup() {
|
|
79
|
-
if [[ -n "${DEV_PID}" ]] && kill -0 "${DEV_PID}" >/dev/null 2>&1; then
|
|
80
|
-
kill "${DEV_PID}" >/dev/null 2>&1 || true
|
|
81
|
-
wait "${DEV_PID}" 2>/dev/null || true
|
|
82
|
-
fi
|
|
83
|
-
}
|
|
84
|
-
trap cleanup EXIT
|
|
85
|
-
|
|
86
|
-
if [[ "${START_LOCAL_DEV}" == "1" ]]; then
|
|
87
|
-
npm --prefix "$ROOT_DIR" run dev -- --ip 127.0.0.1 --port "${EXPORT_PORT}" --inspector-port 0 > "${OUT_DIR}/export-local-dev.log" 2>&1 &
|
|
88
|
-
DEV_PID=$!
|
|
89
|
-
for _ in $(seq 1 120); do
|
|
90
|
-
if node -e 'fetch(process.argv[1]+"/health").then(r=>process.exit(r.ok?0:1)).catch(()=>process.exit(1))' "${BASE_URL}" >/dev/null 2>&1; then
|
|
91
|
-
break
|
|
92
|
-
fi
|
|
93
|
-
sleep 0.5
|
|
94
|
-
done
|
|
95
|
-
node -e 'fetch(process.argv[1]+"/health").then(r=>process.exit(r.ok?0:1)).catch(()=>process.exit(1))' "${BASE_URL}" >/dev/null
|
|
96
|
-
fi
|
|
97
|
-
|
|
98
|
-
# 2) Init CLI + provider settings
|
|
99
|
-
cli init --service-url "$BASE_URL" > "${OUT_DIR}/cli-init.json"
|
|
100
|
-
|
|
101
|
-
node -e 'const fs=require("fs");const cfg=JSON.parse(fs.readFileSync(process.argv[1],"utf8"));const entries=Object.entries(cfg.providers||{});const pick=(key)=>{const keys=[key];if(key.endsWith("_API_KEY"))keys.push(key.replace(/_API_KEY$/,"_KEY"));if(key.endsWith("_KEY"))keys.push(key.replace(/_KEY$/,"_API_KEY"));for(const k of keys){const v=process.env[k];if(typeof v==="string"&&v.trim())return {k,v:v.trim()};}return null;};const forced=String(process.env.SMOKE_LLM_PROVIDER||"").trim();if(forced&&cfg.providers?.[forced]){const found=pick(String(cfg.providers[forced].apiKeyEnv||""));if(found){process.stdout.write(JSON.stringify({provider:forced,apiKey:found.v,env:found.k,forced:true}));process.exit(0);}}const preferred=String(cfg.agent?.defaultProvider||"");const ordered=entries.sort((a,b)=>a[0]===preferred?-1:b[0]===preferred?1:0);for(const [alias,p] of ordered){const found=pick(String(p.apiKeyEnv||""));if(found){process.stdout.write(JSON.stringify({provider:alias,apiKey:found.v,env:found.k,forced:false}));process.exit(0);}}process.stdout.write(JSON.stringify({provider:preferred||"",apiKey:"",env:"",forced:false}));' "${ROOT_DIR}/echo-pdf.config.json" > "${OUT_DIR}/provider-selection.json"
|
|
102
|
-
PROVIDER="$(node -e 'const fs=require("fs");const j=JSON.parse(fs.readFileSync(process.argv[1],"utf8"));process.stdout.write(String(j.provider||""))' "${OUT_DIR}/provider-selection.json")"
|
|
103
|
-
PROVIDER_KEY="$(node -e 'const fs=require("fs");const j=JSON.parse(fs.readFileSync(process.argv[1],"utf8"));process.stdout.write(String(j.apiKey||""))' "${OUT_DIR}/provider-selection.json")"
|
|
104
|
-
PREFERRED_MODEL="${SMOKE_LLM_MODEL:-${ECHO_PDF_DEFAULT_MODEL:-}}"
|
|
105
|
-
if [[ -n "${PROVIDER}" ]] && [[ -n "${PROVIDER_KEY}" ]]; then
|
|
106
|
-
cli provider set --provider "${PROVIDER}" --api-key "${PROVIDER_KEY}" > "${OUT_DIR}/provider-set.json"
|
|
107
|
-
cli provider use --provider "${PROVIDER}" > "${OUT_DIR}/provider-use.json"
|
|
108
|
-
else
|
|
109
|
-
echo '{"warning":"No provider key found in env, LLM calls may fail"}' > "${OUT_DIR}/provider-warning.json"
|
|
110
|
-
fi
|
|
111
|
-
|
|
112
|
-
# 3) Pull models via CLI and select one
|
|
113
|
-
if [[ -n "${PROVIDER}" ]]; then
|
|
114
|
-
run_json "models" cli models --provider "${PROVIDER}"
|
|
115
|
-
else
|
|
116
|
-
echo '{"warning":"No provider selected, skip model list"}' > "${OUT_DIR}/models.json"
|
|
117
|
-
fi
|
|
118
|
-
MODEL="${PREFERRED_MODEL}"
|
|
119
|
-
if [[ -n "$MODEL" ]] && [[ -n "${PROVIDER}" ]]; then
|
|
120
|
-
if ! node -e 'const fs=require("fs");const file=process.argv[1];const model=process.argv[2];const j=JSON.parse(fs.readFileSync(file,"utf8"));const models=Array.isArray(j.models)?j.models:[];process.exit(models.includes(model)?0:1)' "${OUT_DIR}/models.json" "$MODEL"; then
|
|
121
|
-
echo "Configured model not found in provider model list: ${MODEL}" >&2
|
|
122
|
-
exit 1
|
|
123
|
-
fi
|
|
124
|
-
cli model set --provider "${PROVIDER}" --model "$MODEL" > "${OUT_DIR}/model-set.json"
|
|
125
|
-
else
|
|
126
|
-
echo '{"warning":"Missing ECHO_PDF_DEFAULT_MODEL / SMOKE_LLM_MODEL"}' > "${OUT_DIR}/model-warning.json"
|
|
127
|
-
exit 1
|
|
128
|
-
fi
|
|
129
|
-
|
|
130
|
-
# 4) Upload the exact local fixture for subsequent CLI/MCP calls
|
|
131
|
-
node -e 'const fs=require("fs"); const path=require("path"); (async()=>{ const base=process.argv[1]; const file=process.argv[2]; const bytes=fs.readFileSync(file); const fd=new FormData(); fd.set("file", new Blob([bytes], {type:"application/pdf"}), path.basename(file)); const res=await fetch(`${base}/api/files/upload`, {method:"POST", body:fd}); const txt=await res.text(); fs.writeFileSync(process.argv[3], txt); if(!res.ok){process.stderr.write(txt); process.exit(1);} })().catch((e)=>{console.error(String(e)); process.exit(1)})' "$BASE_URL" "$INPUT_PDF" "${OUT_DIR}/upload.json"
|
|
132
|
-
FILE_ID="$(node -e 'const fs=require("fs");const j=JSON.parse(fs.readFileSync(process.argv[1],"utf8"));process.stdout.write(j.file?.id||"")' "${OUT_DIR}/upload.json")"
|
|
133
|
-
if [[ -z "${FILE_ID}" ]]; then
|
|
134
|
-
echo "upload did not return file id" >&2
|
|
135
|
-
exit 1
|
|
136
|
-
fi
|
|
137
|
-
|
|
138
|
-
# 5) CLI tool calls
|
|
139
|
-
run_json "tools-catalog" cli tools
|
|
140
|
-
if [[ -n "${PROVIDER}" ]]; then
|
|
141
|
-
run_json "cli-extract-pages" cli call --tool pdf_extract_pages --args "{\"fileId\":\"${FILE_ID}\",\"pages\":[1],\"returnMode\":\"inline\"}" --provider "${PROVIDER}" --model "${MODEL:-}"
|
|
142
|
-
else
|
|
143
|
-
run_json "cli-extract-pages" cli call --tool pdf_extract_pages --args "{\"fileId\":\"${FILE_ID}\",\"pages\":[1],\"returnMode\":\"inline\"}"
|
|
144
|
-
fi
|
|
145
|
-
node -e 'const fs=require("fs");const p=process.argv[1];const out=process.argv[2];const j=JSON.parse(fs.readFileSync(p,"utf8"));const d=j.data?.images?.[0]?.data||"";if(!d.startsWith("data:image/"))process.exit(1);fs.writeFileSync(out, Buffer.from(d.split(",")[1]||"","base64"));' "${OUT_DIR}/cli-extract-pages.json" "${OUT_DIR}/page-1-cli.png"
|
|
146
|
-
|
|
147
|
-
# 6) MCP tool calls
|
|
148
|
-
run_json "mcp-initialize" cli mcp initialize
|
|
149
|
-
run_json "mcp-tools" cli mcp tools
|
|
150
|
-
run_json "mcp-call-fileops" cli mcp call --tool file_ops --args '{"op":"list"}'
|
|
151
|
-
run_json "mcp-extract-pages" cli mcp call --tool pdf_extract_pages --args "{\"fileId\":\"${FILE_ID}\",\"pages\":[1],\"returnMode\":\"inline\"}"
|
|
152
|
-
|
|
153
|
-
# 7) LLM tool calls
|
|
154
|
-
OCR_OK=0
|
|
155
|
-
TABLES_OK=0
|
|
156
|
-
if [[ -n "${PROVIDER}" ]]; then
|
|
157
|
-
: > "${OUT_DIR}/llm-attempts.log"
|
|
158
|
-
echo "[ocr] using provider=${PROVIDER} model=${MODEL}" >> "${OUT_DIR}/llm-attempts.log"
|
|
159
|
-
if cli call --tool pdf_ocr_pages --args "{\"fileId\":\"${FILE_ID}\",\"pages\":[1],\"provider\":\"${PROVIDER}\",\"model\":\"${MODEL}\"}" --provider "${PROVIDER}" --model "${MODEL}" > "${OUT_DIR}/cli-ocr-pages.json" 2> "${OUT_DIR}/cli-ocr-pages.err"; then
|
|
160
|
-
if validate_ocr_json "${OUT_DIR}/cli-ocr-pages.json"; then
|
|
161
|
-
OCR_OK=1
|
|
162
|
-
echo "{\"provider\":\"${PROVIDER}\",\"model\":\"${MODEL}\"}" > "${OUT_DIR}/ocr-selected-model.json"
|
|
163
|
-
fi
|
|
164
|
-
fi
|
|
165
|
-
else
|
|
166
|
-
run_json "cli-ocr-pages" cli call --tool pdf_ocr_pages --args "{\"fileId\":\"${FILE_ID}\",\"pages\":[1]}"
|
|
167
|
-
fi
|
|
168
|
-
|
|
169
|
-
if [[ "${RUN_TABLES}" == "1" ]]; then
|
|
170
|
-
if [[ -n "${PROVIDER}" ]]; then
|
|
171
|
-
echo "[tables] using provider=${PROVIDER} model=${MODEL}" >> "${OUT_DIR}/llm-attempts.log"
|
|
172
|
-
if cli call --tool pdf_tables_to_latex --args "{\"fileId\":\"${FILE_ID}\",\"pages\":[1],\"provider\":\"${PROVIDER}\",\"model\":\"${MODEL}\"}" --provider "${PROVIDER}" --model "${MODEL}" > "${OUT_DIR}/cli-tables-to-latex.json" 2> "${OUT_DIR}/cli-tables-to-latex.err"; then
|
|
173
|
-
if validate_tables_json "${OUT_DIR}/cli-tables-to-latex.json"; then
|
|
174
|
-
TABLES_OK=1
|
|
175
|
-
echo "{\"provider\":\"${PROVIDER}\",\"model\":\"${MODEL}\"}" > "${OUT_DIR}/tables-selected-model.json"
|
|
176
|
-
fi
|
|
177
|
-
fi
|
|
178
|
-
else
|
|
179
|
-
run_json "cli-tables-to-latex" cli call --tool pdf_tables_to_latex --args "{\"fileId\":\"${FILE_ID}\",\"pages\":[1]}"
|
|
180
|
-
fi
|
|
181
|
-
else
|
|
182
|
-
echo '{"skipped":true,"reason":"Set RUN_TABLES=1 to enable table-latex call"}' > "${OUT_DIR}/cli-tables-to-latex.json"
|
|
183
|
-
fi
|
|
184
|
-
|
|
185
|
-
if [[ "${REQUIRE_LLM_SUCCESS}" == "1" ]]; then
|
|
186
|
-
if [[ "${OCR_OK}" != "1" ]]; then
|
|
187
|
-
echo "OCR failed for configured model. See ${OUT_DIR}/cli-ocr-pages.err and llm-attempts.log" >&2
|
|
188
|
-
exit 1
|
|
189
|
-
fi
|
|
190
|
-
if [[ "${RUN_TABLES}" == "1" ]] && [[ "${TABLES_OK}" != "1" ]]; then
|
|
191
|
-
echo "Tables failed for configured model. See ${OUT_DIR}/cli-tables-to-latex.err and llm-attempts.log" >&2
|
|
192
|
-
exit 1
|
|
193
|
-
fi
|
|
194
|
-
fi
|
|
195
|
-
|
|
196
|
-
cat > "${OUT_DIR}/summary.txt" <<TXT
|
|
197
|
-
base_url=${BASE_URL}
|
|
198
|
-
input_pdf=${INPUT_PDF}
|
|
199
|
-
file_id=${FILE_ID}
|
|
200
|
-
model=${MODEL}
|
|
201
|
-
outputs_dir=${OUT_DIR}
|
|
202
|
-
TXT
|
|
203
|
-
|
|
204
|
-
ls -la "$OUT_DIR"
|
package/wrangler.toml
DELETED
|
@@ -1,19 +0,0 @@
|
|
|
1
|
-
name = "echo-pdf"
|
|
2
|
-
main = "src/worker.ts"
|
|
3
|
-
compatibility_date = "2026-03-06"
|
|
4
|
-
|
|
5
|
-
[assets]
|
|
6
|
-
directory = "./assets"
|
|
7
|
-
binding = "ASSETS"
|
|
8
|
-
|
|
9
|
-
[[r2_buckets]]
|
|
10
|
-
binding = "FILE_STORE_BUCKET"
|
|
11
|
-
bucket_name = "echo-pdf-files"
|
|
12
|
-
|
|
13
|
-
[[durable_objects.bindings]]
|
|
14
|
-
name = "FILE_STORE_DO"
|
|
15
|
-
class_name = "FileStoreDO"
|
|
16
|
-
|
|
17
|
-
[[migrations]]
|
|
18
|
-
tag = "v1"
|
|
19
|
-
new_sqlite_classes = ["FileStoreDO"]
|