@echofiles/echo-pdf 0.4.3 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. package/LICENSE +201 -0
  2. package/README.md +85 -562
  3. package/bin/echo-pdf.js +130 -525
  4. package/dist/file-utils.d.ts +0 -3
  5. package/dist/file-utils.js +0 -18
  6. package/dist/local/document.d.ts +10 -0
  7. package/dist/local/document.js +133 -0
  8. package/dist/local/index.d.ts +3 -135
  9. package/dist/local/index.js +2 -555
  10. package/dist/local/semantic.d.ts +2 -0
  11. package/dist/local/semantic.js +231 -0
  12. package/dist/local/shared.d.ts +50 -0
  13. package/dist/local/shared.js +173 -0
  14. package/dist/local/types.d.ts +183 -0
  15. package/dist/local/types.js +2 -0
  16. package/dist/node/pdfium-local.js +30 -6
  17. package/dist/pdf-config.js +2 -65
  18. package/dist/pdf-types.d.ts +1 -58
  19. package/dist/types.d.ts +1 -87
  20. package/echo-pdf.config.json +1 -21
  21. package/package.json +25 -22
  22. package/bin/lib/http.js +0 -97
  23. package/bin/lib/mcp-stdio.js +0 -99
  24. package/dist/auth.d.ts +0 -18
  25. package/dist/auth.js +0 -36
  26. package/dist/core/index.d.ts +0 -50
  27. package/dist/core/index.js +0 -7
  28. package/dist/file-ops.d.ts +0 -11
  29. package/dist/file-ops.js +0 -36
  30. package/dist/file-store-do.d.ts +0 -36
  31. package/dist/file-store-do.js +0 -298
  32. package/dist/http-error.d.ts +0 -9
  33. package/dist/http-error.js +0 -14
  34. package/dist/index.d.ts +0 -1
  35. package/dist/index.js +0 -1
  36. package/dist/mcp-server.d.ts +0 -3
  37. package/dist/mcp-server.js +0 -124
  38. package/dist/node/semantic-local.d.ts +0 -16
  39. package/dist/node/semantic-local.js +0 -113
  40. package/dist/pdf-agent.d.ts +0 -18
  41. package/dist/pdf-agent.js +0 -217
  42. package/dist/pdf-storage.d.ts +0 -8
  43. package/dist/pdf-storage.js +0 -86
  44. package/dist/pdfium-engine.d.ts +0 -9
  45. package/dist/pdfium-engine.js +0 -180
  46. package/dist/r2-file-store.d.ts +0 -20
  47. package/dist/r2-file-store.js +0 -176
  48. package/dist/response-schema.d.ts +0 -15
  49. package/dist/response-schema.js +0 -159
  50. package/dist/tool-registry.d.ts +0 -16
  51. package/dist/tool-registry.js +0 -175
  52. package/dist/worker.d.ts +0 -7
  53. package/dist/worker.js +0 -386
  54. package/scripts/export-fixtures.sh +0 -204
  55. package/wrangler.toml +0 -19
package/dist/worker.js DELETED
@@ -1,386 +0,0 @@
1
- import { normalizeReturnMode } from "./file-utils.js";
2
- import { FileStoreDO } from "./file-store-do.js";
3
- import { resolveModelForProvider, resolveProviderAlias } from "./agent-defaults.js";
4
- import { checkHeaderAuth } from "./auth.js";
5
- import { handleMcpRequest } from "./mcp-server.js";
6
- import { loadEchoPdfConfig } from "./pdf-config.js";
7
- import { getRuntimeFileStore } from "./pdf-storage.js";
8
- import { listProviderModels } from "./provider-client.js";
9
- import { buildToolOutputEnvelope } from "./response-schema.js";
10
- import { callTool, listToolSchemas } from "./tool-registry.js";
11
- const json = (data, status = 200) => new Response(JSON.stringify(data), {
12
- status,
13
- headers: {
14
- "Content-Type": "application/json; charset=utf-8",
15
- "Cache-Control": "no-store",
16
- },
17
- });
18
- const toError = (error) => error instanceof Error ? error.message : String(error);
19
- const errorStatus = (error) => {
20
- const status = error?.status;
21
- return typeof status === "number" && Number.isFinite(status) ? status : null;
22
- };
23
- const errorCode = (error) => {
24
- const code = error?.code;
25
- return typeof code === "string" && code.length > 0 ? code : null;
26
- };
27
- const errorDetails = (error) => error?.details;
28
- const jsonError = (error, fallbackStatus = 500) => {
29
- const status = errorStatus(error) ?? fallbackStatus;
30
- const code = errorCode(error);
31
- const details = errorDetails(error);
32
- return json({ error: toError(error), code, details }, status);
33
- };
34
- const readJson = async (request) => {
35
- try {
36
- const body = await request.json();
37
- if (typeof body === "object" && body !== null && !Array.isArray(body)) {
38
- return body;
39
- }
40
- return {};
41
- }
42
- catch {
43
- return {};
44
- }
45
- };
46
- const asObj = (value) => typeof value === "object" && value !== null && !Array.isArray(value)
47
- ? value
48
- : {};
49
- const resolvePublicBaseUrl = (request, configured) => typeof configured === "string" && configured.length > 0 ? configured : request.url;
50
- const sanitizeDownloadFilename = (filename) => {
51
- const cleaned = filename
52
- .replace(/[\r\n"]/g, "")
53
- .replace(/[^\x20-\x7E]+/g, "")
54
- .trim();
55
- return cleaned.length > 0 ? cleaned : "download.bin";
56
- };
57
- const sseResponse = (stream) => new Response(stream, {
58
- headers: {
59
- "Content-Type": "text/event-stream; charset=utf-8",
60
- "Cache-Control": "no-store",
61
- Connection: "keep-alive",
62
- },
63
- });
64
- const encodeSse = (event, data) => {
65
- const encoder = new TextEncoder();
66
- return encoder.encode(`event: ${event}\ndata: ${JSON.stringify(data)}\n\n`);
67
- };
68
- const isValidOperation = (value) => value === "extract_pages" || value === "ocr_pages" || value === "tables_to_latex";
69
- const toPdfOperation = (input, defaultProvider) => ({
70
- operation: isValidOperation(input.operation) ? input.operation : "extract_pages",
71
- fileId: typeof input.fileId === "string" ? input.fileId : undefined,
72
- url: typeof input.url === "string" ? input.url : undefined,
73
- base64: typeof input.base64 === "string" ? input.base64 : undefined,
74
- filename: typeof input.filename === "string" ? input.filename : undefined,
75
- pages: Array.isArray(input.pages) ? input.pages.map((v) => Number(v)) : [],
76
- renderScale: typeof input.renderScale === "number" ? input.renderScale : undefined,
77
- provider: typeof input.provider === "string" ? input.provider : defaultProvider,
78
- model: typeof input.model === "string" ? input.model : "",
79
- providerApiKeys: typeof input.providerApiKeys === "object" && input.providerApiKeys !== null
80
- ? input.providerApiKeys
81
- : undefined,
82
- returnMode: normalizeReturnMode(input.returnMode),
83
- prompt: typeof input.prompt === "string" ? input.prompt : undefined,
84
- });
85
- const toolNameByOperation = {
86
- extract_pages: "pdf_extract_pages",
87
- ocr_pages: "pdf_ocr_pages",
88
- tables_to_latex: "pdf_tables_to_latex",
89
- };
90
- const operationArgsFromRequest = (request) => {
91
- const args = {
92
- pages: request.pages,
93
- };
94
- if (request.fileId)
95
- args.fileId = request.fileId;
96
- if (request.url)
97
- args.url = request.url;
98
- if (request.base64)
99
- args.base64 = request.base64;
100
- if (request.filename)
101
- args.filename = request.filename;
102
- if (typeof request.renderScale === "number")
103
- args.renderScale = request.renderScale;
104
- if (request.returnMode)
105
- args.returnMode = request.returnMode;
106
- if (request.provider)
107
- args.provider = request.provider;
108
- if (request.model)
109
- args.model = request.model;
110
- if (request.prompt)
111
- args.prompt = request.prompt;
112
- return args;
113
- };
114
- const checkComputeAuth = (request, env, config) => checkHeaderAuth(request, env, {
115
- authHeader: config.service.computeAuth?.authHeader,
116
- authEnv: config.service.computeAuth?.authEnv,
117
- allowMissingSecret: false,
118
- misconfiguredCode: "COMPUTE_AUTH_MISCONFIGURED",
119
- unauthorizedCode: "UNAUTHORIZED",
120
- contextName: "compute endpoint",
121
- });
122
- export default {
123
- async fetch(request, env, ctx) {
124
- const url = new URL(request.url);
125
- const config = loadEchoPdfConfig(env);
126
- const runtimeStore = getRuntimeFileStore(env, config);
127
- const fileStore = runtimeStore.store;
128
- if (request.method === "GET" && url.pathname === "/health") {
129
- return json({ ok: true, service: config.service.name, now: new Date().toISOString() });
130
- }
131
- if (request.method === "GET" && url.pathname === "/config") {
132
- return json({
133
- service: config.service,
134
- agent: config.agent,
135
- providers: Object.entries(config.providers).map(([alias, provider]) => ({ alias, type: provider.type })),
136
- capabilities: {
137
- toolCatalogEndpoint: "/tools/catalog",
138
- toolCallEndpoint: "/tools/call",
139
- fileOpsEndpoint: "/api/files/op",
140
- fileUploadEndpoint: "/api/files/upload",
141
- fileStatsEndpoint: "/api/files/stats",
142
- fileCleanupEndpoint: "/api/files/cleanup",
143
- supportedReturnModes: ["inline", "file_id", "url"],
144
- },
145
- mcp: {
146
- serverName: config.mcp.serverName,
147
- version: config.mcp.version,
148
- authHeader: config.mcp.authHeader ?? null,
149
- },
150
- fileGet: {
151
- authHeader: config.service.fileGet?.authHeader ?? null,
152
- cacheTtlSeconds: config.service.fileGet?.cacheTtlSeconds ?? 300,
153
- },
154
- });
155
- }
156
- if (request.method === "GET" && url.pathname === "/tools/catalog") {
157
- return json({ tools: listToolSchemas() });
158
- }
159
- if (request.method === "POST" && url.pathname === "/tools/call") {
160
- const auth = checkComputeAuth(request, env, config);
161
- if (!auth.ok)
162
- return json({ error: auth.message, code: auth.code }, auth.status);
163
- const body = await readJson(request);
164
- const name = typeof body.name === "string" ? body.name : "";
165
- if (!name)
166
- return json({ error: "Missing required field: name" }, 400);
167
- try {
168
- const args = asObj(body.arguments);
169
- const preferredProvider = resolveProviderAlias(config, typeof body.provider === "string" ? body.provider : undefined);
170
- const preferredModel = resolveModelForProvider(config, preferredProvider, typeof body.model === "string" ? body.model : undefined);
171
- if (name === "pdf_ocr_pages" || name === "pdf_tables_to_latex") {
172
- if (typeof args.provider !== "string" || args.provider.length === 0) {
173
- args.provider = preferredProvider;
174
- }
175
- if (typeof args.model !== "string" || args.model.length === 0) {
176
- args.model = preferredModel;
177
- }
178
- }
179
- const result = await callTool(name, args, {
180
- config,
181
- env,
182
- fileStore,
183
- providerApiKeys: typeof body.providerApiKeys === "object" && body.providerApiKeys !== null
184
- ? body.providerApiKeys
185
- : undefined,
186
- });
187
- return json(buildToolOutputEnvelope(result, resolvePublicBaseUrl(request, config.service.publicBaseUrl)));
188
- }
189
- catch (error) {
190
- return jsonError(error, 500);
191
- }
192
- }
193
- if (request.method === "POST" && url.pathname === "/providers/models") {
194
- const auth = checkComputeAuth(request, env, config);
195
- if (!auth.ok)
196
- return json({ error: auth.message, code: auth.code }, auth.status);
197
- const body = await readJson(request);
198
- const provider = resolveProviderAlias(config, typeof body.provider === "string" ? body.provider : undefined);
199
- const runtimeKeys = typeof body.providerApiKeys === "object" && body.providerApiKeys !== null
200
- ? body.providerApiKeys
201
- : undefined;
202
- try {
203
- const models = await listProviderModels(config, env, provider, runtimeKeys);
204
- return json({ provider, models });
205
- }
206
- catch (error) {
207
- return jsonError(error, 500);
208
- }
209
- }
210
- if (request.method === "POST" && url.pathname === "/api/agent/run") {
211
- const auth = checkComputeAuth(request, env, config);
212
- if (!auth.ok)
213
- return json({ error: auth.message, code: auth.code }, auth.status);
214
- const body = await readJson(request);
215
- if (Object.hasOwn(body, "operation") && !isValidOperation(body.operation)) {
216
- return json({ error: "Invalid operation. Must be one of: extract_pages, ocr_pages, tables_to_latex" }, 400);
217
- }
218
- const requestPayload = toPdfOperation(body, config.agent.defaultProvider);
219
- try {
220
- const result = await callTool(toolNameByOperation[requestPayload.operation], operationArgsFromRequest(requestPayload), {
221
- config,
222
- env,
223
- fileStore,
224
- providerApiKeys: requestPayload.providerApiKeys,
225
- });
226
- return json(result);
227
- }
228
- catch (error) {
229
- return jsonError(error, 500);
230
- }
231
- }
232
- if (request.method === "POST" && url.pathname === "/api/agent/stream") {
233
- const auth = checkComputeAuth(request, env, config);
234
- if (!auth.ok)
235
- return json({ error: auth.message, code: auth.code }, auth.status);
236
- const body = await readJson(request);
237
- if (Object.hasOwn(body, "operation") && !isValidOperation(body.operation)) {
238
- return json({ error: "Invalid operation. Must be one of: extract_pages, ocr_pages, tables_to_latex" }, 400);
239
- }
240
- const requestPayload = toPdfOperation(body, config.agent.defaultProvider);
241
- const stream = new TransformStream();
242
- const writer = stream.writable.getWriter();
243
- let queue = Promise.resolve();
244
- const send = (event, data) => {
245
- queue = queue.then(() => writer.write(encodeSse(event, data))).catch(() => undefined);
246
- };
247
- const run = async () => {
248
- try {
249
- send("meta", { kind: "meta", startedAt: new Date().toISOString(), streaming: true });
250
- send("io", { kind: "io", direction: "input", content: requestPayload });
251
- const result = await callTool(toolNameByOperation[requestPayload.operation], operationArgsFromRequest(requestPayload), {
252
- config,
253
- env,
254
- fileStore,
255
- providerApiKeys: requestPayload.providerApiKeys,
256
- trace: (event) => send("step", event),
257
- });
258
- send("io", { kind: "io", direction: "output", content: "operation completed" });
259
- send("result", { kind: "result", output: result });
260
- send("done", { ok: true });
261
- }
262
- catch (error) {
263
- send("error", { kind: "error", message: toError(error) });
264
- send("done", { ok: false });
265
- }
266
- finally {
267
- await queue;
268
- await writer.close();
269
- }
270
- };
271
- ctx.waitUntil(run());
272
- return sseResponse(stream.readable);
273
- }
274
- if (request.method === "POST" && url.pathname === "/api/files/op") {
275
- const body = await readJson(request);
276
- try {
277
- const result = await callTool("file_ops", asObj(body), {
278
- config,
279
- env,
280
- fileStore,
281
- });
282
- return json(result);
283
- }
284
- catch (error) {
285
- return jsonError(error, 500);
286
- }
287
- }
288
- if (request.method === "POST" && url.pathname === "/api/files/upload") {
289
- try {
290
- const formData = await request.formData();
291
- const file = formData.get("file");
292
- if (!file || typeof file.arrayBuffer !== "function") {
293
- return json({ error: "Missing file field: file" }, 400);
294
- }
295
- const bytes = new Uint8Array(await file.arrayBuffer());
296
- const stored = await fileStore.put({
297
- filename: file.name || `upload-${Date.now()}.pdf`,
298
- mimeType: file.type || "application/pdf",
299
- bytes,
300
- });
301
- return json({ file: stored }, 200);
302
- }
303
- catch (error) {
304
- return jsonError(error, 500);
305
- }
306
- }
307
- if (request.method === "GET" && url.pathname === "/api/files/get") {
308
- const fileGetConfig = config.service.fileGet ?? {};
309
- const auth = checkHeaderAuth(request, env, {
310
- authHeader: fileGetConfig.authHeader,
311
- authEnv: fileGetConfig.authEnv,
312
- allowMissingSecret: env.ECHO_PDF_ALLOW_MISSING_AUTH_SECRET === "1",
313
- misconfiguredCode: "AUTH_MISCONFIGURED",
314
- unauthorizedCode: "UNAUTHORIZED",
315
- contextName: "file get",
316
- });
317
- if (!auth.ok) {
318
- return json({ error: auth.message, code: auth.code }, auth.status);
319
- }
320
- const fileId = url.searchParams.get("fileId") || "";
321
- if (!fileId)
322
- return json({ error: "Missing fileId" }, 400);
323
- const file = await fileStore.get(fileId);
324
- if (!file)
325
- return json({ error: "File not found" }, 404);
326
- const download = url.searchParams.get("download") === "1";
327
- const headers = new Headers();
328
- headers.set("Content-Type", file.mimeType);
329
- const cacheTtl = Number(fileGetConfig.cacheTtlSeconds ?? 300);
330
- const cacheControl = cacheTtl > 0
331
- ? `public, max-age=${Math.floor(cacheTtl)}, s-maxage=${Math.floor(cacheTtl)}`
332
- : "no-store";
333
- headers.set("Cache-Control", cacheControl);
334
- if (download) {
335
- headers.set("Content-Disposition", `attachment; filename=\"${sanitizeDownloadFilename(file.filename)}\"`);
336
- }
337
- return new Response(file.bytes, { status: 200, headers });
338
- }
339
- if (request.method === "GET" && url.pathname === "/api/files/stats") {
340
- try {
341
- return json(await runtimeStore.stats(), 200);
342
- }
343
- catch (error) {
344
- return json({ error: toError(error) }, 500);
345
- }
346
- }
347
- if (request.method === "POST" && url.pathname === "/api/files/cleanup") {
348
- try {
349
- return json(await runtimeStore.cleanup(), 200);
350
- }
351
- catch (error) {
352
- return json({ error: toError(error) }, 500);
353
- }
354
- }
355
- if (request.method === "POST" && url.pathname === "/mcp") {
356
- return await handleMcpRequest(request, env, config, fileStore);
357
- }
358
- if (request.method === "GET" && env.ASSETS) {
359
- const assetReq = url.pathname === "/"
360
- ? new Request(new URL("/index.html", url), request)
361
- : request;
362
- const asset = await env.ASSETS.fetch(assetReq);
363
- if (asset.status !== 404)
364
- return asset;
365
- }
366
- return json({
367
- error: "Not found",
368
- routes: {
369
- health: "GET /health",
370
- config: "GET /config",
371
- toolsCatalog: "GET /tools/catalog",
372
- toolCall: "POST /tools/call",
373
- models: "POST /providers/models",
374
- run: "POST /api/agent/run",
375
- stream: "POST /api/agent/stream",
376
- files: "POST /api/files/op",
377
- fileUpload: "POST /api/files/upload",
378
- fileGet: "GET /api/files/get?fileId=<id>",
379
- fileStats: "GET /api/files/stats",
380
- fileCleanup: "POST /api/files/cleanup",
381
- mcp: "POST /mcp",
382
- },
383
- }, 404);
384
- },
385
- };
386
- export { FileStoreDO };
@@ -1,204 +0,0 @@
1
- #!/usr/bin/env bash
2
- set -euo pipefail
3
-
4
- ROOT_DIR="$(cd "$(dirname "$0")/.." && pwd)"
5
- OUT_DIR="${ROOT_DIR}/fixtures/output"
6
- EXPORT_PORT="${EXPORT_PORT:-8798}"
7
- BASE_URL="${BASE_URL:-http://127.0.0.1:${EXPORT_PORT}}"
8
- INPUT_PDF="${INPUT_PDF:-${ROOT_DIR}/fixtures/input.pdf}"
9
- START_LOCAL_DEV="${START_LOCAL_DEV:-1}"
10
- RUN_TABLES="${RUN_TABLES:-1}"
11
- REQUIRE_LLM_SUCCESS="${REQUIRE_LLM_SUCCESS:-1}"
12
-
13
- mkdir -p "$OUT_DIR"
14
- rm -rf "${OUT_DIR:?}/"*
15
-
16
- if [[ -f "${ROOT_DIR}/../.env.local" ]]; then
17
- set -a
18
- # shellcheck source=/dev/null
19
- source "${ROOT_DIR}/../.env.local"
20
- set +a
21
- fi
22
-
23
- if [[ ! -f "${INPUT_PDF}" ]]; then
24
- echo "missing input pdf: ${INPUT_PDF}" >&2
25
- exit 1
26
- fi
27
-
28
- cli() {
29
- node "${ROOT_DIR}/bin/echo-pdf.js" "$@"
30
- }
31
-
32
- run_json() {
33
- local name="$1"
34
- shift
35
- if "$@" > "${OUT_DIR}/${name}.json" 2> "${OUT_DIR}/${name}.err"; then
36
- rm -f "${OUT_DIR}/${name}.err"
37
- else
38
- printf '{"ok":false,"error_file":"%s.err"}\n' "$name" > "${OUT_DIR}/${name}.json"
39
- fi
40
- }
41
-
42
- validate_ocr_json() {
43
- local json_file="$1"
44
- node -e 'const fs=require("fs");const j=JSON.parse(fs.readFileSync(process.argv[1],"utf8"));const pages=j?.data?.pages;if(!Array.isArray(pages)||pages.length===0)process.exit(1);const t=String(pages[0]?.text||"").trim();if(t.length===0)process.exit(1);' "$json_file"
45
- }
46
-
47
- validate_tables_json() {
48
- local json_file="$1"
49
- node -e 'const fs=require("fs");const j=JSON.parse(fs.readFileSync(process.argv[1],"utf8"));const pages=j?.data?.pages;if(!Array.isArray(pages)||pages.length===0)process.exit(1);const t=String(pages[0]?.latex||"").trim();if(t.length===0)process.exit(1);' "$json_file"
50
- }
51
-
52
- # 1) Save test logs locally (do not block artifact export on transient network failure)
53
- set +e
54
- {
55
- echo "[typecheck]"
56
- npm --prefix "$ROOT_DIR" run typecheck
57
- TYPECHECK_CODE=$?
58
- echo
59
- echo "[test]"
60
- npm --prefix "$ROOT_DIR" run test
61
- TEST_CODE=$?
62
- echo
63
- echo "[smoke]"
64
- npm --prefix "$ROOT_DIR" run smoke
65
- SMOKE_CODE=$?
66
- echo
67
- echo "typecheck_exit=${TYPECHECK_CODE}"
68
- echo "test_exit=${TEST_CODE}"
69
- echo "smoke_exit=${SMOKE_CODE}"
70
- } > "${OUT_DIR}/test.log" 2>&1
71
- set -e
72
-
73
- cat > "${OUT_DIR}/test-status.json" <<JSON
74
- {"typecheck":${TYPECHECK_CODE:-1},"test":${TEST_CODE:-1},"smoke":${SMOKE_CODE:-1}}
75
- JSON
76
-
77
- DEV_PID=""
78
- cleanup() {
79
- if [[ -n "${DEV_PID}" ]] && kill -0 "${DEV_PID}" >/dev/null 2>&1; then
80
- kill "${DEV_PID}" >/dev/null 2>&1 || true
81
- wait "${DEV_PID}" 2>/dev/null || true
82
- fi
83
- }
84
- trap cleanup EXIT
85
-
86
- if [[ "${START_LOCAL_DEV}" == "1" ]]; then
87
- npm --prefix "$ROOT_DIR" run dev -- --ip 127.0.0.1 --port "${EXPORT_PORT}" --inspector-port 0 > "${OUT_DIR}/export-local-dev.log" 2>&1 &
88
- DEV_PID=$!
89
- for _ in $(seq 1 120); do
90
- if node -e 'fetch(process.argv[1]+"/health").then(r=>process.exit(r.ok?0:1)).catch(()=>process.exit(1))' "${BASE_URL}" >/dev/null 2>&1; then
91
- break
92
- fi
93
- sleep 0.5
94
- done
95
- node -e 'fetch(process.argv[1]+"/health").then(r=>process.exit(r.ok?0:1)).catch(()=>process.exit(1))' "${BASE_URL}" >/dev/null
96
- fi
97
-
98
- # 2) Init CLI + provider settings
99
- cli init --service-url "$BASE_URL" > "${OUT_DIR}/cli-init.json"
100
-
101
- node -e 'const fs=require("fs");const cfg=JSON.parse(fs.readFileSync(process.argv[1],"utf8"));const entries=Object.entries(cfg.providers||{});const pick=(key)=>{const keys=[key];if(key.endsWith("_API_KEY"))keys.push(key.replace(/_API_KEY$/,"_KEY"));if(key.endsWith("_KEY"))keys.push(key.replace(/_KEY$/,"_API_KEY"));for(const k of keys){const v=process.env[k];if(typeof v==="string"&&v.trim())return {k,v:v.trim()};}return null;};const forced=String(process.env.SMOKE_LLM_PROVIDER||"").trim();if(forced&&cfg.providers?.[forced]){const found=pick(String(cfg.providers[forced].apiKeyEnv||""));if(found){process.stdout.write(JSON.stringify({provider:forced,apiKey:found.v,env:found.k,forced:true}));process.exit(0);}}const preferred=String(cfg.agent?.defaultProvider||"");const ordered=entries.sort((a,b)=>a[0]===preferred?-1:b[0]===preferred?1:0);for(const [alias,p] of ordered){const found=pick(String(p.apiKeyEnv||""));if(found){process.stdout.write(JSON.stringify({provider:alias,apiKey:found.v,env:found.k,forced:false}));process.exit(0);}}process.stdout.write(JSON.stringify({provider:preferred||"",apiKey:"",env:"",forced:false}));' "${ROOT_DIR}/echo-pdf.config.json" > "${OUT_DIR}/provider-selection.json"
102
- PROVIDER="$(node -e 'const fs=require("fs");const j=JSON.parse(fs.readFileSync(process.argv[1],"utf8"));process.stdout.write(String(j.provider||""))' "${OUT_DIR}/provider-selection.json")"
103
- PROVIDER_KEY="$(node -e 'const fs=require("fs");const j=JSON.parse(fs.readFileSync(process.argv[1],"utf8"));process.stdout.write(String(j.apiKey||""))' "${OUT_DIR}/provider-selection.json")"
104
- PREFERRED_MODEL="${SMOKE_LLM_MODEL:-${ECHO_PDF_DEFAULT_MODEL:-}}"
105
- if [[ -n "${PROVIDER}" ]] && [[ -n "${PROVIDER_KEY}" ]]; then
106
- cli provider set --provider "${PROVIDER}" --api-key "${PROVIDER_KEY}" > "${OUT_DIR}/provider-set.json"
107
- cli provider use --provider "${PROVIDER}" > "${OUT_DIR}/provider-use.json"
108
- else
109
- echo '{"warning":"No provider key found in env, LLM calls may fail"}' > "${OUT_DIR}/provider-warning.json"
110
- fi
111
-
112
- # 3) Pull models via CLI and select one
113
- if [[ -n "${PROVIDER}" ]]; then
114
- run_json "models" cli models --provider "${PROVIDER}"
115
- else
116
- echo '{"warning":"No provider selected, skip model list"}' > "${OUT_DIR}/models.json"
117
- fi
118
- MODEL="${PREFERRED_MODEL}"
119
- if [[ -n "$MODEL" ]] && [[ -n "${PROVIDER}" ]]; then
120
- if ! node -e 'const fs=require("fs");const file=process.argv[1];const model=process.argv[2];const j=JSON.parse(fs.readFileSync(file,"utf8"));const models=Array.isArray(j.models)?j.models:[];process.exit(models.includes(model)?0:1)' "${OUT_DIR}/models.json" "$MODEL"; then
121
- echo "Configured model not found in provider model list: ${MODEL}" >&2
122
- exit 1
123
- fi
124
- cli model set --provider "${PROVIDER}" --model "$MODEL" > "${OUT_DIR}/model-set.json"
125
- else
126
- echo '{"warning":"Missing ECHO_PDF_DEFAULT_MODEL / SMOKE_LLM_MODEL"}' > "${OUT_DIR}/model-warning.json"
127
- exit 1
128
- fi
129
-
130
- # 4) Upload the exact local fixture for subsequent CLI/MCP calls
131
- node -e 'const fs=require("fs"); const path=require("path"); (async()=>{ const base=process.argv[1]; const file=process.argv[2]; const bytes=fs.readFileSync(file); const fd=new FormData(); fd.set("file", new Blob([bytes], {type:"application/pdf"}), path.basename(file)); const res=await fetch(`${base}/api/files/upload`, {method:"POST", body:fd}); const txt=await res.text(); fs.writeFileSync(process.argv[3], txt); if(!res.ok){process.stderr.write(txt); process.exit(1);} })().catch((e)=>{console.error(String(e)); process.exit(1)})' "$BASE_URL" "$INPUT_PDF" "${OUT_DIR}/upload.json"
132
- FILE_ID="$(node -e 'const fs=require("fs");const j=JSON.parse(fs.readFileSync(process.argv[1],"utf8"));process.stdout.write(j.file?.id||"")' "${OUT_DIR}/upload.json")"
133
- if [[ -z "${FILE_ID}" ]]; then
134
- echo "upload did not return file id" >&2
135
- exit 1
136
- fi
137
-
138
- # 5) CLI tool calls
139
- run_json "tools-catalog" cli tools
140
- if [[ -n "${PROVIDER}" ]]; then
141
- run_json "cli-extract-pages" cli call --tool pdf_extract_pages --args "{\"fileId\":\"${FILE_ID}\",\"pages\":[1],\"returnMode\":\"inline\"}" --provider "${PROVIDER}" --model "${MODEL:-}"
142
- else
143
- run_json "cli-extract-pages" cli call --tool pdf_extract_pages --args "{\"fileId\":\"${FILE_ID}\",\"pages\":[1],\"returnMode\":\"inline\"}"
144
- fi
145
- node -e 'const fs=require("fs");const p=process.argv[1];const out=process.argv[2];const j=JSON.parse(fs.readFileSync(p,"utf8"));const d=j.data?.images?.[0]?.data||"";if(!d.startsWith("data:image/"))process.exit(1);fs.writeFileSync(out, Buffer.from(d.split(",")[1]||"","base64"));' "${OUT_DIR}/cli-extract-pages.json" "${OUT_DIR}/page-1-cli.png"
146
-
147
- # 6) MCP tool calls
148
- run_json "mcp-initialize" cli mcp initialize
149
- run_json "mcp-tools" cli mcp tools
150
- run_json "mcp-call-fileops" cli mcp call --tool file_ops --args '{"op":"list"}'
151
- run_json "mcp-extract-pages" cli mcp call --tool pdf_extract_pages --args "{\"fileId\":\"${FILE_ID}\",\"pages\":[1],\"returnMode\":\"inline\"}"
152
-
153
- # 7) LLM tool calls
154
- OCR_OK=0
155
- TABLES_OK=0
156
- if [[ -n "${PROVIDER}" ]]; then
157
- : > "${OUT_DIR}/llm-attempts.log"
158
- echo "[ocr] using provider=${PROVIDER} model=${MODEL}" >> "${OUT_DIR}/llm-attempts.log"
159
- if cli call --tool pdf_ocr_pages --args "{\"fileId\":\"${FILE_ID}\",\"pages\":[1],\"provider\":\"${PROVIDER}\",\"model\":\"${MODEL}\"}" --provider "${PROVIDER}" --model "${MODEL}" > "${OUT_DIR}/cli-ocr-pages.json" 2> "${OUT_DIR}/cli-ocr-pages.err"; then
160
- if validate_ocr_json "${OUT_DIR}/cli-ocr-pages.json"; then
161
- OCR_OK=1
162
- echo "{\"provider\":\"${PROVIDER}\",\"model\":\"${MODEL}\"}" > "${OUT_DIR}/ocr-selected-model.json"
163
- fi
164
- fi
165
- else
166
- run_json "cli-ocr-pages" cli call --tool pdf_ocr_pages --args "{\"fileId\":\"${FILE_ID}\",\"pages\":[1]}"
167
- fi
168
-
169
- if [[ "${RUN_TABLES}" == "1" ]]; then
170
- if [[ -n "${PROVIDER}" ]]; then
171
- echo "[tables] using provider=${PROVIDER} model=${MODEL}" >> "${OUT_DIR}/llm-attempts.log"
172
- if cli call --tool pdf_tables_to_latex --args "{\"fileId\":\"${FILE_ID}\",\"pages\":[1],\"provider\":\"${PROVIDER}\",\"model\":\"${MODEL}\"}" --provider "${PROVIDER}" --model "${MODEL}" > "${OUT_DIR}/cli-tables-to-latex.json" 2> "${OUT_DIR}/cli-tables-to-latex.err"; then
173
- if validate_tables_json "${OUT_DIR}/cli-tables-to-latex.json"; then
174
- TABLES_OK=1
175
- echo "{\"provider\":\"${PROVIDER}\",\"model\":\"${MODEL}\"}" > "${OUT_DIR}/tables-selected-model.json"
176
- fi
177
- fi
178
- else
179
- run_json "cli-tables-to-latex" cli call --tool pdf_tables_to_latex --args "{\"fileId\":\"${FILE_ID}\",\"pages\":[1]}"
180
- fi
181
- else
182
- echo '{"skipped":true,"reason":"Set RUN_TABLES=1 to enable table-latex call"}' > "${OUT_DIR}/cli-tables-to-latex.json"
183
- fi
184
-
185
- if [[ "${REQUIRE_LLM_SUCCESS}" == "1" ]]; then
186
- if [[ "${OCR_OK}" != "1" ]]; then
187
- echo "OCR failed for configured model. See ${OUT_DIR}/cli-ocr-pages.err and llm-attempts.log" >&2
188
- exit 1
189
- fi
190
- if [[ "${RUN_TABLES}" == "1" ]] && [[ "${TABLES_OK}" != "1" ]]; then
191
- echo "Tables failed for configured model. See ${OUT_DIR}/cli-tables-to-latex.err and llm-attempts.log" >&2
192
- exit 1
193
- fi
194
- fi
195
-
196
- cat > "${OUT_DIR}/summary.txt" <<TXT
197
- base_url=${BASE_URL}
198
- input_pdf=${INPUT_PDF}
199
- file_id=${FILE_ID}
200
- model=${MODEL}
201
- outputs_dir=${OUT_DIR}
202
- TXT
203
-
204
- ls -la "$OUT_DIR"
package/wrangler.toml DELETED
@@ -1,19 +0,0 @@
1
- name = "echo-pdf"
2
- main = "src/worker.ts"
3
- compatibility_date = "2026-03-06"
4
-
5
- [assets]
6
- directory = "./assets"
7
- binding = "ASSETS"
8
-
9
- [[r2_buckets]]
10
- binding = "FILE_STORE_BUCKET"
11
- bucket_name = "echo-pdf-files"
12
-
13
- [[durable_objects.bindings]]
14
- name = "FILE_STORE_DO"
15
- class_name = "FileStoreDO"
16
-
17
- [[migrations]]
18
- tag = "v1"
19
- new_sqlite_classes = ["FileStoreDO"]