@echofiles/echo-pdf 0.4.1 → 0.4.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64)
  1. package/README.md +75 -0
  2. package/dist/agent-defaults.d.ts +3 -0
  3. package/dist/agent-defaults.js +18 -0
  4. package/dist/auth.d.ts +18 -0
  5. package/dist/auth.js +24 -0
  6. package/dist/core/index.d.ts +50 -0
  7. package/dist/core/index.js +7 -0
  8. package/dist/file-ops.d.ts +11 -0
  9. package/dist/file-ops.js +36 -0
  10. package/dist/file-store-do.d.ts +36 -0
  11. package/dist/file-store-do.js +298 -0
  12. package/dist/file-utils.d.ts +6 -0
  13. package/dist/file-utils.js +36 -0
  14. package/dist/http-error.d.ts +9 -0
  15. package/dist/http-error.js +14 -0
  16. package/dist/index.d.ts +1 -0
  17. package/dist/index.js +1 -0
  18. package/dist/mcp-server.d.ts +3 -0
  19. package/dist/mcp-server.js +127 -0
  20. package/dist/pdf-agent.d.ts +18 -0
  21. package/dist/pdf-agent.js +217 -0
  22. package/dist/pdf-config.d.ts +4 -0
  23. package/dist/pdf-config.js +130 -0
  24. package/dist/pdf-storage.d.ts +8 -0
  25. package/dist/pdf-storage.js +86 -0
  26. package/dist/pdf-types.d.ts +79 -0
  27. package/dist/pdf-types.js +1 -0
  28. package/dist/pdfium-engine.d.ts +9 -0
  29. package/dist/pdfium-engine.js +180 -0
  30. package/dist/provider-client.d.ts +12 -0
  31. package/dist/provider-client.js +134 -0
  32. package/dist/provider-keys.d.ts +10 -0
  33. package/dist/provider-keys.js +27 -0
  34. package/dist/r2-file-store.d.ts +20 -0
  35. package/dist/r2-file-store.js +176 -0
  36. package/dist/response-schema.d.ts +15 -0
  37. package/dist/response-schema.js +159 -0
  38. package/dist/tool-registry.d.ts +16 -0
  39. package/dist/tool-registry.js +175 -0
  40. package/dist/types.d.ts +91 -0
  41. package/dist/types.js +1 -0
  42. package/dist/worker.d.ts +7 -0
  43. package/dist/worker.js +366 -0
  44. package/package.json +22 -4
  45. package/wrangler.toml +1 -1
  46. package/src/agent-defaults.ts +0 -25
  47. package/src/file-ops.ts +0 -50
  48. package/src/file-store-do.ts +0 -349
  49. package/src/file-utils.ts +0 -43
  50. package/src/http-error.ts +0 -21
  51. package/src/index.ts +0 -415
  52. package/src/mcp-server.ts +0 -171
  53. package/src/pdf-agent.ts +0 -252
  54. package/src/pdf-config.ts +0 -143
  55. package/src/pdf-storage.ts +0 -109
  56. package/src/pdf-types.ts +0 -85
  57. package/src/pdfium-engine.ts +0 -207
  58. package/src/provider-client.ts +0 -176
  59. package/src/provider-keys.ts +0 -44
  60. package/src/r2-file-store.ts +0 -195
  61. package/src/response-schema.ts +0 -182
  62. package/src/tool-registry.ts +0 -203
  63. package/src/types.ts +0 -40
  64. package/src/wasm.d.ts +0 -4
@@ -0,0 +1,3 @@
1
import type { Env, FileStore } from "./types.js";
import type { EchoPdfConfig } from "./pdf-types.js";
/**
 * Handles one MCP JSON-RPC request (initialize, tools/list, tools/call)
 * and returns a JSON-RPC 2.0 HTTP response.
 */
export declare const handleMcpRequest: (request: Request, env: Env, config: EchoPdfConfig, fileStore: FileStore) => Promise<Response>;
@@ -0,0 +1,127 @@
1
+ import { checkHeaderAuth } from "./auth.js";
2
+ import { buildMcpContent, buildToolOutputEnvelope } from "./response-schema.js";
3
+ import { callTool, listToolSchemas } from "./tool-registry.js";
4
// Serialize a successful JSON-RPC 2.0 result envelope as an HTTP 200 response.
const ok = (id, result) => {
  const envelope = { jsonrpc: "2.0", id: id ?? null, result };
  return new Response(JSON.stringify(envelope), {
    headers: { "Content-Type": "application/json" },
  });
};
9
// Serialize a JSON-RPC 2.0 error envelope (HTTP 400); `data` is attached only
// when truthy so empty diagnostics do not appear in the payload.
const err = (id, code, message, data) => {
  const error = data ? { code, message, data } : { code, message };
  const envelope = { jsonrpc: "2.0", id: id ?? null, error };
  return new Response(JSON.stringify(envelope), {
    status: 400,
    headers: { "Content-Type": "application/json" },
  });
};
14
// Coerce an arbitrary value to a plain object; null, arrays, and non-objects
// all collapse to {}.
const asObj = (v) => {
  if (v === null || typeof v !== "object" || Array.isArray(v)) {
    return {};
  }
  return v;
};
15
// Prefer an explicitly configured public base URL; otherwise fall back to the
// URL of the incoming request.
const resolvePublicBaseUrl = (request, configured) => {
  if (typeof configured === "string" && configured.length > 0) {
    return configured;
  }
  return request.url;
};
16
// For pdf_extract_pages, default returnMode to "url" when the caller did not
// supply a non-empty string; every other tool's args pass through untouched.
const prepareMcpToolArgs = (toolName, args) => {
  if (toolName !== "pdf_extract_pages") {
    return args;
  }
  const requested = typeof args.returnMode === "string" ? args.returnMode : "";
  return requested ? args : { ...args, returnMode: "url" };
};
25
/**
 * Entry point for MCP JSON-RPC requests.
 * Flow: header auth -> JSON parse -> JSON-RPC envelope validation -> method
 * dispatch (initialize, tools/list, tools/call). Failures are mapped onto
 * JSON-RPC error codes: -32700 parse, -32600 invalid request, -32601
 * method/tool not found, -32602 invalid params, -32000 server error.
 */
export const handleMcpRequest = async (request, env, config, fileStore) => {
    // Authenticate via the configured header/secret before reading the body.
    const auth = checkHeaderAuth(request, env, {
        authHeader: config.mcp.authHeader,
        authEnv: config.mcp.authEnv,
        allowMissingSecret: env.ECHO_PDF_ALLOW_MISSING_AUTH_SECRET === "1",
        misconfiguredCode: "AUTH_MISCONFIGURED",
        unauthorizedCode: "UNAUTHORIZED",
        contextName: "MCP",
    });
    if (!auth.ok) {
        // Auth failures use a plain JSON error body, not a JSON-RPC envelope.
        return new Response(JSON.stringify({ error: auth.message, code: auth.code }), {
            status: auth.status,
            headers: { "Content-Type": "application/json" },
        });
    }
    let body;
    try {
        body = (await request.json());
    }
    catch {
        return err(null, -32700, "Parse error");
    }
    if (typeof body !== "object" || body === null) {
        return err(null, -32600, "Invalid Request");
    }
    if (body.jsonrpc !== "2.0") {
        return err(body.id ?? null, -32600, "Invalid Request: jsonrpc must be '2.0'");
    }
    const method = body.method ?? "";
    const id = body.id ?? null;
    if (typeof method !== "string" || method.length === 0) {
        return err(id, -32600, "Invalid Request: method is required");
    }
    // Notifications expect no response body.
    if (method.startsWith("notifications/")) {
        return new Response(null, { status: 204 });
    }
    const params = asObj(body.params);
    if (method === "initialize") {
        return ok(id, {
            protocolVersion: "2024-11-05",
            serverInfo: {
                name: config.mcp.serverName,
                version: config.mcp.version,
            },
            capabilities: {
                tools: {},
            },
        });
    }
    if (method === "tools/list") {
        // Advertise only the public fields of each registered tool schema.
        return ok(id, { tools: listToolSchemas().map((tool) => ({
                name: tool.name,
                description: tool.description,
                inputSchema: tool.inputSchema,
            })) });
    }
    if (method !== "tools/call") {
        return err(id, -32601, `Unsupported method: ${method}`);
    }
    const toolName = typeof params.name === "string" ? params.name : "";
    // Apply per-tool argument defaults (e.g. returnMode for pdf_extract_pages).
    const args = prepareMcpToolArgs(toolName, asObj(params.arguments));
    if (!toolName) {
        return err(id, -32602, "Invalid params: name is required", {
            code: "INVALID_PARAMS",
            status: 400,
        });
    }
    try {
        const result = await callTool(toolName, args, {
            config,
            env,
            fileStore,
        });
        // Wrap the tool result and resolve relative URLs against the base URL.
        const envelope = buildToolOutputEnvelope(result, resolvePublicBaseUrl(request, config.service.publicBaseUrl));
        return ok(id, { content: buildMcpContent(envelope) });
    }
    catch (error) {
        const message = error instanceof Error ? error.message : String(error);
        // Errors may carry HTTP-style status/code/details; default to 500.
        const status = error?.status;
        const stableStatus = typeof status === "number" && Number.isFinite(status) ? status : 500;
        const code = error?.code;
        const details = error?.details;
        if (message.startsWith("Unknown tool:")) {
            return err(id, -32601, message, {
                code: typeof code === "string" ? code : "TOOL_NOT_FOUND",
                status: 404,
                details,
            });
        }
        // 4xx -> invalid params; everything else -> generic server error.
        if (stableStatus >= 400 && stableStatus < 500) {
            return err(id, -32602, message, {
                code: typeof code === "string" ? code : "INVALID_PARAMS",
                status: stableStatus,
                details,
            });
        }
        return err(id, -32000, message, {
            code: typeof code === "string" ? code : "INTERNAL_ERROR",
            status: stableStatus,
            details,
        });
    }
};
@@ -0,0 +1,18 @@
1
import type { Env, FileStore } from "./types.js";
import type { AgentTraceEvent, EchoPdfConfig, PdfOperationRequest } from "./pdf-types.js";
/** Runtime wiring for agent runs: the backing file store plus optional tracing. */
interface RuntimeOptions {
    readonly trace?: (event: AgentTraceEvent) => void;
    readonly fileStore: FileStore;
}
/** Resolve a fileId/url/base64 payload into stored PDF bytes and metadata. */
export declare const ingestPdfFromPayload: (config: EchoPdfConfig, input: {
    readonly fileId?: string;
    readonly url?: string;
    readonly base64?: string;
    readonly filename?: string;
}, opts: RuntimeOptions) => Promise<{
    id: string;
    filename: string;
    bytes: Uint8Array;
}>;
/** Execute one PDF operation (extract_pages | ocr_pages | tables_to_latex). */
export declare const runPdfAgent: (config: EchoPdfConfig, env: Env, request: PdfOperationRequest, opts: RuntimeOptions) => Promise<unknown>;
export {};
@@ -0,0 +1,217 @@
1
+ import { resolveModelForProvider, resolveProviderAlias } from "./agent-defaults.js";
2
+ import { fromBase64, normalizeReturnMode, toDataUrl } from "./file-utils.js";
3
+ import { badRequest, notFound, unprocessable } from "./http-error.js";
4
+ import { extractPdfPageText, getPdfPageCount, renderPdfPageToPng, toBytes } from "./pdfium-engine.js";
5
+ import { visionRecognize } from "./provider-client.js";
6
// Emit a "step" event through the optional trace callback; no-op when tracing
// is not configured on the runtime options.
const traceStep = (opts, phase, name, payload, level) => {
  if (!opts.trace) {
    return;
  }
  opts.trace({ kind: "step", phase, name, payload, level });
};
11
// Validate requested page numbers against the document's page count and the
// per-request limit, then return them deduplicated in ascending order.
const ensurePages = (pages, pageCount, maxPages) => {
  if (pages.length === 0) {
    throw badRequest("PAGES_REQUIRED", "At least one page is required");
  }
  if (pages.length > maxPages) {
    throw badRequest("TOO_MANY_PAGES", `Page count exceeds maxPagesPerRequest (${maxPages})`, {
      maxPagesPerRequest: maxPages,
      providedPages: pages.length,
    });
  }
  // Pages are 1-based integers within the document.
  const inRange = (page) => Number.isInteger(page) && page >= 1 && page <= pageCount;
  for (const page of pages) {
    if (!inRange(page)) {
      throw badRequest("PAGE_OUT_OF_RANGE", `Page ${page} out of range 1..${pageCount}`, {
        page,
        min: 1,
        max: pageCount,
      });
    }
  }
  const unique = new Set(pages);
  return Array.from(unique).sort((a, b) => a - b);
};
31
/**
 * Resolve a PDF payload into stored bytes plus identity metadata.
 * Precedence: fileId (must already exist in the store) > url (fetched) >
 * base64 (decoded). New bytes are size-checked against maxPdfBytes and then
 * persisted to the file store before returning.
 */
export const ingestPdfFromPayload = async (config, input, opts) => {
    // An existing stored file short-circuits url/base64 handling.
    if (input.fileId) {
        const existing = await opts.fileStore.get(input.fileId);
        if (!existing) {
            throw notFound("FILE_NOT_FOUND", `File not found: ${input.fileId}`, { fileId: input.fileId });
        }
        return {
            id: existing.id,
            filename: existing.filename,
            bytes: existing.bytes,
        };
    }
    let bytes = null;
    let filename = input.filename ?? "document.pdf";
    if (input.url) {
        traceStep(opts, "start", "file.fetch.url", { url: input.url });
        try {
            bytes = await toBytes(input.url);
        }
        catch (error) {
            throw badRequest("URL_FETCH_FAILED", `Unable to fetch PDF from url: ${error instanceof Error ? error.message : String(error)}`);
        }
        // Best-effort: derive a filename from the URL's last path segment.
        try {
            const u = new URL(input.url);
            filename = decodeURIComponent(u.pathname.split("/").pop() || filename);
        }
        catch {
            // ignore URL parse failure
        }
        traceStep(opts, "end", "file.fetch.url", { sizeBytes: bytes.byteLength });
    }
    else if (input.base64) {
        traceStep(opts, "start", "file.decode.base64");
        bytes = fromBase64(input.base64);
        traceStep(opts, "end", "file.decode.base64", { sizeBytes: bytes.byteLength });
    }
    if (!bytes) {
        throw badRequest("MISSING_FILE_INPUT", "Missing file input. Provide fileId, url or base64");
    }
    // Enforce the configured PDF size limit before persisting.
    if (bytes.byteLength > config.service.maxPdfBytes) {
        throw badRequest("PDF_TOO_LARGE", `PDF exceeds max size (${config.service.maxPdfBytes} bytes)`, {
            maxPdfBytes: config.service.maxPdfBytes,
            sizeBytes: bytes.byteLength,
        });
    }
    const meta = await opts.fileStore.put({
        filename,
        mimeType: "application/pdf",
        bytes,
    });
    traceStep(opts, "end", "file.stored", { fileId: meta.id, sizeBytes: meta.sizeBytes });
    return {
        id: meta.id,
        filename: meta.filename,
        bytes,
    };
};
88
// Normalize the caller-supplied return mode via the shared file-utils helper.
const resolveReturnMode = (value) => normalizeReturnMode(value);
89
// Trim the text and, when the whole string is one ```lang fenced block, return
// only the trimmed fence contents; otherwise return the trimmed text itself.
const stripCodeFences = (value) => {
  const trimmed = value.trim();
  const match = /^```[a-zA-Z0-9_-]*\n([\s\S]*?)\n```$/.exec(trimmed);
  const inner = match?.[1];
  return typeof inner === "string" ? inner.trim() : trimmed;
};
94
// Pull every \begin{tabular}...\end{tabular} block out of the (possibly
// code-fenced) model output; returns "" when no tabular block is present.
const extractTabularLatex = (value) => {
  const cleaned = stripCodeFences(value);
  const blocks = cleaned.match(/\\begin\{tabular\}[\s\S]*?\\end\{tabular\}/g) ?? [];
  if (blocks.length === 0) {
    return "";
  }
  return blocks.map((block) => block.trim()).join("\n\n");
};
101
/**
 * Execute one PDF operation end-to-end:
 *  - ingest the input (fileId/url/base64) into the file store,
 *  - validate the requested pages against the document and config limits,
 *  - dispatch on request.operation:
 *      extract_pages   -> render pages to PNG (inline/file_id/url return modes)
 *      ocr_pages       -> render + vision-model OCR, falling back to the page's
 *                         extracted text when the model returns nothing
 *      tables_to_latex -> render + vision model; requires \begin{tabular} output
 */
export const runPdfAgent = async (config, env, request, opts) => {
    traceStep(opts, "start", "pdf.operation", { operation: request.operation });
    const file = await ingestPdfFromPayload(config, request, opts);
    const pageCount = await getPdfPageCount(config, file.bytes);
    traceStep(opts, "log", "pdf.meta", { fileId: file.id, pageCount });
    // Validated pages come back deduplicated and sorted ascending (1-based).
    const pages = ensurePages(request.pages, pageCount, config.service.maxPagesPerRequest);
    const scale = request.renderScale ?? config.service.defaultRenderScale;
    const returnMode = resolveReturnMode(request.returnMode);
    if (request.operation === "extract_pages") {
        const images = [];
        for (const page of pages) {
            traceStep(opts, "start", "render.page", { page });
            // The render engine is called with a zero-based page index.
            const rendered = await renderPdfPageToPng(config, file.bytes, page - 1, scale);
            if (returnMode === "file_id") {
                const stored = await opts.fileStore.put({
                    filename: `${file.filename}-p${page}.png`,
                    mimeType: "image/png",
                    bytes: rendered.png,
                });
                images.push({ page, mimeType: "image/png", fileId: stored.id });
            }
            else if (returnMode === "url") {
                const stored = await opts.fileStore.put({
                    filename: `${file.filename}-p${page}.png`,
                    mimeType: "image/png",
                    bytes: rendered.png,
                });
                // Relative URL; downstream callers resolve it against the base URL.
                images.push({
                    page,
                    mimeType: "image/png",
                    fileId: stored.id,
                    url: `/api/files/get?fileId=${encodeURIComponent(stored.id)}`,
                });
            }
            else {
                // Inline mode: embed the PNG directly as a data: URL.
                images.push({
                    page,
                    mimeType: "image/png",
                    data: toDataUrl(rendered.png, "image/png"),
                });
            }
            traceStep(opts, "end", "render.page", { page, width: rendered.width, height: rendered.height });
        }
        const result = { fileId: file.id, pageCount, returnMode, images };
        traceStep(opts, "end", "pdf.operation", { operation: request.operation });
        return result;
    }
    // OCR and table extraction both need a resolved provider/model pair.
    const providerAlias = resolveProviderAlias(config, request.provider);
    const model = resolveModelForProvider(config, providerAlias, request.model);
    if (!model) {
        throw badRequest("MODEL_REQUIRED", "model is required for OCR or table extraction; set agent.defaultModel");
    }
    if (request.operation === "ocr_pages") {
        const results = [];
        for (const page of pages) {
            traceStep(opts, "start", "ocr.page", { page });
            const rendered = await renderPdfPageToPng(config, file.bytes, page - 1, scale);
            const imageDataUrl = toDataUrl(rendered.png, "image/png");
            // Embedded page text serves as a fallback for an empty model answer.
            const fallbackText = await extractPdfPageText(config, file.bytes, page - 1);
            const prompt = request.prompt?.trim() || config.agent.ocrPrompt;
            const llmText = await visionRecognize({
                config,
                env,
                providerAlias,
                model,
                prompt,
                imageDataUrl,
                runtimeApiKeys: request.providerApiKeys,
            });
            const text = stripCodeFences(llmText || fallbackText || "");
            results.push({ page, text });
            traceStep(opts, "end", "ocr.page", { page, chars: text.length });
        }
        const result = {
            fileId: file.id,
            pageCount,
            provider: providerAlias,
            model,
            pages: results,
        };
        traceStep(opts, "end", "pdf.operation", { operation: request.operation });
        return result;
    }
    // Remaining operation: tables_to_latex.
    const tables = [];
    for (const page of pages) {
        traceStep(opts, "start", "table.page", { page });
        const rendered = await renderPdfPageToPng(config, file.bytes, page - 1, scale);
        const imageDataUrl = toDataUrl(rendered.png, "image/png");
        const prompt = request.prompt?.trim() || config.agent.tablePrompt;
        const rawLatex = await visionRecognize({
            config,
            env,
            providerAlias,
            model,
            prompt,
            imageDataUrl,
            runtimeApiKeys: request.providerApiKeys,
        });
        const latex = extractTabularLatex(rawLatex);
        if (!latex) {
            // No \begin{tabular} block in the model output -> 422 to the caller.
            throw unprocessable("TABLE_LATEX_MISSING", `table extraction did not return valid LaTeX tabular for page ${page}`, {
                page,
            });
        }
        tables.push({ page, latex });
        traceStep(opts, "end", "table.page", { page, chars: latex.length });
    }
    const result = {
        fileId: file.id,
        pageCount,
        provider: providerAlias,
        model,
        pages: tables,
    };
    traceStep(opts, "end", "pdf.operation", { operation: request.operation });
    return result;
};
@@ -0,0 +1,4 @@
1
import type { Env } from "./types.js";
import type { EchoPdfConfig } from "./pdf-types.js";
/** Load, env-interpolate, apply env overrides to, and validate the service config. */
export declare const loadEchoPdfConfig: (env: Env) => EchoPdfConfig;
/** Read a required env var (accepts *_KEY / *_API_KEY aliases); throws when absent. */
export declare const readRequiredEnv: (env: Env, key: string) => string;
@@ -0,0 +1,130 @@
1
+ import rawConfig from "../echo-pdf.config.json" with { type: "json" };
2
// Matches ${VAR_NAME} placeholders (uppercase letters, digits, underscore).
const ENV_PATTERN = /\$\{([A-Z0-9_]+)\}/g;
// True only for plain objects (arrays and null excluded).
const isObject = (value) => {
  return value !== null && typeof value === "object" && !Array.isArray(value);
};
// Replace each ${NAME} with env[NAME] when it is a string; otherwise keep the
// placeholder verbatim so unresolved variables stay visible downstream.
const interpolateEnv = (input, env) => {
  return input.replace(ENV_PATTERN, (placeholder, name) => {
    const resolved = env[name];
    if (typeof resolved === "string") {
      return resolved;
    }
    return `\${${name}}`;
  });
};
8
// Walk a JSON-like value and interpolate ${VAR} references inside every
// string, recursing through arrays and plain objects; all other values pass
// through unchanged.
const resolveEnvRefs = (value, env) => {
  if (typeof value === "string") {
    return interpolateEnv(value, env);
  }
  if (Array.isArray(value)) {
    return value.map((entry) => resolveEnvRefs(entry, env));
  }
  if (isObject(value)) {
    const entries = Object.entries(value).map(([key, nested]) => [key, resolveEnvRefs(nested, env)]);
    return Object.fromEntries(entries);
  }
  return value;
};
22
// Assert structural invariants of the resolved config; throws on the first
// violation, otherwise returns the config unchanged.
const validateConfig = (config) => {
    if (!config.service?.name)
        throw new Error("service.name is required");
    if (!config.pdfium?.wasmUrl)
        throw new Error("pdfium.wasmUrl is required");
    if (!config.service?.storage)
        throw new Error("service.storage is required");
    // publicBaseUrl is optional, but when set it must be an absolute http(s) URL.
    if (typeof config.service.publicBaseUrl === "string" &&
        config.service.publicBaseUrl.length > 0 &&
        !/^https?:\/\//.test(config.service.publicBaseUrl)) {
        throw new Error("service.publicBaseUrl must start with http:// or https://");
    }
    if (typeof config.service.fileGet?.cacheTtlSeconds === "number" && config.service.fileGet.cacheTtlSeconds < 0) {
        throw new Error("service.fileGet.cacheTtlSeconds must be >= 0");
    }
    // Storage limits must be positive, finite, and mutually consistent:
    // maxTotalBytes >= maxFileBytes >= maxPdfBytes.
    if (!Number.isFinite(config.service.storage.maxFileBytes) || config.service.storage.maxFileBytes <= 0) {
        throw new Error("service.storage.maxFileBytes must be positive");
    }
    if (config.service.storage.maxFileBytes < config.service.maxPdfBytes) {
        throw new Error("service.storage.maxFileBytes must be >= service.maxPdfBytes");
    }
    if (!Number.isFinite(config.service.storage.maxTotalBytes) || config.service.storage.maxTotalBytes <= 0) {
        throw new Error("service.storage.maxTotalBytes must be positive");
    }
    if (config.service.storage.maxTotalBytes < config.service.storage.maxFileBytes) {
        throw new Error("service.storage.maxTotalBytes must be >= maxFileBytes");
    }
    if (!Number.isFinite(config.service.storage.ttlHours) || config.service.storage.ttlHours <= 0) {
        throw new Error("service.storage.ttlHours must be positive");
    }
    if (!Number.isFinite(config.service.storage.cleanupBatchSize) || config.service.storage.cleanupBatchSize <= 0) {
        throw new Error("service.storage.cleanupBatchSize must be positive");
    }
    // The agent's default provider must exist in the providers map.
    if (!config.agent?.defaultProvider)
        throw new Error("agent.defaultProvider is required");
    if (!config.providers?.[config.agent.defaultProvider]) {
        throw new Error(`default provider "${config.agent.defaultProvider}" missing`);
    }
    if (typeof config.agent.defaultModel !== "string") {
        throw new Error("agent.defaultModel must be a string");
    }
    return config;
};
65
/**
 * Build the runtime configuration:
 *  1. start from ECHO_PDF_CONFIG_JSON (when set) or the bundled JSON file,
 *  2. interpolate ${VAR} references from env,
 *  3. apply individual env overrides (default provider/model, public base URL,
 *     file-get auth header/env, cache TTL),
 *  4. validate the result (throws on invalid config).
 */
export const loadEchoPdfConfig = (env) => {
    const fromEnv = env.ECHO_PDF_CONFIG_JSON?.trim();
    const configJson = fromEnv ? JSON.parse(fromEnv) : rawConfig;
    const resolved = resolveEnvRefs(configJson, env);
    const providerOverride = env.ECHO_PDF_DEFAULT_PROVIDER;
    const modelOverride = env.ECHO_PDF_DEFAULT_MODEL;
    const publicBaseUrlOverride = env.ECHO_PDF_PUBLIC_BASE_URL;
    const fileGetAuthHeaderOverride = env.ECHO_PDF_FILE_GET_AUTH_HEADER;
    const fileGetAuthEnvOverride = env.ECHO_PDF_FILE_GET_AUTH_ENV;
    const fileGetCacheTtlOverride = env.ECHO_PDF_FILE_GET_CACHE_TTL_SECONDS;
    const withOverrides = {
        ...resolved,
        service: {
            ...resolved.service,
            // Each string override applies only when set to a non-empty trimmed value.
            publicBaseUrl: typeof publicBaseUrlOverride === "string" && publicBaseUrlOverride.trim().length > 0
                ? publicBaseUrlOverride.trim()
                : resolved.service.publicBaseUrl,
            fileGet: {
                authHeader: typeof fileGetAuthHeaderOverride === "string" && fileGetAuthHeaderOverride.trim().length > 0
                    ? fileGetAuthHeaderOverride.trim()
                    : resolved.service.fileGet?.authHeader,
                authEnv: typeof fileGetAuthEnvOverride === "string" && fileGetAuthEnvOverride.trim().length > 0
                    ? fileGetAuthEnvOverride.trim()
                    : resolved.service.fileGet?.authEnv,
                // TTL override must parse to a finite number >= 0; it is floored.
                // Invalid overrides fall back to the config-file value.
                cacheTtlSeconds: (() => {
                    if (typeof fileGetCacheTtlOverride === "string" && fileGetCacheTtlOverride.trim().length > 0) {
                        const value = Number(fileGetCacheTtlOverride);
                        return Number.isFinite(value) && value >= 0 ? Math.floor(value) : resolved.service.fileGet?.cacheTtlSeconds;
                    }
                    return resolved.service.fileGet?.cacheTtlSeconds;
                })(),
            },
        },
        agent: {
            ...resolved.agent,
            defaultProvider: typeof providerOverride === "string" && providerOverride.trim().length > 0
                ? providerOverride.trim()
                : resolved.agent.defaultProvider,
            defaultModel: typeof modelOverride === "string" && modelOverride.trim().length > 0
                ? modelOverride.trim()
                : resolved.agent.defaultModel,
        },
    };
    return validateConfig(withOverrides);
};
110
// Read a required env var, trimming whitespace. For backward compatibility,
// *_API_KEY falls back to the *_KEY spelling and vice versa; throws when no
// candidate holds a non-empty value.
export const readRequiredEnv = (env, key) => {
  const lookup = (name) => {
    const raw = env[name];
    if (typeof raw !== "string") {
      return null;
    }
    const trimmed = raw.trim();
    return trimmed.length > 0 ? trimmed : null;
  };
  // Candidate names in the same precedence order as the legacy lookup chain.
  const candidates = [key];
  if (key.endsWith("_API_KEY")) {
    candidates.push(key.replace(/_API_KEY$/, "_KEY"));
  }
  if (key.endsWith("_KEY")) {
    candidates.push(key.replace(/_KEY$/, "_API_KEY"));
  }
  for (const name of candidates) {
    const found = lookup(name);
    if (found) {
      return found;
    }
  }
  throw new Error(`Missing required env var "${key}"`);
};
@@ -0,0 +1,8 @@
1
import type { EchoPdfConfig } from "./pdf-types.js";
import type { Env, FileStore } from "./types.js";
/** A file store plus backend-specific stats/cleanup accessors. */
export interface RuntimeFileStoreBundle {
    readonly store: FileStore;
    stats: () => Promise<unknown>;
    cleanup: () => Promise<unknown>;
}
/** Choose the storage backend by binding priority: R2, then Durable Object, then memory. */
export declare const getRuntimeFileStore: (env: Env, config: EchoPdfConfig) => RuntimeFileStoreBundle;
@@ -0,0 +1,86 @@
1
+ import { DurableObjectFileStore } from "./file-store-do.js";
2
+ import { R2FileStore } from "./r2-file-store.js";
3
+ class InMemoryFileStore {
4
+ store = new Map();
5
+ async put(input) {
6
+ const id = crypto.randomUUID();
7
+ const record = {
8
+ id,
9
+ filename: input.filename,
10
+ mimeType: input.mimeType,
11
+ sizeBytes: input.bytes.byteLength,
12
+ createdAt: new Date().toISOString(),
13
+ bytes: input.bytes,
14
+ };
15
+ this.store.set(id, record);
16
+ return this.toMeta(record);
17
+ }
18
+ async get(fileId) {
19
+ return this.store.get(fileId) ?? null;
20
+ }
21
+ async list() {
22
+ return [...this.store.values()].map((record) => this.toMeta(record));
23
+ }
24
+ async delete(fileId) {
25
+ return this.store.delete(fileId);
26
+ }
27
+ toMeta(record) {
28
+ return {
29
+ id: record.id,
30
+ filename: record.filename,
31
+ mimeType: record.mimeType,
32
+ sizeBytes: record.sizeBytes,
33
+ createdAt: record.createdAt,
34
+ };
35
+ }
36
+ }
37
// Module-level in-memory fallback shared across requests within one isolate.
const fallbackStore = new InMemoryFileStore();
// Largest single file accepted when the Durable Object backend is in use.
const DO_SAFE_MAX_FILE_BYTES = 1_200_000;
/**
 * Pick the file-store backend by binding priority:
 * R2 bucket (FILE_STORE_BUCKET) > Durable Object (FILE_STORE_DO) > in-memory.
 * Returns the store together with backend-specific stats() and cleanup().
 */
export const getRuntimeFileStore = (env, config) => {
    if (env.FILE_STORE_BUCKET) {
        const store = new R2FileStore(env.FILE_STORE_BUCKET, config.service.storage);
        return {
            store,
            stats: async () => store.stats(),
            cleanup: async () => store.cleanup(),
        };
    }
    if (env.FILE_STORE_DO) {
        // Refuse configs whose files could not fit in the DO backend.
        if (config.service.storage.maxFileBytes > DO_SAFE_MAX_FILE_BYTES) {
            throw new Error(`service.storage.maxFileBytes=${config.service.storage.maxFileBytes} exceeds DO backend limit ${DO_SAFE_MAX_FILE_BYTES}; bind FILE_STORE_BUCKET (R2) or reduce maxFileBytes`);
        }
        const store = new DurableObjectFileStore(env.FILE_STORE_DO, config.service.storage);
        return {
            store,
            stats: async () => store.stats(),
            cleanup: async () => store.cleanup(),
        };
    }
    // In-memory fallback: stats are computed from the live records; cleanup
    // reports zero deletions (no expiry/eviction is applied in memory).
    return {
        store: fallbackStore,
        stats: async () => {
            const files = await fallbackStore.list();
            const totalBytes = files.reduce((sum, file) => sum + file.sizeBytes, 0);
            return {
                backend: "memory",
                policy: config.service.storage,
                stats: {
                    fileCount: files.length,
                    totalBytes,
                },
            };
        },
        cleanup: async () => ({
            backend: "memory",
            deletedExpired: 0,
            deletedEvicted: 0,
            stats: await (async () => {
                const files = await fallbackStore.list();
                return {
                    fileCount: files.length,
                    totalBytes: files.reduce((sum, file) => sum + file.sizeBytes, 0),
                };
            })(),
        }),
    };
};
@@ -0,0 +1,79 @@
1
import type { ProviderType, ReturnMode } from "./types.js";
/** Connection settings for one vision-model provider. */
export interface EchoPdfProviderConfig {
    readonly type: ProviderType;
    /** Name of the env var that holds this provider's API key. */
    readonly apiKeyEnv: string;
    readonly baseUrl?: string;
    readonly headers?: Record<string, string>;
    readonly timeoutMs?: number;
    readonly endpoints?: {
        readonly chatCompletionsPath?: string;
        readonly modelsPath?: string;
    };
}
/** Size and retention limits enforced by the file store. */
export interface StoragePolicy {
    readonly maxFileBytes: number;
    readonly maxTotalBytes: number;
    readonly ttlHours: number;
    readonly cleanupBatchSize: number;
}
/** Full resolved service configuration. */
export interface EchoPdfConfig {
    readonly service: {
        readonly name: string;
        /** Absolute http(s) base URL used when building public file links. */
        readonly publicBaseUrl?: string;
        readonly fileGet?: {
            readonly authHeader?: string;
            readonly authEnv?: string;
            readonly cacheTtlSeconds?: number;
        };
        readonly maxPdfBytes: number;
        readonly maxPagesPerRequest: number;
        readonly defaultRenderScale: number;
        readonly storage: StoragePolicy;
    };
    readonly pdfium: {
        readonly wasmUrl: string;
    };
    readonly agent: {
        readonly defaultProvider: string;
        readonly defaultModel: string;
        readonly ocrPrompt: string;
        readonly tablePrompt: string;
    };
    /** Provider alias -> provider configuration. */
    readonly providers: Record<string, EchoPdfProviderConfig>;
    readonly mcp: {
        readonly serverName: string;
        readonly version: string;
        readonly authHeader?: string;
        readonly authEnv?: string;
    };
}
/** Structured trace event emitted while a PDF operation runs. */
export interface AgentTraceEvent {
    readonly kind: "step";
    readonly phase: "start" | "end" | "log";
    readonly name: string;
    readonly level?: "info" | "error";
    readonly payload?: unknown;
}
/** One PDF operation request; fileId, url, or base64 supplies the document. */
export interface PdfOperationRequest {
    readonly operation: "extract_pages" | "ocr_pages" | "tables_to_latex";
    readonly fileId?: string;
    readonly url?: string;
    readonly base64?: string;
    readonly filename?: string;
    /** 1-based page numbers. */
    readonly pages: ReadonlyArray<number>;
    readonly renderScale?: number;
    readonly provider?: string;
    readonly model: string;
    /** Optional per-request provider API keys, passed through to the provider client. */
    readonly providerApiKeys?: Record<string, string>;
    readonly returnMode?: ReturnMode;
    readonly prompt?: string;
}
/** MCP tool descriptor advertised by tools/list. */
export interface ToolSchema {
    readonly name: string;
    readonly description: string;
    readonly inputSchema: Record<string, unknown>;
    readonly source: {
        readonly kind: "local";
        readonly toolName: string;
    };
}
@@ -0,0 +1 @@
1
// Type-only module: emits no runtime values.
export {};