@echofiles/echo-pdf 0.4.0 → 0.4.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +80 -0
- package/bin/echo-pdf.js +9 -164
- package/bin/lib/http.js +72 -0
- package/bin/lib/mcp-stdio.js +99 -0
- package/dist/agent-defaults.d.ts +3 -0
- package/dist/agent-defaults.js +18 -0
- package/dist/auth.d.ts +18 -0
- package/dist/auth.js +24 -0
- package/dist/core/index.d.ts +50 -0
- package/dist/core/index.js +7 -0
- package/dist/file-ops.d.ts +11 -0
- package/dist/file-ops.js +36 -0
- package/dist/file-store-do.d.ts +36 -0
- package/dist/file-store-do.js +298 -0
- package/dist/file-utils.d.ts +6 -0
- package/dist/file-utils.js +36 -0
- package/dist/http-error.d.ts +9 -0
- package/dist/http-error.js +14 -0
- package/dist/index.d.ts +1 -0
- package/dist/index.js +1 -0
- package/dist/mcp-server.d.ts +3 -0
- package/dist/mcp-server.js +127 -0
- package/dist/pdf-agent.d.ts +18 -0
- package/dist/pdf-agent.js +217 -0
- package/dist/pdf-config.d.ts +4 -0
- package/dist/pdf-config.js +130 -0
- package/dist/pdf-storage.d.ts +8 -0
- package/dist/pdf-storage.js +86 -0
- package/dist/pdf-types.d.ts +79 -0
- package/dist/pdf-types.js +1 -0
- package/dist/pdfium-engine.d.ts +9 -0
- package/dist/pdfium-engine.js +180 -0
- package/dist/provider-client.d.ts +12 -0
- package/dist/provider-client.js +134 -0
- package/dist/provider-keys.d.ts +10 -0
- package/dist/provider-keys.js +27 -0
- package/dist/r2-file-store.d.ts +20 -0
- package/dist/r2-file-store.js +176 -0
- package/dist/response-schema.d.ts +15 -0
- package/dist/response-schema.js +159 -0
- package/dist/tool-registry.d.ts +16 -0
- package/dist/tool-registry.js +175 -0
- package/dist/types.d.ts +91 -0
- package/dist/types.js +1 -0
- package/dist/worker.d.ts +7 -0
- package/dist/worker.js +366 -0
- package/package.json +22 -4
- package/wrangler.toml +1 -1
- package/src/agent-defaults.ts +0 -25
- package/src/file-ops.ts +0 -50
- package/src/file-store-do.ts +0 -349
- package/src/file-utils.ts +0 -43
- package/src/http-error.ts +0 -21
- package/src/index.ts +0 -400
- package/src/mcp-server.ts +0 -158
- package/src/pdf-agent.ts +0 -252
- package/src/pdf-config.ts +0 -143
- package/src/pdf-storage.ts +0 -109
- package/src/pdf-types.ts +0 -85
- package/src/pdfium-engine.ts +0 -207
- package/src/provider-client.ts +0 -176
- package/src/provider-keys.ts +0 -44
- package/src/r2-file-store.ts +0 -195
- package/src/response-schema.ts +0 -182
- package/src/tool-registry.ts +0 -203
- package/src/types.ts +0 -40
- package/src/wasm.d.ts +0 -4
|
@@ -0,0 +1,217 @@
|
|
|
1
|
+
import { resolveModelForProvider, resolveProviderAlias } from "./agent-defaults.js";
|
|
2
|
+
import { fromBase64, normalizeReturnMode, toDataUrl } from "./file-utils.js";
|
|
3
|
+
import { badRequest, notFound, unprocessable } from "./http-error.js";
|
|
4
|
+
import { extractPdfPageText, getPdfPageCount, renderPdfPageToPng, toBytes } from "./pdfium-engine.js";
|
|
5
|
+
import { visionRecognize } from "./provider-client.js";
|
|
6
|
+
/**
 * Forward a structured trace event to the caller-supplied `opts.trace`
 * callback. Silently does nothing when no trace callback was provided.
 */
const traceStep = (opts, phase, name, payload, level) => {
    if (opts.trace) {
        opts.trace({ kind: "step", phase, name, payload, level });
    }
};
|
|
11
|
+
/**
 * Validate a requested page list against the document's page count and the
 * configured per-request limit, then return it de-duplicated and sorted
 * ascending. Throws badRequest errors for an empty list, an oversized list
 * (duplicates count toward the limit before dedup), or any page outside
 * the inclusive range 1..pageCount.
 */
const ensurePages = (pages, pageCount, maxPages) => {
    if (pages.length === 0) {
        throw badRequest("PAGES_REQUIRED", "At least one page is required");
    }
    if (pages.length > maxPages) {
        throw badRequest("TOO_MANY_PAGES", `Page count exceeds maxPagesPerRequest (${maxPages})`, {
            maxPagesPerRequest: maxPages,
            providedPages: pages.length,
        });
    }
    for (const candidate of pages) {
        const inRange = Number.isInteger(candidate) && candidate >= 1 && candidate <= pageCount;
        if (!inRange) {
            throw badRequest("PAGE_OUT_OF_RANGE", `Page ${candidate} out of range 1..${pageCount}`, {
                page: candidate,
                min: 1,
                max: pageCount,
            });
        }
    }
    const unique = new Set(pages);
    return Array.from(unique).sort((left, right) => left - right);
};
|
|
31
|
+
/**
 * Resolve a PDF payload into { id, filename, bytes }.
 *
 * Resolution order: an existing fileId (looked up in the file store), then a
 * remote url (fetched via toBytes), then inline base64. New bytes are
 * size-checked against config.service.maxPdfBytes and persisted to the file
 * store before returning. Throws notFound for an unknown fileId and
 * badRequest for fetch failures, missing input, or oversized PDFs.
 */
export const ingestPdfFromPayload = async (config, input, opts) => {
    // Fast path: the PDF was uploaded earlier and is referenced by id.
    if (input.fileId) {
        const existing = await opts.fileStore.get(input.fileId);
        if (!existing) {
            throw notFound("FILE_NOT_FOUND", `File not found: ${input.fileId}`, { fileId: input.fileId });
        }
        return {
            id: existing.id,
            filename: existing.filename,
            bytes: existing.bytes,
        };
    }
    let bytes = null;
    let filename = input.filename ?? "document.pdf";
    if (input.url) {
        traceStep(opts, "start", "file.fetch.url", { url: input.url });
        try {
            bytes = await toBytes(input.url);
        }
        catch (error) {
            throw badRequest("URL_FETCH_FAILED", `Unable to fetch PDF from url: ${error instanceof Error ? error.message : String(error)}`);
        }
        // Best effort: derive a filename from the URL path's last segment.
        try {
            const u = new URL(input.url);
            filename = decodeURIComponent(u.pathname.split("/").pop() || filename);
        }
        catch {
            // ignore URL parse failure
        }
        traceStep(opts, "end", "file.fetch.url", { sizeBytes: bytes.byteLength });
    }
    else if (input.base64) {
        traceStep(opts, "start", "file.decode.base64");
        bytes = fromBase64(input.base64);
        traceStep(opts, "end", "file.decode.base64", { sizeBytes: bytes.byteLength });
    }
    if (!bytes) {
        throw badRequest("MISSING_FILE_INPUT", "Missing file input. Provide fileId, url or base64");
    }
    // Enforce the service-wide PDF size cap before persisting anything.
    if (bytes.byteLength > config.service.maxPdfBytes) {
        throw badRequest("PDF_TOO_LARGE", `PDF exceeds max size (${config.service.maxPdfBytes} bytes)`, {
            maxPdfBytes: config.service.maxPdfBytes,
            sizeBytes: bytes.byteLength,
        });
    }
    // Persist so subsequent requests can reference the PDF by fileId.
    const meta = await opts.fileStore.put({
        filename,
        mimeType: "application/pdf",
        bytes,
    });
    traceStep(opts, "end", "file.stored", { fileId: meta.id, sizeBytes: meta.sizeBytes });
    return {
        id: meta.id,
        filename: meta.filename,
        bytes,
    };
};
|
|
88
|
+
// Thin wrapper so request-level return modes go through one normalizer.
const resolveReturnMode = (value) => {
    return normalizeReturnMode(value);
};
|
|
89
|
+
/**
 * Remove a single wrapping markdown code fence (``` or ```lang) from a
 * string and return the trimmed inner content. Input without a full
 * surrounding fence is simply trimmed and returned.
 */
const stripCodeFences = (value) => {
    const trimmed = value.trim();
    const match = /^```[a-zA-Z0-9_-]*\n([\s\S]*?)\n```$/.exec(trimmed);
    if (match && typeof match[1] === "string") {
        return match[1].trim();
    }
    return trimmed;
};
|
|
94
|
+
/**
 * Pull every \begin{tabular}...\end{tabular} block out of a model response
 * (after removing a wrapping markdown code fence) and join them with blank
 * lines. Returns "" when no tabular environment is found.
 */
const extractTabularLatex = (value) => {
    const cleaned = stripCodeFences(value);
    const environments = cleaned.match(/\\begin\{tabular\}[\s\S]*?\\end\{tabular\}/g);
    if (!environments || environments.length === 0) {
        return "";
    }
    return environments.map((block) => block.trim()).join("\n\n");
};
|
|
101
|
+
/**
 * Execute one PDF operation end to end.
 *
 * All operations first ingest the PDF (fileId/url/base64), read its page
 * count, and validate the requested pages. Then:
 *   - "extract_pages": renders each page to PNG and returns it as a stored
 *     fileId, a relative URL, or an inline data URL depending on returnMode.
 *   - "ocr_pages": renders each page, sends it to the vision model, and
 *     falls back to PDFium text extraction when the model returns nothing.
 *   - anything else (tables_to_latex): renders each page and requires the
 *     model to return at least one LaTeX tabular block per page.
 * Trace events are emitted around every phase via opts.trace.
 */
export const runPdfAgent = async (config, env, request, opts) => {
    traceStep(opts, "start", "pdf.operation", { operation: request.operation });
    const file = await ingestPdfFromPayload(config, request, opts);
    const pageCount = await getPdfPageCount(config, file.bytes);
    traceStep(opts, "log", "pdf.meta", { fileId: file.id, pageCount });
    const pages = ensurePages(request.pages, pageCount, config.service.maxPagesPerRequest);
    const scale = request.renderScale ?? config.service.defaultRenderScale;
    const returnMode = resolveReturnMode(request.returnMode);
    if (request.operation === "extract_pages") {
        const images = [];
        for (const page of pages) {
            traceStep(opts, "start", "render.page", { page });
            // PDFium pages are 0-indexed; the public API is 1-indexed.
            const rendered = await renderPdfPageToPng(config, file.bytes, page - 1, scale);
            if (returnMode === "file_id") {
                const stored = await opts.fileStore.put({
                    filename: `${file.filename}-p${page}.png`,
                    mimeType: "image/png",
                    bytes: rendered.png,
                });
                images.push({ page, mimeType: "image/png", fileId: stored.id });
            }
            else if (returnMode === "url") {
                const stored = await opts.fileStore.put({
                    filename: `${file.filename}-p${page}.png`,
                    mimeType: "image/png",
                    bytes: rendered.png,
                });
                images.push({
                    page,
                    mimeType: "image/png",
                    fileId: stored.id,
                    // NOTE(review): relative URL — presumably callers prefix
                    // service.publicBaseUrl; confirm against the worker routes.
                    url: `/api/files/get?fileId=${encodeURIComponent(stored.id)}`,
                });
            }
            else {
                // Inline mode: embed the PNG directly as a data URL.
                images.push({
                    page,
                    mimeType: "image/png",
                    data: toDataUrl(rendered.png, "image/png"),
                });
            }
            traceStep(opts, "end", "render.page", { page, width: rendered.width, height: rendered.height });
        }
        const result = { fileId: file.id, pageCount, returnMode, images };
        traceStep(opts, "end", "pdf.operation", { operation: request.operation });
        return result;
    }
    // OCR and table extraction both need a resolved provider + model.
    const providerAlias = resolveProviderAlias(config, request.provider);
    const model = resolveModelForProvider(config, providerAlias, request.model);
    if (!model) {
        throw badRequest("MODEL_REQUIRED", "model is required for OCR or table extraction; set agent.defaultModel");
    }
    if (request.operation === "ocr_pages") {
        const results = [];
        for (const page of pages) {
            traceStep(opts, "start", "ocr.page", { page });
            const rendered = await renderPdfPageToPng(config, file.bytes, page - 1, scale);
            const imageDataUrl = toDataUrl(rendered.png, "image/png");
            // Embedded-text extraction serves as a fallback when the model
            // returns an empty response.
            const fallbackText = await extractPdfPageText(config, file.bytes, page - 1);
            const prompt = request.prompt?.trim() || config.agent.ocrPrompt;
            const llmText = await visionRecognize({
                config,
                env,
                providerAlias,
                model,
                prompt,
                imageDataUrl,
                runtimeApiKeys: request.providerApiKeys,
            });
            const text = stripCodeFences(llmText || fallbackText || "");
            results.push({ page, text });
            traceStep(opts, "end", "ocr.page", { page, chars: text.length });
        }
        const result = {
            fileId: file.id,
            pageCount,
            provider: providerAlias,
            model,
            pages: results,
        };
        traceStep(opts, "end", "pdf.operation", { operation: request.operation });
        return result;
    }
    // Remaining operation: tables_to_latex.
    const tables = [];
    for (const page of pages) {
        traceStep(opts, "start", "table.page", { page });
        const rendered = await renderPdfPageToPng(config, file.bytes, page - 1, scale);
        const imageDataUrl = toDataUrl(rendered.png, "image/png");
        const prompt = request.prompt?.trim() || config.agent.tablePrompt;
        const rawLatex = await visionRecognize({
            config,
            env,
            providerAlias,
            model,
            prompt,
            imageDataUrl,
            runtimeApiKeys: request.providerApiKeys,
        });
        const latex = extractTabularLatex(rawLatex);
        // Unlike OCR, a missing tabular block is a hard failure (HTTP 422).
        if (!latex) {
            throw unprocessable("TABLE_LATEX_MISSING", `table extraction did not return valid LaTeX tabular for page ${page}`, {
                page,
            });
        }
        tables.push({ page, latex });
        traceStep(opts, "end", "table.page", { page, chars: latex.length });
    }
    const result = {
        fileId: file.id,
        pageCount,
        provider: providerAlias,
        model,
        pages: tables,
    };
    traceStep(opts, "end", "pdf.operation", { operation: request.operation });
    return result;
};
|
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
import rawConfig from "../echo-pdf.config.json" with { type: "json" };
|
|
2
|
+
// Matches ${NAME} placeholders whose NAME uses the env-var alphabet.
const ENV_PATTERN = /\$\{([A-Z0-9_]+)\}/g;
// True only for plain objects — arrays are handled separately by callers.
const isObject = (value) => typeof value === "object" && value !== null && !Array.isArray(value);
/**
 * Substitute ${NAME} placeholders in a string with values from env.
 * Placeholders with no string value in env are left intact.
 */
const interpolateEnv = (input, env) => input.replace(ENV_PATTERN, (match, name) => {
    const replacement = env[name];
    if (typeof replacement === "string") {
        return replacement;
    }
    return `\${${name}}`;
});
|
|
8
|
+
/**
 * Recursively interpolate ${NAME} env placeholders through strings, arrays,
 * and plain objects; every other value passes through untouched.
 */
const resolveEnvRefs = (value, env) => {
    if (typeof value === "string") {
        return interpolateEnv(value, env);
    }
    if (Array.isArray(value)) {
        return value.map((entry) => resolveEnvRefs(entry, env));
    }
    if (!isObject(value)) {
        return value;
    }
    const resolved = {};
    for (const [key, nested] of Object.entries(value)) {
        resolved[key] = resolveEnvRefs(nested, env);
    }
    return resolved;
};
|
|
22
|
+
/**
 * Validate structural invariants of an Echo PDF config object.
 * Throws an Error describing the first violated constraint (check order is
 * part of the contract); returns the config unchanged when all checks pass.
 */
const validateConfig = (config) => {
    const fail = (message) => {
        throw new Error(message);
    };
    if (!config.service?.name)
        fail("service.name is required");
    if (!config.pdfium?.wasmUrl)
        fail("pdfium.wasmUrl is required");
    if (!config.service?.storage)
        fail("service.storage is required");
    const service = config.service;
    const baseUrl = service.publicBaseUrl;
    // A public base URL is optional, but when present it must be absolute.
    if (typeof baseUrl === "string" && baseUrl.length > 0 && !/^https?:\/\//.test(baseUrl)) {
        fail("service.publicBaseUrl must start with http:// or https://");
    }
    const cacheTtl = service.fileGet?.cacheTtlSeconds;
    if (typeof cacheTtl === "number" && cacheTtl < 0) {
        fail("service.fileGet.cacheTtlSeconds must be >= 0");
    }
    const storage = service.storage;
    const positive = (n) => Number.isFinite(n) && n > 0;
    if (!positive(storage.maxFileBytes))
        fail("service.storage.maxFileBytes must be positive");
    // The store must be able to hold at least one maximum-size PDF.
    if (storage.maxFileBytes < service.maxPdfBytes)
        fail("service.storage.maxFileBytes must be >= service.maxPdfBytes");
    if (!positive(storage.maxTotalBytes))
        fail("service.storage.maxTotalBytes must be positive");
    if (storage.maxTotalBytes < storage.maxFileBytes)
        fail("service.storage.maxTotalBytes must be >= maxFileBytes");
    if (!positive(storage.ttlHours))
        fail("service.storage.ttlHours must be positive");
    if (!positive(storage.cleanupBatchSize))
        fail("service.storage.cleanupBatchSize must be positive");
    if (!config.agent?.defaultProvider)
        fail("agent.defaultProvider is required");
    // The default provider must actually exist in the providers map.
    if (!config.providers?.[config.agent.defaultProvider]) {
        fail(`default provider "${config.agent.defaultProvider}" missing`);
    }
    if (typeof config.agent.defaultModel !== "string") {
        fail("agent.defaultModel must be a string");
    }
    return config;
};
|
|
65
|
+
/**
 * Build the effective Echo PDF configuration for this runtime.
 *
 * Source precedence: ECHO_PDF_CONFIG_JSON (full JSON override) else the
 * bundled echo-pdf.config.json. ${NAME} placeholders anywhere in the config
 * are interpolated from env. Individual env vars then override specific
 * fields (provider, model, public base URL, fileGet auth/cache settings);
 * blank or whitespace-only overrides are ignored. The merged result is
 * validated before being returned.
 */
export const loadEchoPdfConfig = (env) => {
    const fromEnv = env.ECHO_PDF_CONFIG_JSON?.trim();
    const configJson = fromEnv ? JSON.parse(fromEnv) : rawConfig;
    const resolved = resolveEnvRefs(configJson, env);
    const providerOverride = env.ECHO_PDF_DEFAULT_PROVIDER;
    const modelOverride = env.ECHO_PDF_DEFAULT_MODEL;
    const publicBaseUrlOverride = env.ECHO_PDF_PUBLIC_BASE_URL;
    const fileGetAuthHeaderOverride = env.ECHO_PDF_FILE_GET_AUTH_HEADER;
    const fileGetAuthEnvOverride = env.ECHO_PDF_FILE_GET_AUTH_ENV;
    const fileGetCacheTtlOverride = env.ECHO_PDF_FILE_GET_CACHE_TTL_SECONDS;
    const withOverrides = {
        ...resolved,
        service: {
            ...resolved.service,
            publicBaseUrl: typeof publicBaseUrlOverride === "string" && publicBaseUrlOverride.trim().length > 0
                ? publicBaseUrlOverride.trim()
                : resolved.service.publicBaseUrl,
            // Note: fileGet is rebuilt from scratch (not spread), so only
            // these three keys survive from the resolved config.
            fileGet: {
                authHeader: typeof fileGetAuthHeaderOverride === "string" && fileGetAuthHeaderOverride.trim().length > 0
                    ? fileGetAuthHeaderOverride.trim()
                    : resolved.service.fileGet?.authHeader,
                authEnv: typeof fileGetAuthEnvOverride === "string" && fileGetAuthEnvOverride.trim().length > 0
                    ? fileGetAuthEnvOverride.trim()
                    : resolved.service.fileGet?.authEnv,
                // TTL override must parse to a finite number >= 0, and is
                // floored to an integer; otherwise fall back to the config.
                cacheTtlSeconds: (() => {
                    if (typeof fileGetCacheTtlOverride === "string" && fileGetCacheTtlOverride.trim().length > 0) {
                        const value = Number(fileGetCacheTtlOverride);
                        return Number.isFinite(value) && value >= 0 ? Math.floor(value) : resolved.service.fileGet?.cacheTtlSeconds;
                    }
                    return resolved.service.fileGet?.cacheTtlSeconds;
                })(),
            },
        },
        agent: {
            ...resolved.agent,
            defaultProvider: typeof providerOverride === "string" && providerOverride.trim().length > 0
                ? providerOverride.trim()
                : resolved.agent.defaultProvider,
            defaultModel: typeof modelOverride === "string" && modelOverride.trim().length > 0
                ? modelOverride.trim()
                : resolved.agent.defaultModel,
        },
    };
    return validateConfig(withOverrides);
};
|
|
110
|
+
/**
 * Read a required env var, returning its trimmed value. For backward
 * compatibility, *_API_KEY keys also match their *_KEY alias and vice
 * versa. Throws when no candidate yields a non-empty string.
 *
 * Note: because "_API_KEY" also ends with "_KEY", a *_API_KEY lookup probes
 * a third `…_API_API_KEY` alias last — preserved from the original behavior.
 */
export const readRequiredEnv = (env, key) => {
    const lookup = (name) => {
        const raw = env[name];
        if (typeof raw !== "string") {
            return null;
        }
        const trimmed = raw.trim();
        return trimmed.length > 0 ? trimmed : null;
    };
    const candidates = [key];
    if (key.endsWith("_API_KEY")) {
        candidates.push(key.replace(/_API_KEY$/, "_KEY"));
    }
    if (key.endsWith("_KEY")) {
        candidates.push(key.replace(/_KEY$/, "_API_KEY"));
    }
    for (const name of candidates) {
        const value = lookup(name);
        if (value) {
            return value;
        }
    }
    throw new Error(`Missing required env var "${key}"`);
};
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
import type { EchoPdfConfig } from "./pdf-types.js";
import type { Env, FileStore } from "./types.js";
/**
 * A concrete file-store backend paired with its maintenance accessors.
 */
export interface RuntimeFileStoreBundle {
    /** Store used for put/get/list/delete of uploaded files. */
    readonly store: FileStore;
    /** Backend usage statistics; shape varies per backend. */
    stats: () => Promise<unknown>;
    /** Run backend cleanup; resolves with a backend-specific report. */
    cleanup: () => Promise<unknown>;
}
/**
 * Select the file-store backend for this runtime from the bindings present
 * on `env` and the storage policy in `config`.
 */
export declare const getRuntimeFileStore: (env: Env, config: EchoPdfConfig) => RuntimeFileStoreBundle;
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
import { DurableObjectFileStore } from "./file-store-do.js";
|
|
2
|
+
import { R2FileStore } from "./r2-file-store.js";
|
|
3
|
+
class InMemoryFileStore {
|
|
4
|
+
store = new Map();
|
|
5
|
+
async put(input) {
|
|
6
|
+
const id = crypto.randomUUID();
|
|
7
|
+
const record = {
|
|
8
|
+
id,
|
|
9
|
+
filename: input.filename,
|
|
10
|
+
mimeType: input.mimeType,
|
|
11
|
+
sizeBytes: input.bytes.byteLength,
|
|
12
|
+
createdAt: new Date().toISOString(),
|
|
13
|
+
bytes: input.bytes,
|
|
14
|
+
};
|
|
15
|
+
this.store.set(id, record);
|
|
16
|
+
return this.toMeta(record);
|
|
17
|
+
}
|
|
18
|
+
async get(fileId) {
|
|
19
|
+
return this.store.get(fileId) ?? null;
|
|
20
|
+
}
|
|
21
|
+
async list() {
|
|
22
|
+
return [...this.store.values()].map((record) => this.toMeta(record));
|
|
23
|
+
}
|
|
24
|
+
async delete(fileId) {
|
|
25
|
+
return this.store.delete(fileId);
|
|
26
|
+
}
|
|
27
|
+
toMeta(record) {
|
|
28
|
+
return {
|
|
29
|
+
id: record.id,
|
|
30
|
+
filename: record.filename,
|
|
31
|
+
mimeType: record.mimeType,
|
|
32
|
+
sizeBytes: record.sizeBytes,
|
|
33
|
+
createdAt: record.createdAt,
|
|
34
|
+
};
|
|
35
|
+
}
|
|
36
|
+
}
|
|
37
|
+
// Shared in-memory fallback used when no persistent storage is bound.
const fallbackStore = new InMemoryFileStore();
// Per-file ceiling accepted for the Durable Object backend; larger files
// require the R2 bucket binding instead.
const DO_SAFE_MAX_FILE_BYTES = 1_200_000;
/**
 * Select the file-store backend for this runtime: the R2 bucket when
 * FILE_STORE_BUCKET is bound, else the Durable Object store when
 * FILE_STORE_DO is bound (rejecting policies whose maxFileBytes exceeds the
 * DO limit), else the shared in-memory fallback. Returns the store together
 * with stats/cleanup accessors for the chosen backend.
 */
export const getRuntimeFileStore = (env, config) => {
    const policy = config.service.storage;
    if (env.FILE_STORE_BUCKET) {
        const r2 = new R2FileStore(env.FILE_STORE_BUCKET, policy);
        return {
            store: r2,
            stats: async () => r2.stats(),
            cleanup: async () => r2.cleanup(),
        };
    }
    if (env.FILE_STORE_DO) {
        if (policy.maxFileBytes > DO_SAFE_MAX_FILE_BYTES) {
            throw new Error(`service.storage.maxFileBytes=${policy.maxFileBytes} exceeds DO backend limit ${DO_SAFE_MAX_FILE_BYTES}; bind FILE_STORE_BUCKET (R2) or reduce maxFileBytes`);
        }
        const durable = new DurableObjectFileStore(env.FILE_STORE_DO, policy);
        return {
            store: durable,
            stats: async () => durable.stats(),
            cleanup: async () => durable.cleanup(),
        };
    }
    // Memory backend: compute usage on demand from the fallback store.
    const memoryStats = async () => {
        const files = await fallbackStore.list();
        const totalBytes = files.reduce((sum, file) => sum + file.sizeBytes, 0);
        return { fileCount: files.length, totalBytes };
    };
    return {
        store: fallbackStore,
        stats: async () => ({
            backend: "memory",
            policy,
            stats: await memoryStats(),
        }),
        // Nothing expires or is evicted in memory; report zeros plus usage.
        cleanup: async () => ({
            backend: "memory",
            deletedExpired: 0,
            deletedEvicted: 0,
            stats: await memoryStats(),
        }),
    };
};
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
import type { ProviderType, ReturnMode } from "./types.js";
/** Configuration for one upstream vision/LLM provider. */
export interface EchoPdfProviderConfig {
    readonly type: ProviderType;
    /** Name of the env var holding this provider's API key. */
    readonly apiKeyEnv: string;
    readonly baseUrl?: string;
    /** Extra headers sent with provider requests. */
    readonly headers?: Record<string, string>;
    readonly timeoutMs?: number;
    /** Optional path overrides for the provider's HTTP endpoints. */
    readonly endpoints?: {
        readonly chatCompletionsPath?: string;
        readonly modelsPath?: string;
    };
}
/** Size/retention limits enforced by the file-store backends. */
export interface StoragePolicy {
    readonly maxFileBytes: number;
    readonly maxTotalBytes: number;
    readonly ttlHours: number;
    readonly cleanupBatchSize: number;
}
/** Fully resolved service configuration (see pdf-config validation). */
export interface EchoPdfConfig {
    readonly service: {
        readonly name: string;
        /** When set, must be an absolute http(s) URL. */
        readonly publicBaseUrl?: string;
        /** Auth/caching settings for the file download endpoint. */
        readonly fileGet?: {
            readonly authHeader?: string;
            readonly authEnv?: string;
            readonly cacheTtlSeconds?: number;
        };
        readonly maxPdfBytes: number;
        readonly maxPagesPerRequest: number;
        readonly defaultRenderScale: number;
        readonly storage: StoragePolicy;
    };
    readonly pdfium: {
        /** Location of the PDFium WASM binary. */
        readonly wasmUrl: string;
    };
    readonly agent: {
        readonly defaultProvider: string;
        readonly defaultModel: string;
        /** Default prompt used for the ocr_pages operation. */
        readonly ocrPrompt: string;
        /** Default prompt used for table-to-LaTeX extraction. */
        readonly tablePrompt: string;
    };
    /** Provider configs keyed by alias; must contain defaultProvider. */
    readonly providers: Record<string, EchoPdfProviderConfig>;
    readonly mcp: {
        readonly serverName: string;
        readonly version: string;
        readonly authHeader?: string;
        readonly authEnv?: string;
    };
}
/** A single trace event emitted around agent phases. */
export interface AgentTraceEvent {
    readonly kind: "step";
    readonly phase: "start" | "end" | "log";
    readonly name: string;
    readonly level?: "info" | "error";
    readonly payload?: unknown;
}
/** Input payload for runPdfAgent; exactly one of fileId/url/base64. */
export interface PdfOperationRequest {
    readonly operation: "extract_pages" | "ocr_pages" | "tables_to_latex";
    readonly fileId?: string;
    readonly url?: string;
    readonly base64?: string;
    readonly filename?: string;
    /** 1-indexed page numbers; validated against the document. */
    readonly pages: ReadonlyArray<number>;
    readonly renderScale?: number;
    readonly provider?: string;
    readonly model: string;
    /** Runtime API keys that override configured provider keys. */
    readonly providerApiKeys?: Record<string, string>;
    readonly returnMode?: ReturnMode;
    /** Overrides the configured OCR/table prompt when non-empty. */
    readonly prompt?: string;
}
/** JSON-schema description of a locally implemented tool. */
export interface ToolSchema {
    readonly name: string;
    readonly description: string;
    readonly inputSchema: Record<string, unknown>;
    readonly source: {
        readonly kind: "local";
        readonly toolName: string;
    };
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
// Marker export: keeps this emitted file an ES module even though the
// source contained only type declarations (erased at compile time).
export {};
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
import type { EchoPdfConfig } from "./pdf-types.js";
/** Number of pages in the given PDF bytes. */
export declare const getPdfPageCount: (config: EchoPdfConfig, bytes: Uint8Array) => Promise<number>;
/**
 * Render one page (0-indexed) of the PDF to a PNG at the given scale,
 * returning the rendered dimensions alongside the encoded bytes.
 */
export declare const renderPdfPageToPng: (config: EchoPdfConfig, bytes: Uint8Array, pageIndex: number, scale?: number) => Promise<{
    width: number;
    height: number;
    png: Uint8Array;
}>;
/** Extract the embedded text of one page (0-indexed). */
export declare const extractPdfPageText: (config: EchoPdfConfig, bytes: Uint8Array, pageIndex: number) => Promise<string>;
/** Resolve a string input to raw bytes — fetched by pdf-agent for URLs. */
export declare const toBytes: (value: string) => Promise<Uint8Array>;
|