@echofiles/echo-pdf 0.4.1 → 0.4.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72)
  1. package/README.md +302 -11
  2. package/bin/echo-pdf.js +176 -8
  3. package/bin/lib/http.js +26 -1
  4. package/dist/agent-defaults.d.ts +3 -0
  5. package/dist/agent-defaults.js +18 -0
  6. package/dist/auth.d.ts +18 -0
  7. package/dist/auth.js +36 -0
  8. package/dist/core/index.d.ts +50 -0
  9. package/dist/core/index.js +7 -0
  10. package/dist/file-ops.d.ts +11 -0
  11. package/dist/file-ops.js +36 -0
  12. package/dist/file-store-do.d.ts +36 -0
  13. package/dist/file-store-do.js +298 -0
  14. package/dist/file-utils.d.ts +6 -0
  15. package/dist/file-utils.js +36 -0
  16. package/dist/http-error.d.ts +9 -0
  17. package/dist/http-error.js +14 -0
  18. package/dist/index.d.ts +1 -0
  19. package/dist/index.js +1 -0
  20. package/dist/local/index.d.ts +135 -0
  21. package/dist/local/index.js +555 -0
  22. package/dist/mcp-server.d.ts +3 -0
  23. package/dist/mcp-server.js +124 -0
  24. package/dist/node/pdfium-local.d.ts +8 -0
  25. package/dist/node/pdfium-local.js +147 -0
  26. package/dist/node/semantic-local.d.ts +16 -0
  27. package/dist/node/semantic-local.js +113 -0
  28. package/dist/pdf-agent.d.ts +18 -0
  29. package/dist/pdf-agent.js +217 -0
  30. package/dist/pdf-config.d.ts +4 -0
  31. package/dist/pdf-config.js +140 -0
  32. package/dist/pdf-storage.d.ts +8 -0
  33. package/dist/pdf-storage.js +86 -0
  34. package/dist/pdf-types.d.ts +83 -0
  35. package/dist/pdf-types.js +1 -0
  36. package/dist/pdfium-engine.d.ts +9 -0
  37. package/dist/pdfium-engine.js +180 -0
  38. package/dist/provider-client.d.ts +20 -0
  39. package/dist/provider-client.js +173 -0
  40. package/dist/provider-keys.d.ts +10 -0
  41. package/dist/provider-keys.js +27 -0
  42. package/dist/r2-file-store.d.ts +20 -0
  43. package/dist/r2-file-store.js +176 -0
  44. package/dist/response-schema.d.ts +15 -0
  45. package/dist/response-schema.js +159 -0
  46. package/dist/tool-registry.d.ts +16 -0
  47. package/dist/tool-registry.js +175 -0
  48. package/dist/types.d.ts +91 -0
  49. package/dist/types.js +1 -0
  50. package/dist/worker.d.ts +7 -0
  51. package/dist/worker.js +386 -0
  52. package/package.json +34 -5
  53. package/wrangler.toml +1 -1
  54. package/src/agent-defaults.ts +0 -25
  55. package/src/file-ops.ts +0 -50
  56. package/src/file-store-do.ts +0 -349
  57. package/src/file-utils.ts +0 -43
  58. package/src/http-error.ts +0 -21
  59. package/src/index.ts +0 -415
  60. package/src/mcp-server.ts +0 -171
  61. package/src/pdf-agent.ts +0 -252
  62. package/src/pdf-config.ts +0 -143
  63. package/src/pdf-storage.ts +0 -109
  64. package/src/pdf-types.ts +0 -85
  65. package/src/pdfium-engine.ts +0 -207
  66. package/src/provider-client.ts +0 -176
  67. package/src/provider-keys.ts +0 -44
  68. package/src/r2-file-store.ts +0 -195
  69. package/src/response-schema.ts +0 -182
  70. package/src/tool-registry.ts +0 -203
  71. package/src/types.ts +0 -40
  72. package/src/wasm.d.ts +0 -4
@@ -0,0 +1,140 @@
1
+ import rawConfig from "../echo-pdf.config.json" with { type: "json" };
2
+ const ENV_PATTERN = /\$\{([A-Z0-9_]+)\}/g;
3
+ const isObject = (value) => typeof value === "object" && value !== null && !Array.isArray(value);
4
+ const interpolateEnv = (input, env) => input.replace(ENV_PATTERN, (_, name) => {
5
+ const value = env[name];
6
+ return typeof value === "string" ? value : `\${${name}}`;
7
+ });
8
+ const resolveEnvRefs = (value, env) => {
9
+ if (typeof value === "string")
10
+ return interpolateEnv(value, env);
11
+ if (Array.isArray(value))
12
+ return value.map((item) => resolveEnvRefs(item, env));
13
+ if (isObject(value)) {
14
+ const out = {};
15
+ for (const [key, nested] of Object.entries(value)) {
16
+ out[key] = resolveEnvRefs(nested, env);
17
+ }
18
+ return out;
19
+ }
20
+ return value;
21
+ };
22
+ const validateConfig = (config) => {
23
+ if (!config.service?.name)
24
+ throw new Error("service.name is required");
25
+ if (!config.pdfium?.wasmUrl)
26
+ throw new Error("pdfium.wasmUrl is required");
27
+ if (!config.service?.storage)
28
+ throw new Error("service.storage is required");
29
+ if (typeof config.service.publicBaseUrl === "string" &&
30
+ config.service.publicBaseUrl.length > 0 &&
31
+ !/^https?:\/\//.test(config.service.publicBaseUrl)) {
32
+ throw new Error("service.publicBaseUrl must start with http:// or https://");
33
+ }
34
+ if (typeof config.service.fileGet?.cacheTtlSeconds === "number" && config.service.fileGet.cacheTtlSeconds < 0) {
35
+ throw new Error("service.fileGet.cacheTtlSeconds must be >= 0");
36
+ }
37
+ if (!Number.isFinite(config.service.storage.maxFileBytes) || config.service.storage.maxFileBytes <= 0) {
38
+ throw new Error("service.storage.maxFileBytes must be positive");
39
+ }
40
+ if (config.service.storage.maxFileBytes < config.service.maxPdfBytes) {
41
+ throw new Error("service.storage.maxFileBytes must be >= service.maxPdfBytes");
42
+ }
43
+ if (!Number.isFinite(config.service.storage.maxTotalBytes) || config.service.storage.maxTotalBytes <= 0) {
44
+ throw new Error("service.storage.maxTotalBytes must be positive");
45
+ }
46
+ if (config.service.storage.maxTotalBytes < config.service.storage.maxFileBytes) {
47
+ throw new Error("service.storage.maxTotalBytes must be >= maxFileBytes");
48
+ }
49
+ if (!Number.isFinite(config.service.storage.ttlHours) || config.service.storage.ttlHours <= 0) {
50
+ throw new Error("service.storage.ttlHours must be positive");
51
+ }
52
+ if (!Number.isFinite(config.service.storage.cleanupBatchSize) || config.service.storage.cleanupBatchSize <= 0) {
53
+ throw new Error("service.storage.cleanupBatchSize must be positive");
54
+ }
55
+ if (!config.agent?.defaultProvider)
56
+ throw new Error("agent.defaultProvider is required");
57
+ if (!config.providers?.[config.agent.defaultProvider]) {
58
+ throw new Error(`default provider "${config.agent.defaultProvider}" missing`);
59
+ }
60
+ if (typeof config.agent.defaultModel !== "string") {
61
+ throw new Error("agent.defaultModel must be a string");
62
+ }
63
+ return config;
64
+ };
65
/**
 * Builds the effective configuration for the given environment.
 *
 * Steps:
 *  1. Use the JSON in `env.ECHO_PDF_CONFIG_JSON` when it is non-blank, otherwise the bundled config.
 *  2. Resolve `${NAME}` placeholders against `env`.
 *  3. Apply the `ECHO_PDF_*` overrides; an override counts only when it is a non-blank
 *     string, and it is trimmed before use.
 *  4. Validate the result.
 *
 * @param env Environment bindings / variables.
 * @returns The validated configuration.
 * @throws SyntaxError when `ECHO_PDF_CONFIG_JSON` is not valid JSON.
 * @throws Error when validation fails.
 */
export const loadEchoPdfConfig = (env) => {
    // Trimmed override value, or undefined when the variable is unset or blank.
    const override = (raw) => (typeof raw === "string" && raw.trim().length > 0 ? raw.trim() : undefined);

    const fromEnv = env.ECHO_PDF_CONFIG_JSON?.trim();
    const configJson = fromEnv ? JSON.parse(fromEnv) : rawConfig;
    const resolved = resolveEnvRefs(configJson, env);

    // Optional access keeps a config that lacks `service`/`agent` from crashing here
    // with a TypeError; validateConfig then reports the missing section by name.
    const service = resolved?.service;
    const agent = resolved?.agent;

    // A TTL override is honoured only when it parses to a finite, non-negative number.
    const ttlOverride = override(env.ECHO_PDF_FILE_GET_CACHE_TTL_SECONDS);
    const ttlValue = ttlOverride === undefined ? Number.NaN : Number(ttlOverride);
    const cacheTtlSeconds = Number.isFinite(ttlValue) && ttlValue >= 0
        ? Math.floor(ttlValue)
        : service?.fileGet?.cacheTtlSeconds;

    const withOverrides = {
        ...resolved,
        service: {
            ...service,
            publicBaseUrl: override(env.ECHO_PDF_PUBLIC_BASE_URL) ?? service?.publicBaseUrl,
            computeAuth: {
                authHeader: override(env.ECHO_PDF_COMPUTE_AUTH_HEADER) ?? service?.computeAuth?.authHeader,
                authEnv: override(env.ECHO_PDF_COMPUTE_AUTH_ENV) ?? service?.computeAuth?.authEnv,
            },
            fileGet: {
                authHeader: override(env.ECHO_PDF_FILE_GET_AUTH_HEADER) ?? service?.fileGet?.authHeader,
                authEnv: override(env.ECHO_PDF_FILE_GET_AUTH_ENV) ?? service?.fileGet?.authEnv,
                cacheTtlSeconds,
            },
        },
        agent: {
            ...agent,
            defaultProvider: override(env.ECHO_PDF_DEFAULT_PROVIDER) ?? agent?.defaultProvider,
            defaultModel: override(env.ECHO_PDF_DEFAULT_MODEL) ?? agent?.defaultModel,
        },
    };
    return validateConfig(withOverrides);
};
120
/**
 * Reads a required, non-blank string variable from `env` and returns it trimmed.
 *
 * For backward compatibility one alias is also consulted:
 *  - a key ending in `_API_KEY` falls back to the same name ending in `_KEY`;
 *  - any other key ending in `_KEY` falls back to the same name ending in `_API_KEY`.
 *
 * @param env Environment bindings / variables.
 * @param key Name of the variable to read.
 * @returns The trimmed value of `key` or of its alias.
 * @throws Error when neither the key nor its alias holds a non-blank string.
 */
export const readRequiredEnv = (env, key) => {
    const read = (name) => {
        const value = env[name];
        return typeof value === "string" && value.trim().length > 0 ? value.trim() : null;
    };
    const candidates = [key];
    if (key.endsWith("_API_KEY")) {
        candidates.push(key.replace(/_API_KEY$/, "_KEY"));
    }
    else if (key.endsWith("_KEY")) {
        // `else` matters: every "*_API_KEY" name also ends in "_KEY", and expanding it
        // again would look up the nonsensical "*_API_API_KEY".
        candidates.push(key.replace(/_KEY$/, "_API_KEY"));
    }
    for (const name of candidates) {
        const value = read(name);
        if (value)
            return value;
    }
    throw new Error(`Missing required env var "${key}"`);
};
@@ -0,0 +1,8 @@
1
+ import type { EchoPdfConfig } from "./pdf-types.js";
2
+ import type { Env, FileStore } from "./types.js";
3
/**
 * A file store together with two maintenance hooks for the storage backend in use.
 */
+ export interface RuntimeFileStoreBundle {
4
/** The store implementation itself. */
+ readonly store: FileStore;
5
/** Resolves to a usage report; its shape is not constrained by this type. */
+ stats: () => Promise<unknown>;
6
/** Resolves to a cleanup report; its shape is not constrained by this type. */
+ cleanup: () => Promise<unknown>;
7
+ }
8
/**
 * Returns the file store bundle for the given environment bindings and configuration.
 *
 * @param env Environment bindings used to pick a backend.
 * @param config Loaded configuration; its storage policy is handed to the backend.
 */
+ export declare const getRuntimeFileStore: (env: Env, config: EchoPdfConfig) => RuntimeFileStoreBundle;
@@ -0,0 +1,86 @@
1
+ import { DurableObjectFileStore } from "./file-store-do.js";
2
+ import { R2FileStore } from "./r2-file-store.js";
3
+ class InMemoryFileStore {
4
+ store = new Map();
5
+ async put(input) {
6
+ const id = crypto.randomUUID();
7
+ const record = {
8
+ id,
9
+ filename: input.filename,
10
+ mimeType: input.mimeType,
11
+ sizeBytes: input.bytes.byteLength,
12
+ createdAt: new Date().toISOString(),
13
+ bytes: input.bytes,
14
+ };
15
+ this.store.set(id, record);
16
+ return this.toMeta(record);
17
+ }
18
+ async get(fileId) {
19
+ return this.store.get(fileId) ?? null;
20
+ }
21
+ async list() {
22
+ return [...this.store.values()].map((record) => this.toMeta(record));
23
+ }
24
+ async delete(fileId) {
25
+ return this.store.delete(fileId);
26
+ }
27
+ toMeta(record) {
28
+ return {
29
+ id: record.id,
30
+ filename: record.filename,
31
+ mimeType: record.mimeType,
32
+ sizeBytes: record.sizeBytes,
33
+ createdAt: record.createdAt,
34
+ };
35
+ }
36
+ }
37
/** Module-wide in-memory store handed out when no storage binding is present. */
const fallbackStore = new InMemoryFileStore();
/** Largest `service.storage.maxFileBytes` accepted when the Durable Object backend is selected. */
const DO_SAFE_MAX_FILE_BYTES = 1_200_000;
/**
 * Picks the storage backend for this environment.
 *
 * Order of preference:
 *  1. `env.FILE_STORE_BUCKET` -> `R2FileStore`
 *  2. `env.FILE_STORE_DO` -> `DurableObjectFileStore` (throws when the configured
 *     `maxFileBytes` exceeds `DO_SAFE_MAX_FILE_BYTES`)
 *  3. otherwise the shared in-memory store, whose `cleanup` deletes nothing and
 *     only reports current usage.
 *
 * @returns The store plus `stats` / `cleanup` hooks for it.
 */
export const getRuntimeFileStore = (env, config) => {
    // Wraps a backend that already exposes stats()/cleanup().
    const bundleFor = (backend) => ({
        store: backend,
        stats: async () => backend.stats(),
        cleanup: async () => backend.cleanup(),
    });
    if (env.FILE_STORE_BUCKET) {
        return bundleFor(new R2FileStore(env.FILE_STORE_BUCKET, config.service.storage));
    }
    if (env.FILE_STORE_DO) {
        const configuredLimit = config.service.storage.maxFileBytes;
        if (configuredLimit > DO_SAFE_MAX_FILE_BYTES) {
            throw new Error(`service.storage.maxFileBytes=${configuredLimit} exceeds DO backend limit ${DO_SAFE_MAX_FILE_BYTES}; bind FILE_STORE_BUCKET (R2) or reduce maxFileBytes`);
        }
        return bundleFor(new DurableObjectFileStore(env.FILE_STORE_DO, config.service.storage));
    }
    // File count and byte total of the in-memory store.
    const memoryUsage = async () => {
        const files = await fallbackStore.list();
        let totalBytes = 0;
        for (const file of files) {
            totalBytes += file.sizeBytes;
        }
        return { fileCount: files.length, totalBytes };
    };
    return {
        store: fallbackStore,
        stats: async () => {
            const usage = await memoryUsage();
            return {
                backend: "memory",
                policy: config.service.storage,
                stats: usage,
            };
        },
        cleanup: async () => {
            const usage = await memoryUsage();
            return {
                backend: "memory",
                deletedExpired: 0,
                deletedEvicted: 0,
                stats: usage,
            };
        },
    };
};
@@ -0,0 +1,83 @@
1
+ import type { ProviderType, ReturnMode } from "./types.js";
2
/**
 * Settings for one entry of `EchoPdfConfig.providers`.
 */
+ export interface EchoPdfProviderConfig {
3
/** Provider kind; the allowed values are declared by `ProviderType` in `./types.js`. */
+ readonly type: ProviderType;
4
/** NOTE(review): presumably the name of the environment variable holding the API key — confirm against the provider-key lookup. */
+ readonly apiKeyEnv: string;
5
+ readonly baseUrl?: string;
6
+ readonly headers?: Record<string, string>;
7
/** NOTE(review): unit is milliseconds per the name; how it is enforced is not visible here. */
+ readonly timeoutMs?: number;
8
/** Optional per-provider endpoint paths. */
+ readonly endpoints?: {
9
+ readonly chatCompletionsPath?: string;
10
+ readonly modelsPath?: string;
11
+ };
12
+ }
13
/**
 * Limits applied to stored files.
 * Config validation requires every field to be a finite number greater than zero.
 */
+ export interface StoragePolicy {
14
/** Config validation requires this to be >= `service.maxPdfBytes`. */
+ readonly maxFileBytes: number;
15
/** Config validation requires this to be >= `maxFileBytes`. */
+ readonly maxTotalBytes: number;
16
+ readonly ttlHours: number;
17
+ readonly cleanupBatchSize: number;
18
+ }
19
/**
 * Shape of the service configuration object.
 */
+ export interface EchoPdfConfig {
20
+ readonly service: {
21
/** Required by config validation (must be non-empty). */
+ readonly name: string;
22
/** When set to a non-empty string, config validation requires it to start with `http://` or `https://`. */
+ readonly publicBaseUrl?: string;
23
+ readonly computeAuth?: {
24
+ readonly authHeader?: string;
25
+ readonly authEnv?: string;
26
+ };
27
+ readonly fileGet?: {
28
+ readonly authHeader?: string;
29
+ readonly authEnv?: string;
30
/** Config validation rejects negative numbers. */
+ readonly cacheTtlSeconds?: number;
31
+ };
32
+ readonly maxPdfBytes: number;
33
+ readonly maxPagesPerRequest: number;
34
/** Default for the `scale` argument of `renderPdfPageToPng`. */
+ readonly defaultRenderScale: number;
35
+ readonly storage: StoragePolicy;
36
+ };
37
+ readonly pdfium: {
38
/** Required by config validation; URL the PDFium wasm binary can be fetched from. */
+ readonly wasmUrl: string;
39
+ };
40
+ readonly agent: {
41
/** Required by config validation; must be a key of `providers`. */
+ readonly defaultProvider: string;
42
/** Config validation only requires a string; the empty string is accepted. */
+ readonly defaultModel: string;
43
+ readonly ocrPrompt: string;
44
+ readonly tablePrompt: string;
45
+ };
46
/** Provider settings keyed by alias. */
+ readonly providers: Record<string, EchoPdfProviderConfig>;
47
+ readonly mcp: {
48
+ readonly serverName: string;
49
+ readonly version: string;
50
+ readonly authHeader?: string;
51
+ readonly authEnv?: string;
52
+ };
53
+ }
54
/**
 * One trace record; `kind` is always `"step"` and `phase` says whether the record
 * marks a start, an end, or a log line for the step called `name`.
 */
+ export interface AgentTraceEvent {
55
+ readonly kind: "step";
56
+ readonly phase: "start" | "end" | "log";
57
+ readonly name: string;
58
+ readonly level?: "info" | "error";
59
/** Free-form data attached to the record; not constrained by this type. */
+ readonly payload?: unknown;
60
+ }
61
/**
 * Request describing one PDF operation.
 * NOTE(review): `fileId`, `url` and `base64` look like alternative ways to name the
 * source document — confirm precedence in the code that consumes this type.
 */
+ export interface PdfOperationRequest {
62
+ readonly operation: "extract_pages" | "ocr_pages" | "tables_to_latex";
63
+ readonly fileId?: string;
64
+ readonly url?: string;
65
+ readonly base64?: string;
66
+ readonly filename?: string;
67
/** NOTE(review): whether page numbers are 0- or 1-based is not visible here — confirm with the consumer. */
+ readonly pages: ReadonlyArray<number>;
68
+ readonly renderScale?: number;
69
+ readonly provider?: string;
70
+ readonly model: string;
71
+ readonly providerApiKeys?: Record<string, string>;
72
+ readonly returnMode?: ReturnMode;
73
+ readonly prompt?: string;
74
+ }
75
/**
 * Description of one tool: its name, a human-readable description, an input schema
 * object, and the source it comes from (only `"local"` sources are representable).
 */
+ export interface ToolSchema {
76
+ readonly name: string;
77
+ readonly description: string;
78
/** NOTE(review): presumably a JSON Schema object — the type only requires string keys. */
+ readonly inputSchema: Record<string, unknown>;
79
+ readonly source: {
80
+ readonly kind: "local";
81
+ readonly toolName: string;
82
+ };
83
+ }
@@ -0,0 +1 @@
1
+ export {};
@@ -0,0 +1,9 @@
1
+ import type { EchoPdfConfig } from "./pdf-types.js";
2
/** Resolves to the number of pages in the PDF given as raw bytes. */
+ export declare const getPdfPageCount: (config: EchoPdfConfig, bytes: Uint8Array) => Promise<number>;
3
/**
 * Renders one page of the PDF to a PNG.
 * `scale` multiplies the page's width and height to get the pixel size.
 */
+ export declare const renderPdfPageToPng: (config: EchoPdfConfig, bytes: Uint8Array, pageIndex: number, scale?: number) => Promise<{
4
+ width: number;
5
+ height: number;
6
+ png: Uint8Array;
7
+ }>;
8
/** Resolves to the text of one page of the PDF. */
+ export declare const extractPdfPageText: (config: EchoPdfConfig, bytes: Uint8Array, pageIndex: number) => Promise<string>;
9
/** Resolves to PDF bytes obtained from the string `value` (a URL, judging by the implementation's use of `fetch`). */
+ export declare const toBytes: (value: string) => Promise<Uint8Array>;
@@ -0,0 +1,180 @@
1
+ import { init } from "@embedpdf/pdfium";
2
+ import { encode as encodePng } from "@cf-wasm/png";
3
/** The initialized PDFium module, or null until the first successful load. */
let moduleInstance = null;
/** Whether `FPDF_InitLibrary` has been called on `moduleInstance`. */
let libraryInitialized = false;
/** In-flight (or completed) module load, shared by concurrent callers. */
let pdfiumInitPromise = null;
const toUint8 = (value) => new Uint8Array(value);
const textDecoder = new TextDecoder();
/** Runtime probe: true when a global `WebSocketPair` constructor exists. */
const isWorkerdRuntime = () => typeof globalThis.WebSocketPair === "function";
/**
 * Installs a pass-through `WebAssembly.Function` when the runtime does not provide one;
 * the replacement ignores the signature argument and returns the function unchanged.
 */
const ensureWasmFunctionShim = () => {
    const wasmApi = WebAssembly;
    if (typeof wasmApi.Function === "function")
        return;
    wasmApi.Function = (_sig, fn) => fn;
};
/**
 * Loads the PDFium wasm module.
 * When the workerd probe succeeds, the bundled `pdfium.wasm` import is tried first;
 * if that import is not a `WebAssembly.Module`, or the probe fails, the binary is
 * fetched from `config.pdfium.wasmUrl`.
 */
const loadPdfiumModule = async (config) => {
    if (isWorkerdRuntime()) {
        const wasmModuleImport = await import("@embedpdf/pdfium/pdfium.wasm");
        const maybeModule = wasmModuleImport.default ?? wasmModuleImport;
        if (maybeModule instanceof WebAssembly.Module) {
            return init({
                instantiateWasm: (imports, successCallback) => {
                    const instance = new WebAssembly.Instance(maybeModule, imports);
                    successCallback(instance, maybeModule);
                    return instance.exports;
                },
            });
        }
    }
    const res = await fetch(config.pdfium.wasmUrl);
    if (!res.ok) {
        // Without this check an HTML error page would be handed to the wasm loader
        // and surface as an opaque compile error.
        throw new Error(`Failed to fetch pdfium wasm: HTTP ${res.status}`);
    }
    const wasmBinary = await res.arrayBuffer();
    return init({ wasmBinary });
};
/**
 * Returns the shared, initialized PDFium module, loading it on first use.
 *
 * Concurrent first calls share one load: previously each caller ran its own `init`,
 * and the later one replaced `moduleInstance` with a module on which
 * `FPDF_InitLibrary` was never called. A failed load is not cached, so the next
 * call retries.
 */
const ensurePdfium = async (config) => {
    ensureWasmFunctionShim();
    if (!moduleInstance) {
        if (!pdfiumInitPromise) {
            pdfiumInitPromise = loadPdfiumModule(config).catch((error) => {
                pdfiumInitPromise = null;
                throw error;
            });
        }
        moduleInstance = await pdfiumInitPromise;
    }
    if (!libraryInitialized) {
        moduleInstance.FPDF_InitLibrary();
        libraryInitialized = true;
    }
    return moduleInstance;
};
41
+ const makeDoc = (pdfium, bytes) => {
42
+ const memPtr = pdfium.pdfium.wasmExports.malloc(bytes.length);
43
+ pdfium.pdfium.HEAPU8.set(bytes, memPtr);
44
+ const doc = pdfium.FPDF_LoadMemDocument(memPtr, bytes.length, "");
45
+ if (!doc) {
46
+ pdfium.pdfium.wasmExports.free(memPtr);
47
+ throw new Error("Failed to load PDF document");
48
+ }
49
+ return { doc, memPtr };
50
+ };
51
+ const closeDoc = (pdfium, doc, memPtr) => {
52
+ pdfium.FPDF_CloseDocument(doc);
53
+ pdfium.pdfium.wasmExports.free(memPtr);
54
+ };
55
/**
 * Returns a new byte array of the same length with the first and third byte of
 * every 4-byte pixel swapped (BGRA -> RGBA). The input is not modified.
 * Missing bytes in a trailing partial pixel read as 0 (255 for alpha); writes
 * past the end of the output are dropped by the typed array.
 */
const bgraToRgba = (bgra) => {
    const total = bgra.length;
    const rgba = new Uint8Array(total);
    let px = 0;
    while (px < total) {
        const blue = bgra[px];
        const green = bgra[px + 1];
        const red = bgra[px + 2];
        const alpha = bgra[px + 3];
        rgba[px] = red ?? 0;
        rgba[px + 1] = green ?? 0;
        rgba[px + 2] = blue ?? 0;
        rgba[px + 3] = alpha ?? 255;
        px += 4;
    }
    return rgba;
};
65
+ const decodeUtf16Le = (buf) => {
66
+ const view = new Uint16Array(buf.buffer, buf.byteOffset, Math.floor(buf.byteLength / 2));
67
+ const chars = [];
68
+ for (const code of view) {
69
+ if (code === 0)
70
+ break;
71
+ chars.push(code);
72
+ }
73
+ return String.fromCharCode(...chars);
74
+ };
75
/**
 * Opens the PDF held in `bytes` and resolves to its page count.
 * The document and its heap buffer are released before returning, including on error.
 */
export const getPdfPageCount = async (config, bytes) => {
    const engine = await ensurePdfium(config);
    const opened = makeDoc(engine, bytes);
    try {
        const pageCount = engine.FPDF_GetPageCount(opened.doc);
        return pageCount;
    }
    finally {
        closeDoc(engine, opened.doc, opened.memPtr);
    }
};
85
/**
 * Renders one page of a PDF to a PNG on a white background.
 *
 * @param config Loaded configuration (supplies the default scale and the wasm location).
 * @param bytes Raw PDF bytes.
 * @param pageIndex Index passed to `FPDF_LoadPage`.
 * @param scale Multiplier applied to the page width/height to get the pixel size;
 *   defaults to `config.service.defaultRenderScale`. Each dimension is at least 1 pixel.
 * @returns The pixel dimensions and the encoded PNG bytes.
 * @throws Error when the document, page, or bitmap cannot be created.
 */
export const renderPdfPageToPng = async (config, bytes, pageIndex, scale = config.service.defaultRenderScale) => {
    const pdfium = await ensurePdfium(config);
    const { doc, memPtr } = makeDoc(pdfium, bytes);
    let page = 0;
    let bitmap = 0;
    try {
        page = pdfium.FPDF_LoadPage(doc, pageIndex);
        if (!page) {
            throw new Error(`Failed to load page ${pageIndex}`);
        }
        const width = Math.max(1, Math.round(pdfium.FPDF_GetPageWidthF(page) * scale));
        const height = Math.max(1, Math.round(pdfium.FPDF_GetPageHeightF(page) * scale));
        // Third argument 1 requests a bitmap with an alpha channel (4 bytes per pixel).
        bitmap = pdfium.FPDFBitmap_Create(width, height, 1);
        if (!bitmap) {
            throw new Error("Failed to create bitmap");
        }
        // Opaque white background, then draw the page over it.
        pdfium.FPDFBitmap_FillRect(bitmap, 0, 0, width, height, 0xffffffff);
        pdfium.FPDF_RenderPageBitmap(bitmap, page, 0, 0, width, height, 0, 0);
        const stride = pdfium.FPDFBitmap_GetStride(bitmap);
        const bufferPtr = pdfium.FPDFBitmap_GetBuffer(bitmap);
        // Look the heap up only now: earlier allocations may have replaced the view.
        const heap = pdfium.pdfium.HEAPU8;
        const rowBytes = width * 4;
        let bgra;
        if (stride === rowBytes) {
            bgra = heap.slice(bufferPtr, bufferPtr + stride * height);
        }
        else {
            // The stride may include per-row padding; the PNG encoder expects tightly
            // packed rows, so copy only the pixel bytes of each scanline.
            const copyBytes = Math.min(stride, rowBytes);
            bgra = new Uint8Array(rowBytes * height);
            for (let row = 0; row < height; row += 1) {
                const rowStart = bufferPtr + row * stride;
                bgra.set(heap.subarray(rowStart, rowStart + copyBytes), row * rowBytes);
            }
        }
        const rgba = bgraToRgba(bgra);
        const png = encodePng(rgba, width, height);
        return { width, height, png };
    }
    finally {
        // Release in reverse order of acquisition.
        if (bitmap)
            pdfium.FPDFBitmap_Destroy(bitmap);
        if (page)
            pdfium.FPDF_ClosePage(page);
        closeDoc(pdfium, doc, memPtr);
    }
};
119
/**
 * Extracts the text of one page of a PDF.
 *
 * Resolves to the trimmed page text, or to the empty string when the page has no
 * text layer or no characters. Throws when the document or the page cannot be loaded.
 * Every native handle and buffer is released before returning, including on error.
 */
export const extractPdfPageText = async (config, bytes, pageIndex) => {
    const engine = await ensurePdfium(config);
    const { doc, memPtr } = makeDoc(engine, bytes);
    let pageHandle = 0;
    let textHandle = 0;
    let textBufPtr = 0;
    try {
        pageHandle = engine.FPDF_LoadPage(doc, pageIndex);
        if (!pageHandle) {
            throw new Error(`Failed to load page ${pageIndex}`);
        }
        textHandle = engine.FPDFText_LoadPage(pageHandle);
        // No text layer counts as zero characters.
        const charCount = textHandle ? engine.FPDFText_CountChars(textHandle) : 0;
        if (charCount <= 0) {
            return "";
        }
        // Two bytes per UTF-16 unit, plus one extra unit for the terminator.
        const byteLength = (charCount + 1) * 2;
        textBufPtr = engine.pdfium.wasmExports.malloc(byteLength);
        engine.FPDFText_GetText(textHandle, 0, charCount, textBufPtr);
        // Copy out of the heap before the buffer is freed in `finally`.
        const copied = engine.pdfium.HEAPU8.slice(textBufPtr, textBufPtr + byteLength);
        return decodeUtf16Le(copied).trim();
    }
    finally {
        if (textBufPtr) {
            engine.pdfium.wasmExports.free(textBufPtr);
        }
        if (textHandle) {
            engine.FPDFText_ClosePage(textHandle);
        }
        if (pageHandle) {
            engine.FPDF_ClosePage(pageHandle);
        }
        closeDoc(engine, doc, memPtr);
    }
};
153
/**
 * Downloads `value` and resolves to PDF bytes.
 *
 * The response body is returned directly when its content type mentions
 * `application/pdf` or it starts with the `%PDF-` signature. Otherwise the body is
 * treated as text, the first absolute `http(s)` link containing `.pdf` is fetched
 * (with `&amp;` unescaped), and that download must start with `%PDF-`.
 *
 * @throws Error on a non-OK HTTP status, when no PDF link is found, or when the
 *   linked file is not a PDF.
 */
export const toBytes = async (value) => {
    // Decodes the first (up to) 8 bytes so the caller can test for the PDF magic prefix.
    const leadingText = (data) => textDecoder.decode(data.subarray(0, Math.min(8, data.length)));

    const response = await fetch(value);
    if (!response.ok) {
        throw new Error(`Failed to fetch source: HTTP ${response.status}`);
    }
    const declaredType = (response.headers.get("content-type") ?? "").toLowerCase();
    const body = toUint8(await response.arrayBuffer());
    const bodyStart = leadingText(body);
    const declaredAsPdf = declaredType.includes("application/pdf");
    if (declaredAsPdf || bodyStart.startsWith("%PDF-")) {
        return body;
    }

    // Not a PDF: look for a PDF link inside the page text.
    const pageText = textDecoder.decode(body);
    const linkMatch = pageText.match(/https?:\/\/[^"' )]+\.pdf[^"' )]*/i);
    const link = linkMatch && linkMatch.length > 0 ? linkMatch[0] : null;
    if (link === null) {
        throw new Error("URL does not point to a PDF and no PDF link was found in the page");
    }
    const linkedResponse = await fetch(link.replace(/&amp;/g, "&"));
    if (!linkedResponse.ok) {
        throw new Error(`Failed to fetch resolved PDF url: HTTP ${linkedResponse.status}`);
    }
    const linkedBody = toUint8(await linkedResponse.arrayBuffer());
    if (!leadingText(linkedBody).startsWith("%PDF-")) {
        throw new Error("Resolved file is not a valid PDF");
    }
    return linkedBody;
};
@@ -0,0 +1,20 @@
1
+ import type { Env } from "./types.js";
2
+ import type { EchoPdfConfig } from "./pdf-types.js";
3
/**
 * Resolves to a list of model identifiers for the provider registered under `alias`.
 * NOTE(review): `runtimeApiKeys` presumably supplies per-request keys that take the
 * place of environment-configured ones — confirm in the implementation.
 */
+ export declare const listProviderModels: (config: EchoPdfConfig, env: Env, alias: string, runtimeApiKeys?: Record<string, string>) => Promise<ReadonlyArray<string>>;
4
/**
 * Sends `prompt` together with one image (as a data URL) to the given provider/model
 * and resolves to the returned text.
 */
+ export declare const visionRecognize: (input: {
5
+ config: EchoPdfConfig;
6
+ env: Env;
7
+ providerAlias: string;
8
+ model: string;
9
+ prompt: string;
10
+ imageDataUrl: string;
11
+ runtimeApiKeys?: Record<string, string>;
12
+ }) => Promise<string>;
13
/** Sends a text-only `prompt` to the given provider/model and resolves to the returned text. */
+ export declare const generateText: (input: {
14
+ config: EchoPdfConfig;
15
+ env: Env;
16
+ providerAlias: string;
17
+ model: string;
18
+ prompt: string;
19
+ runtimeApiKeys?: Record<string, string>;
20
+ }) => Promise<string>;