@echofiles/echo-pdf 0.4.1 → 0.4.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64)
  1. package/README.md +75 -0
  2. package/dist/agent-defaults.d.ts +3 -0
  3. package/dist/agent-defaults.js +18 -0
  4. package/dist/auth.d.ts +18 -0
  5. package/dist/auth.js +24 -0
  6. package/dist/core/index.d.ts +50 -0
  7. package/dist/core/index.js +7 -0
  8. package/dist/file-ops.d.ts +11 -0
  9. package/dist/file-ops.js +36 -0
  10. package/dist/file-store-do.d.ts +36 -0
  11. package/dist/file-store-do.js +298 -0
  12. package/dist/file-utils.d.ts +6 -0
  13. package/dist/file-utils.js +36 -0
  14. package/dist/http-error.d.ts +9 -0
  15. package/dist/http-error.js +14 -0
  16. package/dist/index.d.ts +1 -0
  17. package/dist/index.js +1 -0
  18. package/dist/mcp-server.d.ts +3 -0
  19. package/dist/mcp-server.js +127 -0
  20. package/dist/pdf-agent.d.ts +18 -0
  21. package/dist/pdf-agent.js +217 -0
  22. package/dist/pdf-config.d.ts +4 -0
  23. package/dist/pdf-config.js +130 -0
  24. package/dist/pdf-storage.d.ts +8 -0
  25. package/dist/pdf-storage.js +86 -0
  26. package/dist/pdf-types.d.ts +79 -0
  27. package/dist/pdf-types.js +1 -0
  28. package/dist/pdfium-engine.d.ts +9 -0
  29. package/dist/pdfium-engine.js +180 -0
  30. package/dist/provider-client.d.ts +12 -0
  31. package/dist/provider-client.js +134 -0
  32. package/dist/provider-keys.d.ts +10 -0
  33. package/dist/provider-keys.js +27 -0
  34. package/dist/r2-file-store.d.ts +20 -0
  35. package/dist/r2-file-store.js +176 -0
  36. package/dist/response-schema.d.ts +15 -0
  37. package/dist/response-schema.js +159 -0
  38. package/dist/tool-registry.d.ts +16 -0
  39. package/dist/tool-registry.js +175 -0
  40. package/dist/types.d.ts +91 -0
  41. package/dist/types.js +1 -0
  42. package/dist/worker.d.ts +7 -0
  43. package/dist/worker.js +366 -0
  44. package/package.json +22 -4
  45. package/wrangler.toml +1 -1
  46. package/src/agent-defaults.ts +0 -25
  47. package/src/file-ops.ts +0 -50
  48. package/src/file-store-do.ts +0 -349
  49. package/src/file-utils.ts +0 -43
  50. package/src/http-error.ts +0 -21
  51. package/src/index.ts +0 -415
  52. package/src/mcp-server.ts +0 -171
  53. package/src/pdf-agent.ts +0 -252
  54. package/src/pdf-config.ts +0 -143
  55. package/src/pdf-storage.ts +0 -109
  56. package/src/pdf-types.ts +0 -85
  57. package/src/pdfium-engine.ts +0 -207
  58. package/src/provider-client.ts +0 -176
  59. package/src/provider-keys.ts +0 -44
  60. package/src/r2-file-store.ts +0 -195
  61. package/src/response-schema.ts +0 -182
  62. package/src/tool-registry.ts +0 -203
  63. package/src/types.ts +0 -40
  64. package/src/wasm.d.ts +0 -4
package/src/response-schema.ts DELETED
@@ -1,182 +0,0 @@
1
- import type { JsonObject } from "./types"
2
-
3
- export interface ToolArtifact {
4
- readonly id?: string
5
- readonly kind: "image" | "pdf" | "file" | "json" | "text"
6
- readonly mimeType?: string
7
- readonly filename?: string
8
- readonly sizeBytes?: number
9
- readonly url?: string
10
- }
11
-
12
- export interface ToolOutputEnvelope {
13
- readonly ok: true
14
- readonly data: unknown
15
- readonly artifacts: ToolArtifact[]
16
- }
17
-
18
- const MAX_TEXT_STRING = 1200
19
- const MAX_TEXT_ARRAY = 40
20
- const MAX_TEXT_DEPTH = 8
21
-
22
- const asObj = (value: unknown): JsonObject =>
23
- typeof value === "object" && value !== null && !Array.isArray(value)
24
- ? (value as JsonObject)
25
- : {}
26
-
27
- const inferKind = (mimeType?: string): ToolArtifact["kind"] => {
28
- const mime = (mimeType || "").toLowerCase()
29
- if (mime.startsWith("image/")) return "image"
30
- if (mime === "application/pdf") return "pdf"
31
- if (mime.includes("json")) return "json"
32
- if (mime.startsWith("text/")) return "text"
33
- return "file"
34
- }
35
-
36
- const toAbsoluteUrl = (value: string, baseUrl: string): string => {
37
- try {
38
- return new URL(value, baseUrl).toString()
39
- } catch {
40
- return value
41
- }
42
- }
43
-
44
- const addArtifact = (artifacts: ToolArtifact[], artifact: ToolArtifact): void => {
45
- if (!artifact.id && !artifact.url && !artifact.filename) return
46
- artifacts.push(artifact)
47
- }
48
-
49
- export const buildToolOutputEnvelope = (
50
- result: unknown,
51
- baseUrl: string
52
- ): ToolOutputEnvelope => {
53
- const root = asObj(result)
54
- const artifacts: ToolArtifact[] = []
55
-
56
- const fileMeta = asObj(root.file)
57
- if (typeof fileMeta.id === "string") {
58
- addArtifact(artifacts, {
59
- id: fileMeta.id,
60
- kind: inferKind(typeof fileMeta.mimeType === "string" ? fileMeta.mimeType : undefined),
61
- mimeType: typeof fileMeta.mimeType === "string" ? fileMeta.mimeType : undefined,
62
- filename: typeof fileMeta.filename === "string" ? fileMeta.filename : undefined,
63
- sizeBytes: typeof fileMeta.sizeBytes === "number" ? fileMeta.sizeBytes : undefined,
64
- url: typeof root.url === "string" ? toAbsoluteUrl(root.url, baseUrl) : undefined,
65
- })
66
- }
67
-
68
- const images = Array.isArray(root.images) ? root.images : []
69
- for (const item of images) {
70
- const image = asObj(item)
71
- const fileId = typeof image.fileId === "string" ? image.fileId : undefined
72
- const rawUrl = typeof image.url === "string" ? image.url : undefined
73
- if (!fileId && !rawUrl) continue
74
- addArtifact(artifacts, {
75
- id: fileId,
76
- kind: "image",
77
- mimeType: typeof image.mimeType === "string" ? image.mimeType : "image/png",
78
- filename: fileId ? `artifact-${fileId}.png` : undefined,
79
- url: rawUrl ? toAbsoluteUrl(rawUrl, baseUrl) : undefined,
80
- })
81
- }
82
-
83
- const files = Array.isArray(root.files) ? root.files : []
84
- for (const item of files) {
85
- const meta = asObj(item)
86
- if (typeof meta.id !== "string") continue
87
- addArtifact(artifacts, {
88
- id: meta.id,
89
- kind: inferKind(typeof meta.mimeType === "string" ? meta.mimeType : undefined),
90
- mimeType: typeof meta.mimeType === "string" ? meta.mimeType : undefined,
91
- filename: typeof meta.filename === "string" ? meta.filename : undefined,
92
- sizeBytes: typeof meta.sizeBytes === "number" ? meta.sizeBytes : undefined,
93
- })
94
- }
95
-
96
- return {
97
- ok: true,
98
- data: result,
99
- artifacts,
100
- }
101
- }
102
-
103
- const summarizeData = (data: unknown): string => {
104
- const root = asObj(data)
105
- if (typeof root.returnMode === "string" && Array.isArray(root.images)) {
106
- return `Extracted ${root.images.length} page image(s) in returnMode=${root.returnMode}.`
107
- }
108
- if (Array.isArray(root.pages)) {
109
- return `Processed ${root.pages.length} page(s).`
110
- }
111
- if (Array.isArray(root.files)) {
112
- return `Listed ${root.files.length} file(s).`
113
- }
114
- if (typeof root.deleted === "boolean") {
115
- return root.deleted ? "File deleted." : "File not found."
116
- }
117
- return "Tool executed successfully."
118
- }
119
-
120
- const sanitizeString = (value: string): string => {
121
- if (value.startsWith("data:")) {
122
- const [head] = value.split(",", 1)
123
- return `${head},<omitted>`
124
- }
125
- if (/^[A-Za-z0-9+/=]{300,}$/.test(value)) {
126
- return `<base64 omitted len=${value.length}>`
127
- }
128
- if (value.length > MAX_TEXT_STRING) {
129
- return `${value.slice(0, MAX_TEXT_STRING)}...(truncated ${value.length - MAX_TEXT_STRING} chars)`
130
- }
131
- return value
132
- }
133
-
134
- const sanitizeForText = (value: unknown, depth = 0): unknown => {
135
- if (depth >= MAX_TEXT_DEPTH) return "<max-depth>"
136
- if (typeof value === "string") return sanitizeString(value)
137
- if (typeof value !== "object" || value === null) return value
138
- if (Array.isArray(value)) {
139
- const items = value.slice(0, MAX_TEXT_ARRAY).map((item) => sanitizeForText(item, depth + 1))
140
- if (value.length > MAX_TEXT_ARRAY) {
141
- items.push(`<truncated ${value.length - MAX_TEXT_ARRAY} items>`)
142
- }
143
- return items
144
- }
145
- const out: Record<string, unknown> = {}
146
- for (const [key, nested] of Object.entries(value)) {
147
- out[key] = sanitizeForText(nested, depth + 1)
148
- }
149
- return out
150
- }
151
-
152
- export const buildMcpContent = (envelope: ToolOutputEnvelope): Array<Record<string, unknown>> => {
153
- const lines: string[] = [summarizeData(envelope.data)]
154
- if (envelope.artifacts.length > 0) {
155
- lines.push("Artifacts:")
156
- for (const artifact of envelope.artifacts) {
157
- const descriptor = [
158
- artifact.kind,
159
- artifact.filename ?? artifact.id ?? "artifact",
160
- artifact.mimeType ?? "",
161
- artifact.url ?? "",
162
- ]
163
- .filter((v) => v.length > 0)
164
- .join(" | ")
165
- lines.push(`- ${descriptor}`)
166
- }
167
- }
168
- lines.push("")
169
- lines.push(JSON.stringify(sanitizeForText(envelope), null, 2))
170
-
171
- const content: Array<Record<string, unknown>> = [{ type: "text", text: lines.join("\n") }]
172
- for (const artifact of envelope.artifacts) {
173
- if (!artifact.url) continue
174
- content.push({
175
- type: "resource_link",
176
- name: artifact.filename ?? artifact.id ?? "artifact",
177
- uri: artifact.url,
178
- mimeType: artifact.mimeType ?? "application/octet-stream",
179
- })
180
- }
181
- return content
182
- }
package/src/tool-registry.ts DELETED
@@ -1,203 +0,0 @@
1
- import { normalizeReturnMode } from "./file-utils"
2
- import { runFileOp } from "./file-ops"
3
- import { runPdfAgent } from "./pdf-agent"
4
- import type { EchoPdfConfig, PdfOperationRequest, ToolSchema } from "./pdf-types"
5
- import type { Env, FileStore, JsonObject } from "./types"
6
-
7
- export interface ToolRuntimeContext {
8
- readonly config: EchoPdfConfig
9
- readonly env: Env
10
- readonly fileStore: FileStore
11
- readonly providerApiKeys?: Record<string, string>
12
- readonly trace?: (event: { kind: "step"; phase: "start" | "end" | "log"; name: string; payload?: unknown }) => void
13
- }
14
-
15
- interface ToolDefinition {
16
- readonly schema: ToolSchema
17
- run: (ctx: ToolRuntimeContext, args: JsonObject) => Promise<unknown>
18
- }
19
-
20
- const asNumberArray = (value: unknown): number[] =>
21
- Array.isArray(value) ? value.map((item) => Number(item)).filter((item) => Number.isInteger(item) && item > 0) : []
22
-
23
- const asObject = (value: unknown): JsonObject =>
24
- typeof value === "object" && value !== null && !Array.isArray(value)
25
- ? (value as JsonObject)
26
- : {}
27
-
28
- const readString = (obj: JsonObject, key: string): string | undefined => {
29
- const value = obj[key]
30
- return typeof value === "string" && value.trim().length > 0 ? value.trim() : undefined
31
- }
32
-
33
- const toolDefinitions: ReadonlyArray<ToolDefinition> = [
34
- {
35
- schema: {
36
- name: "pdf_extract_pages",
37
- description: "Render specific PDF pages to image and return inline/file_id/url mode.",
38
- inputSchema: {
39
- type: "object",
40
- properties: {
41
- fileId: { type: "string" },
42
- url: { type: "string" },
43
- base64: { type: "string" },
44
- filename: { type: "string" },
45
- pages: { type: "array", items: { type: "integer" } },
46
- renderScale: { type: "number" },
47
- returnMode: { type: "string", enum: ["inline", "file_id", "url"] },
48
- },
49
- required: ["pages"],
50
- },
51
- source: { kind: "local", toolName: "pdf.extract_pages" },
52
- },
53
- run: async (ctx, args) => {
54
- const req: PdfOperationRequest = {
55
- operation: "extract_pages",
56
- fileId: readString(args, "fileId"),
57
- url: readString(args, "url"),
58
- base64: readString(args, "base64"),
59
- filename: readString(args, "filename"),
60
- pages: asNumberArray(args.pages),
61
- renderScale: typeof args.renderScale === "number" ? args.renderScale : undefined,
62
- provider: undefined,
63
- model: "not-required",
64
- providerApiKeys: ctx.providerApiKeys,
65
- returnMode: normalizeReturnMode(args.returnMode),
66
- }
67
- return runPdfAgent(ctx.config, ctx.env, req, {
68
- fileStore: ctx.fileStore,
69
- trace: ctx.trace,
70
- })
71
- },
72
- },
73
- {
74
- schema: {
75
- name: "pdf_ocr_pages",
76
- description: "OCR specific PDF pages using configured multimodal model.",
77
- inputSchema: {
78
- type: "object",
79
- properties: {
80
- fileId: { type: "string" },
81
- url: { type: "string" },
82
- base64: { type: "string" },
83
- filename: { type: "string" },
84
- pages: { type: "array", items: { type: "integer" } },
85
- renderScale: { type: "number" },
86
- provider: { type: "string" },
87
- model: { type: "string" },
88
- prompt: { type: "string" },
89
- },
90
- required: ["pages"],
91
- },
92
- source: { kind: "local", toolName: "pdf.ocr_pages" },
93
- },
94
- run: async (ctx, args) => {
95
- const req: PdfOperationRequest = {
96
- operation: "ocr_pages",
97
- fileId: readString(args, "fileId"),
98
- url: readString(args, "url"),
99
- base64: readString(args, "base64"),
100
- filename: readString(args, "filename"),
101
- pages: asNumberArray(args.pages),
102
- renderScale: typeof args.renderScale === "number" ? args.renderScale : undefined,
103
- provider: readString(args, "provider"),
104
- model: readString(args, "model") ?? "",
105
- prompt: readString(args, "prompt"),
106
- providerApiKeys: ctx.providerApiKeys,
107
- returnMode: "inline",
108
- }
109
- return runPdfAgent(ctx.config, ctx.env, req, {
110
- fileStore: ctx.fileStore,
111
- trace: ctx.trace,
112
- })
113
- },
114
- },
115
- {
116
- schema: {
117
- name: "pdf_tables_to_latex",
118
- description: "Recognize tables from pages and return LaTeX tabular output.",
119
- inputSchema: {
120
- type: "object",
121
- properties: {
122
- fileId: { type: "string" },
123
- url: { type: "string" },
124
- base64: { type: "string" },
125
- filename: { type: "string" },
126
- pages: { type: "array", items: { type: "integer" } },
127
- renderScale: { type: "number" },
128
- provider: { type: "string" },
129
- model: { type: "string" },
130
- prompt: { type: "string" },
131
- },
132
- required: ["pages"],
133
- },
134
- source: { kind: "local", toolName: "pdf.tables_to_latex" },
135
- },
136
- run: async (ctx, args) => {
137
- const req: PdfOperationRequest = {
138
- operation: "tables_to_latex",
139
- fileId: readString(args, "fileId"),
140
- url: readString(args, "url"),
141
- base64: readString(args, "base64"),
142
- filename: readString(args, "filename"),
143
- pages: asNumberArray(args.pages),
144
- renderScale: typeof args.renderScale === "number" ? args.renderScale : undefined,
145
- provider: readString(args, "provider"),
146
- model: readString(args, "model") ?? "",
147
- prompt: readString(args, "prompt"),
148
- providerApiKeys: ctx.providerApiKeys,
149
- returnMode: "inline",
150
- }
151
- return runPdfAgent(ctx.config, ctx.env, req, {
152
- fileStore: ctx.fileStore,
153
- trace: ctx.trace,
154
- })
155
- },
156
- },
157
- {
158
- schema: {
159
- name: "file_ops",
160
- description: "Basic file operations: list/read/delete/put for runtime file store.",
161
- inputSchema: {
162
- type: "object",
163
- properties: {
164
- op: { type: "string", enum: ["list", "read", "delete", "put"] },
165
- fileId: { type: "string" },
166
- includeBase64: { type: "boolean" },
167
- text: { type: "string" },
168
- filename: { type: "string" },
169
- mimeType: { type: "string" },
170
- base64: { type: "string" },
171
- returnMode: { type: "string", enum: ["inline", "file_id", "url"] },
172
- },
173
- required: ["op"],
174
- },
175
- source: { kind: "local", toolName: "file.ops" },
176
- },
177
- run: async (ctx, args) =>
178
- runFileOp(ctx.fileStore, {
179
- op: (readString(args, "op") as "list" | "read" | "delete" | "put") ?? "list",
180
- fileId: readString(args, "fileId"),
181
- includeBase64: Boolean(args.includeBase64),
182
- text: readString(args, "text"),
183
- filename: readString(args, "filename"),
184
- mimeType: readString(args, "mimeType"),
185
- base64: readString(args, "base64"),
186
- returnMode: normalizeReturnMode(args.returnMode),
187
- }),
188
- },
189
- ]
190
-
191
- export const listToolSchemas = (): ReadonlyArray<ToolSchema> => toolDefinitions.map((item) => item.schema)
192
-
193
- export const callTool = async (
194
- name: string,
195
- args: unknown,
196
- ctx: ToolRuntimeContext
197
- ): Promise<unknown> => {
198
- const definition = toolDefinitions.find((item) => item.schema.name === name)
199
- if (!definition) {
200
- throw new Error(`Unknown tool: ${name}`)
201
- }
202
- return definition.run(ctx, asObject(args))
203
- }
package/src/types.ts DELETED
@@ -1,40 +0,0 @@
1
- export type JsonPrimitive = string | number | boolean | null
2
- export type JsonValue = JsonPrimitive | JsonObject | JsonArray
3
- export type JsonArray = JsonValue[]
4
- export interface JsonObject {
5
- [key: string]: JsonValue
6
- }
7
-
8
- export type ProviderType = "openai" | "openrouter" | "vercel-ai-gateway"
9
- export type ReturnMode = "inline" | "file_id" | "url"
10
-
11
- export interface Env {
12
- readonly ECHO_PDF_CONFIG_JSON?: string
13
- readonly ASSETS?: Fetcher
14
- readonly FILE_STORE_BUCKET?: R2Bucket
15
- readonly FILE_STORE_DO?: DurableObjectNamespace
16
- readonly [key: string]: string | Fetcher | DurableObjectNamespace | R2Bucket | undefined
17
- }
18
-
19
- export interface StoredFileMeta {
20
- readonly id: string
21
- readonly filename: string
22
- readonly mimeType: string
23
- readonly sizeBytes: number
24
- readonly createdAt: string
25
- }
26
-
27
- export interface StoredFileRecord extends StoredFileMeta {
28
- readonly bytes: Uint8Array
29
- }
30
-
31
- export interface FileStore {
32
- put(input: {
33
- readonly filename: string
34
- readonly mimeType: string
35
- readonly bytes: Uint8Array
36
- }): Promise<StoredFileMeta>
37
- get(fileId: string): Promise<StoredFileRecord | null>
38
- list(): Promise<ReadonlyArray<StoredFileMeta>>
39
- delete(fileId: string): Promise<boolean>
40
- }
package/src/wasm.d.ts DELETED
@@ -1,4 +0,0 @@
1
- declare module "*.wasm" {
2
- const wasmModule: WebAssembly.Module
3
- export default wasmModule
4
- }