@echofiles/echo-pdf 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,43 @@
1
+ import type { ReturnMode, StoredFileRecord } from "./types"
2
+
3
/**
 * Decodes a base64 string into its raw bytes.
 *
 * A leading `data:<...>;base64,` header is stripped first, so both bare
 * base64 payloads and full data URLs are accepted.
 *
 * @param value Base64 text, optionally prefixed with a data-URL header.
 * @returns The decoded bytes.
 */
export const fromBase64 = (value: string): Uint8Array => {
  const payload = value.replace(/^data:.*;base64,/, "")
  // atob yields a "binary string": one character per byte, code units 0-255.
  const binary = atob(payload)
  return Uint8Array.from(binary, (ch) => ch.charCodeAt(0))
}
11
+
12
/**
 * Encodes raw bytes as a base64 string.
 *
 * The input is converted to a binary string in slices of 0x8000 bytes so that
 * no single `String.fromCharCode` call receives an oversized argument list.
 *
 * @param bytes Bytes to encode.
 * @returns Base64 text (empty string for empty input).
 */
export const toBase64 = (bytes: Uint8Array): string => {
  const sliceLength = 0x8000
  const pieces: string[] = []
  for (let offset = 0; offset < bytes.length; offset += sliceLength) {
    const slice = bytes.subarray(offset, offset + sliceLength)
    pieces.push(String.fromCharCode(...slice))
  }
  return btoa(pieces.join(""))
}
21
+
22
/**
 * Builds a `data:` URL carrying the given bytes as base64.
 *
 * @param bytes Content to embed.
 * @param mimeType MIME type written into the URL header, used verbatim.
 * @returns A string of the form `data:<mimeType>;base64,<payload>`.
 */
export const toDataUrl = (bytes: Uint8Array, mimeType: string): string => {
  const encoded = toBase64(bytes)
  return "data:" + mimeType + ";base64," + encoded
}
24
+
25
/**
 * Coerces an arbitrary value into a valid return mode.
 *
 * @param value Untrusted input (for example a field from a request body).
 * @returns `value` when it is exactly "file_id", "url" or "inline";
 *   otherwise the default "inline".
 */
export const normalizeReturnMode = (value: unknown): ReturnMode => {
  const accepted: readonly ReturnMode[] = ["file_id", "url", "inline"]
  const matched = accepted.find((mode) => mode === value)
  return matched ?? "inline"
}
31
+
32
/**
 * Converts a stored file into the payload returned for inline delivery.
 *
 * The result always carries the file's metadata under `file`. Content fields
 * are filled selectively and are `undefined` otherwise:
 * - `dataUrl`: only when the MIME type starts with "image/".
 * - `base64`: only when `includeBase64` is true.
 * - `text`: only when the MIME type starts with "text/" (decoded with the
 *   default `TextDecoder`).
 *
 * @param file Stored record to describe.
 * @param includeBase64 Whether to attach the raw content as base64.
 * @returns A plain object with `file`, `dataUrl`, `base64` and `text` keys.
 */
export const toInlineFilePayload = (file: StoredFileRecord, includeBase64: boolean): Record<string, unknown> => {
  const { id, filename, mimeType, sizeBytes, createdAt, bytes } = file
  const isImage = mimeType.startsWith("image/")
  const isText = mimeType.startsWith("text/")

  return {
    file: { id, filename, mimeType, sizeBytes, createdAt },
    dataUrl: isImage ? toDataUrl(bytes, mimeType) : undefined,
    base64: includeBase64 ? toBase64(bytes) : undefined,
    text: isText ? new TextDecoder().decode(bytes) : undefined,
  }
}
package/src/index.ts ADDED
@@ -0,0 +1,334 @@
1
+ import { normalizeReturnMode } from "./file-utils"
2
+ import { FileStoreDO } from "./file-store-do"
3
+ import { resolveModelForProvider, resolveProviderAlias } from "./agent-defaults"
4
+ import { handleMcpRequest } from "./mcp-server"
5
+ import { loadEchoPdfConfig } from "./pdf-config"
6
+ import { getRuntimeFileStore } from "./pdf-storage"
7
+ import { listProviderModels } from "./provider-client"
8
+ import { callTool, listToolSchemas } from "./tool-registry"
9
+ import type { AgentTraceEvent, PdfOperationRequest } from "./pdf-types"
10
+ import type { Env, JsonObject } from "./types"
11
+
12
/**
 * Wraps a value in a JSON HTTP response that must not be cached.
 *
 * @param data Value serialized with `JSON.stringify` as the body.
 * @param status HTTP status code; defaults to 200.
 */
const json = (data: unknown, status = 200): Response => {
  const headers = {
    "Content-Type": "application/json; charset=utf-8",
    "Cache-Control": "no-store",
  }
  return new Response(JSON.stringify(data), { status, headers })
}
20
+
21
/**
 * Produces a human-readable message from anything that was thrown.
 *
 * @param error The caught value.
 * @returns `error.message` for `Error` instances, otherwise `String(error)`.
 */
const toError = (error: unknown): string => {
  if (error instanceof Error) {
    return error.message
  }
  return String(error)
}
23
+
24
/**
 * Reads a request body as a JSON object, never throwing.
 *
 * @param request Incoming request whose body is consumed.
 * @returns The parsed body when it is a non-null, non-array object; an empty
 *   object when the body is not valid JSON or is any other JSON value.
 */
const readJson = async (request: Request): Promise<Record<string, unknown>> => {
  let parsed: unknown
  try {
    parsed = await request.json()
  } catch {
    // Malformed or missing JSON is treated the same as an empty object.
    return {}
  }

  const isPlainObject = parsed !== null && typeof parsed === "object" && !Array.isArray(parsed)
  return isPlainObject ? (parsed as Record<string, unknown>) : {}
}
35
+
36
/**
 * Narrows an unknown value to a JSON object.
 *
 * @param value Any value.
 * @returns `value` itself (same reference) when it is a non-null, non-array
 *   object; otherwise a fresh empty object.
 */
const asObj = (value: unknown): JsonObject => {
  if (value === null || typeof value !== "object" || Array.isArray(value)) {
    return {}
  }
  return value as JsonObject
}
40
+
41
/**
 * Wraps a byte stream in a Server-Sent Events response.
 *
 * @param stream Readable side carrying already-encoded SSE frames.
 * @returns A response with `text/event-stream` content type, caching
 *   disabled and a keep-alive connection header.
 */
const sseResponse = (stream: ReadableStream<Uint8Array>): Response => {
  const headers = {
    "Content-Type": "text/event-stream; charset=utf-8",
    "Cache-Control": "no-store",
    Connection: "keep-alive",
  }
  return new Response(stream, { headers })
}
49
+
50
/**
 * Serializes one Server-Sent Events frame to UTF-8 bytes.
 *
 * @param event Value written on the `event:` line.
 * @param data Payload, JSON-stringified onto the `data:` line.
 * @returns The encoded frame, terminated by a blank line.
 */
const encodeSse = (event: string, data: unknown): Uint8Array => {
  const payload = JSON.stringify(data)
  const frame = "event: " + event + "\ndata: " + payload + "\n\n"
  return new TextEncoder().encode(frame)
}
54
+
55
/**
 * Type guard for the PDF operation names this service understands.
 *
 * @param value Untrusted value to check.
 * @returns True only for "extract_pages", "ocr_pages" or "tables_to_latex".
 */
const isValidOperation = (value: unknown): value is PdfOperationRequest["operation"] => {
  const knownOperations: readonly unknown[] = ["extract_pages", "ocr_pages", "tables_to_latex"]
  return knownOperations.includes(value)
}
57
+
58
/**
 * Builds a typed PDF operation request from a loosely-typed request body.
 *
 * Field handling:
 * - `operation`: kept when valid, otherwise "extract_pages".
 * - `fileId`, `url`, `base64`, `filename`, `prompt`: kept only when strings.
 * - `pages`: each array element is passed through `Number`; non-arrays become `[]`.
 * - `renderScale`: kept only when a number.
 * - `provider`: the string from the body, otherwise `defaultProvider`.
 * - `model`: the string from the body, otherwise the empty string.
 * - `providerApiKeys`: passed through when it is a non-null object (its
 *   entries are not inspected here).
 * - `returnMode`: normalized via `normalizeReturnMode`.
 *
 * @param input Parsed request body.
 * @param defaultProvider Provider alias used when the body names none.
 */
const toPdfOperation = (input: Record<string, unknown>, defaultProvider: string): PdfOperationRequest => {
  const optionalString = (candidate: unknown): string | undefined =>
    typeof candidate === "string" ? candidate : undefined

  const { operation, pages, renderScale, providerApiKeys } = input
  const hasKeyMap = typeof providerApiKeys === "object" && providerApiKeys !== null

  return {
    operation: isValidOperation(operation) ? operation : "extract_pages",
    fileId: optionalString(input.fileId),
    url: optionalString(input.url),
    base64: optionalString(input.base64),
    filename: optionalString(input.filename),
    pages: Array.isArray(pages) ? pages.map((entry) => Number(entry)) : [],
    renderScale: typeof renderScale === "number" ? renderScale : undefined,
    provider: optionalString(input.provider) ?? defaultProvider,
    model: optionalString(input.model) ?? "",
    providerApiKeys: hasKeyMap ? (providerApiKeys as Record<string, string>) : undefined,
    returnMode: normalizeReturnMode(input.returnMode),
    prompt: optionalString(input.prompt),
  }
}
74
+
75
/**
 * Lookup table from a PDF operation name to the name of the registered tool
 * that is invoked for it.
 */
const toolNameByOperation: Record<PdfOperationRequest["operation"], string> = {
  extract_pages: "pdf_extract_pages",
  ocr_pages: "pdf_ocr_pages",
  tables_to_latex: "pdf_tables_to_latex",
}
80
+
81
/**
 * Flattens a PDF operation request into the argument object handed to a tool.
 *
 * `pages` is always present. Every other field is copied only when it is
 * truthy, except `renderScale`, which is copied whenever it is a number
 * (so 0 is preserved). `operation` and `providerApiKeys` are never copied.
 *
 * @param request Normalized operation request.
 * @returns Tool arguments, with keys inserted in a fixed order.
 */
const operationArgsFromRequest = (request: PdfOperationRequest): JsonObject => {
  const { pages, fileId, url, base64, filename, renderScale, returnMode, provider, model, prompt } = request

  // Two groups so that `renderScale` lands between them, keeping key order stable.
  const sourceFields = { fileId, url, base64, filename }
  const optionFields = { returnMode, provider, model, prompt }

  const collected: JsonObject = {
    pages: pages as unknown as JsonObject["pages"],
  }
  for (const [key, fieldValue] of Object.entries(sourceFields)) {
    if (fieldValue) collected[key] = fieldValue
  }
  if (typeof renderScale === "number") {
    collected.renderScale = renderScale
  }
  for (const [key, fieldValue] of Object.entries(optionFields)) {
    if (fieldValue) collected[key] = fieldValue
  }
  return collected
}
96
+
97
/**
 * Worker entry point: a single `fetch` handler that routes by method and path.
 *
 * Routes handled here:
 * - `GET  /health`             liveness probe
 * - `GET  /config`             service, agent, provider and MCP metadata
 * - `GET  /tools/catalog`      tool schemas
 * - `POST /tools/call`         invoke a tool by name
 * - `POST /providers/models`   list models for a provider
 * - `POST /api/agent/run`      run one PDF operation, JSON response
 * - `POST /api/agent/stream`   run one PDF operation, Server-Sent Events response
 * - `POST /api/files/op`       forward the body to the `file_ops` tool
 * - `POST /api/files/upload`   store a multipart `file` field
 * - `GET  /api/files/stats`    runtime store statistics
 * - `POST /api/files/cleanup`  runtime store cleanup
 * - `POST /mcp`                MCP JSON-RPC endpoint
 * - any other `GET`            static assets when `env.ASSETS` is bound
 *
 * Anything else gets a 404 JSON body listing the routes. Errors thrown while
 * handling a route are reported as `{ error }` with status 500.
 */
export default {
  async fetch(request: Request, env: Env, ctx: ExecutionContext): Promise<Response> {
    const url = new URL(request.url)
    const config = loadEchoPdfConfig(env)
    const runtimeStore = getRuntimeFileStore(env, config)
    const fileStore = runtimeStore.store

    if (request.method === "GET" && url.pathname === "/health") {
      return json({ ok: true, service: config.service.name, now: new Date().toISOString() })
    }

    if (request.method === "GET" && url.pathname === "/config") {
      return json({
        service: config.service,
        agent: config.agent,
        // Only the alias and type of each provider are exposed.
        providers: Object.entries(config.providers).map(([alias, provider]) => ({ alias, type: provider.type })),
        capabilities: {
          toolCatalogEndpoint: "/tools/catalog",
          toolCallEndpoint: "/tools/call",
          fileOpsEndpoint: "/api/files/op",
          fileUploadEndpoint: "/api/files/upload",
          fileStatsEndpoint: "/api/files/stats",
          fileCleanupEndpoint: "/api/files/cleanup",
          supportedReturnModes: ["inline", "file_id"],
        },
        mcp: {
          serverName: config.mcp.serverName,
          version: config.mcp.version,
          authHeader: config.mcp.authHeader ?? null,
        },
      })
    }

    if (request.method === "GET" && url.pathname === "/tools/catalog") {
      return json({ tools: listToolSchemas() })
    }

    if (request.method === "POST" && url.pathname === "/tools/call") {
      const body = await readJson(request)
      const name = typeof body.name === "string" ? body.name : ""
      if (!name) return json({ error: "Missing required field: name" }, 400)
      try {
        const args = asObj(body.arguments)
        const preferredProvider = resolveProviderAlias(
          config,
          typeof body.provider === "string" ? body.provider : undefined
        )
        const preferredModel = resolveModelForProvider(
          config,
          preferredProvider,
          typeof body.model === "string" ? body.model : undefined
        )
        // For pdf_* tools, fill in provider/model only when the caller's
        // arguments did not already supply a non-empty string.
        if (name.startsWith("pdf_")) {
          if (typeof args.provider !== "string" || args.provider.length === 0) {
            args.provider = preferredProvider
          }
          if (typeof args.model !== "string" || args.model.length === 0) {
            args.model = preferredModel
          }
        }

        const result = await callTool(name, args, {
          config,
          env,
          fileStore,
          providerApiKeys: typeof body.providerApiKeys === "object" && body.providerApiKeys !== null
            ? (body.providerApiKeys as Record<string, string>)
            : undefined,
        })
        return json({ name, output: result })
      } catch (error) {
        return json({ error: toError(error) }, 500)
      }
    }

    if (request.method === "POST" && url.pathname === "/providers/models") {
      const body = await readJson(request)
      const provider = resolveProviderAlias(config, typeof body.provider === "string" ? body.provider : undefined)
      const runtimeKeys = typeof body.providerApiKeys === "object" && body.providerApiKeys !== null
        ? (body.providerApiKeys as Record<string, string>)
        : undefined
      try {
        const models = await listProviderModels(config, env, provider, runtimeKeys)
        return json({ provider, models })
      } catch (error) {
        return json({ error: toError(error) }, 500)
      }
    }

    if (request.method === "POST" && url.pathname === "/api/agent/run") {
      const body = await readJson(request)
      // An absent `operation` falls back to the default inside toPdfOperation;
      // only an explicitly supplied but unknown one is rejected.
      if (Object.hasOwn(body, "operation") && !isValidOperation(body.operation)) {
        return json({ error: "Invalid operation. Must be one of: extract_pages, ocr_pages, tables_to_latex" }, 400)
      }
      const requestPayload = toPdfOperation(body, config.agent.defaultProvider)
      try {
        const result = await callTool(toolNameByOperation[requestPayload.operation], operationArgsFromRequest(requestPayload), {
          config,
          env,
          fileStore,
          providerApiKeys: requestPayload.providerApiKeys,
        })
        return json(result)
      } catch (error) {
        return json({ error: toError(error) }, 500)
      }
    }

    if (request.method === "POST" && url.pathname === "/api/agent/stream") {
      const body = await readJson(request)
      if (Object.hasOwn(body, "operation") && !isValidOperation(body.operation)) {
        return json({ error: "Invalid operation. Must be one of: extract_pages, ocr_pages, tables_to_latex" }, 400)
      }
      const requestPayload = toPdfOperation(body, config.agent.defaultProvider)
      const stream = new TransformStream<Uint8Array, Uint8Array>()
      const writer = stream.writable.getWriter()
      // Writes are chained on one promise so frames go out in call order;
      // a failed write is swallowed so later sends do not reject the chain.
      let queue: Promise<void> = Promise.resolve()
      const send = (event: string, data: unknown): void => {
        queue = queue.then(() => writer.write(encodeSse(event, data))).catch(() => undefined)
      }

      const run = async (): Promise<void> => {
        try {
          send("meta", { kind: "meta", startedAt: new Date().toISOString(), streaming: true })
          // Echo the request for tracing, but never the caller-supplied API
          // keys: secrets do not belong in a trace stream. JSON.stringify
          // drops the undefined-valued property.
          send("io", {
            kind: "io",
            direction: "input",
            content: { ...requestPayload, providerApiKeys: undefined },
          })

          const result = await callTool(toolNameByOperation[requestPayload.operation], operationArgsFromRequest(requestPayload), {
            config,
            env,
            fileStore,
            providerApiKeys: requestPayload.providerApiKeys,
            trace: (event: AgentTraceEvent) => send("step", event),
          })

          send("io", { kind: "io", direction: "output", content: "operation completed" })
          send("result", { kind: "result", output: result })
          send("done", { ok: true })
        } catch (error) {
          send("error", { kind: "error", message: toError(error) })
          send("done", { ok: false })
        } finally {
          // Drain pending frames first; `queue` itself never rejects.
          await queue
          try {
            await writer.close()
          } catch {
            // close() can reject when the stream has already errored (for
            // example the client disconnected). There is nothing left to
            // flush, and letting it escape would reject the waitUntil promise.
          }
        }
      }
      // Respond immediately; waitUntil keeps the worker alive while run() streams.
      ctx.waitUntil(run())
      return sseResponse(stream.readable)
    }

    if (request.method === "POST" && url.pathname === "/api/files/op") {
      const body = await readJson(request)
      try {
        const result = await callTool("file_ops", asObj(body), {
          config,
          env,
          fileStore,
        })
        return json(result)
      } catch (error) {
        return json({ error: toError(error) }, 500)
      }
    }

    if (request.method === "POST" && url.pathname === "/api/files/upload") {
      try {
        const formData = await request.formData()
        // Typed structurally: a plain string field has no arrayBuffer method
        // and is rejected by the check below.
        const file = formData.get("file") as {
          readonly name?: string
          readonly type?: string
          arrayBuffer?: () => Promise<ArrayBuffer>
        } | null
        if (!file || typeof file.arrayBuffer !== "function") {
          return json({ error: "Missing file field: file" }, 400)
        }
        const bytes = new Uint8Array(await file.arrayBuffer())
        const stored = await fileStore.put({
          filename: file.name || `upload-${Date.now()}.pdf`,
          mimeType: file.type || "application/pdf",
          bytes,
        })
        return json({ file: stored }, 200)
      } catch (error) {
        return json({ error: toError(error) }, 500)
      }
    }

    if (request.method === "GET" && url.pathname === "/api/files/stats") {
      try {
        return json(await runtimeStore.stats(), 200)
      } catch (error) {
        return json({ error: toError(error) }, 500)
      }
    }

    if (request.method === "POST" && url.pathname === "/api/files/cleanup") {
      try {
        return json(await runtimeStore.cleanup(), 200)
      } catch (error) {
        return json({ error: toError(error) }, 500)
      }
    }

    if (request.method === "POST" && url.pathname === "/mcp") {
      return await handleMcpRequest(request, env, config, fileStore)
    }

    if (request.method === "GET" && env.ASSETS) {
      // "/" is served as /index.html; a 404 from the asset binding falls
      // through to the JSON not-found response below.
      const assetReq = url.pathname === "/"
        ? new Request(new URL("/index.html", url), request)
        : request
      const asset = await env.ASSETS.fetch(assetReq)
      if (asset.status !== 404) return asset
    }

    return json(
      {
        error: "Not found",
        routes: {
          health: "GET /health",
          config: "GET /config",
          toolsCatalog: "GET /tools/catalog",
          toolCall: "POST /tools/call",
          models: "POST /providers/models",
          run: "POST /api/agent/run",
          stream: "POST /api/agent/stream",
          files: "POST /api/files/op",
          fileUpload: "POST /api/files/upload",
          fileStats: "GET /api/files/stats",
          fileCleanup: "POST /api/files/cleanup",
          mcp: "POST /mcp",
        },
      },
      404
    )
  },
}
333
+
334
+ export { FileStoreDO }
@@ -0,0 +1,109 @@
1
+ import type { Env, FileStore } from "./types"
2
+ import type { EchoPdfConfig } from "./pdf-types"
3
+ import { callTool, listToolSchemas } from "./tool-registry"
4
+
5
/**
 * Loosely-typed shape of an incoming JSON-RPC message.
 *
 * Every member is optional because a parsed body is cast to this type before
 * any of its fields have been validated.
 */
interface JsonRpcRequest {
  /** Protocol version marker as sent by the client. */
  readonly jsonrpc?: string
  /** Request identifier as sent by the client. */
  readonly id?: string | number | null
  /** Name of the method being invoked. */
  readonly method?: string
  /** Method parameters; unvalidated. */
  readonly params?: unknown
}
11
+
12
/**
 * Builds a JSON-RPC 2.0 success response.
 *
 * @param id Request id to echo back; a missing id is sent as `null`.
 * @param result Value placed under `result`.
 * @returns A JSON response with the default (200) status.
 */
const ok = (id: JsonRpcRequest["id"], result: unknown): Response => {
  const envelope = { jsonrpc: "2.0", id: id ?? null, result }
  return new Response(JSON.stringify(envelope), {
    headers: { "Content-Type": "application/json" },
  })
}
21
+
22
/**
 * Builds a JSON-RPC 2.0 error response, always sent with HTTP status 400.
 *
 * @param id Request id to echo back; a missing id is sent as `null`.
 * @param code JSON-RPC error code.
 * @param message Human-readable error description.
 */
const err = (id: JsonRpcRequest["id"], code: number, message: string): Response => {
  const envelope = { jsonrpc: "2.0", id: id ?? null, error: { code, message } }
  return new Response(JSON.stringify(envelope), {
    status: 400,
    headers: { "Content-Type": "application/json" },
  })
}
31
+
32
/**
 * Narrows an unknown value to a string-keyed record.
 *
 * @param v Any value.
 * @returns `v` itself when it is a non-null, non-array object; otherwise a
 *   fresh empty object.
 */
const asObj = (v: unknown): Record<string, unknown> => {
  if (v === null || typeof v !== "object" || Array.isArray(v)) {
    return {}
  }
  return v as Record<string, unknown>
}
34
+
35
/**
 * Decides whether a request may use the MCP endpoint.
 *
 * Authorization is only enforced when the config names both an auth header
 * and an env variable AND that env variable holds a non-empty string; in
 * every other case the request is allowed. When enforced, the header value
 * must equal the secret exactly.
 *
 * The comparison inspects every character of the presented value instead of
 * using `===`, so the time taken does not depend on how many leading
 * characters match the secret (best-effort mitigation of timing probes).
 * The accept/reject result is the same as strict string equality.
 *
 * @param request Incoming request carrying the candidate header.
 * @param env Worker environment holding the secret.
 * @param config Service configuration naming the header and env variable.
 * @returns True when the request is allowed to proceed.
 */
const maybeAuthorized = (request: Request, env: Env, config: EchoPdfConfig): boolean => {
  if (!config.mcp.authHeader || !config.mcp.authEnv) return true
  const required = env[config.mcp.authEnv]
  if (typeof required !== "string" || required.length === 0) return true

  const presented = request.headers.get(config.mcp.authHeader)
  if (presented === null) return false

  // Seed with the length difference so a value that merely repeats the
  // secret can never pass, then fold in every character without exiting early.
  // `required.length` is > 0 here, so the modulo is safe.
  let mismatch = required.length ^ presented.length
  for (let i = 0; i < presented.length; i++) {
    mismatch |= presented.charCodeAt(i) ^ required.charCodeAt(i % required.length)
  }
  return mismatch === 0
}
41
+
42
/**
 * Handles one MCP JSON-RPC message posted to the worker.
 *
 * Processing order:
 * 1. Reject with 401 when `maybeAuthorized` denies the request.
 * 2. Parse and validate the envelope: -32700 for unparseable JSON, -32600 for
 *    a non-object body, a `jsonrpc` other than "2.0", or a missing method.
 * 3. Acknowledge notifications (no `id`, method under `notifications/`) with
 *    202 and no body. JSON-RPC forbids replying to a notification, and MCP
 *    clients send `notifications/initialized` after `initialize`.
 * 4. Dispatch `initialize`, `tools/list` and `tools/call`; any other method
 *    yields -32601.
 *
 * For `tools/call`, a missing or empty tool name yields -32602. An error
 * thrown by the tool is reported as -32000 with the error's message. A
 * successful result is returned as a single text content item holding the
 * JSON-serialized tool output.
 *
 * @param request Incoming HTTP request; its body is consumed.
 * @param env Worker environment, forwarded to the tool.
 * @param config Service configuration (server name/version, auth settings).
 * @param fileStore File store forwarded to the tool.
 * @returns The HTTP response carrying the JSON-RPC reply (or an empty 202).
 */
export const handleMcpRequest = async (
  request: Request,
  env: Env,
  config: EchoPdfConfig,
  fileStore: FileStore
): Promise<Response> => {
  if (!maybeAuthorized(request, env, config)) {
    return new Response("Unauthorized", { status: 401 })
  }

  let body: JsonRpcRequest
  try {
    body = (await request.json()) as JsonRpcRequest
  } catch {
    return err(null, -32700, "Parse error")
  }
  if (typeof body !== "object" || body === null) {
    return err(null, -32600, "Invalid Request")
  }
  if (body.jsonrpc !== "2.0") {
    return err(body.id ?? null, -32600, "Invalid Request: jsonrpc must be '2.0'")
  }
  const method = body.method ?? ""
  const id = body.id ?? null
  if (typeof method !== "string" || method.length === 0) {
    return err(id, -32600, "Invalid Request: method is required")
  }

  // A message without an `id` member is a notification. Only the
  // `notifications/` namespace is acknowledged silently, so id-less calls to
  // real methods keep getting the reply they got before.
  if (body.id === undefined && method.startsWith("notifications/")) {
    return new Response(null, { status: 202 })
  }

  const params = asObj(body.params)

  if (method === "initialize") {
    return ok(id, {
      protocolVersion: "2024-11-05",
      serverInfo: {
        name: config.mcp.serverName,
        version: config.mcp.version,
      },
      capabilities: {
        tools: {},
      },
    })
  }

  if (method === "tools/list") {
    // Expose only the fields listed here from each registered schema.
    const tools = listToolSchemas().map((tool) => ({
      name: tool.name,
      description: tool.description,
      inputSchema: tool.inputSchema,
    }))
    return ok(id, { tools })
  }

  if (method !== "tools/call") {
    return err(id, -32601, `Unsupported method: ${method}`)
  }

  const toolName = typeof params.name === "string" ? params.name : ""
  if (toolName.length === 0) {
    // Fail fast with the JSON-RPC "invalid params" code instead of
    // dispatching an empty tool name.
    return err(id, -32602, "Invalid params: name is required")
  }
  const args = asObj(params.arguments)

  try {
    const result = await callTool(toolName, args, {
      config,
      env,
      fileStore,
    })
    return ok(id, { content: [{ type: "text", text: JSON.stringify(result) }] })
  } catch (error) {
    return err(id, -32000, error instanceof Error ? error.message : String(error))
  }
}