@echofiles/echo-pdf 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,26 @@
1
+ #!/usr/bin/env bash
2
+ set -euo pipefail
3
+
4
+ required_node_major="${REQUIRED_NODE_MAJOR:-20}"
5
+ current_node_major="$(node -p "Number(process.versions.node.split('.')[0])" 2>/dev/null || echo "")"
6
+
7
+ if [[ -z "${current_node_major}" ]] || (( current_node_major < required_node_major )); then
8
+ echo "Node.js >=${required_node_major} is required. Current: $(node -v 2>/dev/null || echo 'not installed')"
9
+ exit 1
10
+ fi
11
+
12
+ for cmd in npm curl grep sed; do
13
+ if ! command -v "${cmd}" >/dev/null 2>&1; then
14
+ echo "Missing required command: ${cmd}"
15
+ exit 1
16
+ fi
17
+ done
18
+
19
+ if [[ "${CHECK_LLM_KEYS:-0}" == "1" ]]; then
20
+ if [[ -z "${OPENAI_API_KEY:-}" && -z "${OPENROUTER_KEY:-}" && -z "${OPENROUTER_API_KEY:-}" && -z "${VERCEL_AI_GATEWAY_API_KEY:-}" && -z "${VERCEL_AI_GATEWAY_KEY:-}" ]]; then
21
+ echo "CHECK_LLM_KEYS=1 but no provider key found (OPENAI_API_KEY / OPENROUTER_KEY / OPENROUTER_API_KEY / VERCEL_AI_GATEWAY_API_KEY / VERCEL_AI_GATEWAY_KEY)."
22
+ exit 1
23
+ fi
24
+ fi
25
+
26
+ echo "runtime check passed: node=$(node -v), npm=$(npm -v)"
@@ -0,0 +1,204 @@
1
+ #!/usr/bin/env bash
2
+ set -euo pipefail
3
+
4
+ ROOT_DIR="$(cd "$(dirname "$0")/.." && pwd)"
5
+ OUT_DIR="${ROOT_DIR}/fixtures/output"
6
+ EXPORT_PORT="${EXPORT_PORT:-8798}"
7
+ BASE_URL="${BASE_URL:-http://127.0.0.1:${EXPORT_PORT}}"
8
+ INPUT_PDF="${INPUT_PDF:-${ROOT_DIR}/fixtures/input.pdf}"
9
+ START_LOCAL_DEV="${START_LOCAL_DEV:-1}"
10
+ RUN_TABLES="${RUN_TABLES:-1}"
11
+ REQUIRE_LLM_SUCCESS="${REQUIRE_LLM_SUCCESS:-1}"
12
+
13
+ mkdir -p "$OUT_DIR"
14
+ rm -rf "${OUT_DIR:?}/"*
15
+
16
+ if [[ -f "${ROOT_DIR}/../.env.local" ]]; then
17
+ set -a
18
+ # shellcheck source=/dev/null
19
+ source "${ROOT_DIR}/../.env.local"
20
+ set +a
21
+ fi
22
+
23
+ if [[ ! -f "${INPUT_PDF}" ]]; then
24
+ echo "missing input pdf: ${INPUT_PDF}" >&2
25
+ exit 1
26
+ fi
27
+
28
+ cli() {
29
+ node "${ROOT_DIR}/bin/echo-pdf.js" "$@"
30
+ }
31
+
32
+ run_json() {
33
+ local name="$1"
34
+ shift
35
+ if "$@" > "${OUT_DIR}/${name}.json" 2> "${OUT_DIR}/${name}.err"; then
36
+ rm -f "${OUT_DIR}/${name}.err"
37
+ else
38
+ printf '{"ok":false,"error_file":"%s.err"}\n' "$name" > "${OUT_DIR}/${name}.json"
39
+ fi
40
+ }
41
+
42
+ validate_ocr_json() {
43
+ local json_file="$1"
44
+ node -e 'const fs=require("fs");const j=JSON.parse(fs.readFileSync(process.argv[1],"utf8"));const pages=j?.output?.pages;if(!Array.isArray(pages)||pages.length===0)process.exit(1);const t=String(pages[0]?.text||"").trim();if(t.length===0)process.exit(1);' "$json_file"
45
+ }
46
+
47
+ validate_tables_json() {
48
+ local json_file="$1"
49
+ node -e 'const fs=require("fs");const j=JSON.parse(fs.readFileSync(process.argv[1],"utf8"));const pages=j?.output?.pages;if(!Array.isArray(pages)||pages.length===0)process.exit(1);const t=String(pages[0]?.latex||"").trim();if(t.length===0)process.exit(1);' "$json_file"
50
+ }
51
+
52
+ # 1) Save test logs locally (do not block artifact export on transient network failure)
53
+ set +e
54
+ {
55
+ echo "[typecheck]"
56
+ npm --prefix "$ROOT_DIR" run typecheck
57
+ TYPECHECK_CODE=$?
58
+ echo
59
+ echo "[test]"
60
+ npm --prefix "$ROOT_DIR" run test
61
+ TEST_CODE=$?
62
+ echo
63
+ echo "[smoke]"
64
+ npm --prefix "$ROOT_DIR" run smoke
65
+ SMOKE_CODE=$?
66
+ echo
67
+ echo "typecheck_exit=${TYPECHECK_CODE}"
68
+ echo "test_exit=${TEST_CODE}"
69
+ echo "smoke_exit=${SMOKE_CODE}"
70
+ } > "${OUT_DIR}/test.log" 2>&1
71
+ set -e
72
+
73
+ cat > "${OUT_DIR}/test-status.json" <<JSON
74
+ {"typecheck":${TYPECHECK_CODE:-1},"test":${TEST_CODE:-1},"smoke":${SMOKE_CODE:-1}}
75
+ JSON
76
+
77
+ DEV_PID=""
78
+ cleanup() {
79
+ if [[ -n "${DEV_PID}" ]] && kill -0 "${DEV_PID}" >/dev/null 2>&1; then
80
+ kill "${DEV_PID}" >/dev/null 2>&1 || true
81
+ wait "${DEV_PID}" 2>/dev/null || true
82
+ fi
83
+ }
84
+ trap cleanup EXIT
85
+
86
+ if [[ "${START_LOCAL_DEV}" == "1" ]]; then
87
+ npm --prefix "$ROOT_DIR" run dev -- --ip 127.0.0.1 --port "${EXPORT_PORT}" --inspector-port 0 > "${OUT_DIR}/export-local-dev.log" 2>&1 &
88
+ DEV_PID=$!
89
+ for _ in $(seq 1 120); do
90
+ if node -e 'fetch(process.argv[1]+"/health").then(r=>process.exit(r.ok?0:1)).catch(()=>process.exit(1))' "${BASE_URL}" >/dev/null 2>&1; then
91
+ break
92
+ fi
93
+ sleep 0.5
94
+ done
95
+ node -e 'fetch(process.argv[1]+"/health").then(r=>process.exit(r.ok?0:1)).catch(()=>process.exit(1))' "${BASE_URL}" >/dev/null || { echo "dev server did not become healthy at ${BASE_URL}; see ${OUT_DIR}/export-local-dev.log" >&2; exit 1; }
96
+ fi
97
+
98
+ # 2) Init CLI + provider settings
99
+ cli init --service-url "$BASE_URL" > "${OUT_DIR}/cli-init.json"
100
+
101
+ node -e 'const fs=require("fs");const cfg=JSON.parse(fs.readFileSync(process.argv[1],"utf8"));const entries=Object.entries(cfg.providers||{});const pick=(key)=>{const keys=[key];if(key.endsWith("_API_KEY"))keys.push(key.replace(/_API_KEY$/,"_KEY"));if(key.endsWith("_KEY"))keys.push(key.replace(/_KEY$/,"_API_KEY"));for(const k of keys){const v=process.env[k];if(typeof v==="string"&&v.trim())return {k,v:v.trim()};}return null;};const forced=String(process.env.SMOKE_LLM_PROVIDER||"").trim();if(forced&&cfg.providers?.[forced]){const found=pick(String(cfg.providers[forced].apiKeyEnv||""));if(found){process.stdout.write(JSON.stringify({provider:forced,apiKey:found.v,env:found.k,forced:true}));process.exit(0);}}const preferred=String(cfg.agent?.defaultProvider||"");const ordered=entries.sort((a,b)=>a[0]===preferred?-1:b[0]===preferred?1:0);for(const [alias,p] of ordered){const found=pick(String(p.apiKeyEnv||""));if(found){process.stdout.write(JSON.stringify({provider:alias,apiKey:found.v,env:found.k,forced:false}));process.exit(0);}}process.stdout.write(JSON.stringify({provider:preferred||"",apiKey:"",env:"",forced:false}));' "${ROOT_DIR}/echo-pdf.config.json" > "${OUT_DIR}/provider-selection.json"
102
+ PROVIDER="$(node -e 'const fs=require("fs");const j=JSON.parse(fs.readFileSync(process.argv[1],"utf8"));process.stdout.write(String(j.provider||""))' "${OUT_DIR}/provider-selection.json")"
103
+ PROVIDER_KEY="$(node -e 'const fs=require("fs");const j=JSON.parse(fs.readFileSync(process.argv[1],"utf8"));process.stdout.write(String(j.apiKey||""))' "${OUT_DIR}/provider-selection.json")"
104
+ PREFERRED_MODEL="${SMOKE_LLM_MODEL:-${ECHO_PDF_DEFAULT_MODEL:-}}"
105
+ if [[ -n "${PROVIDER}" ]] && [[ -n "${PROVIDER_KEY}" ]]; then
106
+ cli provider set --provider "${PROVIDER}" --api-key "${PROVIDER_KEY}" > "${OUT_DIR}/provider-set.json"
107
+ cli provider use --provider "${PROVIDER}" > "${OUT_DIR}/provider-use.json"
108
+ else
109
+ echo '{"warning":"No provider key found in env, LLM calls may fail"}' > "${OUT_DIR}/provider-warning.json"
110
+ fi
111
+
112
+ # 3) Pull models via CLI and select one
113
+ if [[ -n "${PROVIDER}" ]]; then
114
+ run_json "models" cli models --provider "${PROVIDER}"
115
+ else
116
+ echo '{"warning":"No provider selected, skip model list"}' > "${OUT_DIR}/models.json"
117
+ fi
118
+ MODEL="${PREFERRED_MODEL}"
119
+ if [[ -n "$MODEL" ]] && [[ -n "${PROVIDER}" ]]; then
120
+ if ! node -e 'const fs=require("fs");const file=process.argv[1];const model=process.argv[2];const j=JSON.parse(fs.readFileSync(file,"utf8"));const models=Array.isArray(j.models)?j.models:[];process.exit(models.includes(model)?0:1)' "${OUT_DIR}/models.json" "$MODEL"; then
121
+ echo "Configured model not found in provider model list (or model listing failed; see ${OUT_DIR}/models.json): ${MODEL}" >&2
122
+ exit 1
123
+ fi
124
+ cli model set --provider "${PROVIDER}" --model "$MODEL" > "${OUT_DIR}/model-set.json"
125
+ else
126
+ echo '{"warning":"Missing ECHO_PDF_DEFAULT_MODEL / SMOKE_LLM_MODEL"}' > "${OUT_DIR}/model-warning.json"
127
+ exit 1
128
+ fi
129
+
130
+ # 4) Upload the exact local fixture for subsequent CLI/MCP calls
131
+ node -e 'const fs=require("fs"); const path=require("path"); (async()=>{ const base=process.argv[1]; const file=process.argv[2]; const bytes=fs.readFileSync(file); const fd=new FormData(); fd.set("file", new Blob([bytes], {type:"application/pdf"}), path.basename(file)); const res=await fetch(`${base}/api/files/upload`, {method:"POST", body:fd}); const txt=await res.text(); fs.writeFileSync(process.argv[3], txt); if(!res.ok){process.stderr.write(txt); process.exit(1);} })().catch((e)=>{console.error(String(e)); process.exit(1)})' "$BASE_URL" "$INPUT_PDF" "${OUT_DIR}/upload.json"
132
+ FILE_ID="$(node -e 'const fs=require("fs");const j=JSON.parse(fs.readFileSync(process.argv[1],"utf8"));process.stdout.write(j.file?.id||"")' "${OUT_DIR}/upload.json")"
133
+ if [[ -z "${FILE_ID}" ]]; then
134
+ echo "upload did not return file id" >&2
135
+ exit 1
136
+ fi
137
+
138
+ # 5) CLI tool calls
139
+ run_json "tools-catalog" cli tools
140
+ if [[ -n "${PROVIDER}" ]]; then
141
+ run_json "cli-extract-pages" cli call --tool pdf_extract_pages --args "{\"fileId\":\"${FILE_ID}\",\"pages\":[1],\"returnMode\":\"inline\"}" --provider "${PROVIDER}" --model "${MODEL:-}"
142
+ else
143
+ run_json "cli-extract-pages" cli call --tool pdf_extract_pages --args "{\"fileId\":\"${FILE_ID}\",\"pages\":[1],\"returnMode\":\"inline\"}"
144
+ fi
145
+ node -e 'const fs=require("fs");const p=process.argv[1];const out=process.argv[2];const j=JSON.parse(fs.readFileSync(p,"utf8"));const d=j.output?.images?.[0]?.data||"";if(!d.startsWith("data:image/"))process.exit(1);fs.writeFileSync(out, Buffer.from(d.split(",")[1]||"","base64"));' "${OUT_DIR}/cli-extract-pages.json" "${OUT_DIR}/page-1-cli.png"
146
+
147
+ # 6) MCP tool calls
148
+ run_json "mcp-initialize" cli mcp initialize
149
+ run_json "mcp-tools" cli mcp tools
150
+ run_json "mcp-call-fileops" cli mcp call --tool file_ops --args '{"op":"list"}'
151
+ run_json "mcp-extract-pages" cli mcp call --tool pdf_extract_pages --args "{\"fileId\":\"${FILE_ID}\",\"pages\":[1],\"returnMode\":\"inline\"}"
152
+
153
+ # 7) LLM tool calls
154
+ OCR_OK=0
155
+ TABLES_OK=0
156
+ if [[ -n "${PROVIDER}" ]]; then
157
+ : > "${OUT_DIR}/llm-attempts.log"
158
+ echo "[ocr] using provider=${PROVIDER} model=${MODEL}" >> "${OUT_DIR}/llm-attempts.log"
159
+ if cli call --tool pdf_ocr_pages --args "{\"fileId\":\"${FILE_ID}\",\"pages\":[1],\"provider\":\"${PROVIDER}\",\"model\":\"${MODEL}\"}" --provider "${PROVIDER}" --model "${MODEL}" > "${OUT_DIR}/cli-ocr-pages.json" 2> "${OUT_DIR}/cli-ocr-pages.err"; then
160
+ if validate_ocr_json "${OUT_DIR}/cli-ocr-pages.json"; then
161
+ OCR_OK=1
162
+ echo "{\"provider\":\"${PROVIDER}\",\"model\":\"${MODEL}\"}" > "${OUT_DIR}/ocr-selected-model.json"
163
+ fi
164
+ fi
165
+ else
166
+ run_json "cli-ocr-pages" cli call --tool pdf_ocr_pages --args "{\"fileId\":\"${FILE_ID}\",\"pages\":[1]}"
167
+ fi
168
+
169
+ if [[ "${RUN_TABLES}" == "1" ]]; then
170
+ if [[ -n "${PROVIDER}" ]]; then
171
+ echo "[tables] using provider=${PROVIDER} model=${MODEL}" >> "${OUT_DIR}/llm-attempts.log"
172
+ if cli call --tool pdf_tables_to_latex --args "{\"fileId\":\"${FILE_ID}\",\"pages\":[1],\"provider\":\"${PROVIDER}\",\"model\":\"${MODEL}\"}" --provider "${PROVIDER}" --model "${MODEL}" > "${OUT_DIR}/cli-tables-to-latex.json" 2> "${OUT_DIR}/cli-tables-to-latex.err"; then
173
+ if validate_tables_json "${OUT_DIR}/cli-tables-to-latex.json"; then
174
+ TABLES_OK=1
175
+ echo "{\"provider\":\"${PROVIDER}\",\"model\":\"${MODEL}\"}" > "${OUT_DIR}/tables-selected-model.json"
176
+ fi
177
+ fi
178
+ else
179
+ run_json "cli-tables-to-latex" cli call --tool pdf_tables_to_latex --args "{\"fileId\":\"${FILE_ID}\",\"pages\":[1]}"
180
+ fi
181
+ else
182
+ echo '{"skipped":true,"reason":"Set RUN_TABLES=1 to enable table-latex call"}' > "${OUT_DIR}/cli-tables-to-latex.json"
183
+ fi
184
+
185
+ if [[ "${REQUIRE_LLM_SUCCESS}" == "1" ]]; then
186
+ if [[ "${OCR_OK}" != "1" ]]; then
187
+ echo "OCR failed for configured model. See ${OUT_DIR}/cli-ocr-pages.err and llm-attempts.log" >&2
188
+ exit 1
189
+ fi
190
+ if [[ "${RUN_TABLES}" == "1" ]] && [[ "${TABLES_OK}" != "1" ]]; then
191
+ echo "Tables failed for configured model. See ${OUT_DIR}/cli-tables-to-latex.err and llm-attempts.log" >&2
192
+ exit 1
193
+ fi
194
+ fi
195
+
196
+ cat > "${OUT_DIR}/summary.txt" <<TXT
197
+ base_url=${BASE_URL}
198
+ input_pdf=${INPUT_PDF}
199
+ file_id=${FILE_ID}
200
+ model=${MODEL}
201
+ outputs_dir=${OUT_DIR}
202
+ TXT
203
+
204
+ ls -la "$OUT_DIR"
@@ -0,0 +1,14 @@
1
+ #!/usr/bin/env bash
2
+ set -euo pipefail
3
+
4
+ SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
5
+ bash "${SCRIPT_DIR}/check-runtime.sh"
6
+
7
+ # Smoke now reuses integration tests as the single E2E source of truth.
8
+ # Supported env vars (forwarded to integration tests):
9
+ # - SMOKE_BASE_URL
10
+ # - SMOKE_REQUIRE_LLM
11
+ # - SMOKE_LLM_PROVIDER
12
+ # - SMOKE_LLM_MODEL
13
+ # - TESTCASE_DIR
14
+ npm run test:integration
@@ -0,0 +1,25 @@
1
+ import type { EchoPdfConfig } from "./pdf-types"
2
+
3
+ const normalize = (value: string): string => value.trim()
4
+
5
+ export const resolveProviderAlias = (
6
+ config: EchoPdfConfig,
7
+ requestedProvider?: string
8
+ ): string => {
9
+ const raw = normalize(requestedProvider ?? "")
10
+ if (raw.length === 0) return config.agent.defaultProvider
11
+ if (config.providers[raw]) return raw
12
+ const fromType = Object.entries(config.providers).find(([, provider]) => provider.type === raw)?.[0]
13
+ if (fromType) return fromType
14
+ throw new Error(`Provider "${raw}" not configured`)
15
+ }
16
+
17
+ export const resolveModelForProvider = (
18
+ config: EchoPdfConfig,
19
+ _providerAlias: string,
20
+ requestedModel?: string
21
+ ): string => {
22
+ const explicit = normalize(requestedModel ?? "")
23
+ if (explicit.length > 0) return explicit
24
+ return normalize(config.agent.defaultModel ?? "")
25
+ }
@@ -0,0 +1,52 @@
1
+ import { fromBase64, normalizeReturnMode, toInlineFilePayload } from "./file-utils"
2
+ import type { FileStore, ReturnMode } from "./types"
3
+
4
+ export const runFileOp = async (
5
+ fileStore: FileStore,
6
+ input: {
7
+ readonly op: "list" | "read" | "delete" | "put"
8
+ readonly fileId?: string
9
+ readonly includeBase64?: boolean
10
+ readonly text?: string
11
+ readonly filename?: string
12
+ readonly mimeType?: string
13
+ readonly base64?: string
14
+ readonly returnMode?: ReturnMode
15
+ }
16
+ ): Promise<unknown> => {
17
+ if (input.op === "list") {
18
+ return { files: await fileStore.list() }
19
+ }
20
+
21
+ if (input.op === "put") {
22
+ const bytes = input.base64 ? fromBase64(input.base64) : new TextEncoder().encode(input.text ?? "")
23
+ const meta = await fileStore.put({
24
+ filename: input.filename ?? `file-${Date.now()}.txt`,
25
+ mimeType: input.mimeType ?? "text/plain; charset=utf-8",
26
+ bytes,
27
+ })
28
+ const returnMode = normalizeReturnMode(input.returnMode)
29
+ if (returnMode === "url") {
30
+ throw new Error("returnMode=url is not implemented; use inline or file_id")
31
+ }
32
+ if (returnMode === "file_id") return { returnMode, file: meta }
33
+ const stored = await fileStore.get(meta.id)
34
+ if (!stored) throw new Error(`File not found after put: ${meta.id}`)
35
+ return {
36
+ returnMode,
37
+ ...toInlineFilePayload(stored, true),
38
+ }
39
+ }
40
+
41
+ if (!input.fileId) {
42
+ throw new Error("fileId is required")
43
+ }
44
+
45
+ if (input.op === "delete") {
46
+ return { deleted: await fileStore.delete(input.fileId), fileId: input.fileId }
47
+ }
48
+
49
+ const file = await fileStore.get(input.fileId)
50
+ if (!file) throw new Error(`File not found: ${input.fileId}`)
51
+ return toInlineFilePayload(file, Boolean(input.includeBase64))
52
+ }
@@ -0,0 +1,340 @@
1
+ import { fromBase64, toBase64 } from "./file-utils"
2
+ import type { StoragePolicy } from "./pdf-types"
3
+ import type { StoredFileMeta, StoredFileRecord } from "./types"
4
+
5
+ interface StoredValue {
6
+ readonly id: string
7
+ readonly filename: string
8
+ readonly mimeType: string
9
+ readonly sizeBytes: number
10
+ readonly createdAt: string
11
+ readonly bytesBase64: string
12
+ }
13
+
14
+ interface StoreStats {
15
+ readonly fileCount: number
16
+ readonly totalBytes: number
17
+ }
18
+
19
+ const json = (data: unknown, status = 200): Response =>
20
+ new Response(JSON.stringify(data), {
21
+ status,
22
+ headers: { "Content-Type": "application/json; charset=utf-8" },
23
+ })
24
+
25
+ const readJson = async (request: Request): Promise<Record<string, unknown>> => {
26
+ try {
27
+ const body = await request.json()
28
+ if (typeof body === "object" && body !== null && !Array.isArray(body)) {
29
+ return body as Record<string, unknown>
30
+ }
31
+ return {}
32
+ } catch {
33
+ return {}
34
+ }
35
+ }
36
+
37
+ const defaultPolicy = (): StoragePolicy => ({
38
+ maxFileBytes: 1_200_000,
39
+ maxTotalBytes: 52_428_800,
40
+ ttlHours: 24,
41
+ cleanupBatchSize: 50,
42
+ })
43
+
44
+ const parsePolicy = (input: unknown): StoragePolicy => {
45
+ const raw = typeof input === "object" && input !== null && !Array.isArray(input)
46
+ ? (input as Record<string, unknown>)
47
+ : {}
48
+ const fallback = defaultPolicy()
49
+
50
+ const maxFileBytes = Number(raw.maxFileBytes ?? fallback.maxFileBytes)
51
+ const maxTotalBytes = Number(raw.maxTotalBytes ?? fallback.maxTotalBytes)
52
+ const ttlHours = Number(raw.ttlHours ?? fallback.ttlHours)
53
+ const cleanupBatchSize = Number(raw.cleanupBatchSize ?? fallback.cleanupBatchSize)
54
+
55
+ return {
56
+ maxFileBytes: Number.isFinite(maxFileBytes) && maxFileBytes > 0 ? Math.floor(maxFileBytes) : fallback.maxFileBytes,
57
+ maxTotalBytes: Number.isFinite(maxTotalBytes) && maxTotalBytes > 0 ? Math.floor(maxTotalBytes) : fallback.maxTotalBytes,
58
+ ttlHours: Number.isFinite(ttlHours) && ttlHours > 0 ? ttlHours : fallback.ttlHours,
59
+ cleanupBatchSize:
60
+ Number.isFinite(cleanupBatchSize) && cleanupBatchSize > 0 ? Math.floor(cleanupBatchSize) : fallback.cleanupBatchSize,
61
+ }
62
+ }
63
+
64
+ const toMeta = (value: StoredValue): StoredFileMeta => ({
65
+ id: value.id,
66
+ filename: value.filename,
67
+ mimeType: value.mimeType,
68
+ sizeBytes: value.sizeBytes,
69
+ createdAt: value.createdAt,
70
+ })
71
+
72
+ const listStoredValues = async (state: DurableObjectState): Promise<StoredValue[]> => {
73
+ const listed = await state.storage.list<StoredValue>({ prefix: "file:" })
74
+ return [...listed.values()]
75
+ }
76
+
77
+ const computeStats = (files: ReadonlyArray<StoredValue>): StoreStats => ({
78
+ fileCount: files.length,
79
+ totalBytes: files.reduce((sum, file) => sum + file.sizeBytes, 0),
80
+ })
81
+
82
+ const isExpired = (createdAt: string, ttlHours: number): boolean => {
83
+ const createdMs = Date.parse(createdAt)
84
+ if (!Number.isFinite(createdMs)) return false
85
+ return Date.now() - createdMs > ttlHours * 60 * 60 * 1000
86
+ }
87
+
88
+ const deleteFiles = async (state: DurableObjectState, files: ReadonlyArray<StoredValue>): Promise<number> => {
89
+ let deleted = 0
90
+ for (const file of files) {
91
+ const ok = await state.storage.delete(`file:${file.id}`)
92
+ if (ok) deleted += 1
93
+ }
94
+ return deleted
95
+ }
96
+
97
+ export class FileStoreDO {
98
+ constructor(private readonly state: DurableObjectState) {}
99
+
100
+ async fetch(request: Request): Promise<Response> {
101
+ const url = new URL(request.url)
102
+
103
+ if (request.method === "POST" && url.pathname === "/put") {
104
+ const body = await readJson(request)
105
+ const policy = parsePolicy(body.policy)
106
+ const filename = typeof body.filename === "string" ? body.filename : `file-${Date.now()}`
107
+ const mimeType = typeof body.mimeType === "string" ? body.mimeType : "application/octet-stream"
108
+ const bytesBase64 = typeof body.bytesBase64 === "string" ? body.bytesBase64 : ""
109
+
110
+ const bytes = fromBase64(bytesBase64)
111
+ if (bytes.byteLength > policy.maxFileBytes) {
112
+ return json(
113
+ {
114
+ error: `file too large: ${bytes.byteLength} bytes exceeds maxFileBytes ${policy.maxFileBytes}`,
115
+ code: "FILE_TOO_LARGE",
116
+ policy,
117
+ },
118
+ 413
119
+ )
120
+ }
121
+
122
+ let files = await listStoredValues(this.state)
123
+ const expired = files.filter((file) => isExpired(file.createdAt, policy.ttlHours))
124
+ if (expired.length > 0) {
125
+ await deleteFiles(this.state, expired)
126
+ files = await listStoredValues(this.state)
127
+ }
128
+
129
+ let stats = computeStats(files)
130
+ const projected = stats.totalBytes + bytes.byteLength
131
+ if (projected > policy.maxTotalBytes) {
132
+ const needFree = projected - policy.maxTotalBytes
133
+ const candidates = [...files]
134
+ .sort((a, b) => Date.parse(a.createdAt) - Date.parse(b.createdAt))
135
+ .slice(0, policy.cleanupBatchSize)
136
+
137
+ let freed = 0
138
+ const evictList: StoredValue[] = []
139
+ for (const file of candidates) {
140
+ evictList.push(file)
141
+ freed += file.sizeBytes
142
+ if (freed >= needFree) break
143
+ }
144
+ if (evictList.length > 0) {
145
+ await deleteFiles(this.state, evictList)
146
+ files = await listStoredValues(this.state)
147
+ stats = computeStats(files)
148
+ }
149
+ }
150
+
151
+ if (stats.totalBytes + bytes.byteLength > policy.maxTotalBytes) {
152
+ return json(
153
+ {
154
+ error: `storage quota exceeded: total ${stats.totalBytes} + incoming ${bytes.byteLength} > maxTotalBytes ${policy.maxTotalBytes}`,
155
+ code: "STORAGE_QUOTA_EXCEEDED",
156
+ policy,
157
+ stats,
158
+ },
159
+ 507
160
+ )
161
+ }
162
+
163
+ const id = crypto.randomUUID()
164
+ const value: StoredValue = {
165
+ id,
166
+ filename,
167
+ mimeType,
168
+ sizeBytes: bytes.byteLength,
169
+ createdAt: new Date().toISOString(),
170
+ bytesBase64,
171
+ }
172
+ await this.state.storage.put(`file:${id}`, value)
173
+ return json({ file: toMeta(value), policy })
174
+ }
175
+
176
+ if (request.method === "GET" && url.pathname === "/get") {
177
+ const fileId = url.searchParams.get("fileId")
178
+ if (!fileId) return json({ error: "Missing fileId" }, 400)
179
+ const value = await this.state.storage.get<StoredValue>(`file:${fileId}`)
180
+ if (!value) return json({ file: null })
181
+ return json({ file: value })
182
+ }
183
+
184
+ if (request.method === "GET" && url.pathname === "/list") {
185
+ const listed = await this.state.storage.list<StoredValue>({ prefix: "file:" })
186
+ const files = [...listed.values()].map(toMeta)
187
+ return json({ files })
188
+ }
189
+
190
+ if (request.method === "POST" && url.pathname === "/delete") {
191
+ const body = await readJson(request)
192
+ const fileId = typeof body.fileId === "string" ? body.fileId : ""
193
+ if (!fileId) return json({ error: "Missing fileId" }, 400)
194
+ const key = `file:${fileId}`
195
+ const existing = await this.state.storage.get(key)
196
+ if (!existing) return json({ deleted: false })
197
+ await this.state.storage.delete(key)
198
+ return json({ deleted: true })
199
+ }
200
+
201
+ if (request.method === "GET" && url.pathname === "/stats") {
202
+ let policyInput: unknown
203
+ const encoded = url.searchParams.get("policy")
204
+ if (encoded) {
205
+ try {
206
+ policyInput = JSON.parse(encoded)
207
+ } catch {
208
+ policyInput = undefined
209
+ }
210
+ }
211
+ const policy = parsePolicy(policyInput)
212
+ const files = await listStoredValues(this.state)
213
+ const stats = computeStats(files)
214
+ return json({ policy, stats })
215
+ }
216
+
217
+ if (request.method === "POST" && url.pathname === "/cleanup") {
218
+ const body = await readJson(request)
219
+ const policy = parsePolicy(body.policy)
220
+ const files = await listStoredValues(this.state)
221
+ const expired = files.filter((file) => isExpired(file.createdAt, policy.ttlHours))
222
+ const deletedExpired = await deleteFiles(this.state, expired)
223
+
224
+ const afterExpired = await listStoredValues(this.state)
225
+ let stats = computeStats(afterExpired)
226
+ let deletedEvicted = 0
227
+ if (stats.totalBytes > policy.maxTotalBytes) {
228
+ const sorted = [...afterExpired].sort((a, b) => Date.parse(a.createdAt) - Date.parse(b.createdAt))
229
+ const evictList: StoredValue[] = []
230
+ for (const file of sorted) {
231
+ evictList.push(file)
232
+ const projected = stats.totalBytes - evictList.reduce((sum, item) => sum + item.sizeBytes, 0)
233
+ if (projected <= policy.maxTotalBytes) break
234
+ if (evictList.length >= policy.cleanupBatchSize) break
235
+ }
236
+ deletedEvicted = await deleteFiles(this.state, evictList)
237
+ stats = computeStats(await listStoredValues(this.state))
238
+ }
239
+
240
+ return json({
241
+ policy,
242
+ deletedExpired,
243
+ deletedEvicted,
244
+ stats,
245
+ })
246
+ }
247
+
248
+ return json({ error: "Not found" }, 404)
249
+ }
250
+ }
251
+
252
+ export class DurableObjectFileStore {
253
+ constructor(
254
+ private readonly namespace: DurableObjectNamespace,
255
+ private readonly policy: StoragePolicy
256
+ ) {}
257
+
258
+ private stub(): DurableObjectStub {
259
+ return this.namespace.get(this.namespace.idFromName("echo-pdf-file-store"))
260
+ }
261
+
262
+ async put(input: {
263
+ readonly filename: string
264
+ readonly mimeType: string
265
+ readonly bytes: Uint8Array
266
+ }): Promise<StoredFileMeta> {
267
+ const response = await this.stub().fetch("https://do/put", {
268
+ method: "POST",
269
+ headers: { "Content-Type": "application/json" },
270
+ body: JSON.stringify({
271
+ filename: input.filename,
272
+ mimeType: input.mimeType,
273
+ bytesBase64: toBase64(input.bytes),
274
+ policy: this.policy,
275
+ }),
276
+ })
277
+ const payload = (await response.json()) as { file?: StoredFileMeta; error?: string }
278
+ if (!response.ok || !payload.file) {
279
+ throw new Error(payload.error ?? "DO put failed")
280
+ }
281
+ return payload.file
282
+ }
283
+
284
+ async get(fileId: string): Promise<StoredFileRecord | null> {
285
+ const response = await this.stub().fetch(`https://do/get?fileId=${encodeURIComponent(fileId)}`)
286
+ const payload = (await response.json()) as { file?: StoredValue | null }
287
+ if (!response.ok) throw new Error("DO get failed")
288
+ if (!payload.file) return null
289
+ return {
290
+ id: payload.file.id,
291
+ filename: payload.file.filename,
292
+ mimeType: payload.file.mimeType,
293
+ sizeBytes: payload.file.sizeBytes,
294
+ createdAt: payload.file.createdAt,
295
+ bytes: fromBase64(payload.file.bytesBase64),
296
+ }
297
+ }
298
+
299
+ async list(): Promise<ReadonlyArray<StoredFileMeta>> {
300
+ const response = await this.stub().fetch("https://do/list")
301
+ const payload = (await response.json()) as { files?: StoredFileMeta[] }
302
+ if (!response.ok) throw new Error("DO list failed")
303
+ return payload.files ?? []
304
+ }
305
+
306
+ async delete(fileId: string): Promise<boolean> {
307
+ const response = await this.stub().fetch("https://do/delete", {
308
+ method: "POST",
309
+ headers: { "Content-Type": "application/json" },
310
+ body: JSON.stringify({ fileId }),
311
+ })
312
+ const payload = (await response.json()) as { deleted?: boolean }
313
+ if (!response.ok) throw new Error("DO delete failed")
314
+ return payload.deleted === true
315
+ }
316
+
317
+ async stats(): Promise<{ policy: StoragePolicy; stats: StoreStats }> {
318
+ const policyEncoded = encodeURIComponent(JSON.stringify(this.policy))
319
+ const response = await this.stub().fetch(`https://do/stats?policy=${policyEncoded}`)
320
+ const payload = (await response.json()) as { policy: StoragePolicy; stats: StoreStats }
321
+ if (!response.ok) throw new Error("DO stats failed")
322
+ return payload
323
+ }
324
+
325
+ async cleanup(): Promise<{ policy: StoragePolicy; deletedExpired: number; deletedEvicted: number; stats: StoreStats }> {
326
+ const response = await this.stub().fetch("https://do/cleanup", {
327
+ method: "POST",
328
+ headers: { "Content-Type": "application/json" },
329
+ body: JSON.stringify({ policy: this.policy }),
330
+ })
331
+ const payload = (await response.json()) as {
332
+ policy: StoragePolicy
333
+ deletedExpired: number
334
+ deletedEvicted: number
335
+ stats: StoreStats
336
+ }
337
+ if (!response.ok) throw new Error("DO cleanup failed")
338
+ return payload
339
+ }
340
+ }