@echofiles/echo-pdf 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +240 -0
- package/bin/echo-pdf.js +593 -0
- package/echo-pdf.config.json +58 -0
- package/package.json +44 -0
- package/scripts/check-runtime.sh +26 -0
- package/scripts/export-fixtures.sh +204 -0
- package/scripts/smoke.sh +14 -0
- package/src/agent-defaults.ts +25 -0
- package/src/file-ops.ts +52 -0
- package/src/file-store-do.ts +340 -0
- package/src/file-utils.ts +43 -0
- package/src/index.ts +334 -0
- package/src/mcp-server.ts +109 -0
- package/src/pdf-agent.ts +224 -0
- package/src/pdf-config.ts +105 -0
- package/src/pdf-storage.ts +94 -0
- package/src/pdf-types.ts +79 -0
- package/src/pdfium-engine.ts +207 -0
- package/src/provider-client.ts +176 -0
- package/src/provider-keys.ts +44 -0
- package/src/tool-registry.ts +203 -0
- package/src/types.ts +39 -0
- package/src/wasm.d.ts +4 -0
- package/wrangler.toml +15 -0
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
#!/usr/bin/env bash
# Verify that the local runtime meets echo-pdf's requirements:
#   - Node.js major version >= REQUIRED_NODE_MAJOR (default 20)
#   - required CLI tools are available on PATH
#   - optionally (CHECK_LLM_KEYS=1) at least one LLM provider key is exported
set -euo pipefail

required_node_major="${REQUIRED_NODE_MAJOR:-20}"

# Probe for node explicitly before invoking it: under `set -e` a failing
# command substitution (node missing) would abort the script before the
# friendly "not installed" message below could ever be printed.
if ! command -v node >/dev/null 2>&1; then
  echo "Node.js >=${required_node_major} is required. Current: not installed"
  exit 1
fi

current_node_major="$(node -p "Number(process.versions.node.split('.')[0])")"

if [[ -z "${current_node_major}" ]] || (( current_node_major < required_node_major )); then
  echo "Node.js >=${required_node_major} is required. Current: $(node -v 2>/dev/null || echo 'not installed')"
  exit 1
fi

# Tools required by the export/smoke scripts.
for cmd in npm curl grep sed; do
  if ! command -v "${cmd}" >/dev/null 2>&1; then
    echo "Missing required command: ${cmd}"
    exit 1
  fi
done

# Opt-in: fail fast when no LLM provider credential is present in the env.
if [[ "${CHECK_LLM_KEYS:-0}" == "1" ]]; then
  if [[ -z "${OPENAI_API_KEY:-}" && -z "${OPENROUTER_KEY:-}" && -z "${OPENROUTER_API_KEY:-}" && -z "${VERCEL_AI_GATEWAY_API_KEY:-}" && -z "${VERCEL_AI_GATEWAY_KEY:-}" ]]; then
    echo "CHECK_LLM_KEYS=1 but no provider key found (OPENAI_API_KEY / OPENROUTER_KEY / OPENROUTER_API_KEY / VERCEL_AI_GATEWAY_API_KEY / VERCEL_AI_GATEWAY_KEY)."
    exit 1
  fi
fi

echo "runtime check passed: node=$(node -v), npm=$(npm -v)"
|
|
@@ -0,0 +1,204 @@
|
|
|
1
|
+
#!/usr/bin/env bash
# Export reproducible test fixtures: runs typecheck/test/smoke, boots a local
# dev server (optional), exercises the CLI / MCP / LLM tool surface against a
# known input PDF, and writes every artifact to fixtures/output.
set -euo pipefail

ROOT_DIR="$(cd "$(dirname "$0")/.." && pwd)"
OUT_DIR="${ROOT_DIR}/fixtures/output"
EXPORT_PORT="${EXPORT_PORT:-8798}"
BASE_URL="${BASE_URL:-http://127.0.0.1:${EXPORT_PORT}}"
INPUT_PDF="${INPUT_PDF:-${ROOT_DIR}/fixtures/input.pdf}"
START_LOCAL_DEV="${START_LOCAL_DEV:-1}"
RUN_TABLES="${RUN_TABLES:-1}"
REQUIRE_LLM_SUCCESS="${REQUIRE_LLM_SUCCESS:-1}"

# Fresh output directory; ${OUT_DIR:?} guards against an empty expansion
# turning the rm into `rm -rf /*`.
mkdir -p "$OUT_DIR"
rm -rf "${OUT_DIR:?}/"*

# Load provider keys etc. from the repo-level .env.local, exporting everything.
if [[ -f "${ROOT_DIR}/../.env.local" ]]; then
  set -a
  # shellcheck source=/dev/null
  source "${ROOT_DIR}/../.env.local"
  set +a
fi

if [[ ! -f "${INPUT_PDF}" ]]; then
  echo "missing input pdf: ${INPUT_PDF}" >&2
  exit 1
fi

# Invoke the packaged CLI.
cli() {
  node "${ROOT_DIR}/bin/echo-pdf.js" "$@"
}

# Run a command, capturing stdout as <name>.json; on failure keep stderr in
# <name>.err and write a small failure marker instead of aborting the export.
run_json() {
  local name="$1"
  shift
  if "$@" > "${OUT_DIR}/${name}.json" 2> "${OUT_DIR}/${name}.err"; then
    rm -f "${OUT_DIR}/${name}.err"
  else
    printf '{"ok":false,"error_file":"%s.err"}\n' "$name" > "${OUT_DIR}/${name}.json"
  fi
}

# Succeeds only if the OCR result has at least one page with non-empty text.
validate_ocr_json() {
  local json_file="$1"
  node -e 'const fs=require("fs");const j=JSON.parse(fs.readFileSync(process.argv[1],"utf8"));const pages=j?.output?.pages;if(!Array.isArray(pages)||pages.length===0)process.exit(1);const t=String(pages[0]?.text||"").trim();if(t.length===0)process.exit(1);' "$json_file"
}

# Succeeds only if the tables result has at least one page with non-empty LaTeX.
validate_tables_json() {
  local json_file="$1"
  node -e 'const fs=require("fs");const j=JSON.parse(fs.readFileSync(process.argv[1],"utf8"));const pages=j?.output?.pages;if(!Array.isArray(pages)||pages.length===0)process.exit(1);const t=String(pages[0]?.latex||"").trim();if(t.length===0)process.exit(1);' "$json_file"
}

# 1) Save test logs locally (do not block artifact export on transient network failure)
set +e
{
  echo "[typecheck]"
  npm --prefix "$ROOT_DIR" run typecheck
  TYPECHECK_CODE=$?
  echo
  echo "[test]"
  npm --prefix "$ROOT_DIR" run test
  TEST_CODE=$?
  echo
  echo "[smoke]"
  npm --prefix "$ROOT_DIR" run smoke
  SMOKE_CODE=$?
  echo
  echo "typecheck_exit=${TYPECHECK_CODE}"
  echo "test_exit=${TEST_CODE}"
  echo "smoke_exit=${SMOKE_CODE}"
} > "${OUT_DIR}/test.log" 2>&1
set -e

cat > "${OUT_DIR}/test-status.json" <<JSON
{"typecheck":${TYPECHECK_CODE:-1},"test":${TEST_CODE:-1},"smoke":${SMOKE_CODE:-1}}
JSON

# Ensure the background dev server is torn down however the script exits.
# NOTE(review): this kills the npm parent only; verify whether its child
# process (e.g. wrangler) needs a process-group kill on your platform.
DEV_PID=""
cleanup() {
  if [[ -n "${DEV_PID}" ]] && kill -0 "${DEV_PID}" >/dev/null 2>&1; then
    kill "${DEV_PID}" >/dev/null 2>&1 || true
    wait "${DEV_PID}" 2>/dev/null || true
  fi
}
trap cleanup EXIT

if [[ "${START_LOCAL_DEV}" == "1" ]]; then
  npm --prefix "$ROOT_DIR" run dev -- --ip 127.0.0.1 --port "${EXPORT_PORT}" --inspector-port 0 > "${OUT_DIR}/export-local-dev.log" 2>&1 &
  DEV_PID=$!
  # Poll /health for up to ~60s (120 * 0.5s) while the server boots.
  for _ in $(seq 1 120); do
    if node -e 'fetch(process.argv[1]+"/health").then(r=>process.exit(r.ok?0:1)).catch(()=>process.exit(1))' "${BASE_URL}" >/dev/null 2>&1; then
      break
    fi
    sleep 0.5
  done
  # Final health probe without suppression: fail loudly if the server never came up.
  node -e 'fetch(process.argv[1]+"/health").then(r=>process.exit(r.ok?0:1)).catch(()=>process.exit(1))' "${BASE_URL}" >/dev/null
fi

# 2) Init CLI + provider settings
cli init --service-url "$BASE_URL" > "${OUT_DIR}/cli-init.json"

# Pick a provider: honor SMOKE_LLM_PROVIDER when its key is present, otherwise
# the first configured provider (preferring agent.defaultProvider) whose
# apiKeyEnv (or its _KEY/_API_KEY sibling spelling) is set in the environment.
node -e 'const fs=require("fs");const cfg=JSON.parse(fs.readFileSync(process.argv[1],"utf8"));const entries=Object.entries(cfg.providers||{});const pick=(key)=>{const keys=[key];if(key.endsWith("_API_KEY"))keys.push(key.replace(/_API_KEY$/,"_KEY"));if(key.endsWith("_KEY"))keys.push(key.replace(/_KEY$/,"_API_KEY"));for(const k of keys){const v=process.env[k];if(typeof v==="string"&&v.trim())return {k,v:v.trim()};}return null;};const forced=String(process.env.SMOKE_LLM_PROVIDER||"").trim();if(forced&&cfg.providers?.[forced]){const found=pick(String(cfg.providers[forced].apiKeyEnv||""));if(found){process.stdout.write(JSON.stringify({provider:forced,apiKey:found.v,env:found.k,forced:true}));process.exit(0);}}const preferred=String(cfg.agent?.defaultProvider||"");const ordered=entries.sort((a,b)=>a[0]===preferred?-1:b[0]===preferred?1:0);for(const [alias,p] of ordered){const found=pick(String(p.apiKeyEnv||""));if(found){process.stdout.write(JSON.stringify({provider:alias,apiKey:found.v,env:found.k,forced:false}));process.exit(0);}}process.stdout.write(JSON.stringify({provider:preferred||"",apiKey:"",env:"",forced:false}));' "${ROOT_DIR}/echo-pdf.config.json" > "${OUT_DIR}/provider-selection.json"
PROVIDER="$(node -e 'const fs=require("fs");const j=JSON.parse(fs.readFileSync(process.argv[1],"utf8"));process.stdout.write(String(j.provider||""))' "${OUT_DIR}/provider-selection.json")"
PROVIDER_KEY="$(node -e 'const fs=require("fs");const j=JSON.parse(fs.readFileSync(process.argv[1],"utf8"));process.stdout.write(String(j.apiKey||""))' "${OUT_DIR}/provider-selection.json")"
PREFERRED_MODEL="${SMOKE_LLM_MODEL:-${ECHO_PDF_DEFAULT_MODEL:-}}"
if [[ -n "${PROVIDER}" ]] && [[ -n "${PROVIDER_KEY}" ]]; then
  cli provider set --provider "${PROVIDER}" --api-key "${PROVIDER_KEY}" > "${OUT_DIR}/provider-set.json"
  cli provider use --provider "${PROVIDER}" > "${OUT_DIR}/provider-use.json"
else
  echo '{"warning":"No provider key found in env, LLM calls may fail"}' > "${OUT_DIR}/provider-warning.json"
fi

# 3) Pull models via CLI and select one
if [[ -n "${PROVIDER}" ]]; then
  run_json "models" cli models --provider "${PROVIDER}"
else
  echo '{"warning":"No provider selected, skip model list"}' > "${OUT_DIR}/models.json"
fi
MODEL="${PREFERRED_MODEL}"
if [[ -n "$MODEL" ]] && [[ -n "${PROVIDER}" ]]; then
  if ! node -e 'const fs=require("fs");const file=process.argv[1];const model=process.argv[2];const j=JSON.parse(fs.readFileSync(file,"utf8"));const models=Array.isArray(j.models)?j.models:[];process.exit(models.includes(model)?0:1)' "${OUT_DIR}/models.json" "$MODEL"; then
    echo "Configured model not found in provider model list: ${MODEL}" >&2
    exit 1
  fi
  cli model set --provider "${PROVIDER}" --model "$MODEL" > "${OUT_DIR}/model-set.json"
else
  # Report the actual missing prerequisite: the previous single message blamed
  # the model even when the real problem was an absent provider key.
  if [[ -z "${PROVIDER}" ]]; then
    echo '{"warning":"No configured provider has an API key in the environment"}' > "${OUT_DIR}/model-warning.json"
  else
    echo '{"warning":"Missing ECHO_PDF_DEFAULT_MODEL / SMOKE_LLM_MODEL"}' > "${OUT_DIR}/model-warning.json"
  fi
  exit 1
fi

# 4) Upload the exact local fixture for subsequent CLI/MCP calls
node -e 'const fs=require("fs"); const path=require("path"); (async()=>{ const base=process.argv[1]; const file=process.argv[2]; const bytes=fs.readFileSync(file); const fd=new FormData(); fd.set("file", new Blob([bytes], {type:"application/pdf"}), path.basename(file)); const res=await fetch(`${base}/api/files/upload`, {method:"POST", body:fd}); const txt=await res.text(); fs.writeFileSync(process.argv[3], txt); if(!res.ok){process.stderr.write(txt); process.exit(1);} })().catch((e)=>{console.error(String(e)); process.exit(1)})' "$BASE_URL" "$INPUT_PDF" "${OUT_DIR}/upload.json"
FILE_ID="$(node -e 'const fs=require("fs");const j=JSON.parse(fs.readFileSync(process.argv[1],"utf8"));process.stdout.write(j.file?.id||"")' "${OUT_DIR}/upload.json")"
if [[ -z "${FILE_ID}" ]]; then
  echo "upload did not return file id" >&2
  exit 1
fi

# 5) CLI tool calls
run_json "tools-catalog" cli tools
if [[ -n "${PROVIDER}" ]]; then
  run_json "cli-extract-pages" cli call --tool pdf_extract_pages --args "{\"fileId\":\"${FILE_ID}\",\"pages\":[1],\"returnMode\":\"inline\"}" --provider "${PROVIDER}" --model "${MODEL:-}"
else
  run_json "cli-extract-pages" cli call --tool pdf_extract_pages --args "{\"fileId\":\"${FILE_ID}\",\"pages\":[1],\"returnMode\":\"inline\"}"
fi
# Decode the inline data: URL of page 1 into a PNG artifact.
node -e 'const fs=require("fs");const p=process.argv[1];const out=process.argv[2];const j=JSON.parse(fs.readFileSync(p,"utf8"));const d=j.output?.images?.[0]?.data||"";if(!d.startsWith("data:image/"))process.exit(1);fs.writeFileSync(out, Buffer.from(d.split(",")[1]||"","base64"));' "${OUT_DIR}/cli-extract-pages.json" "${OUT_DIR}/page-1-cli.png"

# 6) MCP tool calls
run_json "mcp-initialize" cli mcp initialize
run_json "mcp-tools" cli mcp tools
run_json "mcp-call-fileops" cli mcp call --tool file_ops --args '{"op":"list"}'
run_json "mcp-extract-pages" cli mcp call --tool pdf_extract_pages --args "{\"fileId\":\"${FILE_ID}\",\"pages\":[1],\"returnMode\":\"inline\"}"

# 7) LLM tool calls
OCR_OK=0
TABLES_OK=0
if [[ -n "${PROVIDER}" ]]; then
  : > "${OUT_DIR}/llm-attempts.log"
  echo "[ocr] using provider=${PROVIDER} model=${MODEL}" >> "${OUT_DIR}/llm-attempts.log"
  if cli call --tool pdf_ocr_pages --args "{\"fileId\":\"${FILE_ID}\",\"pages\":[1],\"provider\":\"${PROVIDER}\",\"model\":\"${MODEL}\"}" --provider "${PROVIDER}" --model "${MODEL}" > "${OUT_DIR}/cli-ocr-pages.json" 2> "${OUT_DIR}/cli-ocr-pages.err"; then
    if validate_ocr_json "${OUT_DIR}/cli-ocr-pages.json"; then
      OCR_OK=1
      echo "{\"provider\":\"${PROVIDER}\",\"model\":\"${MODEL}\"}" > "${OUT_DIR}/ocr-selected-model.json"
    fi
  fi
else
  run_json "cli-ocr-pages" cli call --tool pdf_ocr_pages --args "{\"fileId\":\"${FILE_ID}\",\"pages\":[1]}"
fi

if [[ "${RUN_TABLES}" == "1" ]]; then
  if [[ -n "${PROVIDER}" ]]; then
    echo "[tables] using provider=${PROVIDER} model=${MODEL}" >> "${OUT_DIR}/llm-attempts.log"
    if cli call --tool pdf_tables_to_latex --args "{\"fileId\":\"${FILE_ID}\",\"pages\":[1],\"provider\":\"${PROVIDER}\",\"model\":\"${MODEL}\"}" --provider "${PROVIDER}" --model "${MODEL}" > "${OUT_DIR}/cli-tables-to-latex.json" 2> "${OUT_DIR}/cli-tables-to-latex.err"; then
      if validate_tables_json "${OUT_DIR}/cli-tables-to-latex.json"; then
        TABLES_OK=1
        echo "{\"provider\":\"${PROVIDER}\",\"model\":\"${MODEL}\"}" > "${OUT_DIR}/tables-selected-model.json"
      fi
    fi
  else
    run_json "cli-tables-to-latex" cli call --tool pdf_tables_to_latex --args "{\"fileId\":\"${FILE_ID}\",\"pages\":[1]}"
  fi
else
  echo '{"skipped":true,"reason":"Set RUN_TABLES=1 to enable table-latex call"}' > "${OUT_DIR}/cli-tables-to-latex.json"
fi

# Fail the export when LLM calls were mandatory and did not validate.
if [[ "${REQUIRE_LLM_SUCCESS}" == "1" ]]; then
  if [[ "${OCR_OK}" != "1" ]]; then
    echo "OCR failed for configured model. See ${OUT_DIR}/cli-ocr-pages.err and llm-attempts.log" >&2
    exit 1
  fi
  if [[ "${RUN_TABLES}" == "1" ]] && [[ "${TABLES_OK}" != "1" ]]; then
    echo "Tables failed for configured model. See ${OUT_DIR}/cli-tables-to-latex.err and llm-attempts.log" >&2
    exit 1
  fi
fi

cat > "${OUT_DIR}/summary.txt" <<TXT
base_url=${BASE_URL}
input_pdf=${INPUT_PDF}
file_id=${FILE_ID}
model=${MODEL}
outputs_dir=${OUT_DIR}
TXT

ls -la "$OUT_DIR"
|
package/scripts/smoke.sh
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
#!/usr/bin/env bash
# Smoke test entry point: verify runtime prerequisites, then delegate to the
# integration test suite, which is the single E2E source of truth.
#
# Environment variables forwarded to the integration tests:
#   - SMOKE_BASE_URL
#   - SMOKE_REQUIRE_LLM
#   - SMOKE_LLM_PROVIDER
#   - SMOKE_LLM_MODEL
#   - TESTCASE_DIR
set -euo pipefail

script_dir="$(cd "$(dirname "$0")" && pwd)"
bash "${script_dir}/check-runtime.sh"

npm run test:integration
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
import type { EchoPdfConfig } from "./pdf-types"
|
|
2
|
+
|
|
3
|
+
const normalize = (value: string): string => value.trim()
|
|
4
|
+
|
|
5
|
+
export const resolveProviderAlias = (
|
|
6
|
+
config: EchoPdfConfig,
|
|
7
|
+
requestedProvider?: string
|
|
8
|
+
): string => {
|
|
9
|
+
const raw = normalize(requestedProvider ?? "")
|
|
10
|
+
if (raw.length === 0) return config.agent.defaultProvider
|
|
11
|
+
if (config.providers[raw]) return raw
|
|
12
|
+
const fromType = Object.entries(config.providers).find(([, provider]) => provider.type === raw)?.[0]
|
|
13
|
+
if (fromType) return fromType
|
|
14
|
+
throw new Error(`Provider "${raw}" not configured`)
|
|
15
|
+
}
|
|
16
|
+
|
|
17
|
+
export const resolveModelForProvider = (
|
|
18
|
+
config: EchoPdfConfig,
|
|
19
|
+
_providerAlias: string,
|
|
20
|
+
requestedModel?: string
|
|
21
|
+
): string => {
|
|
22
|
+
const explicit = normalize(requestedModel ?? "")
|
|
23
|
+
if (explicit.length > 0) return explicit
|
|
24
|
+
return normalize(config.agent.defaultModel ?? "")
|
|
25
|
+
}
|
package/src/file-ops.ts
ADDED
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
import { fromBase64, normalizeReturnMode, toInlineFilePayload } from "./file-utils"
|
|
2
|
+
import type { FileStore, ReturnMode } from "./types"
|
|
3
|
+
|
|
4
|
+
export const runFileOp = async (
|
|
5
|
+
fileStore: FileStore,
|
|
6
|
+
input: {
|
|
7
|
+
readonly op: "list" | "read" | "delete" | "put"
|
|
8
|
+
readonly fileId?: string
|
|
9
|
+
readonly includeBase64?: boolean
|
|
10
|
+
readonly text?: string
|
|
11
|
+
readonly filename?: string
|
|
12
|
+
readonly mimeType?: string
|
|
13
|
+
readonly base64?: string
|
|
14
|
+
readonly returnMode?: ReturnMode
|
|
15
|
+
}
|
|
16
|
+
): Promise<unknown> => {
|
|
17
|
+
if (input.op === "list") {
|
|
18
|
+
return { files: await fileStore.list() }
|
|
19
|
+
}
|
|
20
|
+
|
|
21
|
+
if (input.op === "put") {
|
|
22
|
+
const bytes = input.base64 ? fromBase64(input.base64) : new TextEncoder().encode(input.text ?? "")
|
|
23
|
+
const meta = await fileStore.put({
|
|
24
|
+
filename: input.filename ?? `file-${Date.now()}.txt`,
|
|
25
|
+
mimeType: input.mimeType ?? "text/plain; charset=utf-8",
|
|
26
|
+
bytes,
|
|
27
|
+
})
|
|
28
|
+
const returnMode = normalizeReturnMode(input.returnMode)
|
|
29
|
+
if (returnMode === "url") {
|
|
30
|
+
throw new Error("returnMode=url is not implemented; use inline or file_id")
|
|
31
|
+
}
|
|
32
|
+
if (returnMode === "file_id") return { returnMode, file: meta }
|
|
33
|
+
const stored = await fileStore.get(meta.id)
|
|
34
|
+
if (!stored) throw new Error(`File not found after put: ${meta.id}`)
|
|
35
|
+
return {
|
|
36
|
+
returnMode,
|
|
37
|
+
...toInlineFilePayload(stored, true),
|
|
38
|
+
}
|
|
39
|
+
}
|
|
40
|
+
|
|
41
|
+
if (!input.fileId) {
|
|
42
|
+
throw new Error("fileId is required")
|
|
43
|
+
}
|
|
44
|
+
|
|
45
|
+
if (input.op === "delete") {
|
|
46
|
+
return { deleted: await fileStore.delete(input.fileId), fileId: input.fileId }
|
|
47
|
+
}
|
|
48
|
+
|
|
49
|
+
const file = await fileStore.get(input.fileId)
|
|
50
|
+
if (!file) throw new Error(`File not found: ${input.fileId}`)
|
|
51
|
+
return toInlineFilePayload(file, Boolean(input.includeBase64))
|
|
52
|
+
}
|
|
@@ -0,0 +1,340 @@
|
|
|
1
|
+
import { fromBase64, toBase64 } from "./file-utils"
|
|
2
|
+
import type { StoragePolicy } from "./pdf-types"
|
|
3
|
+
import type { StoredFileMeta, StoredFileRecord } from "./types"
|
|
4
|
+
|
|
5
|
+
interface StoredValue {
|
|
6
|
+
readonly id: string
|
|
7
|
+
readonly filename: string
|
|
8
|
+
readonly mimeType: string
|
|
9
|
+
readonly sizeBytes: number
|
|
10
|
+
readonly createdAt: string
|
|
11
|
+
readonly bytesBase64: string
|
|
12
|
+
}
|
|
13
|
+
|
|
14
|
+
interface StoreStats {
|
|
15
|
+
readonly fileCount: number
|
|
16
|
+
readonly totalBytes: number
|
|
17
|
+
}
|
|
18
|
+
|
|
19
|
+
const json = (data: unknown, status = 200): Response =>
|
|
20
|
+
new Response(JSON.stringify(data), {
|
|
21
|
+
status,
|
|
22
|
+
headers: { "Content-Type": "application/json; charset=utf-8" },
|
|
23
|
+
})
|
|
24
|
+
|
|
25
|
+
const readJson = async (request: Request): Promise<Record<string, unknown>> => {
|
|
26
|
+
try {
|
|
27
|
+
const body = await request.json()
|
|
28
|
+
if (typeof body === "object" && body !== null && !Array.isArray(body)) {
|
|
29
|
+
return body as Record<string, unknown>
|
|
30
|
+
}
|
|
31
|
+
return {}
|
|
32
|
+
} catch {
|
|
33
|
+
return {}
|
|
34
|
+
}
|
|
35
|
+
}
|
|
36
|
+
|
|
37
|
+
const defaultPolicy = (): StoragePolicy => ({
|
|
38
|
+
maxFileBytes: 1_200_000,
|
|
39
|
+
maxTotalBytes: 52_428_800,
|
|
40
|
+
ttlHours: 24,
|
|
41
|
+
cleanupBatchSize: 50,
|
|
42
|
+
})
|
|
43
|
+
|
|
44
|
+
const parsePolicy = (input: unknown): StoragePolicy => {
|
|
45
|
+
const raw = typeof input === "object" && input !== null && !Array.isArray(input)
|
|
46
|
+
? (input as Record<string, unknown>)
|
|
47
|
+
: {}
|
|
48
|
+
const fallback = defaultPolicy()
|
|
49
|
+
|
|
50
|
+
const maxFileBytes = Number(raw.maxFileBytes ?? fallback.maxFileBytes)
|
|
51
|
+
const maxTotalBytes = Number(raw.maxTotalBytes ?? fallback.maxTotalBytes)
|
|
52
|
+
const ttlHours = Number(raw.ttlHours ?? fallback.ttlHours)
|
|
53
|
+
const cleanupBatchSize = Number(raw.cleanupBatchSize ?? fallback.cleanupBatchSize)
|
|
54
|
+
|
|
55
|
+
return {
|
|
56
|
+
maxFileBytes: Number.isFinite(maxFileBytes) && maxFileBytes > 0 ? Math.floor(maxFileBytes) : fallback.maxFileBytes,
|
|
57
|
+
maxTotalBytes: Number.isFinite(maxTotalBytes) && maxTotalBytes > 0 ? Math.floor(maxTotalBytes) : fallback.maxTotalBytes,
|
|
58
|
+
ttlHours: Number.isFinite(ttlHours) && ttlHours > 0 ? ttlHours : fallback.ttlHours,
|
|
59
|
+
cleanupBatchSize:
|
|
60
|
+
Number.isFinite(cleanupBatchSize) && cleanupBatchSize > 0 ? Math.floor(cleanupBatchSize) : fallback.cleanupBatchSize,
|
|
61
|
+
}
|
|
62
|
+
}
|
|
63
|
+
|
|
64
|
+
const toMeta = (value: StoredValue): StoredFileMeta => ({
|
|
65
|
+
id: value.id,
|
|
66
|
+
filename: value.filename,
|
|
67
|
+
mimeType: value.mimeType,
|
|
68
|
+
sizeBytes: value.sizeBytes,
|
|
69
|
+
createdAt: value.createdAt,
|
|
70
|
+
})
|
|
71
|
+
|
|
72
|
+
const listStoredValues = async (state: DurableObjectState): Promise<StoredValue[]> => {
|
|
73
|
+
const listed = await state.storage.list<StoredValue>({ prefix: "file:" })
|
|
74
|
+
return [...listed.values()]
|
|
75
|
+
}
|
|
76
|
+
|
|
77
|
+
const computeStats = (files: ReadonlyArray<StoredValue>): StoreStats => ({
|
|
78
|
+
fileCount: files.length,
|
|
79
|
+
totalBytes: files.reduce((sum, file) => sum + file.sizeBytes, 0),
|
|
80
|
+
})
|
|
81
|
+
|
|
82
|
+
const isExpired = (createdAt: string, ttlHours: number): boolean => {
|
|
83
|
+
const createdMs = Date.parse(createdAt)
|
|
84
|
+
if (!Number.isFinite(createdMs)) return false
|
|
85
|
+
return Date.now() - createdMs > ttlHours * 60 * 60 * 1000
|
|
86
|
+
}
|
|
87
|
+
|
|
88
|
+
const deleteFiles = async (state: DurableObjectState, files: ReadonlyArray<StoredValue>): Promise<number> => {
|
|
89
|
+
let deleted = 0
|
|
90
|
+
for (const file of files) {
|
|
91
|
+
const ok = await state.storage.delete(`file:${file.id}`)
|
|
92
|
+
if (ok) deleted += 1
|
|
93
|
+
}
|
|
94
|
+
return deleted
|
|
95
|
+
}
|
|
96
|
+
|
|
97
|
+
export class FileStoreDO {
|
|
98
|
+
constructor(private readonly state: DurableObjectState) {}
|
|
99
|
+
|
|
100
|
+
async fetch(request: Request): Promise<Response> {
|
|
101
|
+
const url = new URL(request.url)
|
|
102
|
+
|
|
103
|
+
if (request.method === "POST" && url.pathname === "/put") {
|
|
104
|
+
const body = await readJson(request)
|
|
105
|
+
const policy = parsePolicy(body.policy)
|
|
106
|
+
const filename = typeof body.filename === "string" ? body.filename : `file-${Date.now()}`
|
|
107
|
+
const mimeType = typeof body.mimeType === "string" ? body.mimeType : "application/octet-stream"
|
|
108
|
+
const bytesBase64 = typeof body.bytesBase64 === "string" ? body.bytesBase64 : ""
|
|
109
|
+
|
|
110
|
+
const bytes = fromBase64(bytesBase64)
|
|
111
|
+
if (bytes.byteLength > policy.maxFileBytes) {
|
|
112
|
+
return json(
|
|
113
|
+
{
|
|
114
|
+
error: `file too large: ${bytes.byteLength} bytes exceeds maxFileBytes ${policy.maxFileBytes}`,
|
|
115
|
+
code: "FILE_TOO_LARGE",
|
|
116
|
+
policy,
|
|
117
|
+
},
|
|
118
|
+
413
|
|
119
|
+
)
|
|
120
|
+
}
|
|
121
|
+
|
|
122
|
+
let files = await listStoredValues(this.state)
|
|
123
|
+
const expired = files.filter((file) => isExpired(file.createdAt, policy.ttlHours))
|
|
124
|
+
if (expired.length > 0) {
|
|
125
|
+
await deleteFiles(this.state, expired)
|
|
126
|
+
files = await listStoredValues(this.state)
|
|
127
|
+
}
|
|
128
|
+
|
|
129
|
+
let stats = computeStats(files)
|
|
130
|
+
const projected = stats.totalBytes + bytes.byteLength
|
|
131
|
+
if (projected > policy.maxTotalBytes) {
|
|
132
|
+
const needFree = projected - policy.maxTotalBytes
|
|
133
|
+
const candidates = [...files]
|
|
134
|
+
.sort((a, b) => Date.parse(a.createdAt) - Date.parse(b.createdAt))
|
|
135
|
+
.slice(0, policy.cleanupBatchSize)
|
|
136
|
+
|
|
137
|
+
let freed = 0
|
|
138
|
+
const evictList: StoredValue[] = []
|
|
139
|
+
for (const file of candidates) {
|
|
140
|
+
evictList.push(file)
|
|
141
|
+
freed += file.sizeBytes
|
|
142
|
+
if (freed >= needFree) break
|
|
143
|
+
}
|
|
144
|
+
if (evictList.length > 0) {
|
|
145
|
+
await deleteFiles(this.state, evictList)
|
|
146
|
+
files = await listStoredValues(this.state)
|
|
147
|
+
stats = computeStats(files)
|
|
148
|
+
}
|
|
149
|
+
}
|
|
150
|
+
|
|
151
|
+
if (stats.totalBytes + bytes.byteLength > policy.maxTotalBytes) {
|
|
152
|
+
return json(
|
|
153
|
+
{
|
|
154
|
+
error: `storage quota exceeded: total ${stats.totalBytes} + incoming ${bytes.byteLength} > maxTotalBytes ${policy.maxTotalBytes}`,
|
|
155
|
+
code: "STORAGE_QUOTA_EXCEEDED",
|
|
156
|
+
policy,
|
|
157
|
+
stats,
|
|
158
|
+
},
|
|
159
|
+
507
|
|
160
|
+
)
|
|
161
|
+
}
|
|
162
|
+
|
|
163
|
+
const id = crypto.randomUUID()
|
|
164
|
+
const value: StoredValue = {
|
|
165
|
+
id,
|
|
166
|
+
filename,
|
|
167
|
+
mimeType,
|
|
168
|
+
sizeBytes: bytes.byteLength,
|
|
169
|
+
createdAt: new Date().toISOString(),
|
|
170
|
+
bytesBase64,
|
|
171
|
+
}
|
|
172
|
+
await this.state.storage.put(`file:${id}`, value)
|
|
173
|
+
return json({ file: toMeta(value), policy })
|
|
174
|
+
}
|
|
175
|
+
|
|
176
|
+
if (request.method === "GET" && url.pathname === "/get") {
|
|
177
|
+
const fileId = url.searchParams.get("fileId")
|
|
178
|
+
if (!fileId) return json({ error: "Missing fileId" }, 400)
|
|
179
|
+
const value = await this.state.storage.get<StoredValue>(`file:${fileId}`)
|
|
180
|
+
if (!value) return json({ file: null })
|
|
181
|
+
return json({ file: value })
|
|
182
|
+
}
|
|
183
|
+
|
|
184
|
+
if (request.method === "GET" && url.pathname === "/list") {
|
|
185
|
+
const listed = await this.state.storage.list<StoredValue>({ prefix: "file:" })
|
|
186
|
+
const files = [...listed.values()].map(toMeta)
|
|
187
|
+
return json({ files })
|
|
188
|
+
}
|
|
189
|
+
|
|
190
|
+
if (request.method === "POST" && url.pathname === "/delete") {
|
|
191
|
+
const body = await readJson(request)
|
|
192
|
+
const fileId = typeof body.fileId === "string" ? body.fileId : ""
|
|
193
|
+
if (!fileId) return json({ error: "Missing fileId" }, 400)
|
|
194
|
+
const key = `file:${fileId}`
|
|
195
|
+
const existing = await this.state.storage.get(key)
|
|
196
|
+
if (!existing) return json({ deleted: false })
|
|
197
|
+
await this.state.storage.delete(key)
|
|
198
|
+
return json({ deleted: true })
|
|
199
|
+
}
|
|
200
|
+
|
|
201
|
+
if (request.method === "GET" && url.pathname === "/stats") {
|
|
202
|
+
let policyInput: unknown
|
|
203
|
+
const encoded = url.searchParams.get("policy")
|
|
204
|
+
if (encoded) {
|
|
205
|
+
try {
|
|
206
|
+
policyInput = JSON.parse(encoded)
|
|
207
|
+
} catch {
|
|
208
|
+
policyInput = undefined
|
|
209
|
+
}
|
|
210
|
+
}
|
|
211
|
+
const policy = parsePolicy(policyInput)
|
|
212
|
+
const files = await listStoredValues(this.state)
|
|
213
|
+
const stats = computeStats(files)
|
|
214
|
+
return json({ policy, stats })
|
|
215
|
+
}
|
|
216
|
+
|
|
217
|
+
if (request.method === "POST" && url.pathname === "/cleanup") {
|
|
218
|
+
const body = await readJson(request)
|
|
219
|
+
const policy = parsePolicy(body.policy)
|
|
220
|
+
const files = await listStoredValues(this.state)
|
|
221
|
+
const expired = files.filter((file) => isExpired(file.createdAt, policy.ttlHours))
|
|
222
|
+
const deletedExpired = await deleteFiles(this.state, expired)
|
|
223
|
+
|
|
224
|
+
const afterExpired = await listStoredValues(this.state)
|
|
225
|
+
let stats = computeStats(afterExpired)
|
|
226
|
+
let deletedEvicted = 0
|
|
227
|
+
if (stats.totalBytes > policy.maxTotalBytes) {
|
|
228
|
+
const sorted = [...afterExpired].sort((a, b) => Date.parse(a.createdAt) - Date.parse(b.createdAt))
|
|
229
|
+
const evictList: StoredValue[] = []
|
|
230
|
+
for (const file of sorted) {
|
|
231
|
+
evictList.push(file)
|
|
232
|
+
const projected = stats.totalBytes - evictList.reduce((sum, item) => sum + item.sizeBytes, 0)
|
|
233
|
+
if (projected <= policy.maxTotalBytes) break
|
|
234
|
+
if (evictList.length >= policy.cleanupBatchSize) break
|
|
235
|
+
}
|
|
236
|
+
deletedEvicted = await deleteFiles(this.state, evictList)
|
|
237
|
+
stats = computeStats(await listStoredValues(this.state))
|
|
238
|
+
}
|
|
239
|
+
|
|
240
|
+
return json({
|
|
241
|
+
policy,
|
|
242
|
+
deletedExpired,
|
|
243
|
+
deletedEvicted,
|
|
244
|
+
stats,
|
|
245
|
+
})
|
|
246
|
+
}
|
|
247
|
+
|
|
248
|
+
return json({ error: "Not found" }, 404)
|
|
249
|
+
}
|
|
250
|
+
}
|
|
251
|
+
|
|
252
|
+
export class DurableObjectFileStore {
|
|
253
|
+
constructor(
|
|
254
|
+
private readonly namespace: DurableObjectNamespace,
|
|
255
|
+
private readonly policy: StoragePolicy
|
|
256
|
+
) {}
|
|
257
|
+
|
|
258
|
+
private stub(): DurableObjectStub {
|
|
259
|
+
return this.namespace.get(this.namespace.idFromName("echo-pdf-file-store"))
|
|
260
|
+
}
|
|
261
|
+
|
|
262
|
+
async put(input: {
|
|
263
|
+
readonly filename: string
|
|
264
|
+
readonly mimeType: string
|
|
265
|
+
readonly bytes: Uint8Array
|
|
266
|
+
}): Promise<StoredFileMeta> {
|
|
267
|
+
const response = await this.stub().fetch("https://do/put", {
|
|
268
|
+
method: "POST",
|
|
269
|
+
headers: { "Content-Type": "application/json" },
|
|
270
|
+
body: JSON.stringify({
|
|
271
|
+
filename: input.filename,
|
|
272
|
+
mimeType: input.mimeType,
|
|
273
|
+
bytesBase64: toBase64(input.bytes),
|
|
274
|
+
policy: this.policy,
|
|
275
|
+
}),
|
|
276
|
+
})
|
|
277
|
+
const payload = (await response.json()) as { file?: StoredFileMeta; error?: string }
|
|
278
|
+
if (!response.ok || !payload.file) {
|
|
279
|
+
throw new Error(payload.error ?? "DO put failed")
|
|
280
|
+
}
|
|
281
|
+
return payload.file
|
|
282
|
+
}
|
|
283
|
+
|
|
284
|
+
async get(fileId: string): Promise<StoredFileRecord | null> {
|
|
285
|
+
const response = await this.stub().fetch(`https://do/get?fileId=${encodeURIComponent(fileId)}`)
|
|
286
|
+
const payload = (await response.json()) as { file?: StoredValue | null }
|
|
287
|
+
if (!response.ok) throw new Error("DO get failed")
|
|
288
|
+
if (!payload.file) return null
|
|
289
|
+
return {
|
|
290
|
+
id: payload.file.id,
|
|
291
|
+
filename: payload.file.filename,
|
|
292
|
+
mimeType: payload.file.mimeType,
|
|
293
|
+
sizeBytes: payload.file.sizeBytes,
|
|
294
|
+
createdAt: payload.file.createdAt,
|
|
295
|
+
bytes: fromBase64(payload.file.bytesBase64),
|
|
296
|
+
}
|
|
297
|
+
}
|
|
298
|
+
|
|
299
|
+
async list(): Promise<ReadonlyArray<StoredFileMeta>> {
|
|
300
|
+
const response = await this.stub().fetch("https://do/list")
|
|
301
|
+
const payload = (await response.json()) as { files?: StoredFileMeta[] }
|
|
302
|
+
if (!response.ok) throw new Error("DO list failed")
|
|
303
|
+
return payload.files ?? []
|
|
304
|
+
}
|
|
305
|
+
|
|
306
|
+
async delete(fileId: string): Promise<boolean> {
|
|
307
|
+
const response = await this.stub().fetch("https://do/delete", {
|
|
308
|
+
method: "POST",
|
|
309
|
+
headers: { "Content-Type": "application/json" },
|
|
310
|
+
body: JSON.stringify({ fileId }),
|
|
311
|
+
})
|
|
312
|
+
const payload = (await response.json()) as { deleted?: boolean }
|
|
313
|
+
if (!response.ok) throw new Error("DO delete failed")
|
|
314
|
+
return payload.deleted === true
|
|
315
|
+
}
|
|
316
|
+
|
|
317
|
+
async stats(): Promise<{ policy: StoragePolicy; stats: StoreStats }> {
|
|
318
|
+
const policyEncoded = encodeURIComponent(JSON.stringify(this.policy))
|
|
319
|
+
const response = await this.stub().fetch(`https://do/stats?policy=${policyEncoded}`)
|
|
320
|
+
const payload = (await response.json()) as { policy: StoragePolicy; stats: StoreStats }
|
|
321
|
+
if (!response.ok) throw new Error("DO stats failed")
|
|
322
|
+
return payload
|
|
323
|
+
}
|
|
324
|
+
|
|
325
|
+
async cleanup(): Promise<{ policy: StoragePolicy; deletedExpired: number; deletedEvicted: number; stats: StoreStats }> {
|
|
326
|
+
const response = await this.stub().fetch("https://do/cleanup", {
|
|
327
|
+
method: "POST",
|
|
328
|
+
headers: { "Content-Type": "application/json" },
|
|
329
|
+
body: JSON.stringify({ policy: this.policy }),
|
|
330
|
+
})
|
|
331
|
+
const payload = (await response.json()) as {
|
|
332
|
+
policy: StoragePolicy
|
|
333
|
+
deletedExpired: number
|
|
334
|
+
deletedEvicted: number
|
|
335
|
+
stats: StoreStats
|
|
336
|
+
}
|
|
337
|
+
if (!response.ok) throw new Error("DO cleanup failed")
|
|
338
|
+
return payload
|
|
339
|
+
}
|
|
340
|
+
}
|