@echofiles/echo-pdf 0.5.0 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +201 -0
- package/README.md +100 -563
- package/bin/echo-pdf.js +147 -536
- package/dist/file-utils.d.ts +0 -3
- package/dist/file-utils.js +0 -18
- package/dist/local/document.d.ts +10 -0
- package/dist/local/document.js +133 -0
- package/dist/local/index.d.ts +3 -135
- package/dist/local/index.js +2 -555
- package/dist/local/semantic.d.ts +2 -0
- package/dist/local/semantic.js +231 -0
- package/dist/local/shared.d.ts +50 -0
- package/dist/local/shared.js +173 -0
- package/dist/local/types.d.ts +183 -0
- package/dist/local/types.js +2 -0
- package/dist/node/pdfium-local.js +30 -6
- package/dist/pdf-config.js +2 -65
- package/dist/pdf-types.d.ts +2 -59
- package/dist/provider-client.js +1 -1
- package/dist/provider-keys.js +4 -1
- package/dist/types.d.ts +2 -88
- package/echo-pdf.config.json +10 -21
- package/package.json +25 -22
- package/bin/lib/http.js +0 -97
- package/bin/lib/mcp-stdio.js +0 -99
- package/dist/auth.d.ts +0 -18
- package/dist/auth.js +0 -36
- package/dist/core/index.d.ts +0 -50
- package/dist/core/index.js +0 -7
- package/dist/file-ops.d.ts +0 -11
- package/dist/file-ops.js +0 -36
- package/dist/file-store-do.d.ts +0 -36
- package/dist/file-store-do.js +0 -298
- package/dist/http-error.d.ts +0 -9
- package/dist/http-error.js +0 -14
- package/dist/index.d.ts +0 -1
- package/dist/index.js +0 -1
- package/dist/mcp-server.d.ts +0 -3
- package/dist/mcp-server.js +0 -124
- package/dist/node/semantic-local.d.ts +0 -16
- package/dist/node/semantic-local.js +0 -113
- package/dist/pdf-agent.d.ts +0 -18
- package/dist/pdf-agent.js +0 -217
- package/dist/pdf-storage.d.ts +0 -8
- package/dist/pdf-storage.js +0 -86
- package/dist/pdfium-engine.d.ts +0 -9
- package/dist/pdfium-engine.js +0 -180
- package/dist/r2-file-store.d.ts +0 -20
- package/dist/r2-file-store.js +0 -176
- package/dist/response-schema.d.ts +0 -15
- package/dist/response-schema.js +0 -159
- package/dist/tool-registry.d.ts +0 -16
- package/dist/tool-registry.js +0 -175
- package/dist/worker.d.ts +0 -7
- package/dist/worker.js +0 -386
- package/scripts/export-fixtures.sh +0 -204
- package/wrangler.toml +0 -19
|
@@ -1,204 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env bash
# Fixture export driver. Everything it produces is written under
# <package root>/fixtures/output, which is emptied at the start of each run.
#
# Environment overrides (defaults in parentheses):
#   EXPORT_PORT          port used to build the default BASE_URL (8798)
#   BASE_URL             service base URL (http://127.0.0.1:$EXPORT_PORT)
#   INPUT_PDF            PDF to upload (<package root>/fixtures/input.pdf)
#   START_LOCAL_DEV      "1" starts `npm run dev` in the background (1)
#   RUN_TABLES           "1" enables the table-to-LaTeX call (1)
#   REQUIRE_LLM_SUCCESS  "1" makes failed LLM calls fatal (1)
set -euo pipefail

# The package root is the parent of the directory holding this script.
script_dir="$(dirname "$0")"
ROOT_DIR="$(cd "${script_dir}/.." && pwd)"
OUT_DIR="${ROOT_DIR}/fixtures/output"

# Assign defaults only where the caller left the variable unset or empty.
: "${EXPORT_PORT:=8798}"
: "${BASE_URL:=http://127.0.0.1:${EXPORT_PORT}}"
: "${INPUT_PDF:=${ROOT_DIR}/fixtures/input.pdf}"
: "${START_LOCAL_DEV:=1}"
: "${RUN_TABLES:=1}"
: "${REQUIRE_LLM_SUCCESS:=1}"

# Start from an empty output directory; ":?" aborts instead of expanding to
# "/*" should OUT_DIR ever be empty.
mkdir -p "$OUT_DIR"
rm -rf "${OUT_DIR:?}/"*

# Optional env file one level above the package root. "set -a" exports every
# variable it assigns so child processes (node, npm) can see them.
env_local_file="${ROOT_DIR}/../.env.local"
if [[ -f "${env_local_file}" ]]; then
  set -a
  # shellcheck source=/dev/null
  source "${env_local_file}"
  set +a
fi

# Nothing below works without the input fixture.
[[ -f "${INPUT_PDF}" ]] || {
  echo "missing input pdf: ${INPUT_PDF}" >&2
  exit 1
}
|
|
27
|
-
|
|
28
|
-
# Runs the package's CLI entry point (bin/echo-pdf.js under ROOT_DIR) with
# node, forwarding every argument unchanged. Exit status is node's.
cli() {
  local entry_point="${ROOT_DIR}/bin/echo-pdf.js"
  node "${entry_point}" "$@"
}
|
|
31
|
-
|
|
32
|
-
# Runs a command and stores its stdout as ${OUT_DIR}/<name>.json.
# Arguments: $1 - artifact name (no extension); remaining args - the command.
# On success the stderr capture (<name>.err) is deleted. On failure the .err
# file is kept and <name>.json is overwritten with a small stub that names
# it, so a failing command does not abort the caller.
run_json() {
  local name="$1"
  shift
  local json_path="${OUT_DIR}/${name}.json"
  local err_path="${OUT_DIR}/${name}.err"

  if "$@" > "${json_path}" 2> "${err_path}"; then
    rm -f "${err_path}"
    return
  fi
  printf '{"ok":false,"error_file":"%s.err"}\n' "$name" > "${json_path}"
}
|
|
41
|
-
|
|
42
|
-
# Shared check behind both validators.
# Arguments: $1 - path to a JSON file; $2 - key to inspect on the first page.
# Returns 0 only when data.pages is a non-empty array and the named key of
# its first entry is non-blank after trimming; returns 1 otherwise. A file
# that cannot be read or parsed yields a one-line message on stderr instead
# of an uncaught-exception stack trace.
_validate_first_page_field() {
  local json_file="$1"
  local field="$2"
  node -e '
    const fs = require("fs");
    const file = process.argv[1];
    const field = process.argv[2];
    let doc;
    try {
      doc = JSON.parse(fs.readFileSync(file, "utf8"));
    } catch (err) {
      console.error("cannot read JSON from " + file + ": " + err.message);
      process.exit(1);
    }
    const pages = doc?.data?.pages;
    if (!Array.isArray(pages) || pages.length === 0) process.exit(1);
    const value = String(pages[0]?.[field] || "").trim();
    process.exit(value.length === 0 ? 1 : 0);
  ' "$json_file" "$field"
}

# Succeeds when the first page of the OCR result in $1 has non-blank "text".
validate_ocr_json() {
  _validate_first_page_field "$1" "text"
}

# Succeeds when the first page of the tables result in $1 has non-blank "latex".
validate_tables_json() {
  _validate_first_page_field "$1" "latex"
}
|
|
51
|
-
|
|
52
|
-
# 1) Save test logs locally (do not block artifact export on transient network failure)
# errexit is switched off for this section so a failing npm script is
# recorded rather than fatal. All output of the three runs goes to test.log.
set +e
{
  for step in typecheck test smoke; do
    echo "[${step}]"
    npm --prefix "$ROOT_DIR" run "$step"
    step_exit=$?
    case "$step" in
      typecheck) TYPECHECK_CODE=$step_exit ;;
      test) TEST_CODE=$step_exit ;;
      smoke) SMOKE_CODE=$step_exit ;;
    esac
    echo
  done
  echo "typecheck_exit=${TYPECHECK_CODE}"
  echo "test_exit=${TEST_CODE}"
  echo "smoke_exit=${SMOKE_CODE}"
} > "${OUT_DIR}/test.log" 2>&1
set -e

# Machine-readable copy of the exit codes; a code that was never captured
# is reported as 1.
printf '{"typecheck":%s,"test":%s,"smoke":%s}\n' \
  "${TYPECHECK_CODE:-1}" "${TEST_CODE:-1}" "${SMOKE_CODE:-1}" \
  > "${OUT_DIR}/test-status.json"
|
|
76
|
-
|
|
77
|
-
# PID of the background dev server; stays empty when none was started.
DEV_PID=""

# EXIT handler: terminates the background dev server if one was recorded in
# DEV_PID and is still alive, then reaps it. Never fails.
cleanup() {
  [[ -n "${DEV_PID}" ]] || return 0
  kill -0 "${DEV_PID}" >/dev/null 2>&1 || return 0
  kill "${DEV_PID}" >/dev/null 2>&1 || true
  wait "${DEV_PID}" 2>/dev/null || true
}
trap cleanup EXIT
|
|
85
|
-
|
|
86
|
-
# Probes <base url>/health once; succeeds only on an OK HTTP response.
# Arguments: $1 - service base URL.
dev_server_healthy() {
  node -e 'fetch(process.argv[1]+"/health").then(r=>process.exit(r.ok?0:1)).catch(()=>process.exit(1))' "$1" >/dev/null 2>&1
}

# Polls the health endpoint until it answers or the attempts run out.
# Arguments: $1 - service base URL
#            $2 - PID of the process that was launched for the server
#            $3 - maximum number of polls, 0.5 s apart (default 120)
# Polling stops early once the launched process is gone; one final probe
# then decides the result either way.
wait_for_dev_server() {
  local base_url="$1"
  local pid="$2"
  local attempts="${3:-120}"
  local i
  for ((i = 0; i < attempts; i++)); do
    if dev_server_healthy "$base_url"; then
      return 0
    fi
    kill -0 "$pid" >/dev/null 2>&1 || break
    sleep 0.5
  done
  dev_server_healthy "$base_url"
}

if [[ "${START_LOCAL_DEV}" == "1" ]]; then
  # Launch the dev server in the background with its output in the artifacts.
  npm --prefix "$ROOT_DIR" run dev -- --ip 127.0.0.1 --port "${EXPORT_PORT}" --inspector-port 0 > "${OUT_DIR}/export-local-dev.log" 2>&1 &
  DEV_PID=$!
  if ! wait_for_dev_server "${BASE_URL}" "${DEV_PID}"; then
    echo "local dev server did not become healthy at ${BASE_URL}; see ${OUT_DIR}/export-local-dev.log" >&2
    exit 1
  fi
fi
|
|
97
|
-
|
|
98
|
-
# 2) Init CLI + provider settings
cli init --service-url "$BASE_URL" > "${OUT_DIR}/cli-init.json"

# Choose a provider from echo-pdf.config.json and look up its API key in the
# environment. Order of preference:
#   1. SMOKE_LLM_PROVIDER, when it names a configured provider with a key;
#   2. agent.defaultProvider, then the other configured providers, first
#      one whose key variable is set;
#   3. otherwise the default provider name (possibly empty) with an empty key.
# For each provider both the *_API_KEY and *_KEY spelling of its apiKeyEnv
# name are tried.
# NOTE(review): a name ending in _API_KEY also matches the _KEY rule, so
# FOO_API_API_KEY is probed as well. Looks harmless, but confirm it is
# unintended before tidying.
node -e 'const fs=require("fs");const cfg=JSON.parse(fs.readFileSync(process.argv[1],"utf8"));const entries=Object.entries(cfg.providers||{});const pick=(key)=>{const keys=[key];if(key.endsWith("_API_KEY"))keys.push(key.replace(/_API_KEY$/,"_KEY"));if(key.endsWith("_KEY"))keys.push(key.replace(/_KEY$/,"_API_KEY"));for(const k of keys){const v=process.env[k];if(typeof v==="string"&&v.trim())return {k,v:v.trim()};}return null;};const forced=String(process.env.SMOKE_LLM_PROVIDER||"").trim();if(forced&&cfg.providers?.[forced]){const found=pick(String(cfg.providers[forced].apiKeyEnv||""));if(found){process.stdout.write(JSON.stringify({provider:forced,apiKey:found.v,env:found.k,forced:true}));process.exit(0);}}const preferred=String(cfg.agent?.defaultProvider||"");const ordered=entries.sort((a,b)=>a[0]===preferred?-1:b[0]===preferred?1:0);for(const [alias,p] of ordered){const found=pick(String(p.apiKeyEnv||""));if(found){process.stdout.write(JSON.stringify({provider:alias,apiKey:found.v,env:found.k,forced:false}));process.exit(0);}}process.stdout.write(JSON.stringify({provider:preferred||"",apiKey:"",env:"",forced:false}));' "${ROOT_DIR}/echo-pdf.config.json" > "${OUT_DIR}/provider-selection.json"

# Prints one top-level field of provider-selection.json ("" when absent).
# Arguments: $1 - field name.
selection_field() {
  node -e 'const fs=require("fs");const j=JSON.parse(fs.readFileSync(process.argv[1],"utf8"));process.stdout.write(String(j[process.argv[2]]||""))' "${OUT_DIR}/provider-selection.json" "$1"
}

PROVIDER="$(selection_field provider)"
PROVIDER_KEY="$(selection_field apiKey)"

# The key is now held in PROVIDER_KEY; do not leave it in the exported
# artifacts. The provider name and env-var name stay for debugging.
node -e 'const fs=require("fs");const p=process.argv[1];const j=JSON.parse(fs.readFileSync(p,"utf8"));if(j.apiKey)j.apiKey="[redacted]";fs.writeFileSync(p,JSON.stringify(j));' "${OUT_DIR}/provider-selection.json"

PREFERRED_MODEL="${SMOKE_LLM_MODEL:-${ECHO_PDF_DEFAULT_MODEL:-}}"
if [[ -n "${PROVIDER}" && -n "${PROVIDER_KEY}" ]]; then
  # NOTE(review): the key is passed on the command line and is visible in the
  # process list while the CLI runs; use a stdin/env option if the CLI has one.
  cli provider set --provider "${PROVIDER}" --api-key "${PROVIDER_KEY}" > "${OUT_DIR}/provider-set.json"
  cli provider use --provider "${PROVIDER}" > "${OUT_DIR}/provider-use.json"
else
  echo '{"warning":"No provider key found in env, LLM calls may fail"}' > "${OUT_DIR}/provider-warning.json"
fi
|
|
111
|
-
|
|
112
|
-
# 3) Pull models via CLI and select one
if [[ -n "${PROVIDER}" ]]; then
  run_json "models" cli models --provider "${PROVIDER}"
else
  echo '{"warning":"No provider selected, skip model list"}' > "${OUT_DIR}/models.json"
fi

# The model comes from SMOKE_LLM_MODEL / ECHO_PDF_DEFAULT_MODEL. Both a
# provider and a model are required from here on, so each missing piece is
# reported by name, in model-warning.json and on stderr, before exiting.
MODEL="${PREFERRED_MODEL}"
if [[ -z "${PROVIDER}" ]]; then
  echo '{"warning":"No provider selected, cannot choose a model"}' > "${OUT_DIR}/model-warning.json"
  echo "No provider selected, cannot choose a model" >&2
  exit 1
fi
if [[ -z "${MODEL}" ]]; then
  echo '{"warning":"Missing ECHO_PDF_DEFAULT_MODEL / SMOKE_LLM_MODEL"}' > "${OUT_DIR}/model-warning.json"
  echo "Missing ECHO_PDF_DEFAULT_MODEL / SMOKE_LLM_MODEL" >&2
  exit 1
fi

# run_json keeps models.err only when the listing command failed; report
# that as a listing failure rather than as a missing model.
if [[ -f "${OUT_DIR}/models.err" ]]; then
  echo "Model listing failed for provider ${PROVIDER}; see ${OUT_DIR}/models.err" >&2
  exit 1
fi

# The configured model must be in the provider's "models" array.
if ! node -e 'const fs=require("fs");const file=process.argv[1];const model=process.argv[2];const j=JSON.parse(fs.readFileSync(file,"utf8"));const models=Array.isArray(j.models)?j.models:[];process.exit(models.includes(model)?0:1)' "${OUT_DIR}/models.json" "$MODEL"; then
  echo "Configured model not found in provider model list: ${MODEL}" >&2
  exit 1
fi
cli model set --provider "${PROVIDER}" --model "$MODEL" > "${OUT_DIR}/model-set.json"
|
|
129
|
-
|
|
130
|
-
# 4) Upload the exact local fixture for subsequent CLI/MCP calls
# POSTs INPUT_PDF as multipart form data to /api/files/upload. The response
# body is always saved to upload.json; a non-OK response is also echoed to
# stderr and fails the script.
upload_json="${OUT_DIR}/upload.json"
node -e 'const fs=require("fs"); const path=require("path"); (async()=>{ const base=process.argv[1]; const file=process.argv[2]; const bytes=fs.readFileSync(file); const fd=new FormData(); fd.set("file", new Blob([bytes], {type:"application/pdf"}), path.basename(file)); const res=await fetch(`${base}/api/files/upload`, {method:"POST", body:fd}); const txt=await res.text(); fs.writeFileSync(process.argv[3], txt); if(!res.ok){process.stderr.write(txt); process.exit(1);} })().catch((e)=>{console.error(String(e)); process.exit(1)})' "$BASE_URL" "$INPUT_PDF" "${upload_json}"

# Every later tool call refers to the upload by the id found at file.id.
FILE_ID="$(node -e 'const fs=require("fs");const j=JSON.parse(fs.readFileSync(process.argv[1],"utf8"));process.stdout.write(j.file?.id||"")' "${upload_json}")"
[[ -n "${FILE_ID}" ]] || {
  echo "upload did not return file id" >&2
  exit 1
}
|
|
137
|
-
|
|
138
|
-
# 5) CLI tool calls
run_json "tools-catalog" cli tools

# Ask for page 1 of the uploaded file as an inline image. The provider and
# model flags are added only when a provider was selected.
extract_call=(call --tool pdf_extract_pages --args "{\"fileId\":\"${FILE_ID}\",\"pages\":[1],\"returnMode\":\"inline\"}")
if [[ -n "${PROVIDER}" ]]; then
  extract_call+=(--provider "${PROVIDER}" --model "${MODEL:-}")
fi
run_json "cli-extract-pages" cli "${extract_call[@]}"

# Decode the first returned image (a data:image/... URL) into page-1-cli.png.
# The step fails, and with it the script, when no such image is present.
node -e 'const fs=require("fs");const p=process.argv[1];const out=process.argv[2];const j=JSON.parse(fs.readFileSync(p,"utf8"));const d=j.data?.images?.[0]?.data||"";if(!d.startsWith("data:image/"))process.exit(1);fs.writeFileSync(out, Buffer.from(d.split(",")[1]||"","base64"));' "${OUT_DIR}/cli-extract-pages.json" "${OUT_DIR}/page-1-cli.png"
|
|
146
|
-
|
|
147
|
-
# 6) MCP tool calls
# Each call goes through run_json, so a failing call leaves an .err file and
# a stub JSON instead of stopping the export.
for mcp_cmd in initialize tools; do
  run_json "mcp-${mcp_cmd}" cli mcp "${mcp_cmd}"
done

mcp_extract_args="{\"fileId\":\"${FILE_ID}\",\"pages\":[1],\"returnMode\":\"inline\"}"
run_json "mcp-call-fileops" cli mcp call --tool file_ops --args '{"op":"list"}'
run_json "mcp-extract-pages" cli mcp call --tool pdf_extract_pages --args "${mcp_extract_args}"
|
|
152
|
-
|
|
153
|
-
# 7) LLM tool calls

# Runs one LLM-backed tool on page 1 of the uploaded file with the selected
# provider/model and validates the response.
# Arguments: $1 - short label (log prefix and <label>-selected-model.json)
#            $2 - tool name passed to `cli call --tool`
#            $3 - artifact stem; output goes to <stem>.json and <stem>.err
#            $4 - validator function, called with the path of <stem>.json
# Globals:   OUT_DIR, FILE_ID, PROVIDER, MODEL (read)
# Returns:   0 when the call succeeded and the validator accepted the output,
#            1 otherwise. The outcome is appended to llm-attempts.log either
#            way, so the log the failure messages point at records the cause.
run_llm_tool() {
  local label="$1"
  local tool="$2"
  local stem="$3"
  local validator="$4"
  local attempts_log="${OUT_DIR}/llm-attempts.log"
  local call_args="{\"fileId\":\"${FILE_ID}\",\"pages\":[1],\"provider\":\"${PROVIDER}\",\"model\":\"${MODEL}\"}"

  echo "[${label}] using provider=${PROVIDER} model=${MODEL}" >> "${attempts_log}"
  if ! cli call --tool "${tool}" --args "${call_args}" --provider "${PROVIDER}" --model "${MODEL}" > "${OUT_DIR}/${stem}.json" 2> "${OUT_DIR}/${stem}.err"; then
    echo "[${label}] call failed, see ${stem}.err" >> "${attempts_log}"
    return 1
  fi
  if ! "${validator}" "${OUT_DIR}/${stem}.json"; then
    echo "[${label}] response failed validation, see ${stem}.json" >> "${attempts_log}"
    return 1
  fi
  echo "{\"provider\":\"${PROVIDER}\",\"model\":\"${MODEL}\"}" > "${OUT_DIR}/${label}-selected-model.json"
  echo "[${label}] ok" >> "${attempts_log}"
}

OCR_OK=0
TABLES_OK=0

# NOTE(review): the model-selection step earlier in the script exits when
# PROVIDER is empty, so the provider-less branches below look unreachable.
# Confirm before removing them.
if [[ -n "${PROVIDER}" ]]; then
  : > "${OUT_DIR}/llm-attempts.log"
  if run_llm_tool "ocr" pdf_ocr_pages "cli-ocr-pages" validate_ocr_json; then
    OCR_OK=1
  fi
else
  run_json "cli-ocr-pages" cli call --tool pdf_ocr_pages --args "{\"fileId\":\"${FILE_ID}\",\"pages\":[1]}"
fi

if [[ "${RUN_TABLES}" == "1" ]]; then
  if [[ -n "${PROVIDER}" ]]; then
    if run_llm_tool "tables" pdf_tables_to_latex "cli-tables-to-latex" validate_tables_json; then
      TABLES_OK=1
    fi
  else
    run_json "cli-tables-to-latex" cli call --tool pdf_tables_to_latex --args "{\"fileId\":\"${FILE_ID}\",\"pages\":[1]}"
  fi
else
  echo '{"skipped":true,"reason":"Set RUN_TABLES=1 to enable table-latex call"}' > "${OUT_DIR}/cli-tables-to-latex.json"
fi

# With REQUIRE_LLM_SUCCESS=1 a failed OCR call, or a failed tables call when
# tables are enabled, fails the whole export.
if [[ "${REQUIRE_LLM_SUCCESS}" == "1" ]]; then
  if [[ "${OCR_OK}" != "1" ]]; then
    echo "OCR failed for configured model. See ${OUT_DIR}/cli-ocr-pages.err and llm-attempts.log" >&2
    exit 1
  fi
  if [[ "${RUN_TABLES}" == "1" && "${TABLES_OK}" != "1" ]]; then
    echo "Tables failed for configured model. See ${OUT_DIR}/cli-tables-to-latex.err and llm-attempts.log" >&2
    exit 1
  fi
fi
|
|
195
|
-
|
|
196
|
-
# Record the key parameters of this run next to the artifacts, then list
# everything that was produced.
printf '%s\n' \
  "base_url=${BASE_URL}" \
  "input_pdf=${INPUT_PDF}" \
  "file_id=${FILE_ID}" \
  "model=${MODEL}" \
  "outputs_dir=${OUT_DIR}" \
  > "${OUT_DIR}/summary.txt"

ls -la "$OUT_DIR"
|
package/wrangler.toml
DELETED
|
@@ -1,19 +0,0 @@
|
|
|
1
|
-
name = "echo-pdf"
|
|
2
|
-
main = "src/worker.ts"
|
|
3
|
-
compatibility_date = "2026-03-06"
|
|
4
|
-
|
|
5
|
-
[assets]
|
|
6
|
-
directory = "./assets"
|
|
7
|
-
binding = "ASSETS"
|
|
8
|
-
|
|
9
|
-
[[r2_buckets]]
|
|
10
|
-
binding = "FILE_STORE_BUCKET"
|
|
11
|
-
bucket_name = "echo-pdf-files"
|
|
12
|
-
|
|
13
|
-
[[durable_objects.bindings]]
|
|
14
|
-
name = "FILE_STORE_DO"
|
|
15
|
-
class_name = "FileStoreDO"
|
|
16
|
-
|
|
17
|
-
[[migrations]]
|
|
18
|
-
tag = "v1"
|
|
19
|
-
new_sqlite_classes = ["FileStoreDO"]
|