@echofiles/echo-pdf 0.5.0 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57)
  1. package/LICENSE +201 -0
  2. package/README.md +100 -563
  3. package/bin/echo-pdf.js +147 -536
  4. package/dist/file-utils.d.ts +0 -3
  5. package/dist/file-utils.js +0 -18
  6. package/dist/local/document.d.ts +10 -0
  7. package/dist/local/document.js +133 -0
  8. package/dist/local/index.d.ts +3 -135
  9. package/dist/local/index.js +2 -555
  10. package/dist/local/semantic.d.ts +2 -0
  11. package/dist/local/semantic.js +231 -0
  12. package/dist/local/shared.d.ts +50 -0
  13. package/dist/local/shared.js +173 -0
  14. package/dist/local/types.d.ts +183 -0
  15. package/dist/local/types.js +2 -0
  16. package/dist/node/pdfium-local.js +30 -6
  17. package/dist/pdf-config.js +2 -65
  18. package/dist/pdf-types.d.ts +2 -59
  19. package/dist/provider-client.js +1 -1
  20. package/dist/provider-keys.js +4 -1
  21. package/dist/types.d.ts +2 -88
  22. package/echo-pdf.config.json +10 -21
  23. package/package.json +25 -22
  24. package/bin/lib/http.js +0 -97
  25. package/bin/lib/mcp-stdio.js +0 -99
  26. package/dist/auth.d.ts +0 -18
  27. package/dist/auth.js +0 -36
  28. package/dist/core/index.d.ts +0 -50
  29. package/dist/core/index.js +0 -7
  30. package/dist/file-ops.d.ts +0 -11
  31. package/dist/file-ops.js +0 -36
  32. package/dist/file-store-do.d.ts +0 -36
  33. package/dist/file-store-do.js +0 -298
  34. package/dist/http-error.d.ts +0 -9
  35. package/dist/http-error.js +0 -14
  36. package/dist/index.d.ts +0 -1
  37. package/dist/index.js +0 -1
  38. package/dist/mcp-server.d.ts +0 -3
  39. package/dist/mcp-server.js +0 -124
  40. package/dist/node/semantic-local.d.ts +0 -16
  41. package/dist/node/semantic-local.js +0 -113
  42. package/dist/pdf-agent.d.ts +0 -18
  43. package/dist/pdf-agent.js +0 -217
  44. package/dist/pdf-storage.d.ts +0 -8
  45. package/dist/pdf-storage.js +0 -86
  46. package/dist/pdfium-engine.d.ts +0 -9
  47. package/dist/pdfium-engine.js +0 -180
  48. package/dist/r2-file-store.d.ts +0 -20
  49. package/dist/r2-file-store.js +0 -176
  50. package/dist/response-schema.d.ts +0 -15
  51. package/dist/response-schema.js +0 -159
  52. package/dist/tool-registry.d.ts +0 -16
  53. package/dist/tool-registry.js +0 -175
  54. package/dist/worker.d.ts +0 -7
  55. package/dist/worker.js +0 -386
  56. package/scripts/export-fixtures.sh +0 -204
  57. package/wrangler.toml +0 -19
package/scripts/export-fixtures.sh DELETED
@@ -1,204 +0,0 @@
1
- #!/usr/bin/env bash
2
- set -euo pipefail
3
-
4
- ROOT_DIR="$(cd "$(dirname "$0")/.." && pwd)"
5
- OUT_DIR="${ROOT_DIR}/fixtures/output"
6
- EXPORT_PORT="${EXPORT_PORT:-8798}"
7
- BASE_URL="${BASE_URL:-http://127.0.0.1:${EXPORT_PORT}}"
8
- INPUT_PDF="${INPUT_PDF:-${ROOT_DIR}/fixtures/input.pdf}"
9
- START_LOCAL_DEV="${START_LOCAL_DEV:-1}"
10
- RUN_TABLES="${RUN_TABLES:-1}"
11
- REQUIRE_LLM_SUCCESS="${REQUIRE_LLM_SUCCESS:-1}"
12
-
13
- mkdir -p "$OUT_DIR"
14
- rm -rf "${OUT_DIR:?}/"*
15
-
16
- if [[ -f "${ROOT_DIR}/../.env.local" ]]; then
17
- set -a
18
- # shellcheck source=/dev/null
19
- source "${ROOT_DIR}/../.env.local"
20
- set +a
21
- fi
22
-
23
- if [[ ! -f "${INPUT_PDF}" ]]; then
24
- echo "missing input pdf: ${INPUT_PDF}" >&2
25
- exit 1
26
- fi
27
-
28
- cli() {
29
- node "${ROOT_DIR}/bin/echo-pdf.js" "$@"
30
- }
31
-
32
- run_json() {
33
- local name="$1"
34
- shift
35
- if "$@" > "${OUT_DIR}/${name}.json" 2> "${OUT_DIR}/${name}.err"; then
36
- rm -f "${OUT_DIR}/${name}.err"
37
- else
38
- printf '{"ok":false,"error_file":"%s.err"}\n' "$name" > "${OUT_DIR}/${name}.json"
39
- fi
40
- }
41
-
42
- validate_ocr_json() {
43
- local json_file="$1"
44
- node -e 'const fs=require("fs");const j=JSON.parse(fs.readFileSync(process.argv[1],"utf8"));const pages=j?.data?.pages;if(!Array.isArray(pages)||pages.length===0)process.exit(1);const t=String(pages[0]?.text||"").trim();if(t.length===0)process.exit(1);' "$json_file"
45
- }
46
-
47
- validate_tables_json() {
48
- local json_file="$1"
49
- node -e 'const fs=require("fs");const j=JSON.parse(fs.readFileSync(process.argv[1],"utf8"));const pages=j?.data?.pages;if(!Array.isArray(pages)||pages.length===0)process.exit(1);const t=String(pages[0]?.latex||"").trim();if(t.length===0)process.exit(1);' "$json_file"
50
- }
51
-
52
- # 1) Save test logs locally (do not block artifact export on transient network failure)
53
- set +e
54
- {
55
- echo "[typecheck]"
56
- npm --prefix "$ROOT_DIR" run typecheck
57
- TYPECHECK_CODE=$?
58
- echo
59
- echo "[test]"
60
- npm --prefix "$ROOT_DIR" run test
61
- TEST_CODE=$?
62
- echo
63
- echo "[smoke]"
64
- npm --prefix "$ROOT_DIR" run smoke
65
- SMOKE_CODE=$?
66
- echo
67
- echo "typecheck_exit=${TYPECHECK_CODE}"
68
- echo "test_exit=${TEST_CODE}"
69
- echo "smoke_exit=${SMOKE_CODE}"
70
- } > "${OUT_DIR}/test.log" 2>&1
71
- set -e
72
-
73
- cat > "${OUT_DIR}/test-status.json" <<JSON
74
- {"typecheck":${TYPECHECK_CODE:-1},"test":${TEST_CODE:-1},"smoke":${SMOKE_CODE:-1}}
75
- JSON
76
-
77
- DEV_PID=""
78
- cleanup() {
79
- if [[ -n "${DEV_PID}" ]] && kill -0 "${DEV_PID}" >/dev/null 2>&1; then
80
- kill "${DEV_PID}" >/dev/null 2>&1 || true
81
- wait "${DEV_PID}" 2>/dev/null || true
82
- fi
83
- }
84
- trap cleanup EXIT
85
-
86
- if [[ "${START_LOCAL_DEV}" == "1" ]]; then
87
- npm --prefix "$ROOT_DIR" run dev -- --ip 127.0.0.1 --port "${EXPORT_PORT}" --inspector-port 0 > "${OUT_DIR}/export-local-dev.log" 2>&1 &
88
- DEV_PID=$!
89
- for _ in $(seq 1 120); do
90
- if node -e 'fetch(process.argv[1]+"/health").then(r=>process.exit(r.ok?0:1)).catch(()=>process.exit(1))' "${BASE_URL}" >/dev/null 2>&1; then
91
- break
92
- fi
93
- sleep 0.5
94
- done
95
- node -e 'fetch(process.argv[1]+"/health").then(r=>process.exit(r.ok?0:1)).catch(()=>process.exit(1))' "${BASE_URL}" >/dev/null
96
- fi
97
-
98
- # 2) Init CLI + provider settings
99
- cli init --service-url "$BASE_URL" > "${OUT_DIR}/cli-init.json"
100
-
101
- node -e 'const fs=require("fs");const cfg=JSON.parse(fs.readFileSync(process.argv[1],"utf8"));const entries=Object.entries(cfg.providers||{});const pick=(key)=>{const keys=[key];if(key.endsWith("_API_KEY"))keys.push(key.replace(/_API_KEY$/,"_KEY"));if(key.endsWith("_KEY"))keys.push(key.replace(/_KEY$/,"_API_KEY"));for(const k of keys){const v=process.env[k];if(typeof v==="string"&&v.trim())return {k,v:v.trim()};}return null;};const forced=String(process.env.SMOKE_LLM_PROVIDER||"").trim();if(forced&&cfg.providers?.[forced]){const found=pick(String(cfg.providers[forced].apiKeyEnv||""));if(found){process.stdout.write(JSON.stringify({provider:forced,apiKey:found.v,env:found.k,forced:true}));process.exit(0);}}const preferred=String(cfg.agent?.defaultProvider||"");const ordered=entries.sort((a,b)=>a[0]===preferred?-1:b[0]===preferred?1:0);for(const [alias,p] of ordered){const found=pick(String(p.apiKeyEnv||""));if(found){process.stdout.write(JSON.stringify({provider:alias,apiKey:found.v,env:found.k,forced:false}));process.exit(0);}}process.stdout.write(JSON.stringify({provider:preferred||"",apiKey:"",env:"",forced:false}));' "${ROOT_DIR}/echo-pdf.config.json" > "${OUT_DIR}/provider-selection.json"
102
- PROVIDER="$(node -e 'const fs=require("fs");const j=JSON.parse(fs.readFileSync(process.argv[1],"utf8"));process.stdout.write(String(j.provider||""))' "${OUT_DIR}/provider-selection.json")"
103
- PROVIDER_KEY="$(node -e 'const fs=require("fs");const j=JSON.parse(fs.readFileSync(process.argv[1],"utf8"));process.stdout.write(String(j.apiKey||""))' "${OUT_DIR}/provider-selection.json")"
104
- PREFERRED_MODEL="${SMOKE_LLM_MODEL:-${ECHO_PDF_DEFAULT_MODEL:-}}"
105
- if [[ -n "${PROVIDER}" ]] && [[ -n "${PROVIDER_KEY}" ]]; then
106
- cli provider set --provider "${PROVIDER}" --api-key "${PROVIDER_KEY}" > "${OUT_DIR}/provider-set.json"
107
- cli provider use --provider "${PROVIDER}" > "${OUT_DIR}/provider-use.json"
108
- else
109
- echo '{"warning":"No provider key found in env, LLM calls may fail"}' > "${OUT_DIR}/provider-warning.json"
110
- fi
111
-
112
- # 3) Pull models via CLI and select one
113
- if [[ -n "${PROVIDER}" ]]; then
114
- run_json "models" cli models --provider "${PROVIDER}"
115
- else
116
- echo '{"warning":"No provider selected, skip model list"}' > "${OUT_DIR}/models.json"
117
- fi
118
- MODEL="${PREFERRED_MODEL}"
119
- if [[ -n "$MODEL" ]] && [[ -n "${PROVIDER}" ]]; then
120
- if ! node -e 'const fs=require("fs");const file=process.argv[1];const model=process.argv[2];const j=JSON.parse(fs.readFileSync(file,"utf8"));const models=Array.isArray(j.models)?j.models:[];process.exit(models.includes(model)?0:1)' "${OUT_DIR}/models.json" "$MODEL"; then
121
- echo "Configured model not found in provider model list: ${MODEL}" >&2
122
- exit 1
123
- fi
124
- cli model set --provider "${PROVIDER}" --model "$MODEL" > "${OUT_DIR}/model-set.json"
125
- else
126
- echo '{"warning":"Missing ECHO_PDF_DEFAULT_MODEL / SMOKE_LLM_MODEL"}' > "${OUT_DIR}/model-warning.json"
127
- exit 1
128
- fi
129
-
130
- # 4) Upload the exact local fixture for subsequent CLI/MCP calls
131
- node -e 'const fs=require("fs"); const path=require("path"); (async()=>{ const base=process.argv[1]; const file=process.argv[2]; const bytes=fs.readFileSync(file); const fd=new FormData(); fd.set("file", new Blob([bytes], {type:"application/pdf"}), path.basename(file)); const res=await fetch(`${base}/api/files/upload`, {method:"POST", body:fd}); const txt=await res.text(); fs.writeFileSync(process.argv[3], txt); if(!res.ok){process.stderr.write(txt); process.exit(1);} })().catch((e)=>{console.error(String(e)); process.exit(1)})' "$BASE_URL" "$INPUT_PDF" "${OUT_DIR}/upload.json"
132
- FILE_ID="$(node -e 'const fs=require("fs");const j=JSON.parse(fs.readFileSync(process.argv[1],"utf8"));process.stdout.write(j.file?.id||"")' "${OUT_DIR}/upload.json")"
133
- if [[ -z "${FILE_ID}" ]]; then
134
- echo "upload did not return file id" >&2
135
- exit 1
136
- fi
137
-
138
- # 5) CLI tool calls
139
- run_json "tools-catalog" cli tools
140
- if [[ -n "${PROVIDER}" ]]; then
141
- run_json "cli-extract-pages" cli call --tool pdf_extract_pages --args "{\"fileId\":\"${FILE_ID}\",\"pages\":[1],\"returnMode\":\"inline\"}" --provider "${PROVIDER}" --model "${MODEL:-}"
142
- else
143
- run_json "cli-extract-pages" cli call --tool pdf_extract_pages --args "{\"fileId\":\"${FILE_ID}\",\"pages\":[1],\"returnMode\":\"inline\"}"
144
- fi
145
- node -e 'const fs=require("fs");const p=process.argv[1];const out=process.argv[2];const j=JSON.parse(fs.readFileSync(p,"utf8"));const d=j.data?.images?.[0]?.data||"";if(!d.startsWith("data:image/"))process.exit(1);fs.writeFileSync(out, Buffer.from(d.split(",")[1]||"","base64"));' "${OUT_DIR}/cli-extract-pages.json" "${OUT_DIR}/page-1-cli.png"
146
-
147
- # 6) MCP tool calls
148
- run_json "mcp-initialize" cli mcp initialize
149
- run_json "mcp-tools" cli mcp tools
150
- run_json "mcp-call-fileops" cli mcp call --tool file_ops --args '{"op":"list"}'
151
- run_json "mcp-extract-pages" cli mcp call --tool pdf_extract_pages --args "{\"fileId\":\"${FILE_ID}\",\"pages\":[1],\"returnMode\":\"inline\"}"
152
-
153
- # 7) LLM tool calls
154
- OCR_OK=0
155
- TABLES_OK=0
156
- if [[ -n "${PROVIDER}" ]]; then
157
- : > "${OUT_DIR}/llm-attempts.log"
158
- echo "[ocr] using provider=${PROVIDER} model=${MODEL}" >> "${OUT_DIR}/llm-attempts.log"
159
- if cli call --tool pdf_ocr_pages --args "{\"fileId\":\"${FILE_ID}\",\"pages\":[1],\"provider\":\"${PROVIDER}\",\"model\":\"${MODEL}\"}" --provider "${PROVIDER}" --model "${MODEL}" > "${OUT_DIR}/cli-ocr-pages.json" 2> "${OUT_DIR}/cli-ocr-pages.err"; then
160
- if validate_ocr_json "${OUT_DIR}/cli-ocr-pages.json"; then
161
- OCR_OK=1
162
- echo "{\"provider\":\"${PROVIDER}\",\"model\":\"${MODEL}\"}" > "${OUT_DIR}/ocr-selected-model.json"
163
- fi
164
- fi
165
- else
166
- run_json "cli-ocr-pages" cli call --tool pdf_ocr_pages --args "{\"fileId\":\"${FILE_ID}\",\"pages\":[1]}"
167
- fi
168
-
169
- if [[ "${RUN_TABLES}" == "1" ]]; then
170
- if [[ -n "${PROVIDER}" ]]; then
171
- echo "[tables] using provider=${PROVIDER} model=${MODEL}" >> "${OUT_DIR}/llm-attempts.log"
172
- if cli call --tool pdf_tables_to_latex --args "{\"fileId\":\"${FILE_ID}\",\"pages\":[1],\"provider\":\"${PROVIDER}\",\"model\":\"${MODEL}\"}" --provider "${PROVIDER}" --model "${MODEL}" > "${OUT_DIR}/cli-tables-to-latex.json" 2> "${OUT_DIR}/cli-tables-to-latex.err"; then
173
- if validate_tables_json "${OUT_DIR}/cli-tables-to-latex.json"; then
174
- TABLES_OK=1
175
- echo "{\"provider\":\"${PROVIDER}\",\"model\":\"${MODEL}\"}" > "${OUT_DIR}/tables-selected-model.json"
176
- fi
177
- fi
178
- else
179
- run_json "cli-tables-to-latex" cli call --tool pdf_tables_to_latex --args "{\"fileId\":\"${FILE_ID}\",\"pages\":[1]}"
180
- fi
181
- else
182
- echo '{"skipped":true,"reason":"Set RUN_TABLES=1 to enable table-latex call"}' > "${OUT_DIR}/cli-tables-to-latex.json"
183
- fi
184
-
185
- if [[ "${REQUIRE_LLM_SUCCESS}" == "1" ]]; then
186
- if [[ "${OCR_OK}" != "1" ]]; then
187
- echo "OCR failed for configured model. See ${OUT_DIR}/cli-ocr-pages.err and llm-attempts.log" >&2
188
- exit 1
189
- fi
190
- if [[ "${RUN_TABLES}" == "1" ]] && [[ "${TABLES_OK}" != "1" ]]; then
191
- echo "Tables failed for configured model. See ${OUT_DIR}/cli-tables-to-latex.err and llm-attempts.log" >&2
192
- exit 1
193
- fi
194
- fi
195
-
196
- cat > "${OUT_DIR}/summary.txt" <<TXT
197
- base_url=${BASE_URL}
198
- input_pdf=${INPUT_PDF}
199
- file_id=${FILE_ID}
200
- model=${MODEL}
201
- outputs_dir=${OUT_DIR}
202
- TXT
203
-
204
- ls -la "$OUT_DIR"
package/wrangler.toml DELETED
@@ -1,19 +0,0 @@
1
- name = "echo-pdf"
2
- main = "src/worker.ts"
3
- compatibility_date = "2026-03-06"
4
-
5
- [assets]
6
- directory = "./assets"
7
- binding = "ASSETS"
8
-
9
- [[r2_buckets]]
10
- binding = "FILE_STORE_BUCKET"
11
- bucket_name = "echo-pdf-files"
12
-
13
- [[durable_objects.bindings]]
14
- name = "FILE_STORE_DO"
15
- class_name = "FileStoreDO"
16
-
17
- [[migrations]]
18
- tag = "v1"
19
- new_sqlite_classes = ["FileStoreDO"]