@echofiles/echo-pdf 0.9.0 → 0.10.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +10 -5
- package/dist/local/semantic.js +47 -45
- package/dist/local/shared.js +1 -1
- package/dist/local/understanding.d.ts +4 -1
- package/dist/local/understanding.js +3 -3
- package/package.json +1 -1
- /package/dist/{agent-defaults.d.ts → provider-defaults.d.ts} +0 -0
- /package/dist/{agent-defaults.js → provider-defaults.js} +0 -0
package/README.md
CHANGED
|
@@ -58,17 +58,21 @@ echo-pdf page ./sample.pdf --page 1
|
|
|
58
58
|
echo-pdf render ./sample.pdf --page 1 --scale 2
|
|
59
59
|
```
|
|
60
60
|
|
|
61
|
-
To run
|
|
61
|
+
To run provider-required primitives, configure a provider key and model once:
|
|
62
62
|
|
|
63
63
|
```bash
|
|
64
64
|
echo-pdf provider set --provider openai --api-key "$OPENAI_API_KEY"
|
|
65
65
|
echo-pdf model set --provider openai --model gpt-4.1-mini
|
|
66
|
+
|
|
66
67
|
echo-pdf semantic ./sample.pdf
|
|
68
|
+
echo-pdf tables ./sample.pdf --page 1
|
|
69
|
+
echo-pdf formulas ./sample.pdf --page 1
|
|
70
|
+
echo-pdf understanding ./sample.pdf --page 1
|
|
67
71
|
```
|
|
68
72
|
|
|
69
|
-
|
|
73
|
+
Provider-required primitives (`semantic`, `tables`, `formulas`, `understanding`) use the CLI profile's provider/model/api-key settings. If the selected provider or model is missing, they fail early with a clear setup error.
|
|
70
74
|
|
|
71
|
-
|
|
75
|
+
The CLI ships with a built-in `ollama` provider alias pointing at `http://127.0.0.1:11434/v1`. To use a local Ollama server:
|
|
72
76
|
|
|
73
77
|
```bash
|
|
74
78
|
echo-pdf provider set --provider ollama --api-key ""
|
|
@@ -76,7 +80,7 @@ echo-pdf model set --provider ollama --model llava:13b
|
|
|
76
80
|
echo-pdf semantic ./sample.pdf --provider ollama
|
|
77
81
|
```
|
|
78
82
|
|
|
79
|
-
|
|
83
|
+
The built-in provider aliases are `openai`, `vercel_gateway`, `openrouter`, and `ollama`. Other local OpenAI-compatible servers (llama.cpp, vLLM, LM Studio, LocalAI) can be configured by overriding the config via the `ECHO_PDF_CONFIG_JSON` environment variable or by editing the bundled `echo-pdf.config.json` in a source checkout. The selected model must support vision input.
|
|
80
84
|
|
|
81
85
|
What these commands map to:
|
|
82
86
|
|
|
@@ -146,8 +150,9 @@ const understanding = await get_page_understanding({ pdfPath: "./sample.pdf", pa
|
|
|
146
150
|
Notes:
|
|
147
151
|
|
|
148
152
|
- `get_document_structure()` returns the stable page index: `document -> pages[]`
|
|
149
|
-
- `get_semantic_document_structure()` returns a
|
|
153
|
+
- `get_semantic_document_structure()` returns a heading/section tree plus optional cross-page merged `tables[]`, `formulas[]`, and `figures[]`; it does not replace `pages[]`
|
|
150
154
|
- `get_page_render()` materializes a reusable PNG plus render metadata and is the mainline visual input path
|
|
155
|
+
- `get_page_understanding()` extracts tables, formulas, and figures from a single page in one LLM call
|
|
151
156
|
|
|
152
157
|
Migration note:
|
|
153
158
|
|
package/dist/local/semantic.js
CHANGED
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
/// <reference path="../node/compat.d.ts" />
|
|
2
2
|
import { readFile } from "node:fs/promises";
|
|
3
3
|
import path from "node:path";
|
|
4
|
-
import { resolveModelForProvider, resolveProviderAlias } from "../agent-defaults.js";
|
|
4
|
+
import { resolveModelForProvider, resolveProviderAlias } from "../provider-defaults.js";
|
|
5
5
|
import { toDataUrl } from "../file-utils.js";
|
|
6
6
|
import { generateText, visionRecognize } from "../provider-client.js";
|
|
7
7
|
import { ensureRenderArtifact, indexDocumentInternal } from "./document.js";
|
|
8
8
|
import { fileExists, matchesSourceSnapshot, matchesStrategyKey, pageLabel, parseJsonObject, readJson, resolveConfig, resolveEnv, writeJson, } from "./shared.js";
|
|
9
|
-
import { get_page_understanding } from "./understanding.js";
|
|
9
|
+
import { normalizeFigureItems, normalizeUnderstandingFormulas, normalizeUnderstandingTables } from "./understanding.js";
|
|
10
10
|
const resolveSemanticExtractionBudget = (input) => ({
|
|
11
11
|
pageSelection: "all",
|
|
12
12
|
chunkMaxChars: typeof input?.chunkMaxChars === "number" && Number.isFinite(input.chunkMaxChars) && input.chunkMaxChars > 400
|
|
@@ -59,29 +59,32 @@ const toSemanticTree = (value, pageArtifactPaths) => {
|
|
|
59
59
|
});
|
|
60
60
|
return nodes;
|
|
61
61
|
};
|
|
62
|
-
const
|
|
62
|
+
const buildCombinedPagePrompt = (page, renderScale) => {
|
|
63
63
|
return [
|
|
64
|
-
"
|
|
65
|
-
"Primary evidence is the page image layout. Use the extracted page text
|
|
64
|
+
"Analyze this rendered PDF page image. Extract headings, tables, formulas, and figures.",
|
|
65
|
+
"Primary evidence is the page image layout. Use the extracted page text as supporting context.",
|
|
66
66
|
"Return JSON only.",
|
|
67
67
|
"Schema:",
|
|
68
68
|
"{",
|
|
69
|
-
' "candidates": [',
|
|
70
|
-
"
|
|
71
|
-
'
|
|
72
|
-
'
|
|
73
|
-
' "excerpt": "short evidence string",',
|
|
74
|
-
' "confidence": 0.0',
|
|
75
|
-
" }",
|
|
76
|
-
" ]",
|
|
69
|
+
' "candidates": [{ "title": "string", "level": 1, "excerpt": "short evidence", "confidence": 0.0 }],',
|
|
70
|
+
' "tables": [{ "latexTabular": "\\\\begin{tabular}...\\\\end{tabular}", "caption": "optional", "truncatedTop": false, "truncatedBottom": false }],',
|
|
71
|
+
' "formulas": [{ "latexMath": "LaTeX expression", "label": "optional", "truncatedTop": false, "truncatedBottom": false }],',
|
|
72
|
+
' "figures": [{ "figureType": "schematic|chart|photo|diagram|other", "caption": "optional", "description": "brief description", "truncatedTop": false, "truncatedBottom": false }]',
|
|
77
73
|
"}",
|
|
78
|
-
"
|
|
79
|
-
"-
|
|
80
|
-
"- Prefer conservative extraction
|
|
81
|
-
"-
|
|
82
|
-
"
|
|
83
|
-
"-
|
|
84
|
-
|
|
74
|
+
"Heading rules:",
|
|
75
|
+
"- candidates: true document headings/sections supported by page layout plus text.",
|
|
76
|
+
"- Prefer conservative extraction. Do not include table headers, field labels, or prose sentences.",
|
|
77
|
+
"- Confidence reflects how likely the candidate is a real navigational section heading.",
|
|
78
|
+
"Table rules:",
|
|
79
|
+
"- Tables must be complete LaTeX tabular environments.",
|
|
80
|
+
"Formula rules:",
|
|
81
|
+
"- Use LaTeX math notation. Skip trivial inline math or single symbols.",
|
|
82
|
+
"Figure rules:",
|
|
83
|
+
"- Describe by type, caption, and brief visual description. Do not crop or encode images.",
|
|
84
|
+
"Truncation:",
|
|
85
|
+
"- Set truncatedTop/truncatedBottom to true if elements appear cut off at the page boundary.",
|
|
86
|
+
"Empty:",
|
|
87
|
+
'- If nothing found for a category, return an empty array for that key.',
|
|
85
88
|
`Page number: ${page.pageNumber}`,
|
|
86
89
|
`Render scale: ${renderScale}`,
|
|
87
90
|
"",
|
|
@@ -131,7 +134,7 @@ const resolveSemanticAgentContext = (config, request) => {
|
|
|
131
134
|
}
|
|
132
135
|
return { provider, model };
|
|
133
136
|
};
|
|
134
|
-
const extractSemanticCandidatesFromRenderedPage = async (input) => {
|
|
137
|
+
const extractCombinedPageData = async (input) => {
|
|
135
138
|
const renderArtifact = await ensureRenderArtifact({
|
|
136
139
|
pdfPath: input.request.pdfPath,
|
|
137
140
|
workspaceDir: input.request.workspaceDir,
|
|
@@ -146,14 +149,26 @@ const extractSemanticCandidatesFromRenderedPage = async (input) => {
|
|
|
146
149
|
env: input.env,
|
|
147
150
|
providerAlias: input.provider,
|
|
148
151
|
model: input.model,
|
|
149
|
-
prompt:
|
|
152
|
+
prompt: buildCombinedPagePrompt(input.page, renderArtifact.renderScale),
|
|
150
153
|
imageDataUrl,
|
|
151
154
|
runtimeApiKeys: input.request.providerApiKeys,
|
|
152
155
|
});
|
|
153
156
|
const parsed = parseJsonObject(response);
|
|
154
|
-
|
|
155
|
-
.map((
|
|
156
|
-
.filter((
|
|
157
|
+
const candidates = (Array.isArray(parsed?.candidates) ? parsed.candidates : [])
|
|
158
|
+
.map((c) => normalizeSemanticAgentCandidate(c, input.page.pageNumber))
|
|
159
|
+
.filter((c) => c !== null);
|
|
160
|
+
const tables = normalizeUnderstandingTables(parsed?.tables);
|
|
161
|
+
const formulas = normalizeUnderstandingFormulas(parsed?.formulas);
|
|
162
|
+
const figures = normalizeFigureItems(parsed?.figures);
|
|
163
|
+
return {
|
|
164
|
+
candidates,
|
|
165
|
+
elements: {
|
|
166
|
+
pageNumber: input.page.pageNumber,
|
|
167
|
+
tables,
|
|
168
|
+
formulas,
|
|
169
|
+
figures,
|
|
170
|
+
},
|
|
171
|
+
};
|
|
157
172
|
};
|
|
158
173
|
const mergeCrossPageTables = (understandings) => {
|
|
159
174
|
const merged = [];
|
|
@@ -262,8 +277,9 @@ const ensureSemanticStructureArtifact = async (request) => {
|
|
|
262
277
|
}
|
|
263
278
|
const pageArtifactPaths = new Map(pages.map((page) => [page.pageNumber, page.artifactPath]));
|
|
264
279
|
const candidateMap = new Map();
|
|
280
|
+
const pageElements = [];
|
|
265
281
|
for (const page of pages) {
|
|
266
|
-
const candidates = await extractSemanticCandidatesFromRenderedPage({
|
|
282
|
+
const result = await extractCombinedPageData({
|
|
267
283
|
page,
|
|
268
284
|
request,
|
|
269
285
|
config,
|
|
@@ -271,28 +287,14 @@ const ensureSemanticStructureArtifact = async (request) => {
|
|
|
271
287
|
provider,
|
|
272
288
|
model,
|
|
273
289
|
});
|
|
274
|
-
for (const candidate of candidates) {
|
|
290
|
+
for (const candidate of result.candidates) {
|
|
275
291
|
const key = `${candidate.pageNumber}:${candidate.level}:${candidate.title}`;
|
|
276
292
|
const existing = candidateMap.get(key);
|
|
277
293
|
if (!existing || candidate.confidence > existing.confidence) {
|
|
278
294
|
candidateMap.set(key, candidate);
|
|
279
295
|
}
|
|
280
296
|
}
|
|
281
|
-
|
|
282
|
-
const understandings = [];
|
|
283
|
-
for (const page of pages) {
|
|
284
|
-
const pu = await get_page_understanding({
|
|
285
|
-
pdfPath: request.pdfPath,
|
|
286
|
-
workspaceDir: request.workspaceDir,
|
|
287
|
-
forceRefresh: request.forceRefresh,
|
|
288
|
-
config,
|
|
289
|
-
pageNumber: page.pageNumber,
|
|
290
|
-
provider,
|
|
291
|
-
model,
|
|
292
|
-
env,
|
|
293
|
-
providerApiKeys: request.providerApiKeys,
|
|
294
|
-
});
|
|
295
|
-
understandings.push(pu);
|
|
297
|
+
pageElements.push(result.elements);
|
|
296
298
|
}
|
|
297
299
|
const aggregated = await generateText({
|
|
298
300
|
config,
|
|
@@ -304,9 +306,9 @@ const ensureSemanticStructureArtifact = async (request) => {
|
|
|
304
306
|
});
|
|
305
307
|
const parsed = parseJsonObject(aggregated);
|
|
306
308
|
const sections = toSemanticTree(parsed?.sections, pageArtifactPaths);
|
|
307
|
-
const mergedTables = mergeCrossPageTables(understandings);
|
|
308
|
-
const mergedFormulas = mergeCrossPageFormulas(understandings);
|
|
309
|
-
const mergedFigures = mergeCrossPageFigures(understandings);
|
|
309
|
+
const mergedTables = mergeCrossPageTables(pageElements);
|
|
310
|
+
const mergedFormulas = mergeCrossPageFormulas(pageElements);
|
|
311
|
+
const mergedFigures = mergeCrossPageFigures(pageElements);
|
|
310
312
|
const artifact = {
|
|
311
313
|
documentId: record.documentId,
|
|
312
314
|
generatedAt: new Date().toISOString(),
|
package/dist/local/shared.js
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import { createHash } from "node:crypto";
|
|
2
2
|
import { mkdir, readFile, stat, writeFile } from "node:fs/promises";
|
|
3
3
|
import path from "node:path";
|
|
4
|
-
import { resolveModelForProvider, resolveProviderAlias } from "../agent-defaults.js";
|
|
4
|
+
import { resolveModelForProvider, resolveProviderAlias } from "../provider-defaults.js";
|
|
5
5
|
import { loadEchoPdfConfig } from "../pdf-config.js";
|
|
6
6
|
export const defaultWorkspaceDir = () => path.resolve(process.cwd(), ".echo-pdf-workspace");
|
|
7
7
|
export const resolveWorkspaceDir = (workspaceDir) => path.resolve(process.cwd(), workspaceDir?.trim() || defaultWorkspaceDir());
|
|
package/dist/local/understanding.d.ts
CHANGED
|
@@ -1,2 +1,5 @@
|
|
|
1
|
-
import type { LocalPageUnderstandingArtifact, LocalPageUnderstandingRequest } from "./types.js";
|
|
1
|
+
import type { LocalFigureArtifactItem, LocalPageUnderstandingArtifact, LocalPageUnderstandingFormulaItem, LocalPageUnderstandingRequest, LocalPageUnderstandingTableItem } from "./types.js";
|
|
2
|
+
export declare const normalizeFigureItems: (value: unknown) => LocalFigureArtifactItem[];
|
|
3
|
+
export declare const normalizeUnderstandingTables: (value: unknown) => LocalPageUnderstandingTableItem[];
|
|
4
|
+
export declare const normalizeUnderstandingFormulas: (value: unknown) => LocalPageUnderstandingFormulaItem[];
|
|
2
5
|
export declare const get_page_understanding: (request: LocalPageUnderstandingRequest) => Promise<LocalPageUnderstandingArtifact>;
|
|
package/dist/local/understanding.js
CHANGED
|
@@ -20,7 +20,7 @@ const DEFAULT_UNDERSTANDING_PROMPT = [
|
|
|
20
20
|
"- Set truncatedTop/truncatedBottom to true if the element appears cut off at the page boundary.",
|
|
21
21
|
'- If nothing is found for a category, return an empty array for that key.',
|
|
22
22
|
].join("\n");
|
|
23
|
-
const normalizeFigureItems = (value) => {
|
|
23
|
+
export const normalizeFigureItems = (value) => {
|
|
24
24
|
if (!Array.isArray(value))
|
|
25
25
|
return [];
|
|
26
26
|
return value.flatMap((item, index) => {
|
|
@@ -37,7 +37,7 @@ const normalizeFigureItems = (value) => {
|
|
|
37
37
|
}];
|
|
38
38
|
});
|
|
39
39
|
};
|
|
40
|
-
const normalizeUnderstandingTables = (value) => {
|
|
40
|
+
export const normalizeUnderstandingTables = (value) => {
|
|
41
41
|
if (!Array.isArray(value))
|
|
42
42
|
return [];
|
|
43
43
|
return value.flatMap((item, index) => {
|
|
@@ -54,7 +54,7 @@ const normalizeUnderstandingTables = (value) => {
|
|
|
54
54
|
}];
|
|
55
55
|
});
|
|
56
56
|
};
|
|
57
|
-
const normalizeUnderstandingFormulas = (value) => {
|
|
57
|
+
export const normalizeUnderstandingFormulas = (value) => {
|
|
58
58
|
if (!Array.isArray(value))
|
|
59
59
|
return [];
|
|
60
60
|
return value.flatMap((item, index) => {
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@echofiles/echo-pdf",
|
|
3
3
|
"description": "Local-first PDF document component core with CLI, workspace artifacts, and reusable page primitives.",
|
|
4
|
-
"version": "0.9.0",
|
|
4
|
+
"version": "0.10.1",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"homepage": "https://pdf.echofile.ai/",
|
|
7
7
|
"repository": {
|
|
File without changes
|
|
File without changes
|