@echofiles/echo-pdf 0.9.0 → 0.10.0

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
@@ -6,7 +6,7 @@ import { toDataUrl } from "../file-utils.js";
6
6
  import { generateText, visionRecognize } from "../provider-client.js";
7
7
  import { ensureRenderArtifact, indexDocumentInternal } from "./document.js";
8
8
  import { fileExists, matchesSourceSnapshot, matchesStrategyKey, pageLabel, parseJsonObject, readJson, resolveConfig, resolveEnv, writeJson, } from "./shared.js";
9
- import { get_page_understanding } from "./understanding.js";
9
+ import { normalizeFigureItems, normalizeUnderstandingFormulas, normalizeUnderstandingTables } from "./understanding.js";
10
10
  const resolveSemanticExtractionBudget = (input) => ({
11
11
  pageSelection: "all",
12
12
  chunkMaxChars: typeof input?.chunkMaxChars === "number" && Number.isFinite(input.chunkMaxChars) && input.chunkMaxChars > 400
@@ -59,29 +59,32 @@ const toSemanticTree = (value, pageArtifactPaths) => {
59
59
  });
60
60
  return nodes;
61
61
  };
62
- const buildSemanticPageUnderstandingPrompt = (page, renderScale) => {
62
+ const buildCombinedPagePrompt = (page, renderScale) => {
63
63
  return [
64
- "You extract semantic heading candidates from one rendered PDF page.",
65
- "Primary evidence is the page image layout. Use the extracted page text only as supporting context.",
64
+ "Analyze this rendered PDF page image. Extract headings, tables, formulas, and figures.",
65
+ "Primary evidence is the page image layout. Use the extracted page text as supporting context.",
66
66
  "Return JSON only.",
67
67
  "Schema:",
68
68
  "{",
69
- ' "candidates": [',
70
- " {",
71
- ' "title": "string",',
72
- ' "level": 1,',
73
- ' "excerpt": "short evidence string",',
74
- ' "confidence": 0.0',
75
- " }",
76
- " ]",
69
+ ' "candidates": [{ "title": "string", "level": 1, "excerpt": "short evidence", "confidence": 0.0 }],',
70
+ ' "tables": [{ "latexTabular": "\\\\begin{tabular}...\\\\end{tabular}", "caption": "optional", "truncatedTop": false, "truncatedBottom": false }],',
71
+ ' "formulas": [{ "latexMath": "LaTeX expression", "label": "optional", "truncatedTop": false, "truncatedBottom": false }],',
72
+ ' "figures": [{ "figureType": "schematic|chart|photo|diagram|other", "caption": "optional", "description": "brief description", "truncatedTop": false, "truncatedBottom": false }]',
77
73
  "}",
78
- "Rules:",
79
- "- Use only true document headings/sections that are clearly supported by page layout plus text.",
80
- "- Prefer conservative extraction over guessing.",
81
- "- Do not include table column headers, field labels, figure labels, unit/value rows, worksheet fragments, or prose sentences.",
82
- "- Do not infer hierarchy beyond the explicit heading numbering or structure visible on the page.",
83
- "- Confidence should reflect how likely the candidate is to be a real navigational section heading in the document.",
84
- '- If no reliable semantic structure is detectable, return {"candidates":[]}.',
74
+ "Heading rules:",
75
+ "- candidates: true document headings/sections supported by page layout plus text.",
76
+ "- Prefer conservative extraction. Do not include table headers, field labels, or prose sentences.",
77
+ "- Confidence reflects how likely the candidate is a real navigational section heading.",
78
+ "Table rules:",
79
+ "- Tables must be complete LaTeX tabular environments.",
80
+ "Formula rules:",
81
+ "- Use LaTeX math notation. Skip trivial inline math or single symbols.",
82
+ "Figure rules:",
83
+ "- Describe by type, caption, and brief visual description. Do not crop or encode images.",
84
+ "Truncation:",
85
+ "- Set truncatedTop/truncatedBottom to true if elements appear cut off at the page boundary.",
86
+ "Empty:",
87
+ '- If nothing found for a category, return an empty array for that key.',
85
88
  `Page number: ${page.pageNumber}`,
86
89
  `Render scale: ${renderScale}`,
87
90
  "",
@@ -131,7 +134,7 @@ const resolveSemanticAgentContext = (config, request) => {
131
134
  }
132
135
  return { provider, model };
133
136
  };
134
- const extractSemanticCandidatesFromRenderedPage = async (input) => {
137
+ const extractCombinedPageData = async (input) => {
135
138
  const renderArtifact = await ensureRenderArtifact({
136
139
  pdfPath: input.request.pdfPath,
137
140
  workspaceDir: input.request.workspaceDir,
@@ -146,14 +149,26 @@ const extractSemanticCandidatesFromRenderedPage = async (input) => {
146
149
  env: input.env,
147
150
  providerAlias: input.provider,
148
151
  model: input.model,
149
- prompt: buildSemanticPageUnderstandingPrompt(input.page, renderArtifact.renderScale),
152
+ prompt: buildCombinedPagePrompt(input.page, renderArtifact.renderScale),
150
153
  imageDataUrl,
151
154
  runtimeApiKeys: input.request.providerApiKeys,
152
155
  });
153
156
  const parsed = parseJsonObject(response);
154
- return (Array.isArray(parsed?.candidates) ? parsed.candidates : [])
155
- .map((candidate) => normalizeSemanticAgentCandidate(candidate, input.page.pageNumber))
156
- .filter((candidate) => candidate !== null);
157
+ const candidates = (Array.isArray(parsed?.candidates) ? parsed.candidates : [])
158
+ .map((c) => normalizeSemanticAgentCandidate(c, input.page.pageNumber))
159
+ .filter((c) => c !== null);
160
+ const tables = normalizeUnderstandingTables(parsed?.tables);
161
+ const formulas = normalizeUnderstandingFormulas(parsed?.formulas);
162
+ const figures = normalizeFigureItems(parsed?.figures);
163
+ return {
164
+ candidates,
165
+ elements: {
166
+ pageNumber: input.page.pageNumber,
167
+ tables,
168
+ formulas,
169
+ figures,
170
+ },
171
+ };
157
172
  };
158
173
  const mergeCrossPageTables = (understandings) => {
159
174
  const merged = [];
@@ -262,8 +277,9 @@ const ensureSemanticStructureArtifact = async (request) => {
262
277
  }
263
278
  const pageArtifactPaths = new Map(pages.map((page) => [page.pageNumber, page.artifactPath]));
264
279
  const candidateMap = new Map();
280
+ const pageElements = [];
265
281
  for (const page of pages) {
266
- const candidates = await extractSemanticCandidatesFromRenderedPage({
282
+ const result = await extractCombinedPageData({
267
283
  page,
268
284
  request,
269
285
  config,
@@ -271,28 +287,14 @@ const ensureSemanticStructureArtifact = async (request) => {
271
287
  provider,
272
288
  model,
273
289
  });
274
- for (const candidate of candidates) {
290
+ for (const candidate of result.candidates) {
275
291
  const key = `${candidate.pageNumber}:${candidate.level}:${candidate.title}`;
276
292
  const existing = candidateMap.get(key);
277
293
  if (!existing || candidate.confidence > existing.confidence) {
278
294
  candidateMap.set(key, candidate);
279
295
  }
280
296
  }
281
- }
282
- const understandings = [];
283
- for (const page of pages) {
284
- const pu = await get_page_understanding({
285
- pdfPath: request.pdfPath,
286
- workspaceDir: request.workspaceDir,
287
- forceRefresh: request.forceRefresh,
288
- config,
289
- pageNumber: page.pageNumber,
290
- provider,
291
- model,
292
- env,
293
- providerApiKeys: request.providerApiKeys,
294
- });
295
- understandings.push(pu);
297
+ pageElements.push(result.elements);
296
298
  }
297
299
  const aggregated = await generateText({
298
300
  config,
@@ -304,9 +306,9 @@ const ensureSemanticStructureArtifact = async (request) => {
304
306
  });
305
307
  const parsed = parseJsonObject(aggregated);
306
308
  const sections = toSemanticTree(parsed?.sections, pageArtifactPaths);
307
- const mergedTables = mergeCrossPageTables(understandings);
308
- const mergedFormulas = mergeCrossPageFormulas(understandings);
309
- const mergedFigures = mergeCrossPageFigures(understandings);
309
+ const mergedTables = mergeCrossPageTables(pageElements);
310
+ const mergedFormulas = mergeCrossPageFormulas(pageElements);
311
+ const mergedFigures = mergeCrossPageFigures(pageElements);
310
312
  const artifact = {
311
313
  documentId: record.documentId,
312
314
  generatedAt: new Date().toISOString(),
@@ -1,2 +1,5 @@
1
- import type { LocalPageUnderstandingArtifact, LocalPageUnderstandingRequest } from "./types.js";
1
+ import type { LocalFigureArtifactItem, LocalPageUnderstandingArtifact, LocalPageUnderstandingFormulaItem, LocalPageUnderstandingRequest, LocalPageUnderstandingTableItem } from "./types.js";
2
+ export declare const normalizeFigureItems: (value: unknown) => LocalFigureArtifactItem[];
3
+ export declare const normalizeUnderstandingTables: (value: unknown) => LocalPageUnderstandingTableItem[];
4
+ export declare const normalizeUnderstandingFormulas: (value: unknown) => LocalPageUnderstandingFormulaItem[];
2
5
  export declare const get_page_understanding: (request: LocalPageUnderstandingRequest) => Promise<LocalPageUnderstandingArtifact>;
@@ -20,7 +20,7 @@ const DEFAULT_UNDERSTANDING_PROMPT = [
20
20
  "- Set truncatedTop/truncatedBottom to true if the element appears cut off at the page boundary.",
21
21
  '- If nothing is found for a category, return an empty array for that key.',
22
22
  ].join("\n");
23
- const normalizeFigureItems = (value) => {
23
+ export const normalizeFigureItems = (value) => {
24
24
  if (!Array.isArray(value))
25
25
  return [];
26
26
  return value.flatMap((item, index) => {
@@ -37,7 +37,7 @@ const normalizeFigureItems = (value) => {
37
37
  }];
38
38
  });
39
39
  };
40
- const normalizeUnderstandingTables = (value) => {
40
+ export const normalizeUnderstandingTables = (value) => {
41
41
  if (!Array.isArray(value))
42
42
  return [];
43
43
  return value.flatMap((item, index) => {
@@ -54,7 +54,7 @@ const normalizeUnderstandingTables = (value) => {
54
54
  }];
55
55
  });
56
56
  };
57
- const normalizeUnderstandingFormulas = (value) => {
57
+ export const normalizeUnderstandingFormulas = (value) => {
58
58
  if (!Array.isArray(value))
59
59
  return [];
60
60
  return value.flatMap((item, index) => {
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@echofiles/echo-pdf",
3
3
  "description": "Local-first PDF document component core with CLI, workspace artifacts, and reusable page primitives.",
4
- "version": "0.9.0",
4
+ "version": "0.10.0",
5
5
  "type": "module",
6
6
  "homepage": "https://pdf.echofile.ai/",
7
7
  "repository": {