npm - @echofiles/echo-pdf - Versions diffs - 0.9.0 → 0.10.0 - Mend

@echofiles/echo-pdf 0.9.0 → 0.10.0

This diff shows the changes between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Files changed (4) hide show

package/dist/local/semantic.js +46 -44
package/dist/local/understanding.d.ts +4 -1
package/dist/local/understanding.js +3 -3
package/package.json +1 -1

package/dist/local/semantic.js CHANGED Viewed

@@ -6,7 +6,7 @@ import { toDataUrl } from "../file-utils.js";
 import { generateText, visionRecognize } from "../provider-client.js";
 import { ensureRenderArtifact, indexDocumentInternal } from "./document.js";
 import { fileExists, matchesSourceSnapshot, matchesStrategyKey, pageLabel, parseJsonObject, readJson, resolveConfig, resolveEnv, writeJson, } from "./shared.js";
-import { get_page_understanding } from "./understanding.js";
+import { normalizeFigureItems, normalizeUnderstandingFormulas, normalizeUnderstandingTables } from "./understanding.js";
 const resolveSemanticExtractionBudget = (input) => ({
     pageSelection: "all",
     chunkMaxChars: typeof input?.chunkMaxChars === "number" && Number.isFinite(input.chunkMaxChars) && input.chunkMaxChars > 400
@@ -59,29 +59,32 @@ const toSemanticTree = (value, pageArtifactPaths) => {
     });
     return nodes;
 };
-const buildSemanticPageUnderstandingPrompt = (page, renderScale) => {
+const buildCombinedPagePrompt = (page, renderScale) => {
     return [
-        "You extract semantic heading candidates from one rendered PDF page.",
-        "Primary evidence is the page image layout. Use the extracted page text only as supporting context.",
+        "Analyze this rendered PDF page image. Extract headings, tables, formulas, and figures.",
+        "Primary evidence is the page image layout. Use the extracted page text as supporting context.",
         "Return JSON only.",
         "Schema:",
         "{",
-        '  "candidates": [',
-        "    {",
-        '      "title": "string",',
-        '      "level": 1,',
-        '      "excerpt": "short evidence string",',
-        '      "confidence": 0.0',
-        "    }",
-        "  ]",
+        '  "candidates": [{ "title": "string", "level": 1, "excerpt": "short evidence", "confidence": 0.0 }],',
+        '  "tables": [{ "latexTabular": "\\\\begin{tabular}...\\\\end{tabular}", "caption": "optional", "truncatedTop": false, "truncatedBottom": false }],',
+        '  "formulas": [{ "latexMath": "LaTeX expression", "label": "optional", "truncatedTop": false, "truncatedBottom": false }],',
+        '  "figures": [{ "figureType": "schematic|chart|photo|diagram|other", "caption": "optional", "description": "brief description", "truncatedTop": false, "truncatedBottom": false }]',
         "}",
-        "Rules:",
-        "- Use only true document headings/sections that are clearly supported by page layout plus text.",
-        "- Prefer conservative extraction over guessing.",
-        "- Do not include table column headers, field labels, figure labels, unit/value rows, worksheet fragments, or prose sentences.",
-        "- Do not infer hierarchy beyond the explicit heading numbering or structure visible on the page.",
-        "- Confidence should reflect how likely the candidate is to be a real navigational section heading in the document.",
-        '- If no reliable semantic structure is detectable, return {"candidates":[]}.',
+        "Heading rules:",
+        "- candidates: true document headings/sections supported by page layout plus text.",
+        "- Prefer conservative extraction. Do not include table headers, field labels, or prose sentences.",
+        "- Confidence reflects how likely the candidate is a real navigational section heading.",
+        "Table rules:",
+        "- Tables must be complete LaTeX tabular environments.",
+        "Formula rules:",
+        "- Use LaTeX math notation. Skip trivial inline math or single symbols.",
+        "Figure rules:",
+        "- Describe by type, caption, and brief visual description. Do not crop or encode images.",
+        "Truncation:",
+        "- Set truncatedTop/truncatedBottom to true if elements appear cut off at the page boundary.",
+        "Empty:",
+        '- If nothing found for a category, return an empty array for that key.',
         `Page number: ${page.pageNumber}`,
         `Render scale: ${renderScale}`,
         "",
@@ -131,7 +134,7 @@ const resolveSemanticAgentContext = (config, request) => {
     }
     return { provider, model };
 };
-const extractSemanticCandidatesFromRenderedPage = async (input) => {
+const extractCombinedPageData = async (input) => {
     const renderArtifact = await ensureRenderArtifact({
         pdfPath: input.request.pdfPath,
         workspaceDir: input.request.workspaceDir,
@@ -146,14 +149,26 @@ const extractSemanticCandidatesFromRenderedPage = async (input) => {
         env: input.env,
         providerAlias: input.provider,
         model: input.model,
-        prompt: buildSemanticPageUnderstandingPrompt(input.page, renderArtifact.renderScale),
+        prompt: buildCombinedPagePrompt(input.page, renderArtifact.renderScale),
         imageDataUrl,
         runtimeApiKeys: input.request.providerApiKeys,
     });
     const parsed = parseJsonObject(response);
-    return (Array.isArray(parsed?.candidates) ? parsed.candidates : [])
-        .map((candidate) => normalizeSemanticAgentCandidate(candidate, input.page.pageNumber))
-        .filter((candidate) => candidate !== null);
+    const candidates = (Array.isArray(parsed?.candidates) ? parsed.candidates : [])
+        .map((c) => normalizeSemanticAgentCandidate(c, input.page.pageNumber))
+        .filter((c) => c !== null);
+    const tables = normalizeUnderstandingTables(parsed?.tables);
+    const formulas = normalizeUnderstandingFormulas(parsed?.formulas);
+    const figures = normalizeFigureItems(parsed?.figures);
+    return {
+        candidates,
+        elements: {
+            pageNumber: input.page.pageNumber,
+            tables,
+            formulas,
+            figures,
+        },
+    };
 };
 const mergeCrossPageTables = (understandings) => {
     const merged = [];
@@ -262,8 +277,9 @@ const ensureSemanticStructureArtifact = async (request) => {
     }
     const pageArtifactPaths = new Map(pages.map((page) => [page.pageNumber, page.artifactPath]));
     const candidateMap = new Map();
+    const pageElements = [];
     for (const page of pages) {
-        const candidates = await extractSemanticCandidatesFromRenderedPage({
+        const result = await extractCombinedPageData({
             page,
             request,
             config,
@@ -271,28 +287,14 @@ const ensureSemanticStructureArtifact = async (request) => {
             provider,
             model,
         });
-        for (const candidate of candidates) {
+        for (const candidate of result.candidates) {
             const key = `${candidate.pageNumber}:${candidate.level}:${candidate.title}`;
             const existing = candidateMap.get(key);
             if (!existing || candidate.confidence > existing.confidence) {
                 candidateMap.set(key, candidate);
             }
         }
-    }
-    const understandings = [];
-    for (const page of pages) {
-        const pu = await get_page_understanding({
-            pdfPath: request.pdfPath,
-            workspaceDir: request.workspaceDir,
-            forceRefresh: request.forceRefresh,
-            config,
-            pageNumber: page.pageNumber,
-            provider,
-            model,
-            env,
-            providerApiKeys: request.providerApiKeys,
-        });
-        understandings.push(pu);
+        pageElements.push(result.elements);
     }
     const aggregated = await generateText({
         config,
@@ -304,9 +306,9 @@ const ensureSemanticStructureArtifact = async (request) => {
     });
     const parsed = parseJsonObject(aggregated);
     const sections = toSemanticTree(parsed?.sections, pageArtifactPaths);
-    const mergedTables = mergeCrossPageTables(understandings);
-    const mergedFormulas = mergeCrossPageFormulas(understandings);
-    const mergedFigures = mergeCrossPageFigures(understandings);
+    const mergedTables = mergeCrossPageTables(pageElements);
+    const mergedFormulas = mergeCrossPageFormulas(pageElements);
+    const mergedFigures = mergeCrossPageFigures(pageElements);
     const artifact = {
         documentId: record.documentId,
         generatedAt: new Date().toISOString(),

package/dist/local/understanding.d.ts CHANGED Viewed

@@ -1,2 +1,5 @@
-import type { LocalPageUnderstandingArtifact, LocalPageUnderstandingRequest } from "./types.js";
+import type { LocalFigureArtifactItem, LocalPageUnderstandingArtifact, LocalPageUnderstandingFormulaItem, LocalPageUnderstandingRequest, LocalPageUnderstandingTableItem } from "./types.js";
+export declare const normalizeFigureItems: (value: unknown) => LocalFigureArtifactItem[];
+export declare const normalizeUnderstandingTables: (value: unknown) => LocalPageUnderstandingTableItem[];
+export declare const normalizeUnderstandingFormulas: (value: unknown) => LocalPageUnderstandingFormulaItem[];
 export declare const get_page_understanding: (request: LocalPageUnderstandingRequest) => Promise<LocalPageUnderstandingArtifact>;

package/dist/local/understanding.js CHANGED Viewed

@@ -20,7 +20,7 @@ const DEFAULT_UNDERSTANDING_PROMPT = [
     "- Set truncatedTop/truncatedBottom to true if the element appears cut off at the page boundary.",
     '- If nothing is found for a category, return an empty array for that key.',
 ].join("\n");
-const normalizeFigureItems = (value) => {
+export const normalizeFigureItems = (value) => {
     if (!Array.isArray(value))
         return [];
     return value.flatMap((item, index) => {
@@ -37,7 +37,7 @@ const normalizeFigureItems = (value) => {
             }];
     });
 };
-const normalizeUnderstandingTables = (value) => {
+export const normalizeUnderstandingTables = (value) => {
     if (!Array.isArray(value))
         return [];
     return value.flatMap((item, index) => {
@@ -54,7 +54,7 @@ const normalizeUnderstandingTables = (value) => {
             }];
     });
 };
-const normalizeUnderstandingFormulas = (value) => {
+export const normalizeUnderstandingFormulas = (value) => {
     if (!Array.isArray(value))
         return [];
     return value.flatMap((item, index) => {

package/package.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "name": "@echofiles/echo-pdf",
   "description": "Local-first PDF document component core with CLI, workspace artifacts, and reusable page primitives.",
-  "version": "0.9.0",
+  "version": "0.10.0",
   "type": "module",
   "homepage": "https://pdf.echofile.ai/",
   "repository": {