@echofiles/echo-pdf 0.8.0 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -87,6 +87,7 @@ What these commands map to:
87
87
  - `render` -> `get_page_render`
88
88
  - `tables` -> `get_page_tables_latex`
89
89
  - `formulas` -> `get_page_formulas_latex`
90
+ - `understanding` -> `get_page_understanding`
90
91
 
91
92
  By default, `echo-pdf` writes reusable artifacts into a local workspace:
92
93
 
@@ -107,6 +108,8 @@ By default, `echo-pdf` writes reusable artifacts into a local workspace:
107
108
  0001.scale-2.provider-openai.model-gpt-4.1-mini.prompt-<hash>.json
108
109
  formulas/
109
110
  0001.scale-2.provider-openai.model-gpt-4.1-mini.prompt-<hash>.json
111
+ understanding/
112
+ 0001.scale-2.provider-openai.model-gpt-4.1-mini.prompt-<hash>.json
110
113
  ```
111
114
 
112
115
  These artifacts are meant to be inspected, cached, and reused by downstream local tools.
@@ -123,6 +126,7 @@ import {
123
126
  get_page_render,
124
127
  get_page_tables_latex,
125
128
  get_page_formulas_latex,
129
+ get_page_understanding,
126
130
  } from "@echofiles/echo-pdf/local"
127
131
 
128
132
  const document = await get_document({ pdfPath: "./sample.pdf" })
@@ -136,6 +140,7 @@ const page1 = await get_page_content({ pdfPath: "./sample.pdf", pageNumber: 1 })
136
140
  const render1 = await get_page_render({ pdfPath: "./sample.pdf", pageNumber: 1, scale: 2 })
137
141
  const tables = await get_page_tables_latex({ pdfPath: "./sample.pdf", pageNumber: 1, provider: "openai", model: "gpt-4.1-mini" })
138
142
  const formulas = await get_page_formulas_latex({ pdfPath: "./sample.pdf", pageNumber: 1, provider: "openai", model: "gpt-4.1-mini" })
143
+ const understanding = await get_page_understanding({ pdfPath: "./sample.pdf", pageNumber: 1, provider: "openai", model: "gpt-4.1-mini" })
139
144
  ```
140
145
 
141
146
  Notes:
package/bin/echo-pdf.js CHANGED
@@ -215,7 +215,7 @@ const loadLocalDocumentApi = async () => {
215
215
  return import(LOCAL_DOCUMENT_DIST_ENTRY.href)
216
216
  }
217
217
 
218
- const LOCAL_PRIMITIVE_COMMANDS = ["document", "structure", "semantic", "page", "render", "tables", "formulas"]
218
+ const LOCAL_PRIMITIVE_COMMANDS = ["document", "structure", "semantic", "page", "render", "tables", "formulas", "understanding"]
219
219
  const REMOVED_DOCUMENT_ALIAS_TO_PRIMITIVE = {
220
220
  index: "document",
221
221
  get: "document",
@@ -336,6 +336,23 @@ const runLocalPrimitiveCommand = async (command, subcommand, rest, flags) => {
336
336
  return
337
337
  }
338
338
 
339
+ if (primitive === "understanding") {
340
+ const semanticContext = resolveLocalSemanticContext(flags)
341
+ const local = await loadLocalDocumentApi()
342
+ print(await local.get_page_understanding({
343
+ pdfPath,
344
+ workspaceDir,
345
+ forceRefresh,
346
+ pageNumber,
347
+ renderScale,
348
+ provider: semanticContext.provider,
349
+ model: semanticContext.model,
350
+ providerApiKeys: semanticContext.providerApiKeys,
351
+ prompt: typeof flags.prompt === "string" ? flags.prompt : undefined,
352
+ }))
353
+ return
354
+ }
355
+
339
356
  throw new Error(`Unsupported local primitive command: ${primitive}`)
340
357
  }
341
358
 
@@ -349,6 +366,7 @@ const usage = () => {
349
366
  process.stdout.write(` render <file.pdf> --page <N> [--scale N] [--workspace DIR] [--force-refresh]\n`)
350
367
  process.stdout.write(` tables <file.pdf> --page <N> [--provider alias] [--model model] [--scale N] [--prompt text] [--workspace DIR] [--force-refresh]\n`)
351
368
  process.stdout.write(` formulas <file.pdf> --page <N> [--provider alias] [--model model] [--scale N] [--prompt text] [--workspace DIR] [--force-refresh]\n`)
369
+ process.stdout.write(` understanding <file.pdf> --page <N> [--provider alias] [--model model] [--scale N] [--prompt text] [--workspace DIR] [--force-refresh]\n`)
352
370
  process.stdout.write(`\nLocal config commands:\n`)
353
371
  process.stdout.write(` provider set --provider <${getProviderSetNames().join("|")}> --api-key <KEY> [--profile name]\n`)
354
372
  process.stdout.write(` provider use --provider <${getProviderAliases().join("|")}> [--profile name]\n`)
@@ -1,5 +1,6 @@
1
- export type { LocalDocumentArtifactPaths, LocalDocumentMetadata, LocalDocumentRequest, LocalDocumentStructure, LocalDocumentStructureNode, LocalFormulaArtifactItem, LocalPageContent, LocalPageContentRequest, LocalPageFormulasArtifact, LocalPageFormulasRequest, LocalPageRenderArtifact, LocalPageRenderRequest, LocalPageTablesArtifact, LocalPageTablesRequest, LocalSemanticDocumentRequest, LocalSemanticDocumentStructure, LocalSemanticStructureNode, LocalTableArtifactItem, } from "./types.js";
1
+ export type { LocalDocumentArtifactPaths, LocalDocumentMetadata, LocalDocumentRequest, LocalDocumentStructure, LocalDocumentStructureNode, LocalFigureArtifactItem, LocalFormulaArtifactItem, LocalPageContent, LocalPageContentRequest, LocalPageFormulasArtifact, LocalPageFormulasRequest, LocalPageRenderArtifact, LocalPageRenderRequest, LocalPageTablesArtifact, LocalPageTablesRequest, LocalPageUnderstandingArtifact, LocalPageUnderstandingRequest, LocalSemanticDocumentRequest, LocalSemanticDocumentStructure, LocalSemanticStructureNode, LocalTableArtifactItem, MergedFigureItem, MergedFormulaItem, MergedTableItem, } from "./types.js";
2
2
  export { get_document, get_document_structure, get_page_content, get_page_render } from "./document.js";
3
3
  export { get_page_formulas_latex } from "./formulas.js";
4
4
  export { get_semantic_document_structure } from "./semantic.js";
5
5
  export { get_page_tables_latex } from "./tables.js";
6
+ export { get_page_understanding } from "./understanding.js";
@@ -2,3 +2,4 @@ export { get_document, get_document_structure, get_page_content, get_page_render
2
2
  export { get_page_formulas_latex } from "./formulas.js";
3
3
  export { get_semantic_document_structure } from "./semantic.js";
4
4
  export { get_page_tables_latex } from "./tables.js";
5
+ export { get_page_understanding } from "./understanding.js";
@@ -6,6 +6,7 @@ import { toDataUrl } from "../file-utils.js";
6
6
  import { generateText, visionRecognize } from "../provider-client.js";
7
7
  import { ensureRenderArtifact, indexDocumentInternal } from "./document.js";
8
8
  import { fileExists, matchesSourceSnapshot, matchesStrategyKey, pageLabel, parseJsonObject, readJson, resolveConfig, resolveEnv, writeJson, } from "./shared.js";
9
+ import { normalizeFigureItems, normalizeUnderstandingFormulas, normalizeUnderstandingTables } from "./understanding.js";
9
10
  const resolveSemanticExtractionBudget = (input) => ({
10
11
  pageSelection: "all",
11
12
  chunkMaxChars: typeof input?.chunkMaxChars === "number" && Number.isFinite(input.chunkMaxChars) && input.chunkMaxChars > 400
@@ -58,29 +59,32 @@ const toSemanticTree = (value, pageArtifactPaths) => {
58
59
  });
59
60
  return nodes;
60
61
  };
61
- const buildSemanticPageUnderstandingPrompt = (page, renderScale) => {
62
+ const buildCombinedPagePrompt = (page, renderScale) => {
62
63
  return [
63
- "You extract semantic heading candidates from one rendered PDF page.",
64
- "Primary evidence is the page image layout. Use the extracted page text only as supporting context.",
64
+ "Analyze this rendered PDF page image. Extract headings, tables, formulas, and figures.",
65
+ "Primary evidence is the page image layout. Use the extracted page text as supporting context.",
65
66
  "Return JSON only.",
66
67
  "Schema:",
67
68
  "{",
68
- ' "candidates": [',
69
- " {",
70
- ' "title": "string",',
71
- ' "level": 1,',
72
- ' "excerpt": "short evidence string",',
73
- ' "confidence": 0.0',
74
- " }",
75
- " ]",
69
+ ' "candidates": [{ "title": "string", "level": 1, "excerpt": "short evidence", "confidence": 0.0 }],',
70
+ ' "tables": [{ "latexTabular": "\\\\begin{tabular}...\\\\end{tabular}", "caption": "optional", "truncatedTop": false, "truncatedBottom": false }],',
71
+ ' "formulas": [{ "latexMath": "LaTeX expression", "label": "optional", "truncatedTop": false, "truncatedBottom": false }],',
72
+ ' "figures": [{ "figureType": "schematic|chart|photo|diagram|other", "caption": "optional", "description": "brief description", "truncatedTop": false, "truncatedBottom": false }]',
76
73
  "}",
77
- "Rules:",
78
- "- Use only true document headings/sections that are clearly supported by page layout plus text.",
79
- "- Prefer conservative extraction over guessing.",
80
- "- Do not include table column headers, field labels, figure labels, unit/value rows, worksheet fragments, or prose sentences.",
81
- "- Do not infer hierarchy beyond the explicit heading numbering or structure visible on the page.",
82
- "- Confidence should reflect how likely the candidate is to be a real navigational section heading in the document.",
83
- '- If no reliable semantic structure is detectable, return {"candidates":[]}.',
74
+ "Heading rules:",
75
+ "- candidates: true document headings/sections supported by page layout plus text.",
76
+ "- Prefer conservative extraction. Do not include table headers, field labels, or prose sentences.",
77
+ "- Confidence reflects how likely the candidate is a real navigational section heading.",
78
+ "Table rules:",
79
+ "- Tables must be complete LaTeX tabular environments.",
80
+ "Formula rules:",
81
+ "- Use LaTeX math notation. Skip trivial inline math or single symbols.",
82
+ "Figure rules:",
83
+ "- Describe by type, caption, and brief visual description. Do not crop or encode images.",
84
+ "Truncation:",
85
+ "- Set truncatedTop/truncatedBottom to true if elements appear cut off at the page boundary.",
86
+ "Empty:",
87
+ '- If nothing found for a category, return an empty array for that key.',
84
88
  `Page number: ${page.pageNumber}`,
85
89
  `Render scale: ${renderScale}`,
86
90
  "",
@@ -130,7 +134,7 @@ const resolveSemanticAgentContext = (config, request) => {
130
134
  }
131
135
  return { provider, model };
132
136
  };
133
- const extractSemanticCandidatesFromRenderedPage = async (input) => {
137
+ const extractCombinedPageData = async (input) => {
134
138
  const renderArtifact = await ensureRenderArtifact({
135
139
  pdfPath: input.request.pdfPath,
136
140
  workspaceDir: input.request.workspaceDir,
@@ -145,14 +149,108 @@ const extractSemanticCandidatesFromRenderedPage = async (input) => {
145
149
  env: input.env,
146
150
  providerAlias: input.provider,
147
151
  model: input.model,
148
- prompt: buildSemanticPageUnderstandingPrompt(input.page, renderArtifact.renderScale),
152
+ prompt: buildCombinedPagePrompt(input.page, renderArtifact.renderScale),
149
153
  imageDataUrl,
150
154
  runtimeApiKeys: input.request.providerApiKeys,
151
155
  });
152
156
  const parsed = parseJsonObject(response);
153
- return (Array.isArray(parsed?.candidates) ? parsed.candidates : [])
154
- .map((candidate) => normalizeSemanticAgentCandidate(candidate, input.page.pageNumber))
155
- .filter((candidate) => candidate !== null);
157
+ const candidates = (Array.isArray(parsed?.candidates) ? parsed.candidates : [])
158
+ .map((c) => normalizeSemanticAgentCandidate(c, input.page.pageNumber))
159
+ .filter((c) => c !== null);
160
+ const tables = normalizeUnderstandingTables(parsed?.tables);
161
+ const formulas = normalizeUnderstandingFormulas(parsed?.formulas);
162
+ const figures = normalizeFigureItems(parsed?.figures);
163
+ return {
164
+ candidates,
165
+ elements: {
166
+ pageNumber: input.page.pageNumber,
167
+ tables,
168
+ formulas,
169
+ figures,
170
+ },
171
+ };
172
+ };
/**
 * Stitches per-page table fragments into document-level tables.
 *
 * A fragment is appended to the previous merged table only when all of the
 * following hold:
 *   - the fragment most recently added to that table was flagged
 *     `truncatedBottom`,
 *   - the current fragment is flagged `truncatedTop`,
 *   - the current fragment sits on the page directly after the merged
 *     table's `endPage`.
 * Otherwise a new merged entry is started.
 *
 * @param {Array<{pageNumber: number, tables: Array<object>}>} understandings
 *   Per-page extraction results, in page order.
 * @returns {Array<object>} Merged tables with `id`, `latexTabular`, `caption`,
 *   `startPage`, `endPage` and an optional `crossPageHint` (set to `true`
 *   when the first fragment was cut off at the bottom of its page).
 */
const mergeCrossPageTables = (understandings) => {
    const merged = [];
    let nextId = 1;
    // Tracks whether the *latest* fragment of the last merged table was cut
    // off at the page bottom. The emitted `crossPageHint` cannot serve this
    // purpose: it is copied forward on every merge and would keep the table
    // "open" forever, swallowing later unrelated fragments.
    let awaitingContinuation = false;
    for (const pu of understandings) {
        for (const table of pu.tables) {
            const prev = merged[merged.length - 1];
            const continuesPrev = awaitingContinuation
                && prev !== undefined
                && Boolean(table.truncatedTop)
                // A continuation must live on the very next page; a gap page
                // (or the same page) means the fragments are unrelated.
                && pu.pageNumber === prev.endPage + 1;
            if (continuesPrev) {
                merged[merged.length - 1] = {
                    ...prev,
                    latexTabular: `${prev.latexTabular}\n${table.latexTabular}`,
                    endPage: pu.pageNumber,
                };
            }
            else {
                merged.push({
                    id: `merged-table-${nextId++}`,
                    latexTabular: table.latexTabular,
                    caption: table.caption,
                    startPage: pu.pageNumber,
                    endPage: pu.pageNumber,
                    crossPageHint: table.truncatedBottom === true ? true : undefined,
                });
            }
            awaitingContinuation = table.truncatedBottom === true;
        }
    }
    return merged;
};
/**
 * Stitches per-page formula fragments into document-level formulas.
 *
 * A fragment is appended (space-separated) to the previous merged formula
 * only when the fragment most recently added to it was flagged
 * `truncatedBottom`, the current fragment is flagged `truncatedTop`, and the
 * current fragment is on the page directly after the merged formula's
 * `endPage`. Otherwise a new merged entry is started.
 *
 * @param {Array<{pageNumber: number, formulas: Array<object>}>} understandings
 *   Per-page extraction results, in page order.
 * @returns {Array<object>} Merged formulas with `id`, `latexMath`, `label`,
 *   `startPage`, `endPage` and an optional `crossPageHint` (set to `true`
 *   when the first fragment was cut off at the bottom of its page).
 */
const mergeCrossPageFormulas = (understandings) => {
    const merged = [];
    let nextId = 1;
    // Whether the latest fragment of the last merged formula was cut off at
    // the page bottom. The emitted `crossPageHint` is copied forward on every
    // merge, so it cannot tell when a formula has actually finished.
    let awaitingContinuation = false;
    for (const pu of understandings) {
        for (const formula of pu.formulas) {
            const prev = merged[merged.length - 1];
            const continuesPrev = awaitingContinuation
                && prev !== undefined
                && Boolean(formula.truncatedTop)
                // Only the immediately following page can hold a continuation.
                && pu.pageNumber === prev.endPage + 1;
            if (continuesPrev) {
                merged[merged.length - 1] = {
                    ...prev,
                    latexMath: `${prev.latexMath} ${formula.latexMath}`,
                    endPage: pu.pageNumber,
                };
            }
            else {
                merged.push({
                    id: `merged-formula-${nextId++}`,
                    latexMath: formula.latexMath,
                    label: formula.label,
                    startPage: pu.pageNumber,
                    endPage: pu.pageNumber,
                    crossPageHint: formula.truncatedBottom === true ? true : undefined,
                });
            }
            awaitingContinuation = formula.truncatedBottom === true;
        }
    }
    return merged;
};
/**
 * Stitches per-page figure fragments into document-level figures.
 *
 * A fragment is folded into the previous merged figure only when the
 * fragment most recently added to it was flagged `truncatedBottom`, the
 * current fragment is flagged `truncatedTop`, and the current fragment is on
 * the page directly after the merged figure's `endPage`. When folding, the
 * non-empty descriptions are joined with a single space; `figureType` and
 * `caption` of the first fragment are kept.
 *
 * @param {Array<{pageNumber: number, figures: Array<object>}>} understandings
 *   Per-page extraction results, in page order.
 * @returns {Array<object>} Merged figures with `id`, `figureType`, `caption`,
 *   `description`, `startPage`, `endPage` and an optional `crossPageHint`
 *   (set to `true` when the first fragment was cut off at the page bottom).
 */
const mergeCrossPageFigures = (understandings) => {
    const merged = [];
    let nextId = 1;
    // Whether the latest fragment of the last merged figure was cut off at
    // the page bottom. The emitted `crossPageHint` is copied forward on every
    // merge, so it cannot tell when a figure has actually finished.
    let awaitingContinuation = false;
    for (const pu of understandings) {
        for (const figure of pu.figures) {
            const prev = merged[merged.length - 1];
            const continuesPrev = awaitingContinuation
                && prev !== undefined
                && Boolean(figure.truncatedTop)
                // Only the immediately following page can hold a continuation.
                && pu.pageNumber === prev.endPage + 1;
            if (continuesPrev) {
                merged[merged.length - 1] = {
                    ...prev,
                    description: [prev.description, figure.description].filter(Boolean).join(" "),
                    endPage: pu.pageNumber,
                };
            }
            else {
                merged.push({
                    id: `merged-figure-${nextId++}`,
                    figureType: figure.figureType,
                    caption: figure.caption,
                    description: figure.description,
                    startPage: pu.pageNumber,
                    endPage: pu.pageNumber,
                    crossPageHint: figure.truncatedBottom === true ? true : undefined,
                });
            }
            awaitingContinuation = figure.truncatedBottom === true;
        }
    }
    return merged;
};
157
255
  const ensureSemanticStructureArtifact = async (request) => {
158
256
  const env = resolveEnv(request.env);
@@ -179,8 +277,9 @@ const ensureSemanticStructureArtifact = async (request) => {
179
277
  }
180
278
  const pageArtifactPaths = new Map(pages.map((page) => [page.pageNumber, page.artifactPath]));
181
279
  const candidateMap = new Map();
280
+ const pageElements = [];
182
281
  for (const page of pages) {
183
- const candidates = await extractSemanticCandidatesFromRenderedPage({
282
+ const result = await extractCombinedPageData({
184
283
  page,
185
284
  request,
186
285
  config,
@@ -188,13 +287,14 @@ const ensureSemanticStructureArtifact = async (request) => {
188
287
  provider,
189
288
  model,
190
289
  });
191
- for (const candidate of candidates) {
290
+ for (const candidate of result.candidates) {
192
291
  const key = `${candidate.pageNumber}:${candidate.level}:${candidate.title}`;
193
292
  const existing = candidateMap.get(key);
194
293
  if (!existing || candidate.confidence > existing.confidence) {
195
294
  candidateMap.set(key, candidate);
196
295
  }
197
296
  }
297
+ pageElements.push(result.elements);
198
298
  }
199
299
  const aggregated = await generateText({
200
300
  config,
@@ -206,6 +306,9 @@ const ensureSemanticStructureArtifact = async (request) => {
206
306
  });
207
307
  const parsed = parseJsonObject(aggregated);
208
308
  const sections = toSemanticTree(parsed?.sections, pageArtifactPaths);
309
+ const mergedTables = mergeCrossPageTables(pageElements);
310
+ const mergedFormulas = mergeCrossPageFormulas(pageElements);
311
+ const mergedFigures = mergeCrossPageFigures(pageElements);
209
312
  const artifact = {
210
313
  documentId: record.documentId,
211
314
  generatedAt: new Date().toISOString(),
@@ -221,6 +324,9 @@ const ensureSemanticStructureArtifact = async (request) => {
221
324
  title: record.filename,
222
325
  children: sections,
223
326
  },
327
+ ...(mergedTables.length > 0 ? { tables: mergedTables } : {}),
328
+ ...(mergedFormulas.length > 0 ? { formulas: mergedFormulas } : {}),
329
+ ...(mergedFigures.length > 0 ? { figures: mergedFigures } : {}),
224
330
  };
225
331
  await writeJson(artifactPath, artifact);
226
332
  return {
@@ -56,6 +56,9 @@ export interface LocalSemanticDocumentStructure {
56
56
  readonly pageIndexArtifactPath: string;
57
57
  readonly artifactPath: string;
58
58
  readonly root: LocalSemanticStructureNode;
59
+ readonly tables?: ReadonlyArray<MergedTableItem>;
60
+ readonly formulas?: ReadonlyArray<MergedFormulaItem>;
61
+ readonly figures?: ReadonlyArray<MergedFigureItem>;
59
62
  readonly cacheStatus: "fresh" | "reused";
60
63
  }
61
64
  export interface LocalPageContent {
@@ -127,6 +130,79 @@ export interface LocalPageFormulasArtifact {
127
130
  readonly formulas: ReadonlyArray<LocalFormulaArtifactItem>;
128
131
  readonly cacheStatus: "fresh" | "reused";
129
132
  }
/**
 * One figure detected on a single rendered page.
 *
 * Figures are described textually only; no image data is carried.
 */
export interface LocalFigureArtifactItem {
    /** Page-local identifier of the form `figure-<n>`. */
    readonly id: string;
    /** Coarse visual category; unrecognized values are normalized to `"other"`. */
    readonly figureType: "schematic" | "chart" | "photo" | "diagram" | "other";
    /** Caption text, when the provider returned one. */
    readonly caption?: string;
    /** Brief description of the figure, when the provider returned one. */
    readonly description?: string;
    /** `true` when the figure appears cut off at the top of the page. */
    readonly truncatedTop?: boolean;
    /** `true` when the figure appears cut off at the bottom of the page. */
    readonly truncatedBottom?: boolean;
}
/** One table detected on a single rendered page, expressed as LaTeX. */
export interface LocalPageUnderstandingTableItem {
    /** Page-local identifier of the form `table-<n>`. */
    readonly id: string;
    /** LaTeX source containing a `tabular` environment. */
    readonly latexTabular: string;
    /** Caption text, when the provider returned one. */
    readonly caption?: string;
    /** `true` when the table appears cut off at the top of the page. */
    readonly truncatedTop?: boolean;
    /** `true` when the table appears cut off at the bottom of the page. */
    readonly truncatedBottom?: boolean;
}
/** One formula detected on a single rendered page, expressed as LaTeX math. */
export interface LocalPageUnderstandingFormulaItem {
    /** Page-local identifier of the form `formula-<n>`. */
    readonly id: string;
    /** Non-empty LaTeX math source of the formula. */
    readonly latexMath: string;
    /** Label text, when the provider returned one. */
    readonly label?: string;
    /** `true` when the formula appears cut off at the top of the page. */
    readonly truncatedTop?: boolean;
    /** `true` when the formula appears cut off at the bottom of the page. */
    readonly truncatedBottom?: boolean;
}
/**
 * Result of `get_page_understanding` for one page: the extracted tables,
 * formulas and figures plus the provenance needed to cache and reuse it.
 */
export interface LocalPageUnderstandingArtifact {
    /** Identifier of the indexed document the page belongs to. */
    readonly documentId: string;
    /** Page the artifact describes. */
    readonly pageNumber: number;
    /** Render scale the extraction was keyed on. */
    readonly renderScale: number;
    /** Size of the source PDF recorded when the artifact was generated. */
    readonly sourceSizeBytes: number;
    /** Modification time of the source PDF recorded when the artifact was generated. */
    readonly sourceMtimeMs: number;
    /** Provider alias used for the vision request. */
    readonly provider: string;
    /** Model used for the vision request. */
    readonly model: string;
    /** Prompt that was sent (custom prompt, or the built-in default). */
    readonly prompt: string;
    /** Path of the rendered page image that was analyzed. */
    readonly imagePath: string;
    /** Path of the page-content artifact for this page. */
    readonly pageArtifactPath: string;
    /** Path of the render artifact the image came from. */
    readonly renderArtifactPath: string;
    /** Path this artifact is stored at. */
    readonly artifactPath: string;
    /** ISO-8601 timestamp of generation. */
    readonly generatedAt: string;
    /** Tables found on the page. */
    readonly tables: readonly LocalPageUnderstandingTableItem[];
    /** Formulas found on the page. */
    readonly formulas: readonly LocalPageUnderstandingFormulaItem[];
    /** Figures found on the page. */
    readonly figures: readonly LocalFigureArtifactItem[];
    /** `"reused"` when served from an existing artifact file, `"fresh"` when newly generated. */
    readonly cacheStatus: "fresh" | "reused";
}
/**
 * Input for `get_page_understanding`: a page render request extended with
 * provider selection and prompt options.
 */
export interface LocalPageUnderstandingRequest extends LocalPageRenderRequest {
    /** Provider alias to use; resolved from configuration when omitted. */
    readonly provider?: string;
    /** Model to use; resolved from configuration when omitted. */
    readonly model?: string;
    /** Custom prompt; a blank or missing value selects the built-in default prompt. */
    readonly prompt?: string;
    /** Environment override used when resolving configuration. */
    readonly env?: Env;
    /** API keys supplied at call time and forwarded to the provider client. */
    readonly providerApiKeys?: Record<string, string>;
}
/** A document-level table, possibly stitched together from fragments on consecutive pages. */
export interface MergedTableItem {
    /** Document-level identifier of the form `merged-table-<n>`. */
    readonly id: string;
    /** LaTeX source; fragments from several pages are joined with newlines. */
    readonly latexTabular: string;
    /** Caption of the first fragment, when present. */
    readonly caption?: string;
    /** Page on which the table starts. */
    readonly startPage: number;
    /** Page on which the last merged fragment was found. */
    readonly endPage: number;
    /** `true` when the first fragment was flagged as cut off at the bottom of its page. */
    readonly crossPageHint?: boolean;
}
/** A document-level formula, possibly stitched together from fragments on consecutive pages. */
export interface MergedFormulaItem {
    /** Document-level identifier of the form `merged-formula-<n>`. */
    readonly id: string;
    /** LaTeX math source; fragments from several pages are joined with spaces. */
    readonly latexMath: string;
    /** Label of the first fragment, when present. */
    readonly label?: string;
    /** Page on which the formula starts. */
    readonly startPage: number;
    /** Page on which the last merged fragment was found. */
    readonly endPage: number;
    /** `true` when the first fragment was flagged as cut off at the bottom of its page. */
    readonly crossPageHint?: boolean;
}
/** A document-level figure, possibly stitched together from fragments on consecutive pages. */
export interface MergedFigureItem {
    /** Document-level identifier of the form `merged-figure-<n>`. */
    readonly id: string;
    /** Visual category taken from the first fragment. */
    readonly figureType: "schematic" | "chart" | "photo" | "diagram" | "other";
    /** Caption of the first fragment, when present. */
    readonly caption?: string;
    /** Description; descriptions of merged fragments are joined with spaces. */
    readonly description?: string;
    /** Page on which the figure starts. */
    readonly startPage: number;
    /** Page on which the last merged fragment was found. */
    readonly endPage: number;
    /** `true` when the first fragment was flagged as cut off at the bottom of its page. */
    readonly crossPageHint?: boolean;
}
130
206
  export interface LocalDocumentRequest {
131
207
  readonly pdfPath: string;
132
208
  readonly workspaceDir?: string;
@@ -0,0 +1,5 @@
1
+ import type { LocalFigureArtifactItem, LocalPageUnderstandingArtifact, LocalPageUnderstandingFormulaItem, LocalPageUnderstandingRequest, LocalPageUnderstandingTableItem } from "./types.js";
/**
 * Converts the raw `figures` value of a provider response into figure items.
 * Returns an empty array when `value` is not an array.
 */
export declare const normalizeFigureItems: (value: unknown) => Array<LocalFigureArtifactItem>;
/**
 * Converts the raw `tables` value of a provider response into table items.
 * Returns an empty array when `value` is not an array; entries whose LaTeX
 * lacks a `tabular` environment are dropped.
 */
export declare const normalizeUnderstandingTables: (value: unknown) => Array<LocalPageUnderstandingTableItem>;
/**
 * Converts the raw `formulas` value of a provider response into formula items.
 * Returns an empty array when `value` is not an array; entries without a
 * non-empty `latexMath` string are dropped.
 */
export declare const normalizeUnderstandingFormulas: (value: unknown) => Array<LocalPageUnderstandingFormulaItem>;
/**
 * Extracts tables, formulas and figures from one rendered PDF page.
 * Resolves with the stored artifact; `cacheStatus` is `"reused"` when an
 * existing artifact was returned and `"fresh"` when a new one was generated.
 */
export declare const get_page_understanding: (request: LocalPageUnderstandingRequest) => Promise<LocalPageUnderstandingArtifact>;
@@ -0,0 +1,136 @@
1
+ /// <reference path="../node/compat.d.ts" />
2
+ import { readFile } from "node:fs/promises";
3
+ import path from "node:path";
4
+ import { toDataUrl } from "../file-utils.js";
5
+ import { visionRecognize } from "../provider-client.js";
6
+ import { ensureRenderArtifact, indexDocumentInternal } from "./document.js";
7
+ import { buildStructuredArtifactPath, ensurePageNumber, fileExists, matchesSourceSnapshot, pageLabel, parseJsonObject, readJson, resolveAgentSelection, resolveConfig, resolveEnv, resolveRenderScale, stripCodeFences, writeJson, } from "./shared.js";
/**
 * Built-in prompt used by `get_page_understanding` when the caller supplies
 * no prompt of their own. Assembled from three parts (task statement, JSON
 * schema, extraction rules) and joined with newlines.
 */
const DEFAULT_UNDERSTANDING_PROMPT = (() => {
    const taskLines = [
        "Analyze this rendered PDF page image. Extract all tables, displayed formulas, and figures.",
        "Return JSON only. Schema:",
    ];
    const schemaLines = [
        "{",
        ' "tables": [{ "latexTabular": "\\\\begin{tabular}...\\\\end{tabular}", "caption": "optional", "truncatedTop": false, "truncatedBottom": false }],',
        ' "formulas": [{ "latexMath": "LaTeX expression", "label": "optional", "truncatedTop": false, "truncatedBottom": false }],',
        ' "figures": [{ "figureType": "schematic|chart|photo|diagram|other", "caption": "optional", "description": "brief visual description", "truncatedTop": false, "truncatedBottom": false }]',
        "}",
    ];
    const ruleLines = [
        "Rules:",
        "- Tables must be complete LaTeX tabular environments.",
        "- Formulas must use LaTeX math notation. Skip trivial inline math or single symbols.",
        "- Figures should be described by type, caption, and a brief visual description. Do not crop or encode images.",
        "- Set truncatedTop/truncatedBottom to true if the element appears cut off at the page boundary.",
        '- If nothing is found for a category, return an empty array for that key.',
    ];
    return [...taskLines, ...schemaLines, ...ruleLines].join("\n");
})();
/**
 * Converts the raw `figures` value of a provider response into figure items.
 *
 * The value comes from parsed model output, so nothing about its shape is
 * trusted: a non-array yields `[]`, and entries that are not objects
 * (`null`, strings, numbers, ...) are skipped instead of crashing on
 * property access or producing empty phantom figures.
 *
 * @param {unknown} value Raw `figures` value from the parsed response.
 * @returns {Array<object>} Figure items with `id` (`figure-<n>`, where `n` is
 *   the 1-based position in the raw array), `figureType`, optional `caption`
 *   and `description`, and boolean `truncatedTop` / `truncatedBottom`.
 */
export const normalizeFigureItems = (value) => {
    if (!Array.isArray(value))
        return [];
    // Built once per call rather than once per item.
    const validTypes = new Set(["schematic", "chart", "photo", "diagram", "other"]);
    return value.flatMap((item, index) => {
        if (item === null || typeof item !== "object")
            return [];
        // Case-insensitive match so "Chart" is not silently downgraded to "other".
        const figureType = typeof item.figureType === "string"
            ? item.figureType.trim().toLowerCase()
            : "other";
        return [{
                id: `figure-${index + 1}`,
                figureType: validTypes.has(figureType) ? figureType : "other",
                caption: typeof item.caption === "string" ? item.caption.trim() : undefined,
                description: typeof item.description === "string" ? item.description.trim() : undefined,
                truncatedTop: item.truncatedTop === true,
                truncatedBottom: item.truncatedBottom === true,
            }];
    });
};
/**
 * Converts the raw `tables` value of a provider response into table items.
 *
 * The value comes from parsed model output, so its shape is not trusted:
 * a non-array yields `[]`, entries that are not objects are skipped instead
 * of crashing on property access, and entries whose LaTeX (after stripping
 * code fences and trimming) lacks `\begin{tabular}` or `\end{tabular}` are
 * dropped.
 *
 * @param {unknown} value Raw `tables` value from the parsed response.
 * @returns {Array<object>} Table items with `id` (`table-<n>`, where `n` is
 *   the 1-based position in the raw array), `latexTabular`, optional
 *   `caption`, and boolean `truncatedTop` / `truncatedBottom`.
 */
export const normalizeUnderstandingTables = (value) => {
    if (!Array.isArray(value))
        return [];
    return value.flatMap((item, index) => {
        if (item === null || typeof item !== "object")
            return [];
        const latexTabular = typeof item.latexTabular === "string"
            ? stripCodeFences(item.latexTabular).trim()
            : "";
        if (!latexTabular.includes("\\begin{tabular}") || !latexTabular.includes("\\end{tabular}"))
            return [];
        return [{
                id: `table-${index + 1}`,
                latexTabular,
                caption: typeof item.caption === "string" ? item.caption.trim() : undefined,
                truncatedTop: item.truncatedTop === true,
                truncatedBottom: item.truncatedBottom === true,
            }];
    });
};
/**
 * Converts the raw `formulas` value of a provider response into formula items.
 *
 * The value comes from parsed model output, so its shape is not trusted:
 * a non-array yields `[]`, entries that are not objects are skipped instead
 * of crashing on property access, and entries whose `latexMath` is missing
 * or empty (after stripping code fences and trimming) are dropped.
 *
 * @param {unknown} value Raw `formulas` value from the parsed response.
 * @returns {Array<object>} Formula items with `id` (`formula-<n>`, where `n`
 *   is the 1-based position in the raw array), `latexMath`, optional `label`,
 *   and boolean `truncatedTop` / `truncatedBottom`.
 */
export const normalizeUnderstandingFormulas = (value) => {
    if (!Array.isArray(value))
        return [];
    return value.flatMap((item, index) => {
        if (item === null || typeof item !== "object")
            return [];
        const latexMath = typeof item.latexMath === "string"
            ? stripCodeFences(item.latexMath).trim()
            : "";
        if (!latexMath)
            return [];
        return [{
                id: `formula-${index + 1}`,
                latexMath,
                label: typeof item.label === "string" ? item.label.trim() : undefined,
                truncatedTop: item.truncatedTop === true,
                truncatedBottom: item.truncatedBottom === true,
            }];
    });
};
/**
 * Extracts tables, formulas and figures from one PDF page via a vision model
 * and stores the result as a reusable workspace artifact.
 *
 * Flow: resolve env/config, index the document, validate the page number,
 * pick provider/model/scale/prompt, then either return a stored artifact or
 * render the page, query the provider, normalize the response and write a
 * new artifact under `<documentDir>/understanding/`.
 *
 * @param {object} request Page-understanding request (PDF path, page number,
 *   optional workspace, render scale, provider, model, prompt, API keys,
 *   `forceRefresh`).
 * @returns {Promise<object>} The artifact plus `cacheStatus`: `"reused"` when
 *   a stored artifact was returned, `"fresh"` when one was just generated.
 */
export const get_page_understanding = async (request) => {
    const env = resolveEnv(request.env);
    const config = resolveConfig(request.config, env);
    const indexed = await indexDocumentInternal(request);
    const record = indexed.record;
    ensurePageNumber(record.pageCount, request.pageNumber);
    const selection = resolveAgentSelection(config, request);
    const provider = selection.provider;
    const model = selection.model;
    const renderScale = resolveRenderScale(config, request.renderScale);
    // A blank or non-string prompt falls back to the built-in default.
    const customPrompt = typeof request.prompt === "string" ? request.prompt.trim() : "";
    const prompt = customPrompt.length > 0 ? customPrompt : DEFAULT_UNDERSTANDING_PROMPT;
    const artifactPath = buildStructuredArtifactPath(path.join(record.artifactPaths.documentDir, "understanding"), request.pageNumber, renderScale, provider, model, prompt);
    // Reuse a stored artifact unless the caller forces a refresh or the
    // stored snapshot no longer matches the indexed source record.
    const mayReuse = !request.forceRefresh && await fileExists(artifactPath);
    if (mayReuse) {
        const stored = await readJson(artifactPath);
        if (matchesSourceSnapshot(stored, record)) {
            return { ...stored, cacheStatus: "reused" };
        }
    }
    const rendered = await ensureRenderArtifact({
        pdfPath: request.pdfPath,
        workspaceDir: request.workspaceDir,
        forceRefresh: request.forceRefresh,
        config,
        pageNumber: request.pageNumber,
        renderScale: request.renderScale,
    });
    // The provider receives the rendered page as a data URL.
    const imageDataUrl = toDataUrl(new Uint8Array(await readFile(rendered.imagePath)), rendered.mimeType);
    const rawResponse = await visionRecognize({
        config,
        env,
        providerAlias: provider,
        model,
        prompt,
        imageDataUrl,
        runtimeApiKeys: request.providerApiKeys,
    });
    const payload = parseJsonObject(rawResponse);
    const artifact = {
        documentId: record.documentId,
        pageNumber: request.pageNumber,
        renderScale,
        sourceSizeBytes: record.sizeBytes,
        sourceMtimeMs: record.mtimeMs,
        provider,
        model,
        prompt,
        imagePath: rendered.imagePath,
        pageArtifactPath: path.join(record.artifactPaths.pagesDir, `${pageLabel(request.pageNumber)}.json`),
        renderArtifactPath: rendered.artifactPath,
        artifactPath,
        generatedAt: new Date().toISOString(),
        tables: normalizeUnderstandingTables(payload?.tables),
        formulas: normalizeUnderstandingFormulas(payload?.formulas),
        figures: normalizeFigureItems(payload?.figures),
    };
    await writeJson(artifactPath, artifact);
    return { ...artifact, cacheStatus: "fresh" };
};
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@echofiles/echo-pdf",
3
3
  "description": "Local-first PDF document component core with CLI, workspace artifacts, and reusable page primitives.",
4
- "version": "0.8.0",
4
+ "version": "0.10.0",
5
5
  "type": "module",
6
6
  "homepage": "https://pdf.echofile.ai/",
7
7
  "repository": {