@echofiles/echo-pdf 0.11.2 → 0.11.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -187,12 +187,12 @@ Published docs site:
187
187
  ## Development
188
188
 
189
189
  ```bash
190
- npm ci
191
- npm run build
192
- npm run typecheck
193
- npm run test:unit
194
- npm run test:acceptance
195
- npm run test:integration
190
+ bun install --frozen-lockfile
191
+ bun run build
192
+ bun run typecheck
193
+ bun run test:unit
194
+ bun run test:acceptance
195
+ bun run test:integration
196
196
  ```
197
197
 
198
198
  For source-checkout CLI development and repo-local workflows, see [docs/DEVELOPMENT.md](./docs/DEVELOPMENT.md).
package/bin/echo-pdf.js CHANGED
@@ -203,13 +203,13 @@ const loadLocalDocumentApi = async () => {
203
203
  }
204
204
  throw new Error(
205
205
  "Internal source-checkout CLI dev mode requires Bun and src/local/index.ts. " +
206
- "Use `npm run cli:dev -- <primitive> ...` only from a source checkout."
206
+ "Use `bun run cli:dev -- <primitive> ...` only from a source checkout."
207
207
  )
208
208
  }
209
209
  if (!fs.existsSync(LOCAL_DOCUMENT_DIST_PATH)) {
210
210
  throw new Error(
211
211
  "Local primitive commands require built artifacts in a source checkout. " +
212
- "Run `npm run build` first, use the internal `npm run cli:dev -- <primitive> ...` path in this repo, or install the published package."
212
+ "Run `bun run build` first, use the internal `bun run cli:dev -- <primitive> ...` path in this repo, or install the published package."
213
213
  )
214
214
  }
215
215
  return import(LOCAL_DOCUMENT_DIST_ENTRY.href)
@@ -5,7 +5,7 @@ import { resolveModelForProvider, resolveProviderAlias } from "../provider-defau
5
5
  import { toDataUrl } from "../file-utils.js";
6
6
  import { generateText, visionRecognize } from "../provider-client.js";
7
7
  import { ensureRenderArtifact, indexDocumentInternal } from "./document.js";
8
- import { fileExists, matchesSourceSnapshot, matchesStrategyKey, pageLabel, parseJsonObject, readJson, resolveConfig, resolveEnv, writeJson, } from "./shared.js";
8
+ import { fileExists, matchesSourceSnapshot, matchesStrategyKey, pageLabel, parseJsonObject, parseJsonObjectWithRepair, readJson, resolveConfig, resolveEnv, writeJson, } from "./shared.js";
9
9
  import { normalizeFigureItems, normalizeUnderstandingFormulas, normalizeUnderstandingTables } from "./understanding.js";
10
10
  const resolveSemanticExtractionBudget = (input) => ({
11
11
  pageSelection: "all",
@@ -134,6 +134,15 @@ const resolveSemanticAgentContext = (config, request) => {
134
134
  }
135
135
  return { provider, model };
136
136
  };
137
/**
 * Error thrown when the semantic aggregation model output cannot be parsed
 * as JSON.
 *
 * Instances carry a fixed machine-readable `code` and a caller-supplied
 * `detail` payload alongside the usual `message`.
 */
class SemanticAggregationModelOutputError extends Error {
    /**
     * @param {string} message - Human-readable failure description.
     * @param {unknown} detail - Extra context stored verbatim on `detail`.
     */
    constructor(message, detail) {
        super(message);
        // Assignment order keeps the own-property order: detail, code, name.
        this.detail = detail;
        this.code = "SEMANTIC_AGGREGATION_INVALID_JSON";
        this.name = "SemanticAggregationModelOutputError";
    }
}
137
146
  const extractCombinedPageData = async (input) => {
138
147
  const renderArtifact = await ensureRenderArtifact({
139
148
  pdfPath: input.request.pdfPath,
@@ -170,6 +179,55 @@ const extractCombinedPageData = async (input) => {
170
179
  },
171
180
  };
172
181
  };
182
/**
 * Builds the follow-up prompt used after a first aggregation response failed
 * to parse: the regular aggregation prompt, a blank line, then instructions
 * asking for strict JSON.
 *
 * @param record - Forwarded unchanged to `buildSemanticAggregationPrompt`.
 * @param candidates - Forwarded unchanged to `buildSemanticAggregationPrompt`.
 * @returns {string} Newline-joined prompt text.
 */
const buildSemanticAggregationRetryPrompt = (record, candidates) => {
    const basePrompt = buildSemanticAggregationPrompt(record, candidates);
    const strictJsonInstructions = [
        "Your previous response was not strict JSON.",
        "Return the same semantic structure again, but this time produce strict RFC 8259 JSON only.",
        "Do not wrap in markdown fences.",
        "Do not use invalid backslash escapes such as \\(, \\), \\_, or \\- inside JSON strings.",
    ];
    // The empty entry yields one blank line between the base prompt and the instructions.
    return [basePrompt, "", ...strictJsonInstructions].join("\n");
};
192
/**
 * Parses the aggregation model's response, retrying the model call once when
 * the first response cannot be parsed.
 *
 * Flow:
 *  1. Parse `input.aggregated` with `parseJsonObjectWithRepair`.
 *  2. If that throws, call `generateText` again with the retry prompt and
 *     parse the new response.
 *  3. If the retry response also fails to parse, throw a
 *     `SemanticAggregationModelOutputError` whose `detail.causeMessage`
 *     combines both failure messages and whose `cause` is the retry error.
 *
 * Errors thrown by `generateText` itself are not caught here.
 *
 * @param input - Object with `aggregated` (first model response), `record`,
 *   `candidates`, `config`, `env`, `provider`, `model` and `runtimeApiKeys`.
 * @returns {Promise<{sections: unknown, repaired: boolean, retried: boolean}>}
 * @throws {SemanticAggregationModelOutputError} When both responses are unparseable.
 */
const parseSemanticAggregationResponse = async (input) => {
    // Parse one raw response and shape it into this function's result contract.
    const parseResponse = (rawText, retried) => {
        const outcome = parseJsonObjectWithRepair(rawText);
        return {
            sections: outcome.parsed?.sections,
            repaired: outcome.repaired,
            retried,
        };
    };
    const describeError = (error) => (error instanceof Error ? error.message : String(error));
    let causeMessage;
    try {
        return parseResponse(input.aggregated, false);
    }
    catch (firstError) {
        causeMessage = describeError(firstError);
    }
    const retriedText = await generateText({
        config: input.config,
        env: input.env,
        providerAlias: input.provider,
        model: input.model,
        prompt: buildSemanticAggregationRetryPrompt(input.record, input.candidates),
        runtimeApiKeys: input.runtimeApiKeys,
    });
    try {
        return parseResponse(retriedText, true);
    }
    catch (retryError) {
        const failure = new SemanticAggregationModelOutputError("semantic aggregation returned invalid JSON after repair and retry", {
            provider: input.provider,
            model: input.model,
            repaired: false,
            retried: true,
            causeMessage: `${causeMessage}; retry=${describeError(retryError)}`,
        });
        // Keep the underlying parse failure reachable for debugging. It is defined
        // non-enumerable, like a native `cause`, so serialising the error is unchanged.
        Object.defineProperty(failure, "cause", {
            value: retryError,
            writable: true,
            configurable: true,
            enumerable: false,
        });
        throw failure;
    }
};
173
231
  const mergeCrossPageTables = (understandings) => {
174
232
  const merged = [];
175
233
  let nextId = 1;
@@ -304,8 +362,17 @@ const ensureSemanticStructureArtifact = async (request) => {
304
362
  prompt: buildSemanticAggregationPrompt(record, [...candidateMap.values()]),
305
363
  runtimeApiKeys: request.providerApiKeys,
306
364
  });
307
- const parsed = parseJsonObject(aggregated);
308
- const sections = toSemanticTree(parsed?.sections, pageArtifactPaths);
365
+ const parsed = await parseSemanticAggregationResponse({
366
+ aggregated,
367
+ record,
368
+ candidates: [...candidateMap.values()],
369
+ config,
370
+ env,
371
+ provider,
372
+ model,
373
+ runtimeApiKeys: request.providerApiKeys,
374
+ });
375
+ const sections = toSemanticTree(parsed.sections, pageArtifactPaths);
309
376
  const mergedTables = mergeCrossPageTables(pageElements);
310
377
  const mergedFormulas = mergeCrossPageFormulas(pageElements);
311
378
  const mergedFigures = mergeCrossPageFigures(pageElements);
@@ -19,6 +19,10 @@ export declare const createPreview: (text: string) => string;
19
19
  export declare const createPageTitle: (pageNumber: number, text: string) => string;
20
20
  export declare const stripCodeFences: (value: string) => string;
21
21
  export declare const parseJsonObject: (value: string) => unknown;
22
+ export declare const parseJsonObjectWithRepair: (value: string) => {
23
+ parsed: unknown;
24
+ repaired: boolean;
25
+ };
22
26
  export declare const normalizeTableItems: (value: unknown) => LocalTableArtifactItem[];
23
27
  export declare const normalizeFormulaItems: (value: unknown) => LocalFormulaArtifactItem[];
24
28
  export declare const resolveEnv: (env?: Env) => Env;
@@ -77,6 +77,96 @@ export const parseJsonObject = (value) => {
77
77
  throw new Error("model output was not valid JSON");
78
78
  }
79
79
  };
80
// Matches exactly four hexadecimal digits, i.e. the payload of a JSON \uXXXX escape.
const JSON_UNICODE_ESCAPE_DIGITS = /^[0-9a-fA-F]{4}$/;

/**
 * Reports whether `value` (a single character) may follow a backslash in a
 * JSON string as a one-character escape. The `u` escape is handled separately.
 */
const validJsonEscape = (value) => /["\\/bfnrt]/.test(value);

/**
 * Rewrites backslash sequences inside JSON string literals that JSON does not
 * allow, so that text such as `\(` or `\_` survives `JSON.parse`.
 *
 * Inside string literals only:
 *  - a backslash followed by a valid one-character escape is kept as-is;
 *  - `\u` followed by four hex digits is kept as-is;
 *  - `\u` NOT followed by four hex digits (for example `\underline` or a
 *    truncated `\u12`) has its backslash doubled so it becomes a literal
 *    backslash, and scanning continues normally so a following closing quote
 *    still ends the string;
 *  - any other backslash sequence has its backslash doubled;
 *  - a backslash that is the very last character of the input is doubled.
 *
 * Valid one-character escapes are never altered, even when the model likely
 * meant literal text (e.g. `\f` in `\frac`).
 *
 * @param {string} value - Raw JSON-like text.
 * @returns {{repairedText: string, repaired: boolean}} The rewritten text and
 *   whether any change was made.
 */
const repairInvalidJsonEscapes = (value) => {
    let repaired = false;
    let inString = false;
    let output = "";
    let index = 0;
    while (index < value.length) {
        const char = value[index];
        index += 1;
        if (!inString) {
            output += char;
            if (char === "\"") {
                inString = true;
            }
            continue;
        }
        if (char === "\"") {
            output += char;
            inString = false;
            continue;
        }
        if (char !== "\\") {
            output += char;
            continue;
        }
        // A backslash inside a string: inspect the character that follows it.
        const next = value[index];
        if (next === undefined) {
            // Dangling backslash at end of input.
            output += "\\\\";
            repaired = true;
            continue;
        }
        if (next === "u") {
            const digits = value.slice(index + 1, index + 5);
            if (JSON_UNICODE_ESCAPE_DIGITS.test(digits)) {
                output += `\\u${digits}`;
                index += 5;
            }
            else {
                // Not a real unicode escape: keep it as a literal backslash + "u"
                // and let the following characters be scanned normally.
                output += "\\\\u";
                repaired = true;
                index += 1;
            }
            continue;
        }
        if (validJsonEscape(next)) {
            output += `\\${next}`;
        }
        else {
            output += `\\\\${next}`;
            repaired = true;
        }
        index += 1;
    }
    return { repairedText: output, repaired };
};
136
/**
 * Parses model output as JSON, falling back to escape repair when plain
 * parsing fails.
 *
 * The input is passed through `stripCodeFences` and trimmed. Two candidate
 * texts are tried in order: the whole trimmed text, then (when different)
 * the span from the first `{` to the last `}`. Each candidate is first parsed
 * as-is; if that fails and `repairInvalidJsonEscapes` reports a change, the
 * repaired text is parsed as well.
 *
 * @param {string} value - Raw model output.
 * @returns {{parsed: unknown, repaired: boolean}} `parsed` is `null` for
 *   empty input; `repaired` is true only when the repaired text was the one
 *   that parsed.
 * @throws {Error} The most recent parse error when no candidate parses.
 */
export const parseJsonObjectWithRepair = (value) => {
    const text = stripCodeFences(value).trim();
    if (text === "") {
        return { parsed: null, repaired: false };
    }
    const firstBrace = text.indexOf("{");
    const lastBrace = text.lastIndexOf("}");
    const hasBraceSpan = firstBrace >= 0 && lastBrace > firstBrace;
    const braceSpan = hasBraceSpan ? text.slice(firstBrace, lastBrace + 1) : text;
    // Only add the brace-delimited span when it actually differs from the full text.
    const attempts = braceSpan === text ? [text] : [text, braceSpan];
    const toError = (thrown) => (thrown instanceof Error ? thrown : new Error(String(thrown)));
    let failure = null;
    for (const attemptText of attempts) {
        try {
            return { parsed: JSON.parse(attemptText), repaired: false };
        }
        catch (thrown) {
            failure = toError(thrown);
        }
        const { repaired, repairedText } = repairInvalidJsonEscapes(attemptText);
        if (repaired) {
            try {
                return { parsed: JSON.parse(repairedText), repaired: true };
            }
            catch (thrown) {
                failure = toError(thrown);
            }
        }
    }
    throw failure ?? new Error("model output was not valid JSON");
};
80
170
  export const normalizeTableItems = (value) => {
81
171
  if (!Array.isArray(value))
82
172
  return [];
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@echofiles/echo-pdf",
3
3
  "description": "Local-first PDF document component core with CLI, workspace artifacts, and reusable page primitives.",
4
- "version": "0.11.2",
4
+ "version": "0.11.4",
5
5
  "type": "module",
6
6
  "homepage": "https://pdf.echofile.ai/",
7
7
  "repository": {
@@ -18,6 +18,7 @@
18
18
  "cli",
19
19
  "vision-language"
20
20
  ],
21
+ "packageManager": "bun@1.3.5",
21
22
  "publishConfig": {
22
23
  "access": "public"
23
24
  },
@@ -53,14 +54,14 @@
53
54
  "eval:stress": "node ./eval/run-local.mjs --suite stress",
54
55
  "eval:known-bad": "node ./eval/run-local.mjs --suite known-bad",
55
56
  "eval:fetch-public-samples": "node ./eval/fetch-public-samples.mjs",
56
- "typecheck": "npm run check:runtime && tsc --noEmit",
57
- "test:unit": "npm run check:runtime && vitest run tests/unit",
58
- "test:acceptance": "npm run check:runtime && npm run build && vitest run tests/acceptance",
59
- "test:import-smoke": "npm run check:runtime && npm run build && vitest run tests/integration/npm-pack-import.integration.test.ts tests/integration/ts-nodenext-consumer.integration.test.ts",
60
- "test:integration": "npm run check:runtime && npm run build && vitest run tests/integration/local-document-cli.integration.test.ts tests/integration/local-document.integration.test.ts tests/integration/local-provider-stability.integration.test.ts tests/integration/local-semantic-structure.integration.test.ts tests/integration/npm-pack-import.integration.test.ts tests/integration/ts-nodenext-consumer.integration.test.ts",
61
- "test": "npm run test:unit && npm run test:acceptance && npm run test:integration",
57
+ "typecheck": "bun run check:runtime && tsc --noEmit",
58
+ "test:unit": "bun run check:runtime && vitest run tests/unit",
59
+ "test:acceptance": "bun run check:runtime && bun run build && vitest run tests/acceptance",
60
+ "test:import-smoke": "bun run check:runtime && bun run build && vitest run tests/integration/npm-pack-import.integration.test.ts tests/integration/ts-nodenext-consumer.integration.test.ts",
61
+ "test:integration": "bun run check:runtime && bun run build && vitest run tests/integration/local-document-cli.integration.test.ts tests/integration/local-document.integration.test.ts tests/integration/local-provider-stability.integration.test.ts tests/integration/local-semantic-structure.integration.test.ts tests/integration/npm-pack-import.integration.test.ts tests/integration/ts-nodenext-consumer.integration.test.ts",
62
+ "test": "bun run test:unit && bun run test:acceptance && bun run test:integration",
62
63
  "smoke": "bash ./scripts/smoke.sh",
63
- "prepublishOnly": "npm run build && npm run typecheck && npm run test"
64
+ "prepublishOnly": "bun run build && bun run typecheck && bun run test"
64
65
  },
65
66
  "engines": {
66
67
  "node": ">=20.0.0"
@@ -9,7 +9,7 @@ if [[ -z "${current_node_major}" ]] || (( current_node_major < required_node_maj
9
9
  exit 1
10
10
  fi
11
11
 
12
- for cmd in npm curl grep sed; do
12
+ for cmd in bun curl grep sed; do
13
13
  if ! command -v "${cmd}" >/dev/null 2>&1; then
14
14
  echo "Missing required command: ${cmd}"
15
15
  exit 1
@@ -23,4 +23,4 @@ if [[ "${CHECK_LLM_KEYS:-0}" == "1" ]]; then
23
23
  fi
24
24
  fi
25
25
 
26
- echo "runtime check passed: node=$(node -v), npm=$(npm -v)"
26
+ echo "runtime check passed: node=$(node -v), bun=$(bun -v)"
package/scripts/smoke.sh CHANGED
@@ -11,4 +11,4 @@ bash "${SCRIPT_DIR}/check-runtime.sh"
11
11
  # - SMOKE_LLM_PROVIDER
12
12
  # - SMOKE_LLM_MODEL
13
13
  # - TESTCASE_DIR
14
- npm run test:integration
14
+ bun run test:integration