@dragon708/docmind-node 1.1.0 → 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.ts CHANGED
@@ -1,13 +1,25 @@
1
- import { DetectFileKindInput, NamedInput, AnalysisResult } from '@dragon708/docmind-shared';
2
- export { AnalysisAnalyzer, AnalysisResult, DetectFileKindInput, DocxAnalysisCoreResult, FileKind, FileKindMetadata, GenericAnalysisResult, ImageAnalysisCoreResult, PdfAnalysisCoreResult, TextAnalysisResult, detectFileKind } from '@dragon708/docmind-shared';
1
+ import { DocMindAnalyzeOptions, DetectFileKindInput, NamedInput, AnalysisResult, FileKind, RuntimeDescriptor, DocMindPublicIntent, AnalysisAnalyzer, ProcessingPlanDescriptor, ExplainAnalysisPlanOptions, GetCapabilitiesOptions } from '@dragon708/docmind-shared';
2
+ export { AnalysisAnalyzer, AnalysisResult, CapabilityDescriptor, DetectFileKindInput, DocMindPublicIntent, DocxAnalysisCoreResult, ExplainAnalysisPlanOptions, ExplainAnalysisPlanResult, FileKind, FileKindMetadata, GenericAnalysisResult, GetCapabilitiesOptions, GetCapabilitiesResult, ImageAnalysisCoreResult, PdfAnalysisCoreResult, TextAnalysisResult, detectFileKind } from '@dragon708/docmind-shared';
3
3
  import { OcrOptions } from '@dragon708/docmind-ocr';
4
4
  import { PdfAnalyzeOptions } from '@dragon708/docmind-pdf';
5
5
 
6
- /** Options for {@link analyzeFile} in the Node entry (`pdf` / `ocr` forwarded to format packages). */
7
- interface NodeAnalyzeOptions {
8
- readonly signal?: AbortSignal;
6
+ /**
7
+ * Options for Node public APIs (`analyzeFile`, intent methods).
8
+ *
9
+ * - **`pdf`**: forwarded to `@dragon708/docmind-pdf`. `analyzeFile` defaults `pdf.ocr` to `"auto"` when omitted.
10
+ * {@link extractText} / {@link convertToHtml} merge a default of `ocr: "off"` unless you set `pdf.ocr` explicitly.
11
+ * - **`ocr`**: forwarded to `@dragon708/docmind-ocr` for raster images; language string also feeds PDF OCR when `pdf.ocrLangs` is unset.
12
+ * - **`pdfNativeTextSource`**: when `pdf.ocr` resolves to `"off"`, chooses how native text is obtained (see {@link extractText} default).
13
+ */
14
+ interface NodeAnalyzeOptions extends DocMindAnalyzeOptions {
9
15
  readonly pdf?: PdfAnalyzeOptions;
10
16
  readonly ocr?: OcrOptions;
17
+ /**
18
+ * Native PDF text when `pdf.ocr` is `"off"`:
19
+ * - **`pdfjs-per-page`** (default for {@link extractText}): PDF.js text per page, merged for `text` (aligns with OCR raster engine).
20
+ * - **`pdf-parse`**: single pdf-parse pass (default for {@link analyzeFile} when you set `pdf.ocr: "off"` without this flag).
21
+ */
22
+ readonly pdfNativeTextSource?: "pdf-parse" | "pdfjs-per-page";
11
23
  }
12
24
 
13
25
  /**
@@ -27,29 +39,93 @@ declare function bufferToInput(buffer: Buffer, name?: string): NamedInput<Buffer
27
39
  declare function resolveNodeAnalyzeInput(input: NodeAnalyzeInput): Promise<DetectFileKindInput>;
28
40
 
29
41
  /**
30
- * Node router: PDF, DOCX, images (OCR), and text. Paths and `file:` URLs are read via `fs`.
42
+ * Resolves {@link NodeAnalyzeInput} (paths read from disk), classifies with {@link detectFileKind}, then runs
43
+ * the PDF, DOCX, image, or text pipeline. PDF OCR defaults to `"auto"` when `options.pdf.ocr` is omitted.
31
44
  */
32
45
  declare function analyzeFile(input: NodeAnalyzeInput, options?: NodeAnalyzeOptions): Promise<AnalysisResult>;
33
46
 
34
47
  /**
35
- * Text only: PDF `extractTextFromPdf` (capa de texto, sin OCR); DOCX → `extractTextFromDocx`;
36
- * imagen `ocr`; texto `analyzeText`.
48
+ * Plain-text extraction using {@link analyzeFile} routing. PDFs default to **native text only**
49
+ * (`pdf.ocr: "off"`) unless you set `options.pdf.ocr` explicitly, and to **PDF.js per-page** assembly
50
+ * (`pdfNativeTextSource: "pdfjs-per-page"`) unless you set `options.pdfNativeTextSource` or `pdf.ocr` enables OCR.
37
51
  */
38
52
  declare function extractText(input: NodeAnalyzeInput, options?: NodeAnalyzeOptions): Promise<AnalysisResult>;
39
53
  /**
40
- * Metadatos: PDF `extractPdfMetadata`; resto con mejor esfuerzo o aviso.
41
- * El resultado sigue siendo `AnalysisResult` (PDF rellena `metadata` en forma `PdfAnalysisCoreResult`).
54
+ * Metadata: PDF uses lightweight metadata extraction; DOCX/images return stubs; plain text uses the same
55
+ * router as {@link extractText} (`analyzeFile` with PDF OCR off by default).
42
56
  */
43
57
  declare function extractMetadata(input: NodeAnalyzeInput, options?: NodeAnalyzeOptions): Promise<AnalysisResult>;
44
58
  /**
45
- * HTML: DOCX `convertDocxToHtml`; PDF/texto `<pre>` a partir de texto extraído;
46
- * imágenes vacío con aviso.
59
+ * HTML: DOCX and plain text go through {@link analyzeFile} (then `<pre>` for text). PDF uses only the
60
+ * text layer, wrapped in `<pre>` (no OCR). Images return a stub without running OCR.
47
61
  */
48
62
  declare function convertToHtml(input: NodeAnalyzeInput, options?: NodeAnalyzeOptions): Promise<AnalysisResult>;
49
63
  /**
50
- * OCR: PDF `analyzePdf` con `ocr: "force"`; imagen `ocr`; DOCX → texto estructurado con aviso
51
- * (sin OCR); texto `analyzeText`.
64
+ * OCR intent: PDF always runs {@link analyzePdf} with `ocr: "force"` (merged with `options.pdf`).
65
+ * Raster images run Tesseract via `options.ocr`. DOCX returns structured extract with a notice.
52
66
  */
53
67
  declare function runOcr(input: NodeAnalyzeInput, options?: NodeAnalyzeOptions): Promise<AnalysisResult>;
54
68
 
55
- export { type NodeAnalyzeInput, type NodeAnalyzeOptions, analyzeFile, bufferToInput, convertToHtml, extractMetadata, extractText, readFileToInput, resolveNodeAnalyzeInput, runOcr };
69
+ /**
70
+ * Whether DocMind will try a non-OCR text/HTML path (e.g. Mammoth, pdf-parse text layer, UTF-8).
71
+ */
72
+ interface NativeExtractionPlan {
73
+ readonly willAttempt: boolean;
74
+ readonly description: string;
75
+ }
76
+ /** Whether OCR (raster or PDF pipeline) may run for this intent + kind. */
77
+ interface OcrPlan {
78
+ readonly mayUse: boolean;
79
+ readonly description: string;
80
+ }
81
+ /**
82
+ * Structured explanation of what DocMind would do for a public intent in Node (no heavy I/O).
83
+ */
84
+ interface ExplainAnalysisPlanReport {
85
+ readonly kind: FileKind;
86
+ readonly detectedKind: FileKind;
87
+ readonly runtime: RuntimeDescriptor;
88
+ readonly intent: DocMindPublicIntent | (string & {});
89
+ readonly primaryAnalyzer: AnalysisAnalyzer;
90
+ readonly nativeExtraction: NativeExtractionPlan;
91
+ readonly ocr: OcrPlan;
92
+ readonly limitations: readonly string[];
93
+ readonly plan: ProcessingPlanDescriptor;
94
+ readonly warnings?: readonly string[];
95
+ }
96
+
97
+ /** High-level features the user can ask DocMind for (per input kind and runtime). */
98
+ type PublicCapabilityId = "text" | "metadata" | "html" | "ocr" | "pages";
99
+ /** Whether a {@link PublicCapabilityId} applies to the detected file in this runtime. */
100
+ interface PublicCapabilitySupport {
101
+ readonly id: PublicCapabilityId;
102
+ readonly supported: boolean;
103
+ readonly warnings?: readonly string[];
104
+ }
105
+ /**
106
+ * Result of {@link getCapabilities}: detected kind, runtime id, per-feature support for this input, and optional global warnings.
107
+ */
108
+ interface GetCapabilitiesReport {
109
+ readonly kind: FileKind;
110
+ readonly runtime: RuntimeDescriptor;
111
+ readonly capabilities: readonly PublicCapabilitySupport[];
112
+ readonly warnings?: readonly string[];
113
+ }
114
+
115
+ /** Options for {@link explainAnalysisPlan} including PDF/OCR hints for accurate planning. */
116
+ type NodeExplainAnalysisPlanOptions = ExplainAnalysisPlanOptions & Pick<NodeAnalyzeOptions, "pdf" | "ocr">;
117
+
118
+ /**
119
+ * Epic 1 — **Capabilities:** after resolving {@link NodeAnalyzeInput}, lists which of
120
+ * `text` | `metadata` | `html` | `ocr` | `pages` apply for that kind in Node (for PDF, `text` / `metadata` /
121
+ * `pages` / `ocr` describe the v2 pdf-parse + PDF.js + OCR stack; see {@link buildNodeCapabilityReport}).
122
+ * Does not run Mammoth/Tesseract/PDF bodies beyond path resolution.
123
+ */
124
+ declare function getCapabilities(input: NodeAnalyzeInput, options?: GetCapabilitiesOptions): Promise<GetCapabilitiesReport>;
125
+ /**
126
+ * Epic 1 — **Plan preview:** same shape as browser; PDF branches include `pdf.ocr` from options (`off` | `auto` | `force`).
127
+ * Does not parse the document; resolving a path input may still read the file from disk.
128
+ */
129
+ declare function explainAnalysisPlan(input: NodeAnalyzeInput, options?: NodeExplainAnalysisPlanOptions): Promise<ExplainAnalysisPlanReport>;
130
+
131
+ export { type ExplainAnalysisPlanReport, type GetCapabilitiesReport, type NativeExtractionPlan, type NodeAnalyzeInput, type NodeAnalyzeOptions, type NodeExplainAnalysisPlanOptions, type OcrPlan, type PublicCapabilityId, type PublicCapabilitySupport, analyzeFile, bufferToInput, convertToHtml, explainAnalysisPlan, extractMetadata, extractText, getCapabilities, readFileToInput, resolveNodeAnalyzeInput, runOcr };
package/dist/index.js CHANGED
@@ -1,8 +1,8 @@
1
1
  import { assertValidAnalyzeFileInput, detectFileKind, notImplementedResult, UNKNOWN_FORMAT_WARNING, analyzeText, toUint8Array, isNamedInput, isBinaryInput, isBlob, isFile } from '@dragon708/docmind-shared';
2
2
  export { detectFileKind } from '@dragon708/docmind-shared';
3
- import { extractTextFromDocx, convertDocxToHtml, analyzeDocx } from '@dragon708/docmind-docx';
3
+ import { analyzeDocx } from '@dragon708/docmind-docx';
4
4
  import { ocr } from '@dragon708/docmind-ocr';
5
- import { extractTextFromPdf, extractPdfMetadata, analyzePdf } from '@dragon708/docmind-pdf';
5
+ import { extractPdfMetadata, extractTextFromPdf, analyzePdf, extractPdfTextByPage } from '@dragon708/docmind-pdf';
6
6
  import { readFile } from 'fs/promises';
7
7
  import { basename } from 'path';
8
8
  import { fileURLToPath } from 'url';
@@ -112,17 +112,45 @@ async function analyzePdfForNode(input, options) {
112
112
  signal: userPdf?.signal ?? options?.signal
113
113
  };
114
114
  const r = await analyzePdf(data, pdfOpts);
115
+ const usePdfJsPerPage = pdfOpts.ocr === "off" && (options?.pdfNativeTextSource ?? "pdf-parse") === "pdfjs-per-page";
116
+ if (!usePdfJsPerPage) {
117
+ return {
118
+ fileKind: "pdf",
119
+ analyzer: "pdf",
120
+ status: "ok",
121
+ kind: "pdf",
122
+ text: r.text,
123
+ pages: r.pages,
124
+ metadata: r.metadata,
125
+ warnings: [...r.warnings],
126
+ needsOCR: r.needsOCR,
127
+ ocrUsed: r.ocrUsed
128
+ };
129
+ }
130
+ let text = r.text;
131
+ const extra = [];
132
+ try {
133
+ const rows = await extractPdfTextByPage(data, {
134
+ maxPages: pdfOpts.maxPages,
135
+ signal: pdfOpts.signal
136
+ });
137
+ text = rows.map((row) => row.text).join("\n\n");
138
+ } catch (e) {
139
+ const msg = e instanceof Error ? e.message : String(e);
140
+ extra.push(`warning: PDF.js per-page text failed; using pdf-parse text: ${msg}`);
141
+ }
142
+ const needsOCR = r.pages > 0 && text.trim().length === 0;
115
143
  return {
116
144
  fileKind: "pdf",
117
145
  analyzer: "pdf",
118
146
  status: "ok",
119
147
  kind: "pdf",
120
- text: r.text,
148
+ text,
121
149
  pages: r.pages,
122
150
  metadata: r.metadata,
123
- warnings: [...r.warnings],
124
- needsOCR: r.needsOCR,
125
- ocrUsed: r.ocrUsed
151
+ warnings: [...r.warnings, ...extra],
152
+ needsOCR,
153
+ ocrUsed: false
126
154
  };
127
155
  }
128
156
  function toPathString(pathOrUrl) {
@@ -169,6 +197,19 @@ async function analyzeFile(input, options) {
169
197
  return notImplementedResult(fileKind, "none", [UNKNOWN_FORMAT_WARNING]);
170
198
  }
171
199
  }
200
+
201
+ // src/intentPdfOptions.ts
202
/**
 * Returns a shallow copy of `options` in which `pdf.ocr` is `"off"` unless the
 * caller already supplied a value for it.
 *
 * The input object (and its nested `pdf` object) is never mutated; every other
 * field is copied through unchanged. `options` may be `undefined`, in which
 * case the result is `{ pdf: { ocr: "off" } }`.
 *
 * @param {object} [options] Analyze options, possibly containing a `pdf` bag.
 * @returns {object} New options object with `pdf.ocr` always defined.
 */
function withPdfOcrDefaultOff(options) {
  const callerPdf = options?.pdf;
  // `??` (not `||`) so only a missing value falls back to "off".
  const ocrMode = callerPdf?.ocr ?? "off";
  return {
    ...options,
    pdf: {
      ...callerPdf,
      ocr: ocrMode
    }
  };
}
211
+
212
+ // src/internal/abort.ts
172
213
  function throwIfAborted(signal) {
173
214
  if (signal?.aborted) {
174
215
  const err = new Error("The operation was aborted");
@@ -176,117 +217,40 @@ function throwIfAborted(signal) {
176
217
  throw err;
177
218
  }
178
219
  }
220
+
221
+ // src/publicActions.ts
222
+ var DOCX_METADATA_STUB = "Structured document metadata for DOCX is not exposed as a separate API; use extractText or analyzeFile.";
223
+ var IMAGE_METADATA_NOTE = "Raster images have no document metadata bundle in this API.";
224
+ var RUN_OCR_PDF_FORCE_SEMANTICS = 'runOcr: PDF pipeline ran with `ocr: "force"` so text may include raster OCR output even when a text layer exists.';
225
/**
 * Escapes the characters `&`, `<`, `>` and `"` so that `s` can be embedded as
 * HTML text. Single quotes and all other characters are left untouched.
 *
 * @param {string} s Raw text.
 * @returns {string} Text with the four characters replaced by HTML entities.
 */
function escapeHtmlMinimal(s) {
  return s
    // `&` must go first; otherwise the ampersands of the entities produced
    // below would be escaped a second time.
    .replace(/&/g, "&amp;")
    .replace(/</g, "&lt;")
    .replace(/>/g, "&gt;")
    .replace(/"/g, "&quot;");
}
179
228
  async function prepare(input) {
180
229
  const resolved = await resolveNodeAnalyzeInput(input);
181
230
  assertValidAnalyzeFileInput(resolved);
182
231
  return resolved;
183
232
  }
184
- function escapeHtmlMinimal(s) {
185
- return s.replace(/&/g, "&amp;").replace(/</g, "&lt;").replace(/>/g, "&gt;").replace(/"/g, "&quot;");
233
+ function toExtractTextResult(full) {
234
+ if (full.status !== "ok") return full;
235
+ if (full.fileKind === "docx") {
236
+ return { ...full, html: "" };
237
+ }
238
+ return full;
186
239
  }
187
- var DOCX_METADATA_STUB = "Structured document metadata for DOCX is not exposed as a separate API; use extractText or analyzeFile.";
188
- var IMAGE_METADATA_NOTE = "Raster images have no document metadata bundle in this API.";
189
240
  async function extractText(input, options) {
190
241
  throwIfAborted(options?.signal);
191
- const resolved = await prepare(input);
192
- const kind = detectFileKind(resolved);
193
- const signal = options?.signal;
194
- switch (kind) {
195
- case "pdf": {
196
- const data = await bytesFromDetectInput(resolved);
197
- if (data.byteLength === 0) {
198
- return {
199
- fileKind: "pdf",
200
- analyzer: "pdf",
201
- status: "ok",
202
- kind: "pdf",
203
- text: "",
204
- pages: 0,
205
- metadata: { info: {} },
206
- warnings: ["No document bytes were provided for analysis."],
207
- needsOCR: false,
208
- ocrUsed: false
209
- };
210
- }
211
- const r = await extractTextFromPdf(data);
212
- return {
213
- fileKind: "pdf",
214
- analyzer: "pdf",
215
- status: "ok",
216
- kind: "pdf",
217
- text: r.text,
218
- pages: r.pages,
219
- metadata: { info: {} },
220
- warnings: r.warnings,
221
- needsOCR: false,
222
- ocrUsed: false
223
- };
224
- }
225
- case "docx": {
226
- const data = await bytesFromDetectInput(resolved);
227
- if (data.byteLength === 0) {
228
- return {
229
- fileKind: "docx",
230
- analyzer: "docx",
231
- status: "ok",
232
- kind: "docx",
233
- text: "",
234
- html: "",
235
- warnings: ["No document bytes were provided for analysis."]
236
- };
237
- }
238
- const r = await extractTextFromDocx(data);
239
- return {
240
- fileKind: "docx",
241
- analyzer: "docx",
242
- status: "ok",
243
- kind: "docx",
244
- text: r.text,
245
- html: "",
246
- warnings: r.warnings
247
- };
248
- }
249
- case "image": {
250
- const data = await bytesFromDetectInput(resolved);
251
- if (data.byteLength === 0) {
252
- return {
253
- fileKind: "image",
254
- analyzer: "image",
255
- status: "ok",
256
- kind: "image",
257
- text: "",
258
- confidence: 0,
259
- ocrUsed: true,
260
- warnings: ["No image bytes were provided for analysis."]
261
- };
262
- }
263
- const ocrOpts = {
264
- ...options?.ocr ?? {},
265
- signal: options?.ocr?.signal ?? signal
266
- };
267
- const r = await ocr(data, ocrOpts);
268
- return {
269
- fileKind: "image",
270
- analyzer: "image",
271
- status: "ok",
272
- kind: "image",
273
- text: r.text,
274
- confidence: r.confidence,
275
- ocrUsed: r.ocrUsed,
276
- warnings: []
277
- };
278
- }
279
- case "text":
280
- return analyzeText(resolved, { signal });
281
- default:
282
- return notImplementedResult(kind, "none", [UNKNOWN_FORMAT_WARNING]);
283
- }
242
+ const merged = {
243
+ ...withPdfOcrDefaultOff(options),
244
+ pdfNativeTextSource: options?.pdfNativeTextSource ?? "pdfjs-per-page"
245
+ };
246
+ const full = await analyzeFile(input, merged);
247
+ return toExtractTextResult(full);
284
248
  }
285
249
  async function extractMetadata(input, options) {
286
250
  throwIfAborted(options?.signal);
287
251
  const resolved = await prepare(input);
288
252
  const kind = detectFileKind(resolved);
289
- const signal = options?.signal;
253
+ options?.signal;
290
254
  switch (kind) {
291
255
  case "pdf": {
292
256
  const data = await bytesFromDetectInput(resolved);
@@ -340,7 +304,7 @@ async function extractMetadata(input, options) {
340
304
  warnings: [IMAGE_METADATA_NOTE]
341
305
  };
342
306
  case "text":
343
- return analyzeText(resolved, { signal });
307
+ return analyzeFile(input, withPdfOcrDefaultOff(options));
344
308
  default:
345
309
  return notImplementedResult(kind, "none", [UNKNOWN_FORMAT_WARNING]);
346
310
  }
@@ -349,96 +313,72 @@ async function convertToHtml(input, options) {
349
313
  throwIfAborted(options?.signal);
350
314
  const resolved = await prepare(input);
351
315
  const kind = detectFileKind(resolved);
352
- const signal = options?.signal;
353
- switch (kind) {
354
- case "docx": {
355
- const data = await bytesFromDetectInput(resolved);
356
- if (data.byteLength === 0) {
357
- return {
358
- fileKind: "docx",
359
- analyzer: "docx",
360
- status: "ok",
361
- kind: "docx",
362
- text: "",
363
- html: "",
364
- warnings: ["No document bytes were provided for analysis."]
365
- };
366
- }
367
- const [textPart, htmlPart] = await Promise.all([
368
- extractTextFromDocx(data),
369
- convertDocxToHtml(data)
370
- ]);
371
- return {
372
- fileKind: "docx",
373
- analyzer: "docx",
374
- status: "ok",
375
- kind: "docx",
376
- text: textPart.text,
377
- html: htmlPart.html,
378
- warnings: [...textPart.warnings, ...htmlPart.warnings]
379
- };
380
- }
381
- case "pdf": {
382
- const data = await bytesFromDetectInput(resolved);
383
- if (data.byteLength === 0) {
384
- return {
385
- fileKind: "pdf",
386
- analyzer: "pdf",
387
- status: "ok",
388
- kind: "pdf",
389
- text: "",
390
- pages: 0,
391
- metadata: { info: {} },
392
- warnings: ["No document bytes were provided for analysis."],
393
- needsOCR: false,
394
- ocrUsed: false
395
- };
396
- }
397
- const r = await extractTextFromPdf(data);
398
- const html = `<pre>${escapeHtmlMinimal(r.text)}</pre>`;
316
+ options?.signal;
317
+ if (kind === "docx") {
318
+ return analyzeFile(input, withPdfOcrDefaultOff(options));
319
+ }
320
+ if (kind === "text") {
321
+ const r = await analyzeFile(input, withPdfOcrDefaultOff(options));
322
+ if (r.status !== "ok") return r;
323
+ if (r.fileKind !== "text") return r;
324
+ const html = `<pre>${escapeHtmlMinimal(r.text)}</pre>`;
325
+ return {
326
+ ...r,
327
+ html,
328
+ warnings: [
329
+ ...r.warnings,
330
+ "HTML for plain text is a <pre> wrapper around decoded UTF-8 content."
331
+ ]
332
+ };
333
+ }
334
+ if (kind === "pdf") {
335
+ const data = await bytesFromDetectInput(resolved);
336
+ if (data.byteLength === 0) {
399
337
  return {
400
338
  fileKind: "pdf",
401
339
  analyzer: "pdf",
402
340
  status: "ok",
403
341
  kind: "pdf",
404
- text: r.text,
405
- pages: r.pages,
342
+ text: "",
343
+ pages: 0,
406
344
  metadata: { info: {} },
407
- html,
408
- warnings: [
409
- ...r.warnings,
410
- "PDF HTML is a plain-text preview wrapped in <pre> (not a visual layout)."
411
- ],
345
+ warnings: ["No document bytes were provided for analysis."],
412
346
  needsOCR: false,
413
347
  ocrUsed: false
414
348
  };
415
349
  }
416
- case "text": {
417
- const t = await analyzeText(resolved, { signal });
418
- const html = `<pre>${escapeHtmlMinimal(t.text)}</pre>`;
419
- return {
420
- ...t,
421
- html,
422
- warnings: [
423
- ...t.warnings,
424
- "HTML for plain text is a <pre> wrapper around decoded UTF-8 content."
425
- ]
426
- };
427
- }
428
- case "image":
429
- return {
430
- fileKind: "image",
431
- analyzer: "image",
432
- status: "ok",
433
- kind: "image",
434
- text: "",
435
- confidence: 0,
436
- ocrUsed: true,
437
- warnings: ["No HTML representation for raster images; use extractText / runOcr."]
438
- };
439
- default:
440
- return notImplementedResult(kind, "none", [UNKNOWN_FORMAT_WARNING]);
350
+ const r = await extractTextFromPdf(data);
351
+ const html = `<pre>${escapeHtmlMinimal(r.text)}</pre>`;
352
+ return {
353
+ fileKind: "pdf",
354
+ analyzer: "pdf",
355
+ status: "ok",
356
+ kind: "pdf",
357
+ text: r.text,
358
+ pages: r.pages,
359
+ metadata: { info: {} },
360
+ html,
361
+ warnings: [
362
+ ...r.warnings,
363
+ "PDF HTML is a plain-text preview wrapped in <pre> (not a visual layout)."
364
+ ],
365
+ needsOCR: false,
366
+ ocrUsed: false
367
+ };
441
368
  }
369
+ if (kind === "image") {
370
+ return {
371
+ fileKind: "image",
372
+ analyzer: "image",
373
+ status: "ok",
374
+ kind: "image",
375
+ text: "",
376
+ confidence: 0,
377
+ ocrUsed: true,
378
+ warnings: ["No HTML representation for raster images; use extractText / runOcr."]
379
+ };
380
+ }
381
+ return notImplementedResult(kind, "none", [UNKNOWN_FORMAT_WARNING]);
442
382
  }
443
383
  async function runOcr(input, options) {
444
384
  throwIfAborted(options?.signal);
@@ -477,7 +417,7 @@ async function runOcr(input, options) {
477
417
  text: r.text,
478
418
  pages: r.pages,
479
419
  metadata: r.metadata,
480
- warnings: [...r.warnings],
420
+ warnings: [RUN_OCR_PDF_FORCE_SEMANTICS, ...r.warnings],
481
421
  needsOCR: r.needsOCR,
482
422
  ocrUsed: r.ocrUsed
483
423
  };
@@ -546,6 +486,458 @@ async function runOcr(input, options) {
546
486
  }
547
487
  }
548
488
 
549
- export { analyzeFile, bufferToInput, convertToHtml, extractMetadata, extractText, readFileToInput, resolveNodeAnalyzeInput, runOcr };
489
+ // src/analysisPlanReport.ts
490
/**
 * Collects its arguments into an array, dropping every falsy entry
 * (`""`, `undefined`, `null`, `false`, `0`).
 *
 * @param {...*} items Candidate limitation messages.
 * @returns {Array} The truthy arguments, in call order.
 */
function lim(...items) {
  return items.filter((item) => Boolean(item));
}
493
/**
 * Builds the structured "what would DocMind do" report for one public intent
 * and one detected file kind in the Node runtime.
 *
 * The function is pure: it only assembles descriptive objects and performs no
 * I/O. `kind` is reported under both `kind` and `detectedKind`.
 *
 * @param {string} kind Detected file kind (`"pdf"`, `"docx"`, `"image"`,
 *   `"text"`, `"unknown"`). Any other value is treated like the plain-text /
 *   fallback branch of each intent and reports `primaryAnalyzer: "none"`.
 * @param {string} intent Public intent name (`"analyzeFile"`, `"extractText"`,
 *   `"extractMetadata"`, `"convertToHtml"`, `"runOcr"`). Unrecognised intents
 *   get a generic description that points at `plan`.
 * @param {string} pdfOcr PDF OCR mode (`"off"`, `"auto"`, `"force"`). Only
 *   consulted for the `analyzeFile` + PDF combination.
 * @param {object} plan Processing plan descriptor; passed through unchanged.
 * @returns {object} Report with `kind`, `detectedKind`, `runtime`, `intent`,
 *   `primaryAnalyzer`, `nativeExtraction`, `ocr`, `limitations` and `plan`.
 */
function buildNodeExplainReport(kind, intent, pdfOcr, plan) {
  const runtime = { id: "node" };

  // Unknown kinds short-circuit: no analyzer and no OCR, whatever the intent.
  if (kind === "unknown") {
    return {
      kind,
      detectedKind: kind,
      runtime,
      intent,
      primaryAnalyzer: "none",
      nativeExtraction: { willAttempt: false, description: "No analyzer without a known file kind." },
      ocr: { mayUse: false, description: "OCR is not used for unknown kinds." },
      limitations: [
        "Could not classify the file from name, MIME, or bytes; analysis will return not_implemented until hints improve."
      ],
      plan
    };
  }

  const primaryAnalyzer =
    kind === "pdf" || kind === "docx" || kind === "image" || kind === "text" ? kind : "none";

  let nativeExtraction;
  let ocrPlan;
  let limitations = [];

  switch (intent) {
    case "analyzeFile":
      if (kind === "pdf") {
        let ocrDescription;
        if (pdfOcr === "off") {
          ocrDescription = "Raster OCR pipeline is off (pdf.ocr: off).";
        } else if (pdfOcr === "force") {
          ocrDescription = "Raster OCR may run on all pages when pdf.ocr is force.";
        } else {
          ocrDescription = "Raster OCR may run when native text looks insufficient (pdf.ocr: auto + heuristics).";
        }
        nativeExtraction = {
          willAttempt: true,
          description: "pdf-parse supplies embedded text, metadata, and page count; PDF.js drives raster OCR when enabled."
        };
        ocrPlan = { mayUse: pdfOcr !== "off", description: ocrDescription };
      } else if (kind === "docx") {
        nativeExtraction = {
          willAttempt: true,
          description: "Mammoth extracts text and HTML from OOXML."
        };
        ocrPlan = { mayUse: false, description: "DOCX does not use OCR in DocMind." };
      } else if (kind === "image") {
        nativeExtraction = {
          willAttempt: false,
          description: "Images have no native text layer; text comes from OCR only."
        };
        ocrPlan = { mayUse: true, description: "Tesseract runs on supported raster formats." };
      } else {
        nativeExtraction = {
          willAttempt: true,
          description: "UTF-8 decode with BOM handling for plain text."
        };
        ocrPlan = { mayUse: false, description: "OCR does not apply to text files." };
      }
      break;

    case "extractText":
      if (kind === "pdf") {
        nativeExtraction = {
          willAttempt: true,
          description: "Node: pdf-parse for metadata/page baseline, then PDF.js per-page text merged into `text` (pdfNativeTextSource pdfjs-per-page default)."
        };
        ocrPlan = {
          mayUse: false,
          description: "extractText defaults pdf.ocr off; set pdf.ocr explicitly to allow auto/force raster OCR."
        };
      } else if (kind === "docx") {
        nativeExtraction = {
          willAttempt: true,
          description: "Mammoth plain text; HTML cleared in the extractText response."
        };
        ocrPlan = { mayUse: false, description: "DOCX does not use OCR." };
      } else if (kind === "image") {
        nativeExtraction = { willAttempt: false, description: "No embedded text layer." };
        ocrPlan = { mayUse: true, description: "OCR produces text for images." };
      } else {
        nativeExtraction = {
          willAttempt: true,
          description: "UTF-8 decode only."
        };
        ocrPlan = { mayUse: false, description: "OCR does not apply." };
      }
      break;

    case "extractMetadata":
      if (kind === "pdf") {
        nativeExtraction = {
          willAttempt: true,
          description: "Lightweight PDF info/XMP normalization without full OCR."
        };
        ocrPlan = { mayUse: false, description: "extractMetadata does not run the OCR pipeline." };
      } else if (kind === "docx" || kind === "image") {
        nativeExtraction = {
          willAttempt: false,
          description: "Stub response; no heavy extractor."
        };
        ocrPlan = { mayUse: false, description: "OCR not used for this metadata path." };
        limitations = [
          kind === "docx"
            ? "Structured DOCX metadata is not exposed separately."
            : "Raster images have no document metadata bundle."
        ];
      } else {
        nativeExtraction = {
          willAttempt: true,
          description: "Decoded text only; no structured document metadata."
        };
        ocrPlan = { mayUse: false, description: "OCR does not apply." };
        limitations = ["Plain text has no structured document metadata."];
      }
      break;

    case "convertToHtml":
      if (kind === "pdf") {
        nativeExtraction = {
          willAttempt: true,
          description: "Text layer extracted then wrapped in <pre> (not visual layout)."
        };
        ocrPlan = { mayUse: false, description: "convertToHtml does not run PDF OCR." };
        limitations = ["PDF HTML is a plain-text preview, not page layout."];
      } else if (kind === "docx") {
        nativeExtraction = {
          willAttempt: true,
          description: "Mammoth HTML output via analyzeFile routing."
        };
        ocrPlan = { mayUse: false, description: "DOCX path does not use OCR." };
      } else if (kind === "text") {
        nativeExtraction = {
          willAttempt: true,
          description: "UTF-8 decode then <pre> wrapper."
        };
        ocrPlan = { mayUse: false, description: "OCR does not apply." };
      } else {
        nativeExtraction = {
          willAttempt: false,
          description: "No HTML path for raster images."
        };
        ocrPlan = { mayUse: false, description: "OCR does not emit layout HTML here." };
        limitations = ["Use extractText or runOcr for image text."];
      }
      break;

    case "runOcr":
      if (kind === "pdf") {
        nativeExtraction = {
          willAttempt: true,
          description: "pdf-parse runs first; text may be replaced by raster OCR output."
        };
        ocrPlan = {
          mayUse: true,
          description: 'runOcr always sets pdf.ocr to "force" for PDFs.'
        };
        limitations = ["Forced OCR may run even when a text layer exists."];
      } else if (kind === "image") {
        nativeExtraction = { willAttempt: false, description: "No native text layer." };
        ocrPlan = { mayUse: true, description: "Tesseract OCR on the image bytes." };
      } else if (kind === "docx") {
        nativeExtraction = {
          willAttempt: true,
          description: "Full Mammoth extract (text + HTML); not OCR."
        };
        ocrPlan = { mayUse: false, description: "DOCX is not OCR'd." };
        limitations = ["Result is structured extract, not OCR output."];
      } else {
        nativeExtraction = {
          willAttempt: true,
          description: "UTF-8 decode only."
        };
        ocrPlan = { mayUse: false, description: "OCR does not apply to text files." };
      }
      break;

    default:
      // Intent strings outside the known set: defer entirely to `plan`.
      nativeExtraction = { willAttempt: false, description: "Generic intent; see plan." };
      ocrPlan = { mayUse: false, description: "See plan steps." };
  }

  return {
    kind,
    detectedKind: kind,
    runtime,
    intent,
    primaryAnalyzer,
    nativeExtraction,
    ocr: ocrPlan,
    limitations,
    plan
  };
}
672
+
673
+ // src/capabilityReport.ts
674
+ var DOCX_META = "Structured document metadata is not exposed separately; extractMetadata returns a stub for DOCX.";
675
+ var IMAGE_META = "Raster images have no document metadata bundle; extractMetadata returns a stub.";
676
+ var IMAGE_HTML = "No layout HTML for raster images; use extractText or runOcr for text.";
677
+ var TEXT_META_NOTE = "Plain text has no structured document metadata; extractMetadata still returns decoded content.";
678
+ var UNKNOWN_KIND = "Could not determine file kind from name, MIME, or bytes; all features are reported as unsupported until the kind is known.";
679
/**
 * Creates one capability entry.
 *
 * The `warnings` key is only present when at least one warning was supplied;
 * an empty or missing array yields a plain `{ id, supported }` object.
 *
 * @param {string} id Capability identifier (e.g. `"text"`, `"ocr"`).
 * @param {boolean} supported Whether the capability applies.
 * @param {string[]} [warnings] Optional notes attached to the capability.
 * @returns {{id: string, supported: boolean, warnings?: string[]}}
 */
function slot(id, supported, warnings) {
  if (warnings?.length) {
    return { id, supported, warnings };
  }
  return { id, supported };
}
682
/**
 * Builds the per-feature capability report for a detected file kind in the
 * Node runtime. Pure: no I/O, only static descriptions.
 *
 * Each kind lists the five capability ids (`text`, `metadata`, `html`, `ocr`,
 * `pages`), some with explanatory warnings. Any kind other than `"pdf"`,
 * `"docx"`, `"image"` or `"text"` reports every capability as unsupported and
 * adds a report-level warning.
 *
 * @param {string} kind Detected file kind.
 * @returns {{kind: string, runtime: {id: string}, capabilities: object[], warnings: (string[]|undefined)}}
 *   `warnings` is `undefined` when there are no report-level warnings.
 */
function buildNodeCapabilityReport(kind) {
  const runtime = { id: "node" };
  const reportWarnings = [];
  let capabilities;

  switch (kind) {
    case "pdf":
      capabilities = [
        slot("text", true, [
          "Native text via pdf-parse and (in Node extractText) PDF.js per-page text; set pdf.ocr for raster OCR."
        ]),
        slot("metadata", true, [
          "Document info / XMP-style metadata via pdf-parse without running the OCR pipeline."
        ]),
        slot("pages", true, [
          "Page count and per-page native extraction (PDF.js) where used; OCR respects pdf.maxPages."
        ]),
        slot("ocr", true, [
          "Raster OCR pipeline (pdf.ocr auto with quality heuristics, force, or runOcr)."
        ]),
        slot("html", true, ["HTML is a <pre> preview of extracted text, not visual layout."])
      ];
      break;

    case "docx":
      capabilities = [
        slot("text", true),
        slot("metadata", false, [DOCX_META]),
        slot("html", true),
        slot("ocr", false, ["OCR does not apply to DOCX in DocMind."]),
        slot("pages", false)
      ];
      break;

    case "image":
      capabilities = [
        slot("text", true, ["Text is obtained via OCR."]),
        slot("metadata", false, [IMAGE_META]),
        slot("html", false, [IMAGE_HTML]),
        slot("ocr", true),
        slot("pages", false)
      ];
      break;

    case "text":
      capabilities = [
        slot("text", true),
        slot("metadata", true, [TEXT_META_NOTE]),
        slot("html", true),
        slot("ocr", false, ["OCR does not apply to plain text files."]),
        slot("pages", false)
      ];
      break;

    default:
      // Unclassified input: nothing is supported, and the reason is reported
      // once at report level rather than on each capability.
      reportWarnings.push(UNKNOWN_KIND);
      capabilities = [
        slot("text", false),
        slot("metadata", false),
        slot("html", false),
        slot("ocr", false),
        slot("pages", false)
      ];
  }

  return {
    kind,
    runtime,
    capabilities,
    warnings: reportWarnings.length > 0 ? reportWarnings : undefined
  };
}
748
+
749
+ // src/introspection.ts
750
/**
 * Resolves the PDF OCR mode used for planning: the caller's `pdf.ocr` when
 * set, otherwise `"auto"`.
 *
 * @param {{ocr?: string}} [pdf] PDF options bag; may be `undefined`.
 * @returns {string} The explicit `ocr` value or `"auto"`.
 */
function resolvePdfOcrMode(pdf) {
  const explicitMode = pdf?.ocr;
  return explicitMode ?? "auto";
}
753
/**
 * Describes the processing steps `analyzeFile` would take for a file kind.
 *
 * Every plan starts with a completed `detect_kind` step followed by the
 * kind-specific steps. For PDFs the `pdf_ocr` step is `"skipped"` when
 * `pdfOcr` is `"off"` and `"planned"` otherwise. Kinds other than `"pdf"`,
 * `"docx"`, `"image"` and `"text"` yield a single failed `route` step.
 *
 * A fresh object graph is returned on every call.
 *
 * @param {string} kind Detected file kind.
 * @param {string} pdfOcr PDF OCR mode; only consulted when `kind` is `"pdf"`.
 * @returns {{intent: string, steps: Array<{id: string, status: string}>}}
 */
function planAnalyzeFile(kind, pdfOcr) {
  let kindSteps;
  switch (kind) {
    case "pdf":
      kindSteps = [
        { id: "pdf_parse", status: "planned" },
        { id: "pdf_ocr", status: pdfOcr === "off" ? "skipped" : "planned" }
      ];
      break;
    case "docx":
      kindSteps = [{ id: "docx_mammoth", status: "planned" }];
      break;
    case "image":
      kindSteps = [{ id: "image_ocr", status: "planned" }];
      break;
    case "text":
      kindSteps = [{ id: "utf8_decode", status: "planned" }];
      break;
    default:
      kindSteps = [{ id: "route", status: "failed" }];
  }
  return {
    intent: "analyzeFile",
    steps: [{ id: "detect_kind", status: "done" }, ...kindSteps]
  };
}
801
/**
 * Build the step plan for a public intent and a detected file kind.
 *
 * A nullish `intentOpt` is treated as `"analyzeFile"`. The `"analyzeFile"`
 * intent, and any intent not listed below, delegates to
 * `planAnalyzeFile(kind, pdfOcrForAnalyze)`. All other plans begin with a
 * completed `detect_kind` step.
 *
 * - `extractText`: PDFs list `pdf_parse`, `pdfjs_per_page` and a skipped
 *   `pdf_ocr`; other kinds reuse the `analyzeFile` plan (built with OCR mode
 *   `"off"`) relabelled as `extractText`.
 * - `extractMetadata`: `pdf_metadata` for PDFs, `utf8_decode` for text, and a
 *   `metadata_stub` step that is planned for docx/image and skipped otherwise.
 * - `convertToHtml`: kind-specific steps for docx, text and pdf; a skipped
 *   `rich_html` step for everything else.
 * - `runOcr`: kind-specific steps for pdf, image and docx; a skipped `ocr`
 *   step for everything else.
 *
 * @param intentOpt Requested intent, or null/undefined for the default.
 * @param kind Detected file kind.
 * @param pdfOcrForAnalyze OCR mode forwarded only to the `analyzeFile` plan.
 * @returns A plan object of the form `{ intent, steps }`.
 */
function planForIntent(intentOpt, kind, pdfOcrForAnalyze) {
  const intent = intentOpt ?? "analyzeFile";

  // Small local builders so each branch reads as a list of step ids.
  const planned = (id) => ({ id, status: "planned" });
  const skipped = (id) => ({ id, status: "skipped" });
  const planOf = (name, ...rest) => ({
    intent: name,
    steps: [{ id: "detect_kind", status: "done" }, ...rest]
  });

  switch (intent) {
    case "extractText": {
      if (kind === "pdf") {
        return planOf(
          "extractText",
          planned("pdf_parse"),
          planned("pdfjs_per_page"),
          skipped("pdf_ocr")
        );
      }
      // Non-PDF kinds share the analyzeFile steps; only the intent label differs.
      const base = planAnalyzeFile(kind, "off");
      return { ...base, intent: "extractText" };
    }

    case "extractMetadata":
      switch (kind) {
        case "pdf":
          return planOf("extractMetadata", planned("pdf_metadata"));
        case "text":
          return planOf("extractMetadata", planned("utf8_decode"));
        case "docx":
        case "image":
          return planOf("extractMetadata", planned("metadata_stub"));
        default:
          return planOf("extractMetadata", skipped("metadata_stub"));
      }

    case "convertToHtml":
      switch (kind) {
        case "docx":
          return planOf("convertToHtml", planned("docx_mammoth_html"));
        case "text":
          return planOf("convertToHtml", planned("utf8_decode"), planned("wrap_pre"));
        case "pdf":
          return planOf("convertToHtml", planned("pdf_text_layer"), planned("wrap_pre"));
        default:
          return planOf("convertToHtml", skipped("rich_html"));
      }

    case "runOcr":
      switch (kind) {
        case "pdf":
          return planOf("runOcr", planned("pdf_parse"), planned("pdf_ocr_forced"));
        case "image":
          return planOf("runOcr", planned("tesseract_ocr"));
        case "docx":
          return planOf("runOcr", planned("docx_structured_extract"));
        default:
          return planOf("runOcr", skipped("ocr"));
      }

    default:
      // Covers "analyzeFile" itself and any unrecognised intent value.
      return planAnalyzeFile(kind, pdfOcrForAnalyze);
  }
}
923
/**
 * Report the capabilities available for the given input's detected file kind.
 *
 * Sequence: check `options.signal` with `throwIfAborted`, resolve the input
 * with `resolveNodeAnalyzeInput`, validate it with
 * `assertValidAnalyzeFileInput`, detect its kind, and build the report from
 * that kind alone. The signal is only checked once, before the input is
 * resolved.
 *
 * @param input Input accepted by `resolveNodeAnalyzeInput`.
 * @param options Optional options; only `signal` is read here.
 * @returns A promise for the result of `buildNodeCapabilityReport(kind)`.
 */
async function getCapabilities(input, options) {
  const signal = options == null ? undefined : options.signal;
  throwIfAborted(signal);

  const source = await resolveNodeAnalyzeInput(input);
  assertValidAnalyzeFileInput(source);

  return buildNodeCapabilityReport(detectFileKind(source));
}
930
/**
 * Describe the processing plan for an input without building it by hand.
 *
 * Sequence: check `options.signal` with `throwIfAborted`, resolve and
 * validate the input, detect its kind, then derive the intent (defaulting to
 * `"analyzeFile"`) and the PDF OCR mode (via `resolvePdfOcrMode`), build the
 * plan with `planForIntent`, and hand everything to `buildNodeExplainReport`.
 * `options.intent` and `options.pdf` are read after the input has been
 * resolved, matching the original ordering.
 *
 * @param input Input accepted by `resolveNodeAnalyzeInput`.
 * @param options Optional options; `signal`, `intent` and `pdf` are read.
 * @returns A promise for the result of `buildNodeExplainReport`.
 */
async function explainAnalysisPlan(input, options) {
  throwIfAborted(options?.signal);

  const source = await resolveNodeAnalyzeInput(input);
  assertValidAnalyzeFileInput(source);
  const detectedKind = detectFileKind(source);

  const requestedIntent = options?.intent;
  const effectiveIntent = requestedIntent ?? "analyzeFile";
  const ocrMode = resolvePdfOcrMode(options?.pdf);

  return buildNodeExplainReport(
    detectedKind,
    effectiveIntent,
    ocrMode,
    planForIntent(effectiveIntent, detectedKind, ocrMode)
  );
}
940
+
941
+ export { analyzeFile, bufferToInput, convertToHtml, explainAnalysisPlan, extractMetadata, extractText, getCapabilities, readFileToInput, resolveNodeAnalyzeInput, runOcr };
550
942
  //# sourceMappingURL=index.js.map
551
943
  //# sourceMappingURL=index.js.map
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@dragon708/docmind-node",
3
- "version": "1.1.0",
4
- "description": "Node.js DocMind entry: PDF, DOCX, OCR, text, and fs helpers.",
3
+ "version": "1.4.0",
4
+ "description": "Official DocMind Node facade: analyzeFile, intent APIs, PDF/DOCX/OCR, and fs helpers.",
5
5
  "type": "module",
6
6
  "main": "./dist/index.js",
7
7
  "module": "./dist/index.js",
@@ -14,7 +14,8 @@
14
14
  }
15
15
  },
16
16
  "files": [
17
- "dist"
17
+ "dist/**/*.js",
18
+ "dist/**/*.d.ts"
18
19
  ],
19
20
  "publishConfig": {
20
21
  "access": "public"
@@ -33,8 +34,8 @@
33
34
  "dependencies": {
34
35
  "@dragon708/docmind-docx": "^1.0.0",
35
36
  "@dragon708/docmind-ocr": "^1.0.0",
36
- "@dragon708/docmind-pdf": "^1.0.0",
37
- "@dragon708/docmind-shared": "^1.0.0"
37
+ "@dragon708/docmind-pdf": "^2.0.0",
38
+ "@dragon708/docmind-shared": "^1.1.0"
38
39
  },
39
40
  "devDependencies": {
40
41
  "@types/node": "^20.19.37",
package/dist/index.js.map DELETED
@@ -1 +0,0 @@
1
- {"version":3,"sources":["../src/inputBytes.ts","../src/analyzers/docx.ts","../src/analyzers/image.ts","../src/analyzers/pdf.ts","../src/resolveNodeInput.ts","../src/analyze.ts","../src/publicActions.ts"],"names":["extractDocx","runPdf","assertValidAnalyzeFileInput","detectFileKind","ocr","analyzeText","notImplementedResult","UNKNOWN_FORMAT_WARNING"],"mappings":";;;;;;;;;;AASO,SAAS,kBAAkB,KAAA,EAAoD;AACpF,EAAA,OAAO,YAAA,CAAa,KAAK,CAAA,IAAK,aAAA,CAAc,KAAK,KAAK,MAAA,CAAO,KAAK,CAAA,IAAK,MAAA,CAAO,KAAK,CAAA;AACrF;AAGA,eAAsB,qBAAqB,KAAA,EAAiD;AAC1F,EAAA,IAAI,CAAC,iBAAA,CAAkB,KAAK,CAAA,EAAG;AAC7B,IAAA,OAAO,IAAI,WAAW,CAAC,CAAA;AAAA,EACzB;AACA,EAAA,OAAO,aAAa,KAAK,CAAA;AAC3B;;;ACZA,eAAsB,kBAAA,CACpB,OACA,MAAA,EACyB;AACzB,EAAA,IAAI,QAAQ,OAAA,EAAS;AACnB,IAAA,MAAM,GAAA,GAAM,IAAI,KAAA,CAAM,2BAA2B,CAAA;AACjD,IAAA,GAAA,CAAI,IAAA,GAAO,YAAA;AACX,IAAA,MAAM,GAAA;AAAA,EACR;AAEA,EAAA,MAAM,IAAA,GAAO,MAAM,oBAAA,CAAqB,KAAK,CAAA;AAC7C,EAAA,IAAI,IAAA,CAAK,eAAe,CAAA,EAAG;AACzB,IAAA,OAAO;AAAA,MACL,QAAA,EAAU,MAAA;AAAA,MACV,QAAA,EAAU,MAAA;AAAA,MACV,MAAA,EAAQ,IAAA;AAAA,MACR,IAAA,EAAM,MAAA;AAAA,MACN,IAAA,EAAM,EAAA;AAAA,MACN,IAAA,EAAM,EAAA;AAAA,MACN,QAAA,EAAU,CAAC,+CAA+C;AAAA,KAC5D;AAAA,EACF;AAEA,EAAA,MAAM,CAAA,GAAI,MAAMA,WAAA,CAAY,IAAI,CAAA;AAChC,EAAA,OAAO;AAAA,IACL,QAAA,EAAU,MAAA;AAAA,IACV,QAAA,EAAU,MAAA;AAAA,IACV,MAAA,EAAQ,IAAA;AAAA,IACR,IAAA,EAAM,MAAA;AAAA,IACN,MAAM,CAAA,CAAE,IAAA;AAAA,IACR,MAAM,CAAA,CAAE,IAAA;AAAA,IACR,QAAA,EAAU,CAAC,GAAG,CAAA,CAAE,QAAQ;AAAA,GAC1B;AACF;AChCA,eAAsB,mBAAA,CACpB,OACA,OAAA,EACyB;AACzB,EAAA,IAAI,OAAA,EAAS,QAAQ,OAAA,EAAS;AAC5B,IAAA,MAAM,GAAA,GAAM,IAAI,KAAA,CAAM,2BAA2B,CAAA;AACjD,IAAA,GAAA,CAAI,IAAA,GAAO,YAAA;AACX,IAAA,MAAM,GAAA;AAAA,EACR;AAEA,EAAA,MAAM,IAAA,GAAO,MAAM,oBAAA,CAAqB,KAAK,CAAA;AAC7C,EAAA,IAAI,IAAA,CAAK,eAAe,CAAA,EAAG;AACzB,IAAA,OAAO;AAAA,MACL,QAAA,EAAU,OAAA;AAAA,MACV,QAAA,EAAU,OAAA;AAAA,MACV,MAAA,EAAQ,IAAA;AAAA,MACR,IAAA,EAAM,OAAA;AAAA,MACN,IAAA,EAAM,EAAA;AAAA,MACN,UAAA,EAAY,CAAA;AAAA,MACZ,OAAA,EAAS,IAAA;AAAA,MACT,QAAA,EAAU,CAAC,4CAA4C;AAAA,KACzD;AAAA,E
ACF;AAEA,EAAA,MAAM,OAAA,GAAU;AAAA,IACd,GAAI,OAAA,EAAS,GAAA,IAAO,EAAC;AAAA,IACrB,MAAA,EAAQ,OAAA,EAAS,GAAA,EAAK,MAAA,IAAU,OAAA,EAAS;AAAA,GAC3C;AAEA,EAAA,MAAM,CAAA,GAAI,MAAM,GAAA,CAAI,IAAA,EAAM,OAAO,CAAA;AACjC,EAAA,OAAO;AAAA,IACL,QAAA,EAAU,OAAA;AAAA,IACV,QAAA,EAAU,OAAA;AAAA,IACV,MAAA,EAAQ,IAAA;AAAA,IACR,IAAA,EAAM,OAAA;AAAA,IACN,MAAM,CAAA,CAAE,IAAA;AAAA,IACR,YAAY,CAAA,CAAE,UAAA;AAAA,IACd,SAAS,CAAA,CAAE,OAAA;AAAA,IACX,UAAU;AAAC,GACb;AACF;AClCA,eAAsB,iBAAA,CACpB,OACA,OAAA,EACyB;AACzB,EAAA,IAAI,OAAA,EAAS,QAAQ,OAAA,EAAS;AAC5B,IAAA,MAAM,GAAA,GAAM,IAAI,KAAA,CAAM,2BAA2B,CAAA;AACjD,IAAA,GAAA,CAAI,IAAA,GAAO,YAAA;AACX,IAAA,MAAM,GAAA;AAAA,EACR;AAEA,EAAA,MAAM,IAAA,GAAO,MAAM,oBAAA,CAAqB,KAAK,CAAA;AAC7C,EAAA,IAAI,IAAA,CAAK,eAAe,CAAA,EAAG;AACzB,IAAA,OAAO;AAAA,MACL,QAAA,EAAU,KAAA;AAAA,MACV,QAAA,EAAU,KAAA;AAAA,MACV,MAAA,EAAQ,IAAA;AAAA,MACR,IAAA,EAAM,KAAA;AAAA,MACN,IAAA,EAAM,EAAA;AAAA,MACN,KAAA,EAAO,CAAA;AAAA,MACP,QAAA,EAAU,EAAE,IAAA,EAAM,EAAC,EAAE;AAAA,MACrB,QAAA,EAAU,CAAC,+CAA+C,CAAA;AAAA,MAC1D,QAAA,EAAU,KAAA;AAAA,MACV,OAAA,EAAS;AAAA,KACX;AAAA,EACF;AAEA,EAAA,MAAM,UAAU,OAAA,EAAS,GAAA;AACzB,EAAA,MAAM,OAAA,GAA6B;AAAA,IACjC,GAAG,OAAA;AAAA,IACH,GAAA,EAAK,SAAS,GAAA,IAAO,MAAA;AAAA,IACrB,QAAA,EAAU,OAAA,EAAS,QAAA,IAAY,OAAA,EAAS,GAAA,EAAK,KAAA;AAAA,IAC7C,MAAA,EAAQ,OAAA,EAAS,MAAA,IAAU,OAAA,EAAS;AAAA,GACtC;AAEA,EAAA,MAAM,CAAA,GAAI,MAAMC,UAAA,CAAO,IAAA,EAAM,OAAO,CAAA;AACpC,EAAA,OAAO;AAAA,IACL,QAAA,EAAU,KAAA;AAAA,IACV,QAAA,EAAU,KAAA;AAAA,IACV,MAAA,EAAQ,IAAA;AAAA,IACR,IAAA,EAAM,KAAA;AAAA,IACN,MAAM,CAAA,CAAE,IAAA;AAAA,IACR,OAAO,CAAA,CAAE,KAAA;AAAA,IACT,UAAU,CAAA,CAAE,QAAA;AAAA,IACZ,QAAA,EAAU,CAAC,GAAG,CAAA,CAAE,QAAQ,CAAA;AAAA,IACxB,UAAU,CAAA,CAAE,QAAA;AAAA,IACZ,SAAS,CAAA,CAAE;AAAA,GACb;AACF;AClDA,SAAS,aAAa,SAAA,EAAiC;AACrD,EAAA,OAAO,SAAA,YAAqB,GAAA,GAAM,aAAA,CAAc,SAAS,CAAA,GAAI,SAAA;AAC/D;AAKA,eAAsB,gBAAgB,IAAA,EAAiD;AACrF,EAAA,MAAM,MAAA,GAAS,aAAa,IAAI,CAAA;AAChC,EAAA,MAAM,IAAA,GAAO,MAAM,QAAA,CAAS,MAAM,CAAA;AAClC,EAAA,OAAO;AAAA,IACL,IAAA;AAAA,IACA,IAAA,EAAM,SAAS,MAAM;AAAA,GACvB;AACF;AAGO,SAAS,aAAA,CAA
c,QAAgB,IAAA,EAAmC;AAC/E,EAAA,OAAO,IAAA,KAAS,SAAY,EAAE,IAAA,EAAM,QAAQ,IAAA,EAAK,GAAI,EAAE,IAAA,EAAM,MAAA,EAAO;AACtE;AAKA,eAAsB,wBAAwB,KAAA,EAAuD;AACnG,EAAA,IAAI,OAAO,KAAA,KAAU,QAAA,IAAY,KAAA,YAAiB,GAAA,EAAK;AACrD,IAAA,OAAO,gBAAgB,KAAK,CAAA;AAAA,EAC9B;AACA,EAAA,OAAO,KAAA;AACT;;;ACvBA,eAAsB,WAAA,CACpB,OACA,OAAA,EACyB;AACzB,EAAA,IAAI,OAAA,EAAS,QAAQ,OAAA,EAAS;AAC5B,IAAA,MAAM,GAAA,GAAM,IAAI,KAAA,CAAM,2BAA2B,CAAA;AACjD,IAAA,GAAA,CAAI,IAAA,GAAO,YAAA;AACX,IAAA,MAAM,GAAA;AAAA,EACR;AAEA,EAAA,MAAM,QAAA,GAAW,MAAM,uBAAA,CAAwB,KAAK,CAAA;AACpD,EAAA,2BAAA,CAA4B,QAAQ,CAAA;AAEpC,EAAA,MAAM,QAAA,GAAW,eAAe,QAAQ,CAAA;AAExC,EAAA,QAAQ,QAAA;AAAU,IAChB,KAAK,KAAA;AACH,MAAA,OAAO,iBAAA,CAAkB,UAAiC,OAAO,CAAA;AAAA,IACnE,KAAK,MAAA;AACH,MAAA,OAAO,kBAAA,CAAmB,QAAA,EAAiC,OAAA,EAAS,MAAM,CAAA;AAAA,IAC5E,KAAK,OAAA;AACH,MAAA,OAAO,mBAAA,CAAoB,UAAiC,OAAO,CAAA;AAAA,IACrE,KAAK,MAAA;AACH,MAAA,OAAO,YAAY,QAAA,EAAiC,EAAE,MAAA,EAAQ,OAAA,EAAS,QAAQ,CAAA;AAAA,IACjF;AACE,MAAA,OAAO,oBAAA,CAAqB,QAAA,EAAU,MAAA,EAAQ,CAAC,sBAAsB,CAAC,CAAA;AAAA;AAE5E;ACrBA,SAAS,eAAe,MAAA,EAA4B;AAClD,EAAA,IAAI,QAAQ,OAAA,EAAS;AACnB,IAAA,MAAM,GAAA,GAAM,IAAI,KAAA,CAAM,2BAA2B,CAAA;AACjD,IAAA,GAAA,CAAI,IAAA,GAAO,YAAA;AACX,IAAA,MAAM,GAAA;AAAA,EACR;AACF;AAEA,eAAe,QAAQ,KAAA,EAAuD;AAC5E,EAAA,MAAM,QAAA,GAAW,MAAM,uBAAA,CAAwB,KAAK,CAAA;AACpD,EAAAC,4BAA4B,QAAQ,CAAA;AACpC,EAAA,OAAO,QAAA;AACT;AAEA,SAAS,kBAAkB,CAAA,EAAmB;AAC5C,EAAA,OAAO,CAAA,CACJ,OAAA,CAAQ,IAAA,EAAM,OAAO,EACrB,OAAA,CAAQ,IAAA,EAAM,MAAM,CAAA,CACpB,QAAQ,IAAA,EAAM,MAAM,CAAA,CACpB,OAAA,CAAQ,MAAM,QAAQ,CAAA;AAC3B;AAEA,IAAM,kBAAA,GACJ,yGAAA;AAEF,IAAM,mBAAA,GACJ,6DAAA;AAMF,eAAsB,WAAA,CACpB,OACA,OAAA,EACyB;AACzB,EAAA,cAAA,CAAe,SAAS,MAAM,CAAA;AAC9B,EAAA,MAAM,QAAA,GAAW,MAAM,OAAA,CAAQ,KAAK,CAAA;AACpC,EAAA,MAAM,IAAA,GAAOC,eAAe,QAAQ,CAAA;AACpC,EAAA,MAAM,SAAS,OAAA,EAAS,MAAA;AAExB,EAAA,QAAQ,IAAA;AAAM,IACZ,KAAK,KAAA,EAAO;AACV,MAAA,MAAM,IAAA,GAAO,MAAM,oBAAA,CAAqB,QAAQ,CAAA;AAChD,MAAA,IAAI,IAAA,CAAK,eAAe,CAAA,EAAG;AACzB,QAAA,OAAO;AAAA,UACL,QAAA,EAAU,KAAA;AAAA,UACV,QAAA,EAAU,KAAA;AAAA,UACV,MAAA,EAAQ,IAAA;AAAA,
UACR,IAAA,EAAM,KAAA;AAAA,UACN,IAAA,EAAM,EAAA;AAAA,UACN,KAAA,EAAO,CAAA;AAAA,UACP,QAAA,EAAU,EAAE,IAAA,EAAM,EAAC,EAAE;AAAA,UACrB,QAAA,EAAU,CAAC,+CAA+C,CAAA;AAAA,UAC1D,QAAA,EAAU,KAAA;AAAA,UACV,OAAA,EAAS;AAAA,SACX;AAAA,MACF;AACA,MAAA,MAAM,CAAA,GAAI,MAAM,kBAAA,CAAmB,IAAI,CAAA;AACvC,MAAA,OAAO;AAAA,QACL,QAAA,EAAU,KAAA;AAAA,QACV,QAAA,EAAU,KAAA;AAAA,QACV,MAAA,EAAQ,IAAA;AAAA,QACR,IAAA,EAAM,KAAA;AAAA,QACN,MAAM,CAAA,CAAE,IAAA;AAAA,QACR,OAAO,CAAA,CAAE,KAAA;AAAA,QACT,QAAA,EAAU,EAAE,IAAA,EAAM,EAAC,EAAE;AAAA,QACrB,UAAU,CAAA,CAAE,QAAA;AAAA,QACZ,QAAA,EAAU,KAAA;AAAA,QACV,OAAA,EAAS;AAAA,OACX;AAAA,IACF;AAAA,IACA,KAAK,MAAA,EAAQ;AACX,MAAA,MAAM,IAAA,GAAO,MAAM,oBAAA,CAAqB,QAAQ,CAAA;AAChD,MAAA,IAAI,IAAA,CAAK,eAAe,CAAA,EAAG;AACzB,QAAA,OAAO;AAAA,UACL,QAAA,EAAU,MAAA;AAAA,UACV,QAAA,EAAU,MAAA;AAAA,UACV,MAAA,EAAQ,IAAA;AAAA,UACR,IAAA,EAAM,MAAA;AAAA,UACN,IAAA,EAAM,EAAA;AAAA,UACN,IAAA,EAAM,EAAA;AAAA,UACN,QAAA,EAAU,CAAC,+CAA+C;AAAA,SAC5D;AAAA,MACF;AACA,MAAA,MAAM,CAAA,GAAI,MAAM,mBAAA,CAAoB,IAAI,CAAA;AACxC,MAAA,OAAO;AAAA,QACL,QAAA,EAAU,MAAA;AAAA,QACV,QAAA,EAAU,MAAA;AAAA,QACV,MAAA,EAAQ,IAAA;AAAA,QACR,IAAA,EAAM,MAAA;AAAA,QACN,MAAM,CAAA,CAAE,IAAA;AAAA,QACR,IAAA,EAAM,EAAA;AAAA,QACN,UAAU,CAAA,CAAE;AAAA,OACd;AAAA,IACF;AAAA,IACA,KAAK,OAAA,EAAS;AACZ,MAAA,MAAM,IAAA,GAAO,MAAM,oBAAA,CAAqB,QAAQ,CAAA;AAChD,MAAA,IAAI,IAAA,CAAK,eAAe,CAAA,EAAG;AACzB,QAAA,OAAO;AAAA,UACL,QAAA,EAAU,OAAA;AAAA,UACV,QAAA,EAAU,OAAA;AAAA,UACV,MAAA,EAAQ,IAAA;AAAA,UACR,IAAA,EAAM,OAAA;AAAA,UACN,IAAA,EAAM,EAAA;AAAA,UACN,UAAA,EAAY,CAAA;AAAA,UACZ,OAAA,EAAS,IAAA;AAAA,UACT,QAAA,EAAU,CAAC,4CAA4C;AAAA,SACzD;AAAA,MACF;AACA,MAAA,MAAM,OAAA,GAAU;AAAA,QACd,GAAI,OAAA,EAAS,GAAA,IAAO,EAAC;AAAA,QACrB,MAAA,EAAQ,OAAA,EAAS,GAAA,EAAK,MAAA,IAAU;AAAA,OAClC;AACA,MAAA,MAAM,CAAA,GAAI,MAAMC,GAAAA,CAAI,IAAA,EAAM,OAAO,CAAA;AACjC,MAAA,OAAO;AAAA,QACL,QAAA,EAAU,OAAA;AAAA,QACV,QAAA,EAAU,OAAA;AAAA,QACV,MAAA,EAAQ,IAAA;AAAA,QACR,IAAA,EAAM,OAAA;AAAA,QACN,MAAM,CAAA,CAAE,IAAA;AAAA,QACR,YAAY,CAAA,CAAE,UAAA;AAAA,QACd,SAAS,CAAA,CAAE,OAAA;AAAA,QACX,UAAU;AAAC,OACb;AAAA,IACF;AAAA,IACA,KAAK,
MAAA;AACH,MAAA,OAAOC,WAAAA,CAAY,QAAA,EAAiC,EAAE,MAAA,EAAQ,CAAA;AAAA,IAChE;AACE,MAAA,OAAOC,oBAAAA,CAAqB,IAAA,EAAM,MAAA,EAAQ,CAACC,sBAAsB,CAAC,CAAA;AAAA;AAExE;AAMA,eAAsB,eAAA,CACpB,OACA,OAAA,EACyB;AACzB,EAAA,cAAA,CAAe,SAAS,MAAM,CAAA;AAC9B,EAAA,MAAM,QAAA,GAAW,MAAM,OAAA,CAAQ,KAAK,CAAA;AACpC,EAAA,MAAM,IAAA,GAAOJ,eAAe,QAAQ,CAAA;AACpC,EAAA,MAAM,SAAS,OAAA,EAAS,MAAA;AAExB,EAAA,QAAQ,IAAA;AAAM,IACZ,KAAK,KAAA,EAAO;AACV,MAAA,MAAM,IAAA,GAAO,MAAM,oBAAA,CAAqB,QAAQ,CAAA;AAChD,MAAA,IAAI,IAAA,CAAK,eAAe,CAAA,EAAG;AACzB,QAAA,OAAO;AAAA,UACL,QAAA,EAAU,KAAA;AAAA,UACV,QAAA,EAAU,KAAA;AAAA,UACV,MAAA,EAAQ,IAAA;AAAA,UACR,IAAA,EAAM,KAAA;AAAA,UACN,IAAA,EAAM,EAAA;AAAA,UACN,KAAA,EAAO,CAAA;AAAA,UACP,QAAA,EAAU,EAAE,IAAA,EAAM,EAAC,EAAE;AAAA,UACrB,QAAA,EAAU,CAAC,+CAA+C,CAAA;AAAA,UAC1D,QAAA,EAAU,KAAA;AAAA,UACV,OAAA,EAAS;AAAA,SACX;AAAA,MACF;AACA,MAAA,MAAM,CAAA,GAAI,MAAM,kBAAA,CAAmB,IAAI,CAAA;AACvC,MAAA,OAAO;AAAA,QACL,QAAA,EAAU,KAAA;AAAA,QACV,QAAA,EAAU,KAAA;AAAA,QACV,MAAA,EAAQ,IAAA;AAAA,QACR,IAAA,EAAM,KAAA;AAAA,QACN,IAAA,EAAM,EAAA;AAAA,QACN,KAAA,EAAO,CAAA;AAAA,QACP,UAAU,CAAA,CAAE,QAAA;AAAA,QACZ,UAAU,CAAA,CAAE,QAAA;AAAA,QACZ,QAAA,EAAU,KAAA;AAAA,QACV,OAAA,EAAS;AAAA,OACX;AAAA,IACF;AAAA,IACA,KAAK,MAAA;AACH,MAAA,OAAO;AAAA,QACL,QAAA,EAAU,MAAA;AAAA,QACV,QAAA,EAAU,MAAA;AAAA,QACV,MAAA,EAAQ,IAAA;AAAA,QACR,IAAA,EAAM,MAAA;AAAA,QACN,IAAA,EAAM,EAAA;AAAA,QACN,IAAA,EAAM,EAAA;AAAA,QACN,QAAA,EAAU,CAAC,kBAAkB;AAAA,OAC/B;AAAA,IACF,KAAK,OAAA;AACH,MAAA,OAAO;AAAA,QACL,QAAA,EAAU,OAAA;AAAA,QACV,QAAA,EAAU,OAAA;AAAA,QACV,MAAA,EAAQ,IAAA;AAAA,QACR,IAAA,EAAM,OAAA;AAAA,QACN,IAAA,EAAM,EAAA;AAAA,QACN,UAAA,EAAY,CAAA;AAAA,QACZ,OAAA,EAAS,IAAA;AAAA,QACT,QAAA,EAAU,CAAC,mBAAmB;AAAA,OAChC;AAAA,IACF,KAAK,MAAA;AACH,MAAA,OAAOE,WAAAA,CAAY,QAAA,EAAiC,EAAE,MAAA,EAAQ,CAAA;AAAA,IAChE;AACE,MAAA,OAAOC,oBAAAA,CAAqB,IAAA,EAAM,MAAA,EAAQ,CAACC,sBAAsB,CAAC,CAAA;AAAA;AAExE;AAMA,eAAsB,aAAA,CACpB,OACA,OAAA,EACyB;AACzB,EAAA,cAAA,CAAe,SAAS,MAAM,CAAA;AAC9B,EAAA,MAAM,QAAA,GAAW,MAAM,OAAA,CAAQ,KAAK,CAAA;AACpC,EAAA,MAAM,IAAA,GAAOJ,eAAe,QAAQ,CAAA;AACpC,EAAA,MAAM,SAAS,OA
AA,EAAS,MAAA;AAExB,EAAA,QAAQ,IAAA;AAAM,IACZ,KAAK,MAAA,EAAQ;AACX,MAAA,MAAM,IAAA,GAAO,MAAM,oBAAA,CAAqB,QAAQ,CAAA;AAChD,MAAA,IAAI,IAAA,CAAK,eAAe,CAAA,EAAG;AACzB,QAAA,OAAO;AAAA,UACL,QAAA,EAAU,MAAA;AAAA,UACV,QAAA,EAAU,MAAA;AAAA,UACV,MAAA,EAAQ,IAAA;AAAA,UACR,IAAA,EAAM,MAAA;AAAA,UACN,IAAA,EAAM,EAAA;AAAA,UACN,IAAA,EAAM,EAAA;AAAA,UACN,QAAA,EAAU,CAAC,+CAA+C;AAAA,SAC5D;AAAA,MACF;AACA,MAAA,MAAM,CAAC,QAAA,EAAU,QAAQ,CAAA,GAAI,MAAM,QAAQ,GAAA,CAAI;AAAA,QAC7C,oBAAoB,IAAI,CAAA;AAAA,QACxB,kBAAkB,IAAI;AAAA,OACvB,CAAA;AACD,MAAA,OAAO;AAAA,QACL,QAAA,EAAU,MAAA;AAAA,QACV,QAAA,EAAU,MAAA;AAAA,QACV,MAAA,EAAQ,IAAA;AAAA,QACR,IAAA,EAAM,MAAA;AAAA,QACN,MAAM,QAAA,CAAS,IAAA;AAAA,QACf,MAAM,QAAA,CAAS,IAAA;AAAA,QACf,UAAU,CAAC,GAAG,SAAS,QAAA,EAAU,GAAG,SAAS,QAAQ;AAAA,OACvD;AAAA,IACF;AAAA,IACA,KAAK,KAAA,EAAO;AACV,MAAA,MAAM,IAAA,GAAO,MAAM,oBAAA,CAAqB,QAAQ,CAAA;AAChD,MAAA,IAAI,IAAA,CAAK,eAAe,CAAA,EAAG;AACzB,QAAA,OAAO;AAAA,UACL,QAAA,EAAU,KAAA;AAAA,UACV,QAAA,EAAU,KAAA;AAAA,UACV,MAAA,EAAQ,IAAA;AAAA,UACR,IAAA,EAAM,KAAA;AAAA,UACN,IAAA,EAAM,EAAA;AAAA,UACN,KAAA,EAAO,CAAA;AAAA,UACP,QAAA,EAAU,EAAE,IAAA,EAAM,EAAC,EAAE;AAAA,UACrB,QAAA,EAAU,CAAC,+CAA+C,CAAA;AAAA,UAC1D,QAAA,EAAU,KAAA;AAAA,UACV,OAAA,EAAS;AAAA,SACX;AAAA,MACF;AACA,MAAA,MAAM,CAAA,GAAI,MAAM,kBAAA,CAAmB,IAAI,CAAA;AACvC,MAAA,MAAM,IAAA,GAAO,CAAA,KAAA,EAAQ,iBAAA,CAAkB,CAAA,CAAE,IAAI,CAAC,CAAA,MAAA,CAAA;AAC9C,MAAA,OAAO;AAAA,QACL,QAAA,EAAU,KAAA;AAAA,QACV,QAAA,EAAU,KAAA;AAAA,QACV,MAAA,EAAQ,IAAA;AAAA,QACR,IAAA,EAAM,KAAA;AAAA,QACN,MAAM,CAAA,CAAE,IAAA;AAAA,QACR,OAAO,CAAA,CAAE,KAAA;AAAA,QACT,QAAA,EAAU,EAAE,IAAA,EAAM,EAAC,EAAE;AAAA,QACrB,IAAA;AAAA,QACA,QAAA,EAAU;AAAA,UACR,GAAG,CAAA,CAAE,QAAA;AAAA,UACL;AAAA,SACF;AAAA,QACA,QAAA,EAAU,KAAA;AAAA,QACV,OAAA,EAAS;AAAA,OACX;AAAA,IACF;AAAA,IACA,KAAK,MAAA,EAAQ;AACX,MAAA,MAAM,IAAI,MAAME,WAAAA,CAAY,QAAA,EAAiC,EAAE,QAAQ,CAAA;AACvE,MAAA,MAAM,IAAA,GAAO,CAAA,KAAA,EAAQ,iBAAA,CAAkB,CAAA,CAAE,IAAI,CAAC,CAAA,MAAA,CAAA;AAC9C,MAAA,OAAO;AAAA,QACL,GAAG,CAAA;AAAA,QACH,IAAA;AAAA,QACA,QAAA,EAAU;AAAA,UACR,GAAG,CAAA,CAAE,QAAA;AAAA,UACL;AAAA;AACF,OACF
;AAAA,IACF;AAAA,IACA,KAAK,OAAA;AACH,MAAA,OAAO;AAAA,QACL,QAAA,EAAU,OAAA;AAAA,QACV,QAAA,EAAU,OAAA;AAAA,QACV,MAAA,EAAQ,IAAA;AAAA,QACR,IAAA,EAAM,OAAA;AAAA,QACN,IAAA,EAAM,EAAA;AAAA,QACN,UAAA,EAAY,CAAA;AAAA,QACZ,OAAA,EAAS,IAAA;AAAA,QACT,QAAA,EAAU,CAAC,qEAAqE;AAAA,OAClF;AAAA,IACF;AACE,MAAA,OAAOC,oBAAAA,CAAqB,IAAA,EAAM,MAAA,EAAQ,CAACC,sBAAsB,CAAC,CAAA;AAAA;AAExE;AAMA,eAAsB,MAAA,CACpB,OACA,OAAA,EACyB;AACzB,EAAA,cAAA,CAAe,SAAS,MAAM,CAAA;AAC9B,EAAA,MAAM,QAAA,GAAW,MAAM,OAAA,CAAQ,KAAK,CAAA;AACpC,EAAA,MAAM,IAAA,GAAOJ,eAAe,QAAQ,CAAA;AACpC,EAAA,MAAM,SAAS,OAAA,EAAS,MAAA;AACxB,EAAA,MAAM,IAAA,GAAO,OAAA,EAAS,GAAA,EAAK,KAAA,IAAS,SAAS,GAAA,EAAK,QAAA;AAElD,EAAA,QAAQ,IAAA;AAAM,IACZ,KAAK,KAAA,EAAO;AACV,MAAA,MAAM,IAAA,GAAO,MAAM,oBAAA,CAAqB,QAAQ,CAAA;AAChD,MAAA,IAAI,IAAA,CAAK,eAAe,CAAA,EAAG;AACzB,QAAA,OAAO;AAAA,UACL,QAAA,EAAU,KAAA;AAAA,UACV,QAAA,EAAU,KAAA;AAAA,UACV,MAAA,EAAQ,IAAA;AAAA,UACR,IAAA,EAAM,KAAA;AAAA,UACN,IAAA,EAAM,EAAA;AAAA,UACN,KAAA,EAAO,CAAA;AAAA,UACP,QAAA,EAAU,EAAE,IAAA,EAAM,EAAC,EAAE;AAAA,UACrB,QAAA,EAAU,CAAC,+CAA+C,CAAA;AAAA,UAC1D,QAAA,EAAU,KAAA;AAAA,UACV,OAAA,EAAS;AAAA,SACX;AAAA,MACF;AACA,MAAA,MAAM,CAAA,GAAI,MAAM,UAAA,CAAW,IAAA,EAAM;AAAA,QAC/B,GAAG,OAAA,EAAS,GAAA;AAAA,QACZ,GAAA,EAAK,OAAA;AAAA,QACL,QAAA,EAAU,IAAA,IAAQ,OAAA,EAAS,GAAA,EAAK,QAAA;AAAA,QAChC,MAAA,EAAQ,OAAA,EAAS,GAAA,EAAK,MAAA,IAAU;AAAA,OACjC,CAAA;AACD,MAAA,OAAO;AAAA,QACL,QAAA,EAAU,KAAA;AAAA,QACV,QAAA,EAAU,KAAA;AAAA,QACV,MAAA,EAAQ,IAAA;AAAA,QACR,IAAA,EAAM,KAAA;AAAA,QACN,MAAM,CAAA,CAAE,IAAA;AAAA,QACR,OAAO,CAAA,CAAE,KAAA;AAAA,QACT,UAAU,CAAA,CAAE,QAAA;AAAA,QACZ,QAAA,EAAU,CAAC,GAAG,CAAA,CAAE,QAAQ,CAAA;AAAA,QACxB,UAAU,CAAA,CAAE,QAAA;AAAA,QACZ,SAAS,CAAA,CAAE;AAAA,OACb;AAAA,IACF;AAAA,IACA,KAAK,OAAA,EAAS;AACZ,MAAA,MAAM,IAAA,GAAO,MAAM,oBAAA,CAAqB,QAAQ,CAAA;AAChD,MAAA,IAAI,IAAA,CAAK,eAAe,CAAA,EAAG;AACzB,QAAA,OAAO;AAAA,UACL,QAAA,EAAU,OAAA;AAAA,UACV,QAAA,EAAU,OAAA;AAAA,UACV,MAAA,EAAQ,IAAA;AAAA,UACR,IAAA,EAAM,OAAA;AAAA,UACN,IAAA,EAAM,EAAA;AAAA,UACN,UAAA,EAAY,CAAA;AAAA,UACZ,OAAA,EAAS,IAAA;AAAA,UACT,QAAA,EAAU,CAAC,4CAA4C;AAAA,SACzD
;AAAA,MACF;AACA,MAAA,MAAM,OAAA,GAAU;AAAA,QACd,GAAI,OAAA,EAAS,GAAA,IAAO,EAAC;AAAA,QACrB,MAAA,EAAQ,OAAA,EAAS,GAAA,EAAK,MAAA,IAAU;AAAA,OAClC;AACA,MAAA,MAAM,CAAA,GAAI,MAAMC,GAAAA,CAAI,IAAA,EAAM,OAAO,CAAA;AACjC,MAAA,OAAO;AAAA,QACL,QAAA,EAAU,OAAA;AAAA,QACV,QAAA,EAAU,OAAA;AAAA,QACV,MAAA,EAAQ,IAAA;AAAA,QACR,IAAA,EAAM,OAAA;AAAA,QACN,MAAM,CAAA,CAAE,IAAA;AAAA,QACR,YAAY,CAAA,CAAE,UAAA;AAAA,QACd,SAAS,CAAA,CAAE,OAAA;AAAA,QACX,UAAU;AAAC,OACb;AAAA,IACF;AAAA,IACA,KAAK,MAAA,EAAQ;AACX,MAAA,MAAM,IAAA,GAAO,MAAM,oBAAA,CAAqB,QAAQ,CAAA;AAChD,MAAA,IAAI,IAAA,CAAK,eAAe,CAAA,EAAG;AACzB,QAAA,OAAO;AAAA,UACL,QAAA,EAAU,MAAA;AAAA,UACV,QAAA,EAAU,MAAA;AAAA,UACV,MAAA,EAAQ,IAAA;AAAA,UACR,IAAA,EAAM,MAAA;AAAA,UACN,IAAA,EAAM,EAAA;AAAA,UACN,IAAA,EAAM,EAAA;AAAA,UACN,QAAA,EAAU,CAAC,+CAA+C;AAAA,SAC5D;AAAA,MACF;AACA,MAAA,MAAM,CAAA,GAAI,MAAM,WAAA,CAAY,IAAI,CAAA;AAChC,MAAA,OAAO;AAAA,QACL,QAAA,EAAU,MAAA;AAAA,QACV,QAAA,EAAU,MAAA;AAAA,QACV,MAAA,EAAQ,IAAA;AAAA,QACR,IAAA,EAAM,MAAA;AAAA,QACN,MAAM,CAAA,CAAE,IAAA;AAAA,QACR,MAAM,CAAA,CAAE,IAAA;AAAA,QACR,QAAA,EAAU;AAAA,UACR,GAAG,CAAA,CAAE,QAAA;AAAA,UACL;AAAA;AACF,OACF;AAAA,IACF;AAAA,IACA,KAAK,MAAA;AACH,MAAA,OAAOC,WAAAA,CAAY,QAAA,EAAiC,EAAE,MAAA,EAAQ,CAAA;AAAA,IAChE;AACE,MAAA,OAAOC,oBAAAA,CAAqB,IAAA,EAAM,MAAA,EAAQ,CAACC,sBAAsB,CAAC,CAAA;AAAA;AAExE","file":"index.js","sourcesContent":["import type { DetectFileKindInput, FileLikeInput } from \"@dragon708/docmind-shared\";\nimport {\n isBinaryInput,\n isBlob,\n isFile,\n isNamedInput,\n toUint8Array,\n} from \"@dragon708/docmind-shared\";\n\nexport function isByteBackedInput(input: DetectFileKindInput): input is FileLikeInput {\n return isNamedInput(input) || isBinaryInput(input) || isBlob(input) || isFile(input);\n}\n\n/** Resolves bytes when the input carries a body; otherwise an empty `Uint8Array`. 
*/\nexport async function bytesFromDetectInput(input: DetectFileKindInput): Promise<Uint8Array> {\n if (!isByteBackedInput(input)) {\n return new Uint8Array(0);\n }\n return toUint8Array(input);\n}\n","import { analyzeDocx as extractDocx } from \"@dragon708/docmind-docx\";\nimport type { AnalysisResult, DetectFileKindInput } from \"@dragon708/docmind-shared\";\nimport { bytesFromDetectInput } from \"../inputBytes.js\";\n\n/**\n * DOCX → `@dragon708/docmind-docx`.\n */\nexport async function analyzeDocxForNode(\n input: DetectFileKindInput,\n signal?: AbortSignal,\n): Promise<AnalysisResult> {\n if (signal?.aborted) {\n const err = new Error(\"The operation was aborted\");\n err.name = \"AbortError\";\n throw err;\n }\n\n const data = await bytesFromDetectInput(input);\n if (data.byteLength === 0) {\n return {\n fileKind: \"docx\",\n analyzer: \"docx\",\n status: \"ok\",\n kind: \"docx\",\n text: \"\",\n html: \"\",\n warnings: [\"No document bytes were provided for analysis.\"],\n };\n }\n\n const r = await extractDocx(data);\n return {\n fileKind: \"docx\",\n analyzer: \"docx\",\n status: \"ok\",\n kind: \"docx\",\n text: r.text,\n html: r.html,\n warnings: [...r.warnings],\n };\n}\n","import { ocr } from \"@dragon708/docmind-ocr\";\nimport type { AnalysisResult, DetectFileKindInput } from \"@dragon708/docmind-shared\";\nimport type { NodeAnalyzeOptions } from \"../nodeAnalyzeOptions.js\";\nimport { bytesFromDetectInput } from \"../inputBytes.js\";\n\n/**\n * Image → `@dragon708/docmind-ocr`.\n */\nexport async function analyzeImageForNode(\n input: DetectFileKindInput,\n options?: NodeAnalyzeOptions,\n): Promise<AnalysisResult> {\n if (options?.signal?.aborted) {\n const err = new Error(\"The operation was aborted\");\n err.name = \"AbortError\";\n throw err;\n }\n\n const data = await bytesFromDetectInput(input);\n if (data.byteLength === 0) {\n return {\n fileKind: \"image\",\n analyzer: \"image\",\n status: \"ok\",\n kind: \"image\",\n text: \"\",\n 
confidence: 0,\n ocrUsed: true,\n warnings: [\"No image bytes were provided for analysis.\"],\n };\n }\n\n const ocrOpts = {\n ...(options?.ocr ?? {}),\n signal: options?.ocr?.signal ?? options?.signal,\n };\n\n const r = await ocr(data, ocrOpts);\n return {\n fileKind: \"image\",\n analyzer: \"image\",\n status: \"ok\",\n kind: \"image\",\n text: r.text,\n confidence: r.confidence,\n ocrUsed: r.ocrUsed,\n warnings: [],\n };\n}\n","import { analyzePdf as runPdf } from \"@dragon708/docmind-pdf\";\nimport type { PdfAnalyzeOptions } from \"@dragon708/docmind-pdf\";\nimport type { AnalysisResult, DetectFileKindInput } from \"@dragon708/docmind-shared\";\nimport type { NodeAnalyzeOptions } from \"../nodeAnalyzeOptions.js\";\nimport { bytesFromDetectInput } from \"../inputBytes.js\";\n\n/**\n * PDF → `@dragon708/docmind-pdf` (Node / pdf-parse + OCR).\n *\n * Unlike `analyzePdf` from `@dragon708/docmind-pdf` (OCR off unless set), `analyzeFile` defaults\n * to `pdf.ocr: \"auto\"`: when the PDF has\n * pages but almost no extractable text (typical scan), the raster OCR pipeline runs. Pass\n * `pdf: { ocr: \"off\" }` to skip OCR for speed.\n */\nexport async function analyzePdfForNode(\n input: DetectFileKindInput,\n options?: NodeAnalyzeOptions,\n): Promise<AnalysisResult> {\n if (options?.signal?.aborted) {\n const err = new Error(\"The operation was aborted\");\n err.name = \"AbortError\";\n throw err;\n }\n\n const data = await bytesFromDetectInput(input);\n if (data.byteLength === 0) {\n return {\n fileKind: \"pdf\",\n analyzer: \"pdf\",\n status: \"ok\",\n kind: \"pdf\",\n text: \"\",\n pages: 0,\n metadata: { info: {} },\n warnings: [\"No document bytes were provided for analysis.\"],\n needsOCR: false,\n ocrUsed: false,\n };\n }\n\n const userPdf = options?.pdf;\n const pdfOpts: PdfAnalyzeOptions = {\n ...userPdf,\n ocr: userPdf?.ocr ?? \"auto\",\n ocrLangs: userPdf?.ocrLangs ?? options?.ocr?.langs,\n signal: userPdf?.signal ?? 
options?.signal,\n };\n\n const r = await runPdf(data, pdfOpts);\n return {\n fileKind: \"pdf\",\n analyzer: \"pdf\",\n status: \"ok\",\n kind: \"pdf\",\n text: r.text,\n pages: r.pages,\n metadata: r.metadata,\n warnings: [...r.warnings],\n needsOCR: r.needsOCR,\n ocrUsed: r.ocrUsed,\n };\n}\n","import type { DetectFileKindInput, NamedInput } from \"@dragon708/docmind-shared\";\nimport { readFile } from \"node:fs/promises\";\nimport { basename } from \"node:path\";\nimport { fileURLToPath } from \"node:url\";\n\n/**\n * Inputs accepted by {@link analyzeFile} in this package.\n * Paths and `file:` URLs are read with `fs`; other values pass through as {@link DetectFileKindInput}.\n */\nexport type NodeAnalyzeInput = string | URL | DetectFileKindInput;\n\nfunction toPathString(pathOrUrl: string | URL): string {\n return pathOrUrl instanceof URL ? fileURLToPath(pathOrUrl) : pathOrUrl;\n}\n\n/**\n * Reads a file from disk into a {@link NamedInput} (binary `Buffer`, basename as `name` for hints).\n */\nexport async function readFileToInput(path: string | URL): Promise<NamedInput<Buffer>> {\n const fsPath = toPathString(path);\n const data = await readFile(fsPath);\n return {\n data,\n name: basename(fsPath),\n };\n}\n\n/** Wraps a `Buffer` as a named payload when you already know the filename. */\nexport function bufferToInput(buffer: Buffer, name?: string): NamedInput<Buffer> {\n return name !== undefined ? 
{ data: buffer, name } : { data: buffer };\n}\n\n/**\n * Resolves paths / `file:` URLs to a {@link DetectFileKindInput}; leaves other values untouched.\n */\nexport async function resolveNodeAnalyzeInput(input: NodeAnalyzeInput): Promise<DetectFileKindInput> {\n if (typeof input === \"string\" || input instanceof URL) {\n return readFileToInput(input);\n }\n return input;\n}\n","import type { AnalysisResult, DetectFileKindInput } from \"@dragon708/docmind-shared\";\nimport {\n analyzeText,\n assertValidAnalyzeFileInput,\n detectFileKind,\n notImplementedResult,\n UNKNOWN_FORMAT_WARNING,\n} from \"@dragon708/docmind-shared\";\nimport { analyzeDocxForNode } from \"./analyzers/docx.js\";\nimport { analyzeImageForNode } from \"./analyzers/image.js\";\nimport { analyzePdfForNode } from \"./analyzers/pdf.js\";\nimport type { NodeAnalyzeOptions } from \"./nodeAnalyzeOptions.js\";\nimport { resolveNodeAnalyzeInput, type NodeAnalyzeInput } from \"./resolveNodeInput.js\";\n\n/**\n * Node router: PDF, DOCX, images (OCR), and text. 
Paths and `file:` URLs are read via `fs`.\n */\nexport async function analyzeFile(\n input: NodeAnalyzeInput,\n options?: NodeAnalyzeOptions,\n): Promise<AnalysisResult> {\n if (options?.signal?.aborted) {\n const err = new Error(\"The operation was aborted\");\n err.name = \"AbortError\";\n throw err;\n }\n\n const resolved = await resolveNodeAnalyzeInput(input);\n assertValidAnalyzeFileInput(resolved);\n\n const fileKind = detectFileKind(resolved);\n\n switch (fileKind) {\n case \"pdf\":\n return analyzePdfForNode(resolved as DetectFileKindInput, options);\n case \"docx\":\n return analyzeDocxForNode(resolved as DetectFileKindInput, options?.signal);\n case \"image\":\n return analyzeImageForNode(resolved as DetectFileKindInput, options);\n case \"text\":\n return analyzeText(resolved as DetectFileKindInput, { signal: options?.signal });\n default:\n return notImplementedResult(fileKind, \"none\", [UNKNOWN_FORMAT_WARNING]);\n }\n}\n","import type { AnalysisResult, DetectFileKindInput } from \"@dragon708/docmind-shared\";\nimport {\n analyzeText,\n assertValidAnalyzeFileInput,\n detectFileKind,\n notImplementedResult,\n UNKNOWN_FORMAT_WARNING,\n} from \"@dragon708/docmind-shared\";\nimport {\n analyzePdf,\n extractPdfMetadata,\n extractTextFromPdf,\n} from \"@dragon708/docmind-pdf\";\nimport {\n analyzeDocx,\n convertDocxToHtml,\n extractTextFromDocx,\n} from \"@dragon708/docmind-docx\";\nimport { ocr } from \"@dragon708/docmind-ocr\";\nimport { bytesFromDetectInput } from \"./inputBytes.js\";\nimport type { NodeAnalyzeOptions } from \"./nodeAnalyzeOptions.js\";\nimport { resolveNodeAnalyzeInput, type NodeAnalyzeInput } from \"./resolveNodeInput.js\";\n\nfunction throwIfAborted(signal?: AbortSignal): void {\n if (signal?.aborted) {\n const err = new Error(\"The operation was aborted\");\n err.name = \"AbortError\";\n throw err;\n }\n}\n\nasync function prepare(input: NodeAnalyzeInput): Promise<DetectFileKindInput> {\n const resolved = await 
resolveNodeAnalyzeInput(input);\n assertValidAnalyzeFileInput(resolved);\n return resolved;\n}\n\nfunction escapeHtmlMinimal(s: string): string {\n return s\n .replace(/&/g, \"&amp;\")\n .replace(/</g, \"&lt;\")\n .replace(/>/g, \"&gt;\")\n .replace(/\"/g, \"&quot;\");\n}\n\nconst DOCX_METADATA_STUB =\n \"Structured document metadata for DOCX is not exposed as a separate API; use extractText or analyzeFile.\";\n\nconst IMAGE_METADATA_NOTE =\n \"Raster images have no document metadata bundle in this API.\";\n\n/**\n * Text only: PDF → `extractTextFromPdf` (capa de texto, sin OCR); DOCX → `extractTextFromDocx`;\n * imagen → `ocr`; texto → `analyzeText`.\n */\nexport async function extractText(\n input: NodeAnalyzeInput,\n options?: NodeAnalyzeOptions,\n): Promise<AnalysisResult> {\n throwIfAborted(options?.signal);\n const resolved = await prepare(input);\n const kind = detectFileKind(resolved);\n const signal = options?.signal;\n\n switch (kind) {\n case \"pdf\": {\n const data = await bytesFromDetectInput(resolved);\n if (data.byteLength === 0) {\n return {\n fileKind: \"pdf\",\n analyzer: \"pdf\",\n status: \"ok\",\n kind: \"pdf\",\n text: \"\",\n pages: 0,\n metadata: { info: {} },\n warnings: [\"No document bytes were provided for analysis.\"],\n needsOCR: false,\n ocrUsed: false,\n };\n }\n const r = await extractTextFromPdf(data);\n return {\n fileKind: \"pdf\",\n analyzer: \"pdf\",\n status: \"ok\",\n kind: \"pdf\",\n text: r.text,\n pages: r.pages,\n metadata: { info: {} },\n warnings: r.warnings,\n needsOCR: false,\n ocrUsed: false,\n };\n }\n case \"docx\": {\n const data = await bytesFromDetectInput(resolved);\n if (data.byteLength === 0) {\n return {\n fileKind: \"docx\",\n analyzer: \"docx\",\n status: \"ok\",\n kind: \"docx\",\n text: \"\",\n html: \"\",\n warnings: [\"No document bytes were provided for analysis.\"],\n };\n }\n const r = await extractTextFromDocx(data);\n return {\n fileKind: \"docx\",\n analyzer: \"docx\",\n status: \"ok\",\n kind: 
\"docx\",\n text: r.text,\n html: \"\",\n warnings: r.warnings,\n };\n }\n case \"image\": {\n const data = await bytesFromDetectInput(resolved);\n if (data.byteLength === 0) {\n return {\n fileKind: \"image\",\n analyzer: \"image\",\n status: \"ok\",\n kind: \"image\",\n text: \"\",\n confidence: 0,\n ocrUsed: true,\n warnings: [\"No image bytes were provided for analysis.\"],\n };\n }\n const ocrOpts = {\n ...(options?.ocr ?? {}),\n signal: options?.ocr?.signal ?? signal,\n };\n const r = await ocr(data, ocrOpts);\n return {\n fileKind: \"image\",\n analyzer: \"image\",\n status: \"ok\",\n kind: \"image\",\n text: r.text,\n confidence: r.confidence,\n ocrUsed: r.ocrUsed,\n warnings: [],\n };\n }\n case \"text\":\n return analyzeText(resolved as DetectFileKindInput, { signal });\n default:\n return notImplementedResult(kind, \"none\", [UNKNOWN_FORMAT_WARNING]);\n }\n}\n\n/**\n * Metadatos: PDF → `extractPdfMetadata`; resto con mejor esfuerzo o aviso.\n * El resultado sigue siendo `AnalysisResult` (PDF rellena `metadata` en forma `PdfAnalysisCoreResult`).\n */\nexport async function extractMetadata(\n input: NodeAnalyzeInput,\n options?: NodeAnalyzeOptions,\n): Promise<AnalysisResult> {\n throwIfAborted(options?.signal);\n const resolved = await prepare(input);\n const kind = detectFileKind(resolved);\n const signal = options?.signal;\n\n switch (kind) {\n case \"pdf\": {\n const data = await bytesFromDetectInput(resolved);\n if (data.byteLength === 0) {\n return {\n fileKind: \"pdf\",\n analyzer: \"pdf\",\n status: \"ok\",\n kind: \"pdf\",\n text: \"\",\n pages: 0,\n metadata: { info: {} },\n warnings: [\"No document bytes were provided for analysis.\"],\n needsOCR: false,\n ocrUsed: false,\n };\n }\n const r = await extractPdfMetadata(data);\n return {\n fileKind: \"pdf\",\n analyzer: \"pdf\",\n status: \"ok\",\n kind: \"pdf\",\n text: \"\",\n pages: 0,\n metadata: r.metadata,\n warnings: r.warnings,\n needsOCR: false,\n ocrUsed: false,\n };\n }\n case 
\"docx\":\n return {\n fileKind: \"docx\",\n analyzer: \"docx\",\n status: \"ok\",\n kind: \"docx\",\n text: \"\",\n html: \"\",\n warnings: [DOCX_METADATA_STUB],\n };\n case \"image\":\n return {\n fileKind: \"image\",\n analyzer: \"image\",\n status: \"ok\",\n kind: \"image\",\n text: \"\",\n confidence: 0,\n ocrUsed: true,\n warnings: [IMAGE_METADATA_NOTE],\n };\n case \"text\":\n return analyzeText(resolved as DetectFileKindInput, { signal });\n default:\n return notImplementedResult(kind, \"none\", [UNKNOWN_FORMAT_WARNING]);\n }\n}\n\n/**\n * HTML: DOCX → `convertDocxToHtml`; PDF/texto → `<pre>` a partir de texto extraído;\n * imágenes → vacío con aviso.\n */\nexport async function convertToHtml(\n input: NodeAnalyzeInput,\n options?: NodeAnalyzeOptions,\n): Promise<AnalysisResult> {\n throwIfAborted(options?.signal);\n const resolved = await prepare(input);\n const kind = detectFileKind(resolved);\n const signal = options?.signal;\n\n switch (kind) {\n case \"docx\": {\n const data = await bytesFromDetectInput(resolved);\n if (data.byteLength === 0) {\n return {\n fileKind: \"docx\",\n analyzer: \"docx\",\n status: \"ok\",\n kind: \"docx\",\n text: \"\",\n html: \"\",\n warnings: [\"No document bytes were provided for analysis.\"],\n };\n }\n const [textPart, htmlPart] = await Promise.all([\n extractTextFromDocx(data),\n convertDocxToHtml(data),\n ]);\n return {\n fileKind: \"docx\",\n analyzer: \"docx\",\n status: \"ok\",\n kind: \"docx\",\n text: textPart.text,\n html: htmlPart.html,\n warnings: [...textPart.warnings, ...htmlPart.warnings],\n };\n }\n case \"pdf\": {\n const data = await bytesFromDetectInput(resolved);\n if (data.byteLength === 0) {\n return {\n fileKind: \"pdf\",\n analyzer: \"pdf\",\n status: \"ok\",\n kind: \"pdf\",\n text: \"\",\n pages: 0,\n metadata: { info: {} },\n warnings: [\"No document bytes were provided for analysis.\"],\n needsOCR: false,\n ocrUsed: false,\n };\n }\n const r = await extractTextFromPdf(data);\n const html = 
`<pre>${escapeHtmlMinimal(r.text)}</pre>`;\n return {\n fileKind: \"pdf\",\n analyzer: \"pdf\",\n status: \"ok\",\n kind: \"pdf\",\n text: r.text,\n pages: r.pages,\n metadata: { info: {} },\n html,\n warnings: [\n ...r.warnings,\n \"PDF HTML is a plain-text preview wrapped in <pre> (not a visual layout).\",\n ],\n needsOCR: false,\n ocrUsed: false,\n } as AnalysisResult;\n }\n case \"text\": {\n const t = await analyzeText(resolved as DetectFileKindInput, { signal });\n const html = `<pre>${escapeHtmlMinimal(t.text)}</pre>`;\n return {\n ...t,\n html,\n warnings: [\n ...t.warnings,\n \"HTML for plain text is a <pre> wrapper around decoded UTF-8 content.\",\n ],\n } as AnalysisResult;\n }\n case \"image\":\n return {\n fileKind: \"image\",\n analyzer: \"image\",\n status: \"ok\",\n kind: \"image\",\n text: \"\",\n confidence: 0,\n ocrUsed: true,\n warnings: [\"No HTML representation for raster images; use extractText / runOcr.\"],\n };\n default:\n return notImplementedResult(kind, \"none\", [UNKNOWN_FORMAT_WARNING]);\n }\n}\n\n/**\n * OCR: PDF → `analyzePdf` con `ocr: \"force\"`; imagen → `ocr`; DOCX → texto estructurado con aviso\n * (sin OCR); texto → `analyzeText`.\n */\nexport async function runOcr(\n input: NodeAnalyzeInput,\n options?: NodeAnalyzeOptions,\n): Promise<AnalysisResult> {\n throwIfAborted(options?.signal);\n const resolved = await prepare(input);\n const kind = detectFileKind(resolved);\n const signal = options?.signal;\n const lang = options?.ocr?.langs ?? options?.pdf?.ocrLangs;\n\n switch (kind) {\n case \"pdf\": {\n const data = await bytesFromDetectInput(resolved);\n if (data.byteLength === 0) {\n return {\n fileKind: \"pdf\",\n analyzer: \"pdf\",\n status: \"ok\",\n kind: \"pdf\",\n text: \"\",\n pages: 0,\n metadata: { info: {} },\n warnings: [\"No document bytes were provided for analysis.\"],\n needsOCR: false,\n ocrUsed: false,\n };\n }\n const r = await analyzePdf(data, {\n ...options?.pdf,\n ocr: \"force\",\n ocrLangs: lang ?? 
options?.pdf?.ocrLangs,\n signal: options?.pdf?.signal ?? signal,\n });\n return {\n fileKind: \"pdf\",\n analyzer: \"pdf\",\n status: \"ok\",\n kind: \"pdf\",\n text: r.text,\n pages: r.pages,\n metadata: r.metadata,\n warnings: [...r.warnings],\n needsOCR: r.needsOCR,\n ocrUsed: r.ocrUsed,\n };\n }\n case \"image\": {\n const data = await bytesFromDetectInput(resolved);\n if (data.byteLength === 0) {\n return {\n fileKind: \"image\",\n analyzer: \"image\",\n status: \"ok\",\n kind: \"image\",\n text: \"\",\n confidence: 0,\n ocrUsed: true,\n warnings: [\"No image bytes were provided for analysis.\"],\n };\n }\n const ocrOpts = {\n ...(options?.ocr ?? {}),\n signal: options?.ocr?.signal ?? signal,\n };\n const r = await ocr(data, ocrOpts);\n return {\n fileKind: \"image\",\n analyzer: \"image\",\n status: \"ok\",\n kind: \"image\",\n text: r.text,\n confidence: r.confidence,\n ocrUsed: r.ocrUsed,\n warnings: [],\n };\n }\n case \"docx\": {\n const data = await bytesFromDetectInput(resolved);\n if (data.byteLength === 0) {\n return {\n fileKind: \"docx\",\n analyzer: \"docx\",\n status: \"ok\",\n kind: \"docx\",\n text: \"\",\n html: \"\",\n warnings: [\"No document bytes were provided for analysis.\"],\n };\n }\n const r = await analyzeDocx(data);\n return {\n fileKind: \"docx\",\n analyzer: \"docx\",\n status: \"ok\",\n kind: \"docx\",\n text: r.text,\n html: r.html,\n warnings: [\n ...r.warnings,\n \"OCR does not apply to DOCX; returned structured text/HTML extract.\",\n ],\n };\n }\n case \"text\":\n return analyzeText(resolved as DetectFileKindInput, { signal });\n default:\n return notImplementedResult(kind, \"none\", [UNKNOWN_FORMAT_WARNING]);\n }\n}\n"]}