@dragon708/docmind-browser 1.4.0 → 1.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.ts CHANGED
@@ -1,6 +1,6 @@
1
1
  import { DocMindAnalyzeOptions, AnalysisResult, FileKind, RuntimeDescriptor, DocMindPublicIntent, AnalysisAnalyzer, ProcessingPlanDescriptor, ExplainAnalysisPlanOptions, GetCapabilitiesOptions } from '@dragon708/docmind-shared';
2
2
  export { AnalysisAnalyzer, AnalysisResult, CapabilityDescriptor, DetectFileKindInput, DocMindPublicIntent, DocxAnalysisCoreResult, ExplainAnalysisPlanOptions, ExplainAnalysisPlanResult, FileKind, FileKindMetadata, GenericAnalysisResult, GetCapabilitiesOptions, GetCapabilitiesResult, ImageAnalysisCoreResult, PdfAnalysisCoreResult, TextAnalysisResult, detectFileKind } from '@dragon708/docmind-shared';
3
- import { OcrOptions } from '@dragon708/docmind-ocr';
3
+ import { OcrOptions, OcrTiffOptions, PreprocessImageOptions } from '@dragon708/docmind-ocr';
4
4
  import { AnalyzeDocxIncludeFlags, DocxToHtmlOptions } from '@dragon708/docmind-docx';
5
5
  export { AnalyzeDocxIncludeFlags } from '@dragon708/docmind-docx';
6
6
 
@@ -16,11 +16,15 @@ interface BrowserAnalyzeDocxOptionsSlice {
16
16
  * - `off`: do not invoke Tesseract; text stays empty with an explanatory warning.
17
17
  * - `auto` (default): run OCR when the input is classified as an image.
18
18
  * - `force`: same as `auto` in the browser runtime (no PDF-style text layer to compare); reserved for parity with Node.
19
+ *
20
+ * Multipage TIFF (when sniffed): `maxPages` and `pageSeparator` match `OcrTiffOptions` in `@dragon708/docmind-ocr` (best-effort UTIF in-browser).
21
+ * Optional {@link PreprocessImageOptions} runs in-browser (canvas) on the normalized raster before Tesseract when using `ocrImageDetailed`.
19
22
  */
20
23
  type BrowserOcrMode = "off" | "auto" | "force";
21
- /** Browser OCR options: Tesseract knobs from `@dragon708/docmind-ocr` plus optional {@link BrowserOcrMode}. */
22
- interface BrowserOcrOptions extends OcrOptions {
24
+ /** Browser OCR options: Tesseract knobs from `@dragon708/docmind-ocr` plus optional {@link BrowserOcrMode}, TIFF caps, and canvas preprocess. */
25
+ interface BrowserOcrOptions extends OcrOptions, Pick<OcrTiffOptions, "maxPages" | "pageSeparator"> {
23
26
  readonly mode?: BrowserOcrMode;
27
+ readonly preprocess?: PreprocessImageOptions;
24
28
  }
25
29
  /**
26
30
  * Options for public browser methods (`analyzeFile`, intent APIs).
@@ -67,12 +71,14 @@ declare function extractMetadata(input: BrowserAnalyzeInput, options?: BrowserAn
67
71
  declare function convertToHtml(input: BrowserAnalyzeInput, options?: BrowserAnalyzeOptions): Promise<AnalysisResult>;
68
72
  /**
69
73
  * OCR-focused intent. Honors {@link BrowserAnalyzeOptions.ocr} **mode** (`off` | `auto` | `force`) for images.
74
+ * Raster path uses `normalizeImageForOcr` via `ocrImageDetailed` (or `ocrTiff` for TIFF); no Node-only libraries.
75
+ * HEIC/HEIF and hard failures yield empty text + warnings instead of throwing (abort still propagates).
70
76
  * DOCX returns structured extract with a notice (no OCR). Text decodes as UTF-8 (no OCR).
71
77
  */
72
78
  declare function runOcr(input: BrowserAnalyzeInput, options?: BrowserAnalyzeOptions): Promise<AnalysisResult>;
73
79
 
74
80
  /** High-level features the user can ask DocMind for (per input kind and runtime). */
75
- type PublicCapabilityId = "text" | "metadata" | "html" | "ocr" | "pages";
81
+ type PublicCapabilityId = "text" | "metadata" | "html" | "ocr" | "pages" | "image-normalization" | "gif-first-frame" | "bmp" | "heic" | "tiff";
76
82
  declare function docxIncludeRequested(flags?: AnalyzeDocxIncludeFlags): boolean;
77
83
  /** DOCX `word/media` in the browser runtime (JSZip; no Node pipeline). */
78
84
  interface DocxEmbeddedImageCapabilities {
@@ -149,7 +155,7 @@ type BrowserExplainAnalysisPlanOptions = ExplainAnalysisPlanOptions & Pick<Brows
149
155
 
150
156
  /**
151
157
  * Epic 1 — **Capabilities:** detects kind from the same hints as `analyzeFile`, then lists which of
152
- * `text` | `metadata` | `html` | `ocr` | `pages` apply in the browser (PDF always unsupported).
158
+ * `text` | `metadata` | `html` | `ocr` | `pages` and image-specific ids (`image-normalization`, `bmp`, `gif-first-frame`, `heic`, `tiff`) apply in the browser (PDF always unsupported).
153
159
  * No Mammoth/Tesseract/PDF parsing. For DOCX, {@link GetCapabilitiesReport.docxStructure} / `docxEmbeddedImages` describe v2 opt-in features.
154
160
  */
155
161
  declare function getCapabilities(input: BrowserAnalyzeInput, options?: GetCapabilitiesOptions): Promise<GetCapabilitiesReport>;
package/dist/index.js CHANGED
@@ -1,7 +1,7 @@
1
- import { assertValidAnalyzeFileInput, detectFileKind, notImplementedResult, UNKNOWN_FORMAT_WARNING, analyzeText, toUint8Array, InvalidInputError } from '@dragon708/docmind-shared';
1
+ import { assertValidAnalyzeFileInput, detectFileKind, notImplementedResult, UNKNOWN_FORMAT_WARNING, analyzeText, toUint8Array, InvalidInputError, getMimeType } from '@dragon708/docmind-shared';
2
2
  export { detectFileKind } from '@dragon708/docmind-shared';
3
3
  import { analyzeDocx } from '@dragon708/docmind-docx';
4
- import { ocr } from '@dragon708/docmind-ocr';
4
+ import { preprocessHasEffect, resolveImageFormat, normalizeImageForOcr, ocrTiff, ocrImageDetailed } from '@dragon708/docmind-ocr';
5
5
 
6
6
  // src/analyzeFile.ts
7
7
  function assertBrowserInput(input) {
@@ -78,6 +78,93 @@ async function analyzeDocxForBrowser(input, options) {
78
78
  const r = docxOpts !== void 0 ? await analyzeDocx(data, docxOpts) : await analyzeDocx(data);
79
79
  return docxPackageResultToAnalysisResult(r);
80
80
  }
81
+ var BROWSER_TIFF_BEST_EFFORT_WARNING = "Browser TIFF support is best-effort: decoding uses UTIF in JavaScript/WebAssembly\u2014some compressions, color modes, very large or multipage files may fail, hang, or exhaust memory. For heavy TIFF workloads use @dragon708/docmind-node (optional sharp).";
82
/**
 * Average the per-page OCR confidence of a multipage result.
 *
 * Pages whose `confidence` is missing or not a finite number are left out of the
 * average, so a single malformed page cannot turn the aggregate into `NaN`.
 *
 * @param pages Per-page OCR entries; each is expected to expose a numeric `confidence`.
 * @returns The arithmetic mean of the usable confidences, or `0` when there are none
 *   (empty list, non-array input, or no page with a finite confidence).
 */
function meanPageConfidence(pages) {
  if (!Array.isArray(pages)) return 0;
  let total = 0;
  let counted = 0;
  for (const page of pages) {
    const value = page?.confidence;
    // Only finite numbers contribute; anything else would poison the sum.
    if (typeof value === "number" && Number.isFinite(value)) {
      total += value;
      counted += 1;
    }
  }
  return counted === 0 ? 0 : total / counted;
}
86
+ function rethrowIfAbort(e) {
87
+ if (e instanceof Error && e.name === "AbortError") throw e;
88
+ }
89
/**
 * Build the single-entry warnings list used when an OCR step fails.
 *
 * @param prefix Human-readable lead-in describing which step failed.
 * @param e The caught value; may be an `Error`, a string, or anything else.
 * @returns A one-element array containing `"<prefix> <detail>"`. The detail is the
 *   error message when one is available, the string form of a primitive, or
 *   "unknown error" when nothing readable can be extracted.
 */
function ocrFailureWarnings(prefix, e) {
  let detail = "";
  if (e instanceof Error) {
    detail = e.message;
  } else if (typeof e === "object" && e !== null) {
    // Non-Error objects would stringify to "[object Object]"; use their message if they carry one.
    if (typeof e.message === "string") detail = e.message;
  } else if (e !== null && e !== undefined) {
    detail = String(e);
  }
  // Avoid emitting a dangling prefix when no readable detail exists.
  if (detail.trim().length === 0) detail = "unknown error";
  return [`${prefix} ${detail}`];
}
93
/**
 * Run browser-side OCR on raster image bytes and return a plain result slice
 * (`text`, `confidence`, `ocrUsed`, `warnings`, plus optional page/format fields).
 *
 * Routing is decided by the format resolved from the bytes and the MIME hint:
 * - HEIC/HEIF: no recognition is attempted; the result is empty text with an
 *   explanatory warning plus whatever `normalizeImageForOcr` reports.
 * - TIFF: `ocrTiff` with the caller's `maxPages` / `pageSeparator`.
 * - anything else: `ocrImageDetailed` with the caller's optional `preprocess`.
 *
 * Every branch converts failures into empty text plus warnings; only abort
 * errors propagate (via `rethrowIfAbort`).
 *
 * @param data Image bytes to recognise.
 * @param input Original analyze input, used only to derive the MIME hint.
 * @param options Analyze options; `options.ocr.signal` takes precedence over `options.signal`.
 * @returns The OCR result slice described above.
 * @throws The caught abort error when the operation was cancelled.
 */
async function runRasterOcrForBrowser(data, input, options) {
  const ocrOptions = options?.ocr;
  const signal = ocrOptions?.signal ?? options?.signal;
  const langs = ocrOptions?.langs;
  const mimeHint = getMimeType(input);
  const format = resolveImageFormat(data, mimeHint);

  if (format === "heic" || format === "heif") {
    const heicWarning = "HEIC/HEIF cannot be OCR'd in the browser; convert to PNG or JPEG server-side (e.g. @dragon708/docmind-node with sharp), then retry.";
    try {
      const norm = await normalizeImageForOcr(data, { signal, mimeHint });
      return {
        text: "",
        confidence: 0,
        ocrUsed: true,
        warnings: [heicWarning, ...norm.warnings],
        inputFormat: norm.format,
        normalizedFormat: norm.normalizedFormat
      };
    } catch (e) {
      // A normalization failure must not escape: this path is documented as
      // "empty text + warnings", with abort as the only exception.
      rethrowIfAbort(e);
      return {
        text: "",
        confidence: 0,
        ocrUsed: true,
        warnings: [
          heicWarning,
          ...ocrFailureWarnings("HEIC/HEIF normalization failed in the browser:", e)
        ],
        // NOTE(review): falls back to the sniffed format; presumably equal to what
        // `normalizeImageForOcr` would have reported as `format` — confirm.
        inputFormat: format
      };
    }
  }

  if (format === "tiff") {
    try {
      const tiff = await ocrTiff(data, {
        langs,
        signal,
        maxPages: ocrOptions?.maxPages,
        pageSeparator: ocrOptions?.pageSeparator
      });
      return {
        text: tiff.text.trim(),
        confidence: meanPageConfidence(tiff.textByPage),
        ocrUsed: true,
        warnings: [BROWSER_TIFF_BEST_EFFORT_WARNING, ...tiff.warnings],
        pages: tiff.pagesProcessed,
        textByPage: tiff.textByPage
      };
    } catch (e) {
      rethrowIfAbort(e);
      return {
        text: "",
        confidence: 0,
        ocrUsed: true,
        warnings: [
          BROWSER_TIFF_BEST_EFFORT_WARNING,
          ...ocrFailureWarnings("TIFF OCR failed in the browser:", e)
        ]
      };
    }
  }

  try {
    const detailed = await ocrImageDetailed(data, {
      langs,
      signal,
      preprocess: ocrOptions?.preprocess
    });
    return {
      text: detailed.text.trim(),
      confidence: detailed.confidence,
      ocrUsed: true,
      // Copy so later edits to the result cannot alias the OCR package's array.
      warnings: [...detailed.warnings],
      pages: detailed.pages,
      inputFormat: detailed.inputFormat,
      normalizedFormat: detailed.normalizedFormat
    };
  } catch (e) {
    rethrowIfAbort(e);
    return {
      text: "",
      confidence: 0,
      ocrUsed: true,
      warnings: ocrFailureWarnings("OCR could not complete in the browser:", e)
    };
  }
}
166
+
167
+ // src/analyzers/image.ts
81
168
  var OCR_OFF_WARNING = 'OCR mode is "off"; no recognition was run. Use mode "auto" or "force" to extract text from images.';
82
169
  function resolveOcrMode(options) {
83
170
  return options?.ocr?.mode ?? "auto";
@@ -114,21 +201,26 @@ async function analyzeImageForBrowser(input, options) {
114
201
  warnings: [OCR_OFF_WARNING]
115
202
  };
116
203
  }
117
- const ocrOpts = {
118
- ...options?.ocr ?? {},
119
- signal: options?.ocr?.signal ?? options?.signal
120
- };
121
- const r = await ocr(data, ocrOpts);
122
- return {
204
+ const ocrPart = await runRasterOcrForBrowser(data, input, options);
205
+ const base = {
123
206
  fileKind: "image",
124
207
  analyzer: "image",
125
208
  status: "ok",
126
209
  kind: "image",
127
- text: r.text,
128
- confidence: r.confidence,
129
- ocrUsed: r.ocrUsed,
130
- warnings: []
210
+ text: ocrPart.text,
211
+ confidence: ocrPart.confidence,
212
+ ocrUsed: true,
213
+ warnings: ocrPart.warnings
131
214
  };
215
+ const extra = {};
216
+ if (ocrPart.pages !== void 0) extra.pages = ocrPart.pages;
217
+ if (ocrPart.textByPage !== void 0) extra.textByPage = ocrPart.textByPage;
218
+ if (ocrPart.inputFormat !== void 0) extra.inputFormat = ocrPart.inputFormat;
219
+ if (ocrPart.normalizedFormat !== void 0) extra.normalizedFormat = ocrPart.normalizedFormat;
220
+ if (Object.keys(extra).length > 0) {
221
+ return { ...base, ...extra };
222
+ }
223
+ return base;
132
224
  }
133
225
 
134
226
  // src/analyzeFile.ts
@@ -361,11 +453,33 @@ function buildBrowserCapabilityReport(kind) {
361
453
  break;
362
454
  case "image":
363
455
  capabilities = [
364
- slot("text", true, ["Text is obtained via OCR when enabled."]),
456
+ slot("text", true, [
457
+ "Text via `@dragon708/docmind-ocr` when `ocr.mode` is not off: PNG, JPEG, WebP, BMP, GIF (first frame), TIFF (partial), after sniff/MIME."
458
+ ]),
365
459
  slot("metadata", false, [IMAGE_META]),
366
460
  slot("html", false, [IMAGE_HTML]),
367
- slot("ocr", true, [OCR_OFF_NOTE]),
368
- slot("pages", false)
461
+ slot("ocr", true, [
462
+ OCR_OFF_NOTE,
463
+ "Uses `ocrImageDetailed` (single-frame path) or multipage `ocrTiff` for TIFF; WASM Tesseract in-browser."
464
+ ]),
465
+ slot("image-normalization", true, [
466
+ "`normalizeImageForOcr` runs inside the OCR package (canvas/`createImageBitmap` in-browser for BMP, GIF, etc.; not HEIC)."
467
+ ]),
468
+ slot("bmp", true, [
469
+ "BMP is decoded via browser canvas/`createImageBitmap` into a PNG-oriented buffer before Tesseract."
470
+ ]),
471
+ slot("gif-first-frame", true, [
472
+ "Animated GIF: only the first decoded frame is normalized and OCR'd; see result warnings when multi-frame is detected."
473
+ ]),
474
+ slot("heic", false, [
475
+ "HEIC/HEIF is not decoded in the browser. `runOcr` / `analyzeFile` return empty text with explicit warnings; convert server-side (e.g. @dragon708/docmind-node)."
476
+ ]),
477
+ slot("tiff", true, [
478
+ "Partial / best-effort: multipage `ocrTiff` with UTIF in JS/WASM\u2014not all compressions or huge files; prefer Node for production TIFF."
479
+ ]),
480
+ slot("pages", true, [
481
+ "Multipage TIFF may populate `pages` and `textByPage` when OCR succeeds; other formats may expose `pages` when normalization reports it."
482
+ ])
369
483
  ];
370
484
  break;
371
485
  case "text":
@@ -405,6 +519,9 @@ function lim(...items) {
405
519
  }
406
520
  var DOCX_MAMMOTH_PLUS_OPTIONAL_BROWSER = "Mammoth (`analyzeDocx`) extracts text and HTML from OOXML in-browser; optional parallel OOXML/ZIP extractors run when options.docx.include is set.";
407
521
  var DOCX_ZIP_NOTE_BROWSER = "Embedded files under word/media are available via @dragon708/docmind-docx when options.docx.include requests embeddedImages (or call extractImagesFromDocx on the same bytes).";
522
+ var BROWSER_TIFF_RASTER_NOTE = "TIFF (if detected): multipage OCR uses `ocrTiff` with UTIF in-browser\u2014best-effort only; failures return empty text + warnings (no throw). Prefer `@dragon708/docmind-node` for production TIFF.";
523
+ var BROWSER_IMAGE_PIPELINE = "Browser raster OCR: sniff format \u2192 `normalizeImageForOcr` (canvas/`createImageBitmap` for BMP/GIF; not HEIC) \u2192 optional `preprocessImageForOcr` when `options.ocr.preprocess` applies \u2192 Tesseract via `ocrImageDetailed`, or `ocrTiff` for TIFF. HEIC/HEIF: no decode\u2014expect empty text and explicit warnings. GIF: first frame only.";
524
+ var BROWSER_HEIC_NOTE = "HEIC/HEIF is never decoded in-browser; there is no `sharp` dependency. Convert server-side, then OCR PNG/JPEG bytes.";
408
525
  function finalizeBrowserDocxExplainReport(report) {
409
526
  if (report.kind !== "docx") return report;
410
527
  const limitations = report.limitations.includes(DOCX_ZIP_NOTE_BROWSER) ? report.limitations : [...report.limitations, DOCX_ZIP_NOTE_BROWSER];
@@ -415,12 +532,12 @@ function finalizeBrowserDocxExplainReport(report) {
415
532
  limitations
416
533
  };
417
534
  }
418
- function buildBrowserExplainReport(kind, intent, ocrMode, plan, docxInclude) {
535
+ function buildBrowserExplainReport(kind, intent, ocrMode, plan, docxInclude, ocrSlice) {
419
536
  const runtime = { id: "browser" };
420
537
  const imageOcrActive = ocrMode !== "off";
421
538
  let primaryAnalyzer = kind === "pdf" ? "pdf" : kind === "docx" ? "docx" : kind === "image" ? "image" : kind === "text" ? "text" : "none";
422
539
  let nativeExtraction;
423
- let ocr2;
540
+ let ocr;
424
541
  let limitations = [];
425
542
  const ocrOffNote = ocrMode === "off" ? 'Image OCR is skipped when ocr.mode is "off".' : "";
426
543
  if (kind === "pdf") {
@@ -429,7 +546,7 @@ function buildBrowserExplainReport(kind, intent, ocrMode, plan, docxInclude) {
429
546
  willAttempt: false,
430
547
  description: "PDF is not processed in the browser runtime; use @dragon708/docmind-node."
431
548
  };
432
- ocr2 = {
549
+ ocr = {
433
550
  mayUse: false,
434
551
  description: "PDF OCR is not available in the browser."
435
552
  };
@@ -440,7 +557,7 @@ function buildBrowserExplainReport(kind, intent, ocrMode, plan, docxInclude) {
440
557
  intent,
441
558
  primaryAnalyzer: "pdf",
442
559
  nativeExtraction,
443
- ocr: ocr2,
560
+ ocr,
444
561
  limitations,
445
562
  plan,
446
563
  warnings: [BROWSER_PDF_UNSUPPORTED_WARNING]
@@ -451,7 +568,7 @@ function buildBrowserExplainReport(kind, intent, ocrMode, plan, docxInclude) {
451
568
  "Could not classify the file from name, MIME, or bytes; analysis will return not_implemented until hints improve."
452
569
  );
453
570
  nativeExtraction = { willAttempt: false, description: "No analyzer selected without a known file kind." };
454
- ocr2 = { mayUse: false, description: "OCR is not used for unknown kinds." };
571
+ ocr = { mayUse: false, description: "OCR is not used for unknown kinds." };
455
572
  return finalizeBrowserDocxExplainReport({
456
573
  kind,
457
574
  detectedKind: kind,
@@ -459,7 +576,7 @@ function buildBrowserExplainReport(kind, intent, ocrMode, plan, docxInclude) {
459
576
  intent,
460
577
  primaryAnalyzer: "none",
461
578
  nativeExtraction,
462
- ocr: ocr2,
579
+ ocr,
463
580
  limitations,
464
581
  plan
465
582
  });
@@ -472,23 +589,23 @@ function buildBrowserExplainReport(kind, intent, ocrMode, plan, docxInclude) {
472
589
  willAttempt: true,
473
590
  description: docxIncludeRequested(docxInclude) ? "Mammoth plus parallel OOXML extractors (per options.docx.include)." + (intent === "extractText" ? " HTML cleared in extractText." : "") : DOCX_MAMMOTH_PLUS_OPTIONAL_BROWSER + (intent === "extractText" ? " HTML omitted in extractText." : "")
474
591
  };
475
- ocr2 = { mayUse: false, description: "DOCX does not use OCR in DocMind." };
592
+ ocr = { mayUse: false, description: "DOCX does not use OCR in DocMind." };
476
593
  } else if (kind === "image") {
477
594
  nativeExtraction = {
478
595
  willAttempt: false,
479
- description: "Raster images have no native text layer in this pipeline."
596
+ description: BROWSER_IMAGE_PIPELINE
480
597
  };
481
- ocr2 = {
598
+ ocr = {
482
599
  mayUse: imageOcrActive,
483
- description: imageOcrActive ? "Tesseract.js may run to recover text (subject to format support)." : "OCR skipped while ocr.mode is off."
600
+ description: imageOcrActive ? "`ocrImageDetailed` (normalize + optional preprocess) for single-frame paths; TIFF \u2192 `ocrTiff` (UTIF, partial). HEIC unsupported in-browser." : "OCR skipped while ocr.mode is off."
484
601
  };
485
- limitations = lim(ocrOffNote);
602
+ limitations = lim(ocrOffNote, BROWSER_TIFF_RASTER_NOTE, BROWSER_HEIC_NOTE);
486
603
  } else {
487
604
  nativeExtraction = {
488
605
  willAttempt: true,
489
606
  description: "Plain text is decoded as UTF-8 (BOM stripped, replacement on invalid bytes)."
490
607
  };
491
- ocr2 = { mayUse: false, description: "OCR does not apply to text files." };
608
+ ocr = { mayUse: false, description: "OCR does not apply to text files." };
492
609
  }
493
610
  break;
494
611
  case "extractMetadata":
@@ -497,16 +614,16 @@ function buildBrowserExplainReport(kind, intent, ocrMode, plan, docxInclude) {
497
614
  willAttempt: false,
498
615
  description: "No heavy extractor; extractMetadata returns a stub with guidance."
499
616
  };
500
- ocr2 = { mayUse: false, description: "OCR is not invoked for this metadata path." };
617
+ ocr = { mayUse: false, description: "OCR is not invoked for this metadata path." };
501
618
  limitations = lim(
502
- kind === "docx" ? "Structured DOCX metadata is not exposed separately in the browser; use analyzeFile / extractText / convertToHtml with options.docx.include for OOXML fields." : "Raster images have no document metadata bundle in this API."
619
+ kind === "docx" ? "Structured DOCX metadata is not exposed separately in the browser; use analyzeFile / extractText / convertToHtml with options.docx.include for OOXML fields." : "Raster images have no document metadata bundle. TIFF/HEIC caveats: see getCapabilities (`tiff` partial, `heic` unsupported) and runOcr warnings."
503
620
  );
504
621
  } else {
505
622
  nativeExtraction = {
506
623
  willAttempt: true,
507
624
  description: "Plain text is decoded; metadata is limited to decoded content."
508
625
  };
509
- ocr2 = { mayUse: false, description: "OCR does not apply." };
626
+ ocr = { mayUse: false, description: "OCR does not apply." };
510
627
  limitations = lim("Plain text has no structured document metadata.");
511
628
  }
512
629
  break;
@@ -516,21 +633,21 @@ function buildBrowserExplainReport(kind, intent, ocrMode, plan, docxInclude) {
516
633
  willAttempt: true,
517
634
  description: docxIncludeRequested(docxInclude) ? "Mammoth HTML via analyzeFile plus optional OOXML extractors." : "Mammoth HTML via analyzeFile; optional OOXML v2 when options.docx.include is set."
518
635
  };
519
- ocr2 = { mayUse: false, description: "DOCX path does not use OCR." };
636
+ ocr = { mayUse: false, description: "DOCX path does not use OCR." };
520
637
  } else if (kind === "text") {
521
638
  nativeExtraction = {
522
639
  willAttempt: true,
523
640
  description: "UTF-8 decode then wrap in a <pre> element."
524
641
  };
525
- ocr2 = { mayUse: false, description: "OCR does not apply." };
642
+ ocr = { mayUse: false, description: "OCR does not apply." };
526
643
  } else {
527
644
  nativeExtraction = {
528
645
  willAttempt: false,
529
646
  description: "No rich HTML path for this kind in the browser."
530
647
  };
531
- ocr2 = { mayUse: false, description: "OCR does not produce layout HTML here." };
648
+ ocr = { mayUse: false, description: "OCR does not produce layout HTML here." };
532
649
  limitations = lim(
533
- kind === "image" ? "Raster images have no HTML representation; use extractText or runOcr." : ""
650
+ kind === "image" ? "Raster images have no HTML representation; use extractText or runOcr. Expect HEIC to yield warnings only; TIFF is best-effort." : ""
534
651
  );
535
652
  }
536
653
  break;
@@ -538,31 +655,37 @@ function buildBrowserExplainReport(kind, intent, ocrMode, plan, docxInclude) {
538
655
  if (kind === "image") {
539
656
  nativeExtraction = {
540
657
  willAttempt: false,
541
- description: "No native text layer; recognition is OCR-only."
658
+ description: BROWSER_IMAGE_PIPELINE
542
659
  };
543
- ocr2 = {
660
+ ocr = {
544
661
  mayUse: imageOcrActive,
545
- description: imageOcrActive ? "Tesseract.js runs for raster text." : "OCR skipped while ocr.mode is off."
662
+ description: imageOcrActive ? "Same as analyzeFile: `normalizeImageForOcr` inside `ocrImageDetailed`, optional canvas preprocess, or `ocrTiff` for TIFF. Errors surface as warnings, not uncaught exceptions (except abort)." : "OCR skipped while ocr.mode is off."
546
663
  };
547
- limitations = lim(ocrOffNote);
664
+ limitations = lim(ocrOffNote, BROWSER_TIFF_RASTER_NOTE, BROWSER_HEIC_NOTE);
548
665
  } else if (kind === "docx") {
549
666
  nativeExtraction = {
550
667
  willAttempt: true,
551
668
  description: docxIncludeRequested(docxInclude) ? "Mammoth text/HTML plus optional OOXML extractors; not OCR." : "Mammoth text/HTML; optional OOXML v2 via options.docx.include; not OCR."
552
669
  };
553
- ocr2 = { mayUse: false, description: "DOCX is not OCR'd." };
670
+ ocr = { mayUse: false, description: "DOCX is not OCR'd." };
554
671
  limitations = lim("Returned content is structured extract, not OCR output.");
555
672
  } else {
556
673
  nativeExtraction = {
557
674
  willAttempt: true,
558
675
  description: "Plain text is UTF-8 decoded only."
559
676
  };
560
- ocr2 = { mayUse: false, description: "OCR does not apply to text files." };
677
+ ocr = { mayUse: false, description: "OCR does not apply to text files." };
561
678
  }
562
679
  break;
563
680
  default:
564
681
  nativeExtraction = { willAttempt: false, description: "Intent not specialized in this runtime." };
565
- ocr2 = { mayUse: false, description: "See plan steps." };
682
+ ocr = { mayUse: false, description: "See plan steps." };
683
+ }
684
+ if (kind === "image" && preprocessHasEffect(ocrSlice?.preprocess)) {
685
+ limitations = [
686
+ ...limitations,
687
+ "options.ocr.preprocess applies to the `ocrImageDetailed` path only; multipage TIFF (`ocrTiff`) does not run preprocess per frame."
688
+ ];
566
689
  }
567
690
  return finalizeBrowserDocxExplainReport({
568
691
  kind,
@@ -571,17 +694,38 @@ function buildBrowserExplainReport(kind, intent, ocrMode, plan, docxInclude) {
571
694
  intent,
572
695
  primaryAnalyzer,
573
696
  nativeExtraction,
574
- ocr: ocr2,
697
+ ocr,
575
698
  limitations,
576
699
  plan
577
700
  });
578
701
  }
579
702
 
580
703
  // src/introspection.ts
581
- function resolveOcrMode2(ocr2) {
582
- return ocr2?.mode ?? "auto";
704
+ function resolveOcrMode2(ocr) {
705
+ return ocr?.mode ?? "auto";
706
+ }
707
/**
 * List the plan steps for browser image OCR.
 *
 * `detect_kind` is always "done". With `ocrMode` "off" every later step is
 * "skipped". Otherwise they are "planned", except `preprocess_image_for_ocr`,
 * which is "planned" only when `preprocessHasEffect` accepts `ocr.preprocess`.
 *
 * @param ocrMode Resolved OCR mode ("off" disables every OCR step).
 * @param ocr Optional OCR options slice; only `preprocess` is read.
 * @returns Ordered step descriptors, each `{ id, status }`.
 */
function imageBrowserPlanSteps(ocrMode, ocr) {
  const ocrDisabled = ocrMode === "off";
  const pipelineStatus = ocrDisabled ? "skipped" : "planned";
  // Short-circuit keeps `preprocessHasEffect` from being consulted when OCR is off.
  const preprocessStatus = !ocrDisabled && preprocessHasEffect(ocr?.preprocess) ? "planned" : "skipped";
  return [
    { id: "detect_kind", status: "done" },
    { id: "image_format_detect", status: pipelineStatus },
    { id: "normalize_image_for_ocr", status: pipelineStatus },
    { id: "preprocess_image_for_ocr", status: preprocessStatus },
    { id: "tesseract_ocr", status: pipelineStatus }
  ];
}
584
- function planForAnalyzeFile(kind, ocrMode, docxInclude) {
728
+ function planForAnalyzeFile(kind, ocrMode, docxInclude, ocr) {
585
729
  switch (kind) {
586
730
  case "pdf":
587
731
  return {
@@ -605,13 +749,7 @@ function planForAnalyzeFile(kind, ocrMode, docxInclude) {
605
749
  case "image":
606
750
  return {
607
751
  intent: "analyzeFile",
608
- steps: [
609
- { id: "detect_kind", status: "done" },
610
- {
611
- id: "image_ocr",
612
- status: ocrMode === "off" ? "skipped" : "planned"
613
- }
614
- ]
752
+ steps: imageBrowserPlanSteps(ocrMode, ocr)
615
753
  };
616
754
  case "text":
617
755
  return {
@@ -628,11 +766,11 @@ function planForAnalyzeFile(kind, ocrMode, docxInclude) {
628
766
  };
629
767
  }
630
768
  }
631
- function planForIntent(intentOpt, kind, ocrMode, docxInclude) {
769
+ function planForIntent(intentOpt, kind, ocrMode, docxInclude, ocr) {
632
770
  const intent = intentOpt ?? "analyzeFile";
633
- if (intent === "analyzeFile") return planForAnalyzeFile(kind, ocrMode, docxInclude);
771
+ if (intent === "analyzeFile") return planForAnalyzeFile(kind, ocrMode, docxInclude, ocr);
634
772
  if (intent === "extractText") {
635
- const base = planForAnalyzeFile(kind, ocrMode, docxInclude);
773
+ const base = planForAnalyzeFile(kind, ocrMode, docxInclude, ocr);
636
774
  return { ...base, intent: "extractText" };
637
775
  }
638
776
  if (intent === "extractMetadata") {
@@ -687,10 +825,7 @@ function planForIntent(intentOpt, kind, ocrMode, docxInclude) {
687
825
  if (kind === "image") {
688
826
  return {
689
827
  intent: "runOcr",
690
- steps: [
691
- { id: "detect_kind", status: "done" },
692
- { id: "tesseract_ocr", status: ocrMode === "off" ? "skipped" : "planned" }
693
- ]
828
+ steps: imageBrowserPlanSteps(ocrMode, ocr)
694
829
  };
695
830
  }
696
831
  if (kind === "docx") {
@@ -712,7 +847,7 @@ function planForIntent(intentOpt, kind, ocrMode, docxInclude) {
712
847
  ]
713
848
  };
714
849
  }
715
- return planForAnalyzeFile(kind, ocrMode, docxInclude);
850
+ return planForAnalyzeFile(kind, ocrMode, docxInclude, ocr);
716
851
  }
717
852
  async function getCapabilities(input, options) {
718
853
  throwIfAborted(options?.signal);
@@ -727,8 +862,9 @@ async function explainAnalysisPlan(input, options) {
727
862
  const intent = options?.intent ?? "analyzeFile";
728
863
  const ocrMode = resolveOcrMode2(options?.ocr);
729
864
  const docxInc = options?.docx?.include;
730
- const plan = planForIntent(intent, kind, ocrMode, docxInc);
731
- return buildBrowserExplainReport(kind, intent, ocrMode, plan, docxInc);
865
+ const ocrSlice = options?.ocr;
866
+ const plan = planForIntent(intent, kind, ocrMode, docxInc, ocrSlice);
867
+ return buildBrowserExplainReport(kind, intent, ocrMode, plan, docxInc, ocrSlice);
732
868
  }
733
869
 
734
870
  export { BROWSER_PDF_UNSUPPORTED_WARNING, DOCX_EMBEDDED_IMAGE_CAPABILITIES_BROWSER, DOCX_STRUCTURE_CAPABILITIES_BROWSER, analyzeFile, convertToHtml, docxIncludeRequested, explainAnalysisPlan, extractMetadata, extractText, getCapabilities, runOcr };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@dragon708/docmind-browser",
3
- "version": "1.4.0",
3
+ "version": "1.5.0",
4
4
  "description": "Official DocMind browser facade: analyzeFile and intent APIs (DOCX, image OCR, text). PDF and fs paths use @dragon708/docmind-node.",
5
5
  "type": "module",
6
6
  "sideEffects": false,
@@ -34,7 +34,7 @@
34
34
  "license": "MIT",
35
35
  "dependencies": {
36
36
  "@dragon708/docmind-docx": "^1.7.0",
37
- "@dragon708/docmind-ocr": "^1.0.0",
37
+ "@dragon708/docmind-ocr": "^1.1.0",
38
38
  "@dragon708/docmind-shared": "^1.1.0"
39
39
  },
40
40
  "devDependencies": {