npm - @dragon708/docmind-browser - Versions diffs - 1.4.0 → 1.5.0 - Mend

@dragon708/docmind-browser 1.4.0 → 1.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (3) hide show

package/dist/index.d.ts CHANGED Viewed

@@ -1,6 +1,6 @@
 import { DocMindAnalyzeOptions, AnalysisResult, FileKind, RuntimeDescriptor, DocMindPublicIntent, AnalysisAnalyzer, ProcessingPlanDescriptor, ExplainAnalysisPlanOptions, GetCapabilitiesOptions } from '@dragon708/docmind-shared';
 export { AnalysisAnalyzer, AnalysisResult, CapabilityDescriptor, DetectFileKindInput, DocMindPublicIntent, DocxAnalysisCoreResult, ExplainAnalysisPlanOptions, ExplainAnalysisPlanResult, FileKind, FileKindMetadata, GenericAnalysisResult, GetCapabilitiesOptions, GetCapabilitiesResult, ImageAnalysisCoreResult, PdfAnalysisCoreResult, TextAnalysisResult, detectFileKind } from '@dragon708/docmind-shared';
-import { OcrOptions } from '@dragon708/docmind-ocr';
+import { OcrOptions, OcrTiffOptions, PreprocessImageOptions } from '@dragon708/docmind-ocr';
 import { AnalyzeDocxIncludeFlags, DocxToHtmlOptions } from '@dragon708/docmind-docx';
 export { AnalyzeDocxIncludeFlags } from '@dragon708/docmind-docx';
@@ -16,11 +16,15 @@ interface BrowserAnalyzeDocxOptionsSlice {
  * - `off`: do not invoke Tesseract; text stays empty with an explanatory warning.
  * - `auto` (default): run OCR when the input is classified as an image.
  * - `force`: same as `auto` in the browser runtime (no PDF-style text layer to compare); reserved for parity with Node.
+ *
+ * Multipage TIFF (when sniffed): `maxPages` and `pageSeparator` match `OcrTiffOptions` in `@dragon708/docmind-ocr` (best-effort UTIF in-browser).
+ * Optional {@link PreprocessImageOptions} runs in-browser (canvas) on the normalized raster before Tesseract when using `ocrImageDetailed`.
  */
 type BrowserOcrMode = "off" | "auto" | "force";
-/** Browser OCR options: Tesseract knobs from `@dragon708/docmind-ocr` plus optional {@link BrowserOcrMode}. */
-interface BrowserOcrOptions extends OcrOptions {
+/** Browser OCR options: Tesseract knobs from `@dragon708/docmind-ocr` plus optional {@link BrowserOcrMode}, TIFF caps, and canvas preprocess. */
+interface BrowserOcrOptions extends OcrOptions, Pick<OcrTiffOptions, "maxPages" | "pageSeparator"> {
     readonly mode?: BrowserOcrMode;
+    readonly preprocess?: PreprocessImageOptions;
 }
 /**
  * Options for public browser methods (`analyzeFile`, intent APIs).
@@ -67,12 +71,14 @@ declare function extractMetadata(input: BrowserAnalyzeInput, options?: BrowserAn
 declare function convertToHtml(input: BrowserAnalyzeInput, options?: BrowserAnalyzeOptions): Promise<AnalysisResult>;
 /**
  * OCR-focused intent. Honors {@link BrowserAnalyzeOptions.ocr} **mode** (`off` | `auto` | `force`) for images.
+ * Raster path uses `normalizeImageForOcr` via `ocrImageDetailed` (or `ocrTiff` for TIFF); no Node-only libraries.
+ * HEIC/HEIF and hard failures yield empty text + warnings instead of throwing (abort still propagates).
  * DOCX returns structured extract with a notice (no OCR). Text decodes as UTF-8 (no OCR).
  */
 declare function runOcr(input: BrowserAnalyzeInput, options?: BrowserAnalyzeOptions): Promise<AnalysisResult>;
 /** High-level features the user can ask DocMind for (per input kind and runtime). */
-type PublicCapabilityId = "text" | "metadata" | "html" | "ocr" | "pages";
+type PublicCapabilityId = "text" | "metadata" | "html" | "ocr" | "pages" | "image-normalization" | "gif-first-frame" | "bmp" | "heic" | "tiff";
 declare function docxIncludeRequested(flags?: AnalyzeDocxIncludeFlags): boolean;
 /** DOCX `word/media` en runtime browser (JSZip; sin pipeline Node). */
 interface DocxEmbeddedImageCapabilities {
@@ -149,7 +155,7 @@ type BrowserExplainAnalysisPlanOptions = ExplainAnalysisPlanOptions & Pick<Brows
 /**
  * Epic 1 — **Capabilities:** detects kind from the same hints as `analyzeFile`, then lists which of
- * `text` | `metadata` | `html` | `ocr` | `pages` apply in the browser (PDF always unsupported).
+ * `text` | `metadata` | `html` | `ocr` | `pages` and image-specific ids (`image-normalization`, `bmp`, `gif-first-frame`, `heic`, `tiff`) apply in the browser (PDF always unsupported).
  * No Mammoth/Tesseract/PDF parsing. For DOCX, {@link GetCapabilitiesReport.docxStructure} / `docxEmbeddedImages` describe v2 opt-in features.
  */
 declare function getCapabilities(input: BrowserAnalyzeInput, options?: GetCapabilitiesOptions): Promise<GetCapabilitiesReport>;

package/dist/index.js CHANGED Viewed

@@ -1,7 +1,7 @@
-import { assertValidAnalyzeFileInput, detectFileKind, notImplementedResult, UNKNOWN_FORMAT_WARNING, analyzeText, toUint8Array, InvalidInputError } from '@dragon708/docmind-shared';
+import { assertValidAnalyzeFileInput, detectFileKind, notImplementedResult, UNKNOWN_FORMAT_WARNING, analyzeText, toUint8Array, InvalidInputError, getMimeType } from '@dragon708/docmind-shared';
 export { detectFileKind } from '@dragon708/docmind-shared';
 import { analyzeDocx } from '@dragon708/docmind-docx';
-import { ocr } from '@dragon708/docmind-ocr';
+import { preprocessHasEffect, resolveImageFormat, normalizeImageForOcr, ocrTiff, ocrImageDetailed } from '@dragon708/docmind-ocr';
 // src/analyzeFile.ts
 function assertBrowserInput(input) {
@@ -78,6 +78,93 @@ async function analyzeDocxForBrowser(input, options) {
   const r = docxOpts !== void 0 ? await analyzeDocx(data, docxOpts) : await analyzeDocx(data);
   return docxPackageResultToAnalysisResult(r);
 }
+var BROWSER_TIFF_BEST_EFFORT_WARNING = "Browser TIFF support is best-effort: decoding uses UTIF in JavaScript/WebAssembly\u2014some compressions, color modes, very large or multipage files may fail, hang, or exhaust memory. For heavy TIFF workloads use @dragon708/docmind-node (optional sharp).";
+function meanPageConfidence(pages) {
+  if (pages.length === 0) return 0;
+  return pages.reduce((s, p) => s + p.confidence, 0) / pages.length;
+}
+function rethrowIfAbort(e) {
+  if (e instanceof Error && e.name === "AbortError") throw e;
+}
+function ocrFailureWarnings(prefix, e) {
+  const msg = e instanceof Error ? e.message : String(e);
+  return [`${prefix} ${msg}`];
+}
+async function runRasterOcrForBrowser(data, input, options) {
+  const signal = options?.ocr?.signal ?? options?.signal;
+  const langs = options?.ocr?.langs;
+  const mimeHint = getMimeType(input);
+  const format = resolveImageFormat(data, mimeHint);
+  if (format === "heic" || format === "heif") {
+    const norm = await normalizeImageForOcr(data, { signal, mimeHint });
+    return {
+      text: "",
+      confidence: 0,
+      ocrUsed: true,
+      warnings: [
+        "HEIC/HEIF cannot be OCR'd in the browser; convert to PNG or JPEG server-side (e.g. @dragon708/docmind-node with sharp), then retry.",
+        ...norm.warnings
+      ],
+      inputFormat: norm.format,
+      normalizedFormat: norm.normalizedFormat
+    };
+  }
+  if (format === "tiff") {
+    try {
+      const tiff = await ocrTiff(data, {
+        langs,
+        signal,
+        maxPages: options?.ocr?.maxPages,
+        pageSeparator: options?.ocr?.pageSeparator
+      });
+      return {
+        text: tiff.text.trim(),
+        confidence: meanPageConfidence(tiff.textByPage),
+        ocrUsed: true,
+        warnings: [BROWSER_TIFF_BEST_EFFORT_WARNING, ...tiff.warnings],
+        pages: tiff.pagesProcessed,
+        textByPage: tiff.textByPage
+      };
+    } catch (e) {
+      rethrowIfAbort(e);
+      return {
+        text: "",
+        confidence: 0,
+        ocrUsed: true,
+        warnings: [
+          BROWSER_TIFF_BEST_EFFORT_WARNING,
+          ...ocrFailureWarnings("TIFF OCR failed in the browser:", e)
+        ]
+      };
+    }
+  }
+  try {
+    const detailed = await ocrImageDetailed(data, {
+      langs,
+      signal,
+      preprocess: options?.ocr?.preprocess
+    });
+    return {
+      text: detailed.text.trim(),
+      confidence: detailed.confidence,
+      ocrUsed: true,
+      warnings: [...detailed.warnings],
+      pages: detailed.pages,
+      inputFormat: detailed.inputFormat,
+      normalizedFormat: detailed.normalizedFormat
+    };
+  } catch (e) {
+    rethrowIfAbort(e);
+    return {
+      text: "",
+      confidence: 0,
+      ocrUsed: true,
+      warnings: ocrFailureWarnings("OCR could not complete in the browser:", e)
+    };
+  }
+}
+// src/analyzers/image.ts
 var OCR_OFF_WARNING = 'OCR mode is "off"; no recognition was run. Use mode "auto" or "force" to extract text from images.';
 function resolveOcrMode(options) {
   return options?.ocr?.mode ?? "auto";
@@ -114,21 +201,26 @@ async function analyzeImageForBrowser(input, options) {
       warnings: [OCR_OFF_WARNING]
     };
   }
-  const ocrOpts = {
-    ...options?.ocr ?? {},
-    signal: options?.ocr?.signal ?? options?.signal
-  };
-  const r = await ocr(data, ocrOpts);
-  return {
+  const ocrPart = await runRasterOcrForBrowser(data, input, options);
+  const base = {
     fileKind: "image",
     analyzer: "image",
     status: "ok",
     kind: "image",
-    text: r.text,
-    confidence: r.confidence,
-    ocrUsed: r.ocrUsed,
-    warnings: []
+    text: ocrPart.text,
+    confidence: ocrPart.confidence,
+    ocrUsed: true,
+    warnings: ocrPart.warnings
   };
+  const extra = {};
+  if (ocrPart.pages !== void 0) extra.pages = ocrPart.pages;
+  if (ocrPart.textByPage !== void 0) extra.textByPage = ocrPart.textByPage;
+  if (ocrPart.inputFormat !== void 0) extra.inputFormat = ocrPart.inputFormat;
+  if (ocrPart.normalizedFormat !== void 0) extra.normalizedFormat = ocrPart.normalizedFormat;
+  if (Object.keys(extra).length > 0) {
+    return { ...base, ...extra };
+  }
+  return base;
 }
 // src/analyzeFile.ts
@@ -361,11 +453,33 @@ function buildBrowserCapabilityReport(kind) {
       break;
     case "image":
       capabilities = [
-        slot("text", true, ["Text is obtained via OCR when enabled."]),
+        slot("text", true, [
+          "Text via `@dragon708/docmind-ocr` when `ocr.mode` is not off: PNG, JPEG, WebP, BMP, GIF (first frame), TIFF (partial), after sniff/MIME."
+        ]),
         slot("metadata", false, [IMAGE_META]),
         slot("html", false, [IMAGE_HTML]),
-        slot("ocr", true, [OCR_OFF_NOTE]),
-        slot("pages", false)
+        slot("ocr", true, [
+          OCR_OFF_NOTE,
+          "Uses `ocrImageDetailed` (single-frame path) or multipage `ocrTiff` for TIFF; WASM Tesseract in-browser."
+        ]),
+        slot("image-normalization", true, [
+          "`normalizeImageForOcr` runs inside the OCR package (canvas/`createImageBitmap` in-browser for BMP, GIF, etc.; not HEIC)."
+        ]),
+        slot("bmp", true, [
+          "BMP is decoded via browser canvas/`createImageBitmap` into a PNG-oriented buffer before Tesseract."
+        ]),
+        slot("gif-first-frame", true, [
+          "Animated GIF: only the first decoded frame is normalized and OCR'd; see result warnings when multi-frame is detected."
+        ]),
+        slot("heic", false, [
+          "HEIC/HEIF is not decoded in the browser. `runOcr` / `analyzeFile` return empty text with explicit warnings; convert server-side (e.g. @dragon708/docmind-node)."
+        ]),
+        slot("tiff", true, [
+          "Partial / best-effort: multipage `ocrTiff` with UTIF in JS/WASM\u2014not all compressions or huge files; prefer Node for production TIFF."
+        ]),
+        slot("pages", true, [
+          "Multipage TIFF may populate `pages` and `textByPage` when OCR succeeds; other formats may expose `pages` when normalization reports it."
+        ])
       ];
       break;
     case "text":
@@ -405,6 +519,9 @@ function lim(...items) {
 }
 var DOCX_MAMMOTH_PLUS_OPTIONAL_BROWSER = "Mammoth (`analyzeDocx`) extracts text and HTML from OOXML in-browser; optional parallel OOXML/ZIP extractors run when options.docx.include is set.";
 var DOCX_ZIP_NOTE_BROWSER = "Embedded files under word/media are available via @dragon708/docmind-docx when options.docx.include requests embeddedImages (or call extractImagesFromDocx on the same bytes).";
+var BROWSER_TIFF_RASTER_NOTE = "TIFF (if detected): multipage OCR uses `ocrTiff` with UTIF in-browser\u2014best-effort only; failures return empty text + warnings (no throw). Prefer `@dragon708/docmind-node` for production TIFF.";
+var BROWSER_IMAGE_PIPELINE = "Browser raster OCR: sniff format \u2192 `normalizeImageForOcr` (canvas/`createImageBitmap` for BMP/GIF; not HEIC) \u2192 optional `preprocessImageForOcr` when `options.ocr.preprocess` applies \u2192 Tesseract via `ocrImageDetailed`, or `ocrTiff` for TIFF. HEIC/HEIF: no decode\u2014expect empty text and explicit warnings. GIF: first frame only.";
+var BROWSER_HEIC_NOTE = "HEIC/HEIF is never decoded in-browser; there is no `sharp` dependency. Convert server-side, then OCR PNG/JPEG bytes.";
 function finalizeBrowserDocxExplainReport(report) {
   if (report.kind !== "docx") return report;
   const limitations = report.limitations.includes(DOCX_ZIP_NOTE_BROWSER) ? report.limitations : [...report.limitations, DOCX_ZIP_NOTE_BROWSER];
@@ -415,12 +532,12 @@ function finalizeBrowserDocxExplainReport(report) {
     limitations
   };
 }
-function buildBrowserExplainReport(kind, intent, ocrMode, plan, docxInclude) {
+function buildBrowserExplainReport(kind, intent, ocrMode, plan, docxInclude, ocrSlice) {
   const runtime = { id: "browser" };
   const imageOcrActive = ocrMode !== "off";
   let primaryAnalyzer = kind === "pdf" ? "pdf" : kind === "docx" ? "docx" : kind === "image" ? "image" : kind === "text" ? "text" : "none";
   let nativeExtraction;
-  let ocr2;
+  let ocr;
   let limitations = [];
   const ocrOffNote = ocrMode === "off" ? 'Image OCR is skipped when ocr.mode is "off".' : "";
   if (kind === "pdf") {
@@ -429,7 +546,7 @@ function buildBrowserExplainReport(kind, intent, ocrMode, plan, docxInclude) {
       willAttempt: false,
       description: "PDF is not processed in the browser runtime; use @dragon708/docmind-node."
     };
-    ocr2 = {
+    ocr = {
       mayUse: false,
       description: "PDF OCR is not available in the browser."
     };
@@ -440,7 +557,7 @@ function buildBrowserExplainReport(kind, intent, ocrMode, plan, docxInclude) {
       intent,
       primaryAnalyzer: "pdf",
       nativeExtraction,
-      ocr: ocr2,
+      ocr,
       limitations,
       plan,
       warnings: [BROWSER_PDF_UNSUPPORTED_WARNING]
@@ -451,7 +568,7 @@ function buildBrowserExplainReport(kind, intent, ocrMode, plan, docxInclude) {
       "Could not classify the file from name, MIME, or bytes; analysis will return not_implemented until hints improve."
     );
     nativeExtraction = { willAttempt: false, description: "No analyzer selected without a known file kind." };
-    ocr2 = { mayUse: false, description: "OCR is not used for unknown kinds." };
+    ocr = { mayUse: false, description: "OCR is not used for unknown kinds." };
     return finalizeBrowserDocxExplainReport({
       kind,
       detectedKind: kind,
@@ -459,7 +576,7 @@ function buildBrowserExplainReport(kind, intent, ocrMode, plan, docxInclude) {
       intent,
       primaryAnalyzer: "none",
       nativeExtraction,
-      ocr: ocr2,
+      ocr,
       limitations,
       plan
     });
@@ -472,23 +589,23 @@ function buildBrowserExplainReport(kind, intent, ocrMode, plan, docxInclude) {
           willAttempt: true,
           description: docxIncludeRequested(docxInclude) ? "Mammoth plus parallel OOXML extractors (per options.docx.include)." + (intent === "extractText" ? " HTML cleared in extractText." : "") : DOCX_MAMMOTH_PLUS_OPTIONAL_BROWSER + (intent === "extractText" ? " HTML omitted in extractText." : "")
         };
-        ocr2 = { mayUse: false, description: "DOCX does not use OCR in DocMind." };
+        ocr = { mayUse: false, description: "DOCX does not use OCR in DocMind." };
       } else if (kind === "image") {
         nativeExtraction = {
           willAttempt: false,
-          description: "Raster images have no native text layer in this pipeline."
+          description: BROWSER_IMAGE_PIPELINE
         };
-        ocr2 = {
+        ocr = {
           mayUse: imageOcrActive,
-          description: imageOcrActive ? "Tesseract.js may run to recover text (subject to format support)." : "OCR skipped while ocr.mode is off."
+          description: imageOcrActive ? "`ocrImageDetailed` (normalize + optional preprocess) for single-frame paths; TIFF \u2192 `ocrTiff` (UTIF, partial). HEIC unsupported in-browser." : "OCR skipped while ocr.mode is off."
         };
-        limitations = lim(ocrOffNote);
+        limitations = lim(ocrOffNote, BROWSER_TIFF_RASTER_NOTE, BROWSER_HEIC_NOTE);
       } else {
         nativeExtraction = {
           willAttempt: true,
           description: "Plain text is decoded as UTF-8 (BOM stripped, replacement on invalid bytes)."
         };
-        ocr2 = { mayUse: false, description: "OCR does not apply to text files." };
+        ocr = { mayUse: false, description: "OCR does not apply to text files." };
       }
       break;
     case "extractMetadata":
@@ -497,16 +614,16 @@ function buildBrowserExplainReport(kind, intent, ocrMode, plan, docxInclude) {
           willAttempt: false,
           description: "No heavy extractor; extractMetadata returns a stub with guidance."
         };
-        ocr2 = { mayUse: false, description: "OCR is not invoked for this metadata path." };
+        ocr = { mayUse: false, description: "OCR is not invoked for this metadata path." };
         limitations = lim(
-          kind === "docx" ? "Structured DOCX metadata is not exposed separately in the browser; use analyzeFile / extractText / convertToHtml with options.docx.include for OOXML fields." : "Raster images have no document metadata bundle in this API."
+          kind === "docx" ? "Structured DOCX metadata is not exposed separately in the browser; use analyzeFile / extractText / convertToHtml with options.docx.include for OOXML fields." : "Raster images have no document metadata bundle. TIFF/HEIC caveats: see getCapabilities (`tiff` partial, `heic` unsupported) and runOcr warnings."
         );
       } else {
         nativeExtraction = {
           willAttempt: true,
           description: "Plain text is decoded; metadata is limited to decoded content."
         };
-        ocr2 = { mayUse: false, description: "OCR does not apply." };
+        ocr = { mayUse: false, description: "OCR does not apply." };
         limitations = lim("Plain text has no structured document metadata.");
       }
       break;
@@ -516,21 +633,21 @@ function buildBrowserExplainReport(kind, intent, ocrMode, plan, docxInclude) {
           willAttempt: true,
           description: docxIncludeRequested(docxInclude) ? "Mammoth HTML via analyzeFile plus optional OOXML extractors." : "Mammoth HTML via analyzeFile; optional OOXML v2 when options.docx.include is set."
         };
-        ocr2 = { mayUse: false, description: "DOCX path does not use OCR." };
+        ocr = { mayUse: false, description: "DOCX path does not use OCR." };
       } else if (kind === "text") {
         nativeExtraction = {
           willAttempt: true,
           description: "UTF-8 decode then wrap in a <pre> element."
         };
-        ocr2 = { mayUse: false, description: "OCR does not apply." };
+        ocr = { mayUse: false, description: "OCR does not apply." };
       } else {
         nativeExtraction = {
           willAttempt: false,
           description: "No rich HTML path for this kind in the browser."
         };
-        ocr2 = { mayUse: false, description: "OCR does not produce layout HTML here." };
+        ocr = { mayUse: false, description: "OCR does not produce layout HTML here." };
         limitations = lim(
-          kind === "image" ? "Raster images have no HTML representation; use extractText or runOcr." : ""
+          kind === "image" ? "Raster images have no HTML representation; use extractText or runOcr. Expect HEIC to yield warnings only; TIFF is best-effort." : ""
         );
       }
       break;
@@ -538,31 +655,37 @@ function buildBrowserExplainReport(kind, intent, ocrMode, plan, docxInclude) {
       if (kind === "image") {
         nativeExtraction = {
           willAttempt: false,
-          description: "No native text layer; recognition is OCR-only."
+          description: BROWSER_IMAGE_PIPELINE
         };
-        ocr2 = {
+        ocr = {
           mayUse: imageOcrActive,
-          description: imageOcrActive ? "Tesseract.js runs for raster text." : "OCR skipped while ocr.mode is off."
+          description: imageOcrActive ? "Same as analyzeFile: `normalizeImageForOcr` inside `ocrImageDetailed`, optional canvas preprocess, or `ocrTiff` for TIFF. Errors surface as warnings, not uncaught exceptions (except abort)." : "OCR skipped while ocr.mode is off."
         };
-        limitations = lim(ocrOffNote);
+        limitations = lim(ocrOffNote, BROWSER_TIFF_RASTER_NOTE, BROWSER_HEIC_NOTE);
       } else if (kind === "docx") {
         nativeExtraction = {
           willAttempt: true,
           description: docxIncludeRequested(docxInclude) ? "Mammoth text/HTML plus optional OOXML extractors; not OCR." : "Mammoth text/HTML; optional OOXML v2 via options.docx.include; not OCR."
         };
-        ocr2 = { mayUse: false, description: "DOCX is not OCR'd." };
+        ocr = { mayUse: false, description: "DOCX is not OCR'd." };
         limitations = lim("Returned content is structured extract, not OCR output.");
       } else {
         nativeExtraction = {
           willAttempt: true,
           description: "Plain text is UTF-8 decoded only."
         };
-        ocr2 = { mayUse: false, description: "OCR does not apply to text files." };
+        ocr = { mayUse: false, description: "OCR does not apply to text files." };
       }
       break;
     default:
       nativeExtraction = { willAttempt: false, description: "Intent not specialized in this runtime." };
-      ocr2 = { mayUse: false, description: "See plan steps." };
+      ocr = { mayUse: false, description: "See plan steps." };
+  }
+  if (kind === "image" && preprocessHasEffect(ocrSlice?.preprocess)) {
+    limitations = [
+      ...limitations,
+      "options.ocr.preprocess applies to the `ocrImageDetailed` path only; multipage TIFF (`ocrTiff`) does not run preprocess per frame."
+    ];
   }
   return finalizeBrowserDocxExplainReport({
     kind,
@@ -571,17 +694,38 @@ function buildBrowserExplainReport(kind, intent, ocrMode, plan, docxInclude) {
     intent,
     primaryAnalyzer,
     nativeExtraction,
-    ocr: ocr2,
+    ocr,
     limitations,
     plan
   });
 }
 // src/introspection.ts
-function resolveOcrMode2(ocr2) {
-  return ocr2?.mode ?? "auto";
+function resolveOcrMode2(ocr) {
+  return ocr?.mode ?? "auto";
+}
+function imageBrowserPlanSteps(ocrMode, ocr) {
+  if (ocrMode === "off") {
+    return [
+      { id: "detect_kind", status: "done" },
+      { id: "image_format_detect", status: "skipped" },
+      { id: "normalize_image_for_ocr", status: "skipped" },
+      { id: "preprocess_image_for_ocr", status: "skipped" },
+      { id: "tesseract_ocr", status: "skipped" }
+    ];
+  }
+  return [
+    { id: "detect_kind", status: "done" },
+    { id: "image_format_detect", status: "planned" },
+    { id: "normalize_image_for_ocr", status: "planned" },
+    {
+      id: "preprocess_image_for_ocr",
+      status: preprocessHasEffect(ocr?.preprocess) ? "planned" : "skipped"
+    },
+    { id: "tesseract_ocr", status: "planned" }
+  ];
 }
-function planForAnalyzeFile(kind, ocrMode, docxInclude) {
+function planForAnalyzeFile(kind, ocrMode, docxInclude, ocr) {
   switch (kind) {
     case "pdf":
       return {
@@ -605,13 +749,7 @@ function planForAnalyzeFile(kind, ocrMode, docxInclude) {
     case "image":
       return {
         intent: "analyzeFile",
-        steps: [
-          { id: "detect_kind", status: "done" },
-          {
-            id: "image_ocr",
-            status: ocrMode === "off" ? "skipped" : "planned"
-          }
-        ]
+        steps: imageBrowserPlanSteps(ocrMode, ocr)
       };
     case "text":
       return {
@@ -628,11 +766,11 @@ function planForAnalyzeFile(kind, ocrMode, docxInclude) {
       };
   }
 }
-function planForIntent(intentOpt, kind, ocrMode, docxInclude) {
+function planForIntent(intentOpt, kind, ocrMode, docxInclude, ocr) {
   const intent = intentOpt ?? "analyzeFile";
-  if (intent === "analyzeFile") return planForAnalyzeFile(kind, ocrMode, docxInclude);
+  if (intent === "analyzeFile") return planForAnalyzeFile(kind, ocrMode, docxInclude, ocr);
   if (intent === "extractText") {
-    const base = planForAnalyzeFile(kind, ocrMode, docxInclude);
+    const base = planForAnalyzeFile(kind, ocrMode, docxInclude, ocr);
     return { ...base, intent: "extractText" };
   }
   if (intent === "extractMetadata") {
@@ -687,10 +825,7 @@ function planForIntent(intentOpt, kind, ocrMode, docxInclude) {
     if (kind === "image") {
       return {
         intent: "runOcr",
-        steps: [
-          { id: "detect_kind", status: "done" },
-          { id: "tesseract_ocr", status: ocrMode === "off" ? "skipped" : "planned" }
-        ]
+        steps: imageBrowserPlanSteps(ocrMode, ocr)
       };
     }
     if (kind === "docx") {
@@ -712,7 +847,7 @@ function planForIntent(intentOpt, kind, ocrMode, docxInclude) {
       ]
     };
   }
-  return planForAnalyzeFile(kind, ocrMode, docxInclude);
+  return planForAnalyzeFile(kind, ocrMode, docxInclude, ocr);
 }
 async function getCapabilities(input, options) {
   throwIfAborted(options?.signal);
@@ -727,8 +862,9 @@ async function explainAnalysisPlan(input, options) {
   const intent = options?.intent ?? "analyzeFile";
   const ocrMode = resolveOcrMode2(options?.ocr);
   const docxInc = options?.docx?.include;
-  const plan = planForIntent(intent, kind, ocrMode, docxInc);
-  return buildBrowserExplainReport(kind, intent, ocrMode, plan, docxInc);
+  const ocrSlice = options?.ocr;
+  const plan = planForIntent(intent, kind, ocrMode, docxInc, ocrSlice);
+  return buildBrowserExplainReport(kind, intent, ocrMode, plan, docxInc, ocrSlice);
 }
 export { BROWSER_PDF_UNSUPPORTED_WARNING, DOCX_EMBEDDED_IMAGE_CAPABILITIES_BROWSER, DOCX_STRUCTURE_CAPABILITIES_BROWSER, analyzeFile, convertToHtml, docxIncludeRequested, explainAnalysisPlan, extractMetadata, extractText, getCapabilities, runOcr };

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@dragon708/docmind-browser",
-  "version": "1.4.0",
+  "version": "1.5.0",
   "description": "Official DocMind browser facade: analyzeFile and intent APIs (DOCX, image OCR, text). PDF and fs paths use @dragon708/docmind-node.",
   "type": "module",
   "sideEffects": false,
@@ -34,7 +34,7 @@
   "license": "MIT",
   "dependencies": {
     "@dragon708/docmind-docx": "^1.7.0",
-    "@dragon708/docmind-ocr": "^1.0.0",
+    "@dragon708/docmind-ocr": "^1.1.0",
     "@dragon708/docmind-shared": "^1.1.0"
   },
   "devDependencies": {