@dragon708/docmind-node 1.7.0 → 1.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.ts CHANGED
@@ -1,6 +1,6 @@
1
1
  import { DocMindAnalyzeOptions, DetectFileKindInput, NamedInput, AnalysisResult, FileKind, RuntimeDescriptor, DocMindPublicIntent, AnalysisAnalyzer, ProcessingPlanDescriptor, ExplainAnalysisPlanOptions, GetCapabilitiesOptions } from '@dragon708/docmind-shared';
2
2
  export { AnalysisAnalyzer, AnalysisResult, CapabilityDescriptor, DetectFileKindInput, DocMindPublicIntent, DocxAnalysisCoreResult, ExplainAnalysisPlanOptions, ExplainAnalysisPlanResult, FileKind, FileKindMetadata, GenericAnalysisResult, GetCapabilitiesOptions, GetCapabilitiesResult, ImageAnalysisCoreResult, PdfAnalysisCoreResult, TextAnalysisResult, detectFileKind } from '@dragon708/docmind-shared';
3
- import { OcrOptions } from '@dragon708/docmind-ocr';
3
+ import { OcrOptions, OcrTiffOptions, PreprocessImageOptions } from '@dragon708/docmind-ocr';
4
4
  import { PdfAnalyzeOptions } from '@dragon708/docmind-pdf';
5
5
  import { AnalyzeDocxIncludeFlags, DocxToHtmlOptions } from '@dragon708/docmind-docx';
6
6
  export { AnalyzeDocxIncludeFlags, DocxEmbeddedImage, DocxEmbeddedImageConversionOptions, DocxEmbeddedImageConversionResult, DocxEmbeddedImageWebSlice, DocxImageExtractionMode, ExtractImagesFromDocxOptions, ExtractImagesFromDocxResult, convertDocxEmbeddedImageToWeb, convertDocxImagesForWeb, docxImageIsBrowserRenderable, docxImageRequiresConversion, docxImageSuggestedTargetFormat, docxImageToDataUri, extractImagesFromDocx, isNodeJsRuntime } from '@dragon708/docmind-docx';
@@ -10,7 +10,7 @@ export { AnalyzeDocxIncludeFlags, DocxEmbeddedImage, DocxEmbeddedImageConversion
10
10
  *
11
11
  * - **`pdf`**: forwarded to `@dragon708/docmind-pdf`. `analyzeFile` defaults `pdf.ocr` to `"auto"` when omitted.
12
12
  * {@link extractText} / {@link convertToHtml} merge a default of `ocr: "off"` unless you set `pdf.ocr` explicitly.
13
- * - **`ocr`**: forwarded to `@dragon708/docmind-ocr` for raster images; language string also feeds PDF OCR when `pdf.ocrLangs` is unset.
13
+ * - **`ocr`**: forwarded to `@dragon708/docmind-ocr` for raster images (`ocrImageDetailed` + `normalizeImageForOcr`, or `ocrTiff` for TIFF); optional `preprocess` maps to {@link PreprocessImageOptions}. `maxPages` / `pageSeparator` apply to multipage TIFF. Language also feeds PDF OCR when `pdf.ocrLangs` is unset.
14
14
  * - **`pdfNativeTextSource`**: when `pdf.ocr` resolves to `"off"`, chooses how native text is obtained (see {@link extractText} default).
15
15
  */
16
16
  /**
@@ -26,7 +26,9 @@ interface NodeAnalyzeDocxOptionsSlice {
26
26
  }
27
27
  interface NodeAnalyzeOptions extends DocMindAnalyzeOptions {
28
28
  readonly pdf?: PdfAnalyzeOptions;
29
- readonly ocr?: OcrOptions;
29
+ readonly ocr?: OcrOptions & Pick<OcrTiffOptions, "maxPages" | "pageSeparator"> & {
30
+ readonly preprocess?: PreprocessImageOptions;
31
+ };
30
32
  /** Solo DOCX: ver {@link NodeAnalyzeDocxOptionsSlice}. */
31
33
  readonly docx?: NodeAnalyzeDocxOptionsSlice;
32
34
  /**
@@ -77,12 +79,14 @@ declare function extractMetadata(input: NodeAnalyzeInput, options?: NodeAnalyzeO
77
79
  declare function convertToHtml(input: NodeAnalyzeInput, options?: NodeAnalyzeOptions): Promise<AnalysisResult>;
78
80
  /**
79
81
  * OCR intent: PDF always runs {@link analyzePdf} with `ocr: "force"` (merged with `options.pdf`).
80
- * Raster images run Tesseract via `options.ocr`. DOCX returns structured extract with a notice.
82
+ * Raster images: `ocrImageDetailed` (normalize → optional preprocess → Tesseract) for supported single-frame inputs;
83
+ * TIFF via multipage `ocrTiff` (`options.ocr.maxPages`, `pageSeparator`).
84
+ * DOCX returns structured extract with a notice.
81
85
  */
82
86
  declare function runOcr(input: NodeAnalyzeInput, options?: NodeAnalyzeOptions): Promise<AnalysisResult>;
83
87
 
84
88
  /** High-level features the user can ask DocMind for (per input kind and runtime). */
85
- type PublicCapabilityId = "text" | "metadata" | "html" | "ocr" | "pages";
89
+ type PublicCapabilityId = "text" | "metadata" | "html" | "ocr" | "pages" | "ocr-multipage" | "image-normalization" | "tiff" | "heic-node-only" | "bmp" | "gif-first-frame";
86
90
  /**
87
91
  * DOCX-only: what the stack can do with OOXML embedded bitmaps/vector payloads under `word/media`.
88
92
  * Present on {@link GetCapabilitiesReport} when `kind === "docx"`.
@@ -174,7 +178,7 @@ type NodeExplainAnalysisPlanOptions = ExplainAnalysisPlanOptions & Pick<NodeAnal
174
178
 
175
179
  /**
176
180
  * Epic 1 — **Capabilities:** after resolving {@link NodeAnalyzeInput}, lists which of
177
- * `text` | `metadata` | `html` | `ocr` | `pages` apply for that kind in Node (for PDF, `text` / `metadata` /
181
+ * `text` | `metadata` | `html` | `ocr` | `pages` (and image-specific ids such as `ocr-multipage`, `image-normalization`, `tiff`, `heic-node-only`, `bmp`, `gif-first-frame`) apply for that kind in Node (for PDF, `text` / `metadata` /
178
182
  * `pages` / `ocr` describe the v2 pdf-parse + PDF.js + OCR stack; see {@link buildNodeCapabilityReport}).
179
183
  * For **DOCX**, `docxEmbeddedImages` and `docxStructure` describe ZIP media and optional OOXML v2 extractors (`options.docx.include`).
180
184
  * Does not run Mammoth/Tesseract/PDF bodies beyond path resolution.
package/dist/index.js CHANGED
@@ -1,8 +1,8 @@
1
- import { assertValidAnalyzeFileInput, detectFileKind, notImplementedResult, UNKNOWN_FORMAT_WARNING, analyzeText, toUint8Array, isNamedInput, isBinaryInput, isBlob, isFile } from '@dragon708/docmind-shared';
1
+ import { assertValidAnalyzeFileInput, detectFileKind, notImplementedResult, UNKNOWN_FORMAT_WARNING, analyzeText, toUint8Array, isNamedInput, isBinaryInput, isBlob, isFile, getMimeType } from '@dragon708/docmind-shared';
2
2
  export { detectFileKind } from '@dragon708/docmind-shared';
3
3
  import { analyzeDocx } from '@dragon708/docmind-docx';
4
4
  export { convertDocxEmbeddedImageToWeb, convertDocxImagesForWeb, docxImageIsBrowserRenderable, docxImageRequiresConversion, docxImageSuggestedTargetFormat, docxImageToDataUri, extractImagesFromDocx, isNodeJsRuntime } from '@dragon708/docmind-docx';
5
- import { ocr } from '@dragon708/docmind-ocr';
5
+ import { preprocessHasEffect, resolveImageFormat, ocrTiff, ocrImageDetailed } from '@dragon708/docmind-ocr';
6
6
  import { extractPdfMetadata, extractTextFromPdf, analyzePdf, extractPdfTextByPage } from '@dragon708/docmind-pdf';
7
7
  import { readFile } from 'fs/promises';
8
8
  import { basename } from 'path';
@@ -74,6 +74,53 @@ async function analyzeDocxForNode(input, options) {
74
74
  const r = docxOpts !== void 0 ? await analyzeDocx(data, docxOpts) : await analyzeDocx(data);
75
75
  return docxPackageResultToAnalysisResult(r);
76
76
  }
77
+ function meanPageConfidence(pages) {
78
+ if (pages.length === 0) return 0;
79
+ return pages.reduce((s, p) => s + p.confidence, 0) / pages.length;
80
+ }
81
+ function mimeHintFromDetectInput(input) {
82
+ if (!isByteBackedInput(input)) return void 0;
83
+ return getMimeType(input);
84
+ }
85
+ async function runRasterOcrForNode(data, input, options) {
86
+ const signal = options?.ocr?.signal ?? options?.signal;
87
+ const langs = options?.ocr?.langs;
88
+ const mimeHint = mimeHintFromDetectInput(input);
89
+ const format = resolveImageFormat(data, mimeHint);
90
+ if (format === "tiff") {
91
+ const tiff = await ocrTiff(data, {
92
+ langs,
93
+ signal,
94
+ maxPages: options?.ocr?.maxPages,
95
+ pageSeparator: options?.ocr?.pageSeparator
96
+ });
97
+ const warnings = [...tiff.warnings];
98
+ return {
99
+ text: tiff.text.trim(),
100
+ confidence: meanPageConfidence(tiff.textByPage),
101
+ ocrUsed: true,
102
+ warnings,
103
+ pages: tiff.pagesProcessed,
104
+ textByPage: tiff.textByPage
105
+ };
106
+ }
107
+ const detailed = await ocrImageDetailed(data, {
108
+ langs,
109
+ signal,
110
+ preprocess: options?.ocr?.preprocess
111
+ });
112
+ return {
113
+ text: detailed.text.trim(),
114
+ confidence: detailed.confidence,
115
+ ocrUsed: true,
116
+ warnings: [...detailed.warnings],
117
+ pages: detailed.pages,
118
+ inputFormat: detailed.inputFormat,
119
+ normalizedFormat: detailed.normalizedFormat
120
+ };
121
+ }
122
+
123
+ // src/analyzers/image.ts
77
124
  async function analyzeImageForNode(input, options) {
78
125
  if (options?.signal?.aborted) {
79
126
  const err = new Error("The operation was aborted");
@@ -93,21 +140,26 @@ async function analyzeImageForNode(input, options) {
93
140
  warnings: ["No image bytes were provided for analysis."]
94
141
  };
95
142
  }
96
- const ocrOpts = {
97
- ...options?.ocr ?? {},
98
- signal: options?.ocr?.signal ?? options?.signal
99
- };
100
- const r = await ocr(data, ocrOpts);
101
- return {
143
+ const ocrPart = await runRasterOcrForNode(data, input, options);
144
+ const base = {
102
145
  fileKind: "image",
103
146
  analyzer: "image",
104
147
  status: "ok",
105
148
  kind: "image",
106
- text: r.text,
107
- confidence: r.confidence,
108
- ocrUsed: r.ocrUsed,
109
- warnings: []
149
+ text: ocrPart.text,
150
+ confidence: ocrPart.confidence,
151
+ ocrUsed: true,
152
+ warnings: ocrPart.warnings
110
153
  };
154
+ const extra = {};
155
+ if (ocrPart.pages !== void 0) extra.pages = ocrPart.pages;
156
+ if (ocrPart.textByPage !== void 0) extra.textByPage = ocrPart.textByPage;
157
+ if (ocrPart.inputFormat !== void 0) extra.inputFormat = ocrPart.inputFormat;
158
+ if (ocrPart.normalizedFormat !== void 0) extra.normalizedFormat = ocrPart.normalizedFormat;
159
+ if (Object.keys(extra).length > 0) {
160
+ return { ...base, ...extra };
161
+ }
162
+ return base;
111
163
  }
112
164
  async function analyzePdfForNode(input, options) {
113
165
  if (options?.signal?.aborted) {
@@ -301,7 +353,7 @@ async function extractMetadata(input, options) {
301
353
  status: "ok",
302
354
  kind: "pdf",
303
355
  text: "",
304
- pages: 0,
356
+ pages: r.pages,
305
357
  metadata: r.metadata,
306
358
  warnings: r.warnings,
307
359
  needsOCR: false,
@@ -449,34 +501,10 @@ async function runOcr(input, options) {
449
501
  };
450
502
  }
451
503
  case "image": {
452
- const data = await bytesFromDetectInput(resolved);
453
- if (data.byteLength === 0) {
454
- return {
455
- fileKind: "image",
456
- analyzer: "image",
457
- status: "ok",
458
- kind: "image",
459
- text: "",
460
- confidence: 0,
461
- ocrUsed: true,
462
- warnings: ["No image bytes were provided for analysis."]
463
- };
464
- }
465
- const ocrOpts = {
466
- ...options?.ocr ?? {},
467
- signal: options?.ocr?.signal ?? signal
468
- };
469
- const r = await ocr(data, ocrOpts);
470
- return {
471
- fileKind: "image",
472
- analyzer: "image",
473
- status: "ok",
474
- kind: "image",
475
- text: r.text,
476
- confidence: r.confidence,
477
- ocrUsed: r.ocrUsed,
478
- warnings: []
479
- };
504
+ return analyzeImageForNode(resolved, {
505
+ ...options,
506
+ ocr: { ...options?.ocr ?? {}, signal: options?.ocr?.signal ?? signal }
507
+ });
480
508
  }
481
509
  case "docx": {
482
510
  const data = await bytesFromDetectInput(resolved);
@@ -589,11 +617,33 @@ function buildNodeCapabilityReport(kind) {
589
617
  break;
590
618
  case "image":
591
619
  capabilities = [
592
- slot("text", true, ["Text is obtained via OCR."]),
620
+ slot("text", true, [
621
+ "Text via `@dragon708/docmind-ocr` after format sniff (PNG, JPEG, WebP, TIFF, BMP, GIF first frame, HEIC/HEIF in Node via conversion)."
622
+ ]),
593
623
  slot("metadata", false, [IMAGE_META]),
594
624
  slot("html", false, [IMAGE_HTML]),
595
- slot("ocr", true),
596
- slot("pages", false)
625
+ slot("ocr", true, [
626
+ "Single-frame pipeline: `normalizeImageForOcr` \u2192 optional `preprocessImageForOcr` (`options.ocr.preprocess`) \u2192 Tesseract via `ocrImageDetailed`."
627
+ ]),
628
+ slot("ocr-multipage", true, [
629
+ "Multipage TIFF: `ocrTiff` with per-page text; `text` joins pages (see `options.ocr.pageSeparator`)."
630
+ ]),
631
+ slot("image-normalization", true, [
632
+ "Bytes are normalized to a Tesseract-friendly raster (PNG-oriented) inside the OCR package before recognition."
633
+ ]),
634
+ slot("tiff", true, [
635
+ "TIFF detected by magic/MIME routes to `ocrTiff` (not the single-frame `ocrImageDetailed` path)."
636
+ ]),
637
+ slot("heic-node-only", true, [
638
+ "HEIC/HEIF uses Node `sharp` conversion in `@dragon708/docmind-ocr`; not available in the browser facade."
639
+ ]),
640
+ slot("bmp", true, ["BMP inputs are supported through the universal normalization path."]),
641
+ slot("gif-first-frame", true, [
642
+ "Animated GIF: only the first frame is normalized and OCR'd."
643
+ ]),
644
+ slot("pages", true, [
645
+ "TIFF: `pages` and `textByPage` mirror frames processed. Other formats may expose `pages` when the normalizer reports it (e.g. GIF)."
646
+ ])
597
647
  ];
598
648
  break;
599
649
  case "text":
@@ -643,11 +693,12 @@ function finalizeDocxExplainReport(report) {
643
693
  };
644
694
  }
645
695
  var DOCX_MAMMOTH_PLUS_OPTIONAL = "Mammoth extracts text and HTML from OOXML; optional parallel OOXML/ZIP extractors run when options.docx.include is set (structure, headings, tables, blocks, pagesApprox, embeddedImages).";
646
- function buildNodeExplainReport(kind, intent, pdfOcr, plan, docxInclude) {
696
+ var NODE_IMAGE_OCR_PIPELINE = "Node raster OCR: detect format (magic/MIME) \u2192 `normalizeImageForOcr` \u2192 optional `preprocessImageForOcr` when `options.ocr.preprocess` is set \u2192 Tesseract. TIFF is handled with `ocrTiff` (multipage; per-page `textByPage` and joined `text`). HEIC/HEIF is decoded via `sharp` on Node (not in the browser package). BMP and static images share the normalization path; GIF uses the first frame only.";
697
+ function buildNodeExplainReport(kind, intent, pdfOcr, plan, docxInclude, ocrSlice) {
647
698
  const runtime = { id: "node" };
648
699
  const primaryAnalyzer = kind === "pdf" ? "pdf" : kind === "docx" ? "docx" : kind === "image" ? "image" : kind === "text" ? "text" : "none";
649
700
  let nativeExtraction;
650
- let ocr3;
701
+ let ocr;
651
702
  let limitations = [];
652
703
  if (kind === "unknown") {
653
704
  limitations = lim(
@@ -672,7 +723,7 @@ function buildNodeExplainReport(kind, intent, pdfOcr, plan, docxInclude) {
672
723
  willAttempt: true,
673
724
  description: "pdf-parse supplies embedded text, metadata, and page count; PDF.js drives raster OCR when enabled."
674
725
  };
675
- ocr3 = {
726
+ ocr = {
676
727
  mayUse: pdfOcr !== "off",
677
728
  description: pdfOcr === "off" ? "Raster OCR pipeline is off (pdf.ocr: off)." : pdfOcr === "force" ? "Raster OCR may run on all pages when pdf.ocr is force." : "Raster OCR may run when native text looks insufficient (pdf.ocr: auto + heuristics)."
678
729
  };
@@ -681,19 +732,22 @@ function buildNodeExplainReport(kind, intent, pdfOcr, plan, docxInclude) {
681
732
  willAttempt: true,
682
733
  description: docxIncludeRequested(docxInclude) ? "Mammoth plus parallel OOXML extractors (per options.docx.include)." : DOCX_MAMMOTH_PLUS_OPTIONAL
683
734
  };
684
- ocr3 = { mayUse: false, description: "DOCX does not use OCR in DocMind." };
735
+ ocr = { mayUse: false, description: "DOCX does not use OCR in DocMind." };
685
736
  } else if (kind === "image") {
686
737
  nativeExtraction = {
687
738
  willAttempt: false,
688
- description: "Images have no native text layer; text comes from OCR only."
739
+ description: NODE_IMAGE_OCR_PIPELINE
740
+ };
741
+ ocr = {
742
+ mayUse: true,
743
+ description: "Same stack as `runOcr` / `analyzeFile` for images: universal normalization, optional preprocess, then `ocrImageDetailed` or multipage `ocrTiff` for TIFF."
689
744
  };
690
- ocr3 = { mayUse: true, description: "Tesseract runs on supported raster formats." };
691
745
  } else {
692
746
  nativeExtraction = {
693
747
  willAttempt: true,
694
748
  description: "UTF-8 decode with BOM handling for plain text."
695
749
  };
696
- ocr3 = { mayUse: false, description: "OCR does not apply to text files." };
750
+ ocr = { mayUse: false, description: "OCR does not apply to text files." };
697
751
  }
698
752
  break;
699
753
  case "extractText":
@@ -702,7 +756,7 @@ function buildNodeExplainReport(kind, intent, pdfOcr, plan, docxInclude) {
702
756
  willAttempt: true,
703
757
  description: "Node: pdf-parse for metadata/page baseline, then PDF.js per-page text merged into `text` (pdfNativeTextSource pdfjs-per-page default)."
704
758
  };
705
- ocr3 = {
759
+ ocr = {
706
760
  mayUse: false,
707
761
  description: "extractText defaults pdf.ocr off; set pdf.ocr explicitly to allow auto/force raster OCR."
708
762
  };
@@ -711,16 +765,22 @@ function buildNodeExplainReport(kind, intent, pdfOcr, plan, docxInclude) {
711
765
  willAttempt: true,
712
766
  description: docxIncludeRequested(docxInclude) ? "Same DOCX router as analyzeFile: Mammoth text + optional OOXML fields; HTML cleared in extractText." : "Mammoth plain text; HTML cleared. Optional OOXML v2 fields when options.docx.include is set."
713
767
  };
714
- ocr3 = { mayUse: false, description: "DOCX does not use OCR." };
768
+ ocr = { mayUse: false, description: "DOCX does not use OCR." };
715
769
  } else if (kind === "image") {
716
- nativeExtraction = { willAttempt: false, description: "No embedded text layer." };
717
- ocr3 = { mayUse: true, description: "OCR produces text for images." };
770
+ nativeExtraction = {
771
+ willAttempt: false,
772
+ description: NODE_IMAGE_OCR_PIPELINE
773
+ };
774
+ ocr = {
775
+ mayUse: true,
776
+ description: "Same Node image pipeline as analyzeFile (normalize \u2192 optional preprocess \u2192 `ocrImageDetailed` or `ocrTiff`)."
777
+ };
718
778
  } else {
719
779
  nativeExtraction = {
720
780
  willAttempt: true,
721
781
  description: "UTF-8 decode only."
722
782
  };
723
- ocr3 = { mayUse: false, description: "OCR does not apply." };
783
+ ocr = { mayUse: false, description: "OCR does not apply." };
724
784
  }
725
785
  break;
726
786
  case "extractMetadata":
@@ -729,13 +789,13 @@ function buildNodeExplainReport(kind, intent, pdfOcr, plan, docxInclude) {
729
789
  willAttempt: true,
730
790
  description: "Lightweight PDF info/XMP normalization without full OCR."
731
791
  };
732
- ocr3 = { mayUse: false, description: "extractMetadata does not run the OCR pipeline." };
792
+ ocr = { mayUse: false, description: "extractMetadata does not run the OCR pipeline." };
733
793
  } else if (kind === "docx" || kind === "image") {
734
794
  nativeExtraction = {
735
795
  willAttempt: false,
736
796
  description: "Stub response; no heavy extractor."
737
797
  };
738
- ocr3 = { mayUse: false, description: "OCR not used for this metadata path." };
798
+ ocr = { mayUse: false, description: "OCR not used for this metadata path." };
739
799
  limitations = lim(
740
800
  kind === "docx" ? "Structured DOCX metadata is not exposed separately; use analyzeFile / extractText / convertToHtml with options.docx.include for OOXML structure." : "Raster images have no document metadata bundle."
741
801
  );
@@ -744,7 +804,7 @@ function buildNodeExplainReport(kind, intent, pdfOcr, plan, docxInclude) {
744
804
  willAttempt: true,
745
805
  description: "Decoded text only; no structured document metadata."
746
806
  };
747
- ocr3 = { mayUse: false, description: "OCR does not apply." };
807
+ ocr = { mayUse: false, description: "OCR does not apply." };
748
808
  limitations = lim("Plain text has no structured document metadata.");
749
809
  }
750
810
  break;
@@ -754,26 +814,26 @@ function buildNodeExplainReport(kind, intent, pdfOcr, plan, docxInclude) {
754
814
  willAttempt: true,
755
815
  description: "Text layer extracted then wrapped in <pre> (not visual layout)."
756
816
  };
757
- ocr3 = { mayUse: false, description: "convertToHtml does not run PDF OCR." };
817
+ ocr = { mayUse: false, description: "convertToHtml does not run PDF OCR." };
758
818
  limitations = lim("PDF HTML is a plain-text preview, not page layout.");
759
819
  } else if (kind === "docx") {
760
820
  nativeExtraction = {
761
821
  willAttempt: true,
762
822
  description: docxIncludeRequested(docxInclude) ? "Mammoth HTML plus optional OOXML extractors (same router as analyzeFile)." : "Mammoth HTML via analyzeFile routing; optional OOXML v2 when options.docx.include is set."
763
823
  };
764
- ocr3 = { mayUse: false, description: "DOCX path does not use OCR." };
824
+ ocr = { mayUse: false, description: "DOCX path does not use OCR." };
765
825
  } else if (kind === "text") {
766
826
  nativeExtraction = {
767
827
  willAttempt: true,
768
828
  description: "UTF-8 decode then <pre> wrapper."
769
829
  };
770
- ocr3 = { mayUse: false, description: "OCR does not apply." };
830
+ ocr = { mayUse: false, description: "OCR does not apply." };
771
831
  } else {
772
832
  nativeExtraction = {
773
833
  willAttempt: false,
774
834
  description: "No HTML path for raster images."
775
835
  };
776
- ocr3 = { mayUse: false, description: "OCR does not emit layout HTML here." };
836
+ ocr = { mayUse: false, description: "OCR does not emit layout HTML here." };
777
837
  limitations = lim("Use extractText or runOcr for image text.");
778
838
  }
779
839
  break;
@@ -783,32 +843,44 @@ function buildNodeExplainReport(kind, intent, pdfOcr, plan, docxInclude) {
783
843
  willAttempt: true,
784
844
  description: "pdf-parse runs first; text may be replaced by raster OCR output."
785
845
  };
786
- ocr3 = {
846
+ ocr = {
787
847
  mayUse: true,
788
848
  description: 'runOcr always sets pdf.ocr to "force" for PDFs.'
789
849
  };
790
850
  limitations = lim("Forced OCR may run even when a text layer exists.");
791
851
  } else if (kind === "image") {
792
- nativeExtraction = { willAttempt: false, description: "No native text layer." };
793
- ocr3 = { mayUse: true, description: "Tesseract OCR on the image bytes." };
852
+ nativeExtraction = {
853
+ willAttempt: false,
854
+ description: NODE_IMAGE_OCR_PIPELINE
855
+ };
856
+ ocr = {
857
+ mayUse: true,
858
+ description: "Forced OCR path for rasters: TIFF \u2192 `ocrTiff` with `textByPage` when multipage; other formats \u2192 `ocrImageDetailed` after normalization (HEIC converted with `sharp` on Node)."
859
+ };
794
860
  } else if (kind === "docx") {
795
861
  nativeExtraction = {
796
862
  willAttempt: true,
797
863
  description: docxIncludeRequested(docxInclude) ? "Mammoth text/HTML plus optional OOXML extractors; still not OCR." : "Full Mammoth extract (text + HTML); optional OOXML v2 via options.docx.include; not OCR."
798
864
  };
799
- ocr3 = { mayUse: false, description: "DOCX is not OCR'd." };
865
+ ocr = { mayUse: false, description: "DOCX is not OCR'd." };
800
866
  limitations = lim("Result is structured extract, not OCR output.");
801
867
  } else {
802
868
  nativeExtraction = {
803
869
  willAttempt: true,
804
870
  description: "UTF-8 decode only."
805
871
  };
806
- ocr3 = { mayUse: false, description: "OCR does not apply to text files." };
872
+ ocr = { mayUse: false, description: "OCR does not apply to text files." };
807
873
  }
808
874
  break;
809
875
  default:
810
876
  nativeExtraction = { willAttempt: false, description: "Generic intent; see plan." };
811
- ocr3 = { mayUse: false, description: "See plan steps." };
877
+ ocr = { mayUse: false, description: "See plan steps." };
878
+ }
879
+ if (kind === "image" && preprocessHasEffect(ocrSlice?.preprocess)) {
880
+ limitations = [
881
+ ...limitations,
882
+ "options.ocr.preprocess applies to the single-frame `ocrImageDetailed` path; multipage TIFF (`ocrTiff`) does not run this preprocess on each frame."
883
+ ];
812
884
  }
813
885
  return finalizeDocxExplainReport({
814
886
  kind,
@@ -817,17 +889,27 @@ function buildNodeExplainReport(kind, intent, pdfOcr, plan, docxInclude) {
817
889
  intent,
818
890
  primaryAnalyzer,
819
891
  nativeExtraction,
820
- ocr: ocr3,
892
+ ocr,
821
893
  limitations,
822
894
  plan
823
895
  });
824
896
  }
825
-
826
- // src/introspection.ts
827
897
  function resolvePdfOcrMode(pdf) {
828
898
  return pdf?.ocr ?? "auto";
829
899
  }
830
- function planAnalyzeFile(kind, pdfOcr, docxInclude) {
900
+ function imageOcrPlanSteps(ocr) {
901
+ return [
902
+ { id: "detect_kind", status: "done" },
903
+ { id: "image_format_detect", status: "planned" },
904
+ { id: "normalize_image_for_ocr", status: "planned" },
905
+ {
906
+ id: "preprocess_image_for_ocr",
907
+ status: preprocessHasEffect(ocr?.preprocess) ? "planned" : "skipped"
908
+ },
909
+ { id: "ocr_tesseract", status: "planned" }
910
+ ];
911
+ }
912
+ function planAnalyzeFile(kind, pdfOcr, docxInclude, ocr) {
831
913
  switch (kind) {
832
914
  case "pdf":
833
915
  return {
@@ -855,10 +937,7 @@ function planAnalyzeFile(kind, pdfOcr, docxInclude) {
855
937
  case "image":
856
938
  return {
857
939
  intent: "analyzeFile",
858
- steps: [
859
- { id: "detect_kind", status: "done" },
860
- { id: "image_ocr", status: "planned" }
861
- ]
940
+ steps: imageOcrPlanSteps(ocr)
862
941
  };
863
942
  case "text":
864
943
  return {
@@ -878,9 +957,9 @@ function planAnalyzeFile(kind, pdfOcr, docxInclude) {
878
957
  };
879
958
  }
880
959
  }
881
- function planForIntent(intentOpt, kind, pdfOcrForAnalyze, docxInclude) {
960
+ function planForIntent(intentOpt, kind, pdfOcrForAnalyze, docxInclude, ocr) {
882
961
  const intent = intentOpt ?? "analyzeFile";
883
- if (intent === "analyzeFile") return planAnalyzeFile(kind, pdfOcrForAnalyze, docxInclude);
962
+ if (intent === "analyzeFile") return planAnalyzeFile(kind, pdfOcrForAnalyze, docxInclude, ocr);
884
963
  if (intent === "extractText") {
885
964
  if (kind === "pdf") {
886
965
  return {
@@ -893,7 +972,7 @@ function planForIntent(intentOpt, kind, pdfOcrForAnalyze, docxInclude) {
893
972
  ]
894
973
  };
895
974
  }
896
- const p = planAnalyzeFile(kind, "off", docxInclude);
975
+ const p = planAnalyzeFile(kind, "off", docxInclude, ocr);
897
976
  return { ...p, intent: "extractText" };
898
977
  }
899
978
  if (intent === "extractMetadata") {
@@ -977,10 +1056,7 @@ function planForIntent(intentOpt, kind, pdfOcrForAnalyze, docxInclude) {
977
1056
  if (kind === "image") {
978
1057
  return {
979
1058
  intent: "runOcr",
980
- steps: [
981
- { id: "detect_kind", status: "done" },
982
- { id: "tesseract_ocr", status: "planned" }
983
- ]
1059
+ steps: imageOcrPlanSteps(ocr)
984
1060
  };
985
1061
  }
986
1062
  if (kind === "docx") {
@@ -1002,7 +1078,7 @@ function planForIntent(intentOpt, kind, pdfOcrForAnalyze, docxInclude) {
1002
1078
  ]
1003
1079
  };
1004
1080
  }
1005
- return planAnalyzeFile(kind, pdfOcrForAnalyze, docxInclude);
1081
+ return planAnalyzeFile(kind, pdfOcrForAnalyze, docxInclude, ocr);
1006
1082
  }
1007
1083
  async function getCapabilities(input, options) {
1008
1084
  throwIfAborted(options?.signal);
@@ -1019,8 +1095,9 @@ async function explainAnalysisPlan(input, options) {
1019
1095
  const intent = options?.intent ?? "analyzeFile";
1020
1096
  const pdfOcrAnalyze = resolvePdfOcrMode(options?.pdf);
1021
1097
  const docxInc = options?.docx?.include;
1022
- const plan = planForIntent(intent, kind, pdfOcrAnalyze, docxInc);
1023
- return buildNodeExplainReport(kind, intent, pdfOcrAnalyze, plan, docxInc);
1098
+ const ocrSlice = options?.ocr;
1099
+ const plan = planForIntent(intent, kind, pdfOcrAnalyze, docxInc, ocrSlice);
1100
+ return buildNodeExplainReport(kind, intent, pdfOcrAnalyze, plan, docxInc, ocrSlice);
1024
1101
  }
1025
1102
 
1026
1103
  export { DOCX_EMBEDDED_IMAGE_CAPABILITIES, DOCX_STRUCTURE_CAPABILITIES, analyzeFile, bufferToInput, convertToHtml, docxIncludeRequested, explainAnalysisPlan, extractMetadata, extractText, getCapabilities, readFileToInput, resolveNodeAnalyzeInput, runOcr };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@dragon708/docmind-node",
3
- "version": "1.7.0",
3
+ "version": "1.9.0",
4
4
  "description": "Official DocMind Node facade: analyzeFile, intent APIs, PDF/DOCX/OCR, and fs helpers.",
5
5
  "type": "module",
6
6
  "main": "./dist/index.js",
@@ -33,8 +33,8 @@
33
33
  "license": "MIT",
34
34
  "dependencies": {
35
35
  "@dragon708/docmind-docx": "^1.7.0",
36
- "@dragon708/docmind-ocr": "^1.0.0",
37
- "@dragon708/docmind-pdf": "^2.0.0",
36
+ "@dragon708/docmind-ocr": "^1.1.0",
37
+ "@dragon708/docmind-pdf": "^2.1.0",
38
38
  "@dragon708/docmind-shared": "^1.1.0"
39
39
  },
40
40
  "devDependencies": {