npm - @dragon708/docmind-browser - Versions diffs - 1.6.0 → 1.8.0 - Mend

@dragon708/docmind-browser 1.6.0 → 1.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (3)

package/dist/index.d.ts CHANGED Viewed

@@ -1,9 +1,11 @@
-import { DocMindAnalyzeOptions, AnalyzeFileOutputOptions, AnalysisResult, NormalizeStructuredOptions, StructuredDocumentResult, FileKind, RuntimeDescriptor, DocMindPublicIntent, AnalysisAnalyzer, ProcessingPlanDescriptor, ExplainAnalysisPlanOptions, GetCapabilitiesOptions } from '@dragon708/docmind-shared';
+import { DocMindAnalyzeOptions, AnalyzeFileOutputOptions, NormalizeStructuredOptions, AnalysisResult, StructuredDocumentResult, FileKind, RuntimeDescriptor, DocMindPublicIntent, AnalysisAnalyzer, ProcessingPlanDescriptor, ExplainAnalysisPlanOptions, GetCapabilitiesOptions } from '@dragon708/docmind-shared';
 export { AnalysisAnalyzer, AnalysisResult, CapabilityDescriptor, DetectFileKindInput, DocMindPublicIntent, DocumentBlock, DocumentImageRef, DocumentPage, DocumentTable, DocxAnalysisCoreResult, ExplainAnalysisPlanOptions, ExplainAnalysisPlanResult, FileKind, FileKindMetadata, GenericAnalysisResult, GetCapabilitiesOptions, GetCapabilitiesResult, ImageAnalysisCoreResult, PdfAnalysisCoreResult, StructuredDocumentResult, TextAnalysisResult, analyzeFileRequestsStructured, detectFileKind, isStructuredDocumentResult } from '@dragon708/docmind-shared';
-import { OcrOptions, OcrTiffOptions, PreprocessImageOptions } from '@dragon708/docmind-ocr';
-export { ExtractStructuredDataFromImageOptions, extractStructuredDataFromImage } from '@dragon708/docmind-ocr';
+import { RenderLlmTextOptions, RenderMarkdownOptions, RenderMarkdownSectionsOptions, MarkdownSection } from '@dragon708/docmind-markdown';
+export { MarkdownSection } from '@dragon708/docmind-markdown';
 import { AnalyzeDocxIncludeFlags, DocxToHtmlOptions } from '@dragon708/docmind-docx';
 export { AnalyzeDocxIncludeFlags, ExtractStructuredDataFromDocxOptions, extractStructuredDataFromDocx } from '@dragon708/docmind-docx';
+import { OcrOptions, OcrTiffOptions, PreprocessImageOptions } from '@dragon708/docmind-ocr';
+export { ExtractStructuredDataFromImageOptions, extractStructuredDataFromImage } from '@dragon708/docmind-ocr';
 /**
  * Opciones DOCX para el facade browser (Mammoth + inclusiones v2 de `@dragon708/docmind-docx`; sin APIs Node-only).
@@ -37,6 +39,27 @@ interface BrowserAnalyzeOptions extends DocMindAnalyzeOptions, AnalyzeFileOutput
     /** Solo DOCX: ver {@link BrowserAnalyzeDocxOptionsSlice}. */
     readonly docx?: BrowserAnalyzeDocxOptionsSlice;
 }
+/** Options for {@link extractStructuredData}: same as {@link BrowserAnalyzeOptions} plus shared normalize knobs. */
+type BrowserExtractStructuredDataOptions = BrowserAnalyzeOptions & {
+    readonly normalize?: NormalizeStructuredOptions;
+};
+/**
+ * {@link extractMarkdown}: structured options plus `markdown` (passed through to `extractMarkdown` in
+ * `@dragon708/docmind-markdown`, including structured-serializer knobs). Binary PDF/DOCX converters inside that
+ * package are not used for PDF in-browser and the DOCX bytes→Turndown path is Node-only; the browser still
+ * gets correct DOCX Markdown via structured fallback from {@link extractStructuredData}.
+ */
+interface BrowserExtractMarkdownOptions extends BrowserExtractStructuredDataOptions {
+    readonly markdown?: RenderMarkdownOptions;
+}
+/** {@link extractLlmContent}: optional `llm` passed to `renderLlmText`. */
+interface BrowserExtractLlmContentOptions extends BrowserExtractStructuredDataOptions {
+    readonly llm?: RenderLlmTextOptions;
+}
+/** {@link extractStructuredChunks}: optional `chunks` (split / section sizing). */
+interface BrowserExtractStructuredChunksOptions extends BrowserExtractStructuredDataOptions {
+    readonly chunks?: RenderMarkdownSectionsOptions;
+}
 /**
  * Inputs supported by the browser entry (DOM types only — no `fs`, no Node `Buffer` in the public surface).
@@ -90,10 +113,6 @@ declare function runOcr(input: BrowserAnalyzeInput, options?: BrowserAnalyzeOpti
  * PDF is not supported in this runtime (clear warnings, no PDF package import).
  */
-/** Options for {@link extractStructuredData}: same as {@link BrowserAnalyzeOptions} plus shared normalize knobs. */
-type BrowserExtractStructuredDataOptions = BrowserAnalyzeOptions & {
-    readonly normalize?: NormalizeStructuredOptions;
-};
 /**
  * Returns a {@link StructuredDocumentResult} for inputs the browser runtime actually supports:
  * **DOCX** (`extractStructuredDataFromDocx`), **images** (`extractStructuredDataFromImage` when OCR is not off),
@@ -103,8 +122,34 @@ type BrowserExtractStructuredDataOptions = BrowserAnalyzeOptions & {
  */
 declare function extractStructuredData(input: BrowserAnalyzeInput, options?: BrowserExtractStructuredDataOptions): Promise<StructuredDocumentResult>;
+/**
+ * {@link extractStructuredData} for a full structured envelope, then `extractMarkdown` from
+ * `@dragon708/docmind-markdown` on `{ data, filename?, mimeType? }` with that result as `structuredFallback`.
+ *
+ * - **PDF:** the markdown package does not load `@opendataloader/pdf` here; output comes from the structured
+ *   fallback (empty in-browser stub — see {@link getCapabilities}).
+ * - **DOCX:** the package’s direct bytes → Mammoth → Turndown path is **Node-only**; in-browser, Markdown is
+ *   produced via `convertStructuredToMarkdown` on the structured envelope (still Mammoth/OOXML-backed via
+ *   `@dragon708/docmind-docx`), with an explanatory warning from the markdown package.
+ * - **Text / image:** unidentified or non-binary bytes use the same structured serializer.
+ *
+ * @param options - `markdown` options plus the same routing as {@link extractStructuredData} (`ocr`, `docx`, `normalize`).
+ */
+declare function extractMarkdown(input: BrowserAnalyzeInput, options?: BrowserExtractMarkdownOptions): Promise<string>;
+/**
+ * {@link extractStructuredData} then `renderLlmText` (`@dragon708/docmind-markdown`). For a structured value you already have, that package's `extractLlmContent` matches `renderLlmText` (no file I/O).
+ */
+declare function extractLlmContent(input: BrowserAnalyzeInput, options?: BrowserExtractLlmContentOptions): Promise<string>;
+/**
+ * Structured extract → `renderMarkdownSections` (`splitStructuredIntoChunks` with Markdown; same as
+ * `extractStructuredChunks` alias in `@dragon708/docmind-markdown`).
+ */
+declare function extractStructuredChunks(input: BrowserAnalyzeInput, options?: BrowserExtractStructuredChunksOptions): Promise<readonly MarkdownSection[]>;
 /** High-level features the user can ask DocMind for (per input kind and runtime). */
-type PublicCapabilityId = "text" | "metadata" | "html" | "ocr" | "pages" | "structured-output" | "image-normalization" | "gif-first-frame" | "bmp" | "heic" | "tiff";
+type PublicCapabilityId = "text" | "metadata" | "html" | "ocr" | "pages" | "structured-output"
+/** Browser: {@link extractMarkdown} via `@dragon708/docmind-markdown` `extractMarkdown` + structured fallback (PDF empty; DOCX structured path when binary converter is Node-only). */
+ | "markdown" | "llm-text" | "structured-chunks" | "image-normalization" | "gif-first-frame" | "bmp" | "heic" | "tiff";
 declare function docxIncludeRequested(flags?: AnalyzeDocxIncludeFlags): boolean;
 /** DOCX `word/media` en runtime browser (JSZip; sin pipeline Node). */
 interface DocxEmbeddedImageCapabilities {
@@ -176,12 +221,17 @@ interface ExplainAnalysisPlanReport {
     readonly warnings?: readonly string[];
 }
+/** Browser facade intents that run `@dragon708/docmind-markdown` after structured extraction. */
+type BrowserMarkdownFacadeIntent = "extractMarkdown" | "extractLlmContent" | "extractStructuredChunks";
 /** Options for {@link explainAnalysisPlan}: shared fields plus optional `ocr` / `docx` for accurate step preview. */
-type BrowserExplainAnalysisPlanOptions = ExplainAnalysisPlanOptions & Pick<BrowserAnalyzeOptions, "ocr" | "docx" | "structuredOutput" | "output">;
+type BrowserExplainAnalysisPlanOptions = Omit<ExplainAnalysisPlanOptions, "intent"> & Pick<BrowserAnalyzeOptions, "ocr" | "docx" | "structuredOutput" | "output"> & {
+    readonly intent?: DocMindPublicIntent | BrowserMarkdownFacadeIntent;
+};
 /**
  * Epic 1 — **Capabilities:** detects kind from the same hints as `analyzeFile`, then lists which of
- * `text` | `metadata` | `html` | `ocr` | `pages` | `structured-output` and image-specific ids (`image-normalization`, `bmp`, `gif-first-frame`, `heic`, `tiff`) apply in the browser (PDF always unsupported).
+ * `text` | `metadata` | `html` | `ocr` | `pages` | `structured-output` | `markdown` (package `extractMarkdown` + structured fallback; PDF empty here) | `llm-text` | `structured-chunks` (split + Markdown sections)
+ * and image-specific ids (`image-normalization`, `bmp`, `gif-first-frame`, `heic`, `tiff`) apply in the browser (PDF always unsupported for meaningful content).
  * No Mammoth/Tesseract/PDF parsing. For DOCX, {@link GetCapabilitiesReport.docxStructure} / `docxEmbeddedImages` describe v2 opt-in features.
  */
 declare function getCapabilities(input: BrowserAnalyzeInput, options?: GetCapabilitiesOptions): Promise<GetCapabilitiesReport>;
@@ -191,4 +241,4 @@ declare function getCapabilities(input: BrowserAnalyzeInput, options?: GetCapabi
  */
 declare function explainAnalysisPlan(input: BrowserAnalyzeInput, options?: BrowserExplainAnalysisPlanOptions): Promise<ExplainAnalysisPlanReport>;
-export { BROWSER_PDF_STRUCTURED_UNSUPPORTED_WARNING, BROWSER_PDF_UNSUPPORTED_WARNING, type BrowserAnalyzeDocxOptionsSlice, type BrowserAnalyzeInput, type BrowserAnalyzeOptions, type BrowserExplainAnalysisPlanOptions, type BrowserExtractStructuredDataOptions, type BrowserOcrMode, type BrowserOcrOptions, DOCX_EMBEDDED_IMAGE_CAPABILITIES_BROWSER, DOCX_STRUCTURE_CAPABILITIES_BROWSER, type DocxEmbeddedImageCapabilities, type DocxStructuralCapabilities, type ExplainAnalysisPlanReport, type GetCapabilitiesReport, type NativeExtractionPlan, type OcrPlan, type PublicCapabilityId, type PublicCapabilitySupport, analyzeFile, convertToHtml, docxIncludeRequested, explainAnalysisPlan, extractMetadata, extractStructuredData, extractText, getCapabilities, runOcr };
+export { BROWSER_PDF_STRUCTURED_UNSUPPORTED_WARNING, BROWSER_PDF_UNSUPPORTED_WARNING, type BrowserAnalyzeDocxOptionsSlice, type BrowserAnalyzeInput, type BrowserAnalyzeOptions, type BrowserExplainAnalysisPlanOptions, type BrowserExtractLlmContentOptions, type BrowserExtractMarkdownOptions, type BrowserExtractStructuredChunksOptions, type BrowserExtractStructuredDataOptions, type BrowserMarkdownFacadeIntent, type BrowserOcrMode, type BrowserOcrOptions, DOCX_EMBEDDED_IMAGE_CAPABILITIES_BROWSER, DOCX_STRUCTURE_CAPABILITIES_BROWSER, type DocxEmbeddedImageCapabilities, type DocxStructuralCapabilities, type ExplainAnalysisPlanReport, type GetCapabilitiesReport, type NativeExtractionPlan, type OcrPlan, type PublicCapabilityId, type PublicCapabilitySupport, analyzeFile, convertToHtml, docxIncludeRequested, explainAnalysisPlan, extractLlmContent, extractMarkdown, extractMetadata, extractStructuredChunks, extractStructuredData, extractText, getCapabilities, runOcr };

package/dist/index.js CHANGED Viewed

@@ -4,6 +4,7 @@ import { extractStructuredDataFromDocx, analyzeDocx } from '@dragon708/docmind-d
 export { extractStructuredDataFromDocx } from '@dragon708/docmind-docx';
 import { extractStructuredDataFromImage, preprocessHasEffect, resolveImageFormat, normalizeImageForOcr, ocrTiff, ocrImageDetailed } from '@dragon708/docmind-ocr';
 export { extractStructuredDataFromImage } from '@dragon708/docmind-ocr';
+import { extractMarkdown as extractMarkdown$1, renderLlmText, renderMarkdownSections } from '@dragon708/docmind-markdown';
 // src/analyzeFile.ts
 function assertBrowserInput(input) {
@@ -519,6 +520,39 @@ async function runOcr(input, options) {
       return notImplementedResult(kind, "none", [UNKNOWN_FORMAT_WARNING]);
   }
 }
+function browserFileHints(input) { // Collects the optional filename / MIME hints that extractMarkdown forwards to the markdown package.
+  if (input instanceof File) { // only File inputs are inspected for a name and a type
+    return {
+      filename: input.name,
+      mimeType: input.type ? input.type : void 0 // an empty `type` string is reported as undefined
+    };
+  }
+  return {}; // every other input kind contributes no hints
+}
+async function extractMarkdown(input, options) { // Runs extractStructuredData, then the markdown package's `extractMarkdown` (imported as `extractMarkdown$1`); resolves to the Markdown string.
+  throwIfAborted(options?.signal); // helper defined outside this hunk; presumably throws when the signal is already aborted
+  const { markdown: markdownOpts, ...structuredOpts } = options ?? {}; // `markdown` goes to the package call, all remaining options go to extractStructuredData
+  const structured = await extractStructuredData(input, structuredOpts);
+  const data = await toUint8Array(input); // presumably the raw input bytes; toUint8Array is not shown here, confirm
+  const hints = browserFileHints(input);
+  const r = await extractMarkdown$1(
+    { data, filename: hints.filename, mimeType: hints.mimeType },
+    { ...markdownOpts ?? {}, structuredFallback: structured } // placed after the spread, so it replaces any caller-supplied `structuredFallback`
+  );
+  return r.markdown; // only the `markdown` field of the package result is returned
+}
+async function extractLlmContent(input, options) { // Runs extractStructuredData, then returns whatever `renderLlmText` produces for that structured result.
+  throwIfAborted(options?.signal); // helper defined outside this hunk; presumably throws when the signal is already aborted
+  const { llm: llmOpts, ...structuredOpts } = options ?? {}; // `llm` is kept for renderLlmText, all remaining options go to extractStructuredData
+  const structured = await extractStructuredData(input, structuredOpts);
+  return renderLlmText(structured, llmOpts); // llmOpts is undefined when the caller passed no `llm` options
+}
+async function extractStructuredChunks(input, options) { // Runs extractStructuredData, then returns whatever `renderMarkdownSections` produces for that structured result.
+  throwIfAborted(options?.signal); // helper defined outside this hunk; presumably throws when the signal is already aborted
+  const { chunks: chunkOpts, ...structuredOpts } = options ?? {}; // `chunks` is kept for renderMarkdownSections, all remaining options go to extractStructuredData
+  const structured = await extractStructuredData(input, structuredOpts);
+  return renderMarkdownSections(structured, chunkOpts); // chunkOpts is undefined when the caller passed no `chunks` options
+}
 // src/capabilityReport.ts
 function docxIncludeRequested(flags) {
@@ -557,6 +591,8 @@ var TEXT_META_NOTE = "Plain text has no structured document metadata; extractMet
 var OCR_OFF_NOTE = 'Image OCR may be skipped when `ocr.mode` is "off" in analyze options.';
 var STRUCTURED_OCR_OFF = 'Structured image output uses OCR; when `ocr.mode` is "off", `extractStructuredData` returns an empty envelope with a warning.';
 var UNKNOWN_KIND = "Could not determine file kind from name, MIME, or bytes; all features are reported as unsupported until the kind is known.";
+var MARKDOWN_PDF_BROWSER = "PDF: no parser in-browser \u2014 `@opendataloader/pdf` is not loaded here. extractMarkdown still calls `extractMarkdown` in `@dragon708/docmind-markdown`, which falls back to the empty structured stub (same empty Markdown). extractLlmContent / extractStructuredChunks use the structured envelope only. Use @dragon708/docmind-node for real PDF \u2192 Markdown / LLM text / chunks."; // warning attached to the unsupported markdown / llm-text / structured-chunks capability slots
+var MARKDOWN_IMAGE_OCR_OFF = 'Same as structured-output: when ocr.mode is "off", structured (and thus Markdown/LLM/chunk exports) are empty aside from warnings.'; // extra warning on the OCR-backed markdown / llm-text / structured-chunks capability slots
 function slot(id, supported, warnings) { // Builds one capability entry ({ id, supported } plus optional warnings) for the capability report lists.
   return warnings?.length ? { id, supported, warnings } : { id, supported }; // the `warnings` key is left out entirely when the list is missing or empty
 }
@@ -573,7 +609,10 @@ function buildBrowserCapabilityReport(kind) {
         slot("html", false, [pdf]),
         slot("ocr", false, [pdf]),
         slot("pages", false, [pdf]),
-        slot("structured-output", false, [BROWSER_PDF_STRUCTURED_UNSUPPORTED_WARNING])
+        slot("structured-output", false, [BROWSER_PDF_STRUCTURED_UNSUPPORTED_WARNING]),
+        slot("markdown", false, [MARKDOWN_PDF_BROWSER]),
+        slot("llm-text", false, [MARKDOWN_PDF_BROWSER]),
+        slot("structured-chunks", false, [MARKDOWN_PDF_BROWSER])
       ];
       break;
     case "docx":
@@ -593,6 +632,15 @@ function buildBrowserCapabilityReport(kind) {
         ]),
         slot("structured-output", true, [
           "`extractStructuredData` uses `@dragon708/docmind-docx` (Mammoth + OOXML) and returns `StructuredDocumentResult`; optional `options.docx` slices are forwarded."
+        ]),
+        slot("markdown", true, [
+          "extractMarkdown: `@dragon708/docmind-markdown` `extractMarkdown` on bytes + structured fallback. The package\u2019s DOCX-bytes Mammoth\u2192Turndown path is Node-only; in-browser, Markdown is produced from `extractStructuredData` (Mammoth/OOXML in `@dragon708/docmind-docx`) via structured serialization, with a clear package warning that the binary shortcut is skipped."
+        ]),
+        slot("llm-text", true, [
+          "extractLlmContent: structured envelope \u2192 `renderLlmText` (LLM-ready plain text; no binary PDF/DOCX Markdown routes)."
+        ]),
+        slot("structured-chunks", true, [
+          "extractStructuredChunks: structured \u2192 `renderMarkdownSections` / `splitStructuredIntoChunks` (heading-aware chunking + optional parallel `text`)."
         ])
       ];
       break;
@@ -629,6 +677,18 @@ function buildBrowserCapabilityReport(kind) {
           "`extractStructuredData` uses `extractStructuredDataFromImage` (same OCR path as analyzeFile when `ocr.mode` is not off).",
           STRUCTURED_OCR_OFF,
           "HEIC/HEIF and TIFF limitations match `getCapabilities` (`heic`, `tiff`) and OCR warnings."
+        ]),
+        slot("markdown", true, [
+          "extractMarkdown: same bytes + structured fallback through package `extractMarkdown` when applicable; OCR structured layout \u2192 Markdown when OCR runs. HEIC unsupported; TIFF best-effort.",
+          MARKDOWN_IMAGE_OCR_OFF
+        ]),
+        slot("llm-text", true, [
+          "extractLlmContent: OCR structured \u2192 LLM plain text under the same OCR and format limits.",
+          MARKDOWN_IMAGE_OCR_OFF
+        ]),
+        slot("structured-chunks", true, [
+          "extractStructuredChunks: OCR structured \u2192 sectioned Markdown; empty when OCR is off or HEIC.",
+          MARKDOWN_IMAGE_OCR_OFF
         ])
       ];
       break;
@@ -641,6 +701,15 @@ function buildBrowserCapabilityReport(kind) {
         slot("pages", false),
         slot("structured-output", true, [
           "`extractStructuredData` decodes UTF-8 (via `analyzeText`) and normalizes to `StructuredDocumentResult` (paragraph block rollup)."
+        ]),
+        slot("markdown", true, [
+          "extractMarkdown: bytes + structured fallback through `@dragon708/docmind-markdown` `extractMarkdown` (typically structured serializer for UTF-8 text)."
+        ]),
+        slot("llm-text", true, [
+          "extractLlmContent: UTF-8 structured rollup \u2192 `renderLlmText` in `@dragon708/docmind-markdown`."
+        ]),
+        slot("structured-chunks", true, [
+          "extractStructuredChunks: typically one Markdown section when only paragraph rollup exists."
         ])
       ];
       break;
@@ -652,7 +721,10 @@ function buildBrowserCapabilityReport(kind) {
         slot("html", false),
         slot("ocr", false),
         slot("pages", false),
-        slot("structured-output", false, [UNKNOWN_KIND])
+        slot("structured-output", false, [UNKNOWN_KIND]),
+        slot("markdown", false, [UNKNOWN_KIND]),
+        slot("llm-text", false, [UNKNOWN_KIND]),
+        slot("structured-chunks", false, [UNKNOWN_KIND])
       ];
   }
   return {
@@ -695,9 +767,10 @@ function buildBrowserExplainReport(kind, intent, ocrMode, plan, docxInclude, ocr
   let limitations = [];
   const ocrOffNote = ocrMode === "off" ? 'Image OCR is skipped when ocr.mode is "off".' : "";
   if (kind === "pdf") {
+    const structuredLikeIntent = intent === "extractStructuredData" || intent === "extractMarkdown" || intent === "extractLlmContent" || intent === "extractStructuredChunks";
     limitations = lim(
       BROWSER_PDF_UNSUPPORTED_WARNING,
-      intent === "extractStructuredData" ? "`extractStructuredData` only returns an empty `StructuredDocumentResult` with warnings for PDF in-browser; use @dragon708/docmind-node for real PDF structured extraction." : ""
+      structuredLikeIntent ? "`extractStructuredData` / extractMarkdown / extractLlmContent / extractStructuredChunks only see an empty structured envelope in-browser for PDF; use @dragon708/docmind-node for real PDF extraction and Markdown/LLM/chunk exports." : ""
     );
     nativeExtraction = {
       willAttempt: false,
@@ -721,9 +794,10 @@ function buildBrowserExplainReport(kind, intent, ocrMode, plan, docxInclude, ocr
     });
   }
   if (kind === "unknown") {
+    const structuredLikeIntent = intent === "extractStructuredData" || intent === "extractMarkdown" || intent === "extractLlmContent" || intent === "extractStructuredChunks";
     limitations = lim(
       "Could not classify the file from name, MIME, or bytes; analysis will return not_implemented until hints improve.",
-      intent === "extractStructuredData" ? "`extractStructuredData` needs a known kind (text, DOCX, or image) to produce structured output." : ""
+      structuredLikeIntent ? "Structured and Markdown/LLM/chunk exports need a known kind (text, DOCX, or image) in the browser runtime." : ""
     );
     nativeExtraction = { willAttempt: false, description: "No analyzer selected without a known file kind." };
     ocr = { mayUse: false, description: "OCR is not used for unknown kinds." };
@@ -836,11 +910,26 @@ function buildBrowserExplainReport(kind, intent, ocrMode, plan, docxInclude, ocr
       }
       break;
     case "extractStructuredData":
+    case "extractMarkdown":
+    case "extractLlmContent":
+    case "extractStructuredChunks":
       if (kind === "docx") {
-        nativeExtraction = {
-          willAttempt: true,
-          description: "`extractStructuredDataFromDocx`: Mammoth plus required OOXML extractors (structure, headings, tables, blocks, pagesApprox, embeddedImages unless disabled), then `normalizeToStructuredResult`. Optional `options.docx` is forwarded."
-        };
+        if (intent === "extractStructuredData") {
+          nativeExtraction = {
+            willAttempt: true,
+            description: "`extractStructuredDataFromDocx`: Mammoth plus required OOXML extractors (structure, headings, tables, blocks, pagesApprox, embeddedImages unless disabled), then `normalizeToStructuredResult`. Optional `options.docx` is forwarded."
+          };
+        } else if (intent === "extractMarkdown") {
+          nativeExtraction = {
+            willAttempt: true,
+            description: "extractMarkdown: `extractStructuredData` (Mammoth/OOXML) for a full structured envelope, then `extractMarkdown` in `@dragon708/docmind-markdown`. The package\u2019s DOCX-bytes Mammoth\u2192Turndown shortcut is Node-only; in-browser Markdown uses structured serialization on that envelope (with a package warning)."
+          };
+        } else {
+          nativeExtraction = {
+            willAttempt: true,
+            description: `${String(intent)}: same structured DOCX pipeline as extractStructuredData, then \`@dragon708/docmind-markdown\` (\`renderLlmText\` or \`renderMarkdownSections\`).`
+          };
+        }
         ocr = { mayUse: false, description: "DOCX structured path does not use OCR." };
         limitations = lim(DOCX_ZIP_NOTE_BROWSER);
       } else if (kind === "image") {
@@ -856,10 +945,30 @@ function buildBrowserExplainReport(kind, intent, ocrMode, plan, docxInclude, ocr
       } else {
         nativeExtraction = {
           willAttempt: true,
-          description: "UTF-8 decode via `analyzeText`, then `normalizeToStructuredResult` with a paragraph block rollup."
+          description: intent === "extractStructuredData" ? "UTF-8 decode via `analyzeText`, then `normalizeToStructuredResult` with a paragraph block rollup." : `${String(intent)}: UTF-8 structured envelope, then \`@dragon708/docmind-markdown\`.`
         };
         ocr = { mayUse: false, description: "OCR does not apply to text files." };
       }
+      if (intent === "extractMarkdown") {
+        limitations = [
+          ...limitations,
+          ...lim(
+            "Output: Markdown string from `extractMarkdown` in `@dragon708/docmind-markdown`. PDF in-browser: empty (no `@opendataloader/pdf`). DOCX: structured Markdown path when the binary converter is Node-only."
+          )
+        ];
+      } else if (intent === "extractLlmContent") {
+        limitations = [
+          ...limitations,
+          ...lim("Output: compact plain text via `renderLlmText` (structured input only in this runtime).")
+        ];
+      } else if (intent === "extractStructuredChunks") {
+        limitations = [
+          ...limitations,
+          ...lim(
+            "Output: MarkdownSection[] via `renderMarkdownSections` (`splitStructuredIntoChunks` / `extractStructuredChunks` alias)."
+          )
+        ];
+      }
       break;
     default:
       nativeExtraction = { willAttempt: false, description: "Intent not specialized in this runtime." };
@@ -1012,6 +1121,40 @@ function planForIntent(intentOpt, kind, ocrMode, docxInclude, ocr, analyzeFileOu
         };
     }
   }
+  if (intent === "extractMarkdown") {
+    const sub = planForIntent(
+      "extractStructuredData",
+      kind,
+      ocrMode,
+      docxInclude,
+      ocr,
+      analyzeFileOutput
+    );
+    return {
+      intent,
+      steps: [...sub.steps ?? [], { id: "markdown_hybrid_package", status: "planned" }]
+    };
+  }
+  if (intent === "extractLlmContent" || intent === "extractStructuredChunks") {
+    const sub = planForIntent(
+      "extractStructuredData",
+      kind,
+      ocrMode,
+      docxInclude,
+      ocr,
+      analyzeFileOutput
+    );
+    return {
+      intent,
+      steps: [
+        ...sub.steps ?? [],
+        {
+          id: intent === "extractLlmContent" ? "docmind_markdown_llm_text" : "docmind_markdown_chunk_sections",
+          status: "planned"
+        }
+      ]
+    };
+  }
   if (intent === "analyzeFile") {
     const base = planForAnalyzeFile(kind, ocrMode, docxInclude, ocr);
     if (!analyzeFileRequestsStructured(analyzeFileOutput)) return base;
@@ -1121,6 +1264,6 @@ async function explainAnalysisPlan(input, options) {
   return buildBrowserExplainReport(kind, intent, ocrMode, plan, docxInc, ocrSlice);
 }
-export { BROWSER_PDF_STRUCTURED_UNSUPPORTED_WARNING, BROWSER_PDF_UNSUPPORTED_WARNING, DOCX_EMBEDDED_IMAGE_CAPABILITIES_BROWSER, DOCX_STRUCTURE_CAPABILITIES_BROWSER, analyzeFile, convertToHtml, docxIncludeRequested, explainAnalysisPlan, extractMetadata, extractStructuredData, extractText, getCapabilities, runOcr };
+export { BROWSER_PDF_STRUCTURED_UNSUPPORTED_WARNING, BROWSER_PDF_UNSUPPORTED_WARNING, DOCX_EMBEDDED_IMAGE_CAPABILITIES_BROWSER, DOCX_STRUCTURE_CAPABILITIES_BROWSER, analyzeFile, convertToHtml, docxIncludeRequested, explainAnalysisPlan, extractLlmContent, extractMarkdown, extractMetadata, extractStructuredChunks, extractStructuredData, extractText, getCapabilities, runOcr };
 //# sourceMappingURL=index.js.map
 //# sourceMappingURL=index.js.map

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@dragon708/docmind-browser",
-  "version": "1.6.0",
+  "version": "1.8.0",
   "description": "Official DocMind browser facade: analyzeFile and intent APIs (DOCX, image OCR, text). PDF and fs paths use @dragon708/docmind-node.",
   "type": "module",
   "sideEffects": false,
@@ -34,6 +34,7 @@
   "license": "MIT",
   "dependencies": {
     "@dragon708/docmind-docx": "^1.8.0",
+    "@dragon708/docmind-markdown": "^1.1.0",
     "@dragon708/docmind-ocr": "^1.1.4",
     "@dragon708/docmind-shared": "^1.2.0"
   },