npm - @dragon708/docmind-browser - Versions diffs - 1.8.2 → 1.8.4 - Mend

@dragon708/docmind-browser 1.8.2 → 1.8.4

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Files changed (3)

package/dist/index.d.ts CHANGED Viewed

@@ -45,9 +45,9 @@ type BrowserExtractStructuredDataOptions = BrowserAnalyzeOptions & {
 };
 /**
  * {@link extractMarkdown}: structured options plus `markdown` (passed through to `extractMarkdown` in
- * `@dragon708/docmind-markdown`, including structured-serializer knobs). Binary PDF/DOCX converters inside that
- * package are not used for PDF in-browser and the DOCX bytes→Turndown path is Node-only; the browser still
- * gets correct DOCX Markdown via structured fallback from {@link extractStructuredData}.
+ * `@dragon708/docmind-markdown`). PDF: `convertPdfToMarkdown` / `@cognipeer/to-markdown` never run in-browser — only
+ * structured fallback (empty PDF stub here). DOCX bytes→Mammoth→Turndown is Node-only; the browser still gets
+ * DOCX Markdown via structured fallback from {@link extractStructuredData}.
  */
 interface BrowserExtractMarkdownOptions extends BrowserExtractStructuredDataOptions {
     readonly markdown?: RenderMarkdownOptions;
@@ -126,8 +126,9 @@ declare function extractStructuredData(input: BrowserAnalyzeInput, options?: Bro
  * {@link extractStructuredData} for a full structured envelope, then `extractMarkdown` from
  * `@dragon708/docmind-markdown` on `{ data, filename?, mimeType? }` with that result as `structuredFallback`.
  *
- * - **PDF:** the markdown package does not load `@opendataloader/pdf` here; output comes from the structured
- *   fallback (empty in-browser stub — see {@link getCapabilities}).
+ * - **PDF:** specialized PDF→Markdown (`convertPdfToMarkdown` / `@cognipeer/to-markdown`) is **Node-only** and is
+ *   never loaded in the browser. The markdown package detects PDF, returns `pdf-unsupported-runtime` with clear
+ *   warnings, and uses structured fallback — in this facade that envelope is an **empty PDF stub** (see {@link getCapabilities}).
  * - **DOCX:** the package’s direct bytes → Mammoth → Turndown path is **Node-only**; in-browser, Markdown is
  *   produced via `convertStructuredToMarkdown` on the structured envelope (still Mammoth/OOXML-backed via
  *   `@dragon708/docmind-docx`), with an explanatory warning from the markdown package.
@@ -137,7 +138,8 @@ declare function extractStructuredData(input: BrowserAnalyzeInput, options?: Bro
  */
 declare function extractMarkdown(input: BrowserAnalyzeInput, options?: BrowserExtractMarkdownOptions): Promise<string>;
 /**
- * {@link extractStructuredData} then `renderLlmText` (`@dragon708/docmind-markdown`). For a structured value you already have, that package's `extractLlmContent` matches `renderLlmText` (no file I/O).
+ * {@link extractStructuredData} then `renderLlmText` (`@dragon708/docmind-markdown`). No binary PDF/DOCX Markdown
+ * pipelines run here; for PDF in-browser the envelope is empty and warnings describe the limitation.
  */
 declare function extractLlmContent(input: BrowserAnalyzeInput, options?: BrowserExtractLlmContentOptions): Promise<string>;
 /**
@@ -148,7 +150,7 @@ declare function extractStructuredChunks(input: BrowserAnalyzeInput, options?: B
 /** High-level features the user can ask DocMind for (per input kind and runtime). */
 type PublicCapabilityId = "text" | "metadata" | "html" | "ocr" | "pages" | "structured-output"
-/** Browser: {@link extractMarkdown} via `@dragon708/docmind-markdown` `extractMarkdown` + structured fallback (PDF empty; DOCX structured path when binary converter is Node-only). */
+/** Browser: {@link extractMarkdown} via `@dragon708/docmind-markdown` + structured fallback (PDF: no specialized Markdown; DOCX: structured path when bytes→Turndown is Node-only). */
  | "markdown" | "llm-text" | "structured-chunks" | "image-normalization" | "gif-first-frame" | "bmp" | "heic" | "tiff";
 declare function docxIncludeRequested(flags?: AnalyzeDocxIncludeFlags): boolean;
 /** DOCX `word/media` en runtime browser (JSZip; sin pipeline Node). */
@@ -230,9 +232,9 @@ type BrowserExplainAnalysisPlanOptions = Omit<ExplainAnalysisPlanOptions, "inten
 /**
  * Epic 1 — **Capabilities:** detects kind from the same hints as `analyzeFile`, then lists which of
- * `text` | `metadata` | `html` | `ocr` | `pages` | `structured-output` | `markdown` (package `extractMarkdown` + structured fallback; PDF empty here) | `llm-text` | `structured-chunks` (split + Markdown sections)
+ * `text` | `metadata` | `html` | `ocr` | `pages` | `structured-output` | `markdown` (hybrid `extractMarkdown`; **no** Node-only PDF `@cognipeer/to-markdown` — PDF Markdown/LLM/chunks unsupported here) | `llm-text` | `structured-chunks`
  * and image-specific ids (`image-normalization`, `bmp`, `gif-first-frame`, `heic`, `tiff`) apply in the browser (PDF always unsupported for meaningful content).
- * No Mammoth/Tesseract/PDF parsing. For DOCX, {@link GetCapabilitiesReport.docxStructure} / `docxEmbeddedImages` describe v2 opt-in features.
+ * No PDF parser; Mammoth/Tesseract apply to DOCX/images only. For DOCX, {@link GetCapabilitiesReport.docxStructure} / `docxEmbeddedImages` describe v2 opt-in features.
  */
 declare function getCapabilities(input: BrowserAnalyzeInput, options?: GetCapabilitiesOptions): Promise<GetCapabilitiesReport>;
 /**

package/dist/index.js CHANGED Viewed

@@ -591,7 +591,7 @@ var TEXT_META_NOTE = "Plain text has no structured document metadata; extractMet
 var OCR_OFF_NOTE = 'Image OCR may be skipped when `ocr.mode` is "off" in analyze options.';
 var STRUCTURED_OCR_OFF = 'Structured image output uses OCR; when `ocr.mode` is "off", `extractStructuredData` returns an empty envelope with a warning.';
 var UNKNOWN_KIND = "Could not determine file kind from name, MIME, or bytes; all features are reported as unsupported until the kind is known.";
-var MARKDOWN_PDF_BROWSER = "PDF: no parser in-browser \u2014 `@opendataloader/pdf` is not loaded here. extractMarkdown still calls `extractMarkdown` in `@dragon708/docmind-markdown`, which falls back to the empty structured stub (same empty Markdown). extractLlmContent / extractStructuredChunks use the structured envelope only. Use @dragon708/docmind-node for real PDF \u2192 Markdown / LLM text / chunks.";
+var MARKDOWN_PDF_BROWSER = "PDF: no specialized PDF\u2192Markdown in-browser (`@cognipeer/to-markdown` / `convertPdfToMarkdown` are Node-only). `extractMarkdown` still calls `@dragon708/docmind-markdown` `extractMarkdown`, which skips the Node pipeline, warns (`pdf-unsupported-runtime`), and uses structured fallback \u2014 here that envelope is empty, so Markdown stays empty. `extractLlmContent` / `extractStructuredChunks` use the same empty structured envelope with browser PDF warnings. Use @dragon708/docmind-node for real PDF extraction and Markdown / LLM text / chunks.";
 var MARKDOWN_IMAGE_OCR_OFF = 'Same as structured-output: when ocr.mode is "off", structured (and thus Markdown/LLM/chunk exports) are empty aside from warnings.';
 function slot(id, supported, warnings) {
   return warnings?.length ? { id, supported, warnings } : { id, supported };
@@ -770,7 +770,7 @@ function buildBrowserExplainReport(kind, intent, ocrMode, plan, docxInclude, ocr
     const structuredLikeIntent = intent === "extractStructuredData" || intent === "extractMarkdown" || intent === "extractLlmContent" || intent === "extractStructuredChunks";
     limitations = lim(
       BROWSER_PDF_UNSUPPORTED_WARNING,
-      structuredLikeIntent ? "`extractStructuredData` / extractMarkdown / extractLlmContent / extractStructuredChunks only see an empty structured envelope in-browser for PDF; use @dragon708/docmind-node for real PDF extraction and Markdown/LLM/chunk exports." : ""
+      structuredLikeIntent ? "`extractStructuredData` / extractMarkdown / extractLlmContent / extractStructuredChunks only see an empty structured envelope in-browser for PDF (no `@cognipeer/to-markdown`; specialized PDF\u2192Markdown runs on Node via @dragon708/docmind-node)." : ""
     );
     nativeExtraction = {
       willAttempt: false,
@@ -953,7 +953,7 @@ function buildBrowserExplainReport(kind, intent, ocrMode, plan, docxInclude, ocr
         limitations = [
           ...limitations,
           ...lim(
-            "Output: Markdown string from `extractMarkdown` in `@dragon708/docmind-markdown`. PDF in-browser: empty (no `@opendataloader/pdf`). DOCX: structured Markdown path when the binary converter is Node-only."
+            "Output: Markdown string from `extractMarkdown` in `@dragon708/docmind-markdown`. PDF in-browser: empty specialized path (no `@cognipeer/to-markdown`; structured fallback only). DOCX: structured Markdown when bytes\u2192Turndown is Node-only."
           )
         ];
       } else if (intent === "extractLlmContent") {

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@dragon708/docmind-browser",
-  "version": "1.8.2",
+  "version": "1.8.4",
   "description": "Official DocMind browser facade: analyzeFile and intent APIs (DOCX, image OCR, text). PDF and fs paths use @dragon708/docmind-node.",
   "type": "module",
   "sideEffects": false,
@@ -34,7 +34,7 @@
   "license": "MIT",
   "dependencies": {
     "@dragon708/docmind-docx": "^1.8.0",
-    "@dragon708/docmind-markdown": "^1.1.2",
+    "@dragon708/docmind-markdown": "^1.2.1",
     "@dragon708/docmind-ocr": "^1.1.4",
     "@dragon708/docmind-shared": "^1.2.0"
   },