@dragon708/docmind-browser 1.8.3 → 1.8.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.ts +11 -9
- package/dist/index.js +3 -3
- package/package.json +2 -2
package/dist/index.d.ts
CHANGED
|
@@ -45,9 +45,9 @@ type BrowserExtractStructuredDataOptions = BrowserAnalyzeOptions & {
|
|
|
45
45
|
};
|
|
46
46
|
/**
|
|
47
47
|
* {@link extractMarkdown}: structured options plus `markdown` (passed through to `extractMarkdown` in
|
|
48
|
-
* `@dragon708/docmind-markdown
|
|
49
|
-
*
|
|
50
|
-
*
|
|
48
|
+
* `@dragon708/docmind-markdown`). PDF: `convertPdfToMarkdown` / `@cognipeer/to-markdown` never run in-browser — only
|
|
49
|
+
* structured fallback (empty PDF stub here). DOCX bytes→Mammoth→Turndown is Node-only; the browser still gets
|
|
50
|
+
* DOCX Markdown via structured fallback from {@link extractStructuredData}.
|
|
51
51
|
*/
|
|
52
52
|
interface BrowserExtractMarkdownOptions extends BrowserExtractStructuredDataOptions {
|
|
53
53
|
readonly markdown?: RenderMarkdownOptions;
|
|
@@ -126,8 +126,9 @@ declare function extractStructuredData(input: BrowserAnalyzeInput, options?: Bro
|
|
|
126
126
|
* {@link extractStructuredData} for a full structured envelope, then `extractMarkdown` from
|
|
127
127
|
* `@dragon708/docmind-markdown` on `{ data, filename?, mimeType? }` with that result as `structuredFallback`.
|
|
128
128
|
*
|
|
129
|
-
* - **PDF:**
|
|
130
|
-
*
|
|
129
|
+
* - **PDF:** specialized PDF→Markdown (`convertPdfToMarkdown` / `@cognipeer/to-markdown`) is **Node-only** and is
|
|
130
|
+
* never loaded in the browser. The markdown package detects PDF, returns `pdf-unsupported-runtime` with clear
|
|
131
|
+
* warnings, and uses structured fallback — in this facade that envelope is an **empty PDF stub** (see {@link getCapabilities}).
|
|
131
132
|
* - **DOCX:** the package’s direct bytes → Mammoth → Turndown path is **Node-only**; in-browser, Markdown is
|
|
132
133
|
* produced via `convertStructuredToMarkdown` on the structured envelope (still Mammoth/OOXML-backed via
|
|
133
134
|
* `@dragon708/docmind-docx`), with an explanatory warning from the markdown package.
|
|
@@ -137,7 +138,8 @@ declare function extractStructuredData(input: BrowserAnalyzeInput, options?: Bro
|
|
|
137
138
|
*/
|
|
138
139
|
declare function extractMarkdown(input: BrowserAnalyzeInput, options?: BrowserExtractMarkdownOptions): Promise<string>;
|
|
139
140
|
/**
|
|
140
|
-
* {@link extractStructuredData} then `renderLlmText` (`@dragon708/docmind-markdown`).
|
|
141
|
+
* {@link extractStructuredData} then `renderLlmText` (`@dragon708/docmind-markdown`). No binary PDF/DOCX Markdown
|
|
142
|
+
* pipelines run here; for PDF in-browser the envelope is empty and warnings describe the limitation.
|
|
141
143
|
*/
|
|
142
144
|
declare function extractLlmContent(input: BrowserAnalyzeInput, options?: BrowserExtractLlmContentOptions): Promise<string>;
|
|
143
145
|
/**
|
|
@@ -148,7 +150,7 @@ declare function extractStructuredChunks(input: BrowserAnalyzeInput, options?: B
|
|
|
148
150
|
|
|
149
151
|
/** High-level features the user can ask DocMind for (per input kind and runtime). */
|
|
150
152
|
type PublicCapabilityId = "text" | "metadata" | "html" | "ocr" | "pages" | "structured-output"
|
|
151
|
-
/** Browser: {@link extractMarkdown} via `@dragon708/docmind-markdown`
|
|
153
|
+
/** Browser: {@link extractMarkdown} via `@dragon708/docmind-markdown` + structured fallback (PDF: no specialized Markdown; DOCX: structured path when bytes→Turndown is Node-only). */
|
|
152
154
|
| "markdown" | "llm-text" | "structured-chunks" | "image-normalization" | "gif-first-frame" | "bmp" | "heic" | "tiff";
|
|
153
155
|
declare function docxIncludeRequested(flags?: AnalyzeDocxIncludeFlags): boolean;
|
|
154
156
|
/** DOCX `word/media` en runtime browser (JSZip; sin pipeline Node). */
|
|
@@ -230,9 +232,9 @@ type BrowserExplainAnalysisPlanOptions = Omit<ExplainAnalysisPlanOptions, "inten
|
|
|
230
232
|
|
|
231
233
|
/**
|
|
232
234
|
* Epic 1 — **Capabilities:** detects kind from the same hints as `analyzeFile`, then lists which of
|
|
233
|
-
* `text` | `metadata` | `html` | `ocr` | `pages` | `structured-output` | `markdown` (
|
|
235
|
+
* `text` | `metadata` | `html` | `ocr` | `pages` | `structured-output` | `markdown` (hybrid `extractMarkdown`; **no** Node-only PDF `@cognipeer/to-markdown` — PDF Markdown/LLM/chunks unsupported here) | `llm-text` | `structured-chunks`
|
|
234
236
|
* and image-specific ids (`image-normalization`, `bmp`, `gif-first-frame`, `heic`, `tiff`) apply in the browser (PDF always unsupported for meaningful content).
|
|
235
|
-
* No Mammoth/Tesseract/
|
|
237
|
+
* No PDF parser; Mammoth/Tesseract apply to DOCX/images only. For DOCX, {@link GetCapabilitiesReport.docxStructure} / `docxEmbeddedImages` describe v2 opt-in features.
|
|
236
238
|
*/
|
|
237
239
|
declare function getCapabilities(input: BrowserAnalyzeInput, options?: GetCapabilitiesOptions): Promise<GetCapabilitiesReport>;
|
|
238
240
|
/**
|
package/dist/index.js
CHANGED
|
@@ -591,7 +591,7 @@ var TEXT_META_NOTE = "Plain text has no structured document metadata; extractMet
|
|
|
591
591
|
var OCR_OFF_NOTE = 'Image OCR may be skipped when `ocr.mode` is "off" in analyze options.';
|
|
592
592
|
var STRUCTURED_OCR_OFF = 'Structured image output uses OCR; when `ocr.mode` is "off", `extractStructuredData` returns an empty envelope with a warning.';
|
|
593
593
|
var UNKNOWN_KIND = "Could not determine file kind from name, MIME, or bytes; all features are reported as unsupported until the kind is known.";
|
|
594
|
-
var MARKDOWN_PDF_BROWSER = "PDF: no
|
|
594
|
+
var MARKDOWN_PDF_BROWSER = "PDF: no specialized PDF\u2192Markdown in-browser (`@cognipeer/to-markdown` / `convertPdfToMarkdown` are Node-only). `extractMarkdown` still calls `@dragon708/docmind-markdown` `extractMarkdown`, which skips the Node pipeline, warns (`pdf-unsupported-runtime`), and uses structured fallback \u2014 here that envelope is empty, so Markdown stays empty. `extractLlmContent` / `extractStructuredChunks` use the same empty structured envelope with browser PDF warnings. Use @dragon708/docmind-node for real PDF extraction and Markdown / LLM text / chunks.";
|
|
595
595
|
var MARKDOWN_IMAGE_OCR_OFF = 'Same as structured-output: when ocr.mode is "off", structured (and thus Markdown/LLM/chunk exports) are empty aside from warnings.';
|
|
596
596
|
function slot(id, supported, warnings) {
|
|
597
597
|
return warnings?.length ? { id, supported, warnings } : { id, supported };
|
|
@@ -770,7 +770,7 @@ function buildBrowserExplainReport(kind, intent, ocrMode, plan, docxInclude, ocr
|
|
|
770
770
|
const structuredLikeIntent = intent === "extractStructuredData" || intent === "extractMarkdown" || intent === "extractLlmContent" || intent === "extractStructuredChunks";
|
|
771
771
|
limitations = lim(
|
|
772
772
|
BROWSER_PDF_UNSUPPORTED_WARNING,
|
|
773
|
-
structuredLikeIntent ? "`extractStructuredData` / extractMarkdown / extractLlmContent / extractStructuredChunks only see an empty structured envelope in-browser for PDF
|
|
773
|
+
structuredLikeIntent ? "`extractStructuredData` / extractMarkdown / extractLlmContent / extractStructuredChunks only see an empty structured envelope in-browser for PDF (no `@cognipeer/to-markdown`; specialized PDF\u2192Markdown runs on Node via @dragon708/docmind-node)." : ""
|
|
774
774
|
);
|
|
775
775
|
nativeExtraction = {
|
|
776
776
|
willAttempt: false,
|
|
@@ -953,7 +953,7 @@ function buildBrowserExplainReport(kind, intent, ocrMode, plan, docxInclude, ocr
|
|
|
953
953
|
limitations = [
|
|
954
954
|
...limitations,
|
|
955
955
|
...lim(
|
|
956
|
-
"Output: Markdown string from `extractMarkdown` in `@dragon708/docmind-markdown`. PDF in-browser: empty (no `@
|
|
956
|
+
"Output: Markdown string from `extractMarkdown` in `@dragon708/docmind-markdown`. PDF in-browser: empty specialized path (no `@cognipeer/to-markdown`; structured fallback only). DOCX: structured Markdown when bytes\u2192Turndown is Node-only."
|
|
957
957
|
)
|
|
958
958
|
];
|
|
959
959
|
} else if (intent === "extractLlmContent") {
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@dragon708/docmind-browser",
|
|
3
|
-
"version": "1.8.3",
|
|
3
|
+
"version": "1.8.4",
|
|
4
4
|
"description": "Official DocMind browser facade: analyzeFile and intent APIs (DOCX, image OCR, text). PDF and fs paths use @dragon708/docmind-node.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"sideEffects": false,
|
|
@@ -34,7 +34,7 @@
|
|
|
34
34
|
"license": "MIT",
|
|
35
35
|
"dependencies": {
|
|
36
36
|
"@dragon708/docmind-docx": "^1.8.0",
|
|
37
|
-
"@dragon708/docmind-markdown": "^1.1
|
|
37
|
+
"@dragon708/docmind-markdown": "^1.2.1",
|
|
38
38
|
"@dragon708/docmind-ocr": "^1.1.4",
|
|
39
39
|
"@dragon708/docmind-shared": "^1.2.0"
|
|
40
40
|
},
|