@dragon708/docmind-node 1.13.0 → 1.13.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.ts +13 -6
- package/dist/index.js +2 -2
- package/package.json +2 -2
package/dist/index.d.ts
CHANGED
|
@@ -55,17 +55,17 @@ interface NodeExtractStructuredDataOptions extends DocMindAnalyzeOptions {
|
|
|
55
55
|
/**
|
|
56
56
|
* {@link extractMarkdown}: inherits {@link NodeExtractStructuredDataOptions}; `markdown` maps to structured-serializer
|
|
57
57
|
* options passed through `extractMarkdown` in `@dragon708/docmind-markdown`. `markdownDocx` / `markdownPdf`
|
|
58
|
-
* configure Mammoth→Turndown and `@
|
|
58
|
+
* configure Mammoth→Turndown and `@cognipeer/to-markdown` (`convertPdfToMarkdown`) respectively — separate from `docx` / `pdf` used only by {@link extractStructuredData}.
|
|
59
59
|
*/
|
|
60
60
|
interface NodeExtractMarkdownOptions extends NodeExtractStructuredDataOptions {
|
|
61
61
|
readonly markdown?: RenderMarkdownOptions;
|
|
62
62
|
/** Forwarded to `extractMarkdown` → `convertDocxToMarkdown` when bytes are DOCX (Node). */
|
|
63
63
|
readonly markdownDocx?: ConvertDocxToMarkdownOptions;
|
|
64
|
-
/** Forwarded to `extractMarkdown` → `convertPdfToMarkdown` when bytes are PDF. */
|
|
64
|
+
/** Forwarded to `extractMarkdown` → `convertPdfToMarkdown` when bytes are PDF (Node; `@cognipeer/to-markdown`). */
|
|
65
65
|
readonly markdownPdf?: ConvertPdfToMarkdownOptions;
|
|
66
66
|
/**
|
|
67
67
|
* Optional: receive `strategy` and merged `warnings` from `@dragon708/docmind-markdown` `extractMarkdown`
|
|
68
|
-
* (e.g. `pdf-
|
|
68
|
+
* (e.g. `pdf-cognipeer-specialized` vs `pdf-structured-fallback`, `docx-mammoth` vs `docx-structured-fallback`) without changing the `Promise<string>` return type.
|
|
69
69
|
*/
|
|
70
70
|
readonly onMarkdownExtract?: (info: {
|
|
71
71
|
strategy: ExtractMarkdownStrategy;
|
|
@@ -141,8 +141,14 @@ declare function extractStructuredData(input: NodeAnalyzeInput, options?: NodeEx
|
|
|
141
141
|
/**
|
|
142
142
|
* End-to-end: {@link extractStructuredData} (for fallback + option parity) plus
|
|
143
143
|
* `extractMarkdown` from `@dragon708/docmind-markdown` on `{ data, filename, mimeType }`.
|
|
144
|
-
*
|
|
145
|
-
*
|
|
144
|
+
*
|
|
145
|
+
* On Node, **DOCX** bytes use **Mammoth → Turndown**; **PDF** bytes use **`convertPdfToMarkdown`**
|
|
146
|
+
* (`@cognipeer/to-markdown`, Node-first, no JVM). The structured envelope is always passed as `structuredFallback`
|
|
147
|
+
* so specialized routes can fall back to structured → Markdown serialization with clear warnings when needed.
|
|
148
|
+
*
|
|
149
|
+
* {@link NodeExtractMarkdownOptions.onMarkdownExtract} receives `strategy` and merged `warnings` from the markdown
|
|
150
|
+
* package (for example `pdf-cognipeer-specialized`, `pdf-structured-fallback`, `docx-structured-fallback`, and
|
|
151
|
+
* `[docmind-markdown:extractMarkdown]` trace lines).
|
|
146
152
|
*
|
|
147
153
|
* @param input - Path, buffer, or {@link NodeAnalyzeInput} accepted by the Node facade.
|
|
148
154
|
* @param options - Structured routing (`pdf` / `docx` / `ocr` / `normalize`), optional `markdown` serializer knobs,
|
|
@@ -151,7 +157,8 @@ declare function extractStructuredData(input: NodeAnalyzeInput, options?: NodeEx
|
|
|
151
157
|
declare function extractMarkdown(input: NodeAnalyzeInput, options?: NodeExtractMarkdownOptions): Promise<string>;
|
|
152
158
|
/**
|
|
153
159
|
* {@link extractStructuredData} then `renderLlmText` in `@dragon708/docmind-markdown` (tagged plain text for LLMs).
|
|
154
|
-
*
|
|
160
|
+
* This path **does not** run the binary PDF/DOCX Markdown pipelines (no `@cognipeer/to-markdown` / Mammoth→Turndown here);
|
|
161
|
+
* it linearizes the structured envelope only. Use {@link extractMarkdown} when you need specialized PDF or DOCX Markdown layout.
|
|
155
162
|
*
|
|
156
163
|
* @param options - Structured routing plus optional `llm` passed through to `renderLlmText`.
|
|
157
164
|
*/
|
package/dist/index.js
CHANGED
|
@@ -724,7 +724,7 @@ function buildNodeCapabilityReport(kind) {
|
|
|
724
724
|
"extractStructuredData: analyzePdf + PDF.js per-page text, outline, links, annotations, merged via normalizeToStructuredResult; respects pdf.ocr like analyzeFile."
|
|
725
725
|
]),
|
|
726
726
|
slot("markdown", true, [
|
|
727
|
-
"extractMarkdown: `@dragon708/docmind-markdown` hybrid \u2014 PDF bytes
|
|
727
|
+
"extractMarkdown: `@dragon708/docmind-markdown` hybrid \u2014 PDF bytes use `@cognipeer/to-markdown` (Node-first, no Java); structured PDF (`extractStructuredData`, respects pdf.ocr) is always built as fallback."
|
|
728
728
|
]),
|
|
729
729
|
slot("llm-text", true, [
|
|
730
730
|
"extractLlmContent: structured envelope \u2192 LLM-oriented plain text (`renderLlmText` in `@dragon708/docmind-markdown`)."
|
|
@@ -1047,7 +1047,7 @@ function buildNodeExplainReport(kind, intent, pdfOcr, plan, docxInclude, ocrSlic
|
|
|
1047
1047
|
if (kind === "pdf") {
|
|
1048
1048
|
nativeExtraction = {
|
|
1049
1049
|
willAttempt: true,
|
|
1050
|
-
description: intent === "extractStructuredData" ? "extractStructuredData: analyzePdf plus PDF.js page rows, outline, links, and annotations, normalized to StructuredDocumentResult." : intent === "extractMarkdown" ? "extractMarkdown: structured PDF extract (same as extractStructuredData) for fallback; primary Markdown
|
|
1050
|
+
description: intent === "extractStructuredData" ? "extractStructuredData: analyzePdf plus PDF.js page rows, outline, links, and annotations, normalized to StructuredDocumentResult." : intent === "extractMarkdown" ? "extractMarkdown: structured PDF extract (same as extractStructuredData) for fallback; primary Markdown on PDF bytes uses `@cognipeer/to-markdown` via `@dragon708/docmind-markdown` (Node-first, no Java)." : `${intent}: same structured PDF pipeline as extractStructuredData, then \`@dragon708/docmind-markdown\` (renderLlmText or chunk sections).`
|
|
1051
1051
|
};
|
|
1052
1052
|
ocr = {
|
|
1053
1053
|
mayUse: pdfOcr !== "off",
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@dragon708/docmind-node",
|
|
3
|
-
"version": "1.13.0",
|
|
3
|
+
"version": "1.13.2",
|
|
4
4
|
"description": "Official DocMind Node facade: analyzeFile, intent APIs, PDF/DOCX/OCR, and fs helpers.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "./dist/index.js",
|
|
@@ -33,7 +33,7 @@
|
|
|
33
33
|
"license": "MIT",
|
|
34
34
|
"dependencies": {
|
|
35
35
|
"@dragon708/docmind-docx": "^1.8.0",
|
|
36
|
-
"@dragon708/docmind-markdown": "^1.2.
|
|
36
|
+
"@dragon708/docmind-markdown": "^1.2.6",
|
|
37
37
|
"@dragon708/docmind-ocr": "^1.1.4",
|
|
38
38
|
"@dragon708/docmind-pdf": "^2.2.0",
|
|
39
39
|
"@dragon708/docmind-shared": "^1.2.0"
|