@dragon708/docmind-browser 0.1.0-alpha.1 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.ts +14 -6
- package/dist/index.js +91 -3
- package/dist/index.js.map +1 -1
- package/package.json +7 -4
package/dist/index.d.ts
CHANGED
|
@@ -1,14 +1,22 @@
|
|
|
1
|
-
import {
|
|
2
|
-
export { AnalysisAnalyzer, AnalysisResult,
|
|
1
|
+
import { AnalysisResult } from '@dragon708/docmind-shared';
|
|
2
|
+
export { AnalysisAnalyzer, AnalysisResult, DetectFileKindInput, DocxAnalysisCoreResult, FileKind, FileKindMetadata, GenericAnalysisResult, ImageAnalysisCoreResult, PdfAnalysisCoreResult, TextAnalysisResult } from '@dragon708/docmind-shared';
|
|
3
|
+
import { OcrOptions } from '@dragon708/docmind-ocr';
|
|
3
4
|
|
|
5
|
+
/** Options for {@link analyzeFile} in the browser entry (no PDF pipeline). */
|
|
6
|
+
interface BrowserAnalyzeOptions {
|
|
7
|
+
readonly signal?: AbortSignal;
|
|
8
|
+
readonly ocr?: OcrOptions;
|
|
9
|
+
}
|
|
10
|
+
|
|
11
|
+
/** PDF is not processed in the browser; use `@dragon708/docmind-node` on the server. */
|
|
12
|
+
declare const BROWSER_PDF_UNSUPPORTED_WARNING = "PDF text extraction is not available in the browser runtime; use @dragon708/docmind-node on the server.";
|
|
4
13
|
/**
|
|
5
14
|
* Inputs supported by the browser entry (DOM types only — no `fs`, no Node `Buffer` in the public surface).
|
|
6
|
-
* For richer hints (`NamedInput`, etc.), import `analyzeFile` from the `core` package directly.
|
|
7
15
|
*/
|
|
8
16
|
type BrowserAnalyzeInput = File | Blob | ArrayBuffer;
|
|
9
17
|
/**
|
|
10
|
-
*
|
|
18
|
+
* Browser-only router: DOCX, images (OCR), and text. PDF yields `not_implemented` with a clear warning.
|
|
11
19
|
*/
|
|
12
|
-
declare function analyzeFile(input: BrowserAnalyzeInput, options?:
|
|
20
|
+
declare function analyzeFile(input: BrowserAnalyzeInput, options?: BrowserAnalyzeOptions): Promise<AnalysisResult>;
|
|
13
21
|
|
|
14
|
-
export { type BrowserAnalyzeInput, analyzeFile };
|
|
22
|
+
export { BROWSER_PDF_UNSUPPORTED_WARNING, type BrowserAnalyzeInput, type BrowserAnalyzeOptions, analyzeFile };
|
package/dist/index.js
CHANGED
|
@@ -1,6 +1,75 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { assertValidAnalyzeFileInput, detectFileKind, notImplementedResult, UNKNOWN_FORMAT_WARNING, analyzeText, InvalidInputError, toUint8Array } from '@dragon708/docmind-shared';
|
|
2
|
+
import { analyzeDocx } from '@dragon708/docmind-docx';
|
|
3
|
+
import { ocr } from '@dragon708/docmind-ocr';
|
|
2
4
|
|
|
3
5
|
// src/analyzeFile.ts
|
|
6
|
+
async function analyzeDocxForBrowser(input, signal) {
|
|
7
|
+
if (signal?.aborted) {
|
|
8
|
+
const err = new Error("The operation was aborted");
|
|
9
|
+
err.name = "AbortError";
|
|
10
|
+
throw err;
|
|
11
|
+
}
|
|
12
|
+
const data = await toUint8Array(input);
|
|
13
|
+
if (data.byteLength === 0) {
|
|
14
|
+
return {
|
|
15
|
+
fileKind: "docx",
|
|
16
|
+
analyzer: "docx",
|
|
17
|
+
status: "ok",
|
|
18
|
+
kind: "docx",
|
|
19
|
+
text: "",
|
|
20
|
+
html: "",
|
|
21
|
+
warnings: ["No document bytes were provided for analysis."]
|
|
22
|
+
};
|
|
23
|
+
}
|
|
24
|
+
const r = await analyzeDocx(data);
|
|
25
|
+
return {
|
|
26
|
+
fileKind: "docx",
|
|
27
|
+
analyzer: "docx",
|
|
28
|
+
status: "ok",
|
|
29
|
+
kind: "docx",
|
|
30
|
+
text: r.text,
|
|
31
|
+
html: r.html,
|
|
32
|
+
warnings: [...r.warnings]
|
|
33
|
+
};
|
|
34
|
+
}
|
|
35
|
+
async function analyzeImageForBrowser(input, options) {
|
|
36
|
+
if (options?.signal?.aborted) {
|
|
37
|
+
const err = new Error("The operation was aborted");
|
|
38
|
+
err.name = "AbortError";
|
|
39
|
+
throw err;
|
|
40
|
+
}
|
|
41
|
+
const data = await toUint8Array(input);
|
|
42
|
+
if (data.byteLength === 0) {
|
|
43
|
+
return {
|
|
44
|
+
fileKind: "image",
|
|
45
|
+
analyzer: "image",
|
|
46
|
+
status: "ok",
|
|
47
|
+
kind: "image",
|
|
48
|
+
text: "",
|
|
49
|
+
confidence: 0,
|
|
50
|
+
ocrUsed: true,
|
|
51
|
+
warnings: ["No image bytes were provided for analysis."]
|
|
52
|
+
};
|
|
53
|
+
}
|
|
54
|
+
const ocrOpts = {
|
|
55
|
+
...options?.ocr ?? {},
|
|
56
|
+
signal: options?.ocr?.signal ?? options?.signal
|
|
57
|
+
};
|
|
58
|
+
const r = await ocr(data, ocrOpts);
|
|
59
|
+
return {
|
|
60
|
+
fileKind: "image",
|
|
61
|
+
analyzer: "image",
|
|
62
|
+
status: "ok",
|
|
63
|
+
kind: "image",
|
|
64
|
+
text: r.text,
|
|
65
|
+
confidence: r.confidence,
|
|
66
|
+
ocrUsed: r.ocrUsed,
|
|
67
|
+
warnings: []
|
|
68
|
+
};
|
|
69
|
+
}
|
|
70
|
+
|
|
71
|
+
// src/analyzeFile.ts
|
|
72
|
+
var BROWSER_PDF_UNSUPPORTED_WARNING = "PDF text extraction is not available in the browser runtime; use @dragon708/docmind-node on the server.";
|
|
4
73
|
function assertBrowserInput(input) {
|
|
5
74
|
const ok = input instanceof File || input instanceof Blob || input instanceof ArrayBuffer;
|
|
6
75
|
if (!ok) {
|
|
@@ -8,10 +77,29 @@ function assertBrowserInput(input) {
|
|
|
8
77
|
}
|
|
9
78
|
}
|
|
10
79
|
async function analyzeFile(input, options) {
|
|
80
|
+
if (options?.signal?.aborted) {
|
|
81
|
+
const err = new Error("The operation was aborted");
|
|
82
|
+
err.name = "AbortError";
|
|
83
|
+
throw err;
|
|
84
|
+
}
|
|
11
85
|
assertBrowserInput(input);
|
|
12
|
-
|
|
86
|
+
assertValidAnalyzeFileInput(input);
|
|
87
|
+
const fileKind = detectFileKind(input);
|
|
88
|
+
const bytesInput = input;
|
|
89
|
+
switch (fileKind) {
|
|
90
|
+
case "pdf":
|
|
91
|
+
return notImplementedResult("pdf", "pdf", [BROWSER_PDF_UNSUPPORTED_WARNING]);
|
|
92
|
+
case "docx":
|
|
93
|
+
return analyzeDocxForBrowser(bytesInput, options?.signal);
|
|
94
|
+
case "image":
|
|
95
|
+
return analyzeImageForBrowser(bytesInput, options);
|
|
96
|
+
case "text":
|
|
97
|
+
return analyzeText(bytesInput, { signal: options?.signal });
|
|
98
|
+
default:
|
|
99
|
+
return notImplementedResult(fileKind, "none", [UNKNOWN_FORMAT_WARNING]);
|
|
100
|
+
}
|
|
13
101
|
}
|
|
14
102
|
|
|
15
|
-
export { analyzeFile };
|
|
103
|
+
export { BROWSER_PDF_UNSUPPORTED_WARNING, analyzeFile };
|
|
16
104
|
//# sourceMappingURL=index.js.map
|
|
17
105
|
//# sourceMappingURL=index.js.map
|
package/dist/index.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../src/analyzeFile.ts"],"names":["
|
|
1
|
+
{"version":3,"sources":["../src/analyzers/docx.ts","../src/analyzers/image.ts","../src/analyzeFile.ts"],"names":["extractDocx","toUint8Array"],"mappings":";;;;;AAOA,eAAsB,qBAAA,CACpB,OACA,MAAA,EACyB;AACzB,EAAA,IAAI,QAAQ,OAAA,EAAS;AACnB,IAAA,MAAM,GAAA,GAAM,IAAI,KAAA,CAAM,2BAA2B,CAAA;AACjD,IAAA,GAAA,CAAI,IAAA,GAAO,YAAA;AACX,IAAA,MAAM,GAAA;AAAA,EACR;AAEA,EAAA,MAAM,IAAA,GAAO,MAAM,YAAA,CAAa,KAAK,CAAA;AACrC,EAAA,IAAI,IAAA,CAAK,eAAe,CAAA,EAAG;AACzB,IAAA,OAAO;AAAA,MACL,QAAA,EAAU,MAAA;AAAA,MACV,QAAA,EAAU,MAAA;AAAA,MACV,MAAA,EAAQ,IAAA;AAAA,MACR,IAAA,EAAM,MAAA;AAAA,MACN,IAAA,EAAM,EAAA;AAAA,MACN,IAAA,EAAM,EAAA;AAAA,MACN,QAAA,EAAU,CAAC,+CAA+C;AAAA,KAC5D;AAAA,EACF;AAEA,EAAA,MAAM,CAAA,GAAI,MAAMA,WAAA,CAAY,IAAI,CAAA;AAChC,EAAA,OAAO;AAAA,IACL,QAAA,EAAU,MAAA;AAAA,IACV,QAAA,EAAU,MAAA;AAAA,IACV,MAAA,EAAQ,IAAA;AAAA,IACR,IAAA,EAAM,MAAA;AAAA,IACN,MAAM,CAAA,CAAE,IAAA;AAAA,IACR,MAAM,CAAA,CAAE,IAAA;AAAA,IACR,QAAA,EAAU,CAAC,GAAG,CAAA,CAAE,QAAQ;AAAA,GAC1B;AACF;AChCA,eAAsB,sBAAA,CACpB,OACA,OAAA,EACyB;AACzB,EAAA,IAAI,OAAA,EAAS,QAAQ,OAAA,EAAS;AAC5B,IAAA,MAAM,GAAA,GAAM,IAAI,KAAA,CAAM,2BAA2B,CAAA;AACjD,IAAA,GAAA,CAAI,IAAA,GAAO,YAAA;AACX,IAAA,MAAM,GAAA;AAAA,EACR;AAEA,EAAA,MAAM,IAAA,GAAO,MAAMC,YAAAA,CAAa,KAAK,CAAA;AACrC,EAAA,IAAI,IAAA,CAAK,eAAe,CAAA,EAAG;AACzB,IAAA,OAAO;AAAA,MACL,QAAA,EAAU,OAAA;AAAA,MACV,QAAA,EAAU,OAAA;AAAA,MACV,MAAA,EAAQ,IAAA;AAAA,MACR,IAAA,EAAM,OAAA;AAAA,MACN,IAAA,EAAM,EAAA;AAAA,MACN,UAAA,EAAY,CAAA;AAAA,MACZ,OAAA,EAAS,IAAA;AAAA,MACT,QAAA,EAAU,CAAC,4CAA4C;AAAA,KACzD;AAAA,EACF;AAEA,EAAA,MAAM,OAAA,GAAU;AAAA,IACd,GAAI,OAAA,EAAS,GAAA,IAAO,EAAC;AAAA,IACrB,MAAA,EAAQ,OAAA,EAAS,GAAA,EAAK,MAAA,IAAU,OAAA,EAAS;AAAA,GAC3C;AAEA,EAAA,MAAM,CAAA,GAAI,MAAM,GAAA,CAAI,IAAA,EAAM,OAAO,CAAA;AACjC,EAAA,OAAO;AAAA,IACL,QAAA,EAAU,OAAA;AAAA,IACV,QAAA,EAAU,OAAA;AAAA,IACV,MAAA,EAAQ,IAAA;AAAA,IACR,IAAA,EAAM,OAAA;AAAA,IACN,MAAM,CAAA,CAAE,IAAA;AAAA,IACR,YAAY,CAAA,CAAE,UAAA;AAAA,IACd,SAAS,CAAA,CAAE,OAAA;AAAA,IACX,UAAU;AAAC,GACb;AACF;;;ACjCO,IAAM,+BAAA,GACX;AAOF,SAAS,mBAAmB,KAAA,EAAsD;AAChF,EAAA,MAAM,EAAA,GACJ,KAAA,YAAiB,IAAA,IACjB,KAAA,YAAiB,QACjB,KAAA,YAAiB,WAAA;AACnB,EAAA,IAAI,CAAC,EAAA,EAAI;AACP,IAAA,MAAM,IAAI,kBAAkB,wCAAwC,CAAA;AAAA,EACtE;AACF;AAKA,eAAsB,WAAA,CACpB,OACA,OAAA,EACyB;AACzB,EAAA,IAAI,OAAA,EAAS,QAAQ,OAAA,EAAS;AAC5B,IAAA,MAAM,GAAA,GAAM,IAAI,KAAA,CAAM,2BAA2B,CAAA;AACjD,IAAA,GAAA,CAAI,IAAA,GAAO,YAAA;AACX,IAAA,MAAM,GAAA;AAAA,EACR;AAEA,EAAA,kBAAA,CAAmB,KAAK,CAAA;AACxB,EAAA,2BAAA,CAA4B,KAAK,CAAA;AAEjC,EAAA,MAAM,QAAA,GAAW,eAAe,KAA4B,CAAA;AAE5D,EAAA,MAAM,UAAA,GAAa,KAAA;AAEnB,EAAA,QAAQ,QAAA;AAAU,IAChB,KAAK,KAAA;AACH,MAAA,OAAO,oBAAA,CAAqB,KAAA,EAAO,KAAA,EAAO,CAAC,+BAA+B,CAAC,CAAA;AAAA,IAC7E,KAAK,MAAA;AACH,MAAA,OAAO,qBAAA,CAAsB,UAAA,EAAY,OAAA,EAAS,MAAM,CAAA;AAAA,IAC1D,KAAK,OAAA;AACH,MAAA,OAAO,sBAAA,CAAuB,YAAY,OAAO,CAAA;AAAA,IACnD,KAAK,MAAA;AACH,MAAA,OAAO,YAAY,UAAA,EAAY,EAAE,MAAA,EAAQ,OAAA,EAAS,QAAQ,CAAA;AAAA,IAC5D;AACE,MAAA,OAAO,oBAAA,CAAqB,QAAA,EAAU,MAAA,EAAQ,CAAC,sBAAsB,CAAC,CAAA;AAAA;AAE5E","file":"index.js","sourcesContent":["import { analyzeDocx as extractDocx } from \"@dragon708/docmind-docx\";\nimport type { AnalysisResult, FileLikeInput } from \"@dragon708/docmind-shared\";\nimport { toUint8Array } from \"@dragon708/docmind-shared\";\n\n/**\n * DOCX → `@dragon708/docmind-docx` (browser-safe: Mammoth + JSZip).\n */\nexport async function analyzeDocxForBrowser(\n input: FileLikeInput,\n signal?: AbortSignal,\n): Promise<AnalysisResult> {\n if (signal?.aborted) {\n const err = new Error(\"The operation was aborted\");\n err.name = \"AbortError\";\n throw err;\n }\n\n const data = await toUint8Array(input);\n if (data.byteLength === 0) {\n return {\n fileKind: \"docx\",\n analyzer: \"docx\",\n status: \"ok\",\n kind: \"docx\",\n text: \"\",\n html: \"\",\n warnings: [\"No document bytes were provided for analysis.\"],\n };\n }\n\n const r = await extractDocx(data);\n return {\n fileKind: \"docx\",\n analyzer: \"docx\",\n status: \"ok\",\n kind: \"docx\",\n text: r.text,\n html: r.html,\n warnings: [...r.warnings],\n };\n}\n","import { ocr } from \"@dragon708/docmind-ocr\";\nimport type { AnalysisResult, FileLikeInput } from \"@dragon708/docmind-shared\";\nimport { toUint8Array } from \"@dragon708/docmind-shared\";\nimport type { BrowserAnalyzeOptions } from \"../browserAnalyzeOptions.js\";\n\n/**\n * Image → `@dragon708/docmind-ocr` (Tesseract in WASM / browser).\n */\nexport async function analyzeImageForBrowser(\n input: FileLikeInput,\n options?: BrowserAnalyzeOptions,\n): Promise<AnalysisResult> {\n if (options?.signal?.aborted) {\n const err = new Error(\"The operation was aborted\");\n err.name = \"AbortError\";\n throw err;\n }\n\n const data = await toUint8Array(input);\n if (data.byteLength === 0) {\n return {\n fileKind: \"image\",\n analyzer: \"image\",\n status: \"ok\",\n kind: \"image\",\n text: \"\",\n confidence: 0,\n ocrUsed: true,\n warnings: [\"No image bytes were provided for analysis.\"],\n };\n }\n\n const ocrOpts = {\n ...(options?.ocr ?? {}),\n signal: options?.ocr?.signal ?? options?.signal,\n };\n\n const r = await ocr(data, ocrOpts);\n return {\n fileKind: \"image\",\n analyzer: \"image\",\n status: \"ok\",\n kind: \"image\",\n text: r.text,\n confidence: r.confidence,\n ocrUsed: r.ocrUsed,\n warnings: [],\n };\n}\n","import type { AnalysisResult, FileLikeInput } from \"@dragon708/docmind-shared\";\r\nimport {\r\n analyzeText,\r\n assertValidAnalyzeFileInput,\r\n detectFileKind,\r\n InvalidInputError,\r\n notImplementedResult,\r\n UNKNOWN_FORMAT_WARNING,\r\n} from \"@dragon708/docmind-shared\";\r\nimport type { DetectFileKindInput } from \"@dragon708/docmind-shared\";\r\nimport type { BrowserAnalyzeOptions } from \"./browserAnalyzeOptions.js\";\r\nimport { analyzeDocxForBrowser } from \"./analyzers/docx.js\";\r\nimport { analyzeImageForBrowser } from \"./analyzers/image.js\";\r\n\r\n/** PDF is not processed in the browser; use `@dragon708/docmind-node` on the server. */\r\nexport const BROWSER_PDF_UNSUPPORTED_WARNING =\r\n \"PDF text extraction is not available in the browser runtime; use @dragon708/docmind-node on the server.\";\r\n\r\n/**\r\n * Inputs supported by the browser entry (DOM types only — no `fs`, no Node `Buffer` in the public surface).\r\n */\r\nexport type BrowserAnalyzeInput = File | Blob | ArrayBuffer;\r\n\r\nfunction assertBrowserInput(input: unknown): asserts input is BrowserAnalyzeInput {\r\n const ok =\r\n input instanceof File ||\r\n input instanceof Blob ||\r\n input instanceof ArrayBuffer;\r\n if (!ok) {\r\n throw new InvalidInputError(\"Expected a File, Blob, or ArrayBuffer.\");\r\n }\r\n}\r\n\r\n/**\r\n * Browser-only router: DOCX, images (OCR), and text. PDF yields `not_implemented` with a clear warning.\r\n */\r\nexport async function analyzeFile(\r\n input: BrowserAnalyzeInput,\r\n options?: BrowserAnalyzeOptions,\r\n): Promise<AnalysisResult> {\r\n if (options?.signal?.aborted) {\r\n const err = new Error(\"The operation was aborted\");\r\n err.name = \"AbortError\";\r\n throw err;\r\n }\r\n\r\n assertBrowserInput(input);\r\n assertValidAnalyzeFileInput(input);\r\n\r\n const fileKind = detectFileKind(input as DetectFileKindInput);\r\n\r\n const bytesInput = input as FileLikeInput;\r\n\r\n switch (fileKind) {\r\n case \"pdf\":\r\n return notImplementedResult(\"pdf\", \"pdf\", [BROWSER_PDF_UNSUPPORTED_WARNING]);\r\n case \"docx\":\r\n return analyzeDocxForBrowser(bytesInput, options?.signal);\r\n case \"image\":\r\n return analyzeImageForBrowser(bytesInput, options);\r\n case \"text\":\r\n return analyzeText(bytesInput, { signal: options?.signal });\r\n default:\r\n return notImplementedResult(fileKind, \"none\", [UNKNOWN_FORMAT_WARNING]);\r\n }\r\n}\r\n"]}
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@dragon708/docmind-browser",
|
|
3
|
-
"version": "
|
|
4
|
-
"description": "Browser
|
|
3
|
+
"version": "1.0.0",
|
|
4
|
+
"description": "Browser DocMind entry: DOCX, OCR, text (no PDF; use @dragon708/docmind-node on the server).",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"sideEffects": false,
|
|
7
7
|
"main": "./dist/index.js",
|
|
@@ -32,11 +32,14 @@
|
|
|
32
32
|
],
|
|
33
33
|
"license": "MIT",
|
|
34
34
|
"dependencies": {
|
|
35
|
-
"@dragon708/docmind-
|
|
35
|
+
"@dragon708/docmind-docx": "^1.0.0",
|
|
36
|
+
"@dragon708/docmind-ocr": "^1.0.0",
|
|
37
|
+
"@dragon708/docmind-shared": "^1.0.0"
|
|
36
38
|
},
|
|
37
39
|
"devDependencies": {
|
|
38
40
|
"@types/node": "^20.19.37",
|
|
39
41
|
"tsup": "^8.5.1",
|
|
40
|
-
"typescript": "^5.9.3"
|
|
42
|
+
"typescript": "^5.9.3",
|
|
43
|
+
"vitest": "^1.6.1"
|
|
41
44
|
}
|
|
42
45
|
}
|