@dragon708/docmind-browser 0.1.0-alpha.1 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.ts CHANGED
@@ -1,14 +1,43 @@
1
- import { AnalyzeOptions, AnalysisResult } from '@dragon708/docmind-core';
2
- export { AnalysisAnalyzer, AnalysisResult, AnalyzeOptions, DetectFileKindInput, FileKind, FileKindMetadata, GenericAnalysisResult, TextAnalysisResult } from '@dragon708/docmind-core';
1
+ import { AnalysisResult } from '@dragon708/docmind-shared';
2
+ export { AnalysisAnalyzer, AnalysisResult, DetectFileKindInput, DocxAnalysisCoreResult, FileKind, FileKindMetadata, GenericAnalysisResult, ImageAnalysisCoreResult, PdfAnalysisCoreResult, TextAnalysisResult, detectFileKind } from '@dragon708/docmind-shared';
3
+ import { OcrOptions } from '@dragon708/docmind-ocr';
3
4
 
5
+ /** Options for {@link analyzeFile} in the browser entry (no PDF pipeline). */
6
+ interface BrowserAnalyzeOptions {
7
+ readonly signal?: AbortSignal;
8
+ readonly ocr?: OcrOptions;
9
+ }
10
+
11
+ /** PDF is not processed in the browser; use `@dragon708/docmind-node` on the server. */
12
+ declare const BROWSER_PDF_UNSUPPORTED_WARNING = "PDF text extraction is not available in the browser runtime; use @dragon708/docmind-node on the server.";
4
13
  /**
5
14
  * Inputs supported by the browser entry (DOM types only — no `fs`, no Node `Buffer` in the public surface).
6
- * For richer hints (`NamedInput`, etc.), import `analyzeFile` from the `core` package directly.
7
15
  */
8
16
  type BrowserAnalyzeInput = File | Blob | ArrayBuffer;
9
17
  /**
10
- * Thin wrapper around `@dragon708/docmind-core`’s `analyzeFile` for browser runtimes.
18
+ * Browser-only router: DOCX, images (OCR), and text. PDF yields `not_implemented` with a clear warning.
19
+ */
20
+ declare function analyzeFile(input: BrowserAnalyzeInput, options?: BrowserAnalyzeOptions): Promise<AnalysisResult>;
21
+
22
+ /**
23
+ * Text only: DOCX → `extractTextFromDocx`; imagen → `ocr`; texto → `analyzeText`.
24
+ * PDF no está soportado en el navegador (mismo aviso que `analyzeFile`).
25
+ */
26
+ declare function extractText(input: BrowserAnalyzeInput, options?: BrowserAnalyzeOptions): Promise<AnalysisResult>;
27
+ /**
28
+ * Metadatos: en el navegador no hay pipeline PDF ni metadatos DOCX dedicados;
29
+ * DOCX/imagen con avisos; texto → `analyzeText`.
30
+ */
31
+ declare function extractMetadata(input: BrowserAnalyzeInput, options?: BrowserAnalyzeOptions): Promise<AnalysisResult>;
32
+ /**
33
+ * HTML: DOCX → `extractTextFromDocx` + `convertDocxToHtml`; texto → `<pre>`;
34
+ * PDF/imagen no aplican en browser como HTML rico.
35
+ */
36
+ declare function convertToHtml(input: BrowserAnalyzeInput, options?: BrowserAnalyzeOptions): Promise<AnalysisResult>;
37
+ /**
38
+ * OCR: imagen → `ocr`; DOCX → `analyzeDocx` con aviso (sin OCR); texto → `analyzeText`.
39
+ * PDF no soportado en browser.
11
40
  */
12
- declare function analyzeFile(input: BrowserAnalyzeInput, options?: AnalyzeOptions): Promise<AnalysisResult>;
41
+ declare function runOcr(input: BrowserAnalyzeInput, options?: BrowserAnalyzeOptions): Promise<AnalysisResult>;
13
42
 
14
- export { type BrowserAnalyzeInput, analyzeFile };
43
+ export { BROWSER_PDF_UNSUPPORTED_WARNING, type BrowserAnalyzeInput, type BrowserAnalyzeOptions, analyzeFile, convertToHtml, extractMetadata, extractText, runOcr };
package/dist/index.js CHANGED
@@ -1,6 +1,76 @@
1
- import { analyzeFile as analyzeFile$1, InvalidInputError } from '@dragon708/docmind-core';
1
+ import { assertValidAnalyzeFileInput, detectFileKind, notImplementedResult, UNKNOWN_FORMAT_WARNING, analyzeText, toUint8Array, InvalidInputError } from '@dragon708/docmind-shared';
2
+ export { detectFileKind } from '@dragon708/docmind-shared';
3
+ import { extractTextFromDocx, convertDocxToHtml, analyzeDocx } from '@dragon708/docmind-docx';
4
+ import { ocr } from '@dragon708/docmind-ocr';
2
5
 
3
6
  // src/analyzeFile.ts
7
+ async function analyzeDocxForBrowser(input, signal) {
8
+ if (signal?.aborted) {
9
+ const err = new Error("The operation was aborted");
10
+ err.name = "AbortError";
11
+ throw err;
12
+ }
13
+ const data = await toUint8Array(input);
14
+ if (data.byteLength === 0) {
15
+ return {
16
+ fileKind: "docx",
17
+ analyzer: "docx",
18
+ status: "ok",
19
+ kind: "docx",
20
+ text: "",
21
+ html: "",
22
+ warnings: ["No document bytes were provided for analysis."]
23
+ };
24
+ }
25
+ const r = await analyzeDocx(data);
26
+ return {
27
+ fileKind: "docx",
28
+ analyzer: "docx",
29
+ status: "ok",
30
+ kind: "docx",
31
+ text: r.text,
32
+ html: r.html,
33
+ warnings: [...r.warnings]
34
+ };
35
+ }
36
+ async function analyzeImageForBrowser(input, options) {
37
+ if (options?.signal?.aborted) {
38
+ const err = new Error("The operation was aborted");
39
+ err.name = "AbortError";
40
+ throw err;
41
+ }
42
+ const data = await toUint8Array(input);
43
+ if (data.byteLength === 0) {
44
+ return {
45
+ fileKind: "image",
46
+ analyzer: "image",
47
+ status: "ok",
48
+ kind: "image",
49
+ text: "",
50
+ confidence: 0,
51
+ ocrUsed: true,
52
+ warnings: ["No image bytes were provided for analysis."]
53
+ };
54
+ }
55
+ const ocrOpts = {
56
+ ...options?.ocr ?? {},
57
+ signal: options?.ocr?.signal ?? options?.signal
58
+ };
59
+ const r = await ocr(data, ocrOpts);
60
+ return {
61
+ fileKind: "image",
62
+ analyzer: "image",
63
+ status: "ok",
64
+ kind: "image",
65
+ text: r.text,
66
+ confidence: r.confidence,
67
+ ocrUsed: r.ocrUsed,
68
+ warnings: []
69
+ };
70
+ }
71
+
72
+ // src/analyzeFile.ts
73
+ var BROWSER_PDF_UNSUPPORTED_WARNING = "PDF text extraction is not available in the browser runtime; use @dragon708/docmind-node on the server.";
4
74
  function assertBrowserInput(input) {
5
75
  const ok = input instanceof File || input instanceof Blob || input instanceof ArrayBuffer;
6
76
  if (!ok) {
@@ -8,10 +78,292 @@ function assertBrowserInput(input) {
8
78
  }
9
79
  }
10
80
  async function analyzeFile(input, options) {
81
+ if (options?.signal?.aborted) {
82
+ const err = new Error("The operation was aborted");
83
+ err.name = "AbortError";
84
+ throw err;
85
+ }
11
86
  assertBrowserInput(input);
12
- return analyzeFile$1(input, options);
87
+ assertValidAnalyzeFileInput(input);
88
+ const fileKind = detectFileKind(input);
89
+ const bytesInput = input;
90
+ switch (fileKind) {
91
+ case "pdf":
92
+ return notImplementedResult("pdf", "pdf", [BROWSER_PDF_UNSUPPORTED_WARNING]);
93
+ case "docx":
94
+ return analyzeDocxForBrowser(bytesInput, options?.signal);
95
+ case "image":
96
+ return analyzeImageForBrowser(bytesInput, options);
97
+ case "text":
98
+ return analyzeText(bytesInput, { signal: options?.signal });
99
+ default:
100
+ return notImplementedResult(fileKind, "none", [UNKNOWN_FORMAT_WARNING]);
101
+ }
102
+ }
103
+ function assertBrowserInput2(input) {
104
+ const ok = input instanceof File || input instanceof Blob || input instanceof ArrayBuffer;
105
+ if (!ok) {
106
+ throw new InvalidInputError("Expected a File, Blob, or ArrayBuffer.");
107
+ }
108
+ }
109
+ function throwIfAborted(signal) {
110
+ if (signal?.aborted) {
111
+ const err = new Error("The operation was aborted");
112
+ err.name = "AbortError";
113
+ throw err;
114
+ }
115
+ }
116
+ function escapeHtmlMinimal(s) {
117
+ return s.replace(/&/g, "&amp;").replace(/</g, "&lt;").replace(/>/g, "&gt;").replace(/"/g, "&quot;");
118
+ }
119
+ var DOCX_METADATA_STUB = "Structured document metadata for DOCX is not exposed as a separate API; use extractText or analyzeFile.";
120
+ var IMAGE_METADATA_NOTE = "Raster images have no document metadata bundle in this API.";
121
+ async function kindOf(input) {
122
+ assertBrowserInput2(input);
123
+ assertValidAnalyzeFileInput(input);
124
+ return input;
125
+ }
126
+ async function extractText(input, options) {
127
+ throwIfAborted(options?.signal);
128
+ const resolved = await kindOf(input);
129
+ const kind = detectFileKind(resolved);
130
+ const bytesInput = input;
131
+ const signal = options?.signal;
132
+ switch (kind) {
133
+ case "pdf":
134
+ return notImplementedResult("pdf", "pdf", [BROWSER_PDF_UNSUPPORTED_WARNING]);
135
+ case "docx": {
136
+ const data = await toUint8Array(bytesInput);
137
+ if (data.byteLength === 0) {
138
+ return {
139
+ fileKind: "docx",
140
+ analyzer: "docx",
141
+ status: "ok",
142
+ kind: "docx",
143
+ text: "",
144
+ html: "",
145
+ warnings: ["No document bytes were provided for analysis."]
146
+ };
147
+ }
148
+ const r = await extractTextFromDocx(data);
149
+ return {
150
+ fileKind: "docx",
151
+ analyzer: "docx",
152
+ status: "ok",
153
+ kind: "docx",
154
+ text: r.text,
155
+ html: "",
156
+ warnings: r.warnings
157
+ };
158
+ }
159
+ case "image": {
160
+ const data = await toUint8Array(bytesInput);
161
+ if (data.byteLength === 0) {
162
+ return {
163
+ fileKind: "image",
164
+ analyzer: "image",
165
+ status: "ok",
166
+ kind: "image",
167
+ text: "",
168
+ confidence: 0,
169
+ ocrUsed: true,
170
+ warnings: ["No image bytes were provided for analysis."]
171
+ };
172
+ }
173
+ const ocrOpts = {
174
+ ...options?.ocr ?? {},
175
+ signal: options?.ocr?.signal ?? signal
176
+ };
177
+ const r = await ocr(data, ocrOpts);
178
+ return {
179
+ fileKind: "image",
180
+ analyzer: "image",
181
+ status: "ok",
182
+ kind: "image",
183
+ text: r.text,
184
+ confidence: r.confidence,
185
+ ocrUsed: r.ocrUsed,
186
+ warnings: []
187
+ };
188
+ }
189
+ case "text":
190
+ return analyzeText(bytesInput, { signal });
191
+ default:
192
+ return notImplementedResult(kind, "none", [UNKNOWN_FORMAT_WARNING]);
193
+ }
194
+ }
195
+ async function extractMetadata(input, options) {
196
+ throwIfAborted(options?.signal);
197
+ const resolved = await kindOf(input);
198
+ const kind = detectFileKind(resolved);
199
+ const bytesInput = input;
200
+ const signal = options?.signal;
201
+ switch (kind) {
202
+ case "pdf":
203
+ return notImplementedResult("pdf", "pdf", [BROWSER_PDF_UNSUPPORTED_WARNING]);
204
+ case "docx":
205
+ return {
206
+ fileKind: "docx",
207
+ analyzer: "docx",
208
+ status: "ok",
209
+ kind: "docx",
210
+ text: "",
211
+ html: "",
212
+ warnings: [DOCX_METADATA_STUB]
213
+ };
214
+ case "image":
215
+ return {
216
+ fileKind: "image",
217
+ analyzer: "image",
218
+ status: "ok",
219
+ kind: "image",
220
+ text: "",
221
+ confidence: 0,
222
+ ocrUsed: true,
223
+ warnings: [IMAGE_METADATA_NOTE]
224
+ };
225
+ case "text":
226
+ return analyzeText(bytesInput, { signal });
227
+ default:
228
+ return notImplementedResult(kind, "none", [UNKNOWN_FORMAT_WARNING]);
229
+ }
230
+ }
231
+ async function convertToHtml(input, options) {
232
+ throwIfAborted(options?.signal);
233
+ const resolved = await kindOf(input);
234
+ const kind = detectFileKind(resolved);
235
+ const bytesInput = input;
236
+ const signal = options?.signal;
237
+ switch (kind) {
238
+ case "pdf":
239
+ return notImplementedResult("pdf", "pdf", [BROWSER_PDF_UNSUPPORTED_WARNING]);
240
+ case "docx": {
241
+ const data = await toUint8Array(bytesInput);
242
+ if (data.byteLength === 0) {
243
+ return {
244
+ fileKind: "docx",
245
+ analyzer: "docx",
246
+ status: "ok",
247
+ kind: "docx",
248
+ text: "",
249
+ html: "",
250
+ warnings: ["No document bytes were provided for analysis."]
251
+ };
252
+ }
253
+ const [textPart, htmlPart] = await Promise.all([
254
+ extractTextFromDocx(data),
255
+ convertDocxToHtml(data)
256
+ ]);
257
+ return {
258
+ fileKind: "docx",
259
+ analyzer: "docx",
260
+ status: "ok",
261
+ kind: "docx",
262
+ text: textPart.text,
263
+ html: htmlPart.html,
264
+ warnings: [...textPart.warnings, ...htmlPart.warnings]
265
+ };
266
+ }
267
+ case "text": {
268
+ const t = await analyzeText(bytesInput, { signal });
269
+ const html = `<pre>${escapeHtmlMinimal(t.text)}</pre>`;
270
+ return {
271
+ ...t,
272
+ html,
273
+ warnings: [
274
+ ...t.warnings,
275
+ "HTML for plain text is a <pre> wrapper around decoded UTF-8 content."
276
+ ]
277
+ };
278
+ }
279
+ case "image":
280
+ return {
281
+ fileKind: "image",
282
+ analyzer: "image",
283
+ status: "ok",
284
+ kind: "image",
285
+ text: "",
286
+ confidence: 0,
287
+ ocrUsed: true,
288
+ warnings: ["No HTML representation for raster images; use extractText / runOcr."]
289
+ };
290
+ default:
291
+ return notImplementedResult(kind, "none", [UNKNOWN_FORMAT_WARNING]);
292
+ }
293
+ }
294
+ async function runOcr(input, options) {
295
+ throwIfAborted(options?.signal);
296
+ const resolved = await kindOf(input);
297
+ const kind = detectFileKind(resolved);
298
+ const bytesInput = input;
299
+ const signal = options?.signal;
300
+ switch (kind) {
301
+ case "pdf":
302
+ return notImplementedResult("pdf", "pdf", [BROWSER_PDF_UNSUPPORTED_WARNING]);
303
+ case "image": {
304
+ const data = await toUint8Array(bytesInput);
305
+ if (data.byteLength === 0) {
306
+ return {
307
+ fileKind: "image",
308
+ analyzer: "image",
309
+ status: "ok",
310
+ kind: "image",
311
+ text: "",
312
+ confidence: 0,
313
+ ocrUsed: true,
314
+ warnings: ["No image bytes were provided for analysis."]
315
+ };
316
+ }
317
+ const ocrOpts = {
318
+ ...options?.ocr ?? {},
319
+ signal: options?.ocr?.signal ?? signal
320
+ };
321
+ const r = await ocr(data, ocrOpts);
322
+ return {
323
+ fileKind: "image",
324
+ analyzer: "image",
325
+ status: "ok",
326
+ kind: "image",
327
+ text: r.text,
328
+ confidence: r.confidence,
329
+ ocrUsed: r.ocrUsed,
330
+ warnings: []
331
+ };
332
+ }
333
+ case "docx": {
334
+ const data = await toUint8Array(bytesInput);
335
+ if (data.byteLength === 0) {
336
+ return {
337
+ fileKind: "docx",
338
+ analyzer: "docx",
339
+ status: "ok",
340
+ kind: "docx",
341
+ text: "",
342
+ html: "",
343
+ warnings: ["No document bytes were provided for analysis."]
344
+ };
345
+ }
346
+ const r = await analyzeDocx(data);
347
+ return {
348
+ fileKind: "docx",
349
+ analyzer: "docx",
350
+ status: "ok",
351
+ kind: "docx",
352
+ text: r.text,
353
+ html: r.html,
354
+ warnings: [
355
+ ...r.warnings,
356
+ "OCR does not apply to DOCX; returned structured text/HTML extract."
357
+ ]
358
+ };
359
+ }
360
+ case "text":
361
+ return analyzeText(bytesInput, { signal });
362
+ default:
363
+ return notImplementedResult(kind, "none", [UNKNOWN_FORMAT_WARNING]);
364
+ }
13
365
  }
14
366
 
15
- export { analyzeFile };
367
+ export { BROWSER_PDF_UNSUPPORTED_WARNING, analyzeFile, convertToHtml, extractMetadata, extractText, runOcr };
16
368
  //# sourceMappingURL=index.js.map
17
369
  //# sourceMappingURL=index.js.map
package/dist/index.js.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"sources":["../src/analyzeFile.ts"],"names":["analyzeWithCore"],"mappings":";;;AASA,SAAS,mBAAmB,KAAA,EAAsD;AAChF,EAAA,MAAM,EAAA,GACJ,KAAA,YAAiB,IAAA,IACjB,KAAA,YAAiB,QACjB,KAAA,YAAiB,WAAA;AACnB,EAAA,IAAI,CAAC,EAAA,EAAI;AACP,IAAA,MAAM,IAAI,kBAAkB,wCAAwC,CAAA;AAAA,EACtE;AACF;AAKA,eAAsB,WAAA,CACpB,OACA,OAAA,EACyB;AACzB,EAAA,kBAAA,CAAmB,KAAK,CAAA;AACxB,EAAA,OAAOA,aAAA,CAAgB,OAAO,OAAO,CAAA;AACvC","file":"index.js","sourcesContent":["import { analyzeFile as analyzeWithCore, InvalidInputError } from \"@dragon708/docmind-core\";\r\nimport type { AnalyzeOptions, AnalysisResult } from \"@dragon708/docmind-core\";\r\n\r\n/**\r\n * Inputs supported by the browser entry (DOM types only — no `fs`, no Node `Buffer` in the public surface).\r\n * For richer hints (`NamedInput`, etc.), import `analyzeFile` from the `core` package directly.\r\n */\r\nexport type BrowserAnalyzeInput = File | Blob | ArrayBuffer;\r\n\r\nfunction assertBrowserInput(input: unknown): asserts input is BrowserAnalyzeInput {\r\n const ok =\r\n input instanceof File ||\r\n input instanceof Blob ||\r\n input instanceof ArrayBuffer;\r\n if (!ok) {\r\n throw new InvalidInputError(\"Expected a File, Blob, or ArrayBuffer.\");\r\n }\r\n}\r\n\r\n/**\r\n * Thin wrapper around `@dragon708/docmind-core`’s `analyzeFile` for browser runtimes.\r\n */\r\nexport async function analyzeFile(\r\n input: BrowserAnalyzeInput,\r\n options?: AnalyzeOptions,\r\n): Promise<AnalysisResult> {\r\n assertBrowserInput(input);\r\n return analyzeWithCore(input, options);\r\n}\r\n"]}
1
+ {"version":3,"sources":["../src/analyzers/docx.ts","../src/analyzers/image.ts","../src/analyzeFile.ts","../src/publicActions.ts"],"names":["extractDocx","toUint8Array","assertBrowserInput","InvalidInputError","assertValidAnalyzeFileInput","detectFileKind","notImplementedResult","ocr","analyzeText","UNKNOWN_FORMAT_WARNING"],"mappings":";;;;;;AAOA,eAAsB,qBAAA,CACpB,OACA,MAAA,EACyB;AACzB,EAAA,IAAI,QAAQ,OAAA,EAAS;AACnB,IAAA,MAAM,GAAA,GAAM,IAAI,KAAA,CAAM,2BAA2B,CAAA;AACjD,IAAA,GAAA,CAAI,IAAA,GAAO,YAAA;AACX,IAAA,MAAM,GAAA;AAAA,EACR;AAEA,EAAA,MAAM,IAAA,GAAO,MAAM,YAAA,CAAa,KAAK,CAAA;AACrC,EAAA,IAAI,IAAA,CAAK,eAAe,CAAA,EAAG;AACzB,IAAA,OAAO;AAAA,MACL,QAAA,EAAU,MAAA;AAAA,MACV,QAAA,EAAU,MAAA;AAAA,MACV,MAAA,EAAQ,IAAA;AAAA,MACR,IAAA,EAAM,MAAA;AAAA,MACN,IAAA,EAAM,EAAA;AAAA,MACN,IAAA,EAAM,EAAA;AAAA,MACN,QAAA,EAAU,CAAC,+CAA+C;AAAA,KAC5D;AAAA,EACF;AAEA,EAAA,MAAM,CAAA,GAAI,MAAMA,WAAA,CAAY,IAAI,CAAA;AAChC,EAAA,OAAO;AAAA,IACL,QAAA,EAAU,MAAA;AAAA,IACV,QAAA,EAAU,MAAA;AAAA,IACV,MAAA,EAAQ,IAAA;AAAA,IACR,IAAA,EAAM,MAAA;AAAA,IACN,MAAM,CAAA,CAAE,IAAA;AAAA,IACR,MAAM,CAAA,CAAE,IAAA;AAAA,IACR,QAAA,EAAU,CAAC,GAAG,CAAA,CAAE,QAAQ;AAAA,GAC1B;AACF;AChCA,eAAsB,sBAAA,CACpB,OACA,OAAA,EACyB;AACzB,EAAA,IAAI,OAAA,EAAS,QAAQ,OAAA,EAAS;AAC5B,IAAA,MAAM,GAAA,GAAM,IAAI,KAAA,CAAM,2BAA2B,CAAA;AACjD,IAAA,GAAA,CAAI,IAAA,GAAO,YAAA;AACX,IAAA,MAAM,GAAA;AAAA,EACR;AAEA,EAAA,MAAM,IAAA,GAAO,MAAMC,YAAAA,CAAa,KAAK,CAAA;AACrC,EAAA,IAAI,IAAA,CAAK,eAAe,CAAA,EAAG;AACzB,IAAA,OAAO;AAAA,MACL,QAAA,EAAU,OAAA;AAAA,MACV,QAAA,EAAU,OAAA;AAAA,MACV,MAAA,EAAQ,IAAA;AAAA,MACR,IAAA,EAAM,OAAA;AAAA,MACN,IAAA,EAAM,EAAA;AAAA,MACN,UAAA,EAAY,CAAA;AAAA,MACZ,OAAA,EAAS,IAAA;AAAA,MACT,QAAA,EAAU,CAAC,4CAA4C;AAAA,KACzD;AAAA,EACF;AAEA,EAAA,MAAM,OAAA,GAAU;AAAA,IACd,GAAI,OAAA,EAAS,GAAA,IAAO,EAAC;AAAA,IACrB,MAAA,EAAQ,OAAA,EAAS,GAAA,EAAK,MAAA,IAAU,OAAA,EAAS;AAAA,GAC3C;AAEA,EAAA,MAAM,CAAA,GAAI,MAAM,GAAA,CAAI,IAAA,EAAM,OAAO,CAAA;AACjC,EAAA,OAAO;AAAA,IACL,QAAA,EAAU,OAAA;AAAA,IACV,QAAA,EAAU,OAAA;AAAA,IACV,MAAA,EAAQ,IAAA;AAAA,IACR,IAAA,EAAM,OAAA;AAAA,IACN,MAAM,CAAA,CAAE,IAAA;AAAA,IACR,YAAY,CAAA,CAAE,UAAA;AAAA,IACd,SAAS,CAAA,CAAE,OAAA;AAAA,IACX,UAAU;AAAC,GACb;AACF;;;ACjCO,IAAM,+BAAA,GACX;AAOF,SAAS,mBAAmB,KAAA,EAAsD;AAChF,EAAA,MAAM,EAAA,GACJ,KAAA,YAAiB,IAAA,IACjB,KAAA,YAAiB,QACjB,KAAA,YAAiB,WAAA;AACnB,EAAA,IAAI,CAAC,EAAA,EAAI;AACP,IAAA,MAAM,IAAI,kBAAkB,wCAAwC,CAAA;AAAA,EACtE;AACF;AAKA,eAAsB,WAAA,CACpB,OACA,OAAA,EACyB;AACzB,EAAA,IAAI,OAAA,EAAS,QAAQ,OAAA,EAAS;AAC5B,IAAA,MAAM,GAAA,GAAM,IAAI,KAAA,CAAM,2BAA2B,CAAA;AACjD,IAAA,GAAA,CAAI,IAAA,GAAO,YAAA;AACX,IAAA,MAAM,GAAA;AAAA,EACR;AAEA,EAAA,kBAAA,CAAmB,KAAK,CAAA;AACxB,EAAA,2BAAA,CAA4B,KAAK,CAAA;AAEjC,EAAA,MAAM,QAAA,GAAW,eAAe,KAA4B,CAAA;AAE5D,EAAA,MAAM,UAAA,GAAa,KAAA;AAEnB,EAAA,QAAQ,QAAA;AAAU,IAChB,KAAK,KAAA;AACH,MAAA,OAAO,oBAAA,CAAqB,KAAA,EAAO,KAAA,EAAO,CAAC,+BAA+B,CAAC,CAAA;AAAA,IAC7E,KAAK,MAAA;AACH,MAAA,OAAO,qBAAA,CAAsB,UAAA,EAAY,OAAA,EAAS,MAAM,CAAA;AAAA,IAC1D,KAAK,OAAA;AACH,MAAA,OAAO,sBAAA,CAAuB,YAAY,OAAO,CAAA;AAAA,IACnD,KAAK,MAAA;AACH,MAAA,OAAO,YAAY,UAAA,EAAY,EAAE,MAAA,EAAQ,OAAA,EAAS,QAAQ,CAAA;AAAA,IAC5D;AACE,MAAA,OAAO,oBAAA,CAAqB,QAAA,EAAU,MAAA,EAAQ,CAAC,sBAAsB,CAAC,CAAA;AAAA;AAE5E;AC7CA,SAASC,oBAAmB,KAAA,EAAsD;AAChF,EAAA,MAAM,EAAA,GACJ,KAAA,YAAiB,IAAA,IACjB,KAAA,YAAiB,QACjB,KAAA,YAAiB,WAAA;AACnB,EAAA,IAAI,CAAC,EAAA,EAAI;AACP,IAAA,MAAM,IAAIC,kBAAkB,wCAAwC,CAAA;AAAA,EACtE;AACF;AAEA,SAAS,eAAe,MAAA,EAA4B;AAClD,EAAA,IAAI,QAAQ,OAAA,EAAS;AACnB,IAAA,MAAM,GAAA,GAAM,IAAI,KAAA,CAAM,2BAA2B,CAAA;AACjD,IAAA,GAAA,CAAI,IAAA,GAAO,YAAA;AACX,IAAA,MAAM,GAAA;AAAA,EACR;AACF;AAEA,SAAS,kBAAkB,CAAA,EAAmB;AAC5C,EAAA,OAAO,CAAA,CACJ,OAAA,CAAQ,IAAA,EAAM,OAAO,EACrB,OAAA,CAAQ,IAAA,EAAM,MAAM,CAAA,CACpB,QAAQ,IAAA,EAAM,MAAM,CAAA,CACpB,OAAA,CAAQ,MAAM,QAAQ,CAAA;AAC3B;AAEA,IAAM,kBAAA,GACJ,yGAAA;AAEF,IAAM,mBAAA,GACJ,6DAAA;AAEF,eAAe,OAAO,KAAA,EAA0D;AAC9E,EAAAD,oBAAmB,KAAK,CAAA;AACxB,EAAAE,4BAA4B,KAAK,CAAA;AACjC,EAAA,OAAO,KAAA;AACT;AAMA,eAAsB,WAAA,CACpB,OACA,OAAA,EACyB;AACzB,EAAA,cAAA,CAAe,SAAS,MAAM,CAAA;AAC9B,EAAA,MAAM,QAAA,GAAW,MAAM,MAAA,CAAO,KAAK,CAAA;AACnC,EAAA,MAAM,IAAA,GAAOC,eAAe,QAAQ,CAAA;AACpC,EAAA,MAAM,UAAA,GAAa,KAAA;AACnB,EAAA,MAAM,SAAS,OAAA,EAAS,MAAA;AAExB,EAAA,QAAQ,IAAA;AAAM,IACZ,KAAK,KAAA;AACH,MAAA,OAAOC,oBAAAA,CAAqB,KAAA,EAAO,KAAA,EAAO,CAAC,+BAA+B,CAAC,CAAA;AAAA,IAC7E,KAAK,MAAA,EAAQ;AACX,MAAA,MAAM,IAAA,GAAO,MAAML,YAAAA,CAAa,UAAU,CAAA;AAC1C,MAAA,IAAI,IAAA,CAAK,eAAe,CAAA,EAAG;AACzB,QAAA,OAAO;AAAA,UACL,QAAA,EAAU,MAAA;AAAA,UACV,QAAA,EAAU,MAAA;AAAA,UACV,MAAA,EAAQ,IAAA;AAAA,UACR,IAAA,EAAM,MAAA;AAAA,UACN,IAAA,EAAM,EAAA;AAAA,UACN,IAAA,EAAM,EAAA;AAAA,UACN,QAAA,EAAU,CAAC,+CAA+C;AAAA,SAC5D;AAAA,MACF;AACA,MAAA,MAAM,CAAA,GAAI,MAAM,mBAAA,CAAoB,IAAI,CAAA;AACxC,MAAA,OAAO;AAAA,QACL,QAAA,EAAU,MAAA;AAAA,QACV,QAAA,EAAU,MAAA;AAAA,QACV,MAAA,EAAQ,IAAA;AAAA,QACR,IAAA,EAAM,MAAA;AAAA,QACN,MAAM,CAAA,CAAE,IAAA;AAAA,QACR,IAAA,EAAM,EAAA;AAAA,QACN,UAAU,CAAA,CAAE;AAAA,OACd;AAAA,IACF;AAAA,IACA,KAAK,OAAA,EAAS;AACZ,MAAA,MAAM,IAAA,GAAO,MAAMA,YAAAA,CAAa,UAAU,CAAA;AAC1C,MAAA,IAAI,IAAA,CAAK,eAAe,CAAA,EAAG;AACzB,QAAA,OAAO;AAAA,UACL,QAAA,EAAU,OAAA;AAAA,UACV,QAAA,EAAU,OAAA;AAAA,UACV,MAAA,EAAQ,IAAA;AAAA,UACR,IAAA,EAAM,OAAA;AAAA,UACN,IAAA,EAAM,EAAA;AAAA,UACN,UAAA,EAAY,CAAA;AAAA,UACZ,OAAA,EAAS,IAAA;AAAA,UACT,QAAA,EAAU,CAAC,4CAA4C;AAAA,SACzD;AAAA,MACF;AACA,MAAA,MAAM,OAAA,GAAU;AAAA,QACd,GAAI,OAAA,EAAS,GAAA,IAAO,EAAC;AAAA,QACrB,MAAA,EAAQ,OAAA,EAAS,GAAA,EAAK,MAAA,IAAU;AAAA,OAClC;AACA,MAAA,MAAM,CAAA,GAAI,MAAMM,GAAAA,CAAI,IAAA,EAAM,OAAO,CAAA;AACjC,MAAA,OAAO;AAAA,QACL,QAAA,EAAU,OAAA;AAAA,QACV,QAAA,EAAU,OAAA;AAAA,QACV,MAAA,EAAQ,IAAA;AAAA,QACR,IAAA,EAAM,OAAA;AAAA,QACN,MAAM,CAAA,CAAE,IAAA;AAAA,QACR,YAAY,CAAA,CAAE,UAAA;AAAA,QACd,SAAS,CAAA,CAAE,OAAA;AAAA,QACX,UAAU;AAAC,OACb;AAAA,IACF;AAAA,IACA,KAAK,MAAA;AACH,MAAA,OAAOC,WAAAA,CAAY,UAAA,EAAY,EAAE,MAAA,EAAQ,CAAA;AAAA,IAC3C;AACE,MAAA,OAAOF,oBAAAA,CAAqB,IAAA,EAAM,MAAA,EAAQ,CAACG,sBAAsB,CAAC,CAAA;AAAA;AAExE;AAMA,eAAsB,eAAA,CACpB,OACA,OAAA,EACyB;AACzB,EAAA,cAAA,CAAe,SAAS,MAAM,CAAA;AAC9B,EAAA,MAAM,QAAA,GAAW,MAAM,MAAA,CAAO,KAAK,CAAA;AACnC,EAAA,MAAM,IAAA,GAAOJ,eAAe,QAAQ,CAAA;AACpC,EAAA,MAAM,UAAA,GAAa,KAAA;AACnB,EAAA,MAAM,SAAS,OAAA,EAAS,MAAA;AAExB,EAAA,QAAQ,IAAA;AAAM,IACZ,KAAK,KAAA;AACH,MAAA,OAAOC,oBAAAA,CAAqB,KAAA,EAAO,KAAA,EAAO,CAAC,+BAA+B,CAAC,CAAA;AAAA,IAC7E,KAAK,MAAA;AACH,MAAA,OAAO;AAAA,QACL,QAAA,EAAU,MAAA;AAAA,QACV,QAAA,EAAU,MAAA;AAAA,QACV,MAAA,EAAQ,IAAA;AAAA,QACR,IAAA,EAAM,MAAA;AAAA,QACN,IAAA,EAAM,EAAA;AAAA,QACN,IAAA,EAAM,EAAA;AAAA,QACN,QAAA,EAAU,CAAC,kBAAkB;AAAA,OAC/B;AAAA,IACF,KAAK,OAAA;AACH,MAAA,OAAO;AAAA,QACL,QAAA,EAAU,OAAA;AAAA,QACV,QAAA,EAAU,OAAA;AAAA,QACV,MAAA,EAAQ,IAAA;AAAA,QACR,IAAA,EAAM,OAAA;AAAA,QACN,IAAA,EAAM,EAAA;AAAA,QACN,UAAA,EAAY,CAAA;AAAA,QACZ,OAAA,EAAS,IAAA;AAAA,QACT,QAAA,EAAU,CAAC,mBAAmB;AAAA,OAChC;AAAA,IACF,KAAK,MAAA;AACH,MAAA,OAAOE,WAAAA,CAAY,UAAA,EAAY,EAAE,MAAA,EAAQ,CAAA;AAAA,IAC3C;AACE,MAAA,OAAOF,oBAAAA,CAAqB,IAAA,EAAM,MAAA,EAAQ,CAACG,sBAAsB,CAAC,CAAA;AAAA;AAExE;AAMA,eAAsB,aAAA,CACpB,OACA,OAAA,EACyB;AACzB,EAAA,cAAA,CAAe,SAAS,MAAM,CAAA;AAC9B,EAAA,MAAM,QAAA,GAAW,MAAM,MAAA,CAAO,KAAK,CAAA;AACnC,EAAA,MAAM,IAAA,GAAOJ,eAAe,QAAQ,CAAA;AACpC,EAAA,MAAM,UAAA,GAAa,KAAA;AACnB,EAAA,MAAM,SAAS,OAAA,EAAS,MAAA;AAExB,EAAA,QAAQ,IAAA;AAAM,IACZ,KAAK,KAAA;AACH,MAAA,OAAOC,oBAAAA,CAAqB,KAAA,EAAO,KAAA,EAAO,CAAC,+BAA+B,CAAC,CAAA;AAAA,IAC7E,KAAK,MAAA,EAAQ;AACX,MAAA,MAAM,IAAA,GAAO,MAAML,YAAAA,CAAa,UAAU,CAAA;AAC1C,MAAA,IAAI,IAAA,CAAK,eAAe,CAAA,EAAG;AACzB,QAAA,OAAO;AAAA,UACL,QAAA,EAAU,MAAA;AAAA,UACV,QAAA,EAAU,MAAA;AAAA,UACV,MAAA,EAAQ,IAAA;AAAA,UACR,IAAA,EAAM,MAAA;AAAA,UACN,IAAA,EAAM,EAAA;AAAA,UACN,IAAA,EAAM,EAAA;AAAA,UACN,QAAA,EAAU,CAAC,+CAA+C;AAAA,SAC5D;AAAA,MACF;AACA,MAAA,MAAM,CAAC,QAAA,EAAU,QAAQ,CAAA,GAAI,MAAM,QAAQ,GAAA,CAAI;AAAA,QAC7C,oBAAoB,IAAI,CAAA;AAAA,QACxB,kBAAkB,IAAI;AAAA,OACvB,CAAA;AACD,MAAA,OAAO;AAAA,QACL,QAAA,EAAU,MAAA;AAAA,QACV,QAAA,EAAU,MAAA;AAAA,QACV,MAAA,EAAQ,IAAA;AAAA,QACR,IAAA,EAAM,MAAA;AAAA,QACN,MAAM,QAAA,CAAS,IAAA;AAAA,QACf,MAAM,QAAA,CAAS,IAAA;AAAA,QACf,UAAU,CAAC,GAAG,SAAS,QAAA,EAAU,GAAG,SAAS,QAAQ;AAAA,OACvD;AAAA,IACF;AAAA,IACA,KAAK,MAAA,EAAQ;AACX,MAAA,MAAM,IAAI,MAAMO,WAAAA,CAAY,UAAA,EAAY,EAAE,QAAQ,CAAA;AAClD,MAAA,MAAM,IAAA,GAAO,CAAA,KAAA,EAAQ,iBAAA,CAAkB,CAAA,CAAE,IAAI,CAAC,CAAA,MAAA,CAAA;AAC9C,MAAA,OAAO;AAAA,QACL,GAAG,CAAA;AAAA,QACH,IAAA;AAAA,QACA,QAAA,EAAU;AAAA,UACR,GAAG,CAAA,CAAE,QAAA;AAAA,UACL;AAAA;AACF,OACF;AAAA,IACF;AAAA,IACA,KAAK,OAAA;AACH,MAAA,OAAO;AAAA,QACL,QAAA,EAAU,OAAA;AAAA,QACV,QAAA,EAAU,OAAA;AAAA,QACV,MAAA,EAAQ,IAAA;AAAA,QACR,IAAA,EAAM,OAAA;AAAA,QACN,IAAA,EAAM,EAAA;AAAA,QACN,UAAA,EAAY,CAAA;AAAA,QACZ,OAAA,EAAS,IAAA;AAAA,QACT,QAAA,EAAU,CAAC,qEAAqE;AAAA,OAClF;AAAA,IACF;AACE,MAAA,OAAOF,oBAAAA,CAAqB,IAAA,EAAM,MAAA,EAAQ,CAACG,sBAAsB,CAAC,CAAA;AAAA;AAExE;AAMA,eAAsB,MAAA,CACpB,OACA,OAAA,EACyB;AACzB,EAAA,cAAA,CAAe,SAAS,MAAM,CAAA;AAC9B,EAAA,MAAM,QAAA,GAAW,MAAM,MAAA,CAAO,KAAK,CAAA;AACnC,EAAA,MAAM,IAAA,GAAOJ,eAAe,QAAQ,CAAA;AACpC,EAAA,MAAM,UAAA,GAAa,KAAA;AACnB,EAAA,MAAM,SAAS,OAAA,EAAS,MAAA;AAExB,EAAA,QAAQ,IAAA;AAAM,IACZ,KAAK,KAAA;AACH,MAAA,OAAOC,oBAAAA,CAAqB,KAAA,EAAO,KAAA,EAAO,CAAC,+BAA+B,CAAC,CAAA;AAAA,IAC7E,KAAK,OAAA,EAAS;AACZ,MAAA,MAAM,IAAA,GAAO,MAAML,YAAAA,CAAa,UAAU,CAAA;AAC1C,MAAA,IAAI,IAAA,CAAK,eAAe,CAAA,EAAG;AACzB,QAAA,OAAO;AAAA,UACL,QAAA,EAAU,OAAA;AAAA,UACV,QAAA,EAAU,OAAA;AAAA,UACV,MAAA,EAAQ,IAAA;AAAA,UACR,IAAA,EAAM,OAAA;AAAA,UACN,IAAA,EAAM,EAAA;AAAA,UACN,UAAA,EAAY,CAAA;AAAA,UACZ,OAAA,EAAS,IAAA;AAAA,UACT,QAAA,EAAU,CAAC,4CAA4C;AAAA,SACzD;AAAA,MACF;AACA,MAAA,MAAM,OAAA,GAAU;AAAA,QACd,GAAI,OAAA,EAAS,GAAA,IAAO,EAAC;AAAA,QACrB,MAAA,EAAQ,OAAA,EAAS,GAAA,EAAK,MAAA,IAAU;AAAA,OAClC;AACA,MAAA,MAAM,CAAA,GAAI,MAAMM,GAAAA,CAAI,IAAA,EAAM,OAAO,CAAA;AACjC,MAAA,OAAO;AAAA,QACL,QAAA,EAAU,OAAA;AAAA,QACV,QAAA,EAAU,OAAA;AAAA,QACV,MAAA,EAAQ,IAAA;AAAA,QACR,IAAA,EAAM,OAAA;AAAA,QACN,MAAM,CAAA,CAAE,IAAA;AAAA,QACR,YAAY,CAAA,CAAE,UAAA;AAAA,QACd,SAAS,CAAA,CAAE,OAAA;AAAA,QACX,UAAU;AAAC,OACb;AAAA,IACF;AAAA,IACA,KAAK,MAAA,EAAQ;AACX,MAAA,MAAM,IAAA,GAAO,MAAMN,YAAAA,CAAa,UAAU,CAAA;AAC1C,MAAA,IAAI,IAAA,CAAK,eAAe,CAAA,EAAG;AACzB,QAAA,OAAO;AAAA,UACL,QAAA,EAAU,MAAA;AAAA,UACV,QAAA,EAAU,MAAA;AAAA,UACV,MAAA,EAAQ,IAAA;AAAA,UACR,IAAA,EAAM,MAAA;AAAA,UACN,IAAA,EAAM,EAAA;AAAA,UACN,IAAA,EAAM,EAAA;AAAA,UACN,QAAA,EAAU,CAAC,+CAA+C;AAAA,SAC5D;AAAA,MACF;AACA,MAAA,MAAM,CAAA,GAAI,MAAM,WAAA,CAAY,IAAI,CAAA;AAChC,MAAA,OAAO;AAAA,QACL,QAAA,EAAU,MAAA;AAAA,QACV,QAAA,EAAU,MAAA;AAAA,QACV,MAAA,EAAQ,IAAA;AAAA,QACR,IAAA,EAAM,MAAA;AAAA,QACN,MAAM,CAAA,CAAE,IAAA;AAAA,QACR,MAAM,CAAA,CAAE,IAAA;AAAA,QACR,QAAA,EAAU;AAAA,UACR,GAAG,CAAA,CAAE,QAAA;AAAA,UACL;AAAA;AACF,OACF;AAAA,IACF;AAAA,IACA,KAAK,MAAA;AACH,MAAA,OAAOO,WAAAA,CAAY,UAAA,EAAY,EAAE,MAAA,EAAQ,CAAA;AAAA,IAC3C;AACE,MAAA,OAAOF,oBAAAA,CAAqB,IAAA,EAAM,MAAA,EAAQ,CAACG,sBAAsB,CAAC,CAAA;AAAA;AAExE","file":"index.js","sourcesContent":["import { analyzeDocx as extractDocx } from \"@dragon708/docmind-docx\";\nimport type { AnalysisResult, FileLikeInput } from \"@dragon708/docmind-shared\";\nimport { toUint8Array } from \"@dragon708/docmind-shared\";\n\n/**\n * DOCX → `@dragon708/docmind-docx` (browser-safe: Mammoth + JSZip).\n */\nexport async function analyzeDocxForBrowser(\n input: FileLikeInput,\n signal?: AbortSignal,\n): Promise<AnalysisResult> {\n if (signal?.aborted) {\n const err = new Error(\"The operation was aborted\");\n err.name = \"AbortError\";\n throw err;\n }\n\n const data = await toUint8Array(input);\n if (data.byteLength === 0) {\n return {\n fileKind: \"docx\",\n analyzer: \"docx\",\n status: \"ok\",\n kind: \"docx\",\n text: \"\",\n html: \"\",\n warnings: [\"No document bytes were provided for analysis.\"],\n };\n }\n\n const r = await extractDocx(data);\n return {\n fileKind: \"docx\",\n analyzer: \"docx\",\n status: \"ok\",\n kind: \"docx\",\n text: r.text,\n html: r.html,\n warnings: [...r.warnings],\n };\n}\n","import { ocr } from \"@dragon708/docmind-ocr\";\nimport type { AnalysisResult, FileLikeInput } from \"@dragon708/docmind-shared\";\nimport { toUint8Array } from \"@dragon708/docmind-shared\";\nimport type { BrowserAnalyzeOptions } from \"../browserAnalyzeOptions.js\";\n\n/**\n * Image → `@dragon708/docmind-ocr` (Tesseract in WASM / browser).\n */\nexport async function analyzeImageForBrowser(\n input: FileLikeInput,\n options?: BrowserAnalyzeOptions,\n): Promise<AnalysisResult> {\n if (options?.signal?.aborted) {\n const err = new Error(\"The operation was aborted\");\n err.name = \"AbortError\";\n throw err;\n }\n\n const data = await toUint8Array(input);\n if (data.byteLength === 0) {\n return {\n fileKind: \"image\",\n analyzer: \"image\",\n status: \"ok\",\n kind: \"image\",\n text: \"\",\n confidence: 0,\n ocrUsed: true,\n warnings: [\"No image bytes were provided for analysis.\"],\n };\n }\n\n const ocrOpts = {\n ...(options?.ocr ?? {}),\n signal: options?.ocr?.signal ?? options?.signal,\n };\n\n const r = await ocr(data, ocrOpts);\n return {\n fileKind: \"image\",\n analyzer: \"image\",\n status: \"ok\",\n kind: \"image\",\n text: r.text,\n confidence: r.confidence,\n ocrUsed: r.ocrUsed,\n warnings: [],\n };\n}\n","import type { AnalysisResult, FileLikeInput } from \"@dragon708/docmind-shared\";\r\nimport {\r\n analyzeText,\r\n assertValidAnalyzeFileInput,\r\n detectFileKind,\r\n InvalidInputError,\r\n notImplementedResult,\r\n UNKNOWN_FORMAT_WARNING,\r\n} from \"@dragon708/docmind-shared\";\r\nimport type { DetectFileKindInput } from \"@dragon708/docmind-shared\";\r\nimport type { BrowserAnalyzeOptions } from \"./browserAnalyzeOptions.js\";\r\nimport { analyzeDocxForBrowser } from \"./analyzers/docx.js\";\r\nimport { analyzeImageForBrowser } from \"./analyzers/image.js\";\r\n\r\n/** PDF is not processed in the browser; use `@dragon708/docmind-node` on the server. */\r\nexport const BROWSER_PDF_UNSUPPORTED_WARNING =\r\n \"PDF text extraction is not available in the browser runtime; use @dragon708/docmind-node on the server.\";\r\n\r\n/**\r\n * Inputs supported by the browser entry (DOM types only — no `fs`, no Node `Buffer` in the public surface).\r\n */\r\nexport type BrowserAnalyzeInput = File | Blob | ArrayBuffer;\r\n\r\nfunction assertBrowserInput(input: unknown): asserts input is BrowserAnalyzeInput {\r\n const ok =\r\n input instanceof File ||\r\n input instanceof Blob ||\r\n input instanceof ArrayBuffer;\r\n if (!ok) {\r\n throw new InvalidInputError(\"Expected a File, Blob, or ArrayBuffer.\");\r\n }\r\n}\r\n\r\n/**\r\n * Browser-only router: DOCX, images (OCR), and text. PDF yields `not_implemented` with a clear warning.\r\n */\r\nexport async function analyzeFile(\r\n input: BrowserAnalyzeInput,\r\n options?: BrowserAnalyzeOptions,\r\n): Promise<AnalysisResult> {\r\n if (options?.signal?.aborted) {\r\n const err = new Error(\"The operation was aborted\");\r\n err.name = \"AbortError\";\r\n throw err;\r\n }\r\n\r\n assertBrowserInput(input);\r\n assertValidAnalyzeFileInput(input);\r\n\r\n const fileKind = detectFileKind(input as DetectFileKindInput);\r\n\r\n const bytesInput = input as FileLikeInput;\r\n\r\n switch (fileKind) {\r\n case \"pdf\":\r\n return notImplementedResult(\"pdf\", \"pdf\", [BROWSER_PDF_UNSUPPORTED_WARNING]);\r\n case \"docx\":\r\n return analyzeDocxForBrowser(bytesInput, options?.signal);\r\n case \"image\":\r\n return analyzeImageForBrowser(bytesInput, options);\r\n case \"text\":\r\n return analyzeText(bytesInput, { signal: options?.signal });\r\n default:\r\n return notImplementedResult(fileKind, \"none\", [UNKNOWN_FORMAT_WARNING]);\r\n }\r\n}\r\n","import type { AnalysisResult, DetectFileKindInput, FileLikeInput } from \"@dragon708/docmind-shared\";\nimport {\n analyzeText,\n assertValidAnalyzeFileInput,\n detectFileKind,\n InvalidInputError,\n notImplementedResult,\n UNKNOWN_FORMAT_WARNING,\n toUint8Array,\n} from \"@dragon708/docmind-shared\";\nimport {\n analyzeDocx,\n convertDocxToHtml,\n extractTextFromDocx,\n} from \"@dragon708/docmind-docx\";\nimport { ocr } from \"@dragon708/docmind-ocr\";\nimport type { BrowserAnalyzeOptions } from \"./browserAnalyzeOptions.js\";\nimport { BROWSER_PDF_UNSUPPORTED_WARNING } from \"./analyzeFile.js\";\nimport type { BrowserAnalyzeInput } from \"./analyzeFile.js\";\n\nfunction assertBrowserInput(input: unknown): asserts input is BrowserAnalyzeInput {\n const ok =\n input instanceof File ||\n input instanceof Blob ||\n input instanceof ArrayBuffer;\n if (!ok) {\n throw new InvalidInputError(\"Expected a File, Blob, or ArrayBuffer.\");\n }\n}\n\nfunction throwIfAborted(signal?: AbortSignal): void {\n if (signal?.aborted) {\n const err = new Error(\"The operation was aborted\");\n err.name = \"AbortError\";\n throw err;\n }\n}\n\nfunction escapeHtmlMinimal(s: string): string {\n return s\n .replace(/&/g, \"&amp;\")\n .replace(/</g, \"&lt;\")\n .replace(/>/g, \"&gt;\")\n .replace(/\"/g, \"&quot;\");\n}\n\nconst DOCX_METADATA_STUB =\n \"Structured document metadata for DOCX is not exposed as a separate API; use extractText or analyzeFile.\";\n\nconst IMAGE_METADATA_NOTE =\n \"Raster images have no document metadata bundle in this API.\";\n\nasync function kindOf(input: BrowserAnalyzeInput): Promise<DetectFileKindInput> {\n assertBrowserInput(input);\n assertValidAnalyzeFileInput(input);\n return input as DetectFileKindInput;\n}\n\n/**\n * Text only: DOCX → `extractTextFromDocx`; imagen → `ocr`; texto → `analyzeText`.\n * PDF no está soportado en el navegador (mismo aviso que `analyzeFile`).\n */\nexport async function extractText(\n input: BrowserAnalyzeInput,\n options?: BrowserAnalyzeOptions,\n): Promise<AnalysisResult> {\n throwIfAborted(options?.signal);\n const resolved = await kindOf(input);\n const kind = detectFileKind(resolved);\n const bytesInput = input as FileLikeInput;\n const signal = options?.signal;\n\n switch (kind) {\n case \"pdf\":\n return notImplementedResult(\"pdf\", \"pdf\", [BROWSER_PDF_UNSUPPORTED_WARNING]);\n case \"docx\": {\n const data = await toUint8Array(bytesInput);\n if (data.byteLength === 0) {\n return {\n fileKind: \"docx\",\n analyzer: \"docx\",\n status: \"ok\",\n kind: \"docx\",\n text: \"\",\n html: \"\",\n warnings: [\"No document bytes were provided for analysis.\"],\n };\n }\n const r = await extractTextFromDocx(data);\n return {\n fileKind: \"docx\",\n analyzer: \"docx\",\n status: \"ok\",\n kind: \"docx\",\n text: r.text,\n html: \"\",\n warnings: r.warnings,\n };\n }\n case \"image\": {\n const data = await toUint8Array(bytesInput);\n if (data.byteLength === 0) {\n return {\n fileKind: \"image\",\n analyzer: \"image\",\n status: \"ok\",\n kind: \"image\",\n text: \"\",\n confidence: 0,\n ocrUsed: true,\n warnings: [\"No image bytes were provided for analysis.\"],\n };\n }\n const ocrOpts = {\n ...(options?.ocr ?? {}),\n signal: options?.ocr?.signal ?? signal,\n };\n const r = await ocr(data, ocrOpts);\n return {\n fileKind: \"image\",\n analyzer: \"image\",\n status: \"ok\",\n kind: \"image\",\n text: r.text,\n confidence: r.confidence,\n ocrUsed: r.ocrUsed,\n warnings: [],\n };\n }\n case \"text\":\n return analyzeText(bytesInput, { signal });\n default:\n return notImplementedResult(kind, \"none\", [UNKNOWN_FORMAT_WARNING]);\n }\n}\n\n/**\n * Metadatos: en el navegador no hay pipeline PDF ni metadatos DOCX dedicados;\n * DOCX/imagen con avisos; texto → `analyzeText`.\n */\nexport async function extractMetadata(\n input: BrowserAnalyzeInput,\n options?: BrowserAnalyzeOptions,\n): Promise<AnalysisResult> {\n throwIfAborted(options?.signal);\n const resolved = await kindOf(input);\n const kind = detectFileKind(resolved);\n const bytesInput = input as FileLikeInput;\n const signal = options?.signal;\n\n switch (kind) {\n case \"pdf\":\n return notImplementedResult(\"pdf\", \"pdf\", [BROWSER_PDF_UNSUPPORTED_WARNING]);\n case \"docx\":\n return {\n fileKind: \"docx\",\n analyzer: \"docx\",\n status: \"ok\",\n kind: \"docx\",\n text: \"\",\n html: \"\",\n warnings: [DOCX_METADATA_STUB],\n };\n case \"image\":\n return {\n fileKind: \"image\",\n analyzer: \"image\",\n status: \"ok\",\n kind: \"image\",\n text: \"\",\n confidence: 0,\n ocrUsed: true,\n warnings: [IMAGE_METADATA_NOTE],\n };\n case \"text\":\n return analyzeText(bytesInput, { signal });\n default:\n return notImplementedResult(kind, \"none\", [UNKNOWN_FORMAT_WARNING]);\n }\n}\n\n/**\n * HTML: DOCX → `extractTextFromDocx` + `convertDocxToHtml`; texto → `<pre>`;\n * PDF/imagen no aplican en browser como HTML rico.\n */\nexport async function convertToHtml(\n input: BrowserAnalyzeInput,\n options?: BrowserAnalyzeOptions,\n): Promise<AnalysisResult> {\n throwIfAborted(options?.signal);\n const resolved = await kindOf(input);\n const kind = detectFileKind(resolved);\n const bytesInput = input as FileLikeInput;\n const signal = options?.signal;\n\n switch (kind) {\n case \"pdf\":\n return notImplementedResult(\"pdf\", \"pdf\", [BROWSER_PDF_UNSUPPORTED_WARNING]);\n case \"docx\": {\n const data = await toUint8Array(bytesInput);\n if (data.byteLength === 0) {\n return {\n fileKind: \"docx\",\n analyzer: \"docx\",\n status: \"ok\",\n kind: \"docx\",\n text: \"\",\n html: \"\",\n warnings: [\"No document bytes were provided for analysis.\"],\n };\n }\n const [textPart, htmlPart] = await Promise.all([\n extractTextFromDocx(data),\n convertDocxToHtml(data),\n ]);\n return {\n fileKind: \"docx\",\n analyzer: \"docx\",\n status: \"ok\",\n kind: \"docx\",\n text: textPart.text,\n html: htmlPart.html,\n warnings: [...textPart.warnings, ...htmlPart.warnings],\n };\n }\n case \"text\": {\n const t = await analyzeText(bytesInput, { signal });\n const html = `<pre>${escapeHtmlMinimal(t.text)}</pre>`;\n return {\n ...t,\n html,\n warnings: [\n ...t.warnings,\n \"HTML for plain text is a <pre> wrapper around decoded UTF-8 content.\",\n ],\n } as AnalysisResult;\n }\n case \"image\":\n return {\n fileKind: \"image\",\n analyzer: \"image\",\n status: \"ok\",\n kind: \"image\",\n text: \"\",\n confidence: 0,\n ocrUsed: true,\n warnings: [\"No HTML representation for raster images; use extractText / runOcr.\"],\n };\n default:\n return notImplementedResult(kind, \"none\", [UNKNOWN_FORMAT_WARNING]);\n }\n}\n\n/**\n * OCR: imagen → `ocr`; DOCX → `analyzeDocx` con aviso (sin OCR); texto → `analyzeText`.\n * PDF no soportado en browser.\n */\nexport async function runOcr(\n input: BrowserAnalyzeInput,\n options?: BrowserAnalyzeOptions,\n): Promise<AnalysisResult> {\n throwIfAborted(options?.signal);\n const resolved = await kindOf(input);\n const kind = detectFileKind(resolved);\n const bytesInput = input as FileLikeInput;\n const signal = options?.signal;\n\n switch (kind) {\n case \"pdf\":\n return notImplementedResult(\"pdf\", \"pdf\", [BROWSER_PDF_UNSUPPORTED_WARNING]);\n case \"image\": {\n const data = await toUint8Array(bytesInput);\n if (data.byteLength === 0) {\n return {\n fileKind: \"image\",\n analyzer: \"image\",\n status: \"ok\",\n kind: \"image\",\n text: \"\",\n confidence: 0,\n ocrUsed: true,\n warnings: [\"No image bytes were provided for analysis.\"],\n };\n }\n const ocrOpts = {\n ...(options?.ocr ?? {}),\n signal: options?.ocr?.signal ?? signal,\n };\n const r = await ocr(data, ocrOpts);\n return {\n fileKind: \"image\",\n analyzer: \"image\",\n status: \"ok\",\n kind: \"image\",\n text: r.text,\n confidence: r.confidence,\n ocrUsed: r.ocrUsed,\n warnings: [],\n };\n }\n case \"docx\": {\n const data = await toUint8Array(bytesInput);\n if (data.byteLength === 0) {\n return {\n fileKind: \"docx\",\n analyzer: \"docx\",\n status: \"ok\",\n kind: \"docx\",\n text: \"\",\n html: \"\",\n warnings: [\"No document bytes were provided for analysis.\"],\n };\n }\n const r = await analyzeDocx(data);\n return {\n fileKind: \"docx\",\n analyzer: \"docx\",\n status: \"ok\",\n kind: \"docx\",\n text: r.text,\n html: r.html,\n warnings: [\n ...r.warnings,\n \"OCR does not apply to DOCX; returned structured text/HTML extract.\",\n ],\n };\n }\n case \"text\":\n return analyzeText(bytesInput, { signal });\n default:\n return notImplementedResult(kind, \"none\", [UNKNOWN_FORMAT_WARNING]);\n }\n}\n"]}
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@dragon708/docmind-browser",
3
- "version": "0.1.0-alpha.1",
4
- "description": "Browser-safe DocMind entry (File, Blob, ArrayBuffer) delegating to core.",
3
+ "version": "1.1.0",
4
+ "description": "Browser DocMind entry: DOCX, OCR, text (no PDF; use @dragon708/docmind-node on the server).",
5
5
  "type": "module",
6
6
  "sideEffects": false,
7
7
  "main": "./dist/index.js",
@@ -32,11 +32,14 @@
32
32
  ],
33
33
  "license": "MIT",
34
34
  "dependencies": {
35
- "@dragon708/docmind-core": "^0.1.0-alpha.1"
35
+ "@dragon708/docmind-docx": "^1.0.0",
36
+ "@dragon708/docmind-ocr": "^1.0.0",
37
+ "@dragon708/docmind-shared": "^1.0.0"
36
38
  },
37
39
  "devDependencies": {
38
40
  "@types/node": "^20.19.37",
39
41
  "tsup": "^8.5.1",
40
- "typescript": "^5.9.3"
42
+ "typescript": "^5.9.3",
43
+ "vitest": "^1.6.1"
41
44
  }
42
45
  }