@dragon708/docmind-browser 1.0.0 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.ts +23 -2
- package/dist/index.js +267 -3
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/dist/index.d.ts
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import { AnalysisResult } from '@dragon708/docmind-shared';
|
|
2
|
-
export { AnalysisAnalyzer, AnalysisResult, DetectFileKindInput, DocxAnalysisCoreResult, FileKind, FileKindMetadata, GenericAnalysisResult, ImageAnalysisCoreResult, PdfAnalysisCoreResult, TextAnalysisResult } from '@dragon708/docmind-shared';
|
|
2
|
+
export { AnalysisAnalyzer, AnalysisResult, DetectFileKindInput, DocxAnalysisCoreResult, FileKind, FileKindMetadata, GenericAnalysisResult, ImageAnalysisCoreResult, PdfAnalysisCoreResult, TextAnalysisResult, detectFileKind } from '@dragon708/docmind-shared';
|
|
3
3
|
import { OcrOptions } from '@dragon708/docmind-ocr';
|
|
4
4
|
|
|
5
5
|
/** Options for {@link analyzeFile} in the browser entry (no PDF pipeline). */
|
|
@@ -19,4 +19,25 @@ type BrowserAnalyzeInput = File | Blob | ArrayBuffer;
|
|
|
19
19
|
*/
|
|
20
20
|
declare function analyzeFile(input: BrowserAnalyzeInput, options?: BrowserAnalyzeOptions): Promise<AnalysisResult>;
|
|
21
21
|
|
|
22
|
-
|
|
22
|
+
/**
|
|
23
|
+
* Text only: DOCX → `extractTextFromDocx`; imagen → `ocr`; texto → `analyzeText`.
|
|
24
|
+
* PDF no está soportado en el navegador (mismo aviso que `analyzeFile`).
|
|
25
|
+
*/
|
|
26
|
+
declare function extractText(input: BrowserAnalyzeInput, options?: BrowserAnalyzeOptions): Promise<AnalysisResult>;
|
|
27
|
+
/**
|
|
28
|
+
* Metadatos: en el navegador no hay pipeline PDF ni metadatos DOCX dedicados;
|
|
29
|
+
* DOCX/imagen con avisos; texto → `analyzeText`.
|
|
30
|
+
*/
|
|
31
|
+
declare function extractMetadata(input: BrowserAnalyzeInput, options?: BrowserAnalyzeOptions): Promise<AnalysisResult>;
|
|
32
|
+
/**
|
|
33
|
+
* HTML: DOCX → `extractTextFromDocx` + `convertDocxToHtml`; texto → `<pre>`;
|
|
34
|
+
* PDF/imagen no aplican en browser como HTML rico.
|
|
35
|
+
*/
|
|
36
|
+
declare function convertToHtml(input: BrowserAnalyzeInput, options?: BrowserAnalyzeOptions): Promise<AnalysisResult>;
|
|
37
|
+
/**
|
|
38
|
+
* OCR: imagen → `ocr`; DOCX → `analyzeDocx` con aviso (sin OCR); texto → `analyzeText`.
|
|
39
|
+
* PDF no soportado en browser.
|
|
40
|
+
*/
|
|
41
|
+
declare function runOcr(input: BrowserAnalyzeInput, options?: BrowserAnalyzeOptions): Promise<AnalysisResult>;
|
|
42
|
+
|
|
43
|
+
export { BROWSER_PDF_UNSUPPORTED_WARNING, type BrowserAnalyzeInput, type BrowserAnalyzeOptions, analyzeFile, convertToHtml, extractMetadata, extractText, runOcr };
|
package/dist/index.js
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
|
-
import { assertValidAnalyzeFileInput, detectFileKind, notImplementedResult, UNKNOWN_FORMAT_WARNING, analyzeText,
|
|
2
|
-
|
|
1
|
+
import { assertValidAnalyzeFileInput, detectFileKind, notImplementedResult, UNKNOWN_FORMAT_WARNING, analyzeText, toUint8Array, InvalidInputError } from '@dragon708/docmind-shared';
|
|
2
|
+
export { detectFileKind } from '@dragon708/docmind-shared';
|
|
3
|
+
import { extractTextFromDocx, convertDocxToHtml, analyzeDocx } from '@dragon708/docmind-docx';
|
|
3
4
|
import { ocr } from '@dragon708/docmind-ocr';
|
|
4
5
|
|
|
5
6
|
// src/analyzeFile.ts
|
|
@@ -99,7 +100,270 @@ async function analyzeFile(input, options) {
|
|
|
99
100
|
return notImplementedResult(fileKind, "none", [UNKNOWN_FORMAT_WARNING]);
|
|
100
101
|
}
|
|
101
102
|
}
|
|
103
|
+
function assertBrowserInput2(input) {
|
|
104
|
+
const ok = input instanceof File || input instanceof Blob || input instanceof ArrayBuffer;
|
|
105
|
+
if (!ok) {
|
|
106
|
+
throw new InvalidInputError("Expected a File, Blob, or ArrayBuffer.");
|
|
107
|
+
}
|
|
108
|
+
}
|
|
109
|
+
function throwIfAborted(signal) {
|
|
110
|
+
if (signal?.aborted) {
|
|
111
|
+
const err = new Error("The operation was aborted");
|
|
112
|
+
err.name = "AbortError";
|
|
113
|
+
throw err;
|
|
114
|
+
}
|
|
115
|
+
}
|
|
116
|
+
function escapeHtmlMinimal(s) {
|
|
117
|
+
return s.replace(/&/g, "&amp;").replace(/</g, "&lt;").replace(/>/g, "&gt;").replace(/"/g, "&quot;");
|
|
118
|
+
}
|
|
119
|
+
var DOCX_METADATA_STUB = "Structured document metadata for DOCX is not exposed as a separate API; use extractText or analyzeFile.";
|
|
120
|
+
var IMAGE_METADATA_NOTE = "Raster images have no document metadata bundle in this API.";
|
|
121
|
+
async function kindOf(input) {
|
|
122
|
+
assertBrowserInput2(input);
|
|
123
|
+
assertValidAnalyzeFileInput(input);
|
|
124
|
+
return input;
|
|
125
|
+
}
|
|
126
|
+
async function extractText(input, options) {
|
|
127
|
+
throwIfAborted(options?.signal);
|
|
128
|
+
const resolved = await kindOf(input);
|
|
129
|
+
const kind = detectFileKind(resolved);
|
|
130
|
+
const bytesInput = input;
|
|
131
|
+
const signal = options?.signal;
|
|
132
|
+
switch (kind) {
|
|
133
|
+
case "pdf":
|
|
134
|
+
return notImplementedResult("pdf", "pdf", [BROWSER_PDF_UNSUPPORTED_WARNING]);
|
|
135
|
+
case "docx": {
|
|
136
|
+
const data = await toUint8Array(bytesInput);
|
|
137
|
+
if (data.byteLength === 0) {
|
|
138
|
+
return {
|
|
139
|
+
fileKind: "docx",
|
|
140
|
+
analyzer: "docx",
|
|
141
|
+
status: "ok",
|
|
142
|
+
kind: "docx",
|
|
143
|
+
text: "",
|
|
144
|
+
html: "",
|
|
145
|
+
warnings: ["No document bytes were provided for analysis."]
|
|
146
|
+
};
|
|
147
|
+
}
|
|
148
|
+
const r = await extractTextFromDocx(data);
|
|
149
|
+
return {
|
|
150
|
+
fileKind: "docx",
|
|
151
|
+
analyzer: "docx",
|
|
152
|
+
status: "ok",
|
|
153
|
+
kind: "docx",
|
|
154
|
+
text: r.text,
|
|
155
|
+
html: "",
|
|
156
|
+
warnings: r.warnings
|
|
157
|
+
};
|
|
158
|
+
}
|
|
159
|
+
case "image": {
|
|
160
|
+
const data = await toUint8Array(bytesInput);
|
|
161
|
+
if (data.byteLength === 0) {
|
|
162
|
+
return {
|
|
163
|
+
fileKind: "image",
|
|
164
|
+
analyzer: "image",
|
|
165
|
+
status: "ok",
|
|
166
|
+
kind: "image",
|
|
167
|
+
text: "",
|
|
168
|
+
confidence: 0,
|
|
169
|
+
ocrUsed: true,
|
|
170
|
+
warnings: ["No image bytes were provided for analysis."]
|
|
171
|
+
};
|
|
172
|
+
}
|
|
173
|
+
const ocrOpts = {
|
|
174
|
+
...options?.ocr ?? {},
|
|
175
|
+
signal: options?.ocr?.signal ?? signal
|
|
176
|
+
};
|
|
177
|
+
const r = await ocr(data, ocrOpts);
|
|
178
|
+
return {
|
|
179
|
+
fileKind: "image",
|
|
180
|
+
analyzer: "image",
|
|
181
|
+
status: "ok",
|
|
182
|
+
kind: "image",
|
|
183
|
+
text: r.text,
|
|
184
|
+
confidence: r.confidence,
|
|
185
|
+
ocrUsed: r.ocrUsed,
|
|
186
|
+
warnings: []
|
|
187
|
+
};
|
|
188
|
+
}
|
|
189
|
+
case "text":
|
|
190
|
+
return analyzeText(bytesInput, { signal });
|
|
191
|
+
default:
|
|
192
|
+
return notImplementedResult(kind, "none", [UNKNOWN_FORMAT_WARNING]);
|
|
193
|
+
}
|
|
194
|
+
}
|
|
195
|
+
async function extractMetadata(input, options) {
|
|
196
|
+
throwIfAborted(options?.signal);
|
|
197
|
+
const resolved = await kindOf(input);
|
|
198
|
+
const kind = detectFileKind(resolved);
|
|
199
|
+
const bytesInput = input;
|
|
200
|
+
const signal = options?.signal;
|
|
201
|
+
switch (kind) {
|
|
202
|
+
case "pdf":
|
|
203
|
+
return notImplementedResult("pdf", "pdf", [BROWSER_PDF_UNSUPPORTED_WARNING]);
|
|
204
|
+
case "docx":
|
|
205
|
+
return {
|
|
206
|
+
fileKind: "docx",
|
|
207
|
+
analyzer: "docx",
|
|
208
|
+
status: "ok",
|
|
209
|
+
kind: "docx",
|
|
210
|
+
text: "",
|
|
211
|
+
html: "",
|
|
212
|
+
warnings: [DOCX_METADATA_STUB]
|
|
213
|
+
};
|
|
214
|
+
case "image":
|
|
215
|
+
return {
|
|
216
|
+
fileKind: "image",
|
|
217
|
+
analyzer: "image",
|
|
218
|
+
status: "ok",
|
|
219
|
+
kind: "image",
|
|
220
|
+
text: "",
|
|
221
|
+
confidence: 0,
|
|
222
|
+
ocrUsed: true,
|
|
223
|
+
warnings: [IMAGE_METADATA_NOTE]
|
|
224
|
+
};
|
|
225
|
+
case "text":
|
|
226
|
+
return analyzeText(bytesInput, { signal });
|
|
227
|
+
default:
|
|
228
|
+
return notImplementedResult(kind, "none", [UNKNOWN_FORMAT_WARNING]);
|
|
229
|
+
}
|
|
230
|
+
}
|
|
231
|
+
async function convertToHtml(input, options) {
|
|
232
|
+
throwIfAborted(options?.signal);
|
|
233
|
+
const resolved = await kindOf(input);
|
|
234
|
+
const kind = detectFileKind(resolved);
|
|
235
|
+
const bytesInput = input;
|
|
236
|
+
const signal = options?.signal;
|
|
237
|
+
switch (kind) {
|
|
238
|
+
case "pdf":
|
|
239
|
+
return notImplementedResult("pdf", "pdf", [BROWSER_PDF_UNSUPPORTED_WARNING]);
|
|
240
|
+
case "docx": {
|
|
241
|
+
const data = await toUint8Array(bytesInput);
|
|
242
|
+
if (data.byteLength === 0) {
|
|
243
|
+
return {
|
|
244
|
+
fileKind: "docx",
|
|
245
|
+
analyzer: "docx",
|
|
246
|
+
status: "ok",
|
|
247
|
+
kind: "docx",
|
|
248
|
+
text: "",
|
|
249
|
+
html: "",
|
|
250
|
+
warnings: ["No document bytes were provided for analysis."]
|
|
251
|
+
};
|
|
252
|
+
}
|
|
253
|
+
const [textPart, htmlPart] = await Promise.all([
|
|
254
|
+
extractTextFromDocx(data),
|
|
255
|
+
convertDocxToHtml(data)
|
|
256
|
+
]);
|
|
257
|
+
return {
|
|
258
|
+
fileKind: "docx",
|
|
259
|
+
analyzer: "docx",
|
|
260
|
+
status: "ok",
|
|
261
|
+
kind: "docx",
|
|
262
|
+
text: textPart.text,
|
|
263
|
+
html: htmlPart.html,
|
|
264
|
+
warnings: [...textPart.warnings, ...htmlPart.warnings]
|
|
265
|
+
};
|
|
266
|
+
}
|
|
267
|
+
case "text": {
|
|
268
|
+
const t = await analyzeText(bytesInput, { signal });
|
|
269
|
+
const html = `<pre>${escapeHtmlMinimal(t.text)}</pre>`;
|
|
270
|
+
return {
|
|
271
|
+
...t,
|
|
272
|
+
html,
|
|
273
|
+
warnings: [
|
|
274
|
+
...t.warnings,
|
|
275
|
+
"HTML for plain text is a <pre> wrapper around decoded UTF-8 content."
|
|
276
|
+
]
|
|
277
|
+
};
|
|
278
|
+
}
|
|
279
|
+
case "image":
|
|
280
|
+
return {
|
|
281
|
+
fileKind: "image",
|
|
282
|
+
analyzer: "image",
|
|
283
|
+
status: "ok",
|
|
284
|
+
kind: "image",
|
|
285
|
+
text: "",
|
|
286
|
+
confidence: 0,
|
|
287
|
+
ocrUsed: true,
|
|
288
|
+
warnings: ["No HTML representation for raster images; use extractText / runOcr."]
|
|
289
|
+
};
|
|
290
|
+
default:
|
|
291
|
+
return notImplementedResult(kind, "none", [UNKNOWN_FORMAT_WARNING]);
|
|
292
|
+
}
|
|
293
|
+
}
|
|
294
|
+
async function runOcr(input, options) {
|
|
295
|
+
throwIfAborted(options?.signal);
|
|
296
|
+
const resolved = await kindOf(input);
|
|
297
|
+
const kind = detectFileKind(resolved);
|
|
298
|
+
const bytesInput = input;
|
|
299
|
+
const signal = options?.signal;
|
|
300
|
+
switch (kind) {
|
|
301
|
+
case "pdf":
|
|
302
|
+
return notImplementedResult("pdf", "pdf", [BROWSER_PDF_UNSUPPORTED_WARNING]);
|
|
303
|
+
case "image": {
|
|
304
|
+
const data = await toUint8Array(bytesInput);
|
|
305
|
+
if (data.byteLength === 0) {
|
|
306
|
+
return {
|
|
307
|
+
fileKind: "image",
|
|
308
|
+
analyzer: "image",
|
|
309
|
+
status: "ok",
|
|
310
|
+
kind: "image",
|
|
311
|
+
text: "",
|
|
312
|
+
confidence: 0,
|
|
313
|
+
ocrUsed: true,
|
|
314
|
+
warnings: ["No image bytes were provided for analysis."]
|
|
315
|
+
};
|
|
316
|
+
}
|
|
317
|
+
const ocrOpts = {
|
|
318
|
+
...options?.ocr ?? {},
|
|
319
|
+
signal: options?.ocr?.signal ?? signal
|
|
320
|
+
};
|
|
321
|
+
const r = await ocr(data, ocrOpts);
|
|
322
|
+
return {
|
|
323
|
+
fileKind: "image",
|
|
324
|
+
analyzer: "image",
|
|
325
|
+
status: "ok",
|
|
326
|
+
kind: "image",
|
|
327
|
+
text: r.text,
|
|
328
|
+
confidence: r.confidence,
|
|
329
|
+
ocrUsed: r.ocrUsed,
|
|
330
|
+
warnings: []
|
|
331
|
+
};
|
|
332
|
+
}
|
|
333
|
+
case "docx": {
|
|
334
|
+
const data = await toUint8Array(bytesInput);
|
|
335
|
+
if (data.byteLength === 0) {
|
|
336
|
+
return {
|
|
337
|
+
fileKind: "docx",
|
|
338
|
+
analyzer: "docx",
|
|
339
|
+
status: "ok",
|
|
340
|
+
kind: "docx",
|
|
341
|
+
text: "",
|
|
342
|
+
html: "",
|
|
343
|
+
warnings: ["No document bytes were provided for analysis."]
|
|
344
|
+
};
|
|
345
|
+
}
|
|
346
|
+
const r = await analyzeDocx(data);
|
|
347
|
+
return {
|
|
348
|
+
fileKind: "docx",
|
|
349
|
+
analyzer: "docx",
|
|
350
|
+
status: "ok",
|
|
351
|
+
kind: "docx",
|
|
352
|
+
text: r.text,
|
|
353
|
+
html: r.html,
|
|
354
|
+
warnings: [
|
|
355
|
+
...r.warnings,
|
|
356
|
+
"OCR does not apply to DOCX; returned structured text/HTML extract."
|
|
357
|
+
]
|
|
358
|
+
};
|
|
359
|
+
}
|
|
360
|
+
case "text":
|
|
361
|
+
return analyzeText(bytesInput, { signal });
|
|
362
|
+
default:
|
|
363
|
+
return notImplementedResult(kind, "none", [UNKNOWN_FORMAT_WARNING]);
|
|
364
|
+
}
|
|
365
|
+
}
|
|
102
366
|
|
|
103
|
-
export { BROWSER_PDF_UNSUPPORTED_WARNING, analyzeFile };
|
|
367
|
+
export { BROWSER_PDF_UNSUPPORTED_WARNING, analyzeFile, convertToHtml, extractMetadata, extractText, runOcr };
|
|
104
368
|
//# sourceMappingURL=index.js.map
|
|
105
369
|
//# sourceMappingURL=index.js.map
|
package/dist/index.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../src/analyzers/docx.ts","../src/analyzers/image.ts","../src/analyzeFile.ts"],"names":["extractDocx","toUint8Array"],"mappings":";;;;;AAOA,eAAsB,qBAAA,CACpB,OACA,MAAA,EACyB;AACzB,EAAA,IAAI,QAAQ,OAAA,EAAS;AACnB,IAAA,MAAM,GAAA,GAAM,IAAI,KAAA,CAAM,2BAA2B,CAAA;AACjD,IAAA,GAAA,CAAI,IAAA,GAAO,YAAA;AACX,IAAA,MAAM,GAAA;AAAA,EACR;AAEA,EAAA,MAAM,IAAA,GAAO,MAAM,YAAA,CAAa,KAAK,CAAA;AACrC,EAAA,IAAI,IAAA,CAAK,eAAe,CAAA,EAAG;AACzB,IAAA,OAAO;AAAA,MACL,QAAA,EAAU,MAAA;AAAA,MACV,QAAA,EAAU,MAAA;AAAA,MACV,MAAA,EAAQ,IAAA;AAAA,MACR,IAAA,EAAM,MAAA;AAAA,MACN,IAAA,EAAM,EAAA;AAAA,MACN,IAAA,EAAM,EAAA;AAAA,MACN,QAAA,EAAU,CAAC,+CAA+C;AAAA,KAC5D;AAAA,EACF;AAEA,EAAA,MAAM,CAAA,GAAI,MAAMA,WAAA,CAAY,IAAI,CAAA;AAChC,EAAA,OAAO;AAAA,IACL,QAAA,EAAU,MAAA;AAAA,IACV,QAAA,EAAU,MAAA;AAAA,IACV,MAAA,EAAQ,IAAA;AAAA,IACR,IAAA,EAAM,MAAA;AAAA,IACN,MAAM,CAAA,CAAE,IAAA;AAAA,IACR,MAAM,CAAA,CAAE,IAAA;AAAA,IACR,QAAA,EAAU,CAAC,GAAG,CAAA,CAAE,QAAQ;AAAA,GAC1B;AACF;AChCA,eAAsB,sBAAA,CACpB,OACA,OAAA,EACyB;AACzB,EAAA,IAAI,OAAA,EAAS,QAAQ,OAAA,EAAS;AAC5B,IAAA,MAAM,GAAA,GAAM,IAAI,KAAA,CAAM,2BAA2B,CAAA;AACjD,IAAA,GAAA,CAAI,IAAA,GAAO,YAAA;AACX,IAAA,MAAM,GAAA;AAAA,EACR;AAEA,EAAA,MAAM,IAAA,GAAO,MAAMC,YAAAA,CAAa,KAAK,CAAA;AACrC,EAAA,IAAI,IAAA,CAAK,eAAe,CAAA,EAAG;AACzB,IAAA,OAAO;AAAA,MACL,QAAA,EAAU,OAAA;AAAA,MACV,QAAA,EAAU,OAAA;AAAA,MACV,MAAA,EAAQ,IAAA;AAAA,MACR,IAAA,EAAM,OAAA;AAAA,MACN,IAAA,EAAM,EAAA;AAAA,MACN,UAAA,EAAY,CAAA;AAAA,MACZ,OAAA,EAAS,IAAA;AAAA,MACT,QAAA,EAAU,CAAC,4CAA4C;AAAA,KACzD;AAAA,EACF;AAEA,EAAA,MAAM,OAAA,GAAU;AAAA,IACd,GAAI,OAAA,EAAS,GAAA,IAAO,EAAC;AAAA,IACrB,MAAA,EAAQ,OAAA,EAAS,GAAA,EAAK,MAAA,IAAU,OAAA,EAAS;AAAA,GAC3C;AAEA,EAAA,MAAM,CAAA,GAAI,MAAM,GAAA,CAAI,IAAA,EAAM,OAAO,CAAA;AACjC,EAAA,OAAO;AAAA,IACL,QAAA,EAAU,OAAA;AAAA,IACV,QAAA,EAAU,OAAA;AAAA,IACV,MAAA,EAAQ,IAAA;AAAA,IACR,IAAA,EAAM,OAAA;AAAA,IACN,MAAM,CAAA,CAAE,IAAA;AAAA,IACR,YAAY,CAAA,CAAE,UAAA;AAAA,IACd,SAAS,CAAA,CAAE,OAAA;AAAA,IACX,UAAU;AAAC,GACb;AACF;;;ACjCO,IAAM,+BAAA,GACX;AAOF,SAAS,mBAAmB,KAAA,EAAsD;AAChF,EAAA,MAAM,EAAA,GACJ,KAAA,
YAAiB,IAAA,IACjB,KAAA,YAAiB,QACjB,KAAA,YAAiB,WAAA;AACnB,EAAA,IAAI,CAAC,EAAA,EAAI;AACP,IAAA,MAAM,IAAI,kBAAkB,wCAAwC,CAAA;AAAA,EACtE;AACF;AAKA,eAAsB,WAAA,CACpB,OACA,OAAA,EACyB;AACzB,EAAA,IAAI,OAAA,EAAS,QAAQ,OAAA,EAAS;AAC5B,IAAA,MAAM,GAAA,GAAM,IAAI,KAAA,CAAM,2BAA2B,CAAA;AACjD,IAAA,GAAA,CAAI,IAAA,GAAO,YAAA;AACX,IAAA,MAAM,GAAA;AAAA,EACR;AAEA,EAAA,kBAAA,CAAmB,KAAK,CAAA;AACxB,EAAA,2BAAA,CAA4B,KAAK,CAAA;AAEjC,EAAA,MAAM,QAAA,GAAW,eAAe,KAA4B,CAAA;AAE5D,EAAA,MAAM,UAAA,GAAa,KAAA;AAEnB,EAAA,QAAQ,QAAA;AAAU,IAChB,KAAK,KAAA;AACH,MAAA,OAAO,oBAAA,CAAqB,KAAA,EAAO,KAAA,EAAO,CAAC,+BAA+B,CAAC,CAAA;AAAA,IAC7E,KAAK,MAAA;AACH,MAAA,OAAO,qBAAA,CAAsB,UAAA,EAAY,OAAA,EAAS,MAAM,CAAA;AAAA,IAC1D,KAAK,OAAA;AACH,MAAA,OAAO,sBAAA,CAAuB,YAAY,OAAO,CAAA;AAAA,IACnD,KAAK,MAAA;AACH,MAAA,OAAO,YAAY,UAAA,EAAY,EAAE,MAAA,EAAQ,OAAA,EAAS,QAAQ,CAAA;AAAA,IAC5D;AACE,MAAA,OAAO,oBAAA,CAAqB,QAAA,EAAU,MAAA,EAAQ,CAAC,sBAAsB,CAAC,CAAA;AAAA;AAE5E","file":"index.js","sourcesContent":["import { analyzeDocx as extractDocx } from \"@dragon708/docmind-docx\";\nimport type { AnalysisResult, FileLikeInput } from \"@dragon708/docmind-shared\";\nimport { toUint8Array } from \"@dragon708/docmind-shared\";\n\n/**\n * DOCX → `@dragon708/docmind-docx` (browser-safe: Mammoth + JSZip).\n */\nexport async function analyzeDocxForBrowser(\n input: FileLikeInput,\n signal?: AbortSignal,\n): Promise<AnalysisResult> {\n if (signal?.aborted) {\n const err = new Error(\"The operation was aborted\");\n err.name = \"AbortError\";\n throw err;\n }\n\n const data = await toUint8Array(input);\n if (data.byteLength === 0) {\n return {\n fileKind: \"docx\",\n analyzer: \"docx\",\n status: \"ok\",\n kind: \"docx\",\n text: \"\",\n html: \"\",\n warnings: [\"No document bytes were provided for analysis.\"],\n };\n }\n\n const r = await extractDocx(data);\n return {\n fileKind: \"docx\",\n analyzer: \"docx\",\n status: \"ok\",\n kind: \"docx\",\n text: r.text,\n html: r.html,\n warnings: [...r.warnings],\n };\n}\n","import { ocr } from 
\"@dragon708/docmind-ocr\";\nimport type { AnalysisResult, FileLikeInput } from \"@dragon708/docmind-shared\";\nimport { toUint8Array } from \"@dragon708/docmind-shared\";\nimport type { BrowserAnalyzeOptions } from \"../browserAnalyzeOptions.js\";\n\n/**\n * Image → `@dragon708/docmind-ocr` (Tesseract in WASM / browser).\n */\nexport async function analyzeImageForBrowser(\n input: FileLikeInput,\n options?: BrowserAnalyzeOptions,\n): Promise<AnalysisResult> {\n if (options?.signal?.aborted) {\n const err = new Error(\"The operation was aborted\");\n err.name = \"AbortError\";\n throw err;\n }\n\n const data = await toUint8Array(input);\n if (data.byteLength === 0) {\n return {\n fileKind: \"image\",\n analyzer: \"image\",\n status: \"ok\",\n kind: \"image\",\n text: \"\",\n confidence: 0,\n ocrUsed: true,\n warnings: [\"No image bytes were provided for analysis.\"],\n };\n }\n\n const ocrOpts = {\n ...(options?.ocr ?? {}),\n signal: options?.ocr?.signal ?? options?.signal,\n };\n\n const r = await ocr(data, ocrOpts);\n return {\n fileKind: \"image\",\n analyzer: \"image\",\n status: \"ok\",\n kind: \"image\",\n text: r.text,\n confidence: r.confidence,\n ocrUsed: r.ocrUsed,\n warnings: [],\n };\n}\n","import type { AnalysisResult, FileLikeInput } from \"@dragon708/docmind-shared\";\r\nimport {\r\n analyzeText,\r\n assertValidAnalyzeFileInput,\r\n detectFileKind,\r\n InvalidInputError,\r\n notImplementedResult,\r\n UNKNOWN_FORMAT_WARNING,\r\n} from \"@dragon708/docmind-shared\";\r\nimport type { DetectFileKindInput } from \"@dragon708/docmind-shared\";\r\nimport type { BrowserAnalyzeOptions } from \"./browserAnalyzeOptions.js\";\r\nimport { analyzeDocxForBrowser } from \"./analyzers/docx.js\";\r\nimport { analyzeImageForBrowser } from \"./analyzers/image.js\";\r\n\r\n/** PDF is not processed in the browser; use `@dragon708/docmind-node` on the server. 
*/\r\nexport const BROWSER_PDF_UNSUPPORTED_WARNING =\r\n \"PDF text extraction is not available in the browser runtime; use @dragon708/docmind-node on the server.\";\r\n\r\n/**\r\n * Inputs supported by the browser entry (DOM types only — no `fs`, no Node `Buffer` in the public surface).\r\n */\r\nexport type BrowserAnalyzeInput = File | Blob | ArrayBuffer;\r\n\r\nfunction assertBrowserInput(input: unknown): asserts input is BrowserAnalyzeInput {\r\n const ok =\r\n input instanceof File ||\r\n input instanceof Blob ||\r\n input instanceof ArrayBuffer;\r\n if (!ok) {\r\n throw new InvalidInputError(\"Expected a File, Blob, or ArrayBuffer.\");\r\n }\r\n}\r\n\r\n/**\r\n * Browser-only router: DOCX, images (OCR), and text. PDF yields `not_implemented` with a clear warning.\r\n */\r\nexport async function analyzeFile(\r\n input: BrowserAnalyzeInput,\r\n options?: BrowserAnalyzeOptions,\r\n): Promise<AnalysisResult> {\r\n if (options?.signal?.aborted) {\r\n const err = new Error(\"The operation was aborted\");\r\n err.name = \"AbortError\";\r\n throw err;\r\n }\r\n\r\n assertBrowserInput(input);\r\n assertValidAnalyzeFileInput(input);\r\n\r\n const fileKind = detectFileKind(input as DetectFileKindInput);\r\n\r\n const bytesInput = input as FileLikeInput;\r\n\r\n switch (fileKind) {\r\n case \"pdf\":\r\n return notImplementedResult(\"pdf\", \"pdf\", [BROWSER_PDF_UNSUPPORTED_WARNING]);\r\n case \"docx\":\r\n return analyzeDocxForBrowser(bytesInput, options?.signal);\r\n case \"image\":\r\n return analyzeImageForBrowser(bytesInput, options);\r\n case \"text\":\r\n return analyzeText(bytesInput, { signal: options?.signal });\r\n default:\r\n return notImplementedResult(fileKind, \"none\", [UNKNOWN_FORMAT_WARNING]);\r\n }\r\n}\r\n"]}
|
|
1
|
+
{"version":3,"sources":["../src/analyzers/docx.ts","../src/analyzers/image.ts","../src/analyzeFile.ts","../src/publicActions.ts"],"names":["extractDocx","toUint8Array","assertBrowserInput","InvalidInputError","assertValidAnalyzeFileInput","detectFileKind","notImplementedResult","ocr","analyzeText","UNKNOWN_FORMAT_WARNING"],"mappings":";;;;;;AAOA,eAAsB,qBAAA,CACpB,OACA,MAAA,EACyB;AACzB,EAAA,IAAI,QAAQ,OAAA,EAAS;AACnB,IAAA,MAAM,GAAA,GAAM,IAAI,KAAA,CAAM,2BAA2B,CAAA;AACjD,IAAA,GAAA,CAAI,IAAA,GAAO,YAAA;AACX,IAAA,MAAM,GAAA;AAAA,EACR;AAEA,EAAA,MAAM,IAAA,GAAO,MAAM,YAAA,CAAa,KAAK,CAAA;AACrC,EAAA,IAAI,IAAA,CAAK,eAAe,CAAA,EAAG;AACzB,IAAA,OAAO;AAAA,MACL,QAAA,EAAU,MAAA;AAAA,MACV,QAAA,EAAU,MAAA;AAAA,MACV,MAAA,EAAQ,IAAA;AAAA,MACR,IAAA,EAAM,MAAA;AAAA,MACN,IAAA,EAAM,EAAA;AAAA,MACN,IAAA,EAAM,EAAA;AAAA,MACN,QAAA,EAAU,CAAC,+CAA+C;AAAA,KAC5D;AAAA,EACF;AAEA,EAAA,MAAM,CAAA,GAAI,MAAMA,WAAA,CAAY,IAAI,CAAA;AAChC,EAAA,OAAO;AAAA,IACL,QAAA,EAAU,MAAA;AAAA,IACV,QAAA,EAAU,MAAA;AAAA,IACV,MAAA,EAAQ,IAAA;AAAA,IACR,IAAA,EAAM,MAAA;AAAA,IACN,MAAM,CAAA,CAAE,IAAA;AAAA,IACR,MAAM,CAAA,CAAE,IAAA;AAAA,IACR,QAAA,EAAU,CAAC,GAAG,CAAA,CAAE,QAAQ;AAAA,GAC1B;AACF;AChCA,eAAsB,sBAAA,CACpB,OACA,OAAA,EACyB;AACzB,EAAA,IAAI,OAAA,EAAS,QAAQ,OAAA,EAAS;AAC5B,IAAA,MAAM,GAAA,GAAM,IAAI,KAAA,CAAM,2BAA2B,CAAA;AACjD,IAAA,GAAA,CAAI,IAAA,GAAO,YAAA;AACX,IAAA,MAAM,GAAA;AAAA,EACR;AAEA,EAAA,MAAM,IAAA,GAAO,MAAMC,YAAAA,CAAa,KAAK,CAAA;AACrC,EAAA,IAAI,IAAA,CAAK,eAAe,CAAA,EAAG;AACzB,IAAA,OAAO;AAAA,MACL,QAAA,EAAU,OAAA;AAAA,MACV,QAAA,EAAU,OAAA;AAAA,MACV,MAAA,EAAQ,IAAA;AAAA,MACR,IAAA,EAAM,OAAA;AAAA,MACN,IAAA,EAAM,EAAA;AAAA,MACN,UAAA,EAAY,CAAA;AAAA,MACZ,OAAA,EAAS,IAAA;AAAA,MACT,QAAA,EAAU,CAAC,4CAA4C;AAAA,KACzD;AAAA,EACF;AAEA,EAAA,MAAM,OAAA,GAAU;AAAA,IACd,GAAI,OAAA,EAAS,GAAA,IAAO,EAAC;AAAA,IACrB,MAAA,EAAQ,OAAA,EAAS,GAAA,EAAK,MAAA,IAAU,OAAA,EAAS;AAAA,GAC3C;AAEA,EAAA,MAAM,CAAA,GAAI,MAAM,GAAA,CAAI,IAAA,EAAM,OAAO,CAAA;AACjC,EAAA,OAAO;AAAA,IACL,QAAA,EAAU,OAAA;AAAA,IACV,QAAA,EAAU,OAAA;AAAA,IACV,MAAA,EAAQ,IAAA;AAAA,IACR,IAAA,EAAM,OAAA;AAAA,IACN,MAAM,CAAA,
CAAE,IAAA;AAAA,IACR,YAAY,CAAA,CAAE,UAAA;AAAA,IACd,SAAS,CAAA,CAAE,OAAA;AAAA,IACX,UAAU;AAAC,GACb;AACF;;;ACjCO,IAAM,+BAAA,GACX;AAOF,SAAS,mBAAmB,KAAA,EAAsD;AAChF,EAAA,MAAM,EAAA,GACJ,KAAA,YAAiB,IAAA,IACjB,KAAA,YAAiB,QACjB,KAAA,YAAiB,WAAA;AACnB,EAAA,IAAI,CAAC,EAAA,EAAI;AACP,IAAA,MAAM,IAAI,kBAAkB,wCAAwC,CAAA;AAAA,EACtE;AACF;AAKA,eAAsB,WAAA,CACpB,OACA,OAAA,EACyB;AACzB,EAAA,IAAI,OAAA,EAAS,QAAQ,OAAA,EAAS;AAC5B,IAAA,MAAM,GAAA,GAAM,IAAI,KAAA,CAAM,2BAA2B,CAAA;AACjD,IAAA,GAAA,CAAI,IAAA,GAAO,YAAA;AACX,IAAA,MAAM,GAAA;AAAA,EACR;AAEA,EAAA,kBAAA,CAAmB,KAAK,CAAA;AACxB,EAAA,2BAAA,CAA4B,KAAK,CAAA;AAEjC,EAAA,MAAM,QAAA,GAAW,eAAe,KAA4B,CAAA;AAE5D,EAAA,MAAM,UAAA,GAAa,KAAA;AAEnB,EAAA,QAAQ,QAAA;AAAU,IAChB,KAAK,KAAA;AACH,MAAA,OAAO,oBAAA,CAAqB,KAAA,EAAO,KAAA,EAAO,CAAC,+BAA+B,CAAC,CAAA;AAAA,IAC7E,KAAK,MAAA;AACH,MAAA,OAAO,qBAAA,CAAsB,UAAA,EAAY,OAAA,EAAS,MAAM,CAAA;AAAA,IAC1D,KAAK,OAAA;AACH,MAAA,OAAO,sBAAA,CAAuB,YAAY,OAAO,CAAA;AAAA,IACnD,KAAK,MAAA;AACH,MAAA,OAAO,YAAY,UAAA,EAAY,EAAE,MAAA,EAAQ,OAAA,EAAS,QAAQ,CAAA;AAAA,IAC5D;AACE,MAAA,OAAO,oBAAA,CAAqB,QAAA,EAAU,MAAA,EAAQ,CAAC,sBAAsB,CAAC,CAAA;AAAA;AAE5E;AC7CA,SAASC,oBAAmB,KAAA,EAAsD;AAChF,EAAA,MAAM,EAAA,GACJ,KAAA,YAAiB,IAAA,IACjB,KAAA,YAAiB,QACjB,KAAA,YAAiB,WAAA;AACnB,EAAA,IAAI,CAAC,EAAA,EAAI;AACP,IAAA,MAAM,IAAIC,kBAAkB,wCAAwC,CAAA;AAAA,EACtE;AACF;AAEA,SAAS,eAAe,MAAA,EAA4B;AAClD,EAAA,IAAI,QAAQ,OAAA,EAAS;AACnB,IAAA,MAAM,GAAA,GAAM,IAAI,KAAA,CAAM,2BAA2B,CAAA;AACjD,IAAA,GAAA,CAAI,IAAA,GAAO,YAAA;AACX,IAAA,MAAM,GAAA;AAAA,EACR;AACF;AAEA,SAAS,kBAAkB,CAAA,EAAmB;AAC5C,EAAA,OAAO,CAAA,CACJ,OAAA,CAAQ,IAAA,EAAM,OAAO,EACrB,OAAA,CAAQ,IAAA,EAAM,MAAM,CAAA,CACpB,QAAQ,IAAA,EAAM,MAAM,CAAA,CACpB,OAAA,CAAQ,MAAM,QAAQ,CAAA;AAC3B;AAEA,IAAM,kBAAA,GACJ,yGAAA;AAEF,IAAM,mBAAA,GACJ,6DAAA;AAEF,eAAe,OAAO,KAAA,EAA0D;AAC9E,EAAAD,oBAAmB,KAAK,CAAA;AACxB,EAAAE,4BAA4B,KAAK,CAAA;AACjC,EAAA,OAAO,KAAA;AACT;AAMA,eAAsB,WAAA,CACpB,OACA,OAAA,EACyB;AACzB,EAAA,cAAA,CAAe,SAAS,MAAM,CAAA;AAC9B,EAAA,MAAM,QAAA,GAAW,MAAM,MAAA,CAAO,KAAK,CAAA;AACnC,EAAA,MAAM,IAAA,GAAOC,eAAe,QAAQ,CAAA;AACpC,EAAA,MAAM,UAAA,GA
Aa,KAAA;AACnB,EAAA,MAAM,SAAS,OAAA,EAAS,MAAA;AAExB,EAAA,QAAQ,IAAA;AAAM,IACZ,KAAK,KAAA;AACH,MAAA,OAAOC,oBAAAA,CAAqB,KAAA,EAAO,KAAA,EAAO,CAAC,+BAA+B,CAAC,CAAA;AAAA,IAC7E,KAAK,MAAA,EAAQ;AACX,MAAA,MAAM,IAAA,GAAO,MAAML,YAAAA,CAAa,UAAU,CAAA;AAC1C,MAAA,IAAI,IAAA,CAAK,eAAe,CAAA,EAAG;AACzB,QAAA,OAAO;AAAA,UACL,QAAA,EAAU,MAAA;AAAA,UACV,QAAA,EAAU,MAAA;AAAA,UACV,MAAA,EAAQ,IAAA;AAAA,UACR,IAAA,EAAM,MAAA;AAAA,UACN,IAAA,EAAM,EAAA;AAAA,UACN,IAAA,EAAM,EAAA;AAAA,UACN,QAAA,EAAU,CAAC,+CAA+C;AAAA,SAC5D;AAAA,MACF;AACA,MAAA,MAAM,CAAA,GAAI,MAAM,mBAAA,CAAoB,IAAI,CAAA;AACxC,MAAA,OAAO;AAAA,QACL,QAAA,EAAU,MAAA;AAAA,QACV,QAAA,EAAU,MAAA;AAAA,QACV,MAAA,EAAQ,IAAA;AAAA,QACR,IAAA,EAAM,MAAA;AAAA,QACN,MAAM,CAAA,CAAE,IAAA;AAAA,QACR,IAAA,EAAM,EAAA;AAAA,QACN,UAAU,CAAA,CAAE;AAAA,OACd;AAAA,IACF;AAAA,IACA,KAAK,OAAA,EAAS;AACZ,MAAA,MAAM,IAAA,GAAO,MAAMA,YAAAA,CAAa,UAAU,CAAA;AAC1C,MAAA,IAAI,IAAA,CAAK,eAAe,CAAA,EAAG;AACzB,QAAA,OAAO;AAAA,UACL,QAAA,EAAU,OAAA;AAAA,UACV,QAAA,EAAU,OAAA;AAAA,UACV,MAAA,EAAQ,IAAA;AAAA,UACR,IAAA,EAAM,OAAA;AAAA,UACN,IAAA,EAAM,EAAA;AAAA,UACN,UAAA,EAAY,CAAA;AAAA,UACZ,OAAA,EAAS,IAAA;AAAA,UACT,QAAA,EAAU,CAAC,4CAA4C;AAAA,SACzD;AAAA,MACF;AACA,MAAA,MAAM,OAAA,GAAU;AAAA,QACd,GAAI,OAAA,EAAS,GAAA,IAAO,EAAC;AAAA,QACrB,MAAA,EAAQ,OAAA,EAAS,GAAA,EAAK,MAAA,IAAU;AAAA,OAClC;AACA,MAAA,MAAM,CAAA,GAAI,MAAMM,GAAAA,CAAI,IAAA,EAAM,OAAO,CAAA;AACjC,MAAA,OAAO;AAAA,QACL,QAAA,EAAU,OAAA;AAAA,QACV,QAAA,EAAU,OAAA;AAAA,QACV,MAAA,EAAQ,IAAA;AAAA,QACR,IAAA,EAAM,OAAA;AAAA,QACN,MAAM,CAAA,CAAE,IAAA;AAAA,QACR,YAAY,CAAA,CAAE,UAAA;AAAA,QACd,SAAS,CAAA,CAAE,OAAA;AAAA,QACX,UAAU;AAAC,OACb;AAAA,IACF;AAAA,IACA,KAAK,MAAA;AACH,MAAA,OAAOC,WAAAA,CAAY,UAAA,EAAY,EAAE,MAAA,EAAQ,CAAA;AAAA,IAC3C;AACE,MAAA,OAAOF,oBAAAA,CAAqB,IAAA,EAAM,MAAA,EAAQ,CAACG,sBAAsB,CAAC,CAAA;AAAA;AAExE;AAMA,eAAsB,eAAA,CACpB,OACA,OAAA,EACyB;AACzB,EAAA,cAAA,CAAe,SAAS,MAAM,CAAA;AAC9B,EAAA,MAAM,QAAA,GAAW,MAAM,MAAA,CAAO,KAAK,CAAA;AACnC,EAAA,MAAM,IAAA,GAAOJ,eAAe,QAAQ,CAAA;AACpC,EAAA,MAAM,UAAA,GAAa,KAAA;AACnB,EAAA,MAAM,SAAS,OAAA,EAAS,MAAA;AAExB,EAAA,QAAQ,IAAA;AAAM,IACZ,KAAK,KAAA;
AACH,MAAA,OAAOC,oBAAAA,CAAqB,KAAA,EAAO,KAAA,EAAO,CAAC,+BAA+B,CAAC,CAAA;AAAA,IAC7E,KAAK,MAAA;AACH,MAAA,OAAO;AAAA,QACL,QAAA,EAAU,MAAA;AAAA,QACV,QAAA,EAAU,MAAA;AAAA,QACV,MAAA,EAAQ,IAAA;AAAA,QACR,IAAA,EAAM,MAAA;AAAA,QACN,IAAA,EAAM,EAAA;AAAA,QACN,IAAA,EAAM,EAAA;AAAA,QACN,QAAA,EAAU,CAAC,kBAAkB;AAAA,OAC/B;AAAA,IACF,KAAK,OAAA;AACH,MAAA,OAAO;AAAA,QACL,QAAA,EAAU,OAAA;AAAA,QACV,QAAA,EAAU,OAAA;AAAA,QACV,MAAA,EAAQ,IAAA;AAAA,QACR,IAAA,EAAM,OAAA;AAAA,QACN,IAAA,EAAM,EAAA;AAAA,QACN,UAAA,EAAY,CAAA;AAAA,QACZ,OAAA,EAAS,IAAA;AAAA,QACT,QAAA,EAAU,CAAC,mBAAmB;AAAA,OAChC;AAAA,IACF,KAAK,MAAA;AACH,MAAA,OAAOE,WAAAA,CAAY,UAAA,EAAY,EAAE,MAAA,EAAQ,CAAA;AAAA,IAC3C;AACE,MAAA,OAAOF,oBAAAA,CAAqB,IAAA,EAAM,MAAA,EAAQ,CAACG,sBAAsB,CAAC,CAAA;AAAA;AAExE;AAMA,eAAsB,aAAA,CACpB,OACA,OAAA,EACyB;AACzB,EAAA,cAAA,CAAe,SAAS,MAAM,CAAA;AAC9B,EAAA,MAAM,QAAA,GAAW,MAAM,MAAA,CAAO,KAAK,CAAA;AACnC,EAAA,MAAM,IAAA,GAAOJ,eAAe,QAAQ,CAAA;AACpC,EAAA,MAAM,UAAA,GAAa,KAAA;AACnB,EAAA,MAAM,SAAS,OAAA,EAAS,MAAA;AAExB,EAAA,QAAQ,IAAA;AAAM,IACZ,KAAK,KAAA;AACH,MAAA,OAAOC,oBAAAA,CAAqB,KAAA,EAAO,KAAA,EAAO,CAAC,+BAA+B,CAAC,CAAA;AAAA,IAC7E,KAAK,MAAA,EAAQ;AACX,MAAA,MAAM,IAAA,GAAO,MAAML,YAAAA,CAAa,UAAU,CAAA;AAC1C,MAAA,IAAI,IAAA,CAAK,eAAe,CAAA,EAAG;AACzB,QAAA,OAAO;AAAA,UACL,QAAA,EAAU,MAAA;AAAA,UACV,QAAA,EAAU,MAAA;AAAA,UACV,MAAA,EAAQ,IAAA;AAAA,UACR,IAAA,EAAM,MAAA;AAAA,UACN,IAAA,EAAM,EAAA;AAAA,UACN,IAAA,EAAM,EAAA;AAAA,UACN,QAAA,EAAU,CAAC,+CAA+C;AAAA,SAC5D;AAAA,MACF;AACA,MAAA,MAAM,CAAC,QAAA,EAAU,QAAQ,CAAA,GAAI,MAAM,QAAQ,GAAA,CAAI;AAAA,QAC7C,oBAAoB,IAAI,CAAA;AAAA,QACxB,kBAAkB,IAAI;AAAA,OACvB,CAAA;AACD,MAAA,OAAO;AAAA,QACL,QAAA,EAAU,MAAA;AAAA,QACV,QAAA,EAAU,MAAA;AAAA,QACV,MAAA,EAAQ,IAAA;AAAA,QACR,IAAA,EAAM,MAAA;AAAA,QACN,MAAM,QAAA,CAAS,IAAA;AAAA,QACf,MAAM,QAAA,CAAS,IAAA;AAAA,QACf,UAAU,CAAC,GAAG,SAAS,QAAA,EAAU,GAAG,SAAS,QAAQ;AAAA,OACvD;AAAA,IACF;AAAA,IACA,KAAK,MAAA,EAAQ;AACX,MAAA,MAAM,IAAI,MAAMO,WAAAA,CAAY,UAAA,EAAY,EAAE,QAAQ,CAAA;AAClD,MAAA,MAAM,IAAA,GAAO,CAAA,KAAA,EAAQ,iBAAA,CAAkB,CAAA,CAAE,IAAI,CAAC,CAAA,MAAA,CAAA;AAC9C,MAAA,OAAO;AAAA,QACL,GAAG,CAA
A;AAAA,QACH,IAAA;AAAA,QACA,QAAA,EAAU;AAAA,UACR,GAAG,CAAA,CAAE,QAAA;AAAA,UACL;AAAA;AACF,OACF;AAAA,IACF;AAAA,IACA,KAAK,OAAA;AACH,MAAA,OAAO;AAAA,QACL,QAAA,EAAU,OAAA;AAAA,QACV,QAAA,EAAU,OAAA;AAAA,QACV,MAAA,EAAQ,IAAA;AAAA,QACR,IAAA,EAAM,OAAA;AAAA,QACN,IAAA,EAAM,EAAA;AAAA,QACN,UAAA,EAAY,CAAA;AAAA,QACZ,OAAA,EAAS,IAAA;AAAA,QACT,QAAA,EAAU,CAAC,qEAAqE;AAAA,OAClF;AAAA,IACF;AACE,MAAA,OAAOF,oBAAAA,CAAqB,IAAA,EAAM,MAAA,EAAQ,CAACG,sBAAsB,CAAC,CAAA;AAAA;AAExE;AAMA,eAAsB,MAAA,CACpB,OACA,OAAA,EACyB;AACzB,EAAA,cAAA,CAAe,SAAS,MAAM,CAAA;AAC9B,EAAA,MAAM,QAAA,GAAW,MAAM,MAAA,CAAO,KAAK,CAAA;AACnC,EAAA,MAAM,IAAA,GAAOJ,eAAe,QAAQ,CAAA;AACpC,EAAA,MAAM,UAAA,GAAa,KAAA;AACnB,EAAA,MAAM,SAAS,OAAA,EAAS,MAAA;AAExB,EAAA,QAAQ,IAAA;AAAM,IACZ,KAAK,KAAA;AACH,MAAA,OAAOC,oBAAAA,CAAqB,KAAA,EAAO,KAAA,EAAO,CAAC,+BAA+B,CAAC,CAAA;AAAA,IAC7E,KAAK,OAAA,EAAS;AACZ,MAAA,MAAM,IAAA,GAAO,MAAML,YAAAA,CAAa,UAAU,CAAA;AAC1C,MAAA,IAAI,IAAA,CAAK,eAAe,CAAA,EAAG;AACzB,QAAA,OAAO;AAAA,UACL,QAAA,EAAU,OAAA;AAAA,UACV,QAAA,EAAU,OAAA;AAAA,UACV,MAAA,EAAQ,IAAA;AAAA,UACR,IAAA,EAAM,OAAA;AAAA,UACN,IAAA,EAAM,EAAA;AAAA,UACN,UAAA,EAAY,CAAA;AAAA,UACZ,OAAA,EAAS,IAAA;AAAA,UACT,QAAA,EAAU,CAAC,4CAA4C;AAAA,SACzD;AAAA,MACF;AACA,MAAA,MAAM,OAAA,GAAU;AAAA,QACd,GAAI,OAAA,EAAS,GAAA,IAAO,EAAC;AAAA,QACrB,MAAA,EAAQ,OAAA,EAAS,GAAA,EAAK,MAAA,IAAU;AAAA,OAClC;AACA,MAAA,MAAM,CAAA,GAAI,MAAMM,GAAAA,CAAI,IAAA,EAAM,OAAO,CAAA;AACjC,MAAA,OAAO;AAAA,QACL,QAAA,EAAU,OAAA;AAAA,QACV,QAAA,EAAU,OAAA;AAAA,QACV,MAAA,EAAQ,IAAA;AAAA,QACR,IAAA,EAAM,OAAA;AAAA,QACN,MAAM,CAAA,CAAE,IAAA;AAAA,QACR,YAAY,CAAA,CAAE,UAAA;AAAA,QACd,SAAS,CAAA,CAAE,OAAA;AAAA,QACX,UAAU;AAAC,OACb;AAAA,IACF;AAAA,IACA,KAAK,MAAA,EAAQ;AACX,MAAA,MAAM,IAAA,GAAO,MAAMN,YAAAA,CAAa,UAAU,CAAA;AAC1C,MAAA,IAAI,IAAA,CAAK,eAAe,CAAA,EAAG;AACzB,QAAA,OAAO;AAAA,UACL,QAAA,EAAU,MAAA;AAAA,UACV,QAAA,EAAU,MAAA;AAAA,UACV,MAAA,EAAQ,IAAA;AAAA,UACR,IAAA,EAAM,MAAA;AAAA,UACN,IAAA,EAAM,EAAA;AAAA,UACN,IAAA,EAAM,EAAA;AAAA,UACN,QAAA,EAAU,CAAC,+CAA+C;AAAA,SAC5D;AAAA,MACF;AACA,MAAA,MAAM,CAAA,GAAI,MAAM,WAAA,CAAY,IAAI,CAAA;AAChC,MAAA,OAAO;AAAA,QACL,
QAAA,EAAU,MAAA;AAAA,QACV,QAAA,EAAU,MAAA;AAAA,QACV,MAAA,EAAQ,IAAA;AAAA,QACR,IAAA,EAAM,MAAA;AAAA,QACN,MAAM,CAAA,CAAE,IAAA;AAAA,QACR,MAAM,CAAA,CAAE,IAAA;AAAA,QACR,QAAA,EAAU;AAAA,UACR,GAAG,CAAA,CAAE,QAAA;AAAA,UACL;AAAA;AACF,OACF;AAAA,IACF;AAAA,IACA,KAAK,MAAA;AACH,MAAA,OAAOO,WAAAA,CAAY,UAAA,EAAY,EAAE,MAAA,EAAQ,CAAA;AAAA,IAC3C;AACE,MAAA,OAAOF,oBAAAA,CAAqB,IAAA,EAAM,MAAA,EAAQ,CAACG,sBAAsB,CAAC,CAAA;AAAA;AAExE","file":"index.js","sourcesContent":["import { analyzeDocx as extractDocx } from \"@dragon708/docmind-docx\";\nimport type { AnalysisResult, FileLikeInput } from \"@dragon708/docmind-shared\";\nimport { toUint8Array } from \"@dragon708/docmind-shared\";\n\n/**\n * DOCX → `@dragon708/docmind-docx` (browser-safe: Mammoth + JSZip).\n */\nexport async function analyzeDocxForBrowser(\n input: FileLikeInput,\n signal?: AbortSignal,\n): Promise<AnalysisResult> {\n if (signal?.aborted) {\n const err = new Error(\"The operation was aborted\");\n err.name = \"AbortError\";\n throw err;\n }\n\n const data = await toUint8Array(input);\n if (data.byteLength === 0) {\n return {\n fileKind: \"docx\",\n analyzer: \"docx\",\n status: \"ok\",\n kind: \"docx\",\n text: \"\",\n html: \"\",\n warnings: [\"No document bytes were provided for analysis.\"],\n };\n }\n\n const r = await extractDocx(data);\n return {\n fileKind: \"docx\",\n analyzer: \"docx\",\n status: \"ok\",\n kind: \"docx\",\n text: r.text,\n html: r.html,\n warnings: [...r.warnings],\n };\n}\n","import { ocr } from \"@dragon708/docmind-ocr\";\nimport type { AnalysisResult, FileLikeInput } from \"@dragon708/docmind-shared\";\nimport { toUint8Array } from \"@dragon708/docmind-shared\";\nimport type { BrowserAnalyzeOptions } from \"../browserAnalyzeOptions.js\";\n\n/**\n * Image → `@dragon708/docmind-ocr` (Tesseract in WASM / browser).\n */\nexport async function analyzeImageForBrowser(\n input: FileLikeInput,\n options?: BrowserAnalyzeOptions,\n): Promise<AnalysisResult> {\n if (options?.signal?.aborted) {\n const err = 
new Error(\"The operation was aborted\");\n err.name = \"AbortError\";\n throw err;\n }\n\n const data = await toUint8Array(input);\n if (data.byteLength === 0) {\n return {\n fileKind: \"image\",\n analyzer: \"image\",\n status: \"ok\",\n kind: \"image\",\n text: \"\",\n confidence: 0,\n ocrUsed: true,\n warnings: [\"No image bytes were provided for analysis.\"],\n };\n }\n\n const ocrOpts = {\n ...(options?.ocr ?? {}),\n signal: options?.ocr?.signal ?? options?.signal,\n };\n\n const r = await ocr(data, ocrOpts);\n return {\n fileKind: \"image\",\n analyzer: \"image\",\n status: \"ok\",\n kind: \"image\",\n text: r.text,\n confidence: r.confidence,\n ocrUsed: r.ocrUsed,\n warnings: [],\n };\n}\n","import type { AnalysisResult, FileLikeInput } from \"@dragon708/docmind-shared\";\r\nimport {\r\n analyzeText,\r\n assertValidAnalyzeFileInput,\r\n detectFileKind,\r\n InvalidInputError,\r\n notImplementedResult,\r\n UNKNOWN_FORMAT_WARNING,\r\n} from \"@dragon708/docmind-shared\";\r\nimport type { DetectFileKindInput } from \"@dragon708/docmind-shared\";\r\nimport type { BrowserAnalyzeOptions } from \"./browserAnalyzeOptions.js\";\r\nimport { analyzeDocxForBrowser } from \"./analyzers/docx.js\";\r\nimport { analyzeImageForBrowser } from \"./analyzers/image.js\";\r\n\r\n/** PDF is not processed in the browser; use `@dragon708/docmind-node` on the server. 
*/\r\nexport const BROWSER_PDF_UNSUPPORTED_WARNING =\r\n \"PDF text extraction is not available in the browser runtime; use @dragon708/docmind-node on the server.\";\r\n\r\n/**\r\n * Inputs supported by the browser entry (DOM types only — no `fs`, no Node `Buffer` in the public surface).\r\n */\r\nexport type BrowserAnalyzeInput = File | Blob | ArrayBuffer;\r\n\r\nfunction assertBrowserInput(input: unknown): asserts input is BrowserAnalyzeInput {\r\n const ok =\r\n input instanceof File ||\r\n input instanceof Blob ||\r\n input instanceof ArrayBuffer;\r\n if (!ok) {\r\n throw new InvalidInputError(\"Expected a File, Blob, or ArrayBuffer.\");\r\n }\r\n}\r\n\r\n/**\r\n * Browser-only router: DOCX, images (OCR), and text. PDF yields `not_implemented` with a clear warning.\r\n */\r\nexport async function analyzeFile(\r\n input: BrowserAnalyzeInput,\r\n options?: BrowserAnalyzeOptions,\r\n): Promise<AnalysisResult> {\r\n if (options?.signal?.aborted) {\r\n const err = new Error(\"The operation was aborted\");\r\n err.name = \"AbortError\";\r\n throw err;\r\n }\r\n\r\n assertBrowserInput(input);\r\n assertValidAnalyzeFileInput(input);\r\n\r\n const fileKind = detectFileKind(input as DetectFileKindInput);\r\n\r\n const bytesInput = input as FileLikeInput;\r\n\r\n switch (fileKind) {\r\n case \"pdf\":\r\n return notImplementedResult(\"pdf\", \"pdf\", [BROWSER_PDF_UNSUPPORTED_WARNING]);\r\n case \"docx\":\r\n return analyzeDocxForBrowser(bytesInput, options?.signal);\r\n case \"image\":\r\n return analyzeImageForBrowser(bytesInput, options);\r\n case \"text\":\r\n return analyzeText(bytesInput, { signal: options?.signal });\r\n default:\r\n return notImplementedResult(fileKind, \"none\", [UNKNOWN_FORMAT_WARNING]);\r\n }\r\n}\r\n","import type { AnalysisResult, DetectFileKindInput, FileLikeInput } from \"@dragon708/docmind-shared\";\nimport {\n analyzeText,\n assertValidAnalyzeFileInput,\n detectFileKind,\n InvalidInputError,\n notImplementedResult,\n 
UNKNOWN_FORMAT_WARNING,\n toUint8Array,\n} from \"@dragon708/docmind-shared\";\nimport {\n analyzeDocx,\n convertDocxToHtml,\n extractTextFromDocx,\n} from \"@dragon708/docmind-docx\";\nimport { ocr } from \"@dragon708/docmind-ocr\";\nimport type { BrowserAnalyzeOptions } from \"./browserAnalyzeOptions.js\";\nimport { BROWSER_PDF_UNSUPPORTED_WARNING } from \"./analyzeFile.js\";\nimport type { BrowserAnalyzeInput } from \"./analyzeFile.js\";\n\nfunction assertBrowserInput(input: unknown): asserts input is BrowserAnalyzeInput {\n const ok =\n input instanceof File ||\n input instanceof Blob ||\n input instanceof ArrayBuffer;\n if (!ok) {\n throw new InvalidInputError(\"Expected a File, Blob, or ArrayBuffer.\");\n }\n}\n\nfunction throwIfAborted(signal?: AbortSignal): void {\n if (signal?.aborted) {\n const err = new Error(\"The operation was aborted\");\n err.name = \"AbortError\";\n throw err;\n }\n}\n\nfunction escapeHtmlMinimal(s: string): string {\n return s\n .replace(/&/g, \"&amp;\")\n .replace(/</g, \"&lt;\")\n .replace(/>/g, \"&gt;\")\n .replace(/\"/g, \"&quot;\");\n}\n\nconst DOCX_METADATA_STUB =\n \"Structured document metadata for DOCX is not exposed as a separate API; use extractText or analyzeFile.\";\n\nconst IMAGE_METADATA_NOTE =\n \"Raster images have no document metadata bundle in this API.\";\n\nasync function kindOf(input: BrowserAnalyzeInput): Promise<DetectFileKindInput> {\n assertBrowserInput(input);\n assertValidAnalyzeFileInput(input);\n return input as DetectFileKindInput;\n}\n\n/**\n * Text only: DOCX → `extractTextFromDocx`; imagen → `ocr`; texto → `analyzeText`.\n * PDF no está soportado en el navegador (mismo aviso que `analyzeFile`).\n */\nexport async function extractText(\n input: BrowserAnalyzeInput,\n options?: BrowserAnalyzeOptions,\n): Promise<AnalysisResult> {\n throwIfAborted(options?.signal);\n const resolved = await kindOf(input);\n const kind = detectFileKind(resolved);\n const bytesInput = input as FileLikeInput;\n const signal = 
options?.signal;\n\n switch (kind) {\n case \"pdf\":\n return notImplementedResult(\"pdf\", \"pdf\", [BROWSER_PDF_UNSUPPORTED_WARNING]);\n case \"docx\": {\n const data = await toUint8Array(bytesInput);\n if (data.byteLength === 0) {\n return {\n fileKind: \"docx\",\n analyzer: \"docx\",\n status: \"ok\",\n kind: \"docx\",\n text: \"\",\n html: \"\",\n warnings: [\"No document bytes were provided for analysis.\"],\n };\n }\n const r = await extractTextFromDocx(data);\n return {\n fileKind: \"docx\",\n analyzer: \"docx\",\n status: \"ok\",\n kind: \"docx\",\n text: r.text,\n html: \"\",\n warnings: r.warnings,\n };\n }\n case \"image\": {\n const data = await toUint8Array(bytesInput);\n if (data.byteLength === 0) {\n return {\n fileKind: \"image\",\n analyzer: \"image\",\n status: \"ok\",\n kind: \"image\",\n text: \"\",\n confidence: 0,\n ocrUsed: true,\n warnings: [\"No image bytes were provided for analysis.\"],\n };\n }\n const ocrOpts = {\n ...(options?.ocr ?? {}),\n signal: options?.ocr?.signal ?? 
signal,\n };\n const r = await ocr(data, ocrOpts);\n return {\n fileKind: \"image\",\n analyzer: \"image\",\n status: \"ok\",\n kind: \"image\",\n text: r.text,\n confidence: r.confidence,\n ocrUsed: r.ocrUsed,\n warnings: [],\n };\n }\n case \"text\":\n return analyzeText(bytesInput, { signal });\n default:\n return notImplementedResult(kind, \"none\", [UNKNOWN_FORMAT_WARNING]);\n }\n}\n\n/**\n * Metadatos: en el navegador no hay pipeline PDF ni metadatos DOCX dedicados;\n * DOCX/imagen con avisos; texto → `analyzeText`.\n */\nexport async function extractMetadata(\n input: BrowserAnalyzeInput,\n options?: BrowserAnalyzeOptions,\n): Promise<AnalysisResult> {\n throwIfAborted(options?.signal);\n const resolved = await kindOf(input);\n const kind = detectFileKind(resolved);\n const bytesInput = input as FileLikeInput;\n const signal = options?.signal;\n\n switch (kind) {\n case \"pdf\":\n return notImplementedResult(\"pdf\", \"pdf\", [BROWSER_PDF_UNSUPPORTED_WARNING]);\n case \"docx\":\n return {\n fileKind: \"docx\",\n analyzer: \"docx\",\n status: \"ok\",\n kind: \"docx\",\n text: \"\",\n html: \"\",\n warnings: [DOCX_METADATA_STUB],\n };\n case \"image\":\n return {\n fileKind: \"image\",\n analyzer: \"image\",\n status: \"ok\",\n kind: \"image\",\n text: \"\",\n confidence: 0,\n ocrUsed: true,\n warnings: [IMAGE_METADATA_NOTE],\n };\n case \"text\":\n return analyzeText(bytesInput, { signal });\n default:\n return notImplementedResult(kind, \"none\", [UNKNOWN_FORMAT_WARNING]);\n }\n}\n\n/**\n * HTML: DOCX → `extractTextFromDocx` + `convertDocxToHtml`; texto → `<pre>`;\n * PDF/imagen no aplican en browser como HTML rico.\n */\nexport async function convertToHtml(\n input: BrowserAnalyzeInput,\n options?: BrowserAnalyzeOptions,\n): Promise<AnalysisResult> {\n throwIfAborted(options?.signal);\n const resolved = await kindOf(input);\n const kind = detectFileKind(resolved);\n const bytesInput = input as FileLikeInput;\n const signal = options?.signal;\n\n switch 
(kind) {\n case \"pdf\":\n return notImplementedResult(\"pdf\", \"pdf\", [BROWSER_PDF_UNSUPPORTED_WARNING]);\n case \"docx\": {\n const data = await toUint8Array(bytesInput);\n if (data.byteLength === 0) {\n return {\n fileKind: \"docx\",\n analyzer: \"docx\",\n status: \"ok\",\n kind: \"docx\",\n text: \"\",\n html: \"\",\n warnings: [\"No document bytes were provided for analysis.\"],\n };\n }\n const [textPart, htmlPart] = await Promise.all([\n extractTextFromDocx(data),\n convertDocxToHtml(data),\n ]);\n return {\n fileKind: \"docx\",\n analyzer: \"docx\",\n status: \"ok\",\n kind: \"docx\",\n text: textPart.text,\n html: htmlPart.html,\n warnings: [...textPart.warnings, ...htmlPart.warnings],\n };\n }\n case \"text\": {\n const t = await analyzeText(bytesInput, { signal });\n const html = `<pre>${escapeHtmlMinimal(t.text)}</pre>`;\n return {\n ...t,\n html,\n warnings: [\n ...t.warnings,\n \"HTML for plain text is a <pre> wrapper around decoded UTF-8 content.\",\n ],\n } as AnalysisResult;\n }\n case \"image\":\n return {\n fileKind: \"image\",\n analyzer: \"image\",\n status: \"ok\",\n kind: \"image\",\n text: \"\",\n confidence: 0,\n ocrUsed: true,\n warnings: [\"No HTML representation for raster images; use extractText / runOcr.\"],\n };\n default:\n return notImplementedResult(kind, \"none\", [UNKNOWN_FORMAT_WARNING]);\n }\n}\n\n/**\n * OCR: imagen → `ocr`; DOCX → `analyzeDocx` con aviso (sin OCR); texto → `analyzeText`.\n * PDF no soportado en browser.\n */\nexport async function runOcr(\n input: BrowserAnalyzeInput,\n options?: BrowserAnalyzeOptions,\n): Promise<AnalysisResult> {\n throwIfAborted(options?.signal);\n const resolved = await kindOf(input);\n const kind = detectFileKind(resolved);\n const bytesInput = input as FileLikeInput;\n const signal = options?.signal;\n\n switch (kind) {\n case \"pdf\":\n return notImplementedResult(\"pdf\", \"pdf\", [BROWSER_PDF_UNSUPPORTED_WARNING]);\n case \"image\": {\n const data = await 
toUint8Array(bytesInput);\n if (data.byteLength === 0) {\n return {\n fileKind: \"image\",\n analyzer: \"image\",\n status: \"ok\",\n kind: \"image\",\n text: \"\",\n confidence: 0,\n ocrUsed: true,\n warnings: [\"No image bytes were provided for analysis.\"],\n };\n }\n const ocrOpts = {\n ...(options?.ocr ?? {}),\n signal: options?.ocr?.signal ?? signal,\n };\n const r = await ocr(data, ocrOpts);\n return {\n fileKind: \"image\",\n analyzer: \"image\",\n status: \"ok\",\n kind: \"image\",\n text: r.text,\n confidence: r.confidence,\n ocrUsed: r.ocrUsed,\n warnings: [],\n };\n }\n case \"docx\": {\n const data = await toUint8Array(bytesInput);\n if (data.byteLength === 0) {\n return {\n fileKind: \"docx\",\n analyzer: \"docx\",\n status: \"ok\",\n kind: \"docx\",\n text: \"\",\n html: \"\",\n warnings: [\"No document bytes were provided for analysis.\"],\n };\n }\n const r = await analyzeDocx(data);\n return {\n fileKind: \"docx\",\n analyzer: \"docx\",\n status: \"ok\",\n kind: \"docx\",\n text: r.text,\n html: r.html,\n warnings: [\n ...r.warnings,\n \"OCR does not apply to DOCX; returned structured text/HTML extract.\",\n ],\n };\n }\n case \"text\":\n return analyzeText(bytesInput, { signal });\n default:\n return notImplementedResult(kind, \"none\", [UNKNOWN_FORMAT_WARNING]);\n }\n}\n"]}
|
package/package.json
CHANGED