albex 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/albex.ts ADDED
@@ -0,0 +1,577 @@
1
+ /**
2
+ * Albex — local full-text search engine.
3
+ *
4
+ * Zero-dependency TypeScript/ESM wrapper around albex_wasm_bg.wasm and
5
+ * (optionally) albex_pdf.wasm. All text stays in-browser; nothing is sent
6
+ * to any server.
7
+ *
8
+ * @example
9
+ * ```ts
10
+ * const engine = new AlbexEngine({ wasmUrl: './wasm/pkg/albex_wasm_bg.wasm' });
11
+ * await engine.init();
12
+ * await engine.indexFile(myFile);
13
+ * const results = engine.search('contrato marco');
14
+ * ```
15
+ */
16
+
17
+ // ─────────────────────────────────────────────────────────────────────────────
18
+ // Public types
19
+ // ─────────────────────────────────────────────────────────────────────────────
20
+
21
/**
 * Construction options for {@link AlbexEngine}.
 *
 * Both values are passed to `fetch()` when the corresponding WASM module is
 * loaded.
 */
export interface AlbexOptions {
  /** Location of `albex_wasm_bg.wasm`, the main search module. Required. */
  wasmUrl: string;

  /**
   * Location of `albex_pdf.wasm`. Only needed when PDF files are indexed;
   * the module is fetched lazily the first time a PDF is indexed.
   */
  pdfWasmUrl?: string;
}
27
+
28
/** Bookkeeping record produced for every file passed to `indexFile()`. */
export interface IndexedDocument {
  /** File name exactly as reported by the `File` object. */
  name: string;

  /** Lower-cased text after the last `.` of the file name. */
  ext: string;

  /** Chunk count reported by the engine when the document was closed. */
  chunks: number;

  /** Wall-clock time spent indexing, in milliseconds. */
  indexTimeMs: number;

  /** Growth of the engine's "text used" counter caused by this document. */
  textBytes: number;
}
35
+
36
/** One hit returned by `AlbexEngine.search()`. */
export interface SearchResult {
  /** Name of the document the hit belongs to (`'?'` when the engine reports none). */
  documentName: string;

  /** Paragraph index (DOCX/TXT) or page number (PDF, 1-based). */
  location: number;

  /** Relevance score 0–1000. */
  score: number;

  /** Raw snippet text (original, with accents). */
  snippet: string;

  /** Match start byte offset within snippet. */
  matchStart: number;

  /** Match end byte offset within snippet (exclusive). */
  matchEnd: number;
}
49
+
50
/** Snapshot of engine-wide counters returned by `AlbexEngine.getStats()`. */
export interface EngineStats {
  /** Number of documents indexed through this wrapper since the last reset. */
  documents: number;

  /** Total chunk count reported by the engine. */
  chunks: number;

  /** Amount of text storage currently in use, as reported by the engine. */
  textUsed: number;

  /** Total text storage capacity, as reported by the engine. */
  textCapacity: number;

  /** Current byte length of the main module's linear memory. */
  wasmMemoryBytes: number;
}
57
+
58
/** Diagnostics for the most recent search, see `AlbexEngine.getLastSearchStats()`. */
export interface SearchStats {
  /** Query string as typed by the caller. */
  query: string;

  /** Time spent inside the engine's search call, in milliseconds. */
  timeMs: number;

  /** Number of results reported for the search. */
  results: number;

  /** Engine counter exposed by `getStatBloomTested`. */
  bloomTested: number;

  /** Engine counter exposed by `getStatBloomPassed`. */
  bloomPassed: number;

  /** Engine counter exposed by `getStatBitapMatched`. */
  bitapMatched: number;
}
66
+
67
+ // ─────────────────────────────────────────────────────────────────────────────
68
+ // Query parsing
69
+ // ─────────────────────────────────────────────────────────────────────────────
70
+
71
/** Plain query: every token must match (AND). */
type SimpleQuery = {
  kind: 'simple';
  tokens: string[];
};

/** Quoted query: tokens must additionally appear as a phrase; `raw` is the text between the quotes. */
type PhraseQuery = {
  kind: 'phrase';
  tokens: string[];
  raw: string;
};

/** `a | b` query: each branch is an independent token list whose results are merged. */
type OrQuery = {
  kind: 'or';
  branches: string[][];
};

/** Discriminated union over the three supported query shapes, tagged by `kind`. */
type ParsedQuery =
  | SimpleQuery
  | PhraseQuery
  | OrQuery;
75
+
76
/**
 * Split a query into its whitespace-separated tokens.
 *
 * @param q Raw query text; leading, trailing and repeated whitespace is ignored.
 * @returns The non-empty tokens in order; an empty array for blank input.
 */
function tokenize(q: string): string[] {
  const words = q.match(/\S+/g);
  return words === null ? [] : [...words];
}
79
+
80
/**
 * Classify a user query as an OR, phrase or simple query.
 *
 * Precedence:
 *  1. Any `|` makes it an OR query. Double quotes inside the branches are
 *     dropped and branches without tokens are discarded.
 *  2. A query wrapped in one pair of double quotes is a phrase query.
 *  3. Everything else is a simple query.
 *
 * @param q Raw query text.
 * @returns The parsed query, tagged by `kind`.
 */
function parseQuery(q: string): ParsedQuery {
  const text = q.trim();

  // OR: "term1 | term2" or "phrase one | phrase two"
  if (text.indexOf('|') !== -1) {
    const branches: string[][] = [];
    for (const part of text.split('|')) {
      const branchTokens = tokenize(part.replace(/"/g, ''));
      if (branchTokens.length > 0) branches.push(branchTokens);
    }
    return { kind: 'or', branches };
  }

  // Phrase: "exact phrase here"
  const quoted = /^"(.+)"$/.exec(text);
  if (quoted === null) {
    return { kind: 'simple', tokens: tokenize(text) };
  }

  const raw = quoted[1] ?? '';
  return { kind: 'phrase', tokens: tokenize(raw), raw };
}
101
+
102
/**
 * Build the query string handed to the WASM engine.
 *
 * The engine accepts at most 4 space-separated tokens (AND semantics), so
 * any tokens beyond the fourth are dropped.
 *
 * @param tokens Parsed query tokens.
 * @returns The first four tokens joined by single spaces.
 */
function tokensToWasmQuery(tokens: string[]): string {
  const MAX_WASM_TOKENS = 4;
  const kept = tokens.length > MAX_WASM_TOKENS ? tokens.slice(0, MAX_WASM_TOKENS) : tokens;
  return kept.join(' ');
}
109
+
110
+ // ─────────────────────────────────────────────────────────────────────────────
111
+ // Phrase post-filter
112
+ // ─────────────────────────────────────────────────────────────────────────────
113
+
114
/**
 * Returns true if `snippet` contains the phrase formed by `tokens` in order,
 * with at most `maxGap` characters between consecutive tokens.
 * Comparison is case- and accent-insensitive.
 *
 * Matching is by substring, so a token may match inside a longer word.
 * Every occurrence of every token is considered: an earlier stray
 * occurrence of a token does not hide a valid phrase further along.
 *
 * @param snippet Text to inspect.
 * @param tokens  Phrase tokens in the required order; tokens that are empty
 *                after normalisation are ignored.
 * @param maxGap  Maximum number of characters allowed between the end of one
 *                token and the start of the next (default 30).
 * @returns `true` when the phrase is present, or when no usable token remains.
 */
function containsPhrase(snippet: string, tokens: string[], maxGap = 30): boolean {
  // Lower-case, decompose, then strip combining diacritical marks (U+0300–U+036F).
  const norm = (s: string): string =>
    s.toLowerCase().normalize('NFKD').replace(/[\u0300-\u036f]/g, '');

  const text = norm(snippet);
  const normTokens = tokens.map(norm).filter(t => t.length > 0);
  if (normTokens.length === 0) return true;

  // (token index, start position) pairs already proven not to lead to a match.
  // Memoising them keeps the backtracking search polynomial.
  const width = text.length + 1;
  const deadEnds = new Set<number>();

  const matchFrom = (t: number, pos: number): boolean => {
    if (t === normTokens.length) return true;

    const key = t * width + pos;
    if (deadEnds.has(key)) return false;

    const tok = normTokens[t] ?? '';
    // The first token may start anywhere; later ones must start within
    // `maxGap` characters of where the previous token ended.
    const lastStart = t === 0 ? text.length : pos + maxGap;

    for (
      let idx = text.indexOf(tok, pos);
      idx !== -1 && idx <= lastStart;
      idx = text.indexOf(tok, idx + 1)
    ) {
      if (matchFrom(t + 1, idx + tok.length)) return true;
    }

    deadEnds.add(key);
    return false;
  };

  return matchFrom(0, 0);
}
138
+
139
+ // ─────────────────────────────────────────────────────────────────────────────
140
+ // ZIP helpers (DOCX + XLSX)
141
+ // ─────────────────────────────────────────────────────────────────────────────
142
+
143
// Shared UTF-8 codec instances, reused by the helpers in this module.
const _dec = new TextDecoder('utf-8');
const _enc = new TextEncoder();
145
+
146
/**
 * Locate the ZIP end-of-central-directory (EOCD) record.
 *
 * Scans backwards from the last position where a 22-byte EOCD record could
 * start, looking for the signature `0x06054b50`.
 *
 * @param bytes Complete ZIP archive.
 * @returns A `DataView` over `bytes`, the central directory offset (`cdOff`)
 *          and the entry count read from the EOCD record (`cdN`).
 * @throws Error('Not a ZIP file') when no EOCD signature is found.
 */
function zipCentralDir(bytes: Uint8Array): { v: DataView; cdOff: number; cdN: number } {
  const EOCD_SIGNATURE = 0x06054b50;
  const EOCD_MIN_SIZE = 22;

  const v = new DataView(bytes.buffer, bytes.byteOffset, bytes.byteLength);

  for (let at = bytes.length - EOCD_MIN_SIZE; at >= 0; at--) {
    if (v.getUint32(at, true) === EOCD_SIGNATURE) {
      return {
        v,
        cdOff: v.getUint32(at + 16, true),
        cdN: v.getUint16(at + 10, true),
      };
    }
  }

  throw new Error('Not a ZIP file');
}
153
+
154
/**
 * List the entry names recorded in a ZIP archive's central directory.
 *
 * Walks at most `cdN` central-directory headers and stops early at the
 * first record that does not carry the header signature `0x02014b50`.
 *
 * @param bytes Complete ZIP archive.
 * @returns Entry names in central-directory order.
 * @throws Error('Not a ZIP file') when the archive has no EOCD record.
 */
function listZipEntries(bytes: Uint8Array): string[] {
  const { v: view, cdOff, cdN } = zipCentralDir(bytes);
  const found: string[] = [];

  let cursor = cdOff;
  let remaining = cdN;
  while (remaining-- > 0 && view.getUint32(cursor, true) === 0x02014b50) {
    const nameLen = view.getUint16(cursor + 28, true);
    const extraLen = view.getUint16(cursor + 30, true);
    const commentLen = view.getUint16(cursor + 32, true);

    // The file name follows the fixed 46-byte header.
    const nameStart = cursor + 46;
    found.push(_dec.decode(bytes.subarray(nameStart, nameStart + nameLen)));

    cursor = nameStart + nameLen + extraLen + commentLen;
  }

  return found;
}
166
+
167
/**
 * Find an entry by exact name in a ZIP archive and return its decompressed bytes.
 *
 * @param bytes Complete ZIP archive.
 * @param name  Entry name to look for (exact, case-sensitive comparison).
 * @returns The entry's content, decompressed by `decompEntry`.
 * @throws Error when the archive is not a ZIP or holds no entry called `name`.
 */
async function findZipEntry(bytes: Uint8Array, name: string): Promise<Uint8Array> {
  const { v: view, cdOff, cdN } = zipCentralDir(bytes);

  for (let seen = 0, cursor = cdOff; seen < cdN; seen++) {
    // Stop at the first record that is not a central-directory header.
    if (view.getUint32(cursor, true) !== 0x02014b50) break;

    const nameLen = view.getUint16(cursor + 28, true);
    const extraLen = view.getUint16(cursor + 30, true);
    const commentLen = view.getUint16(cursor + 32, true);
    const localHeaderOff = view.getUint32(cursor + 42, true);
    const compressedSize = view.getUint32(cursor + 20, true);

    const entryName = _dec.decode(bytes.subarray(cursor + 46, cursor + 46 + nameLen));
    if (entryName === name) {
      return decompEntry(bytes, view, localHeaderOff, compressedSize);
    }

    cursor += 46 + nameLen + extraLen + commentLen;
  }

  throw new Error(`Entry "${name}" not found in ZIP`);
}
181
+
182
/**
 * Extract the payload of a single ZIP entry given its local file header.
 *
 * @param bytes    Complete ZIP archive.
 * @param v        `DataView` over `bytes`.
 * @param off      Offset of the entry's local file header.
 * @param compSize Compressed size from the central directory; when it is 0
 *                 the size stored in the local header is used instead.
 * @returns The stored bytes (method 0, a view into `bytes`) or the inflated
 *          bytes (method 8).
 * @throws Error for any other compression method.
 */
async function decompEntry(bytes: Uint8Array, v: DataView, off: number, compSize: number): Promise<Uint8Array> {
  const method = v.getUint16(off + 8, true);
  const nameLen = v.getUint16(off + 26, true);
  const extraLen = v.getUint16(off + 28, true);

  // Payload starts after the 30-byte fixed header, the name and the extra field.
  const dataStart = off + 30 + nameLen + extraLen;
  // Use central-directory size (reliable even for streaming ZIPs with local-header=0).
  const size = compSize > 0 ? compSize : v.getUint32(off + 18, true);
  const payload = bytes.subarray(dataStart, dataStart + size);

  switch (method) {
    case 0:
      return payload;
    case 8:
      break;
    default:
      throw new Error(`Unsupported ZIP compression method ${method}`);
  }

  const inflater = new DecompressionStream('deflate-raw');
  const writer = inflater.writable.getWriter();
  const reader = inflater.readable.getReader();

  // Copy into a plain ArrayBuffer-backed Uint8Array to satisfy strict DOM types.
  // Write/close rejections are ignored here; the read loop below is what is awaited.
  writer.write(payload.slice()).catch(() => {});
  writer.close().catch(() => {});

  const parts: Uint8Array[] = [];
  let total = 0;
  let step = await reader.read();
  while (!step.done) {
    parts.push(step.value);
    total += step.value.length;
    step = await reader.read();
  }

  const out = new Uint8Array(total);
  parts.reduce((at, part) => {
    out.set(part, at);
    return at + part.length;
  }, 0);
  return out;
}
207
+
208
+ // ─────────────────────────────────────────────────────────────────────────────
209
+ // WASM memory helpers (internal)
210
+ // ─────────────────────────────────────────────────────────────────────────────
211
+
212
// Maximum number of bytes copied into the scratchpad per feed call:
// 32 KB — fits in 64 KB scratchpad.
const FEED_SIZE = 32 * 1024;
213
+
214
+ // ─────────────────────────────────────────────────────────────────────────────
215
+ // PDF WASM imports shim
216
+ // ─────────────────────────────────────────────────────────────────────────────
217
+
218
/**
 * Build the import object expected by the PDF WASM module.
 *
 * Two import namespaces are provided, each holding minimal stand-ins that
 * operate on a private array (`slots`):
 *  - `__wbindgen_placeholder__`: a no-op `__wbindgen_describe`, a random-bytes
 *    import that fills a range of the PDF module's memory, and
 *    `__wbindgen_object_drop_ref`.
 *  - `__wbindgen_externref_xform__`: grow / set-null operations on the array.
 *
 * @param getPdfMem Returns the PDF module's memory. It is only called when
 *                  the random-bytes import runs, so it may refer to a memory
 *                  that does not exist yet when this function is called.
 * @returns The import object to pass to `WebAssembly.instantiate*`.
 */
function makePdfWasmImports(getPdfMem: () => WebAssembly.Memory): WebAssembly.Imports {
  const slots: unknown[] = [];
  // Index of the most recently dropped slot; each dropped slot stores the previous value.
  let lastDropped = -1;

  const describe = (): void => {};

  const fillRandom = (ptr: number, len: number): void => {
    const memory = getPdfMem();
    // `>>> 0` reinterprets the values as unsigned 32-bit integers.
    const target = new Uint8Array(memory.buffer, ptr >>> 0, len >>> 0);
    crypto.getRandomValues(target);
  };

  const dropRef = (idx: number): void => {
    slots[idx] = lastDropped;
    lastDropped = idx;
  };

  const growTable = (delta: number): number => {
    const previousLength = slots.length;
    let added = 0;
    while (added < delta) {
      slots.push(undefined);
      added++;
    }
    return previousLength;
  };

  const setNull = (idx: number): void => {
    slots[idx] = undefined;
  };

  return {
    __wbindgen_placeholder__: {
      __wbindgen_describe: describe,
      __wbg_getRandomValues_3f44b700395062e5: fillRandom,
      __wbindgen_object_drop_ref: dropRef,
    },
    __wbindgen_externref_xform__: {
      __wbindgen_externref_table_grow: growTable,
      __wbindgen_externref_table_set_null: setNull,
    },
  };
}
242
+
243
+ // ─────────────────────────────────────────────────────────────────────────────
244
+ // AlbexEngine
245
+ // ─────────────────────────────────────────────────────────────────────────────
246
+
247
/**
 * Wrapper around the Albex WASM search engine.
 *
 * Typical use: construct, `await init()`, `await indexFile(file)` for each
 * document, then call `search(query)`.
 *
 * Data is exchanged with the main WASM module through a scratchpad buffer
 * obtained from its `getBuffer` export: the wrapper copies bytes in and then
 * calls an export with the byte length, or calls an export that returns a
 * byte length and then reads that many bytes back.
 */
export class AlbexEngine {
  // ── main WASM ──
  private _wasm!: WebAssembly.Exports;
  private _mem!: WebAssembly.Memory;

  // ── PDF WASM (lazy) ──
  private _pdfWasm: WebAssembly.Exports | null = null;
  private _pdfMem: WebAssembly.Memory | null = null;
  /** In-flight PDF module load, shared by concurrent callers. */
  private _pdfLoading: Promise<void> | null = null;

  private _docs: IndexedDocument[] = [];
  private _lastSearch: SearchStats | null = null;
  private readonly _opts: AlbexOptions;

  /** Named XML entities decoded by `_indexXml`. */
  private static readonly _XML_ENTITIES: Readonly<Record<string, string>> = {
    amp: '&',
    lt: '<',
    gt: '>',
    quot: '"',
    apos: "'",
  };

  /**
   * @param opts URLs of the WASM modules. Nothing is fetched until `init()`
   *             (main module) or the first PDF is indexed (PDF module).
   */
  constructor(opts: AlbexOptions) {
    this._opts = opts;
  }

  /**
   * Load and initialise the main WASM module. Must be called before any other method.
   *
   * @throws Error when the module cannot be fetched (non-2xx response).
   */
  async init(): Promise<void> {
    const res = await fetch(this._opts.wasmUrl);
    if (!res.ok) throw new Error(`Failed to fetch WASM: ${res.status}`);
    const { instance } = await WebAssembly.instantiateStreaming(res, {});
    this._wasm = instance.exports;
    this._mem = instance.exports.memory as WebAssembly.Memory;
    this._call('init');
  }

  // ── Internal helpers ──────────────────────────────────────────────────────

  /**
   * Call a numeric export of a WASM module, failing with a readable error
   * when the export is missing instead of an opaque `TypeError`.
   */
  private static _invoke(exports: WebAssembly.Exports, name: string, ...args: number[]): number {
    const fn = exports[name];
    if (typeof fn !== 'function') throw new Error(`WASM export "${name}" is not a function`);
    return (fn as (...a: number[]) => number)(...args);
  }

  /** Call an export of the main module; requires `init()` to have completed. */
  private _call(name: string, ...args: number[]): number {
    if (!this._wasm) throw new Error('AlbexEngine.init() must be called before using the engine');
    return AlbexEngine._invoke(this._wasm, name, ...args);
  }

  /** Fresh view over main-module memory (re-created each time because the buffer may be replaced when memory grows). */
  private _u8(off: number, n: number): Uint8Array {
    return new Uint8Array(this._mem.buffer, off, n);
  }

  /**
   * Copy `b` into the scratchpad.
   *
   * @returns The scratchpad pointer.
   * @throws Error when `getBuffer` returns 0 for the requested size.
   */
  private _writePad(b: Uint8Array): number {
    const ptr = this._call('getBuffer', b.length);
    if (!ptr) throw new Error('Scratchpad too small for this chunk');
    this._u8(ptr, b.length).set(b);
    return ptr;
  }

  /** UTF-8 encode `s` into the scratchpad and return its byte length. */
  private _writeStr(s: string): number {
    const b = _enc.encode(s);
    this._writePad(b);
    return b.length;
  }

  /** Decode the first `n` scratchpad bytes as UTF-8. */
  private _readPad(n: number): string {
    const ptr = this._call('getBuffer', 0);
    return _dec.decode(this._u8(ptr, n));
  }

  /**
   * Feed text to the engine in chunks of at most `FEED_SIZE` bytes.
   *
   * Chunks are cut on UTF-8 character boundaries so that no multi-byte
   * character is split across two `feedText` calls. Whether the engine
   * tolerates split characters is not visible from this file; aligning the
   * cut is harmless either way.
   */
  private _feedText(text: string): void {
    const b = _enc.encode(text);
    let start = 0;
    while (start < b.length) {
      let end = Math.min(start + FEED_SIZE, b.length);
      if (end < b.length) {
        // 0b10xxxxxx marks a continuation byte: step back to the lead byte.
        let aligned = end;
        while (aligned > start && ((b[aligned] ?? 0) & 0xc0) === 0x80) aligned--;
        if (aligned > start) end = aligned;
      }
      const c = b.subarray(start, end);
      this._writePad(c);
      this._call('feedText', c.length);
      start = end;
    }
  }

  /** Feed raw XML bytes to the given export in chunks of at most `FEED_SIZE` bytes. */
  private _feedXmlBytes(xml: Uint8Array, fn: 'feedXmlBytes' | 'feedXlsxBytes'): void {
    for (let i = 0; i < xml.length; i += FEED_SIZE) {
      const c = xml.subarray(i, i + FEED_SIZE);
      this._writePad(c);
      this._call(fn, c.length);
    }
  }

  /**
   * Decode the five predefined XML entities and numeric character
   * references in a single pass, so that already-decoded output is never
   * decoded again (`&amp;lt;` becomes `&lt;`, not `<`).
   */
  private static _decodeXmlEntities(s: string): string {
    return s.replace(
      /&(?:#(\d+)|#x([0-9a-fA-F]+)|(amp|lt|gt|quot|apos));/g,
      (whole: string, dec?: string, hex?: string, named?: string): string => {
        if (named !== undefined) return AlbexEngine._XML_ENTITIES[named] ?? whole;
        const cp = dec !== undefined ? parseInt(dec, 10) : parseInt(hex ?? '', 16);
        // Leave out-of-range references untouched rather than throwing.
        return Number.isInteger(cp) && cp > 0 && cp <= 0x10ffff ? String.fromCodePoint(cp) : whole;
      },
    );
  }

  // ── PDF WASM (lazy load) ─────────────────────────────────────────────────

  /**
   * Make sure the PDF module is loaded. Concurrent callers share one load;
   * a failed load is not cached, so a later call retries.
   *
   * @throws Error when `pdfWasmUrl` is not configured or the fetch fails.
   */
  private async _ensurePdfWasm(): Promise<void> {
    if (this._pdfWasm) return;
    if (!this._pdfLoading) {
      this._pdfLoading = this._loadPdfWasm().finally(() => {
        this._pdfLoading = null;
      });
    }
    await this._pdfLoading;
  }

  /** Fetch and instantiate the PDF module, then publish its exports and memory. */
  private async _loadPdfWasm(): Promise<void> {
    const url = this._opts.pdfWasmUrl;
    if (!url) throw new Error('pdfWasmUrl not set in AlbexOptions');
    const res = await fetch(url);
    if (!res.ok) throw new Error(`Failed to fetch PDF WASM: ${res.status}`);

    // The import shim is bound to this instance's own memory, not to a
    // mutable field, so it can never write into another instance's memory.
    let mem: WebAssembly.Memory | null = null;
    const imports = makePdfWasmImports(() => {
      if (!mem) throw new Error('PDF WASM memory accessed before instantiation completed');
      return mem;
    });
    const { instance } = await WebAssembly.instantiateStreaming(res, imports);
    mem = instance.exports.memory as WebAssembly.Memory;
    this._pdfWasm = instance.exports;
    this._pdfMem = mem;
  }

  // ── Indexers ──────────────────────────────────────────────────────────────
  //
  // Every indexer finishes its asynchronous work (reading and unpacking the
  // file) BEFORE opening a document in the engine. The begin…end sequence
  // then runs synchronously, so overlapping `indexFile()` calls cannot
  // interleave inside a document, and a failure while reading the file does
  // not leave a document open.

  /** Index `word/document.xml` of a DOCX file. Returns the engine's chunk count. */
  private async _indexDocx(file: File): Promise<number> {
    const bytes = new Uint8Array(await file.arrayBuffer());
    const xml = await findZipEntry(bytes, 'word/document.xml');
    this._call('setDocumentName', this._writeStr(file.name));
    this._call('beginDocument');
    this._feedXmlBytes(xml, 'feedXmlBytes');
    return this._call('endDocument');
  }

  /**
   * Index the shared strings and every `xl/worksheets/sheetN.xml` of an
   * XLSX file. Missing or unreadable parts are skipped (best effort).
   *
   * @throws Error('Not a ZIP file') when the file is not a ZIP archive.
   */
  private async _indexXlsx(file: File): Promise<number> {
    const bytes = new Uint8Array(await file.arrayBuffer());
    const sheets = listZipEntries(bytes).filter(n => /^xl\/worksheets\/sheet\d+\.xml$/.test(n));

    // Unpack everything first: shared strings, then sheets in archive order.
    const parts: Uint8Array[] = [];
    for (const name of ['xl/sharedStrings.xml', ...sheets]) {
      try {
        parts.push(await findZipEntry(bytes, name));
      } catch { /* workbook may have no shared strings; skip corrupt/missing sheet */ }
    }

    this._call('setDocumentName', this._writeStr(file.name));
    this._call('beginXlsx');
    for (const xml of parts) {
      try {
        this._feedXmlBytes(xml, 'feedXlsxBytes');
      } catch { /* best effort: a part that cannot be fed is skipped */ }
    }
    return this._call('endDocument');
  }

  /**
   * Extract text with the PDF module and index it page by page.
   *
   * `extractPdf` returns the page count, `-2` for an image-only PDF (the
   * document is registered with zero chunks), or another negative value on
   * failure.
   *
   * @throws Error with the PDF module's message (or 'PDF parse error') on failure.
   */
  private async _indexPdf(file: File): Promise<number> {
    await this._ensurePdfWasm();
    const pw = this._pdfWasm;
    const pm = this._pdfMem;
    if (!pw || !pm) throw new Error('PDF WASM module is not available');
    const bytes = new Uint8Array(await file.arrayBuffer());

    const inPtr = AlbexEngine._invoke(pw, 'allocInput', bytes.length);
    // `pm.buffer` is read after allocInput on purpose: allocating may replace it.
    new Uint8Array(pm.buffer, inPtr, bytes.length).set(bytes);
    const pageCount = AlbexEngine._invoke(pw, 'extractPdf', bytes.length);

    // Report extraction failures before touching the main engine, so a bad
    // PDF never leaves a half-open document behind.
    if (pageCount < 0 && pageCount !== -2) {
      const errLen = AlbexEngine._invoke(pw, 'getErrorLen');
      const errPtr = AlbexEngine._invoke(pw, 'getErrorPtr');
      const msg = errLen > 0
        ? _dec.decode(new Uint8Array(pm.buffer, errPtr, errLen))
        : 'PDF parse error';
      throw new Error(msg);
    }

    this._call('setDocumentName', this._writeStr(file.name));
    this._call('beginDocument');

    // pageCount is -2 (image-only PDF) or >= 0 here; for -2 the loop is skipped.
    for (let p = 0; p < pageCount; p++) {
      const len = AlbexEngine._invoke(pw, 'getPageLen', p);
      if (!len) continue;
      const ptr = AlbexEngine._invoke(pw, 'getPagePtr', p);
      const text = _dec.decode(new Uint8Array(pm.buffer, ptr, len));
      this._feedText(text);
      this._call('flushParagraph');
    }

    return this._call('endDocument');
  }

  /** Index a plain-text file; paragraphs are separated by one or more blank lines. */
  private async _indexTxt(file: File): Promise<number> {
    const text = await file.text();
    this._call('setDocumentName', this._writeStr(file.name));
    this._call('beginDocument');
    for (const para of text.split(/\n{2,}/)) {
      const l = para.replace(/\n/g, ' ').trim();
      if (l) {
        this._feedText(l);
        this._call('flushParagraph');
      }
    }
    return this._call('endDocument');
  }

  /**
   * Index a generic XML file: tags are replaced by newlines, entities are
   * decoded, and runs of text separated by two or more newlines become paragraphs.
   */
  private async _indexXml(file: File): Promise<number> {
    const stripped = (await file.text()).replace(/<[^]*?>/g, '\n');
    const plain = AlbexEngine._decodeXmlEntities(stripped)
      .replace(/[ \t]+/g, ' ')
      .trim();
    this._call('setDocumentName', this._writeStr(file.name));
    this._call('beginDocument');
    for (const seg of plain.split(/\n{2,}/)) {
      const l = seg.replace(/\n/g, ' ').trim();
      if (l) {
        this._feedText(l);
        this._call('flushParagraph');
      }
    }
    return this._call('endDocument');
  }

  /** Indexer per lower-case file extension. */
  private static readonly _INDEXERS: Record<string, (engine: AlbexEngine, file: File) => Promise<number>> = {
    docx: (e, f) => e._indexDocx(f),
    xlsx: (e, f) => e._indexXlsx(f),
    pdf: (e, f) => e._indexPdf(f),
    txt: (e, f) => e._indexTxt(f),
    xml: (e, f) => e._indexXml(f),
  };

  // ── Public API ────────────────────────────────────────────────────────────

  /**
   * Index a file. Supported formats: DOCX, XLSX, PDF, TXT, XML.
   * Throws for unsupported formats or parse errors.
   *
   * @param file File to index; the format is chosen from the text after the
   *             last `.` of its name (case-insensitive).
   * @returns The record that is also appended to `documents`.
   */
  async indexFile(file: File): Promise<IndexedDocument> {
    const ext = file.name.split('.').pop()?.toLowerCase() ?? '';
    // Own-property check: extensions such as "constructor" or "toString"
    // must not resolve to members inherited from Object.prototype.
    const indexer = Object.prototype.hasOwnProperty.call(AlbexEngine._INDEXERS, ext)
      ? AlbexEngine._INDEXERS[ext]
      : undefined;
    if (!indexer) throw new Error(`Unsupported format: .${ext}`);

    const t0 = performance.now();
    const textPre = this._call('getTextUsed');
    const chunks = await indexer(this, file);
    const doc: IndexedDocument = {
      name: file.name,
      ext,
      chunks,
      indexTimeMs: performance.now() - t0,
      textBytes: this._call('getTextUsed') - textPre,
    };
    this._docs.push(doc);
    return doc;
  }

  /**
   * Search the index. Supports:
   * - Simple queries: `contrato` (AND of tokens, accent-insensitive)
   * - Phrase queries: `"contrato marco"` (must appear as phrase)
   * - OR queries: `contrato | acuerdo` (union of the branch searches)
   *
   * `getLastSearchStats()` afterwards describes this call as a whole: its
   * `results` equals the length of the returned array.
   */
  search(query: string): SearchResult[] {
    const parsed = parseQuery(query);

    if (parsed.kind === 'or') {
      return this._searchOr(parsed.branches, query);
    }

    const results = this._runSearch(tokensToWasmQuery(parsed.tokens), query);
    if (parsed.kind !== 'phrase') return results;

    // The engine only applies AND semantics; enforce the phrase on the snippets.
    const phraseHits = results.filter(r => containsPhrase(r.snippet, parsed.tokens));
    if (this._lastSearch) {
      this._lastSearch = { ...this._lastSearch, results: phraseHits.length };
    }
    return phraseHits;
  }

  /**
   * Run one engine search per branch, merge the hits (deduplicated by
   * document, location and match start) and sort them by score, descending.
   * Search stats are accumulated over all branches.
   */
  private _searchOr(branches: string[][], rawQuery: string): SearchResult[] {
    const seen = new Set<string>();
    const all: SearchResult[] = [];
    const totals = { timeMs: 0, bloomTested: 0, bloomPassed: 0, bitapMatched: 0 };

    for (const tokens of branches) {
      const q = tokensToWasmQuery(tokens);
      if (!q) continue;
      const results = this._runSearch(q, rawQuery);

      const branchStats = this._lastSearch;
      if (branchStats) {
        totals.timeMs += branchStats.timeMs;
        totals.bloomTested += branchStats.bloomTested;
        totals.bloomPassed += branchStats.bloomPassed;
        totals.bitapMatched += branchStats.bitapMatched;
      }

      for (const r of results) {
        const key = `${r.documentName}:${r.location}:${r.matchStart}`;
        if (!seen.has(key)) {
          seen.add(key);
          all.push(r);
        }
      }
    }

    // Re-rank the merged list by score descending.
    all.sort((a, b) => b.score - a.score);

    this._lastSearch = { query: rawQuery, results: all.length, ...totals };
    return all;
  }

  /**
   * Execute one engine search and collect its results.
   *
   * @param wasmQuery    Query text sent to the engine.
   * @param displayQuery Query text recorded in the search stats.
   */
  private _runSearch(wasmQuery: string, displayQuery: string): SearchResult[] {
    const ql = this._writeStr(wasmQuery);
    this._call('setPattern', ql);

    const t0 = performance.now();
    const count = this._call('search');
    const ms = performance.now() - t0;

    this._lastSearch = {
      query: displayQuery,
      timeMs: ms,
      results: count,
      bloomTested: this._call('getStatBloomTested'),
      bloomPassed: this._call('getStatBloomPassed'),
      bitapMatched: this._call('getStatBitapMatched'),
    };

    const results: SearchResult[] = [];
    for (let i = 0; i < count; i++) {
      const score = this._call('getResultScore', i);
      const location = this._call('getResultLocation', i);
      const matchStart = this._call('getResultStart', i);
      const matchEnd = this._call('getResultEnd', i);
      // Each text getter returns a byte length; the text itself is then read
      // from the scratchpad, so it must be read before the next getter runs.
      const nl = this._call('getResultDocName', i);
      const name = nl > 0 ? this._readPad(nl) : '?';
      const sl = this._call('getSnippet', i);
      const snippet = sl > 0 ? this._readPad(sl) : '';

      results.push({ documentName: name, location, score, snippet, matchStart, matchEnd });
    }
    return results;
  }

  /** Returns current engine statistics. */
  getStats(): EngineStats {
    return {
      documents: this._docs.length,
      chunks: this._call('getChunkCount'),
      textUsed: this._call('getTextUsed'),
      textCapacity: this._call('getTextCapacity'),
      wasmMemoryBytes: this._mem.buffer.byteLength,
    };
  }

  /** Returns stats from the most recent search, or null. */
  getLastSearchStats(): SearchStats | null {
    return this._lastSearch;
  }

  /** Returns the list of indexed documents. */
  get documents(): readonly IndexedDocument[] {
    return this._docs;
  }

  /** Supported file extensions. */
  static get supportedExtensions(): string[] {
    return Object.keys(AlbexEngine._INDEXERS);
  }

  /** Configure search sensitivity. */
  setMaxErrors(errors: 0 | 1 | 2 | 3): void {
    this._call('setMaxErrors', errors);
  }

  /** Set the engine's score threshold; the value is clamped to 0–1000. */
  setThreshold(threshold: number): void {
    this._call('setThreshold', Math.max(0, Math.min(1000, threshold)));
  }

  /** Set the maximum number of results per engine search; clamped to 1–200. */
  setMaxResults(max: number): void {
    this._call('setMaxResults', Math.max(1, Math.min(200, max)));
  }

  /** Full reset — clears all indexed documents and chunks. */
  reset(): void {
    this._call('init');
    this._docs = [];
    this._lastSearch = null;
  }
}
Binary file
Binary file