albex 0.1.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +141 -0
- package/README.md +242 -112
- package/dist/albex-worker.d.ts +70 -0
- package/dist/albex-worker.d.ts.map +1 -0
- package/dist/albex-worker.js +153 -0
- package/dist/albex-worker.js.map +1 -0
- package/dist/albex.d.ts +368 -6
- package/dist/albex.d.ts.map +1 -1
- package/dist/albex.js +1692 -95
- package/dist/albex.js.map +1 -1
- package/dist/errors.d.ts +38 -0
- package/dist/errors.d.ts.map +1 -0
- package/dist/errors.js +63 -0
- package/dist/errors.js.map +1 -0
- package/dist/gpu/bloom-runtime.d.ts +60 -0
- package/dist/gpu/bloom-runtime.d.ts.map +1 -0
- package/dist/gpu/bloom-runtime.js +176 -0
- package/dist/gpu/bloom-runtime.js.map +1 -0
- package/dist/gpu/bloom-shader.wgsl.d.ts +19 -0
- package/dist/gpu/bloom-shader.wgsl.d.ts.map +1 -0
- package/dist/gpu/bloom-shader.wgsl.js +49 -0
- package/dist/gpu/bloom-shader.wgsl.js.map +1 -0
- package/dist/persistence.d.ts +21 -0
- package/dist/persistence.d.ts.map +1 -0
- package/dist/persistence.js +174 -0
- package/dist/persistence.js.map +1 -0
- package/dist/pool/coordinator.d.ts +98 -0
- package/dist/pool/coordinator.d.ts.map +1 -0
- package/dist/pool/coordinator.js +247 -0
- package/dist/pool/coordinator.js.map +1 -0
- package/dist/profile.d.ts +95 -0
- package/dist/profile.d.ts.map +1 -0
- package/dist/profile.js +207 -0
- package/dist/profile.js.map +1 -0
- package/dist/resource-manager.d.ts +56 -0
- package/dist/resource-manager.d.ts.map +1 -0
- package/dist/resource-manager.js +138 -0
- package/dist/resource-manager.js.map +1 -0
- package/dist/tiered-store.d.ts +98 -0
- package/dist/tiered-store.d.ts.map +1 -0
- package/dist/tiered-store.js +238 -0
- package/dist/tiered-store.js.map +1 -0
- package/dist/wasm-bindings.d.ts +139 -0
- package/dist/wasm-bindings.d.ts.map +1 -0
- package/dist/wasm-bindings.js +33 -0
- package/dist/wasm-bindings.js.map +1 -0
- package/dist/worker-protocol.d.ts +86 -0
- package/dist/worker-protocol.d.ts.map +1 -0
- package/dist/worker-protocol.js +20 -0
- package/dist/worker-protocol.js.map +1 -0
- package/dist/worker-runtime.d.ts +14 -0
- package/dist/worker-runtime.d.ts.map +1 -0
- package/dist/worker-runtime.js +100 -0
- package/dist/worker-runtime.js.map +1 -0
- package/package.json +56 -13
- package/src/albex-worker.ts +187 -0
- package/src/albex.ts +1845 -130
- package/src/errors.ts +60 -0
- package/src/gpu/bloom-runtime.ts +229 -0
- package/src/gpu/bloom-shader.wgsl.ts +48 -0
- package/src/persistence.ts +175 -0
- package/src/pool/coordinator.ts +324 -0
- package/src/profile.ts +279 -0
- package/src/resource-manager.ts +167 -0
- package/src/tiered-store.ts +259 -0
- package/src/wasm-bindings.ts +200 -0
- package/src/worker-protocol.ts +48 -0
- package/src/worker-runtime.ts +96 -0
- package/wasm/pkg/albex_pdf.wasm +0 -0
- package/wasm/pkg/albex_wasm_bg.wasm +0 -0
- package/wasm/pkg/albex_wasm_mini.wasm +0 -0
- package/wasm/pkg/albex_wasm_mini_simd.wasm +0 -0
- package/wasm/pkg/albex_wasm_pro.wasm +0 -0
- package/wasm/pkg/albex_wasm_pro_simd.wasm +0 -0
- package/wasm/pkg/albex_wasm_std.wasm +0 -0
- package/wasm/pkg/albex_wasm_std_simd.wasm +0 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"albex-worker.d.ts","sourceRoot":"","sources":["../src/albex-worker.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;GAsBG;AAEH,OAAO,KAAK,EACV,YAAY,EACZ,eAAe,EACf,aAAa,EACb,YAAY,EACZ,WAAW,EACX,WAAW,EACZ,MAAM,YAAY,CAAC;AAcpB,MAAM,WAAW,kBAAmB,SAAQ,YAAY;IACtD,oEAAoE;IACpE,SAAS,EAAE,MAAM,GAAG,GAAG,CAAC;CACzB;AASD,qBAAa,iBAAiB;IAC5B,OAAO,CAAC,QAAQ,CAAC,KAAK,CAAqB;IAC3C,OAAO,CAAC,OAAO,CAAU;IACzB,OAAO,CAAC,OAAO,CAAK;IACpB,OAAO,CAAC,QAAQ,CAA8B;IAC9C,OAAO,CAAC,UAAU,CAAyB;gBAE/B,IAAI,EAAE,kBAAkB;IAI9B,IAAI,IAAI,OAAO,CAAC,IAAI,CAAC;IAsB3B,OAAO,CAAC,KAAK;IASP,SAAS,CAAC,IAAI,EAAE,IAAI,GAAG,OAAO,CAAC,eAAe,CAAC;IAWrD,MAAM,CAAC,KAAK,EAAE,MAAM,EAAE,IAAI,GAAE,aAAkB,GAAG,OAAO,CAAC,YAAY,EAAE,CAAC;IAIxE;;;;;;;OAOG;IACI,iBAAiB,CAAC,KAAK,EAAE,MAAM,EAAE,IAAI,GAAE,aAAkB,GAAG,aAAa,CAAC,YAAY,CAAC;IAK9F;;OAEG;IACI,YAAY,CAAC,KAAK,EAAE,MAAM,EAAE,IAAI,GAAE,aAAkB,GAAG,aAAa,CAAC,YAAY,CAAC;IAQnF,cAAc,CAAC,EAAE,EAAE,MAAM,GAAG,OAAO,CAAC,OAAO,CAAC;IAM5C,OAAO,IAAI,OAAO,CAAC,IAAI,CAAC;IACxB,KAAK,IAAI,OAAO,CAAC,IAAI,CAAC;IAK5B,QAAQ,IAAa,OAAO,CAAC,WAAW,CAAC;IACzC,kBAAkB,IAAI,OAAO,CAAC,WAAW,GAAG,IAAI,CAAC;IACjD,YAAY,IAAU,OAAO,CAAC,SAAS,eAAe,EAAE,CAAC;IAEnD,YAAY,CAAC,CAAC,EAAE,CAAC,GAAG,CAAC,GAAG,CAAC,GAAG,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC;IAC7C,YAAY,CAAC,CAAC,EAAE,MAAM,GAAS,OAAO,CAAC,IAAI,CAAC;IAC5C,aAAa,CAAC,CAAC,EAAE,MAAM,GAAQ,OAAO,CAAC,IAAI,CAAC;IAC5C,WAAW,CAAC,IAAI,EAAE,KAAK,GAAG,IAAI,GAAG,OAAO,CAAC,IAAI,CAAC;IAG9C,IAAI,CAAC,IAAI,EAAE,MAAM,GAAa,OAAO,CAAC,IAAI,CAAC;IAC3C,IAAI,CAAC,IAAI,EAAE,MAAM,GAAa,OAAO,CAAC,OAAO,CAAC;IAC9C,UAAU,CAAC,IAAI,EAAE,MAAM,GAAO,OAAO,CAAC,OAAO,CAAC;IAC9C,cAAc,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;IAC3C,aAAa,IAAiB,OAAO,CAAC,MAAM,EAAE,CAAC;IAErD,CAAC,MAAM,CAAC,OAAO,CAAC,IAAI,IAAI;CAMzB"}
|
|
@@ -0,0 +1,153 @@
|
|
|
1
|
+
/*!
|
|
2
|
+
* albex v0.3.0
|
|
3
|
+
* Zero-config local full-text search for documents — runs entirely in the browser, no server, no upload.
|
|
4
|
+
* (c) 2026 RafaCalRob
|
|
5
|
+
* @license MIT
|
|
6
|
+
* https://github.com/RafaCalRob/Albex#readme
|
|
7
|
+
*/
|
|
8
|
+
/**
|
|
9
|
+
* `AlbexEngineWorker` — a main-thread wrapper that runs the engine inside a
|
|
10
|
+
* Web Worker. Mirrors the surface of `AlbexEngine` so it can be swapped in
|
|
11
|
+
* with minimal code changes (every call that goes to the worker returns a Promise).
|
|
12
|
+
*
|
|
13
|
+
* Usage:
|
|
14
|
+
*
|
|
15
|
+
* const engine = new AlbexEngineWorker({
|
|
16
|
+
* wasmUrl: '/assets/albex_wasm_bg.wasm',
|
|
17
|
+
* pdfWasmUrl: '/assets/albex_pdf.wasm',
|
|
18
|
+
* // Provide the URL to the bundled worker runtime.
|
|
19
|
+
* workerUrl: new URL('./worker-runtime.js', import.meta.url),
|
|
20
|
+
* });
|
|
21
|
+
* await engine.init();
|
|
22
|
+
*
|
|
23
|
+
* Why: a `search()` over 100k chunks can take 10–50 ms. On the main thread that
|
|
24
|
+
* is visible jank for every keystroke. Off-main-thread keeps the UI at 60 fps.
|
|
25
|
+
*
|
|
26
|
+
* The runtime is single-threaded WASM, so requests are serialised: only one
|
|
27
|
+
* call is in flight at a time. This matches the actual `static mut` model
|
|
28
|
+
* inside the .wasm and is fine for an interactive search UI (each keystroke
|
|
29
|
+
* replaces the previous query).
|
|
30
|
+
*/
|
|
31
|
+
import { AlbexError, AlbexInitError, AlbexUnsupportedFormatError, AlbexParseError, AlbexCapacityError, } from './errors.js';
|
|
32
|
+
let _workerSearchStreamWarned = false;
|
|
33
|
+
export class AlbexEngineWorker {
|
|
34
|
+
_opts;
|
|
35
|
+
_worker;
|
|
36
|
+
_nextId = 1;
|
|
37
|
+
_pending = new Map();
|
|
38
|
+
_docsCache = [];
|
|
39
|
+
constructor(opts) {
|
|
40
|
+
this._opts = opts;
|
|
41
|
+
}
|
|
42
|
+
async init() {
|
|
43
|
+
this._worker = new Worker(this._opts.workerUrl, { type: 'module' });
|
|
44
|
+
this._worker.onmessage = (ev) => {
|
|
45
|
+
const { id } = ev.data;
|
|
46
|
+
const p = this._pending.get(id);
|
|
47
|
+
if (!p)
|
|
48
|
+
return;
|
|
49
|
+
this._pending.delete(id);
|
|
50
|
+
if (ev.data.ok)
|
|
51
|
+
p.resolve(ev.data.result);
|
|
52
|
+
else
|
|
53
|
+
p.reject(rehydrateError(ev.data.error));
|
|
54
|
+
};
|
|
55
|
+
this._worker.onerror = (e) => {
|
|
56
|
+
// Surface the error to every in-flight call.
|
|
57
|
+
const err = new AlbexInitError(`Worker crashed: ${e.message}`);
|
|
58
|
+
for (const [, p] of this._pending)
|
|
59
|
+
p.reject(err);
|
|
60
|
+
this._pending.clear();
|
|
61
|
+
};
|
|
62
|
+
await this._send({ kind: 'init', opts: {
|
|
63
|
+
wasmUrl: this._opts.wasmUrl,
|
|
64
|
+
pdfWasmUrl: this._opts.pdfWasmUrl,
|
|
65
|
+
} });
|
|
66
|
+
}
|
|
67
|
+
_send(op, transfer = []) {
|
|
68
|
+
const id = this._nextId++;
|
|
69
|
+
const req = { id, op };
|
|
70
|
+
return new Promise((resolve, reject) => {
|
|
71
|
+
this._pending.set(id, { resolve: resolve, reject });
|
|
72
|
+
this._worker.postMessage(req, transfer);
|
|
73
|
+
});
|
|
74
|
+
}
|
|
75
|
+
async indexFile(file) {
|
|
76
|
+
const buffer = await file.arrayBuffer();
|
|
77
|
+
// Transfer the buffer to avoid a copy.
|
|
78
|
+
const doc = await this._send({ kind: 'indexFile', name: file.name, buffer }, [buffer]);
|
|
79
|
+
this._docsCache.push(doc);
|
|
80
|
+
return doc;
|
|
81
|
+
}
|
|
82
|
+
search(query, opts = {}) {
|
|
83
|
+
return this._send({ kind: 'search', query, options: opts });
|
|
84
|
+
}
|
|
85
|
+
/**
|
|
86
|
+
* Cooperative variant of `search`. Today the wire still sends a single
|
|
87
|
+
* batch — the result array is fetched in one round-trip from the worker
|
|
88
|
+
* and then exposed as an async iterator so callers can `break` early.
|
|
89
|
+
* A future iteration may use a `MessagePort` to stream individual results
|
|
90
|
+
* from the worker side; the iterator shape is preserved across that
|
|
91
|
+
* transition.
|
|
92
|
+
*/
|
|
93
|
+
async *searchCooperative(query, opts = {}) {
|
|
94
|
+
const results = await this.search(query, opts);
|
|
95
|
+
for (const r of results)
|
|
96
|
+
yield r;
|
|
97
|
+
}
|
|
98
|
+
/**
|
|
99
|
+
* @deprecated Renamed to `searchCooperative` in 0.3.0; this alias will be removed in 0.4.0.
|
|
100
|
+
*/
|
|
101
|
+
async *searchStream(query, opts = {}) {
|
|
102
|
+
if (!_workerSearchStreamWarned) {
|
|
103
|
+
_workerSearchStreamWarned = true;
|
|
104
|
+
console.warn('[albex] `AlbexEngineWorker.searchStream` is deprecated; rename to `searchCooperative`. Alias removed in 0.4.0.');
|
|
105
|
+
}
|
|
106
|
+
yield* this.searchCooperative(query, opts);
|
|
107
|
+
}
|
|
108
|
+
async removeDocument(id) {
|
|
109
|
+
const ok = await this._send({ kind: 'removeDocument', id });
|
|
110
|
+
if (ok)
|
|
111
|
+
this._docsCache = this._docsCache.filter(d => d.name !== id && d.contentHash !== id);
|
|
112
|
+
return ok;
|
|
113
|
+
}
|
|
114
|
+
async compact() { await this._send({ kind: 'compact' }); }
|
|
115
|
+
async reset() {
|
|
116
|
+
await this._send({ kind: 'reset' });
|
|
117
|
+
this._docsCache = [];
|
|
118
|
+
}
|
|
119
|
+
getStats() { return this._send({ kind: 'getStats' }); }
|
|
120
|
+
getLastSearchStats() { return this._send({ kind: 'getLastSearchStats' }); }
|
|
121
|
+
getDocuments() { return this._send({ kind: 'getDocuments' }); }
|
|
122
|
+
async setMaxErrors(n) { await this._send({ kind: 'setMaxErrors', n }); }
|
|
123
|
+
async setThreshold(n) { await this._send({ kind: 'setThreshold', n }); }
|
|
124
|
+
async setMaxResults(n) { await this._send({ kind: 'setMaxResults', n }); }
|
|
125
|
+
async setLanguage(lang) { await this._send({ kind: 'setLanguage', lang }); }
|
|
126
|
+
// Persistence — mirror of AlbexEngine.
|
|
127
|
+
async save(name) { await this._send({ kind: 'save', name }); }
|
|
128
|
+
async load(name) { return this._send({ kind: 'load', name }); }
|
|
129
|
+
async loadOrInit(name) { return this._send({ kind: 'loadOrInit', name }); }
|
|
130
|
+
async deleteSnapshot(name) { await this._send({ kind: 'deleteSnapshot', name }); }
|
|
131
|
+
async listSnapshots() { return this._send({ kind: 'listSnapshots' }); }
|
|
132
|
+
[Symbol.dispose]() {
|
|
133
|
+
for (const [, p] of this._pending)
|
|
134
|
+
p.reject(new AlbexError('disposed', 'Engine disposed'));
|
|
135
|
+
this._pending.clear();
|
|
136
|
+
this._worker?.terminate();
|
|
137
|
+
this._docsCache = [];
|
|
138
|
+
}
|
|
139
|
+
}
|
|
140
|
+
function rehydrateError(e) {
|
|
141
|
+
switch (e.kind) {
|
|
142
|
+
case 'init': return new AlbexInitError(e.message);
|
|
143
|
+
case 'unsupported_format': return new AlbexUnsupportedFormatError(e.message.replace(/^Unsupported format: \./, ''));
|
|
144
|
+
case 'parse': return new AlbexParseError('unknown', e.message);
|
|
145
|
+
case 'capacity': return new AlbexCapacityError(e.message);
|
|
146
|
+
default: {
|
|
147
|
+
const err = new Error(e.message);
|
|
148
|
+
err.name = e.name;
|
|
149
|
+
return err;
|
|
150
|
+
}
|
|
151
|
+
}
|
|
152
|
+
}
|
|
153
|
+
//# sourceMappingURL=albex-worker.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"albex-worker.js","sourceRoot":"","sources":["../src/albex-worker.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;GAsBG;AAeH,OAAO,EACL,UAAU,EACV,cAAc,EACd,2BAA2B,EAC3B,eAAe,EACf,kBAAkB,GACnB,MAAM,aAAa,CAAC;AAYrB,IAAI,yBAAyB,GAAG,KAAK,CAAC;AAEtC,MAAM,OAAO,iBAAiB;IACX,KAAK,CAAqB;IACnC,OAAO,CAAU;IACjB,OAAO,GAAG,CAAC,CAAC;IACZ,QAAQ,GAAG,IAAI,GAAG,EAAmB,CAAC;IACtC,UAAU,GAAsB,EAAE,CAAC;IAE3C,YAAY,IAAwB;QAClC,IAAI,CAAC,KAAK,GAAG,IAAI,CAAC;IACpB,CAAC;IAED,KAAK,CAAC,IAAI;QACR,IAAI,CAAC,OAAO,GAAG,IAAI,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,SAAS,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,CAAC,CAAC;QACpE,IAAI,CAAC,OAAO,CAAC,SAAS,GAAG,CAAC,EAAgC,EAAE,EAAE;YAC5D,MAAM,EAAE,EAAE,EAAE,GAAG,EAAE,CAAC,IAAI,CAAC;YACvB,MAAM,CAAC,GAAG,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;YAChC,IAAI,CAAC,CAAC;gBAAE,OAAO;YACf,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC;YACzB,IAAI,EAAE,CAAC,IAAI,CAAC,EAAE;gBAAE,CAAC,CAAC,OAAO,CAAC,EAAE,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;;gBAC1B,CAAC,CAAC,MAAM,CAAC,cAAc,CAAC,EAAE,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC;QAC1D,CAAC,CAAC;QACF,IAAI,CAAC,OAAO,CAAC,OAAO,GAAG,CAAC,CAAC,EAAE,EAAE;YAC3B,6CAA6C;YAC7C,MAAM,GAAG,GAAG,IAAI,cAAc,CAAC,mBAAmB,CAAC,CAAC,OAAO,EAAE,CAAC,CAAC;YAC/D,KAAK,MAAM,CAAC,EAAE,CAAC,CAAC,IAAI,IAAI,CAAC,QAAQ;gBAAE,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC;YACjD,IAAI,CAAC,QAAQ,CAAC,KAAK,EAAE,CAAC;QACxB,CAAC,CAAC;QACF,MAAM,IAAI,CAAC,KAAK,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE;gBACrC,OAAO,EAAK,IAAI,CAAC,KAAK,CAAC,OAAO;gBAC9B,UAAU,EAAE,IAAI,CAAC,KAAK,CAAC,UAAU;aAClC,EAAE,CAAC,CAAC;IACP,CAAC;IAEO,KAAK,CAAc,EAAY,EAAE,WAA2B,EAAE;QACpE,MAAM,EAAE,GAAG,IAAI,CAAC,OAAO,EAAE,CAAC;QAC1B,MAAM,GAAG,GAAkB,EAAE,EAAE,EAAE,EAAE,EAAE,CAAC;QACtC,OAAO,IAAI,OAAO,CAAI,CAAC,OAAO,EAAE,MAAM,EAAE,EAAE;YACxC,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAE,EAAE,EAAE,OAAO,EAAE,OAA+B,EAAE,MAAM,EAAE,CAAC,CAAC;YAC5E,IAAI,CAAC,OAAO,CAAC,WAAW,CAAC,GAAG,EAAE,QAAQ,CAAC,CAAC;QAC1C,CAAC,CAAC,CAAC;IACL,CAAC;IAED,KAAK,CAAC,SAAS,CAAC,IAAU;QACxB,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,WAAW,EAAE,CAAC;QACxC,uCAAuC;QACvC,MAAM,G
AAG,GAAG,MAAM,IAAI,CAAC,KAAK,CAC1B,EAAE,IAAI,EAAE,WAAW,EAAE,IAAI,EAAE,IAAI,CAAC,IAAI,EAAE,MAAM,EAAE,EAC9C,CAAC,MAAM,CAAC,CACT,CAAC;QACF,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QAC1B,OAAO,GAAG,CAAC;IACb,CAAC;IAED,MAAM,CAAC,KAAa,EAAE,OAAsB,EAAE;QAC5C,OAAO,IAAI,CAAC,KAAK,CAAiB,EAAE,IAAI,EAAE,QAAQ,EAAE,KAAK,EAAE,OAAO,EAAE,IAAI,EAAE,CAAC,CAAC;IAC9E,CAAC;IAED;;;;;;;OAOG;IACH,KAAK,CAAC,CAAC,iBAAiB,CAAC,KAAa,EAAE,OAAsB,EAAE;QAC9D,MAAM,OAAO,GAAG,MAAM,IAAI,CAAC,MAAM,CAAC,KAAK,EAAE,IAAI,CAAC,CAAC;QAC/C,KAAK,MAAM,CAAC,IAAI,OAAO;YAAE,MAAM,CAAC,CAAC;IACnC,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,CAAC,YAAY,CAAC,KAAa,EAAE,OAAsB,EAAE;QACzD,IAAI,CAAC,yBAAyB,EAAE,CAAC;YAC/B,yBAAyB,GAAG,IAAI,CAAC;YACjC,OAAO,CAAC,IAAI,CAAC,gHAAgH,CAAC,CAAC;QACjI,CAAC;QACD,KAAK,CAAC,CAAC,IAAI,CAAC,iBAAiB,CAAC,KAAK,EAAE,IAAI,CAAC,CAAC;IAC7C,CAAC;IAED,KAAK,CAAC,cAAc,CAAC,EAAU;QAC7B,MAAM,EAAE,GAAG,MAAM,IAAI,CAAC,KAAK,CAAU,EAAE,IAAI,EAAE,gBAAgB,EAAE,EAAE,EAAE,CAAC,CAAC;QACrE,IAAI,EAAE;YAAE,IAAI,CAAC,UAAU,GAAG,IAAI,CAAC,UAAU,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,EAAE,IAAI,CAAC,CAAC,WAAW,KAAK,EAAE,CAAC,CAAC;QAC7F,OAAO,EAAE,CAAC;IACZ,CAAC;IAED,KAAK,CAAC,OAAO,KAAoB,MAAM,IAAI,CAAC,KAAK,CAAC,EAAE,IAAI,EAAE,SAAS,EAAE,CAAC,CAAC,CAAC,CAAC;IACzE,KAAK,CAAC,KAAK;QACT,MAAM,IAAI,CAAC,KAAK,CAAC,EAAE,IAAI,EAAE,OAAO,EAAE,CAAC,CAAC;QACpC,IAAI,CAAC,UAAU,GAAG,EAAE,CAAC;IACvB,CAAC;IAED,QAAQ,KAA0C,OAAO,IAAI,CAAC,KAAK,CAAC,EAAE,IAAI,EAAE,UAAU,EAAE,CAAC,CAAC,CAAC,CAAC;IAC5F,kBAAkB,KAAkC,OAAO,IAAI,CAAC,KAAK,CAAC,EAAE,IAAI,EAAE,oBAAoB,EAAE,CAAC,CAAC,CAAC,CAAC;IACxG,YAAY,KAAgD,OAAO,IAAI,CAAC,KAAK,CAAC,EAAE,IAAI,EAAE,cAAc,EAAE,CAAC,CAAC,CAAC,CAAC;IAE1G,KAAK,CAAC,YAAY,CAAC,CAAgB,IAAmB,MAAM,IAAI,CAAC,KAAK,CAAC,EAAE,IAAI,EAAE,cAAc,EAAG,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC;IACvG,KAAK,CAAC,YAAY,CAAC,CAAS,IAAyB,MAAM,IAAI,CAAC,KAAK,CAAC,EAAE,IAAI,EAAE,cAAc,EAAE,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC;IACrG,KAAK,CAAC,aAAa,CAAC,CAAS,IAAwB,MAAM,IAAI,CAAC,KAAK,CAAC,EAAE,IAAI,EAAE,eAAe,EAAE,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC;IACtG,KAAK,CAAC,WAAW,CAAC,IAAkB,
IAAmB,MAAM,IAAI,CAAC,KAAK,CAAC,EAAE,IAAI,EAAE,aAAa,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC;IAEzG,uCAAuC;IACvC,KAAK,CAAC,IAAI,CAAC,IAAY,IAAgC,MAAM,IAAI,CAAC,KAAK,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC;IAClG,KAAK,CAAC,IAAI,CAAC,IAAY,IAAgC,OAAO,IAAI,CAAC,KAAK,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC;IACnG,KAAK,CAAC,UAAU,CAAC,IAAY,IAA0B,OAAO,IAAI,CAAC,KAAK,CAAC,EAAE,IAAI,EAAE,YAAY,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC;IACzG,KAAK,CAAC,cAAc,CAAC,IAAY,IAAsB,MAAM,IAAI,CAAC,KAAK,CAAC,EAAE,IAAI,EAAE,gBAAgB,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC;IAC5G,KAAK,CAAC,aAAa,KAAoC,OAAO,IAAI,CAAC,KAAK,CAAC,EAAE,IAAI,EAAE,eAAe,EAAE,CAAC,CAAC,CAAC,CAAC;IAEtG,CAAC,MAAM,CAAC,OAAO,CAAC;QACd,KAAK,MAAM,CAAC,EAAE,CAAC,CAAC,IAAI,IAAI,CAAC,QAAQ;YAAE,CAAC,CAAC,MAAM,CAAC,IAAI,UAAU,CAAC,UAAU,EAAE,iBAAiB,CAAC,CAAC,CAAC;QAC3F,IAAI,CAAC,QAAQ,CAAC,KAAK,EAAE,CAAC;QACtB,IAAI,CAAC,OAAO,EAAE,SAAS,EAAE,CAAC;QAC1B,IAAI,CAAC,UAAU,GAAG,EAAE,CAAC;IACvB,CAAC;CACF;AAED,SAAS,cAAc,CAAC,CAAmD;IACzE,QAAQ,CAAC,CAAC,IAAI,EAAE,CAAC;QACf,KAAK,MAAM,CAAC,CAAe,OAAO,IAAI,cAAc,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC;QAChE,KAAK,oBAAoB,CAAC,CAAC,OAAO,IAAI,2BAA2B,CAAC,CAAC,CAAC,OAAO,CAAC,OAAO,CAAC,yBAAyB,EAAE,EAAE,CAAC,CAAC,CAAC;QACpH,KAAK,OAAO,CAAC,CAAc,OAAO,IAAI,eAAe,CAAC,SAAS,EAAE,CAAC,CAAC,OAAO,CAAC,CAAC;QAC5E,KAAK,UAAU,CAAC,CAAW,OAAO,IAAI,kBAAkB,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC;QACpE,OAAO,CAAC,CAAC,CAAC;YACR,MAAM,GAAG,GAAG,IAAI,KAAK,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC;YACjC,GAAG,CAAC,IAAI,GAAG,CAAC,CAAC,IAAI,CAAC;YAClB,OAAO,GAAG,CAAC;QACb,CAAC;IACH,CAAC;AACH,CAAC"}
|
package/dist/albex.d.ts
CHANGED
|
@@ -13,11 +13,59 @@
|
|
|
13
13
|
* const results = engine.search('contrato marco');
|
|
14
14
|
* ```
|
|
15
15
|
*/
|
|
16
|
+
import { type Tier } from './profile.js';
|
|
17
|
+
export { AlbexError, AlbexInitError, AlbexUnsupportedFormatError, AlbexParseError, AlbexCapacityError, } from './errors.js';
|
|
18
|
+
export { listPersisted, deletePersisted } from './persistence.js';
|
|
19
|
+
export { detectProfile, pickTier, pickWorkerCount, shouldUseGpu } from './profile.js';
|
|
20
|
+
export type { DeviceProfile, Tier } from './profile.js';
|
|
21
|
+
export { getResourceManager } from './resource-manager.js';
|
|
22
|
+
export type { ResourceState, ResourceMode } from './resource-manager.js';
|
|
23
|
+
export { AlbexPool } from './pool/coordinator.js';
|
|
24
|
+
export type { AlbexPoolOptions } from './pool/coordinator.js';
|
|
25
|
+
export { BloomGpu, packBloomsFromChunks } from './gpu/bloom-runtime.js';
|
|
26
|
+
export { TieredStore } from './tiered-store.js';
|
|
27
|
+
export type { TieredStoreOptions } from './tiered-store.js';
|
|
16
28
|
export interface AlbexOptions {
|
|
17
|
-
/**
|
|
18
|
-
|
|
29
|
+
/**
|
|
30
|
+
* Explicit URL to the main WASM binary.
|
|
31
|
+
*
|
|
32
|
+
* If you want automatic tier selection (mini/std/pro chosen from
|
|
33
|
+
* `deviceMemory`), pass `wasmBaseUrl` instead — the engine will fetch
|
|
34
|
+
* `albex_wasm_<tier>.wasm` from that directory.
|
|
35
|
+
*/
|
|
36
|
+
wasmUrl?: string;
|
|
37
|
+
/**
|
|
38
|
+
* Base directory containing tiered binaries (`albex_wasm_mini.wasm`,
|
|
39
|
+
* `_std.wasm`, `_pro.wasm`). Used when `wasmUrl` is omitted.
|
|
40
|
+
*/
|
|
41
|
+
wasmBaseUrl?: string;
|
|
19
42
|
/** URL to albex_pdf.wasm. Required only if you call indexFile() with PDFs. */
|
|
20
43
|
pdfWasmUrl?: string;
|
|
44
|
+
/**
|
|
45
|
+
* Override the tier auto-detection. Pass `'auto'` (default), or an
|
|
46
|
+
* explicit tier when you know the constraints of your target environment.
|
|
47
|
+
*/
|
|
48
|
+
tier?: 'auto' | 'mini' | 'std' | 'pro';
|
|
49
|
+
/**
|
|
50
|
+
* SIMD selection. When `'auto'` (default), Albex probes for v128 support
|
|
51
|
+
* and fetches the `_simd.wasm` variant when available. Pass `'off'` to
|
|
52
|
+
* stay on the baseline binary even on capable hosts (useful for
|
|
53
|
+
* regression testing or to align all clients in a corporate deployment).
|
|
54
|
+
*/
|
|
55
|
+
simd?: 'auto' | 'on' | 'off';
|
|
56
|
+
/**
|
|
57
|
+
* GPU acceleration policy for the Bloom pre-filter.
|
|
58
|
+
* `'auto'` — enable when WebGPU is available AND chunk count is large
|
|
59
|
+
* `'on'` — force enable (fall back to CPU silently if GPU fails)
|
|
60
|
+
* `'off'` — never use GPU
|
|
61
|
+
* Default: `'auto'`.
|
|
62
|
+
*/
|
|
63
|
+
gpu?: 'auto' | 'on' | 'off';
|
|
64
|
+
/**
|
|
65
|
+
* Minimum chunk count before `gpu: 'auto'` engages. Below this threshold
|
|
66
|
+
* the upload + dispatch overhead is bigger than the speedup. Default: 20_000.
|
|
67
|
+
*/
|
|
68
|
+
gpuThreshold?: number;
|
|
21
69
|
}
|
|
22
70
|
export interface IndexedDocument {
|
|
23
71
|
name: string;
|
|
@@ -25,6 +73,16 @@ export interface IndexedDocument {
|
|
|
25
73
|
chunks: number;
|
|
26
74
|
indexTimeMs: number;
|
|
27
75
|
textBytes: number;
|
|
76
|
+
/** WASM-side stable identifier (also acts as a slot index after compact). */
|
|
77
|
+
docId: number;
|
|
78
|
+
/** 64-bit FNV-1a hex of the source file bytes. Stable across runs. */
|
|
79
|
+
contentHash: string;
|
|
80
|
+
}
|
|
81
|
+
export interface MatchSpan {
|
|
82
|
+
/** Byte offset within `snippet` where this matched token begins. */
|
|
83
|
+
start: number;
|
|
84
|
+
/** Byte offset within `snippet` where this matched token ends (exclusive). */
|
|
85
|
+
end: number;
|
|
28
86
|
}
|
|
29
87
|
export interface SearchResult {
|
|
30
88
|
documentName: string;
|
|
@@ -32,12 +90,37 @@ export interface SearchResult {
|
|
|
32
90
|
location: number;
|
|
33
91
|
/** Relevance score 0–1000. */
|
|
34
92
|
score: number;
|
|
35
|
-
/**
|
|
93
|
+
/** Snippet text. With `windowed` search options this is a substring with
|
|
94
|
+
* ASCII ellipsis sentinels (`"... "` / `" ..."`) the UI should render
|
|
95
|
+
* as `…`. Without windowing, the full chunk text. */
|
|
36
96
|
snippet: string;
|
|
37
|
-
/**
|
|
97
|
+
/** Byte offsets of the primary token match within `snippet` (`matchEnd` is exclusive). Kept for backwards compatibility; equal to `matches[0]`. */
|
|
38
98
|
matchStart: number;
|
|
39
|
-
/** Match end byte offset within snippet (exclusive). */
|
|
40
99
|
matchEnd: number;
|
|
100
|
+
/** All matched token spans within `snippet`, in query order. Length 1–4. */
|
|
101
|
+
matches: MatchSpan[];
|
|
102
|
+
}
|
|
103
|
+
/**
|
|
104
|
+
* Options that change how snippets are produced. Both fields are optional.
|
|
105
|
+
*
|
|
106
|
+
* `windowed` — when true, return a cropped window around the primary
|
|
107
|
+
* match instead of the full chunk text.
|
|
108
|
+
* `before/after` — bytes of context to include on each side of the primary
|
|
109
|
+
* match. Defaults: 60 before, 120 after.
|
|
110
|
+
*/
|
|
111
|
+
export interface SearchOptions {
|
|
112
|
+
windowed?: boolean;
|
|
113
|
+
before?: number;
|
|
114
|
+
after?: number;
|
|
115
|
+
/**
|
|
116
|
+
* Frame budget in milliseconds for `searchCooperative`. The engine
|
|
117
|
+
* processes chunks until the budget is exhausted, then yields to the
|
|
118
|
+
* event loop via `scheduler.yield()` (or `requestAnimationFrame`
|
|
119
|
+
* fallback) before resuming. Lower = smoother UI; higher = lower latency.
|
|
120
|
+
*
|
|
121
|
+
* Default: 8 ms (half a 60 fps frame). Ignored by synchronous `search()`.
|
|
122
|
+
*/
|
|
123
|
+
frameBudgetMs?: number;
|
|
41
124
|
}
|
|
42
125
|
export interface EngineStats {
|
|
43
126
|
documents: number;
|
|
@@ -45,6 +128,12 @@ export interface EngineStats {
|
|
|
45
128
|
textUsed: number;
|
|
46
129
|
textCapacity: number;
|
|
47
130
|
wasmMemoryBytes: number;
|
|
131
|
+
/** Tier loaded at init time (mini/std/pro). */
|
|
132
|
+
tier: Tier | null;
|
|
133
|
+
/** Compile-time chunk capacity for the loaded tier. */
|
|
134
|
+
maxChunks: number;
|
|
135
|
+
/** Compile-time document capacity for the loaded tier. */
|
|
136
|
+
maxDocs: number;
|
|
48
137
|
}
|
|
49
138
|
export interface SearchStats {
|
|
50
139
|
query: string;
|
|
@@ -54,17 +143,108 @@ export interface SearchStats {
|
|
|
54
143
|
bloomPassed: number;
|
|
55
144
|
bitapMatched: number;
|
|
56
145
|
}
|
|
146
|
+
/**
|
|
147
|
+
* Result shape returned by an attached OCR module. Kept structural here so
|
|
148
|
+
* the main package has no runtime dependency on `@albex/ocr` — the optional
|
|
149
|
+
* shape is just a contract.
|
|
150
|
+
*/
|
|
151
|
+
export interface OcrAttachedResult {
|
|
152
|
+
text: string;
|
|
153
|
+
confidence: number;
|
|
154
|
+
timeMs: number;
|
|
155
|
+
}
|
|
156
|
+
export interface OcrAttachedOptions {
|
|
157
|
+
lang?: string;
|
|
158
|
+
hint?: string;
|
|
159
|
+
}
|
|
57
160
|
export declare class AlbexEngine {
|
|
58
161
|
private _wasm;
|
|
59
162
|
private _mem;
|
|
163
|
+
/**
|
|
164
|
+
* OCR entry point installed by `@albex/ocr::enableOcr(engine)`. Undefined
|
|
165
|
+
* when the OCR module has not been wired. The main `albex` package has no
|
|
166
|
+
* runtime dependency on OCR — this is a structural slot that the optional
|
|
167
|
+
* companion package fills.
|
|
168
|
+
*/
|
|
169
|
+
ocrImage?: (image: unknown, opts?: OcrAttachedOptions) => Promise<OcrAttachedResult>;
|
|
170
|
+
/**
|
|
171
|
+
* Optional OCR-side configuration set by `@albex/ocr::enableOcr`. Read
|
|
172
|
+
* by the engine to decide whether to invoke OCR on top of the text it
|
|
173
|
+
* already extracted from a PDF (hybrid PDFs: native text + images that
|
|
174
|
+
* also contain text, like stamps, scanned annexes, or diagrams with
|
|
175
|
+
* labels).
|
|
176
|
+
*
|
|
177
|
+
* When `alwaysExtractEmbeddedImages` is true, every page of every PDF
|
|
178
|
+
* passes through `extractPageImages` after the normal text extraction;
|
|
179
|
+
* any image that meets the size filter (200×200 in Rust) is fed to
|
|
180
|
+
* `ocrImage`. Performance cost: 1–3 s per qualifying image.
|
|
181
|
+
*
|
|
182
|
+
* Off by default — set this opt-in via the OCR module's options.
|
|
183
|
+
*/
|
|
184
|
+
ocrConfig?: {
|
|
185
|
+
alwaysExtractEmbeddedImages?: boolean;
|
|
186
|
+
};
|
|
60
187
|
private _pdfWasm;
|
|
61
188
|
private _pdfMem;
|
|
62
189
|
private _docs;
|
|
63
190
|
private _lastSearch;
|
|
191
|
+
private _tier;
|
|
192
|
+
private _simd;
|
|
193
|
+
private _profile;
|
|
194
|
+
private _resources;
|
|
195
|
+
private _gpu;
|
|
196
|
+
private _gpuChunkCountUploaded;
|
|
197
|
+
private _unsubscribeResources;
|
|
64
198
|
private readonly _opts;
|
|
65
199
|
constructor(opts: AlbexOptions);
|
|
66
200
|
/** Load and initialise the main WASM module. Must be called before any other method. */
|
|
67
201
|
init(): Promise<void>;
|
|
202
|
+
/**
|
|
203
|
+
* Decide which `.wasm` binary to fetch. Order of precedence:
|
|
204
|
+
* 1. `opts.wasmUrl` if provided — used verbatim.
|
|
205
|
+
* 2. `opts.tier` if explicit — joined with `wasmBaseUrl`.
|
|
206
|
+
* 3. `opts.wasmBaseUrl` + tier picked from the device profile.
|
|
207
|
+
*
|
|
208
|
+
* Restated, with the zero-config fallback included:
|
|
209
|
+
* 1. `opts.wasmUrl` literal → use verbatim
|
|
210
|
+
* 2. `opts.wasmBaseUrl` + tier suffix → fetched from that directory
|
|
211
|
+
* 3. zero-config default → `albex_wasm_bg.wasm` packaged
|
|
212
|
+
* next to this file, resolved
|
|
213
|
+
* via `import.meta.url`
|
|
214
|
+
*
|
|
215
|
+
* The zero-config default loads the std-baseline binary. Tier auto-detection
|
|
216
|
+
* is only active when `wasmBaseUrl` is given, because picking a tier at
|
|
217
|
+
* runtime would defeat any bundler's static asset rewriting. Users who want
|
|
218
|
+
* tier optimisation must serve the six variants themselves and pass the
|
|
219
|
+
* directory through `wasmBaseUrl`.
|
|
220
|
+
*/
|
|
221
|
+
private _resolveWasmUrl;
|
|
222
|
+
/** The tier that was actually loaded. `null` until `init()` resolves. */
|
|
223
|
+
get tier(): Tier | null;
|
|
224
|
+
/** True if the SIMD-accelerated binary was loaded. */
|
|
225
|
+
get simdEnabled(): boolean;
|
|
226
|
+
/** True if a WebGPU device is acquired and the next search will use it. */
|
|
227
|
+
get gpuEngaged(): boolean;
|
|
228
|
+
/**
|
|
229
|
+
* Decide whether to use the GPU pre-filter for the upcoming search.
|
|
230
|
+
*
|
|
231
|
+
* Policy:
|
|
232
|
+
* - `gpu: 'off'` → never.
|
|
233
|
+
* - `gpu: 'on'` → always try (still fails over to CPU).
|
|
234
|
+
* - `gpu: 'auto'` (default) → only when WebGPU is available AND
|
|
235
|
+
* chunk count crosses `gpuThreshold`.
|
|
236
|
+
*/
|
|
237
|
+
private _shouldEngageGpu;
|
|
238
|
+
/**
|
|
239
|
+
* Run the GPU Bloom scan and install the resulting candidate bitset into
|
|
240
|
+
* WASM. The next `searchBegin` will see the mask and `searchSlice` will
|
|
241
|
+
* restrict its Bitap pass to those candidates.
|
|
242
|
+
*
|
|
243
|
+
* No-op if the GPU device hasn't been acquired yet — first call attempts
|
|
244
|
+
* `init()` lazily; if that fails, the candidate path is permanently
|
|
245
|
+
* disabled for this engine instance.
|
|
246
|
+
*/
|
|
247
|
+
private _gpuPreFilter;
|
|
68
248
|
private _u8;
|
|
69
249
|
private _writePad;
|
|
70
250
|
private _writeStr;
|
|
@@ -75,21 +255,157 @@ export declare class AlbexEngine {
|
|
|
75
255
|
private _indexDocx;
|
|
76
256
|
private _indexXlsx;
|
|
77
257
|
private _indexPdf;
|
|
258
|
+
/**
|
|
259
|
+
* Scanned-PDF OCR fallback. Called from `_indexPdf` when `extractPdf`
|
|
260
|
+
* returns `-2` (image-only PDF) AND `@albex/ocr` has been wired via
|
|
261
|
+
* `enableOcr(engine)`.
|
|
262
|
+
*
|
|
263
|
+
* Walks every page of the PDF, extracts embedded JPEG / JPEG2000 image
|
|
264
|
+
* XObjects, runs each through `engine.ocrImage`, and feeds the recognised
|
|
265
|
+
* text into the index — one paragraph per page so search snippets stay
|
|
266
|
+
* tied to the page they came from.
|
|
267
|
+
*
|
|
268
|
+
* Failure modes handled here (none re-thrown — the goal is best-effort
|
|
269
|
+
* indexing, not all-or-nothing):
|
|
270
|
+
*
|
|
271
|
+
* * A page's `extractPageImages` traps the WASM instance: the instance
|
|
272
|
+
* is discarded so the next PDF starts fresh, and we stop iterating
|
|
273
|
+
* (no more pages can be read from a poisoned instance). The doc is
|
|
274
|
+
* still committed with whatever text we got from earlier pages.
|
|
275
|
+
* * An individual image fails to OCR (Tesseract decode error, JP2 not
|
|
276
|
+
* supported in this browser, etc.): we skip that image and keep
|
|
277
|
+
* going. Partial coverage beats nothing.
|
|
278
|
+
* * A page yields no extractable images (e.g. uses Flate/CCITT/JBIG2):
|
|
279
|
+
* no paragraph is emitted; the page contributes 0 chunks.
|
|
280
|
+
*/
|
|
281
|
+
private _indexPdfScanned;
|
|
282
|
+
/**
|
|
283
|
+
* Walk one page's embedded image XObjects, OCR each image, and return
|
|
284
|
+
* the joined recognised text for that page.
|
|
285
|
+
*
|
|
286
|
+
* Used by:
|
|
287
|
+
* - `_indexPdfScanned`: image-only PDFs (extractPdf returned -2).
|
|
288
|
+
* - `_indexPdf` hybrid path: when `ocrConfig.alwaysExtractEmbeddedImages`
|
|
289
|
+
* is set, every page goes through here on top of the normal text
|
|
290
|
+
* extraction.
|
|
291
|
+
*
|
|
292
|
+
* Returns:
|
|
293
|
+
* - The recognised text (possibly empty if the page has no qualifying
|
|
294
|
+
* images or every OCR call failed).
|
|
295
|
+
* - `null` if the PDF WASM trapped during extractPageImages — the
|
|
296
|
+
* caller should abort the remaining pages because the instance is
|
|
297
|
+
* now poisoned.
|
|
298
|
+
*
|
|
299
|
+
* Failure-handling philosophy: best-effort. An OCR failure on one image
|
|
300
|
+
* does not stop the page; a page with no images does not stop the doc;
|
|
301
|
+
* only a WASM trap stops the doc.
|
|
302
|
+
*/
|
|
303
|
+
private _ocrPageEmbeddedImages;
|
|
304
|
+
/**
|
|
305
|
+
* Last-chance OCR path used when `extractPdf` itself trapped (pdf-extract
|
|
306
|
+
* crashed but lopdf may still be able to walk the file). Re-instantiates
|
|
307
|
+
* the PDF WASM, reloads the input bytes, and tries the image-extraction
|
|
308
|
+
* route directly — bypassing the text codec entirely.
|
|
309
|
+
*
|
|
310
|
+
* Returns:
|
|
311
|
+
* * the doc's chunk count on success (even 0 — that means lopdf could
|
|
312
|
+
* parse but no qualifying images existed, which still beats a hard
|
|
313
|
+
* parse error),
|
|
314
|
+
* * null if the recovery itself failed (binary lacks the image exports,
|
|
315
|
+
* re-instantiation failed, or lopdf also trapped). In the null case
|
|
316
|
+
* the caller throws AlbexParseError so the user sees a clear message.
|
|
317
|
+
*/
|
|
318
|
+
private _indexPdfViaImagesOnly;
|
|
78
319
|
private _indexTxt;
|
|
79
320
|
private _indexXml;
|
|
321
|
+
private _indexMd;
|
|
322
|
+
private _indexHtml;
|
|
323
|
+
private _indexJson;
|
|
324
|
+
private _indexCsv;
|
|
325
|
+
private _indexEml;
|
|
326
|
+
/**
|
|
327
|
+
* Walk the multipart tree until a text/plain section is found. Returns
|
|
328
|
+
* the decoded body as a string, or null if no text/plain part exists.
|
|
329
|
+
*
|
|
330
|
+
* The function is called with the headers and body of the *current*
|
|
331
|
+
* MIME entity (the top-level message at first, then each multipart child
|
|
332
|
+
* on recursion). For single-part entities it inspects the entity's own
|
|
333
|
+
* Content-Transfer-Encoding and decodes accordingly.
|
|
334
|
+
*/
|
|
335
|
+
private _extractEmlTextPlain;
|
|
336
|
+
private _indexRtf;
|
|
80
337
|
private static readonly _INDEXERS;
|
|
81
338
|
/**
|
|
82
339
|
* Index a file. Supported formats: DOCX, XLSX, PDF, TXT, XML, MD, HTML, JSON, CSV, EML, RTF.
|
|
83
340
|
* Throws for unsupported formats or parse errors.
|
|
84
341
|
*/
|
|
85
342
|
indexFile(file: File): Promise<IndexedDocument>;
|
|
343
|
+
/**
|
|
344
|
+
* Mark a previously indexed document as removed. Searches no longer return
|
|
345
|
+
* its chunks. Storage is reclaimed only after `compact()`.
|
|
346
|
+
*
|
|
347
|
+
* `id` can be the file name or the contentHash returned by `indexFile`.
|
|
348
|
+
* Returns `true` if a matching document was found and tombstoned.
|
|
349
|
+
*/
|
|
350
|
+
removeDocument(id: string): boolean;
|
|
351
|
+
/**
|
|
352
|
+
* Replace a previously indexed document with new content. Equivalent to
|
|
353
|
+
* `removeDocument(name)` + `indexFile(newFile)` but does not trigger the
|
|
354
|
+
* idempotency check (so re-indexing the *same* bytes after a remove works).
|
|
355
|
+
*/
|
|
356
|
+
replaceDocument(name: string, newFile: File): Promise<IndexedDocument>;
|
|
357
|
+
/**
|
|
358
|
+
* Reclaim storage from previously removed documents. Compacts CHUNKS,
|
|
359
|
+
* TEXT_POOL, DOC_NAMES and NAME_POOL in place. Idempotent.
|
|
360
|
+
*
|
|
361
|
+
* Note: doc_ids of surviving documents are preserved, so any stored
|
|
362
|
+
* references (e.g. in a UI) remain valid.
|
|
363
|
+
*/
|
|
364
|
+
compact(): void;
|
|
86
365
|
/**
|
|
87
366
|
* Search the index. Supports:
|
|
88
367
|
* - Simple queries: `contrato` (AND of tokens, accent-insensitive)
|
|
89
368
|
* - Phrase queries: `"contrato marco"` (must appear as phrase)
|
|
90
369
|
* - OR queries: `contrato | acuerdo` (union of two searches)
|
|
370
|
+
*
|
|
371
|
+
* Pass `{ windowed: true }` to receive cropped snippets with ASCII ellipsis
|
|
372
|
+
* markers instead of full chunk text. Defaults: 60 bytes before, 120 after.
|
|
91
373
|
*/
|
|
92
|
-
search(query: string): SearchResult[];
|
|
374
|
+
search(query: string, opts?: SearchOptions): SearchResult[];
|
|
375
|
+
/**
|
|
376
|
+
* Cooperative search. Processes the corpus in slices, yielding to the
|
|
377
|
+
* event loop between them so the host UI thread keeps a chance to paint
|
|
378
|
+
* even while a long scan is in flight.
|
|
379
|
+
*
|
|
380
|
+
* NOTE: this is NOT incremental streaming. Results are materialised
|
|
381
|
+
* once the search completes and then iterated out in score-descending
|
|
382
|
+
* order. The async iterator shape is preserved because the work that
|
|
383
|
+
* produces those results genuinely yields to the scheduler between
|
|
384
|
+
* slices — a future iteration may stream individual results before the
|
|
385
|
+
* heap sorts, but doing so today would deliver them in arbitrary order.
|
|
386
|
+
*
|
|
387
|
+
* Pass `opts.frameBudgetMs` to control the slice size (default 8 ms).
|
|
388
|
+
*/
|
|
389
|
+
searchCooperative(query: string, opts?: SearchOptions): AsyncIterable<SearchResult>;
|
|
390
|
+
/**
|
|
391
|
+
* @deprecated Renamed to `searchCooperative` in 0.3.0. The original name
|
|
392
|
+
* was misleading — this method does not stream incremental results, it
|
|
393
|
+
* yields to the scheduler between slices and returns a batch. The alias
|
|
394
|
+
* keeps existing integrations working; it will be removed in 0.4.0.
|
|
395
|
+
*/
|
|
396
|
+
searchStream(query: string, opts?: SearchOptions): AsyncIterable<SearchResult>;
|
|
397
|
+
/**
|
|
398
|
+
* Drive a resumable search until done, yielding to the scheduler when the
|
|
399
|
+
* frame budget is exceeded. Returns the materialised result array.
|
|
400
|
+
*
|
|
401
|
+
* Heuristic: each call to `searchSlice` processes a chunk batch, then we
|
|
402
|
+
* check elapsed time. The batch size doubles up to a cap to amortise the
|
|
403
|
+
* JS<->WASM overhead on fast machines; on slow machines a single batch
|
|
404
|
+
* may eat the entire budget, which is also fine.
|
|
405
|
+
*/
|
|
406
|
+
private _runSearchBudgeted;
|
|
407
|
+
/** Materialise results [0..count) into the public SearchResult shape. */
|
|
408
|
+
private _collectResults;
|
|
93
409
|
private _searchOr;
|
|
94
410
|
private _runSearch;
|
|
95
411
|
/** Returns current engine statistics. */
|
|
@@ -104,7 +420,53 @@ export declare class AlbexEngine {
|
|
|
104
420
|
setMaxErrors(errors: 0 | 1 | 2 | 3): void;
|
|
105
421
|
setThreshold(threshold: number): void;
|
|
106
422
|
setMaxResults(max: number): void;
|
|
423
|
+
/**
|
|
424
|
+
* Enable or disable query stemming.
|
|
425
|
+
*
|
|
426
|
+
* - `'off'` (default): tokens are used as-is. Strict matching.
|
|
427
|
+
* - `'es'`: Spanish stemmer applied to query tokens before search. A query
|
|
428
|
+
* for `"contratos"` matches `"contrato"` and vice versa.
|
|
429
|
+
*
|
|
430
|
+
* Indexed text is never stemmed, so snippets remain faithful to the
|
|
431
|
+
* source. Recall improvement comes from queries reducing to shared prefixes.
|
|
432
|
+
*/
|
|
433
|
+
setLanguage(lang: 'off' | 'es'): void;
|
|
107
434
|
/** Full reset — clears all indexed documents and chunks. */
|
|
108
435
|
reset(): void;
|
|
436
|
+
/**
|
|
437
|
+
* Persist the current index to OPFS (or IndexedDB as fallback) under `name`.
|
|
438
|
+
*
|
|
439
|
+
* The snapshot includes every chunk, document name and text byte currently
|
|
440
|
+
* indexed. Subsequent `load(name)` calls restore the engine to this exact
|
|
441
|
+
* state in roughly O(total bytes), bypassing re-parsing.
|
|
442
|
+
*/
|
|
443
|
+
save(name: string): Promise<void>;
|
|
444
|
+
/**
|
|
445
|
+
* Restore an index previously saved with `save(name)`. Returns `true` on
|
|
446
|
+
* success, `false` if the snapshot is missing or has an incompatible
|
|
447
|
+
* header (wrong magic, version, or struct sizes).
|
|
448
|
+
*/
|
|
449
|
+
load(name: string): Promise<boolean>;
|
|
450
|
+
/**
|
|
451
|
+
* Convenience: load if the snapshot exists, otherwise leave the engine
|
|
452
|
+
* empty. Returns whether a load actually happened.
|
|
453
|
+
*/
|
|
454
|
+
loadOrInit(name: string): Promise<boolean>;
|
|
455
|
+
/** Delete a previously persisted snapshot. */
|
|
456
|
+
deleteSnapshot(name: string): Promise<void>;
|
|
457
|
+
/** List names of persisted snapshots in the current origin. */
|
|
458
|
+
listSnapshots(): Promise<string[]>;
|
|
459
|
+
/**
|
|
460
|
+
* TC39 explicit-resource-management hook (Stage 3 in 2026). Lets the engine
|
|
461
|
+
* be used with `using` so the references are released deterministically:
|
|
462
|
+
*
|
|
463
|
+
* using engine = new AlbexEngine(opts); await engine.init();
|
|
464
|
+
*
|
|
465
|
+
* WebAssembly does not actually expose a way to release linear memory pages
|
|
466
|
+
* inside a Module instance, so we drop our references to the exports and
|
|
467
|
+
* the doc list. GC can then reclaim the engine, which in turn releases the
|
|
468
|
+
* WASM instance and its (typically 20 MB) backing memory.
|
|
469
|
+
*/
|
|
470
|
+
[Symbol.dispose](): void;
|
|
109
471
|
}
|
|
110
472
|
//# sourceMappingURL=albex.d.ts.map
|
package/dist/albex.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"albex.d.ts","sourceRoot":"","sources":["../src/albex.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;GAcG;
|
|
1
|
+
{"version":3,"file":"albex.d.ts","sourceRoot":"","sources":["../src/albex.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;GAcG;AAoBH,OAAO,EAAyC,KAAK,IAAI,EAAsB,MAAM,cAAc,CAAC;AAIpG,OAAO,EACL,UAAU,EACV,cAAc,EACd,2BAA2B,EAC3B,eAAe,EACf,kBAAkB,GACnB,MAAM,aAAa,CAAC;AACrB,OAAO,EAAE,aAAa,EAAE,eAAe,EAAE,MAAM,kBAAkB,CAAC;AAClE,OAAO,EAAE,aAAa,EAAE,QAAQ,EAAE,eAAe,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AACtF,YAAY,EAAE,aAAa,EAAE,IAAI,EAAE,MAAM,cAAc,CAAC;AACxD,OAAO,EAAE,kBAAkB,EAAE,MAAM,uBAAuB,CAAC;AAC3D,YAAY,EAAE,aAAa,EAAE,YAAY,EAAE,MAAM,uBAAuB,CAAC;AACzE,OAAO,EAAE,SAAS,EAAE,MAAM,uBAAuB,CAAC;AAClD,YAAY,EAAE,gBAAgB,EAAE,MAAM,uBAAuB,CAAC;AAC9D,OAAO,EAAE,QAAQ,EAAE,oBAAoB,EAAE,MAAM,wBAAwB,CAAC;AACxE,OAAO,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAChD,YAAY,EAAE,kBAAkB,EAAE,MAAM,mBAAmB,CAAC;AAwB5D,MAAM,WAAW,YAAY;IAC3B;;;;;;OAMG;IACH,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB;;;OAGG;IACH,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,8EAA8E;IAC9E,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB;;;OAGG;IACH,IAAI,CAAC,EAAE,MAAM,GAAG,MAAM,GAAG,KAAK,GAAG,KAAK,CAAC;IACvC;;;;;OAKG;IACH,IAAI,CAAC,EAAE,MAAM,GAAG,IAAI,GAAG,KAAK,CAAC;IAC7B;;;;;;OAMG;IACH,GAAG,CAAC,EAAE,MAAM,GAAG,IAAI,GAAG,KAAK,CAAC;IAC5B;;;OAGG;IACH,YAAY,CAAC,EAAE,MAAM,CAAC;CACvB;AAED,MAAM,WAAW,eAAe;IAC9B,IAAI,EAAE,MAAM,CAAC;IACb,GAAG,EAAE,MAAM,CAAC;IACZ,MAAM,EAAE,MAAM,CAAC;IACf,WAAW,EAAE,MAAM,CAAC;IACpB,SAAS,EAAE,MAAM,CAAC;IAClB,6EAA6E;IAC7E,KAAK,EAAE,MAAM,CAAC;IACd,sEAAsE;IACtE,WAAW,EAAE,MAAM,CAAC;CACrB;AAED,MAAM,WAAW,SAAS;IACxB,oEAAoE;IACpE,KAAK,EAAE,MAAM,CAAC;IACd,8EAA8E;IAC9E,GAAG,EAAE,MAAM,CAAC;CACb;AAED,MAAM,WAAW,YAAY;IAC3B,YAAY,EAAE,MAAM,CAAC;IACrB,gEAAgE;IAChE,QAAQ,EAAE,MAAM,CAAC;IACjB,8BAA8B;IAC9B,KAAK,EAAE,MAAM,CAAC;IACd;;0DAEsD;IACtD,OAAO,EAAE,MAAM,CAAC;IAChB,qFAAqF;IACrF,UAAU,EAAE,MAAM,CAAC;IACnB,QAAQ,EAAE,MAAM,CAAC;IACjB,4EAA4E;IAC5E,OAAO,EAAE,SAAS,EAAE,CAAC;CACtB;AAED;;;;;;;GAOG;AACH,MAAM,WAAW,aAAa;IAC5B,QAAQ,CAAC,EAAE,OAAO,CAAC;IACnB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf;;;;;;;OAOG;IACH,aAAa,CAAC,EAAE,MAAM,CAAC;CACxB;AAED,MAAM,WAAW,WAAW;IAC1B,SAAS,EAAE,M
AAM,CAAC;IAClB,MAAM,EAAE,MAAM,CAAC;IACf,QAAQ,EAAE,MAAM,CAAC;IACjB,YAAY,EAAE,MAAM,CAAC;IACrB,eAAe,EAAE,MAAM,CAAC;IACxB,+CAA+C;IAC/C,IAAI,EAAE,IAAI,GAAG,IAAI,CAAC;IAClB,uDAAuD;IACvD,SAAS,EAAE,MAAM,CAAC;IAClB,0DAA0D;IAC1D,OAAO,EAAE,MAAM,CAAC;CACjB;AAED,MAAM,WAAW,WAAW;IAC1B,KAAK,EAAE,MAAM,CAAC;IACd,MAAM,EAAE,MAAM,CAAC;IACf,OAAO,EAAE,MAAM,CAAC;IAChB,WAAW,EAAE,MAAM,CAAC;IACpB,WAAW,EAAE,MAAM,CAAC;IACpB,YAAY,EAAE,MAAM,CAAC;CACtB;AAgbD;;;;GAIG;AACH,MAAM,WAAW,iBAAiB;IAChC,IAAI,EAAE,MAAM,CAAC;IACb,UAAU,EAAE,MAAM,CAAC;IACnB,MAAM,EAAE,MAAM,CAAC;CAChB;AAED,MAAM,WAAW,kBAAkB;IACjC,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,IAAI,CAAC,EAAE,MAAM,CAAC;CACf;AAED,qBAAa,WAAW;IAEtB,OAAO,CAAC,KAAK,CAAoB;IACjC,OAAO,CAAC,IAAI,CAAsB;IAElC;;;;;OAKG;IACH,QAAQ,CAAC,EAAE,CAAC,KAAK,EAAE,OAAO,EAAE,IAAI,CAAC,EAAE,kBAAkB,KAAK,OAAO,CAAC,iBAAiB,CAAC,CAAC;IAErF;;;;;;;;;;;;;OAaG;IACH,SAAS,CAAC,EAAE;QAAE,2BAA2B,CAAC,EAAE,OAAO,CAAA;KAAE,CAAC;IAGtD,OAAO,CAAC,QAAQ,CAAgC;IAChD,OAAO,CAAC,OAAO,CAAmC;IAElD,OAAO,CAAC,KAAK,CAAyB;IACtC,OAAO,CAAC,WAAW,CAA4B;IAC/C,OAAO,CAAC,KAAK,CAAqB;IAClC,OAAO,CAAC,KAAK,CAAkB;IAC/B,OAAO,CAAC,QAAQ,CAA8B;IAC9C,OAAO,CAAC,UAAU,CAA8B;IAChD,OAAO,CAAC,IAAI,CAAyB;IACrC,OAAO,CAAC,sBAAsB,CAAK;IACnC,OAAO,CAAC,qBAAqB,CAA6B;IAC1D,OAAO,CAAC,QAAQ,CAAC,KAAK,CAAe;gBAEzB,IAAI,EAAE,YAAY;IAI9B,wFAAwF;IAClF,IAAI,IAAI,OAAO,CAAC,IAAI,CAAC;IAwB3B;;;;;;;;;;;;;;;;;;OAkBG;YACW,eAAe;IAwC7B,yEAAyE;IACzE,IAAI,IAAI,IAAI,IAAI,GAAG,IAAI,CAAuB;IAE9C,sDAAsD;IACtD,IAAI,WAAW,IAAI,OAAO,CAAuB;IAEjD,2EAA2E;IAC3E,IAAI,UAAU,IAAI,OAAO,CAAmC;IAI5D;;;;;;;;OAQG;IACH,OAAO,CAAC,gBAAgB;IAUxB;;;;;;;;OAQG;YACW,aAAa;IAsC3B,OAAO,CAAC,GAAG;IAIX,OAAO,CAAC,SAAS;IAOjB,OAAO,CAAC,SAAS;IAMjB,OAAO,CAAC,QAAQ;IAKhB,OAAO,CAAC,SAAS;IASjB,OAAO,CAAC,aAAa;YAWP,cAAc;YA2Bd,UAAU;YAQV,UAAU;YAoBV,SAAS;IAyHvB;;;;;;;;;;;;;;;;;;;;;;OAsBG;YACW,gBAAgB;IAe9B;;;;;;;;;;;;;;;;;;;;OAoBG;YACW,sBAAsB;IAuEpC;;;;;;;;;;;;;OAaG;YACW,sBAAsB;YAuCtB,SAAS;YAWT,SAAS;YAkBT,QAAQ;YAgCR,UAAU;YA4BV,UAAU;YA4BV,SAAS;YAuDT,SAAS;IA8BvB;;;;;;;;OAQG;IACH,OAAO,CAAC,oBAAoB;YAqEd,SAAS;IAiGvB,OAAO,CAAC
,MAAM,CAAC,QAAQ,CAAC,SAAS,CAc/B;IAIF;;;OAGG;IACG,SAAS,CAAC,IAAI,EAAE,IAAI,GAAG,OAAO,CAAC,eAAe,CAAC;IAiDrD;;;;;;OAMG;IACH,cAAc,CAAC,EAAE,EAAE,MAAM,GAAG,OAAO;IAUnC;;;;OAIG;IACG,eAAe,CAAC,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,IAAI,GAAG,OAAO,CAAC,eAAe,CAAC;IAS5E;;;;;;OAMG;IACH,OAAO,IAAI,IAAI;IAIf;;;;;;;;OAQG;IACH,MAAM,CAAC,KAAK,EAAE,MAAM,EAAE,IAAI,GAAE,aAAkB,GAAG,YAAY,EAAE;IAgB/D;;;;;;;;;;;;;OAaG;IACI,iBAAiB,CAAC,KAAK,EAAE,MAAM,EAAE,IAAI,GAAE,aAAkB,GAAG,aAAa,CAAC,YAAY,CAAC;IA+B9F;;;;;OAKG;IACI,YAAY,CAAC,KAAK,EAAE,MAAM,EAAE,IAAI,GAAE,aAAkB,GAAG,aAAa,CAAC,YAAY,CAAC;IAKzF;;;;;;;;OAQG;YACW,kBAAkB;IA8EhC,yEAAyE;IACzE,OAAO,CAAC,eAAe;IAyDvB,OAAO,CAAC,SAAS;IAmBjB,OAAO,CAAC,UAAU;IA6ElB,yCAAyC;IACzC,QAAQ,IAAI,WAAW;IAavB,0DAA0D;IAC1D,kBAAkB,IAAI,WAAW,GAAG,IAAI;IAIxC,6CAA6C;IAC7C,IAAI,SAAS,IAAI,SAAS,eAAe,EAAE,CAE1C;IAED,iCAAiC;IACjC,MAAM,KAAK,mBAAmB,IAAI,MAAM,EAAE,CAEzC;IAED,oCAAoC;IACpC,YAAY,CAAC,MAAM,EAAE,CAAC,GAAG,CAAC,GAAG,CAAC,GAAG,CAAC,GAAG,IAAI;IAIzC,YAAY,CAAC,SAAS,EAAE,MAAM,GAAG,IAAI;IAIrC,aAAa,CAAC,GAAG,EAAE,MAAM,GAAG,IAAI;IAIhC;;;;;;;;;OASG;IACH,WAAW,CAAC,IAAI,EAAE,KAAK,GAAG,IAAI,GAAG,IAAI;IAIrC,4DAA4D;IAC5D,KAAK,IAAI,IAAI;IAQb;;;;;;OAMG;IACG,IAAI,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;IAqBvC;;;;OAIG;IACG,IAAI,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,OAAO,CAAC;IA8E1C;;;OAGG;IACG,UAAU,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,OAAO,CAAC;IAMhD,8CAA8C;IACxC,cAAc,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;IAIjD,+DAA+D;IACzD,aAAa,IAAI,OAAO,CAAC,MAAM,EAAE,CAAC;IAIxC;;;;;;;;;;OAUG;IACH,CAAC,MAAM,CAAC,OAAO,CAAC,IAAI,IAAI;CAazB"}
|