albex 0.1.0 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72)
  1. package/CHANGELOG.md +416 -0
  2. package/README.md +244 -112
  3. package/dist/albex-worker.d.ts +70 -0
  4. package/dist/albex-worker.d.ts.map +1 -0
  5. package/dist/albex-worker.js +153 -0
  6. package/dist/albex-worker.js.map +1 -0
  7. package/dist/albex.d.ts +508 -6
  8. package/dist/albex.d.ts.map +1 -1
  9. package/dist/albex.js +1911 -141
  10. package/dist/albex.js.map +1 -1
  11. package/dist/errors.d.ts +52 -0
  12. package/dist/errors.d.ts.map +1 -0
  13. package/dist/errors.js +66 -0
  14. package/dist/errors.js.map +1 -0
  15. package/dist/gpu/bloom-runtime.d.ts +60 -0
  16. package/dist/gpu/bloom-runtime.d.ts.map +1 -0
  17. package/dist/gpu/bloom-runtime.js +176 -0
  18. package/dist/gpu/bloom-runtime.js.map +1 -0
  19. package/dist/gpu/bloom-shader.wgsl.d.ts +19 -0
  20. package/dist/gpu/bloom-shader.wgsl.d.ts.map +1 -0
  21. package/dist/gpu/bloom-shader.wgsl.js +49 -0
  22. package/dist/gpu/bloom-shader.wgsl.js.map +1 -0
  23. package/dist/persistence.d.ts +21 -0
  24. package/dist/persistence.d.ts.map +1 -0
  25. package/dist/persistence.js +174 -0
  26. package/dist/persistence.js.map +1 -0
  27. package/dist/pool/coordinator.d.ts +98 -0
  28. package/dist/pool/coordinator.d.ts.map +1 -0
  29. package/dist/pool/coordinator.js +247 -0
  30. package/dist/pool/coordinator.js.map +1 -0
  31. package/dist/profile.d.ts +100 -0
  32. package/dist/profile.d.ts.map +1 -0
  33. package/dist/profile.js +200 -0
  34. package/dist/profile.js.map +1 -0
  35. package/dist/resource-manager.d.ts +56 -0
  36. package/dist/resource-manager.d.ts.map +1 -0
  37. package/dist/resource-manager.js +138 -0
  38. package/dist/resource-manager.js.map +1 -0
  39. package/dist/tiered-store.d.ts +98 -0
  40. package/dist/tiered-store.d.ts.map +1 -0
  41. package/dist/tiered-store.js +238 -0
  42. package/dist/tiered-store.js.map +1 -0
  43. package/dist/wasm-bindings.d.ts +180 -0
  44. package/dist/wasm-bindings.d.ts.map +1 -0
  45. package/dist/wasm-bindings.js +128 -0
  46. package/dist/wasm-bindings.js.map +1 -0
  47. package/dist/worker-protocol.d.ts +86 -0
  48. package/dist/worker-protocol.d.ts.map +1 -0
  49. package/dist/worker-protocol.js +20 -0
  50. package/dist/worker-protocol.js.map +1 -0
  51. package/dist/worker-runtime.d.ts +14 -0
  52. package/dist/worker-runtime.d.ts.map +1 -0
  53. package/dist/worker-runtime.js +109 -0
  54. package/dist/worker-runtime.js.map +1 -0
  55. package/package.json +60 -13
  56. package/src/albex-worker.ts +187 -0
  57. package/src/albex.ts +2136 -189
  58. package/src/errors.ts +76 -0
  59. package/src/gpu/bloom-runtime.ts +229 -0
  60. package/src/gpu/bloom-shader.wgsl.ts +48 -0
  61. package/src/persistence.ts +175 -0
  62. package/src/pool/coordinator.ts +324 -0
  63. package/src/profile.ts +280 -0
  64. package/src/resource-manager.ts +167 -0
  65. package/src/tiered-store.ts +259 -0
  66. package/src/wasm-bindings.ts +349 -0
  67. package/src/worker-protocol.ts +48 -0
  68. package/src/worker-runtime.ts +106 -0
  69. package/wasm/pkg/albex_pdf.wasm +0 -0
  70. package/wasm/pkg/albex_wasm.wasm +0 -0
  71. package/wasm/pkg/albex_wasm_bg.wasm +0 -0
  72. package/wasm/pkg/albex_wasm_simd.wasm +0 -0
@@ -0,0 +1,349 @@
1
+ /**
2
+ * Typed interfaces for the two WASM modules Albex ships with.
3
+ *
4
+ * These types replace ad-hoc `as Function` casts and give us:
5
+ * - argument/return type checking at call sites,
6
+ * - autocompletion in IDEs,
7
+ * - safe refactors when an export name or signature changes.
8
+ *
9
+ * The interfaces mirror the `#[no_mangle] pub extern "C" fn` exports
10
+ * in `wasm/src/lib.rs` and `pdf-wasm/src/lib.rs`.
11
+ */
12
+
13
+ // ─────────────────────────────────────────────────────────────────────────────
14
+ // Main WASM module (albex_wasm_bg.wasm)
15
+ // ─────────────────────────────────────────────────────────────────────────────
16
+
17
/**
 * Typed view of the exports of the main Albex WASM module.
 *
 * Every method declared here is also named in `MAIN_REQUIRED`, so
 * `asAlbexExports` throws `AlbexAbiMismatchError` for a binary that lacks
 * any of them. All arguments and results are plain wasm numbers; pointer
 * results are numeric offsets to be read through `memory`.
 */
export interface AlbexWasmExports {
  /** Linear memory of the module instance. */
  readonly memory: WebAssembly.Memory;

  // ABI / lifecycle
  abiVersion(): number;
  getBuffer(size: number): number;
  init(): void;

  /** Reset the streaming FNV-1a 64-bit hash state. Optional on the first
   *  hash of a session because the static initialiser is also FNV_OFFSET. */
  hashBegin(): void;
  /** Fold `len` bytes of scratchpad into the streaming hash. May be
   *  called repeatedly for files larger than SCRATCHPAD_SIZE. */
  hashFeed(len: number): void;
  /** Write the final 8 raw big-endian bytes at scratchpad[0..8] and
   *  reset the state so the next hash can start without an explicit Begin. */
  hashFinish(): void;

  // Document ingestion
  setDocumentName(len: number): void;
  beginDocument(): number;
  feedXmlBytes(len: number): void;
  endDocument(): number;

  // XLSX
  beginXlsx(): number;
  feedXlsxBytes(len: number): void;

  // Generic text path (PDF, TXT, XML, future formats)
  feedText(len: number): void;
  flushParagraph(): void;

  // Search configuration
  setMaxErrors(errors: number): void;
  setThreshold(threshold: number): void;
  setMaxResults(max: number): void;

  // Query parsing (since ABI v2). Single source of truth for tokenization.
  prepareQuery(len: number): number;
  getQueryKind(): number;
  getQueryBranchCount(): number;
  getQueryBranchPattern(i: number): number;
  selectQueryBranch(i: number): number;

  // Search execution
  setPattern(len: number): number;
  search(): number;
  // Resumable search (used by frame-budgeted searches and worker shards)
  searchBegin(): number;
  searchSlice(maxChunks: number): number;
  getSearchCursor(): number;
  getSearchTotal(): number;

  // Result accessors (`i` is a result index; `k` is a match index within
  // result `i`)
  getResultCount(): number;
  getResultDocId(i: number): number;
  getResultLocation(i: number): number;
  getResultScore(i: number): number;
  getResultStart(i: number): number;
  getResultEnd(i: number): number;
  getResultChunkIdx(i: number): number;
  getResultDocName(i: number): number;
  getResultMatchCount(i: number): number;
  getResultMatchStartAt(i: number, k: number): number;
  getResultMatchEndAt(i: number, k: number): number;
  getSnippet(i: number): number;
  getSnippetWindow(i: number, before: number, after: number): number;
  getSnippetWindowOffset(): number;

  // Stats
  getStatBloomTested(): number;
  getStatBloomPassed(): number;
  getStatBitapMatched(): number;
  getChunkCount(): number;
  getDocCount(): number;
  getTextUsed(): number;
  getTextCapacity(): number;
  /** Bitflags of capacity limits hit during the most recent
   *  begin..endDocument cycle: 1 = chunks, 2 = text, 4 = docs, 8 = names.
   *  0 = everything fit. Read by the host right after endDocument to raise a
   *  typed AlbexCapacityError instead of silently truncating the corpus. */
  getLastIndexOverflow(): number;

  // Snapshot / restore (v3 protocol; v1 and v2 still load)
  snapshotSize(): number;
  snapshotChunk(offset: number, maxLen: number): number;
  /** Validate header. For v3 also reserves the staging buffer; state is
   *  NOT touched until restoreCommit succeeds. For v1/v2 (legacy) state is
   *  reset and counters are written immediately. */
  restoreBegin(): number;
  /** Feed payload bytes. For v3 they accumulate into staging; for v1/v2
   *  they are written straight to the state arrays as before. */
  restoreFeed(len: number): number;
  /** Atomic commit for v3 snapshots. Returns 1 if the staged payload was
   *  complete and decoded successfully; 0 otherwise — and in the 0 case
   *  the previous engine state is preserved. For v1/v2 this is a no-op
   *  that always returns 1. */
  restoreCommit(): number;

  // Incremental / per-doc
  getDocId(index: number): number;
  getDocChunkCount(index: number): number;
  getDocName(index: number): number;
  isDocDeleted(index: number): number;
  removeDocument(docId: number): number;
  compact(): void;

  /**
   * Per-document content hash (snapshot v2). Returns a pointer to 8 bytes
   * holding the FNV-1a 64-bit hash of the original file bytes, or 0 if the
   * doc index is out of range. All-zero bytes mean "hash not available"
   * — either the host never called setDocumentContentHash for this doc
   * (legacy code path) or it was restored from a v1 snapshot.
   */
  getDocContentHashPtr(index: number): number;
  /** Always returns 8. Older binaries without snapshot v2 do not export this
   *  function at all; because the name is listed in `MAIN_REQUIRED`,
   *  `asAlbexExports` rejects such a binary up front, so code holding a
   *  validated `AlbexWasmExports` does not need to feature-detect it. */
  getDocContentHashLen(): number;
  /** Copy a content hash (up to 8 bytes from the scratchpad) into the
   *  pending slot. endDocument() then writes it into doc_hashes[]. */
  setDocumentContentHash(len: number): void;

  // Stemming
  setLanguage(lang: number): void;

  // Tier identification
  getTier(): number; // 1=mini, 2=std, 3=pro
  getMaxChunks(): number;
  getMaxDocs(): number;
  getNameCapacity(): number;

  // GPU bridge (CD1): zero-copy access to chunk array + candidate mask
  getChunksPtr(): number;
  getChunkStructSize(): number;
  setCandidateMask(byteLen: number): void;
  clearCandidateMask(): void;
}
154
+
155
+ // ─────────────────────────────────────────────────────────────────────────────
156
+ // PDF WASM module (albex_pdf.wasm)
157
+ // ─────────────────────────────────────────────────────────────────────────────
158
+
159
/**
 * Typed view of the exports of the PDF WASM module.
 *
 * Every method declared here is also named in `PDF_REQUIRED`, so
 * `asAlbexPdfExports` throws `AlbexAbiMismatchError` for a binary that
 * lacks any of them.
 */
export interface AlbexPdfExports {
  /** Linear memory of the module instance. */
  readonly memory: WebAssembly.Memory;

  /** ABI version of the PDF module. The host loader refuses any binary
   *  whose abiVersion is outside the supported range. */
  abiVersion(): number;

  /** Reserve `len` bytes inside the PDF module and return a pointer. */
  allocInput(len: number): number;

  /**
   * Parse the PDF. Returns:
   *   N ≥ 0 — page count,
   *   -1    — parse error (read with getErrorPtr/getErrorLen),
   *   -2    — image-only / no extractable text.
   *
   * When `-2` is returned, the host can fall through to the scanned-PDF
   * path via `getPageCount` + `extractPageImages`.
   */
  extractPdf(len: number): number;

  getPageLen(page: number): number;
  getPagePtr(page: number): number;
  getErrorLen(): number;
  getErrorPtr(): number;

  // ── Scanned-PDF path (extracts embedded image XObjects) ──────────────────
  //
  // Available since pdf-wasm 0.2. Binaries built before this addition do not
  // expose these exports. All five names are listed in `PDF_REQUIRED`, so
  // `asAlbexPdfExports` throws AlbexAbiMismatchError for such a binary
  // instead of returning it; code holding a validated AlbexPdfExports can
  // call them without feature-detecting.
  // NOTE(review): this section previously asked the engine to feature-detect
  // at runtime. If pre-0.2 binaries are meant to keep loading, these members
  // must become optional and leave PDF_REQUIRED — confirm the intent.

  /** Total page count of the loaded PDF; 0 if the input cannot be parsed. */
  getPageCount(): number;

  /**
   * Extract every supported image XObject on page `page` (0-based) into the
   * module's internal buffer.
   *
   * Returns:
   *   N ≥ 0 — number of images extracted on this page,
   *   -1    — parse error or page index out of range.
   *
   * Each extracted image is one of:
   *   * JPEG (kind = 1, from a `/DCTDecode` filter), or
   *   * JPEG2000 (kind = 2, from a `/JPXDecode` filter).
   *
   * Filters that require Rust-side reconstruction (`FlateDecode`,
   * `CCITTFaxDecode`, `JBIG2Decode`) are intentionally skipped — they
   * would roughly double the binary size for ~5 % more coverage.
   */
  extractPageImages(page: number): number;

  /** Byte length of extracted image `i` (from the last `extractPageImages`). */
  getPageImageLen(i: number): number;

  /** Pointer to extracted image `i`'s raw bytes. */
  getPageImagePtr(i: number): number;

  /** Format tag for extracted image `i`: 1 = JPEG, 2 = JPEG2000, 0 = none. */
  getPageImageKind(i: number): number;
}
221
+
222
+ // ─────────────────────────────────────────────────────────────────────────────
223
+ // Runtime validators
224
+ // ─────────────────────────────────────────────────────────────────────────────
225
+ //
226
+ // These replace the pre-0.5.0 `as unknown as` casts. They check three
227
+ // things at instantiation time:
228
+ // 1. memory is a WebAssembly.Memory instance.
229
+ // 2. abiVersion() returns a number inside the supported range.
230
+ // 3. every required export exists and is a function.
231
+ //
232
+ // If any of these fails, the loader throws a typed error before the
233
+ // engine returns from init(). This eliminates the audit 3.2 issue:
234
+ // previously a missing export only surfaced when its call site ran.
235
+
236
/**
 * Inclusive range of ABI versions this host code understands for the main
 * module. Update both ends together with the Rust `abiVersion()` constant
 * when the export surface changes.
 *
 * 0.6.0 requires ABI 3 (trigram pre-filter + getLastIndexOverflow). The
 * required-exports list below already makes any older binary fail the
 * missing-exports check, so a tolerant lower bound was dead code — the range
 * is pinned to the one ABI this host actually speaks (audit 0.6.0,
 * finding #7).
 */
const MAIN_ABI_MIN = 3;
const MAIN_ABI_MAX = 3;

/**
 * Inclusive range of ABI versions accepted for the PDF module.
 *
 * NOTE(review): PDF_REQUIRED includes the scanned-PDF exports, which the
 * interface comments say only exist since pdf-wasm 0.2. If ABI 1 binaries
 * predate them, the lower bound can never be reached (the missing-exports
 * check fires first), the same situation that led to pinning the main
 * range — confirm which PDF ABI versions actually ship those exports.
 */
const PDF_ABI_MIN = 1;
const PDF_ABI_MAX = 3;
249
+
250
/**
 * Required function names on the main WASM. Adding a new one here forces
 * the validator to check it; removing one is a breaking ABI bump.
 *
 * Order matters only for diagnostics: missing names are reported in the
 * order they appear in this list.
 */
const MAIN_REQUIRED = [
  // ABI / lifecycle
  'abiVersion', 'getBuffer', 'init',
  // Document ingestion
  'setDocumentName', 'beginDocument', 'feedXmlBytes', 'endDocument',
  // XLSX
  'beginXlsx', 'feedXlsxBytes',
  // Generic text path
  'feedText', 'flushParagraph',
  // Search configuration
  'setMaxErrors', 'setThreshold', 'setMaxResults',
  // Query parsing
  'prepareQuery', 'getQueryKind', 'getQueryBranchCount',
  'getQueryBranchPattern', 'selectQueryBranch',
  // Search execution (one-shot and resumable)
  'setPattern', 'search',
  'searchBegin', 'searchSlice', 'getSearchCursor', 'getSearchTotal',
  // Result accessors
  'getResultCount',
  'getResultDocId', 'getResultLocation', 'getResultScore',
  'getResultStart', 'getResultEnd', 'getResultChunkIdx',
  'getResultDocName', 'getResultMatchCount',
  'getResultMatchStartAt', 'getResultMatchEndAt',
  'getSnippet', 'getSnippetWindow', 'getSnippetWindowOffset',
  // Stats
  'getStatBloomTested', 'getStatBloomPassed', 'getStatBitapMatched',
  'getChunkCount', 'getDocCount', 'getTextUsed', 'getTextCapacity',
  'getLastIndexOverflow',
  // Snapshot / restore
  'snapshotSize', 'snapshotChunk',
  'restoreBegin', 'restoreFeed', 'restoreCommit',
  // Incremental / per-doc
  'getDocId', 'getDocChunkCount', 'getDocName', 'isDocDeleted',
  'removeDocument', 'compact',
  // Stemming
  'setLanguage',
  // Tier identification
  'getTier', 'getMaxChunks', 'getMaxDocs', 'getNameCapacity',
  // GPU bridge
  'getChunksPtr', 'getChunkStructSize',
  'setCandidateMask', 'clearCandidateMask',
  // Content hashing
  'getDocContentHashPtr', 'getDocContentHashLen', 'setDocumentContentHash',
  'hashBegin', 'hashFeed', 'hashFinish',
] as const;

/**
 * Required function names on the PDF WASM. Includes the scanned-PDF image
 * exports, so a binary without them fails validation.
 */
const PDF_REQUIRED = [
  // ABI, input and text extraction
  'abiVersion', 'allocInput', 'extractPdf',
  'getPageLen', 'getPagePtr', 'getErrorLen', 'getErrorPtr',
  // Scanned-PDF path
  'getPageCount', 'extractPageImages',
  'getPageImageLen', 'getPageImagePtr', 'getPageImageKind',
] as const;
289
+
290
/**
 * Error raised when an instantiated WASM module does not satisfy the ABI
 * contract the host expects.
 *
 * `module` says which binary failed. `missing` is set only when required
 * exports were absent, and `version` only when a version was supplied by
 * the thrower; otherwise the properties are left unassigned.
 */
export class AlbexAbiMismatchError extends Error {
  readonly module: 'main' | 'pdf';
  readonly missing?: readonly string[];
  readonly version?: number;

  constructor(
    module: 'main' | 'pdf',
    message: string,
    opts?: { missing?: readonly string[]; version?: number },
  ) {
    super(message);
    this.name = 'AlbexAbiMismatchError';
    this.module = module;

    // Assign the optional details only when present so that absent details
    // stay absent rather than becoming explicit `undefined` assignments.
    const missingNames = opts?.missing;
    if (missingNames) {
      this.missing = missingNames;
    }
    const reportedVersion = opts?.version;
    if (reportedVersion !== undefined) {
      this.version = reportedVersion;
    }
  }
}
303
+
304
/**
 * Check an instantiated module's exports against the host's ABI contract.
 *
 * Checks run in this order, and the first failure throws:
 *   1. `memory` must be a `WebAssembly.Memory` instance.
 *   2. Every name in `required` must be exported as a function.
 *   3. `abiVersion()` must return an integer within `[abiMin, abiMax]`.
 *
 * @param exports  Raw exports object of the instantiated module.
 * @param required Names that must be present as functions.
 * @param module   Which binary is being validated; used in messages and on
 *                 the thrown error.
 * @param abiMin   Lowest accepted ABI version (inclusive).
 * @param abiMax   Highest accepted ABI version (inclusive).
 * @throws AlbexAbiMismatchError when any of the three checks fails.
 */
function validateExports(
  exports: WebAssembly.Exports,
  required: readonly string[],
  module: 'main' | 'pdf',
  abiMin: number,
  abiMax: number,
): void {
  const table = exports as Record<string, unknown>;

  if (!(table['memory'] instanceof WebAssembly.Memory)) {
    throw new AlbexAbiMismatchError(module, `${module}: \`memory\` is missing or not a WebAssembly.Memory instance.`);
  }

  const missing = required.filter((name) => typeof table[name] !== 'function');
  // `abiVersion` is called below regardless of what the caller listed, so
  // report it as a missing export rather than failing with a TypeError.
  if (typeof table['abiVersion'] !== 'function' && !missing.includes('abiVersion')) {
    missing.push('abiVersion');
  }
  if (missing.length) {
    throw new AlbexAbiMismatchError(
      module,
      `${module}: WASM binary missing required exports: ${missing.join(', ')}. ` +
        `The .wasm was built with an incompatible source — rebuild with the current toolchain.`,
      { missing },
    );
  }

  const version: unknown = (table['abiVersion'] as () => unknown)();
  // A plain `version < abiMin || version > abiMax` test is false for NaN and
  // for non-numbers such as undefined, which would let a binary with a
  // broken `abiVersion` through. Require an integer before range-checking.
  const inRange =
    typeof version === 'number' &&
    Number.isInteger(version) &&
    version >= abiMin &&
    version <= abiMax;
  if (!inRange) {
    throw new AlbexAbiMismatchError(
      module,
      `${module}: abiVersion ${String(version)} outside supported range [${abiMin}..${abiMax}]. ` +
        `The host TypeScript expects a different binary — upgrade albex or rebuild the WASM.`,
      typeof version === 'number' ? { version } : undefined,
    );
  }
}
337
+
338
/**
 * Run the main-module ABI checks on `exports` and hand the same object back
 * typed as `AlbexWasmExports`.
 *
 * @throws AlbexAbiMismatchError if the exports break the contract.
 */
export function asAlbexExports(exports: WebAssembly.Exports): AlbexWasmExports {
  // validateExports throws on any violation, so reaching the cast below
  // means the checks passed.
  validateExports(exports, MAIN_REQUIRED, 'main', MAIN_ABI_MIN, MAIN_ABI_MAX);
  const checked: unknown = exports;
  return checked as AlbexWasmExports;
}
344
+
345
/**
 * Run the PDF-module ABI checks on `exports` and hand the same object back
 * typed as `AlbexPdfExports`.
 *
 * @throws AlbexAbiMismatchError if the exports break the contract.
 */
export function asAlbexPdfExports(exports: WebAssembly.Exports): AlbexPdfExports {
  // validateExports throws on any violation, so reaching the cast below
  // means the checks passed.
  validateExports(exports, PDF_REQUIRED, 'pdf', PDF_ABI_MIN, PDF_ABI_MAX);
  const checked: unknown = exports;
  return checked as AlbexPdfExports;
}
@@ -0,0 +1,48 @@
1
+ /**
2
+ * Wire protocol between the main thread and the AlbexEngineWorker runtime.
3
+ *
4
+ * One request/response pair per call, identified by `id`. The runtime is
5
+ * single-threaded so we serialise requests on the main side (one in-flight
6
+ * call at a time per worker) — keeps the protocol trivial and matches the
7
+ * actual constraint of `static mut` WASM state.
8
+ *
9
+ * `Transferable` is opt-in per op; we use it for `indexFile` to avoid
10
+ * copying the file bytes into the worker.
11
+ */
12
+
13
+ import type { AlbexOptions, IndexedDocument, SearchOptions, SearchResult, EngineStats, SearchStats } from './albex.js';
14
+
15
/**
 * Every operation the main thread can ask the worker runtime to perform,
 * as a discriminated union tagged by `kind`. Each variant carries only the
 * arguments that operation needs.
 */
export type WorkerOp =
  // Lifecycle: must be sent before any other op.
  | { kind: 'init'; opts: AlbexOptions }
  // Indexing: `buffer` holds the raw file bytes (sent as a Transferable).
  | { kind: 'indexFile'; name: string; buffer: ArrayBuffer }
  | { kind: 'search'; query: string; options: SearchOptions }
  | { kind: 'removeDocument'; id: string }
  | { kind: 'compact' }
  | { kind: 'reset' }
  // Read-only queries
  | { kind: 'getStats' }
  | { kind: 'getLastSearchStats' }
  | { kind: 'getDocuments' }
  // Search configuration
  | { kind: 'setMaxErrors'; n: 0 | 1 | 2 | 3 }
  | { kind: 'setThreshold'; n: number }
  | { kind: 'setMaxResults'; n: number }
  | { kind: 'setLanguage'; lang: 'off' | 'es' }
  // Snapshot persistence; `name` identifies the snapshot.
  | { kind: 'save'; name: string }
  | { kind: 'load'; name: string }
  | { kind: 'loadOrInit'; name: string }
  | { kind: 'deleteSnapshot'; name: string }
  | { kind: 'listSnapshots' };
34
+
35
/**
 * Message posted from the main thread to the worker: one operation plus a
 * caller-chosen `id`. The worker copies `id` into the matching
 * `WorkerResponse` so the caller can pair the two.
 */
export interface WorkerRequest {
  id: number;
  op: WorkerOp;
}
39
+
40
/**
 * Message posted from the worker back to the main thread, tagged by `ok`.
 *
 * - `ok: true`  — `result` is whatever the operation produced (`undefined`
 *   for operations with no value).
 * - `ok: false` — `error` is a plain-data description of what was thrown;
 *   `kind` is only filled in when the thrown value was an `AlbexError`.
 */
export type WorkerResponse =
  | { id: number; ok: true; result: unknown }
  | { id: number; ok: false; error: { name: string; kind?: string; message: string } };
43
+
44
// Named aliases for engine types.
// NOTE(review): presumably these are the `result` payloads for the
// `indexFile`, `search`, `getStats`, `getLastSearchStats` and `getDocuments`
// ops respectively — confirm against the main-thread client that consumes
// `WorkerResponse`.
export type IndexFileResult = IndexedDocument;
export type SearchResultArr = SearchResult[];
export type StatsResult = EngineStats;
export type SearchStatsRes = SearchStats | null;
export type DocsResult = readonly IndexedDocument[];
@@ -0,0 +1,106 @@
1
+ /**
2
+ * Albex worker runtime.
3
+ *
4
+ * Loads inside a Web Worker, instantiates an `AlbexEngine`, and serves the
5
+ * wire protocol from `worker-protocol.ts`. Designed to be referenced as:
6
+ *
7
+ * new Worker(new URL('./worker-runtime.js', import.meta.url),
8
+ * { type: 'module' });
9
+ *
10
+ * The runtime is intentionally side-effectful (registers `onmessage` at
11
+ * import time). It is not meant to be imported from the main thread.
12
+ */
13
+
14
+ import { AlbexEngine, AlbexError } from './albex.js';
15
+ import type { WorkerRequest, WorkerResponse, WorkerOp } from './worker-protocol.js';
16
+
17
/** The single engine instance owned by this worker; null until `init` runs. */
let engine: AlbexEngine | null = null;

/**
 * Return the worker's engine.
 *
 * @throws Error if no engine has been created yet.
 */
function ensureEngine(): AlbexEngine {
  if (engine === null) {
    throw new Error('Worker runtime: init() not called yet');
  }
  return engine;
}
23
+
24
/**
 * Execute one protocol operation against the worker's engine and return
 * its result (or `undefined` for operations that produce no value).
 *
 * @param op The operation to run, discriminated by `op.kind`.
 * @returns Whatever the corresponding engine call returns.
 * @throws Error if an op other than `init` arrives before a successful
 *         `init`, or if `op.kind` is not a known operation. Errors thrown
 *         by the engine propagate unchanged.
 */
async function dispatch(op: WorkerOp): Promise<unknown> {
  switch (op.kind) {
    case 'init': {
      // Publish the engine only after init() succeeds. Assigning first would
      // leave a half-initialised engine behind on failure (later ops would
      // pass ensureEngine), and a failed re-init would replace a working
      // engine.
      const next = new AlbexEngine(op.opts);
      await next.init();
      engine = next;
      return undefined;
    }
    case 'indexFile': {
      // Wrap the transferred buffer in a File-like object so existing
      // indexers work unchanged.
      const file = new File([op.buffer], op.name);
      return ensureEngine().indexFile(file);
    }
    case 'search':
      return ensureEngine().search(op.query, op.options);
    case 'removeDocument':
      return ensureEngine().removeDocument(op.id);
    case 'compact':
      ensureEngine().compact();
      return undefined;
    case 'reset':
      ensureEngine().reset();
      return undefined;
    case 'getStats':
      return ensureEngine().getStats();
    case 'getLastSearchStats':
      return ensureEngine().getLastSearchStats();
    case 'getDocuments':
      // Copy so the response never aliases the engine's own array.
      return ensureEngine().documents.slice();
    case 'setMaxErrors':
      ensureEngine().setMaxErrors(op.n);
      return undefined;
    case 'setThreshold':
      ensureEngine().setThreshold(op.n);
      return undefined;
    case 'setMaxResults':
      ensureEngine().setMaxResults(op.n);
      return undefined;
    case 'setLanguage':
      ensureEngine().setLanguage(op.lang);
      return undefined;
    case 'save':
      return ensureEngine().save(op.name);
    case 'load':
      return ensureEngine().load(op.name);
    case 'loadOrInit':
      return ensureEngine().loadOrInit(op.name);
    case 'deleteSnapshot':
      return ensureEngine().deleteSnapshot(op.name);
    case 'listSnapshots':
      return ensureEngine().listSnapshots();
    default: {
      // Compile-time exhaustiveness check, plus a runtime guard: a message
      // from a mismatched client must fail loudly instead of being answered
      // with `ok: true, result: undefined`.
      const unknownOp: never = op;
      throw new Error(
        `Worker runtime: unknown op kind "${String((unknownOp as { kind?: unknown }).kind)}"`,
      );
    }
  }
}
77
+
78
/**
 * Turn any thrown value into the plain-data error shape used on the wire.
 * Safe for non-Error throwables, including `null` and `undefined`.
 */
function toWireError(err: unknown): { name: string; kind?: string; message: string } {
  const fields = (typeof err === 'object' && err !== null ? err : {}) as {
    name?: unknown;
    message?: unknown;
  };
  return {
    name: typeof fields.name === 'string' ? fields.name : 'Error',
    kind: err instanceof AlbexError ? err.kind : undefined,
    message: typeof fields.message === 'string' ? fields.message : String(err),
  };
}

/**
 * Run one request and post exactly one response carrying the same `id`:
 * `ok: true` with the result, or `ok: false` with a description of the
 * failure.
 *
 * Posting the success response happens inside the `try`, so a result that
 * cannot be posted is reported to the caller as an error response.
 *
 * @param req The request received from the main thread.
 */
async function handle(req: WorkerRequest): Promise<void> {
  const { id, op } = req;
  try {
    const result = await dispatch(op);
    const res: WorkerResponse = { id, ok: true, result };
    (self as unknown as Worker).postMessage(res);
  } catch (err) {
    // Build the error payload without dereferencing `err` directly: reading
    // `.name` on a thrown null/undefined would throw inside this catch block
    // and the request would never be answered.
    const res: WorkerResponse = { id, ok: false, error: toWireError(err) };
    (self as unknown as Worker).postMessage(res);
  }
}
97
+
98
// Process messages strictly in arrival order. The engine guards its own
// state, but a sync `search` arriving mid-`indexFile` await would otherwise
// be rejected as "busy"; queueing keeps the worker's externally-observable
// behaviour serial and matches the main-thread engine's serialization.
//
// Each link ends in a rejection handler so the chain always settles as
// fulfilled. Without it, one rejected `handle` would leave `_queue`
// rejected, every later `.then` callback would be skipped, and the worker
// would stop answering requests.
let _queue: Promise<void> = Promise.resolve();
self.onmessage = (ev: MessageEvent<WorkerRequest>) => {
  const req = ev.data;
  _queue = _queue
    .then(() => handle(req))
    .catch((err: unknown) => {
      // `handle` reports ordinary failures to the caller itself; reaching
      // this point means even that reporting failed. Log it and keep the
      // queue alive for the requests that follow.
      console.error('Albex worker runtime: failed to answer a request', err);
    });
};
Binary file
Binary file
Binary file
Binary file