albex 0.3.0 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41)
  1. package/CHANGELOG.md +275 -0
  2. package/README.md +4 -2
  3. package/dist/albex-worker.js +1 -1
  4. package/dist/albex.d.ts +157 -17
  5. package/dist/albex.d.ts.map +1 -1
  6. package/dist/albex.js +405 -232
  7. package/dist/albex.js.map +1 -1
  8. package/dist/errors.d.ts +16 -2
  9. package/dist/errors.d.ts.map +1 -1
  10. package/dist/errors.js +6 -3
  11. package/dist/errors.js.map +1 -1
  12. package/dist/persistence.js +1 -1
  13. package/dist/profile.d.ts +11 -6
  14. package/dist/profile.d.ts.map +1 -1
  15. package/dist/profile.js +6 -13
  16. package/dist/profile.js.map +1 -1
  17. package/dist/resource-manager.js +1 -1
  18. package/dist/tiered-store.js +1 -1
  19. package/dist/wasm-bindings.d.ts +46 -5
  20. package/dist/wasm-bindings.d.ts.map +1 -1
  21. package/dist/wasm-bindings.js +102 -7
  22. package/dist/wasm-bindings.js.map +1 -1
  23. package/dist/worker-protocol.js +1 -1
  24. package/dist/worker-runtime.js +12 -3
  25. package/dist/worker-runtime.js.map +1 -1
  26. package/package.json +13 -9
  27. package/src/albex.ts +478 -246
  28. package/src/errors.ts +18 -2
  29. package/src/profile.ts +11 -10
  30. package/src/wasm-bindings.ts +157 -8
  31. package/src/worker-runtime.ts +12 -2
  32. package/wasm/pkg/albex_pdf.wasm +0 -0
  33. package/wasm/pkg/albex_wasm.wasm +0 -0
  34. package/wasm/pkg/albex_wasm_bg.wasm +0 -0
  35. package/wasm/pkg/albex_wasm_simd.wasm +0 -0
  36. package/wasm/pkg/albex_wasm_mini.wasm +0 -0
  37. package/wasm/pkg/albex_wasm_mini_simd.wasm +0 -0
  38. package/wasm/pkg/albex_wasm_pro.wasm +0 -0
  39. package/wasm/pkg/albex_wasm_pro_simd.wasm +0 -0
  40. package/wasm/pkg/albex_wasm_std.wasm +0 -0
  41. package/wasm/pkg/albex_wasm_std_simd.wasm +0 -0
package/dist/albex.js CHANGED
@@ -1,5 +1,5 @@
1
1
  /*!
2
- * albex v0.3.0
2
+ * albex v0.6.0
3
3
  * Zero-config local full-text search for documents — runs entirely in the browser, no server, no upload.
4
4
  * (c) 2026 RafaCalRob
5
5
  * @license MIT
@@ -21,9 +21,9 @@
21
21
  * ```
22
22
  */
23
23
  import { asAlbexExports, asAlbexPdfExports, } from './wasm-bindings.js';
24
- import { AlbexInitError, AlbexUnsupportedFormatError, AlbexParseError, AlbexCapacityError, } from './errors.js';
24
+ import { AlbexError, AlbexInitError, AlbexUnsupportedFormatError, AlbexParseError, AlbexCapacityError, } from './errors.js';
25
25
  import { savePersisted, loadPersisted, deletePersisted, listPersisted, } from './persistence.js';
26
- import { detectProfile, pickTier, shouldUseGpu } from './profile.js';
26
+ import { detectProfile, shouldUseGpu } from './profile.js';
27
27
  import { getResourceManager } from './resource-manager.js';
28
28
  import { BloomGpu, packBloomsFromChunks } from './gpu/bloom-runtime.js';
29
29
  export { AlbexError, AlbexInitError, AlbexUnsupportedFormatError, AlbexParseError, AlbexCapacityError, } from './errors.js';
@@ -48,41 +48,36 @@ function warnSearchStreamDeprecated() {
48
48
  'scheduler between slices and returns a batch. The alias will be ' +
49
49
  'removed in 0.4.0.');
50
50
  }
51
- function tokenize(q) {
52
- return q.trim().split(/\s+/).filter(t => t.length > 0);
53
- }
54
- function parseQuery(q) {
55
- const trimmed = q.trim();
56
- // OR: "term1 | term2" or "phrase one | phrase two"
57
- if (trimmed.includes('|')) {
58
- const branches = trimmed.split('|')
59
- .map(p => tokenize(p.replace(/"/g, '')))
60
- .filter(b => b.length > 0);
61
- return { kind: 'or', branches };
62
- }
63
- // Phrase: "exact phrase here"
64
- const phraseMatch = /^"(.+)"$/.exec(trimmed);
65
- if (phraseMatch) {
66
- const inner = phraseMatch[1] ?? '';
67
- const tokens = tokenize(inner);
68
- return { kind: 'phrase', tokens, raw: inner };
69
- }
70
- return { kind: 'simple', tokens: tokenize(trimmed) };
71
- }
72
- /**
73
- * Reconstruct a WASM-compatible query string from parsed tokens.
74
- * The WASM engine accepts up to 4 space-separated tokens (AND semantics).
75
- */
76
- function tokensToWasmQuery(tokens) {
77
- return tokens.slice(0, 4).join(' ');
78
- }
79
51
  // ─────────────────────────────────────────────────────────────────────────────
80
- // Phrase post-filter
52
+ // Query parsing (WASM-side as of 0.5.0)
81
53
  // ─────────────────────────────────────────────────────────────────────────────
54
+ //
55
+ // Pre-0.5.0 this file owned parseQuery + tokenize. That created two
56
+ // truths about what a "token" was: one in TS for the query, one in Rust
57
+ // for the indexed text. The audit flagged this as the biggest divergence
58
+ // in the wrapper.
59
+ //
60
+ // 0.5.0 moves parseQuery/tokenize/tokensToWasmQuery to Rust. The TS
61
+ // dispatcher reduces to:
62
+ //
63
+ // 1. Write the raw UTF-8 query bytes to the scratchpad.
64
+ // 2. Call prepareQuery(len). Get back the kind (simple/phrase/or).
65
+ // 3. For OR: iterate getQueryBranchCount() branches, calling
66
+ // selectQueryBranch(i) + search() for each, then merge in TS.
67
+ // For simple/phrase: selectQueryBranch(0) + search().
68
+ // 4. For phrase: post-filter the snippets with containsPhrase().
69
+ //
70
+ // containsPhrase stays in TS because it operates on snippet text already
71
+ // produced by the WASM, not on the query. It is not a tokenizer.
82
72
  /**
83
- * Returns true if `snippet` contains the phrase formed by `tokens` in order,
84
- * with at most `maxGap` characters between consecutive tokens.
85
- * Comparison is case- and accent-insensitive.
73
+ * Phrase post-filter. Returns true if `snippet` contains the phrase
74
+ * formed by `tokens` in order, with at most `maxGap` characters between
75
+ * consecutive tokens. Comparison is case- and accent-insensitive.
76
+ *
77
+ * The tokens come from the WASM-compiled pattern of a phrase branch,
78
+ * not from a TS re-tokenization of the query, so there is no
79
+ * tokenization divergence: WASM said "these are the tokens", we just
80
+ * check adjacency in the snippet.
86
81
  */
87
82
  function containsPhrase(snippet, tokens, maxGap = 30) {
88
83
  const norm = (s) => s.toLowerCase().normalize('NFKD').replace(/[̀-ͯ]/g, '');
@@ -231,32 +226,11 @@ function computePatternBloom(query) {
231
226
  }
232
227
  return bits;
233
228
  }
234
- function contentHash(bytes) {
235
- // 64-bit arithmetic via two 32-bit halves (no BigInt to keep it fast in
236
- // engines without optimised BigInt support).
237
- let hi = 0xcbf29ce4 | 0;
238
- let lo = 0x84222325 | 0;
239
- // FNV prime: 0x100000001b3 = (0x100 << 32) | 0x000001b3
240
- for (let i = 0; i < bytes.length; i++) {
241
- lo ^= bytes[i];
242
- // multiply by FNV prime
243
- // (hi:lo) *= 0x100000001b3
244
- // low * prime
245
- const lo_lo = (lo & 0xffff) * 0x1b3;
246
- const lo_hi = (lo >>> 16) * 0x1b3;
247
- let new_lo = (lo_lo + ((lo_hi & 0xffff) << 16)) | 0;
248
- let carry = (lo_hi >>> 16) + ((lo_lo + ((lo_hi & 0xffff) << 16)) > 0xffffffff ? 1 : 0);
249
- // hi*prime + carry
250
- const hi_lo = (hi & 0xffff) * 0x1b3;
251
- const hi_hi = (hi >>> 16) * 0x1b3;
252
- const new_hi = ((hi_lo + ((hi_hi & 0xffff) << 16)) | 0) + carry + lo; // + lo because high 33rd bit
253
- lo = new_lo;
254
- hi = new_hi | 0;
255
- }
256
- const hexHi = (hi >>> 0).toString(16).padStart(8, '0');
257
- const hexLo = (lo >>> 0).toString(16).padStart(8, '0');
258
- return hexHi + hexLo;
259
- }
229
+ // Note: `contentHash` is implemented as a method on AlbexEngine below
230
+ // (it needs access to the WASM scratchpad). The standalone TS reference
231
+ // implementation that used to live here was removed in 0.4.0 — the
232
+ // canonical hash now lives in wasm/src/lib.rs::hashBytes so there is
233
+ // exactly one definition of "the content hash of these bytes".
260
234
  /**
261
235
  * 16-hex-char content hash → 8 raw bytes for setDocumentContentHash. The
262
236
  * byte order matches the snapshot format: the high 32 bits sit at offsets
@@ -450,11 +424,18 @@ function makePdfWasmImports(module, getPdfMem) {
450
424
  case '__wbindgen_externref_table_set_null':
451
425
  return (idx) => { heap[idx] = undefined; };
452
426
  }
453
- // Unknown import — return a stub that warns when called. Loading still
454
- // succeeds; only an actually-invoked unknown import will surface.
455
- return (...args) => {
456
- console.warn(`[albex] unhandled PDF WASM import ${modName}.${name}`, args);
457
- };
427
+ // Unknown import — fail fast. An import we don't recognise means the
428
+ // wasm-bindgen / lopdf / getrandom dependency graph has drifted from
429
+ // the prefixes this loader is written to satisfy. Accepting the
430
+ // module would defer the failure to an arbitrary execution path,
431
+ // typically deep inside extractPdf(), where the user gets either a
432
+ // hang or a misleading "PDF parse error". Refusing instantiation
433
+ // surfaces the version skew at boot, where the maintainer can act
434
+ // on it.
435
+ throw new AlbexInitError(`Unknown PDF WASM import "${modName}.${name}". ` +
436
+ `The albex_pdf.wasm binary was probably built with a newer Rust ` +
437
+ `toolchain or dependency graph than this loader was written for. ` +
438
+ `Rebuild with 'npm run build:pdf-wasm' or open an issue.`);
458
439
  };
459
440
  const imports = {};
460
441
  for (const { module: modName, name } of required) {
@@ -474,27 +455,29 @@ export class AlbexEngine {
474
455
  * runtime dependency on OCR — this is a structural slot that the optional
475
456
  * companion package fills.
476
457
  */
477
- ocrImage;
478
458
  /**
479
- * Optional OCR-side configuration set by `@albex/ocr::enableOcr`. Read
480
- * by the engine to decide whether to invoke OCR on top of the text it
481
- * already extracted from a PDF (hybrid PDFs: native text + images that
482
- * also contain text, like stamps, scanned annexes, or diagrams with
483
- * labels).
484
- *
485
- * When `alwaysExtractEmbeddedImages` is true, every page of every PDF
486
- * passes through `extractPageImages` after the normal text extraction;
487
- * any image that meets the size filter (200×200 in Rust) is fed to
488
- * `ocrImage`. Performance cost: 1–3 s per qualifying image.
489
- *
490
- * Off by default — set this opt-in via the OCR module's options.
459
+ * Public OCR entry point. Forwards to the attached OCR adapter installed
460
+ * via `attachOcr()`. Reading this property is a feature-detect for
461
+ * integrators: `if (engine.ocrImage) { ... OCR available ... }`. Writing
462
+ * to it directly is no longer supported in 0.5.0+ — use `attachOcr`.
491
463
  */
492
- ocrConfig;
464
+ get ocrImage() {
465
+ return this._ocrAdapter?.recognize;
466
+ }
467
+ /** Private adapter slot. Holds the OCR plugin contract installed by
468
+ * `attachOcr()`. The engine reads `recognize` and `options` here; the
469
+ * caller never gets a reference to this object directly. */
470
+ _ocrAdapter = null;
493
471
  // ── PDF WASM (lazy) ──
494
472
  _pdfWasm = null;
495
473
  _pdfMem = null;
496
474
  _docs = [];
497
475
  _lastSearch = null;
476
+ /** Structured diagnostics collected during the most recent operation.
477
+ * Drained by `takeDiagnostics()`. Capped at 256 entries to avoid
478
+ * unbounded memory growth in pathological cases (very corrupted
479
+ * corpora producing thousands of recovery warnings). */
480
+ _diagnostics = [];
498
481
  _tier = null;
499
482
  _simd = false;
500
483
  _profile = null;
@@ -503,9 +486,51 @@ export class AlbexEngine {
503
486
  _gpuChunkCountUploaded = 0;
504
487
  _unsubscribeResources = null;
505
488
  _opts;
489
+ // ── Concurrency guard ──────────────────────────────────────────────────────
490
+ // One WASM instance, global mutable state, async ops that yield to the
491
+ // scheduler between slices. Two overlapping operations corrupt each other
492
+ // (e.g. a fresh searchBegin resets the cursor of an in-flight cooperative
493
+ // search). Async ops serialize through `_opChain`; sync mutators/searches
494
+ // assert the engine is idle (audit 0.6.0, finding #2).
495
+ _opChain = Promise.resolve();
496
+ _busy = false;
506
497
  constructor(opts) {
507
498
  this._opts = opts;
508
499
  }
500
+ /** Serialize an async engine operation behind any in-flight one. */
501
+ _exclusive(fn) {
502
+ const run = this._opChain.then(async () => {
503
+ this._busy = true;
504
+ try {
505
+ return await fn();
506
+ }
507
+ finally {
508
+ this._busy = false;
509
+ }
510
+ });
511
+ // Swallow result/error on the chain so one failure can't wedge the queue.
512
+ this._opChain = run.then(() => undefined, () => undefined);
513
+ return run;
514
+ }
515
+ /** Guard a synchronous mutator/search: refuse to run mid-async-operation
516
+ * rather than silently corrupt the shared WASM state. */
517
+ _assertIdle(method) {
518
+ if (this._busy) {
519
+ throw new AlbexError('busy', `${method}() was called while an async engine operation is still ` +
520
+ `running. Await the previous indexFile/save/load/replaceDocument/` +
521
+ `searchCooperative call, or use searchCooperative instead of search().`);
522
+ }
523
+ }
524
+ /** Compact opportunistically when tombstones pile up under text pressure,
525
+ * so repeated removeDocument/replaceDocument don't exhaust the pool. */
526
+ _autoCompactIfNeeded() {
527
+ const w = this._wasm;
528
+ const cap = w.getTextCapacity();
529
+ const hasTombstones = w.getDocCount() > this._docs.length;
530
+ if (hasTombstones && cap > 0 && w.getTextUsed() / cap > 0.85) {
531
+ w.compact();
532
+ }
533
+ }
509
534
  /** Load and initialise the main WASM module. Must be called before any other method. */
510
535
  async init() {
511
536
  const url = await this._resolveWasmUrl();
@@ -562,28 +587,26 @@ export class AlbexEngine {
562
587
  // as an asset reference. They copy the .wasm to the output directory and
563
588
  // rewrite the URL automatically. Consumers who use one of those bundlers
564
589
  // get a working `new AlbexEngine()` with no manual setup.
565
- if (!o.wasmBaseUrl) {
566
- // We can't tier-select with one URL, so fall back to std baseline.
567
- // The integrator who wants tier optimisation must opt in via wasmBaseUrl.
568
- this._tier = 'std';
569
- this._simd = false;
570
- return new URL('../wasm/pkg/albex_wasm_bg.wasm', import.meta.url).href;
571
- }
572
- let tier;
573
- if (o.tier && o.tier !== 'auto')
574
- tier = o.tier;
575
- else
576
- tier = pickTier(profile);
577
- this._tier = tier;
590
+ // 0.5.0+: two main binaries only — baseline and SIMD. The tier
591
+ // system is gone (audit 4.1). Selection collapses to a single
592
+ // boolean: SIMD on or off, decided either by the explicit `simd`
593
+ // option or by a runtime probe.
578
594
  const simd = o.simd === 'on'
579
595
  ? true
580
596
  : o.simd === 'off'
581
597
  ? false
582
598
  : !!profile?.wasm.simd;
583
599
  this._simd = simd;
584
- const suffix = simd ? `${tier}_simd` : tier;
600
+ this._tier = 'std';
601
+ if (!o.wasmBaseUrl) {
602
+ // Zero-config: bundler resolves the .wasm next to dist/. We only
603
+ // ship the baseline alias (albex_wasm_bg.wasm) inside the npm
604
+ // package; integrators who want SIMD must serve both binaries
605
+ // themselves via `wasmBaseUrl`.
606
+ return new URL('../wasm/pkg/albex_wasm_bg.wasm', import.meta.url).href;
607
+ }
585
608
  const base = o.wasmBaseUrl.replace(/\/+$/, '');
586
- return `${base}/albex_wasm_${suffix}.wasm`;
609
+ return simd ? `${base}/albex_wasm_simd.wasm` : `${base}/albex_wasm.wasm`;
587
610
  }
588
611
  /** The tier that was actually loaded. `null` until `init()` resolves. */
589
612
  get tier() { return this._tier; }
@@ -684,6 +707,34 @@ export class AlbexEngine {
684
707
  this._wasm.feedText(c.length);
685
708
  }
686
709
  }
710
+ /**
711
+ * Compute the FNV-1a 64-bit content hash of `bytes` via the WASM
712
+ * streaming API. Returns a 16-character hex string identical in shape
713
+ * to what the TS implementation in 0.3.x returned, so all callers
714
+ * stay unchanged. Single source of truth — same hash whether we use
715
+ * it for indexFile dedup, for snapshot v2 persistence, or anywhere
716
+ * else. Large inputs are chunked at FEED_SIZE just like _feedText.
717
+ */
718
+ _contentHash(bytes) {
719
+ const w = this._wasm;
720
+ w.hashBegin();
721
+ for (let i = 0; i < bytes.length; i += FEED_SIZE) {
722
+ const c = bytes.subarray(i, i + FEED_SIZE);
723
+ this._writePad(c);
724
+ w.hashFeed(c.length);
725
+ }
726
+ w.hashFinish();
727
+ // Read 8 result bytes back from scratchpad[0..8].
728
+ const ptr = w.getBuffer(8);
729
+ const out = this._u8(ptr, 8);
730
+ // Big-endian to hex. Same layout as the old hexHi + hexLo output:
731
+ // high u32 first (4 bytes), low u32 second (4 bytes).
732
+ let s = '';
733
+ for (let i = 0; i < 8; i++) {
734
+ s += out[i].toString(16).padStart(2, '0');
735
+ }
736
+ return s;
737
+ }
687
738
  _feedXmlBytes(xml, fn) {
688
739
  const feeder = this._wasm[fn];
689
740
  for (let i = 0; i < xml.length; i += FEED_SIZE) {
@@ -706,7 +757,10 @@ export class AlbexEngine {
706
757
  // called when the user actually drops a PDF — but we issue a console
707
758
  // hint so embedders can surface a "this will download ~1 MB" prompt.
708
759
  if (this._resources?.constrainedNetwork) {
709
- console.info('[albex] downloading PDF WASM (~1 MB) on a constrained network connection');
760
+ this._diag({
761
+ kind: 'info', stage: 'network',
762
+ message: 'Downloading PDF WASM (~1 MB) on a constrained network connection',
763
+ });
710
764
  }
711
765
  const res = await fetch(pdfUrl);
712
766
  if (!res.ok)
@@ -831,20 +885,14 @@ export class AlbexEngine {
831
885
  this._feedText(text);
832
886
  this._wasm.flushParagraph();
833
887
  }
834
- // Hybrid OCR pass: when the OCR module is wired with
835
- // `alwaysExtractEmbeddedImages: true`, also walk every page for
836
- // embedded images and OCR them on top of the vector text.
837
- //
838
- // We always log the decision so users debugging "why isn't OCR
839
- // firing on my hybrid PDF" can see which precondition failed.
840
- const hybridOn = !!this.ocrConfig?.alwaysExtractEmbeddedImages;
841
- const hasOcr = !!this.ocrImage;
842
- const binSupportsImages = typeof pw.extractPageImages === 'function'
843
- && typeof pw.getPageCount === 'function';
844
- console.log(`[albex] hybrid OCR decision: ocrImage=${hasOcr} ocrConfig.alwaysExtractEmbeddedImages=${hybridOn} binarySupportsImages=${binSupportsImages}`);
845
- if (hasOcr && hybridOn && binSupportsImages) {
888
+ // Hybrid OCR pass: when the OCR adapter is wired with
889
+ // `options.alwaysExtractEmbeddedImages: true`, also walk every page
890
+ // for embedded images and OCR them on top of the vector text.
891
+ if (this._ocrAdapter
892
+ && this._ocrAdapter.options?.alwaysExtractEmbeddedImages
893
+ && typeof pw.extractPageImages === 'function'
894
+ && typeof pw.getPageCount === 'function') {
846
895
  const totalPages = pw.getPageCount();
847
- console.log(`[albex] hybrid OCR pass starting over ${totalPages} page(s)`);
848
896
  for (let p = 0; p < totalPages; p++) {
849
897
  const ocrText = await this._ocrPageEmbeddedImages(pw, p);
850
898
  if (ocrText === null)
@@ -930,7 +978,10 @@ export class AlbexEngine {
930
978
  // so `_ensurePdfWasm` re-instantiates on the next call.
931
979
  this._pdfWasm = null;
932
980
  this._pdfMem = null;
933
- console.warn(`[albex] PDF image extractor trapped on page ${page + 1}: ${e instanceof Error ? e.message : String(e)}. Stopping OCR.`);
981
+ this._diag({
982
+ kind: 'skipped', stage: 'pdf', page: page + 1,
983
+ message: `PDF image extractor trapped: ${e instanceof Error ? e.message : String(e)}. Remaining pages skipped.`,
984
+ });
934
985
  return null;
935
986
  }
936
987
  if (imageCount <= 0)
@@ -954,15 +1005,6 @@ export class AlbexEngine {
954
1005
  const copy = new Uint8Array(len);
955
1006
  copy.set(new Uint8Array(liveMem.buffer, ptr, len));
956
1007
  const blob = new Blob([copy.buffer], { type: mime });
957
- // Defensive diagnostics: when an OCR call goes wrong (Tesseract
958
- // worker abort, malformed JPEG, etc.) the first thing we want to
959
- // see is whether we even handed it valid image bytes. A real JPEG
960
- // starts with FF D8 FF (E0 for JFIF, E1 for EXIF). A JPEG2000
961
- // starts with 00 00 00 0C 6A 50 20 20.
962
- const magic = Array.from(copy.subarray(0, 4))
963
- .map(b => b.toString(16).padStart(2, '0'))
964
- .join(' ');
965
- console.log(`[albex] OCR page ${page + 1} image ${i + 1}/${imageCount}: kind=${kind} (${mime}) len=${len} bytes magic=${magic}`);
966
1008
  try {
967
1009
  const { text } = await ocr(blob);
968
1010
  const trimmed = text?.trim();
@@ -977,7 +1019,10 @@ export class AlbexEngine {
977
1019
  // "Aborted(-1)") are also caught here; if they bypass the
978
1020
  // promise rejection and surface as `uncaught` instead, the
979
1021
  // demo's window.onerror handler will keep the app alive.
980
- console.warn(`[albex] OCR failed on page ${page + 1} image ${i + 1}: ${e instanceof Error ? e.message : String(e)}`);
1022
+ this._diag({
1023
+ kind: 'skipped', stage: 'ocr', page: page + 1,
1024
+ message: `OCR failed on image ${i + 1}: ${e instanceof Error ? e.message : String(e)}`,
1025
+ });
981
1026
  }
982
1027
  }
983
1028
  return pageText;
@@ -1018,7 +1063,10 @@ export class AlbexEngine {
1018
1063
  new Uint8Array(pw.memory.buffer, inPtr, bytes.length).set(bytes);
1019
1064
  }
1020
1065
  catch (e) {
1021
- console.warn(`[albex] PDF re-load after extractor crash failed: ${e instanceof Error ? e.message : String(e)}`);
1066
+ this._diag({
1067
+ kind: 'skipped', stage: 'pdf',
1068
+ message: `PDF re-load after extractor crash failed: ${e instanceof Error ? e.message : String(e)}`,
1069
+ });
1022
1070
  return null;
1023
1071
  }
1024
1072
  // Set up the doc and let _indexPdfScanned do the page-by-page walk.
@@ -1027,7 +1075,10 @@ export class AlbexEngine {
1027
1075
  // first page, no paragraphs are emitted and we end up with 0 chunks.
1028
1076
  this._wasm.setDocumentName(this._writeStr(file.name));
1029
1077
  this._wasm.beginDocument();
1030
- console.info(`[albex] pdf-extract failed (${originalError}); attempting OCR-only fallback via lopdf for ${file.name}`);
1078
+ this._diag({
1079
+ kind: 'fallback', stage: 'pdf', file: file.name,
1080
+ message: `pdf-extract failed (${originalError}); attempting OCR-only fallback via lopdf`,
1081
+ });
1031
1082
  await this._indexPdfScanned(pw);
1032
1083
  return this._wasm.endDocument();
1033
1084
  }
@@ -1487,6 +1538,9 @@ export class AlbexEngine {
1487
1538
  * Throws for unsupported formats or parse errors.
1488
1539
  */
1489
1540
  async indexFile(file) {
1541
+ return this._exclusive(() => this._indexFileInner(file));
1542
+ }
1543
+ async _indexFileInner(file) {
1490
1544
  const ext = file.name.split('.').pop()?.toLowerCase() ?? '';
1491
1545
  const indexer = AlbexEngine._INDEXERS[ext];
1492
1546
  if (!indexer)
@@ -1494,7 +1548,7 @@ export class AlbexEngine {
1494
1548
  // Hash the source bytes for idempotency. We always read the bytes once
1495
1549
  // here so the indexer can reuse them — avoids a double File.arrayBuffer().
1496
1550
  const bytes = new Uint8Array(await file.arrayBuffer());
1497
- const hash = contentHash(bytes);
1551
+ const hash = this._contentHash(bytes);
1498
1552
  // Idempotency: if a non-deleted doc already has this hash, return it
1499
1553
  // unchanged. Cheap O(N) scan since MAX_DOCS = 128.
1500
1554
  const existing = this._docs.find(d => d.contentHash === hash);
@@ -1516,6 +1570,24 @@ export class AlbexEngine {
1516
1570
  w.setDocumentContentHash(hashBytes.length);
1517
1571
  }
1518
1572
  const chunks = await indexer(this, file, bytes);
1573
+ // Capacity check (0.6.0). The WASM pools fill silently and break out of
1574
+ // their ingest loops; getLastIndexOverflow reports which one filled.
1575
+ // Surface a typed error instead of returning a half-indexed document the
1576
+ // caller cannot tell apart from a complete one (audit finding #3).
1577
+ const overflow = w.getLastIndexOverflow();
1578
+ if (overflow !== 0) {
1579
+ const which = (overflow & 1) ? 'chunks' : (overflow & 2) ? 'text'
1580
+ : (overflow & 4) ? 'docs' : 'names';
1581
+ const pools = [
1582
+ overflow & 1 ? 'chunk pool' : '',
1583
+ overflow & 2 ? 'text pool' : '',
1584
+ overflow & 4 ? 'document table' : '',
1585
+ overflow & 8 ? 'name pool' : '',
1586
+ ].filter(Boolean).join(', ');
1587
+ throw new AlbexCapacityError(`Index capacity exceeded while indexing "${file.name}" (${pools} full). ` +
1588
+ `The document was rolled back (not indexed); treat the index as full ` +
1589
+ `(compact(), shard across an AlbexPool, or reset()).`, which);
1590
+ }
1519
1591
  // The new doc occupies slot `docCountBefore`.
1520
1592
  const docId = w.getDocId(docCountBefore);
1521
1593
  const doc = {
@@ -1538,6 +1610,10 @@ export class AlbexEngine {
1538
1610
  * Returns `true` if a matching document was found and tombstoned.
1539
1611
  */
1540
1612
  removeDocument(id) {
1613
+ this._assertIdle('removeDocument');
1614
+ return this._removeDocumentInner(id);
1615
+ }
1616
+ _removeDocumentInner(id) {
1541
1617
  const doc = this._docs.find(d => d.name === id || d.contentHash === id);
1542
1618
  if (!doc)
1543
1619
  return false;
@@ -1553,12 +1629,15 @@ export class AlbexEngine {
1553
1629
  * idempotency check (so re-indexing the *same* bytes after a remove works).
1554
1630
  */
1555
1631
  async replaceDocument(name, newFile) {
1556
- this.removeDocument(name);
1557
- // Force a unique-hash path by indexing directly; if the new file happens
1558
- // to hash identically to a still-tracked document, the dedupe in
1559
- // indexFile will return that one. The remove above prevents the
1560
- // common case.
1561
- return this.indexFile(newFile);
1632
+ return this._exclusive(async () => {
1633
+ this._removeDocumentInner(name);
1634
+ // Index directly via the inner path (we already hold the lock).
1635
+ const doc = await this._indexFileInner(newFile);
1636
+ // Repeated replaces leave tombstones in the text pool; reclaim under
1637
+ // pressure so the pool isn't silently exhausted (audit finding #7).
1638
+ this._autoCompactIfNeeded();
1639
+ return doc;
1640
+ });
1562
1641
  }
1563
1642
  /**
1564
1643
  * Reclaim storage from previously removed documents. Compacts CHUNKS,
@@ -1568,6 +1647,7 @@ export class AlbexEngine {
1568
1647
  * references (e.g. in a UI) remain valid.
1569
1648
  */
1570
1649
  compact() {
1650
+ this._assertIdle('compact');
1571
1651
  this._wasm.compact();
1572
1652
  }
1573
1653
  /**
@@ -1580,15 +1660,34 @@ export class AlbexEngine {
1580
1660
  * markers instead of full chunk text. Defaults: 60 bytes before, 120 after.
1581
1661
  */
1582
1662
  search(query, opts = {}) {
1583
- const parsed = parseQuery(query);
1584
- if (parsed.kind === 'or') {
1585
- return this._searchOr(parsed.branches, query, opts);
1586
- }
1587
- const results = this._runSearch(tokensToWasmQuery(parsed.tokens), query, opts);
1588
- if (parsed.kind === 'phrase') {
1589
- return results.filter(r => containsPhrase(r.snippet, parsed.tokens));
1590
- }
1591
- return results;
1663
+ this._assertIdle('search');
1664
+ const w = this._wasm;
1665
+ const ql = this._writeStr(query);
1666
+ const kind = w.prepareQuery(ql);
1667
+ if (kind < 0)
1668
+ return [];
1669
+ if (kind === 2) {
1670
+ // OR: iterate branches and merge in TS. WASM stores compiled
1671
+ // branches internally so we never re-tokenize on the host.
1672
+ return this._searchOr(query, opts);
1673
+ }
1674
+ w.selectQueryBranch(0);
1675
+ // Phrase queries (kind 1) post-filter on adjacency. Pass the tokens down
1676
+ // so the check runs against the FULL chunk text, not a cropped windowed
1677
+ // snippet — otherwise `{ windowed: true }` could drop a valid phrase hit
1678
+ // whose second term fell outside the window (audit finding #7).
1679
+ const phraseTokens = kind === 1 ? this._branchTokens(0) : undefined;
1680
+ return this._runSearch(query, opts, phraseTokens);
1681
+ }
1682
+ /** Read the WASM-compiled tokens of branch `i` for phrase post-filter.
1683
+ * The bytes returned are exactly what the WASM tokenizer produced —
1684
+ * no TS re-tokenization. */
1685
+ _branchTokens(i) {
1686
+ const n = this._wasm.getQueryBranchPattern(i);
1687
+ if (n === 0)
1688
+ return [];
1689
+ const pattern = this._readPad(n);
1690
+ return pattern.split(' ').filter(t => t.length > 0);
1592
1691
  }
1593
1692
  /**
1594
1693
  * Cooperative search. Processes the corpus in slices, yielding to the
@@ -1605,18 +1704,29 @@ export class AlbexEngine {
1605
1704
  * Pass `opts.frameBudgetMs` to control the slice size (default 8 ms).
1606
1705
  */
1607
1706
  async *searchCooperative(query, opts = {}) {
1608
- const parsed = parseQuery(query);
1707
+ // Collect under the exclusivity lock so no other engine op interleaves at
1708
+ // a slice boundary; the per-slice scheduler yields still happen inside.
1709
+ const results = await this._exclusive(() => this._searchCooperativeCollect(query, opts));
1710
+ for (const r of results)
1711
+ yield r;
1712
+ }
1713
+ /** Materialise a cooperative search to a sorted result array. Runs inside
1714
+ * the exclusivity lock. Frame-budget yielding lives in _runSearchBudgeted. */
1715
+ async _searchCooperativeCollect(query, opts) {
1609
1716
  const budget = opts.frameBudgetMs ?? 8;
1610
1717
  const w = this._wasm;
1611
- // OR queries: run each branch as its own resumable search, dedup, sort.
1612
- if (parsed.kind === 'or') {
1718
+ const ql = this._writeStr(query);
1719
+ const kind = w.prepareQuery(ql);
1720
+ if (kind < 0)
1721
+ return [];
1722
+ if (kind === 2) {
1723
+ // OR branches — run each as its own resumable search and merge.
1613
1724
  const seen = new Set();
1614
1725
  const all = [];
1615
- for (const tokens of parsed.branches) {
1616
- const q = tokensToWasmQuery(tokens);
1617
- if (!q)
1618
- continue;
1619
- const r = await this._runSearchBudgeted(q, query, opts, budget);
1726
+ const n = w.getQueryBranchCount();
1727
+ for (let i = 0; i < n; i++) {
1728
+ w.selectQueryBranch(i);
1729
+ const r = await this._runSearchBudgeted(query, opts, budget, undefined, i);
1620
1730
  for (const x of r) {
1621
1731
  const key = `${x.documentName}:${x.location}:${x.matchStart}`;
1622
1732
  if (!seen.has(key)) {
@@ -1626,17 +1736,11 @@ export class AlbexEngine {
1626
1736
  }
1627
1737
  }
1628
1738
  all.sort((a, b) => b.score - a.score);
1629
- for (const r of all)
1630
- yield r;
1631
- return;
1739
+ return all;
1632
1740
  }
1633
- const results = await this._runSearchBudgeted(tokensToWasmQuery(parsed.tokens), query, opts, budget);
1634
- const filtered = parsed.kind === 'phrase'
1635
- ? results.filter(r => containsPhrase(r.snippet, parsed.tokens))
1636
- : results;
1637
- for (const r of filtered)
1638
- yield r;
1639
- void w;
1741
+ w.selectQueryBranch(0);
1742
+ const phraseTokens = kind === 1 ? this._branchTokens(0) : undefined;
1743
+ return this._runSearchBudgeted(query, opts, budget, phraseTokens, 0);
1640
1744
  }
1641
1745
  /**
1642
1746
  * @deprecated Renamed to `searchCooperative` in 0.3.0. The original name
@@ -1657,21 +1761,28 @@ export class AlbexEngine {
1657
1761
  * JS<->WASM overhead on fast machines; on slow machines a single batch
1658
1762
  * may eat the entire budget, which is also fine.
1659
1763
  */
1660
- async _runSearchBudgeted(wasmQuery, displayQuery, opts, budgetMs) {
1764
+ async _runSearchBudgeted(displayQuery, opts, budgetMs, phraseTokens, branchIdx = 0) {
1661
1765
  const w = this._wasm;
1662
- const ql = this._writeStr(wasmQuery);
1663
- w.setPattern(ql);
1766
+ // Pattern is already set by the caller via selectQueryBranch(branchIdx).
1767
+ // Snapshot THAT branch's compiled pattern for the GPU pre-filter hash —
1768
+ // not branch 0, which would build the wrong candidate mask for OR
1769
+ // branches and silently drop their hits (audit finding #6).
1770
+ const activePatternLen = w.getQueryBranchPattern(branchIdx);
1771
+ const activePattern = activePatternLen > 0 ? this._readPad(activePatternLen) : '';
1664
1772
  // GPU pre-filter (CD1). If enabled AND the corpus is large enough,
1665
1773
  // the GPU computes the candidate bitset and we install it into WASM
1666
1774
  // before searchBegin so the slice loop only inspects candidates.
1667
1775
  // Failure here is silent: we fall back to CPU-only Bloom transparently.
1668
1776
  if (this._shouldEngageGpu()) {
1669
1777
  try {
1670
- await this._gpuPreFilter(wasmQuery);
1778
+ await this._gpuPreFilter(activePattern);
1671
1779
  }
1672
1780
  catch (e) {
1673
1781
  // Don't let a GPU hiccup kill the search — drop to CPU path.
1674
- console.warn('[albex] GPU pre-filter failed; falling back to CPU:', e);
1782
+ this._diag({
1783
+ kind: 'fallback', stage: 'gpu',
1784
+ message: `GPU pre-filter failed; falling back to CPU: ${e instanceof Error ? e.message : String(e)}`,
1785
+ });
1675
1786
  w.clearCandidateMask();
1676
1787
  }
1677
1788
  }
@@ -1719,16 +1830,28 @@ export class AlbexEngine {
1719
1830
  bloomPassed: w.getStatBloomPassed(),
1720
1831
  bitapMatched: w.getStatBitapMatched(),
1721
1832
  };
1722
- return this._collectResults(count, opts);
1833
+ return this._collectResults(count, opts, phraseTokens);
1723
1834
  }
1724
- /** Materialise results [0..count) into the public SearchResult shape. */
1725
- _collectResults(count, opts) {
1835
+ /** Materialise results [0..count) into the public SearchResult shape.
1836
+ * When `phraseTokens` is given, each result is kept only if those tokens
1837
+ * appear adjacently in the FULL chunk text — independent of any display
1838
+ * windowing — so phrase queries stay correct under `{ windowed: true }`. */
1839
+ _collectResults(count, opts, phraseTokens) {
1726
1840
  const w = this._wasm;
1727
1841
  const windowed = opts.windowed === true;
1728
1842
  const before = opts.before ?? 60;
1729
1843
  const after = opts.after ?? 120;
1844
+ const phraseFilter = phraseTokens && phraseTokens.length > 0 ? phraseTokens : null;
1730
1845
  const results = [];
1731
1846
  for (let i = 0; i < count; i++) {
1847
+ // Phrase adjacency check against the full chunk text (getSnippet), not
1848
+ // the possibly-cropped display window.
1849
+ if (phraseFilter) {
1850
+ const fl = w.getSnippet(i);
1851
+ const full = fl > 0 ? this._readPad(fl) : '';
1852
+ if (!containsPhrase(full, phraseFilter))
1853
+ continue;
1854
+ }
1732
1855
  const score = w.getResultScore(i);
1733
1856
  const location = w.getResultLocation(i);
1734
1857
  const matchStart = w.getResultStart(i);
@@ -1775,14 +1898,18 @@ export class AlbexEngine {
1775
1898
  }
1776
1899
  return results;
1777
1900
  }
1778
- _searchOr(branches, rawQuery, opts) {
1901
+ /** Run all OR branches and merge dedup-by-(doc, location, match). The
1902
+ * branches are already compiled inside the WASM (by prepareQuery); we
1903
+ * iterate them with selectQueryBranch. The "rawQuery" param is kept
1904
+ * only for the lastSearch.query field. */
1905
+ _searchOr(rawQuery, opts) {
1906
+ const w = this._wasm;
1779
1907
  const seen = new Set();
1780
1908
  const all = [];
1781
- for (const tokens of branches) {
1782
- const q = tokensToWasmQuery(tokens);
1783
- if (!q)
1784
- continue;
1785
- const results = this._runSearch(q, rawQuery, opts);
1909
+ const n = w.getQueryBranchCount();
1910
+ for (let i = 0; i < n; i++) {
1911
+ w.selectQueryBranch(i);
1912
+ const results = this._runSearch(rawQuery, opts);
1786
1913
  for (const r of results) {
1787
1914
  const key = `${r.documentName}:${r.location}:${r.matchStart}`;
1788
1915
  if (!seen.has(key)) {
@@ -1791,14 +1918,14 @@ export class AlbexEngine {
1791
1918
  }
1792
1919
  }
1793
1920
  }
1794
- // Re-rank the merged list by score descending.
1795
1921
  all.sort((a, b) => b.score - a.score);
1796
1922
  return all;
1797
1923
  }
1798
- _runSearch(wasmQuery, displayQuery, opts) {
1924
+ /** Execute a single search using whichever query branch is currently
1925
+ * active (set via selectQueryBranch). Returns the materialised
1926
+ * SearchResult[]. Caller is responsible for activating a branch first. */
1927
+ _runSearch(displayQuery, opts, phraseTokens) {
1799
1928
  const w = this._wasm;
1800
- const ql = this._writeStr(wasmQuery);
1801
- w.setPattern(ql);
1802
1929
  const t0 = performance.now();
1803
1930
  const count = w.search();
1804
1931
  const ms = performance.now() - t0;
@@ -1810,59 +1937,7 @@ export class AlbexEngine {
1810
1937
  bloomPassed: w.getStatBloomPassed(),
1811
1938
  bitapMatched: w.getStatBitapMatched(),
1812
1939
  };
1813
- const windowed = opts.windowed === true;
1814
- const before = opts.before ?? 60;
1815
- const after = opts.after ?? 120;
1816
- const results = [];
1817
- for (let i = 0; i < count; i++) {
1818
- const score = w.getResultScore(i);
1819
- const location = w.getResultLocation(i);
1820
- const matchStart = w.getResultStart(i);
1821
- const matchEnd = w.getResultEnd(i);
1822
- const nl = w.getResultDocName(i);
1823
- const name = nl > 0 ? this._readPad(nl) : '?';
1824
- const matchCount = w.getResultMatchCount(i);
1825
- const matches = [];
1826
- for (let k = 0; k < matchCount; k++) {
1827
- matches.push({ start: w.getResultMatchStartAt(i, k), end: w.getResultMatchEndAt(i, k) });
1828
- }
1829
- if (matches.length === 0) {
1830
- matches.push({ start: matchStart, end: matchEnd });
1831
- }
1832
- let snippet;
1833
- let primaryStart = matchStart;
1834
- let primaryEnd = matchEnd;
1835
- let adjustedMatches = matches;
1836
- if (windowed) {
1837
- const sl = w.getSnippetWindow(i, before, after);
1838
- snippet = sl > 0 ? this._readPad(sl) : '';
1839
- const offset = w.getSnippetWindowOffset();
1840
- // Spans came back chunk-relative; shift them into window-relative.
1841
- // Account for leading "... " prefix when present.
1842
- const leadingPrefix = offset > 0 ? 4 : 0;
1843
- const shift = leadingPrefix - offset;
1844
- adjustedMatches = matches.map(m => ({
1845
- start: Math.max(0, m.start + shift),
1846
- end: Math.max(0, m.end + shift),
1847
- }));
1848
- primaryStart = adjustedMatches[0]?.start ?? 0;
1849
- primaryEnd = adjustedMatches[0]?.end ?? 0;
1850
- }
1851
- else {
1852
- const sl = w.getSnippet(i);
1853
- snippet = sl > 0 ? this._readPad(sl) : '';
1854
- }
1855
- results.push({
1856
- documentName: name,
1857
- location,
1858
- score,
1859
- snippet,
1860
- matchStart: primaryStart,
1861
- matchEnd: primaryEnd,
1862
- matches: adjustedMatches,
1863
- });
1864
- }
1865
- return results;
1940
+ return this._collectResults(count, opts, phraseTokens);
1866
1941
  }
1867
1942
  /** Returns current engine statistics. */
1868
1943
  getStats() {
@@ -1914,9 +1989,87 @@ export class AlbexEngine {
1914
1989
  }
1915
1990
  /** Full reset — clears all indexed documents and chunks. */
1916
1991
  reset() {
1992
+ this._assertIdle('reset');
1993
+ this._resetInner();
1994
+ }
1995
+ _resetInner() {
1917
1996
  this._wasm.init();
1918
1997
  this._docs = [];
1919
1998
  this._lastSearch = null;
1999
+ this._diagnostics = [];
2000
+ }
2001
+ /**
2002
+ * Drain and return the diagnostics collected since the last call (or
2003
+ * since the engine was created). Use this to surface recoverable
2004
+ * issues to the caller after `indexFile`, `load`, or any other
2005
+ * operation that may run into a "best-effort" path.
2006
+ *
2007
+ * Example diagnostics:
2008
+ * - `{kind:'fallback', stage:'pdf', message:'pdf-extract crashed,
2009
+ * attempting OCR-only fallback', file:'invoice.pdf'}`
2010
+ * - `{kind:'skipped', stage:'ocr', message:'Tesseract abort on page
2011
+ * 3 image 1; remaining images on this page skipped', file:'...',
2012
+ * page:3}`
2013
+ * - `{kind:'fallback', stage:'gpu', message:'GPU pre-filter failed;
2014
+ * falling back to CPU: <error message>'}`
2015
+ *
2016
+ * The buffer is cleared on each call; callers should consume the
2017
+ * returned array immediately (e.g. log to their telemetry, surface
2018
+ * a UI banner). After `reset()` the buffer is also cleared.
2019
+ */
2020
+ takeDiagnostics() {
2021
+ const out = this._diagnostics;
2022
+ this._diagnostics = [];
2023
+ return out;
2024
+ }
2025
+ /** Internal: record a diagnostic. Capped at 256 entries to bound memory; once the buffer is full, new entries are dropped until it is drained. */
2026
+ _diag(entry) {
2027
+ if (this._diagnostics.length >= 256)
2028
+ return;
2029
+ this._diagnostics.push(entry);
2030
+ }
2031
+ /**
2032
+ * Install an OCR adapter. Returns a handle whose `dispose()` removes the
2033
+ * adapter from the engine.
2034
+ *
2035
+ * The contract: the adapter must provide `recognize(image, opts)` that
2036
+ * returns `Promise<OcrAttachedResult>`. The engine validates the
2037
+ * contract at attach time and refuses adapters that don't expose a
2038
+ * `recognize` function. Only one adapter can be attached at a time; a
2039
+ * second call to `attachOcr` while one is active throws — the caller
2040
+ * must dispose the previous one first.
2041
+ *
2042
+ * @example
2043
+ * ```ts
2044
+ * import { enableOcr } from '@albex/ocr';
2045
+ * const handle = enableOcr(engine); // internally calls attachOcr
2046
+ * // ... later ...
2047
+ * await handle.dispose();
2048
+ * ```
2049
+ *
2050
+ * Direct use without the companion package:
2051
+ * ```ts
2052
+ * const handle = engine.attachOcr({
2053
+ * recognize: async (blob) => myCustomOcr(blob),
2054
+ * options: { alwaysExtractEmbeddedImages: false },
2055
+ * });
2056
+ * ```
2057
+ */
2058
+ attachOcr(adapter) {
2059
+ if (this._ocrAdapter) {
2060
+ throw new AlbexInitError('OCR adapter already attached. Call dispose() on the previous handle before attaching a new one.');
2061
+ }
2062
+ if (typeof adapter?.recognize !== 'function') {
2063
+ throw new AlbexInitError('attachOcr requires an adapter with a recognize(image, opts) function.');
2064
+ }
2065
+ this._ocrAdapter = adapter;
2066
+ return {
2067
+ dispose: async () => {
2068
+ // Idempotent: a double dispose is a no-op rather than a throw.
2069
+ if (this._ocrAdapter === adapter)
2070
+ this._ocrAdapter = null;
2071
+ },
2072
+ };
1920
2073
  }
1921
2074
  // ── Persistence ───────────────────────────────────────────────────────────
1922
2075
  /**
@@ -1927,6 +2080,9 @@ export class AlbexEngine {
1927
2080
  * state in roughly O(total bytes), bypassing re-parsing.
1928
2081
  */
1929
2082
  async save(name) {
2083
+ return this._exclusive(() => this._saveInner(name));
2084
+ }
2085
+ async _saveInner(name) {
1930
2086
  const w = this._wasm;
1931
2087
  const total = w.snapshotSize();
1932
2088
  if (total === 0) {
@@ -1953,6 +2109,9 @@ export class AlbexEngine {
1953
2109
  * header (wrong magic, version, or struct sizes).
1954
2110
  */
1955
2111
  async load(name) {
2112
+ return this._exclusive(() => this._loadInner(name));
2113
+ }
2114
+ async _loadInner(name) {
1956
2115
  const bytes = await loadPersisted(name);
1957
2116
  if (!bytes || bytes.length === 0)
1958
2117
  return false;
@@ -1975,6 +2134,17 @@ export class AlbexEngine {
1975
2134
  return false;
1976
2135
  off += n;
1977
2136
  }
2137
+ // Commit. For v3 this is the atomic apply step (state is untouched
2138
+ // until now); a failure here leaves the previous index intact so the
2139
+ // caller can keep using the engine. For v1/v2 snapshots `restoreCommit`
2140
+ // is a no-op that returns 1 (those formats applied in-place during
2141
+ // restoreFeed and have no rollback to offer). Older binaries that
2142
+ // predate v3 do not export `restoreCommit` — so we feature-detect it
2143
+ // and treat the load as already committed when it is absent.
2144
+ if (typeof w.restoreCommit === 'function') {
2145
+ if (w.restoreCommit() !== 1)
2146
+ return false;
2147
+ }
1978
2148
  // Rebuild _docs metadata from the restored WASM tables.
1979
2149
  //
1980
2150
  // What's available after a restore:
@@ -2035,10 +2205,12 @@ export class AlbexEngine {
2035
2205
  * empty. Returns whether a load actually happened.
2036
2206
  */
2037
2207
  async loadOrInit(name) {
2038
- const loaded = await this.load(name);
2039
- if (!loaded)
2040
- this.reset();
2041
- return loaded;
2208
+ return this._exclusive(async () => {
2209
+ const loaded = await this._loadInner(name);
2210
+ if (!loaded)
2211
+ this._resetInner();
2212
+ return loaded;
2213
+ });
2042
2214
  }
2043
2215
  /** Delete a previously persisted snapshot. */
2044
2216
  async deleteSnapshot(name) {
@@ -2060,7 +2232,8 @@ export class AlbexEngine {
2060
2232
  * WASM instance and its (typically 20 MB) backing memory.
2061
2233
  */
2062
2234
  [Symbol.dispose]() {
2063
- this.reset();
2235
+ // Terminal: bypass the idle guard — disposing mid-operation is allowed.
2236
+ this._resetInner();
2064
2237
  this._unsubscribeResources?.();
2065
2238
  this._unsubscribeResources = null;
2066
2239
  this._gpu?.destroy();