npm - albex - Versions diffs - 0.3.0 → 0.6.1 - Mend

albex 0.3.0 → 0.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (53)

package/CHANGELOG.md +466 -0
package/README.md +32 -19
package/dist/albex-worker.d.ts +65 -2
package/dist/albex-worker.d.ts.map +1 -1
package/dist/albex-worker.js +97 -20
package/dist/albex-worker.js.map +1 -1
package/dist/albex.d.ts +359 -55
package/dist/albex.d.ts.map +1 -1
package/dist/albex.js +766 -312
package/dist/albex.js.map +1 -1
package/dist/errors.d.ts +47 -2
package/dist/errors.d.ts.map +1 -1
package/dist/errors.js +41 -3
package/dist/errors.js.map +1 -1
package/dist/persistence.js +1 -1
package/dist/pool/coordinator.d.ts +14 -6
package/dist/pool/coordinator.d.ts.map +1 -1
package/dist/pool/coordinator.js +65 -28
package/dist/pool/coordinator.js.map +1 -1
package/dist/profile.d.ts +11 -6
package/dist/profile.d.ts.map +1 -1
package/dist/profile.js +6 -13
package/dist/profile.js.map +1 -1
package/dist/resource-manager.js +1 -1
package/dist/tiered-store.js +1 -1
package/dist/wasm-bindings.d.ts +96 -6
package/dist/wasm-bindings.d.ts.map +1 -1
package/dist/wasm-bindings.js +110 -7
package/dist/wasm-bindings.js.map +1 -1
package/dist/worker-protocol.d.ts +23 -2
package/dist/worker-protocol.d.ts.map +1 -1
package/dist/worker-protocol.js +1 -1
package/dist/worker-runtime.js +27 -3
package/dist/worker-runtime.js.map +1 -1
package/package.json +13 -9
package/src/albex-worker.ts +103 -18
package/src/albex.ts +2937 -2292
package/src/errors.ts +63 -2
package/src/pool/coordinator.ts +61 -34
package/src/profile.ts +11 -10
package/src/wasm-bindings.ts +225 -10
package/src/worker-protocol.ts +12 -2
package/src/worker-runtime.ts +28 -3
package/wasm/pkg/albex_pdf.wasm +0 -0
package/wasm/pkg/albex_wasm.wasm +0 -0
package/wasm/pkg/albex_wasm_bg.wasm +0 -0
package/wasm/pkg/albex_wasm_simd.wasm +0 -0
package/wasm/pkg/albex_wasm_mini.wasm +0 -0
package/wasm/pkg/albex_wasm_mini_simd.wasm +0 -0
package/wasm/pkg/albex_wasm_pro.wasm +0 -0
package/wasm/pkg/albex_wasm_pro_simd.wasm +0 -0
package/wasm/pkg/albex_wasm_std.wasm +0 -0
package/wasm/pkg/albex_wasm_std_simd.wasm +0 -0

package/dist/albex.js CHANGED

@@ -1,5 +1,5 @@
 /*!
- * albex v0.3.0
+ * albex v0.6.1
  * Zero-config local full-text search for documents — runs entirely in the browser, no server, no upload.
  * (c) 2026 RafaCalRob
  * @license MIT
@@ -21,9 +21,9 @@
  * ```
  */
 import { asAlbexExports, asAlbexPdfExports, } from './wasm-bindings.js';
-import { AlbexInitError, AlbexUnsupportedFormatError, AlbexParseError, AlbexCapacityError, } from './errors.js';
+import { AlbexError, AlbexInitError, AlbexUnsupportedFormatError, AlbexParseError, AlbexCapacityError, assertFileSizeWithinLimit, } from './errors.js';
 import { savePersisted, loadPersisted, deletePersisted, listPersisted, } from './persistence.js';
-import { detectProfile, pickTier, shouldUseGpu } from './profile.js';
+import { detectProfile, shouldUseGpu } from './profile.js';
 import { getResourceManager } from './resource-manager.js';
 import { BloomGpu, packBloomsFromChunks } from './gpu/bloom-runtime.js';
 export { AlbexError, AlbexInitError, AlbexUnsupportedFormatError, AlbexParseError, AlbexCapacityError, } from './errors.js';
@@ -48,41 +48,69 @@ function warnSearchStreamDeprecated() {
         'scheduler between slices and returns a batch. The alias will be ' +
         'removed in 0.4.0.');
 }
-function tokenize(q) {
-    return q.trim().split(/\s+/).filter(t => t.length > 0);
-}
-function parseQuery(q) {
-    const trimmed = q.trim();
-    // OR: "term1 | term2" or "phrase one | phrase two"
-    if (trimmed.includes('|')) {
-        const branches = trimmed.split('|')
-            .map(p => tokenize(p.replace(/"/g, '')))
-            .filter(b => b.length > 0);
-        return { kind: 'or', branches };
-    }
-    // Phrase: "exact phrase here"
-    const phraseMatch = /^"(.+)"$/.exec(trimmed);
-    if (phraseMatch) {
-        const inner = phraseMatch[1] ?? '';
-        const tokens = tokenize(inner);
-        return { kind: 'phrase', tokens, raw: inner };
-    }
-    return { kind: 'simple', tokens: tokenize(trimmed) };
-}
+/** The std preset = the historical compile-time defaults. */
+const CAPACITY_STD = {
+    maxDocs: 128,
+    maxChunks: 100_000,
+    textPoolBytes: 16 * 1024 * 1024,
+    namePoolBytes: 32 * 1024,
+};
+/** The large preset = the old compile-time "pro" tier. */
+const CAPACITY_LARGE = {
+    maxDocs: 1024,
+    maxChunks: 800_000,
+    textPoolBytes: 128 * 1024 * 1024,
+    namePoolBytes: 256 * 1024,
+};
 /**
- * Reconstruct a WASM-compatible query string from parsed tokens.
- * The WASM engine accepts up to 4 space-separated tokens (AND semantics).
+ * Resolve a user-facing capacity option into full numbers. Partial custom
+ * configs are completed from the std defaults scaled to keep std's ratios:
+ * `maxChunks` follows `maxDocs` (×782), `textPoolBytes` follows `maxChunks`
+ * (×168 B), `namePoolBytes` follows `maxDocs` (×256 B) — each with a floor
+ * so tiny configs stay usable. `maxChunks` is clamped to at least `maxDocs`
+ * (every document needs at least one chunk).
  */
-function tokensToWasmQuery(tokens) {
-    return tokens.slice(0, 4).join(' ');
+function resolveCapacity(capacity) {
+    if (capacity === undefined || capacity === 'std')
+        return { ...CAPACITY_STD };
+    if (capacity === 'large')
+        return { ...CAPACITY_LARGE };
+    const maxDocs = Math.floor(capacity.maxDocs ?? CAPACITY_STD.maxDocs);
+    const maxChunks = Math.max(Math.floor(capacity.maxChunks ?? Math.max(maxDocs * 782, 1024)), maxDocs);
+    const textPoolBytes = Math.floor(capacity.textPoolBytes ?? Math.max(maxChunks * 168, 64 * 1024));
+    const namePoolBytes = Math.floor(capacity.namePoolBytes ?? Math.max(maxDocs * 256, 4 * 1024));
+    return { maxDocs, maxChunks, textPoolBytes, namePoolBytes };
 }
 // ─────────────────────────────────────────────────────────────────────────────
-// Phrase post-filter
+// Query parsing (WASM-side as of 0.5.0)
 // ─────────────────────────────────────────────────────────────────────────────
+//
+// Pre-0.5.0 this file owned parseQuery + tokenize. That created two
+// truths about what a "token" was: one in TS for the query, one in Rust
+// for the indexed text. The audit flagged this as the biggest divergence
+// in the wrapper.
+//
+// 0.5.0 moves parseQuery/tokenize/tokensToWasmQuery to Rust. The TS
+// dispatcher reduces to:
+//
+//   1. Write the raw UTF-8 query bytes to the scratchpad.
+//   2. Call prepareQuery(len). Get back the kind (simple/phrase/or).
+//   3. For OR: iterate getQueryBranchCount() branches, calling
+//      selectQueryBranch(i) + search() for each, then merge in TS.
+//      For simple/phrase: selectQueryBranch(0) + search().
+//   4. For phrase: post-filter the snippets with containsPhrase().
+//
+// containsPhrase stays in TS because it operates on snippet text already
+// produced by the WASM, not on the query. It is not a tokenizer.
 /**
- * Returns true if `snippet` contains the phrase formed by `tokens` in order,
- * with at most `maxGap` characters between consecutive tokens.
- * Comparison is case- and accent-insensitive.
+ * Phrase post-filter. Returns true if `snippet` contains the phrase
+ * formed by `tokens` in order, with at most `maxGap` characters between
+ * consecutive tokens. Comparison is case- and accent-insensitive.
+ *
+ * The tokens come from the WASM-compiled pattern of a phrase branch,
+ * not from a TS re-tokenization of the query, so there is no
+ * tokenization divergence: WASM said "these are the tokens", we just
+ * check adjacency in the snippet.
  */
 function containsPhrase(snippet, tokens, maxGap = 30) {
     const norm = (s) => s.toLowerCase().normalize('NFKD').replace(/[̀-ͯ]/g, '');
@@ -201,62 +229,57 @@ const FEED_SIZE = 32_768; // 32 KB — fits in 64 KB scratchpad
  * The result is stable across runs and engines, so it can be persisted in
  * snapshots without versioning concerns.
  */
+// NOTE: the TS `computePatternBloom` that used to live here (the THIRD copy
+// of the accent fold, after the Rust index side and the Rust query side) was
+// removed in 0.8.0. The GPU pre-filter now reads the pattern Bloom straight
+// from WASM via `getPatternBloomLo/Hi` (ABI 6) — `setPattern` computes it
+// through the exact pipeline `searchBegin` uses, including Spanish stemming,
+// which the TS copy never applied (audit 2.4).
 /**
- * Compute the same 64-bit Bloom value the Rust side computes for a query.
+ * Convert a UTF-8 byte offset into `bytes` to the equivalent UTF-16
+ * code-unit index of the decoded string. Walks lead bytes only — O(offset)
+ * with no allocation — counting 1 unit per BMP code point and 2 per 4-byte
+ * (astral, e.g. emoji) sequence. Stray continuation bytes (malformed input)
+ * count 1 unit each, matching TextDecoder's per-byte U+FFFD replacement.
  *
- * Must stay in sync with `BloomFilter::from_text` and `fold_utf8_char` in
- * `core/src/bloom.rs`. The hashing is `c & 0x3F` over each accent-folded
- * lowercase ASCII byte; non-letters are skipped. The aggregate of all token
- * blooms is what the GPU pre-filter checks against.
+ * Offsets that land mid-sequence are attributed to the code point they fall
+ * inside (the engine only emits code-point-aligned offsets, so this is a
+ * defensive clamp, not an expected path).
  */
-function computePatternBloom(query) {
-    // Quick-and-faithful fold: lowercase, NFKD, strip combining marks. This
-    // matches the Rust Latin-1/Latin-A fold for the characters we care about
-    // (the rest fall through as non-letters which contribute nothing).
-    const norm = query.toLowerCase().normalize('NFKD').replace(/[̀-ͯ]/g, '');
-    let bits = 0n;
-    for (let i = 0; i < norm.length; i++) {
-        const code = norm.charCodeAt(i);
-        if ((code >= 0x61 && code <= 0x7a) || (code >= 0x30 && code <= 0x39)) {
-            bits |= 1n << BigInt(code & 0x3f);
-        }
-        else if (code === 0x20) {
-            // skip token separator
-        }
-        else if (code < 0x80) {
-            // other ASCII punctuation — they bias the filter; mirror Rust which
-            // also includes them via the 6-bit mask.
-            bits |= 1n << BigInt(code & 0x3f);
-        }
-    }
-    return bits;
-}
-function contentHash(bytes) {
-    // 64-bit arithmetic via two 32-bit halves (no BigInt to keep it fast in
-    // engines without optimised BigInt support).
-    let hi = 0xcbf29ce4 | 0;
-    let lo = 0x84222325 | 0;
-    // FNV prime: 0x100000001b3 = (0x100 << 32) | 0x000001b3
-    for (let i = 0; i < bytes.length; i++) {
-        lo ^= bytes[i];
-        // multiply by FNV prime
-        // (hi:lo) *= 0x100000001b3
-        // low * prime
-        const lo_lo = (lo & 0xffff) * 0x1b3;
-        const lo_hi = (lo >>> 16) * 0x1b3;
-        let new_lo = (lo_lo + ((lo_hi & 0xffff) << 16)) | 0;
-        let carry = (lo_hi >>> 16) + ((lo_lo + ((lo_hi & 0xffff) << 16)) > 0xffffffff ? 1 : 0);
-        // hi*prime + carry
-        const hi_lo = (hi & 0xffff) * 0x1b3;
-        const hi_hi = (hi >>> 16) * 0x1b3;
-        const new_hi = ((hi_lo + ((hi_hi & 0xffff) << 16)) | 0) + carry + lo; // + lo because high 33rd bit
-        lo = new_lo;
-        hi = new_hi | 0;
-    }
-    const hexHi = (hi >>> 0).toString(16).padStart(8, '0');
-    const hexLo = (lo >>> 0).toString(16).padStart(8, '0');
-    return hexHi + hexLo;
+function utf16IndexAtByte(bytes, byteOffset) {
+    const end = Math.min(byteOffset, bytes.length);
+    let units = 0;
+    let i = 0;
+    while (i < end) {
+        const b = bytes[i];
+        if (b < 0x80) {
+            i += 1;
+            units += 1;
+        } // ASCII
+        else if (b < 0xc0) {
+            i += 1;
+            units += 1;
+        } // stray continuation → U+FFFD
+        else if (b < 0xe0) {
+            i += 2;
+            units += 1;
+        } // 2-byte (é, ñ, …)
+        else if (b < 0xf0) {
+            i += 3;
+            units += 1;
+        } // 3-byte (…, €, CJK)
+        else {
+            i += 4;
+            units += 2;
+        } // 4-byte → surrogate pair
+    }
+    return units;
 }
+// Note: `contentHash` is implemented as a method on AlbexEngine below
+// (it needs access to the WASM scratchpad). The standalone TS reference
+// implementation that used to live here was removed in 0.4.0 — the
+// canonical hash now lives in wasm/src/lib.rs::hashBytes so there is
+// exactly one definition of "the content hash of these bytes".
 /**
  * 16-hex-char content hash → 8 raw bytes for setDocumentContentHash. The
  * byte order matches the snapshot format: the high 32 bits sit at offsets
@@ -450,11 +473,18 @@ function makePdfWasmImports(module, getPdfMem) {
             case '__wbindgen_externref_table_set_null':
                 return (idx) => { heap[idx] = undefined; };
         }
-        // Unknown import — return a stub that warns when called. Loading still
-        // succeeds; only an actually-invoked unknown import will surface.
-        return (...args) => {
-            console.warn(`[albex] unhandled PDF WASM import ${modName}.${name}`, args);
-        };
+        // Unknown import — fail fast. An import we don't recognise means the
+        // wasm-bindgen / lopdf / getrandom dependency graph has drifted from
+        // the prefixes this loader is written to satisfy. Accepting the
+        // module would defer the failure to an arbitrary execution path,
+        // typically deep inside extractPdf(), where the user gets either a
+        // hang or a misleading "PDF parse error". Refusing instantiation
+        // surfaces the version skew at boot, where the maintainer can act
+        // on it.
+        throw new AlbexInitError(`Unknown PDF WASM import "${modName}.${name}". ` +
+            `The albex_pdf.wasm binary was probably built with a newer Rust ` +
+            `toolchain or dependency graph than this loader was written for. ` +
+            `Rebuild with 'npm run build:pdf-wasm' or open an issue.`);
     };
     const imports = {};
     for (const { module: modName, name } of required) {
@@ -474,39 +504,105 @@ export class AlbexEngine {
      * runtime dependency on OCR — this is a structural slot that the optional
      * companion package fills.
      */
-    ocrImage;
     /**
-     * Optional OCR-side configuration set by `@albex/ocr::enableOcr`. Read
-     * by the engine to decide whether to invoke OCR on top of the text it
-     * already extracted from a PDF (hybrid PDFs: native text + images that
-     * also contain text, like stamps, scanned annexes, or diagrams with
-     * labels).
-     *
-     * When `alwaysExtractEmbeddedImages` is true, every page of every PDF
-     * passes through `extractPageImages` after the normal text extraction;
-     * any image that meets the size filter (200×200 in Rust) is fed to
-     * `ocrImage`. Performance cost: 1–3 s per qualifying image.
-     *
-     * Off by default — set this opt-in via the OCR module's options.
+     * Public OCR entry point. Forwards to the attached OCR adapter installed
+     * via `attachOcr()`. Reading this property is a feature-detect for
+     * integrators: `if (engine.ocrImage) { ... OCR available ... }`. Writing
+     * to it directly is no longer supported in 0.5.0+ — use `attachOcr`.
      */
-    ocrConfig;
+    get ocrImage() {
+        return this._ocrAdapter?.recognize;
+    }
+    /** Private adapter slot. Holds the OCR plugin contract installed by
+     * `attachOcr()`. The engine reads `recognize` and `options` here; the
+     * caller never gets a reference to this object directly. */
+    _ocrAdapter = null;
     // ── PDF WASM (lazy) ──
     _pdfWasm = null;
     _pdfMem = null;
     _docs = [];
     _lastSearch = null;
-    _tier = null;
+    /** Raw truncation bitflags from the most recent prepareQuery (ABI 5):
+     * 1 = branches dropped, 2 = tokens dropped/clipped, 4 = query bytes cut.
+     * Captured right after prepareQuery so every _lastSearch built for that
+     * query (including per-branch OR runs) reports the same flags. */
+    _lastTruncFlags = 0;
+    /** Structured diagnostics collected during the most recent operation.
+     * Drained by `takeDiagnostics()`. Capped at 256 entries to avoid
+     * unbounded memory growth in pathological cases (very corrupted
+     * corpora producing thousands of recovery warnings). */
+    _diagnostics = [];
+    /** Resolved runtime capacity (set in init(); reused by reset()). */
+    _capacity = { ...CAPACITY_STD };
     _simd = false;
     _profile = null;
     _resources = null;
     _gpu = null;
-    _gpuChunkCountUploaded = 0;
+    /** True when the GPU-resident Bloom array no longer mirrors the WASM
+     * chunk array. Set by EVERY index mutation (indexFile, removeDocument,
+     * compact, reset, load) and cleared after a successful upload. A plain
+     * chunk-count comparison is NOT enough: compact() can reorder blooms
+     * while keeping the count identical, which would silently filter the
+     * wrong chunks (audit 1.5). */
+    _gpuUploadDirty = true;
     _unsubscribeResources = null;
     _opts;
-    constructor(opts) {
+    // ── Concurrency guard ──────────────────────────────────────────────────────
+    // One WASM instance, global mutable state, async ops that yield to the
+    // scheduler between slices. Two overlapping operations corrupt each other
+    // (e.g. a fresh searchBegin resets the cursor of an in-flight cooperative
+    // search). Async ops serialize through `_opChain`; sync mutators/searches
+    // assert the engine is idle (audit 0.6.0, finding #2).
+    _opChain = Promise.resolve();
+    _busy = false;
+    constructor(opts = {}) {
         this._opts = opts;
     }
-    /** Load and initialise the main WASM module. Must be called before any other method. */
+    /** Serialize an async engine operation behind any in-flight one. */
+    _exclusive(fn) {
+        const run = this._opChain.then(async () => {
+            this._busy = true;
+            try {
+                return await fn();
+            }
+            finally {
+                this._busy = false;
+            }
+        });
+        // Swallow result/error on the chain so one failure can't wedge the queue.
+        this._opChain = run.then(() => undefined, () => undefined);
+        return run;
+    }
+    /** Guard a synchronous mutator/search: refuse to run mid-async-operation
+     * rather than silently corrupt the shared WASM state. */
+    _assertIdle(method) {
+        if (this._busy) {
+            throw new AlbexError('busy', `${method}() was called while an async engine operation is still ` +
+                `running. Await the previous indexFile/save/load/replaceDocument/` +
+                `searchCooperative call, or use searchCooperative instead of search().`);
+        }
+    }
+    /** Compact opportunistically when tombstones pile up under text pressure,
+     * so repeated removeDocument/replaceDocument don't exhaust the pool. */
+    _autoCompactIfNeeded() {
+        const w = this._wasm;
+        const cap = w.getTextCapacity();
+        const hasTombstones = w.getDocCount() > this._docs.length;
+        if (hasTombstones && cap > 0 && w.getTextUsed() / cap > 0.85) {
+            w.compact();
+            this._gpuUploadDirty = true;
+        }
+    }
+    /**
+     * Load and initialise the main WASM module. Must be called before any
+     * other method.
+     *
+     * Resolves `opts.capacity` ('std' default · 'large' · explicit object)
+     * and sizes the WASM pools accordingly via `initWithCapacity` (ABI 7).
+     * Memory cost ≈ `maxChunks × 64 B + textPoolBytes + namePoolBytes` —
+     * ~22 MB for 'std', ~180 MB for 'large'. Throws `AlbexInitError` if the
+     * requested capacity is out of range or the allocation fails.
+     */
     async init() {
         const url = await this._resolveWasmUrl();
         const res = await fetch(url);
@@ -515,7 +611,15 @@ export class AlbexEngine {
         const { instance } = await WebAssembly.instantiateStreaming(res, {});
         this._wasm = asAlbexExports(instance.exports);
         this._mem = this._wasm.memory;
-        this._wasm.init();
+        this._capacity = resolveCapacity(this._opts.capacity);
+        const c = this._capacity;
+        if (this._wasm.initWithCapacity(c.maxDocs, c.maxChunks, c.textPoolBytes, c.namePoolBytes) !== 1) {
+            throw new AlbexInitError(`initWithCapacity(${c.maxDocs} docs, ${c.maxChunks} chunks, ` +
+                `${c.textPoolBytes} text bytes, ${c.namePoolBytes} name bytes) failed — ` +
+                `parameters out of range (docs 1-65536, chunks ≥ docs and ≤ 4194304, ` +
+                `text 4 KiB-1 GiB, names 256 B-16 MiB) or the WASM memory allocation ` +
+                `was refused by the host.`);
+        }
         // Subscribe to environmental signals. Cheap and benign in node tests
         // (the manager tolerates missing globals).
         const rm = getResourceManager();
@@ -531,22 +635,17 @@ export class AlbexEngine {
     }
     /**
      * Decide which `.wasm` binary to fetch. Order of precedence:
-     *   1. `opts.wasmUrl` if provided — used verbatim.
-     *   2. `opts.tier` if explicit — joined with `wasmBaseUrl`.
-     *   3. `opts.wasmBaseUrl` + tier picked from the device profile.
-     *
-     * Order of precedence:
      *   1. `opts.wasmUrl` literal               → use verbatim
-     *   2. `opts.wasmBaseUrl` + tier suffix     → fetched from that directory
+     *   2. `opts.wasmBaseUrl` + simd suffix     → fetched from that directory
      *   3. zero-config default                  → `albex_wasm_bg.wasm` packaged
      *                                             next to this file, resolved
      *                                             via `import.meta.url`
      *
-     * The zero-config default loads the std-baseline binary. Tier auto-detection
-     * is only active when `wasmBaseUrl` is given, because picking a tier in
-     * runtime would defeat any bundler's static asset rewriting. Users who want
-     * tier optimisation must serve the six variants themselves and pass the
-     * directory through `wasmBaseUrl`.
+     * There are exactly two main binaries (baseline + SIMD); capacity is a
+     * RUNTIME parameter since ABI 7, so it never affects which file is
+     * fetched. SIMD auto-detection is only active when `wasmBaseUrl` is
+     * given, because picking a URL at runtime would defeat any bundler's
+     * static asset rewriting.
      */
     async _resolveWasmUrl() {
         const o = this._opts;
@@ -562,31 +661,26 @@ export class AlbexEngine {
         // as an asset reference. They copy the .wasm to the output directory and
         // rewrite the URL automatically. Consumers who use one of those bundlers
         // get a working `new AlbexEngine()` with no manual setup.
-        if (!o.wasmBaseUrl) {
-            // We can't tier-select with one URL, so fall back to std baseline.
-            // The integrator who wants tier optimisation must opt in via wasmBaseUrl.
-            this._tier = 'std';
-            this._simd = false;
-            return new URL('../wasm/pkg/albex_wasm_bg.wasm', import.meta.url).href;
-        }
-        let tier;
-        if (o.tier && o.tier !== 'auto')
-            tier = o.tier;
-        else
-            tier = pickTier(profile);
-        this._tier = tier;
+        // 0.5.0+: two main binaries only — baseline and SIMD (the tier system
+        // is gone; capacity became a runtime parameter in ABI 7). Selection
+        // collapses to a single boolean: SIMD on or off, decided either by the
+        // explicit `simd` option or by a runtime probe.
         const simd = o.simd === 'on'
             ? true
             : o.simd === 'off'
                 ? false
                 : !!profile?.wasm.simd;
         this._simd = simd;
-        const suffix = simd ? `${tier}_simd` : tier;
+        if (!o.wasmBaseUrl) {
+            // Zero-config: bundler resolves the .wasm next to dist/. We only
+            // ship the baseline alias (albex_wasm_bg.wasm) inside the npm
+            // package; integrators who want SIMD must serve both binaries
+            // themselves via `wasmBaseUrl`.
+            return new URL('../wasm/pkg/albex_wasm_bg.wasm', import.meta.url).href;
+        }
         const base = o.wasmBaseUrl.replace(/\/+$/, '');
-        return `${base}/albex_wasm_${suffix}.wasm`;
+        return simd ? `${base}/albex_wasm_simd.wasm` : `${base}/albex_wasm.wasm`;
     }
-    /** The tier that was actually loaded. `null` until `init()` resolves. */
-    get tier() { return this._tier; }
     /** True if the SIMD-accelerated binary was loaded. */
     get simdEnabled() { return this._simd; }
     /** True if a WebGPU device is acquired and the next search will use it. */
@@ -622,8 +716,14 @@ export class AlbexEngine {
      * No-op if the GPU device hasn't been acquired yet — first call attempts
      * `init()` lazily; if that fails, the candidate path is permanently
      * disabled for this engine instance.
+     *
+     * IMPORTANT: this method CLOBBERS the scratchpad (the candidate bitset
+     * is pushed through it via `setCandidateMask`). Any pattern previously
+     * staged by `selectQueryBranch` is destroyed — the caller MUST re-select
+     * the active branch before calling `searchBegin`, which snapshots the
+     * pattern from the scratchpad (audit 1.2).
      */
-    async _gpuPreFilter(wasmQuery) {
+    async _gpuPreFilter() {
         const gpu = this._gpu;
         if (!gpu)
             return;
@@ -637,20 +737,26 @@ export class AlbexEngine {
         const chunkCount = this._wasm.getChunkCount();
         if (chunkCount === 0)
             return;
-        // Upload blooms if the corpus changed. We re-upload everything on any
-        // delta; incremental delta-upload is a future optimisation.
-        if (chunkCount !== this._gpuChunkCountUploaded) {
+        // Upload blooms if the corpus changed since the last upload. The
+        // signal is a dirty flag set by every index mutation — not a chunk
+        // count comparison, because compact() can reorder blooms while
+        // keeping the count identical (audit 1.5). We re-upload everything
+        // on any delta; incremental delta-upload is a future optimisation.
+        if (this._gpuUploadDirty) {
             const ptr = this._wasm.getChunksPtr();
             const stride = this._wasm.getChunkStructSize();
             const bytes = new Uint8Array(this._mem.buffer, ptr, chunkCount * stride);
             const blooms = packBloomsFromChunks(bytes, chunkCount);
             gpu.uploadChunkBlooms(blooms, chunkCount);
-            this._gpuChunkCountUploaded = chunkCount;
-        }
-        // Build the pattern Bloom on the JS side: same hash as Rust
-        // (`c & 0x3F` after accent-folding), aggregated across all tokens.
-        const patternBloom = computePatternBloom(wasmQuery);
-        const passes = await gpu.scan(Number(patternBloom & 0xffffffffn), Number((patternBloom >> 32n) & 0xffffffffn));
+            this._gpuUploadDirty = false;
+        }
+        // Pattern Bloom comes straight from WASM (ABI 6): `selectQueryBranch`
+        // → `setPattern` computed it through the same pipeline `searchBegin`
+        // uses — split, optional Spanish stemming, accent fold, `c & 0x3F`.
+        // The retired TS copy of the fold never stemmed, so with `setLanguage
+        // ('es')` it could set bits for suffixes the CPU pattern no longer
+        // had → over-restrictive mask → silent false negatives (audit 2.4).
+        const passes = await gpu.scan(this._wasm.getPatternBloomLo(), this._wasm.getPatternBloomHi());
         // Push the bitset back into WASM via the scratchpad.
         const passBytes = new Uint8Array(passes.buffer, passes.byteOffset, passes.byteLength);
         this._writePad(passBytes);
@@ -676,6 +782,16 @@ export class AlbexEngine {
         const ptr = this._wasm.getBuffer(0);
         return _dec.decode(this._u8(ptr, n));
     }
+    /** Copy `n` scratchpad bytes out of WASM memory. The copy is private to
+     * JS, so it survives later WASM calls (and memory growth) — used when the
+     * caller needs both the raw bytes (UTF-16 span mapping) and the decoded
+     * string of the same payload. */
+    _readPadBytes(n) {
+        const ptr = this._wasm.getBuffer(0);
+        const out = new Uint8Array(n);
+        out.set(this._u8(ptr, n));
+        return out;
+    }
     _feedText(text) {
         const b = _enc.encode(text);
         for (let i = 0; i < b.length; i += FEED_SIZE) {
@@ -684,6 +800,34 @@ export class AlbexEngine {
             this._wasm.feedText(c.length);
         }
     }
+    /**
+     * Compute the FNV-1a 64-bit content hash of `bytes` via the WASM
+     * streaming API. Returns a 16-character hex string identical in shape
+     * to what the TS implementation in 0.3.x returned, so all callers
+     * stay unchanged. Single source of truth — same hash whether we use
+     * it for indexFile dedup, for snapshot v2 persistence, or anywhere
+     * else. Large inputs are chunked at FEED_SIZE just like _feedText.
+     */
+    _contentHash(bytes) {
+        const w = this._wasm;
+        w.hashBegin();
+        for (let i = 0; i < bytes.length; i += FEED_SIZE) {
+            const c = bytes.subarray(i, i + FEED_SIZE);
+            this._writePad(c);
+            w.hashFeed(c.length);
+        }
+        w.hashFinish();
+        // Read 8 result bytes back from scratchpad[0..8].
+        const ptr = w.getBuffer(8);
+        const out = this._u8(ptr, 8);
+        // Big-endian to hex. Same layout as the old hexHi + hexLo output:
+        // high u32 first (4 bytes), low u32 second (4 bytes).
+        let s = '';
+        for (let i = 0; i < 8; i++) {
+            s += out[i].toString(16).padStart(2, '0');
+        }
+        return s;
+    }
     _feedXmlBytes(xml, fn) {
         const feeder = this._wasm[fn];
         for (let i = 0; i < xml.length; i += FEED_SIZE) {
@@ -706,7 +850,10 @@ export class AlbexEngine {
         // called when the user actually drops a PDF — but we issue a console
         // hint so embedders can surface a "this will download ~1 MB" prompt.
         if (this._resources?.constrainedNetwork) {
-            console.info('[albex] downloading PDF WASM (~1 MB) on a constrained network connection');
+            this._diag({
+                kind: 'info', stage: 'network',
+                message: 'Downloading PDF WASM (~1 MB) on a constrained network connection',
+            });
         }
         const res = await fetch(pdfUrl);
         if (!res.ok)
@@ -831,20 +978,14 @@ export class AlbexEngine {
             this._feedText(text);
             this._wasm.flushParagraph();
         }
-        // Hybrid OCR pass: when the OCR module is wired with
-        // `alwaysExtractEmbeddedImages: true`, also walk every page for
-        // embedded images and OCR them on top of the vector text.
-        //
-        // We always log the decision so users debugging "why isn't OCR
-        // firing on my hybrid PDF" can see which precondition failed.
-        const hybridOn = !!this.ocrConfig?.alwaysExtractEmbeddedImages;
-        const hasOcr = !!this.ocrImage;
-        const binSupportsImages = typeof pw.extractPageImages === 'function'
-            && typeof pw.getPageCount === 'function';
-        console.log(`[albex] hybrid OCR decision: ocrImage=${hasOcr} ocrConfig.alwaysExtractEmbeddedImages=${hybridOn} binarySupportsImages=${binSupportsImages}`);
-        if (hasOcr && hybridOn && binSupportsImages) {
+        // Hybrid OCR pass: when the OCR adapter is wired with
+        // `options.alwaysExtractEmbeddedImages: true`, also walk every page
+        // for embedded images and OCR them on top of the vector text.
+        if (this._ocrAdapter
+            && this._ocrAdapter.options?.alwaysExtractEmbeddedImages
+            && typeof pw.extractPageImages === 'function'
+            && typeof pw.getPageCount === 'function') {
             const totalPages = pw.getPageCount();
-            console.log(`[albex] hybrid OCR pass starting over ${totalPages} page(s)`);
             for (let p = 0; p < totalPages; p++) {
                 const ocrText = await this._ocrPageEmbeddedImages(pw, p);
                 if (ocrText === null)
@@ -930,7 +1071,10 @@ export class AlbexEngine {
             // so `_ensurePdfWasm` re-instantiates on the next call.
             this._pdfWasm = null;
             this._pdfMem = null;
-            console.warn(`[albex] PDF image extractor trapped on page ${page + 1}: ${e instanceof Error ? e.message : String(e)}. Stopping OCR.`);
+            this._diag({
+                kind: 'skipped', stage: 'pdf', page: page + 1,
+                message: `PDF image extractor trapped: ${e instanceof Error ? e.message : String(e)}. Remaining pages skipped.`,
+            });
             return null;
         }
         if (imageCount <= 0)
@@ -954,15 +1098,6 @@ export class AlbexEngine {
             const copy = new Uint8Array(len);
             copy.set(new Uint8Array(liveMem.buffer, ptr, len));
             const blob = new Blob([copy.buffer], { type: mime });
-            // Defensive diagnostics: when an OCR call goes wrong (Tesseract
-            // worker abort, malformed JPEG, etc.) the first thing we want to
-            // see is whether we even handed it valid image bytes. A real JPEG
-            // starts with FF D8 FF (E0 for JFIF, E1 for EXIF). A JPEG2000
-            // starts with 00 00 00 0C 6A 50 20 20.
-            const magic = Array.from(copy.subarray(0, 4))
-                .map(b => b.toString(16).padStart(2, '0'))
-                .join(' ');
-            console.log(`[albex] OCR page ${page + 1} image ${i + 1}/${imageCount}: kind=${kind} (${mime}) len=${len} bytes magic=${magic}`);
             try {
                 const { text } = await ocr(blob);
                 const trimmed = text?.trim();
@@ -977,7 +1112,10 @@ export class AlbexEngine {
                 // "Aborted(-1)") are also caught here; if they bypass the
                 // promise rejection and surface as `uncaught` instead, the
                 // demo's window.onerror handler will keep the app alive.
-                console.warn(`[albex] OCR failed on page ${page + 1} image ${i + 1}: ${e instanceof Error ? e.message : String(e)}`);
+                this._diag({
+                    kind: 'skipped', stage: 'ocr', page: page + 1,
+                    message: `OCR failed on image ${i + 1}: ${e instanceof Error ? e.message : String(e)}`,
+                });
             }
         }
         return pageText;
@@ -1018,7 +1156,10 @@ export class AlbexEngine {
             new Uint8Array(pw.memory.buffer, inPtr, bytes.length).set(bytes);
         }
         catch (e) {
-            console.warn(`[albex] PDF re-load after extractor crash failed: ${e instanceof Error ? e.message : String(e)}`);
+            this._diag({
+                kind: 'skipped', stage: 'pdf',
+                message: `PDF re-load after extractor crash failed: ${e instanceof Error ? e.message : String(e)}`,
+            });
             return null;
         }
         // Set up the doc and let _indexPdfScanned do the page-by-page walk.
@@ -1027,7 +1168,10 @@ export class AlbexEngine {
         // first page, no paragraphs are emitted and we end up with 0 chunks.
         this._wasm.setDocumentName(this._writeStr(file.name));
         this._wasm.beginDocument();
-        console.info(`[albex] pdf-extract failed (${originalError}); attempting OCR-only fallback via lopdf for ${file.name}`);
+        this._diag({
+            kind: 'fallback', stage: 'pdf', file: file.name,
+            message: `pdf-extract failed (${originalError}); attempting OCR-only fallback via lopdf`,
+        });
         await this._indexPdfScanned(pw);
         return this._wasm.endDocument();
     }
@@ -1483,20 +1627,29 @@ export class AlbexEngine {
     };
     // ── Public API ────────────────────────────────────────────────────────────
     /**
-     * Index a file. Supported formats: DOCX, XLSX, PDF, TXT, XML.
+     * Index a file. Supported formats (11, with varying depth): DOCX, XLSX, PDF,
+     * HTML, MD, JSON, CSV, EML, RTF, TXT, XML. Several are deliberately "lite"
+     * (CSV is RFC-4180-lite, EML is MIME-lite, RTF is regex-stripped).
      * Throws for unsupported formats or parse errors.
      */
     async indexFile(file) {
+        return this._exclusive(() => this._indexFileInner(file));
+    }
+    async _indexFileInner(file) {
         const ext = file.name.split('.').pop()?.toLowerCase() ?? '';
         const indexer = AlbexEngine._INDEXERS[ext];
         if (!indexer)
             throw new AlbexUnsupportedFormatError(ext);
+        // Size guard BEFORE reading: `file.size` is available without buffering,
+        // so a pathological input (a 2 GB .txt) is refused with a typed error
+        // instead of being fully loaded and hashed first (audit 3.5).
+        assertFileSizeWithinLimit(file, this._opts.maxFileBytes);
         // Hash the source bytes for idempotency. We always read the bytes once
         // here so the indexer can reuse them — avoids a double File.arrayBuffer().
         const bytes = new Uint8Array(await file.arrayBuffer());
-        const hash = contentHash(bytes);
+        const hash = this._contentHash(bytes);
         // Idempotency: if a non-deleted doc already has this hash, return it
-        // unchanged. Cheap O(N) scan since MAX_DOCS = 128.
+        // unchanged. O(doc_count) scan — cheap at any supported capacity.
         const existing = this._docs.find(d => d.contentHash === hash);
         if (existing)
             return existing;
@@ -1516,6 +1669,31 @@ export class AlbexEngine {
             w.setDocumentContentHash(hashBytes.length);
         }
         const chunks = await indexer(this, file, bytes);
+        // Capacity check (0.6.0). The WASM pools fill silently and break out of
+        // their ingest loops; getLastIndexOverflow reports which one filled.
+        // Surface a typed error instead of returning a half-indexed document the
+        // caller cannot tell apart from a complete one (audit finding #3).
+        const overflow = w.getLastIndexOverflow();
+        if (overflow !== 0) {
+            const which = (overflow & 1) ? 'chunks' : (overflow & 2) ? 'text'
+                : (overflow & 4) ? 'docs' : 'names';
+            // The RUNTIME limit of the pool that overflowed, as configured via
+            // `capacity` (std defaults · 'large' · custom object).
+            const max = which === 'chunks' ? w.getMaxChunks()
+                : which === 'text' ? w.getTextCapacity()
+                    : which === 'docs' ? w.getMaxDocs()
+                        : w.getNameCapacity();
+            const pools = [
+                overflow & 1 ? 'chunk pool' : '',
+                overflow & 2 ? 'text pool' : '',
+                overflow & 4 ? 'document table' : '',
+                overflow & 8 ? 'name pool' : '',
+            ].filter(Boolean).join(', ');
+            throw new AlbexCapacityError(`Index capacity exceeded while indexing "${file.name}" (${pools} full, ` +
+                `${which} limit = ${max}). The document was rolled back (not indexed); ` +
+                `treat the index as full (compact(), shard across an AlbexPool, ` +
+                `reset(), or re-create the engine with a bigger \`capacity\`).`, which, max);
+        }
         // The new doc occupies slot `docCountBefore`.
         const docId = w.getDocId(docCountBefore);
         const doc = {
@@ -1528,6 +1706,7 @@ export class AlbexEngine {
             contentHash: hash,
         };
         this._docs.push(doc);
+        this._gpuUploadDirty = true;
         return doc;
     }
     /**
@@ -1538,12 +1717,17 @@ export class AlbexEngine {
      * Returns `true` if a matching document was found and tombstoned.
      */
     removeDocument(id) {
+        this._assertIdle('removeDocument');
+        return this._removeDocumentInner(id);
+    }
+    _removeDocumentInner(id) {
         const doc = this._docs.find(d => d.name === id || d.contentHash === id);
         if (!doc)
             return false;
         const ok = this._wasm.removeDocument(doc.docId) === 1;
         if (ok) {
             this._docs = this._docs.filter(d => d !== doc);
+            this._gpuUploadDirty = true;
         }
         return ok;
     }
@@ -1553,12 +1737,15 @@ export class AlbexEngine {
      * idempotency check (so re-indexing the *same* bytes after a remove works).
      */
     async replaceDocument(name, newFile) {
-        this.removeDocument(name);
-        // Force a unique-hash path by indexing directly; if the new file happens
-        // to hash identically to a still-tracked document, the dedupe in
-        // indexFile will return that one. The remove above prevents the
-        // common case.
-        return this.indexFile(newFile);
+        return this._exclusive(async () => {
+            this._removeDocumentInner(name);
+            // Index directly via the inner path (we already hold the lock).
+            const doc = await this._indexFileInner(newFile);
+            // Repeated replaces leave tombstones in the text pool; reclaim under
+            // pressure so the pool isn't silently exhausted (audit finding #7).
+            this._autoCompactIfNeeded();
+            return doc;
+        });
     }
     /**
      * Reclaim storage from previously removed documents. Compacts CHUNKS,
@@ -1568,7 +1755,78 @@ export class AlbexEngine {
      * references (e.g. in a UI) remain valid.
      */
     compact() {
+        this._assertIdle('compact');
         this._wasm.compact();
+        // compact() reorders the chunk array (and therefore the per-chunk
+        // blooms) even when the chunk count stays the same — the GPU copy is
+        // stale no matter what (audit 1.5).
+        this._gpuUploadDirty = true;
+    }
+    /**
+     * Enumerate the authoritative chunks Albex indexed for a document, in order.
+     * Lets a host mirror Albex's exact chunking — e.g. embed the same units for a
+     * parallel semantic index keyed on the same {@link AuthoritativeChunk.id}
+     * (`"<docId>::<ord>"`, identical to {@link SearchResult.chunkId}). `docId` is
+     * `IndexedDocument.docId` from {@link indexFile}; returns `[]` if no live
+     * document has that id.
+     *
+     * The returned `id`/`ord`/`sub` are stable across {@link compact} and
+     * snapshot save/load. Never key persistent structures on a search result's
+     * absolute `chunkIdx`, which {@link compact} renumbers.
+     */
+    listChunks(docId) {
+        this._assertIdle('listChunks');
+        const w = this._wasm;
+        const slot = this._docSlotOf(docId);
+        if (slot < 0)
+            return [];
+        const count = w.getDocChunkCount(slot);
+        const out = [];
+        let prevLocation = -1;
+        let sub = 0;
+        // Batched enumeration (ABI 6): one `listChunksBatch` frontier call per
+        // scratchpad-full of chunks instead of 2-3 calls per chunk (audit 2.6 —
+        // an embeddings pipeline over 100k chunks used to make ~300k calls).
+        // Each batch packs records as [u32 text_len][u32 location][text bytes],
+        // tightly, in ordinal order; layout documented in wasm/src/lib.rs.
+        let ord = 0;
+        while (ord < count) {
+            const n = w.listChunksBatch(slot, ord, count - ord);
+            if (n === 0)
+                break; // defensive — should not happen for a live slot
+            const ptr = w.getBuffer(0);
+            // The view is only valid until the next frontier call; everything is
+            // decoded out of it inside this loop body before the next batch.
+            const view = new DataView(this._mem.buffer);
+            let off = ptr;
+            for (let k = 0; k < n; k++) {
+                const byteLen = view.getUint32(off, true);
+                const location = view.getUint32(off + 4, true);
+                const text = byteLen > 0
+                    ? _dec.decode(new Uint8Array(this._mem.buffer, off + 8, byteLen))
+                    : '';
+                if (location === prevLocation)
+                    sub++;
+                else {
+                    sub = 0;
+                    prevLocation = location;
+                }
+                out.push({ docId, location, ord, sub, text, byteLen, id: `${docId}::${ord}` });
+                ord++;
+                off += 8 + byteLen;
+            }
+        }
+        return out;
+    }
+    /** Doc-table slot (0..getDocCount) whose stable id is `docId`, or -1. */
+    _docSlotOf(docId) {
+        const w = this._wasm;
+        const n = w.getDocCount();
+        for (let i = 0; i < n; i++) {
+            if (w.getDocId(i) === docId)
+                return i;
+        }
+        return -1;
     }
     /**
      * Search the index. Supports:
@@ -1578,17 +1836,42 @@ export class AlbexEngine {
      *
      * Pass `{ windowed: true }` to receive cropped snippets with ASCII ellipsis
      * markers instead of full chunk text. Defaults: 60 bytes before, 120 after.
+     *
+     * Note: this synchronous path never uses the GPU pre-filter — the WebGPU
+     * scan is asynchronous by nature. Only `searchCooperative` (the budgeted
+     * path) engages the GPU; `search()` always runs the CPU Bloom pre-filter,
+     * regardless of the `gpu` option.
      */
     search(query, opts = {}) {
-        const parsed = parseQuery(query);
-        if (parsed.kind === 'or') {
-            return this._searchOr(parsed.branches, query, opts);
-        }
-        const results = this._runSearch(tokensToWasmQuery(parsed.tokens), query, opts);
-        if (parsed.kind === 'phrase') {
-            return results.filter(r => containsPhrase(r.snippet, parsed.tokens));
-        }
-        return results;
+        this._assertIdle('search');
+        const w = this._wasm;
+        const ql = this._writeStr(query);
+        const kind = w.prepareQuery(ql);
+        this._lastTruncFlags = w.getQueryTruncationFlags();
+        if (kind < 0)
+            return [];
+        if (kind === 2) {
+            // OR: iterate branches and merge in TS. WASM stores compiled
+            // branches internally so we never re-tokenize on the host.
+            return this._searchOr(query, opts);
+        }
+        w.selectQueryBranch(0);
+        // Phrase queries (kind 1) post-filter on adjacency. Pass the tokens down
+        // so the check runs against the FULL chunk text, not a cropped windowed
+        // snippet — otherwise `{ windowed: true }` could drop a valid phrase hit
+        // whose second term fell outside the window (audit finding #7).
+        const phraseTokens = kind === 1 ? this._branchTokens(0) : undefined;
+        return this._runSearch(query, opts, phraseTokens);
+    }
+    /** Read the WASM-compiled tokens of branch `i` for phrase post-filter.
+     * The bytes returned are exactly what the WASM tokenizer produced —
+     * no TS re-tokenization. */
+    _branchTokens(i) {
+        const n = this._wasm.getQueryBranchPattern(i);
+        if (n === 0)
+            return [];
+        const pattern = this._readPad(n);
+        return pattern.split(' ').filter(t => t.length > 0);
     }
     /**
      * Cooperative search. Processes the corpus in slices, yielding to the
@@ -1605,20 +1888,37 @@ export class AlbexEngine {
      * Pass `opts.frameBudgetMs` to control the slice size (default 8 ms).
      */
     async *searchCooperative(query, opts = {}) {
-        const parsed = parseQuery(query);
+        // Collect under the exclusivity lock so no other engine op interleaves at
+        // a slice boundary; the per-slice scheduler yields still happen inside.
+        const results = await this._exclusive(() => this._searchCooperativeCollect(query, opts));
+        for (const r of results)
+            yield r;
+    }
+    /** Materialise a cooperative search to a sorted result array. Runs inside
+     * the exclusivity lock. Frame-budget yielding lives in _runSearchBudgeted. */
+    async _searchCooperativeCollect(query, opts) {
         const budget = opts.frameBudgetMs ?? 8;
         const w = this._wasm;
-        // OR queries: run each branch as its own resumable search, dedup, sort.
-        if (parsed.kind === 'or') {
+        const ql = this._writeStr(query);
+        const kind = w.prepareQuery(ql);
+        this._lastTruncFlags = w.getQueryTruncationFlags();
+        if (kind < 0)
+            return [];
+        if (kind === 2) {
+            // OR branches — run each as its own resumable search and merge.
             const seen = new Set();
             const all = [];
-            for (const tokens of parsed.branches) {
-                const q = tokensToWasmQuery(tokens);
-                if (!q)
-                    continue;
-                const r = await this._runSearchBudgeted(q, query, opts, budget);
+            const n = w.getQueryBranchCount();
+            for (let i = 0; i < n; i++) {
+                w.selectQueryBranch(i);
+                const r = await this._runSearchBudgeted(query, opts, budget, undefined, i);
                 for (const x of r) {
-                    const key = `${x.documentName}:${x.location}:${x.matchStart}`;
+                    // chunkId ("<docId>::<ord>") distinguishes two sub-chunks of the
+                    // same location — a (doc, location, matchStart) key would collide
+                    // when both sub-chunks hit at the same relative offset and drop a
+                    // legitimate result (audit 3.4). matchStart keeps distinct hits
+                    // within one chunk across branches.
+                    const key = `${x.chunkId}:${x.matchStart}`;
                     if (!seen.has(key)) {
                         seen.add(key);
                         all.push(x);
@@ -1626,17 +1926,11 @@ export class AlbexEngine {
                 }
             }
             all.sort((a, b) => b.score - a.score);
-            for (const r of all)
-                yield r;
-            return;
+            return all;
         }
-        const results = await this._runSearchBudgeted(tokensToWasmQuery(parsed.tokens), query, opts, budget);
-        const filtered = parsed.kind === 'phrase'
-            ? results.filter(r => containsPhrase(r.snippet, parsed.tokens))
-            : results;
-        for (const r of filtered)
-            yield r;
-        void w;
+        w.selectQueryBranch(0);
+        const phraseTokens = kind === 1 ? this._branchTokens(0) : undefined;
+        return this._runSearchBudgeted(query, opts, budget, phraseTokens, 0);
     }
     /**
      * @deprecated Renamed to `searchCooperative` in 0.3.0. The original name
@@ -1657,29 +1951,42 @@ export class AlbexEngine {
      * JS<->WASM overhead on fast machines; on slow machines a single batch
      * may eat the entire budget, which is also fine.
      */
-    async _runSearchBudgeted(wasmQuery, displayQuery, opts, budgetMs) {
+    async _runSearchBudgeted(displayQuery, opts, budgetMs, phraseTokens, branchIdx = 0) {
         const w = this._wasm;
-        const ql = this._writeStr(wasmQuery);
-        w.setPattern(ql);
+        // Pattern is already set by the caller via selectQueryBranch(branchIdx),
+        // which also computed THAT branch's pattern Bloom inside WASM — so the
+        // GPU pre-filter below builds the right candidate mask per OR branch
+        // (audit finding #6) without re-reading the pattern across the frontier.
         // GPU pre-filter (CD1). If enabled AND the corpus is large enough,
         // the GPU computes the candidate bitset and we install it into WASM
         // before searchBegin so the slice loop only inspects candidates.
         // Failure here is silent: we fall back to CPU-only Bloom transparently.
         if (this._shouldEngageGpu()) {
             try {
-                await this._gpuPreFilter(wasmQuery);
+                await this._gpuPreFilter();
             }
             catch (e) {
                 // Don't let a GPU hiccup kill the search — drop to CPU path.
-                console.warn('[albex] GPU pre-filter failed; falling back to CPU:', e);
+                this._diag({
+                    kind: 'fallback', stage: 'gpu',
+                    message: `GPU pre-filter failed; falling back to CPU: ${e instanceof Error ? e.message : String(e)}`,
+                });
                 w.clearCandidateMask();
             }
+            // The GPU pre-filter pushes the candidate bitset through the
+            // scratchpad, overwriting the pattern staged by selectQueryBranch.
+            // searchBegin() snapshots the pattern FROM the scratchpad, so it
+            // would compile garbage tokens out of the mask bytes (audit 1.2 —
+            // every GPU-assisted search silently returned wrong results).
+            // Re-select the active branch to restore the pattern.
+            w.selectQueryBranch(branchIdx);
         }
         const t0 = performance.now();
         if (w.searchBegin() === 0) {
             this._lastSearch = {
                 query: displayQuery, timeMs: 0, results: 0,
                 bloomTested: 0, bloomPassed: 0, bitapMatched: 0,
+                ...this._truncStats(),
             };
             return [];
         }
@@ -1718,41 +2025,116 @@ export class AlbexEngine {
             bloomTested: w.getStatBloomTested(),
             bloomPassed: w.getStatBloomPassed(),
             bitapMatched: w.getStatBitapMatched(),
+            ...this._truncStats(),
         };
-        return this._collectResults(count, opts);
+        return this._collectResults(count, opts, phraseTokens);
     }
-    /** Materialise results [0..count) into the public SearchResult shape. */
-    _collectResults(count, opts) {
+    /** Truncation booleans for SearchStats, decoded from the flags the WASM
+     * reported for the most recent prepareQuery (audit 1.6 — the engine used
+     * to drop OR branches past 8 and tokens past 4 in silence). */
+    _truncStats() {
+        const f = this._lastTruncFlags;
+        return {
+            truncatedBranches: (f & 1) !== 0,
+            truncatedTokens: (f & 2) !== 0,
+            truncatedQuery: (f & 4) !== 0,
+        };
+    }
+    /** Materialise results [0..count) into the public SearchResult shape.
+     * When `phraseTokens` is given, each result is kept only if those tokens
+     * appear adjacently in the FULL chunk text — independent of any display
+     * windowing — so phrase queries stay correct under `{ windowed: true }`.
+     *
+     * Frontier discipline (audit 2.1): all numeric fields of every result are
+     * read in ONE DataView pass over the `#[repr(C)]` RESULTS array
+     * (`getResultsPtr`/`getResultStride`, ABI 6) — the old path made 12-15
+     * frontier calls per result. Strings still need calls, minimised to one
+     * snippet read per result plus one doc-name read per DISTINCT document
+     * (the old `getResultDocName` was additionally O(doc_count) inside WASM
+     * for every single result). */
+    _collectResults(count, opts, phraseTokens) {
         const w = this._wasm;
         const windowed = opts.windowed === true;
         const before = opts.before ?? 60;
         const after = opts.after ?? 120;
+        const phraseFilter = phraseTokens && phraseTokens.length > 0 ? phraseTokens : null;
+        // Map each live doc_id to its CHUNKS[] base (to turn a result's absolute
+        // chunk index into a compact()-stable doc-relative ordinal) and to its
+        // doc-table slot (for O(1) name resolution via getDocName).
+        const chunkBaseByDocId = new Map();
+        const slotByDocId = new Map();
+        {
+            const docCount = w.getDocCount();
+            for (let d = 0; d < docCount; d++) {
+                const id = w.getDocId(d);
+                chunkBaseByDocId.set(id, w.getDocChunkBase(d));
+                slotByDocId.set(id, d);
+            }
+        }
+        const raw = new Array(count);
+        {
+            const ptr = w.getResultsPtr();
+            const stride = w.getResultStride();
+            const view = new DataView(this._mem.buffer, ptr, count * stride);
+            for (let i = 0; i < count; i++) {
+                const base = i * stride;
+                const matchCount = view.getUint32(base + 56, true);
+                const matches = [];
+                for (let k = 0; k < matchCount && k < 4; k++) {
+                    matches.push({
+                        start: view.getUint32(base + 24 + k * 8, true),
+                        end: view.getUint32(base + 28 + k * 8, true),
+                    });
+                }
+                const matchStart = view.getUint32(base + 16, true);
+                const matchEnd = view.getUint32(base + 20, true);
+                if (matches.length === 0)
+                    matches.push({ start: matchStart, end: matchEnd });
+                raw[i] = {
+                    docId: view.getUint32(base, true),
+                    chunkIdx: view.getUint32(base + 4, true),
+                    location: view.getUint32(base + 8, true),
+                    score: view.getUint16(base + 12, true),
+                    matchStart, matchEnd, matches,
+                };
+            }
+        }
+        // Resolve each distinct doc name ONCE per search (one getDocName call
+        // per document that actually appears in the results).
+        const nameByDocId = new Map();
+        const docName = (docId) => {
+            let name = nameByDocId.get(docId);
+            if (name === undefined) {
+                const slot = slotByDocId.get(docId);
+                const nl = slot !== undefined ? w.getDocName(slot) : 0;
+                name = nl > 0 ? this._readPad(nl) : '?';
+                nameByDocId.set(docId, name);
+            }
+            return name;
+        };
         const results = [];
         for (let i = 0; i < count; i++) {
-            const score = w.getResultScore(i);
-            const location = w.getResultLocation(i);
-            const matchStart = w.getResultStart(i);
-            const matchEnd = w.getResultEnd(i);
-            const nl = w.getResultDocName(i);
-            const name = nl > 0 ? this._readPad(nl) : '?';
-            const matchCount = w.getResultMatchCount(i);
-            const matches = [];
-            for (let k = 0; k < matchCount; k++) {
-                matches.push({ start: w.getResultMatchStartAt(i, k), end: w.getResultMatchEndAt(i, k) });
+            const r = raw[i];
+            // Phrase adjacency check against the full chunk text (getSnippet), not
+            // the possibly-cropped display window.
+            if (phraseFilter) {
+                const fl = w.getSnippet(i);
+                const full = fl > 0 ? this._readPad(fl) : '';
+                if (!containsPhrase(full, phraseFilter))
+                    continue;
             }
-            if (matches.length === 0)
-                matches.push({ start: matchStart, end: matchEnd });
-            let snippet;
-            let primaryStart = matchStart;
-            let primaryEnd = matchEnd;
-            let adjustedMatches = matches;
+            const chunkOrd = r.chunkIdx - (chunkBaseByDocId.get(r.docId) ?? 0);
+            let snippetBytes;
+            let primaryStart = r.matchStart;
+            let primaryEnd = r.matchEnd;
+            let adjustedMatches = r.matches;
             if (windowed) {
                 const sl = w.getSnippetWindow(i, before, after);
-                snippet = sl > 0 ? this._readPad(sl) : '';
+                snippetBytes = sl > 0 ? this._readPadBytes(sl) : new Uint8Array(0);
                 const offset = w.getSnippetWindowOffset();
                 const leadingPrefix = offset > 0 ? 4 : 0;
                 const shift = leadingPrefix - offset;
-                adjustedMatches = matches.map(m => ({
+                adjustedMatches = r.matches.map(m => ({
                     start: Math.max(0, m.start + shift),
                     end: Math.max(0, m.end + shift),
                 }));
@@ -1761,44 +2143,61 @@ export class AlbexEngine {
             }
             else {
                 const sl = w.getSnippet(i);
-                snippet = sl > 0 ? this._readPad(sl) : '';
+                snippetBytes = sl > 0 ? this._readPadBytes(sl) : new Uint8Array(0);
             }
+            const snippet = snippetBytes.length > 0 ? _dec.decode(snippetBytes) : '';
+            // UTF-16 view of the primary span, ready for `snippet.slice()` —
+            // byte offsets and JS string indices diverge on the first accent
+            // (audit 3.1, the consumer footgun in the main Spanish use case).
+            const snippetStart = utf16IndexAtByte(snippetBytes, primaryStart);
+            const snippetEnd = utf16IndexAtByte(snippetBytes, primaryEnd);
             results.push({
-                documentName: name,
-                location,
-                score,
+                documentName: docName(r.docId),
+                docId: r.docId,
+                location: r.location,
+                chunkId: `${r.docId}::${chunkOrd}`,
+                score: r.score,
                 snippet,
                 matchStart: primaryStart,
                 matchEnd: primaryEnd,
                 matches: adjustedMatches,
+                snippetStart,
+                snippetEnd,
             });
         }
         return results;
     }
-    _searchOr(branches, rawQuery, opts) {
+    /** Run all OR branches and merge dedup-by-(chunkId, matchStart). The
+     * branches are already compiled inside the WASM (by prepareQuery); we
+     * iterate them with selectQueryBranch. The "rawQuery" param is kept
+     * only for the lastSearch.query field. */
+    _searchOr(rawQuery, opts) {
+        const w = this._wasm;
         const seen = new Set();
         const all = [];
-        for (const tokens of branches) {
-            const q = tokensToWasmQuery(tokens);
-            if (!q)
-                continue;
-            const results = this._runSearch(q, rawQuery, opts);
+        const n = w.getQueryBranchCount();
+        for (let i = 0; i < n; i++) {
+            w.selectQueryBranch(i);
+            const results = this._runSearch(rawQuery, opts);
             for (const r of results) {
-                const key = `${r.documentName}:${r.location}:${r.matchStart}`;
+                // Keyed on chunkId, not (doc, location, matchStart): two sub-chunks
+                // of the same location can hit at the same relative offset, and the
+                // old key silently dropped one of them (audit 3.4).
+                const key = `${r.chunkId}:${r.matchStart}`;
                 if (!seen.has(key)) {
                     seen.add(key);
                     all.push(r);
                 }
             }
         }
-        // Re-rank the merged list by score descending.
         all.sort((a, b) => b.score - a.score);
         return all;
     }
-    _runSearch(wasmQuery, displayQuery, opts) {
+    /** Execute a single search using whichever query branch is currently
+     * active (set via selectQueryBranch). Returns the materialised
+     * SearchResult[]. Caller is responsible for activating a branch first. */
+    _runSearch(displayQuery, opts, phraseTokens) {
         const w = this._wasm;
-        const ql = this._writeStr(wasmQuery);
-        w.setPattern(ql);
         const t0 = performance.now();
         const count = w.search();
         const ms = performance.now() - t0;
@@ -1809,62 +2208,12 @@ export class AlbexEngine {
             bloomTested: w.getStatBloomTested(),
             bloomPassed: w.getStatBloomPassed(),
             bitapMatched: w.getStatBitapMatched(),
+            ...this._truncStats(),
         };
-        const windowed = opts.windowed === true;
-        const before = opts.before ?? 60;
-        const after = opts.after ?? 120;
-        const results = [];
-        for (let i = 0; i < count; i++) {
-            const score = w.getResultScore(i);
-            const location = w.getResultLocation(i);
-            const matchStart = w.getResultStart(i);
-            const matchEnd = w.getResultEnd(i);
-            const nl = w.getResultDocName(i);
-            const name = nl > 0 ? this._readPad(nl) : '?';
-            const matchCount = w.getResultMatchCount(i);
-            const matches = [];
-            for (let k = 0; k < matchCount; k++) {
-                matches.push({ start: w.getResultMatchStartAt(i, k), end: w.getResultMatchEndAt(i, k) });
-            }
-            if (matches.length === 0) {
-                matches.push({ start: matchStart, end: matchEnd });
-            }
-            let snippet;
-            let primaryStart = matchStart;
-            let primaryEnd = matchEnd;
-            let adjustedMatches = matches;
-            if (windowed) {
-                const sl = w.getSnippetWindow(i, before, after);
-                snippet = sl > 0 ? this._readPad(sl) : '';
-                const offset = w.getSnippetWindowOffset();
-                // Spans came back chunk-relative; shift them into window-relative.
-                // Account for leading "... " prefix when present.
-                const leadingPrefix = offset > 0 ? 4 : 0;
-                const shift = leadingPrefix - offset;
-                adjustedMatches = matches.map(m => ({
-                    start: Math.max(0, m.start + shift),
-                    end: Math.max(0, m.end + shift),
-                }));
-                primaryStart = adjustedMatches[0]?.start ?? 0;
-                primaryEnd = adjustedMatches[0]?.end ?? 0;
-            }
-            else {
-                const sl = w.getSnippet(i);
-                snippet = sl > 0 ? this._readPad(sl) : '';
-            }
-            results.push({
-                documentName: name,
-                location,
-                score,
-                snippet,
-                matchStart: primaryStart,
-                matchEnd: primaryEnd,
-                matches: adjustedMatches,
-            });
-        }
-        return results;
+        return this._collectResults(count, opts, phraseTokens);
     }
-    /** Returns current engine statistics. */
+    /** Returns current engine statistics (capacities are the RUNTIME values
+     * the engine was initialised with via the `capacity` option). */
     getStats() {
         return {
             documents: this._docs.length,
@@ -1872,9 +2221,9 @@ export class AlbexEngine {
             textUsed: this._wasm.getTextUsed(),
             textCapacity: this._wasm.getTextCapacity(),
             wasmMemoryBytes: this._mem.buffer.byteLength,
-            tier: this._tier,
             maxChunks: this._wasm.getMaxChunks(),
             maxDocs: this._wasm.getMaxDocs(),
+            namePoolBytes: this._wasm.getNameCapacity(),
         };
     }
     /** Returns stats from the most recent search, or null. */
@@ -1914,9 +2263,92 @@ export class AlbexEngine {
     }
     /** Full reset — clears all indexed documents and chunks. */
     reset() {
-        this._wasm.init();
+        this._assertIdle('reset');
+        this._resetInner();
+    }
+    _resetInner() {
+        // Re-init with the engine's CONFIGURED capacity, not the std defaults
+        // (`wasm.init()` would silently shrink a 'large'/custom engine). Same
+        // capacities → the WASM side does a plain counter reset, no realloc.
+        const c = this._capacity;
+        this._wasm.initWithCapacity(c.maxDocs, c.maxChunks, c.textPoolBytes, c.namePoolBytes);
         this._docs = [];
         this._lastSearch = null;
+        this._diagnostics = [];
+        this._gpuUploadDirty = true;
+    }
+    /**
+     * Drain and return the diagnostics collected since the last call (or
+     * since the engine was created). Use this to surface recoverable
+     * issues to the caller after `indexFile`, `load`, or any other
+     * operation that may run into a "best-effort" path.
+     *
+     * Example diagnostics:
+     *   - `{kind:'fallback', stage:'pdf', message:'pdf-extract crashed,
+     *      attempting OCR-only fallback', file:'invoice.pdf'}`
+     *   - `{kind:'skipped', stage:'ocr', message:'Tesseract abort on page
+     *      3 image 1; remaining images on this page skipped', file:'...',
+     *      page:3}`
+     *   - `{kind:'fallback', stage:'gpu', message:'GPU pre-filter failed,
+     *      using CPU'}`
+     *
+     * The buffer is cleared on each call; callers should consume the
+     * returned array immediately (e.g. log to their telemetry, surface
+     * a UI banner). After `reset()` the buffer is also cleared.
+     */
+    takeDiagnostics() {
+        const out = this._diagnostics;
+        this._diagnostics = [];
+        return out;
+    }
+    /** Internal: record a diagnostic. Capped at 256 to bound memory. */
+    _diag(entry) {
+        if (this._diagnostics.length >= 256)
+            return;
+        this._diagnostics.push(entry);
+    }
+    /**
+     * Install an OCR adapter. Returns a handle whose `dispose()` removes the
+     * adapter from the engine.
+     *
+     * The contract: the adapter must provide `recognize(image, opts)` that
+     * returns `Promise<OcrAttachedResult>`. The engine validates the
+     * contract at attach time and refuses adapters that don't expose a
+     * `recognize` function. Only one adapter can be attached at a time; a
+     * second call to `attachOcr` while one is active throws — the caller
+     * must dispose the previous one first.
+     *
+     * @example
+     * ```ts
+     * import { enableOcr } from '@albex/ocr';
+     * const handle = enableOcr(engine);   // internally calls attachOcr
+     * // ... later ...
+     * await handle.dispose();
+     * ```
+     *
+     * Direct use without the companion package:
+     * ```ts
+     * const handle = engine.attachOcr({
+     *   recognize: async (blob) => myCustomOcr(blob),
+     *   options: { alwaysExtractEmbeddedImages: false },
+     * });
+     * ```
+     */
+    attachOcr(adapter) {
+        if (this._ocrAdapter) {
+            throw new AlbexInitError('OCR adapter already attached. Call dispose() on the previous handle before attaching a new one.');
+        }
+        if (typeof adapter?.recognize !== 'function') {
+            throw new AlbexInitError('attachOcr requires an adapter with a recognize(image, opts) function.');
+        }
+        this._ocrAdapter = adapter;
+        return {
+            dispose: async () => {
+                // Idempotent: a double dispose is a no-op rather than a throw.
+                if (this._ocrAdapter === adapter)
+                    this._ocrAdapter = null;
+            },
+        };
     }
     // ── Persistence ───────────────────────────────────────────────────────────
     /**
@@ -1927,6 +2359,9 @@ export class AlbexEngine {
      * state in roughly O(total bytes), bypassing re-parsing.
      */
     async save(name) {
+        return this._exclusive(() => this._saveInner(name));
+    }
+    async _saveInner(name) {
         const w = this._wasm;
         const total = w.snapshotSize();
         if (total === 0) {
@@ -1953,6 +2388,9 @@ export class AlbexEngine {
      * header (wrong magic, version, or struct sizes).
      */
     async load(name) {
+        return this._exclusive(() => this._loadInner(name));
+    }
+    async _loadInner(name) {
         const bytes = await loadPersisted(name);
         if (!bytes || bytes.length === 0)
             return false;
@@ -1975,6 +2413,19 @@ export class AlbexEngine {
                 return false;
             off += n;
         }
+        // Commit. For v3 this is the atomic apply step (state is untouched
+        // until now); a failure here leaves the previous index intact so the
+        // caller can keep using the engine. For v1/v2 snapshots `restoreCommit`
+        // is a no-op that returns 1 (those formats applied in-place during
+        // restoreFeed and have no rollback to offer). Older binaries that
+        // predate v3 do not export `restoreCommit`; we feature-detect the
+        // export and, when it is absent, treat the load as already committed.
+        if (typeof w.restoreCommit === 'function') {
+            if (w.restoreCommit() !== 1)
+                return false;
+        }
+        // The restored chunk array replaces whatever the GPU last saw.
+        this._gpuUploadDirty = true;
         // Rebuild _docs metadata from the restored WASM tables.
         //
         // What's available after a restore:
@@ -2035,10 +2486,12 @@ export class AlbexEngine {
      * empty. Returns whether a load actually happened.
      */
     async loadOrInit(name) {
-        const loaded = await this.load(name);
-        if (!loaded)
-            this.reset();
-        return loaded;
+        return this._exclusive(async () => {
+            const loaded = await this._loadInner(name);
+            if (!loaded)
+                this._resetInner();
+            return loaded;
+        });
     }
     /** Delete a previously persisted snapshot. */
     async deleteSnapshot(name) {
@@ -2060,7 +2513,8 @@ export class AlbexEngine {
      * WASM instance and its (typically 20 MB) backing memory.
      */
     [Symbol.dispose]() {
-        this.reset();
+        // Terminal: bypass the idle guard — disposing mid-operation is allowed.
+        this._resetInner();
         this._unsubscribeResources?.();
         this._unsubscribeResources = null;
         this._gpu?.destroy();