npm - sweet-search - Versions diffs - 2.5.1 → 2.5.2 - Mend

sweet-search 2.5.1 → 2.5.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12) hide show

package/core/cli.js +24 -0
package/core/embedding/embedding-cache.js +90 -4
package/core/embedding/embedding-service.js +27 -5
package/core/indexing/index-codebase-v21.js +31 -2
package/core/infrastructure/index.js +2 -0
package/core/infrastructure/init-config.js +138 -0
package/core/search/search-read-semantic.js +17 -0
package/core/search/search-server.js +147 -1
package/core/search/sweet-search.js +15 -2
package/core/start-server.js +13 -2
package/package.json +7 -7
package/scripts/uninstall.js +152 -6

package/core/cli.js CHANGED Viewed

@@ -28,6 +28,20 @@ if (args[0] === 'init') {
   // Hybrid span-selection reader; runs in JS (depends on LI index + ranking).
   const { handleReadSemanticCli } = await import('./search/search-read-semantic.js');
   await handleReadSemanticCli(args.slice(1));
+} else if (args[0] === 'index') {
+  // Indexing pipeline. Forwarded to index-codebase-v21.js::main(), which
+  // reads its own flags via process.argv. Setting argv here is required
+  // because the indexer's parseArgs reads process.argv.slice(2) by default.
+  // Without this subcommand, npm-installed users had no way to invoke
+  // indexing — `node ./node_modules/sweet-search/core/indexing/index-codebase-v21.js`
+  // was a silent no-op (direct-run guard mismatched under symlinked installs)
+  // and the bin had no `index` entry at all. Forwards every argument after
+  // `index` so existing flag combos (--full / --graph-only / --vectors-only /
+  // --files-from-stdin / --late-interaction-model=… / etc.) all work.
+  const indexerArgs = args.slice(1);
+  process.argv = [process.argv[0], 'index-codebase-v21.js', ...indexerArgs];
+  const { main: runIndexer } = await import('./indexing/index-codebase-v21.js');
+  await runIndexer();
 } else if (args[0] === '--serve' || args[0] === '--stop') {
   // Warm search server lifecycle is implemented in JS.
   const { runCli } = await import('./search/index.js');
@@ -39,6 +53,7 @@ Usage:
   sweet-search <query>                  Search the indexed codebase
   sweet-search read <file...>           Filesystem-grounded read (1-20 files)
   sweet-search read-semantic <f> <q>    Return only file spans relevant to a query
+  sweet-search index [options]          Build / update the codebase index
   sweet-search init [options]           Set up runtime assets and models
   sweet-search uninstall [opts]         Remove local state created by init
   sweet-search prewarm-vocab [file]     Pre-warm vocabulary cache with terms
@@ -50,6 +65,15 @@ Options:
   --json            Output results as JSON
   --cold            Force cold start (skip warm server)
+Indexing flags (sweet-search index ...):
+  --full            Full reindex from scratch
+  --graph-only      Build code graph only
+  --vectors-only    Build vectors + HNSW only (skips code graph)
+  --files-from-stdin  Read newline-delimited paths from stdin
+  --late-interaction-model=ID  Override the LI variant for this run
+  --no-late-interaction        Skip LI index build
+  --quiet | --verbose          Logging verbosity
 Run 'sweet-search init --help' or 'sweet-search uninstall --help' for subcommand options.`);
 } else {
   const { resolveNativeBinary } = await import('./infrastructure/index.js');

package/core/embedding/embedding-cache.js CHANGED Viewed

@@ -45,6 +45,7 @@ export class LRUCache {
   }
   has(key) { return this.cache.has(key); }
+  delete(key) { this.hitCount.delete(key); return this.cache.delete(key); }
   getHitCount(key) { return this.hitCount.get(key) || 0; }
   size() { return this.cache.size; }
   clear() { this.cache.clear(); this.hitCount.clear(); }
@@ -191,6 +192,62 @@ export class QueryStats {
 //        model is not silently served when a different model is active.
 const VOCAB_SCHEMA_VERSION = 3;
+/**
+ * Coerce an input value into a Float32Array suitable for downstream embedding
+ * math (truncateForHNSW, late-interaction MaxSim, cosine similarity).
+ *
+ * Why this exists: persisted vocabularies are JSON-serialised. JSON.stringify
+ * on a Float32Array produces an indexed object `{"0": v0, "1": v1, ...}`,
+ * not an array. After `JSON.parse`, the value has `.length === undefined`,
+ * `.slice === undefined`, and crashes any downstream consumer that calls
+ * vector methods. This helper repairs the value at the cache boundary so
+ * the rest of the embedding pipeline can rely on a uniform vector contract.
+ *
+ * Accepted inputs:
+ *   - Float32Array → returned as-is
+ *   - Array<number> → copied into a new Float32Array (elements are not validated)
+ *   - Float64Array / Int*Array etc. → copied into Float32Array
+ *   - Plain object with stringly-keyed numeric indices ("0","1",...,"N-1")
+ *     → reconstructed as Float32Array of length N
+ *
+ * Returns null when the input cannot be sensibly interpreted as a vector
+ * (callers should drop the cache entry and re-derive).
+ *
+ * @param {*} value
+ * @returns {Float32Array|null}
+ */
+export function coerceToFloat32Vector(value) {
+  if (value == null) return null;
+  if (value instanceof Float32Array) return value;
+  if (Array.isArray(value)) return Float32Array.from(value);
+  // Other typed arrays: copy values into a Float32Array.
+  if (ArrayBuffer.isView(value) && typeof value.length === 'number') {
+    return Float32Array.from(value);
+  }
+  // Plain object form from JSON-deserialised Float32Array.
+  if (typeof value === 'object') {
+    const keys = Object.keys(value);
+    if (keys.length === 0) return null;
+    // All keys must be string-encoded non-negative integers and contiguous
+    // from 0 to length-1. (We do not try to "fill gaps" — that would silently
+    // mask a real bug.)
+    const indices = new Array(keys.length);
+    for (let i = 0; i < keys.length; i++) {
+      const k = keys[i];
+      // Reject anything that isn't an integer-shaped key.
+      if (!/^\d+$/.test(k)) return null;
+      const n = +k;
+      if (!Number.isInteger(n) || n < 0 || n >= keys.length) return null;
+      indices[n] = value[k];
+    }
+    for (let i = 0; i < indices.length; i++) {
+      if (typeof indices[i] !== 'number' || !Number.isFinite(indices[i])) return null;
+    }
+    return Float32Array.from(indices);
+  }
+  return null;
+}
 /** Build the embedding-fingerprint we expect a vocabulary file to match. */
 function currentVocabFingerprint() {
   return {
@@ -269,10 +326,27 @@ export class Vocabulary {
           this.terms.clear();
         } else {
           this.metadata = { ...this.metadata, ...(data.metadata || {}) };
-          for (const [term, embedding] of Object.entries(data.terms || {})) {
-            this.terms.set(term, embedding);
+          let normalized = 0;
+          let dropped = 0;
+          for (const [term, raw] of Object.entries(data.terms || {})) {
+            // Coerce to Float32Array. Persisted vocabs JSON-serialise typed
+            // arrays as indexed objects (`{"0": v0, ...}`), which otherwise
+            // crash downstream `embedding.slice(...)` calls (see
+            // `truncateForHNSW`). Reject any entry we cannot interpret as a
+            // vector — better to re-embed than to surface a corrupt vector.
+            const vec = coerceToFloat32Vector(raw);
+            if (vec) {
+              this.terms.set(term, vec);
+              normalized++;
+            } else {
+              dropped++;
+            }
+          }
+          if (dropped > 0) {
+            console.log(`Vocabulary: Loaded ${normalized} pre-computed embeddings (dropped ${dropped} unrecognised)`);
+          } else {
+            console.log(`Vocabulary: Loaded ${normalized} pre-computed embeddings`);
           }
-          console.log(`Vocabulary: Loaded ${this.terms.size} pre-computed embeddings`);
         }
       }
     } catch (err) {
@@ -292,7 +366,18 @@ export class Vocabulary {
       this.metadata.model = EMBEDDING_CONFIG.model;
       this.metadata.dimension = EMBEDDING_CONFIG.dimension;
       if (!this.metadata.created) this.metadata.created = this.metadata.lastUpdated;
-      const data = { metadata: this.metadata, terms: Object.fromEntries(this.terms) };
+      // Normalise to plain arrays so JSON.stringify produces a compact,
+      // round-trippable form. Float32Array would otherwise serialise as
+      // an indexed object ({"0": v0, "1": v1, ...}) which load() can read
+      // (via coerceToFloat32Vector) but which is wasteful and was the
+      // shape that originally caused the `embedding.slice` bug.
+      const termsOut = {};
+      for (const [term, vec] of this.terms.entries()) {
+        termsOut[term] = vec instanceof Float32Array || ArrayBuffer.isView(vec)
+          ? Array.from(vec)
+          : vec;
+      }
+      const data = { metadata: this.metadata, terms: termsOut };
       await writeJsonAtomic(this.vocabPath, JSON.stringify(data, null, 2));
     });
   }
@@ -303,6 +388,7 @@ export class Vocabulary {
   }
   set(term, embedding) { this.terms.set(this.normalize(term), embedding); }
   has(term) { return this.terms.has(this.normalize(term)); }
+  delete(term) { return this.terms.delete(this.normalize(term)); }
   normalize(term) { return term.toLowerCase().trim(); }
   size() { return this.terms.size; }

package/core/embedding/embedding-service.js CHANGED Viewed

@@ -45,6 +45,7 @@ import {
   queryDeduplicator,
   queryStats,
   cacheStats,
+  coerceToFloat32Vector,
   getCacheStats as _getCacheStats,
   getSemanticCacheStats,
   clearCache,
@@ -205,17 +206,38 @@ export async function getEmbedding(text, options = {}) {
   if (useCache && EMBEDDING_CONFIG.cache?.enabled) {
     const cached = queryCache.get(cacheKey);
     if (cached) {
-      cacheStats.hits++;
-      return { embedding: cached, cached: true, source: 'lru', latency_us: Math.round((performance.now() - start) * 1000) };
+      // Defensive guard: a cache value MUST be a vector with .length and
+      // .slice. Persisted vocabularies that round-tripped through JSON
+      // produce indexed-object shapes which crash downstream consumers.
+      // Coerce; if unrecoverable, drop the entry and fall through.
+      const cachedVec = coerceToFloat32Vector(cached);
+      if (cachedVec) {
+        if (cachedVec !== cached) queryCache.set(cacheKey, cachedVec);
+        cacheStats.hits++;
+        return { embedding: cachedVec, cached: true, source: 'lru', latency_us: Math.round((performance.now() - start) * 1000) };
+      }
+      queryCache.delete?.(cacheKey);
+      console.warn(`[embedding] LRU cache held non-vector for "${cacheKey.slice(0, 60)}"; regenerating`);
     }
     if (isQuery && EMBEDDING_CONFIG.cache?.useVocabulary !== false) {
       await vocabulary.load();
       const vocabHit = vocabulary.get(text);
       if (vocabHit) {
-        cacheStats.vocabularyHits++;
-        queryCache.set(cacheKey, vocabHit);
-        return { embedding: vocabHit, cached: true, source: 'vocabulary', latency_us: Math.round((performance.now() - start) * 1000) };
+        const vocabVec = coerceToFloat32Vector(vocabHit);
+        if (vocabVec) {
+          // Backfill the in-memory vocab map with the typed-array form so
+          // subsequent hits skip re-coercion.
+          if (vocabVec !== vocabHit) vocabulary.set?.(text, vocabVec);
+          cacheStats.vocabularyHits++;
+          queryCache.set(cacheKey, vocabVec);
+          return { embedding: vocabVec, cached: true, source: 'vocabulary', latency_us: Math.round((performance.now() - start) * 1000) };
+        }
+        // Unrecoverable vocab entry — drop it and continue. (load() now
+        // normalises on read, so this branch should be unreachable in
+        // practice; it is the belt-and-braces for older code paths.)
+        vocabulary.delete?.(text);
+        console.warn(`[embedding] vocabulary held non-vector for "${text.slice(0, 60)}"; dropping and regenerating`);
       }
     }
   }

package/core/indexing/index-codebase-v21.js CHANGED Viewed

@@ -40,6 +40,7 @@ if (process.env.SWEET_SEARCH_UV_THREADPOOL_SIZE && !process.env.UV_THREADPOOL_SI
 import { existsSync } from 'fs';
 import { DB_PATHS, LATE_INTERACTION_CONFIG } from '../infrastructure/config/index.js';
+import { applyPersistedLiModel } from '../infrastructure/init-config.js';
 import { resolveRelationshipTargets } from '../graph/relationship-resolver.js';
 import { requireNativeAnn as requireNativeAnnBackend } from '../vector-store/hnsw-index.js';
 import { getStats as getIncrementalStats } from './incremental-tracker.js';
@@ -124,11 +125,18 @@ async function main() {
     setVerboseMode(true);
   }
-  // Apply late interaction model overrides before any model code runs
+  // Apply late interaction model overrides before any model code runs.
+  // Precedence: --no-late-interaction > --late-interaction-model=… > env
+  // var (already honoured by LATE_INTERACTION_CONFIG.model at module load) >
+  // .sweet-search/config.json::runtime.li.model > built-in default. Only
+  // touch the persisted-config branch when neither CLI flag was used —
+  // applyPersistedLiModel internally re-checks the env var.
   if (noLateInteraction) {
     LATE_INTERACTION_CONFIG.model = false;
   } else if (lateInteractionModel) {
     LATE_INTERACTION_CONFIG.model = lateInteractionModel;
+  } else {
+    applyPersistedLiModel(process.env.SWEET_SEARCH_PROJECT_ROOT || process.cwd());
   }
   log(`${colors.bright}╔═══════════════════════════════════════════════════╗${colors.reset}`, 'bright');
@@ -441,7 +449,28 @@ Output:
   }
 }
-if (import.meta.url === `file://${process.argv[1]}`) {
+// Direct-run guard. The previous `import.meta.url === \`file://${process.argv[1]}\``
+// form silently no-op'd under three real-world conditions:
+//   1. `npm install ../sweet-search-private` (file install) symlinks
+//      `node_modules/sweet-search/` to the source — `process.argv[1]` is the
+//      symlink path while `import.meta.url` resolves to the realpath.
+//   2. Paths containing spaces or unicode — the URL form encodes them but
+//      `file://` + raw path doesn't.
+//   3. Windows backslash vs URL forward-slash mismatch.
+// Compare real filesystem paths instead: `realpathSync(fileURLToPath(import.meta.url))`
+// against `realpathSync(process.argv[1])`, so the comparison survives every common
+// install layout. Falls back to never-direct (safe default) if either side errors.
+import { realpathSync } from 'node:fs';
+import { fileURLToPath } from 'node:url';
+const _isDirectRun = (() => {
+  if (!process.argv[1]) return false;
+  try {
+    return realpathSync(fileURLToPath(import.meta.url)) === realpathSync(process.argv[1]);
+  } catch {
+    return false;
+  }
+})();
+if (_isDirectRun) {
   main().catch(err => {
     console.error(err);
     process.exit(1);

package/core/infrastructure/index.js CHANGED Viewed

@@ -82,6 +82,8 @@ export {
   loadInitConfig,
   writeInitConfig,
   readPersistedLiPolicy,
+  resolveRuntimeLiModel,
+  applyPersistedLiModel,
 } from './init-config.js';
 // Language analysis

package/core/infrastructure/init-config.js CHANGED Viewed

@@ -12,6 +12,8 @@
 import { existsSync, readFileSync, renameSync, writeFileSync } from 'node:fs';
 import { join } from 'node:path';
+import { LATE_INTERACTION_CONFIG } from './config/ranking.js';
 export const INIT_DATA_DIR_NAME = '.sweet-search';
 export const INIT_CONFIG_FILE_NAME = 'config.json';
@@ -76,3 +78,139 @@ export function readPersistedLiPolicy(projectRoot) {
   if (typeof li.searchReranking === 'string') out.searchReranking = li.searchReranking;
   return out;
 }
+// ---------------------------------------------------------------------------
+// Runtime LI model resolution (Phase 4 follow-up).
+//
+// Background: scripts/init.js writes the user's chosen LI variant into
+// `runtime.li.model` of `.sweet-search/config.json` (one of 'lateon-code',
+// 'lateon-code-edge', or 'none'). However the runtime late-interaction
+// machinery — encodeQuery, the LI index header check, the native model
+// loader, the CoreML cascade dispatcher, the indexer, the prewarm hook —
+// all read `LATE_INTERACTION_CONFIG.model` directly from
+// `core/infrastructure/config/ranking.js`, which only honours
+// `process.env.SWEET_SEARCH_LATE_INTERACTION_MODEL`. Without this bridge
+// an edge-only init would silently boot the standard model on every search,
+// breaking `read-semantic`, ColGrep, and every encodeQuery path even when
+// the persisted choice is correct.
+//
+// `applyPersistedLiModel` closes that gap. It is called from one place per
+// process entry point (SweetSearch ctor, read-semantic CLI/API, server
+// startup, prewarm hook) and is fully idempotent + cheap.
+//
+// Precedence (most-specific wins):
+//   1. explicit env var SWEET_SEARCH_LATE_INTERACTION_MODEL  (CI / scripts)
+//   2. persisted runtime.li.model in .sweet-search/config.json
+//   3. config default ('lateon-code')
+//
+// `'none'` from persisted config maps to the same disabled state the
+// indexer's `--no-late-interaction` flag uses (`model = false`), so
+// `LATE_INTERACTION_CONFIG.enabled` collapses to false everywhere LI is
+// consulted.
+// ---------------------------------------------------------------------------
+const VALID_RUNTIME_LI_MODEL_IDS = new Set([
+  'lateon-code',
+  'lateon-code-edge',
+  'none',
+]);
+/**
+ * Resolve the active LI runtime model id given precedence inputs. Pure;
+ * does no I/O of its own. Caller passes in the persisted record (or null).
+ *
+ * Returns one of (an env override other than 'none'/'false' is returned verbatim):
+ *   - 'lateon-code'      → standard variant
+ *   - 'lateon-code-edge' → edge variant
+ *   - false              → LI disabled (matches index-codebase-v21's `--no-late-interaction`)
+ *
+ * @param {string|null|undefined} persistedModel
+ * @param {object} [env=process.env]
+ * @param {string} [defaultModel='lateon-code']
+ */
+export function resolveRuntimeLiModel(
+  persistedModel,
+  env = process.env,
+  defaultModel = 'lateon-code',
+) {
+  // 1. Explicit env override always wins (preserves the CI/script contract
+  //    documented on LATE_INTERACTION_CONFIG.model).
+  const fromEnv = env?.SWEET_SEARCH_LATE_INTERACTION_MODEL;
+  if (typeof fromEnv === 'string' && fromEnv.length > 0) {
+    if (fromEnv === 'none') return false;
+    if (fromEnv === 'false') return false;
+    return fromEnv;
+  }
+  // 2. Persisted choice from .sweet-search/config.json.
+  if (typeof persistedModel === 'string' && VALID_RUNTIME_LI_MODEL_IDS.has(persistedModel)) {
+    if (persistedModel === 'none') return false;
+    return persistedModel;
+  }
+  // 3. Default — preserves the historical 'lateon-code' fallback.
+  return defaultModel;
+}
+/**
+ * Apply the persisted LI model choice from `.sweet-search/config.json`
+ * to the global `LATE_INTERACTION_CONFIG.model`, honouring the precedence
+ * ladder documented above. Idempotent + cheap; safe to call from every
+ * runtime entry point.
+ *
+ * Returns a small report object so the caller can log / surface the
+ * decision when verbose. Never throws.
+ *
+ *   {
+ *     applied:   'lateon-code' | 'lateon-code-edge' | any env-supplied id | false,
+ *     before:    <prior LATE_INTERACTION_CONFIG.model>,
+ *     source:    'env' | 'persisted' | 'default',
+ *     persistedModel: <raw persisted value or null>,
+ *     changed:   boolean,
+ *   }
+ *
+ * @param {string} projectRoot
+ * @param {object} [opts]
+ * @param {object} [opts.env=process.env]
+ * @param {boolean} [opts.force=false] — when true, applies even if the
+ *   active value already matches (used by tests resetting state).
+ */
+export function applyPersistedLiModel(projectRoot, opts = {}) {
+  const env = opts.env ?? process.env;
+  const before = LATE_INTERACTION_CONFIG.model;
+  let persistedModel = null;
+  try {
+    const persisted = readPersistedLiPolicy(projectRoot);
+    persistedModel = typeof persisted.liModel === 'string' ? persisted.liModel : null;
+  } catch {
+    persistedModel = null;
+  }
+  const resolved = resolveRuntimeLiModel(persistedModel, env, 'lateon-code');
+  let source;
+  if (typeof env?.SWEET_SEARCH_LATE_INTERACTION_MODEL === 'string'
+      && env.SWEET_SEARCH_LATE_INTERACTION_MODEL.length > 0) {
+    source = 'env';
+  } else if (persistedModel != null && VALID_RUNTIME_LI_MODEL_IDS.has(persistedModel)) {
+    source = 'persisted';
+  } else {
+    source = 'default';
+  }
+  if (opts.force === true || resolved !== before) {
+    // Mutates the shared LATE_INTERACTION_CONFIG singleton — this is the
+    // entire point of this helper. The ESM namespace binding is read-only
+    // but the OBJECT it references is the same reference every consumer
+    // (encodeQuery, indexer, native-inference, prewarm) holds.
+    LATE_INTERACTION_CONFIG.model = resolved;
+  }
+  return {
+    applied: resolved,
+    before,
+    source,
+    persistedModel,
+    changed: resolved !== before,
+  };
+}

package/core/search/search-read-semantic.js CHANGED Viewed

@@ -32,8 +32,23 @@
 import path from 'node:path';
 import { CodebaseRepository } from '../infrastructure/codebase-repository.js';
 import { DB_PATHS, LATE_INTERACTION_CONFIG } from '../infrastructure/config/index.js';
+import { applyPersistedLiModel } from '../infrastructure/init-config.js';
 import { readFile as readFileExact } from './search-read.js';
+// Applies the user's persisted LI model once per projectRoot (the memo below
+// is keyed on the root alone) so encodeQuery/_getLateInteractionIndex see the
+// right variant. Without this an edge-only init silently uses the standard
+// 768d model for query encoding while the on-disk LI index was built with the
+// 256d edge model — every score becomes nonsense (the dim mismatch trips the
+// modelMismatch guard, but only after query encoding has run the wrong model).
+const _appliedLiPerRoot = new Map(); // projectRoot -> appliedModel
+function _ensurePersistedLiModelApplied(projectRoot) {
+  const key = projectRoot || process.cwd();
+  if (_appliedLiPerRoot.has(key)) return;
+  const r = applyPersistedLiModel(key);
+  _appliedLiPerRoot.set(key, r.applied);
+}
 // ---------------------------------------------------------------------------
 // Defaults — keep modest so a one-file call stays under ~100ms after warmup.
 // ---------------------------------------------------------------------------
@@ -448,6 +463,7 @@ export async function readSemantic(req) {
   if (!req.query || !String(req.query).trim()) throw new Error('query is required');
   const projectRoot = req.projectRoot || process.cwd();
+  _ensurePersistedLiModelApplied(projectRoot);
   const filePathRel = _projectRelative(req.path, projectRoot);
   const topK = req.topK ?? DEFAULTS.topK;
@@ -714,4 +730,5 @@ export function __resetReadSemanticCachesForTests() {
   _liIndex = null;
   _liInitPromise = null;
   _encodeQueryFn = null;
+  _appliedLiPerRoot.clear();
 }

package/core/search/search-server.js CHANGED Viewed

@@ -257,8 +257,13 @@ export async function startServer() {
           ...(agentFormat && { format: agentFormat, tokenBudget }),
         });
-        // Agent mode: return the packaged response directly as JSON
+        // Agent mode: return the packaged response directly as JSON.
+        // Inject server-side repo identity so callers can prove which repo
+        // produced these results (defends against multi-repo bench reusing
+        // a stale daemon — see eval/agent-read-workflows/run-bench.js).
         if (searchResult.format === 'agent') {
+          searchResult.serverProjectRoot = searcher.projectRoot || null;
+          searchResult.serverPid = process.pid;
           res.writeHead(200, { 'Content-Type': 'application/json' });
           res.end(JSON.stringify(searchResult));
         } else {
@@ -286,12 +291,22 @@ export async function startServer() {
       }
     } else if (req.method === 'GET' && reqUrl === '/health') {
       const status = initError ? 'failed' : (serverReady ? 'ready' : 'starting');
+      // Repo identity — harness uses these to verify the daemon serves the
+      // expected repo, not a leftover from a previous benchmark subprocess.
+      // We make the path absolute so relative differences are normalised (path.resolve does not follow symlinks).
+      const rawProjectRoot = searcher.projectRoot || null;
+      let resolvedProjectRoot = null;
+      try { if (rawProjectRoot) resolvedProjectRoot = (await import('path')).default.resolve(rawProjectRoot); } catch { /* */ }
       res.writeHead(200, { 'Content-Type': 'application/json' });
       res.end(JSON.stringify({
         status,
         warm: serverReady,
         pid: process.pid,
         uptimeSec: Math.round(process.uptime()),
+        projectRoot: rawProjectRoot,
+        resolvedProjectRoot,
+        codebaseDbPath: searcher.codebaseDbPath || null,
+        initialized: serverReady && !initError,
         init: {
           startedAt: new Date(initStartedAt).toISOString(),
           elapsedMs: initTimeMs ?? (Date.now() - initStartedAt),
@@ -481,6 +496,137 @@ export async function queryServer(query, options = {}) {
   });
 }
+/**
+ * Fetch /health from the running daemon. Returns the parsed body, or null if
+ * the daemon is unreachable / replies non-200.
+ *
+ * Use this (not isServerRunning alone) when you need repo identity to make a
+ * decision — e.g., the agent-bench harness must know which repo the daemon
+ * is currently serving so it can refuse cross-repo contamination.
+ */
+export async function getServerHealth({ timeoutMs = 1000 } = {}) {
+  try {
+    const http = await import('http');
+    return await new Promise((resolve) => {
+      const req = http.get(`http://localhost:${SEARCH_SERVER_PORT}/health`, (res) => {
+        let payload = '';
+        res.on('data', chunk => { payload += chunk; });
+        res.on('end', () => {
+          if (res.statusCode !== 200) { resolve(null); return; }
+          try { resolve(JSON.parse(payload)); }
+          catch { resolve(null); }
+        });
+      });
+      req.on('error', () => resolve(null));
+      req.setTimeout(timeoutMs, () => { req.destroy(); resolve(null); });
+    });
+  } catch {
+    return null;
+  }
+}
+/**
+ * Send /stop to the running daemon (Unix-socket only — TCP is forbidden).
+ * Returns true when a response completes (any status) or the connection fails
+ * with ECONNRESET / EPIPE / ENOENT (daemon exiting or socket already gone),
+ * false otherwise. Caller should poll until the socket disappears or cool down.
+ */
+export async function stopServer({ timeoutMs = 5000 } = {}) {
+  try {
+    const http = await import('http');
+    return await new Promise((resolve) => {
+      const req = http.request({
+        socketPath: SEARCH_SERVER_SOCKET, path: '/stop', method: 'GET',
+      }, (res) => {
+        res.on('data', () => {});
+        res.on('end', () => resolve(true));
+      });
+      // The server may close the socket abruptly as it exits before sending an
+      // end-of-response. Treat that as success too.
+      req.on('error', (err) => {
+        const msg = (err && err.code) || '';
+        if (msg === 'ECONNRESET' || msg === 'EPIPE' || msg === 'ENOENT') resolve(true);
+        else resolve(false);
+      });
+      req.setTimeout(timeoutMs, () => { req.destroy(); resolve(false); });
+      req.end();
+    });
+  } catch {
+    return false;
+  }
+}
+/**
+ * Best-effort wait for the daemon to exit. Returns true once /health stops
+ * answering (within timeoutMs); false otherwise.
+ */
+export async function waitForServerExit({ timeoutMs = 8000, intervalMs = 200 } = {}) {
+  const deadline = Date.now() + timeoutMs;
+  while (Date.now() < deadline) {
+    if (!(await isServerRunning())) return true;
+    await new Promise(r => setTimeout(r, intervalMs));
+  }
+  return false;
+}
+/**
+ * Ensure the warm daemon serves the requested projectRoot. If a daemon is
+ * already running with a different projectRoot, stop it first, then re-spawn.
+ *
+ * Returns:
+ *   { ok: true,  health, action: 'reused'|'spawned'|'restarted' }
+ *   { ok: false, reason, health? }
+ *
+ * Used by the agent-bench harness to fail closed against cross-repo
+ * contamination (see eval/agent-read-workflows/run-bench.js warmup phase).
+ */
+export async function ensureDaemonForProjectRoot(expectedProjectRoot, {
+  timeoutMs = 60000, intervalMs = 500,
+} = {}) {
+  const path = (await import('path')).default;
+  const expected = path.resolve(expectedProjectRoot);
+  let action = null;
+  let health = await getServerHealth();
+  if (health && health.resolvedProjectRoot && health.resolvedProjectRoot === expected) {
+    return { ok: true, health, action: 'reused' };
+  }
+  if (health && health.resolvedProjectRoot && health.resolvedProjectRoot !== expected) {
+    // Wrong-repo daemon. Stop it and respawn with the correct env.
+    await stopServer();
+    const exited = await waitForServerExit();
+    if (!exited) {
+      return { ok: false, reason: 'previous-daemon-failed-to-exit', health };
+    }
+    action = 'restarted';
+  } else {
+    action = 'spawned';
+  }
+  // Spawn detached daemon. autoSpawnServer inherits env, so the caller must
+  // already have SWEET_SEARCH_PROJECT_ROOT set to expectedProjectRoot.
+  if (process.env.SWEET_SEARCH_PROJECT_ROOT) {
+    const envResolved = path.resolve(process.env.SWEET_SEARCH_PROJECT_ROOT);
+    if (envResolved !== expected) {
+      return {
+        ok: false,
+        reason: `caller env SWEET_SEARCH_PROJECT_ROOT=${envResolved} differs from expected=${expected}`,
+      };
+    }
+  }
+  await autoSpawnServer();
+  const deadline = Date.now() + timeoutMs;
+  while (Date.now() < deadline) {
+    health = await getServerHealth();
+    if (health && health.resolvedProjectRoot === expected && (health.warm === true || health.status === 'ready')) {
+      return { ok: true, health, action };
+    }
+    await new Promise(r => setTimeout(r, intervalMs));
+  }
+  return { ok: false, reason: 'daemon-did-not-become-ready-with-expected-root', health };
+}
 export async function isServerRunning() {
   try {
     const http = await import('http');

package/core/search/sweet-search.js CHANGED Viewed

@@ -22,7 +22,7 @@ import { BinaryHNSWIndex } from '../vector-store/binary-hnsw-index.js';
 import { Reranker } from '../ranking/flashrank.js';
 import { LateInteractionIndex } from '../ranking/late-interaction-index.js';
 import { resolveSearchRerankPolicy } from '../ranking/late-interaction-policy.js';
-import { readPersistedLiPolicy } from '../infrastructure/index.js';
+import { applyPersistedLiModel, readPersistedLiPolicy } from '../infrastructure/index.js';
 import { getEmbedding, getBinaryEmbedding, truncateForHNSW, int8CosineSimilarity, warmup as warmupEmbedding, isWarm, registerAutoPersistOnExit } from '../embedding/embedding-service.js';
 import { FloatVectorStore, getFloatStorePath } from '../vector-store/float-vector-store.js';
 import { recordQueryTelemetry } from '../embedding/embedding-cache.js';
@@ -91,6 +91,14 @@ export class SweetSearch {
   constructor(options = {}) {
     const projectRoot = options.projectRoot || process.env.SWEET_SEARCH_PROJECT_ROOT || process.cwd();
     this.projectRoot = projectRoot;
+    // Honor the user's persisted `runtime.li.model` choice from
+    // `.sweet-search/config.json` BEFORE we read `LATE_INTERACTION_CONFIG.model`
+    // for activeConfigModel below or any downstream consumer (encodeQuery,
+    // LateInteractionIndex header check, native LI loader, CoreML cascade
+    // dispatcher). Without this an edge-only init silently activates the
+    // standard model path on every search. Env var still wins; see
+    // applyPersistedLiModel for the full precedence ladder.
+    this._liModelApply = applyPersistedLiModel(projectRoot);
     const projectConfig = loadProjectConfig(projectRoot);
     const projectCascade = projectConfig.cascade || {};
     const envOrProject = (envKey, cascadeKey, configKey) =>
@@ -405,7 +413,12 @@ export class SweetSearch {
     let searchMode;
     if (mode === 'auto') {
       searchMode = routing.mode;
-      stats.routing = { mode: routing.mode, confidence: routing.confidence, latency_us: routing.routingLatency_us };
+      stats.routing = {
+        mode: routing.mode,
+        confidence: routing.confidence,
+        latency_us: routing.routingLatency_us,
+        method: routing.method,
+      };
     } else {
       searchMode = mode;
       stats.routing = {

package/core/start-server.js CHANGED Viewed

@@ -1,6 +1,17 @@
 #!/usr/bin/env node
 // Minimal server-start entry point — avoids the circular import in sweet-search.js.
-// Used by the Rust CLI's auto_start_server() to spawn the background server.
+// Used by the Rust CLI's auto_start_server() to spawn the background server,
+// and by the SessionStart daemon-prewarm hook (core/search/session-daemon-prewarm.mjs)
+// when Claude Code opens a new session.
-import { startServer } from './search/search-server.js';
+// Apply the user's persisted `runtime.li.model` from .sweet-search/config.json
+// BEFORE importing search-server (which transitively imports session-warmup,
+// which gates warmup steps on `LATE_INTERACTION_CONFIG.enabled` and triggers a
+// warmup search using `LATE_INTERACTION_CONFIG.model`). Without this, an
+// edge-only init still spawns a daemon that prewarms the standard model.
+const projectRoot = process.env.SWEET_SEARCH_PROJECT_ROOT || process.cwd();
+const { applyPersistedLiModel } = await import('./infrastructure/init-config.js');
+applyPersistedLiModel(projectRoot);
+const { startServer } = await import('./search/search-server.js');
 await startServer();

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "sweet-search",
-  "version": "2.5.1",
+  "version": "2.5.2",
   "description": "Sweet Search - SOTA Hybrid Code Search Engine with WASM CatBoost Query Router, Semantic/Lexical/Structural Search, and Multilingual Support",
   "type": "module",
   "main": "core/search/sweet-search.js",
@@ -142,12 +142,12 @@
     "vitest": "^4.0.16"
   },
   "optionalDependencies": {
-    "@sweet-search/native-darwin-arm64": "2.5.1",
-    "@sweet-search/native-darwin-x64": "2.5.1",
-    "@sweet-search/native-linux-arm64-gnu": "2.5.1",
-    "@sweet-search/native-linux-arm64-gnu-cuda": "2.5.1",
-    "@sweet-search/native-linux-x64-gnu": "2.5.1",
-    "@sweet-search/native-linux-x64-gnu-cuda": "2.5.1"
+    "@sweet-search/native-darwin-arm64": "2.5.2",
+    "@sweet-search/native-darwin-x64": "2.5.2",
+    "@sweet-search/native-linux-arm64-gnu": "2.5.2",
+    "@sweet-search/native-linux-arm64-gnu-cuda": "2.5.2",
+    "@sweet-search/native-linux-x64-gnu": "2.5.2",
+    "@sweet-search/native-linux-x64-gnu-cuda": "2.5.2"
   },
   "engines": {
     "node": ">=18.0.0"

package/scripts/uninstall.js CHANGED Viewed

@@ -132,15 +132,23 @@ function getModelCacheDirs(initConfig) {
  * return an empty array — uninstall doesn't print a "removing 0 B"
  * line.
  */
-function getCoremlCascadeRemovals() {
+export function getCoremlCascadeRemovals() {
   const removals = [];
   try {
     const root = getCoremlCascadeRoot();
     if (existsSync(root)) {
       const state = getCoremlCascadeState();
+      // Sum across all advertised families — embed + standard LI + LI-edge.
+      // The earlier label only counted embed + standard LI (12 on the
+      // shipping spec) which contradicted init's "18 variants ready"
+      // (6 embed + 6 LI + 6 LI-edge). `liEdgeTotal` is 0 on hosts whose
+      // spec doesn't advertise the edge family, so older specs still
+      // collapse to the prior 12-count behaviour without ceremony.
+      const totalAll = state.embedTotal + state.liTotal + state.liEdgeTotal;
+      const presentAll = state.embedPresent + state.liPresent + state.liEdgePresent;
       const label = state.complete
-        ? `coreml cascade (${state.embedTotal + state.liTotal} variants complete)`
-        : `coreml cascade (${state.embedPresent + state.liPresent}/${state.embedTotal + state.liTotal} variants partial)`;
+        ? `coreml cascade (${totalAll} variants complete)`
+        : `coreml cascade (${presentAll}/${totalAll} variants partial)`;
       removals.push({ label, path: root, size: dirSize(root), type: 'coreml-cascade' });
     }
   } catch {
@@ -213,6 +221,70 @@ export function stopRunningDaemon({
   return result;
 }
+/**
+ * Remove the index-maintainer daemon hook init copied into
+ * `.claude/hooks/index-maintainer.mjs`. Only removes the file when it
+ * matches the bytes init shipped — never deletes a user-modified file
+ * we don't own. The marker is the source path: init does
+ * `copyFileSync(<pkg>/core/indexing/index-maintainer.mjs, dest)`, so
+ * we compare destination bytes to the package source.
+ *
+ * Returns `{ status, detail }`:
+ *   not-found  — file absent (nothing to do)
+ *   removed    — file removed (matched shipped bytes)
+ *   skipped    — file present but contents differ (user-modified) — left intact
+ *   dry-run    — found the file but skipped the delete
+ *   error      — rm or read failed; uninstall continues
+ */
+export function removeIndexMaintainerHook(projectRoot, { dryRun = false } = {}) {
+  const hookPath = join(projectRoot, '.claude', 'hooks', 'index-maintainer.mjs');
+  if (!existsSync(hookPath)) {
+    return { status: 'not-found', detail: 'no .claude/hooks/index-maintainer.mjs' };
+  }
+  // Only remove when the bytes match the version init shipped — refuses to
+  // delete a hook the user has customized. Failing the byte compare is a
+  // soft skip, not an error.
+  const shippedPath = join(PACKAGE_ROOT, 'core', 'indexing', 'index-maintainer.mjs');
+  let bytesMatch = false;
+  try {
+    if (existsSync(shippedPath)) {
+      const a = readFileSync(hookPath);
+      const b = readFileSync(shippedPath);
+      bytesMatch = a.length === b.length && a.equals(b);
+    }
+  } catch {
+    // Read errored on either side — treat as "don't remove, surface the
+    // file path so the user can clean up manually if they want to".
+    return { status: 'skipped', detail: `cannot compare bytes (${hookPath})` };
+  }
+  if (!bytesMatch) {
+    return {
+      status: 'skipped',
+      detail: `${hookPath} differs from shipped version — leaving in place (delete manually if intended)`,
+    };
+  }
+  if (dryRun) {
+    return { status: 'dry-run', detail: hookPath };
+  }
+  try {
+    unlinkSync(hookPath);
+    // Best-effort: prune the parent .claude/hooks/ if it's now empty (we
+    // own the file, not the directory; only delete if WE made it empty).
+    try {
+      const parent = dirname(hookPath);
+      const entries = readdirSync(parent);
+      if (entries.length === 0) rmdirSync(parent);
+    } catch { /* ignore — sibling files exist or rmdir failed */ }
+    return { status: 'removed', detail: hookPath };
+  } catch (err) {
+    return { status: 'error', detail: err.message };
+  }
+}
 /**
  * Remove the sweet-search /sweet-index skill from `.claude/skills/sweet-index/`.
  * Only removes the directory we created — leaves `.claude/skills/` and `.claude/`
@@ -342,6 +414,39 @@ export function removePrewarmSessionStartHook(projectRoot, { dryRun = false } =
   return { status: 'removed', detail: `spliced out ${sessionStart.length - filtered.length} entry` };
 }
+// ---------------------------------------------------------------------------
+// Optional native package list (derived from package.json)
+// ---------------------------------------------------------------------------
+/**
+ * Return the list of `@sweet-search/native-*` packages declared as
+ * `optionalDependencies` in package.json. `--purge` walks this list so
+ * additions (e.g. CUDA variants) are picked up automatically without
+ * having to keep two hand-maintained lists in sync.
+ *
+ * Falls back to a hard-coded list if package.json is unreadable, so a
+ * partial install still gets best-effort purge coverage.
+ */
+export function getOptionalNativePackageNames() {
+  try {
+    const pkgPath = join(PACKAGE_ROOT, 'package.json');
+    const pkg = JSON.parse(readFileSync(pkgPath, 'utf-8'));
+    const deps = pkg.optionalDependencies || {};
+    const out = Object.keys(deps).filter((n) => n.startsWith('@sweet-search/'));
+    if (out.length > 0) return out;
+  } catch { /* fall through to baseline */ }
+  // Baseline keeps the prior behaviour PLUS the CUDA variants that were
+  // missing from the pre-Phase-7 hand-maintained list.
+  return [
+    '@sweet-search/native-darwin-arm64',
+    '@sweet-search/native-darwin-x64',
+    '@sweet-search/native-linux-arm64-gnu',
+    '@sweet-search/native-linux-arm64-gnu-cuda',
+    '@sweet-search/native-linux-x64-gnu',
+    '@sweet-search/native-linux-x64-gnu-cuda',
+  ];
+}
 // ---------------------------------------------------------------------------
 // Help text
 // ---------------------------------------------------------------------------
@@ -369,6 +474,8 @@ What gets removed:
     artifacts AND the sibling .mlmodelc compiled cache files next to
     each variant. Skipped by --keep-models.
   - .claude/skills/sweet-index/ (the per-project /sweet-index skill copy)
+  - .claude/hooks/index-maintainer.mjs (init-installed). User-modified
+    copies are detected via a byte-compare and left in place.
   - daemon-prewarm SessionStart entry inside .claude/settings.json
 What is NOT removed:
@@ -436,8 +543,15 @@ export async function runUninstall(args) {
   const skillPreview = removeSweetIndexSkill(projectRoot, { dryRun: true });
   const hasSkillEntry = skillPreview.status === 'dry-run';
+  // Check for the index-maintainer daemon hook init copies into
+  // `.claude/hooks/index-maintainer.mjs`.  Same dry-run pattern.
+  const indexMaintainerPreview = removeIndexMaintainerHook(projectRoot, { dryRun: true });
+  const hasIndexMaintainerHook = indexMaintainerPreview.status === 'dry-run';
+  const indexMaintainerSkippedReason =
+    indexMaintainerPreview.status === 'skipped' ? indexMaintainerPreview.detail : null;
   // Nothing to remove?
-  if (removals.length === 0 && !hasHookEntry && !hasSkillEntry) {
+  if (removals.length === 0 && !hasHookEntry && !hasSkillEntry && !hasIndexMaintainerHook) {
     console.log('Nothing to remove — Sweet Search is not initialized in this project.');
     return;
   }
@@ -457,6 +571,11 @@ export async function runUninstall(args) {
   if (hasSkillEntry) {
     console.log(`    /sweet-index skill (.claude/skills/sweet-index/)`);
   }
+  if (hasIndexMaintainerHook) {
+    console.log(`    index-maintainer hook (.claude/hooks/index-maintainer.mjs)`);
+  } else if (indexMaintainerSkippedReason) {
+    console.log(`    [skipped] ${indexMaintainerSkippedReason}`);
+  }
   console.log(`  Total: ${formatBytes(totalBytes)}`);
   if (parsed.keepModels) {
     console.log('  Model cache: kept (--keep-models)');
@@ -472,6 +591,12 @@ export async function runUninstall(args) {
     if (drySkill.status === 'dry-run') {
       console.log(`  Would also remove: /sweet-index skill (${drySkill.detail})`);
     }
+    const dryMaintainer = removeIndexMaintainerHook(projectRoot, { dryRun: true });
+    if (dryMaintainer.status === 'dry-run') {
+      console.log(`  Would also remove: index-maintainer hook (${dryMaintainer.detail})`);
+    } else if (dryMaintainer.status === 'skipped') {
+      console.log(`  Would skip: index-maintainer hook — ${dryMaintainer.detail}`);
+    }
     console.log('Dry run — nothing was removed.');
     return;
   }
@@ -540,6 +665,21 @@ export async function runUninstall(args) {
   }
   // 'not-found' and 'dry-run' are silent in the main output.
+  // Reverse the index-maintainer daemon hook init copied into
+  // .claude/hooks/index-maintainer.mjs. Bytes-match check inside the
+  // helper guarantees we never delete a user-customised file.
+  const indexMaintainerResult = removeIndexMaintainerHook(projectRoot, { dryRun: parsed.dryRun });
+  if (indexMaintainerResult.status === 'removed') {
+    console.log(`  Removed: index-maintainer hook (${indexMaintainerResult.detail})`);
+    removed++;
+  } else if (indexMaintainerResult.status === 'skipped') {
+    console.log(`  Kept: index-maintainer hook — ${indexMaintainerResult.detail}`);
+    kept++;
+  } else if (indexMaintainerResult.status === 'error') {
+    console.log(`  Failed to remove index-maintainer hook: ${indexMaintainerResult.detail}`);
+    kept++;
+  }
   // Stop any daemon that an earlier SessionStart hook spawned. Otherwise the
   // old daemon keeps running and holding the socket after uninstall, which
   // surprises users. Never throws — `stopRunningDaemon` swallows every error.
@@ -556,11 +696,17 @@ export async function runUninstall(args) {
     console.log('');
     console.log('  Purging npm packages...');
     try {
-      execSync('npm uninstall sweet-search @sweet-search/native-darwin-arm64 @sweet-search/native-darwin-x64 @sweet-search/native-linux-x64-gnu @sweet-search/native-linux-arm64-gnu 2>/dev/null || true', {
+      const pkgs = ['sweet-search', ...getOptionalNativePackageNames()];
+      // Use shell-form so non-installed packages don't abort the whole
+      // command (npm exits non-zero per missing pkg). The OR-true keeps
+      // the script alive across npm exit codes from a partially-installed
+      // host (e.g. a Linux box without the darwin-* packages).
+      const cmd = `npm uninstall ${pkgs.join(' ')} 2>/dev/null || true`;
+      execSync(cmd, {
         cwd: projectRoot,
         stdio: ['pipe', 'pipe', 'pipe'],
       });
-      console.log('  npm packages removed.');
+      console.log(`  npm packages removed (${pkgs.length} candidates).`);
     } catch {
       console.log('  npm uninstall failed (packages may not be installed).');
     }