npm - sweet-search - Versions diffs - 2.5.13 → 2.6.0 - Mend

sweet-search 2.5.13 → 2.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (52) hide show

package/README.md +36 -9
package/core/cli.js +41 -3
package/core/embedding/embedding-local-model.js +106 -10
package/core/embedding/embedding-service.js +59 -1
package/core/embedding/model-client.mjs +257 -0
package/core/embedding/model-server.mjs +217 -0
package/core/incremental-indexing/application/maintenance-handlers.mjs +19 -98
package/core/incremental-indexing/application/maintenance-worker.mjs +46 -9
package/core/incremental-indexing/application/operator-cli.mjs +14 -5
package/core/incremental-indexing/application/production-reconciler-helpers.mjs +40 -0
package/core/incremental-indexing/application/production-reconciler.mjs +718 -54
package/core/incremental-indexing/application/reconciler.mjs +87 -15
package/core/incremental-indexing/domain/cutoff-cache.mjs +191 -0
package/core/incremental-indexing/domain/interval-autotune.mjs +84 -1
package/core/incremental-indexing/domain/reconcile-counters.mjs +0 -4
package/core/incremental-indexing/domain/watermark-scheduler.mjs +0 -24
package/core/incremental-indexing/infrastructure/maintenance-state-reader.mjs +2 -26
package/core/incremental-indexing/infrastructure/manifest.mjs +1 -9
package/core/incremental-indexing/infrastructure/sqlite-fts5.mjs +72 -0
package/core/indexing/artifact-builder.js +1 -1
package/core/indexing/dedup/dedup-phase.js +36 -17
package/core/indexing/dedup/exemplar-selector.js +5 -0
package/core/indexing/index-codebase-v21.js +37 -14
package/core/indexing/index-maintainer.mjs +337 -6
package/core/indexing/indexer-ann.js +27 -434
package/core/indexing/indexer-build.js +30 -14
package/core/indexing/indexer-manifest.js +0 -3
package/core/indexing/indexer-phases.js +101 -25
package/core/indexing/maintainer-launcher.mjs +22 -0
package/core/indexing/maintainer-watcher.mjs +397 -0
package/core/indexing/os-priority.mjs +160 -0
package/core/indexing/rss-budget.mjs +425 -0
package/core/indexing/streaming-vectors.js +450 -0
package/core/infrastructure/config/platform.js +14 -10
package/core/infrastructure/onnx-session-utils.js +37 -0
package/core/infrastructure/sparse-gram-delta-reader.js +11 -1
package/core/ranking/late-interaction-index.js +58 -7
package/core/search/daemon-registry.js +199 -0
package/core/search/search-read-semantic.js +9 -3
package/core/search/search-semantic.js +6 -29
package/core/search/search-server.js +527 -27
package/core/search/session-daemon-prewarm.mjs +110 -1
package/core/search/sweet-search.js +0 -38
package/core/vector-store/binary-hnsw-index.js +692 -78
package/core/vector-store/index.js +1 -4
package/eval/agent-read-workflows/bin/_ss-argparse.mjs +51 -5
package/eval/agent-read-workflows/bin/_ss-helpers.mjs +95 -44
package/eval/agent-read-workflows/bin/ss-read +2 -0
package/mcp/tool-handlers.js +1 -2
package/package.json +11 -8
package/scripts/uninstall.js +2 -0
package/core/vector-store/hnsw-index.js +0 -751

package/core/ranking/late-interaction-index.js CHANGED Viewed

@@ -376,6 +376,21 @@ export class LateInteractionIndex {
     this._segmentSize = options.segmentSize || LI_SEGMENT_SIZE;
     this._docSegmentPositions = new Map(); // doc id -> { segmentPath, docIndex }
     this._staleBitmapCache = new Map(); // segment path -> { mtimeMs, size, bitmap }
+    // Bounded build mode (Phase C completion). When `buildEvict` is set,
+    // _flushSegment() drops each flushed segment's per-token slabs from
+    // `this.documents` so peak indexing memory stays O(one segment) instead of
+    // O(all docs) — the regression that let large repos accumulate the entire
+    // per-token corpus in the heap. Only safe during a from-scratch build (no
+    // search reads, no rewrite-from-documents save path). The fast-path save()
+    // writes the manifest from the already-flushed segment files, so it never
+    // needs the evicted docs back. A lightweight id set keeps alias-pointer
+    // registration valid after the exemplar's tokens are gone, and running
+    // doc/token totals keep getStats() + the save() doc-count accurate.
+    this._evictMode = !!options.buildEvict;
+    this._evictedDocs = 0;
+    this._evictedTokens = 0;
+    this._addedIds = this._evictMode ? new Set() : null;
   }
   /**
@@ -406,6 +421,21 @@ export class LateInteractionIndex {
     this._finalIndexPath = finalIndexPath;
     this._segments = [];
     this._currentSegment = new Map();
+    // Reset bounded-build counters for the fresh staged save.
+    this._evictedDocs = 0;
+    this._evictedTokens = 0;
+    if (this._addedIds) this._addedIds.clear();
+  }
+  /**
+   * True if `id` is (or was) a document in this build — checks both the live
+   * `documents` map and, in bounded build mode, the lightweight id set that
+   * survives segment eviction. Alias-pointer registration uses this to verify
+   * an exemplar exists even after its per-token slab has been flushed+evicted.
+   *
+   * `_addedIds` is a Set only when the index was constructed with
+   * `buildEvict`; otherwise it is null and only the live map is consulted.
+   *
+   * @param {*} id - Document id, compared the same way `Map#has` / `Set#has` compare keys.
+   * @returns {boolean} true when the id is in `documents` or in the eviction-surviving id set.
+   */
+  hasDoc(id) {
+    if (this.documents.has(id)) return true; // still resident in the heap
+    return this._addedIds ? this._addedIds.has(id) : false; // evicted-but-known ids (bounded build mode only)
   }
   /**
@@ -568,6 +598,7 @@ export class LateInteractionIndex {
     this.documents.set(id, docEntry);
     if (docEntry.minArray) this._hasPerTokenQuant = true;
     this._currentSegment.set(id, docEntry);
+    if (this._evictMode) this._addedIds.add(id);
     // Flush segment to disk when full — releases memory for completed segments
     if (this._currentSegment.size >= this._segmentSize) {
@@ -595,6 +626,18 @@ export class LateInteractionIndex {
     await this._writeSegmentFile(segPath, this._currentSegment);
     this._segments.push({ path: segPath, count: this._currentSegment.size });
+    // Bounded build mode: drop this segment's per-token slabs from the live
+    // documents map now that they're durable on disk. Keeps peak heap O(one
+    // segment). The id set + running totals preserve everything later stages
+    // need (alias validity via hasDoc(), doc/token counts for save()+stats).
+    if (this._evictMode) {
+      for (const [id, doc] of this._currentSegment) {
+        this.documents.delete(id);
+        this._evictedDocs++;
+        this._evictedTokens += doc.numTokens || 0;
+      }
+    }
     // Release segment memory — these docs will be reloaded from segments during load()
     this._currentSegment = new Map();
   }
@@ -1594,11 +1637,16 @@ export class LateInteractionIndex {
   async save() {
     await fs.mkdir(path.dirname(this.indexPath), { recursive: true });
+    // Total doc count including any flushed-and-evicted segments (bounded build
+    // mode). In normal mode `_evictedDocs` is 0, so this is byte-identical to
+    // `this.documents.size`.
+    const effectiveTotal = this.documents.size + this._evictedDocs;
     // Use segmented format when the doc count exceeds one segment.
     // Always rewrite ALL segments from this.documents (the authoritative
     // state) — never reuse stale segment files from a previous load,
     // because documents may have been removed since then.
-    const useSegmented = this.documents.size >= this._segmentSize;
+    const useSegmented = effectiveTotal >= this._segmentSize;
     if (useSegmented) {
       if (!this._loadedExisting) {
@@ -1607,7 +1655,7 @@ export class LateInteractionIndex {
         }
         const flushedCount = this._segments.reduce((sum, segment) => sum + segment.count, 0);
-        if (flushedCount === this.documents.size && this._segments.length > 0) {
+        if (flushedCount === effectiveTotal && this._segments.length > 0) {
           // Staging-aware segment directory. _segmentDir was pre-seeded by
           // resetForSave() when staging; otherwise derive from indexPath.
           const segDir = this._segmentDir || (this.indexPath + '.segments');
@@ -1623,7 +1671,7 @@ export class LateInteractionIndex {
             poolFactor: this.poolFactor,
             whtSeed: this.whtSeed || 0,
             whtOrdering: this.whtOrdering,
-            totalDocuments: this.documents.size,
+            totalDocuments: effectiveTotal,
             segments: this._segments.map((segment) => ({
               path: path.basename(segment.path),
               count: segment.count,
@@ -2362,13 +2410,16 @@ export class LateInteractionIndex {
    * Get index statistics
    */
   getStats() {
-    let totalTokens = 0;
+    let totalTokens = this._evictedTokens || 0;
     for (const doc of this.documents.values()) {
       totalTokens += doc.numTokens;
     }
-    const avgTokens = this.documents.size > 0 ?
-      (totalTokens / this.documents.size).toFixed(1) : 0;
+    // In bounded build mode, flushed docs are evicted from `documents` but their
+    // counts live in `_evictedDocs`/`_evictedTokens` so stats stay accurate.
+    const docCount = this.documents.size + (this._evictedDocs || 0);
+    const avgTokens = docCount > 0 ?
+      (totalTokens / docCount).toFixed(1) : 0;
     let bytesPerToken;
     if (this.quantBits === 4) {
@@ -2381,7 +2432,7 @@ export class LateInteractionIndex {
     const estimatedMB = (totalTokens * bytesPerToken / 1024 / 1024).toFixed(2);
     return {
-      documents: this.documents.size,
+      documents: docCount,
       totalTokens,
       avgTokensPerDoc: avgTokens,
       tokenDim: this.tokenDim,

package/core/search/daemon-registry.js ADDED Viewed

@@ -0,0 +1,199 @@
+/**
+ * Best-effort resident search-daemon registry (footprint cap support).
+ *
+ * Backs the optional SWEET_SEARCH_MAX_DAEMONS cap (Part 2 of the daemon
+ * footprint work). Each warm search daemon, WHEN the cap is opted into,
+ * upserts a single entry describing itself into one shared JSON file and
+ * refreshes it on a coarse timer. A daemon enforcing the cap reads the file,
+ * prunes entries whose process is gone or whose socket no longer answers, and
+ * (when more daemons are resident than the cap allows) sends /stop to the
+ * least-recently-active peers — never itself, never the most-recently-active.
+ *
+ * Properties:
+ *   - ONLY search daemons ever call this module. The index maintainer
+ *     (core/indexing/*) never imports it, so a maintainer can never be
+ *     enumerated, listed, or signalled through the registry.
+ *   - Every operation is best-effort: a redundant eviction is harmless and a
+ *     read/write race resolves to "do nothing this tick". All I/O is
+ *     try/caught; writes are atomic (tmp + rename) so a crash mid-write never
+ *     leaves a torn file.
+ *   - lastActivityMs stores REAL query activity (the daemon's /search and
+ *     /read-semantic wall-clock), so "least-recently-active" == least-recently
+ *     queried. The actively-used repo's daemon is never evicted by an
+ *     equally-or-less-recently-active peer; the one residual race is a
+ *     newly-STARTED peer, which is freshest-by-construction (its startedAt
+ *     seeds lastActivityMs) and may evict a recently-active-but-stale-stamped
+ *     peer within one registry-refresh interval, because the registry reflects
+ *     activity only as of each daemon's coarse registryTouchSelf tick.
+ */
+import fs from 'node:fs/promises';
+import { readFileSync } from 'node:fs';
+import http from 'node:http';
+const DEFAULT_REGISTRY_PATH = '/tmp/sweet-search-daemons.json'; // fallback when SWEET_SEARCH_DAEMON_REGISTRY is unset or empty
+/**
+ * Path to the shared registry file (override via SWEET_SEARCH_DAEMON_REGISTRY for tests).
+ *
+ * @param {Object} [env=process.env] - Environment map the override is read from.
+ * @returns {string} The override when it is a non-empty string, otherwise DEFAULT_REGISTRY_PATH.
+ */
+export function registryPath(env = process.env) {
+  return env.SWEET_SEARCH_DAEMON_REGISTRY || DEFAULT_REGISTRY_PATH; // `||`: an empty-string override also falls back
+}
+/**
+ * Is a process with this pid alive right now? Treats EPERM (process owned by
+ * another user) as alive — standard `kill -0` probe.
+ *
+ * @param {number|string} pid - Candidate pid; coerced with Number(), so numeric strings are accepted.
+ * @returns {boolean} false for non-integer or non-positive input; true when the
+ *   probe succeeds or fails with EPERM; falsy for any other probe failure.
+ *
+ * NOTE(review): the catch branch returns `err && ...`, so if a falsy value were
+ * ever thrown the result would be that falsy value rather than a strict
+ * `false`. Callers in this file only use the result in boolean context.
+ */
+export function pidAlive(pid) {
+  const n = Number(pid);
+  if (!Number.isInteger(n) || n <= 0) return false; // rejects NaN, fractions, 0 and negative values before probing
+  try {
+    process.kill(n, 0); // signal 0: existence/permission check only, nothing is delivered
+    return true;
+  } catch (err) {
+    return err && err.code === 'EPERM'; // EPERM means the process exists but we may not signal it
+  }
+}
+/**
+ * Read + parse the registry, returning a { "<pid>": entry } map ({} on any error).
+ *
+ * The file is expected to hold a JSON document of the form `{ "daemons": { ... } }`;
+ * only the `daemons` member is returned.
+ *
+ * @param {Object} [env=process.env] - Environment map used to resolve the registry path.
+ * @returns {Promise<Object>} The parsed `daemons` value, or a fresh `{}` when the
+ *   file cannot be read, is not valid JSON, or has no object-typed `daemons` member.
+ *
+ * NOTE(review): the `typeof ... === 'object'` checks also accept arrays, so a
+ * file containing `"daemons": [...]` would be returned as an array — confirm
+ * whether that needs to be rejected.
+ */
+export async function readRegistry(env = process.env) {
+  try {
+    const raw = await fs.readFile(registryPath(env), 'utf-8');
+    const parsed = JSON.parse(raw);
+    const daemons = parsed && typeof parsed === 'object' ? parsed.daemons : null; // guards against null / primitive JSON roots
+    return daemons && typeof daemons === 'object' ? daemons : {};
+  } catch {
+    return {}; // missing file, unreadable file and malformed JSON all read as "no daemons"
+  }
+}
+/**
+ * Atomically persist the daemon map (tmp + rename). Best-effort: swallows errors.
+ *
+ * The map is serialised as `{ "daemons": <map> }` into a per-process temp file
+ * next to the target, which is then renamed over the target.
+ *
+ * @param {Object} daemons - Map of "<pid>" -> entry to persist.
+ * @param {Object} [env=process.env] - Environment map used to resolve the registry path.
+ * @returns {Promise<boolean>} true once the rename succeeded; false if the write
+ *   or the rename failed (the temp file is then removed on a best-effort basis).
+ */
+async function writeRegistryAtomic(daemons, env = process.env) {
+  const target = registryPath(env);
+  // Per-pid tmp suffix so two daemons writing concurrently never collide on the
+  // tmp file; the rename is atomic so the reader always sees a whole document.
+  const tmp = `${target}.${process.pid}.tmp`;
+  try {
+    await fs.writeFile(tmp, JSON.stringify({ daemons }), { mode: 0o600 }); // owner read/write only — presumably applies only when the tmp file is newly created; confirm
+    await fs.rename(tmp, target);
+    return true;
+  } catch {
+    try { await fs.unlink(tmp); } catch { /* ignore */ } // do not leave a stray tmp file behind after a failure
+    return false;
+  }
+}
+/**
+ * Insert/replace this daemon's entry.
+ *
+ * @param {Object} entry - Entry to store; `entry.pid` supplies the map key. Other
+ *   functions in this module read `socketPath` and `lastActivityMs` from entries.
+ * @param {Object} [env=process.env] - Environment map used to resolve the registry path.
+ * @returns {Promise<boolean>} Result of the atomic write (false when it failed).
+ *
+ * NOTE(review): this is an unlocked read-modify-write; no locking is visible
+ * here, so an update written by another process between the read and the
+ * rename would presumably be overwritten — consistent with the module's
+ * stated best-effort design, but worth confirming.
+ */
+export async function upsertSelf(entry, env = process.env) {
+  const daemons = await readRegistry(env);
+  daemons[String(entry.pid)] = { ...entry }; // shallow copy so the stored entry is detached from the caller's object
+  return writeRegistryAtomic(daemons, env);
+}
+/**
+ * Refresh this daemon's lastActivityMs (no-op if its entry vanished).
+ *
+ * @param {number|string} pid - Pid whose entry should be updated; stringified to form the key.
+ * @param {number} lastActivityMs - New activity timestamp to store on the entry.
+ * @param {Object} [env=process.env] - Environment map used to resolve the registry path.
+ * @returns {Promise<boolean>} false without writing when no entry exists for the
+ *   pid; otherwise the result of the atomic write.
+ */
+export async function touchSelf(pid, lastActivityMs, env = process.env) {
+  const daemons = await readRegistry(env);
+  const key = String(pid);
+  if (!daemons[key]) return false; // never re-create an entry that was pruned or removed
+  daemons[key].lastActivityMs = lastActivityMs;
+  return writeRegistryAtomic(daemons, env);
+}
+/**
+ * Remove this daemon's entry (called on graceful shutdown).
+ *
+ * @param {number|string} pid - Pid whose entry should be deleted; stringified to form the key.
+ * @param {Object} [env=process.env] - Environment map used to resolve the registry path.
+ * @returns {Promise<boolean>} false without writing when the key is not present;
+ *   otherwise the result of the atomic write.
+ */
+export async function removeSelf(pid, env = process.env) {
+  const daemons = await readRegistry(env);
+  const key = String(pid);
+  if (!(key in daemons)) return false; // nothing to remove, so skip the rewrite
+  delete daemons[key];
+  return writeRegistryAtomic(daemons, env);
+}
+/**
+ * GET /health over an explicit unix socket. Resolves true on a 200, false
+ * otherwise (unreachable, non-200, timeout). Mirrors getServerHealth's probe
+ * but parameterised by socket so we can check peers, not just our own.
+ *
+ * The returned promise never rejects: request errors, the timeout and a
+ * synchronous throw from http.request all resolve false. Several of these
+ * paths can call resolve() for one request; only the first call has an effect.
+ *
+ * @param {string} socketPath - Unix socket path of the daemon to probe.
+ * @param {number} [timeoutMs=500] - Timeout in milliseconds passed to req.setTimeout.
+ * @returns {Promise<boolean>} true only when the response ended with status 200.
+ *
+ * NOTE(review): the response has no 'close'/'error' listener. If a peer sends
+ * headers and the connection then drops before 'end', resolution appears to
+ * depend on the timeout (or a request-level error) still firing — confirm.
+ */
+export function socketHealthy(socketPath, timeoutMs = 500) {
+  return new Promise((resolve) => {
+    try {
+      const req = http.request({ socketPath, path: '/health', method: 'GET' }, (res) => {
+        res.on('data', () => {}); // consume and discard the body so 'end' can fire
+        res.on('end', () => resolve(res.statusCode === 200));
+      });
+      req.on('error', () => resolve(false)); // e.g. socket missing or connection refused
+      req.setTimeout(timeoutMs, () => { req.destroy(); resolve(false); }); // give up on a peer that does not answer in time
+      req.end();
+    } catch {
+      resolve(false); // synchronous failure while creating the request
+    }
+  });
+}
+/**
+ * Prune entries whose process is gone OR whose socket no longer answers
+ * /health, persist the pruned map, and return the surviving (live) entries.
+ *
+ * `probe` lets tests inject a synchronous/async liveness override; by default
+ * the registry uses pidAlive + socketHealthy. Best-effort throughout.
+ *
+ * Entries are probed one at a time, in the order Object.entries yields them.
+ * Values that are not objects are skipped and therefore also dropped from the
+ * persisted map. The file is rewritten only when at least one entry was
+ * dropped; the result of that write is not inspected.
+ *
+ * @param {Object} [options]
+ * @param {Object} [options.env=process.env] - Environment map used to resolve the registry path.
+ * @param {?function(Object): (boolean|Promise<boolean>)} [options.probe=null] -
+ *   Liveness override; when given it fully replaces the pid + socket checks.
+ * @param {number} [options.timeoutMs=500] - Per-entry timeout passed to socketHealthy.
+ * @returns {Promise<Object[]>} The entries that passed the liveness check.
+ */
+export async function pruneAndList({ env = process.env, probe = null, timeoutMs = 500 } = {}) {
+  const daemons = await readRegistry(env);
+  const live = [];
+  const liveMap = {};
+  for (const [key, entry] of Object.entries(daemons)) {
+    if (!entry || typeof entry !== 'object') continue; // malformed entry: treat as dead
+    let ok;
+    if (probe) {
+      ok = await probe(entry);
+    } else {
+      ok = pidAlive(entry.pid) && await socketHealthy(entry.socketPath, timeoutMs); // socket is probed only when the pid check passes
+    }
+    if (ok) {
+      live.push(entry);
+      liveMap[key] = entry;
+    }
+  }
+  if (Object.keys(liveMap).length !== Object.keys(daemons).length) { // something was pruned, so persist the smaller map
+    await writeRegistryAtomic(liveMap, env);
+  }
+  return live;
+}
+/**
+ * Pick up to `count` eviction targets: the least-recently-active peers that are
+ * NOT self AND strictly less-recently-active than self, sorted oldest-first.
+ *
+ * The "older than self" gate is what makes CONCURRENT enforcement safe: every
+ * resident daemon runs this independently, but a daemon only ever sheds peers
+ * less active than itself — never itself, never a more-recently-active peer. So
+ * the union of all daemons' evictions is exactly the surplus (the oldest
+ * live.length-cap daemons): the newest daemon alone already targets precisely
+ * that set, and every other daemon targets a subset of it. The actively-used
+ * repo's daemon (freshest lastActivityMs) is therefore never evicted by an
+ * equally-or-less-recently-active peer — though a newly-started peer, freshest
+ * by construction, may evict it within one registry-refresh interval before
+ * its next registryTouchSelf tick re-stamps it. The cap converges without
+ * over-shooting below it.
+ *
+ * When self is absent from the list (e.g. an unregistered caller, or tests),
+ * the gate falls back to "any non-self", i.e. plain least-recently-active.
+ *
+ * A missing (null/undefined) lastActivityMs is treated as 0, for self and for
+ * peers alike. The input array is not reordered: filter() produces a new array
+ * before the sort.
+ *
+ * @param {Object[]} liveEntries - Live registry entries; falsy elements are ignored.
+ * @param {number|string} selfPid - Pid of the calling daemon; compared as a string.
+ * @param {number} count - Maximum number of targets to return.
+ * @returns {Object[]} Up to `count` entries, oldest lastActivityMs first; `[]`
+ *   when `liveEntries` is not an array or `count <= 0`.
+ */
+export function selectEvictionTargets(liveEntries, selfPid, count) {
+  if (!Array.isArray(liveEntries) || count <= 0) return [];
+  const selfKey = String(selfPid);
+  const self = liveEntries.find((e) => e && String(e.pid) === selfKey);
+  const cutoff = self ? (self.lastActivityMs ?? 0) : Infinity; // Infinity: with no self entry every peer passes the gate
+  return liveEntries
+    .filter((e) => e && String(e.pid) !== selfKey && (e.lastActivityMs ?? 0) < cutoff) // strict `<`: peers tied with self are kept
+    .sort((a, b) => (a.lastActivityMs ?? 0) - (b.lastActivityMs ?? 0))
+    .slice(0, count);
+}
+/**
+ * Synchronous registry read (used only by diagnostics/tests).
+ *
+ * Applies the same parsing and validation as readRegistry, but blocks on
+ * readFileSync instead of awaiting fs.readFile.
+ *
+ * @param {Object} [env=process.env] - Environment map used to resolve the registry path.
+ * @returns {Object} The parsed `daemons` value, or a fresh `{}` when the file
+ *   cannot be read, is not valid JSON, or has no object-typed `daemons` member.
+ */
+export function readRegistrySync(env = process.env) {
+  try {
+    const parsed = JSON.parse(readFileSync(registryPath(env), 'utf-8'));
+    const daemons = parsed && typeof parsed === 'object' ? parsed.daemons : null; // guards against null / primitive JSON roots
+    return daemons && typeof daemons === 'object' ? daemons : {};
+  } catch {
+    return {}; // any read or parse failure reads as "no daemons"
+  }
+}

package/core/search/search-read-semantic.js CHANGED Viewed

@@ -445,9 +445,9 @@ function _scoreSymbol(chunks, queryTerms, queryRaw) {
   return scores;
 }
-async function _scoreLateInteraction(chunks, query, projectRoot) {
+async function _scoreLateInteraction(chunks, query, projectRoot, lateInteractionIndexOverride = null) {
   if (chunks.length === 0) return { scores: new Map(), ran: false };
-  const liIndex = await _getLateInteractionIndex(projectRoot);
+  const liIndex = lateInteractionIndexOverride || await _getLateInteractionIndex(projectRoot);
   if (!liIndex) return { scores: new Map(), ran: false };
   // Only score chunks whose IDs actually appear in the LI index. Use the
@@ -625,6 +625,7 @@ function _fallbackSpanFromText(fileText, totalLines, maxChars) {
  * @param {number} [req.maxTokens] - Convenience: ~maxChars / 4
  * @param {string} [req.projectRoot]
  * @param {boolean} [req.verbose=false] - include timings + signal contributions
+ * @param {Object} [req._lateInteractionIndex] - private daemon injection; same-project index only
  * @returns {Promise<Object>}
  */
 async function _readSemanticUnpinned(req) {
@@ -686,7 +687,12 @@ async function _readSemanticUnpinned(req) {
   const tLex1 = performance.now();
   const tLi0 = performance.now();
-  const { scores: maxsimScores, ran: liRan } = await _scoreLateInteraction(chunks, req.query, projectRoot);
+  const { scores: maxsimScores, ran: liRan } = await _scoreLateInteraction(
+    chunks,
+    req.query,
+    projectRoot,
+    req._lateInteractionIndex || null,
+  );
   const tLi1 = performance.now();
   // Threshold gate on MaxSim — drop chunks whose LI score is too low. This

package/core/search/search-semantic.js CHANGED Viewed

@@ -617,16 +617,12 @@ export async function semanticSearchStandard(query, options = {}) {
   let candidates;
-  if (this.hasHnswIndex) {
-    // ADAPTIVE CANDIDATE SIZING: Reduce candidates for simple queries
-    const baseNumCandidates = rerank ? Math.max(k * 10, 100) : k;
-    const numCandidates = this.getAdaptiveCandidateCount(query, baseNumCandidates);
-    const hnswResult = await this.hnswIndex.search(queryEmbedding, numCandidates);
-    candidates = hnswResult.results;
-    this.log(`HNSW: ${hnswResult.latency_us}us for ${hnswResult.k} candidates (adaptive: ${numCandidates})`);
-  } else if (this.hasCodebaseIndex) {
-    // Fallback: O(N) scan from SQLite
+  // Non-3-stage ("Standard") path: the binary 3-stage cascade is the default
+  // (see semanticSearch dispatcher). This path is reached only when 3-stage is
+  // disabled or no binary index exists, and scans float vectors directly from
+  // SQLite. (The legacy usearch float-HNSW shortcut was removed.)
+  if (this.hasCodebaseIndex) {
+    // O(N) scan from SQLite
     candidates = await this.vectorScan(queryEmbedding, rerank ? 100 : k);
     this.log(`Vector scan: ${candidates.length} candidates`);
   } else {
@@ -790,22 +786,3 @@ export function shouldSkipRerank(scores, options = {}) {
   return { skip: false, reason: 'needs_rerank' };
 }
-/**
- * Adaptive candidate count based on query complexity
- */
-export function getAdaptiveCandidateCount(query, baseCount) {
-  const trimmed = query.trim();
-  // Very short queries (likely identifiers): use 50% of base
-  if (trimmed.length < 15) {
-    return Math.max(Math.floor(baseCount * 0.5), 20);
-  }
-  // Short queries without question words: use 75% of base
-  if (trimmed.length < 30 && !/\b(how|what|where|why|when|which)\b/i.test(trimmed)) {
-    return Math.max(Math.floor(baseCount * 0.75), 30);
-  }
-  // Complex queries (questions, long): use full base
-  return baseCount;
-}