npm - sweet-search - Versions diffs - 2.5.13 → 2.6.0 - Mend

sweet-search 2.5.13 → 2.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (52)

package/README.md +36 -9
package/core/cli.js +41 -3
package/core/embedding/embedding-local-model.js +106 -10
package/core/embedding/embedding-service.js +59 -1
package/core/embedding/model-client.mjs +257 -0
package/core/embedding/model-server.mjs +217 -0
package/core/incremental-indexing/application/maintenance-handlers.mjs +19 -98
package/core/incremental-indexing/application/maintenance-worker.mjs +46 -9
package/core/incremental-indexing/application/operator-cli.mjs +14 -5
package/core/incremental-indexing/application/production-reconciler-helpers.mjs +40 -0
package/core/incremental-indexing/application/production-reconciler.mjs +718 -54
package/core/incremental-indexing/application/reconciler.mjs +87 -15
package/core/incremental-indexing/domain/cutoff-cache.mjs +191 -0
package/core/incremental-indexing/domain/interval-autotune.mjs +84 -1
package/core/incremental-indexing/domain/reconcile-counters.mjs +0 -4
package/core/incremental-indexing/domain/watermark-scheduler.mjs +0 -24
package/core/incremental-indexing/infrastructure/maintenance-state-reader.mjs +2 -26
package/core/incremental-indexing/infrastructure/manifest.mjs +1 -9
package/core/incremental-indexing/infrastructure/sqlite-fts5.mjs +72 -0
package/core/indexing/artifact-builder.js +1 -1
package/core/indexing/dedup/dedup-phase.js +36 -17
package/core/indexing/dedup/exemplar-selector.js +5 -0
package/core/indexing/index-codebase-v21.js +37 -14
package/core/indexing/index-maintainer.mjs +337 -6
package/core/indexing/indexer-ann.js +27 -434
package/core/indexing/indexer-build.js +30 -14
package/core/indexing/indexer-manifest.js +0 -3
package/core/indexing/indexer-phases.js +101 -25
package/core/indexing/maintainer-launcher.mjs +22 -0
package/core/indexing/maintainer-watcher.mjs +397 -0
package/core/indexing/os-priority.mjs +160 -0
package/core/indexing/rss-budget.mjs +425 -0
package/core/indexing/streaming-vectors.js +450 -0
package/core/infrastructure/config/platform.js +14 -10
package/core/infrastructure/onnx-session-utils.js +37 -0
package/core/infrastructure/sparse-gram-delta-reader.js +11 -1
package/core/ranking/late-interaction-index.js +58 -7
package/core/search/daemon-registry.js +199 -0
package/core/search/search-read-semantic.js +9 -3
package/core/search/search-semantic.js +6 -29
package/core/search/search-server.js +527 -27
package/core/search/session-daemon-prewarm.mjs +110 -1
package/core/search/sweet-search.js +0 -38
package/core/vector-store/binary-hnsw-index.js +692 -78
package/core/vector-store/index.js +1 -4
package/eval/agent-read-workflows/bin/_ss-argparse.mjs +51 -5
package/eval/agent-read-workflows/bin/_ss-helpers.mjs +95 -44
package/eval/agent-read-workflows/bin/ss-read +2 -0
package/mcp/tool-handlers.js +1 -2
package/package.json +11 -8
package/scripts/uninstall.js +2 -0
package/core/vector-store/hnsw-index.js +0 -751

package/core/incremental-indexing/infrastructure/sqlite-fts5.mjs CHANGED

@@ -173,4 +173,76 @@ export function fts5Merge(db, tableName, pages) {
   db.prepare(`INSERT INTO ${tableName}(${tableName}, rank) VALUES('merge', ?)`).run(pages);
 }
+/**
+ * Derive the FTS5 merge page budget from the spare CPU budget for a tick, using
+ * the token-bucket policy in lever E.5:
+ *
+ *   - tick fast (elapsed < `fastMs`, default 500 ms) → a small merge step
+ *     (`smallPages`, default 16 — the same fixed value the reconcile tick used
+ *     before budgeting, so a fast tick is byte/behavior-equivalent to today);
+ *   - tick busy (elapsed > `slowMs`, default 1800 ms) → skip the merge
+ *     (`null`) to leave CPU for reconcile;
+ *   - in between → the small step.
+ *
+ * Returns a positive integer page count, or `null` to skip the merge entirely.
+ *
+ * @param {{elapsedMs: number, fastMs?: number, slowMs?: number, smallPages?: number}} args
+ * @returns {number|null}
+ */
+export function fts5MergeBudgetPages({ elapsedMs, fastMs = 500, slowMs = 1800, smallPages = 16 } = {}) {
+  const elapsed = Number.isFinite(elapsedMs) ? elapsedMs : 0;
+  if (elapsed > slowMs) return null;
+  return smallPages;
+}
+/**
+ * Derive the watermark-handler FTS5 merge page budget from the wall-clock budget
+ * remaining for the maintenance drain. A generous budget keeps the original
+ * aggressive `pages=500`; a tight remaining budget scales the page count down
+ * (floor 16) so a near-exhausted drain still makes a small step of forward
+ * progress rather than blowing the budget on one 500-page merge.
+ *
+ * @param {{remainingMs: number, maxPages?: number, minPages?: number, fullBudgetMs?: number}} args
+ * @returns {number}
+ */
+export function fts5WatermarkBudgetPages({ remainingMs, maxPages = 500, minPages = 16, fullBudgetMs = 2000 } = {}) {
+  if (!Number.isFinite(remainingMs) || remainingMs >= fullBudgetMs) return maxPages;
+  if (remainingMs <= 0) return minPages;
+  const scaled = Math.round((remainingMs / fullBudgetMs) * maxPages);
+  return Math.max(minPages, Math.min(maxPages, scaled));
+}
+/**
+ * Run a full FTS5 `('optimize')` rewrite of one table, then immediately
+ * `wal_checkpoint(TRUNCATE)` to flush the (potentially large) optimize
+ * transaction out of the WAL and truncate it back to zero — guarding the
+ * documented 256 MiB WAL-bloat alarm that is the reason `fts5Merge` itself
+ * never calls optimize (see `fts5Merge` note above).
+ *
+ * Lever E.5 (`SWEET_SEARCH_RECONCILE_FTS5_OPTIMIZE`). The CALLER is responsible
+ * for the gate: optimize must run ONLY on true-idle (consecutive empty ticks)
+ * AND only when a table-size check says it is worth it. This helper does not
+ * decide *when* — it just performs the optimize + checkpoint as one safe unit.
+ *
+ * `('optimize')` is a single-transaction rewrite: it merges every segment into
+ * one. It is idempotent (a second call on an already-optimized table is a near
+ * no-op) and output-equivalent to a fully-merged index — query results are
+ * unchanged.
+ *
+ * @param {import('better-sqlite3').Database} db
+ * @param {string} tableName  Name of the FTS5 virtual table (not the shadow).
+ * @returns {{optimized: boolean}}
+ */
+export function fts5Optimize(db, tableName) {
+  if (!/^[A-Za-z_][A-Za-z0-9_]*$/.test(tableName)) {
+    throw new Error(`fts5Optimize: invalid table name ${tableName}`);
+  }
+  db.prepare(`INSERT INTO ${tableName}(${tableName}) VALUES('optimize')`).run();
+  // Flush + truncate the WAL the optimize transaction just grew. Best-effort:
+  // a checkpoint failure (e.g. an active reader) must not turn a successful
+  // optimize into a thrown error on the maintainer path.
+  try { db.pragma('wal_checkpoint(TRUNCATE)'); } catch {}
+  return { optimized: true };
+}
 export const __testing = { readVarint, STRUCTURE_ROWID };

package/core/indexing/artifact-builder.js CHANGED

@@ -172,7 +172,7 @@ export async function shouldSkipArtifactRebuild(options = {}) {
   // Skip - not enough changes, artifacts exist, within time window
   return {
     shouldSkip: true,
-    reason: `Only ${changedFiles} files changed (threshold: ${ARTIFACT_THRESHOLDS.skipThreshold}), Float HNSW will serve search`,
+    reason: `Only ${changedFiles} files changed (threshold: ${ARTIFACT_THRESHOLDS.skipThreshold})`,
     state,
     accumulatedTotal,
   };

package/core/indexing/dedup/dedup-phase.js CHANGED

@@ -60,10 +60,29 @@ export async function runDedupPhase(allChunks, config = DEDUP_CONFIG) {
   const fingerprints = computeFingerprints(texts, config);
   const clusters = clusterFingerprints(fingerprints, config);
-  // Seed every chunk with simhash + self-exemplar defaults.
-  for (let i = 0; i < allChunks.length; i++) {
-    const chunk = allChunks[i];
-    const meta = (chunk.metadata = chunk.metadata || {});
+  const stats = annotateDedupClusters(allChunks, fingerprints, clusters, config);
+  return { skipped: false, stats };
+}
+/**
+ * Apply dedup annotations (simhash + exemplar/alias assignment) to a list of
+ * `items` given their `fingerprints` and the `clusters` from
+ * clusterFingerprints(). Mutates each item's `.metadata` in place and returns
+ * the stats object.
+ *
+ * `items[i]` must expose `.id`, a mutable `.metadata`, and be acceptable to
+ * selectExemplar (text length via `.text`/`.content` OR a precomputed
+ * `._textLen`, plus path/hash fields). The full-corpus path passes the chunk
+ * objects directly; the streaming path passes lightweight per-chunk records
+ * (text spilled to disk, length carried as `_textLen`) so the SAME global
+ * dedup runs without holding every chunk's text in memory. Both paths produce
+ * byte-identical annotations.
+ */
+export function annotateDedupClusters(items, fingerprints, clusters, config = DEDUP_CONFIG) {
+  // Seed every item with simhash + self-exemplar defaults.
+  for (let i = 0; i < items.length; i++) {
+    const meta = (items[i].metadata = items[i].metadata || {});
     meta.simhash = fingerprints[i].simhashHex;
     meta.isExemplar = true;
     meta.exemplarId = null;
@@ -78,6 +97,9 @@ export async function runDedupPhase(allChunks, config = DEDUP_CONFIG) {
     ? (config.liReuseJaccardThreshold ?? 0.95)
     : Infinity;
+  const lenOf = (it) =>
+    typeof it._textLen === 'number' ? it._textLen : (it.text || it.content || '').length;
   for (const cluster of clusters) {
     if (!cluster.siblingIdxs || cluster.siblingIdxs.length === 0) continue;
     clustersWithSiblings++;
@@ -98,12 +120,12 @@ export async function runDedupPhase(allChunks, config = DEDUP_CONFIG) {
     }
     const memberIdxs = [cluster.exemplarIdx, ...cluster.siblingIdxs];
-    const members = memberIdxs.map((idx) => ({ idx, chunk: allChunks[idx] }));
+    const members = memberIdxs.map((idx) => ({ idx, chunk: items[idx] }));
     const exemplar = selectExemplar(members);
-    const exemplarId = allChunks[exemplar.idx].id;
+    const exemplarId = items[exemplar.idx].id;
     for (const m of members) {
-      const meta = allChunks[m.idx].metadata;
+      const meta = items[m.idx].metadata;
       meta.clusterId = cluster.clusterId;
       if (m.idx === exemplar.idx) {
         meta.isExemplar = true;
@@ -123,21 +145,18 @@ export async function runDedupPhase(allChunks, config = DEDUP_CONFIG) {
         meta.liReuseEligible = j >= liJaccardThreshold;
         if (meta.liReuseEligible) liEligibleAliases++;
         totalAliases++;
-        bytesSaved += (allChunks[m.idx].text || allChunks[m.idx].content || '').length;
+        bytesSaved += lenOf(items[m.idx]);
       }
     }
   }
   return {
-    skipped: false,
-    stats: {
-      totalChunks: allChunks.length,
-      clustersWithSiblings,
-      totalAliases,
-      liEligibleAliases,
-      liReuseJaccardThreshold: liJaccardThreshold === Infinity ? null : liJaccardThreshold,
-      bytesSaved,
-    },
+    totalChunks: items.length,
+    clustersWithSiblings,
+    totalAliases,
+    liEligibleAliases,
+    liReuseJaccardThreshold: liJaccardThreshold === Infinity ? null : liJaccardThreshold,
+    bytesSaved,
   };
 }

package/core/indexing/dedup/exemplar-selector.js CHANGED

@@ -12,6 +12,11 @@
  */
 function lengthOf(chunk) {
+  // Streaming dedup passes lightweight records that carry a precomputed text
+  // length (`_textLen`) instead of the full text (which lives on disk). Full
+  // chunks have no `_textLen`, so they fall through to the original behavior —
+  // byte-identical exemplar selection on the in-memory path.
+  if (typeof chunk._textLen === 'number') return chunk._textLen;
   return (chunk.text || chunk.content || '').length;
 }

package/core/indexing/index-codebase-v21.js CHANGED

@@ -41,7 +41,6 @@ import { existsSync } from 'fs';
 import { DB_PATHS, LATE_INTERACTION_CONFIG } from '../infrastructure/config/index.js';
 import { applyPersistedLiModel } from '../infrastructure/init-config.js';
 import { resolveRelationshipTargets } from '../graph/relationship-resolver.js';
-import { requireNativeAnn as requireNativeAnnBackend } from '../vector-store/hnsw-index.js';
 import { getStats as getIncrementalStats } from './incremental-tracker.js';
 import { ARTIFACT_THRESHOLDS } from './artifact-builder.js';
@@ -58,7 +57,6 @@ import {
   buildVectorIndex,
 } from './indexer-build.js';
 import {
-  incrementalUpdateHNSW, buildHNSWIndex,
   buildLateInteractionIndex, buildQuantizedArtifactsPhase,
 } from './indexer-ann.js';
 import {
@@ -94,7 +92,6 @@ function parseArgs(argv) {
     lateInteractionModel: args.find(a => a.startsWith('--late-interaction-model='))?.split('=')[1] || null,
     lateInteractionPool: parseInt(args.find(a => a.startsWith('--late-interaction-pool='))?.split('=')[1] || process.env.SWEET_SEARCH_LI_POOL_FACTOR || '1', 10),
     lateInteractionExtendedSkiplist: args.includes('--late-interaction-skiplist=extended'),
-    requireNativeAnn: args.includes('--require-native-ann'),
     sqliteFastMode: args.includes('--sqlite-fast') || process.env.SWEET_SEARCH_SQLITE_FAST_MODE === '1',
     verbose: args.includes('--verbose') || args.includes('-v'),
   };
@@ -110,7 +107,7 @@ async function main() {
   const { dryRun, graphOnly, vectorsOnly, fullReindex, showStats, resolveOnly,
           skipSummaryRegen, filesFromStdin, quiet, forceArtifacts, help,
           noLateInteraction, lateInteractionModel, lateInteractionPool, lateInteractionExtendedSkiplist,
-          requireNativeAnn, sqliteFastMode, verbose } = parseArgs();
+          sqliteFastMode, verbose } = parseArgs();
   if (quiet) {
     setQuietMode(true);
@@ -172,8 +169,6 @@ Options:
   --late-interaction-model=ID  Use specific model (lateon-code or lateon-code-edge)
   --late-interaction-pool=N    Token pooling factor (2=halve tokens, 3=third). Reduces index size.
   --late-interaction-skiplist=extended  Extend skiplist with code-noise tokens (whitespace, semicolons)
-  --require-native-ann  Fail fast if native ANN backend (usearch) is unavailable.
-                   Prevents accidental fallback to slower JS ANN in benchmarks.
   --sqlite-fast    Use unsafe SQLite pragmas for faster builds (benchmarking only).
                    Can also be set via SWEET_SEARCH_SQLITE_FAST_MODE=1.
                    WARNING: Data may be lost on crash - do NOT use in production.
@@ -206,9 +201,9 @@ This is intentional since relationships span across files.
 Output:
   .sweet-search/code-graph.db      Code graph with FTS5 (lexical search)
-  .sweet-search/codebase.db        Vector embeddings (semantic search)
-  .sweet-search/codebase-hnsw.idx  HNSW index (fast ANN)
-  .sweet-search/merkle-state.json  Incremental indexing state
+  .sweet-search/codebase.db               Vector embeddings (semantic search)
+  .sweet-search/codebase-binary-hnsw.idx  Binary HNSW index (fast ANN)
+  .sweet-search/merkle-state.json         Incremental indexing state
 `);
     process.exit(0);
   }
@@ -252,10 +247,6 @@ Output:
     return;
   }
-  if (requireNativeAnn) {
-    await requireNativeAnnBackend();
-  }
   try {
     // =========================================================================
     // PHASE 1: File Discovery
@@ -473,6 +464,39 @@ const _isDirectRun = (() => {
   }
 })();
 if (_isDirectRun) {
+  // ── Bounded-memory guard (works on ANY device, NO heap cap, ZERO startup cost) ──
+  // On big-RAM machines Node auto-sizes its default old-space heap large, so V8
+  // defers GC and the embedding phase's transient per-batch garbage piles up
+  // until the OS OOM-kills the indexer on very large repos (e.g. swc ~164k
+  // chunks, libsql). The pipeline is already correctly streaming (chunks spill
+  // to disk; only lightweight records stay resident) — the live working set is
+  // ~2GB — so this only needs GC to actually RUN. Enable gc at runtime (no
+  // --expose-gc launch flag and no re-exec, so frequent incremental runs pay
+  // nothing) and proactively collect only when RSS climbs past a device-adaptive
+  // watermark. Changes only WHEN gc runs — never what is embedded or stored.
+  // Opt out with SWEET_SEARCH_NO_GC_GUARD=1; degrades to a no-op if unavailable.
+  if (!process.env.SWEET_SEARCH_NO_GC_GUARD) {
+    try {
+      let gc = globalThis.gc;
+      if (typeof gc !== 'function') {
+        const v8 = await import('node:v8');
+        const vm = await import('node:vm');
+        v8.setFlagsFromString('--expose-gc');
+        gc = vm.runInNewContext('gc');     // captured fn stays callable after reset below
+        v8.setFlagsFromString('--no-expose-gc');
+      }
+      if (typeof gc === 'function') {
+        const os = await import('node:os');
+        // GC watermark: ~half of RAM, clamped to [2GB, 6GB]. Live set is ~2GB, so
+        // peak RSS stays bounded well below OOM on any machine ≥4GB. The interval
+        // is unref'd and only GCs when RSS is actually high (never on small repos).
+        const rssLimit = Math.min(6 * 1024 ** 3, Math.max(2 * 1024 ** 3, Math.floor(os.totalmem() * 0.5)));
+        setInterval(() => {
+          try { if (process.memoryUsage().rss > rssLimit) gc(); } catch { /* best-effort */ }
+        }, 4000).unref();
+      }
+    } catch { /* GC guard unavailable — indexing proceeds unchanged */ }
+  }
   main().catch(err => {
     console.error(err);
     process.exit(1);
@@ -484,7 +508,6 @@ export {
   discoverFiles,
   buildCodeGraph,
   buildVectorIndex,
-  buildHNSWIndex,
   buildLateInteractionIndex,
   buildQuantizedArtifactsPhase,
   parseArgs,