sweet-search 2.5.13 → 2.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52) hide show
  1. package/README.md +36 -9
  2. package/core/cli.js +41 -3
  3. package/core/embedding/embedding-local-model.js +106 -10
  4. package/core/embedding/embedding-service.js +59 -1
  5. package/core/embedding/model-client.mjs +257 -0
  6. package/core/embedding/model-server.mjs +217 -0
  7. package/core/incremental-indexing/application/maintenance-handlers.mjs +19 -98
  8. package/core/incremental-indexing/application/maintenance-worker.mjs +46 -9
  9. package/core/incremental-indexing/application/operator-cli.mjs +14 -5
  10. package/core/incremental-indexing/application/production-reconciler-helpers.mjs +40 -0
  11. package/core/incremental-indexing/application/production-reconciler.mjs +718 -54
  12. package/core/incremental-indexing/application/reconciler.mjs +87 -15
  13. package/core/incremental-indexing/domain/cutoff-cache.mjs +191 -0
  14. package/core/incremental-indexing/domain/interval-autotune.mjs +84 -1
  15. package/core/incremental-indexing/domain/reconcile-counters.mjs +0 -4
  16. package/core/incremental-indexing/domain/watermark-scheduler.mjs +0 -24
  17. package/core/incremental-indexing/infrastructure/maintenance-state-reader.mjs +2 -26
  18. package/core/incremental-indexing/infrastructure/manifest.mjs +1 -9
  19. package/core/incremental-indexing/infrastructure/sqlite-fts5.mjs +72 -0
  20. package/core/indexing/artifact-builder.js +1 -1
  21. package/core/indexing/dedup/dedup-phase.js +36 -17
  22. package/core/indexing/dedup/exemplar-selector.js +5 -0
  23. package/core/indexing/index-codebase-v21.js +37 -14
  24. package/core/indexing/index-maintainer.mjs +337 -6
  25. package/core/indexing/indexer-ann.js +27 -434
  26. package/core/indexing/indexer-build.js +30 -14
  27. package/core/indexing/indexer-manifest.js +0 -3
  28. package/core/indexing/indexer-phases.js +101 -25
  29. package/core/indexing/maintainer-launcher.mjs +22 -0
  30. package/core/indexing/maintainer-watcher.mjs +397 -0
  31. package/core/indexing/os-priority.mjs +160 -0
  32. package/core/indexing/rss-budget.mjs +425 -0
  33. package/core/indexing/streaming-vectors.js +450 -0
  34. package/core/infrastructure/config/platform.js +14 -10
  35. package/core/infrastructure/onnx-session-utils.js +37 -0
  36. package/core/infrastructure/sparse-gram-delta-reader.js +11 -1
  37. package/core/ranking/late-interaction-index.js +58 -7
  38. package/core/search/daemon-registry.js +199 -0
  39. package/core/search/search-read-semantic.js +9 -3
  40. package/core/search/search-semantic.js +6 -29
  41. package/core/search/search-server.js +527 -27
  42. package/core/search/session-daemon-prewarm.mjs +110 -1
  43. package/core/search/sweet-search.js +0 -38
  44. package/core/vector-store/binary-hnsw-index.js +692 -78
  45. package/core/vector-store/index.js +1 -4
  46. package/eval/agent-read-workflows/bin/_ss-argparse.mjs +51 -5
  47. package/eval/agent-read-workflows/bin/_ss-helpers.mjs +95 -44
  48. package/eval/agent-read-workflows/bin/ss-read +2 -0
  49. package/mcp/tool-handlers.js +1 -2
  50. package/package.json +11 -8
  51. package/scripts/uninstall.js +2 -0
  52. package/core/vector-store/hnsw-index.js +0 -751
@@ -173,4 +173,76 @@ export function fts5Merge(db, tableName, pages) {
173
173
  db.prepare(`INSERT INTO ${tableName}(${tableName}, rank) VALUES('merge', ?)`).run(pages);
174
174
  }
175
175
 
176
/**
 * Derive the FTS5 merge page budget from the spare CPU budget for a tick, using
 * the token-bucket policy in lever E.5:
 *
 *   - tick fast (elapsed < `fastMs`, default 500 ms) → a small merge step
 *     (`smallPages`, default 16);
 *   - tick busy (elapsed > `slowMs`, default 1800 ms) → skip the merge
 *     (`null`) to leave CPU for reconcile;
 *   - in between → the same small step.
 *
 * Because the fast and in-between bands both yield `smallPages`, `fastMs` does
 * not currently influence the result; it is kept in the signature so callers
 * that pass it keep working.
 *
 * Handling of non-numeric / non-finite `elapsedMs`:
 *   - `+Infinity` is treated as a busy tick (compared against `slowMs` as-is);
 *   - anything else non-finite (`NaN`, `undefined`, `-Infinity`, non-numbers)
 *     is treated as 0 ms elapsed, i.e. a fast tick.
 *
 * @param {{elapsedMs: number, fastMs?: number, slowMs?: number, smallPages?: number}} args
 * @returns {number|null} `smallPages` to run a merge step, or `null` to skip it.
 */
export function fts5MergeBudgetPages({ elapsedMs, fastMs = 500, slowMs = 1800, smallPages = 16 } = {}) {
  let elapsed = 0;
  if (elapsedMs === Infinity) {
    // An unbounded tick is the opposite of "fast": never coerce it to 0.
    elapsed = Infinity;
  } else if (Number.isFinite(elapsedMs)) {
    elapsed = elapsedMs;
  }
  if (elapsed > slowMs) return null;
  return smallPages;
}
197
+
198
/**
 * Derive the watermark-handler FTS5 merge page budget from the wall-clock budget
 * remaining for the maintenance drain. A generous budget keeps the original
 * aggressive `pages=500`; a tight remaining budget scales the page count down
 * (floor 16) so a near-exhausted drain still makes a small step of forward
 * progress rather than blowing the budget on one 500-page merge.
 *
 * Decision order:
 *   1. `remainingMs === -Infinity`                 → `minPages` (no budget left);
 *   2. `remainingMs` otherwise non-finite (unknown,
 *      `NaN`, `+Infinity`) or `>= fullBudgetMs`     → `maxPages`;
 *   3. `remainingMs <= 0`                           → `minPages`;
 *   4. otherwise `round(remainingMs / fullBudgetMs * maxPages)` clamped to
 *      `[minPages, maxPages]`. If that scaled value is not a finite number
 *      (e.g. `fullBudgetMs` or `maxPages` is `NaN`) the conservative `minPages`
 *      is returned instead of leaking `NaN` to the merge call.
 *
 * @param {{remainingMs: number, maxPages?: number, minPages?: number, fullBudgetMs?: number}} args
 * @returns {number}
 */
export function fts5WatermarkBudgetPages({ remainingMs, maxPages = 500, minPages = 16, fullBudgetMs = 2000 } = {}) {
  // -Infinity is non-finite but unambiguously "exhausted"; it must not take the
  // "unknown budget → aggressive merge" branch below.
  if (remainingMs === -Infinity) return minPages;
  if (!Number.isFinite(remainingMs) || remainingMs >= fullBudgetMs) return maxPages;
  if (remainingMs <= 0) return minPages;

  const scaled = Math.round((remainingMs / fullBudgetMs) * maxPages);
  // Math.min/Math.max propagate NaN, so guard before clamping.
  if (!Number.isFinite(scaled)) return minPages;
  return Math.max(minPages, Math.min(maxPages, scaled));
}
214
+
215
/**
 * Run a full FTS5 `('optimize')` rewrite of one table, then immediately
 * `wal_checkpoint(TRUNCATE)` to flush the (potentially large) optimize
 * transaction out of the WAL and truncate it back to zero — guarding the
 * documented 256 MiB WAL-bloat alarm that is the reason `fts5Merge` itself
 * never calls optimize.
 *
 * Lever E.5 (`SWEET_SEARCH_RECONCILE_FTS5_OPTIMIZE`). The CALLER is responsible
 * for the gate: optimize must run ONLY on true-idle (consecutive empty ticks)
 * AND only when a table-size check says it is worth it. This helper does not
 * decide *when* — it just performs the optimize + checkpoint as one safe unit.
 *
 * `('optimize')` is a single-transaction rewrite: it merges every segment into
 * one. It is idempotent (a second call on an already-optimized table is a near
 * no-op) and output-equivalent to a fully-merged index — query results are
 * unchanged.
 *
 * The table name is interpolated into SQL (identifiers cannot be bound as
 * parameters), so it must be a string made only of `[A-Za-z0-9_]` and must not
 * start with a digit. Non-string values are rejected outright: the regex alone
 * would coerce `undefined` / `null` to the strings "undefined" / "null" and
 * accept them.
 *
 * @param {import('better-sqlite3').Database} db
 * @param {string} tableName Name of the FTS5 virtual table (not the shadow).
 * @returns {{optimized: boolean}}
 * @throws {Error} If `tableName` is not a string or is not a plain identifier.
 *   Errors from the optimize statement itself propagate; checkpoint errors do not.
 */
export function fts5Optimize(db, tableName) {
  const isPlainIdentifier =
    typeof tableName === 'string' && /^[A-Za-z_][A-Za-z0-9_]*$/.test(tableName);
  if (!isPlainIdentifier) {
    throw new Error(`fts5Optimize: invalid table name ${String(tableName)}`);
  }

  db.prepare(`INSERT INTO ${tableName}(${tableName}) VALUES('optimize')`).run();

  // Flush + truncate the WAL the optimize transaction just grew. Best-effort:
  // a checkpoint failure (e.g. an active reader) must not turn a successful
  // optimize into a thrown error on the maintainer path.
  try {
    db.pragma('wal_checkpoint(TRUNCATE)');
  } catch {
    /* deliberately ignored — the optimize itself already succeeded */
  }
  return { optimized: true };
}
247
+
176
248
  export const __testing = { readVarint, STRUCTURE_ROWID };
@@ -172,7 +172,7 @@ export async function shouldSkipArtifactRebuild(options = {}) {
172
172
  // Skip - not enough changes, artifacts exist, within time window
173
173
  return {
174
174
  shouldSkip: true,
175
- reason: `Only ${changedFiles} files changed (threshold: ${ARTIFACT_THRESHOLDS.skipThreshold}), Float HNSW will serve search`,
175
+ reason: `Only ${changedFiles} files changed (threshold: ${ARTIFACT_THRESHOLDS.skipThreshold})`,
176
176
  state,
177
177
  accumulatedTotal,
178
178
  };
@@ -60,10 +60,29 @@ export async function runDedupPhase(allChunks, config = DEDUP_CONFIG) {
60
60
  const fingerprints = computeFingerprints(texts, config);
61
61
  const clusters = clusterFingerprints(fingerprints, config);
62
62
 
63
- // Seed every chunk with simhash + self-exemplar defaults.
64
- for (let i = 0; i < allChunks.length; i++) {
65
- const chunk = allChunks[i];
66
- const meta = (chunk.metadata = chunk.metadata || {});
63
+ const stats = annotateDedupClusters(allChunks, fingerprints, clusters, config);
64
+
65
+ return { skipped: false, stats };
66
+ }
67
+
68
+ /**
69
+ * Apply dedup annotations (simhash + exemplar/alias assignment) to a list of
70
+ * `items` given their `fingerprints` and the `clusters` from
71
+ * clusterFingerprints(). Mutates each item's `.metadata` in place and returns
72
+ * the stats object.
73
+ *
74
+ * `items[i]` must expose `.id`, a mutable `.metadata`, and be acceptable to
75
+ * selectExemplar (text length via `.text`/`.content` OR a precomputed
76
+ * `._textLen`, plus path/hash fields). The full-corpus path passes the chunk
77
+ * objects directly; the streaming path passes lightweight per-chunk records
78
+ * (text spilled to disk, length carried as `_textLen`) so the SAME global
79
+ * dedup runs without holding every chunk's text in memory. Both paths produce
80
+ * byte-identical annotations.
81
+ */
82
+ export function annotateDedupClusters(items, fingerprints, clusters, config = DEDUP_CONFIG) {
83
+ // Seed every item with simhash + self-exemplar defaults.
84
+ for (let i = 0; i < items.length; i++) {
85
+ const meta = (items[i].metadata = items[i].metadata || {});
67
86
  meta.simhash = fingerprints[i].simhashHex;
68
87
  meta.isExemplar = true;
69
88
  meta.exemplarId = null;
@@ -78,6 +97,9 @@ export async function runDedupPhase(allChunks, config = DEDUP_CONFIG) {
78
97
  ? (config.liReuseJaccardThreshold ?? 0.95)
79
98
  : Infinity;
80
99
 
100
+ const lenOf = (it) =>
101
+ typeof it._textLen === 'number' ? it._textLen : (it.text || it.content || '').length;
102
+
81
103
  for (const cluster of clusters) {
82
104
  if (!cluster.siblingIdxs || cluster.siblingIdxs.length === 0) continue;
83
105
  clustersWithSiblings++;
@@ -98,12 +120,12 @@ export async function runDedupPhase(allChunks, config = DEDUP_CONFIG) {
98
120
  }
99
121
 
100
122
  const memberIdxs = [cluster.exemplarIdx, ...cluster.siblingIdxs];
101
- const members = memberIdxs.map((idx) => ({ idx, chunk: allChunks[idx] }));
123
+ const members = memberIdxs.map((idx) => ({ idx, chunk: items[idx] }));
102
124
  const exemplar = selectExemplar(members);
103
- const exemplarId = allChunks[exemplar.idx].id;
125
+ const exemplarId = items[exemplar.idx].id;
104
126
 
105
127
  for (const m of members) {
106
- const meta = allChunks[m.idx].metadata;
128
+ const meta = items[m.idx].metadata;
107
129
  meta.clusterId = cluster.clusterId;
108
130
  if (m.idx === exemplar.idx) {
109
131
  meta.isExemplar = true;
@@ -123,21 +145,18 @@ export async function runDedupPhase(allChunks, config = DEDUP_CONFIG) {
123
145
  meta.liReuseEligible = j >= liJaccardThreshold;
124
146
  if (meta.liReuseEligible) liEligibleAliases++;
125
147
  totalAliases++;
126
- bytesSaved += (allChunks[m.idx].text || allChunks[m.idx].content || '').length;
148
+ bytesSaved += lenOf(items[m.idx]);
127
149
  }
128
150
  }
129
151
  }
130
152
 
131
153
  return {
132
- skipped: false,
133
- stats: {
134
- totalChunks: allChunks.length,
135
- clustersWithSiblings,
136
- totalAliases,
137
- liEligibleAliases,
138
- liReuseJaccardThreshold: liJaccardThreshold === Infinity ? null : liJaccardThreshold,
139
- bytesSaved,
140
- },
154
+ totalChunks: items.length,
155
+ clustersWithSiblings,
156
+ totalAliases,
157
+ liEligibleAliases,
158
+ liReuseJaccardThreshold: liJaccardThreshold === Infinity ? null : liJaccardThreshold,
159
+ bytesSaved,
141
160
  };
142
161
  }
143
162
 
@@ -12,6 +12,11 @@
12
12
  */
13
13
 
14
14
  function lengthOf(chunk) {
15
+ // Streaming dedup passes lightweight records that carry a precomputed text
16
+ // length (`_textLen`) instead of the full text (which lives on disk). Full
17
+ // chunks have no `_textLen`, so they fall through to the original behavior —
18
+ // byte-identical exemplar selection on the in-memory path.
19
+ if (typeof chunk._textLen === 'number') return chunk._textLen;
15
20
  return (chunk.text || chunk.content || '').length;
16
21
  }
17
22
 
@@ -41,7 +41,6 @@ import { existsSync } from 'fs';
41
41
  import { DB_PATHS, LATE_INTERACTION_CONFIG } from '../infrastructure/config/index.js';
42
42
  import { applyPersistedLiModel } from '../infrastructure/init-config.js';
43
43
  import { resolveRelationshipTargets } from '../graph/relationship-resolver.js';
44
- import { requireNativeAnn as requireNativeAnnBackend } from '../vector-store/hnsw-index.js';
45
44
  import { getStats as getIncrementalStats } from './incremental-tracker.js';
46
45
  import { ARTIFACT_THRESHOLDS } from './artifact-builder.js';
47
46
 
@@ -58,7 +57,6 @@ import {
58
57
  buildVectorIndex,
59
58
  } from './indexer-build.js';
60
59
  import {
61
- incrementalUpdateHNSW, buildHNSWIndex,
62
60
  buildLateInteractionIndex, buildQuantizedArtifactsPhase,
63
61
  } from './indexer-ann.js';
64
62
  import {
@@ -94,7 +92,6 @@ function parseArgs(argv) {
94
92
  lateInteractionModel: args.find(a => a.startsWith('--late-interaction-model='))?.split('=')[1] || null,
95
93
  lateInteractionPool: parseInt(args.find(a => a.startsWith('--late-interaction-pool='))?.split('=')[1] || process.env.SWEET_SEARCH_LI_POOL_FACTOR || '1', 10),
96
94
  lateInteractionExtendedSkiplist: args.includes('--late-interaction-skiplist=extended'),
97
- requireNativeAnn: args.includes('--require-native-ann'),
98
95
  sqliteFastMode: args.includes('--sqlite-fast') || process.env.SWEET_SEARCH_SQLITE_FAST_MODE === '1',
99
96
  verbose: args.includes('--verbose') || args.includes('-v'),
100
97
  };
@@ -110,7 +107,7 @@ async function main() {
110
107
  const { dryRun, graphOnly, vectorsOnly, fullReindex, showStats, resolveOnly,
111
108
  skipSummaryRegen, filesFromStdin, quiet, forceArtifacts, help,
112
109
  noLateInteraction, lateInteractionModel, lateInteractionPool, lateInteractionExtendedSkiplist,
113
- requireNativeAnn, sqliteFastMode, verbose } = parseArgs();
110
+ sqliteFastMode, verbose } = parseArgs();
114
111
 
115
112
  if (quiet) {
116
113
  setQuietMode(true);
@@ -172,8 +169,6 @@ Options:
172
169
  --late-interaction-model=ID Use specific model (lateon-code or lateon-code-edge)
173
170
  --late-interaction-pool=N Token pooling factor (2=halve tokens, 3=third). Reduces index size.
174
171
  --late-interaction-skiplist=extended Extend skiplist with code-noise tokens (whitespace, semicolons)
175
- --require-native-ann Fail fast if native ANN backend (usearch) is unavailable.
176
- Prevents accidental fallback to slower JS ANN in benchmarks.
177
172
  --sqlite-fast Use unsafe SQLite pragmas for faster builds (benchmarking only).
178
173
  Can also be set via SWEET_SEARCH_SQLITE_FAST_MODE=1.
179
174
  WARNING: Data may be lost on crash - do NOT use in production.
@@ -206,9 +201,9 @@ This is intentional since relationships span across files.
206
201
 
207
202
  Output:
208
203
  .sweet-search/code-graph.db Code graph with FTS5 (lexical search)
209
- .sweet-search/codebase.db Vector embeddings (semantic search)
210
- .sweet-search/codebase-hnsw.idx HNSW index (fast ANN)
211
- .sweet-search/merkle-state.json Incremental indexing state
204
+ .sweet-search/codebase.db Vector embeddings (semantic search)
205
+ .sweet-search/codebase-binary-hnsw.idx Binary HNSW index (fast ANN)
206
+ .sweet-search/merkle-state.json Incremental indexing state
212
207
  `);
213
208
  process.exit(0);
214
209
  }
@@ -252,10 +247,6 @@ Output:
252
247
  return;
253
248
  }
254
249
 
255
- if (requireNativeAnn) {
256
- await requireNativeAnnBackend();
257
- }
258
-
259
250
  try {
260
251
  // =========================================================================
261
252
  // PHASE 1: File Discovery
@@ -473,6 +464,39 @@ const _isDirectRun = (() => {
473
464
  }
474
465
  })();
475
466
  if (_isDirectRun) {
467
+ // ── Bounded-memory guard (works on ANY device, NO heap cap, ZERO startup cost) ──
468
+ // On big-RAM machines Node auto-sizes its default old-space heap large, so V8
469
+ // defers GC and the embedding phase's transient per-batch garbage piles up
470
+ // until the OS OOM-kills the indexer on very large repos (e.g. swc ~164k
471
+ // chunks, libsql). The pipeline is already correctly streaming (chunks spill
472
+ // to disk; only lightweight records stay resident) — the live working set is
473
+ // ~2GB — so this only needs GC to actually RUN. Enable gc at runtime (no
474
+ // --expose-gc launch flag and no re-exec, so frequent incremental runs pay
475
+ // nothing) and proactively collect only when RSS climbs past a device-adaptive
476
+ // watermark. Changes only WHEN gc runs — never what is embedded or stored.
477
+ // Opt out with SWEET_SEARCH_NO_GC_GUARD=1; degrades to a no-op if unavailable.
478
+ if (!process.env.SWEET_SEARCH_NO_GC_GUARD) {
479
+ try {
480
+ let gc = globalThis.gc;
481
+ if (typeof gc !== 'function') {
482
+ const v8 = await import('node:v8');
483
+ const vm = await import('node:vm');
484
+ v8.setFlagsFromString('--expose-gc');
485
+ gc = vm.runInNewContext('gc'); // captured fn stays callable after reset below
486
+ v8.setFlagsFromString('--no-expose-gc');
487
+ }
488
+ if (typeof gc === 'function') {
489
+ const os = await import('node:os');
490
+ // GC watermark: ~half of RAM, clamped to [2GB, 6GB]. Live set is ~2GB, so
491
+ // peak RSS stays bounded well below OOM on any machine ≥4GB. The interval
492
+ // is unref'd and only GCs when RSS is actually high (never on small repos).
493
+ const rssLimit = Math.min(6 * 1024 ** 3, Math.max(2 * 1024 ** 3, Math.floor(os.totalmem() * 0.5)));
494
+ setInterval(() => {
495
+ try { if (process.memoryUsage().rss > rssLimit) gc(); } catch { /* best-effort */ }
496
+ }, 4000).unref();
497
+ }
498
+ } catch { /* GC guard unavailable — indexing proceeds unchanged */ }
499
+ }
476
500
  main().catch(err => {
477
501
  console.error(err);
478
502
  process.exit(1);
@@ -484,7 +508,6 @@ export {
484
508
  discoverFiles,
485
509
  buildCodeGraph,
486
510
  buildVectorIndex,
487
- buildHNSWIndex,
488
511
  buildLateInteractionIndex,
489
512
  buildQuantizedArtifactsPhase,
490
513
  parseArgs,