sweet-search 2.5.13 → 2.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52) hide show
  1. package/README.md +36 -9
  2. package/core/cli.js +41 -3
  3. package/core/embedding/embedding-local-model.js +106 -10
  4. package/core/embedding/embedding-service.js +59 -1
  5. package/core/embedding/model-client.mjs +257 -0
  6. package/core/embedding/model-server.mjs +217 -0
  7. package/core/incremental-indexing/application/maintenance-handlers.mjs +19 -98
  8. package/core/incremental-indexing/application/maintenance-worker.mjs +46 -9
  9. package/core/incremental-indexing/application/operator-cli.mjs +14 -5
  10. package/core/incremental-indexing/application/production-reconciler-helpers.mjs +40 -0
  11. package/core/incremental-indexing/application/production-reconciler.mjs +718 -54
  12. package/core/incremental-indexing/application/reconciler.mjs +87 -15
  13. package/core/incremental-indexing/domain/cutoff-cache.mjs +191 -0
  14. package/core/incremental-indexing/domain/interval-autotune.mjs +84 -1
  15. package/core/incremental-indexing/domain/reconcile-counters.mjs +0 -4
  16. package/core/incremental-indexing/domain/watermark-scheduler.mjs +0 -24
  17. package/core/incremental-indexing/infrastructure/maintenance-state-reader.mjs +2 -26
  18. package/core/incremental-indexing/infrastructure/manifest.mjs +1 -9
  19. package/core/incremental-indexing/infrastructure/sqlite-fts5.mjs +72 -0
  20. package/core/indexing/artifact-builder.js +1 -1
  21. package/core/indexing/dedup/dedup-phase.js +36 -17
  22. package/core/indexing/dedup/exemplar-selector.js +5 -0
  23. package/core/indexing/index-codebase-v21.js +37 -14
  24. package/core/indexing/index-maintainer.mjs +337 -6
  25. package/core/indexing/indexer-ann.js +27 -434
  26. package/core/indexing/indexer-build.js +30 -14
  27. package/core/indexing/indexer-manifest.js +0 -3
  28. package/core/indexing/indexer-phases.js +101 -25
  29. package/core/indexing/maintainer-launcher.mjs +22 -0
  30. package/core/indexing/maintainer-watcher.mjs +397 -0
  31. package/core/indexing/os-priority.mjs +160 -0
  32. package/core/indexing/rss-budget.mjs +425 -0
  33. package/core/indexing/streaming-vectors.js +450 -0
  34. package/core/infrastructure/config/platform.js +14 -10
  35. package/core/infrastructure/onnx-session-utils.js +37 -0
  36. package/core/infrastructure/sparse-gram-delta-reader.js +11 -1
  37. package/core/ranking/late-interaction-index.js +58 -7
  38. package/core/search/daemon-registry.js +199 -0
  39. package/core/search/search-read-semantic.js +9 -3
  40. package/core/search/search-semantic.js +6 -29
  41. package/core/search/search-server.js +527 -27
  42. package/core/search/session-daemon-prewarm.mjs +110 -1
  43. package/core/search/sweet-search.js +0 -38
  44. package/core/vector-store/binary-hnsw-index.js +692 -78
  45. package/core/vector-store/index.js +1 -4
  46. package/eval/agent-read-workflows/bin/_ss-argparse.mjs +51 -5
  47. package/eval/agent-read-workflows/bin/_ss-helpers.mjs +95 -44
  48. package/eval/agent-read-workflows/bin/ss-read +2 -0
  49. package/mcp/tool-handlers.js +1 -2
  50. package/package.json +11 -8
  51. package/scripts/uninstall.js +2 -0
  52. package/core/vector-store/hnsw-index.js +0 -751
@@ -22,7 +22,15 @@
22
22
  */
23
23
 
24
24
  import { spawn } from 'node:child_process';
25
- import { existsSync, readFileSync, openSync, writeSync, closeSync, unlinkSync } from 'node:fs';
25
+ import {
26
+ existsSync,
27
+ readFileSync,
28
+ readdirSync,
29
+ openSync,
30
+ writeSync,
31
+ closeSync,
32
+ unlinkSync,
33
+ } from 'node:fs';
26
34
  import { connect } from 'node:net';
27
35
  import { dirname, join } from 'node:path';
28
36
  import { fileURLToPath } from 'node:url';
@@ -44,6 +52,92 @@ const log = (msg) => {
44
52
  if (verbose) process.stderr.write(`[sweet-search prewarm] ${msg}\n`);
45
53
  };
46
54
 
55
+ function pageCacheSweepEnabled() {
56
+ const v = String(process.env.SWEET_SEARCH_PREWARM_PAGE_CACHE || '').trim().toLowerCase();
57
+ return v !== '0' && v !== 'false' && v !== 'off' && v !== 'no';
58
+ }
59
+
60
+ function readAndDiscard(filePath, deadlineMs) {
61
+ if (!filePath || Date.now() >= deadlineMs || !existsSync(filePath)) return false;
62
+ try {
63
+ readFileSync(filePath);
64
+ return true;
65
+ } catch {
66
+ return false;
67
+ }
68
+ }
69
+
70
+ async function pageCacheSweep() {
71
+ const rawBudgetMs = Number(process.env.SWEET_SEARCH_PREWARM_PAGE_CACHE_MS || 3500);
72
+ const budgetMs = Number.isFinite(rawBudgetMs) ? Math.max(250, rawBudgetMs) : 3500;
73
+ const deadlineMs = Date.now() + budgetMs;
74
+ let warmed = 0;
75
+ try {
76
+ const { DB_PATHS, LATE_INTERACTION_CONFIG } = await import('../infrastructure/config/index.js');
77
+ const files = [
78
+ DB_PATHS.codeGraph,
79
+ DB_PATHS.lateInteraction,
80
+ DB_PATHS.sparseGramIndex,
81
+ // codebase.db is opened by the read tool (CodebaseRepository) on every
82
+ // invocation but was the only query-path DB the sweep omitted; warming
83
+ // its page cache means the first read call pays no cold SQLite I/O.
84
+ // readAndDiscard no-ops on an absent file, so this is harmless when the
85
+ // index predates codebase.db or it was never built.
86
+ DB_PATHS.codebase,
87
+ ];
88
+
89
+ const segmentDir = DB_PATHS.lateInteraction ? `${DB_PATHS.lateInteraction}.segments` : null;
90
+ if (segmentDir && existsSync(segmentDir)) {
91
+ try {
92
+ for (const name of readdirSync(segmentDir)) {
93
+ if (name.endsWith('.bin')) files.push(join(segmentDir, name));
94
+ }
95
+ } catch {
96
+ /* best-effort page-cache prewarm */
97
+ }
98
+ }
99
+
100
+ const modelConfig = LATE_INTERACTION_CONFIG.activeModel;
101
+ if (modelConfig?.hfId) {
102
+ try {
103
+ const { getModelCacheDir } = await import('../infrastructure/model-fetcher.js');
104
+ const modelDir = getModelCacheDir(modelConfig.hfId);
105
+ files.push(join(modelDir, modelConfig.onnxFile));
106
+ files.push(join(modelDir, 'tokenizer.json'));
107
+ } catch {
108
+ /* model cache unavailable; do not fetch or load sessions here */
109
+ }
110
+ }
111
+
112
+ for (const filePath of files) {
113
+ if (readAndDiscard(filePath, deadlineMs)) warmed++;
114
+ }
115
+ log(`page-cache sweep warmed ${warmed} file(s)`);
116
+ } catch (err) {
117
+ log(`page-cache sweep non-fatal: ${err?.message || err}`);
118
+ }
119
+ }
120
+
121
+ function spawnPageCacheSweep() {
122
+ if (!pageCacheSweepEnabled()) return;
123
+ if (process.env.SWEET_SEARCH_PAGE_CACHE_SWEEP === '1') return;
124
+ try {
125
+ const child = spawn(process.execPath, [fileURLToPath(import.meta.url)], {
126
+ detached: true,
127
+ stdio: 'ignore',
128
+ cwd: process.cwd(),
129
+ env: {
130
+ ...process.env,
131
+ SWEET_SEARCH_PAGE_CACHE_SWEEP: '1',
132
+ },
133
+ });
134
+ child.unref();
135
+ log(`page-cache sweep spawned (pid ${child.pid}, detached)`);
136
+ } catch (err) {
137
+ log(`page-cache sweep spawn non-fatal: ${err?.message || err}`);
138
+ }
139
+ }
140
+
47
141
  /** Does a process with this PID exist right now? Handles EPERM (alien user) as "alive". */
48
142
  function pidAlive(pid) {
49
143
  if (!pid || !Number.isFinite(pid)) return false;
@@ -165,9 +259,24 @@ async function prewarmServer() {
165
259
  }
166
260
  }
167
261
 
262
+ if (process.env.SWEET_SEARCH_PAGE_CACHE_SWEEP === '1') {
263
+ try {
264
+ await pageCacheSweep();
265
+ } catch (err) {
266
+ log(`page-cache sweep non-fatal: ${err?.message || err}`);
267
+ }
268
+ process.exit(0);
269
+ }
270
+
168
271
  // The search server and the index maintainer are independent: a stuck/already
169
272
  // running server must not stop the maintainer from starting, and vice versa.
170
273
  // Each is isolated in its own try so one failing never blocks the other.
274
+ try {
275
+ spawnPageCacheSweep();
276
+ } catch (err) {
277
+ log(`page-cache sweep spawn non-fatal: ${err?.message || err}`);
278
+ }
279
+
171
280
  try {
172
281
  await prewarmServer();
173
282
  } catch (err) {
@@ -17,7 +17,6 @@ import { getGlobalLocalReranker } from '../ranking/local-reranker.js';
17
17
  import { QueryRouter, routeQuery } from '../query/query-router.js';
18
18
  import { GraphSearch } from '../graph/graph-search.js';
19
19
  import { SYMBOL_KIND_WEIGHTS, DEFINITION_TYPES } from '../infrastructure/constants.js';
20
- import { HNSWIndex } from '../vector-store/hnsw-index.js';
21
20
  import { BinaryHNSWIndex } from '../vector-store/binary-hnsw-index.js';
22
21
  import { Reranker } from '../ranking/flashrank.js';
23
22
  import { LateInteractionIndex } from '../ranking/late-interaction-index.js';
@@ -133,9 +132,7 @@ export class SweetSearch {
133
132
  this._manifestGraphDbPath = this.graphDbPath;
134
133
  this.graphSearch = new GraphSearch(this.graphDbPath);
135
134
  this.codeGraphRepo = new CodeGraphRepository(this.graphDbPath);
136
- this.hnswPath = options.hnswPath || DB_PATHS.hnswIndex;
137
135
  this.binaryHnswPath = options.binaryHnswPath || DB_PATHS.binaryHnswIndex;
138
- this.hnswIndex = new HNSWIndex({ indexPath: this.hnswPath });
139
136
  this.binaryHnswIndex = new BinaryHNSWIndex({ indexPath: this.binaryHnswPath });
140
137
  this.reranker = new Reranker(options);
141
138
  this.lateInteractionIndex = new LateInteractionIndex(options.lateInteractionOptions || {});
@@ -228,7 +225,6 @@ export class SweetSearch {
228
225
  this._syncManifestPaths(this._readReconcileManifest());
229
226
 
230
227
  this.hasGraphIndex = existsSync(this.graphDbPath);
231
- this.hasHnswIndex = existsSync(this.hnswPath.replace('.idx', '.meta.json'));
232
228
  this.hasBinaryHnswIndex = existsSync(this.binaryHnswPath.replace('.idx', '.meta.json'));
233
229
  this.hasCodebaseIndex = existsSync(this.codebaseDbPath);
234
230
  this.hasLateInteractionIndex = existsSync(this.lateInteractionIndex.indexPath);
@@ -271,16 +267,6 @@ export class SweetSearch {
271
267
  }
272
268
  }
273
269
 
274
- if (this.hasHnswIndex) {
275
- try {
276
- await this.hnswIndex.load(undefined, { mmap: true });
277
- this.log(`HNSW: Loaded ${this.hnswIndex.getStats().totalVectors} vectors (mmap)`);
278
- } catch (err) {
279
- this.log(`HNSW: Failed to load: ${err.message}`);
280
- this.hasHnswIndex = false;
281
- }
282
- }
283
-
284
270
  if (this.hasLateInteractionIndex && this.useLateInteraction) {
285
271
  try {
286
272
  await this.lateInteractionIndex.init();
@@ -433,8 +419,6 @@ export class SweetSearch {
433
419
  return {
434
420
  codebaseDbPath: this._resolveStatePath(manifest.vectors?.path),
435
421
  graphDbPath: this._resolveStatePath(manifest.codeGraph?.path),
436
- hnswPath: this._resolveStatePath(manifest.hnsw?.path),
437
- hnswStalePath: this._resolveStatePath(manifest.hnsw?.stale),
438
422
  binaryHnswPath: this._resolveStatePath(manifest.binaryHnsw?.path),
439
423
  binaryHnswStalePath: this._resolveStatePath(manifest.binaryHnsw?.stale),
440
424
  lateInteractionIndexPath: liIndexPath,
@@ -461,10 +445,6 @@ export class SweetSearch {
461
445
  this.graphSearch = new GraphSearch(this._manifestGraphDbPath);
462
446
  this.codeGraphRepo = new CodeGraphRepository(this._manifestGraphDbPath);
463
447
  }
464
- if (paths.hnswPath && (paths.hnswPath !== this.hnswPath || paths.hnswStalePath !== this.hnswIndex?.stalePath)) {
465
- this.hnswPath = paths.hnswPath;
466
- this.hnswIndex = new HNSWIndex({ indexPath: this.hnswPath, stalePath: paths.hnswStalePath || `${this.hnswPath}.stale.bin` });
467
- }
468
448
  if (paths.binaryHnswPath && (paths.binaryHnswPath !== this.binaryHnswPath || paths.binaryHnswStalePath !== this.binaryHnswIndex?.stalePath)) {
469
449
  this.binaryHnswPath = paths.binaryHnswPath;
470
450
  this.binaryHnswIndex = new BinaryHNSWIndex({ indexPath: this.binaryHnswPath, stalePath: paths.binaryHnswStalePath || `${this.binaryHnswPath}.stale.bin` });
@@ -513,23 +493,6 @@ export class SweetSearch {
513
493
  }
514
494
  }
515
495
 
516
- if (!grepOnly) {
517
- this.hasHnswIndex = existsSync(this.hnswPath.replace('.idx', '.meta.json'));
518
- }
519
- if (!grepOnly && this.hasHnswIndex) {
520
- try {
521
- const nextHnsw = new HNSWIndex({
522
- indexPath: this.hnswPath,
523
- stalePath: this.hnswIndex?.stalePath || `${this.hnswPath}.stale.bin`,
524
- });
525
- await nextHnsw.load(undefined, { mmap: true });
526
- this.hnswIndex = nextHnsw;
527
- } catch (err) {
528
- this.log(`HNSW: Failed to reload after manifest publish: ${err.message}`);
529
- this.hasHnswIndex = false;
530
- }
531
- }
532
-
533
496
  if (!grepOnly) {
534
497
  this.hasLateInteractionIndex = existsSync(this.lateInteractionIndex.indexPath);
535
498
  }
@@ -1098,7 +1061,6 @@ Object.assign(SweetSearch.prototype, {
1098
1061
  semanticSearch3Stage: semantic.semanticSearch3Stage,
1099
1062
  semanticSearchStandard: semantic.semanticSearchStandard,
1100
1063
  shouldSkipRerank: semantic.shouldSkipRerank,
1101
- getAdaptiveCandidateCount: semantic.getAdaptiveCandidateCount,
1102
1064
  hybridSearchV2: hybrid.hybridSearchV2,
1103
1065
  hybridSearch: hybrid.hybridSearch,
1104
1066
  patternSearch: pattern.patternSearch,