sweet-search 2.5.13 → 2.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +36 -9
- package/core/cli.js +41 -3
- package/core/embedding/embedding-local-model.js +106 -10
- package/core/embedding/embedding-service.js +59 -1
- package/core/embedding/model-client.mjs +257 -0
- package/core/embedding/model-server.mjs +217 -0
- package/core/incremental-indexing/application/maintenance-handlers.mjs +19 -98
- package/core/incremental-indexing/application/maintenance-worker.mjs +46 -9
- package/core/incremental-indexing/application/operator-cli.mjs +14 -5
- package/core/incremental-indexing/application/production-reconciler-helpers.mjs +40 -0
- package/core/incremental-indexing/application/production-reconciler.mjs +718 -54
- package/core/incremental-indexing/application/reconciler.mjs +87 -15
- package/core/incremental-indexing/domain/cutoff-cache.mjs +191 -0
- package/core/incremental-indexing/domain/interval-autotune.mjs +84 -1
- package/core/incremental-indexing/domain/reconcile-counters.mjs +0 -4
- package/core/incremental-indexing/domain/watermark-scheduler.mjs +0 -24
- package/core/incremental-indexing/infrastructure/maintenance-state-reader.mjs +2 -26
- package/core/incremental-indexing/infrastructure/manifest.mjs +1 -9
- package/core/incremental-indexing/infrastructure/sqlite-fts5.mjs +72 -0
- package/core/indexing/artifact-builder.js +1 -1
- package/core/indexing/dedup/dedup-phase.js +36 -17
- package/core/indexing/dedup/exemplar-selector.js +5 -0
- package/core/indexing/index-codebase-v21.js +37 -14
- package/core/indexing/index-maintainer.mjs +337 -6
- package/core/indexing/indexer-ann.js +27 -434
- package/core/indexing/indexer-build.js +30 -14
- package/core/indexing/indexer-manifest.js +0 -3
- package/core/indexing/indexer-phases.js +101 -25
- package/core/indexing/maintainer-launcher.mjs +22 -0
- package/core/indexing/maintainer-watcher.mjs +397 -0
- package/core/indexing/os-priority.mjs +160 -0
- package/core/indexing/rss-budget.mjs +425 -0
- package/core/indexing/streaming-vectors.js +450 -0
- package/core/infrastructure/config/platform.js +14 -10
- package/core/infrastructure/onnx-session-utils.js +37 -0
- package/core/infrastructure/sparse-gram-delta-reader.js +11 -1
- package/core/ranking/late-interaction-index.js +58 -7
- package/core/search/daemon-registry.js +199 -0
- package/core/search/search-read-semantic.js +9 -3
- package/core/search/search-semantic.js +6 -29
- package/core/search/search-server.js +527 -27
- package/core/search/session-daemon-prewarm.mjs +110 -1
- package/core/search/sweet-search.js +0 -38
- package/core/vector-store/binary-hnsw-index.js +692 -78
- package/core/vector-store/index.js +1 -4
- package/eval/agent-read-workflows/bin/_ss-argparse.mjs +51 -5
- package/eval/agent-read-workflows/bin/_ss-helpers.mjs +95 -44
- package/eval/agent-read-workflows/bin/ss-read +2 -0
- package/mcp/tool-handlers.js +1 -2
- package/package.json +11 -8
- package/scripts/uninstall.js +2 -0
- package/core/vector-store/hnsw-index.js +0 -751
|
@@ -22,7 +22,15 @@
|
|
|
22
22
|
*/
|
|
23
23
|
|
|
24
24
|
import { spawn } from 'node:child_process';
|
|
25
|
-
import {
|
|
25
|
+
import {
|
|
26
|
+
existsSync,
|
|
27
|
+
readFileSync,
|
|
28
|
+
readdirSync,
|
|
29
|
+
openSync,
|
|
30
|
+
writeSync,
|
|
31
|
+
closeSync,
|
|
32
|
+
unlinkSync,
|
|
33
|
+
} from 'node:fs';
|
|
26
34
|
import { connect } from 'node:net';
|
|
27
35
|
import { dirname, join } from 'node:path';
|
|
28
36
|
import { fileURLToPath } from 'node:url';
|
|
@@ -44,6 +52,92 @@ const log = (msg) => {
|
|
|
44
52
|
if (verbose) process.stderr.write(`[sweet-search prewarm] ${msg}\n`);
|
|
45
53
|
};
|
|
46
54
|
|
|
55
|
+
function pageCacheSweepEnabled() {
|
|
56
|
+
const v = String(process.env.SWEET_SEARCH_PREWARM_PAGE_CACHE || '').trim().toLowerCase();
|
|
57
|
+
return v !== '0' && v !== 'false' && v !== 'off' && v !== 'no';
|
|
58
|
+
}
|
|
59
|
+
|
|
60
|
+
function readAndDiscard(filePath, deadlineMs) {
|
|
61
|
+
if (!filePath || Date.now() >= deadlineMs || !existsSync(filePath)) return false;
|
|
62
|
+
try {
|
|
63
|
+
readFileSync(filePath);
|
|
64
|
+
return true;
|
|
65
|
+
} catch {
|
|
66
|
+
return false;
|
|
67
|
+
}
|
|
68
|
+
}
|
|
69
|
+
|
|
70
|
+
async function pageCacheSweep() {
|
|
71
|
+
const rawBudgetMs = Number(process.env.SWEET_SEARCH_PREWARM_PAGE_CACHE_MS || 3500);
|
|
72
|
+
const budgetMs = Number.isFinite(rawBudgetMs) ? Math.max(250, rawBudgetMs) : 3500;
|
|
73
|
+
const deadlineMs = Date.now() + budgetMs;
|
|
74
|
+
let warmed = 0;
|
|
75
|
+
try {
|
|
76
|
+
const { DB_PATHS, LATE_INTERACTION_CONFIG } = await import('../infrastructure/config/index.js');
|
|
77
|
+
const files = [
|
|
78
|
+
DB_PATHS.codeGraph,
|
|
79
|
+
DB_PATHS.lateInteraction,
|
|
80
|
+
DB_PATHS.sparseGramIndex,
|
|
81
|
+
// codebase.db is opened by the read tool (CodebaseRepository) on every
|
|
82
|
+
// invocation but was the only query-path DB the sweep omitted; warming
|
|
83
|
+
// its page cache means the first read call pays no cold SQLite I/O.
|
|
84
|
+
// readAndDiscard no-ops on an absent file, so this is harmless when the
|
|
85
|
+
// index predates codebase.db or it was never built.
|
|
86
|
+
DB_PATHS.codebase,
|
|
87
|
+
];
|
|
88
|
+
|
|
89
|
+
const segmentDir = DB_PATHS.lateInteraction ? `${DB_PATHS.lateInteraction}.segments` : null;
|
|
90
|
+
if (segmentDir && existsSync(segmentDir)) {
|
|
91
|
+
try {
|
|
92
|
+
for (const name of readdirSync(segmentDir)) {
|
|
93
|
+
if (name.endsWith('.bin')) files.push(join(segmentDir, name));
|
|
94
|
+
}
|
|
95
|
+
} catch {
|
|
96
|
+
/* best-effort page-cache prewarm */
|
|
97
|
+
}
|
|
98
|
+
}
|
|
99
|
+
|
|
100
|
+
const modelConfig = LATE_INTERACTION_CONFIG.activeModel;
|
|
101
|
+
if (modelConfig?.hfId) {
|
|
102
|
+
try {
|
|
103
|
+
const { getModelCacheDir } = await import('../infrastructure/model-fetcher.js');
|
|
104
|
+
const modelDir = getModelCacheDir(modelConfig.hfId);
|
|
105
|
+
files.push(join(modelDir, modelConfig.onnxFile));
|
|
106
|
+
files.push(join(modelDir, 'tokenizer.json'));
|
|
107
|
+
} catch {
|
|
108
|
+
/* model cache unavailable; do not fetch or load sessions here */
|
|
109
|
+
}
|
|
110
|
+
}
|
|
111
|
+
|
|
112
|
+
for (const filePath of files) {
|
|
113
|
+
if (readAndDiscard(filePath, deadlineMs)) warmed++;
|
|
114
|
+
}
|
|
115
|
+
log(`page-cache sweep warmed ${warmed} file(s)`);
|
|
116
|
+
} catch (err) {
|
|
117
|
+
log(`page-cache sweep non-fatal: ${err?.message || err}`);
|
|
118
|
+
}
|
|
119
|
+
}
|
|
120
|
+
|
|
121
|
+
function spawnPageCacheSweep() {
|
|
122
|
+
if (!pageCacheSweepEnabled()) return;
|
|
123
|
+
if (process.env.SWEET_SEARCH_PAGE_CACHE_SWEEP === '1') return;
|
|
124
|
+
try {
|
|
125
|
+
const child = spawn(process.execPath, [fileURLToPath(import.meta.url)], {
|
|
126
|
+
detached: true,
|
|
127
|
+
stdio: 'ignore',
|
|
128
|
+
cwd: process.cwd(),
|
|
129
|
+
env: {
|
|
130
|
+
...process.env,
|
|
131
|
+
SWEET_SEARCH_PAGE_CACHE_SWEEP: '1',
|
|
132
|
+
},
|
|
133
|
+
});
|
|
134
|
+
child.unref();
|
|
135
|
+
log(`page-cache sweep spawned (pid ${child.pid}, detached)`);
|
|
136
|
+
} catch (err) {
|
|
137
|
+
log(`page-cache sweep spawn non-fatal: ${err?.message || err}`);
|
|
138
|
+
}
|
|
139
|
+
}
|
|
140
|
+
|
|
47
141
|
/** Does a process with this PID exist right now? Handles EPERM (alien user) as "alive". */
|
|
48
142
|
function pidAlive(pid) {
|
|
49
143
|
if (!pid || !Number.isFinite(pid)) return false;
|
|
@@ -165,9 +259,24 @@ async function prewarmServer() {
|
|
|
165
259
|
}
|
|
166
260
|
}
|
|
167
261
|
|
|
262
|
+
if (process.env.SWEET_SEARCH_PAGE_CACHE_SWEEP === '1') {
|
|
263
|
+
try {
|
|
264
|
+
await pageCacheSweep();
|
|
265
|
+
} catch (err) {
|
|
266
|
+
log(`page-cache sweep non-fatal: ${err?.message || err}`);
|
|
267
|
+
}
|
|
268
|
+
process.exit(0);
|
|
269
|
+
}
|
|
270
|
+
|
|
168
271
|
// The search server and the index maintainer are independent: a stuck/already
|
|
169
272
|
// running server must not stop the maintainer from starting, and vice versa.
|
|
170
273
|
// Each is isolated in its own try so one failing never blocks the other.
|
|
274
|
+
try {
|
|
275
|
+
spawnPageCacheSweep();
|
|
276
|
+
} catch (err) {
|
|
277
|
+
log(`page-cache sweep spawn non-fatal: ${err?.message || err}`);
|
|
278
|
+
}
|
|
279
|
+
|
|
171
280
|
try {
|
|
172
281
|
await prewarmServer();
|
|
173
282
|
} catch (err) {
|
|
@@ -17,7 +17,6 @@ import { getGlobalLocalReranker } from '../ranking/local-reranker.js';
|
|
|
17
17
|
import { QueryRouter, routeQuery } from '../query/query-router.js';
|
|
18
18
|
import { GraphSearch } from '../graph/graph-search.js';
|
|
19
19
|
import { SYMBOL_KIND_WEIGHTS, DEFINITION_TYPES } from '../infrastructure/constants.js';
|
|
20
|
-
import { HNSWIndex } from '../vector-store/hnsw-index.js';
|
|
21
20
|
import { BinaryHNSWIndex } from '../vector-store/binary-hnsw-index.js';
|
|
22
21
|
import { Reranker } from '../ranking/flashrank.js';
|
|
23
22
|
import { LateInteractionIndex } from '../ranking/late-interaction-index.js';
|
|
@@ -133,9 +132,7 @@ export class SweetSearch {
|
|
|
133
132
|
this._manifestGraphDbPath = this.graphDbPath;
|
|
134
133
|
this.graphSearch = new GraphSearch(this.graphDbPath);
|
|
135
134
|
this.codeGraphRepo = new CodeGraphRepository(this.graphDbPath);
|
|
136
|
-
this.hnswPath = options.hnswPath || DB_PATHS.hnswIndex;
|
|
137
135
|
this.binaryHnswPath = options.binaryHnswPath || DB_PATHS.binaryHnswIndex;
|
|
138
|
-
this.hnswIndex = new HNSWIndex({ indexPath: this.hnswPath });
|
|
139
136
|
this.binaryHnswIndex = new BinaryHNSWIndex({ indexPath: this.binaryHnswPath });
|
|
140
137
|
this.reranker = new Reranker(options);
|
|
141
138
|
this.lateInteractionIndex = new LateInteractionIndex(options.lateInteractionOptions || {});
|
|
@@ -228,7 +225,6 @@ export class SweetSearch {
|
|
|
228
225
|
this._syncManifestPaths(this._readReconcileManifest());
|
|
229
226
|
|
|
230
227
|
this.hasGraphIndex = existsSync(this.graphDbPath);
|
|
231
|
-
this.hasHnswIndex = existsSync(this.hnswPath.replace('.idx', '.meta.json'));
|
|
232
228
|
this.hasBinaryHnswIndex = existsSync(this.binaryHnswPath.replace('.idx', '.meta.json'));
|
|
233
229
|
this.hasCodebaseIndex = existsSync(this.codebaseDbPath);
|
|
234
230
|
this.hasLateInteractionIndex = existsSync(this.lateInteractionIndex.indexPath);
|
|
@@ -271,16 +267,6 @@ export class SweetSearch {
|
|
|
271
267
|
}
|
|
272
268
|
}
|
|
273
269
|
|
|
274
|
-
if (this.hasHnswIndex) {
|
|
275
|
-
try {
|
|
276
|
-
await this.hnswIndex.load(undefined, { mmap: true });
|
|
277
|
-
this.log(`HNSW: Loaded ${this.hnswIndex.getStats().totalVectors} vectors (mmap)`);
|
|
278
|
-
} catch (err) {
|
|
279
|
-
this.log(`HNSW: Failed to load: ${err.message}`);
|
|
280
|
-
this.hasHnswIndex = false;
|
|
281
|
-
}
|
|
282
|
-
}
|
|
283
|
-
|
|
284
270
|
if (this.hasLateInteractionIndex && this.useLateInteraction) {
|
|
285
271
|
try {
|
|
286
272
|
await this.lateInteractionIndex.init();
|
|
@@ -433,8 +419,6 @@ export class SweetSearch {
|
|
|
433
419
|
return {
|
|
434
420
|
codebaseDbPath: this._resolveStatePath(manifest.vectors?.path),
|
|
435
421
|
graphDbPath: this._resolveStatePath(manifest.codeGraph?.path),
|
|
436
|
-
hnswPath: this._resolveStatePath(manifest.hnsw?.path),
|
|
437
|
-
hnswStalePath: this._resolveStatePath(manifest.hnsw?.stale),
|
|
438
422
|
binaryHnswPath: this._resolveStatePath(manifest.binaryHnsw?.path),
|
|
439
423
|
binaryHnswStalePath: this._resolveStatePath(manifest.binaryHnsw?.stale),
|
|
440
424
|
lateInteractionIndexPath: liIndexPath,
|
|
@@ -461,10 +445,6 @@ export class SweetSearch {
|
|
|
461
445
|
this.graphSearch = new GraphSearch(this._manifestGraphDbPath);
|
|
462
446
|
this.codeGraphRepo = new CodeGraphRepository(this._manifestGraphDbPath);
|
|
463
447
|
}
|
|
464
|
-
if (paths.hnswPath && (paths.hnswPath !== this.hnswPath || paths.hnswStalePath !== this.hnswIndex?.stalePath)) {
|
|
465
|
-
this.hnswPath = paths.hnswPath;
|
|
466
|
-
this.hnswIndex = new HNSWIndex({ indexPath: this.hnswPath, stalePath: paths.hnswStalePath || `${this.hnswPath}.stale.bin` });
|
|
467
|
-
}
|
|
468
448
|
if (paths.binaryHnswPath && (paths.binaryHnswPath !== this.binaryHnswPath || paths.binaryHnswStalePath !== this.binaryHnswIndex?.stalePath)) {
|
|
469
449
|
this.binaryHnswPath = paths.binaryHnswPath;
|
|
470
450
|
this.binaryHnswIndex = new BinaryHNSWIndex({ indexPath: this.binaryHnswPath, stalePath: paths.binaryHnswStalePath || `${this.binaryHnswPath}.stale.bin` });
|
|
@@ -513,23 +493,6 @@ export class SweetSearch {
|
|
|
513
493
|
}
|
|
514
494
|
}
|
|
515
495
|
|
|
516
|
-
if (!grepOnly) {
|
|
517
|
-
this.hasHnswIndex = existsSync(this.hnswPath.replace('.idx', '.meta.json'));
|
|
518
|
-
}
|
|
519
|
-
if (!grepOnly && this.hasHnswIndex) {
|
|
520
|
-
try {
|
|
521
|
-
const nextHnsw = new HNSWIndex({
|
|
522
|
-
indexPath: this.hnswPath,
|
|
523
|
-
stalePath: this.hnswIndex?.stalePath || `${this.hnswPath}.stale.bin`,
|
|
524
|
-
});
|
|
525
|
-
await nextHnsw.load(undefined, { mmap: true });
|
|
526
|
-
this.hnswIndex = nextHnsw;
|
|
527
|
-
} catch (err) {
|
|
528
|
-
this.log(`HNSW: Failed to reload after manifest publish: ${err.message}`);
|
|
529
|
-
this.hasHnswIndex = false;
|
|
530
|
-
}
|
|
531
|
-
}
|
|
532
|
-
|
|
533
496
|
if (!grepOnly) {
|
|
534
497
|
this.hasLateInteractionIndex = existsSync(this.lateInteractionIndex.indexPath);
|
|
535
498
|
}
|
|
@@ -1098,7 +1061,6 @@ Object.assign(SweetSearch.prototype, {
|
|
|
1098
1061
|
semanticSearch3Stage: semantic.semanticSearch3Stage,
|
|
1099
1062
|
semanticSearchStandard: semantic.semanticSearchStandard,
|
|
1100
1063
|
shouldSkipRerank: semantic.shouldSkipRerank,
|
|
1101
|
-
getAdaptiveCandidateCount: semantic.getAdaptiveCandidateCount,
|
|
1102
1064
|
hybridSearchV2: hybrid.hybridSearchV2,
|
|
1103
1065
|
hybridSearch: hybrid.hybridSearch,
|
|
1104
1066
|
patternSearch: pattern.patternSearch,
|