sweet-search 2.5.13 → 2.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +36 -9
- package/core/cli.js +41 -3
- package/core/embedding/embedding-local-model.js +106 -10
- package/core/embedding/embedding-service.js +59 -1
- package/core/embedding/model-client.mjs +257 -0
- package/core/embedding/model-server.mjs +217 -0
- package/core/incremental-indexing/application/maintenance-handlers.mjs +19 -98
- package/core/incremental-indexing/application/maintenance-worker.mjs +46 -9
- package/core/incremental-indexing/application/operator-cli.mjs +14 -5
- package/core/incremental-indexing/application/production-reconciler-helpers.mjs +40 -0
- package/core/incremental-indexing/application/production-reconciler.mjs +718 -54
- package/core/incremental-indexing/application/reconciler.mjs +87 -15
- package/core/incremental-indexing/domain/cutoff-cache.mjs +191 -0
- package/core/incremental-indexing/domain/interval-autotune.mjs +84 -1
- package/core/incremental-indexing/domain/reconcile-counters.mjs +0 -4
- package/core/incremental-indexing/domain/watermark-scheduler.mjs +0 -24
- package/core/incremental-indexing/infrastructure/maintenance-state-reader.mjs +2 -26
- package/core/incremental-indexing/infrastructure/manifest.mjs +1 -9
- package/core/incremental-indexing/infrastructure/sqlite-fts5.mjs +72 -0
- package/core/indexing/artifact-builder.js +1 -1
- package/core/indexing/dedup/dedup-phase.js +36 -17
- package/core/indexing/dedup/exemplar-selector.js +5 -0
- package/core/indexing/index-codebase-v21.js +37 -14
- package/core/indexing/index-maintainer.mjs +337 -6
- package/core/indexing/indexer-ann.js +27 -434
- package/core/indexing/indexer-build.js +30 -14
- package/core/indexing/indexer-manifest.js +0 -3
- package/core/indexing/indexer-phases.js +101 -25
- package/core/indexing/maintainer-launcher.mjs +22 -0
- package/core/indexing/maintainer-watcher.mjs +397 -0
- package/core/indexing/os-priority.mjs +160 -0
- package/core/indexing/rss-budget.mjs +425 -0
- package/core/indexing/streaming-vectors.js +450 -0
- package/core/infrastructure/config/platform.js +14 -10
- package/core/infrastructure/onnx-session-utils.js +37 -0
- package/core/infrastructure/sparse-gram-delta-reader.js +11 -1
- package/core/ranking/late-interaction-index.js +58 -7
- package/core/search/daemon-registry.js +199 -0
- package/core/search/search-read-semantic.js +9 -3
- package/core/search/search-semantic.js +6 -29
- package/core/search/search-server.js +527 -27
- package/core/search/session-daemon-prewarm.mjs +110 -1
- package/core/search/sweet-search.js +0 -38
- package/core/vector-store/binary-hnsw-index.js +692 -78
- package/core/vector-store/index.js +1 -4
- package/eval/agent-read-workflows/bin/_ss-argparse.mjs +51 -5
- package/eval/agent-read-workflows/bin/_ss-helpers.mjs +95 -44
- package/eval/agent-read-workflows/bin/ss-read +2 -0
- package/mcp/tool-handlers.js +1 -2
- package/package.json +11 -8
- package/scripts/uninstall.js +2 -0
- package/core/vector-store/hnsw-index.js +0 -751
|
@@ -173,4 +173,76 @@ export function fts5Merge(db, tableName, pages) {
|
|
|
173
173
|
db.prepare(`INSERT INTO ${tableName}(${tableName}, rank) VALUES('merge', ?)`).run(pages);
|
|
174
174
|
}
|
|
175
175
|
|
|
176
|
+
/**
 * Decide how many FTS5 merge pages a reconcile tick may spend, based on how
 * long the tick took.
 *
 * - `elapsedMs` at or below `slowMs` (default 1800 ms) → `smallPages`
 *   (default 16);
 * - `elapsedMs` above `slowMs` → `null`, meaning "skip the merge this tick".
 *
 * `fastMs` is accepted so callers can pass the full policy object, but it does
 * not influence the result: a tick below `fastMs` and a tick between `fastMs`
 * and `slowMs` both receive the same small step.
 *
 * Input handling:
 * - a missing, non-numeric or NaN `elapsedMs` is treated as 0 (small step);
 * - `+Infinity` is treated as a busy tick and skips the merge, instead of
 *   being collapsed to 0 like other non-finite values.
 *
 * @param {{elapsedMs: number, fastMs?: number, slowMs?: number, smallPages?: number}} args
 * @returns {number|null} `smallPages`, or `null` to skip the merge entirely.
 */
export function fts5MergeBudgetPages({ elapsedMs, fastMs = 500, slowMs = 1800, smallPages = 16 } = {}) {
  // Only "unknown" readings (undefined, NaN, non-numbers) fall back to 0.
  // Infinity is a known, maximally slow reading and must stay comparable.
  const isUsableNumber = typeof elapsedMs === 'number' && !Number.isNaN(elapsedMs);
  const elapsed = isUsableNumber ? elapsedMs : 0;
  if (elapsed > slowMs) return null;
  return smallPages;
}
|
|
197
|
+
|
|
198
|
+
/**
 * Derive the watermark-handler FTS5 merge page budget from the wall-clock
 * budget still available to the maintenance drain.
 *
 * - `remainingMs` at or above `fullBudgetMs` (default 2000 ms), or an unknown
 *   budget (undefined, NaN, `+Infinity`, non-number) → `maxPages`
 *   (default 500);
 * - `remainingMs` at or below 0, including `-Infinity` → `minPages`
 *   (default 16), so an exhausted drain still takes a small step;
 * - otherwise → `maxPages` scaled by `remainingMs / fullBudgetMs`, rounded,
 *   and clamped into `[minPages, maxPages]`.
 *
 * @param {{remainingMs: number, maxPages?: number, minPages?: number, fullBudgetMs?: number}} args
 * @returns {number} Page count to pass to the FTS5 merge.
 */
export function fts5WatermarkBudgetPages({ remainingMs, maxPages = 500, minPages = 16, fullBudgetMs = 2000 } = {}) {
  // -Infinity is not finite, so without this guard it would fall into the
  // "unknown budget" branch below and request the most aggressive merge.
  if (remainingMs === -Infinity) return minPages;
  if (!Number.isFinite(remainingMs) || remainingMs >= fullBudgetMs) return maxPages;
  if (remainingMs <= 0) return minPages;
  const scaled = Math.round((remainingMs / fullBudgetMs) * maxPages);
  return Math.max(minPages, Math.min(maxPages, scaled));
}
|
|
214
|
+
|
|
215
|
+
/**
 * Issue the FTS5 `('optimize')` command against one table and then attempt a
 * `wal_checkpoint(TRUNCATE)`, treating the pair as a single unit of work.
 *
 * Lever E.5 (`SWEET_SEARCH_RECONCILE_FTS5_OPTIMIZE`). This helper does not
 * decide *when* optimizing is appropriate; the caller owns that gate (the
 * intended policy is true-idle ticks plus a table-size check).
 *
 * The table name is interpolated into the SQL text, so it is first required to
 * be a plain identifier (`[A-Za-z_][A-Za-z0-9_]*`); anything else throws
 * before the database is touched.
 *
 * A failing checkpoint is deliberately ignored: once the optimize statement
 * has run, the function reports success regardless of the checkpoint outcome.
 *
 * @param {import('better-sqlite3').Database} db
 * @param {string} tableName Name of the FTS5 virtual table (not the shadow).
 * @returns {{optimized: boolean}} Always `{ optimized: true }` when no error is thrown.
 * @throws {Error} If `tableName` is not a plain identifier, or if the optimize
 *   statement itself fails.
 */
export function fts5Optimize(db, tableName) {
  const isPlainIdentifier = /^[A-Za-z_][A-Za-z0-9_]*$/.test(tableName);
  if (!isPlainIdentifier) {
    throw new Error(`fts5Optimize: invalid table name ${tableName}`);
  }

  const optimizeSql = `INSERT INTO ${tableName}(${tableName}) VALUES('optimize')`;
  db.prepare(optimizeSql).run();

  try {
    // Shrink the WAL that the optimize transaction just grew.
    db.pragma('wal_checkpoint(TRUNCATE)');
  } catch {
    // Best-effort: a checkpoint failure must not surface as an optimize failure.
  }

  return { optimized: true };
}
|
|
247
|
+
|
|
176
248
|
export const __testing = { readVarint, STRUCTURE_ROWID };
|
|
@@ -172,7 +172,7 @@ export async function shouldSkipArtifactRebuild(options = {}) {
|
|
|
172
172
|
// Skip - not enough changes, artifacts exist, within time window
|
|
173
173
|
return {
|
|
174
174
|
shouldSkip: true,
|
|
175
|
-
reason: `Only ${changedFiles} files changed (threshold: ${ARTIFACT_THRESHOLDS.skipThreshold})
|
|
175
|
+
reason: `Only ${changedFiles} files changed (threshold: ${ARTIFACT_THRESHOLDS.skipThreshold})`,
|
|
176
176
|
state,
|
|
177
177
|
accumulatedTotal,
|
|
178
178
|
};
|
|
@@ -60,10 +60,29 @@ export async function runDedupPhase(allChunks, config = DEDUP_CONFIG) {
|
|
|
60
60
|
const fingerprints = computeFingerprints(texts, config);
|
|
61
61
|
const clusters = clusterFingerprints(fingerprints, config);
|
|
62
62
|
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
63
|
+
const stats = annotateDedupClusters(allChunks, fingerprints, clusters, config);
|
|
64
|
+
|
|
65
|
+
return { skipped: false, stats };
|
|
66
|
+
}
|
|
67
|
+
|
|
68
|
+
/**
|
|
69
|
+
* Apply dedup annotations (simhash + exemplar/alias assignment) to a list of
|
|
70
|
+
* `items` given their `fingerprints` and the `clusters` from
|
|
71
|
+
* clusterFingerprints(). Mutates each item's `.metadata` in place and returns
|
|
72
|
+
* the stats object.
|
|
73
|
+
*
|
|
74
|
+
* `items[i]` must expose `.id`, a mutable `.metadata`, and be acceptable to
|
|
75
|
+
* selectExemplar (text length via `.text`/`.content` OR a precomputed
|
|
76
|
+
* `._textLen`, plus path/hash fields). The full-corpus path passes the chunk
|
|
77
|
+
* objects directly; the streaming path passes lightweight per-chunk records
|
|
78
|
+
* (text spilled to disk, length carried as `_textLen`) so the SAME global
|
|
79
|
+
* dedup runs without holding every chunk's text in memory. Both paths produce
|
|
80
|
+
* byte-identical annotations.
|
|
81
|
+
*/
|
|
82
|
+
export function annotateDedupClusters(items, fingerprints, clusters, config = DEDUP_CONFIG) {
|
|
83
|
+
// Seed every item with simhash + self-exemplar defaults.
|
|
84
|
+
for (let i = 0; i < items.length; i++) {
|
|
85
|
+
const meta = (items[i].metadata = items[i].metadata || {});
|
|
67
86
|
meta.simhash = fingerprints[i].simhashHex;
|
|
68
87
|
meta.isExemplar = true;
|
|
69
88
|
meta.exemplarId = null;
|
|
@@ -78,6 +97,9 @@ export async function runDedupPhase(allChunks, config = DEDUP_CONFIG) {
|
|
|
78
97
|
? (config.liReuseJaccardThreshold ?? 0.95)
|
|
79
98
|
: Infinity;
|
|
80
99
|
|
|
100
|
+
const lenOf = (it) =>
|
|
101
|
+
typeof it._textLen === 'number' ? it._textLen : (it.text || it.content || '').length;
|
|
102
|
+
|
|
81
103
|
for (const cluster of clusters) {
|
|
82
104
|
if (!cluster.siblingIdxs || cluster.siblingIdxs.length === 0) continue;
|
|
83
105
|
clustersWithSiblings++;
|
|
@@ -98,12 +120,12 @@ export async function runDedupPhase(allChunks, config = DEDUP_CONFIG) {
|
|
|
98
120
|
}
|
|
99
121
|
|
|
100
122
|
const memberIdxs = [cluster.exemplarIdx, ...cluster.siblingIdxs];
|
|
101
|
-
const members = memberIdxs.map((idx) => ({ idx, chunk:
|
|
123
|
+
const members = memberIdxs.map((idx) => ({ idx, chunk: items[idx] }));
|
|
102
124
|
const exemplar = selectExemplar(members);
|
|
103
|
-
const exemplarId =
|
|
125
|
+
const exemplarId = items[exemplar.idx].id;
|
|
104
126
|
|
|
105
127
|
for (const m of members) {
|
|
106
|
-
const meta =
|
|
128
|
+
const meta = items[m.idx].metadata;
|
|
107
129
|
meta.clusterId = cluster.clusterId;
|
|
108
130
|
if (m.idx === exemplar.idx) {
|
|
109
131
|
meta.isExemplar = true;
|
|
@@ -123,21 +145,18 @@ export async function runDedupPhase(allChunks, config = DEDUP_CONFIG) {
|
|
|
123
145
|
meta.liReuseEligible = j >= liJaccardThreshold;
|
|
124
146
|
if (meta.liReuseEligible) liEligibleAliases++;
|
|
125
147
|
totalAliases++;
|
|
126
|
-
bytesSaved += (
|
|
148
|
+
bytesSaved += lenOf(items[m.idx]);
|
|
127
149
|
}
|
|
128
150
|
}
|
|
129
151
|
}
|
|
130
152
|
|
|
131
153
|
return {
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
liReuseJaccardThreshold: liJaccardThreshold === Infinity ? null : liJaccardThreshold,
|
|
139
|
-
bytesSaved,
|
|
140
|
-
},
|
|
154
|
+
totalChunks: items.length,
|
|
155
|
+
clustersWithSiblings,
|
|
156
|
+
totalAliases,
|
|
157
|
+
liEligibleAliases,
|
|
158
|
+
liReuseJaccardThreshold: liJaccardThreshold === Infinity ? null : liJaccardThreshold,
|
|
159
|
+
bytesSaved,
|
|
141
160
|
};
|
|
142
161
|
}
|
|
143
162
|
|
|
@@ -12,6 +12,11 @@
|
|
|
12
12
|
*/
|
|
13
13
|
|
|
14
14
|
/**
 * Text length used when comparing exemplar candidates.
 *
 * @param {{_textLen?: number, text?: string, content?: string}} chunk
 * @returns {number} `chunk._textLen` when it is a number; otherwise the length
 *   of `chunk.text`, falling back to `chunk.content`, falling back to `''`.
 */
function lengthOf(chunk) {
  // Streaming dedup hands over lightweight records whose text is not held in
  // memory; they carry the length in `_textLen` instead. Full chunks have no
  // `_textLen` and are measured from their text as before.
  const precomputed = chunk._textLen;
  if (typeof precomputed === 'number') {
    return precomputed;
  }
  const body = chunk.text || chunk.content || '';
  return body.length;
}
|
|
17
22
|
|
|
@@ -41,7 +41,6 @@ import { existsSync } from 'fs';
|
|
|
41
41
|
import { DB_PATHS, LATE_INTERACTION_CONFIG } from '../infrastructure/config/index.js';
|
|
42
42
|
import { applyPersistedLiModel } from '../infrastructure/init-config.js';
|
|
43
43
|
import { resolveRelationshipTargets } from '../graph/relationship-resolver.js';
|
|
44
|
-
import { requireNativeAnn as requireNativeAnnBackend } from '../vector-store/hnsw-index.js';
|
|
45
44
|
import { getStats as getIncrementalStats } from './incremental-tracker.js';
|
|
46
45
|
import { ARTIFACT_THRESHOLDS } from './artifact-builder.js';
|
|
47
46
|
|
|
@@ -58,7 +57,6 @@ import {
|
|
|
58
57
|
buildVectorIndex,
|
|
59
58
|
} from './indexer-build.js';
|
|
60
59
|
import {
|
|
61
|
-
incrementalUpdateHNSW, buildHNSWIndex,
|
|
62
60
|
buildLateInteractionIndex, buildQuantizedArtifactsPhase,
|
|
63
61
|
} from './indexer-ann.js';
|
|
64
62
|
import {
|
|
@@ -94,7 +92,6 @@ function parseArgs(argv) {
|
|
|
94
92
|
lateInteractionModel: args.find(a => a.startsWith('--late-interaction-model='))?.split('=')[1] || null,
|
|
95
93
|
lateInteractionPool: parseInt(args.find(a => a.startsWith('--late-interaction-pool='))?.split('=')[1] || process.env.SWEET_SEARCH_LI_POOL_FACTOR || '1', 10),
|
|
96
94
|
lateInteractionExtendedSkiplist: args.includes('--late-interaction-skiplist=extended'),
|
|
97
|
-
requireNativeAnn: args.includes('--require-native-ann'),
|
|
98
95
|
sqliteFastMode: args.includes('--sqlite-fast') || process.env.SWEET_SEARCH_SQLITE_FAST_MODE === '1',
|
|
99
96
|
verbose: args.includes('--verbose') || args.includes('-v'),
|
|
100
97
|
};
|
|
@@ -110,7 +107,7 @@ async function main() {
|
|
|
110
107
|
const { dryRun, graphOnly, vectorsOnly, fullReindex, showStats, resolveOnly,
|
|
111
108
|
skipSummaryRegen, filesFromStdin, quiet, forceArtifacts, help,
|
|
112
109
|
noLateInteraction, lateInteractionModel, lateInteractionPool, lateInteractionExtendedSkiplist,
|
|
113
|
-
|
|
110
|
+
sqliteFastMode, verbose } = parseArgs();
|
|
114
111
|
|
|
115
112
|
if (quiet) {
|
|
116
113
|
setQuietMode(true);
|
|
@@ -172,8 +169,6 @@ Options:
|
|
|
172
169
|
--late-interaction-model=ID Use specific model (lateon-code or lateon-code-edge)
|
|
173
170
|
--late-interaction-pool=N Token pooling factor (2=halve tokens, 3=third). Reduces index size.
|
|
174
171
|
--late-interaction-skiplist=extended Extend skiplist with code-noise tokens (whitespace, semicolons)
|
|
175
|
-
--require-native-ann Fail fast if native ANN backend (usearch) is unavailable.
|
|
176
|
-
Prevents accidental fallback to slower JS ANN in benchmarks.
|
|
177
172
|
--sqlite-fast Use unsafe SQLite pragmas for faster builds (benchmarking only).
|
|
178
173
|
Can also be set via SWEET_SEARCH_SQLITE_FAST_MODE=1.
|
|
179
174
|
WARNING: Data may be lost on crash - do NOT use in production.
|
|
@@ -206,9 +201,9 @@ This is intentional since relationships span across files.
|
|
|
206
201
|
|
|
207
202
|
Output:
|
|
208
203
|
.sweet-search/code-graph.db Code graph with FTS5 (lexical search)
|
|
209
|
-
.sweet-search/codebase.db
|
|
210
|
-
.sweet-search/codebase-hnsw.idx HNSW index (fast ANN)
|
|
211
|
-
.sweet-search/merkle-state.json
|
|
204
|
+
.sweet-search/codebase.db Vector embeddings (semantic search)
|
|
205
|
+
.sweet-search/codebase-binary-hnsw.idx Binary HNSW index (fast ANN)
|
|
206
|
+
.sweet-search/merkle-state.json Incremental indexing state
|
|
212
207
|
`);
|
|
213
208
|
process.exit(0);
|
|
214
209
|
}
|
|
@@ -252,10 +247,6 @@ Output:
|
|
|
252
247
|
return;
|
|
253
248
|
}
|
|
254
249
|
|
|
255
|
-
if (requireNativeAnn) {
|
|
256
|
-
await requireNativeAnnBackend();
|
|
257
|
-
}
|
|
258
|
-
|
|
259
250
|
try {
|
|
260
251
|
// =========================================================================
|
|
261
252
|
// PHASE 1: File Discovery
|
|
@@ -473,6 +464,39 @@ const _isDirectRun = (() => {
|
|
|
473
464
|
}
|
|
474
465
|
})();
|
|
475
466
|
if (_isDirectRun) {
|
|
467
|
+
// ── Bounded-memory guard ──
// Rationale: with Node's default heap sizing on large-RAM machines, V8 can
// defer GC long enough for the embedding phase's per-batch garbage to get the
// indexer OOM-killed on very large repos. This guard does not cap the heap;
// it only forces a collection when resident memory is already high.
//
// How it works:
//   1. Reuse `globalThis.gc` when the process was launched with --expose-gc.
//   2. Otherwise switch the flag on at runtime, grab `gc` from a fresh vm
//      context, and switch the flag back off (no launch flag, no re-exec).
//   3. Every 4 s, call gc() only if RSS exceeds a watermark of half the
//      machine's RAM, clamped to the range [2 GiB, 6 GiB].
//
// Set SWEET_SEARCH_NO_GC_GUARD to any non-empty value to opt out. Any failure
// while setting the guard up leaves indexing to proceed without it.
if (!process.env.SWEET_SEARCH_NO_GC_GUARD) {
  try {
    let collect = globalThis.gc;

    if (typeof collect !== 'function') {
      const v8 = await import('node:v8');
      const vm = await import('node:vm');
      v8.setFlagsFromString('--expose-gc');
      // The captured function is kept and used after the flag is reset.
      collect = vm.runInNewContext('gc');
      v8.setFlagsFromString('--no-expose-gc');
    }

    if (typeof collect === 'function') {
      const os = await import('node:os');
      const GIB = 1024 ** 3;
      const halfOfRam = Math.floor(os.totalmem() * 0.5);
      const rssLimit = Math.min(6 * GIB, Math.max(2 * GIB, halfOfRam));

      const watchdog = setInterval(() => {
        try {
          if (process.memoryUsage().rss > rssLimit) {
            collect();
          }
        } catch {
          /* best-effort */
        }
      }, 4000);
      // Unref'd so the timer never keeps the process alive on its own.
      watchdog.unref();
    }
  } catch {
    /* GC guard unavailable — indexing proceeds unchanged */
  }
}
|
|
476
500
|
main().catch(err => {
|
|
477
501
|
console.error(err);
|
|
478
502
|
process.exit(1);
|
|
@@ -484,7 +508,6 @@ export {
|
|
|
484
508
|
discoverFiles,
|
|
485
509
|
buildCodeGraph,
|
|
486
510
|
buildVectorIndex,
|
|
487
|
-
buildHNSWIndex,
|
|
488
511
|
buildLateInteractionIndex,
|
|
489
512
|
buildQuantizedArtifactsPhase,
|
|
490
513
|
parseArgs,
|