sweet-search 2.5.14 → 2.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +36 -9
- package/core/cli.js +41 -3
- package/core/embedding/embedding-local-model.js +106 -10
- package/core/embedding/embedding-service.js +59 -1
- package/core/embedding/model-client.mjs +257 -0
- package/core/embedding/model-server.mjs +217 -0
- package/core/incremental-indexing/application/maintenance-handlers.mjs +19 -98
- package/core/incremental-indexing/application/maintenance-worker.mjs +46 -9
- package/core/incremental-indexing/application/operator-cli.mjs +14 -5
- package/core/incremental-indexing/application/production-reconciler-helpers.mjs +40 -0
- package/core/incremental-indexing/application/production-reconciler.mjs +718 -54
- package/core/incremental-indexing/application/reconciler.mjs +87 -15
- package/core/incremental-indexing/domain/cutoff-cache.mjs +191 -0
- package/core/incremental-indexing/domain/interval-autotune.mjs +84 -1
- package/core/incremental-indexing/domain/reconcile-counters.mjs +0 -4
- package/core/incremental-indexing/domain/watermark-scheduler.mjs +0 -24
- package/core/incremental-indexing/infrastructure/maintenance-state-reader.mjs +2 -26
- package/core/incremental-indexing/infrastructure/manifest.mjs +1 -9
- package/core/incremental-indexing/infrastructure/sqlite-fts5.mjs +72 -0
- package/core/indexing/artifact-builder.js +1 -1
- package/core/indexing/dedup/dedup-phase.js +36 -17
- package/core/indexing/dedup/exemplar-selector.js +5 -0
- package/core/indexing/index-codebase-v21.js +37 -14
- package/core/indexing/index-maintainer.mjs +337 -6
- package/core/indexing/indexer-ann.js +27 -434
- package/core/indexing/indexer-build.js +30 -14
- package/core/indexing/indexer-manifest.js +0 -3
- package/core/indexing/indexer-phases.js +101 -25
- package/core/indexing/maintainer-launcher.mjs +22 -0
- package/core/indexing/maintainer-watcher.mjs +397 -0
- package/core/indexing/os-priority.mjs +160 -0
- package/core/indexing/rss-budget.mjs +425 -0
- package/core/indexing/streaming-vectors.js +450 -0
- package/core/infrastructure/config/platform.js +14 -10
- package/core/infrastructure/onnx-session-utils.js +37 -0
- package/core/infrastructure/sparse-gram-delta-reader.js +11 -1
- package/core/ranking/late-interaction-index.js +58 -7
- package/core/search/daemon-registry.js +199 -0
- package/core/search/search-read-semantic.js +9 -3
- package/core/search/search-semantic.js +6 -29
- package/core/search/search-server.js +527 -27
- package/core/search/session-daemon-prewarm.mjs +110 -1
- package/core/search/sweet-search.js +0 -38
- package/core/vector-store/binary-hnsw-index.js +692 -78
- package/core/vector-store/index.js +1 -4
- package/mcp/tool-handlers.js +1 -2
- package/package.json +11 -8
- package/scripts/uninstall.js +2 -0
- package/core/vector-store/hnsw-index.js +0 -751
|
@@ -0,0 +1,217 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* G8 — Shared model server: the resident-model SERVER process.
|
|
3
|
+
*
|
|
4
|
+
* Loads the ONNX embedding model ONCE and serves embedding requests for many
|
|
5
|
+
* per-repo daemons over a Unix domain socket, saving N−1 model copies
|
|
6
|
+
* (~10 GB across 8 repos), keeping per-repo crash isolation, and making
|
|
7
|
+
* per-repo state cheap to evict.
|
|
8
|
+
*
|
|
9
|
+
* It also cleanly resolves G3's process-global ORT-config contamination: this
|
|
10
|
+
* process can opt into the background ORT profile (force_spinning_stop +
|
|
11
|
+
* arena-off + clamped intra-op threads) WITHOUT affecting any query-serving
|
|
12
|
+
* process, because the model lives here and nowhere else. Opt in with
|
|
13
|
+
* `SWEET_SEARCH_MODEL_SERVER_BACKGROUND=1` (or pass `{ background: true }` to
|
|
14
|
+
* `startModelServer`); off by default.
|
|
15
|
+
*
|
|
16
|
+
* Wire protocol + codec live in `model-client.mjs` (single source of truth);
|
|
17
|
+
* this module imports them so framing/serialization can never drift between
|
|
18
|
+
* the two sides. Embeddings travel as RAW Float32 little-endian bytes →
|
|
19
|
+
* byte-identical to the in-process path (same model, same preprocessing).
|
|
20
|
+
*
|
|
21
|
+
* Gate: only started when `SWEET_SEARCH_SHARED_MODEL_SERVER==='1'`
|
|
22
|
+
* (the launcher checks this); the in-process embedding path is the default and
|
|
23
|
+
* is completely untouched when the flag is off.
|
|
24
|
+
*/
|
|
25
|
+
|
|
26
|
+
import net from 'node:net';
import fs from 'node:fs';
import { pathToFileURL } from 'node:url';
|
|
28
|
+
|
|
29
|
+
import {
|
|
30
|
+
encodeFrame,
|
|
31
|
+
FrameDecoder,
|
|
32
|
+
packEmbeddings,
|
|
33
|
+
modelServerSocketPath,
|
|
34
|
+
PROTOCOL_VERSION,
|
|
35
|
+
} from './model-client.mjs';
|
|
36
|
+
|
|
37
|
+
import {
|
|
38
|
+
callLocalModelBucketed,
|
|
39
|
+
configureLocalModelRuntime,
|
|
40
|
+
} from './embedding-local-model.js';
|
|
41
|
+
|
|
42
|
+
/**
 * Produce the embeddings for a single request.
 *
 * Delegates to `callLocalModelBucketed`, forwarding only the four bucketing
 * options that can survive serialization (`maxLength`, `hardCap`,
 * `resolveHardCap`, `batchingSafety`). Anything else on `providerOptions`
 * (e.g. an `onProgress` callback) is intentionally dropped.
 *
 * Exported so the server-side path can be exercised without a socket.
 *
 * @param {string[]} texts inputs to embed; a non-array or empty array yields `[]`.
 * @param {object} [providerOptions] bucketing options from the request.
 * @returns {Promise<Array>} whatever `callLocalModelBucketed` resolves to, or `[]`.
 */
export async function computeEmbeddingsForRequest(texts, providerOptions = {}) {
  const hasWork = Array.isArray(texts) && texts.length > 0;
  if (!hasWork) {
    return [];
  }

  // Functions such as onProgress cannot travel over the wire, so only the
  // serializable knobs are picked out here.
  const { maxLength, hardCap, resolveHardCap, batchingSafety } = providerOptions;
  return callLocalModelBucketed(texts, { maxLength, hardCap, resolveHardCap, batchingSafety });
}
|
|
61
|
+
|
|
62
|
+
/**
 * Dispatch one decoded request header and write the reply frame to `socket`.
 *
 * - `ping`          → a `pong` frame.
 * - `getEmbeddings` → an `embeddings` frame carrying the packed payload, or an
 *                     `error` frame (same `requestId`) if computing, packing
 *                     or writing throws.
 * - anything else   → an `error` frame naming the unknown type.
 *
 * A header that is missing or not an object is dropped without a reply.
 *
 * @param {{ write: Function }} socket connection to reply on.
 * @param {object} header decoded request header.
 * @returns {Promise<void>}
 */
async function handleRequestFrame(socket, header) {
  const usable = Boolean(header) && typeof header === 'object';
  if (!usable) return;

  // Shared builder for error replies; key order is kept stable on purpose.
  const errorFrame = (requestId, message) => encodeFrame({
    type: 'error',
    requestId,
    message,
    v: PROTOCOL_VERSION,
  });

  switch (header.type) {
    case 'ping': {
      socket.write(encodeFrame({ type: 'pong', v: PROTOCOL_VERSION }));
      return;
    }

    case 'getEmbeddings': {
      const requestId = header.requestId;
      try {
        const texts = header.texts || [];
        const providerOptions = header.providerOptions || {};
        const vectors = await computeEmbeddingsForRequest(texts, providerOptions);
        const { payload, dims } = packEmbeddings(vectors);
        const replyHeader = { type: 'embeddings', requestId, dims, v: PROTOCOL_VERSION };
        socket.write(encodeFrame(replyHeader, payload));
      } catch (err) {
        socket.write(errorFrame(requestId, err?.message || String(err)));
      }
      return;
    }

    default: {
      // Unknown request type — reply with a structured error.
      socket.write(errorFrame(header.requestId, `unknown request type: ${header.type}`));
    }
  }
}
|
|
102
|
+
|
|
103
|
+
/**
 * Hook a freshly accepted socket up to the request handler.
 *
 * Each connection owns a private `FrameDecoder`; every completed frame is
 * handed to `handleRequestFrame` without awaiting it. A decoder error
 * destroys the socket. A rejected handler produces a generic `error` reply.
 * Socket-level `error` events are swallowed so that a vanished client
 * cannot bring the server down.
 *
 * @param {import('node:net').Socket} socket accepted client connection.
 * @returns {void}
 */
export function attachConnection(socket) {
  // Last-resort reply when the handler itself rejects.
  const reportInternalError = (header) => {
    try {
      const frame = encodeFrame({
        type: 'error',
        requestId: header?.requestId,
        message: 'internal model-server error',
        v: PROTOCOL_VERSION,
      });
      socket.write(frame);
    } catch { /* socket gone */ }
  };

  const onFrame = (header, _payload, err) => {
    if (!err) {
      // Fire-and-forget: the handler writes its own reply.
      handleRequestFrame(socket, header).catch(() => reportInternalError(header));
      return;
    }
    // Undecodable stream — drop this client only.
    try { socket.destroy(); } catch { /* ignore */ }
  };

  socket.on('error', () => { /* client vanished — ignore, keep server up */ });
  const decoder = new FrameDecoder(onFrame);
  socket.on('data', (chunk) => { decoder.push(chunk); });
}
|
|
130
|
+
|
|
131
|
+
/**
 * Start the shared model server.
 *
 * Steps, in order:
 *  1. Apply the background ORT profile if requested. This must happen before
 *     the first encode, because the local model is a singleton built on first
 *     encode and configuring it afterwards would be a silent no-op.
 *  2. Probe `socketPath`. If a server answers, reject with `EADDRINUSE`
 *     instead of unlinking a live server's socket out from under it. Only a
 *     path that nobody answers on is treated as stale and removed.
 *  3. Bind the Unix socket with owner-only permissions and serve until
 *     `close()` is called.
 *
 * `close()` stops accepting, destroys every still-open client connection
 * (`net.Server#close` otherwise waits for clients to disconnect on their own,
 * which never happens with persistent connections), removes the socket file
 * and resolves. It never rejects.
 *
 * @param {object} [opts]
 * @param {string} [opts.socketPath] override the socket path.
 * @param {boolean} [opts.background] opt into the bg ORT profile for THIS
 *   process only (default: `SWEET_SEARCH_MODEL_SERVER_BACKGROUND === '1'`).
 * @returns {Promise<{ socketPath: string, server: import('node:net').Server, close: () => Promise<void> }>}
 * @throws {Error} with `code === 'EADDRINUSE'` when a live server already
 *   owns `socketPath`; also rejects with the underlying `listen` error.
 */
export async function startModelServer(opts = {}) {
  const socketPath = opts.socketPath || modelServerSocketPath();
  const background = opts.background
    ?? (process.env.SWEET_SEARCH_MODEL_SERVER_BACKGROUND === '1');

  // Configure the resident model's runtime profile up front. This process owns
  // the only model copy, so bg-profiling here cannot throttle any query server.
  if (background) {
    try {
      configureLocalModelRuntime({ background: true });
    } catch (err) {
      // Best-effort: never let profile config abort server startup.
      if (process.env.DEBUG_CATCHES) {
        process.stderr.write(`[model-server] bg profile config failed: ${err?.message || err}\n`);
      }
    }
  }

  // Is somebody already serving on this path? Unlinking blindly would let
  // listen() succeed and orphan the live server (its clients keep working,
  // new clients land on us, and the model is loaded twice).
  const occupied = await new Promise((resolve) => {
    const probe = net.connect(socketPath);
    let settled = false;
    const settle = (isLive) => {
      if (settled) return;
      settled = true;
      probe.destroy();
      resolve(isLive);
    };
    probe.once('connect', () => settle(true));
    probe.on('error', () => settle(false));
    // A path that does not answer promptly is handled like a stale one.
    probe.setTimeout(1000, () => settle(false));
  });
  if (occupied) {
    const busy = new Error(`model server already listening on ${socketPath}`);
    busy.code = 'EADDRINUSE';
    throw busy;
  }

  // Remove any stale socket left by a crashed predecessor. (A small window
  // between the probe and listen() remains; the probe only narrows it.)
  try { fs.unlinkSync(socketPath); } catch { /* nothing to remove */ }

  // Track live connections so close() can terminate them.
  const connections = new Set();
  const server = net.createServer((socket) => {
    connections.add(socket);
    socket.once('close', () => connections.delete(socket));
    attachConnection(socket);
  });

  await new Promise((resolve, reject) => {
    const onErr = (err) => { server.off('listening', onOk); reject(err); };
    const onOk = () => { server.off('error', onErr); resolve(); };
    server.once('error', onErr);
    server.once('listening', onOk);
    // Restrict permissions on the socket file (owner-only).
    const prevUmask = process.umask(0o077);
    try {
      server.listen(socketPath);
    } finally {
      process.umask(prevUmask);
    }
  });

  // Belt-and-suspenders: chmod explicitly in case umask was ineffective.
  try { fs.chmodSync(socketPath, 0o700); } catch { /* best-effort */ }

  const close = () => new Promise((resolve) => {
    try {
      server.close(() => {
        try { fs.unlinkSync(socketPath); } catch { /* already gone */ }
        resolve();
      });
      // server.close() only calls back once every connection has ended, so
      // end them ourselves rather than waiting on clients.
      for (const socket of connections) socket.destroy();
    } catch {
      resolve();
    }
  });

  return { socketPath, server, close };
}
|
|
196
|
+
|
|
197
|
+
// ── CLI entrypoint ────────────────────────────────────────────────────────
|
|
198
|
+
// Allow `node core/embedding/model-server.mjs` to run a standalone server.
|
|
199
|
+
// Guarded so importing this module (tests, launcher) never auto-binds.
|
|
200
|
+
/**
 * Decide whether the module at `moduleUrl` is the script Node was started with.
 *
 * `import.meta.url` is a percent-encoded `file:` URL of the resolved module,
 * so gluing `file://` onto `process.argv[1]` misses entry paths containing
 * spaces or non-ASCII characters, symlinked entry paths, and Windows paths.
 * The entry path is therefore converted with `pathToFileURL`, both as given
 * and with symlinks resolved.
 *
 * @param {string} moduleUrl the module's `import.meta.url`.
 * @param {string|undefined} entryPath `process.argv[1]`.
 * @returns {boolean}
 */
function _isMainModule(moduleUrl, entryPath) {
  if (!entryPath) return false;
  const candidates = [entryPath];
  try {
    candidates.push(fs.realpathSync(entryPath));
  } catch { /* entry path not resolvable — compare it as given */ }
  return candidates.some((candidate) => pathToFileURL(candidate).href === moduleUrl);
}

// True only when this file is the process entry point; any failure while
// deciding counts as "imported", so importing never auto-binds a server.
const _isMain = (() => {
  try {
    return _isMainModule(import.meta.url, process.argv[1]);
  } catch {
    return false;
  }
})();
|
|
207
|
+
|
|
208
|
+
// Standalone mode: start the server, announce the socket path on stdout, and
// exit with status 1 (after reporting on stderr) if startup fails.
if (_isMain) {
  (async () => {
    try {
      const { socketPath } = await startModelServer();
      process.stdout.write(`[model-server] listening on ${socketPath}\n`);
    } catch (err) {
      process.stderr.write(`[model-server] failed to start: ${err?.message || err}\n`);
      process.exit(1);
    }
  })();
}
|
|
@@ -16,12 +16,12 @@
|
|
|
16
16
|
* Manifest semantics:
|
|
17
17
|
* - sparse_gram, LI segment: the reconcile manifest is unchanged. New
|
|
18
18
|
* artifacts replace old ones at canonical paths read fresh per query.
|
|
19
|
-
* - HNSW
|
|
20
|
-
*
|
|
21
|
-
*
|
|
22
|
-
*
|
|
23
|
-
*
|
|
24
|
-
*
|
|
19
|
+
* - Binary HNSW: canonical paths unchanged; the reconcile manifest
|
|
20
|
+
* stays at the current epoch. Cross-process readers that cache a
|
|
21
|
+
* binary HNSW index in memory MUST already invalidate on manifest
|
|
22
|
+
* change — but maintenance does not bump the epoch by itself. This
|
|
23
|
+
* matches the existing reconcile tick semantics; a follow-up
|
|
24
|
+
* workstream can add versioned tier paths if needed.
|
|
25
25
|
*
|
|
26
26
|
* The handlers degrade safely when artifacts are missing/corrupt — they
|
|
27
27
|
* throw a descriptive error which the worker converts into the standard
|
|
@@ -33,7 +33,6 @@ import path from 'node:path';
|
|
|
33
33
|
import Database from 'better-sqlite3';
|
|
34
34
|
|
|
35
35
|
import { BinaryHNSWIndex } from '../../vector-store/binary-hnsw-index.js';
|
|
36
|
-
import { HNSWIndex } from '../../vector-store/hnsw-index.js';
|
|
37
36
|
import { LateInteractionIndex } from '../../ranking/late-interaction-index.js';
|
|
38
37
|
import { compactDeltaSegments, listDeltaSegments } from '../infrastructure/sparse-gram-delta.mjs';
|
|
39
38
|
import { mergeLiSegments, LI_MERGE_GRACE_MS } from '../infrastructure/li-segment-merge.mjs';
|
|
@@ -160,9 +159,8 @@ export async function binaryHnswHandler(job, { stateDir, onProgress = null }) {
|
|
|
160
159
|
await existing.load(indexPath);
|
|
161
160
|
progress('maintenance:binary-hnsw:loaded');
|
|
162
161
|
|
|
163
|
-
// Liveness authority is codebase.db
|
|
164
|
-
// binary reclamation self-healing
|
|
165
|
-
// (which already rebuilds from `vectors WHERE epoch_retired IS NULL`): a
|
|
162
|
+
// Liveness authority is codebase.db (`vectors WHERE epoch_retired IS NULL`),
|
|
163
|
+
// NOT the binary stale bitmap. This makes binary reclamation self-healing: a
|
|
166
164
|
// vector retired in codebase.db is dropped here even if its binary stale bit
|
|
167
165
|
// was never set. Falls back to the stale bitmap only when codebase.db is
|
|
168
166
|
// unavailable.
|
|
@@ -195,8 +193,18 @@ export async function binaryHnswHandler(job, { stateDir, onProgress = null }) {
|
|
|
195
193
|
maxElements: existing.maxElements,
|
|
196
194
|
});
|
|
197
195
|
fresh.resetForBuild();
|
|
196
|
+
// When deterministic levels are enabled, the compacted graph must be
|
|
197
|
+
// reproducible: insert surviving ids in a FIXED sorted order so the
|
|
198
|
+
// rebuild is independent of the scan/encounter order above. Combined with
|
|
199
|
+
// levelForId() (gated by the same flag inside `add()`), this makes the
|
|
200
|
+
// compaction path agree byte-for-byte with batched/per-file builds.
|
|
201
|
+
// DEFAULT-ON (disable with SWEET_SEARCH_HNSW_DETERMINISTIC_LEVELS=0) →
|
|
202
|
+
// set the flag to '0' to preserve today's encounter-order insertion exactly.
|
|
203
|
+
const insertOrder = process.env.SWEET_SEARCH_HNSW_DETERMINISTIC_LEVELS !== '0'
|
|
204
|
+
? [...live].sort((a, b) => (a.id < b.id ? -1 : a.id > b.id ? 1 : 0))
|
|
205
|
+
: live;
|
|
198
206
|
let added = 0;
|
|
199
|
-
for (const v of
|
|
207
|
+
for (const v of insertOrder) {
|
|
200
208
|
await fresh.add(v.id, v.binary, v.metadata, v.int8);
|
|
201
209
|
added += 1;
|
|
202
210
|
if (added % 500 === 0) progress('maintenance:binary-hnsw:add');
|
|
@@ -214,92 +222,6 @@ export async function binaryHnswHandler(job, { stateDir, onProgress = null }) {
|
|
|
214
222
|
};
|
|
215
223
|
}
|
|
216
224
|
|
|
217
|
-
/* ------------------------------------------------------------------ *
|
|
218
|
-
* float_hnsw *
|
|
219
|
-
* ------------------------------------------------------------------ */
|
|
220
|
-
|
|
221
|
-
/**
|
|
222
|
-
* Float HNSW clean replacement.
|
|
223
|
-
*
|
|
224
|
-
* Source of truth for "which vectors are live" is `codebase.db`. The
|
|
225
|
-
* existing HNSW meta.json's idMap is also pruned, but we re-read the DB
|
|
226
|
-
* to pick up `embedding` blobs the in-memory HNSWIndex doesn't expose.
|
|
227
|
-
*
|
|
228
|
-
* Caller invariant: the codebase.db schema columns (`id`, `embedding`,
|
|
229
|
-
* `metadata`, `epoch_retired`) are stable — verified in the production
|
|
230
|
-
* reconciler `applyVectorDelta` path.
|
|
231
|
-
*/
|
|
232
|
-
export async function floatHnswHandler(job, { stateDir, onProgress = null }) {
|
|
233
|
-
const progress = progressFn(onProgress);
|
|
234
|
-
const indexPath = path.join(stateDir, 'codebase-hnsw.idx');
|
|
235
|
-
const metaPath = path.join(stateDir, 'codebase-hnsw.meta.json');
|
|
236
|
-
const dbPath = path.join(stateDir, 'codebase.db');
|
|
237
|
-
if (!fs.existsSync(metaPath)) return { skipped: 'no-index' };
|
|
238
|
-
if (!fs.existsSync(dbPath)) return { skipped: 'no-vector-db' };
|
|
239
|
-
|
|
240
|
-
// Load existing index to discover dimension / parameters (cheap).
|
|
241
|
-
const existing = new HNSWIndex({ indexPath });
|
|
242
|
-
try { await existing.load(indexPath); } catch { return { skipped: 'load-failed' }; }
|
|
243
|
-
progress('maintenance:float-hnsw:loaded');
|
|
244
|
-
const dimension = existing.dimension;
|
|
245
|
-
const stalePath = existing.stalePath;
|
|
246
|
-
|
|
247
|
-
const stalePresent = fs.existsSync(stalePath);
|
|
248
|
-
const liveIdsBefore = new Set(existing.idMap.keys());
|
|
249
|
-
|
|
250
|
-
// Walk live vectors from codebase.db.
|
|
251
|
-
const db = new Database(dbPath, { readonly: true });
|
|
252
|
-
let liveRows;
|
|
253
|
-
try {
|
|
254
|
-
liveRows = db.prepare(
|
|
255
|
-
'SELECT id, embedding, metadata FROM vectors WHERE epoch_retired IS NULL'
|
|
256
|
-
).all();
|
|
257
|
-
} finally {
|
|
258
|
-
db.close();
|
|
259
|
-
}
|
|
260
|
-
|
|
261
|
-
// If everything aligns AND no stale bitmap → nothing to do.
|
|
262
|
-
if (!stalePresent && liveIdsBefore.size === liveRows.length) {
|
|
263
|
-
return { skipped: 'no-stale-vectors', dropped: 0 };
|
|
264
|
-
}
|
|
265
|
-
|
|
266
|
-
// Rebuild the index in memory and let `HNSWIndex.save()` publish via
|
|
267
|
-
// its tmp+rename protocol — that protocol keeps any cross-process
|
|
268
|
-
// `usearch.view()` mmap valid against the unlinked old inode.
|
|
269
|
-
const fresh = new HNSWIndex({
|
|
270
|
-
indexPath,
|
|
271
|
-
stalePath,
|
|
272
|
-
dimension,
|
|
273
|
-
maxElements: existing.maxElements,
|
|
274
|
-
M: existing.M,
|
|
275
|
-
efConstruction: existing.efConstruction,
|
|
276
|
-
efSearch: existing.efSearch,
|
|
277
|
-
metric: existing.metric,
|
|
278
|
-
});
|
|
279
|
-
await fresh.init();
|
|
280
|
-
for (let i = 0; i < liveRows.length; i += 1) {
|
|
281
|
-
const row = liveRows[i];
|
|
282
|
-
const embedding = float32FromBuffer(row.embedding);
|
|
283
|
-
let meta;
|
|
284
|
-
try { meta = JSON.parse(row.metadata || '{}'); } catch { meta = {}; }
|
|
285
|
-
const truncated = embedding.length > dimension ? embedding.slice(0, dimension) : embedding;
|
|
286
|
-
await fresh.add(row.id, truncated, meta);
|
|
287
|
-
if (i > 0 && i % 500 === 0) progress('maintenance:float-hnsw:add');
|
|
288
|
-
}
|
|
289
|
-
await fresh.save(indexPath);
|
|
290
|
-
progress('maintenance:float-hnsw:saved');
|
|
291
|
-
// Stale bitmap is meaningless after rebuild — keys are fresh.
|
|
292
|
-
safeUnlink(stalePath);
|
|
293
|
-
|
|
294
|
-
return {
|
|
295
|
-
tier: 'float_hnsw',
|
|
296
|
-
kept: liveRows.length,
|
|
297
|
-
dropped: Math.max(0, liveIdsBefore.size - liveRows.length),
|
|
298
|
-
staleBitmapCleared: true,
|
|
299
|
-
atomicPublish: true,
|
|
300
|
-
};
|
|
301
|
-
}
|
|
302
|
-
|
|
303
225
|
/* ------------------------------------------------------------------ *
|
|
304
226
|
* li_segment *
|
|
305
227
|
* ------------------------------------------------------------------ */
|
|
@@ -510,7 +432,6 @@ export function reclamationHandlers(stateDir) {
|
|
|
510
432
|
return {
|
|
511
433
|
sparse_gram: (job, ctx = {}) => sparseGramHandler(job, { stateDir, onProgress: ctx.onProgress }),
|
|
512
434
|
binary_hnsw: (job, ctx = {}) => binaryHnswHandler(job, { stateDir, onProgress: ctx.onProgress }),
|
|
513
|
-
float_hnsw: (job, ctx = {}) => floatHnswHandler(job, { stateDir, onProgress: ctx.onProgress }),
|
|
514
435
|
li_segment: (job, ctx = {}) => liSegmentHandler(job, { stateDir, onProgress: ctx.onProgress }),
|
|
515
436
|
li_segments: (job, ctx = {}) => liSegmentsHandler(job, { stateDir, onProgress: ctx.onProgress }),
|
|
516
437
|
vector_gc: (job, ctx = {}) => vectorGcHandler(job, { stateDir, onProgress: ctx.onProgress }),
|
|
@@ -34,9 +34,20 @@ import fs from 'node:fs';
|
|
|
34
34
|
import path from 'node:path';
|
|
35
35
|
import process from 'node:process';
|
|
36
36
|
import Database from 'better-sqlite3';
|
|
37
|
-
import { fts5Merge } from '../infrastructure/sqlite-fts5.mjs';
|
|
37
|
+
import { fts5Merge, fts5Optimize, fts5SegmentCount, fts5WatermarkBudgetPages } from '../infrastructure/sqlite-fts5.mjs';
|
|
38
38
|
import { reclamationHandlers } from './maintenance-handlers.mjs';
|
|
39
39
|
|
|
40
|
+
// SWEET_SEARCH_RECONCILE_FTS5_BUDGET is DEFAULT-ON (disable with =0): the
|
|
41
|
+
// budget-derived merge scales page count down as the drain budget runs out so a
|
|
42
|
+
// near-exhausted window makes a small step instead of one big overrun. Verified
|
|
43
|
+
// recall-neutral + soak == baseline. Set to '0' for the legacy fixed 500-page
|
|
44
|
+
// merge. SWEET_SEARCH_RECONCILE_FTS5_OPTIMIZE stays DEFAULT-OFF (heavy idle
|
|
45
|
+
// compaction — strict opt-in).
|
|
46
|
+
// Budget-derived merge sizing: enabled unless the variable is exactly '0'.
const fts5BudgetEnabled = () => process.env.SWEET_SEARCH_RECONCILE_FTS5_BUDGET !== '0';
// Full optimize pass: enabled only when the variable is exactly '1'.
const fts5OptimizeEnabled = () => process.env.SWEET_SEARCH_RECONCILE_FTS5_OPTIMIZE === '1';

const FTS5_OPTIMIZE_MIN_SEGMENTS_DEFAULT = 8;

/**
 * Parse the optimize segment threshold from its environment value.
 *
 * An unset or empty value uses the default. A value that does not parse as
 * an integer also uses the default: the raw `NaN` would make every
 * `count >= threshold` comparison false and silently disable optimize.
 *
 * @param {string|undefined} raw environment value.
 * @returns {number} the parsed threshold, or the default of 8.
 */
function parseFts5OptimizeMinSegments(raw) {
  const parsed = Number.parseInt(raw || String(FTS5_OPTIMIZE_MIN_SEGMENTS_DEFAULT), 10);
  return Number.isNaN(parsed) ? FTS5_OPTIMIZE_MIN_SEGMENTS_DEFAULT : parsed;
}

// Minimum segment count below which an optimize is not worth its full rewrite.
const FTS5_OPTIMIZE_MIN_SEGMENTS = parseFts5OptimizeMinSegments(
  process.env.SWEET_SEARCH_RECONCILE_FTS5_OPTIMIZE_MIN_SEGMENTS,
);
|
|
50
|
+
|
|
40
51
|
const FORBIDDEN_GPU_FLAGS = [
|
|
41
52
|
'SWEET_SEARCH_GPU', // sweet-search canonical knob
|
|
42
53
|
'INDEX_GPU_BACKEND', // pre-existing flag in core/indexing
|
|
@@ -89,7 +100,7 @@ export function installGpuLoadGuard() {
|
|
|
89
100
|
* JSON job descriptor:
|
|
90
101
|
*
|
|
91
102
|
* {
|
|
92
|
-
* "tier": "
|
|
103
|
+
* "tier": "binary_hnsw" | "li_segment" | "sparse_gram" | "fts5",
|
|
93
104
|
* "reason": "tombstone_watermark" | "dead_doc_ratio" | "stale_doc_ratio" | "delta_size_ratio" | "fts5_segment_count" | "crash_recovery",
|
|
94
105
|
* "epoch": <int>,
|
|
95
106
|
* "createdAt": <ISO-8601>,
|
|
@@ -226,29 +237,52 @@ export function appendDeadLetter(stateDir, job, err) {
|
|
|
226
237
|
export function defaultMaintenanceHandlers(stateDir) {
|
|
227
238
|
return {
|
|
228
239
|
...reclamationHandlers(stateDir),
|
|
229
|
-
fts5: async (job) => {
|
|
240
|
+
fts5: async (job, ctx = {}) => {
|
|
230
241
|
const payload = job?.payload || {};
|
|
231
242
|
const dbPath = payload.dbPath || payload.databasePath || path.join(stateDir, payload.dbFile || 'code-graph.db');
|
|
232
243
|
const tableNames = payload.tableName || payload.table
|
|
233
244
|
? [payload.tableName || payload.table]
|
|
234
245
|
: ['entities_fts', 'entities_trigram'];
|
|
235
|
-
|
|
246
|
+
// E.5: derive the merge page count from the budget remaining in the drain
|
|
247
|
+
// window. Off (SWEET_SEARCH_RECONCILE_FTS5_BUDGET=0) → the original aggressive 500-page merge. On (default) →
|
|
248
|
+
// scale pages down (floor 16) as the budget runs out so a near-exhausted
|
|
249
|
+
// drain still makes a small step rather than overrunning on one big merge.
|
|
250
|
+
// An explicit `payload.pages` always wins (operator override).
|
|
251
|
+
const explicitPages = Number.isFinite(payload.pages) && payload.pages > 0 ? payload.pages : null;
|
|
252
|
+
const pages = explicitPages != null
|
|
253
|
+
? explicitPages
|
|
254
|
+
: (fts5BudgetEnabled()
|
|
255
|
+
? fts5WatermarkBudgetPages({ remainingMs: ctx.remainingBudgetMs })
|
|
256
|
+
: 500);
|
|
257
|
+
// E.5 idle-gated optimize: when the daemon enqueues an fts5 job with
|
|
258
|
+
// `payload.optimize:true` (signaling true-idle / consecutive empty ticks)
|
|
259
|
+
// AND the optimize flag is on, run the full `('optimize')` rewrite + an
|
|
260
|
+
// immediate wal_checkpoint(TRUNCATE) — but ONLY on tables above a size
|
|
261
|
+
// threshold (a tiny table doesn't need it). Off by default; the merge path
|
|
262
|
+
// is the steady-state behavior.
|
|
263
|
+
const wantOptimize = fts5OptimizeEnabled() && payload.optimize === true;
|
|
236
264
|
if (!fs.existsSync(dbPath)) throw new Error(`fts5 maintenance database not found: ${dbPath}`);
|
|
237
265
|
const db = new Database(dbPath);
|
|
238
266
|
try {
|
|
239
267
|
const merged = [];
|
|
268
|
+
const optimized = [];
|
|
240
269
|
for (const tableName of tableNames) {
|
|
241
270
|
const exists = db.prepare(
|
|
242
271
|
"SELECT 1 FROM sqlite_master WHERE type='table' AND name = ?",
|
|
243
272
|
).get(tableName);
|
|
244
273
|
if (!exists) continue;
|
|
245
|
-
|
|
246
|
-
|
|
274
|
+
if (wantOptimize && fts5SegmentCount(db, tableName) >= FTS5_OPTIMIZE_MIN_SEGMENTS) {
|
|
275
|
+
fts5Optimize(db, tableName);
|
|
276
|
+
optimized.push(tableName);
|
|
277
|
+
} else {
|
|
278
|
+
fts5Merge(db, tableName, pages);
|
|
279
|
+
merged.push(tableName);
|
|
280
|
+
}
|
|
247
281
|
}
|
|
248
|
-
if (merged.length === 0) {
|
|
282
|
+
if (merged.length === 0 && optimized.length === 0) {
|
|
249
283
|
throw new Error(`fts5 maintenance found no FTS5 tables in ${dbPath}`);
|
|
250
284
|
}
|
|
251
|
-
return { dbPath, tableNames: merged, pages };
|
|
285
|
+
return { dbPath, tableNames: [...merged, ...optimized], pages, optimized };
|
|
252
286
|
} finally {
|
|
253
287
|
db.close();
|
|
254
288
|
}
|
|
@@ -317,7 +351,10 @@ export async function processMaintenanceQueue(stateDir, options = {}) {
|
|
|
317
351
|
attempted += 1;
|
|
318
352
|
try {
|
|
319
353
|
onProgress(`maintenance:${job.tier || 'unknown'}:start`);
|
|
320
|
-
|
|
354
|
+
// E.5: surface the wall-clock budget remaining so budget-aware handlers
|
|
355
|
+
// (fts5 merge) can scale their work to the spare window.
|
|
356
|
+
const remainingBudgetMs = budgetMs === Infinity ? Infinity : Math.max(0, budgetMs - (clock() - startMs));
|
|
357
|
+
await handler(job, { stateDir, onProgress, remainingBudgetMs });
|
|
321
358
|
onProgress(`maintenance:${job.tier || 'unknown'}:done`);
|
|
322
359
|
summary.succeeded += 1;
|
|
323
360
|
} catch (err) {
|
|
@@ -27,9 +27,7 @@ const MERKLE_STATE = 'merkle-state.json';
|
|
|
27
27
|
const PAUSE_FILE = 'reconcile-pause.json';
|
|
28
28
|
|
|
29
29
|
const REBUILD_TIERS = new Map([
|
|
30
|
-
['hnsw', '
|
|
31
|
-
['float_hnsw', 'float_hnsw'],
|
|
32
|
-
['float-hnsw', 'float_hnsw'],
|
|
30
|
+
['hnsw', 'binary_hnsw'],
|
|
33
31
|
['binary_hnsw', 'binary_hnsw'],
|
|
34
32
|
['binary-hnsw', 'binary_hnsw'],
|
|
35
33
|
['li', 'li_segment'],
|
|
@@ -399,12 +397,23 @@ function addDirtyHint(ctx, inputPath) {
|
|
|
399
397
|
|
|
400
398
|
/**
 * Run `fn`, keeping stdout clean for a JSON payload when `json` is truthy.
 *
 * While `fn` runs, `console.log`, `console.info` and `console.debug` are
 * redirected to `console.error`; the originals are restored afterwards, even
 * if `fn` throws. `console.warn` / `console.error` are left untouched. When
 * `json` is falsy, `fn` is simply invoked.
 *
 * @param {boolean} json whether the caller will emit JSON on stdout.
 * @param {Function} fn the command body (may be async).
 * @returns {Promise<*>} the result of `fn`.
 */
async function preserveJsonStdout(json, fn) {
  if (!json) return fn();

  const redirected = ['log', 'info', 'debug'];
  const saved = redirected.map((level) => console[level]);
  for (const level of redirected) {
    console[level] = (...args) => console.error(...args);
  }

  try {
    return await fn();
  } finally {
    redirected.forEach((level, index) => {
      console[level] = saved[index];
    });
  }
}
|
|
410
419
|
|
|
@@ -105,3 +105,43 @@ export async function maintainFloatStore(binaryHnswPath, { upserts, removeIds, b
|
|
|
105
105
|
store.applyDelta({ upserts, removeIds });
|
|
106
106
|
await store.save(floatStorePath);
|
|
107
107
|
}
|
|
108
|
+
|
|
109
|
+
/**
 * Apply a whole tick's accumulated float-vector delta in one step and save
 * the store once, as the batched counterpart of `maintainFloatStore`.
 *
 * Outcomes:
 *  - Empty delta → `{ saved: false }`, nothing touched.
 *  - Float store file missing while the binary HNSW already held vectors
 *    (`binaryVectorsBefore > 0`) and no resident store that is loaded with
 *    vectors → `{ saved: false }`; the store is left for a full rebuild.
 *  - Otherwise the resident `store` (or a new `FloatVectorStore`, loaded or
 *    initialised from the store path) receives the delta and is saved →
 *    `{ saved: true }`.
 *
 * @param {object} args
 * @param {string} args.binaryHnswPath path used to derive the float store path.
 * @param {FloatVectorStore} [args.store] resident store, if one was loaded earlier.
 * @param {Array<{id:string, vector:Float32Array}>} [args.upserts]
 * @param {string[]} [args.removeIds]
 * @param {number} [args.binaryVectorsBefore] binary-HNSW vector count before this tick's changes.
 * @param {number} args.dimension dimension passed to `loadOrInit`.
 * @returns {Promise<{saved: boolean}>}
 */
export async function flushFloatStore({ binaryHnswPath, store = null, upserts = [], removeIds = [], binaryVectorsBefore = 0, dimension }) {
  const nothingToApply = upserts.length === 0 && removeIds.length === 0;
  if (nothingToApply) {
    return { saved: false };
  }

  const floatStorePath = getFloatStorePath(binaryHnswPath);

  // Abnormal state: no store on disk although baseline vectors exist. Only a
  // resident store that already holds vectors may proceed.
  if (!existsSync(floatStorePath) && binaryVectorsBefore > 0) {
    const residentHasVectors = store && store.loaded && store.count > 0;
    if (!residentHasVectors) {
      return { saved: false };
    }
  }

  const target = store || new FloatVectorStore();
  if (!target.loaded) {
    await target.loadOrInit(floatStorePath, dimension);
  }
  target.applyDelta({ upserts, removeIds });
  await target.save(floatStorePath);
  return { saved: true };
}
|