sweet-search 2.5.13 → 2.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52) hide show
  1. package/README.md +36 -9
  2. package/core/cli.js +41 -3
  3. package/core/embedding/embedding-local-model.js +106 -10
  4. package/core/embedding/embedding-service.js +59 -1
  5. package/core/embedding/model-client.mjs +257 -0
  6. package/core/embedding/model-server.mjs +217 -0
  7. package/core/incremental-indexing/application/maintenance-handlers.mjs +19 -98
  8. package/core/incremental-indexing/application/maintenance-worker.mjs +46 -9
  9. package/core/incremental-indexing/application/operator-cli.mjs +14 -5
  10. package/core/incremental-indexing/application/production-reconciler-helpers.mjs +40 -0
  11. package/core/incremental-indexing/application/production-reconciler.mjs +718 -54
  12. package/core/incremental-indexing/application/reconciler.mjs +87 -15
  13. package/core/incremental-indexing/domain/cutoff-cache.mjs +191 -0
  14. package/core/incremental-indexing/domain/interval-autotune.mjs +84 -1
  15. package/core/incremental-indexing/domain/reconcile-counters.mjs +0 -4
  16. package/core/incremental-indexing/domain/watermark-scheduler.mjs +0 -24
  17. package/core/incremental-indexing/infrastructure/maintenance-state-reader.mjs +2 -26
  18. package/core/incremental-indexing/infrastructure/manifest.mjs +1 -9
  19. package/core/incremental-indexing/infrastructure/sqlite-fts5.mjs +72 -0
  20. package/core/indexing/artifact-builder.js +1 -1
  21. package/core/indexing/dedup/dedup-phase.js +36 -17
  22. package/core/indexing/dedup/exemplar-selector.js +5 -0
  23. package/core/indexing/index-codebase-v21.js +37 -14
  24. package/core/indexing/index-maintainer.mjs +337 -6
  25. package/core/indexing/indexer-ann.js +27 -434
  26. package/core/indexing/indexer-build.js +30 -14
  27. package/core/indexing/indexer-manifest.js +0 -3
  28. package/core/indexing/indexer-phases.js +101 -25
  29. package/core/indexing/maintainer-launcher.mjs +22 -0
  30. package/core/indexing/maintainer-watcher.mjs +397 -0
  31. package/core/indexing/os-priority.mjs +160 -0
  32. package/core/indexing/rss-budget.mjs +425 -0
  33. package/core/indexing/streaming-vectors.js +450 -0
  34. package/core/infrastructure/config/platform.js +14 -10
  35. package/core/infrastructure/onnx-session-utils.js +37 -0
  36. package/core/infrastructure/sparse-gram-delta-reader.js +11 -1
  37. package/core/ranking/late-interaction-index.js +58 -7
  38. package/core/search/daemon-registry.js +199 -0
  39. package/core/search/search-read-semantic.js +9 -3
  40. package/core/search/search-semantic.js +6 -29
  41. package/core/search/search-server.js +527 -27
  42. package/core/search/session-daemon-prewarm.mjs +110 -1
  43. package/core/search/sweet-search.js +0 -38
  44. package/core/vector-store/binary-hnsw-index.js +692 -78
  45. package/core/vector-store/index.js +1 -4
  46. package/eval/agent-read-workflows/bin/_ss-argparse.mjs +51 -5
  47. package/eval/agent-read-workflows/bin/_ss-helpers.mjs +95 -44
  48. package/eval/agent-read-workflows/bin/ss-read +2 -0
  49. package/mcp/tool-handlers.js +1 -2
  50. package/package.json +11 -8
  51. package/scripts/uninstall.js +2 -0
  52. package/core/vector-store/hnsw-index.js +0 -751
@@ -0,0 +1,217 @@
1
+ /**
2
+ * G8 — Shared model server: the resident-model SERVER process.
3
+ *
4
+ * Loads the ONNX embedding model ONCE and serves embedding requests for many
5
+ * per-repo daemons over a Unix domain socket, saving N−1 model copies
6
+ * (~10 GB across 8 repos), keeping per-repo crash isolation, and making
7
+ * per-repo state cheap to evict.
8
+ *
9
+ * It also cleanly resolves G3's process-global ORT-config contamination: this
10
+ * process can opt into the background ORT profile (force_spinning_stop +
11
+ * arena-off + clamped intra-op threads) WITHOUT affecting any query-serving
12
+ * process, because the model lives here and nowhere else. Opt in with
13
+ * `SWEET_SEARCH_MODEL_SERVER_BACKGROUND=1` (or pass `{ background: true }` to
14
+ * `startModelServer`); off by default.
15
+ *
16
+ * Wire protocol + codec live in `model-client.mjs` (single source of truth);
17
+ * this module imports them so framing/serialization can never drift between
18
+ * the two sides. Embeddings travel as RAW Float32 little-endian bytes →
19
+ * byte-identical to the in-process path (same model, same preprocessing).
20
+ *
21
+ * Gate: only started when `SWEET_SEARCH_SHARED_MODEL_SERVER==='1'`
22
+ * (the launcher checks this); the in-process embedding path is the default and
23
+ * is completely untouched when the flag is off.
24
+ */
25
+
26
import fs from 'node:fs';
import net from 'node:net';
import { pathToFileURL } from 'node:url';

import {
  encodeFrame,
  FrameDecoder,
  packEmbeddings,
  modelServerSocketPath,
  PROTOCOL_VERSION,
} from './model-client.mjs';

import {
  callLocalModelBucketed,
  configureLocalModelRuntime,
} from './embedding-local-model.js';
41
+
42
/**
 * Produce embeddings for a single request by delegating to
 * `callLocalModelBucketed`, the bucketed local-model entry point.
 *
 * Only the four bucketing knobs are forwarded from `providerOptions`; any
 * other keys are dropped. `onProgress` is deliberately never forwarded: it is
 * a function and cannot be serialized over the wire.
 *
 * Exported so a test can drive the server-side code path without a socket.
 *
 * @param {string[]} texts inputs to embed; a non-array or empty array yields `[]`.
 * @param {object} [providerOptions] optional bucketing options
 *   (`maxLength`, `hardCap`, `resolveHardCap`, `batchingSafety`).
 * @returns {Promise<Array>} whatever `callLocalModelBucketed` resolves to, or
 *   `[]` when there is nothing to embed.
 */
export async function computeEmbeddingsForRequest(texts, providerOptions = {}) {
  const hasTexts = Array.isArray(texts) && texts.length > 0;
  if (!hasTexts) {
    return [];
  }

  // Whitelist the forwarded options so nothing else leaks into the model call.
  const { maxLength, hardCap, resolveHardCap, batchingSafety } = providerOptions;
  return callLocalModelBucketed(texts, {
    maxLength,
    hardCap,
    resolveHardCap,
    batchingSafety,
  });
}
61
+
62
/**
 * Dispatch one decoded request header and write a single reply frame to
 * `socket`.
 *
 * Request types handled:
 *  - `ping`          → replies with a `pong` frame.
 *  - `getEmbeddings` → runs `computeEmbeddingsForRequest`, packs the result
 *                      with `packEmbeddings`, and replies with an `embeddings`
 *                      frame carrying the packed payload; any failure becomes
 *                      an `error` frame carrying the failure message.
 *  - anything else   → replies with an `error` frame naming the unknown type.
 *
 * A header that is missing or not an object is ignored and gets no reply.
 *
 * @param {{ write: Function }} socket connection to reply on.
 * @param {object} header decoded request header (`type`, `requestId`, …).
 * @returns {Promise<void>}
 */
async function handleRequestFrame(socket, header) {
  const isObjectHeader = Boolean(header) && typeof header === 'object';
  if (!isObjectHeader) return;

  // Every error reply shares one shape; only the id and message vary.
  const errorFrame = (requestId, message) => encodeFrame({
    type: 'error',
    requestId,
    message,
    v: PROTOCOL_VERSION,
  });

  switch (header.type) {
    case 'ping': {
      socket.write(encodeFrame({ type: 'pong', v: PROTOCOL_VERSION }));
      return;
    }

    case 'getEmbeddings': {
      // Capture the id before awaiting so the reply is tied to this request.
      const requestId = header.requestId;
      try {
        const vectors = await computeEmbeddingsForRequest(
          header.texts || [],
          header.providerOptions || {},
        );
        const { payload: body, dims } = packEmbeddings(vectors);
        const replyHeader = { type: 'embeddings', requestId, dims, v: PROTOCOL_VERSION };
        socket.write(encodeFrame(replyHeader, body));
      } catch (failure) {
        socket.write(errorFrame(requestId, failure?.message || String(failure)));
      }
      return;
    }

    default: {
      // Unknown request type — reply with a structured error (client falls back).
      socket.write(errorFrame(header.requestId, `unknown request type: ${header.type}`));
    }
  }
}
102
+
103
/**
 * Hook a freshly connected socket up to the request handler.
 *
 * Each connection gets its own `FrameDecoder`; every incoming `data` chunk is
 * pushed into it and the decoder calls back once per frame with
 * `(header, payload, err)`:
 *  - with `err` set, the socket is destroyed and the frame is dropped;
 *  - otherwise the header goes to `handleRequestFrame` without awaiting it,
 *    so later frames are not blocked by earlier ones.
 *
 * If the handler rejects, a generic `error` frame is written back on a
 * best-effort basis. Socket-level `error` events are swallowed so a vanished
 * client cannot crash the server process.
 *
 * @param {import('node:net').Socket} socket the accepted client connection.
 * @returns {void}
 */
export function attachConnection(socket) {
  // Client vanished — ignore, keep server up.
  socket.on('error', () => {});

  // Last-resort reply when the request handler itself rejects.
  const replyInternalError = (header) => {
    try {
      socket.write(encodeFrame({
        type: 'error',
        requestId: header?.requestId,
        message: 'internal model-server error',
        v: PROTOCOL_VERSION,
      }));
    } catch {
      /* socket gone */
    }
  };

  const onFrame = (header, _payload, err) => {
    if (err) {
      try {
        socket.destroy();
      } catch {
        /* ignore */
      }
      return;
    }
    // Fire-and-forget: the handler writes its own reply.
    handleRequestFrame(socket, header).catch(() => replyInternalError(header));
  };

  const decoder = new FrameDecoder(onFrame);
  socket.on('data', (chunk) => {
    decoder.push(chunk);
  });
}
130
+
131
/**
 * Report whether something is currently accepting connections on `socketPath`.
 *
 * Resolves `true` only when a connect attempt succeeds. A connect error (no
 * such file, connection refused, not a socket) or a probe that does not settle
 * within `timeoutMs` resolves `false`, which lets the caller treat the path as
 * stale. Never rejects.
 *
 * @param {string} socketPath path of the Unix socket / named pipe to probe.
 * @param {number} [timeoutMs=500] upper bound on how long to wait.
 * @returns {Promise<boolean>}
 */
function isSocketAcceptingConnections(socketPath, timeoutMs = 500) {
  return new Promise((resolve) => {
    let probe;
    let timer = null;
    let settled = false;
    const settle = (live) => {
      if (settled) return;
      settled = true;
      if (timer) clearTimeout(timer);
      if (probe) {
        probe.removeAllListeners('connect');
        probe.removeAllListeners('error');
        // Swallow anything emitted while the probe is being torn down.
        probe.on('error', () => {});
        probe.destroy();
      }
      resolve(live);
    };
    try {
      probe = net.connect(socketPath);
    } catch {
      settle(false);
      return;
    }
    probe.once('connect', () => settle(true));
    probe.once('error', () => settle(false));
    timer = setTimeout(() => settle(false), timeoutMs);
  });
}

/**
 * Start the shared model server. Applies the background ORT profile (if
 * requested) BEFORE the first encode (the local model is a singleton built on
 * first encode — configuring after would be a silent no-op), binds the Unix
 * socket, and serves until `close()` is called.
 *
 * Stale-socket handling: a socket file left behind by a crashed predecessor is
 * removed before binding. Removal is only safe when nothing is listening,
 * because once the path is unlinked `listen()` succeeds unconditionally and
 * would silently orphan a live server. The path is therefore probed first; if
 * a listener answers, startup rejects with an `EADDRINUSE` error.
 *
 * `close()` relies on `server.close()`, which stops accepting new connections
 * and completes once existing connections have ended; it then removes the
 * socket file. `close()` never rejects.
 *
 * @param {object} [opts]
 * @param {string} [opts.socketPath] override the socket path.
 * @param {boolean} [opts.background] opt into the bg ORT profile for THIS
 *   process only (default from the `SWEET_SEARCH_MODEL_SERVER_BACKGROUND` env gate).
 * @returns {Promise<{ socketPath, server, close }>}
 * @throws {Error} with `code === 'EADDRINUSE'` when a live server already owns
 *   `socketPath`; any `listen()` error is propagated as the rejection.
 */
export async function startModelServer(opts = {}) {
  const socketPath = opts.socketPath || modelServerSocketPath();
  const background = opts.background
    ?? (process.env.SWEET_SEARCH_MODEL_SERVER_BACKGROUND === '1');

  // Configure the resident model's runtime profile up front. This process owns
  // the only model copy, so bg-profiling here cannot throttle any query server.
  if (background) {
    try {
      configureLocalModelRuntime({ background: true });
    } catch (err) {
      // Best-effort: never let profile config abort server startup.
      if (process.env.DEBUG_CATCHES) {
        process.stderr.write(`[model-server] bg profile config failed: ${err?.message || err}\n`);
      }
    }
  }

  // Refuse to steal the path from a server that is still answering.
  if (await isSocketAcceptingConnections(socketPath)) {
    throw Object.assign(
      new Error(`model-server socket already in use: ${socketPath}`),
      { code: 'EADDRINUSE' },
    );
  }

  // Nothing is listening: remove any stale socket left by a crashed predecessor.
  try { fs.unlinkSync(socketPath); } catch { /* none / not removable — listen() reports real failures */ }

  const server = net.createServer((socket) => attachConnection(socket));

  await new Promise((resolve, reject) => {
    const onErr = (err) => { server.off('listening', onOk); reject(err); };
    const onOk = () => { server.off('error', onErr); resolve(); };
    server.once('error', onErr);
    server.once('listening', onOk);
    // Restrict permissions on the socket file (owner-only).
    const prevUmask = process.umask(0o077);
    try {
      server.listen(socketPath);
    } finally {
      process.umask(prevUmask);
    }
  });

  // Belt-and-suspenders: chmod explicitly in case umask was ineffective.
  try { fs.chmodSync(socketPath, 0o700); } catch { /* best-effort */ }

  const close = () => new Promise((resolve) => {
    try {
      server.close(() => {
        try { fs.unlinkSync(socketPath); } catch { /* already gone */ }
        resolve();
      });
    } catch {
      resolve();
    }
  });

  return { socketPath, server, close };
}
196
+
197
// ── CLI entrypoint ────────────────────────────────────────────────────────
// Allow `node core/embedding/model-server.mjs` to run a standalone server.
// Guarded so importing this module (tests, launcher) never auto-binds.

/**
 * Decide whether the module identified by `moduleUrl` is the script the
 * process was launched with.
 *
 * `import.meta.url` is a percent-encoded `file:` URL, so comparing it with a
 * hand-built `file://${path}` string fails for paths containing spaces or
 * non-ASCII characters and for Windows paths. The launch path is converted
 * with `pathToFileURL` instead; its real path is also tried so that launching
 * through a symlink still matches.
 *
 * @param {string} moduleUrl the module's `import.meta.url`.
 * @param {string|undefined} argvPath the launch script path (`process.argv[1]`).
 * @returns {boolean} `true` when both refer to the same file; `false` on any
 *   mismatch, missing path, or filesystem error.
 */
function isMainModule(moduleUrl, argvPath) {
  if (typeof argvPath !== 'string' || argvPath.length === 0) return false;
  try {
    if (moduleUrl === pathToFileURL(argvPath).href) return true;
    // Launched via a symlink: compare against the resolved target.
    return moduleUrl === pathToFileURL(fs.realpathSync(argvPath)).href;
  } catch {
    return false;
  }
}

const _isMain = isMainModule(import.meta.url, process.argv[1]);

if (_isMain) {
  startModelServer()
    .then(({ socketPath }) => {
      process.stdout.write(`[model-server] listening on ${socketPath}\n`);
    })
    .catch((err) => {
      process.stderr.write(`[model-server] failed to start: ${err?.message || err}\n`);
      process.exit(1);
    });
}
@@ -16,12 +16,12 @@
16
16
  * Manifest semantics:
17
17
  * - sparse_gram, LI segment: the reconcile manifest is unchanged. New
18
18
  * artifacts replace old ones at canonical paths read fresh per query.
19
- * - HNSW (float / binary): canonical paths unchanged; the reconcile
20
- * manifest stays at the current epoch. Cross-process readers that
21
- * cache an HNSWIndex instance in memory MUST already invalidate on
22
- * manifest change — but maintenance does not bump the epoch by
23
- * itself. This matches the existing reconcile tick semantics; a
24
- * follow-up workstream can add versioned tier paths if needed.
19
+ * - Binary HNSW: canonical paths unchanged; the reconcile manifest
20
+ * stays at the current epoch. Cross-process readers that cache a
21
+ * binary HNSW index in memory MUST already invalidate on manifest
22
+ * change — but maintenance does not bump the epoch by itself. This
23
+ * matches the existing reconcile tick semantics; a follow-up
24
+ * workstream can add versioned tier paths if needed.
25
25
  *
26
26
  * The handlers degrade safely when artifacts are missing/corrupt — they
27
27
  * throw a descriptive error which the worker converts into the standard
@@ -33,7 +33,6 @@ import path from 'node:path';
33
33
  import Database from 'better-sqlite3';
34
34
 
35
35
  import { BinaryHNSWIndex } from '../../vector-store/binary-hnsw-index.js';
36
- import { HNSWIndex } from '../../vector-store/hnsw-index.js';
37
36
  import { LateInteractionIndex } from '../../ranking/late-interaction-index.js';
38
37
  import { compactDeltaSegments, listDeltaSegments } from '../infrastructure/sparse-gram-delta.mjs';
39
38
  import { mergeLiSegments, LI_MERGE_GRACE_MS } from '../infrastructure/li-segment-merge.mjs';
@@ -160,9 +159,8 @@ export async function binaryHnswHandler(job, { stateDir, onProgress = null }) {
160
159
  await existing.load(indexPath);
161
160
  progress('maintenance:binary-hnsw:loaded');
162
161
 
163
- // Liveness authority is codebase.db, NOT the binary stale bitmap. This makes
164
- // binary reclamation self-healing and consistent with floatHnswHandler
165
- // (which already rebuilds from `vectors WHERE epoch_retired IS NULL`): a
162
+ // Liveness authority is codebase.db (`vectors WHERE epoch_retired IS NULL`),
163
+ // NOT the binary stale bitmap. This makes binary reclamation self-healing: a
166
164
  // vector retired in codebase.db is dropped here even if its binary stale bit
167
165
  // was never set. Falls back to the stale bitmap only when codebase.db is
168
166
  // unavailable.
@@ -195,8 +193,18 @@ export async function binaryHnswHandler(job, { stateDir, onProgress = null }) {
195
193
  maxElements: existing.maxElements,
196
194
  });
197
195
  fresh.resetForBuild();
196
+ // When deterministic levels are enabled, the compacted graph must be
197
+ // reproducible: insert surviving ids in a FIXED sorted order so the
198
+ // rebuild is independent of the scan/encounter order above. Combined with
199
+ // levelForId() (gated by the same flag inside `add()`), this makes the
200
+ // compaction path agree byte-for-byte with batched/per-file builds.
201
+ // DEFAULT-ON (disable with SWEET_SEARCH_HNSW_DETERMINISTIC_LEVELS=0) →
202
+ // set the flag to '0' to preserve today's encounter-order insertion exactly.
203
+ const insertOrder = process.env.SWEET_SEARCH_HNSW_DETERMINISTIC_LEVELS !== '0'
204
+ ? [...live].sort((a, b) => (a.id < b.id ? -1 : a.id > b.id ? 1 : 0))
205
+ : live;
198
206
  let added = 0;
199
- for (const v of live) {
207
+ for (const v of insertOrder) {
200
208
  await fresh.add(v.id, v.binary, v.metadata, v.int8);
201
209
  added += 1;
202
210
  if (added % 500 === 0) progress('maintenance:binary-hnsw:add');
@@ -214,92 +222,6 @@ export async function binaryHnswHandler(job, { stateDir, onProgress = null }) {
214
222
  };
215
223
  }
216
224
 
217
- /* ------------------------------------------------------------------ *
218
- * float_hnsw *
219
- * ------------------------------------------------------------------ */
220
-
221
- /**
222
- * Float HNSW clean replacement.
223
- *
224
- * Source of truth for "which vectors are live" is `codebase.db`. The
225
- * existing HNSW meta.json's idMap is also pruned, but we re-read the DB
226
- * to pick up `embedding` blobs the in-memory HNSWIndex doesn't expose.
227
- *
228
- * Caller invariant: the codebase.db schema columns (`id`, `embedding`,
229
- * `metadata`, `epoch_retired`) are stable — verified in the production
230
- * reconciler `applyVectorDelta` path.
231
- */
232
- export async function floatHnswHandler(job, { stateDir, onProgress = null }) {
233
- const progress = progressFn(onProgress);
234
- const indexPath = path.join(stateDir, 'codebase-hnsw.idx');
235
- const metaPath = path.join(stateDir, 'codebase-hnsw.meta.json');
236
- const dbPath = path.join(stateDir, 'codebase.db');
237
- if (!fs.existsSync(metaPath)) return { skipped: 'no-index' };
238
- if (!fs.existsSync(dbPath)) return { skipped: 'no-vector-db' };
239
-
240
- // Load existing index to discover dimension / parameters (cheap).
241
- const existing = new HNSWIndex({ indexPath });
242
- try { await existing.load(indexPath); } catch { return { skipped: 'load-failed' }; }
243
- progress('maintenance:float-hnsw:loaded');
244
- const dimension = existing.dimension;
245
- const stalePath = existing.stalePath;
246
-
247
- const stalePresent = fs.existsSync(stalePath);
248
- const liveIdsBefore = new Set(existing.idMap.keys());
249
-
250
- // Walk live vectors from codebase.db.
251
- const db = new Database(dbPath, { readonly: true });
252
- let liveRows;
253
- try {
254
- liveRows = db.prepare(
255
- 'SELECT id, embedding, metadata FROM vectors WHERE epoch_retired IS NULL'
256
- ).all();
257
- } finally {
258
- db.close();
259
- }
260
-
261
- // If everything aligns AND no stale bitmap → nothing to do.
262
- if (!stalePresent && liveIdsBefore.size === liveRows.length) {
263
- return { skipped: 'no-stale-vectors', dropped: 0 };
264
- }
265
-
266
- // Rebuild the index in memory and let `HNSWIndex.save()` publish via
267
- // its tmp+rename protocol — that protocol keeps any cross-process
268
- // `usearch.view()` mmap valid against the unlinked old inode.
269
- const fresh = new HNSWIndex({
270
- indexPath,
271
- stalePath,
272
- dimension,
273
- maxElements: existing.maxElements,
274
- M: existing.M,
275
- efConstruction: existing.efConstruction,
276
- efSearch: existing.efSearch,
277
- metric: existing.metric,
278
- });
279
- await fresh.init();
280
- for (let i = 0; i < liveRows.length; i += 1) {
281
- const row = liveRows[i];
282
- const embedding = float32FromBuffer(row.embedding);
283
- let meta;
284
- try { meta = JSON.parse(row.metadata || '{}'); } catch { meta = {}; }
285
- const truncated = embedding.length > dimension ? embedding.slice(0, dimension) : embedding;
286
- await fresh.add(row.id, truncated, meta);
287
- if (i > 0 && i % 500 === 0) progress('maintenance:float-hnsw:add');
288
- }
289
- await fresh.save(indexPath);
290
- progress('maintenance:float-hnsw:saved');
291
- // Stale bitmap is meaningless after rebuild — keys are fresh.
292
- safeUnlink(stalePath);
293
-
294
- return {
295
- tier: 'float_hnsw',
296
- kept: liveRows.length,
297
- dropped: Math.max(0, liveIdsBefore.size - liveRows.length),
298
- staleBitmapCleared: true,
299
- atomicPublish: true,
300
- };
301
- }
302
-
303
225
  /* ------------------------------------------------------------------ *
304
226
  * li_segment *
305
227
  * ------------------------------------------------------------------ */
@@ -510,7 +432,6 @@ export function reclamationHandlers(stateDir) {
510
432
  return {
511
433
  sparse_gram: (job, ctx = {}) => sparseGramHandler(job, { stateDir, onProgress: ctx.onProgress }),
512
434
  binary_hnsw: (job, ctx = {}) => binaryHnswHandler(job, { stateDir, onProgress: ctx.onProgress }),
513
- float_hnsw: (job, ctx = {}) => floatHnswHandler(job, { stateDir, onProgress: ctx.onProgress }),
514
435
  li_segment: (job, ctx = {}) => liSegmentHandler(job, { stateDir, onProgress: ctx.onProgress }),
515
436
  li_segments: (job, ctx = {}) => liSegmentsHandler(job, { stateDir, onProgress: ctx.onProgress }),
516
437
  vector_gc: (job, ctx = {}) => vectorGcHandler(job, { stateDir, onProgress: ctx.onProgress }),
@@ -34,9 +34,20 @@ import fs from 'node:fs';
34
34
  import path from 'node:path';
35
35
  import process from 'node:process';
36
36
  import Database from 'better-sqlite3';
37
- import { fts5Merge } from '../infrastructure/sqlite-fts5.mjs';
37
+ import { fts5Merge, fts5Optimize, fts5SegmentCount, fts5WatermarkBudgetPages } from '../infrastructure/sqlite-fts5.mjs';
38
38
  import { reclamationHandlers } from './maintenance-handlers.mjs';
39
39
 
40
+ // SWEET_SEARCH_RECONCILE_FTS5_BUDGET is DEFAULT-ON (disable with =0): the
41
+ // budget-derived merge scales page count down as the drain budget runs out so a
42
+ // near-exhausted window makes a small step instead of one big overrun. Verified
43
+ // recall-neutral + soak == baseline. Set to '0' for the legacy fixed 500-page
44
+ // merge. SWEET_SEARCH_RECONCILE_FTS5_OPTIMIZE stays DEFAULT-OFF (heavy idle
45
+ // compaction — strict opt-in).
46
+ const fts5BudgetEnabled = () => process.env.SWEET_SEARCH_RECONCILE_FTS5_BUDGET !== '0';
47
+ const fts5OptimizeEnabled = () => process.env.SWEET_SEARCH_RECONCILE_FTS5_OPTIMIZE === '1';
48
+ // Minimum segment count below which an optimize is not worth its full rewrite.
49
+ const FTS5_OPTIMIZE_MIN_SEGMENTS = Number.parseInt(process.env.SWEET_SEARCH_RECONCILE_FTS5_OPTIMIZE_MIN_SEGMENTS || '8', 10);
50
+
40
51
  const FORBIDDEN_GPU_FLAGS = [
41
52
  'SWEET_SEARCH_GPU', // sweet-search canonical knob
42
53
  'INDEX_GPU_BACKEND', // pre-existing flag in core/indexing
@@ -89,7 +100,7 @@ export function installGpuLoadGuard() {
89
100
  * JSON job descriptor:
90
101
  *
91
102
  * {
92
- * "tier": "float_hnsw" | "binary_hnsw" | "li_segment" | "sparse_gram" | "fts5",
103
+ * "tier": "binary_hnsw" | "li_segment" | "sparse_gram" | "fts5",
93
104
  * "reason": "tombstone_watermark" | "dead_doc_ratio" | "stale_doc_ratio" | "delta_size_ratio" | "fts5_segment_count" | "crash_recovery",
94
105
  * "epoch": <int>,
95
106
  * "createdAt": <ISO-8601>,
@@ -226,29 +237,52 @@ export function appendDeadLetter(stateDir, job, err) {
226
237
  export function defaultMaintenanceHandlers(stateDir) {
227
238
  return {
228
239
  ...reclamationHandlers(stateDir),
229
- fts5: async (job) => {
240
+ fts5: async (job, ctx = {}) => {
230
241
  const payload = job?.payload || {};
231
242
  const dbPath = payload.dbPath || payload.databasePath || path.join(stateDir, payload.dbFile || 'code-graph.db');
232
243
  const tableNames = payload.tableName || payload.table
233
244
  ? [payload.tableName || payload.table]
234
245
  : ['entities_fts', 'entities_trigram'];
235
- const pages = Number.isFinite(payload.pages) && payload.pages > 0 ? payload.pages : 500;
246
+ // E.5: derive the merge page count from the budget remaining in the drain
247
+ // window. Off (=0) → the legacy fixed 500-page merge. On (default) →
248
+ // scale pages down (floor 16) as the budget runs out so a near-exhausted
249
+ // drain still makes a small step rather than overrunning on one big merge.
250
+ // An explicit `payload.pages` always wins (operator override).
251
+ const explicitPages = Number.isFinite(payload.pages) && payload.pages > 0 ? payload.pages : null;
252
+ const pages = explicitPages != null
253
+ ? explicitPages
254
+ : (fts5BudgetEnabled()
255
+ ? fts5WatermarkBudgetPages({ remainingMs: ctx.remainingBudgetMs })
256
+ : 500);
257
+ // E.5 idle-gated optimize: when the daemon enqueues an fts5 job with
258
+ // `payload.optimize:true` (signaling true-idle / consecutive empty ticks)
259
+ // AND the optimize flag is on, run the full `('optimize')` rewrite + an
260
+ // immediate wal_checkpoint(TRUNCATE) — but ONLY on tables above a size
261
+ // threshold (a tiny table doesn't need it). Off by default; the merge path
262
+ // is the steady-state behavior.
263
+ const wantOptimize = fts5OptimizeEnabled() && payload.optimize === true;
236
264
  if (!fs.existsSync(dbPath)) throw new Error(`fts5 maintenance database not found: ${dbPath}`);
237
265
  const db = new Database(dbPath);
238
266
  try {
239
267
  const merged = [];
268
+ const optimized = [];
240
269
  for (const tableName of tableNames) {
241
270
  const exists = db.prepare(
242
271
  "SELECT 1 FROM sqlite_master WHERE type='table' AND name = ?",
243
272
  ).get(tableName);
244
273
  if (!exists) continue;
245
- fts5Merge(db, tableName, pages);
246
- merged.push(tableName);
274
+ if (wantOptimize && fts5SegmentCount(db, tableName) >= FTS5_OPTIMIZE_MIN_SEGMENTS) {
275
+ fts5Optimize(db, tableName);
276
+ optimized.push(tableName);
277
+ } else {
278
+ fts5Merge(db, tableName, pages);
279
+ merged.push(tableName);
280
+ }
247
281
  }
248
- if (merged.length === 0) {
282
+ if (merged.length === 0 && optimized.length === 0) {
249
283
  throw new Error(`fts5 maintenance found no FTS5 tables in ${dbPath}`);
250
284
  }
251
- return { dbPath, tableNames: merged, pages };
285
+ return { dbPath, tableNames: [...merged, ...optimized], pages, optimized };
252
286
  } finally {
253
287
  db.close();
254
288
  }
@@ -317,7 +351,10 @@ export async function processMaintenanceQueue(stateDir, options = {}) {
317
351
  attempted += 1;
318
352
  try {
319
353
  onProgress(`maintenance:${job.tier || 'unknown'}:start`);
320
- await handler(job, { stateDir, onProgress });
354
+ // E.5: surface the wall-clock budget remaining so budget-aware handlers
355
+ // (fts5 merge) can scale their work to the spare window.
356
+ const remainingBudgetMs = budgetMs === Infinity ? Infinity : Math.max(0, budgetMs - (clock() - startMs));
357
+ await handler(job, { stateDir, onProgress, remainingBudgetMs });
321
358
  onProgress(`maintenance:${job.tier || 'unknown'}:done`);
322
359
  summary.succeeded += 1;
323
360
  } catch (err) {
@@ -27,9 +27,7 @@ const MERKLE_STATE = 'merkle-state.json';
27
27
  const PAUSE_FILE = 'reconcile-pause.json';
28
28
 
29
29
  const REBUILD_TIERS = new Map([
30
- ['hnsw', 'float_hnsw'],
31
- ['float_hnsw', 'float_hnsw'],
32
- ['float-hnsw', 'float_hnsw'],
30
+ ['hnsw', 'binary_hnsw'],
33
31
  ['binary_hnsw', 'binary_hnsw'],
34
32
  ['binary-hnsw', 'binary_hnsw'],
35
33
  ['li', 'li_segment'],
@@ -399,12 +397,23 @@ function addDirtyHint(ctx, inputPath) {
399
397
 
400
398
  async function preserveJsonStdout(json, fn) {
401
399
  if (!json) return fn();
402
- const originalLog = console.log;
400
+ // Route EVERY stdout-bound console level (log/info/debug) to stderr while the
401
+ // command runs, so stray diagnostics never corrupt the --json payload on
402
+ // stdout. console.log alone is not enough: the autotune emits its
403
+ // "[reconciler] interval …ms → …ms" line via the default `console` logger's
404
+ // `.info` (now that autotune is default-on), which writes to stdout and would
405
+ // otherwise prepend a non-JSON line to `reconcile tick --json`. warn/error
406
+ // already go to stderr, so they're left alone.
407
+ const original = { log: console.log, info: console.info, debug: console.debug };
403
408
  console.log = (...args) => console.error(...args);
409
+ console.info = (...args) => console.error(...args);
410
+ console.debug = (...args) => console.error(...args);
404
411
  try {
405
412
  return await fn();
406
413
  } finally {
407
- console.log = originalLog;
414
+ console.log = original.log;
415
+ console.info = original.info;
416
+ console.debug = original.debug;
408
417
  }
409
418
  }
410
419
 
@@ -105,3 +105,43 @@ export async function maintainFloatStore(binaryHnswPath, { upserts, removeIds, b
105
105
  store.applyDelta({ upserts, removeIds });
106
106
  await store.save(floatStorePath);
107
107
  }
108
+
109
/**
 * Apply a tick's accumulated float-vector delta to the float store and save
 * it in one step — the once-per-tick counterpart of `maintainFloatStore`.
 *
 * Outcomes, in order:
 *  1. No upserts and no removes → nothing to do, `{ saved: false }`.
 *  2. The store file is absent, `binaryVectorsBefore` is positive, and no
 *     loaded, non-empty resident `store` was supplied → skipped,
 *     `{ saved: false }`. A store built from the delta alone would not cover
 *     the vectors the binary index already held at tick start.
 *  3. Otherwise the resident `store` (or a new `FloatVectorStore`) is loaded
 *     if needed, the delta is applied, the store is saved → `{ saved: true }`.
 *
 * @param {object} args
 * @param {string} args.binaryHnswPath path the float store path is derived from
 * @param {FloatVectorStore} [args.store] resident store (loaded once at tick start)
 * @param {Array<{id:string, vector:Float32Array}>} [args.upserts]
 * @param {string[]} [args.removeIds]
 * @param {number} [args.binaryVectorsBefore] live binary-HNSW vector count at tick start
 * @param {number} args.dimension dimension passed to `loadOrInit` when loading
 * @returns {Promise<{saved: boolean}>}
 */
export async function flushFloatStore({ binaryHnswPath, store = null, upserts = [], removeIds = [], binaryVectorsBefore = 0, dimension }) {
  const deltaIsEmpty = upserts.length === 0 && removeIds.length === 0;
  if (deltaIsEmpty) {
    return { saved: false };
  }

  const floatStorePath = getFloatStorePath(binaryHnswPath);

  // Abnormal state: baseline vectors exist but there is no float store for them.
  if (!existsSync(floatStorePath) && binaryVectorsBefore > 0) {
    const residentHasVectors = store && store.loaded && store.count > 0;
    if (!residentHasVectors) {
      return { saved: false };
    }
  }

  const target = store ? store : new FloatVectorStore();
  if (!target.loaded) {
    await target.loadOrInit(floatStorePath, dimension);
  }
  target.applyDelta({ upserts, removeIds });
  await target.save(floatStorePath);
  return { saved: true };
}