sweet-search 2.5.13 → 2.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52) hide show
  1. package/README.md +36 -9
  2. package/core/cli.js +41 -3
  3. package/core/embedding/embedding-local-model.js +106 -10
  4. package/core/embedding/embedding-service.js +59 -1
  5. package/core/embedding/model-client.mjs +257 -0
  6. package/core/embedding/model-server.mjs +217 -0
  7. package/core/incremental-indexing/application/maintenance-handlers.mjs +19 -98
  8. package/core/incremental-indexing/application/maintenance-worker.mjs +46 -9
  9. package/core/incremental-indexing/application/operator-cli.mjs +14 -5
  10. package/core/incremental-indexing/application/production-reconciler-helpers.mjs +40 -0
  11. package/core/incremental-indexing/application/production-reconciler.mjs +718 -54
  12. package/core/incremental-indexing/application/reconciler.mjs +87 -15
  13. package/core/incremental-indexing/domain/cutoff-cache.mjs +191 -0
  14. package/core/incremental-indexing/domain/interval-autotune.mjs +84 -1
  15. package/core/incremental-indexing/domain/reconcile-counters.mjs +0 -4
  16. package/core/incremental-indexing/domain/watermark-scheduler.mjs +0 -24
  17. package/core/incremental-indexing/infrastructure/maintenance-state-reader.mjs +2 -26
  18. package/core/incremental-indexing/infrastructure/manifest.mjs +1 -9
  19. package/core/incremental-indexing/infrastructure/sqlite-fts5.mjs +72 -0
  20. package/core/indexing/artifact-builder.js +1 -1
  21. package/core/indexing/dedup/dedup-phase.js +36 -17
  22. package/core/indexing/dedup/exemplar-selector.js +5 -0
  23. package/core/indexing/index-codebase-v21.js +37 -14
  24. package/core/indexing/index-maintainer.mjs +337 -6
  25. package/core/indexing/indexer-ann.js +27 -434
  26. package/core/indexing/indexer-build.js +30 -14
  27. package/core/indexing/indexer-manifest.js +0 -3
  28. package/core/indexing/indexer-phases.js +101 -25
  29. package/core/indexing/maintainer-launcher.mjs +22 -0
  30. package/core/indexing/maintainer-watcher.mjs +397 -0
  31. package/core/indexing/os-priority.mjs +160 -0
  32. package/core/indexing/rss-budget.mjs +425 -0
  33. package/core/indexing/streaming-vectors.js +450 -0
  34. package/core/infrastructure/config/platform.js +14 -10
  35. package/core/infrastructure/onnx-session-utils.js +37 -0
  36. package/core/infrastructure/sparse-gram-delta-reader.js +11 -1
  37. package/core/ranking/late-interaction-index.js +58 -7
  38. package/core/search/daemon-registry.js +199 -0
  39. package/core/search/search-read-semantic.js +9 -3
  40. package/core/search/search-semantic.js +6 -29
  41. package/core/search/search-server.js +527 -27
  42. package/core/search/session-daemon-prewarm.mjs +110 -1
  43. package/core/search/sweet-search.js +0 -38
  44. package/core/vector-store/binary-hnsw-index.js +692 -78
  45. package/core/vector-store/index.js +1 -4
  46. package/eval/agent-read-workflows/bin/_ss-argparse.mjs +51 -5
  47. package/eval/agent-read-workflows/bin/_ss-helpers.mjs +95 -44
  48. package/eval/agent-read-workflows/bin/ss-read +2 -0
  49. package/mcp/tool-handlers.js +1 -2
  50. package/package.json +11 -8
  51. package/scripts/uninstall.js +2 -0
  52. package/core/vector-store/hnsw-index.js +0 -751
@@ -1,23 +1,34 @@
1
1
  import fs from 'node:fs';
2
+ import os from 'node:os';
2
3
  import path from 'node:path';
3
4
  import { createHash } from 'node:crypto';
4
5
  import Database from 'better-sqlite3';
5
6
 
6
7
  import { Reconciler } from './reconciler.mjs';
7
- import { enqueueMaintenanceJob } from './maintenance-worker.mjs';
8
+ import { enqueueMaintenanceJob, readMaintenanceQueue } from './maintenance-worker.mjs';
8
9
  import { createAdmissionPolicy } from '../../indexing/admission-policy.js';
9
10
  import { applyIndexingChunkPolicy } from '../../indexing/indexing-file-policy.js';
10
11
  import { contentHashSync } from '../infrastructure/hashing.mjs';
11
12
  import { readManifest, writeManifest } from '../infrastructure/manifest.mjs';
12
13
  import { annotateChunksForDelta, snapshotFileRows, diffChunks, applyDiff } from '../infrastructure/vector-delta-writer.mjs';
13
14
  import { appendDeltaRecord, FALLBACK_WEIGHTS_ID, fileIdFor, listDeltaSegments } from '../infrastructure/sparse-gram-delta.mjs';
14
- import { fts5Merge } from '../infrastructure/sqlite-fts5.mjs';
15
- import { insertEntity, insertRelationships, markBinaryStale, maintainFloatStore } from './production-reconciler-helpers.mjs';
15
+ import { fts5Merge, fts5MergeBudgetPages } from '../infrastructure/sqlite-fts5.mjs';
16
+ import { insertEntity, insertRelationships, markBinaryStale, maintainFloatStore, flushFloatStore } from './production-reconciler-helpers.mjs';
17
+ import {
18
+ chunkCutoffEnabled,
19
+ computeCutoffSignature,
20
+ signaturesMatch,
21
+ loadCutoffCache,
22
+ getFileSignature,
23
+ setFileSignature,
24
+ deleteFileSignature,
25
+ saveCutoffCache,
26
+ } from '../domain/cutoff-cache.mjs';
27
+ import { FloatVectorStore, getFloatStorePath } from '../../vector-store/float-vector-store.js';
16
28
  import { createGraphSchema, GraphExtractor } from '../../graph/graph-extractor.js';
17
29
  import { createVectorSchema, ensureVectorSchema, buildInsertItems, insertVectorItems } from '../../indexing/indexer-build.js';
18
30
  import { ASTChunker, JAVA_FAMILY } from '../../indexing/ast-chunker.js';
19
31
  import { getEmbeddings, getModelInfo } from '../../embedding/embedding-service.js';
20
- import { HNSWIndex } from '../../vector-store/hnsw-index.js';
21
32
  import { BinaryHNSWIndex } from '../../vector-store/binary-hnsw-index.js';
22
33
  import { floatToBinary, normalizedFloatToInt8, truncateForHNSW } from '../../infrastructure/quantization.js';
23
34
  import { extractSparseGramDeltaRecord } from '../../infrastructure/native-sparse-gram.js';
@@ -29,6 +40,149 @@ const PROCESSING_QUEUE = 'index-maintainer-queue.processing.jsonl';
29
40
  const MERKLE_STATE = 'merkle-state.json';
30
41
  const METRICS_FILE = 'reconcile-metrics.jsonl';
31
42
 
43
+ // ---- G2 lever flags ----------------------------------------------------------
44
+ // `flagOn` = strict opt-in (`'1'`), used by the levers that remain DEFAULT-OFF
45
+ // (a trade or unvalidated). `flagDefaultOn` = default-on (ON unless explicitly
46
+ // '0'), used by the PROVEN-safe levers (recall-neutral / byte-identical / soak
47
+ // == baseline). Disable any default-on lever with `=0`.
48
+ const flagOn = (name) => process.env[name] === '1';
49
+ const flagDefaultOn = (name) => process.env[name] !== '0';
50
+ // DEFAULT-ON (verified safe): batch tier writes (byte-identical with det-levels),
51
+ // SQLite memory pragmas (footprint-only), budget-derived FTS5 merge (CPU-budget
52
+ // adaptive, recall-neutral). Disable with the matching env var = '0'.
53
+ const batchTierWritesEnabled = () => flagDefaultOn('SWEET_SEARCH_RECONCILE_BATCH_TIER_WRITES');
54
+ const sqlitePragmasEnabled = () => flagDefaultOn('SWEET_SEARCH_RECONCILE_SQLITE_PRAGMAS');
55
+ const fts5BudgetEnabled = () => flagDefaultOn('SWEET_SEARCH_RECONCILE_FTS5_BUDGET');
56
+ // DEFAULT-OFF (freshness trade — defers HNSW disk saves so on-disk lags the live
57
+ // graph): keep strict opt-in.
58
+ const liveHnswEnabled = () => flagOn('SWEET_SEARCH_RECONCILE_LIVE_HNSW');
59
+
60
+ const BATCH_FLAG = 'SWEET_SEARCH_RECONCILE_BATCH_TIER_WRITES';
61
+ const DET_LEVELS_FLAG = 'SWEET_SEARCH_HNSW_DETERMINISTIC_LEVELS';
62
+
63
+ // One-time-warning latch so the forced-on notice is emitted ONCE per process,
64
+ // not on every tick (createProductionReconciler runs per tick in the daemon).
65
+ let _batchForcedDetLevelsWarned = false;
66
+
67
+ /**
68
+ * Couple the two HNSW-determinism levers so the batch lever can never silently
69
+ * produce a non-byte-identical graph.
70
+ *
71
+ * E.1 batching (`SWEET_SEARCH_RECONCILE_BATCH_TIER_WRITES`) only yields a graph
72
+ * byte-identical to the per-file / compaction paths when per-id deterministic
73
+ * levels (`SWEET_SEARCH_HNSW_DETERMINISTIC_LEVELS`) are ON — see plan §0.5.
74
+ * `binary-hnsw-index.js` reads the det-levels env var directly at insert time,
75
+ * so running batch WITHOUT det-levels is a footgun: the batched tick draws the
76
+ * global RNG in a different interleaving than a per-file run and the resulting
77
+ * graph diverges. These two flags are coupled here.
78
+ *
79
+ * BOTH levers are now DEFAULT-ON (`!== '0'`). "batch effectively ON" therefore
80
+ * means the batch flag is unset (default) OR '1' — i.e. `BATCH_FLAG !== '0'`.
81
+ * In the normal case (both unset / default-on) det-levels is ALREADY effectively
82
+ * ON, so this is a no-op; no env mutation is needed and binary-hnsw-index.js
83
+ * sees det-levels on via its own `!== '0'` gate. The ONE case that must still
84
+ * fail loudly is the explicit contradiction: batch effectively ON while
85
+ * det-levels is EXPLICITLY '0'.
86
+ *
87
+ * Runs per tick from `createProductionReconciler` (which the daemon constructs
88
+ * each tick), so the daemon is covered without touching index-maintainer.mjs.
89
+ *
90
+ * - batch effectively ON + det-levels EXPLICITLY '0' → throw (explicit
91
+ * contradiction: byte-identity requested via batch, but levels forced
92
+ * non-deterministic).
93
+ * - batch effectively ON + det-levels default/unset/'1' → no-op (det-levels is
94
+ * default-on, so the batched graph already stays byte-identical).
95
+ * - batch EXPLICITLY OFF ('0') → no-op (the per-file path is taken; det-levels
96
+ * is independently meaningful and we never touch it).
97
+ *
98
+ * @param {{warn?:Function}} [logger] optional logger (reserved; the default-on
99
+ * defaults make the force-on warning path unreachable, but the signature is
100
+ * preserved for callers + tests).
101
+ */
102
+ export function normalizeHnswDeterminismFlags(logger = null) {
103
+ void logger;
104
+ // batch is DEFAULT-ON: it is effectively OFF only when explicitly '0'.
105
+ if (process.env[BATCH_FLAG] === '0') return;
106
+ const det = process.env[DET_LEVELS_FLAG];
107
+ // det-levels is also DEFAULT-ON: only an EXPLICIT '0' is the contradiction.
108
+ if (det === '0') {
109
+ throw new Error(
110
+ `${BATCH_FLAG} is enabled (default-on) but requires ${DET_LEVELS_FLAG} to be `
111
+ + `ON for a byte-identical HNSW graph — ${DET_LEVELS_FLAG} is explicitly '0'. `
112
+ + `These are contradictory: batch tier writes only converge with the `
113
+ + `per-file and compaction build paths when per-id deterministic levels are `
114
+ + `enabled (see INDEX_MAINTAINER_EFFICIENCY_IMPLEMENTATION_PLAN §0.5). Either `
115
+ + `leave ${DET_LEVELS_FLAG} default-on (omit it or set it to '1') or disable `
116
+ + `the batch lever with ${BATCH_FLAG}=0.`,
117
+ );
118
+ }
119
+ // det-levels is default-on (unset) or explicitly '1' → already coupled
120
+ // correctly; no env mutation needed (binary-hnsw-index.js reads its own
121
+ // `!== '0'` gate). The legacy force-on warning latch is retained only to keep
122
+ // its symbol stable for any importer.
123
+ void _batchForcedDetLevelsWarned;
124
+ }
125
+
126
+ // E.2: deletion-fraction threshold + insert cadence for the live (daemon-scoped)
127
+ // HNSW. Save to disk only on graceful shutdown, deletion-fraction >= this, or
128
+ // every N inserts.
129
+ const LIVE_HNSW_DELETION_FRACTION = Number.parseFloat(process.env.SWEET_SEARCH_RECONCILE_LIVE_HNSW_DELETE_FRAC || '0.15');
130
+ const LIVE_HNSW_SAVE_EVERY_INSERTS = Number.parseInt(process.env.SWEET_SEARCH_RECONCILE_LIVE_HNSW_SAVE_EVERY || '2000', 10);
131
+
132
+ /**
133
+ * E.4 SQLite memory pragmas, applied AFTER `journal_mode=WAL; synchronous=NORMAL`
134
+ * on a write connection. `cache_size=-32768` caps the per-connection page cache
135
+ * at ~32 MiB. `soft_heap_limit` is process-global and is set ONCE at daemon
136
+ * startup by G4 (index-maintainer) — NOT here — to avoid every connection
137
+ * re-setting a process-wide knob. With E.1 these attach to the tick-scoped
138
+ * connection so the cache is meaningful; without E.1 the per-file conn churn
139
+ * makes them near no-ops (documented, not a bug).
140
+ *
141
+ * Gated on `SWEET_SEARCH_RECONCILE_SQLITE_PRAGMAS` (default-on); set it to '0' to leave connections unchanged.
142
+ *
143
+ * @param {import('better-sqlite3').Database} db
144
+ * @param {{readonly?: boolean}} [opts]
145
+ */
146
+ function applyMemoryPragmas(db, { readonly = false } = {}) {
147
+ if (!sqlitePragmasEnabled()) return;
148
+ try { db.pragma('cache_size = -32768'); } catch {}
149
+ // mmap_size only on readonly maintainer conns (negligible benefit on the JS
150
+ // side — the user search path is native Rust — but harmless; matches the doc).
151
+ if (readonly) {
152
+ try { db.pragma('mmap_size = 268435456'); } catch {}
153
+ }
154
+ }
155
+
156
+ /**
157
+ * E.2: process-scoped (daemon-scoped) live store registry. The index-maintainer
158
+ * daemon runs many ticks in one process, each constructing a fresh
159
+ * `createProductionReconciler`; a module-level registry keyed by the binary HNSW
160
+ * index path (under the resolved state dir) lets the resident HNSW + float store
161
+ * survive across per-tick adapter instances when `SWEET_SEARCH_RECONCILE_LIVE_HNSW` is on.
162
+ *
163
+ * Each entry: { indexPath, index: BinaryHNSWIndex, floatStore: FloatVectorStore,
164
+ * insertsSinceSave, deletedCount, totalCount, dirty, loadPromise }
165
+ */
166
+ const liveStoreRegistry = new Map();
167
+
168
+ /**
169
+ * Release all live stores, saving any that are dirty. Called on graceful daemon
170
+ * shutdown (G4 wires the call; exposed here for tests + the disposeTick path).
171
+ */
172
+ export async function shutdownLiveStores() {
173
+ for (const [key, entry] of liveStoreRegistry) {
174
+ try {
175
+ if (entry.dirty && entry.index) {
176
+ await entry.index.save(entry.indexPath);
177
+ if (entry.floatStore && entry.floatStore.loaded) {
178
+ await entry.floatStore.save(getFloatStorePath(entry.indexPath));
179
+ }
180
+ }
181
+ } catch { /* best-effort flush on shutdown */ }
182
+ liveStoreRegistry.delete(key);
183
+ }
184
+ }
185
+
32
186
  function relPath(projectRoot, filePath) {
33
187
  const abs = path.isAbsolute(filePath) ? filePath : path.join(projectRoot, filePath);
34
188
  const rel = path.relative(projectRoot, abs).replace(/\\/g, '/');
@@ -100,10 +254,21 @@ function pickLiInput(chunk) {
100
254
  return chunk.li_greedy_text || chunk.embedding_text || chunk.li_text || chunk.text || chunk.content || '';
101
255
  }
102
256
 
103
- async function enrichChunksFromGraph(chunks, stateDir) {
257
+ async function enrichChunksFromGraph(chunks, stateDir, tickCtx = null) {
104
258
  const dbPath = path.join(stateDir, 'code-graph.db');
105
- if (!fs.existsSync(dbPath) || chunks.length === 0) return chunks;
106
- const db = new Database(dbPath, { readonly: true });
259
+ if (chunks.length === 0) return chunks;
260
+ // E.1: reuse the tick-scoped graph connection (`tickCtx.graphRoDb`) when batching;
261
+ // else open a per-file readonly connection exactly as before.
262
+ let db;
263
+ let ownConn = false;
264
+ if (tickCtx?.graphRoDb) {
265
+ db = tickCtx.graphRoDb;
266
+ } else {
267
+ if (!fs.existsSync(dbPath)) return chunks;
268
+ db = new Database(dbPath, { readonly: true });
269
+ applyMemoryPragmas(db, { readonly: true });
270
+ ownConn = true;
271
+ }
107
272
  try {
108
273
  const entityStmt = db.prepare('SELECT type, name, start_line, end_line FROM entities WHERE file_path = ? AND epoch_retired IS NULL ORDER BY start_line ASC');
109
274
  const fileEntityStmt = db.prepare('SELECT id FROM entities WHERE file_path = ? AND logical_entity_id = ? AND epoch_retired IS NULL ORDER BY epoch_written DESC LIMIT 1');
@@ -125,15 +290,35 @@ async function enrichChunksFromGraph(chunks, stateDir) {
125
290
  } catch {
126
291
  return chunks;
127
292
  } finally {
128
- db.close();
293
+ if (ownConn) db.close();
129
294
  }
130
295
  return chunks;
131
296
  }
132
297
 
133
298
  export function createProductionReconciler(options = {}) {
299
+ // Validate the batch + det-levels flag coupling FIRST (before the adapter is
300
+ // built and before any tier write). binary-hnsw-index.js reads
301
+ // SWEET_SEARCH_HNSW_DETERMINISTIC_LEVELS itself at insert time, so nothing is
302
+ // forced here; this only throws on the explicit contradiction (batch not '0'
303
+ // while det-levels is '0'). See `normalizeHnswDeterminismFlags`.
304
+ normalizeHnswDeterminismFlags(options.logger);
134
305
  const projectRoot = path.resolve(options.projectRoot || process.env.SWEET_SEARCH_PROJECT_ROOT || process.cwd());
135
306
  const stateDir = path.resolve(options.stateDir || process.env.SWEET_SEARCH_STATE_DIR || path.join(projectRoot, '.sweet-search'));
136
307
  const adapter = new ProductionReconcileAdapter({ ...options, projectRoot, stateDir });
308
+ // A.4-config: feed the interval-autotune a real load signal. This is ONLY the
309
+ // config half — the daemon (G4) reads the tuned interval back into its sleep
310
+ // loop. DEFAULT-ON (disable with SWEET_SEARCH_RECONCILE_AUTOTUNE=0): when off,
311
+ // `autotuneInterval` stays false and the reconciler never re-tunes (today's
312
+ // fixed-interval behavior). Verified recall-neutral + soak == baseline.
313
+ const autotuneOn = flagDefaultOn('SWEET_SEARCH_RECONCILE_AUTOTUNE');
314
+ const cpuCount = Math.max(1, os.cpus().length);
315
+ const autotuneConfig = autotuneOn
316
+ ? {
317
+ autotuneInterval: true,
318
+ cpuLoadAvg: os.loadavg()[0] / cpuCount,
319
+ maintenanceBacklog: adapter.maintenanceBacklog(),
320
+ }
321
+ : {};
137
322
  return new Reconciler({
138
323
  projectRoot,
139
324
  stateDir,
@@ -142,6 +327,7 @@ export function createProductionReconciler(options = {}) {
142
327
  config: {
143
328
  filesPerTick: Number.parseInt(process.env.SWEET_SEARCH_RECONCILE_FILES_PER_TICK || '50', 10),
144
329
  cpuBudgetMs: Number.parseInt(process.env.SWEET_SEARCH_RECONCILE_CPU_BUDGET_MS || '2000', 10),
330
+ ...autotuneConfig,
145
331
  ...(options.config || {}),
146
332
  },
147
333
  logger: options.logger || console,
@@ -184,28 +370,311 @@ class ProductionReconcileAdapter {
184
370
  this._liSkipFiles = new Set();
185
371
  this.hashes = new Map();
186
372
  this.touched = new Map();
373
+ // E.1: the active tick-scoped store context (null on the per-file path).
374
+ this._tickCtx = null;
375
+ // E.6: chunk-cutoff cache — loaded lazily at tick begin / first vector delta
376
+ // when the flag is on; null when disabled.
377
+ this._cutoffCache = null;
378
+ this._cutoffDirty = false;
187
379
  }
188
380
 
189
381
  progress(phase) {
190
382
  this.onProgress?.(phase);
191
383
  }
192
384
 
385
+ /**
386
+ * A.4-config: a coarse maintenance-backlog signal for the interval autotune —
387
+ * the depth of the rebuild queue. Best-effort; never throws.
388
+ * @returns {number}
389
+ */
390
+ maintenanceBacklog() {
391
+ try { return readMaintenanceQueue(this.stateDir).length; } catch { return 0; }
392
+ }
393
+
193
394
  adapters() {
194
- return {
395
+ const hooks = {
195
396
  readDirtySet: () => this.readDirtySet(),
196
397
  requeueDirtyFiles: (files) => this.requeueDirtyFiles(files),
197
398
  hashFile: (file) => this.hashFile(file),
198
399
  loadCurrentManifest: () => readManifest(this.stateDir),
199
400
  persistManifest: (manifest) => this.persistManifest(manifest),
200
- applyGraphDelta: (file, hashes, epoch) => this.applyGraphDelta(file, hashes, epoch),
201
- applyVectorDelta: (file, chunks, hashes, epoch) => this.applyVectorDelta(file, chunks, hashes, epoch),
202
- applyHNSWDelta: (file, ops, epoch) => this.applyHNSWDelta(file, ops, epoch),
203
- applyBinaryHNSWDelta: (file, ops, epoch) => this.applyBinaryHNSWDelta(file, ops, epoch),
401
+ applyGraphDelta: (file, hashes, epoch, ctx) => this.applyGraphDelta(file, hashes, epoch, ctx),
402
+ applyVectorDelta: (file, chunks, hashes, epoch, ctx) => this.applyVectorDelta(file, chunks, hashes, epoch, ctx),
403
+ applyBinaryHNSWDelta: (file, ops, epoch, ctx) => this.applyBinaryHNSWDelta(file, ops, epoch, ctx),
204
404
  applyLIDelta: (file, ops, epoch) => this.applyLIDelta(file, ops, epoch),
205
405
  applySparseGramDelta: (file, ops, epoch) => this.applySparseGramDelta(file, ops, epoch),
206
406
  readMaintenanceState: () => this.readMaintenanceState(),
207
407
  scheduleMaintenance: (job) => enqueueMaintenanceJob(this.stateDir, job),
208
408
  };
409
+ // E.1: expose the batch lifecycle hooks only while the default-on batch flag is
410
+ // enabled. With SWEET_SEARCH_RECONCILE_BATCH_TIER_WRITES=0 they are omitted, so
411
+ // the reconciler's `_batchTierWritesEnabled()` gate (which checks for the hooks) stays false and the per-file path is taken verbatim.
412
+ if (batchTierWritesEnabled()) {
413
+ hooks.beginTick = (info) => this.beginTick(info);
414
+ hooks.finalizeTick = (ctx, info) => this.finalizeTick(ctx, info);
415
+ hooks.disposeTick = (ctx) => this.disposeTick(ctx);
416
+ }
417
+ return hooks;
418
+ }
419
+
420
+ // ---- E.1/E.2 tick-scoped store context -----------------------------------
421
+
422
+ /**
423
+ * E.1: open the tick-scoped store context once at tick start. Opens RW
424
+ * `codebase.db` + `code-graph.db` (enrichment reads reuse the RW graph handle as
425
+ * `graphRoDb`), loads the binary HNSW + float store once, and primes the cutoff cache (E.6) when enabled.
426
+ *
427
+ * With E.2 (`SWEET_SEARCH_RECONCILE_LIVE_HNSW`) the HNSW + float store come
428
+ * from the daemon-scoped registry (loaded once, kept resident across ticks);
429
+ * otherwise they are loaded fresh and saved+closed at finalize.
430
+ */
431
+ async beginTick() {
432
+ const codebaseDbPath = path.join(this.stateDir, 'codebase.db');
433
+ const graphDbPath = path.join(this.stateDir, 'code-graph.db');
434
+ const indexPath = path.join(this.stateDir, 'codebase-binary-hnsw.idx');
435
+ fs.mkdirSync(this.stateDir, { recursive: true });
436
+
437
+ const codebaseExisted = fs.existsSync(codebaseDbPath);
438
+ const codebaseDb = new Database(codebaseDbPath);
439
+ codebaseDb.pragma('journal_mode = WAL');
440
+ codebaseDb.pragma('synchronous = NORMAL');
441
+ applyMemoryPragmas(codebaseDb);
442
+ codebaseExisted ? ensureVectorSchema(codebaseDb) : createVectorSchema(codebaseDb);
443
+
444
+ const graphDb = new Database(graphDbPath);
445
+ graphDb.pragma('journal_mode = WAL');
446
+ graphDb.pragma('synchronous = NORMAL');
447
+ applyMemoryPragmas(graphDb);
448
+ const graphHasFts = createGraphSchema(graphDb);
449
+ migrateEntitiesSchema(graphDb);
450
+ migrateRelationshipsSchema(graphDb);
451
+
452
+ // Enrichment reads must observe THIS tick's graph writes. Because the
453
+ // batched path defers the SQLite COMMIT to finalize (persist-before-advance,
454
+ // see below), a SEPARATE readonly connection in WAL mode would NOT see the
455
+ // uncommitted in-tick graph rows. So enrichment reads from the SAME RW
456
+ // connection (`graphDb`) — a connection always sees its own uncommitted
457
+ // writes — preserving per-file enrichment semantics inside one tick.
458
+ const graphRoDb = graphDb;
459
+
460
+ // Resident HNSW + float store (E.1 load-once; E.2 daemon-scoped singleton).
461
+ const live = liveHnswEnabled() ? this._getLiveStore(indexPath) : null;
462
+ let index = live?.index || null;
463
+ let floatStore = live?.floatStore || null;
464
+ if (!index) {
465
+ index = new BinaryHNSWIndex({ indexPath, stalePath: `${indexPath}.stale.bin`, floatDimension: this.modelInfo.hnswDimension });
466
+ try { await index.load(indexPath); } catch { await index.init(); }
467
+ }
468
+ const binaryVectorsBefore = index.idToIndex?.size ?? 0;
469
+ if (!floatStore) {
470
+ floatStore = new FloatVectorStore();
471
+ try { await floatStore.loadOrInit(getFloatStorePath(indexPath), this.modelInfo.hnswDimension); } catch { /* fall back to fresh */ }
472
+ }
473
+
474
+ if (chunkCutoffEnabled() && !this._cutoffCache) {
475
+ this._cutoffCache = loadCutoffCache(this.stateDir);
476
+ this._cutoffDirty = false;
477
+ }
478
+
479
+ // E.1 PERSIST-BEFORE-ADVANCE: defer the SQLite COMMIT to finalizeTick. Open
480
+ // an explicit outer transaction on each RW connection now; the per-file
481
+ // `db.transaction(fn)()` calls inside apply*Delta then run as SAVEPOINTs
482
+ // (better-sqlite3 nests automatically) and only become durable when
483
+ // finalizeTick COMMITs — which it does ONLY after the HNSW + float batch
484
+ // save fsyncs. A crash/throw before that point rolls the whole tick's SQLite
485
+ // writes back, so a restart re-reconciles from a consistent prior state and
486
+ // can never leave a SQLite-live row missing from the HNSW.
487
+ codebaseDb.exec('BEGIN');
488
+ graphDb.exec('BEGIN');
489
+
490
+ const ctx = {
491
+ indexPath,
492
+ tickStartMs: Date.now(),
493
+ txOpen: true,
494
+ codebaseDb,
495
+ graphDb,
496
+ graphRoDb,
497
+ graphHasFts,
498
+ index,
499
+ floatStore,
500
+ binaryVectorsBefore,
501
+ live: !!live,
502
+ // Accumulated across the tick:
503
+ floatUpserts: [],
504
+ floatRemoveIds: [],
505
+ append: 0,
506
+ tombstone: 0,
507
+ // Files whose ops are staged in this batch (provisional → promoted to
508
+ // merkle only after finalize fsyncs).
509
+ persistedFiles: new Set(),
510
+ pendingAdds: [],
511
+ };
512
+ this._tickCtx = ctx;
513
+ this._lastPersistedFiles = ctx.persistedFiles;
514
+ return ctx;
515
+ }
516
+
517
+ /**
518
+ * E.2: fetch (or lazily create) the daemon-scoped resident store entry.
519
+ */
520
+ _getLiveStore(indexPath) {
521
+ let entry = liveStoreRegistry.get(indexPath);
522
+ if (!entry) {
523
+ entry = {
524
+ indexPath,
525
+ index: null,
526
+ floatStore: null,
527
+ insertsSinceSave: 0,
528
+ deletedCount: 0,
529
+ totalCount: 0,
530
+ dirty: false,
531
+ loadPromise: null,
532
+ };
533
+ liveStoreRegistry.set(indexPath, entry);
534
+ }
535
+ return entry;
536
+ }
537
+
538
+ /**
539
+ * E.1 PERSIST-BEFORE-ADVANCE: save the batched HNSW + float store once, fsync,
540
+ * then (per E.4) shrink_memory + wal_checkpoint(PASSIVE) on the tick-scoped
541
+ * connections. Returns the set of files whose ops are now persisted so the
542
+ * manifest publish only promotes those into the merkle.
543
+ *
544
+ * With E.2 the resident index is NOT saved every tick — only on a deletion
545
+ * fraction >= threshold, every N inserts, or graceful shutdown. The SQLite
546
+ * tiers always fsync here; the merkle then advances for files whose vector
547
+ * rows landed (HNSW reconverges from those rows on the next save / restart).
548
+ */
549
+ async finalizeTick(ctx) {
550
+ if (!ctx) return { persistedFiles: new Set(), requeueFiles: [] };
551
+ let hnswSaved = false;
552
+ try {
553
+ // E.1: insert all staged adds into the resident index in a DETERMINISTIC
554
+ // order (sorted by id). Combined with G1's per-id deterministic levels and
555
+ // sorted-order compaction, this makes the batched graph reproducible and
556
+ // byte-identical across batch / rebuild / compaction construction paths.
557
+ const pending = ctx.pendingAdds || [];
558
+ if (pending.length > 0) {
559
+ pending.sort((a, b) => (a.addId < b.addId ? -1 : a.addId > b.addId ? 1 : 0));
560
+ let done = 0;
561
+ for (const op of pending) {
562
+ const truncated = truncateForHNSW(op.embedding, this.modelInfo.hnswDimension);
563
+ await ctx.index.add(op.addId, floatToBinary(truncated), op.metadata || {}, normalizedFloatToInt8(truncated));
564
+ ctx.floatUpserts.push({ id: op.addId, vector: truncated });
565
+ if ((++done) % 100 === 0) this.progress('production:binary-hnsw-loop');
566
+ }
567
+ this.progress('production:binary-hnsw-batched');
568
+ }
569
+ ctx.pendingAdds = [];
570
+
571
+ if (ctx.live) {
572
+ const entry = liveStoreRegistry.get(ctx.indexPath);
573
+ if (entry) {
574
+ entry.index = ctx.index;
575
+ entry.floatStore = ctx.floatStore;
576
+ entry.insertsSinceSave += ctx.append;
577
+ entry.deletedCount += ctx.tombstone;
578
+ entry.totalCount = ctx.index.idToIndex?.size ?? entry.totalCount;
579
+ if (ctx.append > 0 || ctx.tombstone > 0) entry.dirty = true;
580
+ const denom = Math.max(1, entry.totalCount + entry.deletedCount);
581
+ const deletionFraction = entry.deletedCount / denom;
582
+ const shouldSave = deletionFraction >= LIVE_HNSW_DELETION_FRACTION
583
+ || entry.insertsSinceSave >= LIVE_HNSW_SAVE_EVERY_INSERTS;
584
+ if (shouldSave && entry.dirty) {
585
+ await ctx.index.save(ctx.indexPath);
586
+ await flushFloatStore({
587
+ binaryHnswPath: ctx.indexPath,
588
+ store: ctx.floatStore,
589
+ upserts: ctx.floatUpserts,
590
+ removeIds: ctx.floatRemoveIds,
591
+ binaryVectorsBefore: ctx.binaryVectorsBefore,
592
+ dimension: this.modelInfo.hnswDimension,
593
+ });
594
+ entry.insertsSinceSave = 0;
595
+ entry.deletedCount = 0;
596
+ entry.dirty = false;
597
+ hnswSaved = true;
598
+ } else if (ctx.floatUpserts.length || ctx.floatRemoveIds.length) {
599
+ // Keep the float store's in-memory delta consistent with the live
600
+ // index even when we skip the disk save (so a later threshold save
601
+ // writes the full set).
602
+ ctx.floatStore.applyDelta({ upserts: ctx.floatUpserts, removeIds: ctx.floatRemoveIds });
603
+ }
604
+ }
605
+ } else if (ctx.append > 0 || ctx.tombstone > 0) {
606
+ await ctx.index.save(ctx.indexPath);
607
+ this.progress('production:binary-hnsw-saved');
608
+ await flushFloatStore({
609
+ binaryHnswPath: ctx.indexPath,
610
+ store: ctx.floatStore,
611
+ upserts: ctx.floatUpserts,
612
+ removeIds: ctx.floatRemoveIds,
613
+ binaryVectorsBefore: ctx.binaryVectorsBefore,
614
+ dimension: this.modelInfo.hnswDimension,
615
+ });
616
+ this.progress('production:float-store-maintained');
617
+ hnswSaved = true;
618
+ }
619
+ // PERSIST-BEFORE-ADVANCE: the HNSW + float batch has now fsynced (or was
620
+ // intentionally not due-to-save under E.2). COMMIT the SQLite tiers ONLY
621
+ // now, so a crash before this point rolled the SQLite writes back too.
622
+ if (ctx.txOpen) {
623
+ ctx.codebaseDb.exec('COMMIT');
624
+ ctx.graphDb.exec('COMMIT');
625
+ ctx.txOpen = false;
626
+ }
627
+ } catch (err) {
628
+ // HNSW save (or COMMIT) failed → roll back the SQLite tiers so nothing is
629
+ // half-persisted, close connections, and surface the error. The manifest
630
+ // never advances for these files (persistedFiles is dropped) and the
631
+ // processing queue is left in place → the next tick re-reconciles them.
632
+ await this.disposeTick(ctx);
633
+ throw err;
634
+ }
635
+
636
+ // E.4: return cache to the OS + checkpoint the WAL, then close. Safe now that
637
+ // the transaction is committed (a checkpoint inside a write tx is a no-op).
638
+ try {
639
+ if (sqlitePragmasEnabled()) {
640
+ try { ctx.codebaseDb.pragma('shrink_memory'); } catch {}
641
+ try { ctx.graphDb.pragma('shrink_memory'); } catch {}
642
+ }
643
+ try { ctx.codebaseDb.pragma('wal_checkpoint(PASSIVE)'); } catch {}
644
+ try { ctx.graphDb.pragma('wal_checkpoint(PASSIVE)'); } catch {}
645
+ } finally {
646
+ await this.disposeTick(ctx);
647
+ }
648
+ void hnswSaved;
649
+ // Stash for persistManifest (which runs after finalize disposed the ctx):
650
+ // only these files are promoted into the merkle (persist-before-advance).
651
+ this._lastPersistedFiles = ctx.persistedFiles;
652
+ return { persistedFiles: ctx.persistedFiles, requeueFiles: [] };
653
+ }
654
+
655
+ /**
656
+ * Close the tick-scoped connections. Idempotent + best-effort. With E.2 the
657
+ * resident index/float store are NOT closed (they belong to the registry).
658
+ */
659
+ async disposeTick(ctx) {
660
+ if (!ctx) return;
661
+ // Roll back an uncommitted tick transaction (crash/throw before finalize's
662
+ // COMMIT) so the SQLite tiers are left at their prior consistent state.
663
+ if (ctx.txOpen) {
664
+ for (const key of ['codebaseDb', 'graphDb']) {
665
+ try { ctx[key]?.exec('ROLLBACK'); } catch {}
666
+ }
667
+ ctx.txOpen = false;
668
+ }
669
+ // graphRoDb aliases graphDb in the batched path; close each distinct handle
670
+ // once.
671
+ const closed = new Set();
672
+ for (const key of ['codebaseDb', 'graphDb', 'graphRoDb']) {
673
+ const db = ctx[key];
674
+ if (db && !closed.has(db)) { try { db.close(); } catch {} closed.add(db); }
675
+ ctx[key] = null;
676
+ }
677
+ if (this._tickCtx === ctx) this._tickCtx = null;
209
678
  }
210
679
 
211
680
  async readDirtySet() {
@@ -292,16 +761,28 @@ class ProductionReconcileAdapter {
292
761
  return h;
293
762
  }
294
763
 
295
- async applyGraphDelta(file, hashes, epoch) {
764
+ async applyGraphDelta(file, hashes, epoch, ctx = null) {
296
765
  const rel = typeof file === 'string' ? file : file.path;
297
- const dbPath = path.join(this.stateDir, 'code-graph.db');
298
- fs.mkdirSync(this.stateDir, { recursive: true });
299
- const db = new Database(dbPath);
300
- db.pragma('journal_mode = WAL');
301
- db.pragma('synchronous = NORMAL');
302
- const hasFts = createGraphSchema(db);
303
- migrateEntitiesSchema(db);
304
- migrateRelationshipsSchema(db);
766
+ // E.1: reuse the tick-scoped RW connection (schema already ensured) instead
767
+ // of opening + migrating + closing a connection per file.
768
+ let db;
769
+ let hasFts;
770
+ let ownConn = false;
771
+ if (ctx?.graphDb) {
772
+ db = ctx.graphDb;
773
+ hasFts = ctx.graphHasFts;
774
+ } else {
775
+ const dbPath = path.join(this.stateDir, 'code-graph.db');
776
+ fs.mkdirSync(this.stateDir, { recursive: true });
777
+ db = new Database(dbPath);
778
+ db.pragma('journal_mode = WAL');
779
+ db.pragma('synchronous = NORMAL');
780
+ applyMemoryPragmas(db);
781
+ hasFts = createGraphSchema(db);
782
+ migrateEntitiesSchema(db);
783
+ migrateRelationshipsSchema(db);
784
+ ownConn = true;
785
+ }
305
786
  try {
306
787
  const oldRows = db.prepare('SELECT rowid, id, logical_entity_id, signature_hash FROM entities WHERE file_path = ? AND epoch_retired IS NULL').all(rel);
307
788
  const oldByLogical = new Map(oldRows.map((r) => [r.logical_entity_id || r.id, r]));
@@ -361,22 +842,41 @@ class ProductionReconcileAdapter {
361
842
  });
362
843
  tx();
363
844
  this.progress('production:graph-written');
364
- if (hasFts) for (const table of ['entities_fts', 'entities_trigram']) try { fts5Merge(db, table, 16); } catch {}
845
+ // E.5: budget-derived FTS5 merge. When the budget flag is off this is the
846
+ // fixed 16-page merge exactly as before; when on, a busy tick (elapsed >
847
+ // 1800ms) skips the merge to leave CPU for reconcile.
848
+ if (hasFts) {
849
+ const pages = fts5BudgetEnabled()
850
+ ? fts5MergeBudgetPages({ elapsedMs: ctx ? Date.now() - ctx.tickStartMs : 0 })
851
+ : 16;
852
+ if (pages != null) {
853
+ for (const table of ['entities_fts', 'entities_trigram']) try { fts5Merge(db, table, pages); } catch {}
854
+ }
855
+ }
365
856
  this.touched.set(rel, { ...(this.touched.get(rel) || {}), graphEntities: entities.length });
366
857
  return { ops: { graph_upsert: upsert, graph_tombstone: tombstone }, manifest: { path: 'code-graph.db' } };
367
858
  } finally {
368
- db.close();
859
+ if (ownConn) db.close();
369
860
  }
370
861
  }
371
862
 
372
- async applyVectorDelta(file, _chunks, hashes, epoch) {
863
+ async applyVectorDelta(file, _chunks, hashes, epoch, ctx = null) {
373
864
  const rel = typeof file === 'string' ? file : file.path;
374
- const dbPath = path.join(this.stateDir, 'codebase.db');
375
- const existed = fs.existsSync(dbPath);
376
- const db = new Database(dbPath);
377
- db.pragma('journal_mode = WAL');
378
- db.pragma('synchronous = NORMAL');
379
- existed ? ensureVectorSchema(db) : createVectorSchema(db);
865
+ // E.1: reuse the tick-scoped RW connection (schema already ensured).
866
+ let db;
867
+ let ownConn = false;
868
+ if (ctx?.codebaseDb) {
869
+ db = ctx.codebaseDb;
870
+ } else {
871
+ const dbPath = path.join(this.stateDir, 'codebase.db');
872
+ const existed = fs.existsSync(dbPath);
873
+ db = new Database(dbPath);
874
+ db.pragma('journal_mode = WAL');
875
+ db.pragma('synchronous = NORMAL');
876
+ applyMemoryPragmas(db);
877
+ existed ? ensureVectorSchema(db) : createVectorSchema(db);
878
+ ownConn = true;
879
+ }
380
880
  const vectorOps = [];
381
881
  let chunks = [];
382
882
  try {
@@ -386,12 +886,57 @@ class ProductionReconcileAdapter {
386
886
  const summary = retire();
387
887
  const retired = summary.retiredRows.map((r) => ({ retireId: r.oldId, file: rel }));
388
888
  this.touched.set(rel, { ...(this.touched.get(rel) || {}), hash: hashes, chunkIds: [] });
889
+ // E.6: drop a deleted file's cutoff signature.
890
+ if (this._cutoffCache) { deleteFileSignature(this._cutoffCache, rel); this._cutoffDirty = true; }
389
891
  return { ops: { vectors_delete: summary.retiredRows.length }, vectorOps: retired, tokenOps: retired, gramOps: [{ file: rel, deleted: true }] };
390
892
  }
391
893
  const parsed = await new ASTChunker({ projectRoot: this.projectRoot }).parseFile(rel, hashes.content);
392
894
  this.progress('production:vector-parsed');
393
- chunks = await enrichChunksFromGraph(parsed.map((chunk, i) => ({ ...chunk, file: rel, id: `${rel}:${chunk.metadata?.line_start || 0}-${chunk.metadata?.line_end || chunk.metadata?.line_start || 0}:${i}` })), this.stateDir);
895
+ chunks = await enrichChunksFromGraph(parsed.map((chunk, i) => ({ ...chunk, file: rel, id: `${rel}:${chunk.metadata?.line_start || 0}-${chunk.metadata?.line_end || chunk.metadata?.line_start || 0}:${i}` })), this.stateDir, ctx);
394
896
  this.progress('production:vector-enriched');
897
+ // E.6 chunk-hash early-cutoff. The signature is the per-chunk encoder-input
898
+ // hashes (embedding_input_hash + li_input_hash) computed from the ENRICHED
899
+ // chunks — so cross-file enrichment (scope/imports injected above) folds in.
900
+ // If the file changed on disk but produces byte-identical encoder inputs
901
+ // (comment-only / reformat edits, or a dependency change that does NOT
902
+ // alter this file's enriched text), the encode + all tier writes are
903
+ // skipped. CORRECTNESS GATE: keyed ONLY on encoder-input hashes, never on
904
+ // the file's own chunk_text_hash / contentUnchanged.
905
+ if (chunkCutoffEnabled()) {
906
+ if (!this._cutoffCache) { this._cutoffCache = loadCutoffCache(this.stateDir); this._cutoffDirty = false; }
907
+ const signature = computeCutoffSignature(chunks);
908
+ const previous = getFileSignature(this._cutoffCache, rel);
909
+ if (signaturesMatch(previous, signature)) {
910
+ this.progress('production:vector-cutoff-skip');
911
+ // Provisional touched entry: keep the file's existing chunkIds so the
912
+ // merkle hash advances (the encoder inputs are unchanged) without any
913
+ // tier write. The merkle still records the new content hash so the
914
+ // file is not re-queued forever.
915
+ const prevTouched = this.touched.get(rel) || {};
916
+ const prevChunkIds = readJson(path.join(this.stateDir, MERKLE_STATE), { files: {} }).files?.[rel]?.chunkIds || prevTouched.chunkIds || [];
917
+ this.touched.set(rel, { ...prevTouched, hash: hashes, chunkIds: prevChunkIds, content: hashes.content });
918
+ if (ctx) ctx.persistedFiles.add(rel);
919
+ return {
920
+ ops: { vectors_upsert: 0, vectors_delete: 0 },
921
+ chunksTotal: chunks.length,
922
+ chunksEncoded: 0,
923
+ chunksReused: chunks.length,
924
+ chunksMetadataDirty: 0,
925
+ skipped: true,
926
+ vectorOps: [],
927
+ tokenOps: [],
928
+ // E.6 cutoff skips the EXPENSIVE encode + dense/HNSW/LI/graph tiers
929
+ // (encoder inputs are byte-identical), but sparse-grams are derived
930
+ // from RAW file content — a comment/reformat edit, or a change in an
931
+ // un-chunked region, leaves encoder inputs identical yet changes the
932
+ // raw content, so ss-grep/regex would go STALE if we dropped the
933
+ // sparse delta here. Always emit the content-derived sparse-gram delta
934
+ // on cutoff; the reconciler applies ONLY this tier when `skipped`.
935
+ gramOps: [{ file: rel, deleted: false, content: hashes.content, contentHash: hashes.contentHash }],
936
+ manifest: { path: 'codebase.db' },
937
+ };
938
+ }
939
+ }
395
940
  // LI generated-content parity: decide ONCE, from the file's full chunk set
396
941
  // (exactly like full indexing's per-file applyIndexingChunkPolicy), whether
397
942
  // late interaction skips this file. Embeddings/graph/sparse still index it.
@@ -437,7 +982,89 @@ class ProductionReconcileAdapter {
437
982
  const reused = delta.toReuse.find((item) => item.ann?.chunkStructId === row.chunkStructId);
438
983
  if (reused?.chunk) tokenOps.push({ addId: row.newId, chunk: reused.chunk });
439
984
  }
440
- this.touched.set(rel, { ...(this.touched.get(rel) || {}), hash: hashes, chunkIds: newIds, content: hashes.content });
985
+
986
+ // CRASH-CONSISTENCY (default per-file path durability). On this path the
987
+ // SQLite vector COMMIT above (`production:vector-written`) lands BEFORE the
988
+ // dependent tiers (HNSW, then LI) are persisted, in separate adapter calls
989
+ // with no shared transaction. A SIGKILL between them leaves the vector rows
990
+ // DURABLE in codebase.db while the HNSW node + LI doc were never written —
991
+ // and the merkle did NOT advance (persistManifest never ran). On the next
992
+ // tick the file is re-reconciled, but its committed rows now hash-MATCH an
993
+ // EXACT reuse in diffChunks → zero re-encode → zero add ops → the chunk is
994
+ // QUERYABLE via FTS/SQLite yet permanently MISSING from the HNSW vector
995
+ // index AND the LI index (a real correctness hole: vector / late-interaction
996
+ // search silently never return it). Verified by the determinism harness
997
+ // `--kill-after-tick 2`: live HNSW + LI diverge (the new gamma.js chunk).
998
+ //
999
+ // FIX (minimal persist-before-advance for the per-file path): the published
1000
+ // merkle epoch is the advance authority — persistManifest only advances
1001
+ // `merkle.epoch` AFTER a tick wrote its downstream tiers, so any LIVE vector
1002
+ // row whose `epoch_written` exceeds the highest published merkle epoch was
1003
+ // committed by a tick that never published: a torn post-crash row whose
1004
+ // HNSW node / LI doc may be missing. Re-emit a repair op for each such row
1005
+ // under its EXISTING id (so the recovered index is identical to a clean
1006
+ // run, not a fresh re-encode id):
1007
+ // - HNSW: an ADD op. `index.add` on an already-present id updates in place
1008
+ // (no duplicate node, no graph mutation) → no-op when the node landed,
1009
+ // repair when it didn't.
1010
+ // - LI: a RETIRE+ADD pair. LI `add` appends to a fresh segment and is NOT
1011
+ // idempotent, so we tombstone any existing doc for the id first; the
1012
+ // add then yields exactly one live doc whether or not the doc had
1013
+ // landed. (Retiring a non-existent LI doc is a no-op.)
1014
+ //
1015
+ // The discriminator is exact and never fires on the happy path: a
1016
+ // successful tick always advances `merkle.epoch` to the epoch it wrote its
1017
+ // rows at, so on a non-crash tick every prior live row has
1018
+ // `epoch_written <= merkle.epoch` and the rows written THIS tick are already
1019
+ // in `newIds` (added above), never re-added here. Runs in BOTH the per-file
1020
+ // and the E.1 batched path: the batched path never CREATES a torn row (its
1021
+ // deferred COMMIT + merkle gating rolls a crashed batch back), but a LEGACY
1022
+ // orphan from a pre-batched per-file crash can still exist on disk when the
1023
+ // daemon later runs batched, so the repair must heal it under either mode.
1024
+ // In the batched path the repair ops join `vectorOps`/`tokenOps`, flow
1025
+ // through the tick-scoped adapters into `ctx.pendingAdds`, and are saved by
1026
+ // `finalizeTick` — and `db` reads see the committed orphan (prior-tick) row.
1027
+ // Byte-diff/behaviour on non-crash runs is UNCHANGED in both modes (no
1028
+ // orphan ⇒ no-op; verified by the determinism harness control + sweeps).
1029
+ const repairedIds = [];
1030
+ if (snap.size > 0) {
1031
+ const publishedEpoch = this._publishedEpoch();
1032
+ const alreadyAdded = new Set(newIds);
1033
+ const chunkByStructId = new Map();
1034
+ for (let i = 0; i < chunks.length; i += 1) {
1035
+ const sid = annotations[i]?.chunkStructId;
1036
+ if (sid != null) chunkByStructId.set(sid, chunks[i]);
1037
+ }
1038
+ for (const row of snap.values()) {
1039
+ if (row.epoch_retired != null) continue;
1040
+ if (!Number.isInteger(row.epoch_written) || row.epoch_written <= publishedEpoch) continue;
1041
+ if (alreadyAdded.has(row.id)) continue; // written THIS tick already
1042
+ const dbRow = db.prepare('SELECT id, embedding, metadata FROM vectors WHERE id = ?').get(row.id);
1043
+ if (!dbRow?.embedding) continue;
1044
+ vectorOps.push({ addId: dbRow.id, embedding: float32FromBuffer(dbRow.embedding), metadata: JSON.parse(dbRow.metadata || '{}') });
1045
+ const chunk = chunkByStructId.get(row.chunk_struct_id);
1046
+ if (chunk) {
1047
+ tokenOps.push({ retireId: dbRow.id, file: rel });
1048
+ tokenOps.push({ addId: dbRow.id, chunk });
1049
+ }
1050
+ // Record the repaired row in the merkle chunkIds so the file's recorded
1051
+ // chunk set reflects the rows actually live after recovery (a torn add
1052
+ // produced no `newIds`, so without this the merkle would advance with an
1053
+ // empty chunkIds for a file that does have a live, now-indexed chunk).
1054
+ repairedIds.push(dbRow.id);
1055
+ this.progress('production:vector-crash-recovery');
1056
+ }
1057
+ }
1058
+
1059
+ const recordedChunkIds = repairedIds.length > 0 ? [...newIds, ...repairedIds] : newIds;
1060
+ this.touched.set(rel, { ...(this.touched.get(rel) || {}), hash: hashes, chunkIds: recordedChunkIds, content: hashes.content });
1061
+ // E.6: record this file's new cutoff signature (encoder-input hashes of
1062
+ // the enriched chunks) for next-tick comparison.
1063
+ if (this._cutoffCache) {
1064
+ setFileSignature(this._cutoffCache, rel, computeCutoffSignature(chunks));
1065
+ this._cutoffDirty = true;
1066
+ }
1067
+ if (ctx) ctx.persistedFiles.add(rel);
441
1068
  return {
442
1069
  ops: { vectors_upsert: newIds.length, vectors_delete: vectorOps.filter((o) => o.retireId).length },
443
1070
  chunksTotal: chunks.length,
@@ -450,32 +1077,40 @@ class ProductionReconcileAdapter {
450
1077
  manifest: { path: 'codebase.db' },
451
1078
  };
452
1079
  } finally {
453
- db.close();
1080
+ if (ownConn) db.close();
454
1081
  }
455
1082
  }
456
1083
 
457
- async applyHNSWDelta(_file, ops) {
458
- if (!Array.isArray(ops) || ops.length === 0) return { ops: { hnsw_add: 0, hnsw_tombstone: 0 } };
459
- const indexPath = path.join(this.stateDir, 'codebase-hnsw.idx');
460
- const index = new HNSWIndex({ indexPath, stalePath: `${indexPath}.stale.bin`, dimension: this.modelInfo.hnswDimension });
461
- try { await index.load(indexPath); } catch { await index.init(); }
462
- this.progress('production:hnsw-loaded');
463
- let add = 0; let tombstone = 0;
464
- for (const op of ops) {
465
- if (op.retireId && await index.remove(op.retireId)) tombstone += 1;
466
- if (op.addId && op.embedding) {
467
- await index.add(op.addId, truncateForHNSW(op.embedding, this.modelInfo.hnswDimension), { file: op.metadata?.file, name: op.metadata?.name, type: op.metadata?.type });
468
- add += 1;
1084
+ async applyBinaryHNSWDelta(_file, ops, _epoch, ctx = null) {
1085
+ if (!Array.isArray(ops) || ops.length === 0) return { ops: { binary_hnsw_append: 0, binary_hnsw_tombstone: 0 } };
1086
+
1087
+ // E.1 batched path: reuse the resident index, apply tombstones in place, and
1088
+ // ACCUMULATE the add ops onto the tick context. The actual `index.add()`
1089
+ // insertions are deferred to finalizeTick, where they run sorted-by-id so
1090
+ // the graph is reproducible (G1 byte-identity). We still report the per-file
1091
+ // append/tombstone counts here for the tick counters.
1092
+ if (ctx?.index) {
1093
+ const index = ctx.index;
1094
+ let append = 0; let tombstone = 0;
1095
+ for (const op of ops) {
1096
+ if (op.retireId) {
1097
+ if (markBinaryStale(index, op.retireId)) tombstone += 1;
1098
+ ctx.floatRemoveIds.push(op.retireId);
1099
+ }
1100
+ if (op.addId && op.embedding) {
1101
+ // Stage the add; insertion happens in finalize (sorted by id).
1102
+ ctx.pendingAdds = ctx.pendingAdds || [];
1103
+ ctx.pendingAdds.push(op);
1104
+ append += 1;
1105
+ }
469
1106
  }
470
- if ((add + tombstone) > 0 && (add + tombstone) % 100 === 0) this.progress('production:hnsw-loop');
1107
+ ctx.tombstone += tombstone;
1108
+ // The staged add count is folded into ctx.append here, at staging time; only the index.add() insertions themselves are deferred to finalizeTick (sorted by id).
1109
+ ctx.append += append;
1110
+ return { ops: { binary_hnsw_append: append, binary_hnsw_tombstone: tombstone }, manifest: { path: 'codebase-binary-hnsw.idx' } };
471
1111
  }
472
- await index.save(indexPath);
473
- this.progress('production:hnsw-saved');
474
- return { ops: { hnsw_add: add, hnsw_tombstone: tombstone }, manifest: { path: 'codebase-hnsw.idx', stale: 'codebase-hnsw.idx.stale.bin' } };
475
- }
476
1112
 
477
- async applyBinaryHNSWDelta(_file, ops) {
478
- if (!Array.isArray(ops) || ops.length === 0) return { ops: { binary_hnsw_append: 0, binary_hnsw_tombstone: 0 } };
1113
+ // ---- Per-file path (flag off): exact current behavior. ----
479
1114
  const indexPath = path.join(this.stateDir, 'codebase-binary-hnsw.idx');
480
1115
  const index = new BinaryHNSWIndex({ indexPath, stalePath: `${indexPath}.stale.bin`, floatDimension: this.modelInfo.hnswDimension });
481
1116
  try { await index.load(indexPath); } catch { await index.init(); }
@@ -527,6 +1162,18 @@ class ProductionReconcileAdapter {
527
1162
  return { ops: { li_segment_append: appended, li_tombstone: tombstone }, manifest: { path: 'codebase-late-interaction.db', segments: 'codebase-late-interaction.db.segments/manifest.json' } };
528
1163
  }
529
1164
 
1165
+ /**
1166
+ * Highest SUCCESSFULLY-published merkle epoch (the per-file crash-recovery
1167
+ * advance authority — see the torn-row repair in `applyVectorDelta`). Returns
1168
+ * -1 when no merkle has been published yet, so any committed row counts as
1169
+ * un-advanced. Best-effort + read-only.
1170
+ * @returns {number}
1171
+ */
1172
+ _publishedEpoch() {
1173
+ const merkle = readJson(path.join(this.stateDir, MERKLE_STATE), {});
1174
+ return Number.isInteger(merkle?.epoch) ? merkle.epoch : -1;
1175
+ }
1176
+
530
1177
  applySparseGramDelta(_file, ops, epoch) {
531
1178
  if (!Array.isArray(ops) || ops.length === 0) return { ops: { sparse_gram_delta_upsert: 0 } };
532
1179
  const base = path.join(this.stateDir, 'codebase-sparse-grams.idx');
@@ -563,14 +1210,27 @@ class ProductionReconcileAdapter {
563
1210
  const merklePath = path.join(this.stateDir, MERKLE_STATE);
564
1211
  const merkle = readJson(merklePath, { version: '2.4', files: {}, stats: {} });
565
1212
  merkle.files ||= {};
1213
+ // E.1 PERSIST-BEFORE-ADVANCE: when batching, promote a file into the merkle
1214
+ // ONLY if its ops are in the persisted batch (recorded in finalizeTick). A
1215
+ // file touched this tick but absent from the persisted set (e.g. its HNSW
1216
+ // adds did not make the saved batch) is left at its prior merkle state and
1217
+ // re-reconciled next tick. Deletions always apply (no HNSW add to persist).
1218
+ const persisted = this._lastPersistedFiles;
1219
+ const gate = batchTierWritesEnabled() && persisted instanceof Set;
566
1220
  for (const [file, data] of this.touched.entries()) {
567
1221
  if (data.hash?.deleted) delete merkle.files[file];
568
- else merkle.files[file] = { hash: data.hash.contentHash, ...data.hash.stat, epoch: manifest.epoch, chunkIds: data.chunkIds || [] };
1222
+ else if (!gate || persisted.has(file)) merkle.files[file] = { hash: data.hash.contentHash, ...data.hash.stat, epoch: manifest.epoch, chunkIds: data.chunkIds || [] };
569
1223
  }
570
1224
  merkle.lastIndex = new Date().toISOString();
571
1225
  merkle.epoch = manifest.epoch;
572
1226
  merkle.stats = { ...(merkle.stats || {}), totalFiles: Object.keys(merkle.files).length };
573
1227
  safeWriteJson(merklePath, merkle);
1228
+ // E.6: persist the updated chunk-cutoff cache once per tick (after the
1229
+ // merkle advances). Best-effort; a failure only costs a redundant re-embed.
1230
+ if (this._cutoffCache && this._cutoffDirty) {
1231
+ saveCutoffCache(this.stateDir, this._cutoffCache);
1232
+ this._cutoffDirty = false;
1233
+ }
574
1234
  try { fs.unlinkSync(path.join(this.stateDir, PROCESSING_QUEUE)); } catch {}
575
1235
  fs.appendFileSync(path.join(this.stateDir, METRICS_FILE), JSON.stringify({ ...manifest, ts: Date.now() / 1000, epoch: manifest.epoch }) + '\n');
576
1236
  }
@@ -580,4 +1240,8 @@ export const __testing = {
580
1240
  ProductionReconcileAdapter,
581
1241
  sparseGramRecord,
582
1242
  markBinaryStale,
1243
+ normalizeHnswDeterminismFlags,
1244
+ // Reset the one-time forced-on warning latch so each test case observes the
1245
+ // warning independently.
1246
+ _resetDetLevelsWarnLatch() { _batchForcedDetLevelsWarned = false; },
583
1247
  };