sweet-search 2.5.2 → 2.5.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (155) hide show
  1. package/core/cli.js +24 -3
  2. package/core/graph/graph-expansion.js +215 -36
  3. package/core/graph/graph-extractor.js +196 -11
  4. package/core/graph/graph-search.js +395 -92
  5. package/core/graph/hcgs-generator.js +2 -1
  6. package/core/graph/index.js +2 -0
  7. package/core/graph/repo-map.js +28 -6
  8. package/core/graph/structural-answer-cues.js +168 -0
  9. package/core/graph/structural-callsite-hints.js +40 -0
  10. package/core/graph/structural-context-format.js +40 -0
  11. package/core/graph/structural-context.js +450 -0
  12. package/core/graph/structural-forward-push.js +156 -0
  13. package/core/graph/structural-header-context.js +19 -0
  14. package/core/graph/structural-importance.js +148 -0
  15. package/core/graph/structural-pagerank.js +197 -0
  16. package/core/graph/summary-manager.js +13 -9
  17. package/core/incremental-indexing/application/dirty-scan.mjs +236 -0
  18. package/core/incremental-indexing/application/file-watcher.mjs +197 -0
  19. package/core/incremental-indexing/application/maintenance-handlers.mjs +519 -0
  20. package/core/incremental-indexing/application/maintenance-worker.mjs +380 -0
  21. package/core/incremental-indexing/application/operator-cli.mjs +554 -0
  22. package/core/incremental-indexing/application/production-li-delta.mjs +192 -0
  23. package/core/incremental-indexing/application/production-reconciler-helpers.mjs +107 -0
  24. package/core/incremental-indexing/application/production-reconciler.mjs +583 -0
  25. package/core/incremental-indexing/application/reconciler.mjs +477 -0
  26. package/core/incremental-indexing/application/tombstone-injector.mjs +148 -0
  27. package/core/incremental-indexing/domain/chunk-identity.mjs +260 -0
  28. package/core/incremental-indexing/domain/encoder-deps.mjs +193 -0
  29. package/core/incremental-indexing/domain/encoder-input.mjs +225 -0
  30. package/core/incremental-indexing/domain/interval-autotune.mjs +255 -0
  31. package/core/incremental-indexing/domain/reconcile-counters.mjs +149 -0
  32. package/core/incremental-indexing/domain/watermark-scheduler.mjs +239 -0
  33. package/core/incremental-indexing/infrastructure/artifact-temp-sweep.mjs +163 -0
  34. package/core/incremental-indexing/infrastructure/baseline-readiness.mjs +121 -0
  35. package/core/incremental-indexing/infrastructure/dirty-set.mjs +233 -0
  36. package/core/incremental-indexing/infrastructure/graph-gc.mjs +314 -0
  37. package/core/incremental-indexing/infrastructure/hashing.mjs +298 -0
  38. package/core/incremental-indexing/infrastructure/hcgs-invalidation.mjs +182 -0
  39. package/core/incremental-indexing/infrastructure/li-segment-merge.mjs +278 -0
  40. package/core/incremental-indexing/infrastructure/li-segment-state.mjs +173 -0
  41. package/core/incremental-indexing/infrastructure/lockfile.mjs +119 -0
  42. package/core/incremental-indexing/infrastructure/maintenance-state-reader.mjs +283 -0
  43. package/core/incremental-indexing/infrastructure/manifest.mjs +194 -0
  44. package/core/incremental-indexing/infrastructure/path-filter.mjs +190 -0
  45. package/core/incremental-indexing/infrastructure/reader-heartbeat.mjs +201 -0
  46. package/core/incremental-indexing/infrastructure/schema-migrations.mjs +257 -0
  47. package/core/incremental-indexing/infrastructure/sparse-gram-delta.mjs +335 -0
  48. package/core/incremental-indexing/infrastructure/sqlite-fts5.mjs +176 -0
  49. package/core/incremental-indexing/infrastructure/staleness-display.mjs +105 -0
  50. package/core/incremental-indexing/infrastructure/tombstone-bitmap.mjs +234 -0
  51. package/core/incremental-indexing/infrastructure/vector-delta-writer.mjs +359 -0
  52. package/core/incremental-indexing/infrastructure/vector-gc.mjs +133 -0
  53. package/core/incremental-indexing/infrastructure/worktree-stamp.mjs +155 -0
  54. package/core/incremental-indexing/infrastructure/wsl2-detect.mjs +115 -0
  55. package/core/indexing/admission-policy.js +139 -0
  56. package/core/indexing/artifact-builder.js +29 -12
  57. package/core/indexing/ast-chunker.js +107 -30
  58. package/core/indexing/dedup/exemplar-selector.js +19 -1
  59. package/core/indexing/gitignore-filter.js +223 -0
  60. package/core/indexing/incremental-tracker.js +99 -30
  61. package/core/indexing/index-codebase-v21.js +6 -5
  62. package/core/indexing/index-maintainer.mjs +698 -6
  63. package/core/indexing/indexer-ann.js +99 -15
  64. package/core/indexing/indexer-build.js +158 -45
  65. package/core/indexing/indexer-empty-baseline.js +80 -0
  66. package/core/indexing/indexer-manifest.js +66 -0
  67. package/core/indexing/indexer-phases.js +56 -23
  68. package/core/indexing/indexer-sparse-gram.js +54 -13
  69. package/core/indexing/indexer-utils.js +26 -208
  70. package/core/indexing/indexing-file-policy.js +32 -7
  71. package/core/indexing/maintainer-launcher.mjs +137 -0
  72. package/core/indexing/merkle-tracker.js +251 -244
  73. package/core/indexing/model-pool.js +46 -5
  74. package/core/infrastructure/code-graph-repository.js +758 -6
  75. package/core/infrastructure/code-graph-visibility.js +157 -0
  76. package/core/infrastructure/codebase-repository.js +100 -13
  77. package/core/infrastructure/config/search.js +1 -1
  78. package/core/infrastructure/db-utils.js +118 -0
  79. package/core/infrastructure/dedup-hashing.js +10 -13
  80. package/core/infrastructure/hardware-capability.js +17 -7
  81. package/core/infrastructure/index.js +8 -2
  82. package/core/infrastructure/language-patterns/maps.js +4 -1
  83. package/core/infrastructure/language-patterns/registry-core.js +56 -17
  84. package/core/infrastructure/language-patterns/registry-object-oriented.js +12 -5
  85. package/core/infrastructure/language-patterns.js +69 -0
  86. package/core/infrastructure/model-registry.js +20 -0
  87. package/core/infrastructure/native-inference.js +7 -12
  88. package/core/infrastructure/native-resolver.js +52 -37
  89. package/core/infrastructure/native-sparse-gram.js +261 -20
  90. package/core/infrastructure/native-tokenizer.js +6 -15
  91. package/core/infrastructure/simd-distance.js +10 -16
  92. package/core/infrastructure/sparse-gram-delta-reader.js +76 -0
  93. package/core/infrastructure/structural-alias-resolver.js +122 -0
  94. package/core/infrastructure/structural-candidate-ranker.js +34 -0
  95. package/core/infrastructure/structural-context-repository.js +472 -0
  96. package/core/infrastructure/structural-context-utils.js +51 -0
  97. package/core/infrastructure/structural-graph-signals.js +121 -0
  98. package/core/infrastructure/structural-qualified-resolution.js +15 -0
  99. package/core/infrastructure/structural-source-definitions.js +100 -0
  100. package/core/infrastructure/tombstone-bitmap-reader.js +139 -0
  101. package/core/infrastructure/tree-sitter-provider.js +811 -37
  102. package/core/prompt-optimization/data/p7-final/sweet-search-system-prompt.md +50 -0
  103. package/core/query/query-router.js +55 -5
  104. package/core/ranking/file-kind-ranking.js +2192 -15
  105. package/core/ranking/late-interaction-index.js +87 -12
  106. package/core/search/cli-decoration.js +290 -0
  107. package/core/search/context-expander.js +988 -78
  108. package/core/search/index.js +1 -0
  109. package/core/search/output-policy.js +275 -0
  110. package/core/search/search-anchor.js +499 -0
  111. package/core/search/search-boost.js +93 -1
  112. package/core/search/search-cli.js +61 -204
  113. package/core/search/search-hybrid.js +250 -10
  114. package/core/search/search-pattern-chunks.js +57 -8
  115. package/core/search/search-pattern-planner.js +68 -9
  116. package/core/search/search-pattern-prefilter.js +30 -10
  117. package/core/search/search-pattern-ripgrep.js +40 -4
  118. package/core/search/search-pattern-sparse-overlay.js +256 -0
  119. package/core/search/search-pattern.js +117 -29
  120. package/core/search/search-postprocess.js +479 -5
  121. package/core/search/search-read-semantic.js +260 -23
  122. package/core/search/search-read.js +82 -64
  123. package/core/search/search-reader-pin.js +71 -0
  124. package/core/search/search-rrf.js +279 -0
  125. package/core/search/search-semantic.js +110 -5
  126. package/core/search/search-server.js +130 -57
  127. package/core/search/search-trace.js +107 -0
  128. package/core/search/server-identity.js +93 -0
  129. package/core/search/session-daemon-prewarm.mjs +33 -10
  130. package/core/search/sweet-search.js +399 -7
  131. package/core/skills/sweet-index/SKILL.md +8 -6
  132. package/core/vector-store/binary-hnsw-index.js +194 -30
  133. package/core/vector-store/float-vector-store.js +96 -6
  134. package/core/vector-store/hnsw-index.js +220 -49
  135. package/eval/agent-read-workflows/bin/_ss-helpers.mjs +471 -0
  136. package/eval/agent-read-workflows/bin/ss-find +15 -0
  137. package/eval/agent-read-workflows/bin/ss-grep +12 -0
  138. package/eval/agent-read-workflows/bin/ss-read +14 -0
  139. package/eval/agent-read-workflows/bin/ss-search +18 -0
  140. package/eval/agent-read-workflows/bin/ss-semantic +12 -0
  141. package/eval/agent-read-workflows/bin/ss-trace +11 -0
  142. package/mcp/read-tool.js +109 -0
  143. package/mcp/server.js +55 -15
  144. package/mcp/tool-handlers.js +14 -124
  145. package/mcp/trace-tool.js +81 -0
  146. package/package.json +25 -10
  147. package/scripts/hooks/intercept-read.mjs +55 -0
  148. package/scripts/hooks/remind-tools.mjs +40 -0
  149. package/scripts/init.js +698 -54
  150. package/scripts/inject-agent-instructions.js +431 -0
  151. package/scripts/install-prompt-reminders.js +188 -0
  152. package/scripts/install-tool-enforcement.js +220 -0
  153. package/scripts/smoke-test.js +12 -9
  154. package/scripts/uninstall.js +276 -18
  155. package/scripts/write-claude-rules.js +110 -0
@@ -0,0 +1,477 @@
1
+ /**
2
+ * Reconciler application service.
3
+ *
4
+ * Plan § 6.1, § 8.1, § 13 Phase 2. The reconciler owns:
5
+ *
6
+ * - dirty-set processing,
7
+ * - content-hash diff,
8
+ * - encoder-dependency expansion,
9
+ * - metadata-dirty input rebuild,
10
+ * - per-file per-tier writes,
11
+ * - strict row visibility,
12
+ * - reader heartbeat files,
13
+ * - prune grace periods,
14
+ * - manifest publish.
15
+ *
16
+ * The reconciler coordinates adapter-provided per-tier deltas, enforces
17
+ * tick budgets, schedules maintenance jobs from watermarks, and publishes
18
+ * the manifest. Concrete graph/vector/HNSW/LI/sparse writes stay behind
19
+ * the injected adapters.
20
+ *
21
+ * The reconciler is intentionally pure of I/O orchestration — it accepts
22
+ * dependency-injected adapters so unit tests can drive every tick through
23
+ * synthetic inputs.
24
+ */
25
+
26
+ import { ReconcileCounters } from '../domain/reconcile-counters.mjs';
27
+ import { nextInterval } from '../domain/interval-autotune.mjs';
28
+ import {
29
+ buildNextManifest,
30
+ readManifest,
31
+ writeManifest,
32
+ zeroManifest,
33
+ } from '../infrastructure/manifest.mjs';
34
+ import { evaluateWatermarks, loadWatermarkConfig } from '../domain/watermark-scheduler.mjs';
35
+ import { beginRead, endRead, minLiveEpoch } from '../infrastructure/reader-heartbeat.mjs';
36
+ import { verifyStamp, writeStamp, formatStampMismatch } from '../infrastructure/worktree-stamp.mjs';
37
+
38
+ /**
39
+ * Adapter contract (Phase 2 declaration):
40
+ *
41
+ * {
42
+ * readDirtySet(): Promise<DirtyFile[]>,
43
+ * requeueDirtyFiles(files): Promise<void>, // optional budget tail
44
+ * hashFile(file): Promise<{ contentHash, metadata, contentUnchanged?, chunks? }>,
45
+ * loadCurrentManifest(): object|null,
46
+ * persistManifest(manifest): Promise<void>,
47
+ * applyGraphDelta(file, hashes, epoch): Promise<{ ops }>, // `hashes` is the hashFile() result
48
+ * applyVectorDelta(file, chunks, hashes, epoch): Promise<{ ops }>,
49
+ * applyHNSWDelta(file, vectorOps, epoch): Promise<{ ops }>,
50
+ * applyBinaryHNSWDelta(file, vectorOps, epoch): Promise<{ ops }>,
51
+ * applyLIDelta(file, tokenOps, epoch): Promise<{ ops }>,
52
+ * applySparseGramDelta(file, gramOps, epoch): Promise<{ ops }>,
53
+ * // Any apply* call may also return either:
54
+ * // { manifest: {...tier descriptor...} }
55
+ * // or:
56
+ * // { manifestTiers: { sparseGram: {...}, hnsw: {...} } }
57
+ * // These descriptors are merged into the next epoch manifest.
58
+ * readMaintenanceState(ctx): Promise<object>|object,
59
+ * scheduleMaintenance(job): Promise<void>|void,
60
+ * }
61
+ *
62
+ * Maintenance scheduling uses `domain/watermark-scheduler.mjs`; the adapter
63
+ * persists emitted jobs to the queue used by `maintenance-worker.mjs`.
64
+ */
65
+
66
// Fallbacks applied by the Reconciler constructor when the caller's config
// leaves the tick interval or a per-tick budget unset.
const DEFAULT_TICK_INTERVAL_MS = 60 * 1000;
const DEFAULT_CPU_BUDGET_MS = 2 * 1000;
const DEFAULT_FILES_PER_TICK = 50;

// Tier names accepted when merging adapter-provided manifest descriptors.
// mergeManifestTiers skips any key that is not listed here.
const MANIFEST_TIER_KEYS = new Set(
  ['codeGraph', 'vectors', 'hnsw', 'binaryHnsw', 'lateInteraction', 'sparseGram'],
);
78
+
79
/**
 * Report whether `value` is a non-null, non-array object.
 *
 * Always returns a strict boolean. The previous `value && ...` form leaked
 * the falsy operand itself (`null`, `undefined`, `0`, `''`) to callers.
 * Note that any object other than an array qualifies (including class
 * instances, Map, Date), not only object literals.
 *
 * @param {*} value
 * @returns {boolean}
 */
function isPlainObject(value) {
  return value !== null && typeof value === 'object' && !Array.isArray(value);
}
82
+
83
/**
 * Fold the manifest descriptors carried by one adapter result into `target`.
 *
 * A result may carry `manifest` (a descriptor for `tier` itself) and/or
 * `manifestTiers` (descriptors keyed by tier name). Each is merged through
 * mergeManifestTiers only when it is an object; a falsy result is ignored.
 *
 * @param {object} target Accumulator of tier descriptors; mutated in place.
 * @param {string} tier   Tier name that `result.manifest` applies to.
 * @param {object} [result] Value returned by an adapter apply* call.
 * @returns {void}
 */
function collectManifestTier(target, tier, result) {
  if (!result) return;

  const ownDescriptor = result.manifest;
  if (isPlainObject(ownDescriptor)) {
    mergeManifestTiers(target, { [tier]: ownDescriptor });
  }

  const keyedDescriptors = result.manifestTiers;
  if (isPlainObject(keyedDescriptors)) {
    mergeManifestTiers(target, keyedDescriptors);
  }
}
92
+
93
/**
 * Merge tier descriptors from `update` into `target`, tier by tier.
 *
 * Entries whose key is not in MANIFEST_TIER_KEYS, or whose value is not an
 * object, are skipped. Accepted entries are combined with whatever `target`
 * already holds for that tier via mergeTierDescriptor.
 *
 * @param {object} target Accumulator; mutated in place.
 * @param {object} [update] Descriptors keyed by tier name.
 * @returns {object} The same `target` reference.
 */
function mergeManifestTiers(target, update) {
  if (isPlainObject(update)) {
    Object.entries(update).forEach(([tierName, tierDescriptor]) => {
      const accepted = MANIFEST_TIER_KEYS.has(tierName) && isPlainObject(tierDescriptor);
      if (accepted) {
        target[tierName] = mergeTierDescriptor(target[tierName] || {}, tierDescriptor);
      }
    });
  }
  return target;
}
101
+
102
/**
 * Combine two descriptors for the same tier.
 *
 * Fields from `next` override fields from `previous`. The `deltas` field is
 * special-cased: when either side holds an array, the result is the
 * de-duplicated concatenation (previous entries first), and a non-array
 * `deltas` on the other side is discarded.
 *
 * @param {object} previous
 * @param {object} next
 * @returns {object} A new descriptor; neither input is mutated.
 */
function mergeTierDescriptor(previous, next) {
  const merged = { ...previous, ...next };
  const earlier = Array.isArray(previous.deltas) ? previous.deltas : null;
  const later = Array.isArray(next.deltas) ? next.deltas : null;
  if (earlier === null && later === null) return merged;

  // A Set keeps first-seen order, so previous deltas stay ahead of new ones.
  const unique = new Set(earlier ?? []);
  for (const delta of later ?? []) unique.add(delta);
  merged.deltas = Array.from(unique);
  return merged;
}
114
+
115
/**
 * Produce the tier descriptors to publish, reconciling the sparse-gram
 * delta list against the previously published manifest.
 *
 * Only `sparseGram` is adjusted:
 *  - if its `base` or `weightsId` is a string that differs from the previous
 *    manifest, the delta list starts over (the tick's own deltas, or `[]`);
 *  - otherwise, if the tick supplied a delta array, it is appended to the
 *    previous manifest's deltas with duplicates removed;
 *  - otherwise the descriptor is passed through untouched.
 *
 * @param {object|null|undefined} previousManifest
 * @param {object} tiers Tier descriptors collected during the tick.
 * @returns {object} Shallow copy of `tiers` with `sparseGram` finalized.
 */
function finalizeManifestTiers(previousManifest, tiers) {
  const finalized = { ...tiers };
  const sparse = finalized.sparseGram;
  if (!isPlainObject(sparse)) return finalized;

  const priorSparse = previousManifest?.sparseGram || {};
  const differsFromPrior = (field) =>
    typeof sparse[field] === 'string' && sparse[field] !== priorSparse[field];
  const tickHasDeltas = Array.isArray(sparse.deltas);

  if (differsFromPrior('base') || differsFromPrior('weightsId')) {
    finalized.sparseGram = { ...sparse, deltas: tickHasDeltas ? sparse.deltas : [] };
  } else if (tickHasDeltas) {
    const carriedOver = Array.isArray(priorSparse.deltas) ? priorSparse.deltas : [];
    finalized.sparseGram = {
      ...sparse,
      deltas: Array.from(new Set([...carriedOver, ...sparse.deltas])),
    };
  }
  return finalized;
}
142
+
143
/**
 * Drives reconcile ticks over injected adapters.
 *
 * Each tick reads the dirty set, hashes every file, dispatches changed files
 * to the per-tier adapter methods, publishes the next-epoch manifest, and
 * then schedules maintenance jobs. Per-tick file-count and CPU budgets bound
 * how much of the dirty set is processed; the remainder is handed back to
 * the adapter's `requeueDirtyFiles` hook when one exists.
 */
export class Reconciler {
  /**
   * @param {object} options
   * @param {string} options.stateDir State directory; required.
   * @param {object} options.adapters Adapter contract described at the top of this module; required.
   * @param {object} [options.config] Tick interval / budgets / etc. Keys left
   *   `undefined` or `null` fall back to the module defaults.
   * @param {Function} [options.now] Injectable clock for tests.
   * @param {{info:Function, warn:Function, error:Function}} [options.logger]
   * @param {string|null} [options.projectRoot] When set, `verifyStartup` checks
   *   the worktree stamp against it.
   * @param {(phase:string)=>void} [options.onProgress]
   * @throws {Error} When `stateDir` or `adapters` is missing.
   */
  constructor({ stateDir, adapters, config = {}, now = Date.now, logger = console, projectRoot = null, onProgress = null }) {
    if (!stateDir) throw new Error('Reconciler: stateDir is required');
    if (!adapters) throw new Error('Reconciler: adapters are required');
    this.stateDir = stateDir;
    this.projectRoot = projectRoot;
    this.adapters = adapters;
    const overrides = config ?? {};
    // Spread the caller's config FIRST so the `??` fallbacks below win over
    // explicit `undefined`/`null` values. With the spread last, a caller
    // passing `{ filesPerTick: undefined }` silently disabled the budget
    // checks in tick() (comparisons against `undefined` are always false).
    this.config = {
      ...overrides,
      intervalMs: overrides.intervalMs ?? DEFAULT_TICK_INTERVAL_MS,
      cpuBudgetMs: overrides.cpuBudgetMs ?? DEFAULT_CPU_BUDGET_MS,
      filesPerTick: overrides.filesPerTick ?? DEFAULT_FILES_PER_TICK,
      autotuneInterval: overrides.autotuneInterval ?? false,
      pinnedIntervalMs: overrides.pinnedIntervalMs ?? false,
    };
    this.now = now;
    this.logger = logger;
    this.onProgress = typeof onProgress === 'function' ? onProgress : null;
    this._lastEpoch = 0;
    this._running = false;
  }

  /**
   * Report a progress phase to the optional `onProgress` callback.
   *
   * @param {string} phase
   */
  progress(phase) {
    this.onProgress?.(phase);
  }

  /**
   * Verify the worktree stamp before any tick. Plan § 8.5 / § 14.2.4 —
   * cross-worktree mixing is a silent footgun; verify or mint a stamp
   * before the daemon writes to this state dir.
   *
   * Without a `projectRoot` the check is skipped and reported as ok. A failed
   * check is logged and returned as-is; an `'absent'` stamp is written.
   *
   * @returns {{ok:boolean, reason?:string}}
   */
  verifyStartup() {
    if (!this.projectRoot) return { ok: true, reason: 'no-project-root' };
    const check = verifyStamp(this.stateDir, this.projectRoot);
    if (!check.ok) {
      this.logger.error?.(formatStampMismatch(check));
      return check;
    }
    if (check.reason === 'absent') {
      writeStamp(this.stateDir, this.projectRoot);
    }
    return { ok: true, reason: check.reason };
  }

  /**
   * Load the current manifest's epoch. Falls back to 0 when no manifest
   * has been written yet. Uses `adapters.loadCurrentManifest` when provided,
   * otherwise reads the manifest from `stateDir`.
   *
   * @returns {number}
   */
  currentEpoch() {
    const manifest = this.adapters.loadCurrentManifest
      ? this.adapters.loadCurrentManifest()
      : readManifest(this.stateDir);
    return manifest?.epoch ?? 0;
  }

  /**
   * Build the next-epoch number. Plan § 8.1 step 1-2. The result is strictly
   * greater than both the manifest epoch and the last epoch this instance
   * published.
   *
   * @returns {number}
   */
  nextEpoch() {
    const current = this.currentEpoch();
    return Math.max(current + 1, this._lastEpoch + 1);
  }

  /**
   * Run one reconcile tick. Returns the counters snapshot for the tick.
   *
   * Walk the dirty set through the adapter contract. Adapters own concrete
   * tier writes; the reconciler enforces budget, maintenance scheduling,
   * and manifest publication.
   *
   * On failure before the manifest is published, the drained dirty files are
   * handed back to `requeueDirtyFiles` (best effort) and the original error
   * is rethrown. The in-progress guard is always cleared.
   *
   * @returns {Promise<object>}
   * @throws {Error} When a tick is already in progress on this instance.
   */
  async tick() {
    if (this._running) {
      throw new Error('Reconciler.tick(): tick already in progress (single-instance enforced by lockfile)');
    }
    this._running = true;
    let dirty = [];
    let dirtyCursor = 0;
    let deferredRequeued = false;
    let manifestPublished = false;

    try {
      // Everything after the guard is set lives inside try/finally: the
      // clock and the manifest read behind nextEpoch() can throw, and a
      // throw outside the try would leave `_running` stuck at true.
      const startedAt = this.now();
      const counters = new ReconcileCounters();
      const epoch = this.nextEpoch();
      counters.set('epoch', epoch);

      dirty = await this.adapters.readDirtySet();
      this.progress('reconciler:dirty-read');
      counters.set('dirty_paths_seen', dirty.length);

      counters.set('cpu_budget_total_ms', this.config.cpuBudgetMs);

      // Track per-file outcomes for the tick summary.
      const tierOps = {};
      const manifestTiers = {};
      const filesProcessed = [];

      for (; dirtyCursor < dirty.length; dirtyCursor += 1) {
        // Budget checks run before each file; the CPU budget never blocks
        // the first file, so every tick makes progress.
        const filesAttempted = counters._fields.files_processed + counters._fields.content_unchanged;
        if (filesAttempted >= this.config.filesPerTick) break;
        if (filesAttempted > 0 && this.now() - startedAt >= this.config.cpuBudgetMs) break;

        const file = dirty[dirtyCursor];
        this.progress('reconciler:file:start');
        const hashes = await this.adapters.hashFile(file);
        this.progress('reconciler:file:hashed');
        if (hashes && hashes.contentUnchanged) {
          counters.observeContentUnchanged();
          continue;
        }
        const fileRes = await this._reconcileOneFile(file, epoch, hashes);
        this.progress('reconciler:file:done');
        filesProcessed.push({ file, ...fileRes });
        mergeManifestTiers(manifestTiers, fileRes?.manifestTiers);
        counters.inc('files_processed');
        counters.inc('chunks_total', fileRes?.chunksTotal ?? 0);
        counters.inc('chunks_encoded', fileRes?.chunksEncoded ?? 0);
        counters.inc('chunks_hash_reused', fileRes?.chunksReused ?? 0);
        counters.inc('chunks_struct_stable', fileRes?.chunksStructStable ?? 0);
        counters.inc('chunks_text_unchanged', fileRes?.chunksTextUnchanged ?? 0);
        counters.inc('chunks_metadata_dirty', fileRes?.chunksMetadataDirty ?? 0);
        counters.inc('chunks_dedup_repaired', fileRes?.chunksDedupRepaired ?? 0);
        counters.inc('tree_sitter_error_nodes_seen', fileRes?.treeSitterErrorNodes ?? 0);
        if ((fileRes?.treeSitterErrorNodes ?? 0) > 0) {
          counters.inc('tree_sitter_files_with_errors');
        }
        for (const [tier, op] of Object.entries(fileRes?.ops ?? {})) {
          if (typeof op === 'number') {
            counters.inc(`ops_per_tier.${tier}`, op);
            tierOps[tier] = (tierOps[tier] || 0) + op;
          }
        }
      }

      // Whatever the budget cut off goes back to the adapter for a later tick.
      const deferredFiles = dirty.slice(dirtyCursor);
      if (deferredFiles.length > 0) {
        counters.inc('dirty_paths_deferred', deferredFiles.length);
        if (this.adapters.requeueDirtyFiles) {
          await this.adapters.requeueDirtyFiles(deferredFiles);
          deferredRequeued = true;
        } else {
          this.logger.warn?.(
            `[reconciler] ${deferredFiles.length} dirty paths exceeded the per-tick budget; ` +
              'adapter has no requeueDirtyFiles hook',
          );
        }
      }

      // Publish the new manifest. Plan § 8.1 step 4: write to *.tmp,
      // fsync, atomic rename, fsync parent dir. `writeManifest` already
      // does that.
      const previous = (this.adapters.loadCurrentManifest
        ? this.adapters.loadCurrentManifest()
        : readManifest(this.stateDir))
        ?? zeroManifest({});
      const manifest = buildNextManifest(previous, {
        epoch,
        tiers: finalizeManifestTiers(previous, manifestTiers),
      });
      await this._publishManifest(manifest);
      this.progress('reconciler:manifest-published');
      manifestPublished = true;
      this._lastEpoch = epoch;

      // Plan § 6.1 step 11: maintenance observes a successfully-published
      // epoch. Never enqueue jobs for an epoch whose manifest did not land.
      await this._scheduleMaintenance(epoch, counters, tierOps, filesProcessed);
      this.progress('reconciler:maintenance-scheduled');

      counters.set('tick_ms', this.now() - startedAt);
      counters.set('ts', startedAt / 1000);

      // Interval auto-tune (plan § 14.2.1). Skipped when the operator pins
      // a fixed interval via `config.pinnedIntervalMs` or
      // `config.autotuneInterval === false`.
      if (this.config.autotuneInterval && !this.config.pinnedIntervalMs) {
        const tuned = nextInterval({
          currentMs: this.config.intervalMs,
          lastTickMs: counters._fields.tick_ms,
          dirtyAtTickStart: dirty.length,
          cpuLoadAvg: this.config.cpuLoadAvg ?? 0,
          maintenanceBacklog: this.config.maintenanceBacklog ?? 0,
        });
        if (tuned.nextMs !== this.config.intervalMs) {
          this.logger.info?.(`[reconciler] interval ${this.config.intervalMs}ms → ${tuned.nextMs}ms (${tuned.reasons.join(',')})`);
          this.config.intervalMs = tuned.nextMs;
        }
      }

      return counters.snapshot();
    } catch (err) {
      if (!manifestPublished && dirty.length > 0) {
        // If the deferred tail was already requeued, only the files walked
        // so far still need to go back; otherwise return the whole set.
        const filesToRequeue = deferredRequeued ? dirty.slice(0, dirtyCursor) : dirty;
        if (filesToRequeue.length > 0) {
          await this._tryRequeueDirtyAfterFailure(filesToRequeue, err);
        }
      }
      throw err;
    } finally {
      this._running = false;
    }
  }

  /**
   * Dispatch one file to the per-tier adapter methods, in fixed order:
   * graph, vectors, HNSW, binary HNSW, late interaction, sparse gram.
   * Adapters can return undefined when a tier has no work for this file,
   * and a missing adapter method is skipped.
   *
   * The vector result feeds the later tiers (`vectorOps`, `tokenOps`,
   * `gramOps`) and supplies the chunk counters.
   *
   * @param {*} file   Dirty-set entry, passed through to adapters untouched.
   * @param {number} epoch
   * @param {object} [hashes] Result of `adapters.hashFile(file)`.
   * @returns {Promise<object>} Chunk counters, `treeSitterErrorNodes`,
   *   collected `manifestTiers`, and the per-tier `ops` counts.
   */
  async _reconcileOneFile(file, epoch, hashes) {
    const ops = {};
    const manifestTiers = {};

    this.progress('reconciler:graph:start');
    const graph = await this.adapters.applyGraphDelta?.(file, hashes, epoch);
    this.progress('reconciler:graph:done');
    collectManifestTier(manifestTiers, 'codeGraph', graph);
    if (graph?.ops?.graph_upsert != null) ops.graph_upsert = graph.ops.graph_upsert;
    if (graph?.ops?.graph_tombstone != null) ops.graph_tombstone = graph.ops.graph_tombstone;

    this.progress('reconciler:vector:start');
    const vec = await this.adapters.applyVectorDelta?.(file, hashes?.chunks ?? [], hashes, epoch);
    this.progress('reconciler:vector:done');
    collectManifestTier(manifestTiers, 'vectors', vec);
    if (vec?.ops?.vectors_upsert != null) ops.vectors_upsert = vec.ops.vectors_upsert;
    if (vec?.ops?.vectors_delete != null) ops.vectors_delete = vec.ops.vectors_delete;

    this.progress('reconciler:hnsw:start');
    const hnsw = await this.adapters.applyHNSWDelta?.(file, vec?.vectorOps ?? [], epoch);
    this.progress('reconciler:hnsw:done');
    collectManifestTier(manifestTiers, 'hnsw', hnsw);
    if (hnsw?.ops?.hnsw_add != null) ops.hnsw_add = hnsw.ops.hnsw_add;
    if (hnsw?.ops?.hnsw_tombstone != null) ops.hnsw_tombstone = hnsw.ops.hnsw_tombstone;

    this.progress('reconciler:binary-hnsw:start');
    const bin = await this.adapters.applyBinaryHNSWDelta?.(file, vec?.vectorOps ?? [], epoch);
    this.progress('reconciler:binary-hnsw:done');
    collectManifestTier(manifestTiers, 'binaryHnsw', bin);
    if (bin?.ops?.binary_hnsw_append != null) ops.binary_hnsw_append = bin.ops.binary_hnsw_append;
    if (bin?.ops?.binary_hnsw_tombstone != null) ops.binary_hnsw_tombstone = bin.ops.binary_hnsw_tombstone;

    this.progress('reconciler:li:start');
    const li = await this.adapters.applyLIDelta?.(file, vec?.tokenOps ?? [], epoch);
    this.progress('reconciler:li:done');
    collectManifestTier(manifestTiers, 'lateInteraction', li);
    if (li?.ops?.li_segment_append != null) ops.li_segment_append = li.ops.li_segment_append;
    if (li?.ops?.li_tombstone != null) ops.li_tombstone = li.ops.li_tombstone;

    this.progress('reconciler:sparse:start');
    const sg = await this.adapters.applySparseGramDelta?.(file, vec?.gramOps ?? [], epoch);
    this.progress('reconciler:sparse:done');
    collectManifestTier(manifestTiers, 'sparseGram', sg);
    if (sg?.ops?.sparse_gram_delta_upsert != null) ops.sparse_gram_delta_upsert = sg.ops.sparse_gram_delta_upsert;

    return {
      chunksTotal: vec?.chunksTotal ?? 0,
      chunksEncoded: vec?.chunksEncoded ?? 0,
      chunksReused: vec?.chunksReused ?? 0,
      chunksStructStable: vec?.chunksStructStable ?? 0,
      chunksTextUnchanged: vec?.chunksTextUnchanged ?? 0,
      chunksMetadataDirty: vec?.chunksMetadataDirty ?? 0,
      chunksDedupRepaired: vec?.chunksDedupRepaired ?? 0,
      treeSitterErrorNodes: graph?.treeSitterErrorNodes ?? 0,
      manifestTiers,
      ops,
    };
  }

  /**
   * Enqueue maintenance jobs for a published epoch.
   *
   * Does nothing without a `readMaintenanceState` adapter. When that adapter
   * returns an array it is used as the job list directly; any other value is
   * evaluated against the watermark config. Jobs lacking an `epoch` inherit
   * the tick's epoch.
   *
   * @param {number} epoch
   * @param {object} counters Tick counters; `maintenance_jobs_enqueued` is incremented per job.
   * @param {object} tierOps  Per-tier op totals for the tick.
   * @param {Array<object>} filesProcessed
   * @returns {Promise<void>}
   */
  async _scheduleMaintenance(epoch, counters, tierOps, filesProcessed) {
    if (!this.adapters.readMaintenanceState) return;
    const state = await this.adapters.readMaintenanceState({
      epoch,
      tierOps,
      filesProcessed,
      counters: counters.snapshot(),
    });
    const jobs = Array.isArray(state)
      ? state
      : evaluateWatermarks(state || {}, this.config.watermarks || loadWatermarkConfig());
    if (jobs.length === 0) return;
    if (!this.adapters.scheduleMaintenance) {
      this.logger.warn?.(`[reconciler] ${jobs.length} maintenance jobs crossed watermarks; adapter has no scheduleMaintenance hook`);
      return;
    }
    for (const job of jobs) {
      await this.adapters.scheduleMaintenance({ ...job, epoch: job.epoch ?? epoch });
      counters.inc('maintenance_jobs_enqueued');
    }
  }

  /**
   * Persist the manifest through `adapters.persistManifest` when provided,
   * otherwise write it under `stateDir`.
   *
   * @param {object} manifest
   * @returns {Promise<void>}
   */
  async _publishManifest(manifest) {
    if (this.adapters.persistManifest) {
      await this.adapters.persistManifest(manifest);
      return;
    }
    writeManifest(this.stateDir, manifest);
  }

  /**
   * Best-effort requeue after a failed tick. Never throws: a missing hook is
   * logged as a warning and a failing hook as an error, so the caller can
   * rethrow the original tick failure.
   *
   * @param {Array} files Dirty-set entries to hand back.
   * @param {*} cause     The error that failed the tick (used in the log line).
   * @returns {Promise<void>}
   */
  async _tryRequeueDirtyAfterFailure(files, cause) {
    if (!this.adapters.requeueDirtyFiles) {
      this.logger.warn?.(
        `[reconciler] tick failed before manifest publish and ${files.length} dirty paths were drained; ` +
          'adapter has no requeueDirtyFiles hook',
      );
      return;
    }
    try {
      await this.adapters.requeueDirtyFiles(files);
    } catch (requeueErr) {
      this.logger.error?.(
        `[reconciler] failed to requeue ${files.length} dirty paths after tick failure ` +
          `(${cause?.message ?? cause}): ${requeueErr?.message ?? requeueErr}`,
      );
    }
  }

  // ----- Reader heartbeat helpers (exposed for tests; production callers
  // use beginRead/endRead from infrastructure/reader-heartbeat.mjs directly).

  beginRead(epoch, meta) { return beginRead(this.stateDir, epoch, meta); }
  endRead(record) { return endRead(this.stateDir, record); }
  minLiveEpoch() { return minLiveEpoch(this.stateDir); }
}
472
+
473
// Test-only handle on the reconciler's default tick settings.
export const __testing = {
  DEFAULT_TICK_INTERVAL_MS: DEFAULT_TICK_INTERVAL_MS,
  DEFAULT_CPU_BUDGET_MS: DEFAULT_CPU_BUDGET_MS,
  DEFAULT_FILES_PER_TICK: DEFAULT_FILES_PER_TICK,
};
@@ -0,0 +1,148 @@
1
+ /**
2
+ * Synthetic tombstone-injection harness.
3
+ *
4
+ * Plan § 13 Phase 5, § 14.1 measurement 1. The harness picks a random
5
+ * fraction of vectors in `codebase.db::vectors`, marks them tombstoned
6
+ * (sets `epoch_retired`), and lets the search path apply the
7
+ * manifest-epoch predicate so the GCSN dev sweep can measure
8
+ * MRR-vs-tombstone-fraction.
9
+ *
10
+ * Discipline (CLAUDE.md `feedback_heldout_discipline_strict.md`):
11
+ * - The sweep runs on **dev** GCSN, not held-out.
12
+ * - Use a fixed seed (`SWEET_SEARCH_TOMBSTONE_SEED`, default 42) so
13
+ * reruns reproduce.
14
+ * - The harness restores the original `epoch_retired` values on exit
15
+ * so the index returns to its pre-experiment state.
16
+ *
17
+ * Output:
18
+ * - A `restore` function the runner calls at the end of each sweep
19
+ * point to undo the injection.
20
+ * - `injected` and `fraction` fields reporting the actual count and fraction injected (may differ
21
+ * from the request slightly due to row count rounding).
22
+ *
23
+ * This module is pure infrastructure — the runner script in
24
+ * `scripts/incremental-indexing-tombstone-sensitivity.mjs` ties it to
25
+ * the GCSN benchmark loop.
26
+ */
27
+
28
// PRNG seed used when a caller does not pass `seed`, so reruns pick the same rows.
const DEFAULT_SEED = 42;
29
+
30
/**
 * Build a deterministic mulberry32 generator.
 *
 * The seed is truncated to a 32-bit integer; a seed that truncates to 0 is
 * replaced by 1, so seeds 0 and 1 yield the same sequence.
 *
 * @param {number} seed
 * @returns {() => number} Generator producing floats in [0, 1).
 */
function mulberry32(seed) {
  let state = (seed | 0) || 1;
  return function rand() {
    state = (state + 0x6D2B79F5) | 0;
    const mixed = Math.imul(state ^ (state >>> 15), state | 1);
    // The sum may leave the int32 range; the XOR truncates it back.
    const folded = mixed ^ (mixed + Math.imul(mixed ^ (mixed >>> 7), mixed | 61));
    return ((folded ^ (folded >>> 14)) >>> 0) / 4294967296;
  };
}
47
+
48
/**
 * Tombstone a seeded random sample of the live rows in `vectors`.
 *
 * Live rows are those with `epoch_retired IS NULL` (optionally limited to one
 * `session_id`). `floor(total * fraction)` of them are chosen by a seeded
 * shuffle and stamped with `epoch`. The returned `restore` clears the stamp
 * again, but only on sampled rows that still carry this `epoch`.
 *
 * @param {object} options
 * @param {import('better-sqlite3').Database} options.db The `codebase.db` vectors database.
 * @param {number} options.fraction 0 < fraction <= 0.5
 * @param {number} options.epoch Positive integer written into `epoch_retired`.
 * @param {number} [options.seed=42]
 * @param {string} [options.sessionFilter] Restrict to one `session_id` if set.
 * @returns {{injected:number, fraction:number, restore:() => void}}
 * @throws {Error} When `fraction` or `epoch` is out of range.
 */
export function injectTombstones({ db, fraction, epoch, seed = DEFAULT_SEED, sessionFilter }) {
  const fractionInRange = Number.isFinite(fraction) && fraction > 0 && fraction <= 0.5;
  if (!fractionInRange) {
    throw new Error(`injectTombstones: fraction must be in (0, 0.5], got ${fraction}`);
  }
  const epochValid = Number.isInteger(epoch) && epoch > 0;
  if (!epochValid) {
    throw new Error(`injectTombstones: epoch must be a positive integer, got ${epoch}`);
  }
  const rand = mulberry32(seed);

  let liveRows;
  if (sessionFilter) {
    liveRows = db
      .prepare(`SELECT id FROM vectors WHERE session_id = ? AND epoch_retired IS NULL`)
      .all(sessionFilter);
  } else {
    liveRows = db.prepare(`SELECT id FROM vectors WHERE epoch_retired IS NULL`).all();
  }
  const total = liveRows.length;
  if (total === 0) {
    return { injected: 0, fraction: 0, restore: () => {} };
  }
  const target = Math.floor(total * fraction);

  // Fisher-Yates shuffle of the row positions; the first `target` win.
  const order = Array.from({ length: total }, (_, position) => position);
  for (let upper = total - 1; upper > 0; upper -= 1) {
    const swapWith = Math.floor(rand() * (upper + 1));
    [order[upper], order[swapWith]] = [order[swapWith], order[upper]];
  }
  const picked = order.slice(0, target).map((position) => liveRows[position].id);

  const retire = db.prepare(`
    UPDATE vectors
    SET epoch_retired = ?
    WHERE id = ? AND epoch_retired IS NULL
  `);
  const retireAll = db.transaction((ids) => {
    ids.forEach((id) => { retire.run(epoch, id); });
  });
  retireAll(picked);

  function restore() {
    const revive = db.prepare(`
      UPDATE vectors
      SET epoch_retired = NULL
      WHERE id = ? AND epoch_retired = ?
    `);
    const reviveAll = db.transaction((ids) => {
      ids.forEach((id) => { revive.run(id, epoch); });
    });
    reviveAll(picked);
  }

  return {
    injected: picked.length,
    fraction: picked.length / total,
    restore,
  };
}
117
+
118
/**
 * Run `runOnce` once per requested fraction, with tombstones injected
 * before the call and restored afterwards (also when `runOnce` rejects).
 * Sweep points run strictly one after another, each with the same seed and
 * epoch. Executing the benchmark itself is left to `runOnce`.
 *
 * @param {object} options
 * @param {import('better-sqlite3').Database} options.db
 * @param {number[]} options.fractions
 * @param {(meta:{fraction:number, injected:number}) => Promise<object>} options.runOnce
 * @param {number} [options.seed=42]
 * @param {number} [options.epoch=1_000_000]
 * @returns {Promise<Array<{fraction:number, injected:number, result:object}>>}
 * @throws {Error} When `fractions` is not a non-empty array.
 */
export async function sweepTombstoneFractions({ db, fractions, runOnce, seed = DEFAULT_SEED, epoch = 1_000_000 }) {
  const hasFractions = Array.isArray(fractions) && fractions.length > 0;
  if (!hasFractions) {
    throw new Error('sweepTombstoneFractions: fractions must be a non-empty array');
  }

  const points = [];
  for (let index = 0; index < fractions.length; index += 1) {
    const injection = injectTombstones({ db, fraction: fractions[index], epoch, seed });
    try {
      const meta = { fraction: injection.fraction, injected: injection.injected };
      const result = await runOnce(meta);
      points.push({ fraction: injection.fraction, injected: injection.injected, result });
    } finally {
      // Undo this sweep point before the next one samples live rows.
      injection.restore();
    }
  }
  return points;
}
147
+
148
// Test-only handle on the module-private PRNG factory.
export const __testing = {
  mulberry32: mulberry32,
};