sweet-search 2.5.2 → 2.5.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (155) hide show
  1. package/core/cli.js +24 -3
  2. package/core/graph/graph-expansion.js +215 -36
  3. package/core/graph/graph-extractor.js +196 -11
  4. package/core/graph/graph-search.js +395 -92
  5. package/core/graph/hcgs-generator.js +2 -1
  6. package/core/graph/index.js +2 -0
  7. package/core/graph/repo-map.js +28 -6
  8. package/core/graph/structural-answer-cues.js +168 -0
  9. package/core/graph/structural-callsite-hints.js +40 -0
  10. package/core/graph/structural-context-format.js +40 -0
  11. package/core/graph/structural-context.js +450 -0
  12. package/core/graph/structural-forward-push.js +156 -0
  13. package/core/graph/structural-header-context.js +19 -0
  14. package/core/graph/structural-importance.js +148 -0
  15. package/core/graph/structural-pagerank.js +197 -0
  16. package/core/graph/summary-manager.js +13 -9
  17. package/core/incremental-indexing/application/dirty-scan.mjs +236 -0
  18. package/core/incremental-indexing/application/file-watcher.mjs +197 -0
  19. package/core/incremental-indexing/application/maintenance-handlers.mjs +519 -0
  20. package/core/incremental-indexing/application/maintenance-worker.mjs +380 -0
  21. package/core/incremental-indexing/application/operator-cli.mjs +554 -0
  22. package/core/incremental-indexing/application/production-li-delta.mjs +192 -0
  23. package/core/incremental-indexing/application/production-reconciler-helpers.mjs +107 -0
  24. package/core/incremental-indexing/application/production-reconciler.mjs +583 -0
  25. package/core/incremental-indexing/application/reconciler.mjs +477 -0
  26. package/core/incremental-indexing/application/tombstone-injector.mjs +148 -0
  27. package/core/incremental-indexing/domain/chunk-identity.mjs +260 -0
  28. package/core/incremental-indexing/domain/encoder-deps.mjs +193 -0
  29. package/core/incremental-indexing/domain/encoder-input.mjs +225 -0
  30. package/core/incremental-indexing/domain/interval-autotune.mjs +255 -0
  31. package/core/incremental-indexing/domain/reconcile-counters.mjs +149 -0
  32. package/core/incremental-indexing/domain/watermark-scheduler.mjs +239 -0
  33. package/core/incremental-indexing/infrastructure/artifact-temp-sweep.mjs +163 -0
  34. package/core/incremental-indexing/infrastructure/baseline-readiness.mjs +121 -0
  35. package/core/incremental-indexing/infrastructure/dirty-set.mjs +233 -0
  36. package/core/incremental-indexing/infrastructure/graph-gc.mjs +314 -0
  37. package/core/incremental-indexing/infrastructure/hashing.mjs +298 -0
  38. package/core/incremental-indexing/infrastructure/hcgs-invalidation.mjs +182 -0
  39. package/core/incremental-indexing/infrastructure/li-segment-merge.mjs +278 -0
  40. package/core/incremental-indexing/infrastructure/li-segment-state.mjs +173 -0
  41. package/core/incremental-indexing/infrastructure/lockfile.mjs +119 -0
  42. package/core/incremental-indexing/infrastructure/maintenance-state-reader.mjs +283 -0
  43. package/core/incremental-indexing/infrastructure/manifest.mjs +194 -0
  44. package/core/incremental-indexing/infrastructure/path-filter.mjs +190 -0
  45. package/core/incremental-indexing/infrastructure/reader-heartbeat.mjs +201 -0
  46. package/core/incremental-indexing/infrastructure/schema-migrations.mjs +257 -0
  47. package/core/incremental-indexing/infrastructure/sparse-gram-delta.mjs +335 -0
  48. package/core/incremental-indexing/infrastructure/sqlite-fts5.mjs +176 -0
  49. package/core/incremental-indexing/infrastructure/staleness-display.mjs +105 -0
  50. package/core/incremental-indexing/infrastructure/tombstone-bitmap.mjs +234 -0
  51. package/core/incremental-indexing/infrastructure/vector-delta-writer.mjs +359 -0
  52. package/core/incremental-indexing/infrastructure/vector-gc.mjs +133 -0
  53. package/core/incremental-indexing/infrastructure/worktree-stamp.mjs +155 -0
  54. package/core/incremental-indexing/infrastructure/wsl2-detect.mjs +115 -0
  55. package/core/indexing/admission-policy.js +139 -0
  56. package/core/indexing/artifact-builder.js +29 -12
  57. package/core/indexing/ast-chunker.js +107 -30
  58. package/core/indexing/dedup/exemplar-selector.js +19 -1
  59. package/core/indexing/gitignore-filter.js +223 -0
  60. package/core/indexing/incremental-tracker.js +99 -30
  61. package/core/indexing/index-codebase-v21.js +6 -5
  62. package/core/indexing/index-maintainer.mjs +698 -6
  63. package/core/indexing/indexer-ann.js +99 -15
  64. package/core/indexing/indexer-build.js +158 -45
  65. package/core/indexing/indexer-empty-baseline.js +80 -0
  66. package/core/indexing/indexer-manifest.js +66 -0
  67. package/core/indexing/indexer-phases.js +56 -23
  68. package/core/indexing/indexer-sparse-gram.js +54 -13
  69. package/core/indexing/indexer-utils.js +26 -208
  70. package/core/indexing/indexing-file-policy.js +32 -7
  71. package/core/indexing/maintainer-launcher.mjs +137 -0
  72. package/core/indexing/merkle-tracker.js +251 -244
  73. package/core/indexing/model-pool.js +46 -5
  74. package/core/infrastructure/code-graph-repository.js +758 -6
  75. package/core/infrastructure/code-graph-visibility.js +157 -0
  76. package/core/infrastructure/codebase-repository.js +100 -13
  77. package/core/infrastructure/config/search.js +1 -1
  78. package/core/infrastructure/db-utils.js +118 -0
  79. package/core/infrastructure/dedup-hashing.js +10 -13
  80. package/core/infrastructure/hardware-capability.js +17 -7
  81. package/core/infrastructure/index.js +8 -2
  82. package/core/infrastructure/language-patterns/maps.js +4 -1
  83. package/core/infrastructure/language-patterns/registry-core.js +56 -17
  84. package/core/infrastructure/language-patterns/registry-object-oriented.js +12 -5
  85. package/core/infrastructure/language-patterns.js +69 -0
  86. package/core/infrastructure/model-registry.js +20 -0
  87. package/core/infrastructure/native-inference.js +7 -12
  88. package/core/infrastructure/native-resolver.js +52 -37
  89. package/core/infrastructure/native-sparse-gram.js +261 -20
  90. package/core/infrastructure/native-tokenizer.js +6 -15
  91. package/core/infrastructure/simd-distance.js +10 -16
  92. package/core/infrastructure/sparse-gram-delta-reader.js +76 -0
  93. package/core/infrastructure/structural-alias-resolver.js +122 -0
  94. package/core/infrastructure/structural-candidate-ranker.js +34 -0
  95. package/core/infrastructure/structural-context-repository.js +472 -0
  96. package/core/infrastructure/structural-context-utils.js +51 -0
  97. package/core/infrastructure/structural-graph-signals.js +121 -0
  98. package/core/infrastructure/structural-qualified-resolution.js +15 -0
  99. package/core/infrastructure/structural-source-definitions.js +100 -0
  100. package/core/infrastructure/tombstone-bitmap-reader.js +139 -0
  101. package/core/infrastructure/tree-sitter-provider.js +811 -37
  102. package/core/prompt-optimization/data/p7-final/sweet-search-system-prompt.md +50 -0
  103. package/core/query/query-router.js +55 -5
  104. package/core/ranking/file-kind-ranking.js +2192 -15
  105. package/core/ranking/late-interaction-index.js +87 -12
  106. package/core/search/cli-decoration.js +290 -0
  107. package/core/search/context-expander.js +988 -78
  108. package/core/search/index.js +1 -0
  109. package/core/search/output-policy.js +275 -0
  110. package/core/search/search-anchor.js +499 -0
  111. package/core/search/search-boost.js +93 -1
  112. package/core/search/search-cli.js +61 -204
  113. package/core/search/search-hybrid.js +250 -10
  114. package/core/search/search-pattern-chunks.js +57 -8
  115. package/core/search/search-pattern-planner.js +68 -9
  116. package/core/search/search-pattern-prefilter.js +30 -10
  117. package/core/search/search-pattern-ripgrep.js +40 -4
  118. package/core/search/search-pattern-sparse-overlay.js +256 -0
  119. package/core/search/search-pattern.js +117 -29
  120. package/core/search/search-postprocess.js +479 -5
  121. package/core/search/search-read-semantic.js +260 -23
  122. package/core/search/search-read.js +82 -64
  123. package/core/search/search-reader-pin.js +71 -0
  124. package/core/search/search-rrf.js +279 -0
  125. package/core/search/search-semantic.js +110 -5
  126. package/core/search/search-server.js +130 -57
  127. package/core/search/search-trace.js +107 -0
  128. package/core/search/server-identity.js +93 -0
  129. package/core/search/session-daemon-prewarm.mjs +33 -10
  130. package/core/search/sweet-search.js +399 -7
  131. package/core/skills/sweet-index/SKILL.md +8 -6
  132. package/core/vector-store/binary-hnsw-index.js +194 -30
  133. package/core/vector-store/float-vector-store.js +96 -6
  134. package/core/vector-store/hnsw-index.js +220 -49
  135. package/eval/agent-read-workflows/bin/_ss-helpers.mjs +471 -0
  136. package/eval/agent-read-workflows/bin/ss-find +15 -0
  137. package/eval/agent-read-workflows/bin/ss-grep +12 -0
  138. package/eval/agent-read-workflows/bin/ss-read +14 -0
  139. package/eval/agent-read-workflows/bin/ss-search +18 -0
  140. package/eval/agent-read-workflows/bin/ss-semantic +12 -0
  141. package/eval/agent-read-workflows/bin/ss-trace +11 -0
  142. package/mcp/read-tool.js +109 -0
  143. package/mcp/server.js +55 -15
  144. package/mcp/tool-handlers.js +14 -124
  145. package/mcp/trace-tool.js +81 -0
  146. package/package.json +25 -10
  147. package/scripts/hooks/intercept-read.mjs +55 -0
  148. package/scripts/hooks/remind-tools.mjs +40 -0
  149. package/scripts/init.js +698 -54
  150. package/scripts/inject-agent-instructions.js +431 -0
  151. package/scripts/install-prompt-reminders.js +188 -0
  152. package/scripts/install-tool-enforcement.js +220 -0
  153. package/scripts/smoke-test.js +12 -9
  154. package/scripts/uninstall.js +276 -18
  155. package/scripts/write-claude-rules.js +110 -0
@@ -0,0 +1,190 @@
1
+ /**
2
+ * Path filter (`.sweet-search-ignore` + repo-size cap).
3
+ *
4
+ * Plan § 11 ("Default deny-list (independent of `.gitignore`) catches"),
5
+ * § 22.9 ("Many tiny files / Cursor's 400 k freeze") and § 14.2.7
6
+ * ("resolved-exclude fingerprint"). Three concerns combined:
7
+ *
8
+ * 1. **Default deny-list** for paths that should never reach the
9
+ * reconcile dirty set, independent of project `.gitignore`. This
10
+ * catches `node_modules`, build dirs, common artifact extensions,
11
+ * etc.
12
+ * 2. **`.sweet-search-ignore`** — a project-level ignore file with
13
+ * gitignore-compatible patterns. Allows users to opt out of paths
14
+ * that `.gitignore` doesn't already cover.
15
+ * 3. **Repo-size cap** — Cursor froze at ~400 k indexed files; we cap
16
+ * at 200 k by default with a warning at 50 % of cap.
17
+ *
18
+ * Pattern matching is intentionally simple — substring + extension +
19
+ * directory checks. Full gitignore semantics live in
20
+ * `core/infrastructure/config/search.js::loadProjectConfig` which the
21
+ * reconcile path consults for the authoritative exclude list.
22
+ */
23
+
24
+ import fs from 'node:fs';
25
+ import path from 'node:path';
26
+ import { loadProjectConfig } from '../../infrastructure/config/search.js';
27
+
28
// Path segments that are always excluded. The filter compares every segment
// of a relative path against this list.
const DEFAULT_DENY_DIRS = Object.freeze([
  'node_modules', '.git', '.sweet-search',
  'dist', 'build', '.next', '.nuxt', 'target', 'vendor',
  '__pycache__', '.venv', 'venv',
  '.cache', '.turbo', 'coverage',
  '.parcel-cache', '.svelte-kit', '.vercel',
]);

// File-name suffixes that are always excluded (lock files, minified or
// bundled output, source maps, compiled binaries).
const DEFAULT_DENY_EXTS = Object.freeze([
  '.lock', '.lockb',
  '.min.js', '.min.css', '.map', '.bundle.js',
  '.pyc', '.so', '.dylib', '.dll', '.exe', '.bin', '.wasm',
]);

// Default hard limit on the number of indexed files.
export const DEFAULT_REPO_SIZE_CAP = 200_000;
// Fraction of the cap at which a warning is raised.
export const DEFAULT_REPO_SIZE_WARN_FRAC = 0.5;
67
+
68
/**
 * Read a `.sweet-search-ignore` file and return its usable pattern lines.
 *
 * Each line is trimmed; blank lines and lines starting with `#` are
 * dropped. Remaining lines are returned in file order, unmodified apart
 * from the trimming. A missing file yields an empty list.
 *
 * @param {string} filePath
 * @returns {string[]}
 */
export function loadIgnoreFile(filePath) {
  if (!fs.existsSync(filePath)) {
    return [];
  }
  return fs
    .readFileSync(filePath, 'utf-8')
    .split('\n')
    .map((entry) => entry.trim())
    .filter((entry) => entry !== '' && !entry.startsWith('#'));
}
87
+
88
/**
 * Compile one gitignore-style pattern into a RegExp that is tested against
 * a `/`-separated relative path.
 *
 * Supported syntax:
 *   - `**` followed by `/` matches zero or more leading directories, so
 *     `**` + `/package-lock.json` also matches a root-level file.
 *   - a trailing `/` followed by `**` matches the directory and everything
 *     below it.
 *   - any other `**` matches any characters, including `/`.
 *   - `*` matches within one path segment; `?` matches one non-`/` character.
 *   - a trailing `/` marks a directory pattern: it matches the path itself
 *     and anything beneath it.
 *   - a leading `/`, or a `/` in the middle of the pattern, anchors the
 *     pattern to the root. Patterns with no such separator match at any depth.
 *
 * Patterns without a trailing `/` must match the whole path; they do not
 * implicitly match descendants.
 *
 * @param {string} pattern
 * @returns {RegExp}
 */
function patternToRegex(pattern) {
  let p = String(pattern || '').replace(/\\/g, '/');

  // A leading slash anchors the pattern to the root. Record it before
  // stripping, otherwise `/build` would be treated like `build` and match
  // `src/build` as well.
  const rooted = p.startsWith('/');
  if (rooted) p = p.slice(1);

  let dirOnly = false;
  if (p.endsWith('/')) {
    p = p.slice(0, -1);
    dirOnly = true;
  }

  // A trailing slash was already removed above, so it does not anchor.
  const anchored = rooted || p.includes('/');

  let body = '';
  for (let i = 0; i < p.length; i++) {
    if (p.startsWith('**/', i)) {
      body += '(?:.*/)?';
      i += 2;
      continue;
    }
    if (p.startsWith('/**', i) && i + 3 === p.length) {
      body += '(?:/.*)?';
      i += 2;
      continue;
    }
    if (p.startsWith('**', i)) {
      body += '.*';
      i += 1;
      continue;
    }
    const ch = p[i];
    if (ch === '*') body += '[^/]*';
    else if (ch === '?') body += '[^/]';
    else body += ch.replace(/[.+^${}()|[\]\\]/g, '\\$&'); // escape regex metacharacters
  }

  const prefix = anchored ? '^' : '(?:^|.*/)';
  const suffix = dirOnly ? '(?:/.*)?$' : '$';
  return new RegExp(prefix + body + suffix);
}
125
+
126
/**
 * Build a path-filter predicate from the default deny-lists, the project
 * config's `exclude` patterns, caller-supplied patterns and the ignore file.
 *
 * The returned function is true for paths that should be **excluded**.
 * Non-string input is always excluded.
 *
 * Pattern sources, in order:
 *   1. `loadProjectConfig(projectRoot).exclude` (only when `projectRoot` is set),
 *   2. `opts.extraPatterns`,
 *   3. the ignore file: `opts.ignoreFile`, or `<projectRoot>/.sweet-search-ignore`.
 *
 * When `allowSweetSearchDir` is set, `.sweet-search` is removed from the
 * default directory deny-list and configured excludes mentioning
 * `.sweet-search` are skipped.
 *
 * @param {{projectRoot?:string, ignoreFile?:string, extraPatterns?:string[], allowSweetSearchDir?:boolean}} [opts]
 * @returns {(relativePath:string)=>boolean}
 */
export function buildPathFilter(opts = {}) {
  const patterns = [];
  if (opts.projectRoot) {
    for (const p of loadProjectConfig(opts.projectRoot).exclude || []) {
      if (opts.allowSweetSearchDir && String(p).includes('.sweet-search')) continue;
      patterns.push(p);
    }
  }
  for (const p of (opts.extraPatterns || [])) patterns.push(p);
  const ignoreFile = opts.ignoreFile
    || (opts.projectRoot ? path.join(opts.projectRoot, '.sweet-search-ignore') : null);
  if (ignoreFile) {
    for (const p of loadIgnoreFile(ignoreFile)) patterns.push(p);
  }
  const regexes = patterns.map(patternToRegex);
  const denyDirs = new Set(DEFAULT_DENY_DIRS);
  if (opts.allowSweetSearchDir) denyDirs.delete('.sweet-search');
  const denyExts = new Set(DEFAULT_DENY_EXTS);
  // `path.extname` only returns the last suffix (`.js` for `app.min.js`), so
  // multi-dot entries such as `.min.js` can never hit the Set lookup. Derive
  // them from the same list and match them with `endsWith` instead.
  const compoundSuffixes = DEFAULT_DENY_EXTS.filter((ext) => ext.includes('.', 1));

  return function isExcluded(relativePath) {
    if (typeof relativePath !== 'string') return true;
    const norm = relativePath.replace(/\\/g, '/');
    const parts = norm.split('/');
    // Any denied segment anywhere in the path excludes it.
    if (parts.some((part) => denyDirs.has(part))) return true;
    // Lower-case once so single and compound suffix checks agree on case.
    const base = (parts[parts.length - 1] || '').toLowerCase();
    if (denyExts.has(path.extname(base))) return true;
    if (compoundSuffixes.some((suffix) => base.endsWith(suffix))) return true;
    return regexes.some((re) => re.test(norm));
  };
}
173
+
174
/**
 * Evaluate a file count against the repo-size cap policy (plan § 22.9).
 *
 * `ok` is true while the count does not exceed the cap. `warn` is true once
 * the count reaches `cap * warnFrac`. Missing (nullish) options fall back to
 * the module defaults.
 *
 * @param {number} fileCount
 * @param {{cap?:number, warnFrac?:number}} [opts]
 * @returns {{ok:boolean, warn:boolean, cap:number, fileCount:number}}
 */
export function evaluateRepoSizeCap(fileCount, opts = {}) {
  const limit = opts.cap ?? DEFAULT_REPO_SIZE_CAP;
  const warnFraction = opts.warnFrac ?? DEFAULT_REPO_SIZE_WARN_FRAC;
  const warnThreshold = limit * warnFraction;
  const verdict = {
    ok: fileCount <= limit,
    warn: fileCount >= warnThreshold,
    cap: limit,
    fileCount,
  };
  return verdict;
}
@@ -0,0 +1,201 @@
1
+ /**
2
+ * Reader heartbeat / grace policy.
3
+ *
4
+ * Plan § 8.1.1. Strict row visibility requires bounded history retention:
5
+ * retired rows cannot be physically pruned until every live reader has
6
+ * advanced past their epoch. We track each reader's pinned epoch via a
7
+ * small JSON file under `.sweet-search/readers/<pid>-<boot>-<read>.json`
8
+ * so the maintenance scheduler can compute `min_live_epoch` across
9
+ * non-stale heartbeats. The per-read token is load-bearing: a long-lived
10
+ * MCP/server process can run concurrent queries pinned to different
11
+ * manifest epochs, and those pins must not overwrite each other.
12
+ *
13
+ * Lifecycle:
14
+ * - Each reader process (sweet-search CLI, MCP server, etc.) calls
15
+ * `beginRead(stateDir, epoch)` before a query and `endRead` when it
16
+ * finishes. The heartbeat file holds `{ epoch, pid, bootId, readId, startedAt }`.
17
+ * - The reconcile maintenance worker enumerates the heartbeats and:
18
+ * - drops files whose process no longer exists,
19
+ * - returns `min({live readers}.epoch)` as the prune frontier.
20
+ * - Heartbeats older than READER_GRACE_MS without a live pid are
21
+ * ignored.
22
+ *
23
+ * This file is pure I/O. The reconciler / maintenance worker uses it to
24
+ * compute `min_live_epoch` but does not block on it (heartbeats are
25
+ * advisory; correctness is preserved by tombstone-then-prune).
26
+ */
27
+
28
+ import fs from 'node:fs';
29
+ import os from 'node:os';
30
+ import path from 'node:path';
31
+
32
// Minimum age (one hour) before an unusable heartbeat file is deleted.
export const READER_GRACE_MS = 1000 * 60 * 60;
// Sub-directory of the state dir that holds the heartbeat files.
const HEARTBEAT_DIR = 'readers';
// Per-process counter mixed into every read id.
let heartbeatSeq = 0;
35
+
36
// Resolve the directory that holds heartbeat files for `stateDir`.
function heartbeatDir(stateDir) {
  const dir = path.join(stateDir, HEARTBEAT_DIR);
  return dir;
}
39
+
40
// Build the heartbeat file path: `<pid>-<bootId>[-<readId>].json` inside the
// heartbeat directory. The read-id segment is omitted when `readId` is falsy.
function heartbeatPath(stateDir, pid, bootId, readId = null) {
  const stem = readId ? `${pid}-${bootId}-${readId}` : `${pid}-${bootId}`;
  return path.join(heartbeatDir(stateDir), `${stem}.json`);
}
44
+
45
// Produce a per-read token `<time>-<seq>-<rand>`, all base-36. The sequence
// counter is module state and wraps as an unsigned 32-bit integer.
function nextReadId() {
  heartbeatSeq = (heartbeatSeq + 1) >>> 0;
  const segments = [
    Date.now().toString(36),
    heartbeatSeq.toString(36),
    Math.random().toString(36).slice(2, 8),
  ];
  return segments.join('-');
}
52
+
53
/**
 * Returns a coarse boot-id stand-in.
 *
 * On systems exposing `/proc/sys/kernel/random/boot_id` the kernel value is
 * returned, stripped to `[a-zA-Z0-9-]`. Everywhere else, or when that file
 * is unreadable or empty, the id is derived from the estimated boot time
 * (`Date.now()` minus `os.uptime()`), rounded to the nearest minute and
 * rendered as `boot-<epochSeconds>`.
 *
 * The rounding is load-bearing: the two clocks are sampled separately, so
 * the raw difference jitters by fractions of a second between calls.
 * Flooring to the second could hand the same boot two different ids, which
 * would make `isReaderAlive` reject live readers.
 *
 * @returns {string}
 */
export function bootIdStub() {
  try {
    const raw = fs.readFileSync('/proc/sys/kernel/random/boot_id', 'utf-8');
    const id = raw.trim().replace(/[^a-zA-Z0-9-]/g, '');
    // An empty id would collapse every heartbeat onto the same boot; fall
    // through to the uptime-based estimate instead.
    if (id) return id;
  } catch {
    // File absent (non-Linux) or unreadable — use the fallback below.
  }
  const bootSeconds = Date.now() / 1000 - os.uptime();
  const bootMinuteSeconds = Math.round(bootSeconds / 60) * 60;
  return `boot-${bootMinuteSeconds}`;
}
77
+
78
/**
 * Record a reader heartbeat for the current process.
 *
 * Creates the heartbeat directory if needed and writes a new JSON file
 * holding `{ epoch, pid, bootId, readId, startedAt, meta }`. Every call
 * generates a fresh read id, and therefore a fresh file.
 *
 * @param {string} stateDir
 * @param {number} epoch
 * @param {object} [meta] Optional caller-supplied metadata (mcp-session-id,
 *   query, etc.) — stored verbatim for diagnostics.
 * @returns {{pid:number, bootId:string, readId:string, path:string}}
 */
export function beginRead(stateDir, epoch, meta = {}) {
  fs.mkdirSync(heartbeatDir(stateDir), { recursive: true });

  const { pid } = process;
  const bootId = bootIdStub();
  const readId = nextReadId();
  const file = heartbeatPath(stateDir, pid, bootId, readId);
  const startedAt = new Date().toISOString();

  fs.writeFileSync(file, JSON.stringify({ epoch, pid, bootId, readId, startedAt, meta }));
  return { pid, bootId, readId, path: file };
}
105
+
106
/**
 * Remove a reader heartbeat once the query has completed (plan § 8.1.1
 * step 2).
 *
 * The file to delete is `record.path` when that is a string. Otherwise it
 * is rebuilt from `pid` / `bootId` / `readId`, falling back to the current
 * process and boot id for missing fields. Failure to delete is ignored.
 *
 * @param {string} stateDir
 * @param {{pid:number, bootId:string, readId?:string, path?:string}|undefined} record
 *   Return value of beginRead.
 */
export function endRead(stateDir, record) {
  const pid = record?.pid ?? process.pid;
  const bootId = record?.bootId ?? bootIdStub();

  let target;
  if (typeof record?.path === 'string') {
    target = record.path;
  } else {
    target = heartbeatPath(stateDir, pid, bootId, record?.readId ?? null);
  }

  try {
    fs.unlinkSync(target);
  } catch {
    // Ignore — the heartbeat may have been swept by the maintenance scheduler.
  }
}
126
+
127
/**
 * Best-effort liveness check for a recorded reader.
 *
 * Returns false for a pid that is not a positive integer, or when the
 * recorded boot id differs from the current one. Otherwise the process is
 * probed with signal 0. A successful probe, or an `EPERM` failure, counts
 * as alive.
 *
 * @param {number} pid
 * @param {string} bootId
 * @returns {boolean}
 */
export function isReaderAlive(pid, bootId) {
  const pidIsValid = Number.isInteger(pid) && pid > 0;
  if (!pidIsValid || bootId !== bootIdStub()) {
    return false;
  }
  try {
    process.kill(pid, 0);
  } catch (err) {
    return err.code === 'EPERM';
  }
  return true;
}
144
+
145
/**
 * Enumerate heartbeat files and return the records of live readers, sorted
 * by pinned epoch (ascending).
 *
 * A file is skipped and handed to the grace-period sweeper when it cannot
 * be read or parsed, when its payload is not an object with integer
 * `epoch` / `pid` and a string `bootId`, or when its reader is no longer
 * alive. A missing heartbeat directory yields an empty list.
 *
 * @param {string} stateDir
 * @returns {Array<{epoch:number, pid:number, bootId:string, readId?:string, startedAt:string, meta:object}>}
 */
export function liveReaders(stateDir) {
  const dir = heartbeatDir(stateDir);

  let names;
  try {
    names = fs.readdirSync(dir);
  } catch (err) {
    // Reading directly, rather than exists-then-read, avoids a crash if the
    // directory disappears between the two calls.
    if (err.code === 'ENOENT') return [];
    throw err;
  }

  const out = [];
  for (const name of names) {
    const p = path.join(dir, name);
    let payload = null;
    try {
      payload = JSON.parse(fs.readFileSync(p, 'utf-8'));
    } catch {
      // Unreadable or malformed — treated like an invalid payload below.
    }
    // `JSON.parse('null')` succeeds, so guard against null before touching
    // properties; one bad file must not abort the whole sweep.
    const wellFormed = payload !== null
      && typeof payload === 'object'
      && Number.isInteger(payload.epoch)
      && Number.isInteger(payload.pid)
      && typeof payload.bootId === 'string';
    if (!wellFormed || !isReaderAlive(payload.pid, payload.bootId)) {
      tryUnlinkAfterGrace(p);
      continue;
    }
    out.push(payload);
  }
  return out.sort((a, b) => a.epoch - b.epoch);
}
178
+
179
// Delete `p` only when its mtime is older than READER_GRACE_MS. Every
// failure (missing file, permissions, ...) is deliberately ignored.
function tryUnlinkAfterGrace(p) {
  try {
    const { mtimeMs } = fs.statSync(p);
    const expired = Date.now() - mtimeMs > READER_GRACE_MS;
    if (expired) {
      fs.unlinkSync(p);
    }
  } catch {
    // ignore
  }
}
188
+
189
/**
 * Compute the prune frontier: the smallest epoch pinned by any live reader.
 *
 * Returns `null` when there are no live readers, meaning all retired rows
 * older than the current epoch are eligible for prune.
 *
 * @param {string} stateDir
 * @returns {number|null}
 */
export function minLiveEpoch(stateDir) {
  // liveReaders() sorts ascending by epoch, so the first entry is the minimum.
  const [oldest] = liveReaders(stateDir);
  return oldest === undefined ? null : oldest.epoch;
}
@@ -0,0 +1,257 @@
1
+ /**
2
+ * Schema migrations for the incremental-indexing bounded context.
3
+ *
4
+ * Plan § 7.1.6, § 7.2, § 13 Phase 1, § 33 Phase 1 Pre-Merge Checklist.
5
+ *
6
+ * Adds the strict-row-visibility and exact-encoder-input columns required by
7
+ * the reconcile path. All columns carry `DEFAULT` clauses so an older daemon
8
+ * running the original INSERT path (e.g. after a git rollback) does NOT crash
9
+ * with `SQLITE_CONSTRAINT_NOTNULL`. This is load-bearing: without the
10
+ * defaults a rollback would put the daemon into a permanent crash-loop.
11
+ *
12
+ * The migrations are idempotent: `ALTER TABLE ... ADD COLUMN` is skipped when
13
+ * the column already exists, mirroring the pattern in
14
+ * `core/graph/graph-extractor.js`.
15
+ *
16
+ * Index choice: the epoch visibility index is a full B-tree on
17
+ * `epoch_written` rather than a partial recent-window index. Plan § 0 / § 36.5
18
+ * requires Phase 0 benchmarking before committing to the partial form;
19
+ * SQLite's single-writer monotonic-integer append is already a fast path, so
20
+ * we default to the full B-tree and revisit only if Phase 0 measurement shows
21
+ * insertion latency creep.
22
+ */
23
+
24
/**
 * Add a column to a table unless it is already there.
 * Helper kept private to this module.
 *
 * The existence check and the `ALTER TABLE` are two separate statements, so
 * another process running the same migration can add the column in between.
 * SQLite then reports "duplicate column name"; that outcome is treated as
 * "already existed" rather than as a failure. Any other error is rethrown.
 *
 * `table`, `column` and `definition` are interpolated into the SQL text and
 * must come from trusted, hard-coded call sites.
 *
 * @param {import('better-sqlite3').Database} db
 * @param {string} table
 * @param {string} column
 * @param {string} definition e.g. `"TEXT NOT NULL DEFAULT ''"`.
 * @returns {boolean} true if this call added the column; false if it already existed.
 */
function addColumnIfMissing(db, table, column, definition) {
  const cols = db.prepare(`PRAGMA table_info(${table})`).all();
  if (cols.some((c) => c.name === column)) return false;
  try {
    db.exec(`ALTER TABLE ${table} ADD COLUMN ${column} ${definition}`);
  } catch (err) {
    // Lost a race with a concurrent migrator: the column exists now.
    if (/duplicate column name/i.test(String(err?.message))) return false;
    throw err;
  }
  return true;
}
40
+
41
// Run an index-creation statement. Idempotence comes from the
// `IF NOT EXISTS` clause in the SQL the callers pass in. `indexName` is not
// used yet; it is kept so the signature mirrors addColumnIfMissing and can
// later be used for logging.
function createIndexIfMissing(db, indexName, sql) {
  void indexName;
  db.exec(sql);
}
48
+
49
/**
 * Apply the reconcile-v2 migration to the `vectors` table (plan § 7.2).
 *
 * Columns added when missing:
 *   - `chunk_struct_id`, `chunk_text_hash`, `embedding_input_hash`,
 *     `li_input_hash`, `metadata_fingerprint`, `logical_chunk_id`
 *     — all `TEXT NOT NULL DEFAULT ''`
 *   - `epoch_written INTEGER NOT NULL DEFAULT 0`
 *   - `epoch_retired INTEGER`
 *
 * Indexes ensured afterwards: `idx_vectors_struct`,
 * `idx_vectors_epoch_written`, `idx_vectors_epoch_retired`,
 * `idx_vectors_logical`.
 *
 * When the `vectors` table does not exist, nothing is changed and an empty
 * `added` list is returned.
 *
 * @param {import('better-sqlite3').Database} db
 * @returns {{added: string[]}} names of the columns this call added.
 */
export function migrateVectorsSchema(db) {
  const added = [];

  const tableRow = db
    .prepare("SELECT 1 FROM sqlite_master WHERE type='table' AND name='vectors'")
    .get();
  if (!tableRow) return { added };

  const emptyText = "TEXT NOT NULL DEFAULT ''";
  const columnSpecs = [
    ['chunk_struct_id', emptyText],
    ['chunk_text_hash', emptyText],
    ['embedding_input_hash', emptyText],
    ['li_input_hash', emptyText],
    ['metadata_fingerprint', emptyText],
    ['logical_chunk_id', emptyText],
    ['epoch_written', 'INTEGER NOT NULL DEFAULT 0'],
    ['epoch_retired', 'INTEGER'],
  ];
  columnSpecs.forEach(([name, definition]) => {
    if (addColumnIfMissing(db, 'vectors', name, definition)) added.push(name);
  });

  const indexSpecs = [
    [
      'idx_vectors_struct',
      "CREATE INDEX IF NOT EXISTS idx_vectors_struct ON vectors(chunk_struct_id) WHERE chunk_struct_id != ''",
    ],
    [
      'idx_vectors_epoch_written',
      'CREATE INDEX IF NOT EXISTS idx_vectors_epoch_written ON vectors(epoch_written)',
    ],
    [
      'idx_vectors_epoch_retired',
      'CREATE INDEX IF NOT EXISTS idx_vectors_epoch_retired ON vectors(epoch_retired) WHERE epoch_retired IS NOT NULL',
    ],
    [
      'idx_vectors_logical',
      "CREATE INDEX IF NOT EXISTS idx_vectors_logical ON vectors(logical_chunk_id) WHERE logical_chunk_id != ''",
    ],
  ];
  for (const [indexName, sql] of indexSpecs) {
    createIndexIfMissing(db, indexName, sql);
  }

  return { added };
}
116
+
117
/**
 * Apply the reconcile-v2 migration to the `entities` table (plan § 7.1.6).
 *
 * Columns added when missing:
 *   - `logical_entity_id TEXT NOT NULL DEFAULT ''`
 *   - `epoch_written INTEGER NOT NULL DEFAULT 0`
 *   - `epoch_retired INTEGER`
 *
 * Indexes ensured afterwards: `idx_entities_logical`,
 * `idx_entities_epoch_written`, `idx_entities_epoch_retired`.
 *
 * When the `entities` table does not exist, nothing is changed and an
 * empty `added` list is returned.
 *
 * @param {import('better-sqlite3').Database} db
 * @returns {{added: string[]}} names of the columns this call added.
 */
export function migrateEntitiesSchema(db) {
  const added = [];

  const tableRow = db
    .prepare("SELECT 1 FROM sqlite_master WHERE type='table' AND name='entities'")
    .get();
  if (!tableRow) return { added };

  const columnSpecs = [
    ['logical_entity_id', "TEXT NOT NULL DEFAULT ''"],
    ['epoch_written', 'INTEGER NOT NULL DEFAULT 0'],
    ['epoch_retired', 'INTEGER'],
  ];
  for (const [name, definition] of columnSpecs) {
    if (addColumnIfMissing(db, 'entities', name, definition)) added.push(name);
  }

  const indexSpecs = [
    [
      'idx_entities_logical',
      "CREATE INDEX IF NOT EXISTS idx_entities_logical ON entities(logical_entity_id) WHERE logical_entity_id != ''",
    ],
    [
      'idx_entities_epoch_written',
      'CREATE INDEX IF NOT EXISTS idx_entities_epoch_written ON entities(epoch_written)',
    ],
    [
      'idx_entities_epoch_retired',
      'CREATE INDEX IF NOT EXISTS idx_entities_epoch_retired ON entities(epoch_retired) WHERE epoch_retired IS NOT NULL',
    ],
  ];
  for (const [indexName, sql] of indexSpecs) {
    createIndexIfMissing(db, indexName, sql);
  }

  return { added };
}
164
+
165
/**
 * Apply the reconcile-v2 migration to the `relationships` table
 * (plan § 7.1.6 / § 33).
 *
 * Columns added when missing:
 *   - `logical_relationship_id TEXT NOT NULL DEFAULT ''`
 *   - `epoch_written INTEGER NOT NULL DEFAULT 0`
 *   - `epoch_retired INTEGER`
 *
 * Indexes ensured afterwards: `idx_rel_logical`, `idx_rel_epoch_written`,
 * `idx_rel_epoch_retired`.
 *
 * When the `relationships` table does not exist, nothing is changed and an
 * empty `added` list is returned.
 *
 * @param {import('better-sqlite3').Database} db
 * @returns {{added: string[]}} names of the columns this call added.
 */
export function migrateRelationshipsSchema(db) {
  const added = [];

  const tableRow = db
    .prepare("SELECT 1 FROM sqlite_master WHERE type='table' AND name='relationships'")
    .get();
  if (!tableRow) return { added };

  const columnSpecs = [
    ['logical_relationship_id', "TEXT NOT NULL DEFAULT ''"],
    ['epoch_written', 'INTEGER NOT NULL DEFAULT 0'],
    ['epoch_retired', 'INTEGER'],
  ];
  for (const [name, definition] of columnSpecs) {
    if (addColumnIfMissing(db, 'relationships', name, definition)) added.push(name);
  }

  const indexSpecs = [
    [
      'idx_rel_logical',
      "CREATE INDEX IF NOT EXISTS idx_rel_logical ON relationships(logical_relationship_id) WHERE logical_relationship_id != ''",
    ],
    [
      'idx_rel_epoch_written',
      'CREATE INDEX IF NOT EXISTS idx_rel_epoch_written ON relationships(epoch_written)',
    ],
    [
      'idx_rel_epoch_retired',
      'CREATE INDEX IF NOT EXISTS idx_rel_epoch_retired ON relationships(epoch_retired) WHERE epoch_retired IS NOT NULL',
    ],
  ];
  for (const [indexName, sql] of indexSpecs) {
    createIndexIfMissing(db, indexName, sql);
  }

  return { added };
}
209
+
210
/**
 * Ensure the encoder-input dependency sidecar exists (plan § 7.2.1 /
 * § 13 Phase 1).
 *
 * Creates, if absent, the `encoder_input_dependencies` table keyed by
 * `(dependency_key, file_path, chunk_struct_id, consumer)`, where
 * `consumer` is constrained to `dense`, `li` or `dedup`. Also creates a
 * secondary index on `(file_path, chunk_struct_id)`. Both statements use
 * `IF NOT EXISTS`.
 *
 * @param {import('better-sqlite3').Database} db
 */
export function ensureEncoderDepsSchema(db) {
  const createTableSql = `
    CREATE TABLE IF NOT EXISTS encoder_input_dependencies (
      dependency_key TEXT NOT NULL,
      file_path TEXT NOT NULL,
      chunk_struct_id TEXT NOT NULL,
      consumer TEXT NOT NULL CHECK (consumer IN ('dense', 'li', 'dedup')),
      PRIMARY KEY (dependency_key, file_path, chunk_struct_id, consumer)
    ) WITHOUT ROWID;
  `;
  const createIndexSql = `
    CREATE INDEX IF NOT EXISTS idx_encoder_deps_by_chunk
      ON encoder_input_dependencies (file_path, chunk_struct_id);
  `;
  db.exec(createTableSql);
  db.exec(createIndexSql);
}
239
+
240
/**
 * Run every reconcile-v2 schema migration in one call.
 *
 * The vectors migration runs against `dbs.vectors`. The entities,
 * relationships and encoder-dependency migrations run against
 * `dbs.codeGraph`, in that order. Callers needing a single table can use
 * the focused helpers instead.
 *
 * @param {{ codeGraph: import('better-sqlite3').Database, vectors: import('better-sqlite3').Database }} dbs
 * @returns {{vectors:{added:string[]}, entities:{added:string[]}, relationships:{added:string[]}}}
 */
export function applyReconcileSchemaMigrations(dbs) {
  const vectorsResult = migrateVectorsSchema(dbs.vectors);

  const graphDb = dbs.codeGraph;
  const entitiesResult = migrateEntitiesSchema(graphDb);
  const relationshipsResult = migrateRelationshipsSchema(graphDb);
  ensureEncoderDepsSchema(graphDb);

  return {
    vectors: vectorsResult,
    entities: entitiesResult,
    relationships: relationshipsResult,
  };
}