sweet-search 2.5.2 → 2.5.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (155) hide show
  1. package/core/cli.js +24 -3
  2. package/core/graph/graph-expansion.js +215 -36
  3. package/core/graph/graph-extractor.js +196 -11
  4. package/core/graph/graph-search.js +395 -92
  5. package/core/graph/hcgs-generator.js +2 -1
  6. package/core/graph/index.js +2 -0
  7. package/core/graph/repo-map.js +28 -6
  8. package/core/graph/structural-answer-cues.js +168 -0
  9. package/core/graph/structural-callsite-hints.js +40 -0
  10. package/core/graph/structural-context-format.js +40 -0
  11. package/core/graph/structural-context.js +450 -0
  12. package/core/graph/structural-forward-push.js +156 -0
  13. package/core/graph/structural-header-context.js +19 -0
  14. package/core/graph/structural-importance.js +148 -0
  15. package/core/graph/structural-pagerank.js +197 -0
  16. package/core/graph/summary-manager.js +13 -9
  17. package/core/incremental-indexing/application/dirty-scan.mjs +236 -0
  18. package/core/incremental-indexing/application/file-watcher.mjs +197 -0
  19. package/core/incremental-indexing/application/maintenance-handlers.mjs +519 -0
  20. package/core/incremental-indexing/application/maintenance-worker.mjs +380 -0
  21. package/core/incremental-indexing/application/operator-cli.mjs +554 -0
  22. package/core/incremental-indexing/application/production-li-delta.mjs +192 -0
  23. package/core/incremental-indexing/application/production-reconciler-helpers.mjs +107 -0
  24. package/core/incremental-indexing/application/production-reconciler.mjs +583 -0
  25. package/core/incremental-indexing/application/reconciler.mjs +477 -0
  26. package/core/incremental-indexing/application/tombstone-injector.mjs +148 -0
  27. package/core/incremental-indexing/domain/chunk-identity.mjs +260 -0
  28. package/core/incremental-indexing/domain/encoder-deps.mjs +193 -0
  29. package/core/incremental-indexing/domain/encoder-input.mjs +225 -0
  30. package/core/incremental-indexing/domain/interval-autotune.mjs +255 -0
  31. package/core/incremental-indexing/domain/reconcile-counters.mjs +149 -0
  32. package/core/incremental-indexing/domain/watermark-scheduler.mjs +239 -0
  33. package/core/incremental-indexing/infrastructure/artifact-temp-sweep.mjs +163 -0
  34. package/core/incremental-indexing/infrastructure/baseline-readiness.mjs +121 -0
  35. package/core/incremental-indexing/infrastructure/dirty-set.mjs +233 -0
  36. package/core/incremental-indexing/infrastructure/graph-gc.mjs +314 -0
  37. package/core/incremental-indexing/infrastructure/hashing.mjs +298 -0
  38. package/core/incremental-indexing/infrastructure/hcgs-invalidation.mjs +182 -0
  39. package/core/incremental-indexing/infrastructure/li-segment-merge.mjs +278 -0
  40. package/core/incremental-indexing/infrastructure/li-segment-state.mjs +173 -0
  41. package/core/incremental-indexing/infrastructure/lockfile.mjs +119 -0
  42. package/core/incremental-indexing/infrastructure/maintenance-state-reader.mjs +283 -0
  43. package/core/incremental-indexing/infrastructure/manifest.mjs +194 -0
  44. package/core/incremental-indexing/infrastructure/path-filter.mjs +190 -0
  45. package/core/incremental-indexing/infrastructure/reader-heartbeat.mjs +201 -0
  46. package/core/incremental-indexing/infrastructure/schema-migrations.mjs +257 -0
  47. package/core/incremental-indexing/infrastructure/sparse-gram-delta.mjs +335 -0
  48. package/core/incremental-indexing/infrastructure/sqlite-fts5.mjs +176 -0
  49. package/core/incremental-indexing/infrastructure/staleness-display.mjs +105 -0
  50. package/core/incremental-indexing/infrastructure/tombstone-bitmap.mjs +234 -0
  51. package/core/incremental-indexing/infrastructure/vector-delta-writer.mjs +359 -0
  52. package/core/incremental-indexing/infrastructure/vector-gc.mjs +133 -0
  53. package/core/incremental-indexing/infrastructure/worktree-stamp.mjs +155 -0
  54. package/core/incremental-indexing/infrastructure/wsl2-detect.mjs +115 -0
  55. package/core/indexing/admission-policy.js +139 -0
  56. package/core/indexing/artifact-builder.js +29 -12
  57. package/core/indexing/ast-chunker.js +107 -30
  58. package/core/indexing/dedup/exemplar-selector.js +19 -1
  59. package/core/indexing/gitignore-filter.js +223 -0
  60. package/core/indexing/incremental-tracker.js +99 -30
  61. package/core/indexing/index-codebase-v21.js +6 -5
  62. package/core/indexing/index-maintainer.mjs +698 -6
  63. package/core/indexing/indexer-ann.js +99 -15
  64. package/core/indexing/indexer-build.js +158 -45
  65. package/core/indexing/indexer-empty-baseline.js +80 -0
  66. package/core/indexing/indexer-manifest.js +66 -0
  67. package/core/indexing/indexer-phases.js +56 -23
  68. package/core/indexing/indexer-sparse-gram.js +54 -13
  69. package/core/indexing/indexer-utils.js +26 -208
  70. package/core/indexing/indexing-file-policy.js +32 -7
  71. package/core/indexing/maintainer-launcher.mjs +137 -0
  72. package/core/indexing/merkle-tracker.js +251 -244
  73. package/core/indexing/model-pool.js +46 -5
  74. package/core/infrastructure/code-graph-repository.js +758 -6
  75. package/core/infrastructure/code-graph-visibility.js +157 -0
  76. package/core/infrastructure/codebase-repository.js +100 -13
  77. package/core/infrastructure/config/search.js +1 -1
  78. package/core/infrastructure/db-utils.js +118 -0
  79. package/core/infrastructure/dedup-hashing.js +10 -13
  80. package/core/infrastructure/hardware-capability.js +17 -7
  81. package/core/infrastructure/index.js +8 -2
  82. package/core/infrastructure/language-patterns/maps.js +4 -1
  83. package/core/infrastructure/language-patterns/registry-core.js +56 -17
  84. package/core/infrastructure/language-patterns/registry-object-oriented.js +12 -5
  85. package/core/infrastructure/language-patterns.js +69 -0
  86. package/core/infrastructure/model-registry.js +20 -0
  87. package/core/infrastructure/native-inference.js +7 -12
  88. package/core/infrastructure/native-resolver.js +52 -37
  89. package/core/infrastructure/native-sparse-gram.js +261 -20
  90. package/core/infrastructure/native-tokenizer.js +6 -15
  91. package/core/infrastructure/simd-distance.js +10 -16
  92. package/core/infrastructure/sparse-gram-delta-reader.js +76 -0
  93. package/core/infrastructure/structural-alias-resolver.js +122 -0
  94. package/core/infrastructure/structural-candidate-ranker.js +34 -0
  95. package/core/infrastructure/structural-context-repository.js +472 -0
  96. package/core/infrastructure/structural-context-utils.js +51 -0
  97. package/core/infrastructure/structural-graph-signals.js +121 -0
  98. package/core/infrastructure/structural-qualified-resolution.js +15 -0
  99. package/core/infrastructure/structural-source-definitions.js +100 -0
  100. package/core/infrastructure/tombstone-bitmap-reader.js +139 -0
  101. package/core/infrastructure/tree-sitter-provider.js +811 -37
  102. package/core/prompt-optimization/data/p7-final/sweet-search-system-prompt.md +50 -0
  103. package/core/query/query-router.js +55 -5
  104. package/core/ranking/file-kind-ranking.js +2192 -15
  105. package/core/ranking/late-interaction-index.js +87 -12
  106. package/core/search/cli-decoration.js +290 -0
  107. package/core/search/context-expander.js +988 -78
  108. package/core/search/index.js +1 -0
  109. package/core/search/output-policy.js +275 -0
  110. package/core/search/search-anchor.js +499 -0
  111. package/core/search/search-boost.js +93 -1
  112. package/core/search/search-cli.js +61 -204
  113. package/core/search/search-hybrid.js +250 -10
  114. package/core/search/search-pattern-chunks.js +57 -8
  115. package/core/search/search-pattern-planner.js +68 -9
  116. package/core/search/search-pattern-prefilter.js +30 -10
  117. package/core/search/search-pattern-ripgrep.js +40 -4
  118. package/core/search/search-pattern-sparse-overlay.js +256 -0
  119. package/core/search/search-pattern.js +117 -29
  120. package/core/search/search-postprocess.js +479 -5
  121. package/core/search/search-read-semantic.js +260 -23
  122. package/core/search/search-read.js +82 -64
  123. package/core/search/search-reader-pin.js +71 -0
  124. package/core/search/search-rrf.js +279 -0
  125. package/core/search/search-semantic.js +110 -5
  126. package/core/search/search-server.js +130 -57
  127. package/core/search/search-trace.js +107 -0
  128. package/core/search/server-identity.js +93 -0
  129. package/core/search/session-daemon-prewarm.mjs +33 -10
  130. package/core/search/sweet-search.js +399 -7
  131. package/core/skills/sweet-index/SKILL.md +8 -6
  132. package/core/vector-store/binary-hnsw-index.js +194 -30
  133. package/core/vector-store/float-vector-store.js +96 -6
  134. package/core/vector-store/hnsw-index.js +220 -49
  135. package/eval/agent-read-workflows/bin/_ss-helpers.mjs +471 -0
  136. package/eval/agent-read-workflows/bin/ss-find +15 -0
  137. package/eval/agent-read-workflows/bin/ss-grep +12 -0
  138. package/eval/agent-read-workflows/bin/ss-read +14 -0
  139. package/eval/agent-read-workflows/bin/ss-search +18 -0
  140. package/eval/agent-read-workflows/bin/ss-semantic +12 -0
  141. package/eval/agent-read-workflows/bin/ss-trace +11 -0
  142. package/mcp/read-tool.js +109 -0
  143. package/mcp/server.js +55 -15
  144. package/mcp/tool-handlers.js +14 -124
  145. package/mcp/trace-tool.js +81 -0
  146. package/package.json +25 -10
  147. package/scripts/hooks/intercept-read.mjs +55 -0
  148. package/scripts/hooks/remind-tools.mjs +40 -0
  149. package/scripts/init.js +698 -54
  150. package/scripts/inject-agent-instructions.js +431 -0
  151. package/scripts/install-prompt-reminders.js +188 -0
  152. package/scripts/install-tool-enforcement.js +220 -0
  153. package/scripts/smoke-test.js +12 -9
  154. package/scripts/uninstall.js +276 -18
  155. package/scripts/write-claude-rules.js +110 -0
@@ -0,0 +1,137 @@
1
+ /**
2
+ * Shared, idempotent launcher for the reconcile-v2 incremental-index maintainer.
3
+ *
4
+ * This is the single place that knows how to start the maintainer daemon. It is
5
+ * called from every entry point that should keep the index fresh:
6
+ * - the warm search-server startup (`core/search/search-server.js`) — the
7
+ * durable, non-MCP guarantee that any normal `sweet-search` use starts the
8
+ * maintainer;
9
+ * - the Claude/Codex SessionStart prewarm hook (best-effort convenience);
10
+ * - the MCP server startup, *only* when MCP is actually enabled/used.
11
+ *
12
+ * Design contract (all required by the durable-startup spec):
13
+ * - respects the default-on opt-out (`SWEET_SEARCH_RECONCILE_V2=0|false|off`)
14
+ * via the canonical `reconcileEnablement` policy;
15
+ * - skips when the project has no `.sweet-search` state dir (nothing to
16
+ * maintain yet);
17
+ * - skips when a *live* maintainer already holds `index-maintainer.lock`;
18
+ * - relies on the daemon's own `O_EXCL` state lock as the HARD no-duplicate
19
+ * guarantee — the liveness probe here is only an optimization so we don't
20
+ * spawn a process that would immediately exit;
21
+ * - spawns fully detached with the right cwd and a pinned
22
+ * `SWEET_SEARCH_PROJECT_ROOT` (the maintainer's package copy resolves its
23
+ * own PROJECT_ROOT from `__dirname`, so the env pin is load-bearing);
24
+ * - returns quickly (a few `fs` stats + a detached spawn);
25
+ * - is stdout-clean — it NEVER writes to stdout, only to stderr when verbose,
26
+ * so machine-readable commands that call it stay parseable;
27
+ * - is safe to call often (repeated calls are cheap no-ops once a maintainer
28
+ * is up).
29
+ */
30
+
31
+ import { spawn } from 'node:child_process';
32
+ import { existsSync, readFileSync } from 'node:fs';
33
+ import { dirname, join } from 'node:path';
34
+ import { fileURLToPath } from 'node:url';
35
+ import { reconcileEnablement } from '../incremental-indexing/domain/interval-autotune.mjs';
36
+
37
+ const __dirname = dirname(fileURLToPath(import.meta.url));
38
+
39
+ export const MAINTAINER_LOCK_FILENAME = 'index-maintainer.lock';
40
+
41
/**
 * Path of the maintainer daemon script used when the caller supplies none:
 * `index-maintainer.mjs`, located in the same directory as this module.
 *
 * @returns {string} Path built from this module's `__dirname`.
 */
export function defaultMaintainerEntry() {
  const daemonFile = 'index-maintainer.mjs';
  return join(__dirname, daemonFile);
}
45
+
46
/**
 * Probe whether a process with the given pid exists right now.
 *
 * The probe sends signal 0 via `process.kill`. A failure with code `EPERM`
 * still counts as alive (the process exists but belongs to another owner);
 * any other failure counts as not alive.
 *
 * @param {number} pid - Candidate process id.
 * @returns {boolean} `true` when the process exists, `false` otherwise or
 *   when `pid` is not a finite positive number.
 */
function pidAlive(pid) {
  const usable = Number.isFinite(pid) && pid > 0;
  if (!usable) {
    return false;
  }

  let alive;
  try {
    process.kill(pid, 0);
    alive = true;
  } catch (probeError) {
    alive = probeError.code === 'EPERM';
  }
  return alive;
}
56
+
57
/**
 * Work out which directory holds the reconcile state.
 *
 * Precedence: an explicit `SWEET_SEARCH_STATE_DIR` wins; otherwise the
 * directory is `.sweet-search` under `SWEET_SEARCH_PROJECT_ROOT`, falling back
 * to `cwd` when no project root is set.
 *
 * @param {NodeJS.ProcessEnv} [env] - Environment to read the overrides from.
 * @param {string} [cwd] - Directory used when no project root is pinned.
 * @returns {string} The state directory path.
 */
export function resolveStateDir(env = process.env, cwd = process.cwd()) {
  const explicitDir = env.SWEET_SEARCH_STATE_DIR;
  if (explicitDir) {
    return explicitDir;
  }

  const projectRoot = env.SWEET_SEARCH_PROJECT_ROOT ? env.SWEET_SEARCH_PROJECT_ROOT : cwd;
  return join(projectRoot, '.sweet-search');
}
63
+
64
/**
 * Report whether a live maintainer currently holds the lock in `stateDir`.
 *
 * The lock file is read as JSON and its `pid` field is probed with
 * `pidAlive`. A missing, unreadable, or corrupt lock is reported as
 * not-alive; per the module's design notes, the daemon's own `O_EXCL`
 * acquire and stale-lock reclaim do the real arbitration on spawn.
 *
 * @param {string} stateDir - Directory that may contain the maintainer lock.
 * @returns {boolean} `true` only when the lock names a pid that is alive.
 */
export function maintainerAlive(stateDir) {
  const lockPath = join(stateDir, MAINTAINER_LOCK_FILENAME);

  if (existsSync(lockPath)) {
    try {
      const lockRecord = JSON.parse(readFileSync(lockPath, 'utf-8'));
      return pidAlive(Number(lockRecord.pid));
    } catch {
      // Unreadable or corrupt lock: fall through and report not-alive.
    }
  }
  return false;
}
77
+
78
/**
 * Start the maintainer if it should run and isn't already running.
 *
 * Checks are made in this order, and the first one that fails decides the
 * returned `reason`:
 *   1. `reconcileEnablement(env).enabled` is false      → `'opted-out'`
 *   2. the maintainer entry script does not exist       → `'entry-missing'`
 *   3. the state dir does not exist                     → `'no-state-dir'`
 *   4. a live maintainer already holds the lock         → `'already-running'`
 * Otherwise the entry script is spawned detached with stdio ignored, with
 * `SWEET_SEARCH_PROJECT_ROOT` pinned to the caller's value or `cwd`.
 *
 * Spawn failures are reported through the return value (`reason: 'error'`),
 * whether Node raises them synchronously or delivers them later as an
 * `'error'` event on the child. Logging goes to stderr only, and only when
 * verbose, unless a custom `log` is supplied.
 *
 * @param {{
 *   env?: NodeJS.ProcessEnv,
 *   cwd?: string,
 *   verbose?: boolean,
 *   maintainerEntry?: string,
 *   log?: (msg: string) => void,
 * }} [options]
 * @returns {{spawned: boolean, reason: 'opted-out'|'entry-missing'|'no-state-dir'|'already-running'|'spawned'|'error', pid?: number, stateDir?: string, error?: string}}
 */
export function launchMaintainer(options = {}) {
  const env = options.env || process.env;
  const cwd = options.cwd || process.cwd();
  const verbose = options.verbose ?? !!env.SWEET_SEARCH_PREWARM_VERBOSE;
  // stdout-clean: log only to stderr, only when verbose.
  const log = options.log
    || ((msg) => { if (verbose) process.stderr.write(`[sweet-search maintainer-launch] ${msg}\n`); });

  const maintainerEntry = options.maintainerEntry
    || env.SWEET_SEARCH_MAINTAINER_ENTRY
    || defaultMaintainerEntry();

  if (!reconcileEnablement(env).enabled) {
    log('disabled via SWEET_SEARCH_RECONCILE_V2 opt-out');
    return { spawned: false, reason: 'opted-out' };
  }
  if (!existsSync(maintainerEntry)) {
    log(`maintainer entry missing: ${maintainerEntry}`);
    return { spawned: false, reason: 'entry-missing' };
  }
  const stateDir = resolveStateDir(env, cwd);
  if (!existsSync(stateDir)) {
    log(`no index state dir (${stateDir}); skipping (run sweet-search index first)`);
    return { spawned: false, reason: 'no-state-dir', stateDir };
  }
  if (maintainerAlive(stateDir)) {
    log('maintainer already running for this state dir');
    return { spawned: false, reason: 'already-running', stateDir };
  }

  try {
    const child = spawn(process.execPath, [maintainerEntry], {
      detached: true,
      stdio: 'ignore',
      cwd,
      env: {
        ...env,
        SWEET_SEARCH_PROJECT_ROOT: env.SWEET_SEARCH_PROJECT_ROOT || cwd,
      },
    });
    // Some spawn failures (e.g. ENOENT/EACCES from an unusable cwd) are not
    // thrown but emitted later as an 'error' event. Without a listener that
    // event would surface as an uncaught exception in the calling process,
    // which the surrounding try/catch cannot intercept.
    child.on('error', (err) => {
      log(`maintainer process error (non-fatal): ${err?.message || err}`);
    });
    child.unref();

    // Node leaves `pid` undefined when the process could not be started.
    if (typeof child.pid !== 'number') {
      const error = 'maintainer process could not be started (no pid assigned)';
      log(`maintainer spawn failed (non-fatal): ${error}`);
      return { spawned: false, reason: 'error', stateDir, error };
    }

    log(`maintainer spawned (pid ${child.pid}, detached)`);
    return { spawned: true, reason: 'spawned', pid: child.pid, stateDir };
  } catch (err) {
    log(`maintainer spawn failed (non-fatal): ${err?.message || err}`);
    return { spawned: false, reason: 'error', stateDir, error: err?.message || String(err) };
  }
}
@@ -1,244 +1,251 @@
1
- #!/usr/bin/env node
2
-
3
- /**
4
- * Merkle Tree-Based File Tracking for Incremental Indexing
5
- *
6
- * Tracks file content hashes and chunk IDs to enable efficient change detection
7
- * and selective re-indexing. Supports atomic updates and persistent state.
8
- */
9
-
10
- import { readFile, writeFile, mkdir } from 'fs/promises';
11
- import { createHash } from 'crypto';
12
- import { dirname, resolve } from 'path';
13
- import { existsSync } from 'fs';
14
-
15
- export class MerkleTracker {
16
- constructor(statePath = '.sweet-search/merkle/codebase-state.json') {
17
- this.statePath = resolve(statePath);
18
- this.state = {
19
- version: 1,
20
- lastFullIndex: null,
21
- lastIncrementalIndex: null,
22
- stats: {
23
- totalFiles: 0,
24
- totalChunks: 0,
25
- lastDuration: 0
26
- },
27
- files: {}
28
- };
29
- }
30
-
31
- async load() {
32
- try {
33
- if (existsSync(this.statePath)) {
34
- const content = await readFile(this.statePath, 'utf8');
35
- const loaded = JSON.parse(content);
36
-
37
- if (loaded.version !== 1) {
38
- console.warn(`Unknown state version ${loaded.version}, starting fresh`);
39
- return;
40
- }
41
-
42
- this.state = loaded;
43
- console.log(`Loaded tracking state: ${this.state.stats.totalFiles} files, ${this.state.stats.totalChunks} chunks`);
44
- } else {
45
- console.log('No existing state found, starting fresh');
46
- }
47
- } catch (error) {
48
- console.error('Failed to load tracking state:', error.message);
49
- console.log('Starting with fresh state');
50
- }
51
- }
52
-
53
- async save() {
54
- try {
55
- const dir = dirname(this.statePath);
56
- await mkdir(dir, { recursive: true });
57
-
58
- const tmpPath = `${this.statePath}.tmp`;
59
- const content = JSON.stringify(this.state, null, 2);
60
- await writeFile(tmpPath, content, 'utf8');
61
- await writeFile(this.statePath, content, 'utf8');
62
-
63
- try {
64
- const { unlink } = await import('fs/promises');
65
- if (existsSync(tmpPath)) {
66
- await unlink(tmpPath);
67
- }
68
- } catch (e) {}
69
-
70
- console.log(`Saved tracking state: ${this.state.stats.totalFiles} files, ${this.state.stats.totalChunks} chunks`);
71
- } catch (error) {
72
- console.error('Failed to save tracking state:', error.message);
73
- throw error;
74
- }
75
- }
76
-
77
- needsReindex(filePath, contentHash) {
78
- const tracked = this.state.files[filePath];
79
- if (!tracked) return true;
80
- if (tracked.contentHash !== contentHash) return true;
81
- return false;
82
- }
83
-
84
- async findChangedFiles(allFiles) {
85
- const changed = [];
86
- for (const filePath of allFiles) {
87
- try {
88
- const content = await readFile(filePath, 'utf8');
89
- const hash = MerkleTracker.computeHash(content);
90
- if (this.needsReindex(filePath, hash)) {
91
- changed.push(filePath);
92
- }
93
- } catch (err) {
94
- changed.push(filePath);
95
- }
96
- }
97
- return changed;
98
- }
99
-
100
- getChunkIds(filePath) {
101
- const tracked = this.state.files[filePath];
102
- return tracked ? tracked.chunkIds : [];
103
- }
104
-
105
- async updateFile(filePath, contentHash = null, chunkIds = [], size = 0) {
106
- const now = new Date().toISOString();
107
-
108
- if (!contentHash) {
109
- try {
110
- const content = await readFile(filePath, 'utf8');
111
- contentHash = MerkleTracker.computeHash(content);
112
- size = content.length;
113
- } catch (err) {
114
- contentHash = 'unknown';
115
- }
116
- }
117
-
118
- this.state.files[filePath] = {
119
- contentHash,
120
- lastIndexed: now,
121
- chunkIds,
122
- chunkCount: chunkIds.length,
123
- size
124
- };
125
-
126
- this.state.lastIncrementalIndex = now;
127
- this._recalculateStats();
128
- }
129
-
130
- removeFile(filePath) {
131
- const chunkIds = this.getChunkIds(filePath);
132
- delete this.state.files[filePath];
133
- this._recalculateStats();
134
- return chunkIds;
135
- }
136
-
137
- getTrackedFiles() {
138
- return Object.keys(this.state.files);
139
- }
140
-
141
- getStats() {
142
- return { ...this.state.stats };
143
- }
144
-
145
- getSummary() {
146
- const fileCount = Object.keys(this.state.files).length;
147
- const totalChunks = Object.values(this.state.files).reduce(
148
- (sum, file) => sum + file.chunkCount,
149
- 0
150
- );
151
- const totalSize = Object.values(this.state.files).reduce(
152
- (sum, file) => sum + (file.size || 0),
153
- 0
154
- );
155
-
156
- return {
157
- fileCount,
158
- totalChunks,
159
- totalSize,
160
- lastFullIndex: this.state.lastFullIndex,
161
- lastIncrementalIndex: this.state.lastIncrementalIndex,
162
- lastDuration: this.state.stats.lastDuration
163
- };
164
- }
165
-
166
- clear() {
167
- this.state = {
168
- version: 1,
169
- lastFullIndex: null,
170
- lastIncrementalIndex: null,
171
- stats: {
172
- totalFiles: 0,
173
- totalChunks: 0,
174
- lastDuration: 0
175
- },
176
- files: {}
177
- };
178
- }
179
-
180
- _recalculateStats() {
181
- const files = Object.values(this.state.files);
182
- this.state.stats.totalFiles = files.length;
183
- this.state.stats.totalChunks = files.reduce(
184
- (sum, file) => sum + file.chunkCount,
185
- 0
186
- );
187
- }
188
-
189
- static computeHash(content) {
190
- return createHash('sha256')
191
- .update(content)
192
- .digest('hex')
193
- .slice(0, 16);
194
- }
195
-
196
- static async computeFileHash(filePath) {
197
- const content = await readFile(filePath, 'utf8');
198
- return MerkleTracker.computeHash(content);
199
- }
200
- }
201
-
202
- // CLI Interface
203
- if (import.meta.url === `file://${process.argv[1]}`) {
204
- const tracker = new MerkleTracker();
205
- await tracker.load();
206
-
207
- const command = process.argv[2];
208
-
209
- switch (command) {
210
- case 'stats':
211
- console.log(JSON.stringify(tracker.getSummary(), null, 2));
212
- break;
213
-
214
- case 'list':
215
- console.log(tracker.getTrackedFiles().join('\n'));
216
- break;
217
-
218
- case 'clear':
219
- tracker.clear();
220
- await tracker.save();
221
- console.log('Tracking state cleared');
222
- break;
223
-
224
- case 'inspect':
225
- if (!process.argv[3]) {
226
- console.error('Usage: merkle-tracker.js inspect <file-path>');
227
- process.exit(1);
228
- }
229
- const metadata = tracker.state.files[process.argv[3]];
230
- console.log(JSON.stringify(metadata, null, 2));
231
- break;
232
-
233
- default:
234
- console.log(`
235
- Merkle File Tracker CLI
236
-
237
- Usage:
238
- merkle-tracker.js stats Show tracking statistics
239
- merkle-tracker.js list List all tracked files
240
- merkle-tracker.js inspect <file> Show metadata for a file
241
- merkle-tracker.js clear Clear all tracking state
242
- `);
243
- }
244
- }
1
+ #!/usr/bin/env node
2
+
3
+ /**
4
+ * Merkle Tree-Based File Tracking for Incremental Indexing
5
+ *
6
+ * Tracks file content hashes and chunk IDs to enable efficient change detection
7
+ * and selective re-indexing. Supports atomic updates and persistent state.
8
+ */
9
+
10
+ import { readFile, mkdir, open, rename, unlink } from 'fs/promises';
11
+ import { dirname, resolve } from 'path';
12
+ import { existsSync } from 'fs';
13
+ import { contentHashSync, HASH_ALGORITHM } from '../incremental-indexing/infrastructure/hashing.mjs';
14
+
15
/**
 * Persistent per-file tracking state for incremental indexing.
 *
 * For every tracked file the state records a content hash, the ids of the
 * chunks produced from it, the chunk count, a size, and when it was last
 * indexed. The state is kept in memory and persisted as JSON at `statePath`.
 */
export class MerkleTracker {
  /** Schema version written to, and required from, the state file. */
  static STATE_VERSION = 2;

  /**
   * @param {string} [statePath] - Location of the JSON state file; resolved
   *   to an absolute path at construction time.
   */
  constructor(statePath = '.sweet-search/merkle/codebase-state.json') {
    this.statePath = resolve(statePath);
    this.state = MerkleTracker._createEmptyState();
  }

  /**
   * Build a brand-new, empty state object (shared by the constructor and
   * `clear()` so the two can never drift apart).
   *
   * @returns {object} Empty state stamped with the current version and hash algorithm.
   */
  static _createEmptyState() {
    return {
      version: MerkleTracker.STATE_VERSION,
      hashAlgorithm: HASH_ALGORITHM,
      lastFullIndex: null,
      lastIncrementalIndex: null,
      stats: {
        totalFiles: 0,
        totalChunks: 0,
        lastDuration: 0
      },
      files: {}
    };
  }

  /** True for non-null, non-array objects (the shape JSON sections must have). */
  static _isRecord(value) {
    return typeof value === 'object' && value !== null && !Array.isArray(value);
  }

  /**
   * Load the state file into memory, if one exists.
   *
   * The loaded state is adopted only when it has the expected version, the
   * expected hash algorithm, and well-formed `files`/`stats` sections. In
   * every other case (no file, mismatch, unreadable or malformed content) the
   * in-memory state is left as it was and the reason is logged. Never throws.
   *
   * @returns {Promise<void>}
   */
  async load() {
    try {
      if (!existsSync(this.statePath)) {
        console.log('No existing state found, starting fresh');
        return;
      }

      const content = await readFile(this.statePath, 'utf8');
      const loaded = JSON.parse(content);

      if (!MerkleTracker._isRecord(loaded)) {
        throw new Error('state file does not contain a JSON object');
      }
      if (loaded.version !== MerkleTracker.STATE_VERSION) {
        console.warn(`Unknown state version ${loaded.version}, starting fresh`);
        return;
      }
      if (loaded.hashAlgorithm !== HASH_ALGORITHM) {
        // Hashes from a different algorithm can never match freshly computed
        // ones, so the old state is useless; say so instead of blaming the version.
        console.warn(`State hash algorithm ${loaded.hashAlgorithm} does not match ${HASH_ALGORITHM}, starting fresh`);
        return;
      }
      // Validate BEFORE adopting: otherwise a malformed file would replace
      // the in-memory state while the log claims a fresh start.
      if (!MerkleTracker._isRecord(loaded.files) || !MerkleTracker._isRecord(loaded.stats)) {
        throw new Error('state file is missing its "files" or "stats" section');
      }

      this.state = loaded;
      console.log(`Loaded tracking state: ${this.state.stats.totalFiles} files, ${this.state.stats.totalChunks} chunks`);
    } catch (error) {
      console.error('Failed to load tracking state:', error.message);
      console.log('Starting with fresh state');
    }
  }

  /**
   * Persist the in-memory state.
   *
   * The JSON is written to `<statePath>.tmp`, flushed, and then renamed over
   * the real file, so readers never observe a half-written state file. The
   * containing directory is flushed afterwards on a best-effort basis.
   *
   * @returns {Promise<void>}
   * @throws Re-throws the underlying error after removing the temp file.
   */
  async save() {
    const tmpPath = `${this.statePath}.tmp`;
    try {
      const dir = dirname(this.statePath);
      await mkdir(dir, { recursive: true });

      const content = JSON.stringify(this.state, null, 2);
      const tmpHandle = await open(tmpPath, 'w');
      try {
        await tmpHandle.writeFile(content, 'utf8');
        await tmpHandle.sync();
      } finally {
        await tmpHandle.close();
      }
      await rename(tmpPath, this.statePath);

      try {
        const dirHandle = await open(dir, 'r');
        try {
          await dirHandle.sync();
        } finally {
          await dirHandle.close();
        }
      } catch {
        // Some container/tmpfs filesystems reject directory fsync.
      }

      console.log(`Saved tracking state: ${this.state.stats.totalFiles} files, ${this.state.stats.totalChunks} chunks`);
    } catch (error) {
      try {
        await unlink(tmpPath);
      } catch {
        // The temp file may never have been created; nothing to clean up.
      }
      console.error('Failed to save tracking state:', error.message);
      throw error;
    }
  }

  /**
   * Look up the record for a file, considering only entries actually stored
   * in the state. A plain `files[filePath]` read would also find inherited
   * members for paths such as `constructor` or `toString`.
   *
   * @param {string} filePath
   * @returns {object|undefined} The tracked record, or `undefined` when untracked.
   */
  _getTracked(filePath) {
    const files = this.state.files;
    return Object.prototype.hasOwnProperty.call(files, filePath) ? files[filePath] : undefined;
  }

  /**
   * @param {string} filePath
   * @param {string} contentHash - Hash of the file's current content.
   * @returns {boolean} `true` when the file is untracked or its hash differs.
   */
  needsReindex(filePath, contentHash) {
    const tracked = this._getTracked(filePath);
    if (!tracked) return true;
    return tracked.contentHash !== contentHash;
  }

  /**
   * Hash every given file and collect those that need re-indexing. Files
   * that cannot be read are reported as changed.
   *
   * @param {Iterable<string>} allFiles
   * @returns {Promise<string[]>} Paths needing re-indexing, in input order.
   */
  async findChangedFiles(allFiles) {
    const changed = [];
    for (const filePath of allFiles) {
      try {
        const content = await readFile(filePath);
        const hash = MerkleTracker.computeHash(content);
        if (this.needsReindex(filePath, hash)) {
          changed.push(filePath);
        }
      } catch {
        changed.push(filePath);
      }
    }
    return changed;
  }

  /**
   * @param {string} filePath
   * @returns {string[]} Chunk ids recorded for the file, or `[]` when untracked.
   */
  getChunkIds(filePath) {
    const tracked = this._getTracked(filePath);
    return tracked ? tracked.chunkIds : [];
  }

  /**
   * Record (or replace) the tracking entry for a file and refresh the stats.
   *
   * When no `contentHash` is given the file is read and hashed here, and
   * `size` becomes the byte length of what was read; if that read fails the
   * hash is recorded as the literal `'unknown'`.
   *
   * @param {string} filePath
   * @param {string|null} [contentHash]
   * @param {string[]} [chunkIds]
   * @param {number} [size]
   * @returns {Promise<void>}
   */
  async updateFile(filePath, contentHash = null, chunkIds = [], size = 0) {
    const now = new Date().toISOString();

    if (!contentHash) {
      try {
        const content = await readFile(filePath);
        contentHash = MerkleTracker.computeHash(content);
        size = content.length;
      } catch {
        contentHash = 'unknown';
      }
    }

    this.state.files[filePath] = {
      contentHash,
      lastIndexed: now,
      chunkIds,
      chunkCount: chunkIds.length,
      size
    };

    this.state.lastIncrementalIndex = now;
    this._recalculateStats();
  }

  /**
   * Stop tracking a file.
   *
   * @param {string} filePath
   * @returns {string[]} The chunk ids that were recorded for it (`[]` if untracked).
   */
  removeFile(filePath) {
    const chunkIds = this.getChunkIds(filePath);
    delete this.state.files[filePath];
    this._recalculateStats();
    return chunkIds;
  }

  /** @returns {string[]} Paths of all tracked files. */
  getTrackedFiles() {
    return Object.keys(this.state.files);
  }

  /** @returns {{totalFiles: number, totalChunks: number, lastDuration: number}} A copy of the stats. */
  getStats() {
    return { ...this.state.stats };
  }

  /**
   * Totals computed directly from the tracked entries, plus index timestamps.
   *
   * @returns {{fileCount: number, totalChunks: number, totalSize: number, lastFullIndex: *, lastIncrementalIndex: *, lastDuration: number}}
   */
  getSummary() {
    const entries = Object.values(this.state.files);
    const totalChunks = entries.reduce((sum, file) => sum + file.chunkCount, 0);
    const totalSize = entries.reduce((sum, file) => sum + (file.size || 0), 0);

    return {
      fileCount: entries.length,
      totalChunks,
      totalSize,
      lastFullIndex: this.state.lastFullIndex,
      lastIncrementalIndex: this.state.lastIncrementalIndex,
      lastDuration: this.state.stats.lastDuration
    };
  }

  /** Reset the in-memory state to empty (does not touch the file on disk). */
  clear() {
    this.state = MerkleTracker._createEmptyState();
  }

  /** Recompute `stats.totalFiles` / `stats.totalChunks` from the tracked entries. */
  _recalculateStats() {
    const files = Object.values(this.state.files);
    this.state.stats.totalFiles = files.length;
    this.state.stats.totalChunks = files.reduce(
      (sum, file) => sum + file.chunkCount,
      0
    );
  }

  /**
   * Hash content with the shared `contentHashSync` helper.
   *
   * @param {string|Buffer} content
   * @returns {string} Whatever `contentHashSync` returns for the content.
   */
  static computeHash(content) {
    return contentHashSync(content);
  }

  /**
   * Read a file as raw bytes and hash it.
   *
   * @param {string} filePath
   * @returns {Promise<string>}
   */
  static async computeFileHash(filePath) {
    const content = await readFile(filePath);
    return MerkleTracker.computeHash(content);
  }
}
+
209
// CLI Interface
//
// Runs only when this file is the process entry point. The entry path is
// converted with `pathToFileURL` before comparing against `import.meta.url`:
// a hand-built `file://${path}` string never matches when the path needs URL
// encoding (spaces, non-ASCII characters) or uses Windows separators, which
// would make the CLI silently do nothing.
if (
  process.argv[1]
  && import.meta.url === (await import('node:url')).pathToFileURL(process.argv[1]).href
) {
  const tracker = new MerkleTracker();
  await tracker.load();

  const command = process.argv[2];

  switch (command) {
    case 'stats':
      console.log(JSON.stringify(tracker.getSummary(), null, 2));
      break;

    case 'list':
      console.log(tracker.getTrackedFiles().join('\n'));
      break;

    case 'clear':
      tracker.clear();
      await tracker.save();
      console.log('Tracking state cleared');
      break;

    case 'inspect': {
      // Braces give `metadata` its own scope instead of leaking the
      // declaration into every other case of the switch.
      if (!process.argv[3]) {
        console.error('Usage: merkle-tracker.js inspect <file-path>');
        process.exit(1);
      }
      const metadata = tracker.state.files[process.argv[3]];
      console.log(JSON.stringify(metadata, null, 2));
      break;
    }

    default:
      // Any other (or missing) command prints the usage text.
      console.log(`
Merkle File Tracker CLI

Usage:
merkle-tracker.js stats Show tracking statistics
merkle-tracker.js list List all tracked files
merkle-tracker.js inspect <file> Show metadata for a file
merkle-tracker.js clear Clear all tracking state
`);
  }
}