sweet-search 2.5.1 → 2.5.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (159) hide show
  1. package/core/cli.js +45 -0
  2. package/core/embedding/embedding-cache.js +90 -4
  3. package/core/embedding/embedding-service.js +27 -5
  4. package/core/graph/graph-expansion.js +215 -36
  5. package/core/graph/graph-extractor.js +196 -11
  6. package/core/graph/graph-search.js +395 -92
  7. package/core/graph/hcgs-generator.js +2 -1
  8. package/core/graph/index.js +2 -0
  9. package/core/graph/repo-map.js +28 -6
  10. package/core/graph/structural-answer-cues.js +168 -0
  11. package/core/graph/structural-callsite-hints.js +40 -0
  12. package/core/graph/structural-context-format.js +40 -0
  13. package/core/graph/structural-context.js +450 -0
  14. package/core/graph/structural-forward-push.js +156 -0
  15. package/core/graph/structural-header-context.js +19 -0
  16. package/core/graph/structural-importance.js +148 -0
  17. package/core/graph/structural-pagerank.js +197 -0
  18. package/core/graph/summary-manager.js +13 -9
  19. package/core/incremental-indexing/application/dirty-scan.mjs +236 -0
  20. package/core/incremental-indexing/application/file-watcher.mjs +197 -0
  21. package/core/incremental-indexing/application/maintenance-handlers.mjs +519 -0
  22. package/core/incremental-indexing/application/maintenance-worker.mjs +380 -0
  23. package/core/incremental-indexing/application/operator-cli.mjs +554 -0
  24. package/core/incremental-indexing/application/production-li-delta.mjs +192 -0
  25. package/core/incremental-indexing/application/production-reconciler-helpers.mjs +107 -0
  26. package/core/incremental-indexing/application/production-reconciler.mjs +583 -0
  27. package/core/incremental-indexing/application/reconciler.mjs +477 -0
  28. package/core/incremental-indexing/application/tombstone-injector.mjs +148 -0
  29. package/core/incremental-indexing/domain/chunk-identity.mjs +260 -0
  30. package/core/incremental-indexing/domain/encoder-deps.mjs +193 -0
  31. package/core/incremental-indexing/domain/encoder-input.mjs +225 -0
  32. package/core/incremental-indexing/domain/interval-autotune.mjs +255 -0
  33. package/core/incremental-indexing/domain/reconcile-counters.mjs +149 -0
  34. package/core/incremental-indexing/domain/watermark-scheduler.mjs +239 -0
  35. package/core/incremental-indexing/infrastructure/artifact-temp-sweep.mjs +163 -0
  36. package/core/incremental-indexing/infrastructure/baseline-readiness.mjs +121 -0
  37. package/core/incremental-indexing/infrastructure/dirty-set.mjs +233 -0
  38. package/core/incremental-indexing/infrastructure/graph-gc.mjs +314 -0
  39. package/core/incremental-indexing/infrastructure/hashing.mjs +298 -0
  40. package/core/incremental-indexing/infrastructure/hcgs-invalidation.mjs +182 -0
  41. package/core/incremental-indexing/infrastructure/li-segment-merge.mjs +278 -0
  42. package/core/incremental-indexing/infrastructure/li-segment-state.mjs +173 -0
  43. package/core/incremental-indexing/infrastructure/lockfile.mjs +119 -0
  44. package/core/incremental-indexing/infrastructure/maintenance-state-reader.mjs +283 -0
  45. package/core/incremental-indexing/infrastructure/manifest.mjs +194 -0
  46. package/core/incremental-indexing/infrastructure/path-filter.mjs +190 -0
  47. package/core/incremental-indexing/infrastructure/reader-heartbeat.mjs +201 -0
  48. package/core/incremental-indexing/infrastructure/schema-migrations.mjs +257 -0
  49. package/core/incremental-indexing/infrastructure/sparse-gram-delta.mjs +335 -0
  50. package/core/incremental-indexing/infrastructure/sqlite-fts5.mjs +176 -0
  51. package/core/incremental-indexing/infrastructure/staleness-display.mjs +105 -0
  52. package/core/incremental-indexing/infrastructure/tombstone-bitmap.mjs +234 -0
  53. package/core/incremental-indexing/infrastructure/vector-delta-writer.mjs +359 -0
  54. package/core/incremental-indexing/infrastructure/vector-gc.mjs +133 -0
  55. package/core/incremental-indexing/infrastructure/worktree-stamp.mjs +155 -0
  56. package/core/incremental-indexing/infrastructure/wsl2-detect.mjs +115 -0
  57. package/core/indexing/admission-policy.js +139 -0
  58. package/core/indexing/artifact-builder.js +29 -12
  59. package/core/indexing/ast-chunker.js +107 -30
  60. package/core/indexing/dedup/exemplar-selector.js +19 -1
  61. package/core/indexing/gitignore-filter.js +223 -0
  62. package/core/indexing/incremental-tracker.js +99 -30
  63. package/core/indexing/index-codebase-v21.js +37 -7
  64. package/core/indexing/index-maintainer.mjs +698 -6
  65. package/core/indexing/indexer-ann.js +99 -15
  66. package/core/indexing/indexer-build.js +158 -45
  67. package/core/indexing/indexer-empty-baseline.js +80 -0
  68. package/core/indexing/indexer-manifest.js +66 -0
  69. package/core/indexing/indexer-phases.js +56 -23
  70. package/core/indexing/indexer-sparse-gram.js +54 -13
  71. package/core/indexing/indexer-utils.js +26 -208
  72. package/core/indexing/indexing-file-policy.js +32 -7
  73. package/core/indexing/maintainer-launcher.mjs +137 -0
  74. package/core/indexing/merkle-tracker.js +251 -244
  75. package/core/indexing/model-pool.js +46 -5
  76. package/core/infrastructure/code-graph-repository.js +758 -6
  77. package/core/infrastructure/code-graph-visibility.js +157 -0
  78. package/core/infrastructure/codebase-repository.js +100 -13
  79. package/core/infrastructure/config/search.js +1 -1
  80. package/core/infrastructure/db-utils.js +118 -0
  81. package/core/infrastructure/dedup-hashing.js +10 -13
  82. package/core/infrastructure/hardware-capability.js +17 -7
  83. package/core/infrastructure/index.js +10 -2
  84. package/core/infrastructure/init-config.js +138 -0
  85. package/core/infrastructure/language-patterns/maps.js +4 -1
  86. package/core/infrastructure/language-patterns/registry-core.js +56 -17
  87. package/core/infrastructure/language-patterns/registry-object-oriented.js +12 -5
  88. package/core/infrastructure/language-patterns.js +69 -0
  89. package/core/infrastructure/model-registry.js +20 -0
  90. package/core/infrastructure/native-inference.js +7 -12
  91. package/core/infrastructure/native-resolver.js +52 -37
  92. package/core/infrastructure/native-sparse-gram.js +261 -20
  93. package/core/infrastructure/native-tokenizer.js +6 -15
  94. package/core/infrastructure/simd-distance.js +10 -16
  95. package/core/infrastructure/sparse-gram-delta-reader.js +76 -0
  96. package/core/infrastructure/structural-alias-resolver.js +122 -0
  97. package/core/infrastructure/structural-candidate-ranker.js +34 -0
  98. package/core/infrastructure/structural-context-repository.js +472 -0
  99. package/core/infrastructure/structural-context-utils.js +51 -0
  100. package/core/infrastructure/structural-graph-signals.js +121 -0
  101. package/core/infrastructure/structural-qualified-resolution.js +15 -0
  102. package/core/infrastructure/structural-source-definitions.js +100 -0
  103. package/core/infrastructure/tombstone-bitmap-reader.js +139 -0
  104. package/core/infrastructure/tree-sitter-provider.js +811 -37
  105. package/core/prompt-optimization/data/p7-final/sweet-search-system-prompt.md +50 -0
  106. package/core/query/query-router.js +55 -5
  107. package/core/ranking/file-kind-ranking.js +2192 -15
  108. package/core/ranking/late-interaction-index.js +87 -12
  109. package/core/search/cli-decoration.js +290 -0
  110. package/core/search/context-expander.js +988 -78
  111. package/core/search/index.js +1 -0
  112. package/core/search/output-policy.js +275 -0
  113. package/core/search/search-anchor.js +499 -0
  114. package/core/search/search-boost.js +93 -1
  115. package/core/search/search-cli.js +61 -204
  116. package/core/search/search-hybrid.js +250 -10
  117. package/core/search/search-pattern-chunks.js +57 -8
  118. package/core/search/search-pattern-planner.js +68 -9
  119. package/core/search/search-pattern-prefilter.js +30 -10
  120. package/core/search/search-pattern-ripgrep.js +40 -4
  121. package/core/search/search-pattern-sparse-overlay.js +256 -0
  122. package/core/search/search-pattern.js +117 -29
  123. package/core/search/search-postprocess.js +479 -5
  124. package/core/search/search-read-semantic.js +277 -23
  125. package/core/search/search-read.js +82 -64
  126. package/core/search/search-reader-pin.js +71 -0
  127. package/core/search/search-rrf.js +279 -0
  128. package/core/search/search-semantic.js +110 -5
  129. package/core/search/search-server.js +273 -54
  130. package/core/search/search-trace.js +107 -0
  131. package/core/search/server-identity.js +93 -0
  132. package/core/search/session-daemon-prewarm.mjs +33 -10
  133. package/core/search/sweet-search.js +414 -9
  134. package/core/skills/sweet-index/SKILL.md +8 -6
  135. package/core/start-server.js +13 -2
  136. package/core/vector-store/binary-hnsw-index.js +194 -30
  137. package/core/vector-store/float-vector-store.js +96 -6
  138. package/core/vector-store/hnsw-index.js +220 -49
  139. package/eval/agent-read-workflows/bin/_ss-helpers.mjs +471 -0
  140. package/eval/agent-read-workflows/bin/ss-find +15 -0
  141. package/eval/agent-read-workflows/bin/ss-grep +12 -0
  142. package/eval/agent-read-workflows/bin/ss-read +14 -0
  143. package/eval/agent-read-workflows/bin/ss-search +18 -0
  144. package/eval/agent-read-workflows/bin/ss-semantic +12 -0
  145. package/eval/agent-read-workflows/bin/ss-trace +11 -0
  146. package/mcp/read-tool.js +109 -0
  147. package/mcp/server.js +55 -15
  148. package/mcp/tool-handlers.js +14 -124
  149. package/mcp/trace-tool.js +81 -0
  150. package/package.json +25 -10
  151. package/scripts/hooks/intercept-read.mjs +55 -0
  152. package/scripts/hooks/remind-tools.mjs +40 -0
  153. package/scripts/init.js +698 -54
  154. package/scripts/inject-agent-instructions.js +431 -0
  155. package/scripts/install-prompt-reminders.js +188 -0
  156. package/scripts/install-tool-enforcement.js +220 -0
  157. package/scripts/smoke-test.js +12 -9
  158. package/scripts/uninstall.js +427 -23
  159. package/scripts/write-claude-rules.js +110 -0
@@ -0,0 +1,197 @@
1
+ /**
2
+ * File-watcher abstraction.
3
+ *
4
+ * Plan § 9.1-9.5. Two sources push paths into the dirty set:
5
+ * 1. Watcher (Rust notify via FSEvents/inotify/ReadDirectoryChangesW).
6
+ * 2. Polling backstop: walk the tracked roots and compare
7
+ * `(mtime, size, inode)` tuples against `merkle-state.json`.
8
+ *
9
+ * Phase 4 ships the JS side with `node:fs.watch` as the baseline watcher
10
+ * (sufficient for unit tests + cross-platform parity). Phase 6 plugs in
11
+ * the Rust binding via `crates/sweet-search-native`. The interface stays
12
+ * stable: a constructor receives the `DirtySet`, `path-filter`, and
13
+ * project root; `start()` and `stop()` are async lifecycle calls.
14
+ *
15
+ * ENOSPC handling: if `inotify_add_watch` fails with ENOSPC (Linux
16
+ * default 524 288 watches exhausted), the watcher emits a single WARN,
17
+ * marks the subtree as "polling-only", and never crashes. The polling
18
+ * backstop is the correctness mechanism even when the watcher dies.
19
+ */
20
+
21
+ import fs from 'node:fs';
22
+ import path from 'node:path';
23
+ import { canonicaliseInsideRoot } from '../infrastructure/dirty-set.mjs';
24
+
25
+ const DEFAULT_DEBOUNCE_MS = 200;
26
+
27
/**
 * Watches a project root with `node:fs.watch` and records changed paths in
 * a dirty set once a per-path debounce window has elapsed.
 *
 * Only the constructor throws (on missing required options). Failures while
 * starting the watcher, and errors the watcher emits later, are reported via
 * `logger.warn`.
 */
export class FileWatcher {
  /**
   * @param {object} options
   * @param {string} options.projectRoot
   * @param {import('../infrastructure/dirty-set.mjs').DirtySet} options.dirtySet
   * @param {(rel:string)=>boolean} [options.isExcluded] Defaults to excluding nothing.
   * @param {number} [options.debounceMs]
   * @param {{warn:Function, error:Function, info:Function}} [options.logger]
   * @throws {Error} When `projectRoot` or `dirtySet` is missing.
   */
  constructor({ projectRoot, dirtySet, isExcluded, debounceMs = DEFAULT_DEBOUNCE_MS, logger = console }) {
    if (!projectRoot) throw new Error('FileWatcher: projectRoot is required');
    if (!dirtySet) throw new Error('FileWatcher: dirtySet is required');
    this.projectRoot = projectRoot;
    this.dirtySet = dirtySet;
    this.isExcluded = isExcluded || (() => false);
    this.debounceMs = debounceMs;
    this.logger = logger;
    this._watchers = new Set();
    this._pending = new Map(); // canonical path → debounce timer
    this._enospcSeen = false;
    this._stopped = false;
  }

  /**
   * Start watching the project root recursively. Failures to create the
   * watcher are logged as warnings instead of being thrown.
   *
   * Calling `start()` while a watcher is already registered is a no-op, so
   * repeated calls never stack native watchers on the same root.
   *
   * @returns {Promise<void>}
   */
  async start() {
    this._stopped = false;
    // A second watcher on the same root would only duplicate every event
    // and hold an extra handle until stop().
    if (this._watchers.size > 0) return;
    try {
      const watcher = fs.watch(this.projectRoot, { recursive: true, persistent: false });
      watcher.on('change', (eventType, filename) => {
        if (!filename || this._stopped) return;
        this._handleEvent(filename);
      });
      watcher.on('error', (err) => {
        if (err?.code === 'ENOSPC') {
          this._handleEnospc(err);
          return;
        }
        // Null-safe on purpose: a throw inside an 'error' listener would
        // escape as an uncaught exception.
        this.logger.warn?.(`[file-watcher] error: ${err?.message ?? err}`);
      });
      this._watchers.add(watcher);
    } catch (err) {
      if (err && err.code === 'ENOSPC') {
        this._handleEnospc(err);
      } else if (err && err.code === 'ENOENT') {
        this.logger.warn?.(`[file-watcher] projectRoot missing: ${this.projectRoot}`);
      } else {
        this.logger.warn?.(`[file-watcher] start failed: ${err?.message ?? err}`);
      }
    }
  }

  /**
   * Close every watcher and cancel all pending debounce timers. Paths that
   * were still waiting out their debounce window are dropped.
   *
   * @returns {Promise<void>}
   */
  async stop() {
    this._stopped = true;
    for (const watcher of this._watchers) {
      // Best effort: a failing close() must not prevent the remaining cleanup.
      try { watcher.close(); } catch {}
    }
    this._watchers.clear();
    for (const timer of this._pending.values()) clearTimeout(timer);
    this._pending.clear();
  }

  /**
   * Record that the watch limit was hit and log the remediation hint.
   * Only the first occurrence is logged; later calls return silently.
   *
   * @param {Error} err
   */
  _handleEnospc(err) {
    if (this._enospcSeen) return;
    this._enospcSeen = true;
    this.logger.warn?.(
      '[file-watcher] inotify watch limit exhausted (ENOSPC). Falling back to polling. ' +
        'Remediation: `sudo sysctl fs.inotify.max_user_watches=524288` or higher. ' +
        `Detail: ${err.message}`,
    );
  }

  /**
   * Filter one raw watcher event and schedule the debounced dirty-set push.
   *
   * @param {string} filename Path as reported by `fs.watch`.
   */
  _handleEvent(filename) {
    const norm = filename.replace(/\\/g, '/');
    if (this.isExcluded(norm)) return;
    // Editor atomic-save flows create temp/swap files. Dropping them before
    // scheduling means they never allocate a timer or count as pending.
    if (this._isTempFile(norm)) return;
    const abs = canonicaliseInsideRoot(this.projectRoot, filename);
    if (!abs) return;

    // Debounce: editor "atomic save" patterns emit CREATE/WRITE/RENAME
    // bursts. Coalesce within `debounceMs` then push once.
    const existing = this._pending.get(abs);
    if (existing) clearTimeout(existing);
    const timer = setTimeout(() => {
      this._pending.delete(abs);
      if (this._stopped) return;
      this.dirtySet.add(abs, 'watcher', { eventTime: Date.now() });
    }, this.debounceMs);
    this._pending.set(abs, timer);
  }

  /**
   * @param {string} relPath
   * @returns {boolean} True when the basename looks like an editor temp,
   *   swap, lock or backup file.
   */
  _isTempFile(relPath) {
    const base = path.basename(relPath);
    if (/\.swp$/.test(base)) return true;
    if (/\.tmp$/.test(base)) return true;
    if (/^\.#/.test(base)) return true; // emacs lockfiles
    if (/^~/.test(base)) return true;
    if (/~$/.test(base)) return true; // gedit backup
    return false;
  }

  /**
   * Diagnostic snapshot: registered watcher count, paths currently waiting
   * out their debounce window, whether the ENOSPC warning has fired, and
   * whether the watcher is stopped.
   *
   * @returns {{watchers:number, pending:number, enospcFallback:boolean, stopped:boolean}}
   */
  status() {
    return {
      watchers: this._watchers.size,
      pending: this._pending.size,
      enospcFallback: this._enospcSeen,
      stopped: this._stopped,
    };
  }
}
140
+
141
/**
 * Polling backstop. Plan § 9.1: every reconcile-interval seconds, walk
 * the tracked roots and re-discover any dirty file the watcher missed.
 *
 * The backstop is a function, not a class — it's stateless and the
 * caller (Reconciler) schedules it on the same timer as the tick.
 *
 * Directories that cannot be read and files that cannot be stat'ed are
 * skipped silently. Only regular files are counted; directory entries that
 * are neither directories nor regular files are ignored.
 *
 * @param {string} projectRoot
 * @param {((rel:string)=>boolean)|null} [isExcluded] Receives the
 *   `/`-separated path relative to `projectRoot`; returning true skips the
 *   entry (and, for a directory, everything below it). When null or
 *   undefined nothing is excluded.
 * @param {import('../infrastructure/dirty-set.mjs').DirtySet} dirtySet
 * @param {(entry:{absPath:string, relPath:string, stat:fs.Stats})=>boolean|Promise<boolean>} [shouldEnqueue]
 *   Optional change detector; a falsy result counts the file as unchanged.
 * @returns {Promise<{filesSeen:number, filesEnqueued:number, filesSkippedUnchanged:number}>}
 */
export async function pollingBackstopSweep(projectRoot, isExcluded, dirtySet, shouldEnqueue = null) {
  // Same fallback FileWatcher applies, so both entry points accept an omitted filter.
  const excluded = isExcluded ?? (() => false);
  let filesSeen = 0;
  let filesEnqueued = 0;
  let filesSkippedUnchanged = 0;

  async function walk(dir, rel) {
    let entries;
    try {
      entries = await fs.promises.readdir(dir, { withFileTypes: true });
    } catch {
      // Unreadable or vanished directory: skip this subtree.
      return;
    }
    for (const entry of entries) {
      const childRel = rel ? `${rel}/${entry.name}` : entry.name;
      if (excluded(childRel)) continue;
      const childAbs = path.join(dir, entry.name);
      if (entry.isDirectory()) {
        await walk(childAbs, childRel);
        continue;
      }
      if (!entry.isFile()) continue;
      filesSeen += 1;
      const canonicalAbs = canonicaliseInsideRoot(projectRoot, childAbs);
      if (!canonicalAbs) continue;
      if (shouldEnqueue) {
        let stat;
        try {
          stat = await fs.promises.stat(canonicalAbs);
        } catch {
          // The file disappeared between readdir and stat.
          continue;
        }
        if (!await shouldEnqueue({ absPath: canonicalAbs, relPath: childRel, stat })) {
          filesSkippedUnchanged += 1;
          continue;
        }
      }
      // Paths already in the dirty set are not re-added or re-counted.
      if (!dirtySet.has(canonicalAbs)) {
        dirtySet.add(canonicalAbs, 'polling');
        filesEnqueued += 1;
      }
    }
  }

  await walk(projectRoot, '');
  return { filesSeen, filesEnqueued, filesSkippedUnchanged };
}