sweet-search 2.5.2 → 2.5.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (155)
  1. package/core/cli.js +24 -3
  2. package/core/graph/graph-expansion.js +215 -36
  3. package/core/graph/graph-extractor.js +196 -11
  4. package/core/graph/graph-search.js +395 -92
  5. package/core/graph/hcgs-generator.js +2 -1
  6. package/core/graph/index.js +2 -0
  7. package/core/graph/repo-map.js +28 -6
  8. package/core/graph/structural-answer-cues.js +168 -0
  9. package/core/graph/structural-callsite-hints.js +40 -0
  10. package/core/graph/structural-context-format.js +40 -0
  11. package/core/graph/structural-context.js +450 -0
  12. package/core/graph/structural-forward-push.js +156 -0
  13. package/core/graph/structural-header-context.js +19 -0
  14. package/core/graph/structural-importance.js +148 -0
  15. package/core/graph/structural-pagerank.js +197 -0
  16. package/core/graph/summary-manager.js +13 -9
  17. package/core/incremental-indexing/application/dirty-scan.mjs +236 -0
  18. package/core/incremental-indexing/application/file-watcher.mjs +197 -0
  19. package/core/incremental-indexing/application/maintenance-handlers.mjs +519 -0
  20. package/core/incremental-indexing/application/maintenance-worker.mjs +380 -0
  21. package/core/incremental-indexing/application/operator-cli.mjs +554 -0
  22. package/core/incremental-indexing/application/production-li-delta.mjs +192 -0
  23. package/core/incremental-indexing/application/production-reconciler-helpers.mjs +107 -0
  24. package/core/incremental-indexing/application/production-reconciler.mjs +583 -0
  25. package/core/incremental-indexing/application/reconciler.mjs +477 -0
  26. package/core/incremental-indexing/application/tombstone-injector.mjs +148 -0
  27. package/core/incremental-indexing/domain/chunk-identity.mjs +260 -0
  28. package/core/incremental-indexing/domain/encoder-deps.mjs +193 -0
  29. package/core/incremental-indexing/domain/encoder-input.mjs +225 -0
  30. package/core/incremental-indexing/domain/interval-autotune.mjs +255 -0
  31. package/core/incremental-indexing/domain/reconcile-counters.mjs +149 -0
  32. package/core/incremental-indexing/domain/watermark-scheduler.mjs +239 -0
  33. package/core/incremental-indexing/infrastructure/artifact-temp-sweep.mjs +163 -0
  34. package/core/incremental-indexing/infrastructure/baseline-readiness.mjs +121 -0
  35. package/core/incremental-indexing/infrastructure/dirty-set.mjs +233 -0
  36. package/core/incremental-indexing/infrastructure/graph-gc.mjs +314 -0
  37. package/core/incremental-indexing/infrastructure/hashing.mjs +298 -0
  38. package/core/incremental-indexing/infrastructure/hcgs-invalidation.mjs +182 -0
  39. package/core/incremental-indexing/infrastructure/li-segment-merge.mjs +278 -0
  40. package/core/incremental-indexing/infrastructure/li-segment-state.mjs +173 -0
  41. package/core/incremental-indexing/infrastructure/lockfile.mjs +119 -0
  42. package/core/incremental-indexing/infrastructure/maintenance-state-reader.mjs +283 -0
  43. package/core/incremental-indexing/infrastructure/manifest.mjs +194 -0
  44. package/core/incremental-indexing/infrastructure/path-filter.mjs +190 -0
  45. package/core/incremental-indexing/infrastructure/reader-heartbeat.mjs +201 -0
  46. package/core/incremental-indexing/infrastructure/schema-migrations.mjs +257 -0
  47. package/core/incremental-indexing/infrastructure/sparse-gram-delta.mjs +335 -0
  48. package/core/incremental-indexing/infrastructure/sqlite-fts5.mjs +176 -0
  49. package/core/incremental-indexing/infrastructure/staleness-display.mjs +105 -0
  50. package/core/incremental-indexing/infrastructure/tombstone-bitmap.mjs +234 -0
  51. package/core/incremental-indexing/infrastructure/vector-delta-writer.mjs +359 -0
  52. package/core/incremental-indexing/infrastructure/vector-gc.mjs +133 -0
  53. package/core/incremental-indexing/infrastructure/worktree-stamp.mjs +155 -0
  54. package/core/incremental-indexing/infrastructure/wsl2-detect.mjs +115 -0
  55. package/core/indexing/admission-policy.js +139 -0
  56. package/core/indexing/artifact-builder.js +29 -12
  57. package/core/indexing/ast-chunker.js +107 -30
  58. package/core/indexing/dedup/exemplar-selector.js +19 -1
  59. package/core/indexing/gitignore-filter.js +223 -0
  60. package/core/indexing/incremental-tracker.js +99 -30
  61. package/core/indexing/index-codebase-v21.js +6 -5
  62. package/core/indexing/index-maintainer.mjs +698 -6
  63. package/core/indexing/indexer-ann.js +99 -15
  64. package/core/indexing/indexer-build.js +158 -45
  65. package/core/indexing/indexer-empty-baseline.js +80 -0
  66. package/core/indexing/indexer-manifest.js +66 -0
  67. package/core/indexing/indexer-phases.js +56 -23
  68. package/core/indexing/indexer-sparse-gram.js +54 -13
  69. package/core/indexing/indexer-utils.js +26 -208
  70. package/core/indexing/indexing-file-policy.js +32 -7
  71. package/core/indexing/maintainer-launcher.mjs +137 -0
  72. package/core/indexing/merkle-tracker.js +251 -244
  73. package/core/indexing/model-pool.js +46 -5
  74. package/core/infrastructure/code-graph-repository.js +758 -6
  75. package/core/infrastructure/code-graph-visibility.js +157 -0
  76. package/core/infrastructure/codebase-repository.js +100 -13
  77. package/core/infrastructure/config/search.js +1 -1
  78. package/core/infrastructure/db-utils.js +118 -0
  79. package/core/infrastructure/dedup-hashing.js +10 -13
  80. package/core/infrastructure/hardware-capability.js +17 -7
  81. package/core/infrastructure/index.js +8 -2
  82. package/core/infrastructure/language-patterns/maps.js +4 -1
  83. package/core/infrastructure/language-patterns/registry-core.js +56 -17
  84. package/core/infrastructure/language-patterns/registry-object-oriented.js +12 -5
  85. package/core/infrastructure/language-patterns.js +69 -0
  86. package/core/infrastructure/model-registry.js +20 -0
  87. package/core/infrastructure/native-inference.js +7 -12
  88. package/core/infrastructure/native-resolver.js +52 -37
  89. package/core/infrastructure/native-sparse-gram.js +261 -20
  90. package/core/infrastructure/native-tokenizer.js +6 -15
  91. package/core/infrastructure/simd-distance.js +10 -16
  92. package/core/infrastructure/sparse-gram-delta-reader.js +76 -0
  93. package/core/infrastructure/structural-alias-resolver.js +122 -0
  94. package/core/infrastructure/structural-candidate-ranker.js +34 -0
  95. package/core/infrastructure/structural-context-repository.js +472 -0
  96. package/core/infrastructure/structural-context-utils.js +51 -0
  97. package/core/infrastructure/structural-graph-signals.js +121 -0
  98. package/core/infrastructure/structural-qualified-resolution.js +15 -0
  99. package/core/infrastructure/structural-source-definitions.js +100 -0
  100. package/core/infrastructure/tombstone-bitmap-reader.js +139 -0
  101. package/core/infrastructure/tree-sitter-provider.js +811 -37
  102. package/core/prompt-optimization/data/p7-final/sweet-search-system-prompt.md +50 -0
  103. package/core/query/query-router.js +55 -5
  104. package/core/ranking/file-kind-ranking.js +2192 -15
  105. package/core/ranking/late-interaction-index.js +87 -12
  106. package/core/search/cli-decoration.js +290 -0
  107. package/core/search/context-expander.js +988 -78
  108. package/core/search/index.js +1 -0
  109. package/core/search/output-policy.js +275 -0
  110. package/core/search/search-anchor.js +499 -0
  111. package/core/search/search-boost.js +93 -1
  112. package/core/search/search-cli.js +61 -204
  113. package/core/search/search-hybrid.js +250 -10
  114. package/core/search/search-pattern-chunks.js +57 -8
  115. package/core/search/search-pattern-planner.js +68 -9
  116. package/core/search/search-pattern-prefilter.js +30 -10
  117. package/core/search/search-pattern-ripgrep.js +40 -4
  118. package/core/search/search-pattern-sparse-overlay.js +256 -0
  119. package/core/search/search-pattern.js +117 -29
  120. package/core/search/search-postprocess.js +479 -5
  121. package/core/search/search-read-semantic.js +260 -23
  122. package/core/search/search-read.js +82 -64
  123. package/core/search/search-reader-pin.js +71 -0
  124. package/core/search/search-rrf.js +279 -0
  125. package/core/search/search-semantic.js +110 -5
  126. package/core/search/search-server.js +130 -57
  127. package/core/search/search-trace.js +107 -0
  128. package/core/search/server-identity.js +93 -0
  129. package/core/search/session-daemon-prewarm.mjs +33 -10
  130. package/core/search/sweet-search.js +399 -7
  131. package/core/skills/sweet-index/SKILL.md +8 -6
  132. package/core/vector-store/binary-hnsw-index.js +194 -30
  133. package/core/vector-store/float-vector-store.js +96 -6
  134. package/core/vector-store/hnsw-index.js +220 -49
  135. package/eval/agent-read-workflows/bin/_ss-helpers.mjs +471 -0
  136. package/eval/agent-read-workflows/bin/ss-find +15 -0
  137. package/eval/agent-read-workflows/bin/ss-grep +12 -0
  138. package/eval/agent-read-workflows/bin/ss-read +14 -0
  139. package/eval/agent-read-workflows/bin/ss-search +18 -0
  140. package/eval/agent-read-workflows/bin/ss-semantic +12 -0
  141. package/eval/agent-read-workflows/bin/ss-trace +11 -0
  142. package/mcp/read-tool.js +109 -0
  143. package/mcp/server.js +55 -15
  144. package/mcp/tool-handlers.js +14 -124
  145. package/mcp/trace-tool.js +81 -0
  146. package/package.json +25 -10
  147. package/scripts/hooks/intercept-read.mjs +55 -0
  148. package/scripts/hooks/remind-tools.mjs +40 -0
  149. package/scripts/init.js +698 -54
  150. package/scripts/inject-agent-instructions.js +431 -0
  151. package/scripts/install-prompt-reminders.js +188 -0
  152. package/scripts/install-tool-enforcement.js +220 -0
  153. package/scripts/smoke-test.js +12 -9
  154. package/scripts/uninstall.js +276 -18
  155. package/scripts/write-claude-rules.js +110 -0
@@ -4,37 +4,185 @@
4
4
  */
5
5
 
6
6
  import { existsSync } from 'fs';
7
- import { createRequire } from 'module';
8
- import { resolveNativeAddon } from './native-resolver.js';
7
+ import { loadNativeAddon } from './native-resolver.js';
9
8
  import { SPARSE_SYMBOL_MASKS, resolveSparseSymbolMask } from './constants.js';
10
9
 
11
10
  // Re-export from constants.js — canonical source of symbol type vocabulary.
12
11
  export { SPARSE_SYMBOL_MASKS, resolveSparseSymbolMask };
13
12
 
14
- const require = createRequire(import.meta.url);
15
-
16
13
  let _addon = null;
17
14
  let _addonLoaded = false;
15
+ let _fallbackWeights = null;
16
+
17
+ const ASCII_DIM = 128;
18
+ const WEIGHT_TABLE_LEN = ASCII_DIM * ASCII_DIM;
19
+ const MIN_SPAN_LEN = 3;
20
+ const MAX_GRAM_LEN = 12;
21
+ const FALLBACK_WEIGHTS_ID = 'common-code-bigram-v1';
22
+ const COMMON_CODE_BIGRAMS = [
23
+ ['th', 5000], ['he', 4800], ['in', 4700], ['er', 4500], ['re', 4300], ['fo', 4200], ['or', 4200], ['fu', 4100],
24
+ ['un', 4000], ['ct', 3900], ['cl', 3800], ['ss', 3700], ['co', 3600], ['de', 3500], ['nt', 3400], ['io', 3300],
25
+ ['on', 3200], ['st', 3100], ['te', 3000], ['ra', 2900], ['ri', 2800], ['al', 2700], ['se', 2600], ['it', 2500],
26
+ ['at', 2400], ['es', 2300], ['is', 2200], ['le', 2100], ['ar', 2000], ['ha', 1900], ['ng', 1800], ['js', 1700],
27
+ ['ts', 1600], ['py', 1500], ['rs', 1400], ['::', 1300], ['->', 1200], ['=>', 1100], ['__', 1000], ['./', 900],
28
+ ];
29
+
30
/**
 * Report whether a character code may appear inside an indexable span.
 *
 * Accepted codes are lowercase ASCII letters, ASCII digits, and the
 * punctuation characters `_`, `.`, `/`, `:` and `-`. Uppercase letters are
 * rejected here.
 *
 * @param {number} code - UTF-16 code unit to classify.
 * @returns {boolean} True when the code belongs to the span alphabet.
 */
function isSpanCode(code) {
  if (code >= 97 && code <= 122) return true; // a-z
  if (code >= 48 && code <= 57) return true; // 0-9
  switch (code) {
    case 95: // _
    case 46: // .
    case 47: // /
    case 58: // :
    case 45: // -
      return true;
    default:
      return false;
  }
}
34
+
35
/**
 * Fold an uppercase ASCII letter code (65-90) to its lowercase counterpart.
 * Every other code is returned unchanged.
 *
 * @param {number} code - UTF-16 code unit.
 * @returns {number} The lower-cased code, or `code` itself.
 */
function normalizeAsciiCode(code) {
  const isUpperAscii = code >= 65 && code <= 90;
  if (isUpperAscii) return code + 32;
  return code;
}
38
+
39
/**
 * Compute the flat weight-table slot for a character pair: the left code is
 * shifted up by 7 bits and OR-ed with the right code.
 *
 * @param {number} left - Code of the first character.
 * @param {number} right - Code of the second character.
 * @returns {number} Index into the bigram weight table.
 */
function pairIndex(left, right) {
  const rowOffset = left << 7;
  return rowOffset | right;
}
42
+
43
/**
 * Build (once) and return the fallback bigram weight table.
 *
 * Every pair starts with a count of 1, and the pairs listed in
 * COMMON_CODE_BIGRAMS receive their listed count. Each weight is
 * `log(denominator / (count + 1))`, so pairs with larger counts get smaller
 * weights. The result is cached in `_fallbackWeights`.
 *
 * @returns {number[]} Weight per pair, indexed by `pairIndex(left, right)`.
 */
function buildFallbackWeights() {
  if (_fallbackWeights) return _fallbackWeights;

  const counts = new Uint32Array(WEIGHT_TABLE_LEN).fill(1);
  COMMON_CODE_BIGRAMS.forEach(([pair, count]) => {
    counts[pairIndex(pair.charCodeAt(0), pair.charCodeAt(1))] = count;
  });

  let total = 0;
  for (let slot = 0; slot < counts.length; slot += 1) total += counts[slot];
  const denominator = total + WEIGHT_TABLE_LEN;

  const weights = new Array(counts.length);
  for (let slot = 0; slot < counts.length; slot += 1) {
    weights[slot] = Math.log(denominator / (counts[slot] + 1));
  }
  _fallbackWeights = weights;
  return _fallbackWeights;
}
56
+
57
/**
 * Split text into runs of span characters, lower-casing ASCII letters.
 *
 * Each character's first code unit is normalized and tested with
 * `isSpanCode`. Any other character ends the current run, and runs shorter
 * than MIN_SPAN_LEN are discarded.
 *
 * @param {*} text - Input text; falsy values are treated as an empty string.
 * @returns {number[][]} One array of character codes per retained run.
 */
function collectNormalizedSpans(text) {
  const spans = [];
  let run = [];
  // Close the current run, keeping it only when it is long enough.
  const flush = () => {
    if (run.length >= MIN_SPAN_LEN) spans.push(run);
    run = [];
  };

  for (const ch of String(text || '')) {
    const code = normalizeAsciiCode(ch.charCodeAt(0));
    if (isSpanCode(code)) run.push(code);
    else flush();
  }
  flush();
  return spans;
}
71
+
72
/**
 * Turn a sequence of character codes into the corresponding string.
 *
 * @param {Iterable<number>} bytes - Character codes, in order.
 * @returns {string} The characters concatenated in order.
 */
function bytesToString(bytes) {
  let text = '';
  for (const code of bytes) text += String.fromCharCode(code);
  return text;
}
75
+
76
/**
 * Enumerate the sparse grams of one normalized span.
 *
 * A substring of MIN_SPAN_LEN..MAX_GRAM_LEN characters is emitted when the
 * smaller of its first and last pair weights is strictly greater than every
 * pair weight in between. A MIN_SPAN_LEN substring has no interior pairs.
 * If nothing is emitted, every MIN_SPAN_LEN window is emitted instead.
 * Grams are de-duplicated and returned in discovery order.
 *
 * @param {number[]} span - Character codes of the span.
 * @param {ArrayLike<number>} weights - Pair weights indexed by `pairIndex`.
 * @returns {string[]} Unique grams for the span.
 */
function extractSparseGramsFromSpan(span, weights) {
  const size = span.length;
  if (size < MIN_SPAN_LEN) return [];

  // pairWeights[i] is the weight of the pair (span[i], span[i + 1]).
  const pairWeights = Array.from(
    { length: size - 1 },
    (_, i) => weights[pairIndex(span[i], span[i + 1])],
  );

  const grams = [];
  const seen = new Set();
  const emit = (from, to) => {
    const gram = bytesToString(span.slice(from, to));
    if (seen.has(gram)) return;
    seen.add(gram);
    grams.push(gram);
  };
  // Largest pair weight strictly between the first and last pair of [from, to).
  const interiorMaxOf = (from, to) => {
    let best = Number.NEGATIVE_INFINITY;
    for (let i = from + 1; i < to - 2; i += 1) best = Math.max(best, pairWeights[i]);
    return best;
  };

  for (let start = 0; start + MIN_SPAN_LEN <= size; start += 1) {
    const maxEnd = Math.min(size, start + MAX_GRAM_LEN);
    for (let end = start + MIN_SPAN_LEN; end <= maxEnd; end += 1) {
      const edgeWeight = Math.min(pairWeights[start], pairWeights[end - 2]);
      if (edgeWeight > interiorMaxOf(start, end)) emit(start, end);
    }
  }

  if (grams.length === 0) {
    for (let i = 0; i + MIN_SPAN_LEN <= size; i += 1) emit(i, i + MIN_SPAN_LEN);
  }
  return grams;
}
109
+
110
/**
 * Convert a query literal into a normalized span, or reject it.
 *
 * @param {*} literal - Literal text; falsy values are treated as empty.
 * @returns {number[] | null} Lower-cased character codes, or null when the
 *   literal contains a non-span character or is shorter than MIN_SPAN_LEN.
 */
function normalizeLiteral(literal) {
  const codes = [];
  for (const ch of String(literal || '')) {
    const code = normalizeAsciiCode(ch.charCodeAt(0));
    if (!isSpanCode(code)) return null;
    codes.push(code);
  }
  if (codes.length < MIN_SPAN_LEN) return null;
  return codes;
}
119
+
120
/**
 * Derive a set of grams that together cover one normalized span.
 *
 * Ranges are processed from an explicit stack, starting with the whole span.
 * A range of exactly MIN_SPAN_LEN characters is emitted as is. A range of at
 * most MAX_GRAM_LEN characters is emitted whole when the smaller of its
 * first and last pair weights is strictly greater than every interior pair
 * weight. Any other range is split at the first position of its heaviest
 * non-leading pair, into two halves that share one character. Halves shorter
 * than MIN_SPAN_LEN are dropped. If nothing is emitted, every MIN_SPAN_LEN
 * window of the span is emitted instead.
 *
 * @param {number[]} span - Character codes of the span.
 * @param {ArrayLike<number>} weights - Pair weights indexed by `pairIndex`.
 * @returns {string[]} Unique grams in emission order.
 */
function extractCoveringGramsFromSpan(span, weights) {
  const size = span.length;
  if (size < MIN_SPAN_LEN) return [];

  // pairWeights[i] is the weight of the pair (span[i], span[i + 1]).
  const pairWeights = [];
  for (let i = 0; i + 1 < size; i += 1) {
    pairWeights.push(weights[pairIndex(span[i], span[i + 1])]);
  }

  const grams = [];
  const seen = new Set();
  const emit = (from, to) => {
    const gram = bytesToString(span.slice(from, to));
    if (seen.has(gram)) return;
    seen.add(gram);
    grams.push(gram);
  };

  const pending = [[0, size]];
  while (pending.length > 0) {
    const [start, end] = pending.pop();
    const len = end - start;
    if (len < MIN_SPAN_LEN) continue;
    if (len === MIN_SPAN_LEN) {
      emit(start, end);
      continue;
    }

    if (len <= MAX_GRAM_LEN) {
      let interiorMax = Number.NEGATIVE_INFINITY;
      for (let i = start + 1; i < end - 2; i += 1) {
        interiorMax = Math.max(interiorMax, pairWeights[i]);
      }
      const edgeWeight = Math.min(pairWeights[start], pairWeights[end - 2]);
      if (edgeWeight > interiorMax) {
        emit(start, end);
        continue;
      }
    }

    // Find the first position holding the heaviest pair after the leading one.
    let heaviest = Number.NEGATIVE_INFINITY;
    let splitAt = start + 1;
    for (let i = start + 1; i < end - 1; i += 1) {
      if (pairWeights[i] > heaviest) {
        heaviest = pairWeights[i];
        splitAt = i;
      }
    }

    // The right half is pushed first so the left half is processed first.
    if (end - splitAt >= MIN_SPAN_LEN) pending.push([splitAt, end]);
    if (splitAt + 1 - start >= MIN_SPAN_LEN) pending.push([start, splitAt + 1]);
  }

  if (grams.length === 0) {
    for (let i = 0; i + MIN_SPAN_LEN <= size; i += 1) emit(i, i + MIN_SPAN_LEN);
  }
  return grams;
}
18
172
 
19
173
  function loadAddon() {
20
174
  if (_addonLoaded) return _addon;
21
175
  _addonLoaded = true;
22
176
 
23
- try {
24
- const addonPath = resolveNativeAddon();
25
- if (!addonPath) return null;
26
- const mod = require(addonPath);
27
- if (
28
- typeof mod.buildSparseGramIndex === 'function' &&
29
- typeof mod.NativeSparseGramIndex?.load === 'function' &&
30
- typeof mod.extractRegexLiterals === 'function'
31
- ) {
32
- _addon = mod;
33
- }
34
- } catch (err) {
35
- // Native addon is optional; callers decide whether to warn or fall back.
36
- if (process.env.SWEET_DEBUG) console.debug('[native-sparse-gram] addon load failed:', err.message);
37
- }
177
+ // CUDA-preferred with CPU fallback (see loadNativeAddon): a CUDA addon that
178
+ // can't load on a no-GPU box falls back to the plain CPU addon.
179
+ const res = loadNativeAddon({
180
+ validate: (m) =>
181
+ typeof m.buildSparseGramIndex === 'function' &&
182
+ typeof m.NativeSparseGramIndex?.load === 'function' &&
183
+ typeof m.extractRegexLiterals === 'function',
184
+ });
185
+ if (res) _addon = res.mod;
38
186
 
39
187
  return _addon;
40
188
  }
@@ -43,6 +191,20 @@ export function hasNativeSparseGramSupport() {
43
191
  return !!loadAddon();
44
192
  }
45
193
 
194
/**
 * Whether the native addon exposes the in-process grep functions
 * (`nativeGrepLines` and `nativeGrepFull`). When true, ss-grep / ss-pattern
 * can run regex matching in-process without spawning ripgrep.
 *
 * @returns {boolean} True only when the addon loaded and both properties are
 *   functions.
 */
export function isNativeGrepAvailable() {
  const addon = loadAddon();
  if (!addon) return false;
  return ['nativeGrepLines', 'nativeGrepFull'].every(
    (name) => typeof addon[name] === 'function',
  );
}
207
+
46
208
  export function buildSparseGramIndexArtifact({ projectRoot, files, fileSymbolMasks = [], outputPath }) {
47
209
  const addon = loadAddon();
48
210
  if (!addon) {
@@ -63,6 +225,86 @@ export function loadSparseGramIndex(indexPath) {
63
225
  return addon.NativeSparseGramIndex.load(indexPath);
64
226
  }
65
227
 
228
/**
 * Normalize a native extraction result into `{ weightsId, grams }`.
 *
 * Accepts either `weightsId` or `weights_id` and falls back to
 * FALLBACK_WEIGHTS_ID. Grams are stringified, de-duplicated and sorted.
 *
 * @param {*} result - Raw value returned by a native extractor.
 * @returns {{weightsId: string, grams: string[]} | null} Null when `result`
 *   is falsy or has no `grams` array.
 */
function normalizeExtractionResult(result) {
  if (!result || !Array.isArray(result.grams)) return null;
  const weightsId = result.weightsId || result.weights_id || FALLBACK_WEIGHTS_ID;
  const uniqueGrams = new Set(result.grams.map(String));
  return { weightsId, grams: Array.from(uniqueGrams).sort() };
}
235
+
236
/**
 * Extract sparse grams from content in pure JavaScript, using the fallback
 * weight table.
 *
 * @param {*} content - Text to extract grams from.
 * @returns {{weightsId: string, grams: string[]}} Sorted unique grams, tagged
 *   with FALLBACK_WEIGHTS_ID.
 */
function fallbackSparseGramExtraction(content) {
  const weights = buildFallbackWeights();
  const perSpan = collectNormalizedSpans(content).map(
    (span) => extractSparseGramsFromSpan(span, weights),
  );
  const unique = new Set(perSpan.flat());
  return { weightsId: FALLBACK_WEIGHTS_ID, grams: Array.from(unique).sort() };
}
244
+
245
/**
 * Compute, in pure JavaScript, the grams required to match every literal.
 *
 * The result is ineligible (`eligible: false`, no grams) when `literals` is
 * not a non-empty array, when a literal cannot be normalized, or when a
 * literal yields no covering grams.
 *
 * @param {*} literals - Literal strings taken from a query.
 * @returns {{eligible: boolean, grams: string[], weightsId: string}} Sorted
 *   unique grams across all literals, tagged with FALLBACK_WEIGHTS_ID.
 */
function fallbackRequiredGrams(literals) {
  const ineligible = () => ({ eligible: false, grams: [], weightsId: FALLBACK_WEIGHTS_ID });
  if (!Array.isArray(literals) || literals.length === 0) return ineligible();

  const weights = buildFallbackWeights();
  const unique = new Set();
  for (const literal of literals) {
    const span = normalizeLiteral(literal);
    const required = span ? extractCoveringGramsFromSpan(span, weights) : [];
    if (required.length === 0) return ineligible();
    required.forEach((gram) => unique.add(gram));
  }
  return { eligible: true, grams: Array.from(unique).sort(), weightsId: FALLBACK_WEIGHTS_ID };
}
260
+
261
/**
 * Extract the sparse-gram record for one file's content.
 *
 * With a native addon loaded, the sources are tried in this order:
 *   1. When `indexPath` exists on disk, the loaded index's `extractIndexGrams`
 *      (or `extract_index_grams`) method.
 *   2. Otherwise the index's stats. An index built with fallback weights uses
 *      the JavaScript extractor. An index that only reports a weights id
 *      yields that id with an empty gram list.
 *   3. The addon-level `extractSparseGramDelta` /
 *      `extract_sparse_gram_delta` function.
 * The JavaScript fallback is used when no addon is loaded, when no source
 * applies, or when a native call throws. The error is logged only when
 * SWEET_DEBUG is set.
 *
 * @param {{indexPath?: string, content: *}} params - Index path and content.
 * @returns {{weightsId: string, grams: string[]} | null} The record. Null is
 *   possible when a native extractor returns a result without a `grams` array.
 */
export function extractSparseGramDeltaRecord({ indexPath, content }) {
  const addon = loadAddon();
  if (!addon) return fallbackSparseGramExtraction(content);

  try {
    if (indexPath && existsSync(indexPath)) {
      const index = addon.NativeSparseGramIndex.load(indexPath);
      const extractor = index.extractIndexGrams || index.extract_index_grams;
      if (typeof extractor === 'function') {
        return normalizeExtractionResult(extractor.call(index, content));
      }

      const stats = typeof index.getStats === 'function' ? index.getStats() : null;
      if (stats?.usedFallbackWeights || stats?.used_fallback_weights) {
        return fallbackSparseGramExtraction(content);
      }
      if (typeof stats?.weightsId === 'string' || typeof stats?.weights_id === 'string') {
        return { weightsId: stats.weightsId || stats.weights_id, grams: [] };
      }
    }

    for (const name of ['extractSparseGramDelta', 'extract_sparse_gram_delta']) {
      if (typeof addon[name] === 'function') {
        return normalizeExtractionResult(addon[name](content));
      }
    }
  } catch (err) {
    if (process.env.SWEET_DEBUG) console.debug('[native-sparse-gram] extractSparseGramDeltaRecord failed:', err.message);
  }

  return fallbackSparseGramExtraction(content);
}
289
+
290
/**
 * Compute the grams a file must contain to match all of the given literals.
 *
 * Uses the index's `extractLiteralCoveringGrams` (or
 * `extract_literal_covering_grams`) method when it has one. Without such a
 * method, the JavaScript fallback is used.
 *
 * @param {object | null | undefined} sparseGramIndex - Loaded native index.
 * @param {string[]} literals - Literals taken from the query.
 * @returns {{eligible: boolean, grams: string[], weightsId: (string|null)} | null}
 *   Null when the native extractor throws. The error is logged only when
 *   SWEET_DEBUG is set.
 */
export function extractSparseGramRequiredGrams(sparseGramIndex, literals) {
  try {
    const extractor = sparseGramIndex?.extractLiteralCoveringGrams
      || sparseGramIndex?.extract_literal_covering_grams;
    if (typeof extractor === 'function') {
      const result = extractor.call(sparseGramIndex, literals);
      const eligible = Boolean(result?.eligible);
      const grams = Array.isArray(result?.grams)
        ? Array.from(new Set(result.grams.map(String))).sort()
        : [];
      const weightsId = result?.weightsId || result?.weights_id || null;
      return { eligible, grams, weightsId };
    }
  } catch (err) {
    if (process.env.SWEET_DEBUG) console.debug('[native-sparse-gram] extractSparseGramRequiredGrams failed:', err.message);
    return null;
  }
  // Deliberately outside the try block: fallback errors propagate to the caller.
  return fallbackRequiredGrams(literals);
}
307
+
66
308
  export function extractRegexLiteralClauses(regex) {
67
309
  const addon = loadAddon();
68
310
  if (!addon) return null;
@@ -254,4 +496,3 @@ export function searchFull(sparseGramIndex, clauses, regex, projectRoot, opts =
254
496
  return null;
255
497
  }
256
498
  }
257
-
@@ -11,10 +11,7 @@
11
11
  */
12
12
 
13
13
  import { existsSync } from 'fs';
14
- import { resolveNativeAddon } from './native-resolver.js';
15
- import { createRequire } from 'module';
16
-
17
- const require = createRequire(import.meta.url);
14
+ import { loadNativeAddon } from './native-resolver.js';
18
15
 
19
16
  let _addon = null;
20
17
  let _addonLoaded = false;
@@ -22,17 +19,11 @@ let _addonLoaded = false;
22
19
  function loadAddon() {
23
20
  if (_addonLoaded) return _addon;
24
21
  _addonLoaded = true;
25
- try {
26
- const addonPath = resolveNativeAddon();
27
- if (addonPath) {
28
- const mod = require(addonPath);
29
- if (typeof mod.NativeTokenizer?.fromFile === 'function') {
30
- _addon = mod;
31
- }
32
- }
33
- } catch {
34
- // Native addon not available
35
- }
22
+ // CUDA-preferred with CPU fallback (see loadNativeAddon): on a no-GPU box the
23
+ // CUDA addon throws on load (libcuda absent) and we fall back to the plain
24
+ // CPU addon so local tokenization (→ ORT-INT8 indexing) keeps working.
25
+ const res = loadNativeAddon({ validate: (m) => typeof m.NativeTokenizer?.fromFile === 'function' });
26
+ if (res) _addon = res.mod;
36
27
  return _addon;
37
28
  }
38
29
 
@@ -17,8 +17,7 @@
17
17
  import { fileURLToPath } from 'url';
18
18
  import { dirname, join } from 'path';
19
19
  import { readFileSync, existsSync } from 'fs';
20
- import { createRequire } from 'module';
21
- import { resolveNativeAddon } from './native-resolver.js';
20
+ import { loadNativeAddon } from './native-resolver.js';
22
21
 
23
22
  const DATA_OFFSET = 0; // SIMD popcount needs no LUT
24
23
 
@@ -46,17 +45,12 @@ async function initWasm() {
46
45
  initPromise = (async () => {
47
46
  try {
48
47
  const __dirname = dirname(fileURLToPath(import.meta.url));
49
- const require = createRequire(import.meta.url);
50
-
51
- // Tier 1: Try native Rust addon (rayon parallel + NEON/AVX2 SIMD)
52
- try {
53
- const addonPath = resolveNativeAddon();
54
- if (addonPath) {
55
- nativeMaxsim = require(addonPath);
56
- }
57
- } catch {
58
- // Native not available — fall through to WASM
59
- }
48
+
49
+ // Tier 1: native Rust addon (rayon + NEON/AVX2 SIMD). CUDA-preferred with
50
+ // CPU fallback (see loadNativeAddon): a CUDA addon that can't load on a
51
+ // no-GPU box falls back to the plain CPU addon; otherwise WASM (Tier 2).
52
+ const nativeRes = loadNativeAddon();
53
+ if (nativeRes) nativeMaxsim = nativeRes.mod;
60
54
 
61
55
  // Tier 2a: Load hand-assembled SIMD distance WASM
62
56
  const wasmPath = join(__dirname, 'simd-distance.wasm');
@@ -79,11 +73,11 @@ async function initWasm() {
79
73
  initDone = true;
80
74
 
81
75
  if (nativeMaxsim) {
82
- console.log('[MaxSim] Tier 1: Native Rust + Rayon (parallel SIMD)');
76
+ console.error('[MaxSim] Tier 1: Native Rust + Rayon (parallel SIMD)');
83
77
  } else if (maxsimExports || wasmExports?.maxsim_f32) {
84
- console.log('[MaxSim] Tier 2: WASM SIMD f32x4');
78
+ console.error('[MaxSim] Tier 2: WASM SIMD f32x4');
85
79
  } else {
86
- console.log('[MaxSim] Tier 3: JS fallback');
80
+ console.error('[MaxSim] Tier 3: JS fallback');
87
81
  }
88
82
 
89
83
  return true;
@@ -0,0 +1,76 @@
1
+ /**
2
+ * Read-only sparse-gram delta helpers for query-time overlay resolution.
3
+ *
4
+ * The reconcile writer lives under incremental-indexing. Search only needs to
5
+ * resolve the latest append-only delta record per file, so that read contract
6
+ * belongs in infrastructure instead of importing the writer bounded context.
7
+ */
8
+
9
+ import fs from 'node:fs';
10
+ import path from 'node:path';
11
+
12
+ export const SPARSE_DELTA_DIR_SUFFIX = '.deltas';
13
+ export const SPARSE_DELTA_FILE_EXT = '.ssgrmdelta';
14
+
15
/**
 * Name of the delta directory for a base artifact: the artifact path with
 * SPARSE_DELTA_DIR_SUFFIX appended.
 *
 * @param {string} baseArtifactPath - Path of the base sparse-gram artifact.
 * @returns {string} Path of the delta directory.
 */
function deltaDirFor(baseArtifactPath) {
  const suffix = SPARSE_DELTA_DIR_SUFFIX;
  return baseArtifactPath + suffix;
}
18
+
19
/**
 * Validate a delta segment path and describe it.
 *
 * A segment is accepted only when it ends with SPARSE_DELTA_FILE_EXT,
 * resolves to a location inside the artifact's delta directory, is named
 * `<epoch>-<seq>.ssgrmdelta`, has an epoch no greater than `maxEpoch`, and
 * exists on disk. Relative paths are resolved against the directory that
 * contains the base artifact.
 *
 * @param {string} baseArtifactPath - Path of the base sparse-gram artifact.
 * @param {*} segmentPath - Candidate segment path, absolute or relative.
 * @param {number} maxEpoch - Highest epoch to accept.
 * @returns {{path: string, epoch: number, seq: number} | null} Segment
 *   descriptor, or null when any check fails.
 */
function parseDeltaSegment(baseArtifactPath, segmentPath, maxEpoch) {
  const hasDeltaExt = typeof segmentPath === 'string' && segmentPath.endsWith(SPARSE_DELTA_FILE_EXT);
  if (!hasDeltaExt) return null;

  const deltaRoot = path.resolve(deltaDirFor(baseArtifactPath));
  const candidate = path.isAbsolute(segmentPath)
    ? segmentPath
    : path.join(path.dirname(baseArtifactPath), segmentPath);
  const normalized = path.resolve(candidate);

  // Reject anything that resolves outside the delta directory.
  const insideRoot = normalized === deltaRoot || normalized.startsWith(deltaRoot + path.sep);
  if (!insideRoot) return null;

  const parts = /^(\d+)-(\d+)\.ssgrmdelta$/.exec(path.basename(normalized));
  if (!parts) return null;

  const [, epochText, seqText] = parts;
  const epoch = Number(epochText);
  if (epoch > maxEpoch) return null;
  if (!fs.existsSync(normalized)) return null;

  return { path: normalized, epoch, seq: Number(seqText) };
}
38
+
39
/**
 * List the valid delta segments of a base artifact, ordered by epoch and
 * then by sequence number.
 *
 * Candidates come from `opts.segments` when it is an array. Otherwise they
 * are the entries of the artifact's delta directory. A missing directory
 * yields an empty list.
 *
 * @param {string} baseArtifactPath - Path of the base sparse-gram artifact.
 * @param {{maxEpoch?: number, segments?: string[]}} [opts] - `maxEpoch` caps
 *   the accepted epoch. A non-integer value means no cap.
 * @returns {Array<{path: string, epoch: number, seq: number}>} Sorted
 *   segment descriptors.
 */
export function listSparseGramDeltaSegments(baseArtifactPath, opts = {}) {
  const maxEpoch = Number.isInteger(opts.maxEpoch) ? opts.maxEpoch : Infinity;
  const byEpochThenSeq = (a, b) => (a.epoch - b.epoch) || (a.seq - b.seq);

  let candidates;
  if (Array.isArray(opts.segments)) {
    candidates = opts.segments;
  } else {
    const dir = deltaDirFor(baseArtifactPath);
    if (!fs.existsSync(dir)) return [];
    candidates = fs.readdirSync(dir).map((name) => path.join(dir, name));
  }

  const segments = [];
  for (const candidate of candidates) {
    const segment = parseDeltaSegment(baseArtifactPath, candidate, maxEpoch);
    if (segment) segments.push(segment);
  }
  return segments.sort(byEpochThenSeq);
}
57
+
58
/**
 * Resolve the most recent delta record for every file id.
 *
 * Segments are read in the order returned by `listSparseGramDeltaSegments`,
 * and each segment is parsed as one JSON value per line. Later records
 * overwrite earlier ones with the same `fileId`. The following lines are
 * skipped: blank lines, lines that are not valid JSON, JSON values that are
 * not objects (for example `null` or a bare number), and records without a
 * truthy `fileId`.
 *
 * @param {string} baseArtifactPath - Path of the base sparse-gram artifact.
 * @param {{maxEpoch?: number, segments?: string[]}} [opts] - Forwarded to
 *   `listSparseGramDeltaSegments`.
 * @returns {Map<*, {record: object, segmentPath: string, epoch: number}>}
 *   Latest record per file id.
 */
export function resolveLatestSparseGramDeltaRecords(baseArtifactPath, opts = {}) {
  const latest = new Map();
  for (const seg of listSparseGramDeltaSegments(baseArtifactPath, opts)) {
    const raw = fs.readFileSync(seg.path, 'utf-8');
    for (const line of raw.split('\n')) {
      const trimmed = line.trim();
      if (!trimmed) continue;
      let record;
      try {
        record = JSON.parse(trimmed);
      } catch {
        continue;
      }
      // JSON.parse('null') succeeds and returns null. Reading `.fileId` from
      // it would throw, so one bad line would abort the whole resolution.
      if (record === null || typeof record !== 'object') continue;
      if (!record.fileId) continue;
      latest.set(record.fileId, { record, segmentPath: seg.path, epoch: seg.epoch });
    }
  }
  return latest;
}
@@ -0,0 +1,122 @@
1
+ import path from 'path';
2
+
3
+ const ACTIVE = 'stale_since IS NULL';
4
+
5
/**
 * Escape regular-expression metacharacters so the text can be embedded in a
 * pattern and match literally.
 *
 * @param {*} text - Value to escape; it is stringified first.
 * @returns {string} The text with each metacharacter prefixed by a backslash.
 */
function escapeRegExp(text) {
  const metaChars = /[.*+?^${}()|[\]\\]/g;
  const source = String(text);
  return source.replace(metaChars, (ch) => `\\${ch}`);
}
8
+
9
/**
 * Map an `entities` table row (snake_case columns) to the camelCase entity
 * shape. A missing `signature` or `summary` becomes an empty string. A
 * missing `parent_class` or `package` becomes null.
 *
 * @param {object} row - Database row.
 * @returns {object} Entity with id, name, type, filePath, startLine, endLine,
 *   signature, summary, parentClass and package.
 */
function rowToEntity(row) {
  const {
    id,
    name,
    type,
    file_path: filePath,
    start_line: startLine,
    end_line: endLine,
    signature,
    summary,
    parent_class: parentClass,
    package: pkg,
  } = row;
  return {
    id,
    name,
    type,
    filePath,
    startLine,
    endLine,
    signature: signature || '',
    summary: summary || '',
    parentClass: parentClass || null,
    package: pkg || null,
  };
}
23
+
24
/**
 * Return a file's base name without its final extension.
 *
 * @param {*} filePath - File path; falsy values are treated as empty.
 * @returns {string} The stem, e.g. `bar` for `src/foo/bar.js`.
 */
function moduleStem(filePath) {
  const target = String(filePath || '');
  const extension = path.extname(target);
  return path.basename(target, extension);
}
27
+
28
/**
 * Decide whether a module specifier plausibly refers to the file with the
 * given stem.
 *
 * Backslashes are converted to forward slashes and a trailing JS/TS extension
 * (`.js`, `.ts`, `.jsx`, `.tsx`, optionally prefixed with `c` or `m`) is
 * dropped. The specifier matches when the remainder equals the stem or ends
 * with `/<stem>`.
 *
 * @param {*} moduleName - Specifier as written in the import/require.
 * @param {string} stem - Target file's stem.
 * @returns {boolean} True when the specifier matches the stem.
 */
function moduleLooksRelated(moduleName, stem) {
  const forwardSlashed = String(moduleName || '').split('\\').join('/');
  const withoutExt = forwardSlashed.replace(/\.[cm]?[jt]sx?$/, '');
  if (withoutExt === stem) return true;
  return withoutExt.endsWith(`/${stem}`);
}
32
+
33
/**
 * Scan a comma-separated specifier list for the target name and record the
 * local names it is bound to.
 *
 * Recognized forms are `target: local` (CommonJS destructuring rename),
 * `target as local` (ES import rename) and a bare `target`.
 *
 * @param {string} specs - Text between the braces of an import/require.
 * @param {string} targetName - Exported name being looked for.
 * @param {Set<string>} aliases - Receives the local names; mutated in place.
 * @returns {void}
 */
function parseSpecifiers(specs, targetName, aliases) {
  const escaped = escapeRegExp(targetName);
  const colonForm = new RegExp(`^${escaped}\\s*:\\s*([A-Za-z_$][\\w$]*)$`);
  const asForm = new RegExp(`^${escaped}\\s+as\\s+([A-Za-z_$][\\w$]*)$`);

  for (const rawPart of specs.split(',')) {
    const part = rawPart.trim();
    const renamed = colonForm.exec(part) || asForm.exec(part);
    if (renamed) {
      aliases.add(renamed[1]);
    } else if (part === targetName) {
      aliases.add(targetName);
    }
  }
}
43
+
44
/**
 * Find the local names under which a file imports the target.
 *
 * Only lines that mention both the target name and the target file's stem are
 * examined. Three single-line forms are recognized: destructured `require`,
 * named ES `import`, and `const x = require('...').target`. In each case the
 * module specifier must look related to the target file.
 *
 * @param {*} text - Source text of the importing file.
 * @param {{name: string, filePath: string}} target - Entity being looked up.
 * @returns {string[]} Distinct local names bound to the target.
 */
function extractAliases(text, target) {
  const aliases = new Set();
  const stem = moduleStem(target.filePath);
  const cjsDestructure = /\{([^}]+)\}\s*=\s*require\(['"]([^'"]+)['"]\)/;
  const esmNamedImport = /import\s+\{([^}]+)\}\s+from\s+['"]([^'"]+)['"]/;
  const requireProperty = new RegExp(`(?:const|let|var)\\s+([A-Za-z_$][\\w$]*)\\s*=\\s*require\\(['"]([^'"]+)['"]\\)\\.${escapeRegExp(target.name)}\\b`);

  for (const line of String(text || '').split('\n')) {
    const mentionsBoth = line.includes(target.name) && line.includes(stem);
    if (!mentionsBoth) continue;

    const cjs = cjsDestructure.exec(line);
    if (cjs && moduleLooksRelated(cjs[2], stem)) parseSpecifiers(cjs[1], target.name, aliases);

    const esm = esmNamedImport.exec(line);
    if (esm && moduleLooksRelated(esm[2], stem)) parseSpecifiers(esm[1], target.name, aliases);

    const prop = requireProperty.exec(line);
    if (prop && moduleLooksRelated(prop[2], stem)) aliases.add(prop[1]);
  }
  return Array.from(aliases);
}
58
+
59
/**
 * Convert a character offset into a 1-based line number by counting the
 * newline characters that precede the offset.
 *
 * @param {string} text - Full source text.
 * @param {number} index - Character offset into `text`.
 * @returns {number} 1-based line number containing the offset.
 */
function lineOfIndex(text, index) {
  let line = 1;
  let newlineAt = text.indexOf('\n');
  while (newlineAt !== -1 && newlineAt < index) {
    line += 1;
    newlineAt = text.indexOf('\n', newlineAt + 1);
  }
  return line;
}
64
+
65
/**
 * Find entities that call the target through an imported alias.
 *
 * Up to 1000 distinct entity file paths are scanned, with paths matching the
 * `test/` / `tests/` LIKE filters ordered last. For each file other than the
 * target's own, the first 20000 lines are requested from `readFileRange`.
 * Local aliases of the target are extracted, and every `alias(` call site is
 * attributed to the smallest visible entity that spans that line. When the
 * target lives in a `.rs` file, `Path::name(` call sites are matched too.
 *
 * Files for which `readFileRange` returns nothing (null, undefined or empty
 * text) are skipped.
 *
 * @param {object} params
 * @param {object} params.db - Database handle exposing `prepare(sql)`, whose
 *   statements expose `all(...)` and `get(...)`.
 * @param {{id: *, name: string, filePath: string}} params.target - Callee.
 * @param {(filePath: string, startLine: number, endLine: number) => *} params.readFileRange
 *   - Returns the text of a line range.
 * @param {number} [params.limit=40] - Maximum number of callers returned.
 * @param {string} [params.entityVisibilitySql] - SQL predicate selecting
 *   visible entities; defaults to `stale_since IS NULL`.
 * @param {Array} [params.entityVisibilityParams=[]] - Bind parameters for
 *   the predicate.
 * @param {(row: object) => object} [params.mapEntity] - Row-to-entity mapper.
 * @returns {object[]} Caller entities, each extended with
 *   `relationship: 'calls'`, `contextLine`, `targetName` and `weight`.
 */
export function findAliasCallers({
  db,
  target,
  readFileRange,
  limit = 40,
  entityVisibilitySql: entitySql = ACTIVE,
  entityVisibilityParams: entityParams = [],
  mapEntity = rowToEntity,
}) {
  if (!db || !target?.filePath || !target?.name) return [];
  const files = db.prepare(`
    SELECT DISTINCT file_path
    FROM entities
    WHERE ${entitySql} AND file_path IS NOT NULL
    ORDER BY CASE WHEN file_path LIKE '%/test/%' OR file_path LIKE 'test/%' OR file_path LIKE 'tests/%' THEN 1 ELSE 0 END, file_path
    LIMIT 1000
  `).all(...entityParams);
  const entityAtLine = db.prepare(`
    SELECT id, name, type, file_path, start_line, end_line, signature, summary, parent_class, package
    FROM entities
    WHERE ${entitySql} AND file_path = ? AND start_line <= ? AND end_line >= ?
    ORDER BY (end_line - start_line) ASC
    LIMIT 1
  `);

  // The Rust path-call pattern depends only on the target, so build it once.
  // matchAll works on a clone of the regex, so sharing it across files is safe.
  const rustPathCall = /\.rs$/.test(target.filePath)
    ? {
      targetName: `::${target.name}`,
      re: new RegExp(`\\b[A-Za-z_][\\w]*::${escapeRegExp(target.name)}\\s*\\(`, 'g'),
    }
    : null;

  const out = [];
  const seen = new Set();
  for (const { file_path: filePath } of files) {
    if (filePath === target.filePath) continue;
    // A reader may return null/undefined for an unreadable file. Normalize
    // to a string so matchAll below cannot throw on a missing text.
    const text = String(readFileRange(filePath, 1, 20000) ?? '');
    if (!text) continue;

    const patterns = extractAliases(text, target).map((alias) => ({
      targetName: alias,
      re: new RegExp(`(?<![\\w$])${escapeRegExp(alias)}\\s*\\(`, 'g'),
    }));
    if (rustPathCall) patterns.push(rustPathCall);
    if (!patterns.length) continue;

    for (const pattern of patterns) {
      for (const match of text.matchAll(pattern.re)) {
        const line = lineOfIndex(text, match.index || 0);
        const entity = entityAtLine.get(...entityParams, filePath, line, line);
        if (!entity || entity.id === target.id) continue;
        // Report the callee as written at the call site, minus the trailing "(".
        const targetName = match[0].replace(/\s*\($/, '') || pattern.targetName;
        const key = `${entity.id}:${line}:${targetName}`;
        if (seen.has(key)) continue;
        seen.add(key);
        out.push({ ...mapEntity(entity), relationship: 'calls', contextLine: line, targetName, weight: 0.82 });
        if (out.length >= limit) return out;
      }
    }
  }
  return out;
}
@@ -0,0 +1,34 @@
1
/**
 * Extract the distinct identifier-like tokens of a text, lower-cased.
 *
 * A token starts with a letter or underscore and is at least three
 * characters long.
 *
 * @param {*} text - Input text; falsy values are treated as empty.
 * @returns {string[]} Unique tokens in order of first appearance.
 */
function tokens(text) {
  const lowered = String(text || '').toLowerCase();
  const found = lowered.match(/[a-z_][a-z0-9_]{2,}/g);
  return found ? Array.from(new Set(found)) : [];
}
4
+
5
/**
 * Decide whether the hint asks for an implementation rather than a wrapper.
 *
 * The token `wrapper` always yields false. Otherwise any of the cue words
 * (callee, callees, downstream, helper, helpers, conversion, implementation)
 * yields true.
 *
 * @param {string[]} hintTokens - Lower-cased tokens of the query hint.
 * @returns {boolean} True when an implementation is wanted.
 */
function wantsImplementation(hintTokens) {
  if (hintTokens.includes('wrapper')) return false;
  const cues = new Set(['callee', 'callees', 'downstream', 'helper', 'helpers', 'conversion', 'implementation']);
  return hintTokens.some((tok) => cues.has(tok));
}
9
+
10
/**
 * Detect whether the code calls a method with the candidate's own name on
 * some receiver, i.e. contains `.<name>(`.
 *
 * A candidate with no name never counts as delegating. Without this guard
 * the pattern would reduce to `.(` and match unrelated code such as an
 * optional call `cb?.(x)`.
 *
 * @param {{name?: string}} candidate - Candidate entity.
 * @param {string} code - Source text of the candidate.
 * @returns {boolean} True when a same-named method call is present.
 */
function delegatesSameName(candidate, code) {
  const rawName = String(candidate.name || '');
  if (!rawName) return false;
  const name = rawName.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  return new RegExp(`\\.\\s*${name}\\s*\\(`).test(code);
}
14
+
15
/**
 * Score a candidate against the hint tokens.
 *
 * The base score is the fraction of hint tokens found in the candidate's
 * name, type, path, signature, summary or source text (case-insensitive). It
 * is reduced by 0.35 when the hint asks for an implementation and the
 * candidate delegates to a same-named method.
 *
 * @param {object} candidate - Entity with name, type, filePath, startLine,
 *   endLine, signature and summary.
 * @param {string[]} hintTokens - Lower-cased tokens of the query hint.
 * @param {(filePath: string, startLine: number, endLine: number) => *} readFileRange
 *   - Returns the candidate's source text; a falsy result is treated as empty.
 * @returns {number} Score; 0 when there are no hint tokens.
 */
function scoreCandidate(candidate, hintTokens, readFileRange) {
  if (!hintTokens.length) return 0;

  const code = readFileRange(candidate.filePath, candidate.startLine, candidate.endLine) || '';
  const hay = `${candidate.name} ${candidate.type} ${candidate.filePath} ${candidate.signature} ${candidate.summary} ${code}`.toLowerCase();

  const hits = hintTokens.filter((tok) => hay.includes(tok)).length;
  const base = hits / hintTokens.length;
  const isThinDelegate = wantsImplementation(hintTokens) && delegatesSameName(candidate, code);
  return isThinDelegate ? base - 0.35 : base;
}
25
+
26
/**
 * Order candidates by how well they match the query hint.
 *
 * The input array is returned untouched when the hint has no tokens or there
 * are fewer than two candidates. Otherwise a new array is returned, sorted by
 * descending score with ties kept in their original order.
 *
 * @param {object[]} candidates - Candidate entities.
 * @param {{queryHint: *, readFileRange: Function}} options - Hint text and a
 *   reader for candidate source text.
 * @returns {object[]} Ranked candidates.
 */
export function rankStructuralCandidates(candidates, { queryHint, readFileRange }) {
  const hintTokens = tokens(queryHint);
  if (!hintTokens.length || candidates.length < 2) return candidates;

  const scored = candidates.map((candidate, index) => ({
    candidate,
    index,
    score: scoreCandidate(candidate, hintTokens, readFileRange),
  }));
  scored.sort((a, b) => (b.score - a.score) || (a.index - b.index));
  return scored.map(({ candidate }) => candidate);
}