sweet-search 2.5.2 → 2.5.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/core/cli.js +24 -3
- package/core/graph/graph-expansion.js +215 -36
- package/core/graph/graph-extractor.js +196 -11
- package/core/graph/graph-search.js +395 -92
- package/core/graph/hcgs-generator.js +2 -1
- package/core/graph/index.js +2 -0
- package/core/graph/repo-map.js +28 -6
- package/core/graph/structural-answer-cues.js +168 -0
- package/core/graph/structural-callsite-hints.js +40 -0
- package/core/graph/structural-context-format.js +40 -0
- package/core/graph/structural-context.js +450 -0
- package/core/graph/structural-forward-push.js +156 -0
- package/core/graph/structural-header-context.js +19 -0
- package/core/graph/structural-importance.js +148 -0
- package/core/graph/structural-pagerank.js +197 -0
- package/core/graph/summary-manager.js +13 -9
- package/core/incremental-indexing/application/dirty-scan.mjs +236 -0
- package/core/incremental-indexing/application/file-watcher.mjs +197 -0
- package/core/incremental-indexing/application/maintenance-handlers.mjs +519 -0
- package/core/incremental-indexing/application/maintenance-worker.mjs +380 -0
- package/core/incremental-indexing/application/operator-cli.mjs +554 -0
- package/core/incremental-indexing/application/production-li-delta.mjs +192 -0
- package/core/incremental-indexing/application/production-reconciler-helpers.mjs +107 -0
- package/core/incremental-indexing/application/production-reconciler.mjs +583 -0
- package/core/incremental-indexing/application/reconciler.mjs +477 -0
- package/core/incremental-indexing/application/tombstone-injector.mjs +148 -0
- package/core/incremental-indexing/domain/chunk-identity.mjs +260 -0
- package/core/incremental-indexing/domain/encoder-deps.mjs +193 -0
- package/core/incremental-indexing/domain/encoder-input.mjs +225 -0
- package/core/incremental-indexing/domain/interval-autotune.mjs +255 -0
- package/core/incremental-indexing/domain/reconcile-counters.mjs +149 -0
- package/core/incremental-indexing/domain/watermark-scheduler.mjs +239 -0
- package/core/incremental-indexing/infrastructure/artifact-temp-sweep.mjs +163 -0
- package/core/incremental-indexing/infrastructure/baseline-readiness.mjs +121 -0
- package/core/incremental-indexing/infrastructure/dirty-set.mjs +233 -0
- package/core/incremental-indexing/infrastructure/graph-gc.mjs +314 -0
- package/core/incremental-indexing/infrastructure/hashing.mjs +298 -0
- package/core/incremental-indexing/infrastructure/hcgs-invalidation.mjs +182 -0
- package/core/incremental-indexing/infrastructure/li-segment-merge.mjs +278 -0
- package/core/incremental-indexing/infrastructure/li-segment-state.mjs +173 -0
- package/core/incremental-indexing/infrastructure/lockfile.mjs +119 -0
- package/core/incremental-indexing/infrastructure/maintenance-state-reader.mjs +283 -0
- package/core/incremental-indexing/infrastructure/manifest.mjs +194 -0
- package/core/incremental-indexing/infrastructure/path-filter.mjs +190 -0
- package/core/incremental-indexing/infrastructure/reader-heartbeat.mjs +201 -0
- package/core/incremental-indexing/infrastructure/schema-migrations.mjs +257 -0
- package/core/incremental-indexing/infrastructure/sparse-gram-delta.mjs +335 -0
- package/core/incremental-indexing/infrastructure/sqlite-fts5.mjs +176 -0
- package/core/incremental-indexing/infrastructure/staleness-display.mjs +105 -0
- package/core/incremental-indexing/infrastructure/tombstone-bitmap.mjs +234 -0
- package/core/incremental-indexing/infrastructure/vector-delta-writer.mjs +359 -0
- package/core/incremental-indexing/infrastructure/vector-gc.mjs +133 -0
- package/core/incremental-indexing/infrastructure/worktree-stamp.mjs +155 -0
- package/core/incremental-indexing/infrastructure/wsl2-detect.mjs +115 -0
- package/core/indexing/admission-policy.js +139 -0
- package/core/indexing/artifact-builder.js +29 -12
- package/core/indexing/ast-chunker.js +107 -30
- package/core/indexing/dedup/exemplar-selector.js +19 -1
- package/core/indexing/gitignore-filter.js +223 -0
- package/core/indexing/incremental-tracker.js +99 -30
- package/core/indexing/index-codebase-v21.js +6 -5
- package/core/indexing/index-maintainer.mjs +698 -6
- package/core/indexing/indexer-ann.js +99 -15
- package/core/indexing/indexer-build.js +158 -45
- package/core/indexing/indexer-empty-baseline.js +80 -0
- package/core/indexing/indexer-manifest.js +66 -0
- package/core/indexing/indexer-phases.js +56 -23
- package/core/indexing/indexer-sparse-gram.js +54 -13
- package/core/indexing/indexer-utils.js +26 -208
- package/core/indexing/indexing-file-policy.js +32 -7
- package/core/indexing/maintainer-launcher.mjs +137 -0
- package/core/indexing/merkle-tracker.js +251 -244
- package/core/indexing/model-pool.js +46 -5
- package/core/infrastructure/code-graph-repository.js +758 -6
- package/core/infrastructure/code-graph-visibility.js +157 -0
- package/core/infrastructure/codebase-repository.js +100 -13
- package/core/infrastructure/config/search.js +1 -1
- package/core/infrastructure/db-utils.js +118 -0
- package/core/infrastructure/dedup-hashing.js +10 -13
- package/core/infrastructure/hardware-capability.js +17 -7
- package/core/infrastructure/index.js +8 -2
- package/core/infrastructure/language-patterns/maps.js +4 -1
- package/core/infrastructure/language-patterns/registry-core.js +56 -17
- package/core/infrastructure/language-patterns/registry-object-oriented.js +12 -5
- package/core/infrastructure/language-patterns.js +69 -0
- package/core/infrastructure/model-registry.js +20 -0
- package/core/infrastructure/native-inference.js +7 -12
- package/core/infrastructure/native-resolver.js +52 -37
- package/core/infrastructure/native-sparse-gram.js +261 -20
- package/core/infrastructure/native-tokenizer.js +6 -15
- package/core/infrastructure/simd-distance.js +10 -16
- package/core/infrastructure/sparse-gram-delta-reader.js +76 -0
- package/core/infrastructure/structural-alias-resolver.js +122 -0
- package/core/infrastructure/structural-candidate-ranker.js +34 -0
- package/core/infrastructure/structural-context-repository.js +472 -0
- package/core/infrastructure/structural-context-utils.js +51 -0
- package/core/infrastructure/structural-graph-signals.js +121 -0
- package/core/infrastructure/structural-qualified-resolution.js +15 -0
- package/core/infrastructure/structural-source-definitions.js +100 -0
- package/core/infrastructure/tombstone-bitmap-reader.js +139 -0
- package/core/infrastructure/tree-sitter-provider.js +811 -37
- package/core/prompt-optimization/data/p7-final/sweet-search-system-prompt.md +50 -0
- package/core/query/query-router.js +55 -5
- package/core/ranking/file-kind-ranking.js +2192 -15
- package/core/ranking/late-interaction-index.js +87 -12
- package/core/search/cli-decoration.js +290 -0
- package/core/search/context-expander.js +988 -78
- package/core/search/index.js +1 -0
- package/core/search/output-policy.js +275 -0
- package/core/search/search-anchor.js +499 -0
- package/core/search/search-boost.js +93 -1
- package/core/search/search-cli.js +61 -204
- package/core/search/search-hybrid.js +250 -10
- package/core/search/search-pattern-chunks.js +57 -8
- package/core/search/search-pattern-planner.js +68 -9
- package/core/search/search-pattern-prefilter.js +30 -10
- package/core/search/search-pattern-ripgrep.js +40 -4
- package/core/search/search-pattern-sparse-overlay.js +256 -0
- package/core/search/search-pattern.js +117 -29
- package/core/search/search-postprocess.js +479 -5
- package/core/search/search-read-semantic.js +260 -23
- package/core/search/search-read.js +82 -64
- package/core/search/search-reader-pin.js +71 -0
- package/core/search/search-rrf.js +279 -0
- package/core/search/search-semantic.js +110 -5
- package/core/search/search-server.js +130 -57
- package/core/search/search-trace.js +107 -0
- package/core/search/server-identity.js +93 -0
- package/core/search/session-daemon-prewarm.mjs +33 -10
- package/core/search/sweet-search.js +399 -7
- package/core/skills/sweet-index/SKILL.md +8 -6
- package/core/vector-store/binary-hnsw-index.js +194 -30
- package/core/vector-store/float-vector-store.js +96 -6
- package/core/vector-store/hnsw-index.js +220 -49
- package/eval/agent-read-workflows/bin/_ss-helpers.mjs +471 -0
- package/eval/agent-read-workflows/bin/ss-find +15 -0
- package/eval/agent-read-workflows/bin/ss-grep +12 -0
- package/eval/agent-read-workflows/bin/ss-read +14 -0
- package/eval/agent-read-workflows/bin/ss-search +18 -0
- package/eval/agent-read-workflows/bin/ss-semantic +12 -0
- package/eval/agent-read-workflows/bin/ss-trace +11 -0
- package/mcp/read-tool.js +109 -0
- package/mcp/server.js +55 -15
- package/mcp/tool-handlers.js +14 -124
- package/mcp/trace-tool.js +81 -0
- package/package.json +25 -10
- package/scripts/hooks/intercept-read.mjs +55 -0
- package/scripts/hooks/remind-tools.mjs +40 -0
- package/scripts/init.js +698 -54
- package/scripts/inject-agent-instructions.js +431 -0
- package/scripts/install-prompt-reminders.js +188 -0
- package/scripts/install-tool-enforcement.js +220 -0
- package/scripts/smoke-test.js +12 -9
- package/scripts/uninstall.js +276 -18
- package/scripts/write-claude-rules.js +110 -0
|
@@ -4,37 +4,185 @@
|
|
|
4
4
|
*/
|
|
5
5
|
|
|
6
6
|
import { existsSync } from 'fs';
|
|
7
|
-
import {
|
|
8
|
-
import { resolveNativeAddon } from './native-resolver.js';
|
|
7
|
+
import { loadNativeAddon } from './native-resolver.js';
|
|
9
8
|
import { SPARSE_SYMBOL_MASKS, resolveSparseSymbolMask } from './constants.js';
|
|
10
9
|
|
|
11
10
|
// Re-export from constants.js — canonical source of symbol type vocabulary.
|
|
12
11
|
export { SPARSE_SYMBOL_MASKS, resolveSparseSymbolMask };
|
|
13
12
|
|
|
14
|
-
const require = createRequire(import.meta.url);
|
|
15
|
-
|
|
16
13
|
let _addon = null;
|
|
17
14
|
let _addonLoaded = false;
|
|
15
|
+
let _fallbackWeights = null;
|
|
16
|
+
|
|
17
|
+
// The fallback weight table has one slot per ordered pair of 7-bit ASCII codes.
const ASCII_DIM = 128;
const WEIGHT_TABLE_LEN = ASCII_DIM * ASCII_DIM;

// Shortest span/gram the fallback extractor keeps, and the longest gram it emits.
const MIN_SPAN_LEN = 3;
const MAX_GRAM_LEN = 12;

// Identifier reported with grams that were produced from the built-in table below.
const FALLBACK_WEIGHTS_ID = 'common-code-bigram-v1';

// [bigram, count] seeds for the fallback table; every pair not listed here
// starts from a count of 1 (see buildFallbackWeights).
const COMMON_CODE_BIGRAMS = [
  ['th', 5000], ['he', 4800], ['in', 4700], ['er', 4500],
  ['re', 4300], ['fo', 4200], ['or', 4200], ['fu', 4100],
  ['un', 4000], ['ct', 3900], ['cl', 3800], ['ss', 3700],
  ['co', 3600], ['de', 3500], ['nt', 3400], ['io', 3300],
  ['on', 3200], ['st', 3100], ['te', 3000], ['ra', 2900],
  ['ri', 2800], ['al', 2700], ['se', 2600], ['it', 2500],
  ['at', 2400], ['es', 2300], ['is', 2200], ['le', 2100],
  ['ar', 2000], ['ha', 1900], ['ng', 1800], ['js', 1700],
  ['ts', 1600], ['py', 1500], ['rs', 1400], ['::', 1300],
  ['->', 1200], ['=>', 1100], ['__', 1000], ['./', 900],
];
|
|
29
|
+
|
|
30
|
+
/**
 * Tells whether a character code may appear inside a sparse-gram span.
 *
 * Accepted codes are lowercase a-z, digits 0-9 and the punctuation `_ . / : -`.
 * Uppercase letters are rejected here, so callers lowercase first.
 *
 * @param {number} code - UTF-16 code unit to classify.
 * @returns {boolean} True when the code belongs to the span alphabet.
 */
function isSpanCode(code) {
  if (code >= 97 && code <= 122) return true; // a-z
  if (code >= 48 && code <= 57) return true; // 0-9
  switch (code) {
    case 95: // _
    case 46: // .
    case 47: // /
    case 58: // :
    case 45: // -
      return true;
    default:
      return false;
  }
}
|
|
34
|
+
|
|
35
|
+
/**
 * Lowercases an ASCII uppercase letter code; every other code is returned as-is.
 *
 * @param {number} code - Character code.
 * @returns {number} The code of the lowercase letter for A-Z, otherwise `code`.
 */
function normalizeAsciiCode(code) {
  const isUpperAscii = code >= 65 && code <= 90;
  if (isUpperAscii) return code + 32;
  return code;
}
|
|
38
|
+
|
|
39
|
+
/**
 * Packs an ordered pair of codes into one table index: `left` occupies the
 * bits above the low seven, `right` is OR-ed into the low bits.
 *
 * @param {number} left - Code of the first character of the bigram.
 * @param {number} right - Code of the second character of the bigram.
 * @returns {number} Index of the bigram in a 128 x 128 row-major table.
 */
function pairIndex(left, right) {
  const rowOffset = left << 7;
  return rowOffset | right;
}
|
|
42
|
+
|
|
43
|
+
/**
 * Lazily builds (and memoizes in `_fallbackWeights`) the bigram weight table
 * used when no native weights are available.
 *
 * Every bigram starts with a count of 1, the entries of COMMON_CODE_BIGRAMS
 * overwrite their slots, and each slot becomes
 * `log((total + tableLen) / (count + 1))`, so frequent bigrams get low weights
 * and rare ones get high weights.
 *
 * @returns {number[]} Weight table indexed by pairIndex(left, right).
 */
function buildFallbackWeights() {
  if (!_fallbackWeights) {
    const counts = new Uint32Array(WEIGHT_TABLE_LEN).fill(1);
    COMMON_CODE_BIGRAMS.forEach(([pair, count]) => {
      counts[pairIndex(pair.charCodeAt(0), pair.charCodeAt(1))] = count;
    });

    const total = counts.reduce((sum, count) => sum + count, 0);
    const denominator = total + WEIGHT_TABLE_LEN;
    _fallbackWeights = Array.from(counts, (count) => Math.log(denominator / (count + 1)));
  }
  return _fallbackWeights;
}
|
|
56
|
+
|
|
57
|
+
/**
 * Splits text into maximal runs of span characters, lowercased.
 *
 * Each character is lowercased with normalizeAsciiCode and tested with
 * isSpanCode; any other character ends the current run. Runs shorter than
 * MIN_SPAN_LEN are discarded.
 *
 * @param {*} text - Input text; falsy values are treated as the empty string.
 * @returns {number[][]} One array of character codes per retained run.
 */
function collectNormalizedSpans(text) {
  const spans = [];
  let pending = [];

  // Closes the run in progress, keeping it only when it is long enough.
  const flush = () => {
    if (pending.length >= MIN_SPAN_LEN) spans.push(pending);
    pending = [];
  };

  for (const char of String(text || '')) {
    const code = normalizeAsciiCode(char.charCodeAt(0));
    if (isSpanCode(code)) {
      pending.push(code);
    } else {
      flush();
    }
  }

  // The text may end in the middle of a run.
  if (pending.length >= MIN_SPAN_LEN) spans.push(pending);
  return spans;
}
|
|
71
|
+
|
|
72
|
+
/**
 * Turns a sequence of character codes back into a string.
 *
 * @param {Iterable<number>} bytes - Character codes, in order.
 * @returns {string} The concatenation of the corresponding characters.
 */
function bytesToString(bytes) {
  return Array.from(bytes, (code) => String.fromCharCode(code)).join('');
}
|
|
75
|
+
|
|
76
|
+
/**
 * Enumerates the sparse grams of one normalized span.
 *
 * Every window of MIN_SPAN_LEN..MAX_GRAM_LEN codes is considered. A window is
 * kept when the lighter of its first and last bigram weights is strictly
 * greater than every bigram weight strictly between them. A window of exactly
 * MIN_SPAN_LEN codes has no interior bigram to compare against.
 *
 * @param {number[]} span - Character codes of one span.
 * @param {ArrayLike<number>} weights - Bigram weights indexed by pairIndex().
 * @returns {string[]} Unique grams, in the order they were first found.
 */
function extractSparseGramsFromSpan(span, weights) {
  const spanLen = span.length;
  if (spanLen < MIN_SPAN_LEN) return [];

  // bigramWeights[k] is the weight of the pair (span[k], span[k + 1]).
  const bigramWeights = Array.from(
    { length: spanLen - 1 },
    (_, k) => weights[pairIndex(span[k], span[k + 1])],
  );

  const seen = new Set();
  const grams = [];
  // Records span[from, to) as a gram unless it was already produced.
  const emit = (from, to) => {
    const gram = bytesToString(span.slice(from, to));
    if (seen.has(gram)) return;
    seen.add(gram);
    grams.push(gram);
  };

  const lastStart = spanLen - MIN_SPAN_LEN;
  for (let start = 0; start <= lastStart; start += 1) {
    const maxEnd = Math.min(spanLen, start + MAX_GRAM_LEN);
    for (let end = start + MIN_SPAN_LEN; end <= maxEnd; end += 1) {
      const edgeMin = Math.min(bigramWeights[start], bigramWeights[end - 2]);
      // Math.max() of an empty list is -Infinity, which covers the
      // shortest window where there is no interior bigram.
      const interiorMax = Math.max(...bigramWeights.slice(start + 1, end - 2));
      if (edgeMin > interiorMax) emit(start, end);
    }
  }

  // Safety net: if no window qualified, fall back to every shortest window.
  if (grams.length === 0) {
    for (let i = 0; i <= lastStart; i += 1) emit(i, i + MIN_SPAN_LEN);
  }
  return grams;
}
|
|
109
|
+
|
|
110
|
+
/**
 * Converts a regex literal into a single normalized span, or rejects it.
 *
 * @param {*} literal - Literal text; falsy values are treated as empty.
 * @returns {number[] | null} Lowercased character codes, or null when the
 *   literal is shorter than MIN_SPAN_LEN or contains any character outside
 *   the span alphabet.
 */
function normalizeLiteral(literal) {
  const codes = Array.from(String(literal || ''), (char) => normalizeAsciiCode(char.charCodeAt(0)));
  const usable = codes.length >= MIN_SPAN_LEN && codes.every((code) => isSpanCode(code));
  return usable ? codes : null;
}
|
|
119
|
+
|
|
120
|
+
/**
 * Picks grams for one normalized literal span by splitting it top-down.
 *
 * Ranges are processed from an explicit stack, starting with the whole span:
 *  - a range of exactly MIN_SPAN_LEN codes is emitted as a gram;
 *  - a range of at most MAX_GRAM_LEN codes is emitted when the lighter of its
 *    two edge bigram weights is strictly greater than every interior weight;
 *  - any other range is split at its heaviest non-leading bigram into two
 *    ranges that overlap by one code. Halves shorter than MIN_SPAN_LEN are
 *    dropped.
 *
 * @param {number[]} span - Character codes of the literal.
 * @param {ArrayLike<number>} weights - Bigram weights indexed by pairIndex().
 * @returns {string[]} Unique grams, in the order they were produced.
 */
function extractCoveringGramsFromSpan(span, weights) {
  if (span.length < MIN_SPAN_LEN) return [];

  // bigramWeights[k] is the weight of the pair (span[k], span[k + 1]).
  const bigramWeights = span
    .slice(0, -1)
    .map((code, k) => weights[pairIndex(code, span[k + 1])]);

  const seen = new Set();
  const grams = [];
  // Records span[from, to) as a gram unless it was already produced.
  const emit = (from, to) => {
    const gram = bytesToString(span.slice(from, to));
    if (seen.has(gram)) return;
    seen.add(gram);
    grams.push(gram);
  };

  const pending = [[0, span.length]];
  while (pending.length > 0) {
    const [start, end] = pending.pop();
    const len = end - start;
    if (len < MIN_SPAN_LEN) continue;

    if (len === MIN_SPAN_LEN) {
      emit(start, end);
      continue;
    }

    if (len <= MAX_GRAM_LEN) {
      const edgeMin = Math.min(bigramWeights[start], bigramWeights[end - 2]);
      const interiorMax = Math.max(...bigramWeights.slice(start + 1, end - 2));
      if (edgeMin > interiorMax) {
        emit(start, end);
        continue;
      }
    }

    // Find the first heaviest bigram after the leading one; strict `>` keeps
    // the earliest position on ties.
    let splitAt = start + 1;
    let heaviest = Number.NEGATIVE_INFINITY;
    for (let i = start + 1; i < end - 1; i += 1) {
      if (bigramWeights[i] > heaviest) {
        heaviest = bigramWeights[i];
        splitAt = i;
      }
    }

    // Push the right half first so the left half is processed first.
    const rightStart = splitAt;
    const leftEnd = splitAt + 1;
    if (end - rightStart >= MIN_SPAN_LEN) pending.push([rightStart, end]);
    if (leftEnd - start >= MIN_SPAN_LEN) pending.push([start, leftEnd]);
  }

  // Safety net: if nothing was emitted, fall back to every shortest window.
  if (grams.length === 0) {
    for (let i = 0; i <= span.length - MIN_SPAN_LEN; i += 1) emit(i, i + MIN_SPAN_LEN);
  }
  return grams;
}
|
|
18
172
|
|
|
19
173
|
function loadAddon() {
|
|
20
174
|
if (_addonLoaded) return _addon;
|
|
21
175
|
_addonLoaded = true;
|
|
22
176
|
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
typeof
|
|
29
|
-
typeof
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
_addon = mod;
|
|
33
|
-
}
|
|
34
|
-
} catch (err) {
|
|
35
|
-
// Native addon is optional; callers decide whether to warn or fall back.
|
|
36
|
-
if (process.env.SWEET_DEBUG) console.debug('[native-sparse-gram] addon load failed:', err.message);
|
|
37
|
-
}
|
|
177
|
+
// CUDA-preferred with CPU fallback (see loadNativeAddon): a CUDA addon that
|
|
178
|
+
// can't load on a no-GPU box falls back to the plain CPU addon.
|
|
179
|
+
const res = loadNativeAddon({
|
|
180
|
+
validate: (m) =>
|
|
181
|
+
typeof m.buildSparseGramIndex === 'function' &&
|
|
182
|
+
typeof m.NativeSparseGramIndex?.load === 'function' &&
|
|
183
|
+
typeof m.extractRegexLiterals === 'function',
|
|
184
|
+
});
|
|
185
|
+
if (res) _addon = res.mod;
|
|
38
186
|
|
|
39
187
|
return _addon;
|
|
40
188
|
}
|
|
@@ -43,6 +191,20 @@ export function hasNativeSparseGramSupport() {
|
|
|
43
191
|
return !!loadAddon();
|
|
44
192
|
}
|
|
45
193
|
|
|
194
|
+
/**
 * Reports whether the loaded native addon exposes both in-process grep entry
 * points, `nativeGrepLines` and `nativeGrepFull`. When it does, ss-grep /
 * ss-pattern can run regex matching in-process without spawning ripgrep.
 *
 * @returns {boolean} True only when the addon loaded and both are functions.
 */
export function isNativeGrepAvailable() {
  const addon = loadAddon();
  if (!addon) return false;
  return ['nativeGrepLines', 'nativeGrepFull'].every((name) => typeof addon[name] === 'function');
}
|
|
207
|
+
|
|
46
208
|
export function buildSparseGramIndexArtifact({ projectRoot, files, fileSymbolMasks = [], outputPath }) {
|
|
47
209
|
const addon = loadAddon();
|
|
48
210
|
if (!addon) {
|
|
@@ -63,6 +225,86 @@ export function loadSparseGramIndex(indexPath) {
|
|
|
63
225
|
return addon.NativeSparseGramIndex.load(indexPath);
|
|
64
226
|
}
|
|
65
227
|
|
|
228
|
+
/**
 * Brings a native extraction result into the canonical record shape.
 *
 * @param {*} result - Value returned by a native extractor.
 * @returns {{weightsId: string, grams: string[]} | null} Null when `result`
 *   is falsy or has no `grams` array. Otherwise the weights id (camelCase or
 *   snake_case field, else the fallback id) and the grams as strings,
 *   de-duplicated and sorted.
 */
function normalizeExtractionResult(result) {
  if (!result) return null;
  const { grams } = result;
  if (!Array.isArray(grams)) return null;

  const uniqueGrams = new Set(grams.map(String));
  return {
    weightsId: result.weightsId || result.weights_id || FALLBACK_WEIGHTS_ID,
    grams: Array.from(uniqueGrams).sort(),
  };
}
|
|
235
|
+
|
|
236
|
+
/**
 * Pure-JS gram extraction using the built-in fallback weight table.
 *
 * @param {*} content - Text to extract grams from.
 * @returns {{weightsId: string, grams: string[]}} The fallback weights id and
 *   the sorted, de-duplicated grams of every span found in `content`.
 */
function fallbackSparseGramExtraction(content) {
  const weights = buildFallbackWeights();
  const uniqueGrams = new Set(
    collectNormalizedSpans(content).flatMap((span) => extractSparseGramsFromSpan(span, weights)),
  );
  return { weightsId: FALLBACK_WEIGHTS_ID, grams: Array.from(uniqueGrams).sort() };
}
|
|
244
|
+
|
|
245
|
+
/**
 * Pure-JS computation of the grams required by a set of regex literals,
 * using the built-in fallback weight table.
 *
 * The result is ineligible (with no grams) when `literals` is not a non-empty
 * array, or when any literal cannot be normalized or yields no grams.
 *
 * @param {*} literals - Literal strings extracted from a regex.
 * @returns {{eligible: boolean, grams: string[], weightsId: string}} Sorted,
 *   de-duplicated grams across all literals when eligible.
 */
function fallbackRequiredGrams(literals) {
  const ineligible = () => ({ eligible: false, grams: [], weightsId: FALLBACK_WEIGHTS_ID });
  if (!Array.isArray(literals) || literals.length === 0) return ineligible();

  const weights = buildFallbackWeights();
  const collected = new Set();
  for (const literal of literals) {
    const span = normalizeLiteral(literal);
    const required = span ? extractCoveringGramsFromSpan(span, weights) : [];
    // One unusable literal makes the whole query ineligible.
    if (required.length === 0) return ineligible();
    required.forEach((gram) => collected.add(gram));
  }
  return { eligible: true, grams: [...collected].sort(), weightsId: FALLBACK_WEIGHTS_ID };
}
|
|
260
|
+
|
|
261
|
+
/**
 * Extracts the sparse grams of one file's content for a delta record.
 *
 * With the native addon loaded, the sources are tried in this order:
 *  1. if `indexPath` exists on disk, the index loaded from it:
 *     a. its `extractIndexGrams` / `extract_index_grams` method;
 *     b. otherwise, if its stats report fallback weights, the JS fallback;
 *     c. otherwise, if its stats carry a string weights id, that id with no grams;
 *  2. the addon-level `extractSparseGramDelta` / `extract_sparse_gram_delta`.
 * If the addon is missing, nothing above applies, or anything throws, the JS
 * fallback extraction is used.
 *
 * @param {{indexPath?: string, content: *}} params - Path of the base index
 *   artifact (optional) and the text to extract grams from.
 * @returns {{weightsId: string, grams: string[]} | null} Null only when a
 *   native extractor returned a value without a `grams` array.
 */
export function extractSparseGramDeltaRecord({ indexPath, content }) {
  const addon = loadAddon();
  if (!addon) return fallbackSparseGramExtraction(content);

  try {
    if (indexPath && existsSync(indexPath)) {
      const index = addon.NativeSparseGramIndex.load(indexPath);

      const indexExtractor = index.extractIndexGrams || index.extract_index_grams;
      if (typeof indexExtractor === 'function') {
        return normalizeExtractionResult(indexExtractor.call(index, content));
      }

      const stats = typeof index.getStats === 'function' ? index.getStats() : null;
      const builtWithFallback = stats?.usedFallbackWeights || stats?.used_fallback_weights;
      if (builtWithFallback) return fallbackSparseGramExtraction(content);

      const hasWeightsId = typeof stats?.weightsId === 'string' || typeof stats?.weights_id === 'string';
      if (hasWeightsId) {
        return { weightsId: stats.weightsId || stats.weights_id, grams: [] };
      }
    }

    // Addon-level extractor, camelCase name first, then snake_case.
    const deltaFnName = ['extractSparseGramDelta', 'extract_sparse_gram_delta']
      .find((name) => typeof addon[name] === 'function');
    if (deltaFnName) return normalizeExtractionResult(addon[deltaFnName](content));
  } catch (err) {
    if (process.env.SWEET_DEBUG) console.debug('[native-sparse-gram] extractSparseGramDeltaRecord failed:', err.message);
  }

  return fallbackSparseGramExtraction(content);
}
|
|
289
|
+
|
|
290
|
+
/**
 * Computes the grams a document must contain to match the given literals.
 *
 * Uses the index's own `extractLiteralCoveringGrams` /
 * `extract_literal_covering_grams` method when it has one, and the JS
 * fallback otherwise.
 *
 * @param {object | null | undefined} sparseGramIndex - Loaded sparse-gram index.
 * @param {string[]} literals - Literal strings extracted from a regex.
 * @returns {{eligible: boolean, grams: string[], weightsId: string | null} | null}
 *   Null when the index method throws. Grams are strings, de-duplicated and
 *   sorted.
 */
export function extractSparseGramRequiredGrams(sparseGramIndex, literals) {
  try {
    const extractor = sparseGramIndex?.extractLiteralCoveringGrams
      || sparseGramIndex?.extract_literal_covering_grams;
    if (typeof extractor === 'function') {
      const result = extractor.call(sparseGramIndex, literals);
      const eligible = Boolean(result?.eligible);
      const grams = Array.isArray(result?.grams)
        ? Array.from(new Set(result.grams.map(String))).sort()
        : [];
      const weightsId = result?.weightsId || result?.weights_id || null;
      return { eligible, grams, weightsId };
    }
  } catch (err) {
    if (process.env.SWEET_DEBUG) console.debug('[native-sparse-gram] extractSparseGramRequiredGrams failed:', err.message);
    return null;
  }
  return fallbackRequiredGrams(literals);
}
|
|
307
|
+
|
|
66
308
|
export function extractRegexLiteralClauses(regex) {
|
|
67
309
|
const addon = loadAddon();
|
|
68
310
|
if (!addon) return null;
|
|
@@ -254,4 +496,3 @@ export function searchFull(sparseGramIndex, clauses, regex, projectRoot, opts =
|
|
|
254
496
|
return null;
|
|
255
497
|
}
|
|
256
498
|
}
|
|
257
|
-
|
|
@@ -11,10 +11,7 @@
|
|
|
11
11
|
*/
|
|
12
12
|
|
|
13
13
|
import { existsSync } from 'fs';
|
|
14
|
-
import {
|
|
15
|
-
import { createRequire } from 'module';
|
|
16
|
-
|
|
17
|
-
const require = createRequire(import.meta.url);
|
|
14
|
+
import { loadNativeAddon } from './native-resolver.js';
|
|
18
15
|
|
|
19
16
|
let _addon = null;
|
|
20
17
|
let _addonLoaded = false;
|
|
@@ -22,17 +19,11 @@ let _addonLoaded = false;
|
|
|
22
19
|
function loadAddon() {
|
|
23
20
|
if (_addonLoaded) return _addon;
|
|
24
21
|
_addonLoaded = true;
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
_addon = mod;
|
|
31
|
-
}
|
|
32
|
-
}
|
|
33
|
-
} catch {
|
|
34
|
-
// Native addon not available
|
|
35
|
-
}
|
|
22
|
+
// CUDA-preferred with CPU fallback (see loadNativeAddon): on a no-GPU box the
|
|
23
|
+
// CUDA addon throws on load (libcuda absent) and we fall back to the plain
|
|
24
|
+
// CPU addon so local tokenization (→ ORT-INT8 indexing) keeps working.
|
|
25
|
+
const res = loadNativeAddon({ validate: (m) => typeof m.NativeTokenizer?.fromFile === 'function' });
|
|
26
|
+
if (res) _addon = res.mod;
|
|
36
27
|
return _addon;
|
|
37
28
|
}
|
|
38
29
|
|
|
@@ -17,8 +17,7 @@
|
|
|
17
17
|
import { fileURLToPath } from 'url';
|
|
18
18
|
import { dirname, join } from 'path';
|
|
19
19
|
import { readFileSync, existsSync } from 'fs';
|
|
20
|
-
import {
|
|
21
|
-
import { resolveNativeAddon } from './native-resolver.js';
|
|
20
|
+
import { loadNativeAddon } from './native-resolver.js';
|
|
22
21
|
|
|
23
22
|
const DATA_OFFSET = 0; // SIMD popcount needs no LUT
|
|
24
23
|
|
|
@@ -46,17 +45,12 @@ async function initWasm() {
|
|
|
46
45
|
initPromise = (async () => {
|
|
47
46
|
try {
|
|
48
47
|
const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
//
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
nativeMaxsim = require(addonPath);
|
|
56
|
-
}
|
|
57
|
-
} catch {
|
|
58
|
-
// Native not available — fall through to WASM
|
|
59
|
-
}
|
|
48
|
+
|
|
49
|
+
// Tier 1: native Rust addon (rayon + NEON/AVX2 SIMD). CUDA-preferred with
|
|
50
|
+
// CPU fallback (see loadNativeAddon) — a CUDA addon that can't load on a
|
|
51
|
+
// no-GPU box falls back to the plain CPU addon; otherwise WASM (Tier 2).
|
|
52
|
+
const nativeRes = loadNativeAddon();
|
|
53
|
+
if (nativeRes) nativeMaxsim = nativeRes.mod;
|
|
60
54
|
|
|
61
55
|
// Tier 2a: Load hand-assembled SIMD distance WASM
|
|
62
56
|
const wasmPath = join(__dirname, 'simd-distance.wasm');
|
|
@@ -79,11 +73,11 @@ async function initWasm() {
|
|
|
79
73
|
initDone = true;
|
|
80
74
|
|
|
81
75
|
if (nativeMaxsim) {
|
|
82
|
-
console.
|
|
76
|
+
console.error('[MaxSim] Tier 1: Native Rust + Rayon (parallel SIMD)');
|
|
83
77
|
} else if (maxsimExports || wasmExports?.maxsim_f32) {
|
|
84
|
-
console.
|
|
78
|
+
console.error('[MaxSim] Tier 2: WASM SIMD f32x4');
|
|
85
79
|
} else {
|
|
86
|
-
console.
|
|
80
|
+
console.error('[MaxSim] Tier 3: JS fallback');
|
|
87
81
|
}
|
|
88
82
|
|
|
89
83
|
return true;
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Read-only sparse-gram delta helpers for query-time overlay resolution.
|
|
3
|
+
*
|
|
4
|
+
* The reconcile writer lives under incremental-indexing. Search only needs to
|
|
5
|
+
* resolve the latest append-only delta record per file, so that read contract
|
|
6
|
+
* belongs in infrastructure instead of importing the writer bounded context.
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
import fs from 'node:fs';
|
|
10
|
+
import path from 'node:path';
|
|
11
|
+
|
|
12
|
+
// Suffix appended to the base artifact path to form its delta directory.
export const SPARSE_DELTA_DIR_SUFFIX = '.deltas';
// File extension of one delta segment inside that directory.
export const SPARSE_DELTA_FILE_EXT = '.ssgrmdelta';

/**
 * Returns the delta directory that belongs to a base sparse-gram artifact.
 *
 * @param {string} baseArtifactPath - Path of the base artifact.
 * @returns {string} `baseArtifactPath` with SPARSE_DELTA_DIR_SUFFIX appended.
 */
function deltaDirFor(baseArtifactPath) {
  const deltaDir = baseArtifactPath + SPARSE_DELTA_DIR_SUFFIX;
  return deltaDir;
}
|
|
18
|
+
|
|
19
|
+
/**
 * Validates one candidate delta segment path and parses its file name.
 *
 * A segment is accepted only when it is a string ending in
 * SPARSE_DELTA_FILE_EXT, resolves to a location inside the artifact's delta
 * directory, is named `<epoch>-<seq>.ssgrmdelta`, has an epoch no greater
 * than `maxEpoch`, and exists on disk. Relative paths are resolved against
 * the directory of the base artifact.
 *
 * @param {string} baseArtifactPath - Path of the base artifact.
 * @param {*} segmentPath - Candidate segment path, absolute or relative.
 * @param {number} maxEpoch - Highest epoch to accept.
 * @returns {{path: string, epoch: number, seq: number} | null} Parsed segment,
 *   or null when any check fails.
 */
function parseDeltaSegment(baseArtifactPath, segmentPath, maxEpoch) {
  const hasDeltaExt = typeof segmentPath === 'string' && segmentPath.endsWith(SPARSE_DELTA_FILE_EXT);
  if (!hasDeltaExt) return null;

  const deltaRoot = path.resolve(deltaDirFor(baseArtifactPath));
  const candidate = path.isAbsolute(segmentPath)
    ? segmentPath
    : path.join(path.dirname(baseArtifactPath), segmentPath);
  const normalized = path.resolve(candidate);

  // Reject anything that resolves outside the delta directory.
  const insideRoot = normalized === deltaRoot || normalized.startsWith(deltaRoot + path.sep);
  if (!insideRoot) return null;

  const nameParts = /^(\d+)-(\d+)\.ssgrmdelta$/.exec(path.basename(normalized));
  if (nameParts === null) return null;

  const [, epochText, seqText] = nameParts;
  const epoch = Number(epochText);
  if (epoch > maxEpoch) return null;
  if (!fs.existsSync(normalized)) return null;

  return { path: normalized, epoch, seq: Number(seqText) };
}
|
|
38
|
+
|
|
39
|
+
/**
 * Lists the valid delta segments of a base artifact, oldest first.
 *
 * @param {string} baseArtifactPath - Path of the base artifact.
 * @param {{maxEpoch?: number, segments?: string[]}} [opts] - `maxEpoch` drops
 *   segments with a higher epoch (ignored unless it is an integer);
 *   `segments`, when an array, is used as the candidate list instead of
 *   reading the delta directory.
 * @returns {{path: string, epoch: number, seq: number}[]} Accepted segments
 *   sorted by epoch, then by sequence number. Empty when the delta directory
 *   does not exist.
 */
export function listSparseGramDeltaSegments(baseArtifactPath, opts = {}) {
  const maxEpoch = Number.isInteger(opts.maxEpoch) ? opts.maxEpoch : Infinity;
  const byEpochThenSeq = (a, b) => (a.epoch - b.epoch) || (a.seq - b.seq);

  let candidates;
  if (Array.isArray(opts.segments)) {
    candidates = opts.segments;
  } else {
    const dir = deltaDirFor(baseArtifactPath);
    if (!fs.existsSync(dir)) return [];
    candidates = fs.readdirSync(dir).map((name) => path.join(dir, name));
  }

  return candidates
    .map((segmentPath) => parseDeltaSegment(baseArtifactPath, segmentPath, maxEpoch))
    .filter(Boolean)
    .sort(byEpochThenSeq);
}
|
|
57
|
+
|
|
58
|
+
/**
 * Resolves the most recent delta record for every file.
 *
 * Segments are read in the order returned by listSparseGramDeltaSegments
 * (ascending epoch, then sequence), and each segment is parsed as one JSON
 * record per line. A later record for the same `fileId` replaces an earlier
 * one. Blank lines, lines that are not valid JSON, and records without a
 * truthy `fileId` are skipped.
 *
 * @param {string} baseArtifactPath - Path of the base artifact.
 * @param {{maxEpoch?: number, segments?: string[]}} [opts] - Passed through
 *   to listSparseGramDeltaSegments.
 * @returns {Map<*, {record: object, segmentPath: string, epoch: number}>}
 *   Latest record per `fileId`, with the segment it came from.
 */
export function resolveLatestSparseGramDeltaRecords(baseArtifactPath, opts = {}) {
  const latest = new Map();
  for (const seg of listSparseGramDeltaSegments(baseArtifactPath, opts)) {
    const raw = fs.readFileSync(seg.path, 'utf-8');
    for (const line of raw.split('\n')) {
      const trimmed = line.trim();
      if (!trimmed) continue;
      let record;
      try {
        record = JSON.parse(trimmed);
      } catch {
        continue;
      }
      // A line holding the JSON literal `null` parses fine but has no
      // properties; skip it like any other record without a fileId instead
      // of throwing on the property read.
      if (!record || !record.fileId) continue;
      latest.set(record.fileId, { record, segmentPath: seg.path, epoch: seg.epoch });
    }
  }
  return latest;
}
|
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
import path from 'path';
|
|
2
|
+
|
|
3
|
+
const ACTIVE = 'stale_since IS NULL';
|
|
4
|
+
|
|
5
|
+
/**
 * Escapes text so it can be embedded in a RegExp source and match literally.
 *
 * @param {*} text - Value to escape; it is converted with String() first.
 * @returns {string} The text with every regex metacharacter backslash-escaped.
 */
function escapeRegExp(text) {
  const source = String(text);
  return source.replace(/[.*+?^${}()|[\]\\]/g, (metachar) => `\\${metachar}`);
}
|
|
8
|
+
|
|
9
|
+
/**
 * Maps an `entities` table row (snake_case columns) to a camelCase entity.
 *
 * @param {object} row - Database row.
 * @returns {object} Entity where falsy `signature` / `summary` become '' and
 *   falsy `parent_class` / `package` become null.
 */
function rowToEntity(row) {
  const {
    id,
    name,
    type,
    file_path: filePath,
    start_line: startLine,
    end_line: endLine,
  } = row;
  return {
    id,
    name,
    type,
    filePath,
    startLine,
    endLine,
    signature: row.signature || '',
    summary: row.summary || '',
    parentClass: row.parent_class || null,
    package: row.package || null,
  };
}
|
|
23
|
+
|
|
24
|
+
/**
 * Returns a file's base name without its last extension.
 *
 * @param {*} filePath - File path; falsy values are treated as ''.
 * @returns {string} For example 'bar' for 'src/foo/bar.js'.
 */
function moduleStem(filePath) {
  const normalizedPath = String(filePath || '');
  const extension = path.extname(normalizedPath);
  return path.basename(normalizedPath, extension);
}
|
|
27
|
+
|
|
28
|
+
/**
 * Heuristic check that an import specifier refers to the module named `stem`.
 *
 * Backslashes are turned into forward slashes and a trailing JS/TS extension
 * (.js, .ts, .jsx, .tsx, optionally prefixed with c or m) is removed. The
 * specifier matches when what remains is `stem` or ends with `/stem`.
 *
 * @param {*} moduleName - Specifier as written in the import or require call.
 * @param {string} stem - Extension-less base name of the target module.
 * @returns {boolean} True when the specifier looks like it names that module.
 */
function moduleLooksRelated(moduleName, stem) {
  const withForwardSlashes = String(moduleName || '').replace(/\\/g, '/');
  const normalized = withForwardSlashes.replace(/\.[cm]?[jt]sx?$/, '');
  if (normalized === stem) return true;
  return normalized.endsWith(`/${stem}`);
}
|
|
32
|
+
|
|
33
|
+
/**
 * Scans a comma-separated specifier list (the text between `{` and `}` of a
 * destructuring require or a named import) for the local names bound to
 * `targetName`, and adds them to `aliases`.
 *
 * Recognized forms per specifier:
 *  - `target: local`  (CommonJS destructuring rename)
 *  - `target as local` (ES module rename)
 *  - `target`          (no rename; the target name itself is added)
 *
 * @param {string} specs - Raw specifier list.
 * @param {string} targetName - Exported name being looked for.
 * @param {Set<string>} aliases - Collector that is mutated in place.
 * @returns {void}
 */
function parseSpecifiers(specs, targetName, aliases) {
  // Both patterns depend only on targetName, so compile them once per call
  // rather than once per specifier.
  const escapedName = escapeRegExp(targetName);
  const colonRename = new RegExp(`^${escapedName}\\s*:\\s*([A-Za-z_$][\\w$]*)$`);
  const asRename = new RegExp(`^${escapedName}\\s+as\\s+([A-Za-z_$][\\w$]*)$`);

  for (const rawPart of specs.split(',')) {
    const part = rawPart.trim();
    const renamed = colonRename.exec(part) || asRename.exec(part);
    if (renamed) aliases.add(renamed[1]);
    else if (part === targetName) aliases.add(targetName);
  }
}
|
|
43
|
+
|
|
44
|
+
/**
 * Finds the local names under which `target` is imported in a source text.
 *
 * Works line by line. Only lines that mention both the target's name and its
 * module stem are inspected, and only these single-line forms are recognized:
 *  - `{ ... } = require('module')`
 *  - `import { ... } from 'module'`
 *  - `const|let|var local = require('module').target`
 * A match counts only when the module specifier looks related to the
 * target's file (see moduleLooksRelated).
 *
 * @param {*} text - Source text to scan; falsy values are treated as ''.
 * @param {{name: string, filePath: string}} target - Entity being looked for.
 * @returns {string[]} Distinct local names, in discovery order.
 */
function extractAliases(text, target) {
  const aliases = new Set();
  const stem = moduleStem(target.filePath);

  // None of these patterns depends on the line, so build them once per call.
  const cjsDestructure = /\{([^}]+)\}\s*=\s*require\(['"]([^'"]+)['"]\)/;
  const esmNamedImport = /import\s+\{([^}]+)\}\s+from\s+['"]([^'"]+)['"]/;
  const requireProperty = new RegExp(`(?:const|let|var)\\s+([A-Za-z_$][\\w$]*)\\s*=\\s*require\\(['"]([^'"]+)['"]\\)\\.${escapeRegExp(target.name)}\\b`);

  for (const line of String(text || '').split('\n')) {
    // Cheap substring filter before running any regex.
    if (!line.includes(target.name) || !line.includes(stem)) continue;

    const cjs = line.match(cjsDestructure);
    if (cjs && moduleLooksRelated(cjs[2], stem)) parseSpecifiers(cjs[1], target.name, aliases);

    const esm = line.match(esmNamedImport);
    if (esm && moduleLooksRelated(esm[2], stem)) parseSpecifiers(esm[1], target.name, aliases);

    const prop = line.match(requireProperty);
    if (prop && moduleLooksRelated(prop[2], stem)) aliases.add(prop[1]);
  }
  return [...aliases];
}
|
|
58
|
+
|
|
59
|
+
/**
 * Translate a character offset into a 1-based line number by counting the
 * line-feed characters that occur strictly before `index`.
 *
 * @param {string} text - Text being indexed.
 * @param {number} index - Zero-based character offset into `text`.
 * @returns {number} 1-based line number containing the offset.
 */
function lineOfIndex(text, index) {
  let lineNumber = 1;
  let newlineAt = text.indexOf('\n');
  while (newlineAt !== -1 && newlineAt < index) {
    lineNumber += 1;
    newlineAt = text.indexOf('\n', newlineAt + 1);
  }
  return lineNumber;
}
|
|
64
|
+
|
|
65
|
+
/**
 * Find entities that call `target` through an import alias, by scanning the
 * text of indexed files.
 *
 * For each distinct indexed file (test-directory paths ordered last, at most
 * 1000 files) other than the target's own file, the import aliases of the
 * target are extracted and every `alias(` call site is located. When the
 * target lives in a `.rs` file, `Qualifier::name(` call sites are matched as
 * well. Each call site is attributed to the smallest visible entity whose
 * line range contains it.
 *
 * @param {object} options
 * @param {object} options.db - Database handle exposing `prepare(sql)` whose
 *   statements provide `all(...params)` and `get(...params)`.
 * @param {{id: *, name: string, filePath: string}} options.target - Callee.
 * @param {function(string, number, number): (string|null|undefined)} options.readFileRange
 *   Reads a line range of a file; called as `(filePath, 1, 20000)`.
 * @param {number} [options.limit=40] - Stop once this many callers are found.
 * @param {string} [options.entityVisibilitySql] - SQL predicate restricting
 *   which entity rows are considered; interpolated into the WHERE clause.
 * @param {Array} [options.entityVisibilityParams=[]] - Bound parameters for
 *   the visibility predicate.
 * @param {function(object): object} [options.mapEntity] - Row-to-entity mapper.
 * @returns {object[]} Mapped caller entities, each extended with
 *   `relationship: 'calls'`, `contextLine`, `targetName` and `weight: 0.82`.
 */
export function findAliasCallers({
  db,
  target,
  readFileRange,
  limit = 40,
  entityVisibilitySql: entitySql = ACTIVE,
  entityVisibilityParams: entityParams = [],
  mapEntity = rowToEntity,
}) {
  if (!db || !target?.filePath || !target?.name) return [];
  const files = db.prepare(`
    SELECT DISTINCT file_path
    FROM entities
    WHERE ${entitySql} AND file_path IS NOT NULL
    ORDER BY CASE WHEN file_path LIKE '%/test/%' OR file_path LIKE 'test/%' OR file_path LIKE 'tests/%' THEN 1 ELSE 0 END, file_path
    LIMIT 1000
  `).all(...entityParams);
  // Smallest enclosing entity wins, so a call inside a method is attributed
  // to the method rather than to its surrounding class.
  const entityAtLine = db.prepare(`
    SELECT id, name, type, file_path, start_line, end_line, signature, summary, parent_class, package
    FROM entities
    WHERE ${entitySql} AND file_path = ? AND start_line <= ? AND end_line >= ?
    ORDER BY (end_line - start_line) ASC
    LIMIT 1
  `);
  const out = [];
  const seen = new Set();
  for (const { file_path: filePath } of files) {
    if (filePath === target.filePath) continue;
    // readFileRange may yield nothing for an unreadable or empty file. Skip
    // such files: without this guard a `.rs` target (whose pattern list is
    // never empty) reached `matchAll` on a non-string and threw a TypeError.
    const raw = readFileRange(filePath, 1, 20000);
    if (!raw) continue;
    const text = String(raw);
    const aliases = extractAliases(text, target);
    const patterns = aliases.map(alias => ({
      targetName: alias,
      // Lookbehind rejects matches where the alias is merely the tail of a
      // longer identifier.
      re: new RegExp(`(?<![\\w$])${escapeRegExp(alias)}\\s*\\(`, 'g'),
    }));
    if (/\.rs$/.test(target.filePath)) {
      patterns.push({
        targetName: `::${target.name}`,
        re: new RegExp(`\\b[A-Za-z_][\\w]*::${escapeRegExp(target.name)}\\s*\\(`, 'g'),
      });
    }
    if (!patterns.length) continue;
    for (const pattern of patterns) {
      const re = pattern.re;
      for (const match of text.matchAll(re)) {
        const line = lineOfIndex(text, match.index || 0);
        const entity = entityAtLine.get(...entityParams, filePath, line, line);
        if (!entity || entity.id === target.id) continue;
        // Report the callee text as written at the call site, minus the "(".
        const targetName = match[0].replace(/\s*\($/, '') || pattern.targetName;
        const key = `${entity.id}:${line}:${targetName}`;
        if (seen.has(key)) continue;
        seen.add(key);
        out.push({ ...mapEntity(entity), relationship: 'calls', contextLine: line, targetName, weight: 0.82 });
        if (out.length >= limit) return out;
      }
    }
  }
  return out;
}
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
/**
 * Split text into distinct lower-case identifier-like tokens of at least
 * three characters (a letter or underscore followed by two or more letters,
 * digits or underscores).
 *
 * @param {*} text - Text to tokenize; falsy values are treated as ''.
 * @returns {string[]} Unique tokens in order of first appearance.
 */
function tokens(text) {
  const lowered = String(text || '').toLowerCase();
  const words = lowered.match(/[a-z_][a-z0-9_]{2,}/g);
  if (words === null) return [];
  return Array.from(new Set(words));
}
|
|
4
|
+
|
|
5
|
+
/**
 * Tell whether the hint tokens ask for an underlying implementation rather
 * than a wrapper.
 *
 * The token 'wrapper' always vetoes; otherwise any implementation-oriented
 * keyword is enough.
 *
 * @param {string[]} hintTokens - Lower-case tokens from the query hint.
 * @returns {boolean} True when an implementation is being asked for.
 */
function wantsImplementation(hintTokens) {
  if (hintTokens.includes('wrapper')) return false;
  const implementationWords = new Set([
    'callee',
    'callees',
    'downstream',
    'helper',
    'helpers',
    'conversion',
    'implementation',
  ]);
  return hintTokens.some((token) => implementationWords.has(token));
}
|
|
9
|
+
|
|
10
|
+
/**
 * Detect whether `code` contains a member call to a method with the same name
 * as the candidate (for example `this.inner.save(` inside `save`), which
 * suggests the candidate forwards to another implementation.
 *
 * @param {{name?: string}} candidate - Entity whose name is looked for.
 * @param {string} code - Source text of the candidate.
 * @returns {boolean} True when a `.name(` call is present; always false when
 *   the candidate has no name.
 */
function delegatesSameName(candidate, code) {
  const rawName = String(candidate.name || '');
  // With an empty name the pattern would collapse to "\.\s*\(" and match any
  // ".(" in the code (e.g. an optional call `cb?.(x)`), reporting a false
  // delegation. A nameless candidate cannot delegate to "the same name".
  if (rawName === '') return false;
  const name = rawName.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  return new RegExp(`\\.\\s*${name}\\s*\\(`).test(code);
}
|
|
14
|
+
|
|
15
|
+
/**
 * Score how well a candidate matches the hint tokens.
 *
 * The base score is the fraction of hint tokens that occur as substrings of
 * the candidate's name, type, file path, signature, summary and source text
 * (all lower-cased). When the hint asks for an implementation and the
 * candidate's code calls a same-named member, 0.35 is subtracted.
 *
 * @param {object} candidate - Entity with name, type, filePath, signature,
 *   summary, startLine and endLine.
 * @param {string[]} hintTokens - Lower-case hint tokens.
 * @param {function(string, number, number): (string|null|undefined)} readFileRange
 *   Reads the candidate's source lines.
 * @returns {number} 0 when there are no hint tokens, otherwise the score.
 */
function scoreCandidate(candidate, hintTokens, readFileRange) {
  const total = hintTokens.length;
  if (!total) return 0;

  const body = readFileRange(candidate.filePath, candidate.startLine, candidate.endLine) || '';
  const { name, type, filePath, signature, summary } = candidate;
  const haystack = `${name} ${type} ${filePath} ${signature} ${summary} ${body}`.toLowerCase();

  const matched = hintTokens.filter((token) => haystack.includes(token)).length;
  const ratio = matched / total;

  const looksLikeWrapper = wantsImplementation(hintTokens) && delegatesSameName(candidate, body);
  return looksLikeWrapper ? ratio - 0.35 : ratio;
}
|
|
25
|
+
|
|
26
|
+
/**
 * Reorder candidates so those that best match the query hint come first.
 *
 * The input array is returned untouched when the hint yields no tokens or
 * fewer than two candidates are given. Otherwise a new array is returned,
 * sorted by descending score with ties kept in their original order.
 *
 * @param {object[]} candidates - Entities to rank.
 * @param {object} options
 * @param {string} options.queryHint - Free-text hint describing what is wanted.
 * @param {function(string, number, number): (string|null|undefined)} options.readFileRange
 *   Reads a candidate's source lines for scoring.
 * @returns {object[]} The ranked candidates.
 */
export function rankStructuralCandidates(candidates, { queryHint, readFileRange }) {
  const hintTokens = tokens(queryHint);
  if (hintTokens.length === 0 || candidates.length < 2) return candidates;

  const scored = candidates.map((candidate, index) => {
    const score = scoreCandidate(candidate, hintTokens, readFileRange);
    return { candidate, index, score };
  });
  // Higher score first; original position breaks ties.
  scored.sort((left, right) => (right.score - left.score) || (left.index - right.index));
  return scored.map(({ candidate }) => candidate);
}
|