sweet-search 2.5.2 → 2.5.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/core/cli.js +24 -3
- package/core/graph/graph-expansion.js +215 -36
- package/core/graph/graph-extractor.js +196 -11
- package/core/graph/graph-search.js +395 -92
- package/core/graph/hcgs-generator.js +2 -1
- package/core/graph/index.js +2 -0
- package/core/graph/repo-map.js +28 -6
- package/core/graph/structural-answer-cues.js +168 -0
- package/core/graph/structural-callsite-hints.js +40 -0
- package/core/graph/structural-context-format.js +40 -0
- package/core/graph/structural-context.js +450 -0
- package/core/graph/structural-forward-push.js +156 -0
- package/core/graph/structural-header-context.js +19 -0
- package/core/graph/structural-importance.js +148 -0
- package/core/graph/structural-pagerank.js +197 -0
- package/core/graph/summary-manager.js +13 -9
- package/core/incremental-indexing/application/dirty-scan.mjs +236 -0
- package/core/incremental-indexing/application/file-watcher.mjs +197 -0
- package/core/incremental-indexing/application/maintenance-handlers.mjs +519 -0
- package/core/incremental-indexing/application/maintenance-worker.mjs +380 -0
- package/core/incremental-indexing/application/operator-cli.mjs +554 -0
- package/core/incremental-indexing/application/production-li-delta.mjs +192 -0
- package/core/incremental-indexing/application/production-reconciler-helpers.mjs +107 -0
- package/core/incremental-indexing/application/production-reconciler.mjs +583 -0
- package/core/incremental-indexing/application/reconciler.mjs +477 -0
- package/core/incremental-indexing/application/tombstone-injector.mjs +148 -0
- package/core/incremental-indexing/domain/chunk-identity.mjs +260 -0
- package/core/incremental-indexing/domain/encoder-deps.mjs +193 -0
- package/core/incremental-indexing/domain/encoder-input.mjs +225 -0
- package/core/incremental-indexing/domain/interval-autotune.mjs +255 -0
- package/core/incremental-indexing/domain/reconcile-counters.mjs +149 -0
- package/core/incremental-indexing/domain/watermark-scheduler.mjs +239 -0
- package/core/incremental-indexing/infrastructure/artifact-temp-sweep.mjs +163 -0
- package/core/incremental-indexing/infrastructure/baseline-readiness.mjs +121 -0
- package/core/incremental-indexing/infrastructure/dirty-set.mjs +233 -0
- package/core/incremental-indexing/infrastructure/graph-gc.mjs +314 -0
- package/core/incremental-indexing/infrastructure/hashing.mjs +298 -0
- package/core/incremental-indexing/infrastructure/hcgs-invalidation.mjs +182 -0
- package/core/incremental-indexing/infrastructure/li-segment-merge.mjs +278 -0
- package/core/incremental-indexing/infrastructure/li-segment-state.mjs +173 -0
- package/core/incremental-indexing/infrastructure/lockfile.mjs +119 -0
- package/core/incremental-indexing/infrastructure/maintenance-state-reader.mjs +283 -0
- package/core/incremental-indexing/infrastructure/manifest.mjs +194 -0
- package/core/incremental-indexing/infrastructure/path-filter.mjs +190 -0
- package/core/incremental-indexing/infrastructure/reader-heartbeat.mjs +201 -0
- package/core/incremental-indexing/infrastructure/schema-migrations.mjs +257 -0
- package/core/incremental-indexing/infrastructure/sparse-gram-delta.mjs +335 -0
- package/core/incremental-indexing/infrastructure/sqlite-fts5.mjs +176 -0
- package/core/incremental-indexing/infrastructure/staleness-display.mjs +105 -0
- package/core/incremental-indexing/infrastructure/tombstone-bitmap.mjs +234 -0
- package/core/incremental-indexing/infrastructure/vector-delta-writer.mjs +359 -0
- package/core/incremental-indexing/infrastructure/vector-gc.mjs +133 -0
- package/core/incremental-indexing/infrastructure/worktree-stamp.mjs +155 -0
- package/core/incremental-indexing/infrastructure/wsl2-detect.mjs +115 -0
- package/core/indexing/admission-policy.js +139 -0
- package/core/indexing/artifact-builder.js +29 -12
- package/core/indexing/ast-chunker.js +107 -30
- package/core/indexing/dedup/exemplar-selector.js +19 -1
- package/core/indexing/gitignore-filter.js +223 -0
- package/core/indexing/incremental-tracker.js +99 -30
- package/core/indexing/index-codebase-v21.js +6 -5
- package/core/indexing/index-maintainer.mjs +698 -6
- package/core/indexing/indexer-ann.js +99 -15
- package/core/indexing/indexer-build.js +158 -45
- package/core/indexing/indexer-empty-baseline.js +80 -0
- package/core/indexing/indexer-manifest.js +66 -0
- package/core/indexing/indexer-phases.js +56 -23
- package/core/indexing/indexer-sparse-gram.js +54 -13
- package/core/indexing/indexer-utils.js +26 -208
- package/core/indexing/indexing-file-policy.js +32 -7
- package/core/indexing/maintainer-launcher.mjs +137 -0
- package/core/indexing/merkle-tracker.js +251 -244
- package/core/indexing/model-pool.js +46 -5
- package/core/infrastructure/code-graph-repository.js +758 -6
- package/core/infrastructure/code-graph-visibility.js +157 -0
- package/core/infrastructure/codebase-repository.js +100 -13
- package/core/infrastructure/config/search.js +1 -1
- package/core/infrastructure/db-utils.js +118 -0
- package/core/infrastructure/dedup-hashing.js +10 -13
- package/core/infrastructure/hardware-capability.js +17 -7
- package/core/infrastructure/index.js +8 -2
- package/core/infrastructure/language-patterns/maps.js +4 -1
- package/core/infrastructure/language-patterns/registry-core.js +56 -17
- package/core/infrastructure/language-patterns/registry-object-oriented.js +12 -5
- package/core/infrastructure/language-patterns.js +69 -0
- package/core/infrastructure/model-registry.js +20 -0
- package/core/infrastructure/native-inference.js +7 -12
- package/core/infrastructure/native-resolver.js +52 -37
- package/core/infrastructure/native-sparse-gram.js +261 -20
- package/core/infrastructure/native-tokenizer.js +6 -15
- package/core/infrastructure/simd-distance.js +10 -16
- package/core/infrastructure/sparse-gram-delta-reader.js +76 -0
- package/core/infrastructure/structural-alias-resolver.js +122 -0
- package/core/infrastructure/structural-candidate-ranker.js +34 -0
- package/core/infrastructure/structural-context-repository.js +472 -0
- package/core/infrastructure/structural-context-utils.js +51 -0
- package/core/infrastructure/structural-graph-signals.js +121 -0
- package/core/infrastructure/structural-qualified-resolution.js +15 -0
- package/core/infrastructure/structural-source-definitions.js +100 -0
- package/core/infrastructure/tombstone-bitmap-reader.js +139 -0
- package/core/infrastructure/tree-sitter-provider.js +811 -37
- package/core/prompt-optimization/data/p7-final/sweet-search-system-prompt.md +50 -0
- package/core/query/query-router.js +55 -5
- package/core/ranking/file-kind-ranking.js +2192 -15
- package/core/ranking/late-interaction-index.js +87 -12
- package/core/search/cli-decoration.js +290 -0
- package/core/search/context-expander.js +988 -78
- package/core/search/index.js +1 -0
- package/core/search/output-policy.js +275 -0
- package/core/search/search-anchor.js +499 -0
- package/core/search/search-boost.js +93 -1
- package/core/search/search-cli.js +61 -204
- package/core/search/search-hybrid.js +250 -10
- package/core/search/search-pattern-chunks.js +57 -8
- package/core/search/search-pattern-planner.js +68 -9
- package/core/search/search-pattern-prefilter.js +30 -10
- package/core/search/search-pattern-ripgrep.js +40 -4
- package/core/search/search-pattern-sparse-overlay.js +256 -0
- package/core/search/search-pattern.js +117 -29
- package/core/search/search-postprocess.js +479 -5
- package/core/search/search-read-semantic.js +260 -23
- package/core/search/search-read.js +82 -64
- package/core/search/search-reader-pin.js +71 -0
- package/core/search/search-rrf.js +279 -0
- package/core/search/search-semantic.js +110 -5
- package/core/search/search-server.js +130 -57
- package/core/search/search-trace.js +107 -0
- package/core/search/server-identity.js +93 -0
- package/core/search/session-daemon-prewarm.mjs +33 -10
- package/core/search/sweet-search.js +399 -7
- package/core/skills/sweet-index/SKILL.md +8 -6
- package/core/vector-store/binary-hnsw-index.js +194 -30
- package/core/vector-store/float-vector-store.js +96 -6
- package/core/vector-store/hnsw-index.js +220 -49
- package/eval/agent-read-workflows/bin/_ss-helpers.mjs +471 -0
- package/eval/agent-read-workflows/bin/ss-find +15 -0
- package/eval/agent-read-workflows/bin/ss-grep +12 -0
- package/eval/agent-read-workflows/bin/ss-read +14 -0
- package/eval/agent-read-workflows/bin/ss-search +18 -0
- package/eval/agent-read-workflows/bin/ss-semantic +12 -0
- package/eval/agent-read-workflows/bin/ss-trace +11 -0
- package/mcp/read-tool.js +109 -0
- package/mcp/server.js +55 -15
- package/mcp/tool-handlers.js +14 -124
- package/mcp/trace-tool.js +81 -0
- package/package.json +25 -10
- package/scripts/hooks/intercept-read.mjs +55 -0
- package/scripts/hooks/remind-tools.mjs +40 -0
- package/scripts/init.js +698 -54
- package/scripts/inject-agent-instructions.js +431 -0
- package/scripts/install-prompt-reminders.js +188 -0
- package/scripts/install-tool-enforcement.js +220 -0
- package/scripts/smoke-test.js +12 -9
- package/scripts/uninstall.js +276 -18
- package/scripts/write-claude-rules.js +110 -0
|
@@ -9,12 +9,13 @@
|
|
|
9
9
|
*/
|
|
10
10
|
|
|
11
11
|
import { readFileSync } from 'fs';
|
|
12
|
+
import path from 'path';
|
|
12
13
|
import { SEISMIC_CONFIG, DB_PATHS } from '../infrastructure/config/index.js';
|
|
13
14
|
import { expandResults } from '../graph/graph-expansion.js';
|
|
14
15
|
import { int8CosineSimilarity } from '../embedding/embedding-service.js';
|
|
15
16
|
import { QualityScorer } from '../ranking/quality-scorer.js';
|
|
16
17
|
import { classifyIntent, getIntentPolicy } from '../query/intent-router.js';
|
|
17
|
-
import { applyFileKindRanking, classifyFileKindIntent } from '../ranking/file-kind-ranking.js';
|
|
18
|
+
import { applyFileKindRanking, applyResultDemotions, classifyFileKindIntent } from '../ranking/file-kind-ranking.js';
|
|
18
19
|
import { recordQueryTelemetry } from '../embedding/embedding-cache.js';
|
|
19
20
|
import { expandAliases } from './dedup/sibling-expander.js';
|
|
20
21
|
|
|
@@ -34,6 +35,353 @@ export function minMaxNormalize(values) {
|
|
|
34
35
|
// for telemetry purposes. Derived empirically: FTS5 page-cache hits typically
|
|
35
36
|
// complete in <2ms; 5ms gives headroom for slow I/O without inflating miss rates.
|
|
36
37
|
const LEXICAL_HIT_THRESHOLD_MS = 5;
|
|
38
|
+
const QUERY_TEXT_RANKING_WEIGHT = 0.75;
|
|
39
|
+
const QUERY_TEXT_RANKING_WINDOW = 20;
|
|
40
|
+
const QUERY_TEXT_MIN_AGREEMENT = 0.5;
|
|
41
|
+
const QUERY_TEXT_MAX_CHARS = 12000;
|
|
42
|
+
const FULL_VECTOR_RESCORE_WINDOW = 20;
|
|
43
|
+
const FULL_VECTOR_RESCORE_WEIGHT = 0.80;
|
|
44
|
+
// After LI/MaxSim rerank, blend dense full-vector similarity. Must match
|
|
45
|
+
// LATE_INTERACTION_CONFIG.blendWeight (ranking.js) so agent + bench agree
|
|
46
|
+
// without env overrides — calibrated on GCSN dev/held-out (seed=42 splits).
|
|
47
|
+
const FULL_VECTOR_LI_RESCORE_WEIGHT = 0.3;
|
|
48
|
+
const FULL_VECTOR_EXACT_TEXT_WEIGHT = 0.20;
|
|
49
|
+
const QUERY_TEXT_FILE_CACHE = new Map();
|
|
50
|
+
const QUERY_TEXT_STOPWORDS = new Set([
|
|
51
|
+
'a', 'an', 'and', 'are', 'as', 'at', 'be', 'by', 'can', 'could',
|
|
52
|
+
'did', 'do', 'does', 'for', 'from', 'how', 'i', 'in', 'into', 'is',
|
|
53
|
+
'it', 'of', 'on', 'or', 'should', 'the', 'to', 'was', 'were', 'what',
|
|
54
|
+
'when', 'where', 'with', 'you', 'your',
|
|
55
|
+
]);
|
|
56
|
+
|
|
57
|
+
/**
 * Report whether a named ablation switch is present.
 *
 * @param {Set<string>|string[]|*} ablations - A Set or an array of ablation
 *   names; any other value is treated as "no ablations".
 * @param {string} name - Ablation name to look up.
 * @returns {boolean} True only when `ablations` is a Set or array containing `name`.
 */
function hasAblation(ablations, name) {
  if (ablations instanceof Set) return ablations.has(name);
  if (Array.isArray(ablations)) return ablations.includes(name);
  return false;
}
|
|
62
|
+
|
|
63
|
+
// Per-stage profiling hooks. No-op unless `globalThis.__stageTimings` is set
|
|
64
|
+
// by the profiler (scripts/profile-search-stages.mjs). Used to attribute the
|
|
65
|
+
// "unaccounted" portion of post-retrieval wall time to specific sub-stages.
|
|
66
|
+
/**
 * Begin a stage timing sample.
 *
 * @returns {number|null} `performance.now()` when `globalThis.__stageTimings`
 *   is set (profiling active), otherwise `null`.
 */
function __ptStart() {
  if (!globalThis.__stageTimings) return null;
  return performance.now();
}
|
|
69
|
+
/**
 * Finish a stage timing sample and append the elapsed milliseconds to
 * `globalThis.__stageTimings[stage]`.
 *
 * Does nothing when `t0` is null/undefined (profiling was off at start) or
 * when `globalThis.__stageTimings` is not set.
 *
 * @param {string} stage - Key under which the sample is recorded.
 * @param {number|null} t0 - Value previously returned by `__ptStart()`.
 * @returns {void}
 */
function __ptEnd(stage, t0) {
  const timings = globalThis.__stageTimings;
  if (t0 == null || !timings) return;

  const elapsedMs = performance.now() - t0;
  // Lazily create the per-stage sample list on first use.
  if (!timings[stage]) timings[stage] = [];
  timings[stage].push(elapsedMs);
}
|
|
75
|
+
|
|
76
|
+
/**
 * Read a numeric tuning value from the environment.
 *
 * The value is parsed strictly: the whole (trimmed) string must be a number.
 * Partially numeric input such as "0,05" or "0.5abc" is rejected and the
 * fallback is used, rather than silently using the leading numeric prefix.
 *
 * @param {string} name - Environment variable name.
 * @param {number} fallback - Returned when the variable is unset, blank,
 *   not a finite number, or outside `[min, max]`.
 * @param {number} [min=0] - Inclusive lower bound.
 * @param {number} [max=Infinity] - Inclusive upper bound.
 * @returns {number} The parsed value, or `fallback`.
 */
function envNumber(name, fallback, min = 0, max = Infinity) {
  const raw = process.env[name];
  if (raw == null) return fallback;

  // Number('') and Number('   ') are 0, so blank input must be rejected first.
  const text = String(raw).trim();
  if (text === '') return fallback;

  const parsed = Number(text);
  const inRange = Number.isFinite(parsed) && parsed >= min && parsed <= max;
  return inRange ? parsed : fallback;
}
|
|
82
|
+
|
|
83
|
+
/**
 * Pick the file identifier of a search result.
 *
 * Checks, in order: `file`, `file_path`, `path` on the result itself, then
 * the same three fields on `result.metadata`. The first truthy value wins.
 *
 * @param {object|null|undefined} result - Search result (may be nullish).
 * @returns {*} The first truthy candidate, or `''` when none is set.
 */
function resultFileKey(result) {
  const meta = result?.metadata;
  const candidates = [
    result?.file,
    result?.file_path,
    result?.path,
    meta?.file,
    meta?.file_path,
    meta?.path,
  ];
  for (const candidate of candidates) {
    if (candidate) return candidate;
  }
  return '';
}
|
|
92
|
+
|
|
93
|
+
/**
 * Whether query-text ranking has been switched off via the environment.
 *
 * @returns {boolean} True when `SWEET_SEARCH_QUERY_TEXT_RANKING` is exactly
 *   `'0'` or `'false'` (case-sensitive); false for any other value or unset.
 */
function queryTextRankingOff() {
  const flag = process.env.SWEET_SEARCH_QUERY_TEXT_RANKING;
  return flag === '0' || flag === 'false';
}
|
|
97
|
+
|
|
98
|
+
/**
 * Whether the adaptive legacy late-interaction rerank gate is enabled.
 *
 * Enabled by default: an unset or empty `SWEET_SEARCH_ADAPTIVE_LI_RERANK`
 * counts as on. Otherwise only the exact values `'1'` and `'true'` enable it.
 *
 * @returns {boolean}
 */
function adaptiveLegacyLiEnabled() {
  const setting = process.env.SWEET_SEARCH_ADAPTIVE_LI_RERANK;
  const unset = setting == null || setting === '';
  return unset || setting === '1' || setting === 'true';
}
|
|
103
|
+
|
|
104
|
+
/**
 * Decide whether the top of the ranking is close enough to warrant the
 * legacy late-interaction rerank.
 *
 * Returns true when the gate is enabled, there are at least two results, and
 * the score gap between the first and second result is at most the margin
 * from `SWEET_SEARCH_ADAPTIVE_LI_MARGIN` (default 0.03, accepted range 0..1).
 * A non-numeric `score` counts as 0; non-finite scores yield false.
 *
 * @param {Array<object>} results - Ranked results, best first.
 * @returns {boolean}
 */
function shouldRunAdaptiveLegacyLi(results) {
  const eligible = adaptiveLegacyLiEnabled()
    && Array.isArray(results)
    && results.length >= 2;
  if (!eligible) return false;

  const margin = envNumber('SWEET_SEARCH_ADAPTIVE_LI_MARGIN', 0.03, 0, 1);
  const scoreAt = (position) => {
    const entry = results[position];
    return typeof entry?.score === 'number' ? entry.score : 0;
  };
  const top = scoreAt(0);
  const runnerUp = scoreAt(1);

  return Number.isFinite(top)
    && Number.isFinite(runnerUp)
    && (top - runnerUp) <= margin;
}
|
|
113
|
+
|
|
114
|
+
/**
 * Normalize text for query/term comparison.
 *
 * Steps: coerce to string (falsy input becomes ''), insert a space at each
 * lower-case-or-digit → upper-case boundary (splits camelCase), lower-case,
 * collapse every run of characters outside `[a-z0-9]` into one space, trim.
 *
 * @param {*} value - Text to normalize.
 * @returns {string} Space-separated lower-case ASCII alphanumeric tokens.
 */
function normalizeForQueryText(value) {
  const raw = String(value || '');
  const camelSplit = raw.replace(/([a-z0-9])([A-Z])/g, '$1 $2');
  const lowered = camelSplit.toLowerCase();
  return lowered.replace(/[^a-z0-9]+/g, ' ').trim();
}
|
|
121
|
+
|
|
122
|
+
/**
 * Extract the distinct, meaningful terms of a query.
 *
 * The query is normalized with `normalizeForQueryText`, split on whitespace,
 * and filtered: tokens shorter than two characters and tokens listed in
 * `QUERY_TEXT_STOPWORDS` are dropped. Duplicates are removed, keeping the
 * first occurrence order.
 *
 * @param {*} query - Raw query text.
 * @returns {string[]} Unique terms in first-seen order.
 */
function queryTextTerms(query) {
  const kept = new Set();
  for (const token of normalizeForQueryText(query).split(/\s+/)) {
    if (!token || token.length < 2) continue;
    if (QUERY_TEXT_STOPWORDS.has(token)) continue;
    kept.add(token); // Set keeps insertion order and ignores repeats
  }
  return [...kept];
}
|
|
133
|
+
|
|
134
|
+
/**
 * Resolve a result's relative file path inside the project root, refusing
 * anything that would escape it.
 *
 * Returns null when the root or file is missing, when `file` is not a string,
 * is absolute, or contains a NUL byte, or when the resolved path lies outside
 * `projectRoot`. Containment is checked with `path.relative`, so it also works
 * when the project root is the filesystem root (where `root + path.sep`
 * prefix matching would reject every file).
 *
 * NOTE(review): this is a lexical check only; symlinks inside the root are
 * not resolved.
 *
 * @param {string} projectRoot - Project root directory.
 * @param {string} file - Path relative to the project root.
 * @returns {string|null} Absolute path within the root, or null.
 */
function safeCandidatePath(projectRoot, file) {
  if (!projectRoot || typeof file !== 'string' || file === '') return null;
  if (path.isAbsolute(file) || file.includes('\0')) return null;

  const root = path.resolve(projectRoot);
  const resolved = path.resolve(root, file);

  // '' means resolved === root; a leading '..' segment or an absolute result
  // (different drive on Windows) means the target is outside the root.
  const relative = path.relative(root, resolved);
  const escapesRoot = relative === '..'
    || relative.startsWith(`..${path.sep}`)
    || path.isAbsolute(relative);

  return escapesRoot ? null : resolved;
}
|
|
141
|
+
|
|
142
|
+
/**
 * Read the source text backing a search result, limited to its line span.
 *
 * The file is resolved through `safeCandidatePath` and its contents are
 * memoized in `QUERY_TEXT_FILE_CACHE` under `projectRoot + NUL + file`;
 * unreadable files are cached as '' so they are not retried.
 *
 * When the result carries no `startLine`/`endLine` (directly or under
 * `metadata`), the first `QUERY_TEXT_MAX_CHARS` characters of the file are
 * returned. Otherwise the returned slice starts at index `startLine - 9`
 * (clamped to 0) and ends at `endLine`, truncated to `QUERY_TEXT_MAX_CHARS`.
 *
 * @param {string} projectRoot - Project root used to resolve the file.
 * @param {object} result - Search result.
 * @returns {string} Span text, or '' when unavailable or the span is invalid.
 */
function readCandidateSpan(projectRoot, result) {
  const relFile = resultFileKey(result);
  const target = safeCandidatePath(projectRoot, relFile);
  if (!target) return '';

  const key = `${projectRoot}\0${relFile}`;
  let source = QUERY_TEXT_FILE_CACHE.get(key);
  if (source == null) {
    try {
      source = readFileSync(target, 'utf8');
    } catch {
      // Best effort: a missing or unreadable file simply contributes no text.
      source = '';
    }
    QUERY_TEXT_FILE_CACHE.set(key, source);
  }
  if (!source) return '';

  const firstLine = result?.startLine ?? result?.metadata?.startLine ?? null;
  const lastLine = result?.endLine ?? result?.metadata?.endLine ?? null;
  if (firstLine == null || lastLine == null) {
    return source.slice(0, QUERY_TEXT_MAX_CHARS);
  }

  const rows = source.split(/\r?\n/);
  // NOTE(review): presumably startLine is 1-based, so -9 = 0-based start plus
  // 8 lines of leading context (e.g. doc comments) — confirm with the indexer.
  const from = Math.max(0, Number(firstLine) - 9);
  const to = Math.min(rows.length, Number(lastLine));
  const usable = Number.isFinite(from) && Number.isFinite(to) && to >= from;
  if (!usable) return '';

  return rows.slice(from, to).join('\n').slice(0, QUERY_TEXT_MAX_CHARS);
}
|
|
169
|
+
|
|
170
|
+
/**
 * Score, in [0, 1], how well a result's text agrees with the query terms.
 *
 * The comparison text is the normalized concatenation of the result's
 * `name`, `type`, `signature`, `docComment`, `content`, `text`, its file key,
 * and the on-disk span from `readCandidateSpan`.
 *
 * Components:
 *  - coverage (weight 0.65): per query term, 1 for a whole-token match, 0.5
 *    for a substring match of a term with at least 4 characters; averaged
 *    over all terms.
 *  - exact (weight 0.25): 1 when the whole normalized query appears verbatim.
 *  - bigrams (weight 0.10): fraction of adjacent term pairs appearing as
 *    "a b" in the text.
 *
 * @param {string} query - Raw query.
 * @param {object} result - Search result.
 * @param {string} projectRoot - Root used to read the result's file span.
 * @returns {number} Agreement score; 0 when the query has no usable terms or
 *   the result has no text.
 */
function queryTextAgreementScore(query, result, projectRoot) {
  const terms = queryTextTerms(query);
  if (terms.length === 0) return 0;

  const fields = [
    result?.name,
    result?.type,
    result?.signature,
    result?.docComment,
    result?.content,
    result?.text,
    resultFileKey(result),
    readCandidateSpan(projectRoot, result),
  ];
  const haystack = normalizeForQueryText(fields.filter(Boolean).join('\n'));
  if (!haystack) return 0;

  const words = new Set(haystack.split(/\s+/).filter(Boolean));
  const matched = terms.reduce((sum, term) => {
    if (words.has(term)) return sum + 1;
    if (term.length >= 4 && haystack.includes(term)) return sum + 0.5;
    return sum;
  }, 0);

  const pairCount = Math.max(0, terms.length - 1);
  const pairHits = terms
    .slice(1)
    .filter((next, i) => haystack.includes(`${terms[i]} ${next}`))
    .length;

  const coverage = matched / terms.length;
  const exact = haystack.includes(normalizeForQueryText(query)) ? 1 : 0;
  const bigrams = pairCount > 0 ? pairHits / pairCount : 0;
  return Math.min(1, 0.65 * coverage + 0.25 * exact + 0.10 * bigrams);
}
|
|
204
|
+
|
|
205
|
+
/**
 * Whether the normalized query occurs verbatim in a result's body text.
 *
 * Only `docComment`, `content`, `text`, and the on-disk span from
 * `readCandidateSpan` are searched.
 *
 * @param {string} query - Raw query.
 * @param {object} result - Search result.
 * @param {string} projectRoot - Root used to read the result's file span.
 * @returns {boolean} False when the query normalizes to '' or no text matches.
 */
function hasExactQueryTextMatch(query, result, projectRoot) {
  const needle = normalizeForQueryText(query);
  if (!needle) return false;

  const sources = [
    result?.docComment,
    result?.content,
    result?.text,
    readCandidateSpan(projectRoot, result),
  ];
  const parts = [];
  for (const part of sources) {
    if (part) parts.push(part);
  }

  const haystack = normalizeForQueryText(parts.join('\n'));
  return haystack !== '' && haystack.includes(needle);
}
|
|
216
|
+
|
|
217
|
+
/**
 * Boost results in the head window whose text agrees with the query, then
 * re-sort that window.
 *
 * Skipped (input returned unchanged) when disabled by environment or by the
 * 'no-query-text-ranking' ablation, when there are fewer than 3 results,
 * when the weight is not positive, or when no result reaches the agreement
 * threshold.
 *
 * For each windowed result with agreement >= `minAgreement`, the score is
 * multiplied by `1 + weight * agreement`, and `_queryTextScore`,
 * `_queryTextMult`, `_queryTextOrigScore` are recorded on a shallow copy.
 * Ties in the re-sort keep the original order. Results past the window are
 * appended untouched.
 *
 * @param {Array<object>} results - Ranked results, best first.
 * @param {string} query - Raw query.
 * @param {object} [opts]
 * @param {Set<string>|string[]} [opts.ablations]
 * @param {string} [opts.projectRoot] - Defaults to
 *   `SWEET_SEARCH_PROJECT_ROOT`, then `process.cwd()`.
 * @param {number} [opts.window] - Head size (minimum 3); defaults to
 *   `QUERY_TEXT_RANKING_WINDOW`.
 * @param {number} [opts.weight] - Defaults to
 *   `SWEET_SEARCH_QUERY_TEXT_RANKING_WEIGHT` (0..2), then
 *   `QUERY_TEXT_RANKING_WEIGHT`.
 * @param {number} [opts.minAgreement] - Defaults to
 *   `SWEET_SEARCH_QUERY_TEXT_MIN_AGREEMENT` (0..1), then
 *   `QUERY_TEXT_MIN_AGREEMENT`.
 * @returns {Array<object>} The input array when nothing changed, otherwise a
 *   new array.
 */
function applyQueryTextRanking(results, query, opts = {}) {
  const disabled = queryTextRankingOff()
    || hasAblation(opts.ablations, 'no-query-text-ranking');
  if (disabled || !Array.isArray(results) || results.length < 3) return results;

  const projectRoot = opts.projectRoot
    || process.env.SWEET_SEARCH_PROJECT_ROOT
    || process.cwd();
  const requestedWindow = opts.window ?? QUERY_TEXT_RANKING_WINDOW;
  const window = Math.min(results.length, Math.max(3, requestedWindow));
  const weight = opts.weight
    ?? envNumber('SWEET_SEARCH_QUERY_TEXT_RANKING_WEIGHT', QUERY_TEXT_RANKING_WEIGHT, 0, 2);
  const minAgreement = opts.minAgreement
    ?? envNumber('SWEET_SEARCH_QUERY_TEXT_MIN_AGREEMENT', QUERY_TEXT_MIN_AGREEMENT, 0, 1);
  if (!(weight > 0)) return results;

  let boostedAny = false;
  // Tag every copy with its original position so the sort below is stable.
  const boostOne = (result, index) => {
    const agreement = queryTextAgreementScore(query, result, projectRoot);
    if (agreement < minAgreement) {
      return { ...result, _queryTextOrigIndex: index };
    }
    boostedAny = true;
    const baseScore = typeof result.score === 'number' ? result.score : 0;
    const mult = 1 + weight * agreement;
    return {
      ...result,
      score: baseScore * mult,
      _queryTextScore: agreement,
      _queryTextMult: mult,
      _queryTextOrigScore: baseScore,
      _queryTextOrigIndex: index,
    };
  };
  const head = results.slice(0, window).map(boostOne);
  if (!boostedAny) return results;

  head.sort((left, right) => {
    const byScore = (right.score || 0) - (left.score || 0);
    if (byScore !== 0) return byScore;
    return left._queryTextOrigIndex - right._queryTextOrigIndex;
  });
  // The position tag is internal bookkeeping; strip it before returning.
  for (const entry of head) delete entry._queryTextOrigIndex;

  if (window === results.length) return head;
  return head.concat(results.slice(window));
}
|
|
266
|
+
|
|
267
|
+
/**
 * Return the identifier of a search result.
 *
 * @param {object|null|undefined} result - Search result (may be nullish).
 * @returns {*} `result.id` when truthy, else `result.metadata.id` when
 *   truthy, else `null`. Falsy ids (0, '') are treated as missing.
 */
function resultIdentity(result) {
  const direct = result?.id;
  if (direct) return direct;
  return result?.metadata?.id || null;
}
|
|
270
|
+
|
|
271
|
+
/**
 * Dot product of two numeric array-likes.
 *
 * Only the first `min(a.length, b.length)` components are multiplied; when
 * the lengths differ the extra components of the longer input are ignored.
 *
 * @param {ArrayLike<number>|null|undefined} a
 * @param {ArrayLike<number>|null|undefined} b
 * @returns {number|null} The sum of products, or `null` when either input is
 *   missing or empty.
 */
function dotProduct(a, b) {
  const lengthA = a?.length || 0;
  const lengthB = b?.length || 0;
  const shared = Math.min(lengthA, lengthB);
  if (shared === 0) return null;

  let total = 0;
  let k = 0;
  while (k < shared) {
    total += a[k] * b[k];
    k += 1;
  }
  return total;
}
|
|
278
|
+
|
|
279
|
+
/**
 * Min-max normalize a score into [0, 1].
 *
 * @param {number} value - Score to normalize.
 * @param {number} min - Lower end of the observed range.
 * @param {number} max - Upper end of the observed range.
 * @returns {number} 0 for a non-finite `value`; 0.5 when the range is empty
 *   or invalid (`max` not greater than `min`); otherwise
 *   `(value - min) / (max - min)` clamped to [0, 1].
 */
function normalizeScore(value, min, max) {
  if (!Number.isFinite(value)) return 0;
  if (max > min) {
    const ratio = (value - min) / (max - min);
    const cappedHigh = Math.min(1, ratio);
    return Math.max(0, cappedHigh);
  }
  // Degenerate range: every candidate is equally good, so sit in the middle.
  return 0.5;
}
|
|
284
|
+
|
|
285
|
+
/**
 * Re-rank the head window by blending each result's current score with the
 * dot product between the query vector and the result's stored embedding.
 *
 * Skipped (input returned unchanged) when the 'no-full-vector-rescore'
 * ablation is set, there are fewer than 3 results, `opts.queryFloat` or
 * `opts.codebaseRepo.getEmbeddingsByIds` is missing, no windowed result has
 * an id, no embeddings come back, or fewer than 2 results have a finite
 * full-vector score.
 *
 * Base scores and full-vector scores are each min-max normalized over the
 * window and combined as `(1 - weight) * baseNorm + weight * fullNorm`.
 * The weight is `opts.weight` when given; otherwise it is chosen by case:
 *  - some windowed result contains the query verbatim:
 *    `SWEET_SEARCH_FULL_VECTOR_EXACT_TEXT_WEIGHT` / `FULL_VECTOR_EXACT_TEXT_WEIGHT`
 *  - `opts.lateInteractionApplied`:
 *    `SWEET_SEARCH_FULL_VECTOR_LI_RESCORE_WEIGHT`, falling back to
 *    `SWEET_SEARCH_FULL_VECTOR_RESCORE_WEIGHT`, then `FULL_VECTOR_LI_RESCORE_WEIGHT`
 *  - otherwise:
 *    `SWEET_SEARCH_FULL_VECTOR_RESCORE_WEIGHT` / `FULL_VECTOR_RESCORE_WEIGHT`
 *
 * Rescored results are shallow copies annotated with `_fullVectorScore`,
 * `_fullVectorNorm`, and `_fullVectorOrigScore`. Ties keep original order.
 * Results past the window are appended untouched.
 *
 * @param {Array<object>} results - Ranked results, best first.
 * @param {object} [opts]
 * @param {Set<string>|string[]} [opts.ablations]
 * @param {ArrayLike<number>} [opts.queryFloat] - Query embedding.
 * @param {object} [opts.codebaseRepo] - Must expose `getEmbeddingsByIds(ids)`
 *   returning a Map-like object (`size`, `get`).
 * @param {number} [opts.window] - Head size (minimum 3); defaults to
 *   `FULL_VECTOR_RESCORE_WINDOW`.
 * @param {number} [opts.weight] - Explicit blend weight.
 * @param {string} [opts.query] - Raw query, used for the exact-text check.
 * @param {string} [opts.projectRoot]
 * @param {boolean} [opts.lateInteractionApplied]
 * @returns {Array<object>} The input array when skipped, otherwise a new array.
 */
function applyFullVectorRescore(results, opts = {}) {
  if (hasAblation(opts.ablations, 'no-full-vector-rescore')) return results;
  if (!Array.isArray(results) || results.length < 3) return results;

  const repo = opts.codebaseRepo;
  if (!opts.queryFloat || !repo?.getEmbeddingsByIds) return results;

  const requestedWindow = opts.window ?? FULL_VECTOR_RESCORE_WINDOW;
  const window = Math.min(results.length, Math.max(3, requestedWindow));
  const head = results.slice(0, window);

  const ids = head.map(resultIdentity).filter(Boolean);
  if (ids.length === 0) return results;

  const embeddings = repo.getEmbeddingsByIds(ids);
  if (!embeddings || embeddings.size === 0) return results;

  const scored = head.map((result, index) => {
    const id = resultIdentity(result);
    const vector = id ? embeddings.get(id) : null;
    return {
      result,
      index,
      baseScore: typeof result.score === 'number' ? result.score : 0,
      fullScore: vector ? dotProduct(opts.queryFloat, vector) : null,
    };
  });

  const usable = scored.filter((item) => Number.isFinite(item.fullScore));
  if (usable.length < 2) return results;

  // Base range spans the whole window; full-vector range only the scored items.
  const baseValues = scored.map((item) => item.baseScore);
  const fullValues = usable.map((item) => item.fullScore);
  const minBase = Math.min(...baseValues);
  const maxBase = Math.max(...baseValues);
  const minFull = Math.min(...fullValues);
  const maxFull = Math.max(...fullValues);

  const projectRoot = opts.projectRoot
    || process.env.SWEET_SEARCH_PROJECT_ROOT
    || process.cwd();
  const exactTextMatch = head.some(
    (result) => hasExactQueryTextMatch(opts.query || '', result, projectRoot)
  );
  const liRescoreWeight = envNumber(
    'SWEET_SEARCH_FULL_VECTOR_LI_RESCORE_WEIGHT',
    envNumber('SWEET_SEARCH_FULL_VECTOR_RESCORE_WEIGHT', FULL_VECTOR_LI_RESCORE_WEIGHT, 0, 1),
    0,
    1
  );

  let weight = opts.weight;
  if (weight == null) {
    if (exactTextMatch) {
      weight = envNumber('SWEET_SEARCH_FULL_VECTOR_EXACT_TEXT_WEIGHT', FULL_VECTOR_EXACT_TEXT_WEIGHT, 0, 1);
    } else if (opts.lateInteractionApplied) {
      weight = liRescoreWeight;
    } else {
      weight = envNumber('SWEET_SEARCH_FULL_VECTOR_RESCORE_WEIGHT', FULL_VECTOR_RESCORE_WEIGHT, 0, 1);
    }
  }

  const reranked = scored.map((item) => {
    if (!Number.isFinite(item.fullScore)) {
      // NOTE(review): items without an embedding keep their raw score while
      // rescored items get a blended value in [0, 1]; the two scales are then
      // sorted together. Confirm this is intended.
      return { ...item.result, _fullVectorOrigIndex: item.index };
    }
    const baseNorm = normalizeScore(item.baseScore, minBase, maxBase);
    const fullNorm = normalizeScore(item.fullScore, minFull, maxFull);
    return {
      ...item.result,
      score: (1 - weight) * baseNorm + weight * fullNorm,
      _fullVectorScore: item.fullScore,
      _fullVectorNorm: fullNorm,
      _fullVectorOrigScore: item.baseScore,
      _fullVectorOrigIndex: item.index,
    };
  });

  reranked.sort((left, right) => {
    const byScore = (right.score || 0) - (left.score || 0);
    if (byScore !== 0) return byScore;
    return left._fullVectorOrigIndex - right._fullVectorOrigIndex;
  });
  // The position tag is internal bookkeeping; strip it before returning.
  for (const entry of reranked) delete entry._fullVectorOrigIndex;

  if (window === results.length) return reranked;
  return reranked.concat(results.slice(window));
}
|
|
361
|
+
|
|
362
|
+
/**
 * Reorder the head window so the first result from each file comes before
 * any repeat results from files already seen.
 *
 * Relative order is preserved within both groups. Results without a file key
 * are never treated as repeats. Results past the window are appended
 * untouched.
 *
 * @param {Array<object>} results - Ranked results, best first.
 * @param {object} [opts]
 * @param {Set<string>|string[]} [opts.ablations] - 'no-file-diversity'
 *   disables the reordering.
 * @param {number} [opts.window] - Head size (minimum 10, capped at
 *   `results.length`); defaults to all results.
 * @returns {Array<object>} The input array when it has fewer than 3 entries,
 *   the ablation is set, or no repeats exist; otherwise a new array.
 */
function promoteFileDiversity(results, opts = {}) {
  if (!Array.isArray(results) || results.length < 3) return results;
  if (hasAblation(opts.ablations, 'no-file-diversity')) return results;

  const requestedWindow = opts.window ?? results.length;
  const window = Math.min(results.length, Math.max(10, requestedWindow));

  const filesSeen = new Set();
  const firstPerFile = [];
  const repeats = [];
  for (const entry of results.slice(0, window)) {
    const file = resultFileKey(entry);
    if (file && filesSeen.has(file)) {
      repeats.push(entry);
      continue;
    }
    if (file) filesSeen.add(file);
    firstPerFile.push(entry);
  }

  if (repeats.length === 0) return results;
  return firstPerFile.concat(repeats, results.slice(window));
}
|
|
37
385
|
|
|
38
386
|
// =============================================================================
|
|
39
387
|
// Post-retrieval processing
|
|
@@ -74,6 +422,13 @@ export async function applyPostRetrieval(results, query, options, searchContext)
|
|
|
74
422
|
effectiveGraphExpand,
|
|
75
423
|
intentPolicy,
|
|
76
424
|
start,
|
|
425
|
+
_entityKindCache,
|
|
426
|
+
_entityNameCache,
|
|
427
|
+
_resultTextCache,
|
|
428
|
+
_fullFileTextCache,
|
|
429
|
+
_isTestSupportCache,
|
|
430
|
+
_isTestChunkCache,
|
|
431
|
+
_fileKindCache,
|
|
77
432
|
} = searchContext;
|
|
78
433
|
|
|
79
434
|
// Merge semantic stats (embedding/rerank) into main stats for CostTracker.
|
|
@@ -165,6 +520,7 @@ export async function applyPostRetrieval(results, query, options, searchContext)
|
|
|
165
520
|
}
|
|
166
521
|
};
|
|
167
522
|
|
|
523
|
+
const __t_expand = __ptStart();
|
|
168
524
|
results = expandResults(graphDb, results, {
|
|
169
525
|
expandMode: effectiveGraphExpand,
|
|
170
526
|
adaptiveHop2,
|
|
@@ -173,16 +529,21 @@ export async function applyPostRetrieval(results, query, options, searchContext)
|
|
|
173
529
|
cosineSimilarity: int8CosineSimilarity,
|
|
174
530
|
codebaseDb: this.codebaseRepo,
|
|
175
531
|
readFileLines,
|
|
532
|
+
format: options.format,
|
|
533
|
+
manifestEpoch: this.graphSearch?.getManifestEpoch?.(),
|
|
176
534
|
...(intentEdgeTypes && !graphExpandOptions.edgeTypes ? { edgeTypes: intentEdgeTypes } : {}),
|
|
177
535
|
...graphExpandOptions,
|
|
178
536
|
});
|
|
537
|
+
__ptEnd('post:expandResults', __t_expand);
|
|
179
538
|
|
|
180
539
|
// Attach LI chunk ids to expanded entities so they can participate
|
|
181
540
|
// in the post-expansion MaxSim rerank pool. The graph stores entities
|
|
182
541
|
// (entity_id keyed by code-graph.db) while LI is keyed by chunk id;
|
|
183
542
|
// without this bridge expanded entries fall through hasTokens() and
|
|
184
543
|
// are appended to the result tail without ever competing for top-K.
|
|
544
|
+
const __t_attachIds = __ptStart();
|
|
185
545
|
const expandedAttached = attachChunkIdsToExpanded(results, this.codebaseRepo);
|
|
546
|
+
__ptEnd('post:attachChunkIdsToExpanded', __t_attachIds);
|
|
186
547
|
|
|
187
548
|
stats.graphExpansion = {
|
|
188
549
|
mode: effectiveGraphExpand,
|
|
@@ -227,6 +588,7 @@ export async function applyPostRetrieval(results, query, options, searchContext)
|
|
|
227
588
|
: null;
|
|
228
589
|
|
|
229
590
|
try {
|
|
591
|
+
const __t_cascade = __ptStart();
|
|
230
592
|
const { cascadedScore } = await import('../ranking/cascaded-scorer.js');
|
|
231
593
|
const cascadeResult = await cascadedScore(query, results, {
|
|
232
594
|
lateInteractionIndex: liIndex,
|
|
@@ -238,6 +600,7 @@ export async function applyPostRetrieval(results, query, options, searchContext)
|
|
|
238
600
|
lexicalConfident: false,
|
|
239
601
|
loadDocumentContent: this.loadDocumentContent.bind(this),
|
|
240
602
|
});
|
|
603
|
+
__ptEnd('post:cascadedScore', __t_cascade);
|
|
241
604
|
results = cascadeResult.results;
|
|
242
605
|
stats.cascade = cascadeResult.stats;
|
|
243
606
|
|
|
@@ -257,9 +620,15 @@ export async function applyPostRetrieval(results, query, options, searchContext)
|
|
|
257
620
|
// =========================================================================
|
|
258
621
|
// Late Interaction Reranking (legacy, flag OFF — post-expansion, Phase 6)
|
|
259
622
|
// =========================================================================
|
|
623
|
+
const agentFormats = new Set(['agent', 'agent_preview', 'agent_full', 'agent_full_xl']);
|
|
624
|
+
const allowLegacyLateInteraction = process.env.SWEET_SEARCH_LEGACY_LI_RERANK === '1'
|
|
625
|
+
|| agentFormats.has(options.format)
|
|
626
|
+
|| searchContext?.fromSearch !== true
|
|
627
|
+
|| shouldRunAdaptiveLegacyLi(results);
|
|
260
628
|
const shouldRunLateInteraction = this.hasLateInteractionIndex &&
|
|
261
629
|
(options.useLateInteraction ?? this.useLateInteraction) &&
|
|
262
630
|
!this.lateInteractionIndex.modelMismatch &&
|
|
631
|
+
allowLegacyLateInteraction &&
|
|
263
632
|
Array.isArray(results) && results.length > 0 &&
|
|
264
633
|
!isConfidentLexical;
|
|
265
634
|
|
|
@@ -330,12 +699,14 @@ export async function applyPostRetrieval(results, query, options, searchContext)
|
|
|
330
699
|
// =========================================================================
|
|
331
700
|
if (qualityWeight > 0 && Array.isArray(results) && results.length > 0) {
|
|
332
701
|
const qStart = Date.now();
|
|
702
|
+
const __t_quality = __ptStart();
|
|
333
703
|
if (!this._qualityScorer) {
|
|
334
704
|
this._qualityScorer = new QualityScorer({
|
|
335
705
|
dbPath: this.graphSearch?.dbPath || DB_PATHS.codeGraph,
|
|
336
706
|
});
|
|
337
707
|
}
|
|
338
708
|
results = this._qualityScorer.scoreResults(results);
|
|
709
|
+
__ptEnd('post:qualityScoring', __t_quality);
|
|
339
710
|
|
|
340
711
|
// Blend: final = (1 - w) * original + w * quality
|
|
341
712
|
const w = Math.max(0, Math.min(1, qualityWeight));
|
|
@@ -367,6 +738,7 @@ export async function applyPostRetrieval(results, query, options, searchContext)
|
|
|
367
738
|
// =========================================================================
|
|
368
739
|
// Apply intent policy — chunkTypeBoosts, maxResults, rerankerWeight
|
|
369
740
|
// =========================================================================
|
|
741
|
+
const __t_intentPolicy = __ptStart();
|
|
370
742
|
if (intentPolicy && Array.isArray(results) && results.length > 0) {
|
|
371
743
|
// (a) chunkTypeBoosts: Multiply result scores by per-chunk-type boost factors
|
|
372
744
|
if (intentPolicy.chunkTypeBoosts && Object.keys(intentPolicy.chunkTypeBoosts).length > 0) {
|
|
@@ -399,18 +771,34 @@ export async function applyPostRetrieval(results, query, options, searchContext)
|
|
|
399
771
|
results = results.slice(0, effectiveK);
|
|
400
772
|
}
|
|
401
773
|
}
|
|
774
|
+
__ptEnd('post:intentPolicy', __t_intentPolicy);
|
|
402
775
|
|
|
403
776
|
// =========================================================================
|
|
404
777
|
// Intent-aware file-kind ranking
|
|
405
778
|
// =========================================================================
|
|
406
|
-
// Soft-demote docs/tests/types files when the query is
|
|
407
|
-
// implementation-seeking AND the top-N window contains both
|
|
408
|
-
//
|
|
779
|
+
// Soft-demote docs/examples/tests/types/config files when the query is
|
|
780
|
+
// confidently implementation-seeking AND the top-N window contains both
|
|
781
|
+
// demotable and implementation candidates. No-op otherwise. Disable with
|
|
409
782
|
// SWEET_SEARCH_FILE_KIND_RANKING=0; tune SWEET_SEARCH_FILE_KIND_FACTOR.
|
|
410
783
|
if (Array.isArray(results) && results.length > 0) {
|
|
411
784
|
const fileKindIntent = classifyFileKindIntent(query);
|
|
412
785
|
const beforeTop = results[0];
|
|
413
|
-
const
|
|
786
|
+
const semanticLike = searchMode === 'hybrid' || searchMode === 'semantic'
|
|
787
|
+
|| stats.path === 'hybrid' || stats.path === 'semantic';
|
|
788
|
+
const isAgentFormat = options.format === 'agent';
|
|
789
|
+
const __t_fileKind = __ptStart();
|
|
790
|
+
const afterFK = applyFileKindRanking(results, {
|
|
791
|
+
intent: fileKindIntent,
|
|
792
|
+
...(semanticLike ? {
|
|
793
|
+
docFactor: 0.35,
|
|
794
|
+
testFactor: 0.35,
|
|
795
|
+
typeFactor: 0.70,
|
|
796
|
+
ancillaryFactor: 0.15,
|
|
797
|
+
tinyAncillaryFactor: 0.05,
|
|
798
|
+
} : {}),
|
|
799
|
+
_fileKindCache,
|
|
800
|
+
});
|
|
801
|
+
__ptEnd('post:applyFileKindRanking', __t_fileKind);
|
|
414
802
|
if (afterFK !== results) {
|
|
415
803
|
results = afterFK;
|
|
416
804
|
stats.fileKindRanking = {
|
|
@@ -424,6 +812,90 @@ export async function applyPostRetrieval(results, query, options, searchContext)
|
|
|
424
812
|
applied: false,
|
|
425
813
|
};
|
|
426
814
|
}
|
|
815
|
+
|
|
816
|
+
const beforeDemotionTop = results[0];
|
|
817
|
+
const __t_demotions = __ptStart();
|
|
818
|
+
const afterDemotions = applyResultDemotions(results, {
|
|
819
|
+
query,
|
|
820
|
+
ablations: options.ablations,
|
|
821
|
+
format: options.format,
|
|
822
|
+
projectRoot: this.projectRoot,
|
|
823
|
+
codeGraphRepo: this.codeGraphRepo,
|
|
824
|
+
_entityKindCache,
|
|
825
|
+
_entityNameCache,
|
|
826
|
+
_resultTextCache,
|
|
827
|
+
_fullFileTextCache,
|
|
828
|
+
_isTestSupportCache,
|
|
829
|
+
_isTestChunkCache,
|
|
830
|
+
_fileKindCache,
|
|
831
|
+
});
|
|
832
|
+
__ptEnd('post:applyResultDemotions', __t_demotions);
|
|
833
|
+
if (afterDemotions !== results) {
|
|
834
|
+
results = afterDemotions;
|
|
835
|
+
stats.resultDemotions = {
|
|
836
|
+
applied: true,
|
|
837
|
+
top1Changed: !!beforeDemotionTop && results[0] && (beforeDemotionTop !== results[0]),
|
|
838
|
+
};
|
|
839
|
+
}
|
|
840
|
+
|
|
841
|
+
const beforeQueryTextTop = results[0];
|
|
842
|
+
const __t_queryText = __ptStart();
|
|
843
|
+
const afterQueryTextRanking = semanticLike && !isAgentFormat
|
|
844
|
+
? applyQueryTextRanking(results, query, {
|
|
845
|
+
ablations: options.ablations,
|
|
846
|
+
projectRoot: this.projectRoot,
|
|
847
|
+
window: options.queryTextRankingWindow,
|
|
848
|
+
weight: options.queryTextRankingWeight,
|
|
849
|
+
})
|
|
850
|
+
: results;
|
|
851
|
+
__ptEnd('post:applyQueryTextRanking', __t_queryText);
|
|
852
|
+
if (afterQueryTextRanking !== results) {
|
|
853
|
+
results = afterQueryTextRanking;
|
|
854
|
+
stats.queryTextRanking = {
|
|
855
|
+
applied: true,
|
|
856
|
+
top1Changed: !!beforeQueryTextTop && results[0] && (beforeQueryTextTop !== results[0]),
|
|
857
|
+
};
|
|
858
|
+
}
|
|
859
|
+
|
|
860
|
+
const beforeFullVectorTop = results[0];
|
|
861
|
+
const __t_fullVec = __ptStart();
|
|
862
|
+
const afterFullVectorRescore = semanticLike && !isAgentFormat
|
|
863
|
+
? applyFullVectorRescore(results, {
|
|
864
|
+
ablations: options.ablations,
|
|
865
|
+
query,
|
|
866
|
+
queryFloat: semanticStats?.queryFloat,
|
|
867
|
+
codebaseRepo: this.codebaseRepo,
|
|
868
|
+
projectRoot: this.projectRoot,
|
|
869
|
+
window: options.fullVectorRescoreWindow,
|
|
870
|
+
weight: options.fullVectorRescoreWeight,
|
|
871
|
+
lateInteractionApplied: !!stats.lateInteraction && !stats.lateInteraction.error,
|
|
872
|
+
})
|
|
873
|
+
: results;
|
|
874
|
+
__ptEnd('post:applyFullVectorRescore', __t_fullVec);
|
|
875
|
+
if (afterFullVectorRescore !== results) {
|
|
876
|
+
results = afterFullVectorRescore;
|
|
877
|
+
stats.fullVectorRescore = {
|
|
878
|
+
applied: true,
|
|
879
|
+
top1Changed: !!beforeFullVectorTop && results[0] && (beforeFullVectorTop !== results[0]),
|
|
880
|
+
};
|
|
881
|
+
}
|
|
882
|
+
|
|
883
|
+
const beforeDiversityTop = results[0];
|
|
884
|
+
const __t_diversity = __ptStart();
|
|
885
|
+
const diversified = isAgentFormat
|
|
886
|
+
? results
|
|
887
|
+
: promoteFileDiversity(results, {
|
|
888
|
+
ablations: options.ablations,
|
|
889
|
+
window: options.fileDiversityWindow ?? results.length,
|
|
890
|
+
});
|
|
891
|
+
__ptEnd('post:promoteFileDiversity', __t_diversity);
|
|
892
|
+
if (diversified !== results) {
|
|
893
|
+
results = diversified;
|
|
894
|
+
stats.fileDiversity = {
|
|
895
|
+
applied: true,
|
|
896
|
+
top1Changed: !!beforeDiversityTop && results[0] && (beforeDiversityTop !== results[0]),
|
|
897
|
+
};
|
|
898
|
+
}
|
|
427
899
|
}
|
|
428
900
|
|
|
429
901
|
stats.total_ms = Date.now() - start;
|
|
@@ -443,7 +915,9 @@ export async function applyPostRetrieval(results, query, options, searchContext)
|
|
|
443
915
|
// every file matching a search — grouped under the exemplar as result.aliases.
|
|
444
916
|
if (Array.isArray(results) && results.length > 0 && this.codebaseRepo) {
|
|
445
917
|
try {
|
|
918
|
+
const __t_aliases = __ptStart();
|
|
446
919
|
const { stats: dedupStats } = expandAliases(results, this.codebaseRepo, query);
|
|
920
|
+
__ptEnd('post:expandAliases', __t_aliases);
|
|
447
921
|
if (dedupStats.exemplarsExpanded > 0) {
|
|
448
922
|
stats.dedupExpansion = dedupStats;
|
|
449
923
|
}
|