sweet-search 2.5.2 → 2.5.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (155) hide show
  1. package/core/cli.js +24 -3
  2. package/core/graph/graph-expansion.js +215 -36
  3. package/core/graph/graph-extractor.js +196 -11
  4. package/core/graph/graph-search.js +395 -92
  5. package/core/graph/hcgs-generator.js +2 -1
  6. package/core/graph/index.js +2 -0
  7. package/core/graph/repo-map.js +28 -6
  8. package/core/graph/structural-answer-cues.js +168 -0
  9. package/core/graph/structural-callsite-hints.js +40 -0
  10. package/core/graph/structural-context-format.js +40 -0
  11. package/core/graph/structural-context.js +450 -0
  12. package/core/graph/structural-forward-push.js +156 -0
  13. package/core/graph/structural-header-context.js +19 -0
  14. package/core/graph/structural-importance.js +148 -0
  15. package/core/graph/structural-pagerank.js +197 -0
  16. package/core/graph/summary-manager.js +13 -9
  17. package/core/incremental-indexing/application/dirty-scan.mjs +236 -0
  18. package/core/incremental-indexing/application/file-watcher.mjs +197 -0
  19. package/core/incremental-indexing/application/maintenance-handlers.mjs +519 -0
  20. package/core/incremental-indexing/application/maintenance-worker.mjs +380 -0
  21. package/core/incremental-indexing/application/operator-cli.mjs +554 -0
  22. package/core/incremental-indexing/application/production-li-delta.mjs +192 -0
  23. package/core/incremental-indexing/application/production-reconciler-helpers.mjs +107 -0
  24. package/core/incremental-indexing/application/production-reconciler.mjs +583 -0
  25. package/core/incremental-indexing/application/reconciler.mjs +477 -0
  26. package/core/incremental-indexing/application/tombstone-injector.mjs +148 -0
  27. package/core/incremental-indexing/domain/chunk-identity.mjs +260 -0
  28. package/core/incremental-indexing/domain/encoder-deps.mjs +193 -0
  29. package/core/incremental-indexing/domain/encoder-input.mjs +225 -0
  30. package/core/incremental-indexing/domain/interval-autotune.mjs +255 -0
  31. package/core/incremental-indexing/domain/reconcile-counters.mjs +149 -0
  32. package/core/incremental-indexing/domain/watermark-scheduler.mjs +239 -0
  33. package/core/incremental-indexing/infrastructure/artifact-temp-sweep.mjs +163 -0
  34. package/core/incremental-indexing/infrastructure/baseline-readiness.mjs +121 -0
  35. package/core/incremental-indexing/infrastructure/dirty-set.mjs +233 -0
  36. package/core/incremental-indexing/infrastructure/graph-gc.mjs +314 -0
  37. package/core/incremental-indexing/infrastructure/hashing.mjs +298 -0
  38. package/core/incremental-indexing/infrastructure/hcgs-invalidation.mjs +182 -0
  39. package/core/incremental-indexing/infrastructure/li-segment-merge.mjs +278 -0
  40. package/core/incremental-indexing/infrastructure/li-segment-state.mjs +173 -0
  41. package/core/incremental-indexing/infrastructure/lockfile.mjs +119 -0
  42. package/core/incremental-indexing/infrastructure/maintenance-state-reader.mjs +283 -0
  43. package/core/incremental-indexing/infrastructure/manifest.mjs +194 -0
  44. package/core/incremental-indexing/infrastructure/path-filter.mjs +190 -0
  45. package/core/incremental-indexing/infrastructure/reader-heartbeat.mjs +201 -0
  46. package/core/incremental-indexing/infrastructure/schema-migrations.mjs +257 -0
  47. package/core/incremental-indexing/infrastructure/sparse-gram-delta.mjs +335 -0
  48. package/core/incremental-indexing/infrastructure/sqlite-fts5.mjs +176 -0
  49. package/core/incremental-indexing/infrastructure/staleness-display.mjs +105 -0
  50. package/core/incremental-indexing/infrastructure/tombstone-bitmap.mjs +234 -0
  51. package/core/incremental-indexing/infrastructure/vector-delta-writer.mjs +359 -0
  52. package/core/incremental-indexing/infrastructure/vector-gc.mjs +133 -0
  53. package/core/incremental-indexing/infrastructure/worktree-stamp.mjs +155 -0
  54. package/core/incremental-indexing/infrastructure/wsl2-detect.mjs +115 -0
  55. package/core/indexing/admission-policy.js +139 -0
  56. package/core/indexing/artifact-builder.js +29 -12
  57. package/core/indexing/ast-chunker.js +107 -30
  58. package/core/indexing/dedup/exemplar-selector.js +19 -1
  59. package/core/indexing/gitignore-filter.js +223 -0
  60. package/core/indexing/incremental-tracker.js +99 -30
  61. package/core/indexing/index-codebase-v21.js +6 -5
  62. package/core/indexing/index-maintainer.mjs +698 -6
  63. package/core/indexing/indexer-ann.js +99 -15
  64. package/core/indexing/indexer-build.js +158 -45
  65. package/core/indexing/indexer-empty-baseline.js +80 -0
  66. package/core/indexing/indexer-manifest.js +66 -0
  67. package/core/indexing/indexer-phases.js +56 -23
  68. package/core/indexing/indexer-sparse-gram.js +54 -13
  69. package/core/indexing/indexer-utils.js +26 -208
  70. package/core/indexing/indexing-file-policy.js +32 -7
  71. package/core/indexing/maintainer-launcher.mjs +137 -0
  72. package/core/indexing/merkle-tracker.js +251 -244
  73. package/core/indexing/model-pool.js +46 -5
  74. package/core/infrastructure/code-graph-repository.js +758 -6
  75. package/core/infrastructure/code-graph-visibility.js +157 -0
  76. package/core/infrastructure/codebase-repository.js +100 -13
  77. package/core/infrastructure/config/search.js +1 -1
  78. package/core/infrastructure/db-utils.js +118 -0
  79. package/core/infrastructure/dedup-hashing.js +10 -13
  80. package/core/infrastructure/hardware-capability.js +17 -7
  81. package/core/infrastructure/index.js +8 -2
  82. package/core/infrastructure/language-patterns/maps.js +4 -1
  83. package/core/infrastructure/language-patterns/registry-core.js +56 -17
  84. package/core/infrastructure/language-patterns/registry-object-oriented.js +12 -5
  85. package/core/infrastructure/language-patterns.js +69 -0
  86. package/core/infrastructure/model-registry.js +20 -0
  87. package/core/infrastructure/native-inference.js +7 -12
  88. package/core/infrastructure/native-resolver.js +52 -37
  89. package/core/infrastructure/native-sparse-gram.js +261 -20
  90. package/core/infrastructure/native-tokenizer.js +6 -15
  91. package/core/infrastructure/simd-distance.js +10 -16
  92. package/core/infrastructure/sparse-gram-delta-reader.js +76 -0
  93. package/core/infrastructure/structural-alias-resolver.js +122 -0
  94. package/core/infrastructure/structural-candidate-ranker.js +34 -0
  95. package/core/infrastructure/structural-context-repository.js +472 -0
  96. package/core/infrastructure/structural-context-utils.js +51 -0
  97. package/core/infrastructure/structural-graph-signals.js +121 -0
  98. package/core/infrastructure/structural-qualified-resolution.js +15 -0
  99. package/core/infrastructure/structural-source-definitions.js +100 -0
  100. package/core/infrastructure/tombstone-bitmap-reader.js +139 -0
  101. package/core/infrastructure/tree-sitter-provider.js +811 -37
  102. package/core/prompt-optimization/data/p7-final/sweet-search-system-prompt.md +50 -0
  103. package/core/query/query-router.js +55 -5
  104. package/core/ranking/file-kind-ranking.js +2192 -15
  105. package/core/ranking/late-interaction-index.js +87 -12
  106. package/core/search/cli-decoration.js +290 -0
  107. package/core/search/context-expander.js +988 -78
  108. package/core/search/index.js +1 -0
  109. package/core/search/output-policy.js +275 -0
  110. package/core/search/search-anchor.js +499 -0
  111. package/core/search/search-boost.js +93 -1
  112. package/core/search/search-cli.js +61 -204
  113. package/core/search/search-hybrid.js +250 -10
  114. package/core/search/search-pattern-chunks.js +57 -8
  115. package/core/search/search-pattern-planner.js +68 -9
  116. package/core/search/search-pattern-prefilter.js +30 -10
  117. package/core/search/search-pattern-ripgrep.js +40 -4
  118. package/core/search/search-pattern-sparse-overlay.js +256 -0
  119. package/core/search/search-pattern.js +117 -29
  120. package/core/search/search-postprocess.js +479 -5
  121. package/core/search/search-read-semantic.js +260 -23
  122. package/core/search/search-read.js +82 -64
  123. package/core/search/search-reader-pin.js +71 -0
  124. package/core/search/search-rrf.js +279 -0
  125. package/core/search/search-semantic.js +110 -5
  126. package/core/search/search-server.js +130 -57
  127. package/core/search/search-trace.js +107 -0
  128. package/core/search/server-identity.js +93 -0
  129. package/core/search/session-daemon-prewarm.mjs +33 -10
  130. package/core/search/sweet-search.js +399 -7
  131. package/core/skills/sweet-index/SKILL.md +8 -6
  132. package/core/vector-store/binary-hnsw-index.js +194 -30
  133. package/core/vector-store/float-vector-store.js +96 -6
  134. package/core/vector-store/hnsw-index.js +220 -49
  135. package/eval/agent-read-workflows/bin/_ss-helpers.mjs +471 -0
  136. package/eval/agent-read-workflows/bin/ss-find +15 -0
  137. package/eval/agent-read-workflows/bin/ss-grep +12 -0
  138. package/eval/agent-read-workflows/bin/ss-read +14 -0
  139. package/eval/agent-read-workflows/bin/ss-search +18 -0
  140. package/eval/agent-read-workflows/bin/ss-semantic +12 -0
  141. package/eval/agent-read-workflows/bin/ss-trace +11 -0
  142. package/mcp/read-tool.js +109 -0
  143. package/mcp/server.js +55 -15
  144. package/mcp/tool-handlers.js +14 -124
  145. package/mcp/trace-tool.js +81 -0
  146. package/package.json +25 -10
  147. package/scripts/hooks/intercept-read.mjs +55 -0
  148. package/scripts/hooks/remind-tools.mjs +40 -0
  149. package/scripts/init.js +698 -54
  150. package/scripts/inject-agent-instructions.js +431 -0
  151. package/scripts/install-prompt-reminders.js +188 -0
  152. package/scripts/install-tool-enforcement.js +220 -0
  153. package/scripts/smoke-test.js +12 -9
  154. package/scripts/uninstall.js +276 -18
  155. package/scripts/write-claude-rules.js +110 -0
@@ -9,12 +9,13 @@
9
9
  */
10
10
 
11
11
  import { readFileSync } from 'fs';
12
+ import path from 'path';
12
13
  import { SEISMIC_CONFIG, DB_PATHS } from '../infrastructure/config/index.js';
13
14
  import { expandResults } from '../graph/graph-expansion.js';
14
15
  import { int8CosineSimilarity } from '../embedding/embedding-service.js';
15
16
  import { QualityScorer } from '../ranking/quality-scorer.js';
16
17
  import { classifyIntent, getIntentPolicy } from '../query/intent-router.js';
17
- import { applyFileKindRanking, classifyFileKindIntent } from '../ranking/file-kind-ranking.js';
18
+ import { applyFileKindRanking, applyResultDemotions, classifyFileKindIntent } from '../ranking/file-kind-ranking.js';
18
19
  import { recordQueryTelemetry } from '../embedding/embedding-cache.js';
19
20
  import { expandAliases } from './dedup/sibling-expander.js';
20
21
 
@@ -34,6 +35,353 @@ export function minMaxNormalize(values) {
34
35
  // for telemetry purposes. Derived empirically: FTS5 page-cache hits typically
35
36
  // complete in <2ms; 5ms gives headroom for slow I/O without inflating miss rates.
36
37
  const LEXICAL_HIT_THRESHOLD_MS = 5;
38
// Default boost weight for query-text agreement. applyQueryTextRanking multiplies
// a boosted result's score by (1 + weight * agreement); overridable through
// SWEET_SEARCH_QUERY_TEXT_RANKING_WEIGHT (accepted range 0..2) or opts.weight.
+ const QUERY_TEXT_RANKING_WEIGHT = 0.75;
39
// Default number of leading results considered by applyQueryTextRanking
// (the function never uses a window smaller than 3).
+ const QUERY_TEXT_RANKING_WINDOW = 20;
40
// Minimum agreement score a result needs before its score is boosted.
+ const QUERY_TEXT_MIN_AGREEMENT = 0.5;
41
// Upper bound on the number of characters readCandidateSpan returns per candidate.
+ const QUERY_TEXT_MAX_CHARS = 12000;
42
// Default number of leading results considered by applyFullVectorRescore.
+ const FULL_VECTOR_RESCORE_WINDOW = 20;
43
// Default full-vector blend weight, used when late interaction was not applied
// and no windowed result contains the exact query text.
+ const FULL_VECTOR_RESCORE_WEIGHT = 0.80;
44
+ // After LI/MaxSim rerank, blend dense full-vector similarity. Must match
45
+ // LATE_INTERACTION_CONFIG.blendWeight (ranking.js) so agent + bench agree
46
+ // without env overrides — calibrated on GCSN dev/held-out (seed=42 splits).
47
+ const FULL_VECTOR_LI_RESCORE_WEIGHT = 0.3;
48
// Full-vector blend weight used when at least one windowed result contains the
// normalized query text verbatim.
+ const FULL_VECTOR_EXACT_TEXT_WEIGHT = 0.20;
49
// Module-level cache of file contents read by readCandidateSpan, keyed by
// `${projectRoot}\0${file}`. Unreadable files are cached as ''.
// NOTE(review): nothing visible here evicts or invalidates entries — confirm
// this is acceptable for long-lived processes.
+ const QUERY_TEXT_FILE_CACHE = new Map();
50
// Terms that queryTextTerms drops from the query before agreement scoring.
+ const QUERY_TEXT_STOPWORDS = new Set([
51
+ 'a', 'an', 'and', 'are', 'as', 'at', 'be', 'by', 'can', 'could',
52
+ 'did', 'do', 'does', 'for', 'from', 'how', 'i', 'in', 'into', 'is',
53
+ 'it', 'of', 'on', 'or', 'should', 'the', 'to', 'was', 'were', 'what',
54
+ 'when', 'where', 'with', 'you', 'your',
55
+ ]);
56
+
57
/**
 * Report whether a named ablation switch is present.
 *
 * @param {Set<string>|string[]|*} ablations - Set or array of ablation names;
 *   any other value is treated as "no ablations".
 * @param {string} name - Ablation name to look up.
 * @returns {boolean} True when `name` is contained in `ablations`.
 */
function hasAblation(ablations, name) {
  if (ablations instanceof Set) {
    return ablations.has(name);
  }
  if (!Array.isArray(ablations)) {
    return false;
  }
  return ablations.includes(name);
}
62
+
63
+ // Per-stage profiling hooks. No-op unless `globalThis.__stageTimings` is set
64
+ // by the profiler (scripts/profile-search-stages.mjs). Used to attribute the
65
+ // "unaccounted" portion of post-retrieval wall time to specific sub-stages.
66
/**
 * Begin a stage timing.
 *
 * @returns {number|null} A `performance.now()` timestamp when
 *   `globalThis.__stageTimings` is set, otherwise null.
 */
function __ptStart() {
  if (!globalThis.__stageTimings) {
    return null;
  }
  return performance.now();
}
69
/**
 * Finish a stage timing and append the elapsed milliseconds to
 * `globalThis.__stageTimings[stage]`.
 *
 * Does nothing when `t0` is null/undefined or when no timing buffer is set.
 *
 * @param {string} stage - Key under which the sample is recorded.
 * @param {number|null} t0 - Start timestamp from `__ptStart()`.
 * @returns {void}
 */
function __ptEnd(stage, t0) {
  if (t0 == null) return;
  const timings = globalThis.__stageTimings;
  if (!timings) return;

  const elapsedMs = performance.now() - t0;
  const samples = timings[stage] || [];
  timings[stage] = samples;
  samples.push(elapsedMs);
}
75
+
76
/**
 * Read a numeric setting from the environment.
 *
 * @param {string} name - Environment variable name.
 * @param {number} fallback - Value returned when the variable is unset, empty,
 *   not parseable as a finite number, or outside [min, max].
 * @param {number} [min=0] - Inclusive lower bound.
 * @param {number} [max=Infinity] - Inclusive upper bound.
 * @returns {number} The parsed value or `fallback`.
 */
function envNumber(name, fallback, min = 0, max = Infinity) {
  const raw = process.env[name];
  if (raw == null || raw === '') {
    return fallback;
  }

  const num = Number.parseFloat(raw);
  if (!Number.isFinite(num)) {
    return fallback;
  }

  const withinBounds = num >= min && num <= max;
  return withinBounds ? num : fallback;
}
82
+
83
/**
 * Pick the file identifier of a search result.
 *
 * Checks `file`, `file_path`, `path` on the result itself, then the same three
 * fields on `result.metadata`, and returns the first truthy value.
 *
 * @param {object|null|undefined} result - Search result.
 * @returns {*} The first truthy candidate, or '' when none is present.
 */
function resultFileKey(result) {
  const meta = result?.metadata;
  const candidates = [
    result?.file,
    result?.file_path,
    result?.path,
    meta?.file,
    meta?.file_path,
    meta?.path,
  ];
  for (const candidate of candidates) {
    if (candidate) return candidate;
  }
  return '';
}
92
+
93
/**
 * Tell whether query-text ranking has been switched off through the
 * SWEET_SEARCH_QUERY_TEXT_RANKING environment variable.
 *
 * @returns {boolean} True only when the variable is exactly '0' or 'false'.
 */
function queryTextRankingOff() {
  const flag = process.env.SWEET_SEARCH_QUERY_TEXT_RANKING;
  return flag === '0' || flag === 'false';
}
97
+
98
/**
 * Tell whether the adaptive legacy late-interaction rerank is enabled.
 *
 * Enabled by default: an unset or empty SWEET_SEARCH_ADAPTIVE_LI_RERANK counts
 * as on. When the variable is set, only '1' and 'true' keep it on.
 *
 * @returns {boolean}
 */
function adaptiveLegacyLiEnabled() {
  const flag = process.env.SWEET_SEARCH_ADAPTIVE_LI_RERANK;
  const unset = flag == null || flag === '';
  return unset || flag === '1' || flag === 'true';
}
103
+
104
/**
 * Decide whether the top two results are close enough in score to warrant the
 * legacy late-interaction rerank.
 *
 * @param {Array<{score?: number}>} results - Ranked results; fewer than two
 *   entries (or a non-array) never triggers the rerank.
 * @returns {boolean} True when the feature is enabled and the score gap between
 *   the first and second result is at most SWEET_SEARCH_ADAPTIVE_LI_MARGIN
 *   (default 0.03, accepted range 0..1). Non-numeric scores count as 0;
 *   non-finite scores yield false.
 */
function shouldRunAdaptiveLegacyLi(results) {
  if (!adaptiveLegacyLiEnabled()) return false;
  if (!Array.isArray(results) || results.length < 2) return false;

  const margin = envNumber('SWEET_SEARCH_ADAPTIVE_LI_MARGIN', 0.03, 0, 1);
  const scoreOf = (entry) => (typeof entry?.score === 'number' ? entry.score : 0);
  const top = scoreOf(results[0]);
  const runnerUp = scoreOf(results[1]);

  return Number.isFinite(top)
    && Number.isFinite(runnerUp)
    && (top - runnerUp) <= margin;
}
113
+
114
/**
 * Normalize arbitrary text for query-term comparison.
 *
 * Splits lower/digit-to-upper camelCase boundaries with a space, lowercases,
 * collapses every run of non `[a-z0-9]` characters into a single space, and
 * trims the ends. Falsy input yields ''.
 *
 * @param {*} value - Text (or any value; it is coerced with String()).
 * @returns {string} Space-separated lowercase alphanumeric tokens.
 */
function normalizeForQueryText(value) {
  const raw = String(value || '');
  const camelSplit = raw.replace(/([a-z0-9])([A-Z])/g, '$1 $2');
  const lowered = camelSplit.toLowerCase();
  const alnumOnly = lowered.replace(/[^a-z0-9]+/g, ' ');
  return alnumOnly.trim();
}
121
+
122
/**
 * Extract the distinct, meaningful terms of a query.
 *
 * The query is normalized, split on whitespace, and filtered: terms shorter
 * than two characters and stopwords are dropped. First-occurrence order is
 * preserved.
 *
 * @param {*} query - Raw query text.
 * @returns {string[]} Unique terms in order of first appearance.
 */
function queryTextTerms(query) {
  // A Set keeps insertion order, which gives de-duplication for free.
  const kept = new Set();
  for (const token of normalizeForQueryText(query).split(/\s+/)) {
    const meaningful = token.length >= 2 && !QUERY_TEXT_STOPWORDS.has(token);
    if (meaningful) kept.add(token);
  }
  return [...kept];
}
133
+
134
/**
 * Resolve a result's relative file path against the project root, refusing
 * anything that would land outside the root.
 *
 * @param {string} projectRoot - Project root directory.
 * @param {string} file - Path relative to `projectRoot`.
 * @returns {string|null} The absolute resolved path, or null when either
 *   argument is missing or not a string, `file` is absolute, `file` contains a
 *   NUL byte, or the resolved path escapes the root.
 */
function safeCandidatePath(projectRoot, file) {
  // Non-string inputs would make path.* throw; treat them as "no candidate".
  if (typeof projectRoot !== 'string' || typeof file !== 'string') return null;
  if (!projectRoot || !file || path.isAbsolute(file) || file.includes('\0')) return null;

  const root = path.resolve(projectRoot);
  const resolved = path.resolve(root, file);

  // Containment is checked with path.relative rather than a string prefix:
  // a prefix test of `root + path.sep` rejects everything when the root is the
  // filesystem root (e.g. '/' + '/' === '//').
  const relative = path.relative(root, resolved);
  const escapesRoot = relative === '..'
    || relative.startsWith(`..${path.sep}`)
    || path.isAbsolute(relative); // e.g. a different drive on Windows
  return escapesRoot ? null : resolved;
}
141
+
142
/**
 * Read the source text backing a search result from disk.
 *
 * File contents are memoized in QUERY_TEXT_FILE_CACHE under
 * `${projectRoot}\0${file}`; a failed read is cached as ''.
 *
 * @param {string} projectRoot - Root the result's file path is resolved against.
 * @param {object} result - Search result; `startLine`/`endLine` are read from
 *   the result itself or from `result.metadata`.
 * @returns {string} At most QUERY_TEXT_MAX_CHARS characters: the start of the
 *   file when no line range is known, otherwise lines `startLine - 8` through
 *   `endLine` (1-based, inclusive). '' when the path is rejected, the file
 *   cannot be read, or the range is unusable.
 */
function readCandidateSpan(projectRoot, result) {
  const file = resultFileKey(result);
  const absPath = safeCandidatePath(projectRoot, file);
  if (!absPath) return '';

  const cacheKey = `${projectRoot}\0${file}`;
  let fileText = QUERY_TEXT_FILE_CACHE.get(cacheKey);
  if (fileText == null) {
    // Best effort: an unreadable file simply contributes no text.
    fileText = '';
    try {
      fileText = readFileSync(absPath, 'utf8');
    } catch {
      fileText = '';
    }
    QUERY_TEXT_FILE_CACHE.set(cacheKey, fileText);
  }
  if (!fileText) return '';

  const firstLine = result?.startLine ?? result?.metadata?.startLine ?? null;
  const lastLine = result?.endLine ?? result?.metadata?.endLine ?? null;
  if (firstLine == null || lastLine == null) {
    return fileText.slice(0, QUERY_TEXT_MAX_CHARS);
  }

  const lines = fileText.split(/\r?\n/);
  // `- 9` converts the 1-based start line to an index and backs up 8 more lines.
  const from = Math.max(0, Number(firstLine) - 9);
  const to = Math.min(lines.length, Number(lastLine));
  const usableRange = Number.isFinite(from) && Number.isFinite(to) && !(to < from);
  if (!usableRange) return '';

  const span = lines.slice(from, to).join('\n');
  return span.slice(0, QUERY_TEXT_MAX_CHARS);
}
169
+
170
/**
 * Score how well a result's text agrees with the query, in [0, 1].
 *
 * The result text is built from name, type, signature, docComment, content,
 * text, the file key and the on-disk span, then normalized. The score is
 * `0.65 * coverage + 0.25 * exact + 0.10 * bigrams`, capped at 1, where:
 *  - coverage: per query term, 1 for a whole-token match, 0.5 for a substring
 *    match of a term with at least 4 characters, averaged over all terms;
 *  - exact: 1 when the whole normalized query occurs in the text;
 *  - bigrams: share of adjacent query-term pairs occurring in the text.
 *
 * @param {string} query - Raw query.
 * @param {object} result - Search result.
 * @param {string} projectRoot - Root used to read the result's file.
 * @returns {number} Agreement score; 0 when the query has no usable terms or
 *   the result has no text.
 */
function queryTextAgreementScore(query, result, projectRoot) {
  const terms = queryTextTerms(query);
  // Return before touching the filesystem when there is nothing to match.
  if (terms.length === 0) return 0;

  const fields = [
    result?.name,
    result?.type,
    result?.signature,
    result?.docComment,
    result?.content,
    result?.text,
    resultFileKey(result),
    readCandidateSpan(projectRoot, result),
  ];
  const haystack = normalizeForQueryText(fields.filter(Boolean).join('\n'));
  if (!haystack) return 0;

  const tokens = new Set(haystack.split(/\s+/).filter(Boolean));
  const matched = terms.reduce((sum, term) => {
    if (tokens.has(term)) return sum + 1;
    if (term.length >= 4 && haystack.includes(term)) return sum + 0.5;
    return sum;
  }, 0);

  const pairTotal = Math.max(0, terms.length - 1);
  let pairHits = 0;
  for (let i = 1; i < terms.length; i++) {
    if (haystack.includes(`${terms[i - 1]} ${terms[i]}`)) pairHits++;
  }

  const coverage = matched / terms.length;
  const exact = haystack.includes(normalizeForQueryText(query)) ? 1 : 0;
  const bigrams = pairTotal > 0 ? pairHits / pairTotal : 0;
  return Math.min(1, 0.65 * coverage + 0.25 * exact + 0.10 * bigrams);
}
204
+
205
/**
 * Tell whether the normalized query occurs verbatim in a result's text.
 *
 * Only docComment, content, text and the on-disk span are searched.
 *
 * @param {string} query - Raw query.
 * @param {object} result - Search result.
 * @param {string} projectRoot - Root used to read the result's file.
 * @returns {boolean} False when the normalized query or the result text is empty.
 */
function hasExactQueryTextMatch(query, result, projectRoot) {
  const needle = normalizeForQueryText(query);
  if (!needle) return false;

  const parts = [
    result?.docComment,
    result?.content,
    result?.text,
    readCandidateSpan(projectRoot, result),
  ].filter(Boolean);
  const haystack = normalizeForQueryText(parts.join('\n'));
  if (haystack === '') return false;
  return haystack.includes(needle);
}
216
+
217
/**
 * Boost leading results whose text agrees with the query, then re-sort them.
 *
 * Within the window, each result whose agreement score reaches `minAgreement`
 * has its score multiplied by `1 + weight * agreement` and is annotated with
 * `_queryTextScore`, `_queryTextMult` and `_queryTextOrigScore`. The window is
 * then sorted by score (descending, ties by original position); results beyond
 * the window are appended unchanged.
 *
 * @param {object[]} results - Ranked results.
 * @param {string} query - Raw query.
 * @param {object} [opts]
 * @param {Set<string>|string[]} [opts.ablations] - 'no-query-text-ranking' disables the pass.
 * @param {string} [opts.projectRoot] - Defaults to SWEET_SEARCH_PROJECT_ROOT or the cwd.
 * @param {number} [opts.window] - Number of leading results to consider (minimum 3).
 * @param {number} [opts.weight] - Boost weight; the pass is skipped unless > 0.
 * @param {number} [opts.minAgreement] - Agreement threshold for boosting.
 * @returns {object[]} The same `results` array when nothing was boosted (or the
 *   pass is disabled / fewer than 3 results); otherwise a new array.
 */
function applyQueryTextRanking(results, query, opts = {}) {
  const disabled = queryTextRankingOff()
    || hasAblation(opts.ablations, 'no-query-text-ranking');
  if (disabled) return results;
  if (!Array.isArray(results) || results.length < 3) return results;

  const projectRoot = opts.projectRoot || process.env.SWEET_SEARCH_PROJECT_ROOT || process.cwd();
  const requestedWindow = opts.window ?? QUERY_TEXT_RANKING_WINDOW;
  const window = Math.min(results.length, Math.max(3, requestedWindow));
  const weight = opts.weight
    ?? envNumber('SWEET_SEARCH_QUERY_TEXT_RANKING_WEIGHT', QUERY_TEXT_RANKING_WEIGHT, 0, 2);
  const minAgreement = opts.minAgreement
    ?? envNumber('SWEET_SEARCH_QUERY_TEXT_MIN_AGREEMENT', QUERY_TEXT_MIN_AGREEMENT, 0, 1);
  if (!(weight > 0)) return results;

  let boostedAny = false;
  const head = [];
  for (const [index, result] of results.slice(0, window).entries()) {
    const agreement = queryTextAgreementScore(query, result, projectRoot);
    if (agreement < minAgreement) {
      head.push({ ...result, _queryTextOrigIndex: index });
      continue;
    }
    boostedAny = true;
    const baseScore = typeof result.score === 'number' ? result.score : 0;
    const mult = 1 + weight * agreement;
    head.push({
      ...result,
      score: baseScore * mult,
      _queryTextScore: agreement,
      _queryTextMult: mult,
      _queryTextOrigScore: baseScore,
      _queryTextOrigIndex: index,
    });
  }
  if (!boostedAny) return results;

  // Score descending; the original position breaks ties to keep the sort stable.
  head.sort((left, right) => {
    const byScore = (right.score || 0) - (left.score || 0);
    if (byScore !== 0) return byScore;
    return left._queryTextOrigIndex - right._queryTextOrigIndex;
  });
  head.forEach((entry) => {
    delete entry._queryTextOrigIndex;
  });

  if (window === results.length) return head;
  return head.concat(results.slice(window));
}
266
+
267
/**
 * Return the identifier of a search result.
 *
 * @param {object|null|undefined} result - Search result.
 * @returns {*} `result.id` when truthy, else `result.metadata.id` when truthy,
 *   else null.
 */
function resultIdentity(result) {
  const direct = result?.id;
  if (direct) return direct;
  const fromMetadata = result?.metadata?.id;
  return fromMetadata || null;
}
270
+
271
/**
 * Dot product of two numeric sequences over their common prefix.
 *
 * @param {ArrayLike<number>|null|undefined} a
 * @param {ArrayLike<number>|null|undefined} b
 * @returns {number|null} Sum of `a[i] * b[i]` for i below the shorter length,
 *   or null when either input is missing or empty.
 */
function dotProduct(a, b) {
  const lengthA = a?.length || 0;
  const lengthB = b?.length || 0;
  const n = Math.min(lengthA, lengthB);
  if (n === 0) return null;

  let total = 0;
  let i = 0;
  while (i < n) {
    total += a[i] * b[i];
    i += 1;
  }
  return total;
}
278
+
279
/**
 * Min-max normalize a value into [0, 1].
 *
 * @param {number} value - Value to normalize.
 * @param {number} min - Lower end of the observed range.
 * @param {number} max - Upper end of the observed range.
 * @returns {number} 0 for a non-finite value, 0.5 when the range is empty or
 *   inverted (`max` not greater than `min`), otherwise the clamped ratio.
 */
function normalizeScore(value, min, max) {
  if (!Number.isFinite(value)) return 0;
  if (!(max > min)) return 0.5;

  const ratio = (value - min) / (max - min);
  const cappedAtOne = Math.min(1, ratio);
  return Math.max(0, cappedAtOne);
}
284
+
285
/**
 * Re-rank the leading results by blending their current score with the dot
 * product between the query vector and each result's stored embedding.
 *
 * Base scores and full-vector scores are each min-max normalized over the
 * window, then combined as `(1 - weight) * baseNorm + weight * fullNorm`.
 * The weight is `opts.weight` when given; otherwise it is chosen by situation
 * (exact query text present in the window, late interaction applied, or
 * neither), each with its own environment override.
 *
 * Results whose embedding is unavailable are ranked on their normalized base
 * score, so every score in the window is on the same [0, 1] scale. Leaving them
 * on their raw score would mix scales in the sort and could reorder results
 * even with a weight of 0. Every rewritten score keeps the previous value in
 * `_fullVectorOrigScore`.
 *
 * @param {object[]} results - Ranked results.
 * @param {object} [opts]
 * @param {Set<string>|string[]} [opts.ablations] - 'no-full-vector-rescore' disables the pass.
 * @param {ArrayLike<number>} [opts.queryFloat] - Query vector; required.
 * @param {object} [opts.codebaseRepo] - Must expose `getEmbeddingsByIds(ids)`
 *   (used synchronously; its return value is read via `.size` and `.get(id)`).
 * @param {string} [opts.query] - Raw query, used for the exact-text check.
 * @param {string} [opts.projectRoot] - Defaults to SWEET_SEARCH_PROJECT_ROOT or the cwd.
 * @param {number} [opts.window] - Number of leading results to consider (minimum 3).
 * @param {number} [opts.weight] - Explicit blend weight.
 * @param {boolean} [opts.lateInteractionApplied] - Selects the post-LI weight.
 * @returns {object[]} The same `results` array when the pass does not apply
 *   (disabled, fewer than 3 results, no vectors, fewer than 2 scorable
 *   candidates); otherwise a new array with the window re-sorted and the
 *   remaining results appended unchanged.
 */
function applyFullVectorRescore(results, opts = {}) {
  if (hasAblation(opts.ablations, 'no-full-vector-rescore')) return results;
  if (!Array.isArray(results) || results.length < 3) return results;
  if (!opts.queryFloat || !opts.codebaseRepo?.getEmbeddingsByIds) return results;

  const window = Math.min(
    results.length,
    Math.max(3, opts.window ?? FULL_VECTOR_RESCORE_WINDOW)
  );
  const head = results.slice(0, window);
  const ids = head.map((result) => resultIdentity(result)).filter(Boolean);
  if (ids.length === 0) return results;

  const embeddings = opts.codebaseRepo.getEmbeddingsByIds(ids);
  if (!embeddings || embeddings.size === 0) return results;

  const scored = head.map((result, index) => {
    const id = resultIdentity(result);
    const vector = id ? embeddings.get(id) : null;
    const fullScore = vector ? dotProduct(opts.queryFloat, vector) : null;
    return {
      result,
      index,
      baseScore: typeof result.score === 'number' ? result.score : 0,
      fullScore,
    };
  });

  // With fewer than two scorable candidates there is nothing to normalize against.
  const withFullScore = scored.filter((item) => Number.isFinite(item.fullScore));
  if (withFullScore.length < 2) return results;

  const baseValues = scored.map((item) => item.baseScore);
  const fullValues = withFullScore.map((item) => item.fullScore);
  const minBase = Math.min(...baseValues);
  const maxBase = Math.max(...baseValues);
  const minFull = Math.min(...fullValues);
  const maxFull = Math.max(...fullValues);

  const projectRoot = opts.projectRoot || process.env.SWEET_SEARCH_PROJECT_ROOT || process.cwd();
  const exactTextMatch = head.some(
    (result) => hasExactQueryTextMatch(opts.query || '', result, projectRoot)
  );
  // The post-LI weight falls back to the generic env override before the constant.
  const liRescoreWeight = envNumber(
    'SWEET_SEARCH_FULL_VECTOR_LI_RESCORE_WEIGHT',
    envNumber('SWEET_SEARCH_FULL_VECTOR_RESCORE_WEIGHT', FULL_VECTOR_LI_RESCORE_WEIGHT, 0, 1),
    0,
    1
  );
  const weight = opts.weight ?? (exactTextMatch
    ? envNumber('SWEET_SEARCH_FULL_VECTOR_EXACT_TEXT_WEIGHT', FULL_VECTOR_EXACT_TEXT_WEIGHT, 0, 1)
    : opts.lateInteractionApplied
      ? liRescoreWeight
      : envNumber('SWEET_SEARCH_FULL_VECTOR_RESCORE_WEIGHT', FULL_VECTOR_RESCORE_WEIGHT, 0, 1));

  const reranked = scored.map((item) => {
    const baseNorm = normalizeScore(item.baseScore, minBase, maxBase);
    if (!Number.isFinite(item.fullScore)) {
      // No embedding: stay neutral by ranking on the normalized base score, so
      // this entry is comparable with the blended scores of its neighbours.
      return {
        ...item.result,
        score: baseNorm,
        _fullVectorOrigScore: item.baseScore,
        _fullVectorOrigIndex: item.index,
      };
    }
    const fullNorm = normalizeScore(item.fullScore, minFull, maxFull);
    const blended = (1 - weight) * baseNorm + weight * fullNorm;
    return {
      ...item.result,
      score: blended,
      _fullVectorScore: item.fullScore,
      _fullVectorNorm: fullNorm,
      _fullVectorOrigScore: item.baseScore,
      _fullVectorOrigIndex: item.index,
    };
  });

  // Score descending; the original position breaks ties to keep the sort stable.
  reranked.sort((a, b) => {
    const d = (b.score || 0) - (a.score || 0);
    return d !== 0 ? d : a._fullVectorOrigIndex - b._fullVectorOrigIndex;
  });
  for (const result of reranked) delete result._fullVectorOrigIndex;
  return window === results.length ? reranked : reranked.concat(results.slice(window));
}
361
+
362
/**
 * Move repeat hits from an already-seen file behind the first hit of every file.
 *
 * Within the window, the first result for each file key keeps its relative
 * order; later results for the same file are collected and placed after them,
 * also in their original relative order. Results without a file key are never
 * treated as repeats. Results beyond the window are appended unchanged.
 *
 * @param {object[]} results - Ranked results.
 * @param {object} [opts]
 * @param {Set<string>|string[]} [opts.ablations] - 'no-file-diversity' disables the pass.
 * @param {number} [opts.window] - Number of leading results to consider
 *   (minimum 10, defaults to all results).
 * @returns {object[]} The same `results` array when nothing moved (or the pass
 *   is disabled / fewer than 3 results); otherwise a new array.
 */
function promoteFileDiversity(results, opts = {}) {
  if (!Array.isArray(results) || results.length < 3) return results;
  if (hasAblation(opts.ablations, 'no-file-diversity')) return results;

  const window = Math.min(results.length, Math.max(10, opts.window ?? results.length));
  const seenFiles = new Set();
  const firstHits = [];
  const repeatHits = [];

  results.slice(0, window).forEach((result) => {
    const key = resultFileKey(result);
    const isRepeat = Boolean(key) && seenFiles.has(key);
    if (isRepeat) {
      repeatHits.push(result);
      return;
    }
    if (key) seenFiles.add(key);
    firstHits.push(result);
  });

  if (repeatHits.length === 0) return results;
  return firstHits.concat(repeatHits, results.slice(window));
}
37
385
 
38
386
  // =============================================================================
39
387
  // Post-retrieval processing
@@ -74,6 +422,13 @@ export async function applyPostRetrieval(results, query, options, searchContext)
74
422
  effectiveGraphExpand,
75
423
  intentPolicy,
76
424
  start,
425
+ _entityKindCache,
426
+ _entityNameCache,
427
+ _resultTextCache,
428
+ _fullFileTextCache,
429
+ _isTestSupportCache,
430
+ _isTestChunkCache,
431
+ _fileKindCache,
77
432
  } = searchContext;
78
433
 
79
434
  // Merge semantic stats (embedding/rerank) into main stats for CostTracker.
@@ -165,6 +520,7 @@ export async function applyPostRetrieval(results, query, options, searchContext)
165
520
  }
166
521
  };
167
522
 
523
+ const __t_expand = __ptStart();
168
524
  results = expandResults(graphDb, results, {
169
525
  expandMode: effectiveGraphExpand,
170
526
  adaptiveHop2,
@@ -173,16 +529,21 @@ export async function applyPostRetrieval(results, query, options, searchContext)
173
529
  cosineSimilarity: int8CosineSimilarity,
174
530
  codebaseDb: this.codebaseRepo,
175
531
  readFileLines,
532
+ format: options.format,
533
+ manifestEpoch: this.graphSearch?.getManifestEpoch?.(),
176
534
  ...(intentEdgeTypes && !graphExpandOptions.edgeTypes ? { edgeTypes: intentEdgeTypes } : {}),
177
535
  ...graphExpandOptions,
178
536
  });
537
+ __ptEnd('post:expandResults', __t_expand);
179
538
 
180
539
  // Attach LI chunk ids to expanded entities so they can participate
181
540
  // in the post-expansion MaxSim rerank pool. The graph stores entities
182
541
  // (entity_id keyed by code-graph.db) while LI is keyed by chunk id;
183
542
  // without this bridge expanded entries fall through hasTokens() and
184
543
  // are appended to the result tail without ever competing for top-K.
544
+ const __t_attachIds = __ptStart();
185
545
  const expandedAttached = attachChunkIdsToExpanded(results, this.codebaseRepo);
546
+ __ptEnd('post:attachChunkIdsToExpanded', __t_attachIds);
186
547
 
187
548
  stats.graphExpansion = {
188
549
  mode: effectiveGraphExpand,
@@ -227,6 +588,7 @@ export async function applyPostRetrieval(results, query, options, searchContext)
227
588
  : null;
228
589
 
229
590
  try {
591
+ const __t_cascade = __ptStart();
230
592
  const { cascadedScore } = await import('../ranking/cascaded-scorer.js');
231
593
  const cascadeResult = await cascadedScore(query, results, {
232
594
  lateInteractionIndex: liIndex,
@@ -238,6 +600,7 @@ export async function applyPostRetrieval(results, query, options, searchContext)
238
600
  lexicalConfident: false,
239
601
  loadDocumentContent: this.loadDocumentContent.bind(this),
240
602
  });
603
+ __ptEnd('post:cascadedScore', __t_cascade);
241
604
  results = cascadeResult.results;
242
605
  stats.cascade = cascadeResult.stats;
243
606
 
@@ -257,9 +620,15 @@ export async function applyPostRetrieval(results, query, options, searchContext)
257
620
  // =========================================================================
258
621
  // Late Interaction Reranking (legacy, flag OFF — post-expansion, Phase 6)
259
622
  // =========================================================================
623
+ const agentFormats = new Set(['agent', 'agent_preview', 'agent_full', 'agent_full_xl']);
624
+ const allowLegacyLateInteraction = process.env.SWEET_SEARCH_LEGACY_LI_RERANK === '1'
625
+ || agentFormats.has(options.format)
626
+ || searchContext?.fromSearch !== true
627
+ || shouldRunAdaptiveLegacyLi(results);
260
628
  const shouldRunLateInteraction = this.hasLateInteractionIndex &&
261
629
  (options.useLateInteraction ?? this.useLateInteraction) &&
262
630
  !this.lateInteractionIndex.modelMismatch &&
631
+ allowLegacyLateInteraction &&
263
632
  Array.isArray(results) && results.length > 0 &&
264
633
  !isConfidentLexical;
265
634
 
@@ -330,12 +699,14 @@ export async function applyPostRetrieval(results, query, options, searchContext)
330
699
  // =========================================================================
331
700
  if (qualityWeight > 0 && Array.isArray(results) && results.length > 0) {
332
701
  const qStart = Date.now();
702
+ const __t_quality = __ptStart();
333
703
  if (!this._qualityScorer) {
334
704
  this._qualityScorer = new QualityScorer({
335
705
  dbPath: this.graphSearch?.dbPath || DB_PATHS.codeGraph,
336
706
  });
337
707
  }
338
708
  results = this._qualityScorer.scoreResults(results);
709
+ __ptEnd('post:qualityScoring', __t_quality);
339
710
 
340
711
  // Blend: final = (1 - w) * original + w * quality
341
712
  const w = Math.max(0, Math.min(1, qualityWeight));
@@ -367,6 +738,7 @@ export async function applyPostRetrieval(results, query, options, searchContext)
367
738
  // =========================================================================
368
739
  // Apply intent policy — chunkTypeBoosts, maxResults, rerankerWeight
369
740
  // =========================================================================
741
+ const __t_intentPolicy = __ptStart();
370
742
  if (intentPolicy && Array.isArray(results) && results.length > 0) {
371
743
  // (a) chunkTypeBoosts: Multiply result scores by per-chunk-type boost factors
372
744
  if (intentPolicy.chunkTypeBoosts && Object.keys(intentPolicy.chunkTypeBoosts).length > 0) {
@@ -399,18 +771,34 @@ export async function applyPostRetrieval(results, query, options, searchContext)
399
771
  results = results.slice(0, effectiveK);
400
772
  }
401
773
  }
774
+ __ptEnd('post:intentPolicy', __t_intentPolicy);
402
775
 
403
776
  // =========================================================================
404
777
  // Intent-aware file-kind ranking
405
778
  // =========================================================================
406
- // Soft-demote docs/tests/types files when the query is confidently
407
- // implementation-seeking AND the top-N window contains both docs/tests/
408
- // types and implementation candidates. No-op otherwise. Disable with
779
+ // Soft-demote docs/examples/tests/types/config files when the query is
780
+ // confidently implementation-seeking AND the top-N window contains both
781
+ // demotable and implementation candidates. No-op otherwise. Disable with
409
782
  // SWEET_SEARCH_FILE_KIND_RANKING=0; tune SWEET_SEARCH_FILE_KIND_FACTOR.
410
783
  if (Array.isArray(results) && results.length > 0) {
411
784
  const fileKindIntent = classifyFileKindIntent(query);
412
785
  const beforeTop = results[0];
413
- const afterFK = applyFileKindRanking(results, { intent: fileKindIntent });
786
+ const semanticLike = searchMode === 'hybrid' || searchMode === 'semantic'
787
+ || stats.path === 'hybrid' || stats.path === 'semantic';
788
+ const isAgentFormat = options.format === 'agent';
789
+ const __t_fileKind = __ptStart();
790
+ const afterFK = applyFileKindRanking(results, {
791
+ intent: fileKindIntent,
792
+ ...(semanticLike ? {
793
+ docFactor: 0.35,
794
+ testFactor: 0.35,
795
+ typeFactor: 0.70,
796
+ ancillaryFactor: 0.15,
797
+ tinyAncillaryFactor: 0.05,
798
+ } : {}),
799
+ _fileKindCache,
800
+ });
801
+ __ptEnd('post:applyFileKindRanking', __t_fileKind);
414
802
  if (afterFK !== results) {
415
803
  results = afterFK;
416
804
  stats.fileKindRanking = {
@@ -424,6 +812,90 @@ export async function applyPostRetrieval(results, query, options, searchContext)
424
812
  applied: false,
425
813
  };
426
814
  }
815
+
816
+ const beforeDemotionTop = results[0];
817
+ const __t_demotions = __ptStart();
818
+ const afterDemotions = applyResultDemotions(results, {
819
+ query,
820
+ ablations: options.ablations,
821
+ format: options.format,
822
+ projectRoot: this.projectRoot,
823
+ codeGraphRepo: this.codeGraphRepo,
824
+ _entityKindCache,
825
+ _entityNameCache,
826
+ _resultTextCache,
827
+ _fullFileTextCache,
828
+ _isTestSupportCache,
829
+ _isTestChunkCache,
830
+ _fileKindCache,
831
+ });
832
+ __ptEnd('post:applyResultDemotions', __t_demotions);
833
+ if (afterDemotions !== results) {
834
+ results = afterDemotions;
835
+ stats.resultDemotions = {
836
+ applied: true,
837
+ top1Changed: !!beforeDemotionTop && results[0] && (beforeDemotionTop !== results[0]),
838
+ };
839
+ }
840
+
841
+ const beforeQueryTextTop = results[0];
842
+ const __t_queryText = __ptStart();
843
+ const afterQueryTextRanking = semanticLike && !isAgentFormat
844
+ ? applyQueryTextRanking(results, query, {
845
+ ablations: options.ablations,
846
+ projectRoot: this.projectRoot,
847
+ window: options.queryTextRankingWindow,
848
+ weight: options.queryTextRankingWeight,
849
+ })
850
+ : results;
851
+ __ptEnd('post:applyQueryTextRanking', __t_queryText);
852
+ if (afterQueryTextRanking !== results) {
853
+ results = afterQueryTextRanking;
854
+ stats.queryTextRanking = {
855
+ applied: true,
856
+ top1Changed: !!beforeQueryTextTop && results[0] && (beforeQueryTextTop !== results[0]),
857
+ };
858
+ }
859
+
860
+ const beforeFullVectorTop = results[0];
861
+ const __t_fullVec = __ptStart();
862
+ const afterFullVectorRescore = semanticLike && !isAgentFormat
863
+ ? applyFullVectorRescore(results, {
864
+ ablations: options.ablations,
865
+ query,
866
+ queryFloat: semanticStats?.queryFloat,
867
+ codebaseRepo: this.codebaseRepo,
868
+ projectRoot: this.projectRoot,
869
+ window: options.fullVectorRescoreWindow,
870
+ weight: options.fullVectorRescoreWeight,
871
+ lateInteractionApplied: !!stats.lateInteraction && !stats.lateInteraction.error,
872
+ })
873
+ : results;
874
+ __ptEnd('post:applyFullVectorRescore', __t_fullVec);
875
+ if (afterFullVectorRescore !== results) {
876
+ results = afterFullVectorRescore;
877
+ stats.fullVectorRescore = {
878
+ applied: true,
879
+ top1Changed: !!beforeFullVectorTop && results[0] && (beforeFullVectorTop !== results[0]),
880
+ };
881
+ }
882
+
883
+ const beforeDiversityTop = results[0];
884
+ const __t_diversity = __ptStart();
885
+ const diversified = isAgentFormat
886
+ ? results
887
+ : promoteFileDiversity(results, {
888
+ ablations: options.ablations,
889
+ window: options.fileDiversityWindow ?? results.length,
890
+ });
891
+ __ptEnd('post:promoteFileDiversity', __t_diversity);
892
+ if (diversified !== results) {
893
+ results = diversified;
894
+ stats.fileDiversity = {
895
+ applied: true,
896
+ top1Changed: !!beforeDiversityTop && results[0] && (beforeDiversityTop !== results[0]),
897
+ };
898
+ }
427
899
  }
428
900
 
429
901
  stats.total_ms = Date.now() - start;
@@ -443,7 +915,9 @@ export async function applyPostRetrieval(results, query, options, searchContext)
443
915
  // every file matching a search — grouped under the exemplar as result.aliases.
444
916
  if (Array.isArray(results) && results.length > 0 && this.codebaseRepo) {
445
917
  try {
918
+ const __t_aliases = __ptStart();
446
919
  const { stats: dedupStats } = expandAliases(results, this.codebaseRepo, query);
920
+ __ptEnd('post:expandAliases', __t_aliases);
447
921
  if (dedupStats.exemplarsExpanded > 0) {
448
922
  stats.dedupExpansion = dedupStats;
449
923
  }