smart-context-mcp 1.19.0 → 1.20.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -5,6 +5,7 @@ import { promisify } from 'node:util';
5
5
  import { rgPath } from '@vscode/ripgrep';
6
6
  import { buildMetrics, persistMetrics } from '../metrics.js';
7
7
  import { loadIndex, queryIndex, queryRelated } from '../index.js';
8
+ import { countTokens } from '../tokenCounter.js';
8
9
  import { projectRoot } from '../utils/paths.js';
9
10
  import { isBinaryBuffer, isDockerfile, resolveSafePath } from '../utils/fs.js';
10
11
  import { truncate } from '../utils/text.js';
@@ -14,7 +15,9 @@ import { recordDevctxOperation } from '../missed-opportunities.js';
14
15
  import { IGNORED_DIRS, IGNORED_FILE_NAMES, IGNORED_FILE_PATTERNS } from '../config/ignored-paths.js';
15
16
  import { createProgressReporter } from '../streaming.js';
16
17
  import { ensureIndexReady } from '../index-manager.js';
17
- import { semanticRankSymbols, semanticRankFiles, buildIndexCorpusIdf } from '../embeddings/index.js';
18
+ import { semanticRankSymbols, semanticRankFiles, buildIndexCorpusIdf, embed, cosineSimilarity } from '../embeddings/index.js';
19
+ import { ACTIVE_SESSION_SCOPE, withStateDbSnapshot } from '../storage/sqlite.js';
20
+ import { getNoiseHints, isGlobalMemoryEnabled, recordNoiseHint } from '../global-memory/store.js';
18
21
 
19
22
  const execFile = promisify(execFileCallback);
20
23
  const supportedGlobs = [
@@ -30,8 +33,11 @@ const likelySourceExtensions = new Set(['.js', '.jsx', '.ts', '.tsx', '.py', '.g
30
33
  const likelyConfigExtensions = new Set(['.json', '.toml', '.yaml', '.yml', '.tf', '.tfvars', '.hcl']);
31
34
  const lowSignalNames = ['changelog', 'readme', 'migration', 'license', 'licence', 'contributing', 'authors', 'code_of_conduct', 'security', 'history'];
32
35
  const testPatterns = ['.test.', '.spec.', '__tests__', '__mocks__', 'fixtures'];
36
+ const barrelFileNames = new Set(['index', 'mod', 'exports', 'public-api', 'public_api', 'barrel']);
37
// Matches a single source line that re-exports from another module. Covered forms:
//   export * from '…'
//   export * as ns from '…'        (namespace re-export)
//   export { a, b as c } from '…'
//   export type { T } from '…'     (TypeScript type-only re-export)
// The case-insensitive flag is kept from the previous definition.
const reexportPattern = /^\s*export\s+(?:type\s+)?(?:\*(?:\s+as\s+[\w$]+)?|\{.*\})\s+from\s+/i;
33
38
 
34
39
  export const VALID_INTENTS = new Set(['implementation', 'debug', 'tests', 'config', 'docs', 'explore']);
40
+ export const VALID_SEARCH_MODES = new Set(['needle', 'balanced', 'semantic']);
35
41
 
36
42
  export const intentWeights = {
37
43
  implementation: { src: 10, source: 14, config: 4, lowSignal: -35, test: -15 },
@@ -43,6 +49,78 @@ export const intentWeights = {
43
49
  };
44
50
 
45
51
  const defaultWeights = intentWeights.explore;
52
+ const DEFAULT_SEARCH_MODE = 'balanced';
53
+
54
/**
 * Decide which search mode a request should run in.
 *
 * An explicit `mode` wins when it is one of VALID_SEARCH_MODES; otherwise the
 * boolean `semantic` flag (strictly `true`) selects 'semantic'; otherwise the
 * default mode is used.
 *
 * @param {{ mode?: unknown, semantic?: unknown }} options
 * @returns {string} The resolved search mode.
 */
const resolveSearchMode = ({ mode, semantic }) => {
  const isKnownMode = typeof mode === 'string' && VALID_SEARCH_MODES.has(mode);
  if (isKnownMode) {
    return mode;
  }

  return semantic === true ? 'semantic' : DEFAULT_SEARCH_MODE;
};
65
+
66
/**
 * Parse a JSON string that is expected to contain an object.
 *
 * Returns `fallback` when the input is not a non-blank string, when it is not
 * valid JSON, or when the parsed value is not a plain object (e.g. `null`,
 * a number, a string, or an array). Guaranteeing an object here lets callers
 * read properties off the result without a null check.
 *
 * @param {unknown} value - Candidate JSON text.
 * @param {object} [fallback={}] - Value returned when parsing yields no object.
 * @returns {object} The parsed object, or `fallback`.
 */
const parseJsonObject = (value, fallback = {}) => {
  if (typeof value !== 'string' || value.trim().length === 0) return fallback;

  let parsed;
  try {
    parsed = JSON.parse(value);
  } catch {
    return fallback;
  }

  // JSON.parse happily returns null, primitives and arrays; none of those is an "object" for our callers.
  const isPlainObject = parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed);
  return isPlainObject ? parsed : fallback;
};
74
+
75
// Coerce a path-like value to a string (null/undefined become '') and use forward slashes throughout.
const normalizeRelPath = (value) => String(value ?? '').split('\\').join('/');
76
+
77
// Extract search terms from a value (null/undefined treated as ''), lowercase them,
// and drop duplicates while keeping first-seen order.
const uniqueLowerTerms = (value) => {
  const lowered = extractTerms(String(value ?? '')).map((term) => term.toLowerCase());
  const distinct = new Set(lowered);
  return Array.from(distinct);
};
78
+
79
/**
 * Count how many of the given terms occur (as substrings) in `text`.
 *
 * The text is lowercased before comparison; terms are compared as given.
 * Each term contributes at most one hit regardless of how often it occurs.
 *
 * @param {string[]} terms - Terms to look for.
 * @param {string} text - Text to search in.
 * @returns {number} Number of terms found; 0 for an empty/non-array term list or empty text.
 */
const countTermHits = (terms, text) => {
  const hasTerms = Array.isArray(terms) && terms.length > 0;
  if (!hasTerms || !text) return 0;

  const haystack = text.toLowerCase();
  let found = 0;
  for (let i = 0; i < terms.length; i += 1) {
    found += haystack.includes(terms[i]) ? 1 : 0;
  }
  return found;
};
88
+
89
/**
 * Read ranking signals from the currently active session, if any.
 *
 * Looks up the active session id for ACTIVE_SESSION_SCOPE, loads that session's
 * goal, current focus and snapshot JSON, and derives:
 *   - touchedFiles: de-duplicated, slash-normalized `snapshot.touchedFiles`
 *   - hotFiles: the last five entries of that list
 *   - focusTerms / goalTerms: unique lowercase terms from `current_focus` / `goal`
 *
 * This is best-effort: any failure (including a missing row) yields `null`
 * so that search continues without session context.
 *
 * @returns {Promise<{touchedFiles: Set<string>, hotFiles: Set<string>, focusTerms: string[], goalTerms: string[]} | null>}
 */
const loadActiveSessionSignals = async () => {
  const readSignals = (db) => {
    const activeRow = db.prepare(`
      SELECT session_id
      FROM active_session
      WHERE scope = ?
    `).get(ACTIVE_SESSION_SCOPE);
    const activeSessionId = activeRow?.session_id;
    if (!activeSessionId) return null;

    const sessionRow = db.prepare(`
      SELECT goal, current_focus, snapshot_json
      FROM sessions
      WHERE session_id = ?
    `).get(activeSessionId);
    if (!sessionRow) return null;

    const snapshot = parseJsonObject(sessionRow.snapshot_json, {});
    let touchedFiles = [];
    if (Array.isArray(snapshot.touchedFiles)) {
      const normalized = snapshot.touchedFiles.map(normalizeRelPath).filter(Boolean);
      touchedFiles = [...new Set(normalized)];
    }

    return {
      touchedFiles: new Set(touchedFiles),
      // The tail of the list is treated as the most recently touched files.
      hotFiles: new Set(touchedFiles.slice(-5)),
      focusTerms: uniqueLowerTerms(sessionRow.current_focus),
      goalTerms: uniqueLowerTerms(sessionRow.goal),
    };
  };

  try {
    return await withStateDbSnapshot(readSignals);
  } catch {
    // Deliberately swallowed: session context is optional for ranking.
    return null;
  }
};
46
124
 
47
125
  const shouldIgnoreFile = (filePath) => {
48
126
  const base = path.basename(filePath);
@@ -147,12 +225,99 @@ const extractTerms = (query) =>
147
225
  .map((t) => t.trim())
148
226
  .filter((t) => t.length >= 3);
149
227
 
150
- const searchWithRipgrep = async (root, query) => {
228
/**
 * Summarize the shape of a search query so suggestions can be tailored to it.
 *
 * @param {unknown} query - Raw query; null/undefined are treated as ''.
 * @returns {{raw: string, terms: string[], tokenCount: number, isSingleToken: boolean,
 *   looksSymbolLike: boolean, isConceptualMultiWord: boolean, refinementTerm: string}}
 *   `raw` is the trimmed query, `terms` comes from extractTerms, `tokenCount` counts
 *   whitespace-separated tokens, and `refinementTerm` is the longest term
 *   (ties broken alphabetically) or the trimmed query when there are no terms.
 */
const buildQueryProfile = (query) => {
  const raw = String(query ?? '').trim();
  const terms = extractTerms(raw);

  const words = raw.length === 0 ? [] : raw.split(/\s+/).filter(Boolean);
  const tokenCount = words.length;
  const isSingleToken = tokenCount === 1;

  // Uppercase letters, separators or a camelCase hump suggest an identifier or path.
  const hasCodeHints = /[A-Z_./:-]/.test(raw) || /[a-z][A-Z]/.test(raw);
  const isPlainLowerWord = /^[a-z][a-z0-9]*$/.test(raw);
  const looksSymbolLike = isSingleToken && (hasCodeHints || isPlainLowerWord);

  const byLengthThenAlpha = (left, right) => right.length - left.length || left.localeCompare(right);
  const rankedTerms = terms.slice().sort(byLengthThenAlpha);
  const refinementTerm = rankedTerms[0] ?? raw;

  return {
    raw,
    terms,
    tokenCount,
    isSingleToken,
    looksSymbolLike,
    isConceptualMultiWord: tokenCount >= 2,
    refinementTerm,
  };
};
250
+
251
/**
 * Produce follow-up suggestions for a search that returned nothing or too much.
 *
 * - With zero matches: suggests switching modes, adding `kinds`, refining to the
 *   strongest term, and always ends with the Grep hint.
 * - With more than 30 matching files: suggests ways to narrow the search.
 * - Otherwise: no suggestions.
 *
 * Suggestions are unique and returned in the order they were produced.
 *
 * @param {object} options
 * @param {string} options.query - The user's query.
 * @param {string} [options.mode] - Resolved search mode ('needle' | 'balanced' | 'semantic').
 * @param {number} [options.totalMatches=0]
 * @param {number} [options.totalFiles=0]
 * @param {string} [options.searchMode='exact'] - Strategy that produced the matches.
 * @param {boolean} [options.hasKinds=false] - Whether a `kinds` filter was supplied.
 * @returns {string[]} Suggestion sentences.
 */
const buildSearchSuggestions = ({
  query,
  mode = DEFAULT_SEARCH_MODE,
  totalMatches = 0,
  totalFiles = 0,
  searchMode = 'exact',
  hasKinds = false,
}) => {
  const { raw, isSingleToken, looksSymbolLike, isConceptualMultiWord, refinementTerm } = buildQueryProfile(query);

  // A Set keeps insertion order and silently ignores repeats.
  const collected = new Set();
  const suggest = (text) => {
    if (text) collected.add(text);
  };

  const canSwitchToNeedle = mode !== 'needle' && looksSymbolLike;
  const canAddKinds = !hasKinds && looksSymbolLike;

  if (totalMatches === 0) {
    if (isConceptualMultiWord) {
      if (mode === 'needle') {
        suggest('Try `mode="balanced"` to allow regex and term expansion for this multi-word query.');
        suggest('Try `mode="semantic"` if the query is conceptual rather than a literal symbol or string.');
      } else if (mode !== 'semantic') {
        suggest('Try `mode="semantic"` for conceptual multi-word queries such as flows, behaviors, or features.');
      }
    }

    if (canSwitchToNeedle) {
      suggest('Try `mode="needle"` for an exact symbol/string lookup without aggressive expansion.');
    }

    if (canAddKinds) {
      suggest('Try `kinds=["function"]` or `kinds=["class"]` to narrow the search to symbol declarations.');
    }

    if (isConceptualMultiWord && refinementTerm && refinementTerm !== raw) {
      suggest(`Try refining the query to \`${refinementTerm}\` or pair it with a symbol filter.`);
    }

    suggest('Try Grep for raw text if the content may live in a file type not indexed by `smart_search`.');
    return [...collected];
  }

  const isTooBroad = totalFiles > 30;
  if (!isTooBroad) {
    return [...collected];
  }

  if (canSwitchToNeedle) {
    suggest('Try `mode="needle"` to keep this search exact and avoid broad expansion.');
  }

  if (canAddKinds) {
    suggest('Try `kinds=["function"]`, `kinds=["class"]`, or another symbol kind to reduce noise.');
  }

  if (isConceptualMultiWord && refinementTerm && searchMode === 'terms') {
    suggest(`Try refining to the strongest term \`${refinementTerm}\` before widening again.`);
  }

  if (isSingleToken && !looksSymbolLike) {
    suggest('Try a more discriminative term such as a function name, class name, or config key.');
  }

  return [...collected];
};
310
+
311
+ const searchWithRipgrep = async (root, query, mode = DEFAULT_SEARCH_MODE) => {
151
312
  // Pass 1: exact literal match
152
313
  const exact = await runRg(root, query, ['--fixed-strings']);
153
314
  if (exact === null) return null;
154
315
  if (exact.length > 0) return { matches: exact, searchMode: 'exact' };
155
316
 
317
+ if (mode === 'needle') {
318
+ return { matches: [], searchMode: 'exact', zeroReason: 'no_matches' };
319
+ }
320
+
156
321
  // Pass 2: regex (handles partial words, snake_case, camelCase fragments)
157
322
  const escaped = query.replace(/[$()*+.?[\\\]^{|}]/g, '\\$&');
158
323
  const regex = await runRg(root, escaped);
@@ -223,6 +388,120 @@ export const searchWithFallback = (root, query) => {
223
388
 
224
389
  const formatMatch = (match) => `${match.file}:${match.lineNumber}:${match.content}`;
225
390
 
391
/**
 * Score how strongly a result group relates to the active session.
 *
 * Boosts are awarded when the file was touched in the session (24), is among the
 * session's hot files (8), and when focus/goal terms appear in the file's relative
 * path or in the first five match lines (each capped, see below).
 *
 * @param {{file: string, matches: Array<{content: string}>}} group - Matches grouped per file.
 * @param {object|null} sessionSignals - Signals with `touchedFiles`, `hotFiles`, `focusTerms`, `goalTerms`; falsy disables scoring.
 * @param {string} [root=projectRoot] - Directory that relative paths are computed against.
 * @returns {{score: number, breakdown: object}} Total boost plus its per-signal breakdown.
 */
const scoreSessionContext = (group, sessionSignals, root = projectRoot) => {
  const breakdown = {
    touchedFileBoost: 0,
    hotFileBoost: 0,
    focusPathBoost: 0,
    focusContentBoost: 0,
    goalPathBoost: 0,
    goalContentBoost: 0,
  };

  if (!sessionSignals) {
    return { score: 0, breakdown };
  }

  const relPath = normalizeRelPath(path.relative(root, group.file));
  const pathText = relPath.toLowerCase();
  const sampleText = group.matches
    .slice(0, 5)
    .map(({ content }) => content.toLowerCase())
    .join(' ');

  // Each term hit is worth `perHit` points, up to `cap`.
  const cappedBoost = (terms, text, perHit, cap) => Math.min(cap, countTermHits(terms, text) * perHit);

  const focusPathBoost = cappedBoost(sessionSignals.focusTerms, pathText, 4, 12);
  const focusContentBoost = cappedBoost(sessionSignals.focusTerms, sampleText, 2, 8);
  const goalPathBoost = cappedBoost(sessionSignals.goalTerms, pathText, 2, 8);
  const goalContentBoost = cappedBoost(sessionSignals.goalTerms, sampleText, 2, 6);

  breakdown.touchedFileBoost = sessionSignals.touchedFiles.has(relPath) ? 24 : 0;
  breakdown.hotFileBoost = sessionSignals.hotFiles.has(relPath) ? 8 : 0;
  breakdown.focusPathBoost = focusPathBoost;
  breakdown.focusContentBoost = focusContentBoost;
  breakdown.goalPathBoost = goalPathBoost;
  breakdown.goalContentBoost = goalContentBoost;

  let score = 0;
  for (const value of Object.values(breakdown)) {
    score += value;
  }
  return { score, breakdown };
};
425
+
426
/**
 * Heuristically decide whether a result group comes from a barrel (re-export) file.
 *
 * Only JavaScript/TypeScript extensions are considered. Looking at the first six
 * match lines, the group is barrel-like when either
 *   - the file has a conventional barrel base name and at least one line is a re-export, or
 *   - at least 60% of the sampled lines are re-exports.
 *
 * @param {{file: string, matches: Array<{content?: unknown}>}} group
 * @returns {boolean}
 */
const isBarrelLikeGroup = (group) => {
  const jsLikeExtensions = ['.js', '.jsx', '.ts', '.tsx', '.mjs', '.cjs'];
  const extension = path.extname(group.file).toLowerCase();
  if (jsLikeExtensions.indexOf(extension) === -1) {
    return false;
  }

  const baseName = path.basename(group.file, extension).toLowerCase();

  let sampled = 0;
  let reexports = 0;
  for (const match of group.matches.slice(0, 6)) {
    sampled += 1;
    if (reexportPattern.test(String(match.content ?? ''))) {
      reexports += 1;
    }
  }

  if (barrelFileNames.has(baseName) && reexports > 0) {
    return true;
  }
  return sampled > 0 && reexports / sampled >= 0.6;
};
438
+
439
/**
 * Build the text used to embed a result group: its slash-normalized path relative
 * to `root`, followed by the content of its first six matches, space-separated.
 *
 * @param {{file: string, matches: Array<{content: unknown}>}} group
 * @param {string} [root=projectRoot]
 * @returns {string}
 */
const buildGroupSemanticFingerprint = (group, root = projectRoot) => {
  const relPath = path.relative(root, group.file).split('\\').join('/');
  const contents = [];
  for (const match of group.matches.slice(0, 6)) {
    contents.push(match.content);
  }
  return `${relPath} ${contents.join(' ')}`;
};
444
+
445
/**
 * Drop result groups that are near-duplicates of an earlier (higher-ranked) group.
 *
 * Each group is embedded from its semantic fingerprint and compared against the
 * first eight groups kept so far. A candidate is dropped when its cosine
 * similarity to one of them is not below 0.93 AND it is barrel-like, shares the
 * file base name, or shares the (trimmed) first match line with that group.
 *
 * @param {Array<{file: string, matches: Array<{content?: string}>}>} groups - Groups in ranked order.
 * @param {string} [root=projectRoot] - Directory that relative paths are computed against.
 * @returns {{groups: Array, dropped: number, droppedNoiseHints: string[]}}
 *   Surviving groups in their original order, the number of groups dropped, and the
 *   relative paths of dropped barrel-like files.
 */
const dedupeSemanticGroups = (groups, root = projectRoot) => {
  const survivors = [];
  const droppedNoiseHints = [];
  let dropped = 0;

  const firstLineOf = (candidate) => (candidate.matches[0]?.content ?? '').trim();

  for (const group of groups) {
    const vector = embed(buildGroupSemanticFingerprint(group, root));
    const candidateBarrel = isBarrelLikeGroup(group);

    const isDuplicate = survivors.slice(0, 8).some((existing) => {
      if (cosineSimilarity(vector, existing.vector) < 0.93) return false;

      const sameBase = path.basename(existing.group.file) === path.basename(group.file);
      const sameFirstLine = firstLineOf(existing.group) === firstLineOf(group);
      return candidateBarrel || sameBase || sameFirstLine;
    });

    if (!isDuplicate) {
      survivors.push({ group, vector });
      continue;
    }

    dropped += 1;
    if (candidateBarrel) {
      droppedNoiseHints.push(path.relative(root, group.file).replace(/\\/g, '/'));
    }
  }

  return {
    groups: survivors.map(({ group }) => group),
    dropped,
    droppedNoiseHints,
  };
};
485
+
486
/**
 * Compute the ranking penalty for a group from previously recorded noise hints.
 *
 * A hint applies when its `hintKey` equals either the group's slash-normalized
 * path relative to `root` or just its file name. The largest applicable penalty
 * wins and is returned as a negative number.
 *
 * Hints whose `penalty` is not numeric are ignored; otherwise a single bad value
 * would turn the running maximum into NaN and cancel every valid penalty.
 *
 * @param {{file: string}} group - Result group to score.
 * @param {Array<{hintKey?: string, penalty?: unknown}>} noiseHints - Recorded hints.
 * @param {string} [root=projectRoot] - Directory that relative paths are computed against.
 * @returns {number} `-penalty` when a positive penalty applies, otherwise 0.
 */
const scoreNoiseHints = (group, noiseHints, root = projectRoot) => {
  if (!Array.isArray(noiseHints) || noiseHints.length === 0) {
    return 0;
  }

  const relPath = path.relative(root, group.file).replace(/\\/g, '/');
  const fileName = path.basename(relPath);
  let penalty = 0;

  for (const hint of noiseHints) {
    if (!hint?.hintKey) continue;
    if (relPath !== hint.hintKey && fileName !== hint.hintKey) continue;

    const value = Number(hint.penalty ?? 0);
    // Math.max(x, NaN) is NaN, so a malformed hint must not reach the accumulator.
    if (Number.isNaN(value)) continue;
    penalty = Math.max(penalty, value);
  }

  return penalty > 0 ? -penalty : 0;
};
504
+
226
505
  const scoreGroup = (group, query, intent) => {
227
506
  const w = (intent && intentWeights[intent]) || defaultWeights;
228
507
  const normalizedQuery = query.toLowerCase();
@@ -232,48 +511,112 @@ const scoreGroup = (group, query, intent) => {
232
511
  const pathDepth = group.file.split(path.sep).length;
233
512
  const sampleText = group.matches.slice(0, 5).map((match) => match.content.toLowerCase()).join(' ');
234
513
  const pathSegments = lowerFilePath.split(/[\\/._-]+/).filter(Boolean);
235
- let score = Math.min(group.count, 12) * 6;
514
+ const breakdown = {
515
+ matchCountBoost: Math.min(group.count, 12) * 6,
516
+ fileNameBoost: 0,
517
+ pathSegmentBoost: 0,
518
+ srcBoost: 0,
519
+ packageBoost: 0,
520
+ sourceBoost: 0,
521
+ configBoost: 0,
522
+ contentBoost: 0,
523
+ lowSignalPenalty: 0,
524
+ testBoost: 0,
525
+ barrelPenalty: 0,
526
+ depthPenalty: -Math.min(pathDepth, 12),
527
+ };
528
+ let score = breakdown.matchCountBoost;
236
529
 
237
530
  if (fileName.includes(normalizedQuery)) {
238
- score += 30;
531
+ breakdown.fileNameBoost = 30;
532
+ score += breakdown.fileNameBoost;
239
533
  }
240
534
 
241
535
  if (pathSegments.includes(normalizedQuery)) {
242
- score += 16;
536
+ breakdown.pathSegmentBoost = 16;
537
+ score += breakdown.pathSegmentBoost;
243
538
  }
244
539
 
245
540
  if (lowerFilePath.includes(`${path.sep}src${path.sep}`)) {
246
- score += w.src;
541
+ breakdown.srcBoost = w.src;
542
+ score += breakdown.srcBoost;
247
543
  }
248
544
 
249
545
  if (lowerFilePath.includes(`${path.sep}packages${path.sep}`) || lowerFilePath.includes(`${path.sep}apps${path.sep}`)) {
250
- score += 8;
546
+ breakdown.packageBoost = 8;
547
+ score += breakdown.packageBoost;
251
548
  }
252
549
 
253
550
  if (likelySourceExtensions.has(extension) || isDockerfile(group.file)) {
254
- score += w.source;
551
+ breakdown.sourceBoost = w.source;
552
+ score += breakdown.sourceBoost;
255
553
  } else if (likelyConfigExtensions.has(extension)) {
256
- score += w.config;
554
+ breakdown.configBoost = w.config;
555
+ score += breakdown.configBoost;
257
556
  }
258
557
 
259
558
  if (sampleText.includes(normalizedQuery)) {
260
- score += 8;
559
+ breakdown.contentBoost = 8;
560
+ score += breakdown.contentBoost;
261
561
  }
262
562
 
263
563
  if (lowSignalNames.some((name) => fileName.includes(name))) {
264
- score += w.lowSignal;
564
+ breakdown.lowSignalPenalty = w.lowSignal;
565
+ score += breakdown.lowSignalPenalty;
265
566
  }
266
567
 
267
568
  if (testPatterns.some((p) => lowerFilePath.includes(p))) {
268
- score += w.test;
569
+ breakdown.testBoost = w.test;
570
+ score += breakdown.testBoost;
269
571
  }
270
572
 
271
- score -= Math.min(pathDepth, 12);
573
+ if (isBarrelLikeGroup(group)) {
574
+ breakdown.barrelPenalty = -18;
575
+ score += breakdown.barrelPenalty;
576
+ }
577
+
578
+ score += breakdown.depthPenalty;
579
+
580
+ return { score, breakdown };
581
+ };
582
+
583
/**
 * Build a short, human-readable explanation of why a file ranked where it did.
 *
 * At most three reasons are listed. Session (touched/hot file) and index/graph
 * boosts come first; the remaining slots go to the text-level signals with the
 * largest absolute contribution. Signals that are zero or missing are skipped.
 *
 * Boost values are rendered with an explicit sign, so an intent weight that is
 * negative reads "(-5)" rather than "(+-5)".
 *
 * @param {object} options
 * @param {number} options.count - Number of text matches in the file.
 * @param {string} options.boostSource - 'text', 'index' or 'graph'.
 * @param {object} options.scoreBreakdown - Per-signal score contributions.
 * @returns {string} Comma-separated reasons, or a generic sentence when none apply.
 */
const buildWhyRanked = ({ count, boostSource, scoreBreakdown }) => {
  const signed = (value) => (value > 0 ? `+${value}` : `${value}`);
  const reasons = [];

  if (scoreBreakdown.touchedFileBoost > 0) reasons.push(`touched-file boost (+${scoreBreakdown.touchedFileBoost})`);
  else if (scoreBreakdown.hotFileBoost > 0) reasons.push(`recent-session file boost (+${scoreBreakdown.hotFileBoost})`);

  if (scoreBreakdown.indexBoost > 0) reasons.push(`index boost (+${scoreBreakdown.indexBoost})`);
  else if (scoreBreakdown.graphBoost > 0) reasons.push(`graph boost (+${scoreBreakdown.graphBoost})`);

  const textReasons = [
    [scoreBreakdown.fileNameBoost, `filename match (${signed(scoreBreakdown.fileNameBoost)})`],
    [scoreBreakdown.pathSegmentBoost, `path segment match (${signed(scoreBreakdown.pathSegmentBoost)})`],
    [scoreBreakdown.matchCountBoost, `${count} text match${count === 1 ? '' : 'es'} (${signed(scoreBreakdown.matchCountBoost)})`],
    [scoreBreakdown.sourceBoost, `source-file boost (${signed(scoreBreakdown.sourceBoost)})`],
    [scoreBreakdown.configBoost, `config-file boost (${signed(scoreBreakdown.configBoost)})`],
    [scoreBreakdown.contentBoost, `content hit (${signed(scoreBreakdown.contentBoost)})`],
    [scoreBreakdown.focusPathBoost, `focus-path boost (${signed(scoreBreakdown.focusPathBoost)})`],
    [scoreBreakdown.focusContentBoost, `focus-content boost (${signed(scoreBreakdown.focusContentBoost)})`],
    [scoreBreakdown.goalPathBoost, `goal-path boost (${signed(scoreBreakdown.goalPathBoost)})`],
    [scoreBreakdown.goalContentBoost, `goal-content boost (${signed(scoreBreakdown.goalContentBoost)})`],
    [scoreBreakdown.testBoost, scoreBreakdown.testBoost > 0 ? `test intent boost (+${scoreBreakdown.testBoost})` : `test-path penalty (${scoreBreakdown.testBoost})`],
    [scoreBreakdown.barrelPenalty, `barrel penalty (${scoreBreakdown.barrelPenalty})`],
    [scoreBreakdown.lowSignalPenalty, `low-signal penalty (${scoreBreakdown.lowSignalPenalty})`],
  ]
    // Missing fields would otherwise pass the `!== 0` check and sort on NaN.
    .filter(([value]) => Number.isFinite(value) && value !== 0)
    .sort((left, right) => Math.abs(right[0]) - Math.abs(left[0]))
    .map(([, label]) => label);

  // Fill whatever is left of the three-reason budget with the strongest text signals.
  reasons.push(...textReasons.slice(0, Math.max(0, 3 - reasons.length)));

  if (reasons.length === 0) {
    return boostSource === 'text' ? 'text matches ranked by relevance' : `${boostSource} signal boosted this file`;
  }

  return reasons.join(', ');
};
275
618
 
276
- const groupMatches = (matches, query, intent, indexHits, graphHits) => {
619
+ const groupMatches = (matches, query, intent, indexHits, graphHits, sessionSignals, noiseHints, root = projectRoot) => {
277
620
  const groups = new Map();
278
621
 
279
622
  for (const match of matches) {
@@ -284,31 +627,74 @@ const groupMatches = (matches, query, intent, indexHits, graphHits) => {
284
627
  groups.get(match.file).push(match);
285
628
  }
286
629
 
287
- const breakdown = { textMatch: 0, indexBoost: 0, graphBoost: 0 };
630
+ const breakdown = { textMatch: 0, indexBoost: 0, graphBoost: 0, sessionBoost: 0, semanticDedup: 0, noisePenalty: 0 };
288
631
 
289
632
  const sorted = [...groups.entries()]
290
633
  .map(([file, fileMatches]) => {
291
- let score = scoreGroup({ file, count: fileMatches.length, matches: fileMatches }, query, intent);
634
+ const { score: textScore, breakdown: textBreakdown } = scoreGroup({ file, count: fileMatches.length, matches: fileMatches }, query, intent);
635
+ const { score: sessionScore, breakdown: sessionBreakdown } = scoreSessionContext({ file, count: fileMatches.length, matches: fileMatches }, sessionSignals, root);
636
+ let score = textScore;
292
637
  let boostSource = 'text';
293
- if (indexHits?.has(file)) { score += 50; boostSource = 'index'; }
294
- else if (graphHits?.has(file)) { score += 25; boostSource = 'graph'; }
295
- return { file, count: fileMatches.length, score, matches: fileMatches, boostSource };
638
+ const scoreBreakdown = {
639
+ ...textBreakdown,
640
+ ...sessionBreakdown,
641
+ textScore,
642
+ sessionScore,
643
+ indexBoost: 0,
644
+ graphBoost: 0,
645
+ noisePenalty: 0,
646
+ finalScore: textScore,
647
+ };
648
+ score += sessionScore;
649
+ if (indexHits?.has(file)) {
650
+ score += 50;
651
+ boostSource = 'index';
652
+ scoreBreakdown.indexBoost = 50;
653
+ } else if (graphHits?.has(file)) {
654
+ score += 25;
655
+ boostSource = 'graph';
656
+ scoreBreakdown.graphBoost = 25;
657
+ }
658
+ const noisePenalty = scoreNoiseHints({ file, count: fileMatches.length, matches: fileMatches }, noiseHints, root);
659
+ if (noisePenalty !== 0) {
660
+ score += noisePenalty;
661
+ scoreBreakdown.noisePenalty = noisePenalty;
662
+ }
663
+ scoreBreakdown.finalScore = score;
664
+ return {
665
+ file,
666
+ count: fileMatches.length,
667
+ score,
668
+ matches: fileMatches,
669
+ boostSource,
670
+ matchedBy: boostSource,
671
+ scoreBreakdown,
672
+ whyRanked: buildWhyRanked({ count: fileMatches.length, boostSource, scoreBreakdown }),
673
+ };
296
674
  })
297
675
  .sort((left, right) => right.score - left.score || right.count - left.count || left.file.localeCompare(right.file));
298
676
 
299
- for (const g of sorted.slice(0, 10)) {
677
+ const deduped = dedupeSemanticGroups(sorted, root);
678
+
679
+ for (const g of deduped.groups.slice(0, 10)) {
300
680
  if (g.boostSource === 'index') breakdown.indexBoost++;
301
681
  else if (g.boostSource === 'graph') breakdown.graphBoost++;
302
682
  else breakdown.textMatch++;
683
+ if ((g.scoreBreakdown.sessionScore ?? 0) > 0) breakdown.sessionBoost++;
684
+ if ((g.scoreBreakdown.noisePenalty ?? 0) < 0) breakdown.noisePenalty++;
303
685
  }
686
+ breakdown.semanticDedup = deduped.dropped;
304
687
 
305
- return { groups: sorted, breakdown };
688
+ return { groups: deduped.groups, breakdown, droppedNoiseHints: deduped.droppedNoiseHints };
306
689
  };
307
690
 
308
- const buildZeroResultsMessage = (query, searchMode, provenance) => {
691
+ const buildZeroResultsMessage = (query, searchMode, provenance, mode = DEFAULT_SEARCH_MODE, suggestions = []) => {
309
692
  const lines = [`No matches found for: "${query}"`];
310
693
 
311
- if (searchMode === 'exact') {
694
+ if (mode === 'needle') {
695
+ lines.push('• Tried: exact literal match (--fixed-strings)');
696
+ lines.push('• Skipped: regex fallback and term expansion (needle mode)');
697
+ } else if (searchMode === 'exact') {
312
698
  lines.push('• Tried: exact literal match (--fixed-strings)');
313
699
  lines.push('• Tried: regex match');
314
700
  } else if (searchMode === 'terms') {
@@ -318,22 +704,184 @@ const buildZeroResultsMessage = (query, searchMode, provenance) => {
318
704
 
319
705
  lines.push('');
320
706
  lines.push('Suggestions:');
321
- lines.push(' – Use a shorter, more specific term (e.g. a function name, not a phrase)');
322
- lines.push('Try Grep for raw text: the query may be in a file type not indexed by smart_search');
323
- lines.push(' – Run build_index to enable symbol-level search if the codebase is new');
707
+ for (const suggestion of suggestions) {
708
+ lines.push(`${suggestion}`);
709
+ }
710
+
711
+ if (suggestions.length === 0) {
712
+ lines.push(' – Use a shorter, more specific term (e.g. a function name, not a phrase)');
713
+ lines.push(' – Try Grep for raw text: the query may be in a file type not indexed by smart_search');
714
+ lines.push(' – Run build_index to enable symbol-level search if the codebase is new');
715
+ }
324
716
 
325
717
  return lines.join('\n');
326
718
  };
327
719
 
328
- const MAX_RESULT_FILES = 15;
720
/**
 * Cut `text` down, line by line, so that it fits within `maxTokens` tokens
 * including a trailing truncation marker.
 *
 * - Text that already fits is returned unchanged (no marker is added, since
 *   nothing was truncated).
 * - Otherwise whole leading lines are kept while they fit, and the marker is appended.
 * - If not even the marker fits, an empty string is returned so the result never
 *   exceeds the budget.
 *
 * @param {string} text - Newline-separated text to truncate.
 * @param {number} maxTokens - Token budget for the returned string.
 * @returns {string} The (possibly truncated) text.
 */
const truncateByTokens = (text, maxTokens) => {
  if (countTokens(text) <= maxTokens) return text;

  const marker = `\n[truncated to fit ${maxTokens} token budget]`;
  const markerTokens = countTokens(marker);
  const budget = Math.max(1, maxTokens - markerTokens);

  const kept = [];
  let tokens = 0;

  for (const line of text.split('\n')) {
    const lineTokens = countTokens(line);
    if (tokens + lineTokens > budget) break;
    kept.push(line);
    tokens += lineTokens;
  }

  // Per-line counts ignore the joining newlines, so re-check the assembled string.
  let result = `${kept.join('\n')}${marker}`;
  while (kept.length > 0 && countTokens(result) > maxTokens) {
    kept.pop();
    result = `${kept.join('\n')}${marker}`;
  }

  // Nothing left but a marker that itself overflows: emit nothing rather than break the budget.
  if (kept.length === 0 && markerTokens > maxTokens) return '';

  return result;
};
744
+
745
// Token cost of a response object, measured on its JSON serialization.
const countResponseTokens = (value) => {
  const serialized = JSON.stringify(value);
  return countTokens(serialized);
};
746
+
747
/**
 * Reduce a ranked file entry to its essential fields, dropping diagnostics
 * such as the score breakdown and ranking explanation.
 *
 * @param {object} entry - A ranked file entry.
 * @returns {{file: *, count: *, score: *, boostSource: *, matchedBy: *}}
 */
const buildCompactTopFile = (entry) => {
  const { file, count, score, boostSource, matchedBy } = entry;
  return { file, count, score, boostSource, matchedBy };
};
754
+
755
/**
 * Shrink a search response until its JSON form fits within `maxTokens`.
 *
 * The input is never mutated; a shallow copy is compacted in stages, stopping at
 * the first stage after which the response (including the budget metadata that
 * will be attached) fits:
 *   1. drop `semantic` / `semanticError`
 *   2. drop `suggestions`, `totalFiles`, `nextSuggestedMaxFiles`
 *   3. strip diagnostics from each `topFiles` entry
 *   4. drop trailing `topFiles` entries, keeping at least one
 *   5. truncate the `matches` string to the remaining budget
 *   6. drop `rankingBreakdown`
 *   7. drop the remaining `topFiles` entries
 *   8. blank out `matches`
 *
 * @param {object} response - The full response object.
 * @param {number} maxTokens - Token budget; non-finite or < 1 disables budgeting.
 * @returns {{response: object, applied: boolean}} The original response with
 *   `applied: false` when no compaction was needed, otherwise the compacted copy
 *   carrying `budgetApplied` and `budgetDetails`, with `applied: true`.
 */
const applyResponseBudget = (response, maxTokens) => {
  const sectionsCompacted = [];
  const actions = [];
  const recordOnce = (list, name) => {
    if (!list.includes(name)) list.push(name);
  };
  const noteCompaction = (name) => recordOnce(sectionsCompacted, name);
  const noteAction = (name) => recordOnce(actions, name);

  // The metadata shares the live `actions` / `sectionsCompacted` arrays, so its
  // token cost grows as stages are recorded and is always counted as it stands.
  const withBudgetMeta = (value) => ({
    ...value,
    budgetApplied: true,
    budgetDetails: {
      scope: 'response',
      maxTokens,
      actions,
      sectionsCompacted,
    },
  });
  const countBudgeted = (value) => countResponseTokens(withBudgetMeta(value));

  const budgetDisabled = !Number.isFinite(maxTokens) || maxTokens < 1;
  if (budgetDisabled || countResponseTokens(response) <= maxTokens) {
    return { response, applied: false };
  }

  const budgeted = { ...response };
  const fits = () => countBudgeted(budgeted) <= maxTokens;
  const done = () => ({ response: withBudgetMeta(budgeted), applied: true });
  const hasTopFiles = (minimum) => Array.isArray(budgeted.topFiles) && budgeted.topFiles.length > minimum;

  // Stage 1: semantic extras.
  delete budgeted.semantic;
  delete budgeted.semanticError;
  noteCompaction('semantic');
  noteAction('metadata_compacted');
  if (fits()) return done();

  // Stage 2: suggestions and expansion hints.
  delete budgeted.suggestions;
  delete budgeted.totalFiles;
  delete budgeted.nextSuggestedMaxFiles;
  noteCompaction('suggestions');
  noteCompaction('expansionHints');
  noteAction('metadata_compacted');
  if (fits()) return done();

  // Stage 3: per-file diagnostics.
  if (hasTopFiles(0)) {
    budgeted.topFiles = budgeted.topFiles.map(buildCompactTopFile);
    noteCompaction('topFilesDiagnostics');
    noteAction('metadata_compacted');
  }
  if (fits()) return done();

  // Stage 4: fewer top files, but keep the best one.
  while (hasTopFiles(1) && countBudgeted(budgeted) > maxTokens) {
    budgeted.topFiles = budgeted.topFiles.slice(0, -1);
    noteCompaction('topFilesCount');
    noteAction('results_reduced');
  }
  if (fits()) return done();

  // Stage 5: truncate the match text to whatever budget the rest leaves over.
  if (typeof budgeted.matches === 'string') {
    noteCompaction('matches');
    noteAction('content_truncated');
    const withoutMatches = { ...budgeted, matches: '' };
    const remaining = maxTokens - countBudgeted(withoutMatches);
    budgeted.matches = remaining > 0
      ? truncateByTokens(budgeted.matches, remaining)
      : '';
  }
  if (fits()) return done();

  // Stage 6: ranking breakdown.
  delete budgeted.rankingBreakdown;
  noteCompaction('rankingBreakdown');
  noteAction('metadata_compacted');
  if (fits()) return done();

  // Stage 7: remaining top files.
  while (hasTopFiles(0) && countBudgeted(budgeted) > maxTokens) {
    budgeted.topFiles = budgeted.topFiles.slice(0, -1);
    noteCompaction('topFiles');
    noteAction('results_reduced');
  }
  if (fits()) return done();

  // Stage 8: last resort, omit the match text entirely.
  budgeted.matches = '';
  noteCompaction('matchesOmitted');
  noteAction('content_truncated');
  return done();
};
870
+
871
+ const DEFAULT_RESULT_FILES = 5;
872
+ const MAX_COMPACT_RESULT_FILES = 15;
329
873
 
330
- const buildCompactResult = (groups, totalMatches, query, root, searchMode, provenance, totalFiles) => {
874
+ const buildCompactResult = (groups, totalMatches, query, root, searchMode, provenance, totalFiles, mode = DEFAULT_SEARCH_MODE, suggestions = []) => {
331
875
  if (totalMatches === 0) {
332
- return buildZeroResultsMessage(query, searchMode, provenance);
876
+ return buildZeroResultsMessage(query, searchMode, provenance, mode, suggestions);
333
877
  }
334
878
 
335
- const modeLabel = searchMode === 'exact' ? '' : searchMode === 'regex' ? ' [regex fallback]' : ` [term expansion: ${(provenance?.expandedTerms ?? []).join(', ')}]`;
336
- const topGroups = groups.slice(0, MAX_RESULT_FILES);
879
+ const modeLabel = searchMode === 'exact'
880
+ ? ''
881
+ : searchMode === 'regex'
882
+ ? ' [regex fallback]'
883
+ : ` [term expansion: ${(provenance?.expandedTerms ?? []).join(', ')}]`;
884
+ const topGroups = groups.slice(0, MAX_COMPACT_RESULT_FILES);
337
885
 
338
886
  if (totalMatches <= 20) {
339
887
  const header = modeLabel ? `# Search mode:${modeLabel}\n` : '';
@@ -359,6 +907,12 @@ const buildCompactResult = (groups, totalMatches, query, root, searchMode, prove
359
907
  const fileCount = totalFiles ?? groups.length;
360
908
  if (fileCount > 30) {
361
909
  lines.push(`# Note: ${fileCount} files matched — query may be too broad. Use Grep for exact pattern matching.`);
910
+ if (suggestions.length > 0) {
911
+ lines.push('# Refinements:');
912
+ for (const suggestion of suggestions) {
913
+ lines.push(`- ${suggestion}`);
914
+ }
915
+ }
362
916
  }
363
917
 
364
918
  return lines.join('\n');
@@ -376,16 +930,18 @@ const filterGroupsByKinds = (groups, loadedIndex, indexRoot, kinds) => {
376
930
  });
377
931
  };
378
932
 
379
- export const smartSearch = async ({ query, cwd = '.', intent, maxFiles, kinds, semantic = false, semanticLimit = 8, _testForceWalk = false, progress: enableProgress = false }) => {
933
+ export const smartSearch = async ({ query, cwd = '.', intent, maxFiles, kinds, mode, semantic = false, semanticLimit = 8, maxTokens, _testForceWalk = false, _testIgnoreSessionSignals = false, progress: enableProgress = false }) => {
380
934
  const progress = enableProgress ? createProgressReporter('smart_search') : null;
381
935
  const startTime = Date.now();
936
+ const resolvedMode = resolveSearchMode({ mode, semantic });
937
+ const validBudget = Number.isFinite(maxTokens) && maxTokens >= 1 ? maxTokens : null;
382
938
 
383
939
  if (progress) {
384
940
  progress.report({ phase: 'searching', query });
385
941
  }
386
942
 
387
943
  const root = resolveSafePath(cwd);
388
- const rgResult = _testForceWalk ? null : await searchWithRipgrep(root, query);
944
+ const rgResult = _testForceWalk ? null : await searchWithRipgrep(root, query, resolvedMode);
389
945
  const usedFallback = rgResult === null;
390
946
  const engine = usedFallback ? 'walk' : 'rg';
391
947
 
@@ -433,6 +989,8 @@ export const smartSearch = async ({ query, cwd = '.', intent, maxFiles, kinds, s
433
989
  let graphHits = null;
434
990
  let indexFreshness = 'unavailable';
435
991
  let loadedIndex = null;
992
+ const sessionSignals = _testIgnoreSessionSignals ? null : await loadActiveSessionSignals();
993
+ const noiseHints = isGlobalMemoryEnabled() ? (await getNoiseHints({ projectPath: indexRoot })).hints : [];
436
994
 
437
995
  if (progress) {
438
996
  progress.report({ phase: 'ranking', rawMatches: rawMatches.length });
@@ -462,12 +1020,21 @@ export const smartSearch = async ({ query, cwd = '.', intent, maxFiles, kinds, s
462
1020
  // index unavailable — continue without it
463
1021
  }
464
1022
 
465
- let { groups, breakdown } = groupMatches(dedupedMatches, query, validIntent, indexHits, graphHits);
1023
+ let { groups, breakdown, droppedNoiseHints } = groupMatches(dedupedMatches, query, validIntent, indexHits, graphHits, sessionSignals, noiseHints, indexRoot);
466
1024
  const normalizedKinds = Array.isArray(kinds) ? kinds.filter((k) => typeof k === 'string' && k.trim()) : null;
467
1025
  if (normalizedKinds && normalizedKinds.length > 0) {
468
1026
  groups = filterGroupsByKinds(groups, loadedIndex, indexRoot, normalizedKinds);
469
1027
  }
470
1028
 
1029
+ const suggestions = buildSearchSuggestions({
1030
+ query,
1031
+ mode: resolvedMode,
1032
+ totalMatches: dedupedMatches.length,
1033
+ totalFiles: groups.length,
1034
+ searchMode,
1035
+ hasKinds: Boolean(normalizedKinds && normalizedKinds.length > 0),
1036
+ });
1037
+
471
1038
  if (loadedIndex && indexFreshness === 'fresh') {
472
1039
  const topRelPaths = groups.slice(0, 10).map((g) => path.relative(indexRoot, g.file).replace(/\\/g, '/'));
473
1040
  for (const rp of topRelPaths) {
@@ -480,11 +1047,22 @@ export const smartSearch = async ({ query, cwd = '.', intent, maxFiles, kinds, s
480
1047
  }
481
1048
  }
482
1049
 
483
- const effectiveMaxFiles = maxFiles ?? MAX_RESULT_FILES;
1050
+ const effectiveMaxFiles = maxFiles ?? DEFAULT_RESULT_FILES;
484
1051
  const cappedGroups = groups.slice(0, effectiveMaxFiles);
1052
+ const hasMore = groups.length > cappedGroups.length;
1053
+ const nextSuggestedMaxFiles = hasMore
1054
+ ? Math.min(50, groups.length, Math.max(effectiveMaxFiles + 5, effectiveMaxFiles * 2))
1055
+ : undefined;
485
1056
 
486
1057
  const rawText = dedupedMatches.map(formatMatch).join('\n');
487
- const compressedText = truncate(buildCompactResult(cappedGroups, dedupedMatches.length, query, root, searchMode, provenance, groups.length), 5000);
1058
+ const baseCompactText = truncate(
1059
+ buildCompactResult(cappedGroups, dedupedMatches.length, query, root, searchMode, provenance, groups.length, resolvedMode, suggestions),
1060
+ 5000,
1061
+ );
1062
+ let compressedText = baseCompactText;
1063
+ if (validBudget && countTokens(compressedText) > validBudget) {
1064
+ compressedText = truncateByTokens(compressedText, validBudget);
1065
+ }
488
1066
  const metrics = buildMetrics({
489
1067
  tool: 'smart_search',
490
1068
  target: `${root} :: ${query}`,
@@ -501,23 +1079,6 @@ export const smartSearch = async ({ query, cwd = '.', intent, maxFiles, kinds, s
501
1079
  });
502
1080
  recordDevctxOperation();
503
1081
 
504
- let reason = DECISION_REASONS.MULTIPLE_FILES;
505
- if (validIntent) {
506
- reason = DECISION_REASONS.INTENT_AWARE;
507
- }
508
- if (indexHits && indexHits.size > 0) {
509
- reason = DECISION_REASONS.INDEX_BOOST;
510
- }
511
-
512
- recordDecision({
513
- tool: 'smart_search',
514
- action: `search "${query}"${validIntent ? ` (intent: ${validIntent})` : ''}`,
515
- reason,
516
- alternative: 'Grep (unranked results)',
517
- expectedBenefit: `${EXPECTED_BENEFITS.TOKEN_SAVINGS(metrics.savedTokens)}, ${EXPECTED_BENEFITS.BETTER_RANKING}`,
518
- context: `${dedupedMatches.length} matches in ${groups.length} files, ranked by relevance`,
519
- });
520
-
521
1082
  let retrievalConfidence = 'high';
522
1083
  if (dedupedMatches.length === 0) retrievalConfidence = 'none';
523
1084
  else if (searchMode === 'terms') retrievalConfidence = 'low';
@@ -535,23 +1096,39 @@ export const smartSearch = async ({ query, cwd = '.', intent, maxFiles, kinds, s
535
1096
  });
536
1097
  }
537
1098
 
538
- const result = {
1099
+ let result = {
539
1100
  query,
1101
+ mode: resolvedMode,
540
1102
  indexFreshness,
541
1103
  ...(validIntent ? { intent: validIntent } : {}),
542
1104
  ...(normalizedKinds && normalizedKinds.length > 0 ? { kinds: normalizedKinds } : {}),
543
1105
  ...(indexHits ? { indexBoosted: indexHits.size } : {}),
544
1106
  totalMatches: dedupedMatches.length,
545
1107
  matchedFiles: cappedGroups.length,
1108
+ hasMore,
1109
+ rankingBreakdown: breakdown,
546
1110
  ...(groups.length > cappedGroups.length ? { totalFiles: groups.length } : {}),
547
- topFiles: cappedGroups.slice(0, 5).map((group) => ({ file: group.file, count: group.count, score: group.score })),
1111
+ ...(nextSuggestedMaxFiles ? { nextSuggestedMaxFiles } : {}),
1112
+ ...(suggestions.length > 0 ? { suggestions } : {}),
1113
+ topFiles: cappedGroups.slice(0, 5).map((group) => ({
1114
+ file: group.file,
1115
+ count: group.count,
1116
+ score: group.score,
1117
+ boostSource: group.boostSource,
1118
+ matchedBy: group.matchedBy,
1119
+ scoreBreakdown: group.scoreBreakdown,
1120
+ whyRanked: group.whyRanked,
1121
+ })),
548
1122
  matches: compressedText,
549
1123
  };
550
1124
 
551
1125
  if (provenance?.fallbackReason) result.searchMode = provenance.fallbackReason;
552
1126
  if (retrievalConfidence !== 'high') result.retrievalConfidence = retrievalConfidence;
553
1127
 
554
- if (semantic === true) {
1128
+ const shouldIncludeSemanticBlock = resolvedMode === 'semantic'
1129
+ && (dedupedMatches.length === 0 || searchMode !== 'exact');
1130
+
1131
+ if (shouldIncludeSemanticBlock) {
555
1132
  try {
556
1133
  const index = loadIndex(root);
557
1134
  if (index) {
@@ -566,11 +1143,15 @@ export const smartSearch = async ({ query, cwd = '.', intent, maxFiles, kinds, s
566
1143
  symbol: r.symbol.name,
567
1144
  kind: r.symbol.kind,
568
1145
  line: r.symbol.line,
1146
+ matchedBy: 'semantic',
1147
+ whyRanked: `semantic similarity (${Number(r.score.toFixed(4))})`,
569
1148
  })),
570
1149
  files: fileRanks.map((r) => ({
571
1150
  score: Number(r.score.toFixed(4)),
572
1151
  path: r.path,
573
1152
  symbols: r.symbolCount,
1153
+ matchedBy: 'semantic',
1154
+ whyRanked: `semantic similarity (${Number(r.score.toFixed(4))})`,
574
1155
  })),
575
1156
  };
576
1157
  }
@@ -579,5 +1160,32 @@ export const smartSearch = async ({ query, cwd = '.', intent, maxFiles, kinds, s
579
1160
  }
580
1161
  }
581
1162
 
1163
+ const budgetedResult = applyResponseBudget(result, validBudget);
1164
+ result = budgetedResult.response;
1165
+
1166
+ if (isGlobalMemoryEnabled() && Array.isArray(droppedNoiseHints) && droppedNoiseHints.length > 0) {
1167
+ for (const hintKey of droppedNoiseHints) {
1168
+ await recordNoiseHint({ projectPath: indexRoot, hintKey, reason: 'semantic_dedupe' });
1169
+ }
1170
+ }
1171
+
1172
+ let reason = DECISION_REASONS.MULTIPLE_FILES;
1173
+ if (budgetedResult.applied) {
1174
+ reason = DECISION_REASONS.TOKEN_BUDGET;
1175
+ } else if (indexHits && indexHits.size > 0) {
1176
+ reason = DECISION_REASONS.INDEX_BOOST;
1177
+ } else if (validIntent) {
1178
+ reason = DECISION_REASONS.INTENT_AWARE;
1179
+ }
1180
+
1181
+ recordDecision({
1182
+ tool: 'smart_search',
1183
+ action: `search "${query}"${validIntent ? ` (intent: ${validIntent})` : ''}`,
1184
+ reason,
1185
+ alternative: 'Grep (unranked results)',
1186
+ expectedBenefit: `${EXPECTED_BENEFITS.TOKEN_SAVINGS(metrics.savedTokens)}, ${EXPECTED_BENEFITS.BETTER_RANKING}`,
1187
+ context: `${dedupedMatches.length} matches in ${groups.length} files, ranked by relevance`,
1188
+ });
1189
+
582
1190
  return result;
583
1191
  };