smart-context-mcp 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +414 -0
- package/package.json +63 -0
- package/scripts/devctx-server.js +4 -0
- package/scripts/init-clients.js +356 -0
- package/scripts/report-metrics.js +195 -0
- package/src/index.js +976 -0
- package/src/mcp-server.js +3 -0
- package/src/metrics.js +65 -0
- package/src/server.js +143 -0
- package/src/tokenCounter.js +12 -0
- package/src/tools/smart-context.js +1192 -0
- package/src/tools/smart-read/additional-languages.js +684 -0
- package/src/tools/smart-read/code.js +216 -0
- package/src/tools/smart-read/fallback.js +23 -0
- package/src/tools/smart-read/python.js +178 -0
- package/src/tools/smart-read/shared.js +39 -0
- package/src/tools/smart-read/structured.js +72 -0
- package/src/tools/smart-read-batch.js +63 -0
- package/src/tools/smart-read.js +459 -0
- package/src/tools/smart-search.js +412 -0
- package/src/tools/smart-shell.js +213 -0
- package/src/utils/fs.js +47 -0
- package/src/utils/paths.js +1 -0
- package/src/utils/runtime-config.js +29 -0
- package/src/utils/text.js +38 -0
|
@@ -0,0 +1,1192 @@
|
|
|
1
|
+
import path from 'node:path';
|
|
2
|
+
import fs from 'node:fs';
|
|
3
|
+
import { execFile as execFileCallback } from 'node:child_process';
|
|
4
|
+
import { promisify } from 'node:util';
|
|
5
|
+
import { smartSearch, VALID_INTENTS } from './smart-search.js';
|
|
6
|
+
import { smartRead } from './smart-read.js';
|
|
7
|
+
import { smartReadBatch } from './smart-read-batch.js';
|
|
8
|
+
import { loadIndex, queryRelated, getGraphCoverage } from '../index.js';
|
|
9
|
+
import { projectRoot } from '../utils/paths.js';
|
|
10
|
+
import { resolveSafePath } from '../utils/fs.js';
|
|
11
|
+
import { countTokens } from '../tokenCounter.js';
|
|
12
|
+
import { persistMetrics } from '../metrics.js';
|
|
13
|
+
|
|
14
|
+
// Promise-returning wrapper around `child_process.execFile`.
const execFile = promisify(execFileCallback);

// Keywords that vote for each task intent. `inferIntent` counts how many of
// them occur in the lower-cased task text.
const INTENT_KEYWORDS = {
  debug: [
    'debug', 'fix', 'error', 'bug', 'crash',
    'fail', 'broken', 'issue', 'trace',
  ],
  tests: ['test', 'spec', 'coverage', 'assert', 'mock', 'jest', 'vitest'],
  config: [
    'config', 'env', 'setup', 'deploy', 'docker', 'ci',
    'terraform', 'yaml', 'secret', 'secrets', 'settings', 'database',
  ],
  docs: ['doc', 'readme', 'explain', 'document', 'guide'],
  implementation: [
    'implement', 'add', 'create', 'build',
    'feature', 'refactor', 'update', 'modify',
  ],
};

// Common English words that are dropped from keyword queries and path hints.
const STOP_WORDS = new Set([
  'the a an in on at to for of with is are',
  'was were be been and or but not this that it',
  'how what where when why which who do does did',
  'has have had from by about into my our your',
  'can could will would should may might i we you',
  'all each every me us them its',
].join(' ').split(' '));

// Words that describe the request itself rather than its subject; they are
// filtered out alongside the stop words.
const LOW_SIGNAL_QUERY_WORDS = new Set([
  'find show list get search locate lookup look check',
  'inspect review analyze analyse understand explore read',
  'open walk help need want please context preview',
  'recall stuff thing things happen happens handle handles',
  'handling wired declare declared defined owns owner existing',
  'exercise exercises before main shared related across split',
  'live lives surface public entry point path logic covers',
  'api apis flow flows file files onboarding app application load loads loaded',
].join(' ').split(' '));

// Matches camelCase, Capitalized (3+ chars) and snake_case identifiers.
const IDENTIFIER_RE = /\b[a-z][a-zA-Z0-9]*[A-Z][a-zA-Z0-9]*\b|\b[A-Z][a-zA-Z0-9]{2,}\b|\b[a-z]{2,}_[a-z_]+\b/g;
// Matches runs of word characters (letters, digits, underscore).
const QUERY_TOKEN_RE = /[a-zA-Z0-9_]+/g;
|
|
46
|
+
|
|
47
|
+
// File roles, most important first.
const ROLE_PRIORITY = ['primary', 'test', 'dependency', 'dependent'];

// role -> position in ROLE_PRIORITY (lower means more important).
const ROLE_RANK = ROLE_PRIORITY.reduce(
  (ranks, role, position) => ({ ...ranks, [role]: position }),
  {},
);

// Sort order applied to evidence entries (lower sorts first).
const EVIDENCE_PRIORITY = {
  entryFile: 0,
  diffHit: 1,
  searchHit: 2,
  symbolMatch: 3,
  symbolDetail: 4,
  testOf: 5,
  dependencyOf: 6,
  dependentOf: 7,
};

// Base utility contributed by a candidate's role.
const ROLE_BASE_SCORE = {
  primary: 130,
  test: 85,
  dependency: 60,
  dependent: 50,
};

// Base utility contributed by each evidence entry, keyed by evidence type.
const EVIDENCE_BASE_SCORE = {
  entryFile: 120,
  diffHit: 100,
  searchHit: 70,
  symbolMatch: 90,
  symbolDetail: 95,
  testOf: 40,
  dependencyOf: 25,
  dependentOf: 22,
};
|
|
70
|
+
|
|
71
|
+
/**
 * Returns the truthy items of `items` without duplicates, keeping the order
 * of first occurrence.
 */
const uniqueList = (items = []) => {
  const seen = new Set();
  for (const item of items) {
    if (item) seen.add(item);
  }
  return [...seen];
};
|
|
72
|
+
|
|
73
|
+
/**
 * Builds a string identity for an evidence entry from its type, via, ref,
 * rank, query and (pipe-joined) symbols. Missing fields serialize as null.
 */
const evidenceKey = (evidence) => {
  const { type, via = null, ref = null, rank = null, query = null, symbols } = evidence;
  const symbolPart = Array.isArray(symbols) ? symbols.join('|') : null;
  return JSON.stringify([type, via, ref, rank, query, symbolPart]);
};
|
|
81
|
+
|
|
82
|
+
/**
 * Normalizes, de-duplicates and orders evidence entries.
 *
 * Entries without a `type` are dropped. A `symbols` array is reduced to at
 * most three unique truthy names (and removed when it ends up empty). Entries
 * with the same `evidenceKey` collapse to the first one seen. The result is
 * sorted by EVIDENCE_PRIORITY (unknown types last), then by ascending rank.
 */
const dedupeEvidence = (items = []) => {
  const firstByKey = new Map();

  for (const raw of items) {
    if (!raw?.type) continue;

    const entry = { ...raw };
    if (Array.isArray(entry.symbols)) {
      const trimmed = uniqueList(entry.symbols).slice(0, 3);
      if (trimmed.length > 0) entry.symbols = trimmed;
      else delete entry.symbols;
    }

    const key = evidenceKey(entry);
    if (firstByKey.has(key)) continue;
    firstByKey.set(key, entry);
  }

  const typeOrder = (entry) => EVIDENCE_PRIORITY[entry.type] ?? 99;
  const rankOrder = (entry) => entry.rank ?? 999;

  const ordered = [...firstByKey.values()];
  ordered.sort((left, right) => {
    const byType = typeOrder(left) - typeOrder(right);
    return byType !== 0 ? byType : rankOrder(left) - rankOrder(right);
  });
  return ordered;
};
|
|
100
|
+
|
|
101
|
+
/**
 * Produces a short human-readable reason from the first evidence entry.
 *
 * @param {Array<object>} [evidence] Evidence entries; only the first is used.
 * @returns {string} A label such as `entry`, `diff: <ref>`, `search: <query>`,
 *   `symbol: a, b`, `test: <via>`; `selected` when there is no evidence or the
 *   type is unknown.
 */
const formatReasonIncluded = (evidence = []) => {
  const primary = evidence[0];
  if (!primary) return 'selected';

  // Every case falls back to the bare label when its detail is missing, so a
  // symbol evidence without symbols no longer renders as a dangling "symbol: ".
  const labelled = (label, detail) => (detail ? `${label}: ${detail}` : label);
  // At most two symbol names are shown.
  const symbolList = () => (primary.symbols ?? []).slice(0, 2).join(', ');

  switch (primary.type) {
    case 'entryFile':
      return 'entry';
    case 'diffHit':
      return labelled('diff', primary.ref);
    case 'searchHit':
      return labelled('search', primary.query);
    case 'symbolMatch':
      return labelled('symbol', symbolList());
    case 'symbolDetail':
      return labelled('detail', symbolList());
    case 'testOf':
      return labelled('test', primary.via);
    case 'dependencyOf':
      return labelled('imported-by', primary.via);
    case 'dependentOf':
      return labelled('imports', primary.via);
    default:
      return 'selected';
  }
};
|
|
126
|
+
|
|
127
|
+
// Symbol kinds that are always placed in the prioritized preview group.
const HIGH_SIGNAL_PREVIEW_KINDS = new Set([
  'actor',
  'class',
  'enum',
  'function',
  'interface',
  'method',
  'protocol',
  'struct',
  'trait',
  'type',
]);

/**
 * Ranks a symbol kind for preview ordering: 4 for class/function/method,
 * 3 for the type-like kinds, 0 for anything else.
 */
const getPreviewKindPriority = (kind) => {
  if (kind === 'class' || kind === 'function' || kind === 'method') return 4;

  const typeLikeKinds = ['interface', 'type', 'protocol', 'trait', 'struct', 'enum', 'actor'];
  return typeLikeKinds.includes(kind) ? 3 : 0;
};
|
|
150
|
+
|
|
151
|
+
/**
 * Reduces a symbol entry to `{ name, kind }` plus its signature, or its
 * snippet when there is no signature.
 */
const compactSymbolPreview = (entry) => {
  const preview = { name: entry.name, kind: entry.kind };
  if (entry.signature) {
    preview.signature = entry.signature;
  } else if (entry.snippet) {
    preview.snippet = entry.snippet;
  }
  return preview;
};
|
|
156
|
+
|
|
157
|
+
/**
 * Picks up to `maxItems` symbol previews for a file.
 *
 * Entries are kept when their name (case-insensitively) is in
 * `matchedSymbols`, or unconditionally when `includeFallback` is set. They are
 * ordered by: matched first, then kind priority, then how many of
 * signature/snippet they carry, then ascending line. Matched or high-signal
 * kinds are emitted before all remaining entries.
 */
const buildSymbolPreviews = (entries = [], matchedSymbols = [], { includeFallback = false, maxItems = 3 } = {}) => {
  if (maxItems <= 0) return [];

  const wanted = new Set(matchedSymbols.map((symbol) => symbol.toLowerCase()));
  const isWanted = (entry) => wanted.has(entry.name.toLowerCase());
  const richness = (entry) => Number(Boolean(entry.signature)) + Number(Boolean(entry.snippet));

  const ordered = entries
    .filter((entry) => includeFallback || isWanted(entry))
    .sort((left, right) => {
      const byMatch = Number(isWanted(right)) - Number(isWanted(left));
      if (byMatch !== 0) return byMatch;

      const byKind = getPreviewKindPriority(right.kind) - getPreviewKindPriority(left.kind);
      if (byKind !== 0) return byKind;

      const byRichness = richness(right) - richness(left);
      if (byRichness !== 0) return byRichness;

      return left.line - right.line;
    });

  const isPrioritized = (entry) => isWanted(entry) || HIGH_SIGNAL_PREVIEW_KINDS.has(entry.kind);
  const front = ordered.filter((entry) => isPrioritized(entry));
  const back = ordered.filter((entry) => !isPrioritized(entry));

  return [...front, ...back].slice(0, maxItems).map(compactSymbolPreview);
};
|
|
187
|
+
|
|
188
|
+
/**
 * Adds a `symbolMatch` evidence entry (and `matchedSymbols`) to every file in
 * `files` whose indexed symbols include one of `symbolCandidates`, compared
 * case-insensitively. Mutates the `files` map in place; does nothing without
 * an index or candidates.
 */
const attachSymbolEvidence = (files, index, symbolCandidates) => {
  if (!index || symbolCandidates.length === 0) return;

  // lower-cased name -> candidate as originally spelled (last spelling wins)
  const canonicalByLower = new Map();
  for (const symbol of symbolCandidates) {
    canonicalByLower.set(symbol.toLowerCase(), symbol);
  }

  for (const [rel, info] of files) {
    const hits = new Set();
    for (const symbol of index.files?.[rel]?.symbols ?? []) {
      const canonical = canonicalByLower.get(symbol.name.toLowerCase());
      if (canonical) hits.add(canonical);
    }
    if (hits.size === 0) continue;

    const matchedSymbols = [...hits];
    const symbolEvidence = { type: 'symbolMatch', symbols: matchedSymbols.slice(0, 3) };

    files.set(rel, {
      ...info,
      evidence: dedupeEvidence([...(info.evidence ?? []), symbolEvidence]),
      matchedSymbols: uniqueList([...(info.matchedSymbols ?? []), ...matchedSymbols]).slice(0, 3),
    });
  }
};
|
|
216
|
+
|
|
217
|
+
/**
 * Scores a candidate independently of what has already been selected:
 * role base score (40 for unknown roles), +20 for test files under the
 * `tests` intent, per-evidence base scores with rank/symbol bonuses, and
 * 10 points per matched symbol.
 */
const computeStaticUtility = (candidate, intent) => {
  const roleScore = ROLE_BASE_SCORE[candidate.role] ?? 40;
  const testIntentBonus = candidate.role === 'test' && intent === 'tests' ? 20 : 0;

  let evidenceScore = 0;
  for (const { type, rank, symbols } of candidate.evidence ?? []) {
    evidenceScore += EVIDENCE_BASE_SCORE[type] ?? 0;
    if (type === 'searchHit') {
      // Rank 1 earns 24, each later rank 6 less, never below zero.
      evidenceScore += Math.max(0, 24 - ((rank ?? 1) - 1) * 6);
    } else if (type === 'symbolMatch') {
      evidenceScore += (symbols?.length ?? 0) * 12;
    }
  }

  const symbolScore = (candidate.matchedSymbols?.length ?? 0) * 10;
  return roleScore + testIntentBonus + evidenceScore + symbolScore;
};
|
|
230
|
+
|
|
231
|
+
/**
 * Derives a non-primary role from a candidate's evidence: `test` when it has
 * `testOf` evidence, `dependency` for `dependencyOf`, otherwise `dependent`.
 */
const inferRelatedRole = (candidate) => {
  const types = (candidate.evidence ?? []).map((item) => item.type);
  if (types.includes('testOf')) return 'test';
  return types.includes('dependencyOf') ? 'dependency' : 'dependent';
};
|
|
238
|
+
|
|
239
|
+
/**
 * Scores how strongly a candidate's evidence suggests it should be the
 * primary file. Test-directory paths are penalized unless the intent is
 * `tests`; other paths under `src/` get a small bonus.
 */
const computePrimarySignal = (candidate, intent) => {
  const relLower = (candidate.rel ?? '').toLowerCase();
  const symbolCount = (evidence) => evidence.symbols?.length ?? 0;

  let score = (candidate.matchedSymbols?.length ?? 0) * 12;

  for (const evidence of candidate.evidence ?? []) {
    switch (evidence.type) {
      case 'entryFile':
        score += 120;
        break;
      case 'diffHit':
        score += 110;
        break;
      case 'searchHit':
        // Rank 1 earns 28, each later rank 6 less, never below zero.
        score += Math.max(0, 28 - ((evidence.rank ?? 1) - 1) * 6);
        break;
      case 'symbolMatch':
        score += symbolCount(evidence) * 10;
        break;
      case 'symbolDetail':
        score += symbolCount(evidence) * 12;
        break;
      default:
        break;
    }
  }

  if (TEST_FILE_RE.test(relLower)) {
    score += intent === 'tests' ? 10 : -60;
  } else if (relLower.startsWith('src/')) {
    score += 10;
  }

  return score;
};
|
|
261
|
+
|
|
262
|
+
/**
 * Total score used to decide which candidate becomes the primary file:
 * seed score + evidence signal, plus 6 when it already holds the primary role.
 */
const computePrimaryPromotionScore = (candidate, task, intent) => {
  const seedScore = scorePrimarySeed(candidate, task, intent);
  const signalScore = computePrimarySignal(candidate, intent);
  const incumbentBonus = candidate.role === 'primary' ? 6 : 0;
  return seedScore + signalScore + incumbentBonus;
};
|
|
268
|
+
|
|
269
|
+
/**
 * Ensures exactly one entry of `files` carries the `primary` role.
 *
 * The candidate with the highest promotion score wins (ties go to the path
 * that sorts first via `localeCompare`). Every other entry that was `primary`
 * is demoted to the role inferred from its evidence. Mutates `files` in place.
 *
 * @param {Map<string, object>} files Candidate files keyed by relative path.
 * @param {string} task Task text, forwarded to the scoring helpers.
 * @param {string} intent Inferred intent, forwarded to the scoring helpers.
 * @returns {void}
 */
const normalizePrimaryCandidate = (files, task, intent) => {
  const candidates = [...files.entries()].map(([rel, info]) => ({ rel, ...info }));
  if (candidates.length === 0) return;

  // Score each candidate once; the previous sort comparator recomputed the
  // full promotion score on every comparison.
  let best = null;
  let bestScore = Number.NEGATIVE_INFINITY;
  for (const candidate of candidates) {
    const score = computePrimaryPromotionScore(candidate, task, intent);
    const winsTie = best !== null && score === bestScore && candidate.rel.localeCompare(best.rel) < 0;
    if (best === null || score > bestScore || winsTie) {
      best = candidate;
      bestScore = score;
    }
  }

  // NOTE: an earlier revision kept the current primary when its score exceeded
  // the best score by more than 10. Since `best` is the maximum, that could
  // never happen, so the check was dead code and has been removed. The +6
  // incumbency bonus in computePrimaryPromotionScore is the only hysteresis.
  for (const candidate of candidates) {
    if (candidate.rel === best.rel) {
      files.set(candidate.rel, { ...files.get(candidate.rel), role: 'primary' });
      continue;
    }

    if (candidate.role !== 'primary') continue;
    files.set(candidate.rel, { ...files.get(candidate.rel), role: inferRelatedRole(candidate) });
  }
};
|
|
297
|
+
|
|
298
|
+
/**
 * Returns the distinct truthy `via` values found in a candidate's evidence,
 * in order of first appearance.
 */
const collectViaRefs = (candidate) => {
  const refs = new Set();
  for (const item of candidate.evidence ?? []) {
    const { via } = item;
    if (via) refs.add(via);
  }
  return [...refs];
};
|
|
299
|
+
|
|
300
|
+
/**
 * Penalty for picking `candidate` given the already `selected` items.
 * Overlap is counted on four axes: same directory, same role, shared `via`
 * references and shared matched symbols (case-insensitive). Directory and
 * role overlap weigh less when the candidate is primary.
 */
const computeMarginalPenalty = (candidate, selected) => {
  if (selected.length === 0) return 0;

  const isPrimary = candidate.role === 'primary';
  const candidateDir = path.dirname(candidate.rel);
  const candidateVia = new Set(collectViaRefs(candidate));
  const candidateSymbols = new Set((candidate.matchedSymbols ?? []).map((symbol) => symbol.toLowerCase()));

  const overlap = { dir: 0, role: 0, via: 0, symbol: 0 };

  for (const picked of selected) {
    if (path.dirname(picked.rel) === candidateDir) overlap.dir += 1;
    if (picked.role === candidate.role) overlap.role += 1;

    overlap.via += collectViaRefs(picked).filter((via) => candidateVia.has(via)).length;
    overlap.symbol += (picked.matchedSymbols ?? [])
      .filter((symbol) => candidateSymbols.has(symbol.toLowerCase()))
      .length;
  }

  return overlap.dir * (isPrimary ? 3 : 8)
    + overlap.role * (isPrimary ? 2 : 5)
    + overlap.via * 12
    + overlap.symbol * 18;
};
|
|
333
|
+
|
|
334
|
+
/**
 * Guesses the intent of a task by counting, per intent, how many of its
 * keywords occur in the lower-cased task text. The first intent with the
 * strictly highest count wins; `explore` is returned when nothing matches.
 *
 * Matching is by substring (`String.prototype.includes`), so a keyword also
 * counts when it appears inside a longer word.
 */
export const inferIntent = (task) => {
  const haystack = task.toLowerCase();
  let winner = { intent: 'explore', hits: 0 };

  Object.entries(INTENT_KEYWORDS).forEach(([intent, keywords]) => {
    let hits = 0;
    for (const keyword of keywords) {
      if (haystack.includes(keyword)) hits += 1;
    }
    if (hits > winner.hits) winner = { intent, hits };
  });

  return winner.intent;
};
|
|
346
|
+
|
|
347
|
+
/**
 * Maps known multi-word phrases in the task to the identifier they usually
 * refer to (e.g. "create user" -> `createUser`). Returns the identifiers in
 * rule order; an empty array when no phrase is present.
 */
const extractCompoundQueries = (task) => {
  const lowered = task.toLowerCase();
  const phraseRules = [
    [/\b(create[-\s]+user|user[-\s]+creation)\b/, 'createUser'],
    [/\bjwt[-\s]+secret\b/, 'jwtSecret'],
  ];

  return phraseRules
    .filter(([pattern]) => pattern.test(lowered))
    .map(([, identifier]) => identifier);
};
|
|
361
|
+
|
|
362
|
+
/**
 * Drops queries made redundant by a compound query: when `jwtSecret` is among
 * the compound queries, a bare `jwt` (any casing) is removed.
 */
const filterRedundantPromptQueries = (queries, compoundQueries) => {
  const hasJwtSecret = compoundQueries
    .map((query) => query.toLowerCase())
    .includes('jwtsecret');

  return queries.filter((query) => {
    const lowered = query.toLowerCase();
    return !(hasJwtSecret && lowered === 'jwt');
  });
};
|
|
370
|
+
|
|
371
|
+
/**
 * Collects identifier-like strings from the task: compound-phrase identifiers
 * first, then everything matching IDENTIFIER_RE (minus redundant ones),
 * without duplicates.
 */
export const extractSymbolCandidates = (task) => {
  const compoundQueries = extractCompoundQueries(task);
  const identifiers = task.match(IDENTIFIER_RE) ?? [];
  const promptSymbols = filterRedundantPromptQueries(identifiers, compoundQueries);
  return uniqueList(compoundQueries.concat(promptSymbols));
};
|
|
378
|
+
|
|
379
|
+
/**
 * True when a token looks like a code identifier: it contains an underscore,
 * a digit, a lower-to-upper case transition, or two consecutive capitals.
 */
const isLikelyCodeSymbol = (token) => {
  if (token.includes('_')) return true;
  const codeShapes = [/\d/, /[a-z][A-Z]/, /[A-Z]{2,}/];
  return codeShapes.some((shape) => shape.test(token));
};
|
|
384
|
+
|
|
385
|
+
/**
 * Scores a keyword for use as a search query: its length (capped at 8), a
 * bonus of up to 16 for appearing early in the task, and +1 for tokens of
 * 12 or more characters.
 */
const scoreKeywordQuery = (token, lowerTask) => {
  const lengthScore = Math.min(token.length, 8);
  const offset = lowerTask.indexOf(token);
  const earlyBonus = offset >= 0 ? Math.max(0, 16 - offset) : 0;
  const longTokenBonus = token.length >= 12 ? 1 : 0;
  return lengthScore + earlyBonus + longTokenBonus;
};
|
|
392
|
+
|
|
393
|
+
/**
 * Extracts plain keyword queries from the task, best first.
 *
 * Tokens are lower-cased and dropped when they are shorter than 3 characters,
 * purely numeric, stop words, low-signal words, or (unless
 * `allowIntentKeywords` is set) intent keywords. The remainder is ranked by
 * `scoreKeywordQuery`, then position in the task, then length, then
 * alphabetically, de-duplicated, and stripped of queries a compound query
 * already covers.
 */
const extractKeywordQueries = (task, { allowIntentKeywords = false } = {}) => {
  const intentKeywords = new Set(Object.values(INTENT_KEYWORDS).flat());
  const lowerTask = task.toLowerCase();
  const compoundQueries = extractCompoundQueries(task);

  const isUseful = (token) =>
    token.length > 2
    && !/^\d+$/.test(token)
    && !STOP_WORDS.has(token)
    && !LOW_SIGNAL_QUERY_WORDS.has(token)
    && (allowIntentKeywords || !intentKeywords.has(token));

  const byRelevance = (a, b) =>
    scoreKeywordQuery(b, lowerTask) - scoreKeywordQuery(a, lowerTask)
    || lowerTask.indexOf(a) - lowerTask.indexOf(b)
    || b.length - a.length
    || a.localeCompare(b);

  const rawTokens = task.match(QUERY_TOKEN_RE) || [];
  const ranked = rawTokens
    .map((token) => token.toLowerCase())
    .filter(isUseful)
    .sort(byRelevance);

  return filterRedundantPromptQueries([...new Set(ranked)], compoundQueries);
};
|
|
416
|
+
|
|
417
|
+
/**
 * Returns the compound queries for the task, plus the literal `FROM` when the
 * task mentions containers, docker, images or deployment.
 */
const extractExpandedQueries = (task) => {
  const mentionsContainers = /\b(container|docker|image|deploy|deployment)\b/.test(task.toLowerCase());
  const compound = extractCompoundQueries(task);
  return mentionsContainers ? [...compound, 'FROM'] : [...compound];
};
|
|
427
|
+
|
|
428
|
+
/**
 * Picks a single last-resort search query: the first code-like symbol
 * candidate, else the best keyword (intent keywords allowed), else the
 * trimmed task itself.
 */
const extractFallbackSearchQuery = (task) =>
  extractSymbolCandidates(task).find(isLikelyCodeSymbol)
  || extractKeywordQueries(task, { allowIntentKeywords: true })[0]
  || task.trim();
|
|
437
|
+
|
|
438
|
+
/**
 * Builds up to three search queries for a task: code-like symbols first
 * (excluding stop and low-signal words), then keyword queries, de-duplicated
 * case-insensitively with the first spelling kept.
 */
export const extractSearchQueries = (task) => {
  const isNoise = (word) => LOW_SIGNAL_QUERY_WORDS.has(word) || STOP_WORDS.has(word);

  const symbolQueries = extractSymbolCandidates(task)
    .filter((candidate) => isLikelyCodeSymbol(candidate))
    .filter((candidate) => !isNoise(candidate.toLowerCase()));
  const keywordQueries = extractKeywordQueries(task);

  const firstSpelling = new Map();
  for (const candidate of [...symbolQueries, ...keywordQueries]) {
    const key = candidate.toLowerCase();
    if (!firstSpelling.has(key)) firstSpelling.set(key, candidate);
  }

  return [...firstSpelling.values()].slice(0, 3);
};
|
|
455
|
+
|
|
456
|
+
// When `test` matches the lower-cased task, its `hints` are added to the set
// of path hints used to score candidate primary files.
const PRIMARY_PATH_HINT_MAP = [
  {
    test: /\b(api|endpoint|endpoints|route|routes)\b/,
    hints: ['api', 'routes'],
  },
  {
    test: /\b(auth|token|jwt|login|session)\b/,
    hints: ['auth'],
  },
  {
    test: /\b(config|env|secret|yaml|json)\b/,
    hints: ['config'],
  },
  {
    test: /\b(test|tests|spec|coverage)\b/,
    hints: ['test', 'tests'],
  },
  {
    test: /\b(model|models|schema|schemas|entity|entities)\b/,
    hints: ['model', 'models'],
  },
  {
    test: /\b(container|docker|image|deploy|deployment)\b/,
    hints: ['dockerfile', 'docker'],
  },
];

// Matches paths that pass through a `test/`, `tests/` or `__tests__/` directory.
const TEST_FILE_RE = /(^|\/)(tests?|__tests__)\//;
|
|
466
|
+
|
|
467
|
+
/**
 * Splits a path into its distinct lower-case alphanumeric tokens of at least
 * two characters, in order of first appearance.
 */
const tokenizePath = (rel) => {
  const tokens = new Set();
  for (const token of rel.toLowerCase().match(/[a-z0-9]+/g) ?? []) {
    if (token.length > 1) tokens.add(token);
  }
  return [...tokens];
};
|
|
469
|
+
|
|
470
|
+
/**
 * Derives path hints from a task: every token longer than two characters that
 * is neither a stop word nor low-signal, plus the hints of each
 * PRIMARY_PATH_HINT_MAP entry whose pattern matches the task.
 */
const extractPrimaryPathHints = (task) => {
  const lowerTask = task.toLowerCase();
  const hints = new Set();

  for (const raw of lowerTask.match(QUERY_TOKEN_RE) || []) {
    const token = raw.toLowerCase();
    const isSignal = token.length > 2 && !STOP_WORDS.has(token) && !LOW_SIGNAL_QUERY_WORDS.has(token);
    if (isSignal) hints.add(token);
  }

  PRIMARY_PATH_HINT_MAP
    .filter((entry) => entry.test.test(lowerTask))
    .forEach((entry) => {
      for (const hint of entry.hints) hints.add(hint);
    });

  return [...hints];
};
|
|
486
|
+
|
|
487
|
+
/**
 * Scores how well a seed's path fits the task, for choosing primary files.
 *
 * Contributions: search-hit rank and how the hit's query relates to the path;
 * path hints found in the basename or path; API-surface directory bonuses and
 * model/schema penalties when the task targets an API; test-directory
 * adjustments depending on intent; small bonuses for files under `src/`.
 */
const scorePrimarySeed = (seed, task, intent) => {
  const relLower = (seed.rel ?? '').toLowerCase();
  const basename = path.basename(relLower, path.extname(relLower));
  const pathTokens = new Set(tokenizePath(relLower));
  const pathHints = extractPrimaryPathHints(task);
  const underSrc = relLower.startsWith('src/');
  let score = 0;

  const searchHits = (seed.evidence ?? []).filter((evidence) => evidence.type === 'searchHit');
  for (const { rank, query: rawQuery } of searchHits) {
    // Rank 1 earns 40, each later rank 8 less, never below zero.
    score += Math.max(0, 40 - ((rank ?? 1) - 1) * 8);
    if (!rawQuery) continue;

    const query = rawQuery.toLowerCase();
    if (basename === query) score += 28;
    else if (relLower.includes(query)) score += 18;
    // NOTE(review): path tokens are substrings of relLower, so this branch
    // looks unreachable after the `includes` check above — confirm intent.
    else if (pathTokens.has(query)) score += 14;
  }

  let hintHits = 0;
  for (const hint of pathHints) {
    let bonus = 0;
    if (basename === hint) bonus = 28;
    else if (pathTokens.has(hint) || relLower.includes(hint)) bonus = 18;

    if (bonus === 0) continue;
    score += bonus;
    hintHits += 1;
  }

  if (pathHints.includes('api') || pathHints.includes('routes')) {
    if (/(^|\/)(api|routes)(\/|$)/.test(relLower)) score += 28;
    if (/(^|\/)(models?|schemas?)(\/|$)/.test(relLower)) score -= 12;
  }

  const wantsTests = intent === 'tests';
  if (TEST_FILE_RE.test(relLower)) {
    score += wantsTests ? 24 : -40;
  } else if (wantsTests) {
    score -= 10;
  }

  if (underSrc) {
    if (intent === 'implementation') score += 10;
    if (intent === 'debug' || intent === 'review') score += 8;
    if (hintHits > 0) score += 6;
  }

  return score;
};
|
|
537
|
+
|
|
538
|
+
/**
 * Returns a copy of the seeds sorted by descending `scorePrimarySeed`, ties
 * broken by path (`localeCompare`). The input array is not modified.
 */
const rerankPrimarySeeds = (primarySeeds, task, intent) => {
  // Scores are computed on demand and remembered per seed object.
  const scoreCache = new Map();
  const scoreOf = (seed) => {
    if (!scoreCache.has(seed)) scoreCache.set(seed, scorePrimarySeed(seed, task, intent));
    return scoreCache.get(seed);
  };

  const ranked = [...primarySeeds];
  ranked.sort((left, right) => scoreOf(right) - scoreOf(left) || left.rel.localeCompare(right.rel));
  return ranked;
};
|
|
543
|
+
|
|
544
|
+
/**
 * Builds the candidate file map from the primary seeds and, when an index is
 * available, from the files `queryRelated` reports as imports, importers and
 * tests of each indexed seed.
 *
 * When a file is reached more than once, its evidence and matched symbols are
 * merged and the role with the better ROLE_RANK is kept.
 *
 * @returns {{ files: Map<string, object>, neighbors: string[] }} `neighbors`
 *   lists related paths that were not in `files` when they were encountered.
 */
const expandWithGraph = (primarySeeds, index, root) => {
  const files = new Map();
  const relOf = (seed) => seed.rel ?? path.relative(root, seed.absPath).replace(/\\/g, '/');
  const rankOf = (role) => ROLE_RANK[role] ?? 99;

  const upsert = (rel, next) => {
    const absPath = next.absPath ?? path.join(root, rel);
    const previous = files.get(rel);

    if (!previous) {
      const fresh = {
        absPath,
        role: next.role,
        evidence: dedupeEvidence(next.evidence ?? []),
      };
      if (next.matchedSymbols?.length) {
        fresh.matchedSymbols = uniqueList(next.matchedSymbols).slice(0, 3);
      }
      files.set(rel, fresh);
      return;
    }

    const merged = {
      ...previous,
      absPath,
      role: rankOf(next.role) < rankOf(previous.role) ? next.role : previous.role,
      evidence: dedupeEvidence([...(previous.evidence ?? []), ...(next.evidence ?? [])]),
    };
    const matchedSymbols = uniqueList([
      ...(previous.matchedSymbols ?? []),
      ...(next.matchedSymbols ?? []),
    ]).slice(0, 3);
    if (matchedSymbols.length) merged.matchedSymbols = matchedSymbols;

    files.set(rel, merged);
  };

  for (const seed of primarySeeds) {
    upsert(relOf(seed), { role: 'primary', absPath: seed.absPath, evidence: seed.evidence });
  }

  if (!index) return { files, neighbors: [] };

  const neighborSet = new Set();
  // How each relation reported by queryRelated maps to a role and evidence type.
  const relations = [
    ['imports', 'dependency', 'dependencyOf'],
    ['importedBy', 'dependent', 'dependentOf'],
    ['tests', 'test', 'testOf'],
  ];

  for (const seed of primarySeeds) {
    const rel = relOf(seed);
    if (!index.files?.[rel]) continue;

    const related = queryRelated(index, rel);

    for (const [field, role, type] of relations) {
      for (const relatedPath of related[field]) {
        upsert(relatedPath, { role, evidence: [{ type, via: rel }] });
      }
    }

    for (const neighbor of related.neighbors) {
      if (!files.has(neighbor)) neighborSet.add(neighbor);
    }
  }

  return { files, neighbors: [...neighborSet] };
};
|
|
605
|
+
|
|
606
|
+
/**
 * Compares on-disk modification times with the ones recorded in the index.
 *
 * @returns {'unavailable'|'stale'|'fresh'} `unavailable` without an index,
 *   `stale` as soon as one indexed file's floored `mtimeMs` differs from its
 *   index entry, otherwise `fresh`. Files missing from the index, or that
 *   cannot be stat'ed, are ignored.
 */
const checkIndexFreshness = (idx, absPaths, root) => {
  if (!idx) return 'unavailable';

  const isStale = (abs) => {
    const rel = path.relative(root, abs).replace(/\\/g, '/');
    const entry = idx.files?.[rel];
    if (!entry) return false;

    try {
      return Math.floor(fs.statSync(abs).mtimeMs) !== entry.mtime;
    } catch {
      // File gone or unreadable: treated as not stale.
      return false;
    }
  };

  for (const abs of absPaths) {
    if (isStale(abs)) return 'stale';
  }
  return 'fresh';
};
|
|
619
|
+
|
|
620
|
+
/**
 * Combines several freshness states: `stale` if any is stale, else `fresh` if
 * any is fresh, else `unavailable`.
 */
const mergeIndexFreshness = (values) =>
  ['stale', 'fresh'].find((state) => values.includes(state)) ?? 'unavailable';
|
|
625
|
+
|
|
626
|
+
/**
 * Chooses the symbol-preview options for a context item.
 *
 * In `minimal` detail mode primary files get 3 previews and others 2.
 * Otherwise: 3 when the item has matched symbols, 2 for primary files, 1 for
 * dependencies and tests, and none (fallback disabled) for everything else.
 */
const getPreviewOptions = (item, detailMode) => {
  const isPrimary = item.role === 'primary';

  let maxItems;
  if (detailMode === 'minimal') {
    maxItems = isPrimary ? 3 : 2;
  } else if ((item.matchedSymbols?.length ?? 0) > 0) {
    maxItems = 3;
  } else if (isPrimary) {
    maxItems = 2;
  } else if (item.role === 'dependency' || item.role === 'test') {
    maxItems = 1;
  } else {
    maxItems = 0;
  }

  // Fallback previews are enabled in every case that allows at least one item.
  return { includeFallback: maxItems > 0, maxItems };
};
|
|
645
|
+
|
|
646
|
+
/**
 * Greedily selects which files to read and in which mode.
 *
 * At most `min(10, ceil(maxTokens / 800))` files are chosen, subject to
 * per-role limits. Each round picks the remaining candidate with the highest
 * utility (static utility minus the overlap penalty against files already
 * selected).
 *
 * Ties on utility are broken by role rank and then by path. The path
 * comparison now applies only when the role ranks are equal. Previously either
 * tie-break alone could replace the current best, so the winner depended on
 * the iteration order of `files`.
 *
 * @param {Map<string, object>} files Candidates keyed by relative path.
 * @param {number} maxTokens Token budget; budgets under 4000 are "tight".
 * @param {string} intent Inferred intent (`tests` allows one more test file).
 * @param {string} [detailMode='balanced'] `deep` reads every file in `full`
 *   mode; otherwise primary files get `outline` (unless the budget is tight)
 *   and everything else `signatures`.
 * @returns {Array<object>} Selected candidates with `utility` and `mode`.
 */
export const allocateReads = (files, maxTokens, intent, detailMode = 'balanced') => {
  const maxFiles = Math.min(10, Math.ceil(maxTokens / 800));
  const tightBudget = maxTokens < 4000;

  const roleLimits = {
    primary: 5,
    test: intent === 'tests' ? 3 : 2,
    dependency: 3,
    dependent: 2,
  };

  const candidates = [...files.entries()].map(([rel, info]) => ({
    rel,
    ...info,
    evidence: dedupeEvidence(info.evidence ?? []),
    matchedSymbols: uniqueList(info.matchedSymbols ?? []).slice(0, 3),
  }));

  const roleRank = (role) => ROLE_RANK[role] ?? 99;

  // Strict, order-independent preference: utility, then role rank, then path.
  const beats = (candidate, utility, incumbent) => {
    if (!incumbent) return true;
    if (utility !== incumbent.utility) return utility > incumbent.utility;
    const rankDiff = roleRank(candidate.role) - roleRank(incumbent.role);
    if (rankDiff !== 0) return rankDiff < 0;
    return candidate.rel < incumbent.rel;
  };

  const selected = [];
  const selectedRels = new Set();
  const plan = [];

  while (plan.length < maxFiles) {
    let best = null;

    for (const candidate of candidates) {
      if (selectedRels.has(candidate.rel)) continue;
      if ((roleLimits[candidate.role] ?? 0) <= 0) continue;

      const utility = computeStaticUtility(candidate, intent) - computeMarginalPenalty(candidate, selected);
      if (beats(candidate, utility, best)) {
        best = { ...candidate, utility };
      }
    }

    if (!best) break;

    let mode = 'signatures';
    if (detailMode === 'deep') mode = 'full';
    else if (best.role === 'primary' && !tightBudget) mode = 'outline';

    roleLimits[best.role]--;
    selected.push(best);
    selectedRels.add(best.rel);
    plan.push({ ...best, mode });
  }

  return plan;
};
|
|
698
|
+
|
|
699
|
+
/**
 * Returns the symbol entries the index holds for `rel`, or an empty array when
 * the index, the file entry or its symbols are missing.
 */
const getFileSymbolEntries = (index, rel) => {
  const fileEntry = index?.files?.[rel];
  return fileEntry?.symbols ?? [];
};
|
|
700
|
+
|
|
701
|
+
/**
 * Maximum number of symbol names to list for an item: 4/2 (primary/other) in
 * `minimal` detail mode, 6/3 otherwise.
 */
const getSymbolListLimit = (item, detailMode) => {
  const limits = detailMode === 'minimal'
    ? { primary: 4, other: 2 }
    : { primary: 6, other: 3 };
  return item.role === 'primary' ? limits.primary : limits.other;
};
|
|
705
|
+
|
|
706
|
+
/**
 * Maximum number of symbol signatures to emit for a context item.
 *
 * `detailMode === 'minimal'` takes precedence over `readMode === 'full'`.
 *
 * @param {{role: string}} item - Context item; only `role` is read.
 * @param {string} detailMode - Detail level of the request.
 * @param {string} readMode - Read mode planned for the item.
 * @returns {number} primary/other limits: 4/2 (minimal), 8/4 (full read), else 6/3.
 */
const getSymbolSignatureLimit = (item, detailMode, readMode) => {
  let limits;
  if (detailMode === 'minimal') {
    limits = { primary: 4, other: 2 };
  } else if (readMode === 'full') {
    limits = { primary: 8, other: 4 };
  } else {
    limits = { primary: 6, other: 3 };
  }
  return item.role === 'primary' ? limits.primary : limits.other;
};
|
|
711
|
+
|
|
712
|
+
/**
 * Collect the truthy `signature` values from symbol entries, in order.
 *
 * @param {Array<{signature?: string}>} entries - Symbol entries.
 * @param {number} [maxItems=10] - Upper bound passed to `slice(0, maxItems)`.
 * @returns {Array} At most `maxItems` signatures; entries without one are skipped.
 */
const getSymbolSignatures = (entries, maxItems = 10) => {
  const signatures = entries.map((entry) => entry.signature).filter(Boolean);
  return signatures.slice(0, maxItems);
};
|
|
714
|
+
|
|
715
|
+
/**
 * Reduce an item's evidence list to the compact form placed in the response.
 *
 * Primary items keep up to two entries in their existing order. Other roles
 * keep a single entry, preferring the first `testOf` / `dependencyOf` /
 * `dependentOf` entry and otherwise the first entry. `query` and `ref` are
 * only emitted for primary items; `symbols` is capped at two names.
 *
 * @param {{role: string, evidence?: Array<object>}} item - Context item.
 * @returns {Array<object>} Serialized evidence entries (possibly empty).
 */
const serializeEvidencePayload = (item) => {
  const entries = dedupeEvidence(item.evidence ?? []);
  if (entries.length === 0) return [];

  const isPrimary = item.role === 'primary';

  let ordered;
  if (isPrimary) {
    ordered = entries;
  } else {
    // Put the graph-relationship entry (if any) ahead of the first entry.
    const relation = entries.find((entry) => ['testOf', 'dependencyOf', 'dependentOf'].includes(entry.type));
    ordered = [relation, entries[0]].filter(Boolean);
  }

  const maxEntries = isPrimary ? 2 : 1;

  return uniqueList(ordered)
    .slice(0, maxEntries)
    .map((entry) => {
      // Keys are added in a fixed order: type, via, query, ref, symbols.
      const serialized = { type: entry.type };
      if (entry.via) serialized.via = entry.via;
      if (entry.query && isPrimary) serialized.query = entry.query;
      if (entry.ref && isPrimary) serialized.ref = entry.ref;
      if (Array.isArray(entry.symbols) && entry.symbols.length > 0) {
        serialized.symbols = entry.symbols.slice(0, 2);
      }
      return serialized;
    });
};
|
|
737
|
+
|
|
738
|
+
/**
 * Decide whether the plain list of symbol names belongs in an item's payload.
 *
 * @param {{role: string}} item - Context item.
 * @param {Array} symbolPreviews - Previews already built for the item.
 * @param {string} readMode - Read mode of the payload.
 * @returns {boolean} True for primary items, for full reads, or when there
 *   are no previews.
 */
const shouldIncludeSymbolNames = (item, symbolPreviews, readMode) =>
  item.role === 'primary' || readMode === 'full' || symbolPreviews.length === 0;
|
|
743
|
+
|
|
744
|
+
/**
 * Decide whether symbol signatures belong in an item's payload.
 *
 * @param {{role: string}} item - Context item.
 * @param {Array} symbolPreviews - Previews already built for the item.
 * @returns {boolean} True for primary items, or when there are no previews.
 */
const shouldIncludeSymbolSignatures = (item, symbolPreviews) =>
  item.role === 'primary' || symbolPreviews.length === 0;
|
|
748
|
+
|
|
749
|
+
/**
 * Assemble the response payload for one context item.
 *
 * Always emits `file`, `role`, `readMode`, `reasonIncluded` and `evidence`.
 * `symbols`, `symbolSignatures`, `symbolPreviews` and `content` are added, in
 * that order, only when non-empty.
 *
 * @param {object} item - Planned item (`rel`, `role`, `evidence`, `matchedSymbols`).
 * @param {object|null} index - Loaded index used to look up the file's symbols.
 * @param {string} detailMode - Detail level of the request.
 * @param {string} [readMode='index-only'] - Read mode recorded in the payload.
 * @param {string|null} [content=null] - File content; attached only when a non-empty string.
 * @returns {object} The payload object.
 */
const buildContextItemPayload = (item, index, detailMode, readMode = 'index-only', content = null) => {
  const entries = getFileSymbolEntries(index, item.rel);
  const previews = buildSymbolPreviews(entries, item.matchedSymbols ?? [], getPreviewOptions(item, detailMode));

  let names = [];
  if (shouldIncludeSymbolNames(item, previews, readMode)) {
    names = entries.map((entry) => entry.name).slice(0, getSymbolListLimit(item, detailMode));
  }

  let signatures = [];
  if (shouldIncludeSymbolSignatures(item, previews)) {
    signatures = getSymbolSignatures(entries, getSymbolSignatureLimit(item, detailMode, readMode));
  }

  const evidence = serializeEvidencePayload(item);

  const payload = {
    file: item.rel,
    role: item.role,
    readMode,
    reasonIncluded: formatReasonIncluded(item.evidence),
    evidence,
  };

  // Optional sections are appended in a fixed key order.
  if (names.length > 0) payload.symbols = names;
  if (signatures.length > 0) payload.symbolSignatures = signatures;
  if (previews.length > 0) payload.symbolPreviews = previews;
  if (typeof content === 'string' && content.length > 0) payload.content = content;

  return payload;
};
|
|
776
|
+
|
|
777
|
+
/**
 * Report whether a payload already carries symbol previews or signatures.
 *
 * @param {{symbolPreviews?: Array, symbolSignatures?: Array}} payload - Item payload.
 * @returns {boolean} True when either list is present and non-empty.
 */
const hasStrongIndexSignal = (payload) =>
  [payload.symbolPreviews, payload.symbolSignatures].some((list) => (list?.length ?? 0) > 0);
|
|
779
|
+
|
|
780
|
+
/**
 * Decide whether file content should be read for a planned context item.
 *
 * Never reads when `'content'` is not requested or in `'minimal'` mode, and
 * always reads in `'deep'` mode. Otherwise the decision depends on the role:
 * - primary: only when no symbols matched and the payload has no previews/signatures;
 * - test: only for the `'tests'` intent, when the payload has no previews/signatures;
 * - dependency: only when the payload has no previews, signatures or symbol names;
 * - any other role: never.
 *
 * @param {object} item - Planned item (`role`, `matchedSymbols`).
 * @param {object} payload - Index-only payload already built for the item.
 * @param {string} detailMode - Detail level of the request.
 * @param {Set<string>} includeSet - Requested response sections.
 * @param {string} intent - Resolved task intent.
 * @returns {boolean} Whether to schedule a content read.
 */
const shouldReadContentForItem = (item, payload, detailMode, includeSet, intent) => {
  const contentRequested = includeSet.has('content');
  if (!contentRequested || detailMode === 'minimal') return false;
  if (detailMode === 'deep') return true;

  const lacksIndexSignal = !hasStrongIndexSignal(payload);

  switch (item.role) {
    case 'primary': {
      const matchedCount = item.matchedSymbols?.length ?? 0;
      return matchedCount > 0 ? false : lacksIndexSignal;
    }
    case 'test':
      return intent === 'tests' ? lacksIndexSignal : false;
    case 'dependency':
      return lacksIndexSignal && (payload.symbols?.length ?? 0) === 0;
    default:
      return false;
  }
};
|
|
801
|
+
|
|
802
|
+
// Shell metacharacters that are never allowed in a diff ref.
const BLOCKED_REF_RE = /[|&;<>`\n\r$(){}]/;

/**
 * List files that differ from a git ref and still exist on disk.
 *
 * Runs `git diff --name-only <ref>` in `root`. When the ref is `HEAD`,
 * untracked (non-ignored) files from `git ls-files --others --exclude-standard`
 * are appended as well; a failure of that second command is ignored.
 * Paths that no longer exist under `root` are counted in `skippedDeleted`
 * instead of being returned.
 *
 * This function does not throw for invalid refs or git failures; it reports
 * them through the `error` field.
 *
 * @param {true|string} diff - `true` means `HEAD`; anything else is stringified and used as the ref.
 * @param {string} root - Directory used as git's working directory and as the base for existence checks.
 * @returns {Promise<{ref: string, files: string[], skippedDeleted: number, error?: string}>}
 */
export const getChangedFiles = async (diff, root) => {
  const ref = diff === true ? 'HEAD' : String(diff);

  if (BLOCKED_REF_RE.test(ref)) {
    return { ref, files: [], skippedDeleted: 0, error: 'Invalid ref: contains shell metacharacters' };
  }

  // The ref is passed to git as a bare argument, so a leading '-' would be
  // parsed as an option (e.g. `--output=<file>`) rather than as a revision.
  if (ref.startsWith('-')) {
    return { ref, files: [], skippedDeleted: 0, error: "Invalid ref: must not start with '-'" };
  }

  const toLines = (text) => text.split('\n').map((l) => l.trim()).filter(Boolean);

  try {
    const { stdout } = await execFile('git', ['diff', '--name-only', ref], {
      cwd: root,
      timeout: 10000,
    });

    const allPaths = toLines(stdout);

    if (ref === 'HEAD') {
      try {
        const { stdout: untrackedOut } = await execFile(
          'git', ['ls-files', '--others', '--exclude-standard'],
          { cwd: root, timeout: 10000 },
        );
        // Track membership in a Set so merging stays linear in the number of paths.
        const seen = new Set(allPaths);
        for (const u of toLines(untrackedOut)) {
          if (!seen.has(u)) {
            seen.add(u);
            allPaths.push(u);
          }
        }
      } catch { /* ignore — untracked listing is best-effort */ }
    }

    let skippedDeleted = 0;
    const files = [];

    for (const rel of allPaths) {
      const abs = path.join(root, rel);
      if (fs.existsSync(abs)) {
        files.push(rel);
      } else {
        skippedDeleted++;
      }
    }

    return { ref, files, skippedDeleted };
  } catch (err) {
    const msg = err.stderr?.trim() || err.message || 'git diff failed';
    return { ref, files: [], skippedDeleted: 0, error: msg };
  }
};
|
|
849
|
+
|
|
850
|
+
/**
 * Drop "Symbol not found" output from a symbol-read result.
 *
 * With at most one candidate the whole content is kept or rejected as a
 * unit. With several candidates the content is split into sections at lines
 * starting with `--- `, and only sections without the marker are kept.
 *
 * @param {string} content - Text returned by the symbol read.
 * @param {Array} candidates - Symbol names that were requested.
 * @returns {string|null} The remaining text, or null when nothing was found.
 */
const filterFoundSymbols = (content, candidates) => {
  const isMissing = (text) => text.includes('Symbol not found');

  if (candidates.length <= 1) {
    return isMissing(content) ? null : content;
  }

  const found = content
    .split(/(?=^--- )/m)
    .filter((section) => !isMissing(section));

  return found.length === 0 ? null : found.join('').trim();
};
|
|
860
|
+
|
|
861
|
+
// Detail levels accepted by smartContext; any other `detail` value falls back to 'balanced'.
const VALID_DETAIL_MODES = new Set(['minimal', 'balanced', 'deep']);
// Response sections enabled when the caller does not supply an `include` array.
const DEFAULT_INCLUDE = ['content', 'graph', 'hints', 'symbolDetail'];
|
|
863
|
+
|
|
864
|
+
/**
 * Build a token-budgeted context bundle for a task.
 *
 * Steps, in order:
 * 1. Seed primary files either from `getChangedFiles` (when `diff` is truthy)
 *    or from up to six `smartSearch` queries derived from `task`.
 * 2. Move/insert `entryFile` at the front of the seeds when it resolves to an
 *    existing path.
 * 3. Rerank the seeds, keep the first five, expand them through the index
 *    graph, and attach symbol evidence.
 * 4. Allocate reads, build an index-only payload per planned item within
 *    `maxTokens`, then enrich selected items with content from a batch read.
 * 5. Optionally append a `symbolDetail` entry for the top primary file.
 * 6. Assemble graph summary, hints, metrics and coverage, persist metrics,
 *    and return the result.
 *
 * @param {object} params
 * @param {string} params.task - Task description used for intent inference and search queries.
 * @param {string} [params.intent] - Used only when it is a member of VALID_INTENTS; otherwise inferred from `task`.
 * @param {number} [params.maxTokens=8000] - Budget for the serialized context items.
 * @param {string} [params.entryFile] - File to force to the front of the primary seeds.
 * @param {true|string} [params.diff] - When truthy, seed from git changes instead of search.
 * @param {string} [params.detail='balanced'] - One of VALID_DETAIL_MODES; other values become 'balanced'.
 * @param {string[]} [params.include=DEFAULT_INCLUDE] - Response sections; non-arrays fall back to DEFAULT_INCLUDE.
 * @returns {Promise<object>} Result with `task`, `intent`, `indexFreshness`, `confidence`,
 *   `context`, `metrics`, and — depending on `include` / `diff` — `graph`, `graphCoverage`,
 *   `hints`, `diffSummary`.
 */
export const smartContext = async ({
  task,
  intent,
  maxTokens = 8000,
  entryFile,
  diff,
  detail = 'balanced',
  include = DEFAULT_INCLUDE,
}) => {
  const resolvedIntent = (intent && VALID_INTENTS.has(intent)) ? intent : inferIntent(task);
  const root = projectRoot;
  const detailMode = VALID_DETAIL_MODES.has(detail) ? detail : 'balanced';
  const includeSet = new Set(Array.isArray(include) ? include : DEFAULT_INCLUDE);

  let primarySeeds = [];
  let searchIndexFreshness;
  let diffSummary = null;

  if (diff) {
    // Diff mode: every changed file that still exists becomes a seed, ranked in git's output order.
    const changed = await getChangedFiles(diff, root);
    primarySeeds = changed.files.map((rel, idx) => ({
      rel,
      absPath: path.join(root, rel),
      evidence: [{ type: 'diffHit', ref: changed.ref, rank: idx + 1 }],
    }));
    diffSummary = {
      ref: changed.ref,
      totalChanged: changed.files.length + changed.skippedDeleted,
      // Provisional value; overwritten from the final context just before returning.
      included: Math.min(changed.files.length, 5),
      skippedDeleted: changed.skippedDeleted,
    };
    if (changed.error) diffSummary.error = changed.error;
    // null makes the `??` below fall back to checkIndexFreshness.
    searchIndexFreshness = null;
  } else {
    // Search mode: run up to six distinct queries in parallel and merge their top files.
    const queries = extractSearchQueries(task);
    const expandedQueries = extractExpandedQueries(task);
    const fallbackKeywords = extractKeywordQueries(task, { allowIntentKeywords: true });
    const queryCandidates = uniqueList([
      ...expandedQueries,
      ...queries,
      ...fallbackKeywords,
      extractFallbackSearchQuery(task),
    ]).slice(0, 6);
    const searchResults = await Promise.all(
      queryCandidates.map((query) => smartSearch({ query, cwd: '.', intent: resolvedIntent }))
    );
    const seedMap = new Map();

    for (let queryIdx = 0; queryIdx < searchResults.length; queryIdx++) {
      const searchResult = searchResults[queryIdx];
      const query = queryCandidates[queryIdx];
      // Only the first five files of each query contribute seeds.
      for (let rankIdx = 0; rankIdx < Math.min(searchResult.topFiles.length, 5); rankIdx++) {
        const file = searchResult.topFiles[rankIdx];
        // Normalize to forward slashes so the same file maps to one key.
        const rel = path.relative(root, file.file).replace(/\\/g, '/');
        const existing = seedMap.get(rel);
        const nextEvidence = dedupeEvidence([
          ...(existing?.evidence ?? []),
          { type: 'searchHit', query, rank: rankIdx + 1 },
        ]);

        if (!existing) {
          seedMap.set(rel, {
            rel,
            absPath: file.file,
            evidence: nextEvidence,
            queryIdx,
            rankIdx,
          });
          continue;
        }

        // Keep the best (lowest query index, then lowest rank) position seen for this file.
        const better = queryIdx < existing.queryIdx
          || (queryIdx === existing.queryIdx && rankIdx < existing.rankIdx);

        seedMap.set(rel, {
          ...existing,
          absPath: file.file,
          evidence: nextEvidence,
          ...(better ? { queryIdx, rankIdx } : {}),
        });
      }
    }

    // Order by best position, break ties by path, then drop the bookkeeping fields.
    primarySeeds = [...seedMap.values()]
      .sort((a, b) => a.queryIdx - b.queryIdx || a.rankIdx - b.rankIdx || a.rel.localeCompare(b.rel))
      .map(({ queryIdx: _queryIdx, rankIdx: _rankIdx, ...seed }) => seed);
    searchIndexFreshness = mergeIndexFreshness(searchResults.map((result) => result.indexFreshness));
  }

  if (entryFile) {
    try {
      const abs = resolveSafePath(entryFile);
      if (fs.existsSync(abs)) {
        const rel = path.relative(root, abs).replace(/\\/g, '/');
        const idx = primarySeeds.findIndex((seed) => seed.absPath === abs);
        if (idx >= 0) {
          // Already a seed: tag it with entryFile evidence and move it to the front.
          const updated = {
            ...primarySeeds[idx],
            evidence: dedupeEvidence([...(primarySeeds[idx].evidence ?? []), { type: 'entryFile' }]),
          };
          primarySeeds.splice(idx, 1);
          primarySeeds.unshift(updated);
        } else {
          primarySeeds.unshift({ rel, absPath: abs, evidence: [{ type: 'entryFile' }] });
        }
      }
    } catch { /* invalid path — skip */ }
  }

  const index = loadIndex(root);

  primarySeeds = rerankPrimarySeeds(primarySeeds, task, resolvedIntent);

  // At most five seeds are carried into graph expansion.
  const primarySeedsLimited = primarySeeds.slice(0, 5);
  const primaryFiles = primarySeedsLimited.map((seed) => seed.absPath);

  const indexFreshness = searchIndexFreshness ?? checkIndexFreshness(index, primaryFiles, root);

  const { files: expanded, neighbors } = expandWithGraph(primarySeedsLimited, index, root);
  const symbolCandidates = extractSymbolCandidates(task);
  attachSymbolEvidence(expanded, index, symbolCandidates);
  normalizePrimaryCandidate(expanded, task, resolvedIntent);

  const readPlan = allocateReads(expanded, maxTokens, resolvedIntent, detailMode);

  const context = [];
  let totalRawTokens = 0;
  let totalCompressedTokens = 0;
  const filesWithContent = new Set();
  const pendingReads = [];

  // Pass 1: add an index-only payload per planned item until the budget is reached.
  for (const item of readPlan) {
    const basePayload = buildContextItemPayload(item, index, detailMode);
    const baseTokens = countTokens(JSON.stringify(basePayload));
    // The first item is always kept, even if it alone exceeds maxTokens.
    if (totalCompressedTokens + baseTokens > maxTokens && context.length > 0) break;

    const contextIndex = context.length;
    context.push(basePayload);
    totalCompressedTokens += baseTokens;

    if (shouldReadContentForItem(item, basePayload, detailMode, includeSet, resolvedIntent)) {
      pendingReads.push({ contextIndex, item });
    }
  }

  // Pass 2: read content for the selected items in one batch and swap in enriched payloads.
  if (pendingReads.length > 0) {
    const batchResults = await smartReadBatch({
      files: pendingReads.map(({ item }) => ({ path: item.absPath, mode: item.mode })),
    });

    for (let i = 0; i < pendingReads.length; i++) {
      const pending = pendingReads[i];
      // Results are matched to requests by position.
      const readResult = batchResults.results?.[i];
      if (!readResult?.content) continue;

      const existing = context[pending.contextIndex];
      if (!existing) continue;

      const enrichedPayload = buildContextItemPayload(
        pending.item,
        index,
        detailMode,
        pending.item.mode,
        readResult.content,
      );
      const oldTokens = countTokens(JSON.stringify(existing));
      const newTokens = countTokens(JSON.stringify(enrichedPayload));
      const tokenDelta = newTokens - oldTokens;

      // Over-budget enrichments are skipped, except for the first context entry.
      if (totalCompressedTokens + tokenDelta > maxTokens && pending.contextIndex > 0) continue;

      context[pending.contextIndex] = enrichedPayload;
      filesWithContent.add(pending.item.rel);
      totalRawTokens += readResult.metrics?.rawTokens ?? 0;
      totalCompressedTokens += tokenDelta;
    }
  }

  // Optional: symbol-level read of up to three candidates in the first primary file of the plan.
  if (includeSet.has('symbolDetail') && symbolCandidates.length > 0 && readPlan.length > 0) {
    const topPrimary = readPlan.find((p) => p.role === 'primary');
    if (topPrimary) {
      try {
        const symbolResult = await smartRead({
          filePath: topPrimary.absPath,
          mode: 'symbol',
          symbol: symbolCandidates.slice(0, 3),
        });

        const filtered = filterFoundSymbols(symbolResult.content, symbolCandidates);
        if (filtered) {
          const symbolEvidence = dedupeEvidence([{
            type: 'symbolDetail',
            symbols: symbolCandidates.slice(0, 3),
          }]);
          const symbolPayload = {
            file: topPrimary.rel,
            role: 'symbolDetail',
            readMode: 'symbol',
            reasonIncluded: formatReasonIncluded(symbolEvidence),
            evidence: symbolEvidence,
            content: filtered,
          };
          const symbolTokens = countTokens(JSON.stringify(symbolPayload));
          // Only appended when it fits in the remaining budget.
          if (totalCompressedTokens + symbolTokens <= maxTokens) {
            context.push(symbolPayload);
            totalCompressedTokens += symbolTokens;

            if (detailMode === 'minimal') {
              // In minimal mode the primary entry is relabelled and its content replaced by a pointer string.
              const existingIdx = context.findIndex((c) => c.file === topPrimary.rel && c.role === 'primary');
              if (existingIdx !== -1) {
                const existing = context[existingIdx];
                const signaturesOnly = {
                  ...existing,
                  readMode: 'signatures-only',
                  content: '(omitted — see symbolDetail)',
                };
                const oldTokens = countTokens(JSON.stringify(existing));
                const newTokens = countTokens(JSON.stringify(signaturesOnly));
                context[existingIdx] = signaturesOnly;
                totalCompressedTokens += newTokens - oldTokens;
              }
            }
          }
        }
      } catch { /* skip */ }
    }
  }

  const graphSummary = {
    primaryImports: [],
    tests: [],
    dependents: [],
    neighbors,
  };

  // Bucket every expanded file (not just those that made it into context) by role.
  for (const [rel, info] of expanded) {
    if (info.role === 'dependency') graphSummary.primaryImports.push(rel);
    else if (info.role === 'test') graphSummary.tests.push(rel);
    else if (info.role === 'dependent') graphSummary.dependents.push(rel);
  }

  const hints = [];
  const excludedNeighbors = neighbors.filter((n) => !expanded.has(n));
  if (excludedNeighbors.length > 0) {
    hints.push(`${excludedNeighbors.length} neighbor file(s) available: ${excludedNeighbors.slice(0, 3).join(', ')}`);
  }
  if (indexFreshness === 'stale') {
    hints.push('Index is stale — run build_index for better results');
  }
  if (indexFreshness === 'unavailable') {
    hints.push('No symbol index — run build_index for graph expansion and ranking boosts');
  }
  if (diff && context.length === 0) {
    hints.push(diffSummary?.error || 'No changed files found for the given diff ref');
  }
  if (context.length > 0 && symbolCandidates.length === 0) {
    const topCtx = context[0];
    if (topCtx.symbols?.length) {
      hints.push(`Inspect symbols with smart_read: ${topCtx.symbols.slice(0, 3).join(', ')}`);
    }
  }

  // Savings are relative to the raw tokens of files whose content was read; 0 when none were.
  const savingsPct = totalRawTokens > 0
    ? Math.round(((totalRawTokens - totalCompressedTokens) / totalRawTokens) * 100)
    : 0;

  const contentTokens = countTokens(context.map((c) => c.content).join('\n'));
  const previewTokens = context.reduce((sum, item) => sum + countTokens(JSON.stringify(item.symbolPreviews ?? [])), 0);
  const indexOnlyItems = context.filter((item) => item.readMode === 'index-only').length;
  const contentItems = context.filter((item) => typeof item.content === 'string' && item.content.length > 0).length;
  const primaryItem = context.find((item) => item.role === 'primary');

  await persistMetrics({
    tool: 'smart_context',
    target: `${root} :: ${task}`,
    rawTokens: totalRawTokens,
    compressedTokens: totalCompressedTokens,
    savedTokens: Math.max(0, totalRawTokens - totalCompressedTokens),
    savingsPct,
    timestamp: new Date().toISOString(),
  });

  const COVERAGE_RANK = { full: 2, partial: 1, none: 0 };
  // Weakest coverage level in `vals`; unknown levels count as 'none', and an empty list is 'none'.
  const coverageMin = (vals) => {
    if (vals.length === 0) return 'none';
    let min = 2;
    for (const v of vals) min = Math.min(min, COVERAGE_RANK[v] ?? 0);
    return ['none', 'partial', 'full'][min];
  };
  // Coverage is looked up once per distinct (lower-cased) file extension in the context.
  const uniqueExts = [...new Set(context.map((c) => path.extname(c.file).toLowerCase()))];
  const perFile = uniqueExts.map((e) => getGraphCoverage(e));

  const graphCov = {
    imports: coverageMin(perFile.map((c) => c.imports)),
    tests: coverageMin(perFile.map((c) => c.tests)),
  };

  const result = {
    task,
    intent: resolvedIntent,
    indexFreshness,
    confidence: { indexFreshness, graphCoverage: graphCov },
    context,
    ...(includeSet.has('graph') ? { graph: graphSummary, graphCoverage: graphCov } : {}),
    metrics: {
      contentTokens,
      // Placeholder; filled in below once the whole result can be serialized.
      totalTokens: 0,
      filesIncluded: new Set(context.map((c) => c.file)).size,
      filesEvaluated: expanded.size,
      savingsPct,
      detailMode,
      include: [...includeSet],
      previewTokens,
      indexOnlyItems,
      contentItems,
      primaryReadMode: primaryItem?.readMode ?? null,
    },
    ...(includeSet.has('hints') ? { hints } : {}),
  };

  if (diffSummary) {
    // Report how many primary entries actually ended up in the context.
    diffSummary.included = context.filter((c) => c.role === 'primary').length;
    result.diffSummary = diffSummary;
  }

  result.metrics.totalTokens = countTokens(JSON.stringify(result));

  return result;
};
|