scai 0.1.169 → 0.1.171

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,17 +1,286 @@
1
1
  // File: src/modules/evidenceVerifierStep.ts
2
2
  import fs from "fs";
3
+ import path from "path";
3
4
  import { logInputOutput } from "../utils/promptLogHelper.js";
5
/**
 * Tokens that carry no search signal: common English function words plus
 * JavaScript keywords/literals that appear in almost every source file.
 */
const STOPWORDS = new Set([
  // articles, conjunctions, auxiliaries
  "the", "and", "for", "with", "from", "that", "this", "are", "was", "were",
  "has", "have", "had", "not", "but", "can", "could", "should", "would", "into",
  // prepositions and question words
  "onto", "about", "above", "below", "under", "over", "then", "else", "when",
  "where", "what", "which", "while",
  // JavaScript keywords and literals
  "return", "const", "let", "var", "true",
  "false", "null", "undefined", "new", "set", "get",
  // short prepositions
  "in", "to", "of", "on",
  "at", "by",
]);

/** Tokens that are valid query words but too common to be strong evidence on their own. */
const WEAK_TOKENS = new Set([
  "file",
  "line",
  "move",
  "update",
  "change",
  "modify",
  "readme",
  "fix",
  "code",
]);

/** Generic "where is X defined" vocabulary that is excluded from path-token matching. */
const GENERIC_TOKENS = new Set([
  "defined",
  "define",
  "location",
  "where",
  "find",
  "code",
]);

/** Matches a recognised source/markdown file extension at the end of a string (case-insensitive). */
const FILE_EXT_REGEX = /\.(ts|tsx|js|jsx|mjs|cjs|md)$/i;
20
/**
 * Restricts a number to the inclusive range [min, max].
 *
 * @param {number} value - Number to restrict.
 * @param {number} [min=0] - Lower bound.
 * @param {number} [max=1] - Upper bound.
 * @returns {number} `value` limited to [min, max]; `min` when `value` is not a number.
 */
function clamp(value, min = 0, max = 1) {
  const numeric = Number(value);
  // Math.max/Math.min propagate NaN, which would escape the promised range;
  // fall back to the lower bound instead.
  if (Number.isNaN(numeric)) {
    return min;
  }
  return Math.max(min, Math.min(max, numeric));
}
23
/**
 * Escapes regular-expression metacharacters so the string can be embedded
 * literally inside a `RegExp` pattern.
 *
 * @param {string} value - Raw text.
 * @returns {string} Text with each metacharacter prefixed by a backslash.
 */
function escapeRegex(value) {
  const metaCharacters = /[.*+?^${}()|[\]\\]/g;
  return value.replace(metaCharacters, (matched) => `\\${matched}`);
}
26
/**
 * Removes duplicate values while keeping the first occurrence of each.
 *
 * @param {Iterable<*>} values - Values to de-duplicate.
 * @returns {Array<*>} New array of distinct values in first-seen order.
 */
function uniq(values) {
  const distinct = new Set(values);
  return [...distinct];
}
29
/**
 * Canonical form of a token used for all comparisons: lower case.
 *
 * @param {string} token - Raw token.
 * @returns {string} Lower-cased token.
 */
function normalizeToken(token) {
  const lowered = token.toLowerCase();
  return lowered;
}
32
/**
 * Reduces a lower-case token to a rough singular form.
 *
 * This is a small heuristic, not a full stemmer. Rules, in order:
 *  1. "...ies" -> "...y"            (queries -> query)
 *  2. "...sses/zzes/xes/ches/shes" -> drop "es"   (classes -> class, boxes -> box)
 *  3. "...s"   -> drop "s", except words ending in "ss", "us" or "is",
 *     which are usually not plurals (class, status, analysis)
 *
 * Dropping "es" unconditionally would turn "types" into "typ" and
 * "databases" into "databas", so "es" is only removed after a sibilant.
 *
 * @param {string} token - Normalised (lower-case) token.
 * @returns {string} Stemmed token, or the token unchanged when no rule applies.
 */
function stemToken(token) {
  if (token.length > 4 && token.endsWith("ies")) {
    return `${token.slice(0, -3)}y`;
  }
  if (token.length > 4 && /(?:ss|zz|x|ch|sh)es$/.test(token)) {
    return token.slice(0, -2);
  }
  if (token.length > 3 && token.endsWith("s") && !/(?:ss|us|is)$/.test(token)) {
    return token.slice(0, -1);
  }
  return token;
}
44
/**
 * Splits text into identifier-like tokens (a letter or underscore followed by
 * at least one more letter, digit or underscore) and normalises each one.
 *
 * @param {string} text - Text to tokenise.
 * @returns {string[]} Normalised tokens in order of appearance; empty when none match.
 */
function tokenizeText(text) {
  const identifierPattern = /\b[a-zA-Z_][a-zA-Z0-9_]{1,}\b/g;
  const rawTokens = text.match(identifierPattern);
  if (rawTokens == null) {
    return [];
  }
  return rawTokens.map(normalizeToken);
}
47
/**
 * Expands a token into the set of variants it should match: its normalised
 * form, its stem, and domain synonyms for a few known concepts
 * (database, query, define).
 *
 * @param {string} token - Raw token.
 * @returns {string[]} Distinct variants of length >= 2, normalised form first.
 */
function expandToken(token) {
  const lowered = normalizeToken(token);
  const stem = stemToken(lowered);

  // Each group lists when it applies and which synonyms it contributes.
  // Group order and synonym order determine the order of the result.
  const synonymGroups = [
    {
      applies: lowered === "database" || stem === "database",
      synonyms: ["db", "sqlite", "sql", "schema"],
    },
    {
      applies: lowered === "query" || lowered === "queries" || stem === "query",
      synonyms: ["sql", "select", "insert", "update", "delete", "template", "templates"],
    },
    {
      applies: lowered === "defined" || stem === "define",
      synonyms: ["template", "schema"],
    },
  ];

  const collected = new Set([lowered, stem]);
  for (const group of synonymGroups) {
    if (!group.applies) {
      continue;
    }
    for (const synonym of group.synonyms) {
      collected.add(synonym);
    }
  }
  return [...collected].filter((variant) => variant.length >= 2);
}
72
/**
 * Builds a short excerpt around a line: the line itself plus one line of
 * context on each side where available.
 *
 * @param {string[]} lines - All lines of the file.
 * @param {number} lineIndex - Zero-based index of the line of interest.
 * @returns {string} Up to three lines joined with "\n".
 */
function buildSnippet(lines, lineIndex) {
  const firstIndex = Math.max(0, lineIndex - 1);
  const endExclusive = Math.min(lines.length, lineIndex + 2);
  const context = lines.slice(firstIndex, endExclusive);
  return context.join("\n");
}
77
/**
 * Tells whether a line looks like a comment or consists solely of a string
 * literal (optionally followed by "," or ";").
 *
 * @param {string} line - One source line.
 * @returns {boolean} True for comment-like or string-only lines; false for
 *   blank lines and everything else.
 */
function isCommentLikeOrStringOnly(line) {
  const text = line.trim();
  if (text === "") {
    return false;
  }
  const commentOpeners = ["//", "/*", "*", "#"];
  const looksLikeComment = commentOpeners.some((opener) => text.startsWith(opener));
  const isBareString = /^['"`].*['"`][,;]?$/.test(text);
  return looksLikeComment || isBareString;
}
92
/**
 * Parses a free-text query into the different kinds of search targets used
 * by the evidence scan.
 *
 * @param {string} query - User query.
 * @returns {{
 *   sentenceTargets: string[],
 *   filenameTargets: string[],
 *   explicitPathTargets: string[],
 *   baseNameTargets: string[],
 *   symbolTargets: string[],
 *   queryTokenSet: Set<string>,
 *   pathSignalTokenSet: Set<string>
 * }} Targets:
 *   - sentenceTargets: quoted phrases, or (when nothing is quoted) sentence-like
 *     segments longer than 12 characters.
 *   - filenameTargets: words that end in a recognised file extension.
 *   - explicitPathTargets: filename targets containing a path separator,
 *     normalised to forward slashes.
 *   - baseNameTargets: file names without directory or extension.
 *   - symbolTargets: lower-case tokens (length >= 3) that are not stopwords,
 *     not part of a quoted phrase and not a file base name.
 *   - queryTokenSet: expanded tokens minus stopwords.
 *   - pathSignalTokenSet: expanded tokens minus stopwords, weak and generic tokens.
 */
function extractTargets(query) {
  // A quoted phrase must open and close with the SAME quote character, and the
  // quotes must not be glued to a word character on the outside. That keeps
  // apostrophes in words such as "don't" or "TypeScript's" from being read as
  // quote delimiters.
  const quotedRegex = /(?<![A-Za-z0-9])(['"`])(.+?)\1(?![A-Za-z0-9])/g;
  const quotedSentenceTargets = [];
  for (const quoteMatch of query.matchAll(quotedRegex)) {
    const target = quoteMatch[2].trim();
    if (target.length >= 3) {
      quotedSentenceTargets.push(target);
    }
  }

  // Only fall back to sentence splitting when the user quoted nothing.
  const heuristicSentenceTargets = quotedSentenceTargets.length
    ? []
    : query
      .split(/[\.\n]/)
      .map((segment) => segment.trim())
      .filter((segment) => segment.length > 12);
  const sentenceTargets = uniq([...quotedSentenceTargets, ...heuristicSentenceTargets]);

  // Strip wrapping quotes/parentheses and trailing sentence punctuation so
  // that "(src/foo.ts)." or "README.md?" are still recognised as file names.
  const filenameTargets = uniq(query
    .split(/\s+/)
    .map((word) => word.replace(/^['"`(]+|[,.;:!?'"`)]+$/g, "").trim())
    .filter((word) => FILE_EXT_REGEX.test(word)));
  const explicitPathTargets = uniq(filenameTargets
    .filter((target) => target.includes("/") || target.includes("\\"))
    .map((target) => target.replace(/\\/g, "/")));
  // Normalise backslashes first: path.basename does not treat "\" as a
  // separator on POSIX, which would leave the whole Windows-style path intact.
  const baseNameTargets = uniq(filenameTargets.map((name) => path.basename(name.replace(/\\/g, "/").replace(FILE_EXT_REGEX, ""))));

  const tokens = tokenizeText(query).filter((token) => token.length >= 3);
  const expandedTokens = uniq(tokens.flatMap((token) => expandToken(token)));

  // Tokens are lower case, so compare against lower-cased phrases/base names;
  // otherwise camelCase file names would never be excluded.
  const loweredQuoted = quotedSentenceTargets.map((sentence) => sentence.toLowerCase());
  const loweredBaseNames = new Set(baseNameTargets.map((name) => name.toLowerCase()));
  const symbolTargets = uniq(tokens.filter((token) => {
    if (STOPWORDS.has(token)) {
      return false;
    }
    if (loweredQuoted.some((sentence) => sentence.includes(token))) {
      return false;
    }
    return !loweredBaseNames.has(token);
  }));

  const queryTokenSet = new Set(expandedTokens.filter((token) => !STOPWORDS.has(token)));
  const pathSignalTokenSet = new Set(expandedTokens.filter((token) => !STOPWORDS.has(token) &&
    !WEAK_TOKENS.has(token) &&
    !GENERIC_TOKENS.has(token) &&
    token.length >= 2));

  return {
    sentenceTargets,
    filenameTargets,
    explicitPathTargets,
    baseNameTargets,
    symbolTargets,
    queryTokenSet,
    pathSignalTokenSet
  };
}
150
/**
 * Assigns a confidence to a symbol match.
 *
 * @param {string} symbol - Symbol that was found in a file.
 * @param {string[]} sentenceTargets - Sentence targets extracted from the query.
 * @returns {number} 0.95 when the symbol occurs inside a sentence target,
 *   0.45 when it is a weak token, otherwise 0.85.
 */
function computeSymbolConfidence(symbol, sentenceTargets) {
  const needle = normalizeToken(symbol);
  for (const sentence of sentenceTargets) {
    if (sentence.toLowerCase().includes(needle)) {
      return 0.95;
    }
  }
  return WEAK_TOKENS.has(needle) ? 0.45 : 0.85;
}
160
/**
 * Collapses duplicate evidence items. Two items are duplicates when they share
 * evidence type, claim (case-insensitive) and span; the one with the higher
 * confidence wins, and ties keep the earlier item.
 *
 * @param {Array<{evidence: {type: string, claim: string, span?: {startLine?: number, endLine?: number}, confidence?: number}}>} items
 *   Evidence wrappers to de-duplicate.
 * @returns {Array} Surviving items, ordered by first appearance of their key.
 */
function dedupeEvidence(items) {
  const winners = new Map();
  items.forEach((candidate) => {
    const { type, claim, span, confidence } = candidate.evidence;
    const spanKey = `${span?.startLine ?? 0}:${span?.endLine ?? 0}`;
    const key = `${type}|${claim.toLowerCase()}|${spanKey}`;
    const current = winners.get(key);
    const beatsCurrent = !current || (confidence ?? 0) > (current.evidence.confidence ?? 0);
    if (beatsCurrent) {
      winners.set(key, candidate);
    }
  });
  return [...winners.values()];
}
173
/**
 * Scores how tightly evidence clusters within a file.
 *
 * Based on the smallest gap between two distinct evidence lines:
 *   gap <= 2 -> +0.08, gap <= 5 -> +0.05, gap <= 10 -> +0.02, gap > 30 -> -0.06.
 * An extra -0.04 applies when three or more distinct lines span more than
 * 120 lines.
 *
 * @param {number[]} lines - 1-based line numbers of evidence items.
 * @returns {number} Adjustment to add to the file confidence; 0 when fewer
 *   than two distinct lines are given.
 */
function computeProximityAdjustment(lines) {
  const sorted = Array.from(new Set(lines.map(Number))).sort((a, b) => a - b);
  // Several evidence items on the same line say nothing about spread. Without
  // this guard the minimum gap would stay at Infinity and be penalised as
  // "far apart".
  if (sorted.length < 2) {
    return 0;
  }
  let minGap = Number.POSITIVE_INFINITY;
  for (let i = 1; i < sorted.length; i++) {
    minGap = Math.min(minGap, sorted[i] - sorted[i - 1]);
  }
  let adjustment = 0;
  if (minGap <= 2) {
    adjustment += 0.08;
  } else if (minGap <= 5) {
    adjustment += 0.05;
  } else if (minGap <= 10) {
    adjustment += 0.02;
  } else if (minGap > 30) {
    adjustment -= 0.06;
  }
  const spread = sorted[sorted.length - 1] - sorted[0];
  if (sorted.length >= 3 && spread > 120) {
    adjustment -= 0.04;
  }
  return adjustment;
}
196
/**
 * Combines all evidence found in one file into a single confidence score.
 *
 * Steps:
 *  1. Per evidence type, sum weight * quality with diminishing returns for
 *     repeated hits, capped per type.
 *  2. Squash the sum into [0, 1) with 1 - exp(-sum / 1.8).
 *  3. Add boosts for type diversity, matched symbol coverage and proximity;
 *     subtract penalties for weak tokens and comment-only symbol hits.
 *
 * @param {Array<{evidence: {type: string, confidence?: number}, line: number, token?: string, weakToken?: boolean, commentLike?: boolean}>} evidenceWithMeta
 *   De-duplicated evidence items with scan metadata.
 * @param {string[]} symbolTargets - Symbol targets extracted from the query.
 * @returns {{baseSum: number, normalizedBase: number, coverageBoost: number,
 *   tokenCoverageBoost: number, proximityAdjustment: number,
 *   genericPenalty: number, commentPenalty: number, final: number}}
 *   The individual score components and the clamped final confidence.
 */
function computeFileConfidence(evidenceWithMeta, symbolTargets) {
  const weights = {
    sentence: 1.0,
    filename: 0.9,
    symbol: 0.7,
    structural: 0.75,
    "keyword-cluster": 0.4
  };
  const caps = {
    sentence: 1.3,
    filename: 0.9,
    symbol: 1.1,
    structural: 1.1,
    "keyword-cluster": 0.5
  };
  const isSymbolLike = (item) => item.evidence.type === "symbol" || item.evidence.type === "structural";

  const grouped = new Map();
  for (const item of evidenceWithMeta) {
    const type = item.evidence.type;
    const bucket = grouped.get(type);
    if (bucket) {
      bucket.push(item);
    } else {
      grouped.set(type, [item]);
    }
  }

  let baseSum = 0;
  for (const [type, items] of grouped.entries()) {
    const weight = weights[type];
    const cap = caps[type];
    // An evidence type without a configured weight would turn the whole score
    // into NaN; let it contribute nothing instead.
    if (typeof weight !== "number" || typeof cap !== "number") {
      continue;
    }
    const sorted = [...items].sort((a, b) => (b.evidence.confidence ?? 0) - (a.evidence.confidence ?? 0));
    let typeScore = 0;
    sorted.forEach((item, index) => {
      // The strongest hit counts fully; each further hit of the same type counts less.
      const diminishing = index === 0 ? 1 : 1 / (1 + index * 1.4);
      const quality = clamp(item.evidence.confidence ?? 0.8);
      typeScore += weight * quality * diminishing;
    });
    baseSum += Math.min(typeScore, cap);
  }
  const normalizedBase = clamp(1 - Math.exp(-baseSum / 1.8));

  const distinctTypeCount = grouped.size;
  let coverageBoost = 0.14;
  if (distinctTypeCount <= 1) {
    coverageBoost = 0;
  } else if (distinctTypeCount === 2) {
    coverageBoost = 0.05;
  } else if (distinctTypeCount === 3) {
    coverageBoost = 0.1;
  }

  const symbolLikeEvidence = evidenceWithMeta.filter(isSymbolLike);
  const matchedSymbols = new Set(symbolLikeEvidence
    .filter((item) => item.token)
    .map((item) => normalizeToken(item.token)));
  // Structural matches can carry tokens that are not symbol targets, so the
  // ratio may exceed 1; cap it so the boost never exceeds 0.12.
  const tokenCoverageBoost = symbolTargets.length > 0
    ? 0.12 * Math.min(1, matchedSymbols.size / symbolTargets.length)
    : 0;

  const weakEvidenceCount = symbolLikeEvidence.filter((item) => item.weakToken).length;
  const genericPenalty = symbolLikeEvidence.length > 0
    ? 0.2 * (weakEvidenceCount / symbolLikeEvidence.length)
    : 0;
  const commentLikeSymbolCount = evidenceWithMeta.filter((item) => item.evidence.type === "symbol" && item.commentLike).length;
  const commentPenalty = symbolLikeEvidence.length > 0
    ? 0.15 * (commentLikeSymbolCount / symbolLikeEvidence.length)
    : 0;

  // Line 0 marks file-level evidence (filename/path/import) with no position.
  const proximityLines = evidenceWithMeta
    .map((item) => item.line)
    .filter((line) => line > 0);
  const proximityAdjustment = computeProximityAdjustment(proximityLines);

  const final = clamp(normalizedBase +
    coverageBoost +
    tokenCoverageBoost +
    proximityAdjustment -
    genericPenalty -
    commentPenalty);
  return {
    baseSum,
    normalizedBase,
    coverageBoost,
    tokenCoverageBoost,
    proximityAdjustment,
    genericPenalty,
    commentPenalty,
    final
  };
}
4
274
  /**
5
275
  * Deterministic evidence verification:
6
- * - Scans candidate files line-by-line for meaningful matches to the query.
7
- * - Filters stopwords and short tokens.
8
- * - Deduplicates symbol evidence per file.
9
- * - Removes low-signal keyword clustering.
10
- * - Strictly leverages structural data (functions, classes, imports/exports) for additional evidence.
276
+ * - Scans candidate files for concrete sentence/symbol/filename/structural matches.
277
+ * - Uses identifier-boundary matching to reduce substring false positives.
278
+ * - Deduplicates evidence globally per file.
279
+ * - Computes confidence with bounded weighted scoring + proximity/coverage/penalties.
11
280
  */
12
281
  export const evidenceVerifierStep = {
13
282
  name: "evidenceVerifier",
14
- description: "Deterministic evidence-first scan over candidate files to populate fileAnalysis, with filename dominance.",
283
+ description: "Deterministic evidence-first scan over candidate files to populate fileAnalysis, with calibrated confidence.",
15
284
  groups: ["analysis"],
16
285
  run: async (input) => {
17
286
  var _a, _b;
@@ -21,326 +290,227 @@ export const evidenceVerifierStep = {
21
290
  throw new Error("[evidenceVerifier] context.analysis is required.");
22
291
  }
23
292
  (_a = context.analysis).fileAnalysis ?? (_a.fileAnalysis = {});
24
- const candidatePaths = [
25
- ...(context.initContext?.relatedFiles ?? []),
26
- ];
27
- const uniquePaths = Array.from(new Set(candidatePaths));
293
+ const uniquePaths = uniq([...(context.initContext?.relatedFiles ?? [])]);
28
294
  if (!uniquePaths.length) {
29
295
  console.warn("[evidenceVerifier] No candidate files to scan.");
30
296
  return { query, data: {} };
31
297
  }
32
- // ----------------- Stopwords -----------------
33
- const STOPWORDS = new Set([
34
- "the", "and", "for", "with", "from", "that",
35
- "this", "are", "was", "were", "has", "have",
36
- "had", "not", "but", "can", "could", "should",
37
- "would", "into", "onto", "about", "above",
38
- "below", "under", "over", "then", "else",
39
- "when", "where", "what", "which", "while",
40
- "return", "const", "let", "var", "true", "false",
41
- "null", "undefined", "new", "set", "get",
42
- "in", "to", "of", "on", "at", "by"
43
- ]);
44
- // ----------------- Parse query for targets -----------------
45
- const sentenceTargets = [];
46
- const quoteRegex = /['"`](.+?)['"`]/g;
47
- let match;
48
- while ((match = quoteRegex.exec(query)) !== null) {
49
- sentenceTargets.push(match[1]);
50
- }
51
- if (!sentenceTargets.length) {
52
- const heuristicSentences = query
53
- .split(/[\.\n]/)
54
- .map(s => s.trim())
55
- .filter(s => s.length > 10);
56
- heuristicSentences.forEach(s => {
57
- if (!sentenceTargets.includes(s)) {
58
- sentenceTargets.push(s);
59
- }
60
- });
61
- }
62
- const filenameTargets = query
63
- .split(/\s+/)
64
- .map(word => word.replace(/['",]/g, ''))
65
- .filter(w => w.match(/\.(ts|js|tsx|md)$/));
66
- const baseNameTargets = filenameTargets.map(t => t.replace(/\.(ts|js|tsx|md)$/, ''));
67
- // ---- Symbol extraction (filtered + deduplicated) ----
68
- const symbolTargets = [];
69
- const symbolRegex = /\b([a-zA-Z_]\w{2,})(?:\(\))?\b/g;
70
- let symMatch;
71
- while ((symMatch = symbolRegex.exec(query)) !== null) {
72
- const token = symMatch[1];
73
- if (token.length >= 3 &&
74
- !STOPWORDS.has(token.toLowerCase()) &&
75
- !sentenceTargets.includes(token)) {
76
- symbolTargets.push(token);
77
- }
78
- }
79
- const uniqueSymbolTargets = Array.from(new Set(symbolTargets));
80
- // ----------------- Token strength tiering -----------------
81
- const WEAK_TOKENS = new Set([
82
- "file",
83
- "line",
84
- "move",
85
- "update",
86
- "change",
87
- "modify",
88
- "readme"
89
- ]);
90
- function computeSymbolConfidence(sym) {
91
- const lower = sym.toLowerCase();
92
- // If symbol appears inside quoted sentence → very strong
93
- const fromQuoted = sentenceTargets.some(s => s.toLowerCase().includes(lower));
94
- if (fromQuoted)
95
- return 0.95;
96
- // Weak structural tokens → low weight
97
- if (WEAK_TOKENS.has(lower))
98
- return 0.5;
99
- // Default meaningful symbol
100
- return 0.85;
101
- }
102
- // ----------------- Process each file -----------------
103
- for (const path of uniquePaths) {
104
- let code = null;
298
+ const { sentenceTargets, filenameTargets, explicitPathTargets, baseNameTargets, symbolTargets, queryTokenSet, pathSignalTokenSet } = extractTargets(query);
299
+ for (const filePath of uniquePaths) {
300
+ let code = "";
105
301
  try {
106
- code = fs.readFileSync(path, "utf-8");
302
+ code = fs.readFileSync(filePath, "utf-8");
107
303
  }
108
304
  catch (err) {
109
- console.warn(`[evidenceVerifier] Failed to read ${path}: ${err.message}`);
305
+ console.warn(`[evidenceVerifier] Failed to read ${filePath}: ${err.message}`);
110
306
  }
111
- const lines = code?.split("\n") ?? [];
112
- const evidenceItems = [];
113
- const matchedLines = [];
114
- const addedSymbols = new Set();
115
- // -------- Sentence matches --------
307
+ const lines = code ? code.split("\n") : [];
308
+ const rawEvidence = [];
309
+ const addEvidence = (evidence, line, token, commentLike) => {
310
+ rawEvidence.push({
311
+ evidence,
312
+ line,
313
+ token,
314
+ weakToken: token ? WEAK_TOKENS.has(normalizeToken(token)) : false,
315
+ commentLike
316
+ });
317
+ };
318
+ const loweredSentenceTargets = sentenceTargets.map((target) => target.toLowerCase());
116
319
  lines.forEach((line, idx) => {
117
- sentenceTargets.forEach(target => {
118
- if (line.includes(target)) {
119
- const snippet = lines
120
- .slice(Math.max(0, idx - 1), Math.min(lines.length, idx + 2))
121
- .join("\n");
122
- evidenceItems.push({
123
- claim: `Sentence match: "${target}"`,
320
+ const loweredLine = line.toLowerCase();
321
+ loweredSentenceTargets.forEach((target, targetIndex) => {
322
+ if (target.length >= 3 && loweredLine.includes(target)) {
323
+ const originalTarget = sentenceTargets[targetIndex];
324
+ addEvidence({
325
+ claim: `Sentence match: "${originalTarget}"`,
124
326
  type: "sentence",
125
- excerpt: snippet,
327
+ excerpt: buildSnippet(lines, idx),
126
328
  span: { startLine: idx + 1, endLine: idx + 1 },
127
- confidence: 1,
128
- });
129
- matchedLines.push(line);
329
+ confidence: 1
330
+ }, idx + 1);
130
331
  }
131
332
  });
132
333
  });
133
- // -------- Symbol matches --------
134
- uniqueSymbolTargets.forEach(sym => {
334
+ for (const symbol of symbolTargets) {
335
+ const regex = new RegExp(`\\b${escapeRegex(symbol)}\\b`, "i");
135
336
  for (let idx = 0; idx < lines.length; idx++) {
136
337
  const line = lines[idx];
137
- if (line.includes(`function ${sym}`) ||
138
- line.includes(`class ${sym}`) ||
139
- line.includes(sym)) {
140
- if (!addedSymbols.has(sym)) {
141
- addedSymbols.add(sym);
142
- const snippet = lines
143
- .slice(Math.max(0, idx - 1), Math.min(lines.length, idx + 2))
144
- .join("\n");
145
- evidenceItems.push({
146
- claim: `Symbol reference found: "${sym}"`,
147
- type: "symbol",
148
- excerpt: snippet,
149
- span: { startLine: idx + 1, endLine: idx + 1 },
150
- confidence: computeSymbolConfidence(sym),
151
- });
152
- matchedLines.push(line);
153
- }
154
- break;
155
- }
338
+ if (!regex.test(line))
339
+ continue;
340
+ addEvidence({
341
+ claim: `Symbol reference found: "${symbol}"`,
342
+ type: "symbol",
343
+ excerpt: buildSnippet(lines, idx),
344
+ span: { startLine: idx + 1, endLine: idx + 1 },
345
+ confidence: computeSymbolConfidence(symbol, sentenceTargets)
346
+ }, idx + 1, symbol, isCommentLikeOrStringOnly(line));
347
+ break;
156
348
  }
157
- });
158
- // -------- Filename-level evidence --------
159
- const fullFileName = path.split("/").pop() ?? "";
160
- const baseFileName = fullFileName.replace(/\.(ts|js|tsx|md)$/, "");
161
- if (filenameTargets.includes(fullFileName) ||
162
- baseNameTargets.includes(baseFileName)) {
163
- evidenceItems.push({
164
- claim: `Filename matches query target: "${fullFileName}"`,
349
+ }
350
+ const normalizedFilePath = filePath.replace(/\\/g, "/");
351
+ const fullFileName = path.basename(filePath);
352
+ const baseFileName = fullFileName.replace(FILE_EXT_REGEX, "");
353
+ const fileNameTargetNames = new Set(filenameTargets.map((target) => path.basename(target)));
354
+ const exactPathMatch = explicitPathTargets.some((target) => normalizedFilePath === target ||
355
+ normalizedFilePath.endsWith(`/${target}`));
356
+ const fileNameMatch = exactPathMatch ||
357
+ fileNameTargetNames.has(fullFileName) ||
358
+ baseNameTargets.includes(baseFileName);
359
+ if (fileNameMatch) {
360
+ addEvidence({
361
+ claim: exactPathMatch
362
+ ? `File path exactly matches query target: "${fullFileName}"`
363
+ : `Filename matches query target: "${fullFileName}"`,
165
364
  type: "filename",
166
- excerpt: `Path: ${path}`,
365
+ excerpt: `Path: ${filePath}`,
167
366
  span: { startLine: 0, endLine: 0 },
168
- confidence: 1,
169
- });
367
+ confidence: 1
368
+ }, 0, fullFileName);
369
+ }
370
+ const filePathTokens = new Set(tokenizeText(filePath).flatMap((token) => expandToken(token)));
371
+ const matchingPathTokens = Array.from(pathSignalTokenSet).filter((token) => filePathTokens.has(token));
372
+ if (matchingPathTokens.length > 0) {
373
+ const pathConfidence = clamp(0.5 + matchingPathTokens.length * 0.08, 0.5, 0.86);
374
+ addEvidence({
375
+ claim: `Path tokens align with query intent: ${matchingPathTokens.slice(0, 5).join(", ")}`,
376
+ type: "keyword-cluster",
377
+ excerpt: `Path: ${filePath}`,
378
+ span: { startLine: 0, endLine: 0 },
379
+ confidence: pathConfidence
380
+ }, 0, matchingPathTokens[0]);
170
381
  }
171
- // -------- Structural evidence (strict) --------
172
- const struct = context.analysis.fileAnalysis[path]?.structural;
382
+ const struct = context.analysis.fileAnalysis[filePath]?.structural;
173
383
  const structuralEvidence = [];
174
384
  if (struct) {
175
- const queryTokens = query
176
- .toLowerCase()
177
- .match(/\b\w{3,}\b/g) ?? [];
178
- const querySet = new Set(queryTokens);
179
- (struct.functions ?? []).forEach(fn => {
180
- if (fn.name && querySet.has(fn.name.toLowerCase())) {
181
- const ev = {
182
- claim: `Function name matches query: "${fn.name}"`,
183
- type: "structural",
184
- excerpt: fn.name,
185
- span: { startLine: fn.start ?? 0, endLine: fn.end ?? 0 },
186
- confidence: 0.85,
187
- };
188
- evidenceItems.push(ev);
189
- structuralEvidence.push(ev);
190
- }
385
+ (struct.functions ?? []).forEach((fn) => {
386
+ if (!fn.name)
387
+ return;
388
+ const normalized = normalizeToken(fn.name);
389
+ if (!queryTokenSet.has(normalized))
390
+ return;
391
+ const ev = {
392
+ claim: `Function name matches query: "${fn.name}"`,
393
+ type: "structural",
394
+ excerpt: fn.name,
395
+ span: { startLine: fn.start ?? 0, endLine: fn.end ?? 0 },
396
+ confidence: WEAK_TOKENS.has(normalized) ? 0.6 : 0.85
397
+ };
398
+ structuralEvidence.push(ev);
399
+ addEvidence(ev, fn.start ?? 0, fn.name);
191
400
  });
192
- (struct.classes ?? []).forEach(cls => {
193
- if (cls.name && querySet.has(cls.name.toLowerCase())) {
194
- const ev = {
195
- claim: `Class name matches query: "${cls.name}"`,
196
- type: "structural",
197
- excerpt: cls.name,
198
- span: { startLine: cls.start ?? 0, endLine: cls.end ?? 0 },
199
- confidence: 0.85,
200
- };
201
- evidenceItems.push(ev);
202
- structuralEvidence.push(ev);
203
- }
401
+ (struct.classes ?? []).forEach((cls) => {
402
+ if (!cls.name)
403
+ return;
404
+ const normalized = normalizeToken(cls.name);
405
+ if (!queryTokenSet.has(normalized))
406
+ return;
407
+ const ev = {
408
+ claim: `Class name matches query: "${cls.name}"`,
409
+ type: "structural",
410
+ excerpt: cls.name,
411
+ span: { startLine: cls.start ?? 0, endLine: cls.end ?? 0 },
412
+ confidence: WEAK_TOKENS.has(normalized) ? 0.6 : 0.85
413
+ };
414
+ structuralEvidence.push(ev);
415
+ addEvidence(ev, cls.start ?? 0, cls.name);
204
416
  });
205
- [...(struct.imports ?? []), ...(struct.exports ?? [])].forEach(sym => {
206
- if (sym && querySet.has(sym.toLowerCase())) {
207
- const ev = {
208
- claim: `Import/Export matches query: "${sym}"`,
209
- type: "structural",
210
- excerpt: sym,
211
- span: { startLine: 0, endLine: 0 },
212
- confidence: 0.85,
213
- };
214
- evidenceItems.push(ev);
215
- structuralEvidence.push(ev);
216
- }
417
+ [...(struct.imports ?? []), ...(struct.exports ?? [])].forEach((symbol) => {
418
+ if (!symbol)
419
+ return;
420
+ const normalized = normalizeToken(symbol);
421
+ if (!queryTokenSet.has(normalized))
422
+ return;
423
+ const ev = {
424
+ claim: `Import/Export matches query: "${symbol}"`,
425
+ type: "structural",
426
+ excerpt: symbol,
427
+ span: { startLine: 0, endLine: 0 },
428
+ confidence: WEAK_TOKENS.has(normalized) ? 0.55 : 0.8
429
+ };
430
+ structuralEvidence.push(ev);
431
+ addEvidence(ev, 0, symbol);
217
432
  });
218
- // -------- Log structural evidence per file --------
219
433
  if (structuralEvidence.length > 0) {
220
434
  logInputOutput("evidenceVerifier", "output", {
221
- file: path,
435
+ file: filePath,
222
436
  count: structuralEvidence.length,
223
- examples: structuralEvidence.slice(0, 5).map(ev => ({
437
+ examples: structuralEvidence.slice(0, 5).map((ev) => ({
224
438
  claim: ev.claim,
225
439
  excerpt: ev.excerpt,
226
- confidence: ev.confidence,
227
- })),
440
+ confidence: ev.confidence
441
+ }))
228
442
  });
229
443
  }
230
444
  }
231
- // -------- Structural evidence (strict) --------
232
- if (struct) {
233
- const queryTokens = query
234
- .toLowerCase()
235
- .match(/\b\w{3,}\b/g) ?? [];
236
- const querySet = new Set(queryTokens);
237
- (struct.functions ?? []).forEach(fn => {
238
- if (fn.name && querySet.has(fn.name.toLowerCase())) {
239
- evidenceItems.push({
240
- claim: `Function name matches query: "${fn.name}"`,
241
- type: "structural",
242
- excerpt: fn.name,
243
- span: { startLine: fn.start ?? 0, endLine: fn.end ?? 0 },
244
- confidence: 0.85,
245
- });
246
- }
247
- });
248
- (struct.classes ?? []).forEach(cls => {
249
- if (cls.name && querySet.has(cls.name.toLowerCase())) {
250
- evidenceItems.push({
251
- claim: `Class name matches query: "${cls.name}"`,
252
- type: "structural",
253
- excerpt: cls.name,
254
- span: { startLine: cls.start ?? 0, endLine: cls.end ?? 0 },
255
- confidence: 0.85,
256
- });
257
- }
258
- });
259
- [...(struct.imports ?? []), ...(struct.exports ?? [])].forEach(sym => {
260
- if (sym && querySet.has(sym.toLowerCase())) {
261
- evidenceItems.push({
262
- claim: `Import/Export matches query: "${sym}"`,
263
- type: "structural",
264
- excerpt: sym,
265
- span: { startLine: 0, endLine: 0 },
266
- confidence: 0.85,
267
- });
268
- }
269
- });
270
- }
271
- // -------- Compute file-level confidence --------
272
- let fileScore = 0;
273
- for (const ev of evidenceItems) {
274
- if (ev.type === "sentence")
275
- fileScore += 1.0;
276
- else if (ev.type === "filename")
277
- fileScore += 1.0;
278
- else if (ev.type === "symbol" || ev.type === "structural")
279
- fileScore += ev.confidence ?? 0.8;
280
- }
281
- const fileConfidence = fileScore === 0
282
- ? 0
283
- : Math.min(1, fileScore / 3);
284
- const isFocusFile = context.analysis.focus?.selectedFiles?.includes(path) ?? false;
445
+ const dedupedEvidence = dedupeEvidence(rawEvidence);
446
+ const evidenceItems = dedupedEvidence.map((item) => item.evidence);
447
+ const score = computeFileConfidence(dedupedEvidence, symbolTargets);
448
+ const hasFilenameHit = evidenceItems.some((ev) => ev.type === "filename");
449
+ const fileConfidence = hasFilenameHit ? 1 : score.final;
450
+ const matchedLines = uniq(dedupedEvidence
451
+ .map((item) => (item.line > 0 ? lines[item.line - 1] : ""))
452
+ .filter(Boolean));
453
+ const isFocusFile = context.analysis.focus?.selectedFiles?.includes(filePath) ?? false;
285
454
  const hasEvidence = evidenceItems.length > 0;
286
- // -------- Merge into fileAnalysis --------
287
- if (isFocusFile || hasEvidence) {
455
+ const isRelevantByConfidence = fileConfidence >= 0.25;
456
+ const isRelevant = isFocusFile || isRelevantByConfidence;
457
+ if (isRelevant && hasEvidence) {
288
458
  const confidenceLabel = fileConfidence.toFixed(2);
289
- context.analysis.fileAnalysis[path] = {
290
- ...context.analysis.fileAnalysis[path],
459
+ context.analysis.fileAnalysis[filePath] = {
460
+ ...context.analysis.fileAnalysis[filePath],
291
461
  intent: "relevant",
292
- relevanceExplanation: `[confidence:${confidenceLabel}] ${evidenceItems.length} evidence item(s) match the query${isFocusFile ? " (focus file already selected)" : ""}`,
462
+ relevanceExplanation: `[confidence:${confidenceLabel}] ${evidenceItems.length} evidence item(s) match the query${isFocusFile ? " (focus file already selected)" : ""}; components base=${score.normalizedBase.toFixed(2)}, coverage=${(score.coverageBoost + score.tokenCoverageBoost).toFixed(2)}, proximity=${score.proximityAdjustment.toFixed(2)}, penalties=${(score.genericPenalty + score.commentPenalty).toFixed(2)}`,
293
463
  role: "primary",
294
464
  action: {
295
465
  isRelevant: true,
296
- shouldModify: hasEvidence,
466
+ shouldModify: hasEvidence
297
467
  },
298
468
  proposedChanges: hasEvidence
299
469
  ? {
300
470
  summary: "Evidence found in file",
301
471
  scope: "minor",
302
- targets: matchedLines.length
303
- ? Array.from(new Set(matchedLines))
304
- : undefined,
472
+ targets: matchedLines.length ? matchedLines : undefined,
473
+ rationale: `calibrated-confidence=${confidenceLabel}`
305
474
  }
306
475
  : {
307
476
  summary: "No evidence found",
308
- scope: "none",
477
+ scope: "none"
309
478
  },
310
479
  semanticAnalyzed: false,
311
480
  risks: hasEvidence
312
481
  ? []
313
482
  : ["No concrete evidence found; modification not permitted"],
314
- evidence: evidenceItems,
483
+ evidence: evidenceItems
315
484
  };
316
485
  }
317
486
  else {
318
- (_b = context.analysis.fileAnalysis)[path] || (_b[path] = {
487
+ (_b = context.analysis.fileAnalysis)[filePath] || (_b[filePath] = {
488
+ intent: "irrelevant",
319
489
  action: { isRelevant: false, shouldModify: false },
320
490
  proposedChanges: {
321
491
  summary: "No evidence found",
322
- scope: "none",
492
+ scope: "none"
323
493
  },
324
- semanticAnalyzed: false,
494
+ semanticAnalyzed: false
325
495
  });
326
496
  }
327
497
  }
328
498
  const output = {
329
499
  query,
330
- data: { fileAnalysis: context.analysis.fileAnalysis },
500
+ data: { fileAnalysis: context.analysis.fileAnalysis }
331
501
  };
332
- const logSummary = Object.entries(context.analysis.fileAnalysis).map(([path, analysis]) => {
502
+ const logSummary = Object.entries(context.analysis.fileAnalysis).map(([filePath, analysis]) => {
333
503
  const evidenceCount = analysis.evidence?.length ?? 0;
334
504
  const confidenceMatch = analysis.relevanceExplanation?.match(/\[confidence:(\d+\.\d+)\]/);
335
505
  const confidence = confidenceMatch?.[1] ?? "0.00";
336
506
  return {
337
- file: path,
507
+ file: filePath,
338
508
  confidence,
339
509
  evidenceCount,
340
- isRelevant: analysis.action?.isRelevant ?? false,
510
+ isRelevant: analysis.action?.isRelevant ?? false
341
511
  };
342
512
  });
343
513
  logInputOutput("evidenceVerifier", "output", logSummary);
344
514
  return output;
345
- },
515
+ }
346
516
  };
package/dist/index.js CHANGED
@@ -18,6 +18,7 @@ const program = cmdFactory();
18
18
  const customCommands = {};
19
19
  // ---------------- Test Queries ----------------
20
20
  const testQueries = [
21
+ // General Questions
21
22
  'please write me comprehensive comments for semanticAnalysisModule.ts and typescript.ts files',
22
23
  'refactor mainagent to improve readability and reduce nesting',
23
24
  'explain the intent and architecture of the semantic analysis module',
@@ -34,7 +35,39 @@ const testQueries = [
34
35
  'How do I run the test suite?',
35
36
  'Are there any flaky tests in this repo?',
36
37
  'Are there any security vulnerabilities in our dependencies?',
37
- 'Is there any dead code we can safely remove?'
38
+ 'Is there any dead code we can safely remove?',
39
+ // Code Quality & Refactoring
40
+ 'Identify and suggest improvements for code smells in the auth module',
41
+ 'How can we improve the performance of the data processing pipeline?',
42
+ 'What are the best practices for handling asynchronous operations in this codebase?',
43
+ 'Suggest refactoring strategies for reducing cyclomatic complexity in the core service',
44
+ 'How can we improve test coverage for the API layer?',
45
+ 'What are the common anti-patterns in the current logging implementation?',
46
+ // Architecture & Design
47
+ 'Explain the layered architecture and how components interact with each other',
48
+ 'How is the dependency inversion principle applied in this codebase?',
49
+ 'What architectural patterns are evident in the current implementation?',
50
+ 'How is state managed across different modules?',
51
+ 'What are the trade-offs of using a monorepo vs multiple repos?',
52
+ 'How does the codebase handle cross-cutting concerns like logging and caching?',
53
+ // Dependency Management
54
+ 'Analyze the dependency tree for potential version conflicts',
55
+ 'How can we reduce the bundle size of the frontend application?',
56
+ 'What are the security implications of the current dependency versions?',
57
+ 'How are third-party libraries integrated and managed?',
58
+ 'What is the strategy for handling transitive dependencies?',
59
+ 'How do we ensure dependency updates don\'t break existing functionality?',
60
+ // Advanced TypeScript Features
61
+ 'Explain how generics are used throughout the codebase and suggest improvements',
62
+ 'How are discriminated unions implemented in this project?',
63
+ 'What are the benefits of using mapped types in the current code?',
64
+ 'How are conditional types utilized in the type system?',
65
+ 'What are the best practices for using decorators in this codebase?',
66
+ 'How does the project leverage TypeScript\'s type inference capabilities?',
67
+ 'Explain the usage of utility types like Partial, Pick, Omit, etc.',
68
+ 'How are module declarations and ambient types used?',
69
+ 'What are the common pitfalls when working with TypeScript types?',
70
+ 'How does the codebase handle type narrowing and assertion functions?'
38
71
  ];
39
72
  // ---------------- Helpers ----------------
40
73
  function pickRandom(items) {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "scai",
3
- "version": "0.1.169",
3
+ "version": "0.1.171",
4
4
  "type": "module",
5
5
  "bin": {
6
6
  "scai": "./dist/index.js"