scai 0.1.169 → 0.1.171
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agents/evidenceVerifierStep.js +423 -253
- package/dist/index.js +34 -1
- package/package.json +1 -1
package/dist/agents/evidenceVerifierStep.js
CHANGED
```diff
@@ -1,17 +1,286 @@
 // File: src/modules/evidenceVerifierStep.ts
 import fs from "fs";
+import path from "path";
 import { logInputOutput } from "../utils/promptLogHelper.js";
+const STOPWORDS = new Set([
+    "the", "and", "for", "with", "from", "that", "this", "are", "was", "were",
+    "has", "have", "had", "not", "but", "can", "could", "should", "would", "into",
+    "onto", "about", "above", "below", "under", "over", "then", "else", "when",
+    "where", "what", "which", "while", "return", "const", "let", "var", "true",
+    "false", "null", "undefined", "new", "set", "get", "in", "to", "of", "on",
+    "at", "by"
+]);
+const WEAK_TOKENS = new Set([
+    "file", "line", "move", "update", "change", "modify", "readme", "fix", "code"
+]);
+const GENERIC_TOKENS = new Set([
+    "defined", "define", "location", "where", "find", "code"
+]);
+const FILE_EXT_REGEX = /\.(ts|tsx|js|jsx|mjs|cjs|md)$/i;
+function clamp(value, min = 0, max = 1) {
+    return Math.max(min, Math.min(max, value));
+}
+function escapeRegex(value) {
+    return value.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
+}
+function uniq(values) {
+    return Array.from(new Set(values));
+}
+function normalizeToken(token) {
+    return token.toLowerCase();
+}
+function stemToken(token) {
+    if (token.endsWith("ies") && token.length > 4) {
+        return `${token.slice(0, -3)}y`;
+    }
+    if (token.endsWith("es") && token.length > 4) {
+        return token.slice(0, -2);
+    }
+    if (token.endsWith("s") && token.length > 3) {
+        return token.slice(0, -1);
+    }
+    return token;
+}
+function tokenizeText(text) {
+    return (text.match(/\b[a-zA-Z_][a-zA-Z0-9_]{1,}\b/g) ?? []).map(normalizeToken);
+}
+function expandToken(token) {
+    const normalized = normalizeToken(token);
+    const stemmed = stemToken(normalized);
+    const variants = new Set([normalized, stemmed]);
+    if (normalized === "database" || stemmed === "database") {
+        variants.add("db");
+        variants.add("sqlite");
+        variants.add("sql");
+        variants.add("schema");
+    }
+    if (normalized === "query" || normalized === "queries" || stemmed === "query") {
+        variants.add("sql");
+        variants.add("select");
+        variants.add("insert");
+        variants.add("update");
+        variants.add("delete");
+        variants.add("template");
+        variants.add("templates");
+    }
+    if (normalized === "defined" || stemmed === "define") {
+        variants.add("template");
+        variants.add("schema");
+    }
+    return Array.from(variants).filter((value) => value.length >= 2);
+}
+function buildSnippet(lines, lineIndex) {
+    return lines
+        .slice(Math.max(0, lineIndex - 1), Math.min(lines.length, lineIndex + 2))
+        .join("\n");
+}
+function isCommentLikeOrStringOnly(line) {
+    const trimmed = line.trim();
+    if (!trimmed)
+        return false;
+    if (trimmed.startsWith("//") ||
+        trimmed.startsWith("/*") ||
+        trimmed.startsWith("*") ||
+        trimmed.startsWith("#")) {
+        return true;
+    }
+    if (/^['"`].*['"`][,;]?$/.test(trimmed)) {
+        return true;
+    }
+    return false;
+}
+function extractTargets(query) {
+    const quotedSentenceTargets = [];
+    const quotedRegex = /['"`](.+?)['"`]/g;
+    let quoteMatch;
+    while ((quoteMatch = quotedRegex.exec(query)) !== null) {
+        const target = quoteMatch[1].trim();
+        if (target.length >= 3) {
+            quotedSentenceTargets.push(target);
+        }
+    }
+    const heuristicSentenceTargets = [];
+    if (!quotedSentenceTargets.length) {
+        query
+            .split(/[\.\n]/)
+            .map((segment) => segment.trim())
+            .filter((segment) => segment.length > 12)
+            .forEach((segment) => heuristicSentenceTargets.push(segment));
+    }
+    const sentenceTargets = uniq([...quotedSentenceTargets, ...heuristicSentenceTargets]);
+    const filenameTargets = uniq(query
+        .split(/\s+/)
+        .map((word) => word.replace(/^['"`]|[,'"`)]+$/g, "").trim())
+        .filter((word) => FILE_EXT_REGEX.test(word)));
+    const explicitPathTargets = uniq(filenameTargets
+        .filter((target) => target.includes("/") || target.includes("\\"))
+        .map((target) => target.replace(/\\/g, "/")));
+    const baseNameTargets = uniq(filenameTargets.map((name) => path.basename(name.replace(FILE_EXT_REGEX, ""))));
+    const tokens = tokenizeText(query).filter((token) => token.length >= 3);
+    const expandedTokens = uniq(tokens.flatMap((token) => expandToken(token)));
+    const symbolTargets = uniq(tokens.filter((token) => {
+        const normalized = token;
+        if (STOPWORDS.has(normalized))
+            return false;
+        if (quotedSentenceTargets.some((sentence) => sentence.toLowerCase().includes(normalized))) {
+            return false;
+        }
+        if (filenameTargets.includes(token))
+            return false;
+        if (baseNameTargets.includes(token))
+            return false;
+        return token.length >= 3;
+    }));
+    const queryTokenSet = new Set(expandedTokens
+        .filter((token) => !STOPWORDS.has(token)));
+    const pathSignalTokenSet = new Set(expandedTokens.filter((token) => !STOPWORDS.has(token) &&
+        !WEAK_TOKENS.has(token) &&
+        !GENERIC_TOKENS.has(token) &&
+        token.length >= 2));
+    return {
+        sentenceTargets,
+        filenameTargets,
+        explicitPathTargets,
+        baseNameTargets,
+        symbolTargets,
+        queryTokenSet,
+        pathSignalTokenSet
+    };
+}
+function computeSymbolConfidence(symbol, sentenceTargets) {
+    const normalized = normalizeToken(symbol);
+    if (sentenceTargets.some((target) => target.toLowerCase().includes(normalized))) {
+        return 0.95;
+    }
+    if (WEAK_TOKENS.has(normalized)) {
+        return 0.45;
+    }
+    return 0.85;
+}
+function dedupeEvidence(items) {
+    const bestByKey = new Map();
+    for (const item of items) {
+        const ev = item.evidence;
+        const spanKey = `${ev.span?.startLine ?? 0}:${ev.span?.endLine ?? 0}`;
+        const key = `${ev.type}|${ev.claim.toLowerCase()}|${spanKey}`;
+        const existing = bestByKey.get(key);
+        if (!existing || (ev.confidence ?? 0) > (existing.evidence.confidence ?? 0)) {
+            bestByKey.set(key, item);
+        }
+    }
+    return Array.from(bestByKey.values());
+}
+function computeProximityAdjustment(lines) {
+    if (lines.length < 2)
+        return 0;
+    const sorted = uniq(lines.map((line) => String(line))).map(Number).sort((a, b) => a - b);
+    let minGap = Number.POSITIVE_INFINITY;
+    for (let i = 1; i < sorted.length; i++) {
+        minGap = Math.min(minGap, sorted[i] - sorted[i - 1]);
+    }
+    let adjustment = 0;
+    if (minGap <= 2)
+        adjustment += 0.08;
+    else if (minGap <= 5)
+        adjustment += 0.05;
+    else if (minGap <= 10)
+        adjustment += 0.02;
+    else if (minGap > 30)
+        adjustment -= 0.06;
+    const spread = sorted[sorted.length - 1] - sorted[0];
+    if (sorted.length >= 3 && spread > 120) {
+        adjustment -= 0.04;
+    }
+    return adjustment;
+}
+function computeFileConfidence(evidenceWithMeta, symbolTargets) {
+    const weights = {
+        sentence: 1.0,
+        filename: 0.9,
+        symbol: 0.7,
+        structural: 0.75,
+        "keyword-cluster": 0.4
+    };
+    const caps = {
+        sentence: 1.3,
+        filename: 0.9,
+        symbol: 1.1,
+        structural: 1.1,
+        "keyword-cluster": 0.5
+    };
+    const grouped = new Map();
+    for (const item of evidenceWithMeta) {
+        const type = item.evidence.type;
+        if (!grouped.has(type))
+            grouped.set(type, []);
+        grouped.get(type)?.push(item);
+    }
+    let baseSum = 0;
+    for (const [type, items] of grouped.entries()) {
+        const sorted = [...items].sort((a, b) => (b.evidence.confidence ?? 0) - (a.evidence.confidence ?? 0));
+        let typeScore = 0;
+        sorted.forEach((item, index) => {
+            const diminishing = index === 0 ? 1 : 1 / (1 + index * 1.4);
+            const quality = clamp(item.evidence.confidence ?? 0.8);
+            typeScore += weights[type] * quality * diminishing;
+        });
+        baseSum += Math.min(typeScore, caps[type]);
+    }
+    const normalizedBase = clamp(1 - Math.exp(-baseSum / 1.8));
+    const distinctTypeCount = new Set(evidenceWithMeta.map((item) => item.evidence.type)).size;
+    const coverageBoost = distinctTypeCount <= 1
+        ? 0
+        : distinctTypeCount === 2
+            ? 0.05
+            : distinctTypeCount === 3
+                ? 0.1
+                : 0.14;
+    const matchedSymbols = new Set(evidenceWithMeta
+        .filter((item) => (item.evidence.type === "symbol" || item.evidence.type === "structural") && item.token)
+        .map((item) => normalizeToken(item.token)));
+    const tokenCoverageBoost = symbolTargets.length > 0
+        ? 0.12 * (matchedSymbols.size / symbolTargets.length)
+        : 0;
+    const symbolLikeEvidence = evidenceWithMeta.filter((item) => item.evidence.type === "symbol" || item.evidence.type === "structural");
+    const weakEvidenceCount = symbolLikeEvidence.filter((item) => item.weakToken).length;
+    const genericPenalty = symbolLikeEvidence.length > 0
+        ? 0.2 * (weakEvidenceCount / symbolLikeEvidence.length)
+        : 0;
+    const commentLikeSymbolCount = evidenceWithMeta.filter((item) => item.evidence.type === "symbol" && item.commentLike).length;
+    const commentPenalty = symbolLikeEvidence.length > 0
+        ? 0.15 * (commentLikeSymbolCount / symbolLikeEvidence.length)
+        : 0;
+    const proximityLines = evidenceWithMeta
+        .map((item) => item.line)
+        .filter((line) => line > 0);
+    const proximityAdjustment = computeProximityAdjustment(proximityLines);
+    const final = clamp(normalizedBase +
+        coverageBoost +
+        tokenCoverageBoost +
+        proximityAdjustment -
+        genericPenalty -
+        commentPenalty);
+    return {
+        baseSum,
+        normalizedBase,
+        coverageBoost,
+        tokenCoverageBoost,
+        proximityAdjustment,
+        genericPenalty,
+        commentPenalty,
+        final
+    };
+}
 /**
  * Deterministic evidence verification:
- * - Scans candidate files
- * -
- * - Deduplicates
- * -
- * - Strictly leverages structural data (functions, classes, imports/exports) for additional evidence.
+ * - Scans candidate files for concrete sentence/symbol/filename/structural matches.
+ * - Uses identifier-boundary matching to reduce substring false positives.
+ * - Deduplicates evidence globally per file.
+ * - Computes confidence with bounded weighted scoring + proximity/coverage/penalties.
  */
 export const evidenceVerifierStep = {
     name: "evidenceVerifier",
-    description: "Deterministic evidence-first scan over candidate files to populate fileAnalysis, with
+    description: "Deterministic evidence-first scan over candidate files to populate fileAnalysis, with calibrated confidence.",
     groups: ["analysis"],
     run: async (input) => {
         var _a, _b;
@@ -21,326 +290,227 @@ export const evidenceVerifierStep = {
             throw new Error("[evidenceVerifier] context.analysis is required.");
         }
         (_a = context.analysis).fileAnalysis ?? (_a.fileAnalysis = {});
-        const
-            ...(context.initContext?.relatedFiles ?? []),
-        ];
-        const uniquePaths = Array.from(new Set(candidatePaths));
+        const uniquePaths = uniq([...(context.initContext?.relatedFiles ?? [])]);
         if (!uniquePaths.length) {
             console.warn("[evidenceVerifier] No candidate files to scan.");
             return { query, data: {} };
         }
-
-        const
-
-            "this", "are", "was", "were", "has", "have",
-            "had", "not", "but", "can", "could", "should",
-            "would", "into", "onto", "about", "above",
-            "below", "under", "over", "then", "else",
-            "when", "where", "what", "which", "while",
-            "return", "const", "let", "var", "true", "false",
-            "null", "undefined", "new", "set", "get",
-            "in", "to", "of", "on", "at", "by"
-        ]);
-        // ----------------- Parse query for targets -----------------
-        const sentenceTargets = [];
-        const quoteRegex = /['"`](.+?)['"`]/g;
-        let match;
-        while ((match = quoteRegex.exec(query)) !== null) {
-            sentenceTargets.push(match[1]);
-        }
-        if (!sentenceTargets.length) {
-            const heuristicSentences = query
-                .split(/[\.\n]/)
-                .map(s => s.trim())
-                .filter(s => s.length > 10);
-            heuristicSentences.forEach(s => {
-                if (!sentenceTargets.includes(s)) {
-                    sentenceTargets.push(s);
-                }
-            });
-        }
-        const filenameTargets = query
-            .split(/\s+/)
-            .map(word => word.replace(/['",]/g, ''))
-            .filter(w => w.match(/\.(ts|js|tsx|md)$/));
-        const baseNameTargets = filenameTargets.map(t => t.replace(/\.(ts|js|tsx|md)$/, ''));
-        // ---- Symbol extraction (filtered + deduplicated) ----
-        const symbolTargets = [];
-        const symbolRegex = /\b([a-zA-Z_]\w{2,})(?:\(\))?\b/g;
-        let symMatch;
-        while ((symMatch = symbolRegex.exec(query)) !== null) {
-            const token = symMatch[1];
-            if (token.length >= 3 &&
-                !STOPWORDS.has(token.toLowerCase()) &&
-                !sentenceTargets.includes(token)) {
-                symbolTargets.push(token);
-            }
-        }
-        const uniqueSymbolTargets = Array.from(new Set(symbolTargets));
-        // ----------------- Token strength tiering -----------------
-        const WEAK_TOKENS = new Set([
-            "file",
-            "line",
-            "move",
-            "update",
-            "change",
-            "modify",
-            "readme"
-        ]);
-        function computeSymbolConfidence(sym) {
-            const lower = sym.toLowerCase();
-            // If symbol appears inside quoted sentence → very strong
-            const fromQuoted = sentenceTargets.some(s => s.toLowerCase().includes(lower));
-            if (fromQuoted)
-                return 0.95;
-            // Weak structural tokens → low weight
-            if (WEAK_TOKENS.has(lower))
-                return 0.5;
-            // Default meaningful symbol
-            return 0.85;
-        }
-        // ----------------- Process each file -----------------
-        for (const path of uniquePaths) {
-            let code = null;
+        const { sentenceTargets, filenameTargets, explicitPathTargets, baseNameTargets, symbolTargets, queryTokenSet, pathSignalTokenSet } = extractTargets(query);
+        for (const filePath of uniquePaths) {
+            let code = "";
             try {
-                code = fs.readFileSync(
+                code = fs.readFileSync(filePath, "utf-8");
             }
             catch (err) {
-                console.warn(`[evidenceVerifier] Failed to read ${
+                console.warn(`[evidenceVerifier] Failed to read ${filePath}: ${err.message}`);
             }
-            const lines = code
-            const
-            const
-
-
+            const lines = code ? code.split("\n") : [];
+            const rawEvidence = [];
+            const addEvidence = (evidence, line, token, commentLike) => {
+                rawEvidence.push({
+                    evidence,
+                    line,
+                    token,
+                    weakToken: token ? WEAK_TOKENS.has(normalizeToken(token)) : false,
+                    commentLike
+                });
+            };
+            const loweredSentenceTargets = sentenceTargets.map((target) => target.toLowerCase());
             lines.forEach((line, idx) => {
-
-
-
-
-
-
-                            claim: `Sentence match: "${target}"`,
+                const loweredLine = line.toLowerCase();
+                loweredSentenceTargets.forEach((target, targetIndex) => {
+                    if (target.length >= 3 && loweredLine.includes(target)) {
+                        const originalTarget = sentenceTargets[targetIndex];
+                        addEvidence({
+                            claim: `Sentence match: "${originalTarget}"`,
                             type: "sentence",
-                            excerpt:
+                            excerpt: buildSnippet(lines, idx),
                             span: { startLine: idx + 1, endLine: idx + 1 },
-                            confidence: 1
-                        });
-                        matchedLines.push(line);
+                            confidence: 1
+                        }, idx + 1);
                     }
                 });
             });
-
-
+            for (const symbol of symbolTargets) {
+                const regex = new RegExp(`\\b${escapeRegex(symbol)}\\b`, "i");
                 for (let idx = 0; idx < lines.length; idx++) {
                     const line = lines[idx];
-                    if (
-
-
-
-
-
-
-
-
-
-                        type: "symbol",
-                        excerpt: snippet,
-                        span: { startLine: idx + 1, endLine: idx + 1 },
-                        confidence: computeSymbolConfidence(sym),
-                    });
-                    matchedLines.push(line);
-                }
-                break;
-            }
+                    if (!regex.test(line))
+                        continue;
+                    addEvidence({
+                        claim: `Symbol reference found: "${symbol}"`,
+                        type: "symbol",
+                        excerpt: buildSnippet(lines, idx),
+                        span: { startLine: idx + 1, endLine: idx + 1 },
+                        confidence: computeSymbolConfidence(symbol, sentenceTargets)
+                    }, idx + 1, symbol, isCommentLikeOrStringOnly(line));
+                    break;
                 }
-            }
-
-            const fullFileName = path.
-            const baseFileName = fullFileName.replace(
-
-
-
-
+            }
+            const normalizedFilePath = filePath.replace(/\\/g, "/");
+            const fullFileName = path.basename(filePath);
+            const baseFileName = fullFileName.replace(FILE_EXT_REGEX, "");
+            const fileNameTargetNames = new Set(filenameTargets.map((target) => path.basename(target)));
+            const exactPathMatch = explicitPathTargets.some((target) => normalizedFilePath === target ||
+                normalizedFilePath.endsWith(`/${target}`));
+            const fileNameMatch = exactPathMatch ||
+                fileNameTargetNames.has(fullFileName) ||
+                baseNameTargets.includes(baseFileName);
+            if (fileNameMatch) {
+                addEvidence({
+                    claim: exactPathMatch
+                        ? `File path exactly matches query target: "${fullFileName}"`
+                        : `Filename matches query target: "${fullFileName}"`,
                     type: "filename",
-                    excerpt: `Path: ${
+                    excerpt: `Path: ${filePath}`,
                     span: { startLine: 0, endLine: 0 },
-                    confidence: 1
-                });
+                    confidence: 1
+                }, 0, fullFileName);
+            }
+            const filePathTokens = new Set(tokenizeText(filePath).flatMap((token) => expandToken(token)));
+            const matchingPathTokens = Array.from(pathSignalTokenSet).filter((token) => filePathTokens.has(token));
+            if (matchingPathTokens.length > 0) {
+                const pathConfidence = clamp(0.5 + matchingPathTokens.length * 0.08, 0.5, 0.86);
+                addEvidence({
+                    claim: `Path tokens align with query intent: ${matchingPathTokens.slice(0, 5).join(", ")}`,
+                    type: "keyword-cluster",
+                    excerpt: `Path: ${filePath}`,
+                    span: { startLine: 0, endLine: 0 },
+                    confidence: pathConfidence
+                }, 0, matchingPathTokens[0]);
             }
-
-            const struct = context.analysis.fileAnalysis[path]?.structural;
+            const struct = context.analysis.fileAnalysis[filePath]?.structural;
             const structuralEvidence = [];
             if (struct) {
-
-                .
-
-
-
-
-
-
-
-
-
-
-
-
-
-                }
+                (struct.functions ?? []).forEach((fn) => {
+                    if (!fn.name)
+                        return;
+                    const normalized = normalizeToken(fn.name);
+                    if (!queryTokenSet.has(normalized))
+                        return;
+                    const ev = {
+                        claim: `Function name matches query: "${fn.name}"`,
+                        type: "structural",
+                        excerpt: fn.name,
+                        span: { startLine: fn.start ?? 0, endLine: fn.end ?? 0 },
+                        confidence: WEAK_TOKENS.has(normalized) ? 0.6 : 0.85
+                    };
+                    structuralEvidence.push(ev);
+                    addEvidence(ev, fn.start ?? 0, fn.name);
                 });
-                (struct.classes ?? []).forEach(cls => {
-                    if (cls.name
-
-
-
-
-
-
-
-
-
-
+                (struct.classes ?? []).forEach((cls) => {
+                    if (!cls.name)
+                        return;
+                    const normalized = normalizeToken(cls.name);
+                    if (!queryTokenSet.has(normalized))
+                        return;
+                    const ev = {
+                        claim: `Class name matches query: "${cls.name}"`,
+                        type: "structural",
+                        excerpt: cls.name,
+                        span: { startLine: cls.start ?? 0, endLine: cls.end ?? 0 },
+                        confidence: WEAK_TOKENS.has(normalized) ? 0.6 : 0.85
+                    };
+                    structuralEvidence.push(ev);
+                    addEvidence(ev, cls.start ?? 0, cls.name);
                 });
-                [...(struct.imports ?? []), ...(struct.exports ?? [])].forEach(
-                    if (
-
-
-
-
-
-
-
-
-
-
+                [...(struct.imports ?? []), ...(struct.exports ?? [])].forEach((symbol) => {
+                    if (!symbol)
+                        return;
+                    const normalized = normalizeToken(symbol);
+                    if (!queryTokenSet.has(normalized))
+                        return;
+                    const ev = {
+                        claim: `Import/Export matches query: "${symbol}"`,
+                        type: "structural",
+                        excerpt: symbol,
+                        span: { startLine: 0, endLine: 0 },
+                        confidence: WEAK_TOKENS.has(normalized) ? 0.55 : 0.8
+                    };
+                    structuralEvidence.push(ev);
+                    addEvidence(ev, 0, symbol);
                 });
-                // -------- Log structural evidence per file --------
                 if (structuralEvidence.length > 0) {
                     logInputOutput("evidenceVerifier", "output", {
-                        file:
+                        file: filePath,
                         count: structuralEvidence.length,
-                        examples: structuralEvidence.slice(0, 5).map(ev => ({
+                        examples: structuralEvidence.slice(0, 5).map((ev) => ({
                             claim: ev.claim,
                             excerpt: ev.excerpt,
-                            confidence: ev.confidence
-                        }))
+                            confidence: ev.confidence
+                        }))
                     });
                 }
             }
-
-
-
-
-
-
-            (
-
-
-                    claim: `Function name matches query: "${fn.name}"`,
-                    type: "structural",
-                    excerpt: fn.name,
-                    span: { startLine: fn.start ?? 0, endLine: fn.end ?? 0 },
-                    confidence: 0.85,
-                });
-            }
-            });
-            (struct.classes ?? []).forEach(cls => {
-                if (cls.name && querySet.has(cls.name.toLowerCase())) {
-                    evidenceItems.push({
-                        claim: `Class name matches query: "${cls.name}"`,
-                        type: "structural",
-                        excerpt: cls.name,
-                        span: { startLine: cls.start ?? 0, endLine: cls.end ?? 0 },
-                        confidence: 0.85,
-                    });
-                }
-            });
-            [...(struct.imports ?? []), ...(struct.exports ?? [])].forEach(sym => {
-                if (sym && querySet.has(sym.toLowerCase())) {
-                    evidenceItems.push({
-                        claim: `Import/Export matches query: "${sym}"`,
-                        type: "structural",
-                        excerpt: sym,
-                        span: { startLine: 0, endLine: 0 },
-                        confidence: 0.85,
-                    });
-                }
-            });
-            }
-            // -------- Compute file-level confidence --------
-            let fileScore = 0;
-            for (const ev of evidenceItems) {
-                if (ev.type === "sentence")
-                    fileScore += 1.0;
-                else if (ev.type === "filename")
-                    fileScore += 1.0;
-                else if (ev.type === "symbol" || ev.type === "structural")
-                    fileScore += ev.confidence ?? 0.8;
-            }
-            const fileConfidence = fileScore === 0
-                ? 0
-                : Math.min(1, fileScore / 3);
-            const isFocusFile = context.analysis.focus?.selectedFiles?.includes(path) ?? false;
+            const dedupedEvidence = dedupeEvidence(rawEvidence);
+            const evidenceItems = dedupedEvidence.map((item) => item.evidence);
+            const score = computeFileConfidence(dedupedEvidence, symbolTargets);
+            const hasFilenameHit = evidenceItems.some((ev) => ev.type === "filename");
+            const fileConfidence = hasFilenameHit ? 1 : score.final;
+            const matchedLines = uniq(dedupedEvidence
+                .map((item) => (item.line > 0 ? lines[item.line - 1] : ""))
+                .filter(Boolean));
+            const isFocusFile = context.analysis.focus?.selectedFiles?.includes(filePath) ?? false;
             const hasEvidence = evidenceItems.length > 0;
-
-
+            const isRelevantByConfidence = fileConfidence >= 0.25;
+            const isRelevant = isFocusFile || isRelevantByConfidence;
+            if (isRelevant && hasEvidence) {
                 const confidenceLabel = fileConfidence.toFixed(2);
-                context.analysis.fileAnalysis[
-                    ...context.analysis.fileAnalysis[
+                context.analysis.fileAnalysis[filePath] = {
+                    ...context.analysis.fileAnalysis[filePath],
                     intent: "relevant",
-                    relevanceExplanation: `[confidence:${confidenceLabel}] ${evidenceItems.length} evidence item(s) match the query${isFocusFile ? " (focus file already selected)" : ""}`,
+                    relevanceExplanation: `[confidence:${confidenceLabel}] ${evidenceItems.length} evidence item(s) match the query${isFocusFile ? " (focus file already selected)" : ""}; components base=${score.normalizedBase.toFixed(2)}, coverage=${(score.coverageBoost + score.tokenCoverageBoost).toFixed(2)}, proximity=${score.proximityAdjustment.toFixed(2)}, penalties=${(score.genericPenalty + score.commentPenalty).toFixed(2)}`,
                     role: "primary",
                     action: {
                         isRelevant: true,
-                        shouldModify: hasEvidence
+                        shouldModify: hasEvidence
                     },
                     proposedChanges: hasEvidence
                         ? {
                             summary: "Evidence found in file",
                             scope: "minor",
-                            targets: matchedLines.length
-
-                                : undefined,
+                            targets: matchedLines.length ? matchedLines : undefined,
+                            rationale: `calibrated-confidence=${confidenceLabel}`
                         }
                         : {
                             summary: "No evidence found",
-                            scope: "none"
+                            scope: "none"
                         },
                     semanticAnalyzed: false,
                     risks: hasEvidence
                         ? []
                         : ["No concrete evidence found; modification not permitted"],
-                    evidence: evidenceItems
+                    evidence: evidenceItems
                 };
             }
             else {
-                (_b = context.analysis.fileAnalysis)[
+                (_b = context.analysis.fileAnalysis)[filePath] || (_b[filePath] = {
+                    intent: "irrelevant",
                     action: { isRelevant: false, shouldModify: false },
                     proposedChanges: {
                         summary: "No evidence found",
-                        scope: "none"
+                        scope: "none"
                     },
-                    semanticAnalyzed: false
+                    semanticAnalyzed: false
                 });
             }
         }
         const output = {
             query,
-            data: { fileAnalysis: context.analysis.fileAnalysis }
+            data: { fileAnalysis: context.analysis.fileAnalysis }
         };
-        const logSummary = Object.entries(context.analysis.fileAnalysis).map(([
+        const logSummary = Object.entries(context.analysis.fileAnalysis).map(([filePath, analysis]) => {
             const evidenceCount = analysis.evidence?.length ?? 0;
             const confidenceMatch = analysis.relevanceExplanation?.match(/\[confidence:(\d+\.\d+)\]/);
             const confidence = confidenceMatch?.[1] ?? "0.00";
             return {
-                file:
+                file: filePath,
                 confidence,
                 evidenceCount,
-                isRelevant: analysis.action?.isRelevant ?? false
+                isRelevant: analysis.action?.isRelevant ?? false
             };
         });
         logInputOutput("evidenceVerifier", "output", logSummary);
         return output;
-    }
+    }
 };
```
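The scoring change above replaces the old flat `Math.min(1, fileScore / 3)` heuristic with a bounded, saturating curve. For orientation, here is a minimal standalone sketch of just the base curve; it is not code shipped in the package, and the evidence values are hypothetical, while the weights, caps, diminishing-returns factor, and the `1 - e^(-baseSum / 1.8)` squash mirror `computeFileConfidence` in the diff:

```js
// Standalone sketch of the bounded base score in computeFileConfidence.
// Weights/caps are copied from the diff; the evidence array is hypothetical.
const weights = { sentence: 1.0, filename: 0.9, symbol: 0.7, structural: 0.75, "keyword-cluster": 0.4 };
const caps = { sentence: 1.3, filename: 0.9, symbol: 1.1, structural: 1.1, "keyword-cluster": 0.5 };

function baseConfidence(evidence) {
    const byType = new Map();
    for (const ev of evidence) {
        if (!byType.has(ev.type)) byType.set(ev.type, []);
        byType.get(ev.type).push(ev.confidence);
    }
    let baseSum = 0;
    for (const [type, confidences] of byType) {
        confidences.sort((a, b) => b - a);
        let typeScore = 0;
        confidences.forEach((c, i) => {
            // Repeat hits of the same type count progressively less.
            typeScore += weights[type] * c * (i === 0 ? 1 : 1 / (1 + i * 1.4));
        });
        baseSum += Math.min(typeScore, caps[type]); // per-type cap
    }
    // Saturating squash: evidence raises confidence asymptotically toward 1.
    return Math.min(1, Math.max(0, 1 - Math.exp(-baseSum / 1.8)));
}

// One symbol hit plus two structural hits (hypothetical confidences):
console.log(baseConfidence([
    { type: "symbol", confidence: 0.85 },
    { type: "structural", confidence: 0.85 },
    { type: "structural", confidence: 0.8 }
]).toFixed(2)); // ≈ 0.56, before coverage boosts, proximity, and penalties
```

Because each type's contribution is capped before the squash, piling up hits of a single evidence type saturates the score, whereas the old `Math.min(1, fileScore / 3)` let three hits of one type max it out.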
package/dist/index.js
CHANGED
```diff
@@ -18,6 +18,7 @@ const program = cmdFactory();
 const customCommands = {};
 // ---------------- Test Queries ----------------
 const testQueries = [
+    // General Questions
     'please write me comprehensive comments for semanticAnalysisModule.ts and typescript.ts files',
     'refactor mainagent to improve readability and reduce nesting',
     'explain the intent and architecture of the semantic analysis module',
@@ -34,7 +35,39 @@ const testQueries = [
     'How do I run the test suite?',
     'Are there any flaky tests in this repo?',
     'Are there any security vulnerabilities in our dependencies?',
-    'Is there any dead code we can safely remove?'
+    'Is there any dead code we can safely remove?',
+    // Code Quality & Refactoring
+    'Identify and suggest improvements for code smells in the auth module',
+    'How can we improve the performance of the data processing pipeline?',
+    'What are the best practices for handling asynchronous operations in this codebase?',
+    'Suggest refactoring strategies for reducing cyclomatic complexity in the core service',
+    'How can we improve test coverage for the API layer?',
+    'What are the common anti-patterns in the current logging implementation?',
+    // Architecture & Design
+    'Explain the layered architecture and how components interact with each other',
+    'How is the dependency inversion principle applied in this codebase?',
+    'What architectural patterns are evident in the current implementation?',
+    'How is state managed across different modules?',
+    'What are the trade-offs of using a monorepo vs multiple repos?',
+    'How does the codebase handle cross-cutting concerns like logging and caching?',
+    // Dependency Management
+    'Analyze the dependency tree for potential version conflicts',
+    'How can we reduce the bundle size of the frontend application?',
+    'What are the security implications of the current dependency versions?',
+    'How are third-party libraries integrated and managed?',
+    'What is the strategy for handling transitive dependencies?',
+    'How do we ensure dependency updates don\'t break existing functionality?',
+    // Advanced TypeScript Features
+    'Explain how generics are used throughout the codebase and suggest improvements',
+    'How are discriminated unions implemented in this project?',
+    'What are the benefits of using mapped types in the current code?',
+    'How are conditional types utilized in the type system?',
+    'What are the best practices for using decorators in this codebase?',
+    'How does the project leverage TypeScript\'s type inference capabilities?',
+    'Explain the usage of utility types like Partial, Pick, Omit, etc.',
+    'How are module declarations and ambient types used?',
+    'What are the common pitfalls when working with TypeScript types?',
+    'How does the codebase handle type narrowing and assertion functions?'
 ];
 // ---------------- Helpers ----------------
 function pickRandom(items) {
```