npm - scai - Versions diffs - 0.1.165 → 0.1.166 - Mend

scai 0.1.165 → 0.1.166

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (4)

package/dist/agents/reasonNextTaskStep.js +45 -0
package/dist/db/fileIndex.js +91 -146
package/dist/pipeline/modules/finalAnswerModule.js +16 -4
package/package.json +1 -1

package/dist/agents/reasonNextTaskStep.js CHANGED Viewed

@@ -1,3 +1,6 @@
+// File: src/agents/reasonNextTaskStep.ts
+import { generate } from "../lib/generate.js";
+import { cleanupModule } from "../pipeline/modules/cleanupModule.js";
 import { logInputOutput } from "../utils/promptLogHelper.js";
 /**
  * REASON NEXT TASK STEP
@@ -95,6 +98,48 @@ export const reasonNextTaskStep = {
             confidence = 0.98;
         }
         // ---------------------------
+        // 6.5️⃣ Optional: Reason over known risks
+        // ---------------------------
+        const knownRisks = context.analysis.understanding?.risks ?? [];
+        if (knownRisks.length > 0) {
+            // Optionally call the LLM with constrained instructions
+            const riskPrompt = `
+You are given the following KNOWN RISKS (authoritative, do not invent new ones):
+${knownRisks.map(r => "- " + r).join("\n")}
+Task:
+- Decide whether it is reasonable to ask the user for clarification before proceeding.
+- Return STRICT JSON: { askUser: true|false, rationale: string }
+`;
+            try {
+                const aiResponse = await generate({
+                    query: context.initContext?.userQuery ?? "",
+                    content: riskPrompt
+                });
+                const cleaned = await cleanupModule.run({
+                    query: context.initContext?.userQuery ?? "",
+                    content: aiResponse.data ?? ""
+                });
+                const parsed = cleaned.data;
+                // type guard
+                if (parsed &&
+                    typeof parsed === "object" &&
+                    "askUser" in parsed &&
+                    "rationale" in parsed &&
+                    typeof parsed.rationale === "string") {
+                    if (parsed.askUser) {
+                        nextAction = "request-feedback";
+                        rationale += `\nUser clarification recommended due to known risks: ${parsed.rationale}`;
+                        confidence = Math.min(confidence, 0.8); // slightly lower because human needed
+                    }
+                }
+            }
+            catch (err) {
+                console.warn("[reasonNextTaskStep] Risk reasoning failed", err);
+                // fallback: ignore, keep deterministic nextAction
+            }
+        }
+        // ---------------------------
         // 7️⃣ Ensure a TaskStep exists for nextFile
         // ---------------------------
         if (nextFile) {

package/dist/db/fileIndex.js CHANGED Viewed

@@ -11,12 +11,15 @@ import { IGNORED_FOLDER_GLOBS } from '../fileRules/ignoredPaths.js';
 import { Config } from '../config.js';
 import { log } from '../utils/log.js';
 import { startDaemon } from '../commands/DaemonCmd.js';
-import { sanitizeQueryForFts } from '../utils/sanitizeQuery.js';
 import * as sqlTemplates from '../db/sqlTemplates.js';
 import { RELATED_FILES_LIMIT } from '../constants.js';
 import { generate } from '../lib/generate.js';
-import { cleanupModule } from '../pipeline/modules/cleanupModule.js';
 import { logInputOutput } from '../utils/promptLogHelper.js';
+import { sanitizeQueryForFts } from '../utils/sanitizeQuery.js';
+import { extractTaggedContent } from '../utils/parseTaggedContent.js';
+/* -------------------------------------------------- */
+/* DB LOCK                                             */
+/* -------------------------------------------------- */
 async function lockDb() {
     try {
         return await lockfile.lock(getDbPathForRepo());
@@ -26,6 +29,9 @@ async function lockDb() {
         throw err;
     }
 }
+/* -------------------------------------------------- */
+/* INDEX COMMAND                                       */
+/* -------------------------------------------------- */
 export async function runIndexCommand() {
     try {
         initSchema();
@@ -57,9 +63,6 @@ export async function runIndexCommand() {
                 const type = detectFileType(file);
                 const normalizedPath = path.normalize(file).replace(/\\/g, '/');
                 const filename = path.basename(normalizedPath);
-                // --------------------------------------------------
-                // Enqueue file for daemon processing
-                // --------------------------------------------------
                 db.prepare(upsertFileTemplate).run({
                     path: normalizedPath,
                     filename,
@@ -73,7 +76,7 @@ export async function runIndexCommand() {
                 count++;
             }
             catch (err) {
-                log(`⚠️ Skipped in indexCmd ${file}: ${err instanceof Error ? err.message : err}`);
+                log(`⚠️ Skipped in indexCmd ${file}: ${String(err)}`);
             }
         }
     }
@@ -82,110 +85,82 @@ export async function runIndexCommand() {
     }
     log('📊 Discovered files by extension:', JSON.stringify(countByExt, null, 2));
     log(`✅ Done. Enqueued ${count} files for indexing.`);
-    // Kick the daemon — it now owns all processing
     startDaemon();
 }
-// --------------------------------------------------
-// QUERY API (read-only, used by CLI / raw search)
-// --------------------------------------------------
+/* -------------------------------------------------- */
+/* QUERY API                                           */
+/* -------------------------------------------------- */
 export function queryFiles(safeQuery, limit = 10) {
     const db = getDbForRepo();
     return db
         .prepare(sqlTemplates.queryFilesTemplate)
         .all(safeQuery, limit);
 }
-// --------------------------------------------------
-// SEMANTIC SEARCH (AskCmd, answering user directly)
-// - Discards noisy FTS
-// - Uses LLM aggressively
-// - Optimizes for precision
-// --------------------------------------------------
-export async function semanticSearchFiles(originalQuery, _query, // ignored now – LLM owns query construction
-topK = 5) {
+/* -------------------------------------------------- */
+/* SEMANTIC SEARCH                                     */
+/* -------------------------------------------------- */
+export async function semanticSearchFiles(originalQuery, _query, topK = 5) {
     const db = getDbForRepo();
-    // --------------------------------------------------
-    // 1. LLM → primary FTS query (always)
-    // --------------------------------------------------
     const primaryFtsQuery = await generatePrimaryFtsQuery(originalQuery);
     logInputOutput("semanticSearchFiles LLM primary query", "output", {
         originalQuery,
         ftsQuery: primaryFtsQuery,
     });
-    // --------------------------------------------------
-    // 2. Run primary FTS once
-    // --------------------------------------------------
     const primaryResults = db
         .prepare(sqlTemplates.searchFilesTemplate)
         .all(primaryFtsQuery, RELATED_FILES_LIMIT);
     if (primaryResults.length > 0) {
         return rankAndMap(new Map(primaryResults.map(r => [r.id, r])), topK);
     }
-    // --------------------------------------------------
-    // 3. Fallback: LLM → 2–3 subqueries (ONLY if zero results)
-    // --------------------------------------------------
-    const subQueries = await generateFallbackFtsQueries(originalQuery, primaryFtsQuery);
-    logInputOutput("semanticSearchFiles LLM fallback queries", "output", {
+    const fallbackQuery = await generateFallbackFtsQueries(originalQuery, primaryFtsQuery);
+    logInputOutput("semanticSearchFiles LLM fallback query", "output", {
         originalQuery,
         primaryFtsQuery,
-        subQueries,
+        fallbackQuery,
     });
-    // --------------------------------------------------
-    // 4. Execute fallback queries sequentially
-    // --------------------------------------------------
-    for (const subQuery of subQueries) {
-        const rows = db
-            .prepare(sqlTemplates.searchFilesTemplate)
-            .all(subQuery, RELATED_FILES_LIMIT);
-        if (rows.length > 0) {
-            return rankAndMap(new Map(rows.map(r => [r.id, r])), topK);
+    if (fallbackQuery && fallbackQuery.length > 0) {
+        const stmt = db.prepare(sqlTemplates.searchFilesTemplate);
+        for (const query of fallbackQuery) {
+            const rows = stmt.all(query, RELATED_FILES_LIMIT);
+            if (rows.length > 0) {
+                return rankAndMap(new Map(rows.map(r => [r.id, r])), topK);
+            }
         }
     }
-    // --------------------------------------------------
-    // 5. Hard stop
-    // --------------------------------------------------
     return [];
 }
+/* -------------------------------------------------- */
+/* LLM → FTS QUERY GENERATION (TAG-BASED)               */
+/* -------------------------------------------------- */
 async function generatePrimaryFtsQuery(userQuery) {
     const prompt = `
-You are generating a SQLite FTS query for searching a source code repository.
+Generate a SQLite FTS query for searching a source code repository.
-Input (natural language):
+Input:
 "${userQuery}"
-Task:
-- Produce ONE concise FTS query
-- Focus on filenames, symbols, module names, domain nouns
-- Prefer literal identifiers likely to exist in code
-- NO sentences
-- NO stopwords
-- NO explanations
-- NO wildcards unless absolutely necessary
+Rules:
+- Output ONLY the query terms
 - Use OR between terms
-- **MAX 10 terms only** — be selective and concise
+- Max 10 terms
+- No explanations
+- No sentences
-Output JSON ONLY:
-{
-  "ftsQuery": "term1 OR term2 OR term3"
-}
+Wrap the result in <FILE_CONTENT> tags.
+<FILE_CONTENT>
+term1 OR term2 OR term3
+</FILE_CONTENT>
 `.trim();
     try {
         const response = await generate({ content: prompt, query: "" });
-        const cleaned = await cleanupModule.run({
-            query: userQuery,
-            content: response.data,
-        });
-        if (cleaned.data &&
-            typeof cleaned.data === "object" &&
-            "ftsQuery" in cleaned.data &&
-            typeof cleaned.data.ftsQuery === "string") {
-            return cleaned.data.ftsQuery;
-        }
+        const rawText = String(response.data ?? "");
+        const { content } = extractTaggedContent(rawText, "FILE_CONTENT");
+        return sanitizeQueryForFts(content);
     }
     catch (err) {
-        log(`⚠️ [semanticSearchFiles] Failed to generate primary FTS query: ${String(err)}`);
+        return sanitizeQueryForFts(userQuery);
     }
-    // Absolute safety fallback — never explode
-    return sanitizeQueryForFts(userQuery);
 }
 async function generateFallbackFtsQueries(userQuery, failedQuery) {
     const prompt = `
@@ -199,57 +174,44 @@ Primary FTS query returned ZERO results:
 Task:
 - Generate 2–3 independent FTS queries (MAX 3)
-- Each query should be concise: no more than 10 OR-joined search terms
+- Each query must be a single OR-joined expression
+- Max 10 terms per query
 - Focus on filenames, symbols, module names
-- Avoid natural-language sentences
-- Avoid recursion or refinement loops
-- Use OR between terms
+- Avoid natural language sentences
+- Avoid explanations or commentary
-Output JSON ONLY:
-{
-  "subQueries": [
-    "query1",
-    "query2",
-    "query3"
-  ]
-}
+Output format (STRICT):
+<FILE_CONTENT>
+query1
+query2
+query3
+</FILE_CONTENT>
 `.trim();
     try {
         const response = await generate({ content: prompt, query: "" });
-        const cleaned = await cleanupModule.run({
-            query: userQuery,
-            content: response.data,
-        });
-        if (cleaned.data &&
-            typeof cleaned.data === "object" &&
-            Array.isArray(cleaned.data.subQueries)) {
-            return cleaned.data.subQueries
-                .filter((q) => typeof q === "string")
-                .slice(0, 3) // cap to 3 queries
-                .map((q) => q
-                .split(' OR ')
-                .map(term => sanitizeQueryForFts(term)) // sanitize each term individually
-                .slice(0, 10) // cap terms per query
-                .join(' OR '));
+        const rawText = String(response.data ?? "");
+        const { content } = extractTaggedContent(rawText, "FILE_CONTENT");
+        const subQueries = content
+            .split(/\r?\n/)
+            .map(q => sanitizeQueryForFts(q.trim()))
+            .filter(Boolean)
+            .slice(0, 3);
+        if (!subQueries.length) {
+            throw new Error("No fallback subqueries generated");
         }
+        return subQueries;
     }
     catch (err) {
-        log(`⚠️ [semanticSearchFiles] Failed to generate fallback queries: ${String(err)}`);
+        log(`⚠️ [semanticSearchFiles] Fallback FTS generation failed: ${String(err)}`);
+        return null;
     }
-    return [];
 }
-// --------------------------------------------------
-// PLANNER SEARCH (fileSearchModule, discovery)
-// - Never discards FTS
-// - LLM ONLY if FTS is empty
-// - Optimizes for recall
-// --------------------------------------------------
+/* -------------------------------------------------- */
+/* PLANNER SEARCH                                      */
+/* -------------------------------------------------- */
 export async function plannerSearchFiles(originalQuery, query, topK = 5) {
     const db = getDbForRepo();
     const seen = new Map();
-    // -----------------------------
-    // Primary FTS (always trusted)
-    // -----------------------------
     const safeQuery = sanitizeQueryForFts(query);
     const primaryResults = db
         .prepare(sqlTemplates.searchFilesTemplate)
@@ -259,36 +221,31 @@ export async function plannerSearchFiles(originalQuery, query, topK = 5) {
         safeQuery,
         count: primaryResults.length,
     });
-    // -----------------------------
-    // Only call LLM if FTS is empty
-    // -----------------------------
     if (primaryResults.length === 0) {
-        const llmTerms = await expandQueryWithModel(originalQuery);
-        logInputOutput("plannerSearchFiles LLM terms (FTS empty)", "output", {
-            originalQuery,
-            suggestedTerms: llmTerms,
-        });
-        for (const term of llmTerms) {
-            const safeTerm = sanitizeQueryForFts(term);
+        const expanded = await expandQueryWithModel(originalQuery);
+        if (expanded) {
+            const safeTerm = sanitizeQueryForFts(expanded);
             const rows = db
                 .prepare(sqlTemplates.searchFilesTemplate)
                 .all(safeTerm, RELATED_FILES_LIMIT);
-            for (const row of rows) {
-                if (!seen.has(row.id))
-                    seen.set(row.id, row);
-            }
+            rows.forEach(r => {
+                if (!seen.has(r.id))
+                    seen.set(r.id, r);
+            });
         }
     }
     if (seen.size === 0)
         return [];
     return rankAndMap(seen, topK);
 }
-// --------------------------------------------------
-// Helpers
-// --------------------------------------------------
+/* -------------------------------------------------- */
+/* HELPERS                                             */
+/* -------------------------------------------------- */
 function rankAndMap(seen, topK) {
-    const merged = Array.from(seen.values()).sort((a, b) => (a.bm25Score ?? 0) - (b.bm25Score ?? 0));
-    return merged.slice(0, topK).map(r => ({
+    return Array.from(seen.values())
+        .sort((a, b) => (a.bm25Score ?? 0) - (b.bm25Score ?? 0))
+        .slice(0, topK)
+        .map(r => ({
         id: r.id,
         path: r.path,
         filename: r.filename,
@@ -300,32 +257,20 @@ function rankAndMap(seen, topK) {
 }
 async function expandQueryWithModel(query) {
     const prompt = `
-You are assisting a code search system.
-Given a natural-language question about a codebase, return a JSON array
-of 3–8 concrete search terms that are likely to appear literally in source code.
+Return concrete search terms likely to appear in source code.
-Rules:
-- Return ONLY a JSON array of strings
-- No explanations
-- Prefer filenames, function names, symbols, library names
+Wrap the result in <FILE_CONTENT> tags.
 Question:
 "${query}"
 `.trim();
     try {
         const response = await generate({ content: prompt, query: "" });
-        const cleaned = await cleanupModule.run({
-            query,
-            content: response.data,
-        });
-        const terms = Array.isArray(cleaned.data)
-            ? cleaned.data.filter((t) => typeof t === "string")
-            : [];
-        return terms;
+        const rawText = String(response.data ?? "");
+        const { content } = extractTaggedContent(rawText, "FILE_CONTENT");
+        return sanitizeQueryForFts(content);
     }
-    catch (err) {
-        log(`⚠️ [searchFiles] Failed to expand query: ${String(err)}`);
-        return [];
+    catch {
+        return null;
     }
 }

package/dist/pipeline/modules/finalAnswerModule.js CHANGED Viewed

@@ -28,6 +28,8 @@ export const finalAnswerModule = {
             (!focus?.relevantFiles || focus.relevantFiles.includes(path)))
             .map(([path, fa]) => ({ path, analysis: fa }))
             .slice(0, MAX_FILES);
+        // Collect analyzed files for output
+        const analyzedFiles = meaningfulFiles.map(f => f.path);
         // --------------------------------------------------
         // 2️⃣ Collect supporting code snippets from working files
         // --------------------------------------------------
@@ -104,6 +106,9 @@ ${query}
 Rationale for focus:
 ${rationale}
+Analyzed files:
+${analyzedFiles.join("\n")}
 ==================== PROPOSED CHANGES ====================
 ${semanticSection}
@@ -130,17 +135,24 @@ ${codeSection}
         // 5️⃣ Generate final answer
         // --------------------------------------------------
         const aiResponse = await generate({ query, content: prompt });
+        // ✅ Prepend analyzed files to finalText so user sees them
         const finalText = typeof aiResponse.data === "string"
-            ? aiResponse.data
-            : JSON.stringify(aiResponse.data, null, 2);
+            ? `Analyzed files:\n${analyzedFiles.join("\n")}\n\n${aiResponse.data}`
+            : `Analyzed files:\n${analyzedFiles.join("\n")}\n\n${JSON.stringify(aiResponse.data, null, 2)}`;
         context.analysis || (context.analysis = {});
         context.analysis.finalAnswer = finalText;
-        logInputOutput("finalAnswerModule", "output", aiResponse.data);
+        logInputOutput("finalAnswerModule", "output", {
+            data: aiResponse.data,
+            analyzedFiles,
+        });
         console.log(chalk.yellow(`\n\n[FINAL ANSWER]\n${finalText}\n`));
         return {
             query,
             content: finalText,
-            data: aiResponse.data,
+            data: {
+                response: aiResponse.data,
+                analyzedFiles,
+            },
             context,
         };
     },

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "scai",
-  "version": "0.1.165",
+  "version": "0.1.166",
   "type": "module",
   "bin": {
     "scai": "./dist/index.js"