npm - scai - Versions diffs - 0.1.44 → 0.1.46 - Mend

scai 0.1.44 → 0.1.46

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5)

package/dist/commands/AskCmd.js +69 -42
package/dist/db/fileIndex.js +72 -13
package/dist/db/functionExtractors/extractFromJs.js +6 -6
package/dist/db/schema.js +2 -0
package/package.json +3 -2

package/dist/commands/AskCmd.js CHANGED

@@ -1,16 +1,16 @@
 import fs from 'fs';
 import path from 'path';
 import readline from 'readline';
-import { searchFiles, queryFiles } from '../db/fileIndex.js';
+import { searchFiles, queryFiles, getFunctionsForFiles } from '../db/fileIndex.js';
 import { sanitizeQueryForFts } from '../utils/sanitizeQuery.js';
 import { generate } from '../lib/generate.js';
 import { buildContextualPrompt } from '../utils/buildContextualPrompt.js';
 import { generateFileTree } from '../utils/fileTree.js';
 import { log } from '../utils/log.js';
 import { PROMPT_LOG_PATH, SCAI_HOME, INDEX_DIR } from '../constants.js';
-const MAX_RELATED_FILES = 5;
+const MAX_RELATED_FILES = 3;
+const MAX_SUMMARY_LINES = 12;
 export async function runAskCommand(query) {
-    // 🧠 Prompt the user if no query is passed
     if (!query) {
         query = await promptOnce('🧠 Ask your question:\n> ');
     }
@@ -21,24 +21,21 @@ export async function runAskCommand(query) {
     }
     console.log(`📁 Using index root: ${INDEX_DIR}`);
     console.log(`🔍 Searching for: "${query}"\n`);
-    // 🧠 Step 1: Semantic + fallback search
+    // 🟩 STEP 1: Semantic Search
     const start = Date.now();
-    const semanticResults = await searchFiles(query, MAX_RELATED_FILES);
+    const semanticResults = await searchFiles(query, MAX_RELATED_FILES); // RankedFile[]
     const duration = Date.now() - start;
     console.log(`⏱️ searchFiles took ${duration}ms and returned ${semanticResults.length} result(s)`);
-    // 🔍 Log raw semantic results
-    console.log('🔍 Raw semantic search results:');
     semanticResults.forEach((file, i) => {
         console.log(`  ${i + 1}. 📄 Path: ${file.path} | Score: ${file.score?.toFixed(3) ?? 'n/a'}`);
     });
+    // 🟩 STEP 1.5: Fallback FTS search
     const safeQuery = sanitizeQueryForFts(query);
-    const fallbackResults = queryFiles(safeQuery, 10);
-    // 🔍 Log raw keyword fallback results
-    console.log('\n🔍 Raw fallback keyword (FTS) search results:');
+    const fallbackResults = queryFiles(safeQuery, 10); // FileRow[]
     fallbackResults.forEach((file, i) => {
-        console.log(`  ${i + 1}. 📄 Path: ${file.path}`);
+        console.log(`  ${i + 1}. 🔎 Fallback Match: ${file.path}`);
     });
-    // 🧠 Step 2: Merge results
+    // 🟩 STEP 2: Merge results (de-duplicate by full resolved path)
     const seen = new Set();
     const combinedResults = [];
     for (const file of semanticResults) {
@@ -51,43 +48,78 @@ export async function runAskCommand(query) {
         if (!seen.has(resolved)) {
             seen.add(resolved);
             combinedResults.push({
+                id: file.id,
                 path: file.path,
-                summary: file.summary,
+                summary: file.summary || '',
                 score: 0.0,
+                sim: 0,
+                bm25: 0
             });
         }
     }
-    if (combinedResults.length > 0) {
-        console.log('📊 Closest files based on semantic + keyword match:');
-        combinedResults.forEach((file, i) => {
-            console.log(`  ${i + 1}. 📄 Path: ${file.path} | Score: ${file.score?.toFixed(3) ?? 'fallback'}`);
+    // 🟩 STEP 3: Log results
+    if (combinedResults.length) {
+        console.log('\n📊 Final Related Files:');
+        combinedResults.forEach((f, i) => {
+            console.log(`  ${i + 1}. ${f.path} (${f.score?.toFixed(3) ?? 'fallback'})`);
         });
     }
     else {
-        console.log('⚠️ No similar files found. Asking the model using question only...');
+        console.log('⚠️ No similar files found. Using query only.');
     }
-    // 🧠 Step 3: Build metadata for prompt
-    const relatedFiles = combinedResults.slice(0, MAX_RELATED_FILES).map((file, index) => ({
-        path: file.path,
-        summary: file.summary || '(No summary available)', // Ensure summary is included
-    }));
-    // Get the top-ranked file (the first one in the sorted results)
-    const topRankedFile = combinedResults[0]; // The most relevant file
+    // 🟩 STEP 4: Load top file code + metadata
+    const topFile = combinedResults[0];
+    const filepath = topFile?.path || '';
+    let code = '';
+    let topSummary = topFile.summary || '(No summary available)';
+    let topFunctions = [];
+    // Truncate summary if needed
+    if (topSummary) {
+        topSummary = topSummary.split('\n').slice(0, MAX_SUMMARY_LINES).join('\n');
+    }
+    const allFileIds = combinedResults
+        .map(file => file.id)
+        .filter((id) => typeof id === 'number');
+    const allFunctionsMap = getFunctionsForFiles(allFileIds);
+    try {
+        code = fs.readFileSync(filepath, 'utf-8');
+        const topFileId = topFile.id;
+        topFunctions = allFunctionsMap[topFileId]?.map(fn => fn.name) || [];
+    }
+    catch (err) {
+        console.warn(`⚠️ Failed to read or analyze top file (${filepath}):`, err);
+    }
+    // 🟩 STEP 5: Build relatedFiles with functions
+    const relatedFiles = combinedResults.slice(0, MAX_RELATED_FILES).map(file => {
+        const fileId = file.id;
+        let summary = file.summary || '(No summary available)';
+        if (summary) {
+            summary = summary.split('\n').slice(0, MAX_SUMMARY_LINES).join('\n');
+        }
+        return {
+            path: file.path,
+            summary,
+            functions: allFunctionsMap[fileId]?.map(fn => fn.name) || [],
+        };
+    });
+    // 🟩 STEP 6: Generate file tree
     let fileTree = '';
     try {
-        fileTree = generateFileTree(INDEX_DIR, 2); // Limit depth
+        fileTree = generateFileTree(INDEX_DIR, 2);
     }
     catch (e) {
-        console.warn('⚠️ Failed to generate file tree:', e);
+        console.warn('⚠️ Could not generate file tree:', e);
     }
-    // Now we can build the prompt with summaries included for each file
+    // 🟩 STEP 7: Build prompt
     const promptContent = buildContextualPrompt({
         baseInstruction: query,
-        code: '', // No specific code selected
-        relatedFiles, // This now includes both path and summary for each file
+        code,
+        summary: topSummary,
+        functions: topFunctions,
+        relatedFiles,
         projectFileTree: fileTree || undefined,
     });
-    // 🧠 Step 4: Log prompt to file
+    // 🟩 STEP 8: Save prompt for trace/debug
     try {
         if (!fs.existsSync(SCAI_HOME))
             fs.mkdirSync(SCAI_HOME, { recursive: true });
@@ -97,26 +129,21 @@ export async function runAskCommand(query) {
     catch (err) {
         log('❌ Failed to write prompt log:', err);
     }
-    // 🧠 Step 5: Call the model
+    // 🟩 STEP 9: Ask model
     try {
-        console.log('🤖 Asking the model...');
-        // Create a more structured PromptInput object
+        console.log('\n🤖 Asking the model...');
         const input = {
-            content: query, // Main instruction (the query)
-            filepath: topRankedFile?.path || '', // Include the path of the top-ranked file
-            metadata: {
-                summary: topRankedFile?.summary || '', // Add summary of the top-ranked file
-                relatedFiles: relatedFiles, // Pass related files as part of metadata
-            },
-            projectFileTree: fileTree || '' // Include file structure in metadata
+            content: promptContent,
+            filepath,
         };
         const modelResponse = await generate(input, 'llama3');
-        console.log(`\n📝 Model response:\n${modelResponse.content}`);
+        console.log(`\n🧠 Model Response:\n${modelResponse.content}`);
     }
     catch (err) {
         console.error('❌ Model request failed:', err);
     }
 }
+// 🟩 Helper: Prompt once
 function promptOnce(promptText) {
     return new Promise(resolve => {
         const rl = readline.createInterface({

package/dist/db/fileIndex.js CHANGED

@@ -4,19 +4,19 @@ import path from 'path';
 import { generateEmbedding } from '../lib/generateEmbedding.js';
 import { sanitizeQueryForFts } from '../utils/sanitizeQuery.js';
 import * as sqlTemplates from './sqlTemplates.js';
+import { stringSimilarity } from 'string-similarity-js';
 /**
- * Index a file into the local SQLite database.
+ * 📄 Index a single file into the database.
  *
- * - Normalizes the file path for cross-platform compatibility.
- * - Extracts file metadata (last modified time).
- * - Performs an UPSERT into the `files` table with the latest summary/type/timestamp.
+ * - Normalizes the file path to prevent OS-specific path bugs.
+ * - Stores metadata like summary, type, lastModified, etc.
+ * - Sets `embedding` to null initially — computed later.
  */
 export function indexFile(filePath, summary, type) {
     const stats = fs.statSync(filePath);
     const lastModified = stats.mtime.toISOString();
     const indexedAt = new Date().toISOString();
     const normalizedPath = path.normalize(filePath).replace(/\\/g, '/');
-    // Index the file metadata
     db.prepare(sqlTemplates.upsertFileTemplate).run({
         path: normalizedPath,
         summary,
@@ -28,7 +28,13 @@ export function indexFile(filePath, summary, type) {
     console.log(`📄 Indexed: ${normalizedPath}`);
 }
 /**
- * Perform a raw keyword-based full-text search using the FTS5 index.
+ * 🔍 FTS5 keyword-based search using SQLite's full-text index.
+ *
+ * Use this when you want:
+ * - A simple keyword search.
+ * - Fast fallback search that doesn't rely on embeddings.
+ *
+ * Returns a full `FileRow[]` (all known metadata, but no similarity score).
  */
 export function queryFiles(safeQuery, limit = 10) {
     console.log(`Executing search query: ${safeQuery}`);
@@ -42,7 +48,16 @@ export function queryFiles(safeQuery, limit = 10) {
     return results;
 }
 /**
- * Perform a hybrid semantic + keyword-based search.
+ * 🧠 Combined semantic + FTS search (Hybrid).
+ *
+ * 1. Convert user query to embedding vector using OpenAI / Ollama etc.
+ * 2. Perform a keyword-based FTS match to limit candidates.
+ * 3. For each candidate, compute cosine similarity between query + file embedding.
+ * 4. Blend the BM25 score and cosine similarity to produce a final score.
+ * 5. Return top K most relevant results.
+ *
+ * ⚠️ This returns a **lighter-weight type** than `FileRow` — doesn't include id, timestamps, etc.
+ * Use a wrapper type like `SearchResultWithScore` in calling code if you need both.
  */
 export async function searchFiles(query, topK = 5) {
     console.log(`🧠 Searching for query: "${query}"`);
@@ -68,24 +83,36 @@ export async function searchFiles(query, topK = 5) {
     const bm25Min = Math.min(...ftsResults.map(r => r.bm25Score));
     const bm25Max = Math.max(...ftsResults.map(r => r.bm25Score));
     const scored = ftsResults.map(result => {
-        let finalScore = 0;
         let sim = 0;
+        let finalScore = 0;
+        const normalizedBm25 = 1 - ((result.bm25Score - bm25Min) / (bm25Max - bm25Min + 1e-5));
         if (result.embedding) {
             try {
                 const vector = JSON.parse(result.embedding);
                 sim = cosineSimilarity(embedding, vector);
-                const normalizedBm25 = 1 - ((result.bm25Score - bm25Min) / (bm25Max - bm25Min + 1e-5));
-                finalScore = 0.7 * sim + 0.3 * normalizedBm25;
             }
             catch (err) {
                 console.error(`❌ Failed to parse embedding for ${result.path}:`, err);
-                finalScore = 0; // fallback
             }
         }
-        else {
-            finalScore = 1 - ((result.bm25Score - bm25Min) / (bm25Max - bm25Min + 1e-5));
+        const terms = query.toLowerCase().split(/\s+/);
+        const path = result.path.toLowerCase();
+        const summary = (result.summary || '').toLowerCase();
+        let termMatches = 0;
+        for (const term of terms) {
+            if (path.includes(term) || summary.includes(term)) {
+                termMatches += 1;
+            }
         }
+        const matchRatio = termMatches / terms.length;
+        const termBoost = matchRatio >= 1 ? 1.0 : matchRatio >= 0.5 ? 0.5 : 0;
+        // 🧠 Final score with hybrid weighting (BM25 + Embedding + Term Boost)
+        finalScore = 0.4 * normalizedBm25 + 0.4 * sim + 0.2 * termBoost;
+        // ✅ Fuzzy score using string-similarity-js
+        const fuzzyScore = stringSimilarity(query.toLowerCase(), `${path} ${summary}`);
+        finalScore += fuzzyScore * 10;
         return {
+            id: result.id,
             path: result.path,
             summary: result.summary,
             score: finalScore,
@@ -99,9 +126,41 @@ export async function searchFiles(query, topK = 5) {
     console.log(`Returning top ${topK} results based on combined score`);
     return sorted;
 }
+/**
+ * 🔢 Cosine similarity between two vectors.
+ */
 function cosineSimilarity(a, b) {
     const dot = a.reduce((sum, ai, i) => sum + ai * b[i], 0);
     const magA = Math.sqrt(a.reduce((sum, ai) => sum + ai * ai, 0));
     const magB = Math.sqrt(b.reduce((sum, bi) => sum + bi * bi, 0));
     return dot / (magA * magB);
 }
+/**
+ * 🧠 Get all extracted functions for a given list of file IDs.
+ *
+ * - Used in `askCmd.ts` to fetch code snippets from relevant files.
+ * - Grouped by file_id.
+ */
+export function getFunctionsForFiles(fileIds) {
+    if (!fileIds.length)
+        return {};
+    const placeholders = fileIds.map(() => '?').join(',');
+    const stmt = db.prepare(`
+    SELECT f.file_id, f.name, f.start_line, f.end_line, f.content
+    FROM functions f
+    WHERE f.file_id IN (${placeholders})
+  `);
+    const rows = stmt.all(...fileIds);
+    const grouped = {};
+    for (const row of rows) {
+        if (!grouped[row.file_id])
+            grouped[row.file_id] = [];
+        grouped[row.file_id].push({
+            name: row.name,
+            start_line: row.start_line,
+            end_line: row.end_line,
+            content: row.content,
+        });
+    }
+    return grouped;
+}

package/dist/db/functionExtractors/extractFromJs.js CHANGED

@@ -32,7 +32,7 @@ export async function extractFromJS(filePath, content, fileId) {
                 name,
                 start_line: node.loc?.start.line ?? -1,
                 end_line: node.loc?.end.line ?? -1,
-                body: content.slice(node.start, node.end),
+                content: content.slice(node.start, node.end),
             });
         },
         FunctionExpression(node, ancestors) {
@@ -42,7 +42,7 @@ export async function extractFromJS(filePath, content, fileId) {
                 name,
                 start_line: node.loc?.start.line ?? -1,
                 end_line: node.loc?.end.line ?? -1,
-                body: content.slice(node.start, node.end),
+                content: content.slice(node.start, node.end),
             });
         },
         ArrowFunctionExpression(node, ancestors) {
@@ -52,7 +52,7 @@ export async function extractFromJS(filePath, content, fileId) {
                 name,
                 start_line: node.loc?.start.line ?? -1,
                 end_line: node.loc?.end.line ?? -1,
-                body: content.slice(node.start, node.end),
+                content: content.slice(node.start, node.end),
             });
         },
     });
@@ -62,7 +62,7 @@ export async function extractFromJS(filePath, content, fileId) {
     }
     log(`🔍 Found ${functions.length} functions in ${filePath}`);
     for (const fn of functions) {
-        const embedding = await generateEmbedding(fn.body);
+        const embedding = await generateEmbedding(fn.content);
         const result = db.prepare(`
       INSERT INTO functions (
         file_id, name, start_line, end_line, content, embedding, lang
@@ -74,12 +74,12 @@ export async function extractFromJS(filePath, content, fileId) {
             name: fn.name,
             start_line: fn.start_line,
             end_line: fn.end_line,
-            content: fn.body,
+            content: fn.content,
             embedding: JSON.stringify(embedding),
             lang: 'js'
         });
         const callerId = result.lastInsertRowid;
-        const fnAst = parse(fn.body, {
+        const fnAst = parse(fn.content, {
             ecmaVersion: 'latest',
             sourceType: 'module',
             locations: true,

package/dist/db/schema.js CHANGED

@@ -46,6 +46,8 @@ export function initSchema() {
       lang TEXT
     );
+    CREATE INDEX IF NOT EXISTS idx_file_id ON functions(file_id);
     CREATE TABLE IF NOT EXISTS function_calls (
       caller_id INTEGER REFERENCES functions(id),
       callee_name TEXT

package/package.json CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "scai",
-  "version": "0.1.44",
+  "version": "0.1.46",
   "type": "module",
   "bin": {
     "scai": "./dist/index.js"
@@ -30,7 +30,8 @@
     "better-sqlite3": "^12.1.1",
     "commander": "^11.0.0",
     "fast-glob": "^3.3.3",
-    "proper-lockfile": "^4.1.2"
+    "proper-lockfile": "^4.1.2",
+    "string-similarity-js": "^2.1.4"
   },
   "devDependencies": {
     "@types/better-sqlite3": "^7.6.13",