scai 0.1.44 → 0.1.45

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
@@ -1,7 +1,7 @@
1
1
  import fs from 'fs';
2
2
  import path from 'path';
3
3
  import readline from 'readline';
4
- import { searchFiles, queryFiles } from '../db/fileIndex.js';
4
+ import { searchFiles, queryFiles, getFunctionsForFiles } from '../db/fileIndex.js';
5
5
  import { sanitizeQueryForFts } from '../utils/sanitizeQuery.js';
6
6
  import { generate } from '../lib/generate.js';
7
7
  import { buildContextualPrompt } from '../utils/buildContextualPrompt.js';
@@ -10,7 +10,6 @@ import { log } from '../utils/log.js';
10
10
  import { PROMPT_LOG_PATH, SCAI_HOME, INDEX_DIR } from '../constants.js';
11
11
  const MAX_RELATED_FILES = 5;
12
12
  export async function runAskCommand(query) {
13
- // 🧠 Prompt the user if no query is passed
14
13
  if (!query) {
15
14
  query = await promptOnce('🧠 Ask your question:\n> ');
16
15
  }
@@ -21,26 +20,24 @@ export async function runAskCommand(query) {
21
20
  }
22
21
  console.log(`šŸ“ Using index root: ${INDEX_DIR}`);
23
22
  console.log(`šŸ” Searching for: "${query}"\n`);
24
- // 🧠 Step 1: Semantic + fallback search
23
+ // 🟩 STEP 1: Semantic Search
25
24
  const start = Date.now();
26
- const semanticResults = await searchFiles(query, MAX_RELATED_FILES);
25
+ const semanticResults = await searchFiles(query, MAX_RELATED_FILES); // RankedFile[]
27
26
  const duration = Date.now() - start;
28
27
  console.log(`ā±ļø searchFiles took ${duration}ms and returned ${semanticResults.length} result(s)`);
29
- // šŸ” Log raw semantic results
30
- console.log('šŸ” Raw semantic search results:');
31
28
  semanticResults.forEach((file, i) => {
32
29
  console.log(` ${i + 1}. šŸ“„ Path: ${file.path} | Score: ${file.score?.toFixed(3) ?? 'n/a'}`);
33
30
  });
31
+ // 🟩 STEP 1.5: Fallback FTS search
34
32
  const safeQuery = sanitizeQueryForFts(query);
35
- const fallbackResults = queryFiles(safeQuery, 10);
36
- // šŸ” Log raw keyword fallback results
37
- console.log('\nšŸ” Raw fallback keyword (FTS) search results:');
33
+ const fallbackResults = queryFiles(safeQuery, 10); // FileRow[]
38
34
  fallbackResults.forEach((file, i) => {
39
- console.log(` ${i + 1}. šŸ“„ Path: ${file.path}`);
35
+ console.log(` ${i + 1}. šŸ”Ž Fallback Match: ${file.path}`);
40
36
  });
41
- // 🧠 Step 2: Merge results
37
+ // 🟩 STEP 2: Merge results (de-duplicate by full resolved path)
42
38
  const seen = new Set();
43
39
  const combinedResults = [];
40
+ // Merging results now ensures all have `id`
44
41
  for (const file of semanticResults) {
45
42
  const resolved = path.resolve(file.path);
46
43
  seen.add(resolved);
@@ -51,43 +48,71 @@ export async function runAskCommand(query) {
51
48
  if (!seen.has(resolved)) {
52
49
  seen.add(resolved);
53
50
  combinedResults.push({
51
+ id: file.id, // Ensure the id is included here
54
52
  path: file.path,
55
- summary: file.summary,
53
+ summary: file.summary || '',
56
54
  score: 0.0,
55
+ sim: 0,
56
+ bm25: 0
57
57
  });
58
58
  }
59
59
  }
60
- if (combinedResults.length > 0) {
61
- console.log('šŸ“Š Closest files based on semantic + keyword match:');
62
- combinedResults.forEach((file, i) => {
63
- console.log(` ${i + 1}. šŸ“„ Path: ${file.path} | Score: ${file.score?.toFixed(3) ?? 'fallback'}`);
60
+ // 🟩 STEP 3: Log results
61
+ if (combinedResults.length) {
62
+ console.log('\nšŸ“Š Final Related Files:');
63
+ combinedResults.forEach((f, i) => {
64
+ console.log(` ${i + 1}. ${f.path} (${f.score?.toFixed(3) ?? 'fallback'})`);
64
65
  });
65
66
  }
66
67
  else {
67
- console.log('āš ļø No similar files found. Asking the model using question only...');
68
+ console.log('āš ļø No similar files found. Using query only.');
68
69
  }
69
- // 🧠 Step 3: Build metadata for prompt
70
- const relatedFiles = combinedResults.slice(0, MAX_RELATED_FILES).map((file, index) => ({
71
- path: file.path,
72
- summary: file.summary || '(No summary available)', // Ensure summary is included
73
- }));
74
- // Get the top-ranked file (the first one in the sorted results)
75
- const topRankedFile = combinedResults[0]; // The most relevant file
70
+ // 🟩 STEP 4: Load top file code + metadata
71
+ const topFile = combinedResults[0];
72
+ const filepath = topFile?.path || '';
73
+ let code = '';
74
+ let topSummary = topFile.summary || '(No summary available)';
75
+ let topFunctions = [];
76
+ // Gather all file IDs from the combined results
77
+ const allFileIds = combinedResults
78
+ .map(file => file.id) // Now file.id exists
79
+ .filter((id) => typeof id === 'number');
80
+ const allFunctionsMap = getFunctionsForFiles(allFileIds);
81
+ try {
82
+ code = fs.readFileSync(filepath, 'utf-8');
83
+ const topFileId = topFile.id;
84
+ topFunctions = allFunctionsMap[topFileId]?.map(fn => fn.name) || [];
85
+ }
86
+ catch (err) {
87
+ console.warn(`āš ļø Failed to read or analyze top file (${filepath}):`, err);
88
+ }
89
+ // 🟩 STEP 5: Build relatedFiles with functions
90
+ const relatedFiles = combinedResults.slice(0, MAX_RELATED_FILES).map(file => {
91
+ const fileId = file.id;
92
+ return {
93
+ path: file.path,
94
+ summary: file.summary || '(No summary available)',
95
+ functions: allFunctionsMap[fileId]?.map(fn => fn.name) || [],
96
+ };
97
+ });
98
+ // 🟩 STEP 6: Generate file tree
76
99
  let fileTree = '';
77
100
  try {
78
- fileTree = generateFileTree(INDEX_DIR, 2); // Limit depth
101
+ fileTree = generateFileTree(INDEX_DIR, 2);
79
102
  }
80
103
  catch (e) {
81
- console.warn('āš ļø Failed to generate file tree:', e);
104
+ console.warn('āš ļø Could not generate file tree:', e);
82
105
  }
83
- // Now we can build the prompt with summaries included for each file
106
+ // 🟩 STEP 7: Build prompt
84
107
  const promptContent = buildContextualPrompt({
85
108
  baseInstruction: query,
86
- code: '', // No specific code selected
87
- relatedFiles, // This now includes both path and summary for each file
109
+ code,
110
+ summary: topSummary,
111
+ functions: topFunctions,
112
+ relatedFiles,
88
113
  projectFileTree: fileTree || undefined,
89
114
  });
90
- // 🧠 Step 4: Log prompt to file
115
+ // 🟩 STEP 8: Save prompt for trace/debug
91
116
  try {
92
117
  if (!fs.existsSync(SCAI_HOME))
93
118
  fs.mkdirSync(SCAI_HOME, { recursive: true });
@@ -97,26 +122,21 @@ export async function runAskCommand(query) {
97
122
  catch (err) {
98
123
  log('āŒ Failed to write prompt log:', err);
99
124
  }
100
- // 🧠 Step 5: Call the model
125
+ // 🟩 STEP 9: Ask model
101
126
  try {
102
- console.log('šŸ¤– Asking the model...');
103
- // Create a more structured PromptInput object
127
+ console.log('\nšŸ¤– Asking the model...');
104
128
  const input = {
105
- content: query, // Main instruction (the query)
106
- filepath: topRankedFile?.path || '', // Include the path of the top-ranked file
107
- metadata: {
108
- summary: topRankedFile?.summary || '', // Add summary of the top-ranked file
109
- relatedFiles: relatedFiles, // Pass related files as part of metadata
110
- },
111
- projectFileTree: fileTree || '' // Include file structure in metadata
129
+ content: promptContent,
130
+ filepath,
112
131
  };
113
132
  const modelResponse = await generate(input, 'llama3');
114
- console.log(`\nšŸ“ Model response:\n${modelResponse.content}`);
133
+ console.log(`\n🧠 Model Response:\n${modelResponse.content}`);
115
134
  }
116
135
  catch (err) {
117
136
  console.error('āŒ Model request failed:', err);
118
137
  }
119
138
  }
139
+ // 🟩 Helper: Prompt once
120
140
  function promptOnce(promptText) {
121
141
  return new Promise(resolve => {
122
142
  const rl = readline.createInterface({
@@ -5,18 +5,17 @@ import { generateEmbedding } from '../lib/generateEmbedding.js';
5
5
  import { sanitizeQueryForFts } from '../utils/sanitizeQuery.js';
6
6
  import * as sqlTemplates from './sqlTemplates.js';
7
7
  /**
8
- * Index a file into the local SQLite database.
8
+ * 📄 Index a single file into the database.
9
9
  *
10
- * - Normalizes the file path for cross-platform compatibility.
11
- * - Extracts file metadata (last modified time).
12
- * - Performs an UPSERT into the `files` table with the latest summary/type/timestamp.
10
+ * - Normalizes the file path to prevent OS-specific path bugs.
11
+ * - Stores metadata like summary, type, lastModified, etc.
12
+ * - Sets `embedding` to null initially — computed later.
13
13
  */
14
14
  export function indexFile(filePath, summary, type) {
15
15
  const stats = fs.statSync(filePath);
16
16
  const lastModified = stats.mtime.toISOString();
17
17
  const indexedAt = new Date().toISOString();
18
18
  const normalizedPath = path.normalize(filePath).replace(/\\/g, '/');
19
- // Index the file metadata
20
19
  db.prepare(sqlTemplates.upsertFileTemplate).run({
21
20
  path: normalizedPath,
22
21
  summary,
@@ -28,7 +27,13 @@ export function indexFile(filePath, summary, type) {
28
27
  console.log(`šŸ“„ Indexed: ${normalizedPath}`);
29
28
  }
30
29
  /**
31
- * Perform a raw keyword-based full-text search using the FTS5 index.
30
+ * 🔍 FTS5 keyword-based search using SQLite's full-text index.
31
+ *
32
+ * Use this when you want:
33
+ * - A simple keyword search.
34
+ * - Fast fallback search that doesn't rely on embeddings.
35
+ *
36
+ * Returns a full `FileRow[]` (all known metadata, but no similarity score).
32
37
  */
33
38
  export function queryFiles(safeQuery, limit = 10) {
34
39
  console.log(`Executing search query: ${safeQuery}`);
@@ -42,7 +47,16 @@ export function queryFiles(safeQuery, limit = 10) {
42
47
  return results;
43
48
  }
44
49
  /**
45
- * Perform a hybrid semantic + keyword-based search.
50
+ * 🧠 Combined semantic + FTS search (Hybrid).
51
+ *
52
+ * 1. Convert user query to embedding vector using OpenAI / Ollama etc.
53
+ * 2. Perform a keyword-based FTS match to limit candidates.
54
+ * 3. For each candidate, compute cosine similarity between query + file embedding.
55
+ * 4. Blend the BM25 score and cosine similarity to produce a final score.
56
+ * 5. Return top K most relevant results.
57
+ *
58
+ * ⚠️ This returns a **lighter-weight type** than `FileRow` — it carries `id`, but doesn't include timestamps, etc.
59
+ * Use a wrapper type like `SearchResultWithScore` in calling code if you need both.
46
60
  */
47
61
  export async function searchFiles(query, topK = 5) {
48
62
  console.log(`🧠 Searching for query: "${query}"`);
@@ -53,6 +67,7 @@ export async function searchFiles(query, topK = 5) {
53
67
  }
54
68
  const safeQuery = sanitizeQueryForFts(query);
55
69
  console.log(`Executing search query in FTS5: ${safeQuery}`);
70
+ // Step 1: Narrow candidate set using fast keyword match
56
71
  const ftsResults = db.prepare(`
57
72
  SELECT fts.rowid AS id, f.path, f.summary, f.type, bm25(files_fts) AS bm25Score, f.embedding
58
73
  FROM files f
@@ -65,6 +80,7 @@ export async function searchFiles(query, topK = 5) {
65
80
  if (ftsResults.length === 0) {
66
81
  return [];
67
82
  }
83
+ // Step 2: Compute score based on embedding similarity + BM25 score
68
84
  const bm25Min = Math.min(...ftsResults.map(r => r.bm25Score));
69
85
  const bm25Max = Math.max(...ftsResults.map(r => r.bm25Score));
70
86
  const scored = ftsResults.map(result => {
@@ -79,13 +95,15 @@ export async function searchFiles(query, topK = 5) {
79
95
  }
80
96
  catch (err) {
81
97
  console.error(`āŒ Failed to parse embedding for ${result.path}:`, err);
82
- finalScore = 0; // fallback
98
+ finalScore = 0;
83
99
  }
84
100
  }
85
101
  else {
102
+ // Fallback to BM25-only score
86
103
  finalScore = 1 - ((result.bm25Score - bm25Min) / (bm25Max - bm25Min + 1e-5));
87
104
  }
88
105
  return {
106
+ id: result.id, // Ensure the id is included here
89
107
  path: result.path,
90
108
  summary: result.summary,
91
109
  score: finalScore,
@@ -99,9 +117,41 @@ export async function searchFiles(query, topK = 5) {
99
117
  console.log(`Returning top ${topK} results based on combined score`);
100
118
  return sorted;
101
119
  }
120
+ /**
121
+ * 🔢 Cosine similarity between two vectors.
122
+ */
102
123
  function cosineSimilarity(a, b) {
103
124
  const dot = a.reduce((sum, ai, i) => sum + ai * b[i], 0);
104
125
  const magA = Math.sqrt(a.reduce((sum, ai) => sum + ai * ai, 0));
105
126
  const magB = Math.sqrt(b.reduce((sum, bi) => sum + bi * bi, 0));
106
127
  return dot / (magA * magB);
107
128
  }
129
+ /**
130
+ * 🧠 Get all extracted functions for a given list of file IDs.
131
+ *
132
+ * - Used by `runAskCommand` (`askCmd.ts`) to list the functions of relevant files; that caller reads only each function's `name`.
133
+ * - Grouped by file_id.
134
+ */
135
+ export function getFunctionsForFiles(fileIds) {
136
+ if (!fileIds.length)
137
+ return {};
138
+ const placeholders = fileIds.map(() => '?').join(',');
139
+ const stmt = db.prepare(`
140
+ SELECT f.file_id, f.name, f.start_line, f.end_line, f.content
141
+ FROM functions f
142
+ WHERE f.file_id IN (${placeholders})
143
+ `);
144
+ const rows = stmt.all(...fileIds);
145
+ const grouped = {};
146
+ for (const row of rows) {
147
+ if (!grouped[row.file_id])
148
+ grouped[row.file_id] = [];
149
+ grouped[row.file_id].push({
150
+ name: row.name,
151
+ start_line: row.start_line,
152
+ end_line: row.end_line,
153
+ content: row.content,
154
+ });
155
+ }
156
+ return grouped;
157
+ }
@@ -32,7 +32,7 @@ export async function extractFromJS(filePath, content, fileId) {
32
32
  name,
33
33
  start_line: node.loc?.start.line ?? -1,
34
34
  end_line: node.loc?.end.line ?? -1,
35
- body: content.slice(node.start, node.end),
35
+ content: content.slice(node.start, node.end),
36
36
  });
37
37
  },
38
38
  FunctionExpression(node, ancestors) {
@@ -42,7 +42,7 @@ export async function extractFromJS(filePath, content, fileId) {
42
42
  name,
43
43
  start_line: node.loc?.start.line ?? -1,
44
44
  end_line: node.loc?.end.line ?? -1,
45
- body: content.slice(node.start, node.end),
45
+ content: content.slice(node.start, node.end),
46
46
  });
47
47
  },
48
48
  ArrowFunctionExpression(node, ancestors) {
@@ -52,7 +52,7 @@ export async function extractFromJS(filePath, content, fileId) {
52
52
  name,
53
53
  start_line: node.loc?.start.line ?? -1,
54
54
  end_line: node.loc?.end.line ?? -1,
55
- body: content.slice(node.start, node.end),
55
+ content: content.slice(node.start, node.end),
56
56
  });
57
57
  },
58
58
  });
@@ -62,7 +62,7 @@ export async function extractFromJS(filePath, content, fileId) {
62
62
  }
63
63
  log(`šŸ” Found ${functions.length} functions in ${filePath}`);
64
64
  for (const fn of functions) {
65
- const embedding = await generateEmbedding(fn.body);
65
+ const embedding = await generateEmbedding(fn.content);
66
66
  const result = db.prepare(`
67
67
  INSERT INTO functions (
68
68
  file_id, name, start_line, end_line, content, embedding, lang
@@ -74,12 +74,12 @@ export async function extractFromJS(filePath, content, fileId) {
74
74
  name: fn.name,
75
75
  start_line: fn.start_line,
76
76
  end_line: fn.end_line,
77
- content: fn.body,
77
+ content: fn.content,
78
78
  embedding: JSON.stringify(embedding),
79
79
  lang: 'js'
80
80
  });
81
81
  const callerId = result.lastInsertRowid;
82
- const fnAst = parse(fn.body, {
82
+ const fnAst = parse(fn.content, {
83
83
  ecmaVersion: 'latest',
84
84
  sourceType: 'module',
85
85
  locations: true,
package/dist/db/schema.js CHANGED
@@ -46,6 +46,8 @@ export function initSchema() {
46
46
  lang TEXT
47
47
  );
48
48
 
49
+ CREATE INDEX IF NOT EXISTS idx_file_id ON functions(file_id);
50
+
49
51
  CREATE TABLE IF NOT EXISTS function_calls (
50
52
  caller_id INTEGER REFERENCES functions(id),
51
53
  callee_name TEXT
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "scai",
3
- "version": "0.1.44",
3
+ "version": "0.1.45",
4
4
  "type": "module",
5
5
  "bin": {
6
6
  "scai": "./dist/index.js"