scai 0.1.44 → 0.1.46

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,16 +1,16 @@
1
1
  import fs from 'fs';
2
2
  import path from 'path';
3
3
  import readline from 'readline';
4
- import { searchFiles, queryFiles } from '../db/fileIndex.js';
4
+ import { searchFiles, queryFiles, getFunctionsForFiles } from '../db/fileIndex.js';
5
5
  import { sanitizeQueryForFts } from '../utils/sanitizeQuery.js';
6
6
  import { generate } from '../lib/generate.js';
7
7
  import { buildContextualPrompt } from '../utils/buildContextualPrompt.js';
8
8
  import { generateFileTree } from '../utils/fileTree.js';
9
9
  import { log } from '../utils/log.js';
10
10
  import { PROMPT_LOG_PATH, SCAI_HOME, INDEX_DIR } from '../constants.js';
11
- const MAX_RELATED_FILES = 5;
11
+ const MAX_RELATED_FILES = 3;
12
+ const MAX_SUMMARY_LINES = 12;
12
13
  export async function runAskCommand(query) {
13
- // 🧠 Prompt the user if no query is passed
14
14
  if (!query) {
15
15
  query = await promptOnce('🧠 Ask your question:\n> ');
16
16
  }
@@ -21,24 +21,21 @@ export async function runAskCommand(query) {
21
21
  }
22
22
  console.log(`šŸ“ Using index root: ${INDEX_DIR}`);
23
23
  console.log(`šŸ” Searching for: "${query}"\n`);
24
- // 🧠 Step 1: Semantic + fallback search
24
+ // 🟩 STEP 1: Semantic Search
25
25
  const start = Date.now();
26
- const semanticResults = await searchFiles(query, MAX_RELATED_FILES);
26
+ const semanticResults = await searchFiles(query, MAX_RELATED_FILES); // RankedFile[]
27
27
  const duration = Date.now() - start;
28
28
  console.log(`ā±ļø searchFiles took ${duration}ms and returned ${semanticResults.length} result(s)`);
29
- // šŸ” Log raw semantic results
30
- console.log('šŸ” Raw semantic search results:');
31
29
  semanticResults.forEach((file, i) => {
32
30
  console.log(` ${i + 1}. šŸ“„ Path: ${file.path} | Score: ${file.score?.toFixed(3) ?? 'n/a'}`);
33
31
  });
32
+ // 🟩 STEP 1.5: Fallback FTS search
34
33
  const safeQuery = sanitizeQueryForFts(query);
35
- const fallbackResults = queryFiles(safeQuery, 10);
36
- // šŸ” Log raw keyword fallback results
37
- console.log('\nšŸ” Raw fallback keyword (FTS) search results:');
34
+ const fallbackResults = queryFiles(safeQuery, 10); // FileRow[]
38
35
  fallbackResults.forEach((file, i) => {
39
- console.log(` ${i + 1}. šŸ“„ Path: ${file.path}`);
36
+ console.log(` ${i + 1}. šŸ”Ž Fallback Match: ${file.path}`);
40
37
  });
41
- // 🧠 Step 2: Merge results
38
+ // 🟩 STEP 2: Merge results (de-duplicate by full resolved path)
42
39
  const seen = new Set();
43
40
  const combinedResults = [];
44
41
  for (const file of semanticResults) {
@@ -51,43 +48,78 @@ export async function runAskCommand(query) {
51
48
  if (!seen.has(resolved)) {
52
49
  seen.add(resolved);
53
50
  combinedResults.push({
51
+ id: file.id,
54
52
  path: file.path,
55
- summary: file.summary,
53
+ summary: file.summary || '',
56
54
  score: 0.0,
55
+ sim: 0,
56
+ bm25: 0
57
57
  });
58
58
  }
59
59
  }
60
- if (combinedResults.length > 0) {
61
- console.log('šŸ“Š Closest files based on semantic + keyword match:');
62
- combinedResults.forEach((file, i) => {
63
- console.log(` ${i + 1}. šŸ“„ Path: ${file.path} | Score: ${file.score?.toFixed(3) ?? 'fallback'}`);
60
+ // 🟩 STEP 3: Log results
61
+ if (combinedResults.length) {
62
+ console.log('\nšŸ“Š Final Related Files:');
63
+ combinedResults.forEach((f, i) => {
64
+ console.log(` ${i + 1}. ${f.path} (${f.score?.toFixed(3) ?? 'fallback'})`);
64
65
  });
65
66
  }
66
67
  else {
67
- console.log('āš ļø No similar files found. Asking the model using question only...');
68
+ console.log('āš ļø No similar files found. Using query only.');
68
69
  }
69
- // 🧠 Step 3: Build metadata for prompt
70
- const relatedFiles = combinedResults.slice(0, MAX_RELATED_FILES).map((file, index) => ({
71
- path: file.path,
72
- summary: file.summary || '(No summary available)', // Ensure summary is included
73
- }));
74
- // Get the top-ranked file (the first one in the sorted results)
75
- const topRankedFile = combinedResults[0]; // The most relevant file
70
+ // 🟩 STEP 4: Load top file code + metadata
71
+ const topFile = combinedResults[0];
72
+ const filepath = topFile?.path || '';
73
+ let code = '';
74
+ let topSummary = topFile.summary || '(No summary available)';
75
+ let topFunctions = [];
76
+ // Truncate summary if needed
77
+ if (topSummary) {
78
+ topSummary = topSummary.split('\n').slice(0, MAX_SUMMARY_LINES).join('\n');
79
+ }
80
+ const allFileIds = combinedResults
81
+ .map(file => file.id)
82
+ .filter((id) => typeof id === 'number');
83
+ const allFunctionsMap = getFunctionsForFiles(allFileIds);
84
+ try {
85
+ code = fs.readFileSync(filepath, 'utf-8');
86
+ const topFileId = topFile.id;
87
+ topFunctions = allFunctionsMap[topFileId]?.map(fn => fn.name) || [];
88
+ }
89
+ catch (err) {
90
+ console.warn(`āš ļø Failed to read or analyze top file (${filepath}):`, err);
91
+ }
92
+ // 🟩 STEP 5: Build relatedFiles with functions
93
+ const relatedFiles = combinedResults.slice(0, MAX_RELATED_FILES).map(file => {
94
+ const fileId = file.id;
95
+ let summary = file.summary || '(No summary available)';
96
+ if (summary) {
97
+ summary = summary.split('\n').slice(0, MAX_SUMMARY_LINES).join('\n');
98
+ }
99
+ return {
100
+ path: file.path,
101
+ summary,
102
+ functions: allFunctionsMap[fileId]?.map(fn => fn.name) || [],
103
+ };
104
+ });
105
+ // 🟩 STEP 6: Generate file tree
76
106
  let fileTree = '';
77
107
  try {
78
- fileTree = generateFileTree(INDEX_DIR, 2); // Limit depth
108
+ fileTree = generateFileTree(INDEX_DIR, 2);
79
109
  }
80
110
  catch (e) {
81
- console.warn('āš ļø Failed to generate file tree:', e);
111
+ console.warn('āš ļø Could not generate file tree:', e);
82
112
  }
83
- // Now we can build the prompt with summaries included for each file
113
+ // 🟩 STEP 7: Build prompt
84
114
  const promptContent = buildContextualPrompt({
85
115
  baseInstruction: query,
86
- code: '', // No specific code selected
87
- relatedFiles, // This now includes both path and summary for each file
116
+ code,
117
+ summary: topSummary,
118
+ functions: topFunctions,
119
+ relatedFiles,
88
120
  projectFileTree: fileTree || undefined,
89
121
  });
90
- // 🧠 Step 4: Log prompt to file
122
+ // 🟩 STEP 8: Save prompt for trace/debug
91
123
  try {
92
124
  if (!fs.existsSync(SCAI_HOME))
93
125
  fs.mkdirSync(SCAI_HOME, { recursive: true });
@@ -97,26 +129,21 @@ export async function runAskCommand(query) {
97
129
  catch (err) {
98
130
  log('āŒ Failed to write prompt log:', err);
99
131
  }
100
- // 🧠 Step 5: Call the model
132
+ // 🟩 STEP 9: Ask model
101
133
  try {
102
- console.log('šŸ¤– Asking the model...');
103
- // Create a more structured PromptInput object
134
+ console.log('\nšŸ¤– Asking the model...');
104
135
  const input = {
105
- content: query, // Main instruction (the query)
106
- filepath: topRankedFile?.path || '', // Include the path of the top-ranked file
107
- metadata: {
108
- summary: topRankedFile?.summary || '', // Add summary of the top-ranked file
109
- relatedFiles: relatedFiles, // Pass related files as part of metadata
110
- },
111
- projectFileTree: fileTree || '' // Include file structure in metadata
136
+ content: promptContent,
137
+ filepath,
112
138
  };
113
139
  const modelResponse = await generate(input, 'llama3');
114
- console.log(`\nšŸ“ Model response:\n${modelResponse.content}`);
140
+ console.log(`\n🧠 Model Response:\n${modelResponse.content}`);
115
141
  }
116
142
  catch (err) {
117
143
  console.error('āŒ Model request failed:', err);
118
144
  }
119
145
  }
146
+ // 🟩 Helper: Prompt once
120
147
  function promptOnce(promptText) {
121
148
  return new Promise(resolve => {
122
149
  const rl = readline.createInterface({
@@ -4,19 +4,19 @@ import path from 'path';
4
4
  import { generateEmbedding } from '../lib/generateEmbedding.js';
5
5
  import { sanitizeQueryForFts } from '../utils/sanitizeQuery.js';
6
6
  import * as sqlTemplates from './sqlTemplates.js';
7
+ import { stringSimilarity } from 'string-similarity-js';
7
8
  /**
8
- * Index a file into the local SQLite database.
9
+ * šŸ“„ Index a single file into the database.
9
10
  *
10
- * - Normalizes the file path for cross-platform compatibility.
11
- * - Extracts file metadata (last modified time).
12
- * - Performs an UPSERT into the `files` table with the latest summary/type/timestamp.
11
+ * - Normalizes the file path to prevent OS-specific path bugs.
12
+ * - Stores metadata like summary, type, lastModified, etc.
13
+ * - Sets `embedding` to null initially — computed later.
13
14
  */
14
15
  export function indexFile(filePath, summary, type) {
15
16
  const stats = fs.statSync(filePath);
16
17
  const lastModified = stats.mtime.toISOString();
17
18
  const indexedAt = new Date().toISOString();
18
19
  const normalizedPath = path.normalize(filePath).replace(/\\/g, '/');
19
- // Index the file metadata
20
20
  db.prepare(sqlTemplates.upsertFileTemplate).run({
21
21
  path: normalizedPath,
22
22
  summary,
@@ -28,7 +28,13 @@ export function indexFile(filePath, summary, type) {
28
28
  console.log(`šŸ“„ Indexed: ${normalizedPath}`);
29
29
  }
30
30
  /**
31
- * Perform a raw keyword-based full-text search using the FTS5 index.
31
+ * šŸ” FTS5 keyword-based search using SQLite's full-text index.
32
+ *
33
+ * Use this when you want:
34
+ * - A simple keyword search.
35
+ * - Fast fallback search that doesn't rely on embeddings.
36
+ *
37
+ * Returns a full `FileRow[]` (all known metadata, but no similarity score).
32
38
  */
33
39
  export function queryFiles(safeQuery, limit = 10) {
34
40
  console.log(`Executing search query: ${safeQuery}`);
@@ -42,7 +48,16 @@ export function queryFiles(safeQuery, limit = 10) {
42
48
  return results;
43
49
  }
44
50
  /**
45
- * Perform a hybrid semantic + keyword-based search.
51
+ * 🧠 Combined semantic + FTS search (Hybrid).
52
+ *
53
+ * 1. Convert user query to embedding vector using OpenAI / Ollama etc.
54
+ * 2. Perform a keyword-based FTS match to limit candidates.
55
+ * 3. For each candidate, compute cosine similarity between query + file embedding.
56
+ * 4. Blend the BM25 score and cosine similarity to produce a final score.
57
+ * 5. Return top K most relevant results.
58
+ *
59
+ * āš ļø This returns a **lighter-weight type** than `FileRow` — doesn't include id, timestamps, etc.
60
+ * Use a wrapper type like `SearchResultWithScore` in calling code if you need both.
46
61
  */
47
62
  export async function searchFiles(query, topK = 5) {
48
63
  console.log(`🧠 Searching for query: "${query}"`);
@@ -68,24 +83,36 @@ export async function searchFiles(query, topK = 5) {
68
83
  const bm25Min = Math.min(...ftsResults.map(r => r.bm25Score));
69
84
  const bm25Max = Math.max(...ftsResults.map(r => r.bm25Score));
70
85
  const scored = ftsResults.map(result => {
71
- let finalScore = 0;
72
86
  let sim = 0;
87
+ let finalScore = 0;
88
+ const normalizedBm25 = 1 - ((result.bm25Score - bm25Min) / (bm25Max - bm25Min + 1e-5));
73
89
  if (result.embedding) {
74
90
  try {
75
91
  const vector = JSON.parse(result.embedding);
76
92
  sim = cosineSimilarity(embedding, vector);
77
- const normalizedBm25 = 1 - ((result.bm25Score - bm25Min) / (bm25Max - bm25Min + 1e-5));
78
- finalScore = 0.7 * sim + 0.3 * normalizedBm25;
79
93
  }
80
94
  catch (err) {
81
95
  console.error(`āŒ Failed to parse embedding for ${result.path}:`, err);
82
- finalScore = 0; // fallback
83
96
  }
84
97
  }
85
- else {
86
- finalScore = 1 - ((result.bm25Score - bm25Min) / (bm25Max - bm25Min + 1e-5));
98
+ const terms = query.toLowerCase().split(/\s+/);
99
+ const path = result.path.toLowerCase();
100
+ const summary = (result.summary || '').toLowerCase();
101
+ let termMatches = 0;
102
+ for (const term of terms) {
103
+ if (path.includes(term) || summary.includes(term)) {
104
+ termMatches += 1;
105
+ }
87
106
  }
107
+ const matchRatio = termMatches / terms.length;
108
+ const termBoost = matchRatio >= 1 ? 1.0 : matchRatio >= 0.5 ? 0.5 : 0;
109
+ // 🧠 Final score with hybrid weighting (BM25 + Embedding + Term Boost)
110
+ finalScore = 0.4 * normalizedBm25 + 0.4 * sim + 0.2 * termBoost;
111
+ // āœ… Fuzzy score using string-similarity-js
112
+ const fuzzyScore = stringSimilarity(query.toLowerCase(), `${path} ${summary}`);
113
+ finalScore += fuzzyScore * 10;
88
114
  return {
115
+ id: result.id,
89
116
  path: result.path,
90
117
  summary: result.summary,
91
118
  score: finalScore,
@@ -99,9 +126,41 @@ export async function searchFiles(query, topK = 5) {
99
126
  console.log(`Returning top ${topK} results based on combined score`);
100
127
  return sorted;
101
128
  }
129
/**
 * Cosine similarity between two numeric vectors.
 *
 * The call site passes the query embedding as `a` and a stored embedding
 * (parsed from JSON) as `b`, then blends the result into a ranking score,
 * so a NaN here would propagate into that score.
 *
 * @param {number[]} a - First vector.
 * @param {number[]} b - Second vector.
 * @returns {number} dot(a, b) / (|a| * |b|). Returns 0 when the vectors have
 *   different lengths or when either has zero magnitude, because the
 *   similarity is undefined in those cases and plain division yields NaN.
 */
function cosineSimilarity(a, b) {
  // Vectors of different dimensionality are not comparable; the original
  // produced NaN (b shorter) or a meaningless value (b longer).
  if (a.length !== b.length) {
    return 0;
  }

  let dot = 0;
  let sumSqA = 0;
  let sumSqB = 0;
  for (let i = 0; i < a.length; i += 1) {
    dot += a[i] * b[i];
    sumSqA += a[i] * a[i];
    sumSqB += b[i] * b[i];
  }

  const denominator = Math.sqrt(sumSqA) * Math.sqrt(sumSqB);
  // `!(x > 0)` rejects both 0 (empty / all-zero vector) and NaN
  // (non-numeric entries) in one check.
  if (!(denominator > 0)) {
    return 0;
  }
  return dot / denominator;
}
138
/**
 * Get all extracted functions for a given list of file IDs, grouped by file.
 *
 * Reads `file_id, name, start_line, end_line, content` from the `functions`
 * table for every requested id and buckets the rows by `file_id`.
 *
 * @param {number[]} fileIds - File ids to look up. Duplicates are ignored;
 *   a missing, non-array or empty value yields an empty result without
 *   touching the database.
 * @returns {Object<number, Array<{name: string, start_line: number, end_line: number, content: string}>>}
 *   Plain object keyed by `file_id`. Files with no extracted functions have
 *   no key. Within each file, functions are ordered by `start_line`.
 */
export function getFunctionsForFiles(fileIds) {
  if (!Array.isArray(fileIds) || fileIds.length === 0) {
    return {};
  }

  // Bind each id once: repeated ids only add redundant placeholders.
  const uniqueIds = [...new Set(fileIds)];
  const placeholders = uniqueIds.map(() => '?').join(',');

  // Only `?` placeholders are interpolated into the SQL text; the ids
  // themselves are passed as bound parameters.
  // ORDER BY makes the per-file function order deterministic.
  const stmt = db.prepare(`
    SELECT f.file_id, f.name, f.start_line, f.end_line, f.content
    FROM functions f
    WHERE f.file_id IN (${placeholders})
    ORDER BY f.file_id, f.start_line
  `);
  const rows = stmt.all(...uniqueIds);

  const grouped = {};
  for (const row of rows) {
    if (!grouped[row.file_id]) {
      grouped[row.file_id] = [];
    }
    grouped[row.file_id].push({
      name: row.name,
      start_line: row.start_line,
      end_line: row.end_line,
      content: row.content,
    });
  }
  return grouped;
}
@@ -32,7 +32,7 @@ export async function extractFromJS(filePath, content, fileId) {
32
32
  name,
33
33
  start_line: node.loc?.start.line ?? -1,
34
34
  end_line: node.loc?.end.line ?? -1,
35
- body: content.slice(node.start, node.end),
35
+ content: content.slice(node.start, node.end),
36
36
  });
37
37
  },
38
38
  FunctionExpression(node, ancestors) {
@@ -42,7 +42,7 @@ export async function extractFromJS(filePath, content, fileId) {
42
42
  name,
43
43
  start_line: node.loc?.start.line ?? -1,
44
44
  end_line: node.loc?.end.line ?? -1,
45
- body: content.slice(node.start, node.end),
45
+ content: content.slice(node.start, node.end),
46
46
  });
47
47
  },
48
48
  ArrowFunctionExpression(node, ancestors) {
@@ -52,7 +52,7 @@ export async function extractFromJS(filePath, content, fileId) {
52
52
  name,
53
53
  start_line: node.loc?.start.line ?? -1,
54
54
  end_line: node.loc?.end.line ?? -1,
55
- body: content.slice(node.start, node.end),
55
+ content: content.slice(node.start, node.end),
56
56
  });
57
57
  },
58
58
  });
@@ -62,7 +62,7 @@ export async function extractFromJS(filePath, content, fileId) {
62
62
  }
63
63
  log(`šŸ” Found ${functions.length} functions in ${filePath}`);
64
64
  for (const fn of functions) {
65
- const embedding = await generateEmbedding(fn.body);
65
+ const embedding = await generateEmbedding(fn.content);
66
66
  const result = db.prepare(`
67
67
  INSERT INTO functions (
68
68
  file_id, name, start_line, end_line, content, embedding, lang
@@ -74,12 +74,12 @@ export async function extractFromJS(filePath, content, fileId) {
74
74
  name: fn.name,
75
75
  start_line: fn.start_line,
76
76
  end_line: fn.end_line,
77
- content: fn.body,
77
+ content: fn.content,
78
78
  embedding: JSON.stringify(embedding),
79
79
  lang: 'js'
80
80
  });
81
81
  const callerId = result.lastInsertRowid;
82
- const fnAst = parse(fn.body, {
82
+ const fnAst = parse(fn.content, {
83
83
  ecmaVersion: 'latest',
84
84
  sourceType: 'module',
85
85
  locations: true,
package/dist/db/schema.js CHANGED
@@ -46,6 +46,8 @@ export function initSchema() {
46
46
  lang TEXT
47
47
  );
48
48
 
49
+ CREATE INDEX IF NOT EXISTS idx_file_id ON functions(file_id);
50
+
49
51
  CREATE TABLE IF NOT EXISTS function_calls (
50
52
  caller_id INTEGER REFERENCES functions(id),
51
53
  callee_name TEXT
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "scai",
3
- "version": "0.1.44",
3
+ "version": "0.1.46",
4
4
  "type": "module",
5
5
  "bin": {
6
6
  "scai": "./dist/index.js"
@@ -30,7 +30,8 @@
30
30
  "better-sqlite3": "^12.1.1",
31
31
  "commander": "^11.0.0",
32
32
  "fast-glob": "^3.3.3",
33
- "proper-lockfile": "^4.1.2"
33
+ "proper-lockfile": "^4.1.2",
34
+ "string-similarity-js": "^2.1.4"
34
35
  },
35
36
  "devDependencies": {
36
37
  "@types/better-sqlite3": "^7.6.13",