scai 0.1.64 → 0.1.66
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -2
- package/dist/commands/AskCmd.js +18 -11
- package/dist/db/fileIndex.js +11 -101
- package/dist/fileRules/scoreFiles.js +71 -0
- package/dist/utils/buildContextualPrompt.js +18 -5
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -350,7 +350,7 @@ You can run it in two ways:
|
|
|
350
350
|
1. **Inline question**
|
|
351
351
|
|
|
352
352
|
```bash
|
|
353
|
-
scai ask "How does
|
|
353
|
+
scai ask "How does the controller work?"
|
|
354
354
|
```
|
|
355
355
|
|
|
356
356
|
2. **Interactive prompt**
|
|
@@ -363,7 +363,7 @@ You can run it in two ways:
|
|
|
363
363
|
, then type your question when prompted:
|
|
364
364
|
|
|
365
365
|
```
|
|
366
|
-
> How does
|
|
366
|
+
> How does the controller work?
|
|
367
367
|
```
|
|
368
368
|
|
|
369
369
|
</br>
|
package/dist/commands/AskCmd.js
CHANGED
|
@@ -33,7 +33,7 @@ export async function runAskCommand(query) {
|
|
|
33
33
|
fallbackResults.forEach((file, i) => {
|
|
34
34
|
console.log(` ${i + 1}. ๐ Fallback Match: ${file.path}`);
|
|
35
35
|
});
|
|
36
|
-
// 🟩 STEP 2: Merge results
|
|
36
|
+
// 🟩 STEP 2: Merge results
|
|
37
37
|
const seen = new Set();
|
|
38
38
|
const combinedResults = [];
|
|
39
39
|
for (const file of semanticResults) {
|
|
@@ -71,33 +71,39 @@ export async function runAskCommand(query) {
|
|
|
71
71
|
let code = '';
|
|
72
72
|
let topSummary = topFile.summary || '(No summary available)';
|
|
73
73
|
let topFunctions = [];
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
}
|
|
74
|
+
const fileFunctions = {};
|
|
75
|
+
// Truncate summary
|
|
76
|
+
topSummary = topSummary.split('\n').slice(0, MAX_SUMMARY_LINES).join('\n');
|
|
78
77
|
const allFileIds = combinedResults
|
|
79
78
|
.map(file => file.id)
|
|
80
79
|
.filter((id) => typeof id === 'number');
|
|
81
|
-
const allFunctionsMap = getFunctionsForFiles(allFileIds);
|
|
80
|
+
const allFunctionsMap = getFunctionsForFiles(allFileIds); // Record<number, Function[]>
|
|
82
81
|
try {
|
|
83
82
|
code = fs.readFileSync(filepath, 'utf-8');
|
|
84
83
|
const topFileId = topFile.id;
|
|
85
|
-
topFunctions = allFunctionsMap[topFileId]?.map(fn =>
|
|
84
|
+
topFunctions = allFunctionsMap[topFileId]?.map(fn => ({
|
|
85
|
+
name: fn.name,
|
|
86
|
+
content: fn.content
|
|
87
|
+
})) || [];
|
|
86
88
|
}
|
|
87
89
|
catch (err) {
|
|
88
90
|
console.warn(`⚠️ Failed to read or analyze top file (${filepath}):`, err);
|
|
89
91
|
}
|
|
90
|
-
// 🟩 STEP 5: Build relatedFiles with functions
|
|
92
|
+
// 🟩 STEP 5: Build relatedFiles with functions and fileFunctions
|
|
91
93
|
const relatedFiles = combinedResults.slice(0, RELATED_FILES_LIMIT).map(file => {
|
|
92
94
|
const fileId = file.id;
|
|
93
95
|
let summary = file.summary || '(No summary available)';
|
|
94
96
|
if (summary) {
|
|
95
97
|
summary = summary.split('\n').slice(0, MAX_SUMMARY_LINES).join('\n');
|
|
96
98
|
}
|
|
99
|
+
const functions = allFunctionsMap[fileId]?.map(fn => ({
|
|
100
|
+
name: fn.name,
|
|
101
|
+
content: fn.content || '(No content available)', // Assuming content is available
|
|
102
|
+
})) || [];
|
|
97
103
|
return {
|
|
98
104
|
path: file.path,
|
|
99
105
|
summary,
|
|
100
|
-
functions
|
|
106
|
+
functions,
|
|
101
107
|
};
|
|
102
108
|
});
|
|
103
109
|
// 🟩 STEP 6: Generate file tree
|
|
@@ -116,8 +122,9 @@ export async function runAskCommand(query) {
|
|
|
116
122
|
functions: topFunctions,
|
|
117
123
|
relatedFiles,
|
|
118
124
|
projectFileTree: fileTree || undefined,
|
|
125
|
+
fileFunctions,
|
|
119
126
|
});
|
|
120
|
-
// 🟩 STEP 8: Save prompt
|
|
127
|
+
// 🟩 STEP 8: Save prompt
|
|
121
128
|
try {
|
|
122
129
|
if (!fs.existsSync(SCAI_HOME))
|
|
123
130
|
fs.mkdirSync(SCAI_HOME, { recursive: true });
|
|
@@ -144,7 +151,7 @@ export async function runAskCommand(query) {
|
|
|
144
151
|
// 🟩 Helper: Prompt once
|
|
145
152
|
function promptOnce(promptText) {
|
|
146
153
|
return new Promise(resolve => {
|
|
147
|
-
console.log(promptText);
|
|
154
|
+
console.log(promptText);
|
|
148
155
|
const rl = readline.createInterface({
|
|
149
156
|
input: process.stdin,
|
|
150
157
|
output: process.stdout,
|
package/dist/db/fileIndex.js
CHANGED
|
@@ -3,77 +3,46 @@ import path from 'path';
|
|
|
3
3
|
import { generateEmbedding } from '../lib/generateEmbedding.js';
|
|
4
4
|
import { sanitizeQueryForFts } from '../utils/sanitizeQuery.js';
|
|
5
5
|
import * as sqlTemplates from './sqlTemplates.js';
|
|
6
|
-
import { stringSimilarity } from 'string-similarity-js';
|
|
7
6
|
import { CANDIDATE_LIMIT } from '../constants.js';
|
|
8
7
|
import { getDbForRepo } from './client.js';
|
|
9
|
-
|
|
10
|
-
* ๐ Index a single file into the database.
|
|
11
|
-
*
|
|
12
|
-
* - Normalizes the file path to prevent OS-specific path bugs.
|
|
13
|
-
* - Stores metadata like summary, type, lastModified, etc.
|
|
14
|
-
* - Sets `embedding` to null initially — computed later.
|
|
15
|
-
*/
|
|
8
|
+
import { scoreFiles } from '../fileRules/scoreFiles.js'; // ๐ NEW
|
|
16
9
|
export function indexFile(filePath, summary, type) {
|
|
17
10
|
const stats = fs.statSync(filePath);
|
|
18
11
|
const lastModified = stats.mtime.toISOString();
|
|
19
12
|
const indexedAt = new Date().toISOString();
|
|
20
13
|
const normalizedPath = path.normalize(filePath).replace(/\\/g, '/');
|
|
21
|
-
const fileName = path.basename(normalizedPath);
|
|
22
|
-
// Insert into files table
|
|
14
|
+
const fileName = path.basename(normalizedPath);
|
|
23
15
|
const db = getDbForRepo();
|
|
24
16
|
db.prepare(sqlTemplates.upsertFileTemplate).run({
|
|
25
17
|
path: normalizedPath,
|
|
26
|
-
filename: fileName,
|
|
18
|
+
filename: fileName,
|
|
27
19
|
summary,
|
|
28
20
|
type,
|
|
29
21
|
lastModified,
|
|
30
22
|
indexedAt,
|
|
31
23
|
embedding: null
|
|
32
24
|
});
|
|
33
|
-
// Insert into files_fts table for full-text search, including filename
|
|
34
25
|
db.prepare(`
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
`).run({
|
|
26
|
+
INSERT OR REPLACE INTO files_fts (rowid, filename, summary, path)
|
|
27
|
+
VALUES ((SELECT id FROM files WHERE path = :path), :filename, :summary, :path)
|
|
28
|
+
`).run({
|
|
38
29
|
path: normalizedPath,
|
|
39
30
|
filename: fileName,
|
|
40
|
-
summary
|
|
31
|
+
summary,
|
|
41
32
|
});
|
|
42
33
|
console.log(`๐ Indexed: ${normalizedPath}`);
|
|
43
34
|
}
|
|
44
|
-
/**
|
|
45
|
-
* ๐ FTS5 keyword-based search using SQLite's full-text index.
|
|
46
|
-
*
|
|
47
|
-
* Use this when you want:
|
|
48
|
-
* - A simple keyword search.
|
|
49
|
-
* - Fast fallback search that doesn't rely on embeddings.
|
|
50
|
-
*
|
|
51
|
-
* Returns a full `FileRow[]` (all known metadata, but no similarity score).
|
|
52
|
-
*/
|
|
53
35
|
export function queryFiles(safeQuery, limit = 10) {
|
|
54
36
|
console.log(`Executing search query: ${safeQuery}`);
|
|
55
37
|
const db = getDbForRepo();
|
|
56
|
-
|
|
38
|
+
return db.prepare(`
|
|
57
39
|
SELECT f.id, f.path, f.filename, f.summary, f.type, f.last_modified, f.indexed_at
|
|
58
40
|
FROM files f
|
|
59
41
|
JOIN files_fts fts ON f.id = fts.rowid
|
|
60
42
|
WHERE fts.files_fts MATCH ?
|
|
61
43
|
LIMIT ?
|
|
62
44
|
`).all(safeQuery, limit);
|
|
63
|
-
return results;
|
|
64
45
|
}
|
|
65
|
-
/**
|
|
66
|
-
* ๐ง Combined semantic + FTS search (Hybrid).
|
|
67
|
-
*
|
|
68
|
-
* 1. Convert user query to embedding vector using OpenAI / Ollama etc.
|
|
69
|
-
* 2. Perform a keyword-based FTS match to limit candidates.
|
|
70
|
-
* 3. For each candidate, compute cosine similarity between query + file embedding.
|
|
71
|
-
* 4. Blend the BM25 score and cosine similarity to produce a final score.
|
|
72
|
-
* 5. Return top K most relevant results.
|
|
73
|
-
*
|
|
74
|
-
* ⚠️ This returns a **lighter-weight type** than `FileRow` — doesn't include id, timestamps, etc.
|
|
75
|
-
* Use a wrapper type like `SearchResultWithScore` in calling code if you need both.
|
|
76
|
-
*/
|
|
77
46
|
export async function searchFiles(query, topK = 5) {
|
|
78
47
|
console.log(`๐ง Searching for query: "${query}"`);
|
|
79
48
|
const embedding = await generateEmbedding(query);
|
|
@@ -93,70 +62,11 @@ export async function searchFiles(query, topK = 5) {
|
|
|
93
62
|
LIMIT ?
|
|
94
63
|
`).all(safeQuery, CANDIDATE_LIMIT);
|
|
95
64
|
console.log(`FTS search returned ${ftsResults.length} results`);
|
|
96
|
-
if (ftsResults.length === 0)
|
|
65
|
+
if (ftsResults.length === 0)
|
|
97
66
|
return [];
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
const bm25Max = Math.max(...ftsResults.map(r => r.bm25Score));
|
|
101
|
-
const scored = ftsResults.map(result => {
|
|
102
|
-
let sim = 0;
|
|
103
|
-
let finalScore = 0;
|
|
104
|
-
const normalizedBm25 = 1 - ((result.bm25Score - bm25Min) / (bm25Max - bm25Min + 1e-5));
|
|
105
|
-
if (result.embedding) {
|
|
106
|
-
try {
|
|
107
|
-
const vector = JSON.parse(result.embedding);
|
|
108
|
-
sim = cosineSimilarity(embedding, vector);
|
|
109
|
-
}
|
|
110
|
-
catch (err) {
|
|
111
|
-
console.error(`โ Failed to parse embedding for ${result.path}:`, err);
|
|
112
|
-
}
|
|
113
|
-
}
|
|
114
|
-
const terms = query.toLowerCase().split(/\s+/);
|
|
115
|
-
const path = result.path.toLowerCase();
|
|
116
|
-
const summary = (result.summary || '').toLowerCase();
|
|
117
|
-
let termMatches = 0;
|
|
118
|
-
for (const term of terms) {
|
|
119
|
-
if (path.includes(term) || summary.includes(term)) {
|
|
120
|
-
termMatches += 1;
|
|
121
|
-
}
|
|
122
|
-
}
|
|
123
|
-
const matchRatio = termMatches / terms.length;
|
|
124
|
-
const termBoost = matchRatio >= 1 ? 1.0 : matchRatio >= 0.5 ? 0.5 : 0;
|
|
125
|
-
// ๐ง Final score with hybrid weighting (BM25 + Embedding + Term Boost)
|
|
126
|
-
finalScore = 0.4 * normalizedBm25 + 0.4 * sim + 0.2 * termBoost;
|
|
127
|
-
// ✅ Fuzzy score using string-similarity-js
|
|
128
|
-
const fuzzyScore = stringSimilarity(query.toLowerCase(), `${path} ${summary}`);
|
|
129
|
-
finalScore += fuzzyScore * 10;
|
|
130
|
-
return {
|
|
131
|
-
id: result.id,
|
|
132
|
-
path: result.path,
|
|
133
|
-
summary: result.summary,
|
|
134
|
-
score: finalScore,
|
|
135
|
-
sim,
|
|
136
|
-
bm25: result.bm25Score,
|
|
137
|
-
};
|
|
138
|
-
});
|
|
139
|
-
const sorted = scored
|
|
140
|
-
.sort((a, b) => b.score - a.score)
|
|
141
|
-
.slice(0, topK);
|
|
142
|
-
console.log(`Returning top ${topK} results based on combined score`);
|
|
143
|
-
return sorted;
|
|
144
|
-
}
|
|
145
|
-
/**
|
|
146
|
-
* ๐ข Cosine similarity between two vectors.
|
|
147
|
-
*/
|
|
148
|
-
function cosineSimilarity(a, b) {
|
|
149
|
-
const dot = a.reduce((sum, ai, i) => sum + ai * b[i], 0);
|
|
150
|
-
const magA = Math.sqrt(a.reduce((sum, ai) => sum + ai * ai, 0));
|
|
151
|
-
const magB = Math.sqrt(b.reduce((sum, bi) => sum + bi * bi, 0));
|
|
152
|
-
return dot / (magA * magB);
|
|
67
|
+
const scored = scoreFiles(query, embedding, ftsResults);
|
|
68
|
+
return scored.slice(0, topK);
|
|
153
69
|
}
|
|
154
|
-
/**
|
|
155
|
-
* ๐ง Get all extracted functions for a given list of file IDs.
|
|
156
|
-
*
|
|
157
|
-
* - Used in `askCmd.ts` to fetch code snippets from relevant files.
|
|
158
|
-
* - Grouped by file_id.
|
|
159
|
-
*/
|
|
160
70
|
export function getFunctionsForFiles(fileIds) {
|
|
161
71
|
if (!fileIds.length)
|
|
162
72
|
return {};
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
import { stringSimilarity } from 'string-similarity-js';
|
|
2
|
+
export function scoreFiles(query, embedding, candidates) {
|
|
3
|
+
const terms = query.toLowerCase().split(/\s+/);
|
|
4
|
+
const bm25Min = Math.min(...candidates.map(r => r.bm25Score));
|
|
5
|
+
const bm25Max = Math.max(...candidates.map(r => r.bm25Score));
|
|
6
|
+
return candidates.map(result => {
|
|
7
|
+
let finalScore = 0;
|
|
8
|
+
let sim = 0;
|
|
9
|
+
const path = result.path.toLowerCase();
|
|
10
|
+
const filename = result.filename.toLowerCase();
|
|
11
|
+
const summary = (result.summary || '').toLowerCase();
|
|
12
|
+
// 🎯 Normalize BM25
|
|
13
|
+
const normalizedBm25 = 1 - ((result.bm25Score - bm25Min) / (bm25Max - bm25Min + 1e-5));
|
|
14
|
+
// ๐ง Embedding similarity
|
|
15
|
+
if (result.embedding) {
|
|
16
|
+
try {
|
|
17
|
+
const vector = JSON.parse(result.embedding);
|
|
18
|
+
sim = cosineSimilarity(embedding, vector);
|
|
19
|
+
}
|
|
20
|
+
catch {
|
|
21
|
+
sim = 0;
|
|
22
|
+
}
|
|
23
|
+
}
|
|
24
|
+
// 🧩 Match ratio
|
|
25
|
+
let matchRatio = 0;
|
|
26
|
+
let matchedTerms = 0;
|
|
27
|
+
for (const term of terms) {
|
|
28
|
+
if (path.includes(term) || summary.includes(term))
|
|
29
|
+
matchedTerms++;
|
|
30
|
+
}
|
|
31
|
+
matchRatio = matchedTerms / terms.length;
|
|
32
|
+
const termBoost = matchRatio >= 1 ? 1.0 : matchRatio >= 0.5 ? 0.5 : 0;
|
|
33
|
+
// ๐ช Path heuristics
|
|
34
|
+
const isHtml = path.endsWith('.html');
|
|
35
|
+
const isSrc = path.includes('/src/') || path.includes('/controls/');
|
|
36
|
+
const isDoc = path.includes('/docs/') || path.includes('/mvndist/');
|
|
37
|
+
const isExactMatch = filename === `${terms[0]}.js`;
|
|
38
|
+
let weight = 1;
|
|
39
|
+
if (isHtml)
|
|
40
|
+
weight *= 0.85;
|
|
41
|
+
if (isDoc)
|
|
42
|
+
weight *= 0.8;
|
|
43
|
+
if (isSrc)
|
|
44
|
+
weight *= 1.2;
|
|
45
|
+
if (isExactMatch)
|
|
46
|
+
weight *= 1.5;
|
|
47
|
+
// 🧪 Fuzzy score
|
|
48
|
+
const fuzzyScore = stringSimilarity(query, `${path} ${summary}`);
|
|
49
|
+
// 🧮 Final composite score
|
|
50
|
+
finalScore =
|
|
51
|
+
(0.4 * normalizedBm25) +
|
|
52
|
+
(0.4 * sim) +
|
|
53
|
+
(0.2 * termBoost) +
|
|
54
|
+
(fuzzyScore * 0.5); // scale fuzzy match moderately
|
|
55
|
+
finalScore *= weight;
|
|
56
|
+
return {
|
|
57
|
+
id: result.id,
|
|
58
|
+
path: result.path,
|
|
59
|
+
summary: result.summary,
|
|
60
|
+
score: finalScore,
|
|
61
|
+
sim,
|
|
62
|
+
bm25: result.bm25Score,
|
|
63
|
+
};
|
|
64
|
+
}).sort((a, b) => b.score - a.score);
|
|
65
|
+
}
|
|
66
|
+
function cosineSimilarity(a, b) {
|
|
67
|
+
const dot = a.reduce((sum, ai, i) => sum + ai * b[i], 0);
|
|
68
|
+
const magA = Math.sqrt(a.reduce((sum, ai) => sum + ai * ai, 0));
|
|
69
|
+
const magB = Math.sqrt(b.reduce((sum, bi) => sum + bi * bi, 0));
|
|
70
|
+
return dot / (magA * magB);
|
|
71
|
+
}
|
|
@@ -4,13 +4,26 @@ export function buildContextualPrompt({ baseInstruction, code, summary, function
|
|
|
4
4
|
parts.push(`๐ File Summary:\n${summary}`);
|
|
5
5
|
}
|
|
6
6
|
if (functions?.length) {
|
|
7
|
-
|
|
7
|
+
// Display each function's name and content
|
|
8
|
+
const formattedFunctions = functions
|
|
9
|
+
.map(fn => `• ${fn.name}:\n${fn.content}`)
|
|
10
|
+
.join('\n\n'); // Adds a line break between each function
|
|
11
|
+
parts.push(`๐ง Functions:\n${formattedFunctions}`);
|
|
12
|
+
}
|
|
13
|
+
else {
|
|
14
|
+
console.log(`๐ง No functions found `);
|
|
8
15
|
}
|
|
9
16
|
if (relatedFiles?.length) {
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
.
|
|
13
|
-
|
|
17
|
+
// Include functions from related files
|
|
18
|
+
const formattedRelatedFiles = relatedFiles
|
|
19
|
+
.map(f => {
|
|
20
|
+
const relatedFunctions = f.functions
|
|
21
|
+
.map(fn => ` • ${fn.name}:\n ${fn.content}`)
|
|
22
|
+
.join('\n\n'); // Adds a line break between related file functions
|
|
23
|
+
return `• ${f.path}: ${f.summary}\n${relatedFunctions}`;
|
|
24
|
+
})
|
|
25
|
+
.join('\n\n'); // Adds a line break between related files
|
|
26
|
+
parts.push(`๐ Related Files:\n${formattedRelatedFiles}`);
|
|
14
27
|
}
|
|
15
28
|
if (projectFileTree) {
|
|
16
29
|
parts.push(`๐ Project File Structure:\n\`\`\`\n${projectFileTree.trim()}\n\`\`\``);
|