scai 0.1.27 → 0.1.29
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/commands/AskCmd.js +34 -10
- package/dist/commands/SummaryCmd.js +39 -30
- package/dist/db/fileIndex.js +32 -33
- package/dist/utils/normalizePath.js +10 -0
- package/package.json +3 -2
package/dist/commands/AskCmd.js
CHANGED
|
@@ -1,5 +1,7 @@
|
|
|
1
1
|
import readline from 'readline';
|
|
2
|
-
import
|
|
2
|
+
import path from 'path';
|
|
3
|
+
import { searchFiles, queryFiles } from "../db/fileIndex.js";
|
|
4
|
+
import { sanitizeQueryForFts } from "../utils/sanitizeQuery.js";
|
|
3
5
|
import { generate } from "../lib/generate.js";
|
|
4
6
|
export async function runAskCommand(query) {
|
|
5
7
|
if (!query) {
|
|
@@ -12,21 +14,43 @@ export async function runAskCommand(query) {
|
|
|
12
14
|
}
|
|
13
15
|
console.log(`๐ Searching for: "${query}"\n`);
|
|
14
16
|
const start = Date.now();
|
|
15
|
-
const
|
|
17
|
+
const semanticResults = await searchFiles(query, 5);
|
|
16
18
|
const duration = Date.now() - start;
|
|
17
|
-
console.log(`โฑ๏ธ searchFiles took ${duration}ms and returned ${
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
19
|
+
console.log(`โฑ๏ธ searchFiles took ${duration}ms and returned ${semanticResults.length} result(s)`);
|
|
20
|
+
// Also run fallback keyword search
|
|
21
|
+
const safeQuery = sanitizeQueryForFts(query);
|
|
22
|
+
const fallbackResults = queryFiles(safeQuery, 10);
|
|
23
|
+
// Merge semantic and fallback results
|
|
24
|
+
const seen = new Set();
|
|
25
|
+
const combinedResults = [];
|
|
26
|
+
for (const file of semanticResults) {
|
|
27
|
+
const resolved = path.resolve(file.path);
|
|
28
|
+
seen.add(resolved);
|
|
29
|
+
combinedResults.push(file); // Already scored
|
|
30
|
+
}
|
|
31
|
+
for (const file of fallbackResults) {
|
|
32
|
+
const resolved = path.resolve(file.path);
|
|
33
|
+
if (!seen.has(resolved)) {
|
|
34
|
+
seen.add(resolved);
|
|
35
|
+
combinedResults.push({
|
|
36
|
+
path: file.path,
|
|
37
|
+
summary: file.summary,
|
|
38
|
+
score: 0.0, // fallback score
|
|
39
|
+
});
|
|
40
|
+
}
|
|
41
|
+
}
|
|
42
|
+
if (combinedResults.length > 0) {
|
|
43
|
+
console.log('๐ Closest files based on semantic + keyword match:');
|
|
44
|
+
combinedResults.forEach((file, i) => {
|
|
45
|
+
console.log(` ${i + 1}. ๐ Path: ${file.path} | Score: ${file.score?.toFixed(3) ?? 'fallback'}`);
|
|
22
46
|
});
|
|
23
47
|
}
|
|
24
48
|
else {
|
|
25
|
-
console.log('โ ๏ธ No similar
|
|
49
|
+
console.log('โ ๏ธ No similar files found. Asking the model for context only...');
|
|
26
50
|
}
|
|
27
|
-
//
|
|
51
|
+
// Aggregate summaries
|
|
28
52
|
let allSummaries = '';
|
|
29
|
-
for (const file of
|
|
53
|
+
for (const file of combinedResults) {
|
|
30
54
|
if (!file?.summary) {
|
|
31
55
|
console.warn(`โ ๏ธ No summary available for file: ${file?.path}`);
|
|
32
56
|
continue;
|
|
@@ -1,54 +1,53 @@
|
|
|
1
1
|
import fs from 'fs/promises';
|
|
2
2
|
import path from 'path';
|
|
3
3
|
import readline from 'readline';
|
|
4
|
-
import { queryFiles } from '../db/fileIndex.js';
|
|
4
|
+
import { queryFiles, indexFile } from '../db/fileIndex.js';
|
|
5
5
|
import { summaryModule } from '../pipeline/modules/summaryModule.js';
|
|
6
6
|
import { summarizeCode } from '../utils/summarizer.js';
|
|
7
|
+
import { detectFileType } from '../utils/detectFileType.js';
|
|
8
|
+
import { generateEmbedding } from '../lib/generateEmbedding.js';
|
|
9
|
+
import { sanitizeQueryForFts } from '../utils/sanitizeQuery.js';
|
|
10
|
+
import { db } from '../db/client.js';
|
|
7
11
|
export async function summarizeFile(filepath) {
|
|
8
12
|
let content = '';
|
|
9
|
-
let
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
const matches = queryFiles(`"${filepath}"`);
|
|
13
|
+
let filePathResolved;
|
|
14
|
+
// ๐ Resolve path like `scai find`
|
|
15
|
+
if (filepath) {
|
|
16
|
+
const sanitizedQuery = sanitizeQueryForFts(filepath);
|
|
17
|
+
const matches = queryFiles(sanitizedQuery);
|
|
15
18
|
if (matches.length > 0) {
|
|
16
|
-
const
|
|
17
|
-
filePathResolved = path.resolve(process.cwd(),
|
|
19
|
+
const topMatch = matches[0];
|
|
20
|
+
filePathResolved = path.resolve(process.cwd(), topMatch.path);
|
|
21
|
+
console.log(`๐ Matched file: ${path.relative(process.cwd(), filePathResolved)}`);
|
|
22
|
+
}
|
|
23
|
+
else {
|
|
24
|
+
console.error(`โ Could not resolve file from query: "${filepath}"`);
|
|
25
|
+
return;
|
|
18
26
|
}
|
|
19
27
|
}
|
|
20
|
-
|
|
21
|
-
// Handle case where full filename with extension is provided
|
|
22
|
-
filePathResolved = path.resolve(process.cwd(), filepath);
|
|
23
|
-
}
|
|
24
|
-
// Now, let's search the database for a summary
|
|
28
|
+
// ๐ Load file content from resolved path
|
|
25
29
|
if (filePathResolved) {
|
|
30
|
+
const matches = queryFiles(`"${filePathResolved}"`);
|
|
31
|
+
const match = matches.find(row => path.resolve(row.path) === filePathResolved);
|
|
32
|
+
if (match?.summary) {
|
|
33
|
+
console.log(`๐ง Cached summary for ${filepath}:\n`);
|
|
34
|
+
console.log(summarizeCode(match.summary));
|
|
35
|
+
return;
|
|
36
|
+
}
|
|
26
37
|
try {
|
|
27
|
-
// Try to find an existing summary from the database using the resolved path
|
|
28
|
-
const matches = queryFiles(`"${filePathResolved}"`);
|
|
29
|
-
const match = matches.find(row => path.resolve(row.path) === filePathResolved);
|
|
30
|
-
if (match?.summary) {
|
|
31
|
-
// If a summary exists in the database, use it
|
|
32
|
-
console.log(`๐ง Cached summary for ${filepath}:\n`);
|
|
33
|
-
console.log(summarizeCode(match.summary));
|
|
34
|
-
return;
|
|
35
|
-
}
|
|
36
|
-
// If no cached summary, read the file content
|
|
37
38
|
content = await fs.readFile(filePathResolved, 'utf-8');
|
|
38
39
|
}
|
|
39
40
|
catch (err) {
|
|
40
|
-
console.error(`โ Could not
|
|
41
|
+
console.error(`โ Could not read file: ${filePathResolved}\n${err.message}`);
|
|
41
42
|
return;
|
|
42
43
|
}
|
|
43
44
|
}
|
|
44
45
|
else if (!process.stdin.isTTY) {
|
|
45
|
-
// If no file path and input comes from stdin (piped content)
|
|
46
46
|
const rl = readline.createInterface({
|
|
47
47
|
input: process.stdin,
|
|
48
48
|
output: process.stdout,
|
|
49
49
|
terminal: false,
|
|
50
50
|
});
|
|
51
|
-
// Collect all piped input into the `content` string
|
|
52
51
|
for await (const line of rl) {
|
|
53
52
|
content += line + '\n';
|
|
54
53
|
}
|
|
@@ -57,17 +56,27 @@ export async function summarizeFile(filepath) {
|
|
|
57
56
|
console.error('โ No file provided and no piped input.\n๐ Usage: scai summ <file> or cat file | scai summ');
|
|
58
57
|
return;
|
|
59
58
|
}
|
|
60
|
-
//
|
|
59
|
+
// ๐ง Generate summary and save
|
|
61
60
|
if (content.trim()) {
|
|
62
61
|
console.log('๐งช Generating summary...\n');
|
|
63
|
-
// Generate a summary using your summarization pipeline
|
|
64
62
|
const response = await summaryModule.run({ content, filepath });
|
|
65
63
|
if (!response.summary) {
|
|
66
64
|
console.warn('โ ๏ธ No summary generated.');
|
|
67
65
|
return;
|
|
68
66
|
}
|
|
69
|
-
// Print the formatted summary
|
|
70
67
|
console.log(summarizeCode(response.summary));
|
|
68
|
+
if (filePathResolved) {
|
|
69
|
+
const fileType = detectFileType(filePathResolved);
|
|
70
|
+
indexFile(filePathResolved, response.summary, fileType);
|
|
71
|
+
console.log('๐พ Summary saved to local database.');
|
|
72
|
+
const embedding = await generateEmbedding(response.summary);
|
|
73
|
+
if (embedding) {
|
|
74
|
+
db.prepare(`
|
|
75
|
+
UPDATE files SET embedding = ? WHERE path = ?
|
|
76
|
+
`).run(JSON.stringify(embedding), filePathResolved.replace(/\\/g, '/'));
|
|
77
|
+
console.log('๐ Embedding saved to database.');
|
|
78
|
+
}
|
|
79
|
+
}
|
|
71
80
|
}
|
|
72
81
|
else {
|
|
73
82
|
console.error('โ No content provided to summarize.');
|
package/dist/db/fileIndex.js
CHANGED
|
@@ -53,17 +53,15 @@ export async function searchFiles(query, topK = 5) {
|
|
|
53
53
|
const safeQuery = sanitizeQueryForFts(query);
|
|
54
54
|
console.log(`Executing search query in FTS5: ${safeQuery}`);
|
|
55
55
|
const ftsResults = db.prepare(`
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
`).all(safeQuery, topK);
|
|
56
|
+
SELECT fts.rowid AS id, f.path, f.summary, f.type, bm25(files_fts) AS bm25Score, f.embedding
|
|
57
|
+
FROM files f
|
|
58
|
+
JOIN files_fts fts ON f.id = fts.rowid
|
|
59
|
+
WHERE fts.files_fts MATCH ?
|
|
60
|
+
ORDER BY bm25Score ASC
|
|
61
|
+
LIMIT ?
|
|
62
|
+
`).all(safeQuery, 20);
|
|
64
63
|
console.log(`FTS search returned ${ftsResults.length} results`);
|
|
65
64
|
if (ftsResults.length === 0) {
|
|
66
|
-
console.log('โ ๏ธ No results found from FTS search');
|
|
67
65
|
return [];
|
|
68
66
|
}
|
|
69
67
|
ftsResults.forEach(result => {
|
|
@@ -72,36 +70,37 @@ export async function searchFiles(query, topK = 5) {
|
|
|
72
70
|
const bm25Min = Math.min(...ftsResults.map(r => r.bm25Score));
|
|
73
71
|
const bm25Max = Math.max(...ftsResults.map(r => r.bm25Score));
|
|
74
72
|
const scored = ftsResults.map(result => {
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
73
|
+
let finalScore = 0;
|
|
74
|
+
let sim = 0;
|
|
75
|
+
if (result.embedding) {
|
|
76
|
+
try {
|
|
77
|
+
const vector = JSON.parse(result.embedding);
|
|
78
|
+
sim = cosineSimilarity(embedding, vector);
|
|
79
|
+
const normalizedBm25 = 1 - ((result.bm25Score - bm25Min) / (bm25Max - bm25Min + 1e-5));
|
|
80
|
+
finalScore = 0.7 * sim + 0.3 * normalizedBm25;
|
|
81
|
+
}
|
|
82
|
+
catch (err) {
|
|
83
|
+
console.error(`โ Failed to parse embedding for ${result.path}:`, err);
|
|
84
|
+
finalScore = 0; // fallback
|
|
82
85
|
}
|
|
83
|
-
const vector = JSON.parse(embResult.embedding);
|
|
84
|
-
const sim = cosineSimilarity(embedding, vector);
|
|
85
|
-
const normalizedBm25 = 1 - ((result.bm25Score - bm25Min) / (bm25Max - bm25Min + 1e-5));
|
|
86
|
-
const finalScore = 0.7 * sim + 0.3 * normalizedBm25;
|
|
87
|
-
return {
|
|
88
|
-
path: result.path,
|
|
89
|
-
summary: result.summary,
|
|
90
|
-
score: finalScore,
|
|
91
|
-
sim,
|
|
92
|
-
bm25: normalizedBm25,
|
|
93
|
-
};
|
|
94
86
|
}
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
87
|
+
else {
|
|
88
|
+
// No embedding: fallback to inverse bm25-only
|
|
89
|
+
finalScore = 1 - ((result.bm25Score - bm25Min) / (bm25Max - bm25Min + 1e-5));
|
|
98
90
|
}
|
|
99
|
-
|
|
100
|
-
|
|
91
|
+
return {
|
|
92
|
+
path: result.path,
|
|
93
|
+
summary: result.summary,
|
|
94
|
+
score: finalScore,
|
|
95
|
+
sim,
|
|
96
|
+
bm25: result.bm25Score,
|
|
97
|
+
};
|
|
98
|
+
});
|
|
99
|
+
const sorted = scored
|
|
101
100
|
.sort((a, b) => b.score - a.score)
|
|
102
101
|
.slice(0, topK);
|
|
103
102
|
console.log(`Returning top ${topK} results based on combined score`);
|
|
104
|
-
return
|
|
103
|
+
return sorted;
|
|
105
104
|
}
|
|
106
105
|
function cosineSimilarity(a, b) {
|
|
107
106
|
const dot = a.reduce((sum, ai, i) => sum + ai * b[i], 0);
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
// src/utils/normalizePath.ts
|
|
2
|
+
/**
|
|
3
|
+
* Normalizes a path string for loose, fuzzy matching:
|
|
4
|
+
* - Lowercases
|
|
5
|
+
* - Removes slashes and backslashes
|
|
6
|
+
* - Removes whitespace
|
|
7
|
+
*/
|
|
8
|
+
export function normalizePathForLooseMatch(p) {
|
|
9
|
+
return p.toLowerCase().replace(/[\\/]/g, '').replace(/\s+/g, '');
|
|
10
|
+
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "scai",
|
|
3
|
-
"version": "0.1.
|
|
3
|
+
"version": "0.1.29",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"bin": {
|
|
6
6
|
"scai": "./dist/index.js"
|
|
@@ -17,7 +17,8 @@
|
|
|
17
17
|
"refactor",
|
|
18
18
|
"devtools",
|
|
19
19
|
"local",
|
|
20
|
-
"typescript"
|
|
20
|
+
"typescript",
|
|
21
|
+
"llm"
|
|
21
22
|
],
|
|
22
23
|
"scripts": {
|
|
23
24
|
"build": "tsc",
|