scai 0.1.26 → 0.1.28
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/commands/{QueryCmd.js → FindCmd.js} +8 -7
- package/dist/commands/SummaryCmd.js +55 -15
- package/dist/db/fileIndex.js +33 -46
- package/dist/index.js +4 -3
- package/dist/utils/normalizePath.js +10 -0
- package/dist/utils/sanitizeQuery.js +8 -5
- package/dist/utils/textWrapper.js +9 -1
- package/package.json +3 -2
- package/dist/commands/EnvCmd.js +0 -10
package/dist/commands/{QueryCmd.js → FindCmd.js}
CHANGED
|
@@ -1,20 +1,21 @@
|
|
|
1
1
|
import { queryFiles } from '../db/fileIndex.js';
|
|
2
|
+
import { sanitizeQueryForFts } from '../utils/sanitizeQuery.js';
|
|
2
3
|
import path from 'path';
|
|
3
|
-
export async function
|
|
4
|
+
export async function runFindCommand(query) {
|
|
4
5
|
if (!query) {
|
|
5
|
-
console.error('❌ Please provide a search query.\n👉 Usage: scai
|
|
6
|
+
console.error('❌ Please provide a search query.\n👉 Usage: scai find "keyword"');
|
|
6
7
|
return;
|
|
7
8
|
}
|
|
8
|
-
console.log(
|
|
9
|
-
const
|
|
9
|
+
console.log(`\n🔍 Searching for: "${query}"\n`);
|
|
10
|
+
const sanitizedQuery = sanitizeQueryForFts(query);
|
|
11
|
+
const results = queryFiles(sanitizedQuery);
|
|
10
12
|
if (results.length === 0) {
|
|
11
13
|
console.log('⚠️ No matching files found.');
|
|
12
14
|
return;
|
|
13
15
|
}
|
|
16
|
+
console.log(`✅ Found ${results.length} result(s).`);
|
|
17
|
+
console.log();
|
|
14
18
|
results.forEach((result, index) => {
|
|
15
19
|
console.log(`📄 [${index + 1}] ${path.relative(process.cwd(), result.path)}`);
|
|
16
|
-
console.log(` 📝 ${result.summary}`);
|
|
17
|
-
console.log();
|
|
18
20
|
});
|
|
19
|
-
console.log(`✅ Found ${results.length} result(s).`);
|
|
20
21
|
}
|
package/dist/commands/SummaryCmd.js
CHANGED
|
@@ -1,23 +1,48 @@
|
|
|
1
1
|
import fs from 'fs/promises';
|
|
2
|
+
import path from 'path';
|
|
2
3
|
import readline from 'readline';
|
|
3
|
-
import {
|
|
4
|
-
import {
|
|
4
|
+
import { queryFiles, indexFile } from '../db/fileIndex.js';
|
|
5
|
+
import { summaryModule } from '../pipeline/modules/summaryModule.js';
|
|
6
|
+
import { summarizeCode } from '../utils/summarizer.js';
|
|
7
|
+
import { detectFileType } from '../utils/detectFileType.js';
|
|
8
|
+
import { generateEmbedding } from '../lib/generateEmbedding.js';
|
|
9
|
+
import { sanitizeQueryForFts } from '../utils/sanitizeQuery.js';
|
|
10
|
+
import { db } from '../db/client.js';
|
|
5
11
|
export async function summarizeFile(filepath) {
|
|
6
12
|
let content = '';
|
|
13
|
+
let filePathResolved;
|
|
14
|
+
// 📁 Resolve path like `scai find`
|
|
7
15
|
if (filepath) {
|
|
16
|
+
const sanitizedQuery = sanitizeQueryForFts(filepath);
|
|
17
|
+
const matches = queryFiles(sanitizedQuery);
|
|
18
|
+
if (matches.length > 0) {
|
|
19
|
+
const topMatch = matches[0];
|
|
20
|
+
filePathResolved = path.resolve(process.cwd(), topMatch.path);
|
|
21
|
+
console.log(`🔗 Matched file: ${path.relative(process.cwd(), filePathResolved)}`);
|
|
22
|
+
}
|
|
23
|
+
else {
|
|
24
|
+
console.error(`❌ Could not resolve file from query: "${filepath}"`);
|
|
25
|
+
return;
|
|
26
|
+
}
|
|
27
|
+
}
|
|
28
|
+
// 📄 Load file content from resolved path
|
|
29
|
+
if (filePathResolved) {
|
|
30
|
+
const matches = queryFiles(`"${filePathResolved}"`);
|
|
31
|
+
const match = matches.find(row => path.resolve(row.path) === filePathResolved);
|
|
32
|
+
if (match?.summary) {
|
|
33
|
+
console.log(`🧠 Cached summary for ${filepath}:\n`);
|
|
34
|
+
console.log(summarizeCode(match.summary));
|
|
35
|
+
return;
|
|
36
|
+
}
|
|
8
37
|
try {
|
|
9
|
-
content = await fs.readFile(
|
|
38
|
+
content = await fs.readFile(filePathResolved, 'utf-8');
|
|
10
39
|
}
|
|
11
40
|
catch (err) {
|
|
12
|
-
console.error(`❌ Could not read
|
|
41
|
+
console.error(`❌ Could not read file: ${filePathResolved}\n${err.message}`);
|
|
13
42
|
return;
|
|
14
43
|
}
|
|
15
44
|
}
|
|
16
|
-
else if (process.stdin.isTTY) {
|
|
17
|
-
console.error('❌ No file provided and no piped input.\n👉 Usage: scai summ <file> or cat file | scai summ');
|
|
18
|
-
return;
|
|
19
|
-
}
|
|
20
|
-
else {
|
|
45
|
+
else if (!process.stdin.isTTY) {
|
|
21
46
|
const rl = readline.createInterface({
|
|
22
47
|
input: process.stdin,
|
|
23
48
|
output: process.stdout,
|
|
@@ -27,18 +52,33 @@ export async function summarizeFile(filepath) {
|
|
|
27
52
|
content += line + '\n';
|
|
28
53
|
}
|
|
29
54
|
}
|
|
55
|
+
else {
|
|
56
|
+
console.error('❌ No file provided and no piped input.\n👉 Usage: scai summ <file> or cat file | scai summ');
|
|
57
|
+
return;
|
|
58
|
+
}
|
|
59
|
+
// 🧠 Generate summary and save
|
|
30
60
|
if (content.trim()) {
|
|
31
|
-
|
|
61
|
+
console.log('🧪 Generating summary...\n');
|
|
32
62
|
const response = await summaryModule.run({ content, filepath });
|
|
33
|
-
// Pass the summary text to the utility function for formatting
|
|
34
63
|
if (!response.summary) {
|
|
35
|
-
console.warn(
|
|
64
|
+
console.warn('⚠️ No summary generated.');
|
|
36
65
|
return;
|
|
37
66
|
}
|
|
38
|
-
|
|
39
|
-
|
|
67
|
+
console.log(summarizeCode(response.summary));
|
|
68
|
+
if (filePathResolved) {
|
|
69
|
+
const fileType = detectFileType(filePathResolved);
|
|
70
|
+
indexFile(filePathResolved, response.summary, fileType);
|
|
71
|
+
console.log('💾 Summary saved to local database.');
|
|
72
|
+
const embedding = await generateEmbedding(response.summary);
|
|
73
|
+
if (embedding) {
|
|
74
|
+
db.prepare(`
|
|
75
|
+
UPDATE files SET embedding = ? WHERE path = ?
|
|
76
|
+
`).run(JSON.stringify(embedding), filePathResolved.replace(/\\/g, '/'));
|
|
77
|
+
console.log('📐 Embedding saved to database.');
|
|
78
|
+
}
|
|
79
|
+
}
|
|
40
80
|
}
|
|
41
81
|
else {
|
|
42
|
-
console.error('❌ No
|
|
82
|
+
console.error('❌ No content provided to summarize.');
|
|
43
83
|
}
|
|
44
84
|
}
|
package/dist/db/fileIndex.js
CHANGED
|
@@ -29,15 +29,7 @@ export function indexFile(filePath, summary, type) {
|
|
|
29
29
|
/**
|
|
30
30
|
* Perform a raw keyword-based full-text search using the FTS5 index.
|
|
31
31
|
*/
|
|
32
|
-
export function queryFiles(
|
|
33
|
-
const safeQuery = query
|
|
34
|
-
.trim()
|
|
35
|
-
.split(/\s+/)
|
|
36
|
-
.map(token => {
|
|
37
|
-
token = token.replace(/[?*\\"]/g, '').replace(/'/g, "''");
|
|
38
|
-
return token.includes(' ') ? `"${token}"` : `${token}*`;
|
|
39
|
-
})
|
|
40
|
-
.join(' OR ');
|
|
32
|
+
export function queryFiles(safeQuery, limit = 10) {
|
|
41
33
|
console.log(`Executing search query: ${safeQuery}`);
|
|
42
34
|
const results = db.prepare(`
|
|
43
35
|
SELECT f.id, f.path, f.summary, f.type, f.last_modified, f.indexed_at
|
|
@@ -46,10 +38,6 @@ export function queryFiles(query, limit = 10) {
|
|
|
46
38
|
WHERE fts.files_fts MATCH ?
|
|
47
39
|
LIMIT ?
|
|
48
40
|
`).all(safeQuery, limit);
|
|
49
|
-
console.log(`Search returned ${results.length} results.`);
|
|
50
|
-
results.forEach(result => {
|
|
51
|
-
console.log(`📄 Found in FTS search: ${result.path}`);
|
|
52
|
-
});
|
|
53
41
|
return results;
|
|
54
42
|
}
|
|
55
43
|
/**
|
|
@@ -65,17 +53,15 @@ export async function searchFiles(query, topK = 5) {
|
|
|
65
53
|
const safeQuery = sanitizeQueryForFts(query);
|
|
66
54
|
console.log(`Executing search query in FTS5: ${safeQuery}`);
|
|
67
55
|
const ftsResults = db.prepare(`
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
`).all(safeQuery, topK);
|
|
56
|
+
SELECT fts.rowid AS id, f.path, f.summary, f.type, bm25(files_fts) AS bm25Score, f.embedding
|
|
57
|
+
FROM files f
|
|
58
|
+
JOIN files_fts fts ON f.id = fts.rowid
|
|
59
|
+
WHERE fts.files_fts MATCH ?
|
|
60
|
+
ORDER BY bm25Score ASC
|
|
61
|
+
LIMIT ?
|
|
62
|
+
`).all(safeQuery, 20);
|
|
76
63
|
console.log(`FTS search returned ${ftsResults.length} results`);
|
|
77
64
|
if (ftsResults.length === 0) {
|
|
78
|
-
console.log('⚠️ No results found from FTS search');
|
|
79
65
|
return [];
|
|
80
66
|
}
|
|
81
67
|
ftsResults.forEach(result => {
|
|
@@ -84,36 +70,37 @@ export async function searchFiles(query, topK = 5) {
|
|
|
84
70
|
const bm25Min = Math.min(...ftsResults.map(r => r.bm25Score));
|
|
85
71
|
const bm25Max = Math.max(...ftsResults.map(r => r.bm25Score));
|
|
86
72
|
const scored = ftsResults.map(result => {
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
73
|
+
let finalScore = 0;
|
|
74
|
+
let sim = 0;
|
|
75
|
+
if (result.embedding) {
|
|
76
|
+
try {
|
|
77
|
+
const vector = JSON.parse(result.embedding);
|
|
78
|
+
sim = cosineSimilarity(embedding, vector);
|
|
79
|
+
const normalizedBm25 = 1 - ((result.bm25Score - bm25Min) / (bm25Max - bm25Min + 1e-5));
|
|
80
|
+
finalScore = 0.7 * sim + 0.3 * normalizedBm25;
|
|
81
|
+
}
|
|
82
|
+
catch (err) {
|
|
83
|
+
console.error(`❌ Failed to parse embedding for ${result.path}:`, err);
|
|
84
|
+
finalScore = 0; // fallback
|
|
94
85
|
}
|
|
95
|
-
const vector = JSON.parse(embResult.embedding);
|
|
96
|
-
const sim = cosineSimilarity(embedding, vector);
|
|
97
|
-
const normalizedBm25 = 1 - ((result.bm25Score - bm25Min) / (bm25Max - bm25Min + 1e-5));
|
|
98
|
-
const finalScore = 0.7 * sim + 0.3 * normalizedBm25;
|
|
99
|
-
return {
|
|
100
|
-
path: result.path,
|
|
101
|
-
summary: result.summary,
|
|
102
|
-
score: finalScore,
|
|
103
|
-
sim,
|
|
104
|
-
bm25: normalizedBm25,
|
|
105
|
-
};
|
|
106
86
|
}
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
87
|
+
else {
|
|
88
|
+
// No embedding: fallback to inverse bm25-only
|
|
89
|
+
finalScore = 1 - ((result.bm25Score - bm25Min) / (bm25Max - bm25Min + 1e-5));
|
|
110
90
|
}
|
|
111
|
-
|
|
112
|
-
|
|
91
|
+
return {
|
|
92
|
+
path: result.path,
|
|
93
|
+
summary: result.summary,
|
|
94
|
+
score: finalScore,
|
|
95
|
+
sim,
|
|
96
|
+
bm25: result.bm25Score,
|
|
97
|
+
};
|
|
98
|
+
});
|
|
99
|
+
const sorted = scored
|
|
113
100
|
.sort((a, b) => b.score - a.score)
|
|
114
101
|
.slice(0, topK);
|
|
115
102
|
console.log(`Returning top ${topK} results based on combined score`);
|
|
116
|
-
return
|
|
103
|
+
return sorted;
|
|
117
104
|
}
|
|
118
105
|
function cosineSimilarity(a, b) {
|
|
119
106
|
const dot = a.reduce((sum, ai, i) => sum + ai * b[i], 0);
|
package/dist/index.js
CHANGED
|
@@ -5,6 +5,7 @@ import { Config } from './config.js';
|
|
|
5
5
|
import { createRequire } from 'module';
|
|
6
6
|
const require = createRequire(import.meta.url);
|
|
7
7
|
const { version } = require('../package.json');
|
|
8
|
+
// 🧠 Commands
|
|
8
9
|
import { suggestCommitMessage } from "./commands/CommitSuggesterCmd.js";
|
|
9
10
|
import { handleRefactor } from "./commands/RefactorCmd.js";
|
|
10
11
|
import { generateTests } from "./commands/TestGenCmd.js";
|
|
@@ -14,7 +15,7 @@ import { handleChangelogUpdate } from './commands/ChangeLogUpdateCmd.js';
|
|
|
14
15
|
import { runModulePipelineFromCLI } from './commands/ModulePipelineCmd.js';
|
|
15
16
|
import { runIndexCommand } from './commands/IndexCmd.js';
|
|
16
17
|
import { resetDatabase } from './commands/ResetDbCmd.js';
|
|
17
|
-
import {
|
|
18
|
+
import { runFindCommand } from './commands/FindCmd.js';
|
|
18
19
|
import { startDaemon } from './commands/DaemonCmd.js';
|
|
19
20
|
import { runStopDaemonCommand } from "./commands/StopDaemonCmd.js";
|
|
20
21
|
import { runAskCommand } from './commands/AskCmd.js';
|
|
@@ -113,9 +114,9 @@ cmd
|
|
|
113
114
|
});
|
|
114
115
|
// 🧠 Query and assistant
|
|
115
116
|
cmd
|
|
116
|
-
.command('
|
|
117
|
+
.command('find <query>')
|
|
117
118
|
.description('Search indexed files by keyword')
|
|
118
|
-
.action(
|
|
119
|
+
.action(runFindCommand);
|
|
119
120
|
cmd
|
|
120
121
|
.command('ask [question...]') // <- the ... makes it variadic
|
|
121
122
|
.description('Ask a question based on indexed files')
|
package/dist/utils/normalizePath.js
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
// src/utils/normalizePath.ts
|
|
2
|
+
/**
|
|
3
|
+
* Normalizes a path string for loose, fuzzy matching:
|
|
4
|
+
* - Lowercases
|
|
5
|
+
* - Removes slashes and backslashes
|
|
6
|
+
* - Removes whitespace
|
|
7
|
+
*/
|
|
8
|
+
export function normalizePathForLooseMatch(p) {
|
|
9
|
+
return p.toLowerCase().replace(/[\\/]/g, '').replace(/\s+/g, '');
|
|
10
|
+
}
|
package/dist/utils/sanitizeQuery.js
CHANGED
|
@@ -1,16 +1,19 @@
|
|
|
1
1
|
// src/utils/sanitizeQuery.ts
|
|
2
2
|
import { STOP_WORDS } from '../config/StopWords.js';
|
|
3
3
|
export function sanitizeQueryForFts(input) {
|
|
4
|
+
input = input.trim().toLowerCase();
|
|
5
|
+
// If it's a single filename-like string (includes dots or slashes), quote it
|
|
6
|
+
if (/^[\w\-./]+$/.test(input) && !/\s/.test(input)) {
|
|
7
|
+
// Escape quotes and wrap with double-quotes for FTS safety
|
|
8
|
+
return `"${input.replace(/"/g, '""')}"*`;
|
|
9
|
+
}
|
|
10
|
+
// Otherwise, treat it as a natural language prompt
|
|
4
11
|
const tokens = input
|
|
5
|
-
.trim()
|
|
6
12
|
.split(/\s+/)
|
|
7
13
|
.map(token => token.toLowerCase())
|
|
8
14
|
.filter(token => token.length > 2 &&
|
|
9
15
|
!STOP_WORDS.has(token) &&
|
|
10
16
|
/^[a-z0-9]+$/.test(token))
|
|
11
17
|
.map(token => token.replace(/[?*\\"]/g, '').replace(/'/g, "''") + '*');
|
|
12
|
-
|
|
13
|
-
if (tokens.length === 0)
|
|
14
|
-
return '*';
|
|
15
|
-
return tokens.join(' OR ');
|
|
18
|
+
return tokens.length > 0 ? tokens.join(' OR ') : '*';
|
|
16
19
|
}
|
package/dist/utils/textWrapper.js
CHANGED
|
@@ -3,6 +3,14 @@ export function wrapText(text, maxWidth) {
|
|
|
3
3
|
let wrappedText = '';
|
|
4
4
|
let currentLine = '';
|
|
5
5
|
words.forEach(word => {
|
|
6
|
+
// If the word is longer than the maxWidth, break it up into multiple lines
|
|
7
|
+
if (word.length > maxWidth) {
|
|
8
|
+
// Break the word into smaller chunks
|
|
9
|
+
while (word.length > maxWidth) {
|
|
10
|
+
wrappedText += word.slice(0, maxWidth) + '\n';
|
|
11
|
+
word = word.slice(maxWidth);
|
|
12
|
+
}
|
|
13
|
+
}
|
|
6
14
|
// Check if adding the word would exceed the max width
|
|
7
15
|
if ((currentLine + word).length > maxWidth) {
|
|
8
16
|
wrappedText += currentLine + '\n'; // Add the current line and start a new one
|
|
@@ -13,6 +21,6 @@ export function wrapText(text, maxWidth) {
|
|
|
13
21
|
}
|
|
14
22
|
});
|
|
15
23
|
// Append the last line if any
|
|
16
|
-
wrappedText += currentLine;
|
|
24
|
+
wrappedText += currentLine.trim(); // trim() to remove the extra space at the end
|
|
17
25
|
return wrappedText;
|
|
18
26
|
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "scai",
|
|
3
|
-
"version": "0.1.
|
|
3
|
+
"version": "0.1.28",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"bin": {
|
|
6
6
|
"scai": "./dist/index.js"
|
|
@@ -17,7 +17,8 @@
|
|
|
17
17
|
"refactor",
|
|
18
18
|
"devtools",
|
|
19
19
|
"local",
|
|
20
|
-
"typescript"
|
|
20
|
+
"typescript",
|
|
21
|
+
"llm"
|
|
21
22
|
],
|
|
22
23
|
"scripts": {
|
|
23
24
|
"build": "tsc",
|
package/dist/commands/EnvCmd.js
DELETED
|
@@ -1,10 +0,0 @@
|
|
|
1
|
-
export function checkEnv() {
|
|
2
|
-
const requiredVars = ["DB_HOST", "API_KEY"];
|
|
3
|
-
const missing = requiredVars.filter((v) => !process.env[v]);
|
|
4
|
-
if (missing.length) {
|
|
5
|
-
console.warn("❌ Missing env vars:", missing.join(", "));
|
|
6
|
-
}
|
|
7
|
-
else {
|
|
8
|
-
console.log("✅ All env vars are set");
|
|
9
|
-
}
|
|
10
|
-
}
|