scai 0.1.23 → 0.1.25
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/commands/AskCmd.js +46 -49
- package/dist/commands/DaemonCmd.js +25 -79
- package/dist/commands/IndexCmd.js +24 -17
- package/dist/commands/ResetDbCmd.js +50 -4
- package/dist/commands/StopDaemonCmd.js +14 -3
- package/dist/config/IgnoredExtensions.js +7 -1
- package/dist/config/StopWords.js +28 -0
- package/dist/constants.js +5 -0
- package/dist/daemon/daemonBatch.js +65 -0
- package/dist/daemon/daemonWorker.js +27 -0
- package/dist/db/client.js +4 -0
- package/dist/db/fileIndex.js +53 -45
- package/dist/db/schema.js +29 -13
- package/dist/db/sqlTemplates.js +13 -4
- package/dist/index.js +29 -28
- package/dist/utils/log.js +15 -0
- package/dist/utils/removeIgnoredFiles.js +43 -0
- package/dist/utils/sanitizeQuery.js +16 -0
- package/dist/utils/shouldIgnoreFiles.js +8 -0
- package/dist/utils/specificFileExceptions.js +90 -0
- package/package.json +4 -2
package/dist/commands/AskCmd.js
CHANGED
|
@@ -1,66 +1,63 @@
|
|
|
1
|
+
import readline from 'readline';
|
|
1
2
|
import { searchFiles } from "../db/fileIndex.js";
|
|
2
3
|
import { generate } from "../lib/generate.js";
|
|
3
|
-
import { summaryModule } from "../pipeline/modules/summaryModule.js";
|
|
4
4
|
export async function runAskCommand(query) {
|
|
5
5
|
if (!query) {
|
|
6
|
-
|
|
6
|
+
query = await promptOnce('๐ง Ask your question:\n> ');
|
|
7
|
+
}
|
|
8
|
+
query = query.trim();
|
|
9
|
+
if (!query) {
|
|
10
|
+
console.error('โ No question provided.\n๐ Usage: scai ask "your question"');
|
|
7
11
|
return;
|
|
8
12
|
}
|
|
9
13
|
console.log(`๐ Searching for: "${query}"\n`);
|
|
10
|
-
|
|
11
|
-
const results = await searchFiles(query, 5);
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
else {
|
|
14
|
+
const start = Date.now();
|
|
15
|
+
const results = await searchFiles(query, 5);
|
|
16
|
+
const duration = Date.now() - start;
|
|
17
|
+
console.log(`โฑ๏ธ searchFiles took ${duration}ms and returned ${results.length} result(s)`);
|
|
18
|
+
if (results.length > 0) {
|
|
16
19
|
console.log('๐ Closest files based on semantic similarity:');
|
|
17
|
-
results.forEach(file => {
|
|
18
|
-
console.log(
|
|
20
|
+
results.forEach((file, i) => {
|
|
21
|
+
console.log(` ${i + 1}. ๐ Path: ${file?.path} | Score: ${file?.score?.toFixed(3)}`);
|
|
19
22
|
});
|
|
20
23
|
}
|
|
24
|
+
else {
|
|
25
|
+
console.log('โ ๏ธ No similar embeddings found. Asking the model for context instead...');
|
|
26
|
+
}
|
|
27
|
+
// ๐ง Use stored summaries directly
|
|
21
28
|
let allSummaries = '';
|
|
22
29
|
for (const file of results) {
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
continue;
|
|
27
|
-
}
|
|
28
|
-
console.log(`๐ Using cached summary for file: ${file?.path}`);
|
|
29
|
-
const summaryResponse = await summaryModule.run({ content: file?.summary ? file.summary : '', filepath: file?.path });
|
|
30
|
-
if (summaryResponse.summary) {
|
|
31
|
-
allSummaries += `\n${summaryResponse.summary}`;
|
|
32
|
-
}
|
|
33
|
-
}
|
|
34
|
-
catch (err) {
|
|
35
|
-
console.error(`โ Error processing file: ${file?.path}`, err instanceof Error ? err.message : err);
|
|
30
|
+
if (!file?.summary) {
|
|
31
|
+
console.warn(`โ ๏ธ No summary available for file: ${file?.path}`);
|
|
32
|
+
continue;
|
|
36
33
|
}
|
|
34
|
+
console.log(`๐ Using stored summary for: ${file.path}`);
|
|
35
|
+
allSummaries += `\n${file.summary}`;
|
|
37
36
|
}
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
}
|
|
48
|
-
catch (err) {
|
|
49
|
-
console.error('โ Model request failed:', err);
|
|
50
|
-
}
|
|
37
|
+
const input = {
|
|
38
|
+
content: allSummaries ? `${query}\n\n${allSummaries}` : query,
|
|
39
|
+
filepath: '',
|
|
40
|
+
};
|
|
41
|
+
try {
|
|
42
|
+
console.log(allSummaries.trim()
|
|
43
|
+
? '๐ง Summaries found, sending them to the model for synthesis...'
|
|
44
|
+
: 'โ ๏ธ No summaries found. Asking the model for context only...');
|
|
45
|
+
const modelResponse = await generate(input, 'llama3');
|
|
46
|
+
console.log(`\n๐ Model response:\n${modelResponse.content}`);
|
|
51
47
|
}
|
|
52
|
-
|
|
53
|
-
console.
|
|
54
|
-
try {
|
|
55
|
-
const input = {
|
|
56
|
-
content: query,
|
|
57
|
-
filepath: '',
|
|
58
|
-
};
|
|
59
|
-
const modelResponse = await generate(input, 'llama3');
|
|
60
|
-
console.log(`\n๐ Model response:\n${modelResponse.content}`);
|
|
61
|
-
}
|
|
62
|
-
catch (err) {
|
|
63
|
-
console.error('โ Model request failed:', err);
|
|
64
|
-
}
|
|
48
|
+
catch (err) {
|
|
49
|
+
console.error('โ Model request failed:', err);
|
|
65
50
|
}
|
|
66
51
|
}
|
|
52
|
+
function promptOnce(promptText) {
|
|
53
|
+
return new Promise(resolve => {
|
|
54
|
+
const rl = readline.createInterface({
|
|
55
|
+
input: process.stdin,
|
|
56
|
+
output: process.stdout
|
|
57
|
+
});
|
|
58
|
+
rl.question(promptText, answer => {
|
|
59
|
+
rl.close();
|
|
60
|
+
resolve(answer.trim());
|
|
61
|
+
});
|
|
62
|
+
});
|
|
63
|
+
}
|
|
@@ -1,86 +1,32 @@
|
|
|
1
|
-
import { summaryModule } from '../pipeline/modules/summaryModule.js';
|
|
2
|
-
import { db } from '../db/client.js';
|
|
3
|
-
import fs from 'fs/promises';
|
|
4
1
|
import fsSync from 'fs';
|
|
2
|
+
import { LOG_PATH, PID_PATH } from '../constants.js';
|
|
3
|
+
import { log } from '../utils/log.js';
|
|
4
|
+
import { spawn } from 'child_process';
|
|
5
|
+
import { fileURLToPath } from 'url';
|
|
5
6
|
import path from 'path';
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
const
|
|
10
|
-
const SLEEP_MS = 30 * 1000; // ๐ค Pause between batches
|
|
11
|
-
const IDLE_SLEEP_MS = 4 * SLEEP_MS; // ๐ค Longer pause if idle
|
|
12
|
-
const MAX_FILES_PER_BATCH = 5; // ๐๏ธ Throttle indexing per cycle
|
|
13
|
-
// ๐ค Utility
|
|
14
|
-
function sleep(ms) {
|
|
15
|
-
return new Promise(resolve => setTimeout(resolve, ms));
|
|
16
|
-
}
|
|
17
|
-
// ๐ชต Append to log file
|
|
18
|
-
function log(message) {
|
|
19
|
-
const timestamp = new Date().toISOString();
|
|
20
|
-
fsSync.appendFileSync(LOG_PATH, `[${timestamp}] ${message}\n`);
|
|
21
|
-
}
|
|
22
|
-
// โ Skip unwanted file types
|
|
23
|
-
function shouldIgnoreFile(filePath) {
|
|
24
|
-
const ext = path.extname(filePath).toLowerCase();
|
|
25
|
-
return IGNORED_EXTENSIONS.includes(ext);
|
|
26
|
-
}
|
|
27
|
-
// ๐ง One summarization batch
|
|
28
|
-
export async function runDaemonBatch() {
|
|
29
|
-
const rows = db.prepare(`
|
|
30
|
-
SELECT path, type FROM files
|
|
31
|
-
WHERE summary IS NULL OR summary = ''
|
|
32
|
-
ORDER BY last_modified DESC
|
|
33
|
-
LIMIT ?
|
|
34
|
-
`).all(MAX_FILES_PER_BATCH);
|
|
35
|
-
if (rows.length === 0) {
|
|
36
|
-
log('โ
No files left to summarize.');
|
|
37
|
-
return false; // ๐ค Idle
|
|
38
|
-
}
|
|
39
|
-
for (const row of rows) {
|
|
40
|
-
if (!fsSync.existsSync(row.path)) {
|
|
41
|
-
log(`โ ๏ธ Skipped missing file: ${row.path}`);
|
|
42
|
-
continue;
|
|
43
|
-
}
|
|
44
|
-
if (shouldIgnoreFile(row.path)) {
|
|
45
|
-
log(`โ ๏ธ Skipped (extension): ${row.path}`);
|
|
46
|
-
continue;
|
|
47
|
-
}
|
|
7
|
+
export async function startDaemon() {
|
|
8
|
+
// If there's a PID file, check if the process is still running
|
|
9
|
+
if (fsSync.existsSync(PID_PATH)) {
|
|
10
|
+
const pid = parseInt(fsSync.readFileSync(PID_PATH, 'utf8'));
|
|
48
11
|
try {
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
let embedding = null;
|
|
53
|
-
if (summary) {
|
|
54
|
-
const vector = await generateEmbedding(summary);
|
|
55
|
-
if (vector)
|
|
56
|
-
embedding = JSON.stringify(vector);
|
|
57
|
-
}
|
|
58
|
-
db.prepare(`
|
|
59
|
-
UPDATE files
|
|
60
|
-
SET summary = @summary, embedding = @embedding, indexed_at = datetime('now')
|
|
61
|
-
WHERE path = @path
|
|
62
|
-
`).run({ summary, embedding, path: row.path });
|
|
63
|
-
log(`๐ Summarized: ${row.path}`);
|
|
64
|
-
}
|
|
65
|
-
catch (err) {
|
|
66
|
-
log(`โ Failed: ${row.path}: ${err instanceof Error ? err.message : String(err)}`);
|
|
12
|
+
process.kill(pid, 0); // Check if process exists
|
|
13
|
+
log(`โ ๏ธ Daemon already running with PID ${pid}.`);
|
|
14
|
+
return;
|
|
67
15
|
}
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
}
|
|
72
|
-
// ๐ Daemon loop: runs until killed
|
|
73
|
-
export async function runDaemonScheduler() {
|
|
74
|
-
fsSync.mkdirSync(SCAI_HOME, { recursive: true });
|
|
75
|
-
fsSync.writeFileSync(PID_PATH, process.pid.toString(), 'utf-8');
|
|
76
|
-
fsSync.appendFileSync(LOG_PATH, `\n\n๐ง Daemon started at ${new Date().toISOString()} โ PID ${process.pid}\n`);
|
|
77
|
-
let cycles = 0;
|
|
78
|
-
while (true) {
|
|
79
|
-
const didWork = await runDaemonBatch();
|
|
80
|
-
cycles++;
|
|
81
|
-
if (cycles % 20 === 0) {
|
|
82
|
-
log(`๐ Still running. Cycles: ${cycles}`);
|
|
16
|
+
catch {
|
|
17
|
+
log(`โ ๏ธ Stale PID file found. Removing and restarting daemon...`);
|
|
18
|
+
fsSync.unlinkSync(PID_PATH);
|
|
83
19
|
}
|
|
84
|
-
await sleep(didWork ? SLEEP_MS : IDLE_SLEEP_MS);
|
|
85
20
|
}
|
|
21
|
+
log('๐ Starting summarizer daemon in background mode...');
|
|
22
|
+
log(`๐ Logs will be saved to: ${LOG_PATH}`);
|
|
23
|
+
process.env.BACKGROUND_MODE = 'true';
|
|
24
|
+
const __filename = fileURLToPath(import.meta.url);
|
|
25
|
+
const __dirname = path.dirname(__filename);
|
|
26
|
+
const daemonWorkerPath = path.join(__dirname, '../daemon/daemonWorker.js');
|
|
27
|
+
const child = spawn(process.execPath, [daemonWorkerPath], {
|
|
28
|
+
detached: true,
|
|
29
|
+
stdio: ['ignore', 'ignore', 'ignore'],
|
|
30
|
+
});
|
|
31
|
+
child.unref();
|
|
86
32
|
}
|
|
@@ -1,14 +1,26 @@
|
|
|
1
1
|
import fg from 'fast-glob';
|
|
2
|
-
import fs from 'fs';
|
|
3
2
|
import path from 'path';
|
|
4
3
|
import { initSchema } from '../db/schema.js';
|
|
5
4
|
import { indexFile } from '../db/fileIndex.js';
|
|
6
5
|
import { shouldIgnoreFile } from '../utils/shouldIgnoreFiles.js';
|
|
7
6
|
import { detectFileType } from '../utils/detectFileType.js';
|
|
8
|
-
import {
|
|
7
|
+
import { startDaemon } from './DaemonCmd.js';
|
|
9
8
|
import { IGNORED_FOLDER_GLOBS } from '../config/IgnoredPaths.js';
|
|
10
9
|
import { Config } from '../config.js';
|
|
11
|
-
import {
|
|
10
|
+
import { DB_PATH } from '../constants.js';
|
|
11
|
+
import { log } from '../utils/log.js';
|
|
12
|
+
import lockfile from 'proper-lockfile';
|
|
13
|
+
// ๐ง Lock the database to prevent simultaneous access
|
|
14
|
+
async function lockDb() {
|
|
15
|
+
try {
|
|
16
|
+
const lock = await lockfile.lock(DB_PATH); // DB_PATH from constants.ts
|
|
17
|
+
return lock;
|
|
18
|
+
}
|
|
19
|
+
catch (err) {
|
|
20
|
+
log('โ Failed to acquire DB lock: ' + err);
|
|
21
|
+
throw err;
|
|
22
|
+
}
|
|
23
|
+
}
|
|
12
24
|
export async function runIndexCommand(targetDir, options = {}) {
|
|
13
25
|
try {
|
|
14
26
|
initSchema();
|
|
@@ -33,7 +45,7 @@ export async function runIndexCommand(targetDir, options = {}) {
|
|
|
33
45
|
resolvedDir = Config.getIndexDir() || process.cwd();
|
|
34
46
|
Config.setIndexDir(resolvedDir); // persist if not yet saved
|
|
35
47
|
}
|
|
36
|
-
|
|
48
|
+
log(`๐ Indexing files in: ${resolvedDir}`);
|
|
37
49
|
const files = await fg('**/*.*', {
|
|
38
50
|
cwd: resolvedDir,
|
|
39
51
|
ignore: IGNORED_FOLDER_GLOBS,
|
|
@@ -41,6 +53,7 @@ export async function runIndexCommand(targetDir, options = {}) {
|
|
|
41
53
|
});
|
|
42
54
|
const countByExt = {};
|
|
43
55
|
let count = 0;
|
|
56
|
+
const release = await lockDb(); // Lock the DB before starting
|
|
44
57
|
for (const file of files) {
|
|
45
58
|
if (shouldIgnoreFile(file))
|
|
46
59
|
continue;
|
|
@@ -49,22 +62,16 @@ export async function runIndexCommand(targetDir, options = {}) {
|
|
|
49
62
|
indexFile(file, null, type); // Index file without summary
|
|
50
63
|
const ext = path.extname(file);
|
|
51
64
|
countByExt[ext] = (countByExt[ext] || 0) + 1;
|
|
52
|
-
|
|
65
|
+
log(`๐ Indexed: ${path.relative(resolvedDir, file)}`);
|
|
53
66
|
count++;
|
|
54
67
|
}
|
|
55
68
|
catch (err) {
|
|
56
|
-
|
|
57
|
-
}
|
|
58
|
-
}
|
|
59
|
-
console.log('๐ Indexed files by extension:', countByExt);
|
|
60
|
-
console.log(`โ
Done. Indexed ${count} files.`);
|
|
61
|
-
if (options.detached) {
|
|
62
|
-
if (fs.existsSync(PID_PATH)) {
|
|
63
|
-
console.warn(`โ ๏ธ Daemon already running (PID file found at ${PID_PATH}). Skipping launch.`);
|
|
64
|
-
}
|
|
65
|
-
else {
|
|
66
|
-
console.log('๐ Starting summarizer daemon in background mode...');
|
|
67
|
-
runDaemonScheduler();
|
|
69
|
+
log(`โ ๏ธ Skipped in indexCmd ${file}: ${err instanceof Error ? err.message : err}`);
|
|
68
70
|
}
|
|
69
71
|
}
|
|
72
|
+
log('๐ Indexed files by extension:', JSON.stringify(countByExt, null, 2));
|
|
73
|
+
log(`โ
Done. Indexed ${count} files.`);
|
|
74
|
+
await release(); // Release the DB lock after indexing is done
|
|
75
|
+
// Auto-start daemon if not already running
|
|
76
|
+
startDaemon();
|
|
70
77
|
}
|
|
@@ -1,13 +1,52 @@
|
|
|
1
1
|
import fs from 'fs';
|
|
2
|
+
import fsp from 'fs/promises';
|
|
2
3
|
import { db } from '../db/client.js';
|
|
3
|
-
import { DB_PATH } from '../constants.js';
|
|
4
|
-
|
|
4
|
+
import { DB_PATH, SCAI_HOME } from '../constants.js';
|
|
5
|
+
import lockfile from 'proper-lockfile';
|
|
6
|
+
import path from 'path';
|
|
7
|
+
function getBackupDir() {
|
|
8
|
+
const timestamp = new Date().toISOString().replace(/[:.]/g, '-');
|
|
9
|
+
return path.join(SCAI_HOME, `backup-${timestamp}`);
|
|
10
|
+
}
|
|
11
|
+
async function backupScaiFolder() {
|
|
12
|
+
const backupDir = getBackupDir();
|
|
13
|
+
try {
|
|
14
|
+
await fsp.mkdir(backupDir, { recursive: true });
|
|
15
|
+
const files = await fsp.readdir(SCAI_HOME);
|
|
16
|
+
for (const file of files) {
|
|
17
|
+
const srcPath = path.join(SCAI_HOME, file);
|
|
18
|
+
const destPath = path.join(backupDir, file);
|
|
19
|
+
const stat = await fsp.stat(srcPath);
|
|
20
|
+
if (stat.isFile()) {
|
|
21
|
+
await fsp.copyFile(srcPath, destPath);
|
|
22
|
+
}
|
|
23
|
+
}
|
|
24
|
+
console.log(`๐ฆ Backed up .scai folder to: ${backupDir}`);
|
|
25
|
+
return backupDir;
|
|
26
|
+
}
|
|
27
|
+
catch (err) {
|
|
28
|
+
console.warn('โ ๏ธ Failed to back up .scai folder:', err instanceof Error ? err.message : err);
|
|
29
|
+
return null;
|
|
30
|
+
}
|
|
31
|
+
}
|
|
32
|
+
export async function resetDatabase() {
|
|
33
|
+
console.log('๐ Backing up existing .scai folder...');
|
|
34
|
+
await backupScaiFolder();
|
|
5
35
|
try {
|
|
6
|
-
db.close();
|
|
36
|
+
db.close();
|
|
7
37
|
console.log('๐ Closed SQLite database connection.');
|
|
8
38
|
}
|
|
9
39
|
catch (err) {
|
|
10
|
-
console.warn('โ ๏ธ Could not close database:', err);
|
|
40
|
+
console.warn('โ ๏ธ Could not close database:', err instanceof Error ? err.message : err);
|
|
41
|
+
}
|
|
42
|
+
try {
|
|
43
|
+
const releaseLock = await lockfile.unlock(DB_PATH).catch(() => null);
|
|
44
|
+
if (releaseLock) {
|
|
45
|
+
console.log('๐ Released database lock.');
|
|
46
|
+
}
|
|
47
|
+
}
|
|
48
|
+
catch (err) {
|
|
49
|
+
console.warn('โ ๏ธ Failed to release database lock:', err instanceof Error ? err.message : err);
|
|
11
50
|
}
|
|
12
51
|
if (fs.existsSync(DB_PATH)) {
|
|
13
52
|
try {
|
|
@@ -22,5 +61,12 @@ export function resetDatabase() {
|
|
|
22
61
|
else {
|
|
23
62
|
console.log('โน๏ธ No existing database found at:', DB_PATH);
|
|
24
63
|
}
|
|
64
|
+
try {
|
|
65
|
+
fs.mkdirSync(path.dirname(DB_PATH), { recursive: true });
|
|
66
|
+
console.log('๐ Ensured that the database directory exists.');
|
|
67
|
+
}
|
|
68
|
+
catch (err) {
|
|
69
|
+
console.warn('โ ๏ธ Could not ensure DB directory exists:', err instanceof Error ? err.message : err);
|
|
70
|
+
}
|
|
25
71
|
console.log('โ
Database has been reset. You can now re-run: scai index');
|
|
26
72
|
}
|
|
@@ -9,15 +9,26 @@ export async function runStopDaemonCommand() {
|
|
|
9
9
|
}
|
|
10
10
|
const pid = parseInt(fs.readFileSync(PID_PATH, 'utf-8'), 10);
|
|
11
11
|
if (isNaN(pid)) {
|
|
12
|
-
console.error('โ ๏ธ Invalid PID file.');
|
|
12
|
+
console.error('โ ๏ธ Invalid PID file. Removing it...');
|
|
13
|
+
fs.unlinkSync(PID_PATH);
|
|
14
|
+
return;
|
|
15
|
+
}
|
|
16
|
+
try {
|
|
17
|
+
// Check if process exists
|
|
18
|
+
process.kill(pid, 0);
|
|
19
|
+
}
|
|
20
|
+
catch {
|
|
21
|
+
console.warn(`โ ๏ธ No running process with PID ${pid}. Removing stale PID file.`);
|
|
22
|
+
fs.unlinkSync(PID_PATH);
|
|
13
23
|
return;
|
|
14
24
|
}
|
|
15
25
|
try {
|
|
16
|
-
process
|
|
26
|
+
// Attempt to terminate the process
|
|
27
|
+
process.kill(pid, 'SIGTERM');
|
|
17
28
|
fs.unlinkSync(PID_PATH);
|
|
18
29
|
console.log(`โ
Daemon process ${pid} stopped.`);
|
|
19
30
|
}
|
|
20
31
|
catch (err) {
|
|
21
|
-
console.error(`โ Failed to stop process ${pid}:`, err instanceof Error ? err.message : err);
|
|
32
|
+
console.error(`โ Failed to stop daemon process ${pid}:`, err instanceof Error ? err.message : err);
|
|
22
33
|
}
|
|
23
34
|
}
|
|
@@ -1,4 +1,3 @@
|
|
|
1
|
-
// src/config/IgnoredExtensions.ts
|
|
2
1
|
export const IGNORED_EXTENSIONS = [
|
|
3
2
|
// ๐ผ Media
|
|
4
3
|
'.png', '.jpg', '.jpeg', '.gif', '.webp', '.svg', '.ico',
|
|
@@ -43,4 +42,11 @@ export const IGNORED_EXTENSIONS = [
|
|
|
43
42
|
'.sap', '.sappkg', '.qbw', '.qbb',
|
|
44
43
|
// ๐ Lock files (but NOT lock *configs*)
|
|
45
44
|
'.lck', '.lockfile', '.db-lock', '.pid', '.socket',
|
|
45
|
+
// โ ๏ธ Added file types that are only kept by exception
|
|
46
|
+
'.xml', // Only specific XML files like pom.xml should be kept
|
|
47
|
+
'.json', // Kept only if exact filename is in exceptions
|
|
48
|
+
'.yaml', // Kept only if filename is explicitly whitelisted
|
|
49
|
+
'.yml',
|
|
50
|
+
'.md',
|
|
51
|
+
'.txt',
|
|
46
52
|
];
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* These common words are ignored from search queries
|
|
3
|
+
* to reduce noise and improve FTS and embedding match quality.
|
|
4
|
+
*/
|
|
5
|
+
export const STOP_WORDS = new Set([
|
|
6
|
+
// Articles & conjunctions
|
|
7
|
+
'a', 'an', 'and', 'but', 'or', 'nor',
|
|
8
|
+
// Prepositions
|
|
9
|
+
'at', 'by', 'for', 'from', 'in', 'into', 'of', 'on', 'to', 'with', 'about', 'above', 'below', 'under', 'over', 'through',
|
|
10
|
+
// Pronouns
|
|
11
|
+
'i', 'me', 'my', 'mine', 'you', 'your', 'yours', 'he', 'him', 'his', 'she', 'her', 'hers', 'it', 'its',
|
|
12
|
+
'we', 'us', 'our', 'ours', 'they', 'them', 'their', 'theirs',
|
|
13
|
+
// Determiners
|
|
14
|
+
'this', 'that', 'these', 'those', 'some', 'any', 'each', 'every', 'either', 'neither',
|
|
15
|
+
// Auxiliary and modal verbs
|
|
16
|
+
'am', 'is', 'are', 'was', 'were', 'be', 'been', 'being',
|
|
17
|
+
'do', 'does', 'did',
|
|
18
|
+
'have', 'has', 'had',
|
|
19
|
+
'can', 'could', 'shall', 'should', 'will', 'would', 'may', 'might', 'must',
|
|
20
|
+
// Wh-words and generic question words
|
|
21
|
+
'what', 'which', 'who', 'whom', 'whose', 'where', 'when', 'why', 'how',
|
|
22
|
+
// Misc common functional words
|
|
23
|
+
'as', 'if', 'than', 'then', 'there', 'because', 'so', 'just', 'only', 'not', 'no',
|
|
24
|
+
// Very common verbs to strip from noisy queries
|
|
25
|
+
'use', 'get', 'make', 'need', 'want', 'let', 'help', 'work', 'see', 'look', 'like', 'know',
|
|
26
|
+
// Other frequent noise
|
|
27
|
+
'all', 'more', 'most', 'many', 'much', 'such', 'also', 'again'
|
|
28
|
+
]);
|
package/dist/constants.js
CHANGED
|
@@ -21,6 +21,11 @@ export const PID_PATH = path.join(SCAI_HOME, 'daemon.pid');
|
|
|
21
21
|
* ~/.scai/config.json
|
|
22
22
|
*/
|
|
23
23
|
export const CONFIG_PATH = path.join(SCAI_HOME, 'config.json');
|
|
24
|
+
/**
|
|
25
|
+
* Path to the daemon log file:
|
|
26
|
+
* ~/.scai/daemon.log
|
|
27
|
+
*/
|
|
28
|
+
export const LOG_PATH = path.join(SCAI_HOME, 'daemon.log');
|
|
24
29
|
/**
|
|
25
30
|
* Get the active index directory.
|
|
26
31
|
*
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
import { summaryModule } from '../pipeline/modules/summaryModule.js';
|
|
2
|
+
import { db } from '../db/client.js';
|
|
3
|
+
import fs from 'fs/promises';
|
|
4
|
+
import fsSync from 'fs';
|
|
5
|
+
import { generateEmbedding } from '../lib/generateEmbedding.js';
|
|
6
|
+
import { DB_PATH } from '../constants.js';
|
|
7
|
+
import { log } from '../utils/log.js';
|
|
8
|
+
import lockfile from 'proper-lockfile';
|
|
9
|
+
import { shouldIgnoreFile } from '../utils/shouldIgnoreFiles.js';
|
|
10
|
+
const MAX_FILES_PER_BATCH = 5;
|
|
11
|
+
async function lockDb() {
|
|
12
|
+
try {
|
|
13
|
+
return await lockfile.lock(DB_PATH);
|
|
14
|
+
}
|
|
15
|
+
catch (err) {
|
|
16
|
+
log('โ Failed to acquire DB lock: ' + err);
|
|
17
|
+
throw err;
|
|
18
|
+
}
|
|
19
|
+
}
|
|
20
|
+
export async function runDaemonBatch() {
|
|
21
|
+
const rows = db.prepare(`
|
|
22
|
+
SELECT path, type FROM files
|
|
23
|
+
WHERE summary IS NULL OR summary = ''
|
|
24
|
+
ORDER BY last_modified DESC
|
|
25
|
+
LIMIT ?
|
|
26
|
+
`).all(MAX_FILES_PER_BATCH);
|
|
27
|
+
if (rows.length === 0) {
|
|
28
|
+
log('โ
No files left to summarize.');
|
|
29
|
+
return false;
|
|
30
|
+
}
|
|
31
|
+
const release = await lockDb();
|
|
32
|
+
for (const row of rows) {
|
|
33
|
+
if (!fsSync.existsSync(row.path)) {
|
|
34
|
+
log(`โ ๏ธ Skipped missing file: ${row.path}`);
|
|
35
|
+
continue;
|
|
36
|
+
}
|
|
37
|
+
if (shouldIgnoreFile(row.path)) {
|
|
38
|
+
log(`โ ๏ธ Skipped (extension): ${row.path}`);
|
|
39
|
+
continue;
|
|
40
|
+
}
|
|
41
|
+
try {
|
|
42
|
+
const content = await fs.readFile(row.path, 'utf-8');
|
|
43
|
+
const result = await summaryModule.run({ content, filepath: row.path });
|
|
44
|
+
const summary = result?.summary?.trim() || null;
|
|
45
|
+
let embedding = null;
|
|
46
|
+
if (summary) {
|
|
47
|
+
const vector = await generateEmbedding(summary);
|
|
48
|
+
if (vector)
|
|
49
|
+
embedding = JSON.stringify(vector);
|
|
50
|
+
}
|
|
51
|
+
db.prepare(`
|
|
52
|
+
UPDATE files
|
|
53
|
+
SET summary = @summary, embedding = @embedding, indexed_at = datetime('now')
|
|
54
|
+
WHERE path = @path
|
|
55
|
+
`).run({ summary, embedding, path: row.path });
|
|
56
|
+
log(`๐ Summarized: ${row.path}`);
|
|
57
|
+
}
|
|
58
|
+
catch (err) {
|
|
59
|
+
log(`โ Failed: ${row.path}: ${err instanceof Error ? err.message : String(err)}`);
|
|
60
|
+
}
|
|
61
|
+
await new Promise(resolve => setTimeout(resolve, 200));
|
|
62
|
+
}
|
|
63
|
+
await release();
|
|
64
|
+
return true;
|
|
65
|
+
}
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
import fsSync from 'fs';
|
|
2
|
+
import { LOG_PATH, PID_PATH, SCAI_HOME } from '../constants.js';
|
|
3
|
+
import { log } from '../utils/log.js';
|
|
4
|
+
import { runDaemonBatch } from '../daemon/daemonBatch.js'; // โ
now from utils
|
|
5
|
+
const SLEEP_MS = 30 * 1000;
|
|
6
|
+
const IDLE_SLEEP_MS = 4 * SLEEP_MS;
|
|
7
|
+
function sleep(ms) {
|
|
8
|
+
return new Promise(resolve => setTimeout(resolve, ms));
|
|
9
|
+
}
|
|
10
|
+
async function runDaemonScheduler() {
|
|
11
|
+
fsSync.mkdirSync(SCAI_HOME, { recursive: true });
|
|
12
|
+
fsSync.writeFileSync(PID_PATH, process.pid.toString(), 'utf-8');
|
|
13
|
+
fsSync.appendFileSync(LOG_PATH, `\n\n๐ง Daemon started at ${new Date().toISOString()} โ PID ${process.pid}\n`);
|
|
14
|
+
let cycles = 0;
|
|
15
|
+
while (true) {
|
|
16
|
+
const didWork = await runDaemonBatch();
|
|
17
|
+
cycles++;
|
|
18
|
+
if (cycles % 20 === 0) {
|
|
19
|
+
log(`๐ Still running. Cycles: ${cycles}`);
|
|
20
|
+
}
|
|
21
|
+
await sleep(didWork ? SLEEP_MS : IDLE_SLEEP_MS);
|
|
22
|
+
}
|
|
23
|
+
}
|
|
24
|
+
runDaemonScheduler().catch(err => {
|
|
25
|
+
log(`โ Daemon crashed: ${err instanceof Error ? err.message : String(err)}`);
|
|
26
|
+
process.exit(1);
|
|
27
|
+
});
|
package/dist/db/client.js
CHANGED
|
@@ -1,5 +1,9 @@
|
|
|
1
1
|
import Database from 'better-sqlite3';
|
|
2
2
|
import fs from 'fs';
|
|
3
3
|
import { DB_PATH, SCAI_HOME } from '../constants.js';
|
|
4
|
+
// Ensure the directory exists
|
|
4
5
|
fs.mkdirSync(SCAI_HOME, { recursive: true });
|
|
6
|
+
// Open the database connection
|
|
5
7
|
export const db = new Database(DB_PATH);
|
|
8
|
+
// Set journal_mode to WAL for better concurrency
|
|
9
|
+
db.pragma('journal_mode = WAL');
|
package/dist/db/fileIndex.js
CHANGED
|
@@ -1,18 +1,15 @@
|
|
|
1
1
|
import { db } from './client.js';
|
|
2
2
|
import fs from 'fs';
|
|
3
|
+
import path from 'path';
|
|
3
4
|
import { generateEmbedding } from '../lib/generateEmbedding.js';
|
|
4
5
|
import * as sqlTemplates from './sqlTemplates.js';
|
|
5
|
-
import
|
|
6
|
+
import { sanitizeQueryForFts } from '../utils/sanitizeQuery.js';
|
|
6
7
|
/**
|
|
7
8
|
* Index a file into the local SQLite database.
|
|
8
9
|
*
|
|
9
10
|
* - Normalizes the file path for cross-platform compatibility.
|
|
10
11
|
* - Extracts file metadata (last modified time).
|
|
11
12
|
* - Performs an UPSERT into the `files` table with the latest summary/type/timestamp.
|
|
12
|
-
*
|
|
13
|
-
* @param filePath - Absolute path to the file being indexed
|
|
14
|
-
* @param summary - Optional summary of the file content
|
|
15
|
-
* @param type - File type or extension (e.g., 'md', 'ts')
|
|
16
13
|
*/
|
|
17
14
|
export function indexFile(filePath, summary, type) {
|
|
18
15
|
const stats = fs.statSync(filePath);
|
|
@@ -25,18 +22,12 @@ export function indexFile(filePath, summary, type) {
|
|
|
25
22
|
type,
|
|
26
23
|
lastModified,
|
|
27
24
|
indexedAt,
|
|
25
|
+
embedding: null
|
|
28
26
|
});
|
|
29
27
|
console.log(`๐ Indexed: ${normalizedPath}`);
|
|
30
28
|
}
|
|
31
29
|
/**
|
|
32
30
|
* Perform a raw keyword-based full-text search using the FTS5 index.
|
|
33
|
-
*
|
|
34
|
-
* - Tokenizes and sanitizes the input query string.
|
|
35
|
-
* - Performs a ranked search using BM25 scoring via the virtual FTS table.
|
|
36
|
-
* - Returns basic file metadata along with rank for ordering.
|
|
37
|
-
*
|
|
38
|
-
* @param query - The search query string (e.g., "api router config")
|
|
39
|
-
* @param limit - Max number of results to return (default: 10)
|
|
40
31
|
*/
|
|
41
32
|
export function queryFiles(query, limit = 10) {
|
|
42
33
|
const safeQuery = query
|
|
@@ -48,50 +39,59 @@ export function queryFiles(query, limit = 10) {
|
|
|
48
39
|
})
|
|
49
40
|
.join(' OR ');
|
|
50
41
|
console.log(`Executing search query: ${safeQuery}`);
|
|
51
|
-
const results = db.prepare(
|
|
52
|
-
|
|
53
|
-
|
|
42
|
+
const results = db.prepare(`
|
|
43
|
+
SELECT f.id, f.path, f.summary, f.type, f.last_modified, f.indexed_at
|
|
44
|
+
FROM files f
|
|
45
|
+
JOIN files_fts fts ON f.id = fts.rowid
|
|
46
|
+
WHERE fts.files_fts MATCH ?
|
|
47
|
+
LIMIT ?
|
|
48
|
+
`).all(safeQuery, limit);
|
|
49
|
+
console.log(`Search returned ${results.length} results.`);
|
|
50
|
+
results.forEach(result => {
|
|
51
|
+
console.log(`๐ Found in FTS search: ${result.path}`);
|
|
54
52
|
});
|
|
55
53
|
return results;
|
|
56
54
|
}
|
|
57
|
-
export function cosineSimilarity(a, b) {
|
|
58
|
-
const dot = a.reduce((sum, ai, i) => sum + ai * b[i], 0);
|
|
59
|
-
const magA = Math.sqrt(a.reduce((sum, ai) => sum + ai * ai, 0));
|
|
60
|
-
const magB = Math.sqrt(b.reduce((sum, bi) => sum + bi * bi, 0));
|
|
61
|
-
return dot / (magA * magB);
|
|
62
|
-
}
|
|
63
55
|
/**
|
|
64
56
|
* Perform a hybrid semantic + keyword-based search.
|
|
65
|
-
*
|
|
66
|
-
* - Generates a vector embedding of the query.
|
|
67
|
-
* - Runs an FTS search using BM25 ranking.
|
|
68
|
-
* - Looks up file embeddings from the database and compares using cosine similarity.
|
|
69
|
-
* - Combines similarity score and BM25 rank into a weighted final score.
|
|
70
|
-
*
|
|
71
|
-
* @param query - Natural language search query
|
|
72
|
-
* @param topK - Max number of top-ranked results to return (default: 5)
|
|
73
57
|
*/
|
|
74
58
|
export async function searchFiles(query, topK = 5) {
|
|
59
|
+
console.log(`๐ง Searching for query: "${query}"`);
|
|
75
60
|
const embedding = await generateEmbedding(query);
|
|
76
|
-
if (!embedding)
|
|
61
|
+
if (!embedding) {
|
|
62
|
+
console.log('โ ๏ธ Failed to generate embedding for query');
|
|
77
63
|
return [];
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
64
|
+
}
|
|
65
|
+
const safeQuery = sanitizeQueryForFts(query);
|
|
66
|
+
console.log(`Executing search query in FTS5: ${safeQuery}`);
|
|
67
|
+
const ftsResults = db.prepare(`
|
|
68
|
+
SELECT fts.rowid AS id, f.path, f.summary, f.type, bm25(files_fts) AS bm25Score
|
|
69
|
+
FROM files f
|
|
70
|
+
JOIN files_fts fts ON f.id = fts.rowid
|
|
71
|
+
WHERE fts.files_fts MATCH ?
|
|
72
|
+
AND f.embedding IS NOT NULL
|
|
73
|
+
ORDER BY bm25Score DESC
|
|
74
|
+
LIMIT ?
|
|
75
|
+
`).all(safeQuery, topK);
|
|
76
|
+
console.log(`FTS search returned ${ftsResults.length} results`);
|
|
77
|
+
if (ftsResults.length === 0) {
|
|
78
|
+
console.log('โ ๏ธ No results found from FTS search');
|
|
79
|
+
return [];
|
|
80
|
+
}
|
|
81
|
+
ftsResults.forEach(result => {
|
|
82
|
+
console.log(`๐ FTS found: ${result.path}`);
|
|
83
|
+
});
|
|
88
84
|
const bm25Min = Math.min(...ftsResults.map(r => r.bm25Score));
|
|
89
85
|
const bm25Max = Math.max(...ftsResults.map(r => r.bm25Score));
|
|
90
86
|
const scored = ftsResults.map(result => {
|
|
91
87
|
try {
|
|
92
|
-
const embResult = db.prepare(sqlTemplates.fetchEmbeddingTemplate).get({
|
|
93
|
-
|
|
88
|
+
const embResult = db.prepare(sqlTemplates.fetchEmbeddingTemplate).get({
|
|
89
|
+
path: result.path,
|
|
90
|
+
});
|
|
91
|
+
if (!embResult || typeof embResult.embedding !== 'string') {
|
|
92
|
+
console.log(`โ ๏ธ No embedding for file: ${result.path}`);
|
|
94
93
|
return null;
|
|
94
|
+
}
|
|
95
95
|
const vector = JSON.parse(embResult.embedding);
|
|
96
96
|
const sim = cosineSimilarity(embedding, vector);
|
|
97
97
|
const normalizedBm25 = 1 - ((result.bm25Score - bm25Min) / (bm25Max - bm25Min + 1e-5));
|
|
@@ -101,15 +101,23 @@ export async function searchFiles(query, topK = 5) {
|
|
|
101
101
|
summary: result.summary,
|
|
102
102
|
score: finalScore,
|
|
103
103
|
sim,
|
|
104
|
-
bm25: normalizedBm25
|
|
104
|
+
bm25: normalizedBm25,
|
|
105
105
|
};
|
|
106
106
|
}
|
|
107
107
|
catch (err) {
|
|
108
|
-
console.error(
|
|
108
|
+
console.error(`โ Error processing embedding for file: ${result.path}`, err);
|
|
109
109
|
return null;
|
|
110
110
|
}
|
|
111
|
-
})
|
|
111
|
+
})
|
|
112
|
+
.filter((r) => r !== null)
|
|
112
113
|
.sort((a, b) => b.score - a.score)
|
|
113
114
|
.slice(0, topK);
|
|
115
|
+
console.log(`Returning top ${topK} results based on combined score`);
|
|
114
116
|
return scored;
|
|
115
117
|
}
|
|
118
|
+
function cosineSimilarity(a, b) {
|
|
119
|
+
const dot = a.reduce((sum, ai, i) => sum + ai * b[i], 0);
|
|
120
|
+
const magA = Math.sqrt(a.reduce((sum, ai) => sum + ai * ai, 0));
|
|
121
|
+
const magB = Math.sqrt(b.reduce((sum, bi) => sum + bi * bi, 0));
|
|
122
|
+
return dot / (magA * magB);
|
|
123
|
+
}
|
package/dist/db/schema.js
CHANGED
|
@@ -1,19 +1,35 @@
|
|
|
1
1
|
import { db } from "./client.js";
|
|
2
2
|
/**
 * Create the SQLite schema used by the file index, if it does not exist yet.
 *
 * Creates:
 *  - `files`: one row per indexed file (path, summary, type, timestamps and
 *    the embedding stored as TEXT);
 *  - `files_fts`: an FTS5 index over `path` and `summary` that uses `files`
 *    as its external content table (`content='files'`);
 *  - three triggers that mirror inserts, updates and deletes on `files` into
 *    `files_fts`.
 *
 * Because `files_fts` is an external-content table, stale index entries are
 * removed with the FTS5 'delete' command, passing the OLD column values; the
 * update trigger then re-inserts the NEW values.
 *
 * Every statement uses IF NOT EXISTS, so calling this repeatedly is a no-op
 * for objects that already exist. Triggers that already exist under these
 * names are therefore left untouched.
 */
export function initSchema() {
    db.exec(`
    CREATE TABLE IF NOT EXISTS files (
      id INTEGER PRIMARY KEY AUTOINCREMENT,
      path TEXT UNIQUE,
      summary TEXT,
      type TEXT,
      indexed_at TEXT,
      last_modified TEXT,
      embedding TEXT
    );

    CREATE VIRTUAL TABLE IF NOT EXISTS files_fts
    USING fts5(path, summary, content='files', content_rowid='id');

    -- FTS Triggers to keep files_fts in sync
    CREATE TRIGGER IF NOT EXISTS files_ai AFTER INSERT ON files BEGIN
      INSERT INTO files_fts(rowid, path, summary)
      VALUES (new.id, new.path, new.summary);
    END;

    CREATE TRIGGER IF NOT EXISTS files_au AFTER UPDATE ON files BEGIN
      INSERT INTO files_fts(files_fts, rowid, path, summary)
      VALUES ('delete', old.id, old.path, old.summary);
      INSERT INTO files_fts(rowid, path, summary)
      VALUES (new.id, new.path, new.summary);
    END;

    CREATE TRIGGER IF NOT EXISTS files_ad AFTER DELETE ON files BEGIN
      INSERT INTO files_fts(files_fts, rowid, path, summary)
      VALUES ('delete', old.id, old.path, old.summary);
    END;
  `);
    console.log('✅ SQLite schema initialized with FTS5 triggers');
}
|
package/dist/db/sqlTemplates.js
CHANGED
|
@@ -1,12 +1,21 @@
|
|
|
1
1
|
// Upsert file metadata into `files`.
//
// Named parameters: :path, :summary, :type, :lastModified, :indexedAt, :embedding.
//
// On a path conflict, `type`, `last_modified` and `indexed_at` are always
// overwritten, while `summary` and `embedding` are only replaced by non-NULL
// incoming values. COALESCE is used rather than `excluded.x != files.x`
// because that comparison evaluates to NULL when the stored value is NULL,
// which would keep a NULL summary/embedding forever.
export const upsertFileTemplate = `
  INSERT INTO files (path, summary, type, last_modified, indexed_at, embedding)
  VALUES (:path, :summary, :type, :lastModified, :indexedAt, :embedding)
  ON CONFLICT(path) DO UPDATE SET
    summary = COALESCE(excluded.summary, files.summary),
    type = excluded.type,
    last_modified = excluded.last_modified,
    indexed_at = excluded.indexed_at,
    embedding = COALESCE(excluded.embedding, files.embedding)
`;
|
|
11
20
|
// Fetch search results with BM25 ranking
|
|
12
21
|
export const fetchBm25ScoresTemplate = `
|
package/dist/index.js
CHANGED
|
@@ -5,9 +5,6 @@ import { Config } from './config.js';
|
|
|
5
5
|
import { createRequire } from 'module';
|
|
6
6
|
const require = createRequire(import.meta.url);
|
|
7
7
|
const { version } = require('../package.json');
|
|
8
|
-
// ๐ง Commands
|
|
9
|
-
import { checkEnv } from "./commands/EnvCmd.js";
|
|
10
|
-
import { checkGit } from "./commands/GitCmd.js";
|
|
11
8
|
import { suggestCommitMessage } from "./commands/CommitSuggesterCmd.js";
|
|
12
9
|
import { handleRefactor } from "./commands/RefactorCmd.js";
|
|
13
10
|
import { generateTests } from "./commands/TestGenCmd.js";
|
|
@@ -18,7 +15,7 @@ import { runModulePipelineFromCLI } from './commands/ModulePipelineCmd.js';
|
|
|
18
15
|
import { runIndexCommand } from './commands/IndexCmd.js';
|
|
19
16
|
import { resetDatabase } from './commands/ResetDbCmd.js';
|
|
20
17
|
import { runQueryCommand } from './commands/QueryCmd.js';
|
|
21
|
-
import {
|
|
18
|
+
import { startDaemon } from './commands/DaemonCmd.js';
|
|
22
19
|
import { runStopDaemonCommand } from "./commands/StopDaemonCmd.js";
|
|
23
20
|
import { runAskCommand } from './commands/AskCmd.js';
|
|
24
21
|
// ๐๏ธ CLI Setup
|
|
@@ -34,12 +31,14 @@ cmd
|
|
|
34
31
|
await bootstrap();
|
|
35
32
|
console.log('โ
Model initialization completed!');
|
|
36
33
|
});
|
|
34
|
+
cmd
|
|
35
|
+
.command('sugg')
|
|
36
|
+
.description('Suggest a commit message from staged changes')
|
|
37
|
+
.option('-c, --commit', 'Automatically commit with suggested message')
|
|
38
|
+
.action(suggestCommitMessage);
|
|
37
39
|
// ๐ง Group: Git-related commands
|
|
38
40
|
const git = cmd.command('git').description('Git utilities');
|
|
39
|
-
git
|
|
40
|
-
.command('status')
|
|
41
|
-
.description('Check Git status')
|
|
42
|
-
.action(checkGit);
|
|
41
|
+
// The sugg command under the 'git' group
|
|
43
42
|
git
|
|
44
43
|
.command('sugg')
|
|
45
44
|
.description('Suggest a commit message from staged changes')
|
|
@@ -66,15 +65,6 @@ gen
|
|
|
66
65
|
.command('tests <file>')
|
|
67
66
|
.description('Generate a Jest test file for the specified JS/TS module')
|
|
68
67
|
.action((file) => generateTests(file));
|
|
69
|
-
// ๐ Indexing
|
|
70
|
-
cmd
|
|
71
|
-
.command('index [targetDir]')
|
|
72
|
-
.description('Index supported files in the given directory (or current folder if none)')
|
|
73
|
-
.option('-d, --detached', 'Run summarizer daemon after indexing')
|
|
74
|
-
.option('--force', 'Force indexing even if another folder has already been indexed')
|
|
75
|
-
.action((targetDir, options) => {
|
|
76
|
-
runIndexCommand(targetDir, { detached: options.detached, force: options.force });
|
|
77
|
-
});
|
|
78
68
|
// โ๏ธ Group: Configuration settings
|
|
79
69
|
const set = cmd.command('set').description('Set configuration values');
|
|
80
70
|
set
|
|
@@ -99,35 +89,46 @@ set
|
|
|
99
89
|
Config.show();
|
|
100
90
|
});
|
|
101
91
|
// ๐งช Diagnostics and info
|
|
102
|
-
cmd
|
|
103
|
-
.command('env')
|
|
104
|
-
.description('Check environment variables')
|
|
105
|
-
.action(checkEnv);
|
|
106
92
|
cmd
|
|
107
93
|
.command('config')
|
|
108
94
|
.description('Show the currently active model and language settings')
|
|
109
95
|
.action(() => {
|
|
110
96
|
Config.show();
|
|
111
97
|
});
|
|
98
|
+
// Add explanation about alpha features directly in the help menu
|
|
99
|
+
cmd.addHelpText('after', `
|
|
100
|
+
๐จ Alpha Features:
|
|
101
|
+
- The "index", "daemon", "stop-daemon", "reset-db" commands are considered alpha features.
|
|
102
|
+
- These commands are in active development and may change in the future.
|
|
103
|
+
|
|
104
|
+
๐ก Use with caution and expect possible changes or instability.
|
|
105
|
+
`);
|
|
106
|
+
// ๐ Indexing
|
|
107
|
+
cmd
|
|
108
|
+
.command('index [targetDir]')
|
|
109
|
+
.description('Index supported files in the given directory (or current folder if none)')
|
|
110
|
+
.option('--force', 'Force indexing even if another folder has already been indexed')
|
|
111
|
+
.action((targetDir, options) => {
|
|
112
|
+
runIndexCommand(targetDir, { force: options.force });
|
|
113
|
+
});
|
|
112
114
|
// ๐ง Query and assistant
|
|
113
115
|
cmd
|
|
114
116
|
.command('query <query>')
|
|
115
117
|
.description('Search indexed files by keyword')
|
|
116
118
|
.action(runQueryCommand);
|
|
117
119
|
cmd
|
|
118
|
-
.command('ask')
|
|
119
|
-
.description('Ask a question
|
|
120
|
-
.
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
runAskCommand(q);
|
|
120
|
+
.command('ask [question...]') // <- the ... makes it variadic
|
|
121
|
+
.description('Ask a question based on indexed files')
|
|
122
|
+
.action((questionParts) => {
|
|
123
|
+
const fullQuery = questionParts?.join(' ');
|
|
124
|
+
runAskCommand(fullQuery);
|
|
124
125
|
});
|
|
125
126
|
// ๐ ๏ธ Background tasks and maintenance
|
|
126
127
|
cmd
|
|
127
128
|
.command('daemon')
|
|
128
129
|
.description('Run background summarization of indexed files')
|
|
129
130
|
.action(async () => {
|
|
130
|
-
await
|
|
131
|
+
await startDaemon(); // ignore the return value
|
|
131
132
|
});
|
|
132
133
|
cmd
|
|
133
134
|
.command('stop-daemon')
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
import fs from 'fs';
|
|
2
|
+
import { LOG_PATH } from '../constants.js';
|
|
3
|
+
/**
 * Write one timestamped log line built from `args`.
 *
 * Strings are used verbatim, Error objects are rendered with their stack
 * (JSON.stringify would turn them into "{}"), and every other value is
 * pretty-printed as JSON. Values JSON cannot represent (circular structures,
 * BigInt, undefined, functions) fall back to String(value) so that logging
 * never throws because of its arguments.
 *
 * When the BACKGROUND_MODE environment variable is exactly 'true', the line is
 * appended to LOG_PATH; otherwise it is printed with console.log.
 *
 * @param {...unknown} args - Values to log, joined with single spaces.
 * @returns {void}
 */
export function log(...args) {
    const timestamp = new Date().toISOString();
    const message = args.map(formatLogArg).join(' ');
    const line = `[${timestamp}] ${message}`;
    if (process.env.BACKGROUND_MODE === 'true') {
        // If running in background, log to a file
        fs.appendFileSync(LOG_PATH, `${line}\n`);
    }
    else {
        // Otherwise, log to the console
        console.log(line);
    }
}
/** Render a single log argument as text without ever throwing on odd values. */
function formatLogArg(arg) {
    if (typeof arg === 'string') {
        return arg;
    }
    if (arg instanceof Error) {
        return arg.stack ?? `${arg.name}: ${arg.message}`;
    }
    try {
        // JSON.stringify returns undefined for undefined/functions/symbols.
        return JSON.stringify(arg, null, 2) ?? String(arg);
    }
    catch {
        // Circular structure or BigInt.
        return String(arg);
    }
}
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
import Database from 'better-sqlite3';
|
|
2
|
+
import path from 'path';
|
|
3
|
+
import os from 'os';
|
|
4
|
+
import { IGNORED_EXTENSIONS } from '../config/IgnoredExtensions.js';
|
|
5
|
+
import { specificFileExceptions } from './specificFileExceptions.js';
|
|
6
|
+
// This file is meant to be run directly as a Node.js script: node dist/utils/removeIgnoredFiles.js
|
|
7
|
+
// It removes wrongly indexed files that don't add value to the model.
|
|
8
|
+
// Maintenance script body: deletes rows from `files` whose path ends with one
// of IGNORED_EXTENSIONS, except files whose name is listed in
// specificFileExceptions.
const DB_PATH = path.join(os.homedir(), '.scai', 'db.sqlite');
const db = new Database(DB_PATH);
console.log('🧹 Removing files with ignored extensions from the database...');
// Prepared once and reused for every extension / row.
const selectByExtension = db.prepare(`
        SELECT path FROM files WHERE path LIKE ?
      `);
const deleteByPath = db.prepare(`DELETE FROM files WHERE path = ?`);
// === Remove Files with Ignored Extensions, Excluding Specific Exceptions ===
for (const ext of IGNORED_EXTENSIONS) {
    try {
        const filesToDelete = selectByExtension.all(`%${ext}`);
        let deletedCount = 0;
        for (const file of filesToDelete) {
            // The exception list holds bare file names, while the database
            // stores paths, so compare against the basename.
            if (specificFileExceptions.includes(path.basename(file.path))) {
                console.log(`⚠️ Skipped file (exception): ${file.path}`);
                continue;
            }
            const deleted = deleteByPath.run(file.path);
            if (deleted.changes > 0) {
                deletedCount++;
            }
        }
        if (deletedCount > 0) {
            console.log(`✅ Removed ${deletedCount} files with extension: ${ext}`);
        }
        else {
            console.log(`⚠️ No deletions for files with extension: ${ext}`);
        }
    }
    catch (err) {
        // Template literal so that the extension is actually interpolated.
        console.error(`❌ Failed to remove files with extension ${ext}:`, err instanceof Error ? err.message : err);
    }
}
console.log('🧹 Finished removing ignored files.');
// Release the database handle now that the one-shot cleanup is done.
db.close();
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
// src/utils/sanitizeQuery.ts
|
|
2
|
+
import { STOP_WORDS } from '../config/StopWords.js';
|
|
3
|
+
/**
 * Turn free-form user input into an FTS MATCH expression of OR-ed prefix terms.
 *
 * The input is lower-cased and split on every run of characters outside
 * [a-z0-9], so punctuation attached to a word ("schema?", "indexFiles()",
 * "file-index.js") no longer discards the whole word. Parts of up to two
 * characters and parts found in STOP_WORDS are dropped; each remaining part
 * gets a trailing `*` (prefix match).
 *
 * @param {string} input - Raw query text.
 * @returns {string} Terms joined with " OR ", or "*" when no usable term is left.
 */
export function sanitizeQueryForFts(input) {
    const tokens = input
        .toLowerCase()
        .split(/[^a-z0-9]+/)
        .filter((token) => token.length > 2 && !STOP_WORDS.has(token))
        .map((token) => `${token}*`);
    // Fallback kept unchanged for callers that rely on it.
    // NOTE(review): confirm that a bare "*" is accepted by the FTS MATCH this
    // string is passed to.
    if (tokens.length === 0)
        return '*';
    return tokens.join(' OR ');
}
|
|
@@ -1,6 +1,14 @@
|
|
|
1
1
|
import path from 'path';
|
|
2
2
|
import { IGNORED_EXTENSIONS } from '../config/IgnoredExtensions.js';
|
|
3
|
+
import { specificFileExceptions } from '../utils/specificFileExceptions.js';
|
|
3
4
|
/**
 * Decide whether a file should be skipped based on its extension.
 *
 * A file listed in specificFileExceptions is never ignored. Entries without a
 * "/" are compared with the file's basename; entries that contain a "/" are
 * compared with the trailing segments of the path, since a basename can never
 * contain a separator. Everything else is ignored when its lower-cased
 * extension is in IGNORED_EXTENSIONS.
 *
 * @param {string} filePath - Path of the candidate file.
 * @returns {boolean} true when the file should be ignored.
 */
export function shouldIgnoreFile(filePath) {
    const fileName = path.basename(filePath);
    // Use "/" separators so directory-qualified entries match on every platform.
    const normalizedPath = filePath.split(path.sep).join('/');
    const isException = specificFileExceptions.some((entry) => entry.includes('/')
        ? normalizedPath === entry || normalizedPath.endsWith(`/${entry}`)
        : entry === fileName);
    if (isException) {
        return false; // Don't ignore if it's in the exceptions list
    }
    // If not in exceptions, check against ignored extensions
    const ext = path.extname(filePath).toLowerCase();
    return IGNORED_EXTENSIONS.includes(ext);
}
|
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
/**
 * File names that are kept even when their extension is on the ignore list.
 *
 * Entries are plain file names unless noted otherwise. The list is consulted
 * with `Array.prototype.includes`, so each name appears only once.
 */
export const specificFileExceptions = [
    // Project configuration files
    'package.json',
    'package-lock.json',
    'yarn.lock',
    'pnpm-lock.yaml',
    'tsconfig.json',
    'tsconfig.build.json',
    'tsconfig.prod.json',
    'tsconfig.dev.json',
    'jsconfig.json',
    'eslint.json',
    'eslint.config.js',
    'babel.config.js',
    'webpack.config.js',
    'webpack.dev.config.js',
    'webpack.prod.config.js',
    'rollup.config.js',
    'gulpfile.js',
    'Makefile',
    // Docker & CI/CD
    'Dockerfile',
    'Dockerfile.dev',
    'docker-compose.yaml',
    'docker-compose.yml',
    'ci.yml',
    'gitlab-ci.yml',
    'Jenkinsfile',
    // NOTE(review): this entry contains a directory separator, so it can only
    // match when the consumer compares more than the basename; CircleCI's
    // config normally lives at `.circleci/config.yml` - confirm the intent.
    'circleci/config.yml',
    // Documentation and readme files
    'README.md',
    'README.rst',
    'CONTRIBUTING.md',
    'CHANGELOG.md',
    'LICENSE',
    'LICENSE.txt',
    'LICENSE.md',
    'NOTICE.txt',
    'INSTALL.md',
    // Build and deployment configuration files
    'build.gradle',
    'pom.xml',
    'settings.gradle',
    'build.sh',
    'build.bash',
    'deploy.sh',
    'ci.sh',
    // Other project files
    'Makefile.am',
    'config.yaml',
    'config.json',
    'config.toml',
    'settings.json',
    'settings.yml',
    // NOTE(review): secrets.json and the .env* entries below typically hold
    // credentials; keeping them on this list means they are not filtered out
    // by the extension check - confirm that this is intended.
    'secrets.json',
    // Web development & frontend
    'index.html',
    'index.php',
    'app.js',
    'app.ts',
    'styles.css',
    'main.scss',
    'main.less',
    'style.css',
    'app.vue',
    'index.vue',
    // Miscellaneous important files
    'README.txt',
    'data.json',
    'data.yml',
    'env.json',
    'env.yml',
    '.env',
    '.env.local',
    '.env.production',
    '.env.development',
    // Test-related files
    'test.config.js',
    'test-utils.js',
    'test.setup.js',
    'jest.setup.js',
    'mocha.setup.js',
    'karma.conf.js',
    'cypress.json',
    'tests.js',
    'tests.ts',
    'test.js',
    'test.ts',
];
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "scai",
|
|
3
|
-
"version": "0.1.
|
|
3
|
+
"version": "0.1.25",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"bin": {
|
|
6
6
|
"scai": "./dist/index.js"
|
|
@@ -27,12 +27,14 @@
|
|
|
27
27
|
"better-sqlite3": "^12.1.1",
|
|
28
28
|
"commander": "^11.0.0",
|
|
29
29
|
"fast-glob": "^3.3.3",
|
|
30
|
-
"ora": "^8.2.0"
|
|
30
|
+
"ora": "^8.2.0",
|
|
31
|
+
"proper-lockfile": "^4.1.2"
|
|
31
32
|
},
|
|
32
33
|
"devDependencies": {
|
|
33
34
|
"@types/better-sqlite3": "^7.6.13",
|
|
34
35
|
"@types/jest": "^30.0.0",
|
|
35
36
|
"@types/node": "^24.0.1",
|
|
37
|
+
"@types/proper-lockfile": "^4.1.4",
|
|
36
38
|
"jest": "^30.0.2",
|
|
37
39
|
"ts-jest": "^29.4.0",
|
|
38
40
|
"typescript": "^5.8.3"
|