scai 0.1.23 → 0.1.24

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,66 +1,63 @@
1
+ import readline from 'readline';
1
2
  import { searchFiles } from "../db/fileIndex.js";
2
3
  import { generate } from "../lib/generate.js";
3
- import { summaryModule } from "../pipeline/modules/summaryModule.js";
4
4
  export async function runAskCommand(query) {
5
5
  if (!query) {
6
- console.error('โŒ Please provide a search query.\n๐Ÿ‘‰ Usage: scai ask "keyword"');
6
+ query = await promptOnce('๐Ÿง  Ask your question:\n> ');
7
+ }
8
+ query = query.trim();
9
+ if (!query) {
10
+ console.error('โŒ No question provided.\n๐Ÿ‘‰ Usage: scai ask "your question"');
7
11
  return;
8
12
  }
9
13
  console.log(`๐Ÿ” Searching for: "${query}"\n`);
10
- // Use vector-based search
11
- const results = await searchFiles(query, 5); // Or 3 if you want fewer
12
- if (results.length === 0) {
13
- console.log('โš ๏ธ No similar embeddings found. Asking the model for context instead...');
14
- }
15
- else {
14
+ const start = Date.now();
15
+ const results = await searchFiles(query, 5);
16
+ const duration = Date.now() - start;
17
+ console.log(`โฑ๏ธ searchFiles took ${duration}ms and returned ${results.length} result(s)`);
18
+ if (results.length > 0) {
16
19
  console.log('๐Ÿ“Š Closest files based on semantic similarity:');
17
- results.forEach(file => {
18
- console.log(`๐Ÿ“„ Path: ${file?.path}`);
20
+ results.forEach((file, i) => {
21
+ console.log(` ${i + 1}. ๐Ÿ“„ Path: ${file?.path} | Score: ${file?.score?.toFixed(3)}`);
19
22
  });
20
23
  }
24
+ else {
25
+ console.log('โš ๏ธ No similar embeddings found. Asking the model for context instead...');
26
+ }
27
+ // ๐Ÿง  Use stored summaries directly
21
28
  let allSummaries = '';
22
29
  for (const file of results) {
23
- try {
24
- if (!file?.summary) {
25
- console.warn(`โš ๏ธ No summary available for file: ${file?.path}`);
26
- continue;
27
- }
28
- console.log(`๐Ÿ“ Using cached summary for file: ${file?.path}`);
29
- const summaryResponse = await summaryModule.run({ content: file?.summary ? file.summary : '', filepath: file?.path });
30
- if (summaryResponse.summary) {
31
- allSummaries += `\n${summaryResponse.summary}`;
32
- }
33
- }
34
- catch (err) {
35
- console.error(`โŒ Error processing file: ${file?.path}`, err instanceof Error ? err.message : err);
30
+ if (!file?.summary) {
31
+ console.warn(`โš ๏ธ No summary available for file: ${file?.path}`);
32
+ continue;
36
33
  }
34
+ console.log(`๐Ÿ“ Using stored summary for: ${file.path}`);
35
+ allSummaries += `\n${file.summary}`;
37
36
  }
38
- if (allSummaries.trim()) {
39
- console.log('๐Ÿง  Summaries found, sending them to the model for synthesis...');
40
- try {
41
- const input = {
42
- content: `${query}\n\n${allSummaries}`,
43
- filepath: '',
44
- };
45
- const modelResponse = await generate(input, 'llama3');
46
- console.log(`\n๐Ÿ“ Model response:\n${modelResponse.content}`);
47
- }
48
- catch (err) {
49
- console.error('โŒ Model request failed:', err);
50
- }
37
+ const input = {
38
+ content: allSummaries ? `${query}\n\n${allSummaries}` : query,
39
+ filepath: '',
40
+ };
41
+ try {
42
+ console.log(allSummaries.trim()
43
+ ? '๐Ÿง  Summaries found, sending them to the model for synthesis...'
44
+ : 'โš ๏ธ No summaries found. Asking the model for context only...');
45
+ const modelResponse = await generate(input, 'llama3');
46
+ console.log(`\n๐Ÿ“ Model response:\n${modelResponse.content}`);
51
47
  }
52
- else {
53
- console.log('โš ๏ธ No summaries found. Asking the model for context only...');
54
- try {
55
- const input = {
56
- content: query,
57
- filepath: '',
58
- };
59
- const modelResponse = await generate(input, 'llama3');
60
- console.log(`\n๐Ÿ“ Model response:\n${modelResponse.content}`);
61
- }
62
- catch (err) {
63
- console.error('โŒ Model request failed:', err);
64
- }
48
+ catch (err) {
49
+ console.error('โŒ Model request failed:', err);
65
50
  }
66
51
  }
52
+ function promptOnce(promptText) {
53
+ return new Promise(resolve => {
54
+ const rl = readline.createInterface({
55
+ input: process.stdin,
56
+ output: process.stdout
57
+ });
58
+ rl.question(promptText, answer => {
59
+ rl.close();
60
+ resolve(answer.trim());
61
+ });
62
+ });
63
+ }
@@ -1,86 +1,28 @@
1
- import { summaryModule } from '../pipeline/modules/summaryModule.js';
2
- import { db } from '../db/client.js';
3
- import fs from 'fs/promises';
4
1
  import fsSync from 'fs';
2
+ import { LOG_PATH, PID_PATH } from '../constants.js';
3
+ import { log } from '../utils/log.js';
4
+ import { spawn } from 'child_process';
5
+ import { fileURLToPath } from 'url';
5
6
  import path from 'path';
6
- import { generateEmbedding } from '../lib/generateEmbedding.js';
7
- import { IGNORED_EXTENSIONS } from '../config/IgnoredExtensions.js';
8
- import { PID_PATH, SCAI_HOME } from '../constants.js';
9
- const LOG_PATH = path.join(SCAI_HOME, 'daemon.log');
10
- const SLEEP_MS = 30 * 1000; // ๐Ÿ’ค Pause between batches
11
- const IDLE_SLEEP_MS = 4 * SLEEP_MS; // ๐Ÿ’ค Longer pause if idle
12
- const MAX_FILES_PER_BATCH = 5; // ๐ŸŽ›๏ธ Throttle indexing per cycle
13
- // ๐Ÿ’ค Utility
14
- function sleep(ms) {
15
- return new Promise(resolve => setTimeout(resolve, ms));
16
- }
17
- // ๐Ÿชต Append to log file
18
- function log(message) {
19
- const timestamp = new Date().toISOString();
20
- fsSync.appendFileSync(LOG_PATH, `[${timestamp}] ${message}\n`);
21
- }
22
- // โŒ Skip unwanted file types
23
- function shouldIgnoreFile(filePath) {
24
- const ext = path.extname(filePath).toLowerCase();
25
- return IGNORED_EXTENSIONS.includes(ext);
26
- }
27
- // ๐Ÿง  One summarization batch
28
- export async function runDaemonBatch() {
29
- const rows = db.prepare(`
30
- SELECT path, type FROM files
31
- WHERE summary IS NULL OR summary = ''
32
- ORDER BY last_modified DESC
33
- LIMIT ?
34
- `).all(MAX_FILES_PER_BATCH);
35
- if (rows.length === 0) {
36
- log('โœ… No files left to summarize.');
37
- return false; // ๐Ÿ’ค Idle
7
+ // ๐Ÿš€ Ensure daemon starts in the background
8
+ export async function startDaemon() {
9
+ if (fsSync.existsSync(PID_PATH)) {
10
+ log(`โš ๏ธ Daemon already running (PID file found at ${PID_PATH}). Skipping launch.`);
38
11
  }
39
- for (const row of rows) {
40
- if (!fsSync.existsSync(row.path)) {
41
- log(`โš ๏ธ Skipped missing file: ${row.path}`);
42
- continue;
43
- }
44
- if (shouldIgnoreFile(row.path)) {
45
- log(`โš ๏ธ Skipped (extension): ${row.path}`);
46
- continue;
47
- }
48
- try {
49
- const content = await fs.readFile(row.path, 'utf-8');
50
- const result = await summaryModule.run({ content, filepath: row.path });
51
- const summary = result?.summary?.trim() || null;
52
- let embedding = null;
53
- if (summary) {
54
- const vector = await generateEmbedding(summary);
55
- if (vector)
56
- embedding = JSON.stringify(vector);
57
- }
58
- db.prepare(`
59
- UPDATE files
60
- SET summary = @summary, embedding = @embedding, indexed_at = datetime('now')
61
- WHERE path = @path
62
- `).run({ summary, embedding, path: row.path });
63
- log(`๐Ÿ“ Summarized: ${row.path}`);
64
- }
65
- catch (err) {
66
- log(`โŒ Failed: ${row.path}: ${err instanceof Error ? err.message : String(err)}`);
67
- }
68
- await sleep(200); // ๐Ÿง˜ Micro delay between each file
69
- }
70
- return true; // โœ… Work was done
71
- }
72
- // ๐Ÿš€ Daemon loop: runs until killed
73
- export async function runDaemonScheduler() {
74
- fsSync.mkdirSync(SCAI_HOME, { recursive: true });
75
- fsSync.writeFileSync(PID_PATH, process.pid.toString(), 'utf-8');
76
- fsSync.appendFileSync(LOG_PATH, `\n\n๐Ÿง  Daemon started at ${new Date().toISOString()} โ€” PID ${process.pid}\n`);
77
- let cycles = 0;
78
- while (true) {
79
- const didWork = await runDaemonBatch();
80
- cycles++;
81
- if (cycles % 20 === 0) {
82
- log(`๐ŸŒ€ Still running. Cycles: ${cycles}`);
83
- }
84
- await sleep(didWork ? SLEEP_MS : IDLE_SLEEP_MS);
12
+ else {
13
+ log('๐Ÿš€ Starting summarizer daemon in background mode...');
14
+ log(`๐Ÿ“ Logs will be saved to: ${LOG_PATH}`);
15
+ // Before starting the background process, set the environment variable
16
+ process.env.BACKGROUND_MODE = 'true'; // Set the mode to background
17
+ // Compute absolute path to the background worker (adjust path if needed)
18
+ const __filename = fileURLToPath(import.meta.url);
19
+ const __dirname = path.dirname(__filename);
20
+ const daemonWorkerPath = path.join(__dirname, '../daemon/daemonWorker.js');
21
+ // Spawn the daemonWorker.js file in the background
22
+ const child = spawn(process.execPath, [daemonWorkerPath], {
23
+ detached: true, // Detach the process so it runs independently
24
+ stdio: ['ignore', 'ignore', 'ignore'], // Suppress the output
25
+ });
26
+ child.unref(); // Allow the parent process to exit without waiting for the child
85
27
  }
86
28
  }
@@ -1,14 +1,26 @@
1
1
  import fg from 'fast-glob';
2
- import fs from 'fs';
3
2
  import path from 'path';
4
3
  import { initSchema } from '../db/schema.js';
5
4
  import { indexFile } from '../db/fileIndex.js';
6
5
  import { shouldIgnoreFile } from '../utils/shouldIgnoreFiles.js';
7
6
  import { detectFileType } from '../utils/detectFileType.js';
8
- import { runDaemonScheduler } from './DaemonCmd.js';
7
+ import { startDaemon } from './DaemonCmd.js';
9
8
  import { IGNORED_FOLDER_GLOBS } from '../config/IgnoredPaths.js';
10
9
  import { Config } from '../config.js';
11
- import { PID_PATH } from '../constants.js';
10
+ import { DB_PATH } from '../constants.js';
11
+ import { log } from '../utils/log.js';
12
+ import lockfile from 'proper-lockfile';
13
+ // ๐Ÿง  Lock the database to prevent simultaneous access
14
+ async function lockDb() {
15
+ try {
16
+ const lock = await lockfile.lock(DB_PATH); // DB_PATH from constants.ts
17
+ return lock;
18
+ }
19
+ catch (err) {
20
+ log('โŒ Failed to acquire DB lock: ' + err);
21
+ throw err;
22
+ }
23
+ }
12
24
  export async function runIndexCommand(targetDir, options = {}) {
13
25
  try {
14
26
  initSchema();
@@ -33,7 +45,7 @@ export async function runIndexCommand(targetDir, options = {}) {
33
45
  resolvedDir = Config.getIndexDir() || process.cwd();
34
46
  Config.setIndexDir(resolvedDir); // persist if not yet saved
35
47
  }
36
- console.log(`๐Ÿ“‚ Indexing files in: ${resolvedDir}`);
48
+ log(`๐Ÿ“‚ Indexing files in: ${resolvedDir}`);
37
49
  const files = await fg('**/*.*', {
38
50
  cwd: resolvedDir,
39
51
  ignore: IGNORED_FOLDER_GLOBS,
@@ -41,6 +53,7 @@ export async function runIndexCommand(targetDir, options = {}) {
41
53
  });
42
54
  const countByExt = {};
43
55
  let count = 0;
56
+ const release = await lockDb(); // Lock the DB before starting
44
57
  for (const file of files) {
45
58
  if (shouldIgnoreFile(file))
46
59
  continue;
@@ -49,22 +62,16 @@ export async function runIndexCommand(targetDir, options = {}) {
49
62
  indexFile(file, null, type); // Index file without summary
50
63
  const ext = path.extname(file);
51
64
  countByExt[ext] = (countByExt[ext] || 0) + 1;
52
- console.log(`๐Ÿ“„ Indexed: ${path.relative(resolvedDir, file)}`);
65
+ log(`๐Ÿ“„ Indexed: ${path.relative(resolvedDir, file)}`);
53
66
  count++;
54
67
  }
55
68
  catch (err) {
56
- console.warn(`โš ๏ธ Skipped in indexCmd ${file}:`, err instanceof Error ? err.message : err);
57
- }
58
- }
59
- console.log('๐Ÿ“Š Indexed files by extension:', countByExt);
60
- console.log(`โœ… Done. Indexed ${count} files.`);
61
- if (options.detached) {
62
- if (fs.existsSync(PID_PATH)) {
63
- console.warn(`โš ๏ธ Daemon already running (PID file found at ${PID_PATH}). Skipping launch.`);
64
- }
65
- else {
66
- console.log('๐Ÿš€ Starting summarizer daemon in background mode...');
67
- runDaemonScheduler();
69
+ log(`โš ๏ธ Skipped in indexCmd ${file}: ${err instanceof Error ? err.message : err}`);
68
70
  }
69
71
  }
72
+ log('๐Ÿ“Š Indexed files by extension:', JSON.stringify(countByExt, null, 2));
73
+ log(`โœ… Done. Indexed ${count} files.`);
74
+ await release(); // Release the DB lock after indexing is done
75
+ // Auto-start daemon if not already running
76
+ startDaemon();
70
77
  }
@@ -1,13 +1,52 @@
1
1
  import fs from 'fs';
2
+ import fsp from 'fs/promises';
2
3
  import { db } from '../db/client.js';
3
- import { DB_PATH } from '../constants.js';
4
- export function resetDatabase() {
4
+ import { DB_PATH, SCAI_HOME } from '../constants.js';
5
+ import lockfile from 'proper-lockfile';
6
+ import path from 'path';
7
+ function getBackupDir() {
8
+ const timestamp = new Date().toISOString().replace(/[:.]/g, '-');
9
+ return path.join(SCAI_HOME, `backup-${timestamp}`);
10
+ }
11
+ async function backupScaiFolder() {
12
+ const backupDir = getBackupDir();
13
+ try {
14
+ await fsp.mkdir(backupDir, { recursive: true });
15
+ const files = await fsp.readdir(SCAI_HOME);
16
+ for (const file of files) {
17
+ const srcPath = path.join(SCAI_HOME, file);
18
+ const destPath = path.join(backupDir, file);
19
+ const stat = await fsp.stat(srcPath);
20
+ if (stat.isFile()) {
21
+ await fsp.copyFile(srcPath, destPath);
22
+ }
23
+ }
24
+ console.log(`๐Ÿ“ฆ Backed up .scai folder to: ${backupDir}`);
25
+ return backupDir;
26
+ }
27
+ catch (err) {
28
+ console.warn('โš ๏ธ Failed to back up .scai folder:', err instanceof Error ? err.message : err);
29
+ return null;
30
+ }
31
+ }
32
+ export async function resetDatabase() {
33
+ console.log('๐Ÿ” Backing up existing .scai folder...');
34
+ await backupScaiFolder();
5
35
  try {
6
- db.close(); // ๐Ÿ”’ Ensure the DB connection is closed
36
+ db.close();
7
37
  console.log('๐Ÿ”’ Closed SQLite database connection.');
8
38
  }
9
39
  catch (err) {
10
- console.warn('โš ๏ธ Could not close database:', err);
40
+ console.warn('โš ๏ธ Could not close database:', err instanceof Error ? err.message : err);
41
+ }
42
+ try {
43
+ const releaseLock = await lockfile.unlock(DB_PATH).catch(() => null);
44
+ if (releaseLock) {
45
+ console.log('๐Ÿ”“ Released database lock.');
46
+ }
47
+ }
48
+ catch (err) {
49
+ console.warn('โš ๏ธ Failed to release database lock:', err instanceof Error ? err.message : err);
11
50
  }
12
51
  if (fs.existsSync(DB_PATH)) {
13
52
  try {
@@ -22,5 +61,12 @@ export function resetDatabase() {
22
61
  else {
23
62
  console.log('โ„น๏ธ No existing database found at:', DB_PATH);
24
63
  }
64
+ try {
65
+ fs.mkdirSync(path.dirname(DB_PATH), { recursive: true });
66
+ console.log('๐Ÿ“ Ensured that the database directory exists.');
67
+ }
68
+ catch (err) {
69
+ console.warn('โš ๏ธ Could not ensure DB directory exists:', err instanceof Error ? err.message : err);
70
+ }
25
71
  console.log('โœ… Database has been reset. You can now re-run: scai index');
26
72
  }
@@ -0,0 +1,15 @@
1
+ // src/config/StopWords.ts
2
+ /**
3
+ * These common words are ignored from search queries
4
+ * to reduce noise and improve FTS and embedding match quality.
5
+ */
6
+ export const STOP_WORDS = new Set([
7
+ 'a', 'an', 'and', 'are', 'as', 'at', 'be', 'but', 'by',
8
+ 'for', 'if', 'in', 'into', 'is', 'it', 'no', 'not',
9
+ 'of', 'on', 'or', 'such', 'that', 'the', 'their',
10
+ 'then', 'there', 'these', 'they', 'this', 'to', 'was',
11
+ 'will', 'with', 'what', 'which', 'who', 'whom', 'where',
12
+ 'when', 'why', 'how', 'from', 'all', 'any', 'can',
13
+ 'did', 'do', 'has', 'have', 'i', 'me', 'my', 'you',
14
+ 'your', 'we', 'us', 'our'
15
+ ]);
package/dist/constants.js CHANGED
@@ -21,6 +21,11 @@ export const PID_PATH = path.join(SCAI_HOME, 'daemon.pid');
21
21
  * ~/.scai/config.json
22
22
  */
23
23
  export const CONFIG_PATH = path.join(SCAI_HOME, 'config.json');
24
+ /**
25
+ * Path to the daemon log file:
26
+ * ~/.scai/daemon.log
27
+ */
28
+ export const LOG_PATH = path.join(SCAI_HOME, 'daemon.log');
24
29
  /**
25
30
  * Get the active index directory.
26
31
  *
@@ -0,0 +1,65 @@
1
+ import { summaryModule } from '../pipeline/modules/summaryModule.js';
2
+ import { db } from '../db/client.js';
3
+ import fs from 'fs/promises';
4
+ import fsSync from 'fs';
5
+ import { generateEmbedding } from '../lib/generateEmbedding.js';
6
+ import { DB_PATH } from '../constants.js';
7
+ import { log } from '../utils/log.js';
8
+ import lockfile from 'proper-lockfile';
9
+ import { shouldIgnoreFile } from '../utils/shouldIgnoreFiles.js';
10
+ const MAX_FILES_PER_BATCH = 5;
11
+ async function lockDb() {
12
+ try {
13
+ return await lockfile.lock(DB_PATH);
14
+ }
15
+ catch (err) {
16
+ log('โŒ Failed to acquire DB lock: ' + err);
17
+ throw err;
18
+ }
19
+ }
20
+ export async function runDaemonBatch() {
21
+ const rows = db.prepare(`
22
+ SELECT path, type FROM files
23
+ WHERE summary IS NULL OR summary = ''
24
+ ORDER BY last_modified DESC
25
+ LIMIT ?
26
+ `).all(MAX_FILES_PER_BATCH);
27
+ if (rows.length === 0) {
28
+ log('โœ… No files left to summarize.');
29
+ return false;
30
+ }
31
+ const release = await lockDb();
32
+ for (const row of rows) {
33
+ if (!fsSync.existsSync(row.path)) {
34
+ log(`โš ๏ธ Skipped missing file: ${row.path}`);
35
+ continue;
36
+ }
37
+ if (shouldIgnoreFile(row.path)) {
38
+ log(`โš ๏ธ Skipped (extension): ${row.path}`);
39
+ continue;
40
+ }
41
+ try {
42
+ const content = await fs.readFile(row.path, 'utf-8');
43
+ const result = await summaryModule.run({ content, filepath: row.path });
44
+ const summary = result?.summary?.trim() || null;
45
+ let embedding = null;
46
+ if (summary) {
47
+ const vector = await generateEmbedding(summary);
48
+ if (vector)
49
+ embedding = JSON.stringify(vector);
50
+ }
51
+ db.prepare(`
52
+ UPDATE files
53
+ SET summary = @summary, embedding = @embedding, indexed_at = datetime('now')
54
+ WHERE path = @path
55
+ `).run({ summary, embedding, path: row.path });
56
+ log(`๐Ÿ“ Summarized: ${row.path}`);
57
+ }
58
+ catch (err) {
59
+ log(`โŒ Failed: ${row.path}: ${err instanceof Error ? err.message : String(err)}`);
60
+ }
61
+ await new Promise(resolve => setTimeout(resolve, 200));
62
+ }
63
+ await release();
64
+ return true;
65
+ }
@@ -0,0 +1,27 @@
1
+ import fsSync from 'fs';
2
+ import { LOG_PATH, PID_PATH, SCAI_HOME } from '../constants.js';
3
+ import { log } from '../utils/log.js';
4
+ import { runDaemonBatch } from '../daemon/daemonBatch.js'; // โœ… now from utils
5
+ const SLEEP_MS = 30 * 1000;
6
+ const IDLE_SLEEP_MS = 4 * SLEEP_MS;
7
+ function sleep(ms) {
8
+ return new Promise(resolve => setTimeout(resolve, ms));
9
+ }
10
+ async function runDaemonScheduler() {
11
+ fsSync.mkdirSync(SCAI_HOME, { recursive: true });
12
+ fsSync.writeFileSync(PID_PATH, process.pid.toString(), 'utf-8');
13
+ fsSync.appendFileSync(LOG_PATH, `\n\n๐Ÿง  Daemon started at ${new Date().toISOString()} โ€” PID ${process.pid}\n`);
14
+ let cycles = 0;
15
+ while (true) {
16
+ const didWork = await runDaemonBatch();
17
+ cycles++;
18
+ if (cycles % 20 === 0) {
19
+ log(`๐ŸŒ€ Still running. Cycles: ${cycles}`);
20
+ }
21
+ await sleep(didWork ? SLEEP_MS : IDLE_SLEEP_MS);
22
+ }
23
+ }
24
+ runDaemonScheduler().catch(err => {
25
+ log(`โŒ Daemon crashed: ${err instanceof Error ? err.message : String(err)}`);
26
+ process.exit(1);
27
+ });
package/dist/db/client.js CHANGED
@@ -1,5 +1,9 @@
1
1
  import Database from 'better-sqlite3';
2
2
  import fs from 'fs';
3
3
  import { DB_PATH, SCAI_HOME } from '../constants.js';
4
+ // Ensure the directory exists
4
5
  fs.mkdirSync(SCAI_HOME, { recursive: true });
6
+ // Open the database connection
5
7
  export const db = new Database(DB_PATH);
8
+ // Set journal_mode to WAL for better concurrency
9
+ db.pragma('journal_mode = WAL');
@@ -1,18 +1,15 @@
1
1
  import { db } from './client.js';
2
2
  import fs from 'fs';
3
+ import path from 'path';
3
4
  import { generateEmbedding } from '../lib/generateEmbedding.js';
4
5
  import * as sqlTemplates from './sqlTemplates.js';
5
- import path from 'path';
6
+ import { sanitizeQueryForFts } from '../utils/sanitizeQuery.js';
6
7
  /**
7
8
  * Index a file into the local SQLite database.
8
9
  *
9
10
  * - Normalizes the file path for cross-platform compatibility.
10
11
  * - Extracts file metadata (last modified time).
11
12
  * - Performs an UPSERT into the `files` table with the latest summary/type/timestamp.
12
- *
13
- * @param filePath - Absolute path to the file being indexed
14
- * @param summary - Optional summary of the file content
15
- * @param type - File type or extension (e.g., 'md', 'ts')
16
13
  */
17
14
  export function indexFile(filePath, summary, type) {
18
15
  const stats = fs.statSync(filePath);
@@ -25,18 +22,12 @@ export function indexFile(filePath, summary, type) {
25
22
  type,
26
23
  lastModified,
27
24
  indexedAt,
25
+ embedding: null
28
26
  });
29
27
  console.log(`๐Ÿ“„ Indexed: ${normalizedPath}`);
30
28
  }
31
29
  /**
32
30
  * Perform a raw keyword-based full-text search using the FTS5 index.
33
- *
34
- * - Tokenizes and sanitizes the input query string.
35
- * - Performs a ranked search using BM25 scoring via the virtual FTS table.
36
- * - Returns basic file metadata along with rank for ordering.
37
- *
38
- * @param query - The search query string (e.g., "api router config")
39
- * @param limit - Max number of results to return (default: 10)
40
31
  */
41
32
  export function queryFiles(query, limit = 10) {
42
33
  const safeQuery = query
@@ -48,50 +39,59 @@ export function queryFiles(query, limit = 10) {
48
39
  })
49
40
  .join(' OR ');
50
41
  console.log(`Executing search query: ${safeQuery}`);
51
- const results = db.prepare(sqlTemplates.rawQueryTemplate).all({
52
- query: safeQuery,
53
- limit
42
+ const results = db.prepare(`
43
+ SELECT f.id, f.path, f.summary, f.type, f.last_modified, f.indexed_at
44
+ FROM files f
45
+ JOIN files_fts fts ON f.id = fts.rowid
46
+ WHERE fts.files_fts MATCH ?
47
+ LIMIT ?
48
+ `).all(safeQuery, limit);
49
+ console.log(`Search returned ${results.length} results.`);
50
+ results.forEach(result => {
51
+ console.log(`๐Ÿ“„ Found in FTS search: ${result.path}`);
54
52
  });
55
53
  return results;
56
54
  }
57
- export function cosineSimilarity(a, b) {
58
- const dot = a.reduce((sum, ai, i) => sum + ai * b[i], 0);
59
- const magA = Math.sqrt(a.reduce((sum, ai) => sum + ai * ai, 0));
60
- const magB = Math.sqrt(b.reduce((sum, bi) => sum + bi * bi, 0));
61
- return dot / (magA * magB);
62
- }
63
55
  /**
64
56
  * Perform a hybrid semantic + keyword-based search.
65
- *
66
- * - Generates a vector embedding of the query.
67
- * - Runs an FTS search using BM25 ranking.
68
- * - Looks up file embeddings from the database and compares using cosine similarity.
69
- * - Combines similarity score and BM25 rank into a weighted final score.
70
- *
71
- * @param query - Natural language search query
72
- * @param topK - Max number of top-ranked results to return (default: 5)
73
57
  */
74
58
  export async function searchFiles(query, topK = 5) {
59
+ console.log(`๐Ÿง  Searching for query: "${query}"`);
75
60
  const embedding = await generateEmbedding(query);
76
- if (!embedding)
61
+ if (!embedding) {
62
+ console.log('โš ๏ธ Failed to generate embedding for query');
77
63
  return [];
78
- const safeQuery = query
79
- .trim()
80
- .split(/\s+/)
81
- .map(token => {
82
- token = token.replace(/[?*\\"]/g, '').replace(/'/g, "''");
83
- return token.includes(' ') ? `"${token}"` : `${token}*`;
84
- })
85
- .join(' OR ');
86
- console.log(`Executing search query: ${safeQuery}`);
87
- const ftsResults = db.prepare(sqlTemplates.fetchBm25ScoresTemplate).all({ query: safeQuery });
64
+ }
65
+ const safeQuery = sanitizeQueryForFts(query);
66
+ console.log(`Executing search query in FTS5: ${safeQuery}`);
67
+ const ftsResults = db.prepare(`
68
+ SELECT fts.rowid AS id, f.path, f.summary, f.type, bm25(files_fts) AS bm25Score
69
+ FROM files f
70
+ JOIN files_fts fts ON f.id = fts.rowid
71
+ WHERE fts.files_fts MATCH ?
72
+ AND f.embedding IS NOT NULL
73
+ ORDER BY bm25Score DESC
74
+ LIMIT ?
75
+ `).all(safeQuery, topK);
76
+ console.log(`FTS search returned ${ftsResults.length} results`);
77
+ if (ftsResults.length === 0) {
78
+ console.log('โš ๏ธ No results found from FTS search');
79
+ return [];
80
+ }
81
+ ftsResults.forEach(result => {
82
+ console.log(`๐Ÿ“„ FTS found: ${result.path}`);
83
+ });
88
84
  const bm25Min = Math.min(...ftsResults.map(r => r.bm25Score));
89
85
  const bm25Max = Math.max(...ftsResults.map(r => r.bm25Score));
90
86
  const scored = ftsResults.map(result => {
91
87
  try {
92
- const embResult = db.prepare(sqlTemplates.fetchEmbeddingTemplate).get({ path: result.path });
93
- if (!embResult || typeof embResult.embedding !== 'string')
88
+ const embResult = db.prepare(sqlTemplates.fetchEmbeddingTemplate).get({
89
+ path: result.path,
90
+ });
91
+ if (!embResult || typeof embResult.embedding !== 'string') {
92
+ console.log(`โš ๏ธ No embedding for file: ${result.path}`);
94
93
  return null;
94
+ }
95
95
  const vector = JSON.parse(embResult.embedding);
96
96
  const sim = cosineSimilarity(embedding, vector);
97
97
  const normalizedBm25 = 1 - ((result.bm25Score - bm25Min) / (bm25Max - bm25Min + 1e-5));
@@ -101,15 +101,23 @@ export async function searchFiles(query, topK = 5) {
101
101
  summary: result.summary,
102
102
  score: finalScore,
103
103
  sim,
104
- bm25: normalizedBm25
104
+ bm25: normalizedBm25,
105
105
  };
106
106
  }
107
107
  catch (err) {
108
- console.error(`Error processing embedding for file: ${result.path}`, err);
108
+ console.error(`โŒ Error processing embedding for file: ${result.path}`, err);
109
109
  return null;
110
110
  }
111
- }).filter(Boolean)
111
+ })
112
+ .filter((r) => r !== null)
112
113
  .sort((a, b) => b.score - a.score)
113
114
  .slice(0, topK);
115
+ console.log(`Returning top ${topK} results based on combined score`);
114
116
  return scored;
115
117
  }
118
+ function cosineSimilarity(a, b) {
119
+ const dot = a.reduce((sum, ai, i) => sum + ai * b[i], 0);
120
+ const magA = Math.sqrt(a.reduce((sum, ai) => sum + ai * ai, 0));
121
+ const magB = Math.sqrt(b.reduce((sum, bi) => sum + bi * bi, 0));
122
+ return dot / (magA * magB);
123
+ }
package/dist/db/schema.js CHANGED
@@ -1,19 +1,35 @@
1
1
  import { db } from "./client.js";
2
2
  export function initSchema() {
3
3
  db.exec(`
4
- CREATE TABLE IF NOT EXISTS files (
5
- id INTEGER PRIMARY KEY AUTOINCREMENT,
6
- path TEXT UNIQUE,
7
- summary TEXT,
8
- type TEXT,
9
- indexed_at TEXT,
10
- last_modified TEXT,
11
- embedding TEXT
12
- );
4
+ CREATE TABLE IF NOT EXISTS files (
5
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
6
+ path TEXT UNIQUE,
7
+ summary TEXT,
8
+ type TEXT,
9
+ indexed_at TEXT,
10
+ last_modified TEXT,
11
+ embedding TEXT
12
+ );
13
13
 
14
- CREATE VIRTUAL TABLE IF NOT EXISTS files_fts
15
- USING fts5(path, summary, content='files', content_rowid='id');
14
+ CREATE VIRTUAL TABLE IF NOT EXISTS files_fts
15
+ USING fts5(path, summary, content='files', content_rowid='id');
16
16
 
17
- `);
18
- console.log('โœ… SQLite schema initialized');
17
+ -- FTS Triggers to keep files_fts in sync
18
+ CREATE TRIGGER IF NOT EXISTS files_ai AFTER INSERT ON files BEGIN
19
+ INSERT INTO files_fts(rowid, path, summary)
20
+ VALUES (new.id, new.path, new.summary);
21
+ END;
22
+
23
+ CREATE TRIGGER IF NOT EXISTS files_au AFTER UPDATE ON files BEGIN
24
+ UPDATE files_fts SET
25
+ path = new.path,
26
+ summary = new.summary
27
+ WHERE rowid = new.id;
28
+ END;
29
+
30
+ CREATE TRIGGER IF NOT EXISTS files_ad AFTER DELETE ON files BEGIN
31
+ DELETE FROM files_fts WHERE rowid = old.id;
32
+ END;
33
+ `);
34
+ console.log('โœ… SQLite schema initialized with FTS5 triggers');
19
35
  }
@@ -1,12 +1,21 @@
1
1
  // Upsert file metadata into `files`
2
2
  export const upsertFileTemplate = `
3
- INSERT INTO files (path, summary, type, last_modified, indexed_at)
4
- VALUES (:path, :summary, :type, :lastModified, :indexedAt)
3
+ INSERT INTO files (path, summary, type, last_modified, indexed_at, embedding)
4
+ VALUES (:path, :summary, :type, :lastModified, :indexedAt, :embedding)
5
5
  ON CONFLICT(path) DO UPDATE SET
6
- summary = excluded.summary,
6
+ summary = CASE
7
+ WHEN excluded.summary IS NOT NULL AND excluded.summary != files.summary
8
+ THEN excluded.summary
9
+ ELSE files.summary
10
+ END,
7
11
  type = excluded.type,
8
12
  last_modified = excluded.last_modified,
9
- indexed_at = excluded.indexed_at
13
+ indexed_at = excluded.indexed_at,
14
+ embedding = CASE
15
+ WHEN excluded.embedding IS NOT NULL AND excluded.embedding != files.embedding
16
+ THEN excluded.embedding
17
+ ELSE files.embedding
18
+ END
10
19
  `;
11
20
  // Fetch search results with BM25 ranking
12
21
  export const fetchBm25ScoresTemplate = `
package/dist/index.js CHANGED
@@ -5,9 +5,6 @@ import { Config } from './config.js';
5
5
  import { createRequire } from 'module';
6
6
  const require = createRequire(import.meta.url);
7
7
  const { version } = require('../package.json');
8
- // ๐Ÿง  Commands
9
- import { checkEnv } from "./commands/EnvCmd.js";
10
- import { checkGit } from "./commands/GitCmd.js";
11
8
  import { suggestCommitMessage } from "./commands/CommitSuggesterCmd.js";
12
9
  import { handleRefactor } from "./commands/RefactorCmd.js";
13
10
  import { generateTests } from "./commands/TestGenCmd.js";
@@ -18,7 +15,7 @@ import { runModulePipelineFromCLI } from './commands/ModulePipelineCmd.js';
18
15
  import { runIndexCommand } from './commands/IndexCmd.js';
19
16
  import { resetDatabase } from './commands/ResetDbCmd.js';
20
17
  import { runQueryCommand } from './commands/QueryCmd.js';
21
- import { runDaemonBatch } from './commands/DaemonCmd.js';
18
+ import { startDaemon } from './commands/DaemonCmd.js';
22
19
  import { runStopDaemonCommand } from "./commands/StopDaemonCmd.js";
23
20
  import { runAskCommand } from './commands/AskCmd.js';
24
21
  // ๐ŸŽ›๏ธ CLI Setup
@@ -34,12 +31,14 @@ cmd
34
31
  await bootstrap();
35
32
  console.log('โœ… Model initialization completed!');
36
33
  });
34
+ cmd
35
+ .command('sugg')
36
+ .description('Suggest a commit message from staged changes')
37
+ .option('-c, --commit', 'Automatically commit with suggested message')
38
+ .action(suggestCommitMessage);
37
39
  // ๐Ÿ”ง Group: Git-related commands
38
40
  const git = cmd.command('git').description('Git utilities');
39
- git
40
- .command('status')
41
- .description('Check Git status')
42
- .action(checkGit);
41
+ // The sugg command under the 'git' group
43
42
  git
44
43
  .command('sugg')
45
44
  .description('Suggest a commit message from staged changes')
@@ -66,15 +65,6 @@ gen
66
65
  .command('tests <file>')
67
66
  .description('Generate a Jest test file for the specified JS/TS module')
68
67
  .action((file) => generateTests(file));
69
- // ๐Ÿ” Indexing
70
- cmd
71
- .command('index [targetDir]')
72
- .description('Index supported files in the given directory (or current folder if none)')
73
- .option('-d, --detached', 'Run summarizer daemon after indexing')
74
- .option('--force', 'Force indexing even if another folder has already been indexed')
75
- .action((targetDir, options) => {
76
- runIndexCommand(targetDir, { detached: options.detached, force: options.force });
77
- });
78
68
  // โš™๏ธ Group: Configuration settings
79
69
  const set = cmd.command('set').description('Set configuration values');
80
70
  set
@@ -99,35 +89,46 @@ set
99
89
  Config.show();
100
90
  });
101
91
  // ๐Ÿงช Diagnostics and info
102
- cmd
103
- .command('env')
104
- .description('Check environment variables')
105
- .action(checkEnv);
106
92
  cmd
107
93
  .command('config')
108
94
  .description('Show the currently active model and language settings')
109
95
  .action(() => {
110
96
  Config.show();
111
97
  });
98
+ // Add explanation about alpha features directly in the help menu
99
+ cmd.addHelpText('after', `
100
+ ๐Ÿšจ Alpha Features:
101
+ - The "index", "daemon", "stop-daemon", "reset-db" commands are considered alpha features.
102
+ - These commands are in active development and may change in the future.
103
+
104
+ ๐Ÿ’ก Use with caution and expect possible changes or instability.
105
+ `);
106
+ // ๐Ÿ” Indexing
107
+ cmd
108
+ .command('index [targetDir]')
109
+ .description('Index supported files in the given directory (or current folder if none)')
110
+ .option('--force', 'Force indexing even if another folder has already been indexed')
111
+ .action((targetDir, options) => {
112
+ runIndexCommand(targetDir, { force: options.force });
113
+ });
112
114
  // ๐Ÿง  Query and assistant
113
115
  cmd
114
116
  .command('query <query>')
115
117
  .description('Search indexed files by keyword')
116
118
  .action(runQueryCommand);
117
119
  cmd
118
- .command('ask')
119
- .description('Ask a question using file summaries and a local model')
120
- .argument('<question...>', 'The question to ask')
121
- .action((question) => {
122
- const q = question.join(' ');
123
- runAskCommand(q);
120
+ .command('ask [question...]') // <- the ... makes it variadic
121
+ .description('Ask a question based on indexed files')
122
+ .action((questionParts) => {
123
+ const fullQuery = questionParts?.join(' ');
124
+ runAskCommand(fullQuery);
124
125
  });
125
126
  // ๐Ÿ› ๏ธ Background tasks and maintenance
126
127
  cmd
127
128
  .command('daemon')
128
129
  .description('Run background summarization of indexed files')
129
130
  .action(async () => {
130
- await runDaemonBatch(); // ignore the return value
131
+ await startDaemon(); // ignore the return value
131
132
  });
132
133
  cmd
133
134
  .command('stop-daemon')
@@ -0,0 +1,15 @@
1
+ import fs from 'fs';
2
+ import { LOG_PATH } from '../constants.js';
3
+ export function log(...args) {
4
+ const timestamp = new Date().toISOString();
5
+ const message = args.map(arg => typeof arg === 'string' ? arg : JSON.stringify(arg, null, 2)).join(' ');
6
+ const isBackground = process.env.BACKGROUND_MODE === 'true';
7
+ if (isBackground) {
8
+ // If running in background, log to a file
9
+ fs.appendFileSync(LOG_PATH, `[${timestamp}] ${message}\n`);
10
+ }
11
+ else {
12
+ // Otherwise, log to the console
13
+ console.log(`[${timestamp}] ${message}`);
14
+ }
15
+ }
@@ -0,0 +1,16 @@
1
+ // src/utils/sanitizeQuery.ts
2
+ import { STOP_WORDS } from '../config/StopWords.js';
3
+ export function sanitizeQueryForFts(input) {
4
+ const tokens = input
5
+ .trim()
6
+ .split(/\s+/)
7
+ .map(token => token.toLowerCase())
8
+ .filter(token => token.length > 2 &&
9
+ !STOP_WORDS.has(token) &&
10
+ /^[a-z0-9]+$/.test(token))
11
+ .map(token => token.replace(/[?*\\"]/g, '').replace(/'/g, "''") + '*');
12
+ // ๐Ÿ‘‡ Prevent FTS syntax errors by returning a catch-all query
13
+ if (tokens.length === 0)
14
+ return '*';
15
+ return tokens.join(' OR ');
16
+ }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "scai",
3
- "version": "0.1.23",
3
+ "version": "0.1.24",
4
4
  "type": "module",
5
5
  "bin": {
6
6
  "scai": "./dist/index.js"
@@ -27,12 +27,14 @@
27
27
  "better-sqlite3": "^12.1.1",
28
28
  "commander": "^11.0.0",
29
29
  "fast-glob": "^3.3.3",
30
- "ora": "^8.2.0"
30
+ "ora": "^8.2.0",
31
+ "proper-lockfile": "^4.1.2"
31
32
  },
32
33
  "devDependencies": {
33
34
  "@types/better-sqlite3": "^7.6.13",
34
35
  "@types/jest": "^30.0.0",
35
36
  "@types/node": "^24.0.1",
37
+ "@types/proper-lockfile": "^4.1.4",
36
38
  "jest": "^30.0.2",
37
39
  "ts-jest": "^29.4.0",
38
40
  "typescript": "^5.8.3"