scai 0.1.19 → 0.1.20

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,66 @@
1
+ import { searchFiles } from "../db/fileIndex.js";
2
+ import { generate } from "../lib/generate.js";
3
+ import { summaryModule } from "../pipeline/modules/summaryModule.js";
4
/**
 * Answer a free-form question using indexed file summaries and a local model.
 *
 * Flow: look up the closest files with `searchFiles`, pass each hit's stored
 * summary through `summaryModule.run`, then send the question together with
 * the collected summaries to `generate`. When no usable summary is found, the
 * bare question is sent to the model instead.
 *
 * @param {string} query - The question typed by the user.
 * @returns {Promise<void>} Resolves once the answer (or an error) has been printed.
 */
export async function runAskCommand(query) {
    // Treat whitespace-only and non-string input the same as a missing query.
    const question = typeof query === 'string' ? query.trim() : '';
    if (!question) {
        console.error('❌ Please provide a search query.\n👉 Usage: scai ask "keyword"');
        return;
    }
    const describeError = (err) => (err instanceof Error ? err.message : err);

    console.log(`🔍 Searching for: "${question}"\n`);
    // Use vector-based search
    const results = await searchFiles(question, 5); // Or 3 if you want fewer
    if (results.length === 0) {
        console.log('⚠️ No similar embeddings found. Asking the model for context instead...');
    }
    else {
        console.log('📊 Closest files based on semantic similarity:');
        for (const file of results) {
            console.log(`📄 Path: ${file?.path}`);
        }
    }

    let context = '';
    for (const file of results) {
        if (!file?.summary) {
            console.warn(`⚠️ No summary available for file: ${file?.path}`);
            continue;
        }
        try {
            console.log(`📝 Using cached summary for file: ${file.path}`);
            // NOTE(review): the stored summary is fed through summaryModule.run
            // again rather than used verbatim - confirm this is intended.
            const response = await summaryModule.run({ content: file.summary, filepath: file.path });
            if (response.summary) {
                context += `\n${response.summary}`;
            }
        }
        catch (err) {
            console.error(`❌ Error processing file: ${file.path}`, describeError(err));
        }
    }

    // One model call for both cases; only the prompt and the notice differ.
    const hasContext = context.trim().length > 0;
    console.log(hasContext
        ? '🧠 Summaries found, sending them to the model for synthesis...'
        : '⚠️ No summaries found. Asking the model for context only...');
    try {
        const input = {
            content: hasContext ? `${question}\n\n${context}` : question,
            filepath: '',
        };
        const modelResponse = await generate(input, 'llama3');
        console.log(`\n📝 Model response:\n${modelResponse.content}`);
    }
    catch (err) {
        console.error('❌ Model request failed:', describeError(err));
    }
}
@@ -4,10 +4,17 @@ import fs from 'fs/promises';
4
4
  import fsSync from 'fs';
5
5
  import os from 'os';
6
6
  import path from 'path';
7
- const MAX_FILES = 500;
8
- const DAEMON_DURATION_MINUTES = 10;
7
+ import { generateEmbedding } from '../lib/generateEmbedding.js';
8
+ import { IGNORED_EXTENSIONS } from '../config/IgnoredExtensions.js';
9
+ const MAX_FILES = 1000;
10
+ const DAEMON_DURATION_MINUTES = 25;
9
11
  const DAEMON_INTERVAL_MINUTES = 30;
10
12
  const PID_PATH = path.join(os.homedir(), '.scai/daemon.pid');
13
/**
 * Decide whether a file should be skipped based on IGNORED_EXTENSIONS.
 *
 * The lower-cased file name is compared against every list entry as a suffix.
 * `path.extname()` is not used because it only yields the last segment
 * ('.gz' for 'x.tar.gz') and returns '' for dot-files, so multi-part entries
 * such as '.tar.gz' / '.css.map' and names such as '.DS_Store' could never
 * match.
 *
 * @param {string} filePath - Path of the candidate file.
 * @returns {boolean} True when the file name ends with an ignored extension.
 */
const shouldIgnoreFile = (filePath) => {
    const fileName = path.basename(String(filePath ?? '')).toLowerCase();
    if (!fileName) {
        return false;
    }
    return IGNORED_EXTENSIONS.some((ext) => fileName.endsWith(ext.toLowerCase()));
};
11
18
  export async function runDaemonBatch() {
12
19
  console.log('πŸ“₯ Daemon batch: scanning for files to summarize...');
13
20
  const rows = db.prepare(`
@@ -15,19 +22,34 @@ export async function runDaemonBatch() {
15
22
  WHERE summary IS NULL OR summary = ''
16
23
  ORDER BY last_modified DESC
17
24
  LIMIT ?
18
- `).all(MAX_FILES);
25
+ `).all(MAX_FILES);
19
26
  if (rows.length === 0) {
20
27
  console.log('βœ… No files left to summarize.');
21
28
  return;
22
29
  }
23
30
  for (const row of rows) {
31
+ if (shouldIgnoreFile(row.path)) {
32
+ console.log(`⚠️ Ignored file (unwanted extension): ${row.path}`);
33
+ continue;
34
+ }
24
35
  try {
25
36
  const content = await fs.readFile(row.path, 'utf-8');
26
37
  const result = await summaryModule.run({ content, filepath: row.path });
27
38
  const summary = result?.summary?.trim() ? result.summary : null;
28
- db.prepare(`UPDATE files SET summary = ?, indexed_at = datetime('now') WHERE path = ?`)
29
- .run(summary, row.path);
39
+ let embedding = null;
40
+ if (summary) {
41
+ const vector = await generateEmbedding(summary);
42
+ if (vector)
43
+ embedding = JSON.stringify(vector);
44
+ }
45
+ // Using named parameters for better readability and flexibility
46
+ db.prepare(`
47
+ UPDATE files
48
+ SET summary = @summary, embedding = @embedding, indexed_at = datetime('now')
49
+ WHERE path = @path
50
+ `).run({ summary, embedding, path: row.path });
30
51
  console.log(`πŸ“ Summarized: ${row.path}`);
52
+ console.log(`πŸ”’ Embedded: ${row.path}`);
31
53
  }
32
54
  catch (err) {
33
55
  console.warn(`⚠️ Failed: ${row.path}`, err instanceof Error ? err.message : err);
@@ -1,4 +1,4 @@
1
- import { searchFiles } from '../db/fileIndex.js';
1
+ import { queryFiles } from '../db/fileIndex.js';
2
2
  import path from 'path';
3
3
  export async function runQueryCommand(query) {
4
4
  if (!query) {
@@ -6,7 +6,7 @@ export async function runQueryCommand(query) {
6
6
  return;
7
7
  }
8
8
  console.log(`πŸ” Searching for: "${query}"\n`);
9
- const results = searchFiles(query);
9
+ const results = queryFiles(query);
10
10
  if (results.length === 0) {
11
11
  console.log('⚠️ No matching files found.');
12
12
  return;
@@ -3,23 +3,44 @@ export const IGNORED_EXTENSIONS = [
3
3
  // πŸ–Ό Media
4
4
  '.png', '.jpg', '.jpeg', '.gif', '.webp', '.svg', '.ico',
5
5
  '.mp4', '.mp3', '.mov', '.avi', '.mkv', '.flv', '.wav', '.flac',
6
+ '.aac', '.m4a', '.wma', '.3gp', '.webm', '.ogg', '.aiff', '.au',
6
7
  // πŸ“¦ Archives & install packages
7
8
  '.zip', '.tar', '.gz', '.bz2', '.xz', '.rar', '.7z',
8
- '.jar', '.war', '.ear', // Java packaging
9
- '.deb', '.rpm', '.pkg', '.msi', '.dmg', '.cab', '.xz',
9
+ '.jar', '.war', '.ear', '.deb', '.rpm', '.pkg', '.msi', '.dmg', '.cab', '.apk',
10
+ '.tar.gz', '.tar.bz2', '.tar.xz', '.tar.lzma', '.tar.zst',
10
11
  // 🧱 Binaries & executables
11
12
  '.exe', '.dll', '.bin', '.so', '.dylib', '.a', '.lib',
12
- '.iso', '.img', '.elf', '.o', '.obj',
13
+ '.iso', '.img', '.elf', '.o', '.obj', '.msm', '.vbs', '.jscript',
14
+ '.cmd', '.bat', '.ps1', '.sh', '.bash', '.run',
13
15
  // πŸ§ͺ Runtime / build / cache
14
- '.log', '.lock', '.tmp', '.map',
16
+ '.log', '.tmp', '.map',
15
17
  '.db', '.sqlite', '.pkl', '.sav', '.rdb', '.ldb',
16
- '.pyc', '.class', '.tsbuildinfo', '.coverage',
18
+ '.pyc', '.class', '.tsbuildinfo', '.coverage', '.eslintcache',
19
+ '.yarn', '.webpack', '.babel', '.compilercache',
17
20
  // πŸ”€ Fonts & styles
18
21
  '.woff', '.woff2', '.ttf', '.eot', '.otf', '.css.map',
22
+ '.scss', '.sass', '.less', '.styl',
19
23
  // πŸ” Certs, keys, credentials
20
- '.crt', '.key', '.pem', '.pub', '.asc', '.gpg',
24
+ '.crt', '.key', '.pem', '.pub', '.asc', '.gpg', '.p12', '.csr', '.der', '.pfx',
21
25
  // ♻️ Backups / temp
22
- '.bak', '.old', '.swp', '.swo', '.tmp', '.orig',
23
- // 🌐 Misc
24
- '.torrent', '.DS_Store', '.env.local', '.env.production', '.env.development',
26
+ '.bak', '.old', '.swp', '.swo', '.orig',
27
+ '.sublime-workspace', '.sublime-project', '.db-shm', '.db-wal',
28
+ // 🌐 System/config folders (still ignored by path, not extension)
29
+ '.DS_Store', '.bundle', '.npmrc',
30
+ // πŸ—ΊοΈ GIS / Geospatial
31
+ '.shp', '.shx', '.dbf', '.prj', '.qix', '.sbn', '.sbx', '.shp.xml', '.cpg', '.gpkg', '.mif', '.mid',
32
+ // πŸ“Š Enterprise BI / Reporting
33
+ '.pbix', '.rdl', '.rpt', '.bqy', '.iqy',
34
+ // πŸ§ͺ ETL / DWH / Modeling
35
+ '.abf', '.dtsx', '.bim', '.xmi',
36
+ // πŸ—οΈ CAD / Engineering
37
+ '.dwg', '.dxf', '.step', '.stp', '.sldprt', '.sldasm',
38
+ '.iges', '.igs', '.3ds', '.fbx',
39
+ // 🧾 Forms / Print / Publishing
40
+ '.xps', '.afpub', '.pub', '.indd', '.qxd', '.frm', '.frx', '.frl',
41
+ // πŸ’° ERP / Finance / Legacy DB
42
+ '.mbd', '.fdb', '.nav', '.accdb', '.mdb', '.gdb',
43
+ '.sap', '.sappkg', '.qbw', '.qbb',
44
+ // πŸ”’ Lock files (but NOT lock *configs*)
45
+ '.lck', '.lockfile', '.db-lock', '.pid', '.socket',
25
46
  ];
@@ -18,4 +18,28 @@ export const IGNORED_FOLDER_GLOBS = [
18
18
  '**/.output/**',
19
19
  '**/tmp/**',
20
20
  '**/*.test.*',
21
+ '**/.m2/**',
22
+ '**/.gradle/**',
23
+ '**/.tox/**',
24
+ '**/.nox/**',
25
+ '**/.hypothesis/**',
26
+ '**/.npm/**',
27
+ '**/.yarn/**',
28
+ '**/*.o',
29
+ '**/*.out',
30
+ '**/*.exe',
31
+ '**/*.dll',
32
+ '**/.cache/**',
33
+ '**/.pylint.d/**',
34
+ '**/.eslintcache/**',
35
+ '**/.cache-loader/**',
36
+ '**/.serverless/**',
37
+ '**/.docker/**',
38
+ '**/.sublime-workspace',
39
+ '**/.sublime-project',
40
+ '**/*.log',
41
+ '**/npm-debug.log',
42
+ '**/yarn-error.log',
43
+ '**/debug.log',
44
+ '**/Dockerfile',
21
45
  ];
@@ -19,5 +19,5 @@ export class ModelConfig {
19
19
  console.log(` Language: ${this.language}`);
20
20
  }
21
21
  }
22
- ModelConfig.model = 'codellama:7b';
22
+ ModelConfig.model = 'llama3';
23
23
  ModelConfig.language = 'ts';
@@ -1,51 +1,114 @@
1
+ // File: src/db/fileIndex.ts
1
2
  import { db } from './client.js';
2
3
  import fs from 'fs';
4
+ import { generateEmbedding } from '../lib/generateEmbedding.js';
5
+ import * as sqlTemplates from './sqlTemplates.js'; // Import the SQL templates
3
6
/**
 * Register a file in the `files` table and rebuild its full-text-search row.
 *
 * @param {string} filePath - Path of the file; it is stat'ed to read its mtime.
 * @param {string} summary - Summary text stored with the file and in the FTS row.
 * @param {string} type - Value written to the `type` column.
 */
export function indexFile(filePath, summary, type) {
    const { mtime } = fs.statSync(filePath);
    const lastModified = mtime.toISOString();
    // The statements run strictly in this order: the FTS statements look the
    // row id up from `files`, so the row has to exist before they execute.
    const steps = [
        // 1) Insert the row when the path is not present yet.
        [sqlTemplates.insertFileTemplate, { path: filePath, summary, type, lastModified }],
        // 2) Refresh the metadata when the file already existed and changed.
        [sqlTemplates.updateFileTemplate, { path: filePath, type, lastModified }],
        // 3) Drop the stale FTS entry for this path.
        [sqlTemplates.deleteFromFtsTemplate, { path: filePath }],
        // 4) Re-insert the FTS entry under the same row id.
        [sqlTemplates.insertIntoFtsTemplate, { path: filePath, summary }],
    ];
    for (const [template, params] of steps) {
        db.prepare(template).run(params);
    }
}
34
- export function searchFiles(query, limit = 10) {
35
- // Use FTS5 MATCH for relevance-ranked results
36
- const stmt = db.prepare(`
20
/**
 * Turn free text into an FTS5 MATCH expression.
 *
 * Every whitespace-separated word becomes a quoted FTS5 string with a prefix
 * marker (`"word"*`) and the words are OR-ed together. Quoting keeps
 * characters such as `.`, `-`, `:` and the keywords AND/OR/NOT from being
 * parsed as FTS5 syntax. Words without any letter or digit are dropped
 * because they would tokenise to nothing.
 *
 * @param {string} query - Raw user input.
 * @returns {string} The MATCH expression, or '' when no searchable word remains.
 */
function buildFtsMatchQuery(query) {
    return String(query ?? '')
        .trim()
        .split(/\s+/)
        // Strip wildcard characters, backslashes and double quotes; no SQL
        // escaping is needed because the value is always bound as a parameter.
        .map((token) => token.replace(/[?*\\"]/g, ''))
        .filter((token) => /[\p{L}\p{N}]/u.test(token))
        .map((token) => `"${token}"*`)
        .join(' OR ');
}

/**
 * Keyword search over the FTS index, best BM25 rank first.
 *
 * @param {string} query - Raw user input.
 * @param {number} [limit=3] - Maximum number of rows to return.
 * @returns {object[]} Rows with path, summary, type, last_modified, indexed_at and rank.
 */
export function queryFiles(query, limit = 3) {
    const safeQuery = buildFtsMatchQuery(query);
    if (!safeQuery) {
        // Nothing searchable: an empty MATCH expression would be rejected by FTS5.
        return [];
    }
    // Log the constructed query for debugging purposes
    console.log(`Executing search query: ${safeQuery}`);
    const sql = `
    SELECT f.path, f.summary, f.type, f.last_modified, f.indexed_at,
           bm25(files_fts) AS rank
    FROM files_fts
    JOIN files f ON files_fts.rowid = f.id
    WHERE files_fts MATCH :query
    ORDER BY rank
    LIMIT :limit
  `;
    return db.prepare(sql).all({ query: safeQuery, limit });
}

/**
 * Cosine similarity of two numeric vectors.
 *
 * @param {ArrayLike<number>} a - First vector.
 * @param {ArrayLike<number>} b - Second vector.
 * @returns {number} A value in [-1, 1]; 0 when the vectors are empty, differ
 *   in length, have zero magnitude or contain non-numeric values (instead of NaN).
 */
export function cosineSimilarity(a, b) {
    const size = a?.length ?? 0;
    if (size === 0 || size !== b?.length) {
        return 0;
    }
    let dot = 0;
    let sumA = 0;
    let sumB = 0;
    for (let i = 0; i < size; i += 1) {
        dot += a[i] * b[i];
        sumA += a[i] * a[i];
        sumB += b[i] * b[i];
    }
    const denominator = Math.sqrt(sumA) * Math.sqrt(sumB);
    // `!(x > 0)` also catches NaN, so a bad component never leaks out as NaN.
    if (!(denominator > 0)) {
        return 0;
    }
    return dot / denominator;
}

/**
 * Hybrid search: FTS candidates re-ranked by embedding similarity.
 *
 * Candidates come from the BM25 template; each one with a stored embedding is
 * scored as 0.7 * cosine similarity + 0.3 * normalised BM25. Candidates
 * without a usable embedding are left out.
 *
 * @param {string} query - Raw user input.
 * @param {number} [topK=5] - Maximum number of results.
 * @returns {Promise<Array<{path: string, summary: string, score: number, sim: number, bm25: number}>>}
 *   Results sorted by descending score; empty when nothing can be searched or embedded.
 */
export async function searchFiles(query, topK = 5) {
    const safeQuery = buildFtsMatchQuery(query);
    if (!safeQuery) {
        return [];
    }
    // Generate the query embedding
    const embedding = await generateEmbedding(query);
    if (!embedding) {
        return [];
    }
    // Log the constructed query for debugging purposes
    console.log(`Executing search query: ${safeQuery}`);
    const ftsResults = db.prepare(sqlTemplates.fetchBm25ScoresTemplate).all({ query: safeQuery });
    if (ftsResults.length === 0) {
        return [];
    }
    const bm25Scores = ftsResults.map((row) => row.bm25Score);
    const bm25Min = Math.min(...bm25Scores);
    // The epsilon keeps the division defined when every score is identical.
    const bm25Range = Math.max(...bm25Scores) - bm25Min + 1e-5;

    let embeddingStmt = null;
    const scored = [];
    for (const result of ftsResults) {
        try {
            // Prepared once and reused; kept inside the try so a prepare
            // failure is reported per file instead of aborting the search.
            if (!embeddingStmt) {
                embeddingStmt = db.prepare(sqlTemplates.fetchEmbeddingTemplate);
            }
            const embResult = embeddingStmt.get({ path: result.path });
            if (!embResult || typeof embResult.embedding !== 'string') {
                continue;
            }
            const vector = JSON.parse(embResult.embedding);
            if (!Array.isArray(vector)) {
                continue;
            }
            // Cosine similarity lies in [-1, 1] and is used unscaled.
            const sim = cosineSimilarity(embedding, vector);
            // bm25() is lower-is-better, so invert it: best candidate -> 1.
            const normalizedBm25 = 1 - ((result.bm25Score - bm25Min) / bm25Range);
            scored.push({
                path: result.path,
                summary: result.summary,
                score: 0.7 * sim + 0.3 * normalizedBm25,
                sim,
                bm25: normalizedBm25,
            });
        }
        catch (err) {
            console.error(`Error processing embedding for file: ${result.path}`, err);
        }
    }
    return scored
        .sort((x, y) => y.score - x.score)
        .slice(0, topK);
}
package/dist/db/schema.js CHANGED
@@ -7,7 +7,8 @@ export function initSchema() {
7
7
  summary TEXT,
8
8
  type TEXT,
9
9
  indexed_at TEXT,
10
- last_modified TEXT
10
+ last_modified TEXT,
11
+ embedding TEXT
11
12
  );
12
13
 
13
14
  -- FTS5 table for fast full‑text search of summaries and paths
@@ -0,0 +1,37 @@
1
// SQL templates shared by the file index. All of them use named parameters
// (:path, :summary, ...) that are bound from an object at execution time.

// Insert a new file row; does nothing when the path is already present.
export const insertFileTemplate = `
  INSERT OR IGNORE INTO files
    (path, summary, type, indexed_at, last_modified)
  VALUES (:path, :summary, :type, datetime('now'), :lastModified)
`;

// Refresh type/last_modified/indexed_at, but only when last_modified changed.
export const updateFileTemplate = `
  UPDATE files
  SET type = :type,
      last_modified = :lastModified,
      indexed_at = datetime('now')
  WHERE path = :path
    AND last_modified != :lastModified
`;

// Remove the FTS row that belongs to the given path.
export const deleteFromFtsTemplate = `
  DELETE FROM files_fts
  WHERE rowid = (SELECT id FROM files WHERE path = :path)
`;

// Insert the FTS row for a path, reusing the id of its `files` row as rowid.
export const insertIntoFtsTemplate = `
  INSERT INTO files_fts(rowid, path, summary)
  VALUES((SELECT id FROM files WHERE path = :path), :path, :summary)
`;

// Fetch the 50 best FTS candidates with their BM25 score. The ORDER BY is
// required: without it LIMIT keeps an arbitrary 50 matches rather than the
// best-ranked ones (bm25() is lower-is-better, so ascending order is correct).
export const fetchBm25ScoresTemplate = `
  SELECT f.path, f.summary, f.type, bm25(files_fts) AS bm25Score
  FROM files_fts
  JOIN files f ON files_fts.rowid = f.id
  WHERE files_fts MATCH :query
  ORDER BY bm25(files_fts)
  LIMIT 50
`;

// Fetch the stored embedding (JSON text) for one path.
export const fetchEmbeddingTemplate = `
  SELECT embedding FROM files WHERE path = :path
`;
package/dist/index.js CHANGED
@@ -20,6 +20,7 @@ import { resetDatabase } from './commands/ResetDbCmd.js';
20
20
  import { runQueryCommand } from './commands/QueryCmd.js';
21
21
  import { runDaemonBatch } from './commands/DaemonCmd.js';
22
22
  import { runStopDaemonCommand } from "./commands/StopDaemonCmd.js";
23
+ import { runAskCommand } from './commands/AskCmd.js';
23
24
  // Create the CLI instance
24
25
  const cmd = new Command('scai')
25
26
  .version(version)
@@ -90,6 +91,15 @@ cmd
90
91
  .command('query <query>')
91
92
  .description('Search indexed files by keyword')
92
93
  .action(runQueryCommand);
94
// Register the `ask` command: joins the variadic <question...> words into one
// string and hands it to runAskCommand.
cmd
    .command('ask')
    .description('Ask a question using file summaries and a local model')
    .argument('<question...>', 'The question to ask')
    .action((question) => {
        // The variadic argument arrives as an array of words.
        const q = question.join(' ');
        // Return the promise so an awaiting parser can wait for it, and handle
        // rejections here so a failure never surfaces as an unhandled rejection.
        return runAskCommand(q).catch((err) => {
            console.error('❌ ask command failed:', err instanceof Error ? err.message : err);
            process.exitCode = 1;
        });
    });
93
103
  cmd
94
104
  .command('reset-db')
95
105
  .description('Delete and reset the SQLite database')
@@ -0,0 +1,22 @@
1
/**
 * Request an embedding vector for a piece of text from a local HTTP service.
 *
 * Sends `{ model, prompt }` as JSON and expects a JSON body with an
 * `embedding` array. (Port 11434 with /api/embeddings is presumably a local
 * Ollama server - confirm against the deployment.)
 *
 * @param {string} text - Text to embed.
 * @param {object} [options]
 * @param {string} [options.model='mistral'] - Embedding model name.
 * @param {string} [options.endpoint='http://localhost:11434/api/embeddings'] - Service URL.
 * @returns {Promise<number[]|null>} The vector, or null when the request fails
 *   or the response carries no usable vector. Never rejects.
 */
export async function generateEmbedding(text, { model = 'mistral', endpoint = 'http://localhost:11434/api/embeddings' } = {}) {
    try {
        const res = await fetch(endpoint, {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify({ model, prompt: text }),
        });
        if (!res.ok) {
            console.error('❌ Failed to generate embedding:', await res.text());
            return null;
        }
        const data = await res.json();
        const vector = data?.embedding;
        // An absent or empty vector must not be stored or compared: callers
        // only check for a falsy result, and [] is truthy.
        if (!Array.isArray(vector) || vector.length === 0) {
            console.error('❌ Embedding response did not contain a vector.');
            return null;
        }
        return vector;
    }
    catch (err) {
        console.error('❌ Embedding error:', err instanceof Error ? err.message : err);
        return null;
    }
}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "scai",
3
- "version": "0.1.19",
3
+ "version": "0.1.20",
4
4
  "type": "module",
5
5
  "bin": {
6
6
  "scai": "./dist/index.js"