scai 0.1.38 → 0.1.39

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,14 +2,14 @@ import fg from 'fast-glob';
2
2
  import path from 'path';
3
3
  import { initSchema } from '../db/schema.js';
4
4
  import { indexFile } from '../db/fileIndex.js';
5
- import { shouldIgnoreFile } from '../utils/shouldIgnoreFiles.js';
6
- import { detectFileType } from '../utils/detectFileType.js';
5
+ import { detectFileType } from '../fileRules/detectFileType.js';
7
6
  import { startDaemon } from './DaemonCmd.js';
8
- import { IGNORED_FOLDER_GLOBS } from '../config/IgnoredPaths.js';
7
+ import { IGNORED_FOLDER_GLOBS } from '../fileRules/ignoredPaths.js';
9
8
  import { Config } from '../config.js';
10
9
  import { DB_PATH } from '../constants.js';
11
10
  import { log } from '../utils/log.js';
12
11
  import lockfile from 'proper-lockfile';
12
+ import { classifyFile } from '../fileRules/classifyFile.js';
13
13
  // 🧠 Lock the database to prevent simultaneous access
14
14
  async function lockDb() {
15
15
  try {
@@ -55,8 +55,11 @@ export async function runIndexCommand(targetDir, options = {}) {
55
55
  let count = 0;
56
56
  const release = await lockDb(); // Lock the DB before starting
57
57
  for (const file of files) {
58
- if (shouldIgnoreFile(file))
58
+ const classification = classifyFile(file);
59
+ if (classification !== 'valid') {
60
+ log(`⏭️ Skipping (${classification}): ${file}`);
59
61
  continue;
62
+ }
60
63
  try {
61
64
  const type = detectFileType(file);
62
65
  indexFile(file, null, type); // Index file without summary
@@ -0,0 +1,15 @@
1
+ // src/commands/MigrateCmd.ts
2
+ import path from 'path';
3
+ import { pathToFileURL } from 'url';
4
+ import { fileURLToPath } from 'url';
5
/**
 * Runs the compiled DB migration script by dynamically importing it.
 *
 * The script location is resolved relative to this module's own file:
 * two directories up, then `dist/scripts/migrateDb.js`.
 * If the import rejects, the error is printed and the process exits with
 * status 1.
 *
 * @returns {Promise<void>} Settles once the migration module has been loaded.
 */
export async function runMigrateCommand() {
  const thisDir = path.dirname(fileURLToPath(import.meta.url));
  // Points at the compiled output, not the TypeScript source.
  const migrationScript = path.resolve(thisDir, '../../dist/scripts/migrateDb.js');

  try {
    const migrationUrl = pathToFileURL(migrationScript).href;
    await import(migrationUrl);
  } catch (err) {
    console.error('❌ Failed to run migration script:', err);
    process.exit(1);
  }
}
@@ -1,5 +1,5 @@
1
1
  import { readFileSync } from 'fs';
2
- import { getModuleByName } from '../registry/moduleRegistry.js';
2
+ import { getModuleByName } from '../pipeline/registry/moduleRegistry.js';
3
3
  import { runModulePipeline } from '../pipeline/runModulePipeline.js';
4
4
  export async function runModulePipelineFromCLI(file, options) {
5
5
  if (!options.modules) {
@@ -4,7 +4,7 @@ import readline from 'readline';
4
4
  import { queryFiles, indexFile } from '../db/fileIndex.js';
5
5
  import { summaryModule } from '../pipeline/modules/summaryModule.js';
6
6
  import { summarizeCode } from '../utils/summarizer.js';
7
- import { detectFileType } from '../utils/detectFileType.js';
7
+ import { detectFileType } from '../fileRules/detectFileType.js';
8
8
  import { generateEmbedding } from '../lib/generateEmbedding.js';
9
9
  import { sanitizeQueryForFts } from '../utils/sanitizeQuery.js';
10
10
  import { db } from '../db/client.js';
@@ -6,9 +6,8 @@ import { generateEmbedding } from '../lib/generateEmbedding.js';
6
6
  import { DB_PATH } from '../constants.js';
7
7
  import { log } from '../utils/log.js';
8
8
  import lockfile from 'proper-lockfile';
9
- import { shouldIgnoreFile } from '../utils/shouldIgnoreFiles.js';
10
9
  import { summaryModule } from '../pipeline/modules/summaryModule.js';
11
- import { isGeneratedOrBundledFile } from '../utils/fileClassifier.js';
10
+ import { classifyFile } from '../fileRules/classifyFile.js';
12
11
  const MAX_FILES_PER_BATCH = 5;
13
12
  async function lockDb() {
14
13
  try {
@@ -22,14 +21,14 @@ async function lockDb() {
22
21
  export async function runDaemonBatch() {
23
22
  log('🟡 Starting daemon batch...');
24
23
  const rows = db.prepare(`
25
- SELECT path, type, functions_extracted FROM files
24
+ SELECT path, type, processing_status FROM files
26
25
  WHERE (summary IS NULL OR summary = '')
27
- OR (functions_extracted IS NULL OR functions_extracted = 0)
26
+ OR processing_status = 'unprocessed'
28
27
  ORDER BY last_modified DESC
29
28
  LIMIT ?
30
29
  `).all(MAX_FILES_PER_BATCH);
31
30
  if (rows.length === 0) {
32
- log('✅ No files left to summarize.');
31
+ log('✅ No files left to process.');
33
32
  return false;
34
33
  }
35
34
  const release = await lockDb();
@@ -37,45 +36,37 @@ export async function runDaemonBatch() {
37
36
  log(`📂 Processing file: ${row.path}`);
38
37
  if (!fsSync.existsSync(row.path)) {
39
38
  log(`⚠️ Skipped missing file: ${row.path}`);
39
+ db.prepare(`UPDATE files SET processing_status = 'skipped' WHERE path = @path`).run({ path: row.path });
40
40
  continue;
41
41
  }
42
- if (shouldIgnoreFile(row.path)) {
43
- log(`⚠️ Skipped (extension): ${row.path}`);
44
- continue;
45
- }
46
- if (isGeneratedOrBundledFile(row.path)) {
47
- log(`🚫 Skipping likely generated file: ${row.path}`);
42
+ const classification = classifyFile(row.path);
43
+ if (classification !== 'valid') {
44
+ log(`⏭️ Skipping (${classification}): ${row.path}`);
45
+ db.prepare(`UPDATE files SET processing_status = 'skipped' WHERE path = @path`).run({ path: row.path });
48
46
  continue;
49
47
  }
50
48
  try {
51
49
  const content = await fs.readFile(row.path, 'utf-8');
52
- // 1. Lookup file ID in the database using its path
53
- const fileResult = db
54
- .prepare(`SELECT id FROM files WHERE path = @path`) // Replaced `?` with `@path`
55
- .get({ path: row.path });
50
+ const fileResult = db.prepare(`SELECT id FROM files WHERE path = @path`).get({ path: row.path });
56
51
  const fileId = fileResult?.id;
57
52
  if (fileId) {
58
- // 2. Extract and index functions for this file
59
53
  await indexFunctionsForFile(row.path, fileId);
60
- // 3. Mark the file as having functions extracted in the database
61
- db.prepare(`UPDATE files SET functions_extracted = 1 WHERE id = @id`).run({ id: fileId });
54
+ // Update processing_status to 'extracted' after function extraction attempt
55
+ db.prepare(`UPDATE files SET processing_status = 'extracted' WHERE id = @id`).run({ id: fileId });
62
56
  }
63
57
  else {
64
58
  log(`⚠️ Could not find fileId for ${row.path}`);
65
59
  }
66
- // 4. Summarize the file content
67
60
  log(`📝 Generating summary for ${row.path}...`);
68
61
  const summaryResult = await summaryModule.run({ content, filepath: row.path });
69
62
  const summary = summaryResult?.summary?.trim() || null;
70
63
  let embedding = null;
71
64
  if (summary) {
72
- // 5. Generate embedding for the summary
73
65
  const vector = await generateEmbedding(summary);
74
66
  if (vector) {
75
67
  embedding = JSON.stringify(vector);
76
68
  }
77
69
  }
78
- // 6. Save the summary and embedding in the database
79
70
  db.prepare(`
80
71
  UPDATE files
81
72
  SET summary = @summary, embedding = @embedding, indexed_at = datetime('now')
@@ -84,10 +75,9 @@ export async function runDaemonBatch() {
84
75
  log(`✅ Updated summary & embedding for ${row.path}\n`);
85
76
  }
86
77
  catch (err) {
87
- // 7. Error handling: Log the error message if the process fails
88
78
  log(`❌ Failed: ${row.path}: ${err instanceof Error ? err.message : String(err)}\n`);
79
+ db.prepare(`UPDATE files SET processing_status = 'failed' WHERE path = @path`).run({ path: row.path });
89
80
  }
90
- // 8. Optional delay between file processing to reduce load on the system
91
81
  await new Promise(resolve => setTimeout(resolve, 200));
92
82
  }
93
83
  await release();
@@ -78,12 +78,11 @@ export async function extractFromJS(filePath, content, fileId) {
78
78
  }
79
79
  log(`📌 Indexed function: ${fn.name} with ${calls.length} calls`);
80
80
  }
81
- // Mark the file as successfully extracted
81
+ // Mark as extracted using new processing_status column
82
82
  db.prepare(`
83
83
  UPDATE files
84
- SET functions_extracted = 1,
85
- functions_extracted_at = datetime('now')
84
+ SET processing_status = 'extracted'
86
85
  WHERE id = @fileId
87
- `).run({ fileId });
86
+ `).run({ fileId });
88
87
  log(`✅ Marked functions as extracted for ${filePath}`);
89
88
  }
@@ -1,9 +1,9 @@
1
1
  import { log } from '../../utils/log.js';
2
- import { detectFileType } from '../../utils/detectFileType.js';
2
+ import { detectFileType } from '../../fileRules/detectFileType.js';
3
3
  import { extractFromJava } from './extractFromJava.js';
4
4
  import { extractFromJS } from './extractFromJs.js';
5
5
  import { extractFromXML } from './extractFromXML.js';
6
- import { db } from '../client.js'; // Assuming db is imported from your DB utility
6
+ import { db } from '../client.js';
7
7
  /**
8
8
  * Detects file type and delegates to the appropriate extractor.
9
9
  */
@@ -13,42 +13,36 @@ export async function extractFunctionsFromFile(filePath, content, fileId) {
13
13
  if (type === 'js' || type === 'ts' || type === 'javascript' || type === 'typescript') {
14
14
  log(`✅ Attempting to extract JS functions from ${filePath}\n`);
15
15
  await extractFromJS(filePath, content, fileId);
16
- // Mark the file as "extracted" even if the extraction succeeds
17
- db.prepare(`
18
- UPDATE files SET functions_extracted = 1 WHERE id = @id
19
- `).run({ id: fileId });
20
16
  return true;
21
17
  }
22
18
  if (type === 'java') {
23
19
  log(`❌ Nothing extracted for ${filePath} due to missing implementation`);
24
20
  await extractFromJava(filePath, content, fileId);
25
- // Mark the file as "extracted" after the attempt
21
+ // move into extract file
26
22
  db.prepare(`
27
- UPDATE files SET functions_extracted = 1 WHERE id = @id
23
+ UPDATE files SET processing_status = 'skipped' WHERE id = @id
28
24
  `).run({ id: fileId });
29
25
  return false;
30
26
  }
31
27
  if (type === 'xml') {
32
28
  log(`❌ Nothing extracted for ${filePath} due to missing implementation`);
33
29
  await extractFromXML(filePath, content, fileId);
34
- // Mark the file as "extracted" after the attempt
30
+ // move into extract file
35
31
  db.prepare(`
36
- UPDATE files SET functions_extracted = 1 WHERE id = @id
32
+ UPDATE files SET processing_status = 'skipped' WHERE id = @id
37
33
  `).run({ id: fileId });
38
34
  return false;
39
35
  }
40
36
  log(`⚠️ Unsupported file type: ${type} for function extraction. Skipping ${filePath}`);
41
- // Mark unsupported file types as extracted to prevent future retries
42
37
  db.prepare(`
43
- UPDATE files SET functions_extracted = 1 WHERE id = @id
38
+ UPDATE files SET processing_status = 'skipped' WHERE id = @id
44
39
  `).run({ id: fileId });
45
40
  return false;
46
41
  }
47
42
  catch (error) {
48
43
  log(`❌ Failed to extract functions from ${filePath}: ${error instanceof Error ? error.message : error}`);
49
- // Mark as extracted on failure to ensure the system keeps moving forward
50
44
  db.prepare(`
51
- UPDATE files SET functions_extracted = 1 WHERE id = @id
45
+ UPDATE files SET processing_status = 'failed' WHERE id = @id
52
46
  `).run({ id: fileId });
53
47
  return false;
54
48
  }
package/dist/db/schema.js CHANGED
@@ -8,7 +8,8 @@ export function initSchema() {
8
8
  type TEXT,
9
9
  indexed_at TEXT,
10
10
  last_modified TEXT,
11
- embedding TEXT
11
+ embedding TEXT,
12
+ processing_status TEXT
12
13
  );
13
14
 
14
15
  CREATE VIRTUAL TABLE IF NOT EXISTS files_fts
@@ -0,0 +1,10 @@
1
+ // src/fileRules/classifyFile.ts
2
+ import { isGeneratedOrBundledFile } from "./fileClassifier.js";
3
+ import { shouldIgnoreFile } from "./shouldIgnoreFiles.js";
4
/**
 * Classifies a file path for indexing/processing.
 *
 * Rules are evaluated in order and the first match wins:
 *   1. `shouldIgnoreFile`          -> 'ignore'
 *   2. `isGeneratedOrBundledFile`  -> 'generated'
 * A path matching neither rule is 'valid'.
 *
 * @param {string} filepath - Path of the file to classify.
 * @returns {'ignore' | 'generated' | 'valid'} The classification label.
 */
export function classifyFile(filepath) {
  const orderedRules = [
    ['ignore', shouldIgnoreFile],
    ['generated', isGeneratedOrBundledFile],
  ];

  for (const [label, matches] of orderedRules) {
    if (matches(filepath)) {
      return label;
    }
  }

  return 'valid';
}
@@ -1,6 +1,6 @@
1
1
  import path from 'path';
2
- import { IGNORED_EXTENSIONS } from '../config/IgnoredExtensions.js';
3
- import { specificFileExceptions } from '../config/specificFileExceptions.js';
2
+ import { IGNORED_EXTENSIONS } from './ignoredExtensions.js';
3
+ import { specificFileExceptions } from './fileExceptions.js';
4
4
  export function shouldIgnoreFile(filePath) {
5
5
  // Get file extension
6
6
  const ext = path.extname(filePath).toLowerCase();
package/dist/index.js CHANGED
@@ -20,6 +20,7 @@ import { startDaemon } from './commands/DaemonCmd.js';
20
20
  import { runStopDaemonCommand } from "./commands/StopDaemonCmd.js";
21
21
  import { runAskCommand } from './commands/AskCmd.js';
22
22
  import { runBackupCommand } from './commands/BackupCmd.js';
23
+ import { runMigrateCommand } from "./commands/MigrateCmd.js";
23
24
  // 🎛️ CLI Setup
24
25
  const cmd = new Command('scai')
25
26
  .version(version)
@@ -133,6 +134,10 @@ cmd
133
134
  .command('stop-daemon')
134
135
  .description('Stop the background summarizer daemon')
135
136
  .action(runStopDaemonCommand);
137
+ cmd
138
+ .command('migrate')
139
+ .description('Run DB migration scripts')
140
+ .action(runMigrateCommand);
136
141
  cmd
137
142
  .command('reset-db')
138
143
  .description('Delete and reset the SQLite database')
@@ -0,0 +1,21 @@
1
+ import { addCommentsModule } from '../modules/commentModule.js';
2
+ import { cleanupModule } from '../modules/cleanupModule.js';
3
+ import { summaryModule } from '../modules/summaryModule.js';
4
+ import { generateTestsModule } from '../modules/generateTestsModule.js';
5
+ import { commitSuggesterModule } from '../modules/commitSuggesterModule.js';
6
+ import { changelogModule } from '../modules/changeLogModule.js';
7
+ // Add more as needed...
8
/**
 * Registry of built-in pipeline modules, keyed by the name used to request them.
 * Register additional modules by adding an entry here.
 */
const builtInModules = {
  comments: addCommentsModule,
  cleanup: cleanupModule,
  summary: summaryModule,
  tests: generateTestsModule,
  suggest: commitSuggesterModule,
  changelog: changelogModule,
};

/**
 * Looks up a built-in module by its registered name.
 *
 * Only the registry's own keys are consulted. A plain `builtInModules[name]`
 * would also resolve inherited members such as "toString" or "constructor",
 * handing callers a function that is not a pipeline module.
 *
 * @param {string} name - Registered module name (e.g. "summary").
 * @returns {object|undefined} The registered module, or `undefined` if the
 *   name is not registered.
 */
export function getModuleByName(name) {
  return Object.prototype.hasOwnProperty.call(builtInModules, name)
    ? builtInModules[name]
    : undefined;
}

/**
 * Lists the names of all registered built-in modules.
 *
 * @returns {string[]} Registered names, in registration order.
 */
export function listAvailableModules() {
  return Object.keys(builtInModules);
}
@@ -0,0 +1,18 @@
1
+ // scripts/migrateDb.ts
2
+ import { db } from "../db/client.js";
3
+ import { log } from "../utils/log.js";
4
// Migration step: add the `processing_status` column to the `files` table.
// Re-running is safe: a "duplicate column name" error means the column is
// already present and is reported as a skip. Any other error is a real failure.
try {
  db.prepare(`
    ALTER TABLE files ADD COLUMN processing_status TEXT DEFAULT 'unprocessed'
  `).run();
  log('✅ Added processing_status column.');
} catch (e) {
  const message = e instanceof Error ? e.message : String(e);
  if (e instanceof Error && message.includes('duplicate column name')) {
    log('ℹ️ Column processing_status already exists. Skipping.');
  } else {
    log(`❌ Migration failed: ${message}`);
    // The error is handled here and never reaches whoever imported this
    // script, so signal the failure through the exit status instead of
    // letting the process finish with status 0.
    process.exitCode = 1;
  }
}
18
+ // Add more migration steps here as needed.
@@ -1,5 +1,5 @@
1
1
  // src/utils/sanitizeQuery.ts
2
- import { STOP_WORDS } from '../config/StopWords.js';
2
+ import { STOP_WORDS } from '../fileRules/stopWords.js';
3
3
  export function sanitizeQueryForFts(input) {
4
4
  input = input.trim().toLowerCase();
5
5
  // If it's a single filename-like string (includes dots or slashes), quote it
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "scai",
3
- "version": "0.1.38",
3
+ "version": "0.1.39",
4
4
  "type": "module",
5
5
  "bin": {
6
6
  "scai": "./dist/index.js"
@@ -1,21 +0,0 @@
1
- import { addCommentsModule } from '../pipeline/modules/commentModule.js';
2
- import { cleanupModule } from '../pipeline/modules/cleanupModule.js';
3
- import { summaryModule } from '../pipeline/modules/summaryModule.js';
4
- import { generateTestsModule } from '../pipeline/modules/generateTestsModule.js';
5
- import { commitSuggesterModule } from '../pipeline/modules/commitSuggesterModule.js';
6
- import { changelogModule } from '../pipeline/modules/changeLogModule.js';
7
- // Add more as needed...
8
- const builtInModules = {
9
- comments: addCommentsModule,
10
- cleanup: cleanupModule,
11
- summary: summaryModule,
12
- tests: generateTestsModule,
13
- suggest: commitSuggesterModule,
14
- changelog: changelogModule,
15
- };
16
- export function getModuleByName(name) {
17
- return builtInModules[name];
18
- }
19
- export function listAvailableModules() {
20
- return Object.keys(builtInModules);
21
- }
@@ -1,29 +0,0 @@
1
- import { db } from '../db/client.js';
2
- try {
3
- db.prepare(`
4
- ALTER TABLE files ADD COLUMN functions_extracted BOOLEAN DEFAULT 0
5
- `).run();
6
- console.log('✅ Added functions_extracted column.');
7
- }
8
- catch (e) {
9
- if (e instanceof Error ? e.message.includes('duplicate column name') : e) {
10
- console.log('ℹ️ Column functions_extracted already exists. Skipping.');
11
- }
12
- else {
13
- console.error('❌ Migration failed:', e instanceof Error ? e.message : e);
14
- }
15
- }
16
- try {
17
- db.prepare(`
18
- ALTER TABLE files ADD COLUMN functions_extracted_at DATETIME
19
- `).run();
20
- console.log('✅ Added functions_extracted_at column.');
21
- }
22
- catch (e) {
23
- if (e instanceof Error ? e.message.includes('duplicate column name') : e) {
24
- console.log('ℹ️ Column functions_extracted_at already exists. Skipping.');
25
- }
26
- else {
27
- console.error('❌ Migration failed:', e instanceof Error ? e.message : e);
28
- }
29
- }
@@ -1,43 +0,0 @@
1
- import Database from 'better-sqlite3';
2
- import path from 'path';
3
- import os from 'os';
4
- import { IGNORED_EXTENSIONS } from '../config/IgnoredExtensions.js';
5
- import { specificFileExceptions } from '../config/specificFileExceptions.js';
6
- // THIS FILE IS MEANT TO BE RUN AS A NODE JS SCRIPT. node dist/src/utilsremoveIgnoredFiles.js
7
- // It removes wrongly indexed files that don't add value to the model.
8
- const DB_PATH = path.join(os.homedir(), '.scai', 'db.sqlite');
9
- const db = new Database(DB_PATH);
10
- console.log('🧹 Removing files with ignored extensions from the database...');
11
- // === Remove Files with Ignored Extensions, Excluding Specific Exceptions ===
12
- IGNORED_EXTENSIONS.forEach(ext => {
13
- try {
14
- const filesToDelete = db.prepare(`
15
- SELECT path FROM files WHERE path LIKE ?
16
- `).all(`%${ext}`);
17
- ;
18
- let deletedCount = 0;
19
- filesToDelete.forEach(file => {
20
- // Check if the file is in the exception list
21
- if (!specificFileExceptions.includes(file.path)) {
22
- // Delete the file from the database
23
- const deleted = db.prepare(`DELETE FROM files WHERE path = ?`).run(file.path);
24
- if (deleted.changes > 0) {
25
- deletedCount++;
26
- }
27
- }
28
- else {
29
- console.log(`⚠️ Skipped file (exception): ${file.path}`);
30
- }
31
- });
32
- if (deletedCount > 0) {
33
- console.log(`✅ Removed ${deletedCount} files with extension: ${ext}`);
34
- }
35
- else {
36
- console.log(`⚠️ No deletions for files with extension: ${ext}`);
37
- }
38
- }
39
- catch (err) {
40
- console.error("❌ Failed to remove files with extension ${ext}:", err instanceof Error ? err.message : err);
41
- }
42
- });
43
- console.log('🧹 Finished removing ignored files.');
File without changes
File without changes