scai 0.1.109 → 0.1.111
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/CHANGELOG.md +19 -1
- package/dist/commands/AskCmd.js +49 -79
- package/dist/commands/DaemonCmd.js +3 -1
- package/dist/config.js +13 -8
- package/dist/context.js +36 -10
- package/dist/daemon/daemonBatch.js +68 -14
- package/dist/daemon/daemonWorker.js +19 -2
- package/dist/db/fileIndex.js +2 -1
- package/dist/db/functionExtractors/extractFromJs.js +96 -16
- package/dist/db/functionExtractors/extractFromTs.js +73 -16
- package/dist/db/functionExtractors/index.js +34 -33
- package/dist/db/functionIndex.js +1 -1
- package/dist/db/schema.js +51 -5
- package/dist/index.js +5 -9
- package/dist/lib/generate.js +3 -2
- package/dist/modelSetup.js +17 -20
- package/dist/pipeline/modules/changeLogModule.js +1 -1
- package/dist/pipeline/modules/cleanupModule.js +32 -13
- package/dist/pipeline/modules/commentModule.js +1 -1
- package/dist/pipeline/modules/commitSuggesterModule.js +1 -1
- package/dist/pipeline/modules/generateTestsModule.js +1 -1
- package/dist/pipeline/modules/kgModule.js +55 -0
- package/dist/pipeline/modules/refactorModule.js +1 -1
- package/dist/pipeline/modules/repairTestsModule.js +1 -1
- package/dist/pipeline/modules/reviewModule.js +1 -1
- package/dist/pipeline/modules/summaryModule.js +1 -1
- package/dist/scripts/dbcheck.js +98 -0
- package/dist/utils/buildContextualPrompt.js +103 -65
- package/dist/utils/log.js +1 -1
- package/dist/utils/sanitizeQuery.js +14 -6
- package/package.json +2 -2
package/dist/CHANGELOG.md
CHANGED
|
@@ -171,4 +171,22 @@ Type handling with the module pipeline
|
|
|
171
171
|
## 2025-09-02
|
|
172
172
|
|
|
173
173
|
• Added test configuration for project and generated tests
|
|
174
|
-
• Add runTestsModule and repairTestsModule for testing pipeline
|
|
174
|
+
• Add runTestsModule and repairTestsModule for testing pipeline
|
|
175
|
+
|
|
176
|
+
## 2025-09-05
|
|
177
|
+
|
|
178
|
+
• Enable execution of files as executable files in the scripts
|
|
179
|
+
• Remove context failure if models not installed
|
|
180
|
+
• Add ability to set global model
|
|
181
|
+
|
|
182
|
+
## 2025-09-08
|
|
183
|
+
|
|
184
|
+
### Requires DB reset ('scai db reset' followed by 'scai index start')
|
|
185
|
+
|
|
186
|
+
1. Improved daemon batch processing by skipping missing files, classifying unknown file types, and persisting entities/tags in the database.
|
|
187
|
+
2. Invoke kgModule in daemonBatch to build knowledge graphs after indexing.
|
|
188
|
+
3. Improved data modeling and extraction logic for functions and classes in TypeScript files.
|
|
189
|
+
4. Updated Edge/Table schema for better query performance.
|
|
190
|
+
5. Update package-lock.json to caniuse-lite@1.0.30001741.
|
|
191
|
+
6. Enable execution of files as executable files in the scripts.
|
|
192
|
+
7. Remove context failure if models not installed. Add ability to set global model.
|
package/dist/commands/AskCmd.js
CHANGED
|
@@ -1,13 +1,12 @@
|
|
|
1
1
|
import fs from 'fs';
|
|
2
2
|
import path from 'path';
|
|
3
3
|
import readline from 'readline';
|
|
4
|
-
import { searchFiles, queryFiles
|
|
4
|
+
import { searchFiles, queryFiles } from '../db/fileIndex.js';
|
|
5
5
|
import { sanitizeQueryForFts } from '../utils/sanitizeQuery.js';
|
|
6
6
|
import { generate } from '../lib/generate.js';
|
|
7
7
|
import { buildContextualPrompt } from '../utils/buildContextualPrompt.js';
|
|
8
|
-
import { generateFocusedFileTree } from '../utils/fileTree.js';
|
|
9
8
|
import { log } from '../utils/log.js';
|
|
10
|
-
import { PROMPT_LOG_PATH, SCAI_HOME, RELATED_FILES_LIMIT,
|
|
9
|
+
import { PROMPT_LOG_PATH, SCAI_HOME, RELATED_FILES_LIMIT, getIndexDir } from '../constants.js';
|
|
11
10
|
import chalk from 'chalk';
|
|
12
11
|
export async function runAskCommand(query) {
|
|
13
12
|
if (!query) {
|
|
@@ -19,22 +18,21 @@ export async function runAskCommand(query) {
|
|
|
19
18
|
return;
|
|
20
19
|
}
|
|
21
20
|
console.log(`📁 Using index root: ${getIndexDir()}`);
|
|
22
|
-
|
|
23
|
-
// 🟩 STEP 1: Semantic Search
|
|
21
|
+
// Semantic Search
|
|
24
22
|
const start = Date.now();
|
|
25
|
-
const semanticResults = await searchFiles(query, RELATED_FILES_LIMIT);
|
|
23
|
+
const semanticResults = await searchFiles(query, RELATED_FILES_LIMIT);
|
|
26
24
|
const duration = Date.now() - start;
|
|
27
25
|
console.log(`⏱️ searchFiles took ${duration}ms and returned ${semanticResults.length} result(s)`);
|
|
28
26
|
semanticResults.forEach((file, i) => {
|
|
29
27
|
console.log(` ${i + 1}. 📄 Path: ${file.path} | Score: ${file.score?.toFixed(3) ?? 'n/a'}`);
|
|
30
28
|
});
|
|
31
|
-
//
|
|
29
|
+
// Fallback FTS search
|
|
32
30
|
const safeQuery = sanitizeQueryForFts(query);
|
|
33
|
-
const fallbackResults = queryFiles(safeQuery, 10);
|
|
31
|
+
const fallbackResults = queryFiles(safeQuery, 10);
|
|
34
32
|
fallbackResults.forEach((file, i) => {
|
|
35
33
|
console.log(` ${i + 1}. 🔎 Fallback Match: ${file.path}`);
|
|
36
34
|
});
|
|
37
|
-
//
|
|
35
|
+
// Merge results
|
|
38
36
|
const seen = new Set();
|
|
39
37
|
const combinedResults = [];
|
|
40
38
|
for (const file of semanticResults) {
|
|
@@ -56,7 +54,20 @@ export async function runAskCommand(query) {
|
|
|
56
54
|
});
|
|
57
55
|
}
|
|
58
56
|
}
|
|
59
|
-
//
|
|
57
|
+
// Exact match prioritization
|
|
58
|
+
const queryFilenameRaw = path.basename(query).toLowerCase();
|
|
59
|
+
const queryFilenameNoExt = queryFilenameRaw.replace(/\.[^/.]+$/, '');
|
|
60
|
+
const exactMatchIndex = combinedResults.findIndex(f => {
|
|
61
|
+
const base = path.basename(f.path).toLowerCase();
|
|
62
|
+
const baseNoExt = base.replace(/\.[^/.]+$/, '');
|
|
63
|
+
return base === queryFilenameRaw || baseNoExt === queryFilenameNoExt;
|
|
64
|
+
});
|
|
65
|
+
if (exactMatchIndex !== -1) {
|
|
66
|
+
const [exactMatch] = combinedResults.splice(exactMatchIndex, 1);
|
|
67
|
+
combinedResults.unshift(exactMatch);
|
|
68
|
+
console.log(`🎯 Exact match prioritized: ${exactMatch.path}`);
|
|
69
|
+
}
|
|
70
|
+
// Log combined results
|
|
60
71
|
if (combinedResults.length) {
|
|
61
72
|
console.log('\n📊 Final Related Files:');
|
|
62
73
|
combinedResults.forEach((f, i) => {
|
|
@@ -66,79 +77,38 @@ export async function runAskCommand(query) {
|
|
|
66
77
|
else {
|
|
67
78
|
console.log('⚠️ No similar files found. Using query only.');
|
|
68
79
|
}
|
|
69
|
-
//
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
let code = '';
|
|
73
|
-
let topSummary = topFile.summary || '(No summary available)';
|
|
74
|
-
let topFunctions = [];
|
|
75
|
-
const fileFunctions = {};
|
|
76
|
-
// Truncate summary
|
|
77
|
-
topSummary = topSummary.split('\n').slice(0, MAX_SUMMARY_LINES).join('\n');
|
|
78
|
-
const allFileIds = combinedResults
|
|
79
|
-
.map(file => file.id)
|
|
80
|
-
.filter((id) => typeof id === 'number');
|
|
81
|
-
const allFunctionsMap = getFunctionsForFiles(allFileIds); // Record<number, Function[]>
|
|
82
|
-
try {
|
|
83
|
-
code = fs.readFileSync(filepath, 'utf-8');
|
|
84
|
-
const topFileId = topFile.id;
|
|
85
|
-
topFunctions = allFunctionsMap[topFileId]?.map(fn => {
|
|
86
|
-
const content = fn.content
|
|
87
|
-
? fn.content.split('\n').slice(0, MAX_FUNCTION_LINES).join('\n')
|
|
88
|
-
: '(No content available)';
|
|
89
|
-
return {
|
|
90
|
-
name: fn.name,
|
|
91
|
-
content,
|
|
92
|
-
};
|
|
93
|
-
}) || [];
|
|
94
|
-
}
|
|
95
|
-
catch (err) {
|
|
96
|
-
console.warn(`⚠️ Failed to read or analyze top file (${filepath}):`, err);
|
|
80
|
+
// STEP 4+: Build contextual prompt using topFile + combinedResults
|
|
81
|
+
if (combinedResults.length === 0) {
|
|
82
|
+
throw new Error('❌ No search results found. Cannot build contextual prompt.');
|
|
97
83
|
}
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
let summary = file.summary || '(No summary available)';
|
|
102
|
-
if (summary) {
|
|
103
|
-
summary = summary.split('\n').slice(0, MAX_SUMMARY_LINES).join('\n');
|
|
104
|
-
}
|
|
105
|
-
const functions = allFunctionsMap[fileId]?.map(fn => {
|
|
106
|
-
const content = fn.content
|
|
107
|
-
? fn.content.split('\n').slice(0, MAX_FUNCTION_LINES).join('\n')
|
|
108
|
-
: '(No content available)';
|
|
109
|
-
return {
|
|
110
|
-
name: fn.name,
|
|
111
|
-
content,
|
|
112
|
-
};
|
|
113
|
-
}) || [];
|
|
114
|
-
return {
|
|
115
|
-
path: file.path,
|
|
116
|
-
summary,
|
|
117
|
-
functions,
|
|
118
|
-
};
|
|
119
|
-
});
|
|
120
|
-
// 🟩 STEP 6: Generate file tree
|
|
121
|
-
let fileTree = '';
|
|
84
|
+
const file = combinedResults[0];
|
|
85
|
+
let code = "";
|
|
86
|
+
// STEP 4++: Add code to params
|
|
122
87
|
try {
|
|
123
|
-
|
|
88
|
+
code = fs.readFileSync(file.path, 'utf-8');
|
|
89
|
+
if (!code) {
|
|
90
|
+
console.warn(`⚠️ No code loaded for top file: ${file.path}`);
|
|
91
|
+
}
|
|
124
92
|
}
|
|
125
93
|
catch (e) {
|
|
126
|
-
console.
|
|
94
|
+
console.log("Error reading code from selected file: ", e instanceof Error ? e.message : e);
|
|
127
95
|
}
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
96
|
+
const topFile = {
|
|
97
|
+
id: file.id,
|
|
98
|
+
path: file.path,
|
|
99
|
+
summary: file.summary ?? "",
|
|
132
100
|
code,
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
}
|
|
101
|
+
};
|
|
102
|
+
const promptArgs = {
|
|
103
|
+
topFile,
|
|
104
|
+
relatedFiles: combinedResults,
|
|
105
|
+
query,
|
|
106
|
+
};
|
|
107
|
+
console.log(chalk.blueBright('\n📦 Building contextual prompt...'));
|
|
108
|
+
const promptContent = await buildContextualPrompt(promptArgs);
|
|
139
109
|
console.log(chalk.greenBright('✅ Prompt built successfully.'));
|
|
140
110
|
console.log(chalk.cyan(`[runAskCommand] Prompt token estimate: ~${Math.round(promptContent.length / 4)} tokens`));
|
|
141
|
-
//
|
|
111
|
+
// STEP 5: Save prompt
|
|
142
112
|
try {
|
|
143
113
|
if (!fs.existsSync(SCAI_HOME))
|
|
144
114
|
fs.mkdirSync(SCAI_HOME, { recursive: true });
|
|
@@ -148,21 +118,21 @@ export async function runAskCommand(query) {
|
|
|
148
118
|
catch (err) {
|
|
149
119
|
log('❌ Failed to write prompt log:', err);
|
|
150
120
|
}
|
|
151
|
-
//
|
|
121
|
+
// STEP 6: Ask model
|
|
152
122
|
try {
|
|
153
123
|
console.log('\n🤖 Asking the model...');
|
|
154
124
|
const input = {
|
|
155
125
|
content: promptContent,
|
|
156
|
-
filepath,
|
|
126
|
+
filepath: topFile.path,
|
|
157
127
|
};
|
|
158
|
-
const modelResponse = await generate(input
|
|
128
|
+
const modelResponse = await generate(input);
|
|
159
129
|
console.log(`\n🧠 Model Response:\n${modelResponse.content}`);
|
|
160
130
|
}
|
|
161
131
|
catch (err) {
|
|
162
132
|
console.error('❌ Model request failed:', err);
|
|
163
133
|
}
|
|
164
134
|
}
|
|
165
|
-
//
|
|
135
|
+
// Helper: Prompt once
|
|
166
136
|
function promptOnce(promptText) {
|
|
167
137
|
return new Promise(resolve => {
|
|
168
138
|
console.log(promptText);
|
|
@@ -23,9 +23,11 @@ export async function startDaemon() {
|
|
|
23
23
|
const __filename = fileURLToPath(import.meta.url);
|
|
24
24
|
const __dirname = path.dirname(__filename);
|
|
25
25
|
const daemonWorkerPath = path.join(__dirname, '../daemon/daemonWorker.js');
|
|
26
|
+
const out = fsSync.openSync(LOG_PATH, 'a');
|
|
27
|
+
const err = fsSync.openSync(LOG_PATH, 'a');
|
|
26
28
|
const child = spawn(process.execPath, [daemonWorkerPath], {
|
|
27
29
|
detached: true,
|
|
28
|
-
stdio: ['ignore',
|
|
30
|
+
stdio: ['ignore', out, err], // stdout/stderr -> log file
|
|
29
31
|
env: {
|
|
30
32
|
...process.env,
|
|
31
33
|
BACKGROUND_MODE: 'true',
|
package/dist/config.js
CHANGED
|
@@ -6,7 +6,7 @@ import { normalizePath } from './utils/contentUtils.js';
|
|
|
6
6
|
import chalk from 'chalk';
|
|
7
7
|
import { getHashedRepoKey } from './utils/repoKey.js';
|
|
8
8
|
const defaultConfig = {
|
|
9
|
-
model: '
|
|
9
|
+
model: 'llama3:8b',
|
|
10
10
|
contextLength: 4096,
|
|
11
11
|
language: 'ts',
|
|
12
12
|
indexDir: '',
|
|
@@ -55,18 +55,23 @@ export const Config = {
|
|
|
55
55
|
const repoCfg = cfg.repos?.[cfg.activeRepo ?? ''];
|
|
56
56
|
return repoCfg?.model || cfg.model;
|
|
57
57
|
},
|
|
58
|
-
setModel(model) {
|
|
58
|
+
setModel(model, scope = 'repo') {
|
|
59
59
|
const cfg = readConfig();
|
|
60
|
-
|
|
61
|
-
|
|
60
|
+
if (scope === 'repo') {
|
|
61
|
+
const active = cfg.activeRepo;
|
|
62
|
+
if (!active) {
|
|
63
|
+
console.error("❌ No active repo to set model for.");
|
|
64
|
+
return;
|
|
65
|
+
}
|
|
62
66
|
cfg.repos[active] = { ...cfg.repos[active], model };
|
|
63
|
-
|
|
64
|
-
console.log(`📦 Model set to: ${model}`);
|
|
67
|
+
console.log(`📦 Model set for repo '${active}': ${model}`);
|
|
65
68
|
}
|
|
66
69
|
else {
|
|
67
|
-
|
|
68
|
-
|
|
70
|
+
// Set global default model
|
|
71
|
+
cfg.model = model;
|
|
72
|
+
console.log(`📦 Global default model set to: ${model}`);
|
|
69
73
|
}
|
|
74
|
+
writeConfig(cfg);
|
|
70
75
|
},
|
|
71
76
|
getLanguage() {
|
|
72
77
|
const cfg = readConfig();
|
package/dist/context.js
CHANGED
|
@@ -5,12 +5,25 @@ import { getHashedRepoKey } from "./utils/repoKey.js";
|
|
|
5
5
|
import { getDbForRepo, getDbPathForRepo } from "./db/client.js";
|
|
6
6
|
import fs from "fs";
|
|
7
7
|
import chalk from "chalk";
|
|
8
|
+
import { execSync } from "child_process";
|
|
9
|
+
function modelExists(model) {
|
|
10
|
+
try {
|
|
11
|
+
const output = execSync("ollama list", { encoding: "utf-8" });
|
|
12
|
+
return output
|
|
13
|
+
.split("\n")
|
|
14
|
+
.map(line => line.trim())
|
|
15
|
+
.filter(Boolean)
|
|
16
|
+
.some(line => line.toLowerCase().startsWith(model.toLowerCase() + " ") || line.toLowerCase() === model.toLowerCase());
|
|
17
|
+
}
|
|
18
|
+
catch (err) {
|
|
19
|
+
console.error(chalk.red("❌ Failed to check models with `ollama list`"));
|
|
20
|
+
return false;
|
|
21
|
+
}
|
|
22
|
+
}
|
|
8
23
|
export async function updateContext() {
|
|
9
24
|
const cwd = normalizePath(process.cwd());
|
|
10
25
|
const cfg = readConfig();
|
|
11
|
-
// 🔑 Find repoKey by matching indexDir to cwd
|
|
12
26
|
let repoKey = Object.keys(cfg.repos || {}).find((key) => normalizePath(cfg.repos[key]?.indexDir || "") === cwd);
|
|
13
|
-
// Initialize new repo config if not found
|
|
14
27
|
let isNewRepo = false;
|
|
15
28
|
if (!repoKey) {
|
|
16
29
|
repoKey = getHashedRepoKey(cwd);
|
|
@@ -19,28 +32,23 @@ export async function updateContext() {
|
|
|
19
32
|
cfg.repos[repoKey].indexDir = cwd;
|
|
20
33
|
isNewRepo = true;
|
|
21
34
|
}
|
|
22
|
-
// Check if active repo has changed
|
|
23
35
|
const activeRepoChanged = cfg.activeRepo !== repoKey;
|
|
24
|
-
// Always set this as active repo
|
|
25
36
|
cfg.activeRepo = repoKey;
|
|
26
37
|
writeConfig(cfg);
|
|
27
38
|
const repoCfg = cfg.repos[repoKey];
|
|
28
39
|
let ok = true;
|
|
29
|
-
// Only log detailed info if new repo or active repo changed
|
|
30
40
|
if (isNewRepo || activeRepoChanged) {
|
|
31
41
|
console.log(chalk.yellow("\n🔁 Updating context...\n"));
|
|
32
42
|
console.log(`✅ Active repo: ${chalk.green(repoKey)}`);
|
|
33
43
|
console.log(`✅ Index dir: ${chalk.cyan(repoCfg.indexDir || cwd)}`);
|
|
34
44
|
}
|
|
35
|
-
// GitHub token is optional
|
|
36
45
|
const token = repoCfg.githubToken || cfg.githubToken;
|
|
37
46
|
if (!token) {
|
|
38
|
-
console.log(`ℹ️ No GitHub token found. You can set one with
|
|
47
|
+
console.log(`ℹ️ No GitHub token found. You can set one with: ${chalk.bold(chalk.bgGreen("scai auth set"))}`);
|
|
39
48
|
}
|
|
40
49
|
else if (isNewRepo || activeRepoChanged) {
|
|
41
50
|
console.log(`✅ GitHub token present`);
|
|
42
51
|
}
|
|
43
|
-
// Ensure DB exists
|
|
44
52
|
const dbPath = getDbPathForRepo();
|
|
45
53
|
if (!fs.existsSync(dbPath)) {
|
|
46
54
|
console.log(chalk.yellow(`📦 Initializing DB at ${dbPath}`));
|
|
@@ -48,13 +56,31 @@ export async function updateContext() {
|
|
|
48
56
|
getDbForRepo();
|
|
49
57
|
}
|
|
50
58
|
catch {
|
|
51
|
-
ok = false;
|
|
59
|
+
ok = false;
|
|
52
60
|
}
|
|
53
61
|
}
|
|
54
62
|
else if (isNewRepo || activeRepoChanged) {
|
|
55
63
|
console.log(chalk.green("✅ Database present"));
|
|
56
64
|
}
|
|
57
|
-
//
|
|
65
|
+
// 🧠 Model check
|
|
66
|
+
const model = cfg.model;
|
|
67
|
+
if (!model) {
|
|
68
|
+
console.log(chalk.red("❌ No model configured.") +
|
|
69
|
+
"\n➡️ Set one with: " +
|
|
70
|
+
chalk.bold(chalk.bgGreen("scai config set-model <model>")));
|
|
71
|
+
ok = false;
|
|
72
|
+
}
|
|
73
|
+
else if (!modelExists(model)) {
|
|
74
|
+
console.log(chalk.red(`❌ Model '${model}' not installed in Ollama.`) +
|
|
75
|
+
"\n➡️ Install with: " +
|
|
76
|
+
chalk.bold(chalk.yellow(`ollama pull ${model}`)) +
|
|
77
|
+
" or choose another with: " +
|
|
78
|
+
chalk.bold(chalk.yellow("scai config set-model <model>")));
|
|
79
|
+
ok = false;
|
|
80
|
+
}
|
|
81
|
+
else {
|
|
82
|
+
console.log(chalk.green(`✅ Model '${model}' available`));
|
|
83
|
+
}
|
|
58
84
|
if (ok) {
|
|
59
85
|
console.log(chalk.bold.green("\n✅ Context OK\n"));
|
|
60
86
|
}
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { indexCodeForFile } from '../db/functionIndex.js';
|
|
2
2
|
import fs from 'fs/promises';
|
|
3
3
|
import fsSync from 'fs';
|
|
4
4
|
import { generateEmbedding } from '../lib/generateEmbedding.js';
|
|
@@ -8,6 +8,7 @@ import { summaryModule } from '../pipeline/modules/summaryModule.js';
|
|
|
8
8
|
import { classifyFile } from '../fileRules/classifyFile.js';
|
|
9
9
|
import { getDbForRepo, getDbPathForRepo } from '../db/client.js';
|
|
10
10
|
import { markFileAsSkippedByPath, selectUnprocessedFiles, updateFileWithSummaryAndEmbedding, } from '../db/sqlTemplates.js';
|
|
11
|
+
import { kgModule } from '../pipeline/modules/kgModule.js';
|
|
11
12
|
const MAX_FILES_PER_BATCH = 5;
|
|
12
13
|
/**
|
|
13
14
|
* Acquires a lock on the database to ensure that only one daemon batch
|
|
@@ -32,7 +33,6 @@ async function lockDb() {
|
|
|
32
33
|
*/
|
|
33
34
|
export async function runDaemonBatch() {
|
|
34
35
|
log('🟡 Starting daemon batch...');
|
|
35
|
-
// Selects up to MAX_FILES_PER_BATCH files that haven't been processed yet
|
|
36
36
|
const db = getDbForRepo();
|
|
37
37
|
const rows = db.prepare(selectUnprocessedFiles).all(MAX_FILES_PER_BATCH);
|
|
38
38
|
if (rows.length === 0) {
|
|
@@ -42,13 +42,11 @@ export async function runDaemonBatch() {
|
|
|
42
42
|
const release = await lockDb();
|
|
43
43
|
for (const row of rows) {
|
|
44
44
|
log(`📂 Processing file: ${row.path}`);
|
|
45
|
-
// Skip if file is missing from the file system
|
|
46
45
|
if (!fsSync.existsSync(row.path)) {
|
|
47
46
|
log(`⚠️ Skipped missing file: ${row.path}`);
|
|
48
47
|
db.prepare(markFileAsSkippedByPath).run({ path: row.path });
|
|
49
48
|
continue;
|
|
50
49
|
}
|
|
51
|
-
// Skip if file is classified as something we don't process
|
|
52
50
|
const classification = classifyFile(row.path);
|
|
53
51
|
if (classification !== 'valid') {
|
|
54
52
|
log(`⏭️ Skipping (${classification}): ${row.path}`);
|
|
@@ -57,24 +55,20 @@ export async function runDaemonBatch() {
|
|
|
57
55
|
}
|
|
58
56
|
try {
|
|
59
57
|
const content = await fs.readFile(row.path, 'utf-8');
|
|
60
|
-
// Determine whether the file needs to be re-summarized
|
|
61
58
|
const needsResummary = !row.summary ||
|
|
62
59
|
!row.indexed_at ||
|
|
63
60
|
(row.last_modified && new Date(row.last_modified) > new Date(row.indexed_at));
|
|
64
61
|
if (needsResummary) {
|
|
65
62
|
log(`📝 Generating summary for ${row.path}...`);
|
|
66
|
-
// Generate a summary using the summary pipeline
|
|
67
63
|
const summaryResult = await summaryModule.run({ content, filepath: row.path });
|
|
68
64
|
const summary = summaryResult?.summary?.trim() || null;
|
|
69
65
|
let embedding = null;
|
|
70
|
-
// Generate an embedding from the summary (if present)
|
|
71
66
|
if (summary) {
|
|
72
67
|
const vector = await generateEmbedding(summary);
|
|
73
68
|
if (vector) {
|
|
74
69
|
embedding = JSON.stringify(vector);
|
|
75
70
|
}
|
|
76
71
|
}
|
|
77
|
-
// Update the file record with the new summary and embedding
|
|
78
72
|
db.prepare(updateFileWithSummaryAndEmbedding).run({
|
|
79
73
|
summary,
|
|
80
74
|
embedding,
|
|
@@ -85,19 +79,79 @@ export async function runDaemonBatch() {
|
|
|
85
79
|
else {
|
|
86
80
|
log(`⚡ Skipped summary (up-to-date) for ${row.path}`);
|
|
87
81
|
}
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
82
|
+
const success = await indexCodeForFile(row.path, row.id);
|
|
83
|
+
if (success) {
|
|
84
|
+
log(`✅ Indexed code for ${row.path}`);
|
|
85
|
+
try {
|
|
86
|
+
log(`🔗 Building Knowledge Graph for ${row.path}...`);
|
|
87
|
+
const kgInput = {
|
|
88
|
+
fileId: row.id,
|
|
89
|
+
filepath: row.path,
|
|
90
|
+
summary: row.summary || undefined,
|
|
91
|
+
};
|
|
92
|
+
const kgResult = await kgModule.run(kgInput, content);
|
|
93
|
+
log(`✅ Knowledge Graph built for ${row.path}`);
|
|
94
|
+
log(`Entities: ${kgResult.entities.length}, Edges: ${kgResult.edges.length}`);
|
|
95
|
+
// Persist KG entities + tags only if there are any
|
|
96
|
+
if (kgResult.entities.length > 0) {
|
|
97
|
+
const insertTag = db.prepare(`
|
|
98
|
+
INSERT OR IGNORE INTO tags_master (name) VALUES (:name)
|
|
99
|
+
`);
|
|
100
|
+
const getTagId = db.prepare(`
|
|
101
|
+
SELECT id FROM tags_master WHERE name = :name
|
|
102
|
+
`);
|
|
103
|
+
const insertEntityTag = db.prepare(`
|
|
104
|
+
INSERT OR IGNORE INTO entity_tags (entity_type, entity_id, tag_id)
|
|
105
|
+
VALUES (:entity_type, :entity_id, :tag_id)
|
|
106
|
+
`);
|
|
107
|
+
for (const entity of kgResult.entities) {
|
|
108
|
+
// Skip entity if type or tags are missing
|
|
109
|
+
if (!entity.type || !Array.isArray(entity.tags) || entity.tags.length === 0) {
|
|
110
|
+
console.warn(`⚠ Skipping entity due to missing type or tags:`, entity);
|
|
111
|
+
continue;
|
|
112
|
+
}
|
|
113
|
+
for (const tag of entity.tags) {
|
|
114
|
+
// Skip empty or invalid tags
|
|
115
|
+
if (!tag || typeof tag !== 'string') {
|
|
116
|
+
console.warn(`⚠ Skipping invalid tag for entity ${entity.type}:`, tag);
|
|
117
|
+
continue;
|
|
118
|
+
}
|
|
119
|
+
try {
|
|
120
|
+
// ✅ Use :name in SQL and plain key in object
|
|
121
|
+
insertTag.run({ name: tag });
|
|
122
|
+
const tagRow = getTagId.get({ name: tag });
|
|
123
|
+
if (!tagRow) {
|
|
124
|
+
console.warn(`⚠ Could not find tag ID for: ${tag}`);
|
|
125
|
+
continue;
|
|
126
|
+
}
|
|
127
|
+
insertEntityTag.run({
|
|
128
|
+
entity_type: entity.type,
|
|
129
|
+
entity_id: row.id,
|
|
130
|
+
tag_id: tagRow.id,
|
|
131
|
+
});
|
|
132
|
+
}
|
|
133
|
+
catch (err) {
|
|
134
|
+
console.error(`❌ Failed to persist entity/tag:`, { entity, tag, error: err });
|
|
135
|
+
}
|
|
136
|
+
}
|
|
137
|
+
}
|
|
138
|
+
log(`✅ Persisted entities + tags for ${row.path}`);
|
|
139
|
+
}
|
|
140
|
+
else {
|
|
141
|
+
log(`⚠️ No entities found for ${row.path}, skipping DB inserts`);
|
|
142
|
+
}
|
|
143
|
+
}
|
|
144
|
+
catch (kgErr) {
|
|
145
|
+
log(`❌ KG build failed for ${row.path}: ${kgErr instanceof Error ? kgErr.message : String(kgErr)}`);
|
|
146
|
+
}
|
|
92
147
|
}
|
|
93
148
|
else {
|
|
94
|
-
log(`ℹ️ No
|
|
149
|
+
log(`ℹ️ No code elements extracted for ${row.path}`);
|
|
95
150
|
}
|
|
96
151
|
}
|
|
97
152
|
catch (err) {
|
|
98
153
|
log(`❌ Failed: ${row.path}: ${err instanceof Error ? err.message : String(err)}\n`);
|
|
99
154
|
}
|
|
100
|
-
// Add a small delay to throttle processing
|
|
101
155
|
await new Promise(resolve => setTimeout(resolve, 200));
|
|
102
156
|
}
|
|
103
157
|
await release();
|
|
@@ -31,9 +31,26 @@ export async function daemonWorker() {
|
|
|
31
31
|
while (true) {
|
|
32
32
|
try {
|
|
33
33
|
log('🔄 Running daemon batch...');
|
|
34
|
-
|
|
34
|
+
// Wrap the batch in debug
|
|
35
|
+
let didWork = false;
|
|
36
|
+
try {
|
|
37
|
+
log('🔹 Running runDaemonBatch()...');
|
|
38
|
+
didWork = await runDaemonBatch();
|
|
39
|
+
log('✅ runDaemonBatch() completed successfully');
|
|
40
|
+
}
|
|
41
|
+
catch (batchErr) {
|
|
42
|
+
log('🔥 Error inside runDaemonBatch():', batchErr);
|
|
43
|
+
}
|
|
35
44
|
if (!didWork) {
|
|
36
|
-
|
|
45
|
+
let queueEmpty = false;
|
|
46
|
+
try {
|
|
47
|
+
log('🔹 Checking if queue is empty...');
|
|
48
|
+
queueEmpty = await isQueueEmpty();
|
|
49
|
+
log(`🔹 Queue empty status: ${queueEmpty}`);
|
|
50
|
+
}
|
|
51
|
+
catch (queueErr) {
|
|
52
|
+
log('🔥 Error checking queue status:', queueErr);
|
|
53
|
+
}
|
|
37
54
|
if (queueEmpty) {
|
|
38
55
|
log('🕊️ No work found. Idling...');
|
|
39
56
|
await sleep(IDLE_SLEEP_MS * 3);
|
package/dist/db/fileIndex.js
CHANGED
|
@@ -6,6 +6,7 @@ import * as sqlTemplates from './sqlTemplates.js';
|
|
|
6
6
|
import { CANDIDATE_LIMIT } from '../constants.js';
|
|
7
7
|
import { getDbForRepo } from './client.js';
|
|
8
8
|
import { scoreFiles } from '../fileRules/scoreFiles.js'; // 👈 NEW
|
|
9
|
+
import chalk from 'chalk';
|
|
9
10
|
export function indexFile(filePath, summary, type) {
|
|
10
11
|
const stats = fs.statSync(filePath);
|
|
11
12
|
const lastModified = stats.mtime.toISOString();
|
|
@@ -44,7 +45,7 @@ export function queryFiles(safeQuery, limit = 10) {
|
|
|
44
45
|
`).all(safeQuery, limit);
|
|
45
46
|
}
|
|
46
47
|
export async function searchFiles(query, topK = 5) {
|
|
47
|
-
console.log(`🧠 Searching for query: "${query}"`);
|
|
48
|
+
console.log(chalk.yellow(`🧠 Searching for query: "${query}"`));
|
|
48
49
|
const embedding = await generateEmbedding(query);
|
|
49
50
|
if (!embedding) {
|
|
50
51
|
console.log('⚠️ Failed to generate embedding for query');
|