scai 0.1.109 → 0.1.110
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/CHANGELOG.md +19 -1
- package/dist/commands/DaemonCmd.js +3 -1
- package/dist/config.js +13 -8
- package/dist/context.js +36 -10
- package/dist/daemon/daemonBatch.js +68 -14
- package/dist/daemon/daemonWorker.js +19 -2
- package/dist/db/functionExtractors/extractFromJs.js +96 -16
- package/dist/db/functionExtractors/extractFromTs.js +73 -16
- package/dist/db/functionExtractors/index.js +34 -33
- package/dist/db/functionIndex.js +1 -1
- package/dist/db/schema.js +51 -5
- package/dist/index.js +5 -9
- package/dist/modelSetup.js +17 -20
- package/dist/pipeline/modules/cleanupModule.js +32 -13
- package/dist/pipeline/modules/kgModule.js +55 -0
- package/dist/scripts/dbcheck.js +98 -0
- package/dist/utils/log.js +1 -1
- package/package.json +2 -2
package/dist/CHANGELOG.md
CHANGED
```diff
@@ -171,4 +171,22 @@ Type handling with the module pipeline
 ## 2025-09-02
 
 • Added test configuration for project and generated tests
-• Add runTestsModule and repairTestsModule for testing pipeline
+• Add runTestsModule and repairTestsModule for testing pipeline
+
+## 2025-09-05
+
+• Enable execution of files as executable files in the scripts
+• Remove context failure if models not installed
+• Add ability to set global model
+
+## 2025-09-08
+
+### Requires DB reset ('scai db reset' followed by 'scai index start')
+
+1. Improved daemon batch processing by skipping missing files, classifying unknown file types, and persisting entities/tags in the database.
+2. Invoke kgModule in daemonBatch to build knowledge graphs after indexing.
+3. Improved data modeling and extraction logic for functions and classes in TypeScript files.
+4. Updated Edge/Table schema for better query performance.
+5. Update package-lock.json to caniuse-lite@1.0.30001741.
+6. Enable execution of as an executable file in the scripts.
+7. Remove context failure if models not installed. Add ability to set global model.
```
package/dist/commands/DaemonCmd.js
CHANGED
```diff
@@ -23,9 +23,11 @@ export async function startDaemon() {
     const __filename = fileURLToPath(import.meta.url);
     const __dirname = path.dirname(__filename);
     const daemonWorkerPath = path.join(__dirname, '../daemon/daemonWorker.js');
+    const out = fsSync.openSync(LOG_PATH, 'a');
+    const err = fsSync.openSync(LOG_PATH, 'a');
     const child = spawn(process.execPath, [daemonWorkerPath], {
         detached: true,
-        stdio: ['ignore',
+        stdio: ['ignore', out, err], // stdout/stderr -> log file
         env: {
             ...process.env,
             BACKGROUND_MODE: 'true',
```
package/dist/config.js
CHANGED
```diff
@@ -6,7 +6,7 @@ import { normalizePath } from './utils/contentUtils.js';
 import chalk from 'chalk';
 import { getHashedRepoKey } from './utils/repoKey.js';
 const defaultConfig = {
-    model: '
+    model: 'llama3:8b',
     contextLength: 4096,
     language: 'ts',
     indexDir: '',
@@ -55,18 +55,23 @@ export const Config = {
         const repoCfg = cfg.repos?.[cfg.activeRepo ?? ''];
         return repoCfg?.model || cfg.model;
     },
-    setModel(model) {
+    setModel(model, scope = 'repo') {
         const cfg = readConfig();
-
-
+        if (scope === 'repo') {
+            const active = cfg.activeRepo;
+            if (!active) {
+                console.error("❌ No active repo to set model for.");
+                return;
+            }
             cfg.repos[active] = { ...cfg.repos[active], model };
-
-            console.log(`📦 Model set to: ${model}`);
+            console.log(`📦 Model set for repo '${active}': ${model}`);
         }
         else {
-
-
+            // Set global default model
+            cfg.model = model;
+            console.log(`📦 Global default model set to: ${model}`);
         }
+        writeConfig(cfg);
     },
     getLanguage() {
         const cfg = readConfig();
```
package/dist/context.js
CHANGED
```diff
@@ -5,12 +5,25 @@ import { getHashedRepoKey } from "./utils/repoKey.js";
 import { getDbForRepo, getDbPathForRepo } from "./db/client.js";
 import fs from "fs";
 import chalk from "chalk";
+import { execSync } from "child_process";
+function modelExists(model) {
+    try {
+        const output = execSync("ollama list", { encoding: "utf-8" });
+        return output
+            .split("\n")
+            .map(line => line.trim())
+            .filter(Boolean)
+            .some(line => line.toLowerCase().startsWith(model.toLowerCase() + " ") || line.toLowerCase() === model.toLowerCase());
+    }
+    catch (err) {
+        console.error(chalk.red("❌ Failed to check models with `ollama list`"));
+        return false;
+    }
+}
 export async function updateContext() {
     const cwd = normalizePath(process.cwd());
     const cfg = readConfig();
-    // 🔑 Find repoKey by matching indexDir to cwd
     let repoKey = Object.keys(cfg.repos || {}).find((key) => normalizePath(cfg.repos[key]?.indexDir || "") === cwd);
-    // Initialize new repo config if not found
     let isNewRepo = false;
     if (!repoKey) {
         repoKey = getHashedRepoKey(cwd);
@@ -19,28 +32,23 @@ export async function updateContext() {
         cfg.repos[repoKey].indexDir = cwd;
         isNewRepo = true;
     }
-    // Check if active repo has changed
     const activeRepoChanged = cfg.activeRepo !== repoKey;
-    // Always set this as active repo
     cfg.activeRepo = repoKey;
     writeConfig(cfg);
     const repoCfg = cfg.repos[repoKey];
     let ok = true;
-    // Only log detailed info if new repo or active repo changed
     if (isNewRepo || activeRepoChanged) {
         console.log(chalk.yellow("\n🔁 Updating context...\n"));
         console.log(`✅ Active repo: ${chalk.green(repoKey)}`);
         console.log(`✅ Index dir: ${chalk.cyan(repoCfg.indexDir || cwd)}`);
     }
-    // GitHub token is optional
     const token = repoCfg.githubToken || cfg.githubToken;
     if (!token) {
-        console.log(`ℹ️ No GitHub token found. You can set one with
+        console.log(`ℹ️ No GitHub token found. You can set one with: ${chalk.bold(chalk.bgGreen("scai auth set"))}`);
     }
     else if (isNewRepo || activeRepoChanged) {
         console.log(`✅ GitHub token present`);
     }
-    // Ensure DB exists
     const dbPath = getDbPathForRepo();
     if (!fs.existsSync(dbPath)) {
         console.log(chalk.yellow(`📦 Initializing DB at ${dbPath}`));
@@ -48,13 +56,31 @@ export async function updateContext() {
             getDbForRepo();
         }
         catch {
-        ok = false;
+            ok = false;
         }
     }
     else if (isNewRepo || activeRepoChanged) {
         console.log(chalk.green("✅ Database present"));
     }
-    //
+    // 🧠 Model check
+    const model = cfg.model;
+    if (!model) {
+        console.log(chalk.red("❌ No model configured.") +
+            "\n➡️ Set one with: " +
+            chalk.bold(chalk.bgGreen("scai config set-model <model>")));
+        ok = false;
+    }
+    else if (!modelExists(model)) {
+        console.log(chalk.red(`❌ Model '${model}' not installed in Ollama.`) +
+            "\n➡️ Install with: " +
+            chalk.bold(chalk.yellow(`ollama pull ${model}`)) +
+            " or choose another with: " +
+            chalk.bold(chalk.yellow("scai config set-model <model>")));
+        ok = false;
+    }
+    else {
+        console.log(chalk.green(`✅ Model '${model}' available`));
+    }
     if (ok) {
         console.log(chalk.bold.green("\n✅ Context OK\n"));
     }
```
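`modelExists` matches against the plain-text output of `ollama list`, whose first column is the model name. A small sketch of the same matching logic against a canned sample, to show what the prefix test accepts (the sample rows are illustrative; real output varies by install):

```js
// Illustrative sample of `ollama list` output, not captured from a real machine.
const sample = [
    'NAME                     ID            SIZE    MODIFIED',
    'llama3:8b                365c0bd3c000  4.7 GB  2 days ago',
    'nomic-embed-text:latest  0a109f422b47  274 MB  5 days ago',
].join('\n');

// Same test as modelExists: exact match, or model name followed by whitespace.
function matches(output, model) {
    return output
        .split('\n')
        .map(line => line.trim())
        .filter(Boolean)
        .some(line => line.toLowerCase().startsWith(model.toLowerCase() + ' ') ||
                      line.toLowerCase() === model.toLowerCase());
}

console.log(matches(sample, 'llama3:8b')); // true
console.log(matches(sample, 'mistral'));   // false
```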
package/dist/daemon/daemonBatch.js
CHANGED
```diff
@@ -1,4 +1,4 @@
-import {
+import { indexCodeForFile } from '../db/functionIndex.js';
 import fs from 'fs/promises';
 import fsSync from 'fs';
 import { generateEmbedding } from '../lib/generateEmbedding.js';
@@ -8,6 +8,7 @@ import { summaryModule } from '../pipeline/modules/summaryModule.js';
 import { classifyFile } from '../fileRules/classifyFile.js';
 import { getDbForRepo, getDbPathForRepo } from '../db/client.js';
 import { markFileAsSkippedByPath, selectUnprocessedFiles, updateFileWithSummaryAndEmbedding, } from '../db/sqlTemplates.js';
+import { kgModule } from '../pipeline/modules/kgModule.js';
 const MAX_FILES_PER_BATCH = 5;
 /**
  * Acquires a lock on the database to ensure that only one daemon batch
@@ -32,7 +33,6 @@ async function lockDb() {
  */
 export async function runDaemonBatch() {
     log('🟡 Starting daemon batch...');
-    // Selects up to MAX_FILES_PER_BATCH files that haven't been processed yet
     const db = getDbForRepo();
     const rows = db.prepare(selectUnprocessedFiles).all(MAX_FILES_PER_BATCH);
     if (rows.length === 0) {
@@ -42,13 +42,11 @@ export async function runDaemonBatch() {
     const release = await lockDb();
     for (const row of rows) {
         log(`📂 Processing file: ${row.path}`);
-        // Skip if file is missing from the file system
         if (!fsSync.existsSync(row.path)) {
             log(`⚠️ Skipped missing file: ${row.path}`);
             db.prepare(markFileAsSkippedByPath).run({ path: row.path });
             continue;
         }
-        // Skip if file is classified as something we don't process
         const classification = classifyFile(row.path);
         if (classification !== 'valid') {
             log(`⏭️ Skipping (${classification}): ${row.path}`);
@@ -57,24 +55,20 @@ export async function runDaemonBatch() {
         }
         try {
             const content = await fs.readFile(row.path, 'utf-8');
-            // Determine whether the file needs to be re-summarized
             const needsResummary = !row.summary ||
                 !row.indexed_at ||
                 (row.last_modified && new Date(row.last_modified) > new Date(row.indexed_at));
             if (needsResummary) {
                 log(`📝 Generating summary for ${row.path}...`);
-                // Generate a summary using the summary pipeline
                 const summaryResult = await summaryModule.run({ content, filepath: row.path });
                 const summary = summaryResult?.summary?.trim() || null;
                 let embedding = null;
-                // Generate an embedding from the summary (if present)
                 if (summary) {
                     const vector = await generateEmbedding(summary);
                     if (vector) {
                         embedding = JSON.stringify(vector);
                     }
                 }
-                // Update the file record with the new summary and embedding
                 db.prepare(updateFileWithSummaryAndEmbedding).run({
                     summary,
                     embedding,
@@ -85,19 +79,79 @@ export async function runDaemonBatch() {
             else {
                 log(`⚡ Skipped summary (up-to-date) for ${row.path}`);
             }
-
-
-
-
+            const success = await indexCodeForFile(row.path, row.id);
+            if (success) {
+                log(`✅ Indexed code for ${row.path}`);
+                try {
+                    log(`🔗 Building Knowledge Graph for ${row.path}...`);
+                    const kgInput = {
+                        fileId: row.id,
+                        filepath: row.path,
+                        summary: row.summary || undefined,
+                    };
+                    const kgResult = await kgModule.run(kgInput, content);
+                    log(`✅ Knowledge Graph built for ${row.path}`);
+                    log(`Entities: ${kgResult.entities.length}, Edges: ${kgResult.edges.length}`);
+                    // Persist KG entities + tags only if there are any
+                    if (kgResult.entities.length > 0) {
+                        const insertTag = db.prepare(`
+                            INSERT OR IGNORE INTO tags_master (name) VALUES (:name)
+                        `);
+                        const getTagId = db.prepare(`
+                            SELECT id FROM tags_master WHERE name = :name
+                        `);
+                        const insertEntityTag = db.prepare(`
+                            INSERT OR IGNORE INTO entity_tags (entity_type, entity_id, tag_id)
+                            VALUES (:entity_type, :entity_id, :tag_id)
+                        `);
+                        for (const entity of kgResult.entities) {
+                            // Skip entity if type or tags are missing
+                            if (!entity.type || !Array.isArray(entity.tags) || entity.tags.length === 0) {
+                                console.warn(`⚠ Skipping entity due to missing type or tags:`, entity);
+                                continue;
+                            }
+                            for (const tag of entity.tags) {
+                                // Skip empty or invalid tags
+                                if (!tag || typeof tag !== 'string') {
+                                    console.warn(`⚠ Skipping invalid tag for entity ${entity.type}:`, tag);
+                                    continue;
+                                }
+                                try {
+                                    // ✅ Use :name in SQL and plain key in object
+                                    insertTag.run({ name: tag });
+                                    const tagRow = getTagId.get({ name: tag });
+                                    if (!tagRow) {
+                                        console.warn(`⚠ Could not find tag ID for: ${tag}`);
+                                        continue;
+                                    }
+                                    insertEntityTag.run({
+                                        entity_type: entity.type,
+                                        entity_id: row.id,
+                                        tag_id: tagRow.id,
+                                    });
+                                }
+                                catch (err) {
+                                    console.error(`❌ Failed to persist entity/tag:`, { entity, tag, error: err });
+                                }
+                            }
+                        }
+                        log(`✅ Persisted entities + tags for ${row.path}`);
+                    }
+                    else {
+                        log(`⚠️ No entities found for ${row.path}, skipping DB inserts`);
+                    }
+                }
+                catch (kgErr) {
+                    log(`❌ KG build failed for ${row.path}: ${kgErr instanceof Error ? kgErr.message : String(kgErr)}`);
+                }
             }
             else {
-                log(`ℹ️ No
+                log(`ℹ️ No code elements extracted for ${row.path}`);
             }
         }
         catch (err) {
             log(`❌ Failed: ${row.path}: ${err instanceof Error ? err.message : String(err)}\n`);
         }
-        // Add a small delay to throttle processing
         await new Promise(resolve => setTimeout(resolve, 200));
     }
     await release();
```
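The tag persistence prepares three statements once per file and reuses them across entities and tags. The synchronous `prepare`/`run`/`get`/`lastInsertRowid` calls suggest the client is better-sqlite3, which accepts `@name`, `:name`, or `$name` in SQL while the bound object uses bare keys; a self-contained sketch under that assumption:

```js
// Sketch: better-sqlite3 named parameters, as used by the tag persistence above.
// Assumes the db client is better-sqlite3; the table mirrors tags_master.
import Database from 'better-sqlite3';

const db = new Database(':memory:');
db.exec(`CREATE TABLE tags_master (id INTEGER PRIMARY KEY AUTOINCREMENT, name TEXT UNIQUE NOT NULL)`);

// SQL uses :name; the object passed to run()/get() uses the bare key 'name'.
const insertTag = db.prepare(`INSERT OR IGNORE INTO tags_master (name) VALUES (:name)`);
const getTagId = db.prepare(`SELECT id FROM tags_master WHERE name = :name`);

insertTag.run({ name: 'parser' });             // inserted
insertTag.run({ name: 'parser' });             // ignored: name is UNIQUE
console.log(getTagId.get({ name: 'parser' })); // { id: 1 }
```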
package/dist/daemon/daemonWorker.js
CHANGED
```diff
@@ -31,9 +31,26 @@ export async function daemonWorker() {
     while (true) {
         try {
             log('🔄 Running daemon batch...');
-
+            // Wrap the batch in debug
+            let didWork = false;
+            try {
+                log('🔹 Running runDaemonBatch()...');
+                didWork = await runDaemonBatch();
+                log('✅ runDaemonBatch() completed successfully');
+            }
+            catch (batchErr) {
+                log('🔥 Error inside runDaemonBatch():', batchErr);
+            }
             if (!didWork) {
-
+                let queueEmpty = false;
+                try {
+                    log('🔹 Checking if queue is empty...');
+                    queueEmpty = await isQueueEmpty();
+                    log(`🔹 Queue empty status: ${queueEmpty}`);
+                }
+                catch (queueErr) {
+                    log('🔥 Error checking queue status:', queueErr);
+                }
                 if (queueEmpty) {
                     log('🕊️ No work found. Idling...');
                     await sleep(IDLE_SLEEP_MS * 3);
```
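The worker now isolates each awaited stage in its own try/catch so a throw inside `runDaemonBatch()` or `isQueueEmpty()` is logged and the loop keeps running. The same pattern in isolation (a sketch; `doBatch` is a stand-in for the real batch function):

```js
// Sketch: a crash-tolerant polling loop; one failing stage must not end the daemon.
const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

async function loop(doBatch) {
    while (true) {
        let didWork = false;
        try {
            didWork = await doBatch(); // stage 1: may throw
        }
        catch (err) {
            console.error('batch failed:', err); // log and keep looping
        }
        if (!didWork) {
            await sleep(3000); // idle back-off when there is nothing to do
        }
    }
}
```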
package/dist/db/functionExtractors/extractFromJs.js
CHANGED
```diff
@@ -29,6 +29,7 @@ export async function extractFromJS(filePath, content, fileId) {
         locations: true,
     });
     const functions = [];
+    const classes = [];
     walkAncestor(ast, {
         FunctionDeclaration(node, ancestors) {
             const parent = ancestors[ancestors.length - 2];
@@ -60,31 +61,63 @@ export async function extractFromJS(filePath, content, fileId) {
                 content: content.slice(node.start, node.end),
             });
         },
+        ClassDeclaration(node) {
+            const className = node.id?.name || `${path.basename(filePath)}:<anon-class>`;
+            classes.push({
+                name: className,
+                start_line: node.loc?.start.line ?? -1,
+                end_line: node.loc?.end.line ?? -1,
+                content: content.slice(node.start, node.end),
+                superClass: node.superClass?.name ?? null,
+            });
+        },
+        ClassExpression(node) {
+            const className = node.id?.name || `${path.basename(filePath)}:<anon-class>`;
+            classes.push({
+                name: className,
+                start_line: node.loc?.start.line ?? -1,
+                end_line: node.loc?.end.line ?? -1,
+                content: content.slice(node.start, node.end),
+                superClass: node.superClass?.name ?? null,
+            });
+        },
     });
-    if (functions.length === 0) {
-        log(`⚠️ No functions found in: ${filePath}`);
+    if (functions.length === 0 && classes.length === 0) {
+        log(`⚠️ No functions/classes found in: ${filePath}`);
         db.prepare(markFileAsSkippedTemplate).run({ id: fileId });
         return false;
     }
-    log(`🔍 Found ${functions.length} functions in ${filePath}`);
+    log(`🔍 Found ${functions.length} functions and ${classes.length} classes in ${filePath}`);
+    // Insert functions
     for (const fn of functions) {
         const embedding = await generateEmbedding(fn.content);
-        const result = db
+        const result = db
+            .prepare(`
             INSERT INTO functions (
                 file_id, name, start_line, end_line, content, embedding, lang
             ) VALUES (
                 @file_id, @name, @start_line, @end_line, @content, @embedding, @lang
             )
-        `)
+        `)
+            .run({
             file_id: fileId,
             name: fn.name,
             start_line: fn.start_line,
             end_line: fn.end_line,
             content: fn.content,
             embedding: JSON.stringify(embedding),
-            lang: 'js'
+            lang: 'js',
+        });
+        const functionId = result.lastInsertRowid;
+        // file → function edge
+        db.prepare(`INSERT INTO edges (source_type, source_id, target_type, target_id, relation)
+            VALUES (@source_type, @source_id, @target_type, @target_id, 'contains')`).run({
+            source_type: 'file',
+            source_id: fileId,
+            target_type: 'function',
+            target_id: functionId,
         });
-
+        // Walk inside function to find calls
         const fnAst = parse(fn.content, {
             ecmaVersion: 'latest',
             sourceType: 'module',
@@ -96,26 +129,73 @@ export async function extractFromJS(filePath, content, fileId) {
                 if (node.callee?.type === 'Identifier' && node.callee.name) {
                     calls.push({ calleeName: node.callee.name });
                 }
-            }
+            },
         });
         for (const call of calls) {
-
-
-
-
-
-
+            // Store name for later resolution
+            db.prepare(`INSERT INTO function_calls (caller_id, callee_name) VALUES (@caller_id, @callee_name)`).run({ caller_id: functionId, callee_name: call.calleeName });
+            // Optional unresolved edge
+            db.prepare(`INSERT INTO edges (source_type, source_id, target_type, target_id, relation)
+                VALUES (@source_type, @source_id, @target_type, @target_id, 'calls')`).run({
+                source_type: 'function',
+                source_id: functionId,
+                target_type: 'function',
+                target_id: 0, // unresolved callee
            });
        }
        log(`📌 Indexed function: ${fn.name} with ${calls.length} calls`);
    }
+    // Insert classes
+    for (const cls of classes) {
+        const embedding = await generateEmbedding(cls.content);
+        const result = db
+            .prepare(`
+            INSERT INTO classes (
+                file_id, name, start_line, end_line, content, embedding, lang
+            ) VALUES (
+                @file_id, @name, @start_line, @end_line, @content, @embedding, @lang
+            )
+        `)
+            .run({
+            file_id: fileId,
+            name: cls.name,
+            start_line: cls.start_line,
+            end_line: cls.end_line,
+            content: cls.content,
+            embedding: JSON.stringify(embedding),
+            lang: 'js',
+        });
+        const classId = result.lastInsertRowid;
+        // file → class edge
+        db.prepare(`INSERT INTO edges (source_type, source_id, target_type, target_id, relation)
+            VALUES (@source_type, @source_id, @target_type, @target_id, 'contains')`).run({
+            source_type: 'file',
+            source_id: fileId,
+            target_type: 'class',
+            target_id: classId,
+        });
+        // superclass → store unresolved reference
+        if (cls.superClass) {
+            db.prepare(`INSERT INTO edges (source_type, source_id, target_type, target_id, relation)
+                VALUES (@source_type, @source_id, @target_type, @target_id, 'inherits')`).run({
+                source_type: 'class',
+                source_id: classId,
+                target_type: 'class',
+                target_id: 0, // unresolved superclass
+            });
+            console.log(`🔗 Class ${cls.name} inherits ${cls.superClass} (edge stored for later resolution)`);
+        }
+        console.log(`🏷 Indexed class: ${cls.name} (id=${classId})`);
+    }
+    // Optional summary after extraction
+    console.log(`📊 Extraction summary for ${filePath}: ${functions.length} functions, ${classes.length} classes`);
     db.prepare(markFileAsExtractedTemplate).run({ id: fileId });
-    log(`✅ Marked functions as extracted for ${filePath}`);
+    log(`✅ Marked functions/classes as extracted for ${filePath}`);
     return true;
 }
 catch (err) {
     log(`❌ Failed to extract from: ${filePath}`);
-    log(`   ↳ ${
+    log(`   ↳ ${err.message}`);
     db.prepare(markFileAsFailedTemplate).run({ id: fileId });
     return false;
 }
```
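The extractor leans on acorn's node positions: `node.start`/`node.end` are character offsets into the source (used to slice out each node's text) and `loc` carries line numbers when `locations: true` is set. A minimal standalone sketch of the class-collection part, assuming `walkAncestor` is acorn-walk's `ancestor` visitor:

```js
// Sketch: collect class declarations with acorn + acorn-walk,
// mirroring the ClassDeclaration handler in the diff above.
import { parse } from 'acorn';
import { ancestor as walkAncestor } from 'acorn-walk';

const src = `class Dog extends Animal { bark() {} }`;
const ast = parse(src, { ecmaVersion: 'latest', sourceType: 'module', locations: true });

const classes = [];
walkAncestor(ast, {
    ClassDeclaration(node) {
        classes.push({
            name: node.id?.name ?? '<anon-class>',
            start_line: node.loc?.start.line ?? -1,
            end_line: node.loc?.end.line ?? -1,
            content: src.slice(node.start, node.end), // acorn's character offsets
            superClass: node.superClass?.name ?? null, // only Identifier superclasses have .name
        });
    },
});
console.log(classes); // [{ name: 'Dog', ..., superClass: 'Animal' }]
```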
package/dist/db/functionExtractors/extractFromTs.js
CHANGED
```diff
@@ -1,15 +1,16 @@
-import { Project, SyntaxKind } from 'ts-morph';
+import { Project, SyntaxKind, } from 'ts-morph';
 import path from 'path';
 import { generateEmbedding } from '../../lib/generateEmbedding.js';
 import { log } from '../../utils/log.js';
 import { getDbForRepo } from '../client.js';
-import { markFileAsSkippedTemplate, markFileAsExtractedTemplate, markFileAsFailedTemplate } from '../sqlTemplates.js';
+import { markFileAsSkippedTemplate, markFileAsExtractedTemplate, markFileAsFailedTemplate, } from '../sqlTemplates.js';
 export async function extractFromTS(filePath, content, fileId) {
     const db = getDbForRepo();
     try {
         const project = new Project({ useInMemoryFileSystem: true });
         const sourceFile = project.createSourceFile(filePath, content);
         const functions = [];
+        const classes = [];
         const allFuncs = [
             ...sourceFile.getDescendantsOfKind(SyntaxKind.FunctionDeclaration),
             ...sourceFile.getDescendantsOfKind(SyntaxKind.FunctionExpression),
@@ -22,45 +23,101 @@ export async function extractFromTS(filePath, content, fileId) {
             const code = fn.getText();
             functions.push({ name, start_line: start, end_line: end, content: code });
         }
-
-
+        const allClasses = [
+            ...sourceFile.getDescendantsOfKind(SyntaxKind.ClassDeclaration),
+            ...sourceFile.getDescendantsOfKind(SyntaxKind.ClassExpression),
+        ];
+        for (const cls of allClasses) {
+            const name = cls.getName() ?? `${path.basename(filePath)}:<anon-class>`;
+            const start = cls.getStartLineNumber();
+            const end = cls.getEndLineNumber();
+            const code = cls.getText();
+            const superClass = cls.getExtends()?.getText() ?? null;
+            classes.push({
+                name,
+                start_line: start,
+                end_line: end,
+                content: code,
+                superClass,
+            });
+        }
+        if (functions.length === 0 && classes.length === 0) {
+            log(`⚠️ No functions/classes found in TS file: ${filePath}`);
             db.prepare(markFileAsSkippedTemplate).run({ id: fileId });
             return false;
         }
-        log(`🔍 Found ${functions.length}
+        log(`🔍 Found ${functions.length} functions and ${classes.length} classes in ${filePath}`);
+        // Insert functions
         for (const fn of functions) {
             const embedding = await generateEmbedding(fn.content);
-            const result = db
+            const result = db
+                .prepare(`
                 INSERT INTO functions (
                     file_id, name, start_line, end_line, content, embedding, lang
                 ) VALUES (
                     @file_id, @name, @start_line, @end_line, @content, @embedding, @lang
                 )
-            `)
+            `)
+                .run({
                 file_id: fileId,
                 name: fn.name,
                 start_line: fn.start_line,
                 end_line: fn.end_line,
                 content: fn.content,
                 embedding: JSON.stringify(embedding),
-                lang: 'ts'
+                lang: 'ts',
             });
-            const
-            //
+            const functionId = result.lastInsertRowid;
+            // file → function edge
+            db.prepare(`INSERT INTO edges (source_type, source_id, target_type, target_id, relation)
+                VALUES ('file', @source_id, 'function', @target_id, 'contains')`).run({ source_id: fileId, target_id: functionId });
+            // Simplified call detection (regex)
             const callMatches = fn.content.matchAll(/(\w+)\s*\(/g);
             for (const match of callMatches) {
-
-
-
-
-                caller_id: callerId,
+                // Store call by name (resolution happens later)
+                db.prepare(`INSERT INTO function_calls (caller_id, callee_name)
+                    VALUES (@caller_id, @callee_name)`).run({
+                    caller_id: functionId,
                    callee_name: match[1],
                });
            }
            log(`📌 Indexed TS function: ${fn.name}`);
        }
+        // Insert classes
+        for (const cls of classes) {
+            const embedding = await generateEmbedding(cls.content);
+            const result = db
+                .prepare(`
+                INSERT INTO classes (
+                    file_id, name, start_line, end_line, content, embedding, lang
+                ) VALUES (
+                    @file_id, @name, @start_line, @end_line, @content, @embedding, @lang
+                )
+            `)
+                .run({
+                file_id: fileId,
+                name: cls.name,
+                start_line: cls.start_line,
+                end_line: cls.end_line,
+                content: cls.content,
+                embedding: JSON.stringify(embedding),
+                lang: 'ts',
+            });
+            const classId = result.lastInsertRowid;
+            // file → class edge
+            db.prepare(`INSERT INTO edges (source_type, source_id, target_type, target_id, relation)
+                VALUES ('file', @source_id, 'class', @target_id, 'contains')`).run({ source_id: fileId, target_id: classId });
+            // superclass reference → store in helper table for later resolution
+            if (cls.superClass) {
+                db.prepare(`INSERT INTO class_inheritance (class_id, super_name)
+                    VALUES (@class_id, @super_name)`).run({ class_id: classId, super_name: cls.superClass });
+                log(`🔗 Class ${cls.name} extends ${cls.superClass} (edge stored for later resolution)`);
+            }
+            log(`🏷 Indexed TS class: ${cls.name} (id=${classId})`);
+        }
+        log(`📊 Extraction summary for ${filePath}: ${functions.length} functions, ${classes.length} classes`);
         db.prepare(markFileAsExtractedTemplate).run({ id: fileId });
-        log(`✅ Marked TS functions as extracted for ${filePath}`);
+        log(`✅ Marked TS functions/classes as extracted for ${filePath}`);
         return true;
     }
     catch (err) {
```
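ts-morph does the equivalent work for TypeScript without a manual AST walk: `getDescendantsOfKind` flattens the search, and `getExtends()` returns the heritage clause whose text becomes the superclass reference. A small sketch of the class-collection loop above, runnable against an in-memory file (the sample source is illustrative):

```js
// Sketch: extract class metadata with ts-morph, mirroring the allClasses loop above.
import { Project, SyntaxKind } from 'ts-morph';

const project = new Project({ useInMemoryFileSystem: true });
const sourceFile = project.createSourceFile('sample.ts', `
class Repo extends Base { save() {} }
`);

for (const cls of sourceFile.getDescendantsOfKind(SyntaxKind.ClassDeclaration)) {
    console.log({
        name: cls.getName() ?? '<anon-class>',           // undefined for `export default class {}`
        start_line: cls.getStartLineNumber(),
        end_line: cls.getEndLineNumber(),
        superClass: cls.getExtends()?.getText() ?? null, // heritage clause text, e.g. 'Base'
    });
}
```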
package/dist/db/functionExtractors/index.js
CHANGED
```diff
@@ -1,43 +1,44 @@
-import {
-import {
-import { extractFromJava } from
-import { extractFromJS } from
-import {
-import {
-import {
-import {
-/**
- * Detects file type and delegates to the appropriate extractor.
- */
+import { getDbForRepo } from "../client.js";
+import { markFileAsSkippedByPath, markFileAsFailedTemplate } from "../sqlTemplates.js";
+import { extractFromJava } from "./extractFromJava.js";
+import { extractFromJS } from "./extractFromJs.js";
+import { extractFromTS } from "./extractFromTs.js";
+import { extractFromXML } from "./extractFromXML.js";
+import { detectFileType } from "../../fileRules/detectFileType.js";
+import { log } from "../../utils/log.js";
 export async function extractFunctionsFromFile(filePath, content, fileId) {
     const type = detectFileType(filePath).trim().toLowerCase();
     const db = getDbForRepo();
     try {
-
-
-
+        let success = false;
+        switch (type) {
+            case 'js':
+            case 'javascript':
+                log(`📄 Extracting JS code from ${filePath}`);
+                success = await extractFromJS(filePath, content, fileId);
+                break;
+            case 'ts':
+            case 'typescript':
+                log(`📘 Extracting TS code from ${filePath}`);
+                success = await extractFromTS(filePath, content, fileId);
+                break;
+            case 'java':
+                log(`⚠️ Java extraction not implemented for ${filePath}`);
+                await extractFromJava(filePath, content, fileId);
+                return false;
+            case 'xml':
+                log(`⚠️ XML extraction not implemented for ${filePath}`);
+                await extractFromXML(filePath, content, fileId);
+                return false;
+            default:
+                log(`⚠️ Unsupported file type: ${type}. Skipping ${filePath}`);
+                db.prepare(markFileAsSkippedByPath).run({ path: filePath });
+                return false;
         }
-
-        log(`📘 Extracting TS functions from ${filePath}`);
-        return await extractFromTS(filePath, content, fileId);
-    }
-    if (type === 'java') {
-        log(`❌ Nothing extracted for ${filePath} due to missing implementation`);
-        await extractFromJava(filePath, content, fileId);
-        return false;
-    }
-    if (type === 'xml') {
-        log(`❌ Nothing extracted for ${filePath} due to missing implementation`);
-        await extractFromXML(filePath, content, fileId);
-        return false;
-    }
-    log(`⚠️ Unsupported file type: ${type} for function extraction. Skipping ${filePath}`);
-    db.prepare(markFileAsSkippedByPath).run({ path: filePath });
-    return false;
+        return success;
     }
     catch (error) {
-        log(`❌ Failed to extract
-        // Use the sqlTemplate to mark the file as 'failed'
+        log(`❌ Failed to extract from ${filePath}: ${error instanceof Error ? error.message : error}`);
         db.prepare(markFileAsFailedTemplate).run({ id: fileId });
         return false;
     }
```
package/dist/db/functionIndex.js
CHANGED
```diff
@@ -5,7 +5,7 @@ import { extractFunctionsFromFile } from './functionExtractors/index.js';
  * Extracts functions from file if language is supported.
  * Returns true if functions were extracted, false otherwise.
  */
-export async function
+export async function indexCodeForFile(filePath, fileId) {
     const normalizedPath = path.normalize(filePath).replace(/\\/g, '/');
     const content = fs.readFileSync(filePath, 'utf-8');
     return await extractFunctionsFromFile(normalizedPath, content, fileId);
```
package/dist/db/schema.js
CHANGED
```diff
@@ -1,8 +1,8 @@
 import { getDbForRepo } from "./client.js";
 export function initSchema() {
     const db = getDbForRepo();
+    // --- Existing tables ---
     db.exec(`
-    -- Create the files table
     CREATE TABLE IF NOT EXISTS files (
         id INTEGER PRIMARY KEY AUTOINCREMENT,
         path TEXT UNIQUE,
@@ -16,12 +16,9 @@ export function initSchema() {
         functions_extracted_at TEXT
     );
 
-    -- Create the full-text search table, auto-updated via content=files
     CREATE VIRTUAL TABLE IF NOT EXISTS files_fts
     USING fts5(filename, summary, path, content='files', content_rowid='id');
     `);
-    console.log('✅ SQLite schema initialized with FTS5 auto-sync');
-    // Create additional tables for functions and function_calls
     db.exec(`
     CREATE TABLE IF NOT EXISTS functions (
         id INTEGER PRIMARY KEY AUTOINCREMENT,
@@ -41,5 +38,54 @@ export function initSchema() {
         callee_name TEXT
     );
     `);
-
+    // --- KG-specific additions ---
+    // Classes table
+    db.exec(`
+    CREATE TABLE IF NOT EXISTS classes (
+        id INTEGER PRIMARY KEY AUTOINCREMENT,
+        file_id INTEGER REFERENCES files(id),
+        name TEXT,
+        start_line INTEGER,
+        end_line INTEGER,
+        content TEXT,
+        embedding TEXT,
+        lang TEXT
+    );
+
+    CREATE INDEX IF NOT EXISTS idx_class_file_id ON classes(file_id);
+    `);
+    // Edges table (function/class/file relations)
+    db.exec(`
+    CREATE TABLE IF NOT EXISTS edges (
+        id INTEGER PRIMARY KEY AUTOINCREMENT,
+        source_type TEXT NOT NULL, -- 'function' | 'class' | 'file'
+        source_id INTEGER NOT NULL,
+        target_type TEXT NOT NULL,
+        target_id INTEGER NOT NULL,
+        relation TEXT NOT NULL -- e.g., 'calls', 'inherits', 'contains'
+    );
+
+    CREATE INDEX IF NOT EXISTS idx_edges_source ON edges(source_type, source_id);
+    CREATE INDEX IF NOT EXISTS idx_edges_target ON edges(target_type, target_id);
+    `);
+    // --- Improved tags setup ---
+    // Master tag table
+    db.exec(`
+    CREATE TABLE IF NOT EXISTS tags_master (
+        id INTEGER PRIMARY KEY AUTOINCREMENT,
+        name TEXT UNIQUE NOT NULL
+    );
+
+    CREATE TABLE IF NOT EXISTS entity_tags (
+        id INTEGER PRIMARY KEY AUTOINCREMENT,
+        entity_type TEXT NOT NULL, -- 'function' | 'class' | 'file'
+        entity_id INTEGER NOT NULL,
+        tag_id INTEGER NOT NULL REFERENCES tags_master(id),
+        UNIQUE(entity_type, entity_id, tag_id)
+    );
+
+    CREATE INDEX IF NOT EXISTS idx_entity_tags_entity ON entity_tags(entity_type, entity_id);
+    CREATE INDEX IF NOT EXISTS idx_entity_tags_tag ON entity_tags(tag_id);
+    `);
+    console.log('✅ KG schema initialized (files, functions, classes, edges, tags)');
 }
```
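The polymorphic `(source_type, source_id, target_type, target_id)` shape means every graph traversal filters on a type column plus an id, which is exactly what the two composite indexes cover. A hedged example query against this schema: list the functions a file `contains` by joining `edges` back to `functions` (the file id is a placeholder):

```js
// Sketch: traverse 'contains' edges from a file to its functions.
// Assumes the same synchronous db client used elsewhere in the package.
import { getDbForRepo } from './client.js';

const db = getDbForRepo();
const containedFunctions = db.prepare(`
    SELECT f.id, f.name, f.start_line, f.end_line
    FROM edges e
    JOIN functions f ON f.id = e.target_id
    WHERE e.source_type = 'file'
      AND e.source_id = @fileId
      AND e.target_type = 'function'
      AND e.relation = 'contains'
`).all({ fileId: 42 }); // 42 is a placeholder file id
console.log(containedFunctions);
```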
package/dist/index.js
CHANGED
```diff
@@ -193,9 +193,11 @@ const config = cmd.command('config').description('Manage SCAI configuration');
 config
     .command('set-model <model>')
     .description('Set the model to use')
-    .
+    .option('-g, --global', 'Set the global default model instead of the active repo')
+    .action(async (model, options) => {
     await withContext(async () => {
-
+        const scope = options.global ? 'global' : 'repo';
+        Config.setModel(model, scope);
         Config.show();
     });
 });
@@ -337,14 +339,8 @@ cmd.addHelpText('after', `
 💡 Use with caution and expect possible changes or instability.
 `);
 cmd.parse(process.argv);
-const opts = cmd.opts();
-if (opts.model)
-    Config.setModel(opts.model);
-if (opts.lang)
-    Config.setLanguage(opts.lang);
 async function withContext(action) {
     const ok = await updateContext();
-    if (!ok)
-        process.exit(1);
+    //if (!ok) process.exit(1);
     await action();
 }
```
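The `-g, --global` flag is plain commander wiring: the parsed options object reaches the action handler, which turns the boolean into the `scope` string for `Config.setModel` (so `scai config set-model llama3:8b --global` sets the global default). A stripped-down sketch of that wiring, with the real `Config` call stubbed out:

```js
// Sketch: how the --global flag reaches Config.setModel, per the diff above.
// Command names come from the source; the console.log stands in for Config.setModel.
import { Command } from 'commander';

const cmd = new Command();
cmd.command('config')
    .command('set-model <model>')
    .option('-g, --global', 'Set the global default model instead of the active repo')
    .action((model, options) => {
        const scope = options.global ? 'global' : 'repo';
        console.log(`would call Config.setModel('${model}', '${scope}')`);
    });

cmd.parse(['node', 'scai', 'config', 'set-model', 'llama3:8b', '--global']);
```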
package/dist/modelSetup.js
CHANGED
```diff
@@ -9,7 +9,7 @@ import { readConfig, writeConfig } from './config.js';
 import { CONFIG_PATH } from './constants.js';
 // Constants
 const MODEL_PORT = 11434;
-const REQUIRED_MODELS = ['llama3'
+const REQUIRED_MODELS = ['llama3:8b'];
 const OLLAMA_URL = 'https://ollama.com/download';
 const isYesMode = process.argv.includes('--yes') || process.env.SCAI_YES === '1';
 let ollamaChecked = false;
@@ -30,16 +30,16 @@ export async function autoInitIfNeeded() {
         }
     }
 }
-// 🗨 Prompt user with
-function promptUser(question) {
+// 🗨 Prompt user with configurable timeout
+function promptUser(question, timeout = 20000) {
     if (isYesMode)
         return Promise.resolve('y');
     const rl = readline.createInterface({ input: process.stdin, output: process.stdout });
     return new Promise((resolve) => {
         const timer = setTimeout(() => {
             rl.close();
-            resolve('');
-        },
+            resolve(''); // treat empty as "continue"
+        }, timeout);
         rl.question(question, (answer) => {
             clearTimeout(timer);
             rl.close();
@@ -89,7 +89,7 @@ async function ensureOllamaRunning() {
             windowsHide: true,
         });
         child.unref();
-        await new Promise((res) => setTimeout(res, 10000));
+        await new Promise((res) => setTimeout(res, 10000)); // give more time
         if (await isOllamaRunning()) {
             console.log(chalk.green('✅ Ollama started successfully.'));
             ollamaAvailable = true;
@@ -102,23 +102,21 @@ async function ensureOllamaRunning() {
             process.exit(1);
         }
     }
-    //
+    // Ollama not detected; prompt user but allow continuing
     console.log(chalk.red('❌ Ollama is not installed or not in PATH.'));
     console.log(chalk.yellow(`📦 Ollama is required to run local AI models.`));
-    const answer = await promptUser(
+    const answer = await promptUser(`🌐 Recommended model: ${REQUIRED_MODELS.join(', ')}\nOpen download page in browser? (y/N): `);
     if (answer.toLowerCase() === 'y') {
         openBrowser(OLLAMA_URL);
     }
-
-    await promptUser('👉 Press Enter once Ollama is installed and ready: ');
-    // Retry once
+    await promptUser('⏳ Press Enter once Ollama is installed or to continue without it: ');
     if (await isOllamaRunning()) {
         console.log(chalk.green('✅ Ollama detected. Continuing...'));
         ollamaAvailable = true;
     }
     else {
-        console.log(chalk.
-
+        console.log(chalk.yellow('⚠️ Ollama not running. Models will not be available until installed.'));
+        ollamaAvailable = false; // continue anyway
     }
 }
 // 🧰 List installed models
@@ -134,7 +132,7 @@ async function getInstalledModels() {
         return [];
     }
 }
-// 📥
+// 📥 Suggest required models but don’t block
 async function ensureModelsDownloaded() {
     if (!ollamaAvailable)
         return;
@@ -144,11 +142,11 @@ async function ensureModelsDownloaded() {
         console.log(chalk.green('✅ All required models are installed.'));
         return;
     }
-    console.log(chalk.yellow(`📦
-    const answer = await promptUser('⬇️
+    console.log(chalk.yellow(`📦 Suggested models: ${missing.join(', ')}`));
+    const answer = await promptUser('⬇️ Download them now? (y/N, continue anyway): ');
     if (answer.toLowerCase() !== 'y') {
-        console.log(chalk.
-
+        console.log(chalk.yellow('⚠️ Continuing without installing models. You can install later via config.'));
+        return;
     }
     for (const model of missing) {
         try {
@@ -157,8 +155,7 @@ async function ensureModelsDownloaded() {
             console.log(chalk.green(`✅ Pulled ${model}`));
         }
         catch {
-            console.log(chalk.red(`❌ Failed to pull ${model}
-            process.exit(1);
+            console.log(chalk.red(`❌ Failed to pull ${model}, continuing...`));
         }
     }
 }
```
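`promptUser` now takes the timeout as a parameter and resolves with an empty string when it fires, so an unattended install proceeds instead of hanging on stdin. The core readline-plus-timer pattern, as a standalone sketch:

```js
// Sketch: a readline question that resolves '' on timeout instead of hanging.
import readline from 'readline';

function promptUser(question, timeout = 20000) {
    const rl = readline.createInterface({ input: process.stdin, output: process.stdout });
    return new Promise((resolve) => {
        const timer = setTimeout(() => {
            rl.close();
            resolve(''); // empty answer: caller treats it as "continue"
        }, timeout);
        rl.question(question, (answer) => {
            clearTimeout(timer); // user answered in time
            rl.close();
            resolve(answer.trim());
        });
    });
}
```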
package/dist/pipeline/modules/cleanupModule.js
CHANGED
```diff
@@ -29,25 +29,21 @@ function isTopOrBottomNoise(line) {
 }
 export const cleanupModule = {
     name: 'cleanup',
-    description: 'Remove markdown fences
+    description: 'Remove markdown fences, fluff, and non-JSON lines with colored logging',
     async run(input) {
-        // Normalize line endings to \n
+        // Normalize line endings to \n
         let content = input.content.replace(/\r\n/g, '\n');
         let lines = content.split('\n');
         // --- CLEAN TOP ---
-        // Remove noise lines before the first triple tick or end
         while (lines.length && (lines[0].trim() === '' || isTopOrBottomNoise(lines[0]))) {
             if (/^```(?:\w+)?$/.test(lines[0].trim()))
-            break;
+                break;
             console.log(chalk.red(`[cleanupModule] Removing noise from top:`), chalk.yellow(`"${lines[0].trim()}"`));
             lines.shift();
         }
-        // If opening fence found at top, find matching closing fence
         if (lines.length && /^```(?:\w+)?$/.test(lines[0].trim())) {
             console.log(chalk.red(`[cleanupModule] Found opening fenced block at top.`));
-            // Remove opening fence line
             lines.shift();
-            // Find closing fence index
             let closingIndex = -1;
             for (let i = 0; i < lines.length; i++) {
                 if (/^```(?:\w+)?$/.test(lines[i].trim())) {
@@ -57,26 +53,22 @@ export const cleanupModule = {
             }
             if (closingIndex !== -1) {
                 console.log(chalk.red(`[cleanupModule] Found closing fenced block at line ${closingIndex + 1}, removing fence lines.`));
-                // Remove closing fence line
                 lines.splice(closingIndex, 1);
             }
             else {
                 console.log(chalk.yellow(`[cleanupModule] No closing fenced block found, only removed opening fence.`));
             }
-            // NO removal of noise lines after fenced block here (to keep new comments intact)
         }
         // --- CLEAN BOTTOM ---
-        // If closing fence found at bottom, remove only that triple tick line
         if (lines.length && /^```(?:\w+)?$/.test(lines[lines.length - 1].trim())) {
             console.log(chalk.red(`[cleanupModule] Removing closing fenced block line at bottom.`));
             lines.pop();
         }
-        // Remove noise lines after closing fence (now bottom)
         while (lines.length && (lines[lines.length - 1].trim() === '' || isTopOrBottomNoise(lines[lines.length - 1]))) {
             console.log(chalk.red(`[cleanupModule] Removing noise from bottom after fenced block:`), chalk.yellow(`"${lines[lines.length - 1].trim()}"`));
             lines.pop();
         }
-        // ---
+        // --- REMOVE ANY LINGERING TRIPLE TICK LINES ANYWHERE ---
         lines = lines.filter(line => {
             const trimmed = line.trim();
             if (/^```(?:\w+)?$/.test(trimmed)) {
@@ -85,6 +77,33 @@ export const cleanupModule = {
             }
             return true;
         });
-
+        // --- FINAL CLEANUP: KEEP ONLY JSON LINES INSIDE BRACES ---
+        let jsonLines = [];
+        let braceDepth = 0;
+        let insideBraces = false;
+        for (let line of lines) {
+            const trimmed = line.trim();
+            // Detect start of JSON object/array
+            if (!insideBraces && (trimmed.startsWith('{') || trimmed.startsWith('['))) {
+                insideBraces = true;
+            }
+            if (insideBraces) {
+                // Track nested braces/brackets
+                for (const char of trimmed) {
+                    if (char === '{' || char === '[')
+                        braceDepth++;
+                    if (char === '}' || char === ']')
+                        braceDepth--;
+                }
+                // Skip lines that are clearly non-JSON inside braces
+                if (!trimmed.startsWith('//') && !/^\/\*/.test(trimmed) && trimmed !== '') {
+                    jsonLines.push(line);
+                }
+                // Stop collecting after outermost brace closed
+                if (braceDepth === 0)
+                    break;
+            }
+        }
+        return { content: jsonLines.join('\n').trim() };
     }
 };
```
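The net effect of the new final pass is that everything outside the outermost `{...}`/`[...]` is discarded, so a chatty model reply still parses. A before/after sketch (the noisy reply is an invented example of typical LLM output):

```js
// Sketch: what cleanupModule.run produces for a typical noisy LLM reply.
import { cleanupModule } from './cleanupModule.js';

const noisy = [
    'Sure! Here is the JSON you asked for:',
    '```json',
    '{',
    '  "entities": [{ "name": "log", "type": "function", "tags": ["logging"] }],',
    '  "edges": []',
    '}',
    '```',
    'Let me know if you need anything else!',
].join('\n');

const { content } = await cleanupModule.run({ content: noisy });
console.log(JSON.parse(content)); // parses: fences and surrounding chatter are gone
```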
package/dist/pipeline/modules/kgModule.js
ADDED
```diff
@@ -0,0 +1,55 @@
+import { Config } from '../../config.js';
+import { generate } from '../../lib/generate.js';
+import path from 'path';
+import { cleanupModule } from './cleanupModule.js';
+export const kgModule = {
+    name: 'knowledge-graph',
+    description: 'Generates a knowledge graph of entities, tags, and relationships from file content.',
+    run: async (input, content) => {
+        const model = Config.getModel();
+        const ext = input.filepath ? path.extname(input.filepath).toLowerCase() : '';
+        const filename = input.filepath ? path.basename(input.filepath) : '';
+        const prompt = `
+You are an assistant specialized in building knowledge graphs from code or text.
+
+Your task is to extract structured information from the file content below.
+
+File: ${filename}
+Extension: ${ext}
+
+📋 Instructions:
+- Identify all entities (functions, classes, modules, or main concepts)
+- For each entity, generate tags describing its characteristics, purpose, or category
+- Identify relationships between entities (e.g., "uses", "extends", "calls")
+- Return output in JSON format with the following structure:
+
+{
+  "entities": [
+    { "name": "EntityName", "type": "class|function|module|concept", "tags": ["tag1", "tag2"] }
+  ],
+  "edges": [
+    { "from": "EntityName1", "to": "EntityName2", "type": "relationship_type" }
+  ]
+}
+
+Do NOT include raw content from the file. Only provide the structured JSON output.
+
+--- FILE CONTENT START ---
+${content}
+--- FILE CONTENT END ---
+`.trim();
+        const response = await generate({ content: prompt, filepath: input.filepath }, model);
+        try {
+            // Clean the model output first
+            const cleaned = await cleanupModule.run({ content: response.content });
+            console.log("Cleaned knowledge graph data: ", cleaned);
+            const jsonString = cleaned.content;
+            const parsed = JSON.parse(jsonString);
+            return parsed;
+        }
+        catch (err) {
+            console.warn('⚠️ Failed to parse KG JSON:', err);
+            return { entities: [], edges: [] }; // fallback
+        }
+    }
+};
```
package/dist/scripts/dbcheck.js
CHANGED
```diff
@@ -224,3 +224,101 @@ const functionRows = db.prepare(`
     LIMIT 50
 `).all();
 console.table(functionRows);
+// === Class Table Stats ===
+console.log('\n📊 Stats for Table: classes');
+console.log('-------------------------------------------');
+try {
+    const classCount = db.prepare(`SELECT COUNT(*) AS count FROM classes`).get().count;
+    const distinctClassFiles = db.prepare(`SELECT COUNT(DISTINCT file_id) AS count FROM classes`).get().count;
+    console.log(`🏷 Total classes: ${classCount}`);
+    console.log(`📂 Distinct files: ${distinctClassFiles}`);
+}
+catch (err) {
+    console.error('❌ Error accessing classes table:', err.message);
+}
+// === Example Classes ===
+console.log('\n🧪 Example extracted classes:');
+try {
+    const sampleClasses = db.prepare(`
+        SELECT id, name, file_id, start_line, end_line, substr(content, 1, 100) || '...' AS short_body
+        FROM classes
+        ORDER BY id DESC
+        LIMIT 5
+    `).all();
+    sampleClasses.forEach(cls => {
+        console.log(`🏷 ID: ${cls.id}`);
+        console.log(`   Name: ${cls.name}`);
+        console.log(`   File: ${cls.file_id}`);
+        console.log(`   Lines: ${cls.start_line}-${cls.end_line}`);
+        console.log(`   Body: ${cls.short_body}\n`);
+    });
+}
+catch (err) {
+    console.error('❌ Error printing class examples:', err.message);
+}
+// === Edge Table Stats ===
+console.log('\n📊 Stats for Table: edges');
+console.log('-------------------------------------------');
+try {
+    const edgeCount = db.prepare(`SELECT COUNT(*) AS count FROM edges`).get().count;
+    const distinctRelations = db.prepare(`SELECT COUNT(DISTINCT relation) AS count FROM edges`).get().count;
+    console.log(`🔗 Total edges: ${edgeCount}`);
+    console.log(`🧩 Distinct relations: ${distinctRelations}`);
+}
+catch (err) {
+    console.error('❌ Error accessing edges table:', err.message);
+}
+// === Example Edges ===
+console.log('\n🧪 Example edges:');
+try {
+    const sampleEdges = db.prepare(`
+        SELECT id, source_id, target_id, relation
+        FROM edges
+        ORDER BY id DESC
+        LIMIT 10
+    `).all();
+    sampleEdges.forEach(e => {
+        console.log(`🔗 Edge ${e.id}: ${e.source_id} -[${e.relation}]-> ${e.target_id}`);
+    });
+}
+catch (err) {
+    console.error('❌ Error printing edge examples:', err.message);
+}
+// === Tags Master Stats ===
+console.log('\n📊 Stats for Table: tags_master');
+console.log('-------------------------------------------');
+try {
+    const tagCount = db.prepare(`SELECT COUNT(*) AS count FROM tags_master`).get().count;
+    console.log(`🏷 Total tags: ${tagCount}`);
+    const sampleTags = db.prepare(`
+        SELECT id, name
+        FROM tags_master
+        ORDER BY id DESC
+        LIMIT 5
+    `).all();
+    sampleTags.forEach(tag => {
+        console.log(`🏷 Tag ${tag.id}: ${tag.name}`);
+    });
+}
+catch (err) {
+    console.error('❌ Error accessing tags_master table:', err.message);
+}
+// === Entity Tags Stats ===
+console.log('\n📊 Stats for Table: entity_tags');
+console.log('-------------------------------------------');
+try {
+    const entityTagCount = db.prepare(`SELECT COUNT(*) AS count FROM entity_tags`).get().count;
+    console.log(`🔗 Total entity-tags: ${entityTagCount}`);
+    const sampleEntityTags = db.prepare(`
+        SELECT id, entity_type, entity_id, tag_id
+        FROM entity_tags
+        ORDER BY id DESC
+        LIMIT 10
+    `).all();
+    sampleEntityTags.forEach(et => {
+        console.log(`🔗 EntityTag ${et.id}: ${et.entity_type} ${et.entity_id} -> tag ${et.tag_id}`);
+    });
+}
+catch (err) {
+    console.error('❌ Error accessing entity_tags table:', err.message);
+}
```
package/dist/utils/log.js
CHANGED
```diff
@@ -1,5 +1,5 @@
+import { LOG_PATH } from "../constants.js";
 import fs from 'fs';
-import { LOG_PATH } from '../constants.js';
 export function log(...args) {
     const timestamp = new Date().toISOString();
     const message = args.map(arg => typeof arg === 'string' ? arg : JSON.stringify(arg, null, 2)).join(' ');
```
package/package.json
CHANGED
```diff
@@ -1,6 +1,6 @@
 {
   "name": "scai",
-  "version": "0.1.109",
+  "version": "0.1.110",
   "type": "module",
   "bin": {
     "scai": "./dist/index.js"
@@ -34,7 +34,7 @@
     "workflow"
   ],
   "scripts": {
-    "build": "rm -rfd dist && tsc && git add .",
+    "build": "rm -rfd dist && tsc && chmod +x dist/index.js && git add .",
    "start": "node dist/index.js"
  },
  "dependencies": {
```