scai 0.1.109 → 0.1.110

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/CHANGELOG.md CHANGED
@@ -171,4 +171,22 @@ Type handling with the module pipeline
171
171
  ## 2025-09-02
172
172
 
173
173
  • Added test configuration for project and generated tests
174
- • Add runTestsModule and repairTestsModule for testing pipeline
174
+ • Add runTestsModule and repairTestsModule for testing pipeline
175
+
176
+ ## 2025-09-05
177
+
178
+ • Enable execution of files as executable files in the scripts
179
+ • Remove context failure if models not installed
180
+ • Add ability to set global model
181
+
182
+ ## 2025-09-08
183
+
184
+ ### Requires DB reset ('scai db reset' followed by 'scai index start')
185
+
186
+ 1. Improved daemon batch processing by skipping missing files, classifying unknown file types, and persisting entities/tags in the database.
187
+ 2. Invoke kgModule in daemonBatch to build knowledge graphs after indexing.
188
+ 3. Improved data modeling and extraction logic for functions and classes in TypeScript files.
189
+ 4. Updated Edge/Table schema for better query performance.
190
+ 5. Update package-lock.json to caniuse-lite@1.0.30001741.
191
+ 6. Enable execution of files as executable files in the scripts.
192
+ 7. Remove context failure if models not installed. Add ability to set global model.
@@ -23,9 +23,11 @@ export async function startDaemon() {
23
23
  const __filename = fileURLToPath(import.meta.url);
24
24
  const __dirname = path.dirname(__filename);
25
25
  const daemonWorkerPath = path.join(__dirname, '../daemon/daemonWorker.js');
26
+ const out = fsSync.openSync(LOG_PATH, 'a');
27
+ const err = fsSync.openSync(LOG_PATH, 'a');
26
28
  const child = spawn(process.execPath, [daemonWorkerPath], {
27
29
  detached: true,
28
- stdio: ['ignore', 'ignore', 'ignore'],
30
+ stdio: ['ignore', out, err], // stdout/stderr -> log file
29
31
  env: {
30
32
  ...process.env,
31
33
  BACKGROUND_MODE: 'true',
package/dist/config.js CHANGED
@@ -6,7 +6,7 @@ import { normalizePath } from './utils/contentUtils.js';
6
6
  import chalk from 'chalk';
7
7
  import { getHashedRepoKey } from './utils/repoKey.js';
8
8
  const defaultConfig = {
9
- model: 'codellama:13b',
9
+ model: 'llama3:8b',
10
10
  contextLength: 4096,
11
11
  language: 'ts',
12
12
  indexDir: '',
@@ -55,18 +55,23 @@ export const Config = {
55
55
  const repoCfg = cfg.repos?.[cfg.activeRepo ?? ''];
56
56
  return repoCfg?.model || cfg.model;
57
57
  },
58
- setModel(model) {
58
+ setModel(model, scope = 'repo') {
59
59
  const cfg = readConfig();
60
- const active = cfg.activeRepo;
61
- if (active) {
60
+ if (scope === 'repo') {
61
+ const active = cfg.activeRepo;
62
+ if (!active) {
63
+ console.error("❌ No active repo to set model for.");
64
+ return;
65
+ }
62
66
  cfg.repos[active] = { ...cfg.repos[active], model };
63
- writeConfig(cfg);
64
- console.log(`📦 Model set to: ${model}`);
67
+ console.log(`📦 Model set for repo '${active}': ${model}`);
65
68
  }
66
69
  else {
67
- writeConfig({ model });
68
- console.log(`📦 Default model set to: ${model}`);
70
+ // Set global default model
71
+ cfg.model = model;
72
+ console.log(`📦 Global default model set to: ${model}`);
69
73
  }
74
+ writeConfig(cfg);
70
75
  },
71
76
  getLanguage() {
72
77
  const cfg = readConfig();
package/dist/context.js CHANGED
@@ -5,12 +5,25 @@ import { getHashedRepoKey } from "./utils/repoKey.js";
5
5
  import { getDbForRepo, getDbPathForRepo } from "./db/client.js";
6
6
  import fs from "fs";
7
7
  import chalk from "chalk";
8
+ import { execSync } from "child_process";
9
+ function modelExists(model) {
10
+ try {
11
+ const output = execSync("ollama list", { encoding: "utf-8" });
12
+ return output
13
+ .split("\n")
14
+ .map(line => line.trim())
15
+ .filter(Boolean)
16
+ .some(line => line.toLowerCase().startsWith(model.toLowerCase() + " ") || line.toLowerCase() === model.toLowerCase());
17
+ }
18
+ catch (err) {
19
+ console.error(chalk.red("❌ Failed to check models with `ollama list`"));
20
+ return false;
21
+ }
22
+ }
8
23
  export async function updateContext() {
9
24
  const cwd = normalizePath(process.cwd());
10
25
  const cfg = readConfig();
11
- // 🔑 Find repoKey by matching indexDir to cwd
12
26
  let repoKey = Object.keys(cfg.repos || {}).find((key) => normalizePath(cfg.repos[key]?.indexDir || "") === cwd);
13
- // Initialize new repo config if not found
14
27
  let isNewRepo = false;
15
28
  if (!repoKey) {
16
29
  repoKey = getHashedRepoKey(cwd);
@@ -19,28 +32,23 @@ export async function updateContext() {
19
32
  cfg.repos[repoKey].indexDir = cwd;
20
33
  isNewRepo = true;
21
34
  }
22
- // Check if active repo has changed
23
35
  const activeRepoChanged = cfg.activeRepo !== repoKey;
24
- // Always set this as active repo
25
36
  cfg.activeRepo = repoKey;
26
37
  writeConfig(cfg);
27
38
  const repoCfg = cfg.repos[repoKey];
28
39
  let ok = true;
29
- // Only log detailed info if new repo or active repo changed
30
40
  if (isNewRepo || activeRepoChanged) {
31
41
  console.log(chalk.yellow("\n🔁 Updating context...\n"));
32
42
  console.log(`✅ Active repo: ${chalk.green(repoKey)}`);
33
43
  console.log(`✅ Index dir: ${chalk.cyan(repoCfg.indexDir || cwd)}`);
34
44
  }
35
- // GitHub token is optional
36
45
  const token = repoCfg.githubToken || cfg.githubToken;
37
46
  if (!token) {
38
- console.log(`ℹ️ No GitHub token found. You can set one with the: ${chalk.bold(chalk.bgGreen("scai auth set"))} command`);
47
+ console.log(`ℹ️ No GitHub token found. You can set one with: ${chalk.bold(chalk.bgGreen("scai auth set"))}`);
39
48
  }
40
49
  else if (isNewRepo || activeRepoChanged) {
41
50
  console.log(`✅ GitHub token present`);
42
51
  }
43
- // Ensure DB exists
44
52
  const dbPath = getDbPathForRepo();
45
53
  if (!fs.existsSync(dbPath)) {
46
54
  console.log(chalk.yellow(`📦 Initializing DB at ${dbPath}`));
@@ -48,13 +56,31 @@ export async function updateContext() {
48
56
  getDbForRepo();
49
57
  }
50
58
  catch {
51
- ok = false; // DB init failed
59
+ ok = false;
52
60
  }
53
61
  }
54
62
  else if (isNewRepo || activeRepoChanged) {
55
63
  console.log(chalk.green("✅ Database present"));
56
64
  }
57
- // Final context status
65
+ // 🧠 Model check
66
+ const model = cfg.model;
67
+ if (!model) {
68
+ console.log(chalk.red("❌ No model configured.") +
69
+ "\n➡️ Set one with: " +
70
+ chalk.bold(chalk.bgGreen("scai config set-model <model>")));
71
+ ok = false;
72
+ }
73
+ else if (!modelExists(model)) {
74
+ console.log(chalk.red(`❌ Model '${model}' not installed in Ollama.`) +
75
+ "\n➡️ Install with: " +
76
+ chalk.bold(chalk.yellow(`ollama pull ${model}`)) +
77
+ " or choose another with: " +
78
+ chalk.bold(chalk.yellow("scai config set-model <model>")));
79
+ ok = false;
80
+ }
81
+ else {
82
+ console.log(chalk.green(`✅ Model '${model}' available`));
83
+ }
58
84
  if (ok) {
59
85
  console.log(chalk.bold.green("\n✅ Context OK\n"));
60
86
  }
@@ -1,4 +1,4 @@
1
- import { indexFunctionsForFile } from '../db/functionIndex.js';
1
+ import { indexCodeForFile } from '../db/functionIndex.js';
2
2
  import fs from 'fs/promises';
3
3
  import fsSync from 'fs';
4
4
  import { generateEmbedding } from '../lib/generateEmbedding.js';
@@ -8,6 +8,7 @@ import { summaryModule } from '../pipeline/modules/summaryModule.js';
8
8
  import { classifyFile } from '../fileRules/classifyFile.js';
9
9
  import { getDbForRepo, getDbPathForRepo } from '../db/client.js';
10
10
  import { markFileAsSkippedByPath, selectUnprocessedFiles, updateFileWithSummaryAndEmbedding, } from '../db/sqlTemplates.js';
11
+ import { kgModule } from '../pipeline/modules/kgModule.js';
11
12
  const MAX_FILES_PER_BATCH = 5;
12
13
  /**
13
14
  * Acquires a lock on the database to ensure that only one daemon batch
@@ -32,7 +33,6 @@ async function lockDb() {
32
33
  */
33
34
  export async function runDaemonBatch() {
34
35
  log('🟡 Starting daemon batch...');
35
- // Selects up to MAX_FILES_PER_BATCH files that haven't been processed yet
36
36
  const db = getDbForRepo();
37
37
  const rows = db.prepare(selectUnprocessedFiles).all(MAX_FILES_PER_BATCH);
38
38
  if (rows.length === 0) {
@@ -42,13 +42,11 @@ export async function runDaemonBatch() {
42
42
  const release = await lockDb();
43
43
  for (const row of rows) {
44
44
  log(`📂 Processing file: ${row.path}`);
45
- // Skip if file is missing from the file system
46
45
  if (!fsSync.existsSync(row.path)) {
47
46
  log(`⚠️ Skipped missing file: ${row.path}`);
48
47
  db.prepare(markFileAsSkippedByPath).run({ path: row.path });
49
48
  continue;
50
49
  }
51
- // Skip if file is classified as something we don't process
52
50
  const classification = classifyFile(row.path);
53
51
  if (classification !== 'valid') {
54
52
  log(`⏭️ Skipping (${classification}): ${row.path}`);
@@ -57,24 +55,20 @@ export async function runDaemonBatch() {
57
55
  }
58
56
  try {
59
57
  const content = await fs.readFile(row.path, 'utf-8');
60
- // Determine whether the file needs to be re-summarized
61
58
  const needsResummary = !row.summary ||
62
59
  !row.indexed_at ||
63
60
  (row.last_modified && new Date(row.last_modified) > new Date(row.indexed_at));
64
61
  if (needsResummary) {
65
62
  log(`📝 Generating summary for ${row.path}...`);
66
- // Generate a summary using the summary pipeline
67
63
  const summaryResult = await summaryModule.run({ content, filepath: row.path });
68
64
  const summary = summaryResult?.summary?.trim() || null;
69
65
  let embedding = null;
70
- // Generate an embedding from the summary (if present)
71
66
  if (summary) {
72
67
  const vector = await generateEmbedding(summary);
73
68
  if (vector) {
74
69
  embedding = JSON.stringify(vector);
75
70
  }
76
71
  }
77
- // Update the file record with the new summary and embedding
78
72
  db.prepare(updateFileWithSummaryAndEmbedding).run({
79
73
  summary,
80
74
  embedding,
@@ -85,19 +79,79 @@ export async function runDaemonBatch() {
85
79
  else {
86
80
  log(`⚡ Skipped summary (up-to-date) for ${row.path}`);
87
81
  }
88
- // Extract top-level functions from the file and update the DB
89
- const extracted = await indexFunctionsForFile(row.path, row.id);
90
- if (extracted) {
91
- log(`✅ Function extraction complete for ${row.path}\n`);
82
+ const success = await indexCodeForFile(row.path, row.id);
83
+ if (success) {
84
+ log(`✅ Indexed code for ${row.path}`);
85
+ try {
86
+ log(`🔗 Building Knowledge Graph for ${row.path}...`);
87
+ const kgInput = {
88
+ fileId: row.id,
89
+ filepath: row.path,
90
+ summary: row.summary || undefined,
91
+ };
92
+ const kgResult = await kgModule.run(kgInput, content);
93
+ log(`✅ Knowledge Graph built for ${row.path}`);
94
+ log(`Entities: ${kgResult.entities.length}, Edges: ${kgResult.edges.length}`);
95
+ // Persist KG entities + tags only if there are any
96
+ if (kgResult.entities.length > 0) {
97
+ const insertTag = db.prepare(`
98
+ INSERT OR IGNORE INTO tags_master (name) VALUES (:name)
99
+ `);
100
+ const getTagId = db.prepare(`
101
+ SELECT id FROM tags_master WHERE name = :name
102
+ `);
103
+ const insertEntityTag = db.prepare(`
104
+ INSERT OR IGNORE INTO entity_tags (entity_type, entity_id, tag_id)
105
+ VALUES (:entity_type, :entity_id, :tag_id)
106
+ `);
107
+ for (const entity of kgResult.entities) {
108
+ // Skip entity if type or tags are missing
109
+ if (!entity.type || !Array.isArray(entity.tags) || entity.tags.length === 0) {
110
+ console.warn(`⚠ Skipping entity due to missing type or tags:`, entity);
111
+ continue;
112
+ }
113
+ for (const tag of entity.tags) {
114
+ // Skip empty or invalid tags
115
+ if (!tag || typeof tag !== 'string') {
116
+ console.warn(`⚠ Skipping invalid tag for entity ${entity.type}:`, tag);
117
+ continue;
118
+ }
119
+ try {
120
+ // ✅ Use :name in SQL and plain key in object
121
+ insertTag.run({ name: tag });
122
+ const tagRow = getTagId.get({ name: tag });
123
+ if (!tagRow) {
124
+ console.warn(`⚠ Could not find tag ID for: ${tag}`);
125
+ continue;
126
+ }
127
+ insertEntityTag.run({
128
+ entity_type: entity.type,
129
+ entity_id: row.id,
130
+ tag_id: tagRow.id,
131
+ });
132
+ }
133
+ catch (err) {
134
+ console.error(`❌ Failed to persist entity/tag:`, { entity, tag, error: err });
135
+ }
136
+ }
137
+ }
138
+ log(`✅ Persisted entities + tags for ${row.path}`);
139
+ }
140
+ else {
141
+ log(`⚠️ No entities found for ${row.path}, skipping DB inserts`);
142
+ }
143
+ }
144
+ catch (kgErr) {
145
+ log(`❌ KG build failed for ${row.path}: ${kgErr instanceof Error ? kgErr.message : String(kgErr)}`);
146
+ }
92
147
  }
93
148
  else {
94
- log(`ℹ️ No functions extracted for ${row.path}\n`);
149
+ log(`ℹ️ No code elements extracted for ${row.path}`);
95
150
  }
96
151
  }
97
152
  catch (err) {
98
153
  log(`❌ Failed: ${row.path}: ${err instanceof Error ? err.message : String(err)}\n`);
99
154
  }
100
- // Add a small delay to throttle processing
101
155
  await new Promise(resolve => setTimeout(resolve, 200));
102
156
  }
103
157
  await release();
@@ -31,9 +31,26 @@ export async function daemonWorker() {
31
31
  while (true) {
32
32
  try {
33
33
  log('🔄 Running daemon batch...');
34
- const didWork = await runDaemonBatch();
34
+ // Wrap the batch in debug
35
+ let didWork = false;
36
+ try {
37
+ log('🔹 Running runDaemonBatch()...');
38
+ didWork = await runDaemonBatch();
39
+ log('✅ runDaemonBatch() completed successfully');
40
+ }
41
+ catch (batchErr) {
42
+ log('🔥 Error inside runDaemonBatch():', batchErr);
43
+ }
35
44
  if (!didWork) {
36
- const queueEmpty = await isQueueEmpty();
45
+ let queueEmpty = false;
46
+ try {
47
+ log('🔹 Checking if queue is empty...');
48
+ queueEmpty = await isQueueEmpty();
49
+ log(`🔹 Queue empty status: ${queueEmpty}`);
50
+ }
51
+ catch (queueErr) {
52
+ log('🔥 Error checking queue status:', queueErr);
53
+ }
37
54
  if (queueEmpty) {
38
55
  log('🕊️ No work found. Idling...');
39
56
  await sleep(IDLE_SLEEP_MS * 3);
@@ -29,6 +29,7 @@ export async function extractFromJS(filePath, content, fileId) {
29
29
  locations: true,
30
30
  });
31
31
  const functions = [];
32
+ const classes = [];
32
33
  walkAncestor(ast, {
33
34
  FunctionDeclaration(node, ancestors) {
34
35
  const parent = ancestors[ancestors.length - 2];
@@ -60,31 +61,63 @@ export async function extractFromJS(filePath, content, fileId) {
60
61
  content: content.slice(node.start, node.end),
61
62
  });
62
63
  },
64
+ ClassDeclaration(node) {
65
+ const className = node.id?.name || `${path.basename(filePath)}:<anon-class>`;
66
+ classes.push({
67
+ name: className,
68
+ start_line: node.loc?.start.line ?? -1,
69
+ end_line: node.loc?.end.line ?? -1,
70
+ content: content.slice(node.start, node.end),
71
+ superClass: node.superClass?.name ?? null,
72
+ });
73
+ },
74
+ ClassExpression(node) {
75
+ const className = node.id?.name || `${path.basename(filePath)}:<anon-class>`;
76
+ classes.push({
77
+ name: className,
78
+ start_line: node.loc?.start.line ?? -1,
79
+ end_line: node.loc?.end.line ?? -1,
80
+ content: content.slice(node.start, node.end),
81
+ superClass: node.superClass?.name ?? null,
82
+ });
83
+ },
63
84
  });
64
- if (functions.length === 0) {
65
- log(`⚠️ No functions found in: ${filePath}`);
85
+ if (functions.length === 0 && classes.length === 0) {
86
+ log(`⚠️ No functions/classes found in: ${filePath}`);
66
87
  db.prepare(markFileAsSkippedTemplate).run({ id: fileId });
67
88
  return false;
68
89
  }
69
- log(`🔍 Found ${functions.length} functions in ${filePath}`);
90
+ log(`🔍 Found ${functions.length} functions and ${classes.length} classes in ${filePath}`);
91
+ // Insert functions
70
92
  for (const fn of functions) {
71
93
  const embedding = await generateEmbedding(fn.content);
72
- const result = db.prepare(`
94
+ const result = db
95
+ .prepare(`
73
96
  INSERT INTO functions (
74
97
  file_id, name, start_line, end_line, content, embedding, lang
75
98
  ) VALUES (
76
99
  @file_id, @name, @start_line, @end_line, @content, @embedding, @lang
77
100
  )
78
- `).run({
101
+ `)
102
+ .run({
79
103
  file_id: fileId,
80
104
  name: fn.name,
81
105
  start_line: fn.start_line,
82
106
  end_line: fn.end_line,
83
107
  content: fn.content,
84
108
  embedding: JSON.stringify(embedding),
85
- lang: 'js'
109
+ lang: 'js',
110
+ });
111
+ const functionId = result.lastInsertRowid;
112
+ // file → function edge
113
+ db.prepare(`INSERT INTO edges (source_type, source_id, target_type, target_id, relation)
114
+ VALUES (@source_type, @source_id, @target_type, @target_id, 'contains')`).run({
115
+ source_type: 'file',
116
+ source_id: fileId,
117
+ target_type: 'function',
118
+ target_id: functionId,
86
119
  });
87
- const callerId = result.lastInsertRowid;
120
+ // Walk inside function to find calls
88
121
  const fnAst = parse(fn.content, {
89
122
  ecmaVersion: 'latest',
90
123
  sourceType: 'module',
@@ -96,26 +129,73 @@ export async function extractFromJS(filePath, content, fileId) {
96
129
  if (node.callee?.type === 'Identifier' && node.callee.name) {
97
130
  calls.push({ calleeName: node.callee.name });
98
131
  }
99
- }
132
+ },
100
133
  });
101
134
  for (const call of calls) {
102
- db.prepare(`
103
- INSERT INTO function_calls (caller_id, callee_name)
104
- VALUES (@caller_id, @callee_name)
105
- `).run({
106
- caller_id: callerId,
107
- callee_name: call.calleeName
135
+ // Store name for later resolution
136
+ db.prepare(`INSERT INTO function_calls (caller_id, callee_name) VALUES (@caller_id, @callee_name)`).run({ caller_id: functionId, callee_name: call.calleeName });
137
+ // Optional unresolved edge
138
+ db.prepare(`INSERT INTO edges (source_type, source_id, target_type, target_id, relation)
139
+ VALUES (@source_type, @source_id, @target_type, @target_id, 'calls')`).run({
140
+ source_type: 'function',
141
+ source_id: functionId,
142
+ target_type: 'function',
143
+ target_id: 0, // unresolved callee
108
144
  });
109
145
  }
110
146
  log(`📌 Indexed function: ${fn.name} with ${calls.length} calls`);
111
147
  }
148
+ // Insert classes
149
+ for (const cls of classes) {
150
+ const embedding = await generateEmbedding(cls.content);
151
+ const result = db
152
+ .prepare(`
153
+ INSERT INTO classes (
154
+ file_id, name, start_line, end_line, content, embedding, lang
155
+ ) VALUES (
156
+ @file_id, @name, @start_line, @end_line, @content, @embedding, @lang
157
+ )
158
+ `)
159
+ .run({
160
+ file_id: fileId,
161
+ name: cls.name,
162
+ start_line: cls.start_line,
163
+ end_line: cls.end_line,
164
+ content: cls.content,
165
+ embedding: JSON.stringify(embedding),
166
+ lang: 'js',
167
+ });
168
+ const classId = result.lastInsertRowid;
169
+ // file → class edge
170
+ db.prepare(`INSERT INTO edges (source_type, source_id, target_type, target_id, relation)
171
+ VALUES (@source_type, @source_id, @target_type, @target_id, 'contains')`).run({
172
+ source_type: 'file',
173
+ source_id: fileId,
174
+ target_type: 'class',
175
+ target_id: classId,
176
+ });
177
+ // superclass → store unresolved reference
178
+ if (cls.superClass) {
179
+ db.prepare(`INSERT INTO edges (source_type, source_id, target_type, target_id, relation)
180
+ VALUES (@source_type, @source_id, @target_type, @target_id, 'inherits')`).run({
181
+ source_type: 'class',
182
+ source_id: classId,
183
+ target_type: 'class',
184
+ target_id: 0, // unresolved superclass
185
+ });
186
+ console.log(`🔗 Class ${cls.name} inherits ${cls.superClass} (edge stored for later resolution)`);
187
+ }
188
+ console.log(`🏷 Indexed class: ${cls.name} (id=${classId})`);
189
+ }
190
+ // Optional summary after extraction
191
+ console.log(`📊 Extraction summary for ${filePath}: ${functions.length} functions, ${classes.length} classes`);
112
192
  db.prepare(markFileAsExtractedTemplate).run({ id: fileId });
113
- log(`✅ Marked functions as extracted for ${filePath}`);
193
+ log(`✅ Marked functions/classes as extracted for ${filePath}`);
114
194
  return true;
115
195
  }
116
196
  catch (err) {
117
197
  log(`❌ Failed to extract from: ${filePath}`);
118
- log(` ↳ ${String(err.message)}`);
198
+ log(` ↳ ${err.message}`);
119
199
  db.prepare(markFileAsFailedTemplate).run({ id: fileId });
120
200
  return false;
121
201
  }
@@ -1,15 +1,16 @@
1
- import { Project, SyntaxKind } from 'ts-morph';
1
+ import { Project, SyntaxKind, } from 'ts-morph';
2
2
  import path from 'path';
3
3
  import { generateEmbedding } from '../../lib/generateEmbedding.js';
4
4
  import { log } from '../../utils/log.js';
5
5
  import { getDbForRepo } from '../client.js';
6
- import { markFileAsSkippedTemplate, markFileAsExtractedTemplate, markFileAsFailedTemplate } from '../sqlTemplates.js';
6
+ import { markFileAsSkippedTemplate, markFileAsExtractedTemplate, markFileAsFailedTemplate, } from '../sqlTemplates.js';
7
7
  export async function extractFromTS(filePath, content, fileId) {
8
8
  const db = getDbForRepo();
9
9
  try {
10
10
  const project = new Project({ useInMemoryFileSystem: true });
11
11
  const sourceFile = project.createSourceFile(filePath, content);
12
12
  const functions = [];
13
+ const classes = [];
13
14
  const allFuncs = [
14
15
  ...sourceFile.getDescendantsOfKind(SyntaxKind.FunctionDeclaration),
15
16
  ...sourceFile.getDescendantsOfKind(SyntaxKind.FunctionExpression),
@@ -22,45 +23,101 @@ export async function extractFromTS(filePath, content, fileId) {
22
23
  const code = fn.getText();
23
24
  functions.push({ name, start_line: start, end_line: end, content: code });
24
25
  }
25
- if (functions.length === 0) {
26
- log(`⚠️ No functions found in TS file: ${filePath}`);
26
+ const allClasses = [
27
+ ...sourceFile.getDescendantsOfKind(SyntaxKind.ClassDeclaration),
28
+ ...sourceFile.getDescendantsOfKind(SyntaxKind.ClassExpression),
29
+ ];
30
+ for (const cls of allClasses) {
31
+ const name = cls.getName() ?? `${path.basename(filePath)}:<anon-class>`;
32
+ const start = cls.getStartLineNumber();
33
+ const end = cls.getEndLineNumber();
34
+ const code = cls.getText();
35
+ const superClass = cls.getExtends()?.getText() ?? null;
36
+ classes.push({
37
+ name,
38
+ start_line: start,
39
+ end_line: end,
40
+ content: code,
41
+ superClass,
42
+ });
43
+ }
44
+ if (functions.length === 0 && classes.length === 0) {
45
+ log(`⚠️ No functions/classes found in TS file: ${filePath}`);
27
46
  db.prepare(markFileAsSkippedTemplate).run({ id: fileId });
28
47
  return false;
29
48
  }
30
- log(`🔍 Found ${functions.length} TypeScript functions in ${filePath}`);
49
+ log(`🔍 Found ${functions.length} functions and ${classes.length} classes in ${filePath}`);
50
+ // Insert functions
31
51
  for (const fn of functions) {
32
52
  const embedding = await generateEmbedding(fn.content);
33
- const result = db.prepare(`
53
+ const result = db
54
+ .prepare(`
34
55
  INSERT INTO functions (
35
56
  file_id, name, start_line, end_line, content, embedding, lang
36
57
  ) VALUES (
37
58
  @file_id, @name, @start_line, @end_line, @content, @embedding, @lang
38
59
  )
39
- `).run({
60
+ `)
61
+ .run({
40
62
  file_id: fileId,
41
63
  name: fn.name,
42
64
  start_line: fn.start_line,
43
65
  end_line: fn.end_line,
44
66
  content: fn.content,
45
67
  embedding: JSON.stringify(embedding),
46
- lang: 'ts'
68
+ lang: 'ts',
47
69
  });
48
- const callerId = result.lastInsertRowid;
49
- // Simplified call detection (no walking for now)
70
+ const functionId = result.lastInsertRowid;
71
+ // file function edge
72
+ db.prepare(`INSERT INTO edges (source_type, source_id, target_type, target_id, relation)
73
+ VALUES ('file', @source_id, 'function', @target_id, 'contains')`).run({ source_id: fileId, target_id: functionId });
74
+ // Simplified call detection (regex)
50
75
  const callMatches = fn.content.matchAll(/(\w+)\s*\(/g);
51
76
  for (const match of callMatches) {
52
- db.prepare(`
53
- INSERT INTO function_calls (caller_id, callee_name)
54
- VALUES (@caller_id, @callee_name)
55
- `).run({
56
- caller_id: callerId,
77
+ // Store call by name (resolution happens later)
78
+ db.prepare(`INSERT INTO function_calls (caller_id, callee_name)
79
+ VALUES (@caller_id, @callee_name)`).run({
80
+ caller_id: functionId,
57
81
  callee_name: match[1],
58
82
  });
59
83
  }
60
84
  log(`📌 Indexed TS function: ${fn.name}`);
61
85
  }
86
+ // Insert classes
87
+ for (const cls of classes) {
88
+ const embedding = await generateEmbedding(cls.content);
89
+ const result = db
90
+ .prepare(`
91
+ INSERT INTO classes (
92
+ file_id, name, start_line, end_line, content, embedding, lang
93
+ ) VALUES (
94
+ @file_id, @name, @start_line, @end_line, @content, @embedding, @lang
95
+ )
96
+ `)
97
+ .run({
98
+ file_id: fileId,
99
+ name: cls.name,
100
+ start_line: cls.start_line,
101
+ end_line: cls.end_line,
102
+ content: cls.content,
103
+ embedding: JSON.stringify(embedding),
104
+ lang: 'ts',
105
+ });
106
+ const classId = result.lastInsertRowid;
107
+ // file → class edge
108
+ db.prepare(`INSERT INTO edges (source_type, source_id, target_type, target_id, relation)
109
+ VALUES ('file', @source_id, 'class', @target_id, 'contains')`).run({ source_id: fileId, target_id: classId });
110
+ // superclass reference → store in helper table for later resolution
111
+ if (cls.superClass) {
112
+ db.prepare(`INSERT INTO class_inheritance (class_id, super_name)
113
+ VALUES (@class_id, @super_name)`).run({ class_id: classId, super_name: cls.superClass });
114
+ log(`🔗 Class ${cls.name} extends ${cls.superClass} (edge stored for later resolution)`);
115
+ }
116
+ log(`🏷 Indexed TS class: ${cls.name} (id=${classId})`);
117
+ }
118
+ log(`📊 Extraction summary for ${filePath}: ${functions.length} functions, ${classes.length} classes`);
62
119
  db.prepare(markFileAsExtractedTemplate).run({ id: fileId });
63
- log(`✅ Marked TS functions as extracted for ${filePath}`);
120
+ log(`✅ Marked TS functions/classes as extracted for ${filePath}`);
64
121
  return true;
65
122
  }
66
123
  catch (err) {
@@ -1,43 +1,44 @@
1
- import { log } from '../../utils/log.js';
2
- import { detectFileType } from '../../fileRules/detectFileType.js';
3
- import { extractFromJava } from './extractFromJava.js';
4
- import { extractFromJS } from './extractFromJs.js';
5
- import { extractFromXML } from './extractFromXML.js';
6
- import { getDbForRepo } from '../client.js';
7
- import { markFileAsFailedTemplate, markFileAsSkippedByPath } from '../sqlTemplates.js';
8
- import { extractFromTS } from './extractFromTs.js';
9
- /**
10
- * Detects file type and delegates to the appropriate extractor.
11
- */
1
+ import { getDbForRepo } from "../client.js";
2
+ import { markFileAsSkippedByPath, markFileAsFailedTemplate } from "../sqlTemplates.js";
3
+ import { extractFromJava } from "./extractFromJava.js";
4
+ import { extractFromJS } from "./extractFromJs.js";
5
+ import { extractFromTS } from "./extractFromTs.js";
6
+ import { extractFromXML } from "./extractFromXML.js";
7
+ import { detectFileType } from "../../fileRules/detectFileType.js";
8
+ import { log } from "../../utils/log.js";
12
9
  export async function extractFunctionsFromFile(filePath, content, fileId) {
13
10
  const type = detectFileType(filePath).trim().toLowerCase();
14
11
  const db = getDbForRepo();
15
12
  try {
16
- if (type === 'js' || type === 'javascript') {
17
- log(`✅ Attempting to extract JS functions from ${filePath}`);
18
- return await extractFromJS(filePath, content, fileId);
13
+ let success = false;
14
+ switch (type) {
15
+ case 'js':
16
+ case 'javascript':
17
+ log(`📄 Extracting JS code from ${filePath}`);
18
+ success = await extractFromJS(filePath, content, fileId);
19
+ break;
20
+ case 'ts':
21
+ case 'typescript':
22
+ log(`📘 Extracting TS code from ${filePath}`);
23
+ success = await extractFromTS(filePath, content, fileId);
24
+ break;
25
+ case 'java':
26
+ log(`⚠️ Java extraction not implemented for ${filePath}`);
27
+ await extractFromJava(filePath, content, fileId);
28
+ return false;
29
+ case 'xml':
30
+ log(`⚠️ XML extraction not implemented for ${filePath}`);
31
+ await extractFromXML(filePath, content, fileId);
32
+ return false;
33
+ default:
34
+ log(`⚠️ Unsupported file type: ${type}. Skipping ${filePath}`);
35
+ db.prepare(markFileAsSkippedByPath).run({ path: filePath });
36
+ return false;
19
37
  }
20
- if (type === 'ts' || type === 'typescript') {
21
- log(`📘 Extracting TS functions from ${filePath}`);
22
- return await extractFromTS(filePath, content, fileId);
23
- }
24
- if (type === 'java') {
25
- log(`❌ Nothing extracted for ${filePath} due to missing implementation`);
26
- await extractFromJava(filePath, content, fileId);
27
- return false;
28
- }
29
- if (type === 'xml') {
30
- log(`❌ Nothing extracted for ${filePath} due to missing implementation`);
31
- await extractFromXML(filePath, content, fileId);
32
- return false;
33
- }
34
- log(`⚠️ Unsupported file type: ${type} for function extraction. Skipping ${filePath}`);
35
- db.prepare(markFileAsSkippedByPath).run({ path: filePath });
36
- return false;
38
+ return success;
37
39
  }
38
40
  catch (error) {
39
- log(`❌ Failed to extract functions from ${filePath}: ${error instanceof Error ? error.message : error}`);
40
- // Use the sqlTemplate to mark the file as 'failed'
41
+ log(`❌ Failed to extract from ${filePath}: ${error instanceof Error ? error.message : error}`);
41
42
  db.prepare(markFileAsFailedTemplate).run({ id: fileId });
42
43
  return false;
43
44
  }
@@ -5,7 +5,7 @@ import { extractFunctionsFromFile } from './functionExtractors/index.js';
5
5
  * Extracts functions from file if language is supported.
6
6
  * Returns true if functions were extracted, false otherwise.
7
7
  */
8
- export async function indexFunctionsForFile(filePath, fileId) {
8
+ export async function indexCodeForFile(filePath, fileId) {
9
9
  const normalizedPath = path.normalize(filePath).replace(/\\/g, '/');
10
10
  const content = fs.readFileSync(filePath, 'utf-8');
11
11
  return await extractFunctionsFromFile(normalizedPath, content, fileId);
package/dist/db/schema.js CHANGED
@@ -1,8 +1,8 @@
1
1
  import { getDbForRepo } from "./client.js";
2
2
  export function initSchema() {
3
3
  const db = getDbForRepo();
4
+ // --- Existing tables ---
4
5
  db.exec(`
5
- -- Create the files table
6
6
  CREATE TABLE IF NOT EXISTS files (
7
7
  id INTEGER PRIMARY KEY AUTOINCREMENT,
8
8
  path TEXT UNIQUE,
@@ -16,12 +16,9 @@ export function initSchema() {
16
16
  functions_extracted_at TEXT
17
17
  );
18
18
 
19
- -- Create the full-text search table, auto-updated via content=files
20
19
  CREATE VIRTUAL TABLE IF NOT EXISTS files_fts
21
20
  USING fts5(filename, summary, path, content='files', content_rowid='id');
22
21
  `);
23
- console.log('✅ SQLite schema initialized with FTS5 auto-sync');
24
- // Create additional tables for functions and function_calls
25
22
  db.exec(`
26
23
  CREATE TABLE IF NOT EXISTS functions (
27
24
  id INTEGER PRIMARY KEY AUTOINCREMENT,
@@ -41,5 +38,54 @@ export function initSchema() {
41
38
  callee_name TEXT
42
39
  );
43
40
  `);
44
- console.log('✅ Schema for functions and function_calls initialized');
41
+ // --- KG-specific additions ---
42
+ // Classes table
43
+ db.exec(`
44
+ CREATE TABLE IF NOT EXISTS classes (
45
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
46
+ file_id INTEGER REFERENCES files(id),
47
+ name TEXT,
48
+ start_line INTEGER,
49
+ end_line INTEGER,
50
+ content TEXT,
51
+ embedding TEXT,
52
+ lang TEXT
53
+ );
54
+
55
+ CREATE INDEX IF NOT EXISTS idx_class_file_id ON classes(file_id);
56
+ `);
57
+ // Edges table (function/class/file relations)
58
+ db.exec(`
59
+ CREATE TABLE IF NOT EXISTS edges (
60
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
61
+ source_type TEXT NOT NULL, -- 'function' | 'class' | 'file'
62
+ source_id INTEGER NOT NULL,
63
+ target_type TEXT NOT NULL,
64
+ target_id INTEGER NOT NULL,
65
+ relation TEXT NOT NULL -- e.g., 'calls', 'inherits', 'contains'
66
+ );
67
+
68
+ CREATE INDEX IF NOT EXISTS idx_edges_source ON edges(source_type, source_id);
69
+ CREATE INDEX IF NOT EXISTS idx_edges_target ON edges(target_type, target_id);
70
+ `);
71
+ // --- Improved tags setup ---
72
+ // Master tag table
73
+ db.exec(`
74
+ CREATE TABLE IF NOT EXISTS tags_master (
75
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
76
+ name TEXT UNIQUE NOT NULL
77
+ );
78
+
79
+ CREATE TABLE IF NOT EXISTS entity_tags (
80
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
81
+ entity_type TEXT NOT NULL, -- 'function' | 'class' | 'file'
82
+ entity_id INTEGER NOT NULL,
83
+ tag_id INTEGER NOT NULL REFERENCES tags_master(id),
84
+ UNIQUE(entity_type, entity_id, tag_id)
85
+ );
86
+
87
+ CREATE INDEX IF NOT EXISTS idx_entity_tags_entity ON entity_tags(entity_type, entity_id);
88
+ CREATE INDEX IF NOT EXISTS idx_entity_tags_tag ON entity_tags(tag_id);
89
+ `);
90
+ console.log('✅ KG schema initialized (files, functions, classes, edges, tags)');
45
91
  }
package/dist/index.js CHANGED
@@ -193,9 +193,11 @@ const config = cmd.command('config').description('Manage SCAI configuration');
193
193
  config
194
194
  .command('set-model <model>')
195
195
  .description('Set the model to use')
196
- .action(async (model) => {
196
+ .option('-g, --global', 'Set the global default model instead of the active repo')
197
+ .action(async (model, options) => {
197
198
  await withContext(async () => {
198
- Config.setModel(model);
199
+ const scope = options.global ? 'global' : 'repo';
200
+ Config.setModel(model, scope);
199
201
  Config.show();
200
202
  });
201
203
  });
@@ -337,14 +339,8 @@ cmd.addHelpText('after', `
337
339
  💡 Use with caution and expect possible changes or instability.
338
340
  `);
339
341
  cmd.parse(process.argv);
340
- const opts = cmd.opts();
341
- if (opts.model)
342
- Config.setModel(opts.model);
343
- if (opts.lang)
344
- Config.setLanguage(opts.lang);
345
342
  async function withContext(action) {
346
343
  const ok = await updateContext();
347
- if (!ok)
348
- process.exit(1);
344
+ //if (!ok) process.exit(1);
349
345
  await action();
350
346
  }
@@ -9,7 +9,7 @@ import { readConfig, writeConfig } from './config.js';
9
9
  import { CONFIG_PATH } from './constants.js';
10
10
  // Constants
11
11
  const MODEL_PORT = 11434;
12
- const REQUIRED_MODELS = ['llama3', 'mistral'];
12
+ const REQUIRED_MODELS = ['llama3:8b'];
13
13
  const OLLAMA_URL = 'https://ollama.com/download';
14
14
  const isYesMode = process.argv.includes('--yes') || process.env.SCAI_YES === '1';
15
15
  let ollamaChecked = false;
@@ -30,16 +30,16 @@ export async function autoInitIfNeeded() {
30
30
  }
31
31
  }
32
32
  }
33
- // 🗨 Prompt user with 10-second timeout
34
- function promptUser(question) {
33
+ // 🗨 Prompt user with configurable timeout
34
+ function promptUser(question, timeout = 20000) {
35
35
  if (isYesMode)
36
36
  return Promise.resolve('y');
37
37
  const rl = readline.createInterface({ input: process.stdin, output: process.stdout });
38
38
  return new Promise((resolve) => {
39
39
  const timer = setTimeout(() => {
40
40
  rl.close();
41
- resolve('');
42
- }, 10000); // 10 second timeout
41
+ resolve(''); // treat empty as "continue"
42
+ }, timeout);
43
43
  rl.question(question, (answer) => {
44
44
  clearTimeout(timer);
45
45
  rl.close();
@@ -89,7 +89,7 @@ async function ensureOllamaRunning() {
89
89
  windowsHide: true,
90
90
  });
91
91
  child.unref();
92
- await new Promise((res) => setTimeout(res, 10000));
92
+ await new Promise((res) => setTimeout(res, 10000)); // give more time
93
93
  if (await isOllamaRunning()) {
94
94
  console.log(chalk.green('✅ Ollama started successfully.'));
95
95
  ollamaAvailable = true;
@@ -102,23 +102,21 @@ async function ensureOllamaRunning() {
102
102
  process.exit(1);
103
103
  }
104
104
  }
105
- // If we get here, Ollama likely isn't installed
105
+ // Ollama not detected; prompt user but allow continuing
106
106
  console.log(chalk.red('❌ Ollama is not installed or not in PATH.'));
107
107
  console.log(chalk.yellow(`📦 Ollama is required to run local AI models.`));
108
- const answer = await promptUser('🌐 Would you like to open the download page in your browser? (y/N): ');
108
+ const answer = await promptUser(`🌐 Recommended model: ${REQUIRED_MODELS.join(', ')}\nOpen download page in browser? (y/N): `);
109
109
  if (answer.toLowerCase() === 'y') {
110
110
  openBrowser(OLLAMA_URL);
111
111
  }
112
- console.log(chalk.yellow('⏳ Waiting for you to install Ollama and press Enter to continue...'));
113
- await promptUser('👉 Press Enter once Ollama is installed and ready: ');
114
- // Retry once
112
+ await promptUser('⏳ Press Enter once Ollama is installed or to continue without it: ');
115
113
  if (await isOllamaRunning()) {
116
114
  console.log(chalk.green('✅ Ollama detected. Continuing...'));
117
115
  ollamaAvailable = true;
118
116
  }
119
117
  else {
120
- console.log(chalk.red('Ollama still not detected. Please check your installation.'));
121
- process.exit(1);
118
+ console.log(chalk.yellow('⚠️ Ollama not running. Models will not be available until installed.'));
119
+ ollamaAvailable = false; // continue anyway
122
120
  }
123
121
  }
124
122
  // 🧰 List installed models
@@ -134,7 +132,7 @@ async function getInstalledModels() {
134
132
  return [];
135
133
  }
136
134
  }
137
- // 📥 Download missing models
135
+ // 📥 Suggest required models but don’t block
138
136
  async function ensureModelsDownloaded() {
139
137
  if (!ollamaAvailable)
140
138
  return;
@@ -144,11 +142,11 @@ async function ensureModelsDownloaded() {
144
142
  console.log(chalk.green('✅ All required models are installed.'));
145
143
  return;
146
144
  }
147
- console.log(chalk.yellow(`📦 Missing models: ${missing.join(', ')}`));
148
- const answer = await promptUser('⬇️ Do you want to download them now? (y/N): ');
145
+ console.log(chalk.yellow(`📦 Suggested models: ${missing.join(', ')}`));
146
+ const answer = await promptUser('⬇️ Download them now? (y/N, continue anyway): ');
149
147
  if (answer.toLowerCase() !== 'y') {
150
- console.log(chalk.red('🚫 Aborting due to missing models.'));
151
- process.exit(1);
148
+ console.log(chalk.yellow('⚠️ Continuing without installing models. You can install later via config.'));
149
+ return;
152
150
  }
153
151
  for (const model of missing) {
154
152
  try {
@@ -157,8 +155,7 @@ async function ensureModelsDownloaded() {
157
155
  console.log(chalk.green(`✅ Pulled ${model}`));
158
156
  }
159
157
  catch {
160
- console.log(chalk.red(`❌ Failed to pull ${model}.`));
161
- process.exit(1);
158
+ console.log(chalk.red(`❌ Failed to pull ${model}, continuing...`));
162
159
  }
163
160
  }
164
161
  }
@@ -29,25 +29,21 @@ function isTopOrBottomNoise(line) {
29
29
  }
30
30
  export const cleanupModule = {
31
31
  name: 'cleanup',
32
- description: 'Remove markdown fences and fluff from top/bottom of each chunk with colored logging',
32
+ description: 'Remove markdown fences, fluff, and non-JSON lines with colored logging',
33
33
  async run(input) {
34
- // Normalize line endings to \n to avoid issues with \r\n
34
+ // Normalize line endings to \n
35
35
  let content = input.content.replace(/\r\n/g, '\n');
36
36
  let lines = content.split('\n');
37
37
  // --- CLEAN TOP ---
38
- // Remove noise lines before the first triple tick or end
39
38
  while (lines.length && (lines[0].trim() === '' || isTopOrBottomNoise(lines[0]))) {
40
39
  if (/^```(?:\w+)?$/.test(lines[0].trim()))
41
- break; // Stop if opening fence found
40
+ break;
42
41
  console.log(chalk.red(`[cleanupModule] Removing noise from top:`), chalk.yellow(`"${lines[0].trim()}"`));
43
42
  lines.shift();
44
43
  }
45
- // If opening fence found at top, find matching closing fence
46
44
  if (lines.length && /^```(?:\w+)?$/.test(lines[0].trim())) {
47
45
  console.log(chalk.red(`[cleanupModule] Found opening fenced block at top.`));
48
- // Remove opening fence line
49
46
  lines.shift();
50
- // Find closing fence index
51
47
  let closingIndex = -1;
52
48
  for (let i = 0; i < lines.length; i++) {
53
49
  if (/^```(?:\w+)?$/.test(lines[i].trim())) {
@@ -57,26 +53,22 @@ export const cleanupModule = {
57
53
  }
58
54
  if (closingIndex !== -1) {
59
55
  console.log(chalk.red(`[cleanupModule] Found closing fenced block at line ${closingIndex + 1}, removing fence lines.`));
60
- // Remove closing fence line
61
56
  lines.splice(closingIndex, 1);
62
57
  }
63
58
  else {
64
59
  console.log(chalk.yellow(`[cleanupModule] No closing fenced block found, only removed opening fence.`));
65
60
  }
66
- // NO removal of noise lines after fenced block here (to keep new comments intact)
67
61
  }
68
62
  // --- CLEAN BOTTOM ---
69
- // If closing fence found at bottom, remove only that triple tick line
70
63
  if (lines.length && /^```(?:\w+)?$/.test(lines[lines.length - 1].trim())) {
71
64
  console.log(chalk.red(`[cleanupModule] Removing closing fenced block line at bottom.`));
72
65
  lines.pop();
73
66
  }
74
- // Remove noise lines after closing fence (now bottom)
75
67
  while (lines.length && (lines[lines.length - 1].trim() === '' || isTopOrBottomNoise(lines[lines.length - 1]))) {
76
68
  console.log(chalk.red(`[cleanupModule] Removing noise from bottom after fenced block:`), chalk.yellow(`"${lines[lines.length - 1].trim()}"`));
77
69
  lines.pop();
78
70
  }
79
- // --- FINAL CLEANUP: REMOVE ANY LINGERING TRIPLE TICK LINES ANYWHERE ---
71
+ // --- REMOVE ANY LINGERING TRIPLE TICK LINES ANYWHERE ---
80
72
  lines = lines.filter(line => {
81
73
  const trimmed = line.trim();
82
74
  if (/^```(?:\w+)?$/.test(trimmed)) {
@@ -85,6 +77,33 @@ export const cleanupModule = {
85
77
  }
86
78
  return true;
87
79
  });
88
- return { content: lines.join('\n').trim() };
80
+ // --- FINAL CLEANUP: KEEP ONLY JSON LINES INSIDE BRACES ---
81
+ let jsonLines = [];
82
+ let braceDepth = 0;
83
+ let insideBraces = false;
84
+ for (let line of lines) {
85
+ const trimmed = line.trim();
86
+ // Detect start of JSON object/array
87
+ if (!insideBraces && (trimmed.startsWith('{') || trimmed.startsWith('['))) {
88
+ insideBraces = true;
89
+ }
90
+ if (insideBraces) {
91
+ // Track nested braces/brackets
92
+ for (const char of trimmed) {
93
+ if (char === '{' || char === '[')
94
+ braceDepth++;
95
+ if (char === '}' || char === ']')
96
+ braceDepth--;
97
+ }
98
+ // Skip lines that are clearly non-JSON inside braces
99
+ if (!trimmed.startsWith('//') && !/^\/\*/.test(trimmed) && trimmed !== '') {
100
+ jsonLines.push(line);
101
+ }
102
+ // Stop collecting after outermost brace closed
103
+ if (braceDepth === 0)
104
+ break;
105
+ }
106
+ }
107
+ return { content: jsonLines.join('\n').trim() };
89
108
  }
90
109
  };
@@ -0,0 +1,55 @@
1
+ import { Config } from '../../config.js';
2
+ import { generate } from '../../lib/generate.js';
3
+ import path from 'path';
4
+ import { cleanupModule } from './cleanupModule.js';
5
/**
 * Coerces parsed model output into the `{ entities, edges }` shape.
 *
 * Anything that is not a plain object (null, arrays, scalars) yields an empty
 * graph; a missing or non-array `entities` / `edges` field is replaced by `[]`.
 * Other keys on a parsed object are kept as-is.
 *
 * @param {unknown} parsed - Value returned by `JSON.parse` on the model output.
 * @returns {{ entities: unknown[], edges: unknown[] }} Graph with array-valued fields.
 */
function normalizeKnowledgeGraph(parsed) {
    if (parsed === null || typeof parsed !== 'object' || Array.isArray(parsed)) {
        return { entities: [], edges: [] };
    }
    return {
        ...parsed,
        entities: Array.isArray(parsed.entities) ? parsed.entities : [],
        edges: Array.isArray(parsed.edges) ? parsed.edges : [],
    };
}

export const kgModule = {
    name: 'knowledge-graph',
    description: 'Generates a knowledge graph of entities, tags, and relationships from file content.',
    /**
     * Asks the configured model for a knowledge graph of the given file content.
     *
     * @param {{ filepath?: string, content?: string }} input - Module input; `filepath`
     *   supplies the file name and extension shown in the prompt.
     * @param {string} [content] - File content to analyse. When omitted, `input.content`
     *   is used if it is a string, otherwise an empty string.
     * @returns {Promise<{ entities: unknown[], edges: unknown[] }>} Parsed graph, or an
     *   empty graph when the model output cannot be cleaned or parsed as JSON.
     */
    run: async (input, content) => {
        const model = Config.getModel();
        const ext = input.filepath ? path.extname(input.filepath).toLowerCase() : '';
        const filename = input.filepath ? path.basename(input.filepath) : '';
        // Avoid interpolating the literal text "undefined" when no content argument is passed.
        let fileContent = '';
        if (typeof content === 'string') {
            fileContent = content;
        }
        else if (typeof input.content === 'string') {
            fileContent = input.content;
        }
        const prompt = `
You are an assistant specialized in building knowledge graphs from code or text.

Your task is to extract structured information from the file content below.

File: ${filename}
Extension: ${ext}

📋 Instructions:
- Identify all entities (functions, classes, modules, or main concepts)
- For each entity, generate tags describing its characteristics, purpose, or category
- Identify relationships between entities (e.g., "uses", "extends", "calls")
- Return output in JSON format with the following structure:

{
  "entities": [
    { "name": "EntityName", "type": "class|function|module|concept", "tags": ["tag1", "tag2"] }
  ],
  "edges": [
    { "from": "EntityName1", "to": "EntityName2", "type": "relationship_type" }
  ]
}

Do NOT include raw content from the file. Only provide the structured JSON output.

--- FILE CONTENT START ---
${fileContent}
--- FILE CONTENT END ---
`.trim();
        // Errors from generate() are not caught here and propagate to the caller.
        const response = await generate({ content: prompt, filepath: input.filepath }, model);
        try {
            // Clean the model output first
            const cleaned = await cleanupModule.run({ content: response.content });
            console.log("Cleaned knowledge graph data: ", cleaned);
            // Valid JSON can still have the wrong shape, so normalize before returning.
            return normalizeKnowledgeGraph(JSON.parse(cleaned.content));
        }
        catch (err) {
            console.warn('⚠️ Failed to parse KG JSON:', err);
            return { entities: [], edges: [] }; // fallback
        }
    }
};
@@ -224,3 +224,101 @@ const functionRows = db.prepare(`
224
224
  LIMIT 50
225
225
  `).all();
226
226
  console.table(functionRows);
227
// Knowledge-graph table report: row counts plus a few recent rows for the
// classes, edges, tags_master and entity_tags tables. Each section has its own
// try/catch so a failing query only skips that section.

// Runs a single-row query whose result column is aliased `count` and returns that value.
const kgCountRows = (sql) => db.prepare(sql).get().count;
const kgDivider = '-------------------------------------------';

// === Class Table Stats ===
console.log('\n📊 Stats for Table: classes');
console.log(kgDivider);
try {
    // Both counts are fetched before anything is printed.
    const [classCount, distinctClassFiles] = [
        `SELECT COUNT(*) AS count FROM classes`,
        `SELECT COUNT(DISTINCT file_id) AS count FROM classes`,
    ].map((sql) => kgCountRows(sql));
    console.log(`🏷 Total classes: ${classCount}`);
    console.log(`📂 Distinct files: ${distinctClassFiles}`);
}
catch (err) {
    console.error('❌ Error accessing classes table:', err.message);
}

// === Example Classes ===
console.log('\n🧪 Example extracted classes:');
try {
    const recentClasses = db.prepare(`
    SELECT id, name, file_id, start_line, end_line, substr(content, 1, 100) || '...' AS short_body
    FROM classes
    ORDER BY id DESC
    LIMIT 5
  `).all();
    for (const row of recentClasses) {
        console.log(`🏷 ID: ${row.id}`);
        console.log(` Name: ${row.name}`);
        console.log(` File: ${row.file_id}`);
        console.log(` Lines: ${row.start_line}-${row.end_line}`);
        console.log(` Body: ${row.short_body}\n`);
    }
}
catch (err) {
    console.error('❌ Error printing class examples:', err.message);
}

// === Edge Table Stats ===
console.log('\n📊 Stats for Table: edges');
console.log(kgDivider);
try {
    const [edgeCount, distinctRelations] = [
        `SELECT COUNT(*) AS count FROM edges`,
        `SELECT COUNT(DISTINCT relation) AS count FROM edges`,
    ].map((sql) => kgCountRows(sql));
    console.log(`🔗 Total edges: ${edgeCount}`);
    console.log(`🧩 Distinct relations: ${distinctRelations}`);
}
catch (err) {
    console.error('❌ Error accessing edges table:', err.message);
}

// === Example Edges ===
console.log('\n🧪 Example edges:');
try {
    const recentEdges = db.prepare(`
    SELECT id, source_id, target_id, relation
    FROM edges
    ORDER BY id DESC
    LIMIT 10
  `).all();
    for (const edge of recentEdges) {
        console.log(`🔗 Edge ${edge.id}: ${edge.source_id} -[${edge.relation}]-> ${edge.target_id}`);
    }
}
catch (err) {
    console.error('❌ Error printing edge examples:', err.message);
}

// === Tags Master Stats ===
console.log('\n📊 Stats for Table: tags_master');
console.log(kgDivider);
try {
    // The total is printed before the sample query runs.
    const tagCount = kgCountRows(`SELECT COUNT(*) AS count FROM tags_master`);
    console.log(`🏷 Total tags: ${tagCount}`);
    const recentTags = db.prepare(`
    SELECT id, name
    FROM tags_master
    ORDER BY id DESC
    LIMIT 5
  `).all();
    for (const tagRow of recentTags) {
        console.log(`🏷 Tag ${tagRow.id}: ${tagRow.name}`);
    }
}
catch (err) {
    console.error('❌ Error accessing tags_master table:', err.message);
}

// === Entity Tags Stats ===
console.log('\n📊 Stats for Table: entity_tags');
console.log(kgDivider);
try {
    const entityTagCount = kgCountRows(`SELECT COUNT(*) AS count FROM entity_tags`);
    console.log(`🔗 Total entity-tags: ${entityTagCount}`);
    const recentEntityTags = db.prepare(`
    SELECT id, entity_type, entity_id, tag_id
    FROM entity_tags
    ORDER BY id DESC
    LIMIT 10
  `).all();
    for (const link of recentEntityTags) {
        console.log(`🔗 EntityTag ${link.id}: ${link.entity_type} ${link.entity_id} -> tag ${link.tag_id}`);
    }
}
catch (err) {
    console.error('❌ Error accessing entity_tags table:', err.message);
}
package/dist/utils/log.js CHANGED
@@ -1,5 +1,5 @@
1
+ import { LOG_PATH } from "../constants.js";
1
2
  import fs from 'fs';
2
- import { LOG_PATH } from '../constants.js';
3
3
  export function log(...args) {
4
4
  const timestamp = new Date().toISOString();
5
5
  const message = args.map(arg => typeof arg === 'string' ? arg : JSON.stringify(arg, null, 2)).join(' ');
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "scai",
3
- "version": "0.1.109",
3
+ "version": "0.1.110",
4
4
  "type": "module",
5
5
  "bin": {
6
6
  "scai": "./dist/index.js"
@@ -34,7 +34,7 @@
34
34
  "workflow"
35
35
  ],
36
36
  "scripts": {
37
- "build": "rm -rfd dist && tsc && git add .",
37
+ "build": "rm -rfd dist && tsc && chmod +x dist/index.js && git add .",
38
38
  "start": "node dist/index.js"
39
39
  },
40
40
  "dependencies": {