@optave/codegraph 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +190 -0
- package/README.md +311 -0
- package/grammars/tree-sitter-hcl.wasm +0 -0
- package/grammars/tree-sitter-javascript.wasm +0 -0
- package/grammars/tree-sitter-python.wasm +0 -0
- package/grammars/tree-sitter-tsx.wasm +0 -0
- package/grammars/tree-sitter-typescript.wasm +0 -0
- package/package.json +69 -0
- package/src/builder.js +547 -0
- package/src/cli.js +224 -0
- package/src/config.js +55 -0
- package/src/constants.js +28 -0
- package/src/cycles.js +104 -0
- package/src/db.js +117 -0
- package/src/embedder.js +330 -0
- package/src/export.js +138 -0
- package/src/index.js +39 -0
- package/src/logger.js +20 -0
- package/src/mcp.js +139 -0
- package/src/parser.js +573 -0
- package/src/queries.js +616 -0
- package/src/watcher.js +213 -0
package/src/embedder.js
ADDED
|
@@ -0,0 +1,330 @@
|
|
|
1
|
+
import fs from "fs";
|
|
2
|
+
import path from "path";
|
|
3
|
+
|
|
4
|
+
import Database from 'better-sqlite3';
|
|
5
|
+
import { findDbPath, openReadonlyOrFail } from './db.js';
|
|
6
|
+
import { warn, debug } from './logger.js';
|
|
7
|
+
|
|
8
|
+
// Lazy-load transformers (heavy, optional module)
// Module-level cache: populated on the first loadModel() call and reused
// across subsequent embed()/search() calls in the same process.
let pipeline = null;     // pipeline() factory from @huggingface/transformers
let cos_sim = null;      // cos_sim helper from @huggingface/transformers
let extractor = null;    // cached feature-extraction pipeline for the active model
let activeModel = null;  // full HF name of the model currently held in `extractor`

// Supported embedding models. `dim` is the output vector width (must match
// stored embeddings at search time); `quantized` selects quantized weights
// when the pipeline is constructed.
export const MODELS = {
  'minilm': {
    name: 'Xenova/all-MiniLM-L6-v2',
    dim: 384,
    desc: 'Smallest, fastest (~23MB). General text.',
    quantized: true
  },
  'jina-small': {
    name: 'Xenova/jina-embeddings-v2-small-en',
    dim: 512,
    desc: 'Small, good quality (~33MB). General text.',
    quantized: false
  },
  'jina-base': {
    name: 'Xenova/jina-embeddings-v2-base-en',
    dim: 768,
    desc: 'Good quality (~137MB). General text, 8192 token context.',
    quantized: false
  },
  'nomic': {
    name: 'Xenova/nomic-embed-text-v1',
    dim: 768,
    desc: 'Best local quality (~137MB). 8192 context, beats OpenAI ada-002.',
    quantized: false
  }
};

export const DEFAULT_MODEL = 'minilm';
// Larger models get smaller batches to bound memory while embedding.
const BATCH_SIZE_MAP = { 'minilm': 32, 'jina-small': 16, 'jina-base': 8, 'nomic': 8 };
const DEFAULT_BATCH_SIZE = 32;
|
|
44
|
+
|
|
45
|
+
/**
 * Resolve a model key to its MODELS entry, falling back to DEFAULT_MODEL.
 * Exits the process with an error message when the key is unknown
 * (CLI-style handling, consistent with the rest of this file).
 */
function getModelConfig(modelKey) {
  const key = modelKey || DEFAULT_MODEL;
  const config = MODELS[key];
  if (config) return config;

  console.error(`Unknown model: ${key}. Available: ${Object.keys(MODELS).join(', ')}`);
  process.exit(1);
}
|
|
54
|
+
|
|
55
|
+
/**
 * Lazy-load @huggingface/transformers.
 * This is an optional dependency — gives a clear error if not installed.
 */
async function loadTransformers() {
  let mod;
  try {
    mod = await import('@huggingface/transformers');
  } catch {
    console.error(
      'Semantic search requires @huggingface/transformers.\n' +
      'Install it with: npm install @huggingface/transformers'
    );
    process.exit(1);
  }
  return mod;
}
|
|
70
|
+
|
|
71
|
+
/**
 * Load (or reuse) the feature-extraction pipeline for the given model key.
 * Caches the pipeline in module state; repeated calls with the same model
 * return immediately without touching the transformers package again.
 *
 * @returns {Promise<{extractor: object, config: object}>}
 */
async function loadModel(modelKey) {
  const config = getModelConfig(modelKey);

  // Fast path: requested model is already loaded.
  const cacheHit = extractor !== null && activeModel === config.name;
  if (cacheHit) return { extractor, config };

  ({ pipeline, cos_sim } = await loadTransformers());

  console.log(`Loading embedding model: ${config.name} (${config.dim}d)...`);
  extractor = await pipeline(
    'feature-extraction',
    config.name,
    config.quantized ? { quantized: true } : {}
  );
  activeModel = config.name;
  console.log('Model loaded.');
  return { extractor, config };
}
|
|
87
|
+
|
|
88
|
+
/**
 * Generate embeddings for an array of texts.
 * Texts are processed in model-dependent batches; each embedding is copied
 * out of the pipeline's flat output buffer into its own Float32Array.
 *
 * @param {string[]} texts
 * @param {string} [modelKey] - key into MODELS; defaults to DEFAULT_MODEL
 * @returns {Promise<{vectors: Float32Array[], dim: number}>}
 */
export async function embed(texts, modelKey) {
  const { extractor: ext, config } = await loadModel(modelKey);
  const { dim } = config;
  const batchSize = BATCH_SIZE_MAP[modelKey || DEFAULT_MODEL] || DEFAULT_BATCH_SIZE;
  const showProgress = texts.length > batchSize;
  const vectors = [];

  for (let offset = 0; offset < texts.length; offset += batchSize) {
    const batch = texts.slice(offset, offset + batchSize);
    const output = await ext(batch, { pooling: 'mean', normalize: true });

    // output.data is a flat buffer of batch.length * dim floats.
    batch.forEach((_, idx) => {
      const base = idx * dim;
      vectors.push(
        Float32Array.from({ length: dim }, (_, k) => output.data[base + k])
      );
    });

    if (showProgress) {
      process.stdout.write(` Embedded ${Math.min(offset + batchSize, texts.length)}/${texts.length}\r`);
    }
  }

  return { vectors, dim };
}
|
|
117
|
+
|
|
118
|
+
/**
 * Cosine similarity between two Float32Arrays (or any numeric array-likes
 * of equal length). Returns a value in [-1, 1]; NaN if either vector is zero.
 */
export function cosineSim(a, b) {
  let dot = 0;
  let sqA = 0;
  let sqB = 0;
  const n = a.length;
  for (let idx = 0; idx < n; idx += 1) {
    const x = a[idx];
    const y = b[idx];
    dot += x * y;
    sqA += x * x;
    sqB += y * y;
  }
  return dot / (Math.sqrt(sqA) * Math.sqrt(sqB));
}
|
|
130
|
+
|
|
131
|
+
// Create the embedding tables if they don't exist yet.
// `embeddings` holds one raw Float32 vector blob per graph node;
// `embedding_meta` is a key/value table recording how the vectors were
// built (keys written elsewhere in this file: model, dim, count, built_at).
function initEmbeddingsSchema(db) {
  db.exec(`
    CREATE TABLE IF NOT EXISTS embeddings (
      node_id INTEGER PRIMARY KEY,
      vector BLOB NOT NULL,
      text_preview TEXT,
      FOREIGN KEY(node_id) REFERENCES nodes(id)
    );
    CREATE TABLE IF NOT EXISTS embedding_meta (
      key TEXT PRIMARY KEY,
      value TEXT
    );
  `);
}
|
|
145
|
+
|
|
146
|
+
/**
 * Build embeddings for all functions/methods/classes in the graph.
 *
 * For each symbol, reads its source snippet from disk (paths relative to
 * rootDir), embeds "<kind> <name> in <file>\n<snippet>" with the selected
 * model, and stores the raw Float32 vectors plus model metadata in graph.db.
 * Existing embeddings are fully rebuilt.
 *
 * @param {string} rootDir - project root that node file paths are relative to
 * @param {string} [modelKey] - key into MODELS; defaults to DEFAULT_MODEL
 */
export async function buildEmbeddings(rootDir, modelKey) {
  // Validate the model key up front (getModelConfig exits on unknown keys)
  // and reuse the config throughout instead of resolving it repeatedly.
  const config = getModelConfig(modelKey);
  const dbPath = findDbPath(null);

  const db = new Database(dbPath);
  initEmbeddingsSchema(db);

  // Full rebuild: vectors from a previous (possibly different) model are stale.
  db.exec('DELETE FROM embeddings');
  db.exec('DELETE FROM embedding_meta');

  const nodes = db.prepare(
    `SELECT * FROM nodes WHERE kind IN ('function', 'method', 'class') ORDER BY file, line`
  ).all();

  console.log(`Building embeddings for ${nodes.length} symbols...`);

  // Group symbols by file so each source file is read from disk only once.
  const byFile = new Map();
  for (const node of nodes) {
    if (!byFile.has(node.file)) byFile.set(node.file, []);
    byFile.get(node.file).push(node);
  }

  const texts = [];
  const nodeIds = [];
  const previews = [];

  for (const [file, fileNodes] of byFile) {
    const fullPath = path.join(rootDir, file);
    let lines;
    try {
      lines = fs.readFileSync(fullPath, 'utf-8').split('\n');
    } catch (err) {
      // Best-effort: a missing/unreadable file skips its symbols, not the build.
      warn(`Cannot read ${file} for embeddings: ${err.message}`);
      continue;
    }

    for (const node of fileNodes) {
      const startLine = Math.max(0, node.line - 1);
      // Without a recorded end line, fall back to a 15-line context window.
      const endLine = node.end_line
        ? Math.min(lines.length, node.end_line)
        : Math.min(lines.length, startLine + 15);
      const context = lines.slice(startLine, endLine).join('\n');

      texts.push(`${node.kind} ${node.name} in ${file}\n${context}`);
      nodeIds.push(node.id);
      previews.push(`${node.name} (${node.kind}) -- ${file}:${node.line}`);
    }
  }

  if (texts.length === 0) {
    // Nothing to embed: skip loading the (heavy) model entirely.
    console.log('No symbols to embed.');
    db.close();
    return;
  }

  console.log(`Embedding ${texts.length} symbols...`);
  const { vectors, dim } = await embed(texts, modelKey);

  const insert = db.prepare('INSERT OR REPLACE INTO embeddings (node_id, vector, text_preview) VALUES (?, ?, ?)');
  const insertMeta = db.prepare('INSERT OR REPLACE INTO embedding_meta (key, value) VALUES (?, ?)');
  // Single transaction: either all vectors + metadata land, or none do.
  const insertAll = db.transaction(() => {
    for (let i = 0; i < vectors.length; i++) {
      insert.run(nodeIds[i], Buffer.from(vectors[i].buffer), previews[i]);
    }
    insertMeta.run('model', config.name);
    insertMeta.run('dim', String(dim));
    insertMeta.run('count', String(vectors.length));
    insertMeta.run('built_at', new Date().toISOString());
  });
  insertAll();

  console.log(`\nStored ${vectors.length} embeddings (${dim}d, ${config.name}) in graph.db`);
  db.close();
}
|
|
220
|
+
|
|
221
|
+
/**
 * Semantic search over stored embeddings, with pre-filter support
 * (node kind / file pattern / test-file exclusion) to reduce the search space.
 * Prints ranked results to stdout.
 *
 * @param {string} query - natural-language query text
 * @param {string} [customDbPath] - explicit graph.db path (else auto-discovered)
 * @param {object} [opts]
 * @param {number} [opts.limit=15] - max results printed
 * @param {boolean} [opts.noTests=false] - skip test/spec/story files
 * @param {number} [opts.minScore=0.2] - minimum cosine similarity (0 is honored)
 * @param {string} [opts.kind] - restrict to a node kind
 * @param {string} [opts.filePattern] - substring match on file path
 * @param {string} [opts.model] - override model key (else inferred from stored meta)
 */
export async function search(query, customDbPath, opts = {}) {
  // Use ?? so an explicit 0 is honored (|| would silently replace
  // opts.minScore === 0 with the 0.2 default).
  const limit = opts.limit ?? 15;
  const noTests = opts.noTests ?? false;
  const minScore = opts.minScore ?? 0.2;

  const db = openReadonlyOrFail(customDbPath);

  let count;
  try {
    count = db.prepare("SELECT COUNT(*) as c FROM embeddings").get().c;
  } catch {
    console.log('No embeddings table found. Run `codegraph embed` first.');
    db.close();
    return;
  }
  if (count === 0) {
    console.log('No embeddings found. Run `codegraph embed` first.');
    db.close();
    return;
  }

  // Read build-time model/dim so the query is embedded with the same model.
  // Older DBs may lack the meta table entirely.
  let storedModel = null;
  let storedDim = null;
  try {
    const modelRow = db.prepare("SELECT value FROM embedding_meta WHERE key = 'model'").get();
    const dimRow = db.prepare("SELECT value FROM embedding_meta WHERE key = 'dim'").get();
    if (modelRow) storedModel = modelRow.value;
    if (dimRow) storedDim = Number.parseInt(dimRow.value, 10);
  } catch { /* old DB without meta table */ }

  // Map the stored full model name back to its MODELS key, unless overridden.
  let modelKey = opts.model || null;
  if (!modelKey && storedModel) {
    for (const [key, config] of Object.entries(MODELS)) {
      if (config.name === storedModel) { modelKey = key; break; }
    }
  }

  const { vectors: [queryVec], dim } = await embed([query], modelKey);

  if (storedDim && dim !== storedDim) {
    console.log(`Warning: query model dimension (${dim}) doesn't match stored embeddings (${storedDim}).`);
    console.log(`  Re-run \`codegraph embed\` with the same model, or use --model to match.`);
    db.close();
    return;
  }

  // Pre-filter: allow filtering by kind or file pattern to reduce search space
  const TEST_PATTERN = /\.(test|spec)\.|__test__|__tests__|\.stories\./;
  let sql = `
    SELECT e.node_id, e.vector, e.text_preview, n.name, n.kind, n.file, n.line
    FROM embeddings e
    JOIN nodes n ON e.node_id = n.id
  `;
  const params = [];
  const conditions = [];
  if (opts.kind) {
    conditions.push('n.kind = ?');
    params.push(opts.kind);
  }
  if (opts.filePattern) {
    conditions.push('n.file LIKE ?');
    params.push(`%${opts.filePattern}%`);
  }
  if (conditions.length > 0) {
    sql += ' WHERE ' + conditions.join(' AND ');
  }

  const rows = db.prepare(sql).all(...params);

  const results = [];
  for (const row of rows) {
    if (noTests && TEST_PATTERN.test(row.file)) continue;

    // Stored vectors are raw Float32 bytes. Copy into a fresh Uint8Array so
    // the Float32Array view starts at offset 0 regardless of how the driver's
    // Buffer is backed.
    const vec = new Float32Array(new Uint8Array(row.vector).buffer);
    const sim = cosineSim(queryVec, vec);

    if (sim >= minScore) {
      results.push({
        name: row.name,
        kind: row.kind,
        file: row.file,
        line: row.line,
        similarity: sim
      });
    }
  }

  results.sort((a, b) => b.similarity - a.similarity);

  console.log(`\nSemantic search: "${query}"\n`);

  const topResults = results.slice(0, limit);
  if (topResults.length === 0) {
    console.log(' No results above threshold.');
  } else {
    for (const r of topResults) {
      const bar = '#'.repeat(Math.round(r.similarity * 20));
      const kindIcon = r.kind === 'function' ? 'f' : r.kind === 'class' ? '*' : 'o';
      console.log(`  ${(r.similarity * 100).toFixed(1)}% ${bar}`);
      console.log(`    ${kindIcon} ${r.name} -- ${r.file}:${r.line}`);
    }
  }

  console.log(`\n  ${results.length} results total (showing top ${topResults.length})\n`);
  db.close();
}
|
|
330
|
+
|
package/src/export.js
ADDED
|
@@ -0,0 +1,138 @@
|
|
|
1
|
+
import path from 'path';
|
|
2
|
+
|
|
3
|
+
/**
 * Export the dependency graph in DOT (Graphviz) format.
 *
 * @param {object} db - better-sqlite3 database handle (only prepare().all() is used)
 * @param {object} [opts]
 * @param {boolean} [opts.fileLevel=true] - true: one node per file, clustered by
 *   directory; false: one node per function/method/class on 'calls' edges.
 * @returns {string} DOT source
 */
export function exportDOT(db, opts = {}) {
  const fileLevel = opts.fileLevel !== false;
  const lines = [
    'digraph codegraph {',
    '  rankdir=LR;',
    '  node [shape=box, fontname="monospace", fontsize=10];',
    '  edge [color="#666666"];',
    ''
  ];

  if (fileLevel) {
    const edges = db.prepare(`
      SELECT DISTINCT n1.file AS source, n2.file AS target
      FROM edges e
      JOIN nodes n1 ON e.source_id = n1.id
      JOIN nodes n2 ON e.target_id = n2.id
      WHERE n1.file != n2.file AND e.kind IN ('imports', 'imports-type', 'calls')
    `).all();

    // Cluster files by directory for readability.
    const dirs = new Map();
    const allFiles = new Set();
    for (const { source, target } of edges) {
      allFiles.add(source);
      allFiles.add(target);
    }
    for (const file of allFiles) {
      const dir = path.dirname(file) || '.';
      if (!dirs.has(dir)) dirs.set(dir, []);
      dirs.get(dir).push(file);
    }

    let clusterIdx = 0;
    for (const [dir, files] of [...dirs].sort()) {
      lines.push(`  subgraph cluster_${clusterIdx++} {`);
      lines.push(`    label="${dir}";`);
      lines.push(`    style=dashed;`);
      lines.push(`    color="#999999";`);
      for (const f of files) {
        const label = path.basename(f);
        lines.push(`    "${f}" [label="${label}"];`);
      }
      lines.push(`  }`);
      lines.push('');
    }

    for (const { source, target } of edges) {
      lines.push(`  "${source}" -> "${target}";`);
    }
  } else {
    const edges = db.prepare(`
      SELECT n1.name AS source_name, n1.kind AS source_kind, n1.file AS source_file,
             n2.name AS target_name, n2.kind AS target_kind, n2.file AS target_file,
             e.kind AS edge_kind
      FROM edges e
      JOIN nodes n1 ON e.source_id = n1.id
      JOIN nodes n2 ON e.target_id = n2.id
      WHERE n1.kind IN ('function', 'method', 'class') AND n2.kind IN ('function', 'method', 'class')
        AND e.kind = 'calls'
    `).all();

    // Emit each node declaration once, not once per incident edge (the
    // previous version duplicated declarations for every edge).
    // NOTE: sanitizing with [^a-zA-Z0-9_] can in principle collide distinct
    // symbols (e.g. "a/b.js:f" vs "a_b.js:f"); acceptable for visualization.
    const declared = new Set();
    const declare = (id, name, file) => {
      if (declared.has(id)) return;
      declared.add(id);
      lines.push(`  ${id} [label="${name}\\n${path.basename(file)}"];`);
    };

    for (const e of edges) {
      const sId = `${e.source_file}:${e.source_name}`.replace(/[^a-zA-Z0-9_]/g, '_');
      const tId = `${e.target_file}:${e.target_name}`.replace(/[^a-zA-Z0-9_]/g, '_');
      declare(sId, e.source_name, e.source_file);
      declare(tId, e.target_name, e.target_file);
      lines.push(`  ${sId} -> ${tId};`);
    }
  }

  lines.push('}');
  return lines.join('\n');
}
|
|
78
|
+
|
|
79
|
+
/**
 * Export the dependency graph in Mermaid format.
 * File level (default): one node per file on import/call edges between
 * different files. Symbol level: one node per function/method/class on
 * 'calls' edges.
 */
export function exportMermaid(db, opts = {}) {
  const fileLevel = opts.fileLevel !== false;
  // Mermaid node IDs must be alphanumeric; labels keep the original text.
  const sanitize = (raw) => raw.replace(/[^a-zA-Z0-9]/g, '_');
  const lines = ['graph LR'];

  if (fileLevel) {
    const edges = db.prepare(`
      SELECT DISTINCT n1.file AS source, n2.file AS target
      FROM edges e
      JOIN nodes n1 ON e.source_id = n1.id
      JOIN nodes n2 ON e.target_id = n2.id
      WHERE n1.file != n2.file AND e.kind IN ('imports', 'imports-type', 'calls')
    `).all();

    for (const { source, target } of edges) {
      lines.push(`  ${sanitize(source)}["${source}"] --> ${sanitize(target)}["${target}"]`);
    }
  } else {
    const edges = db.prepare(`
      SELECT n1.name AS source_name, n1.file AS source_file,
             n2.name AS target_name, n2.file AS target_file
      FROM edges e
      JOIN nodes n1 ON e.source_id = n1.id
      JOIN nodes n2 ON e.target_id = n2.id
      WHERE n1.kind IN ('function', 'method', 'class') AND n2.kind IN ('function', 'method', 'class')
        AND e.kind = 'calls'
    `).all();

    for (const e of edges) {
      const sId = sanitize(`${e.source_file}_${e.source_name}`);
      const tId = sanitize(`${e.target_file}_${e.target_name}`);
      lines.push(`  ${sId}["${e.source_name}"] --> ${tId}["${e.target_name}"]`);
    }
  }

  return lines.join('\n');
}
|
|
120
|
+
|
|
121
|
+
/**
 * Export as JSON adjacency list.
 * Nodes are the graph's file entries; edges are distinct cross-file
 * relationships of any kind.
 *
 * @returns {{nodes: object[], edges: object[]}}
 */
export function exportJSON(db) {
  const nodeRows = db.prepare(`
    SELECT id, name, kind, file, line FROM nodes WHERE kind = 'file'
  `).all();

  const edgeRows = db.prepare(`
    SELECT DISTINCT n1.file AS source, n2.file AS target, e.kind
    FROM edges e
    JOIN nodes n1 ON e.source_id = n1.id
    JOIN nodes n2 ON e.target_id = n2.id
    WHERE n1.file != n2.file
  `).all();

  return { nodes: nodeRows, edges: edgeRows };
}
|
package/src/index.js
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* codegraph — Programmatic API
|
|
3
|
+
*
|
|
4
|
+
* Usage:
|
|
5
|
+
* import { buildGraph, queryNameData, findCycles, exportDOT } from 'codegraph';
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
// Graph building
|
|
9
|
+
export { buildGraph, resolveImportPath, collectFiles, loadPathAliases } from './builder.js';
|
|
10
|
+
|
|
11
|
+
// Query functions (data-returning)
|
|
12
|
+
export {
|
|
13
|
+
queryNameData, impactAnalysisData, moduleMapData,
|
|
14
|
+
fileDepsData, fnDepsData, fnImpactData, diffImpactData
|
|
15
|
+
} from './queries.js';
|
|
16
|
+
|
|
17
|
+
// Watch mode
|
|
18
|
+
export { watchProject } from './watcher.js';
|
|
19
|
+
|
|
20
|
+
// Export (DOT/Mermaid/JSON)
|
|
21
|
+
export { exportDOT, exportMermaid, exportJSON } from './export.js';
|
|
22
|
+
|
|
23
|
+
// Circular dependency detection
|
|
24
|
+
export { findCycles, formatCycles } from './cycles.js';
|
|
25
|
+
|
|
26
|
+
// Embeddings
|
|
27
|
+
export { buildEmbeddings, search, embed, cosineSim, MODELS, DEFAULT_MODEL } from './embedder.js';
|
|
28
|
+
|
|
29
|
+
// Database utilities
|
|
30
|
+
export { openDb, initSchema, findDbPath, openReadonlyOrFail } from './db.js';
|
|
31
|
+
|
|
32
|
+
// Configuration
|
|
33
|
+
export { loadConfig } from './config.js';
|
|
34
|
+
|
|
35
|
+
// Shared constants
|
|
36
|
+
export { EXTENSIONS, IGNORE_DIRS, normalizePath } from './constants.js';
|
|
37
|
+
|
|
38
|
+
// Logger
|
|
39
|
+
export { setVerbose } from './logger.js';
|
package/src/logger.js
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
// Minimal stderr logger. All output goes to stderr so stdout stays clean
// for piped command output; `verbose` gates debug messages only.
let verbose = false;

export function setVerbose(v) {
  verbose = v;
}

export function isVerbose() {
  return verbose;
}

export function warn(msg) {
  process.stderr.write(`[codegraph WARN] ${msg}\n`);
}

export function debug(msg) {
  if (!verbose) return;
  process.stderr.write(`[codegraph DEBUG] ${msg}\n`);
}

export function info(msg) {
  process.stderr.write(`[codegraph] ${msg}\n`);
}

export function error(msg) {
  process.stderr.write(`[codegraph ERROR] ${msg}\n`);
}
|
package/src/mcp.js
ADDED
|
@@ -0,0 +1,139 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* MCP (Model Context Protocol) server for codegraph.
|
|
3
|
+
* Exposes codegraph queries as tools that AI coding assistants can call.
|
|
4
|
+
*
|
|
5
|
+
* Requires: npm install @modelcontextprotocol/sdk
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import { createRequire } from 'node:module';
|
|
9
|
+
import { findDbPath } from './db.js';
|
|
10
|
+
import { findCycles } from './cycles.js';
|
|
11
|
+
|
|
12
|
+
// Tool manifest advertised to MCP clients via the tools/list handler.
// Each entry's inputSchema is JSON Schema describing the arguments that the
// matching case in startMCPServer's tools/call dispatcher expects.
const TOOLS = [
  {
    name: 'query_function',
    description: 'Find callers and callees of a function by name',
    inputSchema: {
      type: 'object',
      properties: {
        name: { type: 'string', description: 'Function name to query (supports partial match)' },
        depth: { type: 'number', description: 'Traversal depth for transitive callers', default: 2 }
      },
      required: ['name']
    }
  },
  {
    name: 'file_deps',
    description: 'Show what a file imports and what imports it',
    inputSchema: {
      type: 'object',
      properties: {
        file: { type: 'string', description: 'File path (partial match supported)' }
      },
      required: ['file']
    }
  },
  {
    name: 'impact_analysis',
    description: 'Show files affected by changes to a given file (transitive)',
    inputSchema: {
      type: 'object',
      properties: {
        file: { type: 'string', description: 'File path to analyze' }
      },
      required: ['file']
    }
  },
  {
    // No arguments: operates on the whole graph.
    name: 'find_cycles',
    description: 'Detect circular dependencies in the codebase',
    inputSchema: {
      type: 'object',
      properties: {}
    }
  },
  {
    name: 'module_map',
    description: 'Get high-level overview of most-connected files',
    inputSchema: {
      type: 'object',
      properties: {
        limit: { type: 'number', description: 'Number of top files to show', default: 20 }
      }
    }
  }
];

export { TOOLS };
|
|
68
|
+
|
|
69
|
+
/**
 * Start the MCP server.
 * This function requires @modelcontextprotocol/sdk to be installed.
 *
 * Serves the TOOLS manifest over a stdio transport and dispatches tools/call
 * requests to the data-returning query functions. Runs until the transport
 * closes; exits the process if the SDK is not installed.
 *
 * @param {string} [customDbPath] - explicit graph.db path forwarded to each query
 */
export async function startMCPServer(customDbPath) {
  let Server, StdioServerTransport;
  try {
    const sdk = await import('@modelcontextprotocol/sdk/server/index.js');
    Server = sdk.Server;
    const transport = await import('@modelcontextprotocol/sdk/server/stdio.js');
    StdioServerTransport = transport.StdioServerTransport;
  } catch {
    console.error(
      'MCP server requires @modelcontextprotocol/sdk.\n' +
      'Install it with: npm install @modelcontextprotocol/sdk'
    );
    process.exit(1);
  }

  // Lazy import query functions to avoid circular deps at module load
  const { queryNameData, impactAnalysisData, moduleMapData, fileDepsData } = await import('./queries.js');

  // better-sqlite3 is CommonJS; load it via createRequire from this ESM module.
  const require = createRequire(import.meta.url);
  const Database = require('better-sqlite3');

  const server = new Server(
    { name: 'codegraph', version: '1.0.0' },
    { capabilities: { tools: {} } }
  );

  // NOTE(review): handlers are registered with plain method-name strings;
  // recent @modelcontextprotocol/sdk versions expect Zod request schema
  // objects here — confirm against the installed SDK version.
  server.setRequestHandler('tools/list', async () => ({ tools: TOOLS }));

  server.setRequestHandler('tools/call', async (request) => {
    const { name, arguments: args } = request.params;
    const dbPath = customDbPath || undefined;

    try {
      let result;
      switch (name) {
        case 'query_function':
          result = queryNameData(args.name, dbPath);
          break;
        case 'file_deps':
          result = fileDepsData(args.file, dbPath);
          break;
        case 'impact_analysis':
          result = impactAnalysisData(args.file, dbPath);
          break;
        case 'find_cycles': {
          // Opens its own read-only handle; the query helpers manage theirs.
          const db = new Database(findDbPath(dbPath), { readonly: true });
          const cycles = findCycles(db);
          db.close();
          result = { cycles, count: cycles.length };
          break;
        }
        case 'module_map':
          result = moduleMapData(dbPath, args.limit || 20);
          break;
        default:
          // Unknown tool: report as a tool-level error rather than throwing.
          return { content: [{ type: 'text', text: `Unknown tool: ${name}` }], isError: true };
      }

      return { content: [{ type: 'text', text: JSON.stringify(result, null, 2) }] };
    } catch (err) {
      // Surface query failures to the client as tool errors.
      return { content: [{ type: 'text', text: `Error: ${err.message}` }], isError: true };
    }
  });

  const transport = new StdioServerTransport();
  await server.connect(transport);
}
|