@optave/codegraph 2.2.3-dev.44e8146 → 2.3.1-dev.1aeea34
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +55 -15
- package/package.json +7 -6
- package/src/builder.js +66 -0
- package/src/cli.js +116 -13
- package/src/cochange.js +498 -0
- package/src/config.js +6 -0
- package/src/db.js +40 -0
- package/src/embedder.js +61 -2
- package/src/export.js +158 -13
- package/src/extractors/helpers.js +2 -1
- package/src/extractors/javascript.js +294 -78
- package/src/index.js +13 -0
- package/src/mcp.js +62 -1
- package/src/parser.js +39 -2
- package/src/queries.js +158 -9
- package/src/registry.js +9 -1
- package/src/structure.js +94 -0
package/src/cochange.js
ADDED
|
@@ -0,0 +1,498 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Git co-change analysis — surfaces files that historically change together.
|
|
3
|
+
*
|
|
4
|
+
* Uses git log to find temporal coupling between files, computes Jaccard
|
|
5
|
+
* similarity coefficients, and stores results in the codegraph database.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import { execFileSync } from 'node:child_process';
|
|
9
|
+
import fs from 'node:fs';
|
|
10
|
+
import path from 'node:path';
|
|
11
|
+
import { normalizePath } from './constants.js';
|
|
12
|
+
import { findDbPath, initSchema, openDb, openReadonlyOrFail } from './db.js';
|
|
13
|
+
import { warn } from './logger.js';
|
|
14
|
+
import { isTestFile } from './queries.js';
|
|
15
|
+
|
|
16
|
+
/**
 * Scan git history and return parsed commit data.
 *
 * Runs `git log --name-only` inside `repoRoot`, restricted to non-merge
 * commits and to added/modified/renamed/copied paths, and parses the output
 * into one record per commit. If the git invocation fails the error is logged
 * through `warn` and an empty result is returned instead of throwing.
 *
 * @param {string} repoRoot - Absolute path to the git repo root
 * @param {object} [opts]
 * @param {string} [opts.since] - Git date expression (e.g. "1 year ago")
 * @param {string} [opts.afterSha] - Only include commits after this SHA
 * @returns {{ commits: Array<{sha: string, epoch: number, files: string[]}> }}
 */
export function scanGitHistory(repoRoot, opts = {}) {
  const args = [
    // By default git C-quotes and octal-escapes path names containing bytes
    // above 0x80, so non-ASCII file names would never match the paths stored
    // in the graph. Disabling core.quotePath prints them verbatim.
    '-c',
    'core.quotePath=false',
    'log',
    '--name-only',
    '--pretty=format:%H%n%at',
    '--no-merges',
    '--diff-filter=AMRC',
  ];
  if (opts.since) args.push(`--since=${opts.since}`);
  if (opts.afterSha) args.push(`${opts.afterSha}..HEAD`);
  args.push('--', '.');

  let output;
  try {
    output = execFileSync('git', args, {
      cwd: repoRoot,
      encoding: 'utf-8',
      maxBuffer: 50 * 1024 * 1024,
      stdio: ['pipe', 'pipe', 'pipe'],
    });
  } catch (e) {
    warn(`Failed to scan git history: ${e.message}`);
    return { commits: [] };
  }

  if (!output.trim()) return { commits: [] };

  return { commits: parseGitLogOutput(output) };
}

/**
 * Parse `git log --name-only --pretty=format:%H%n%at` output into commit records.
 * Blocks are separated by blank lines; each block is sha, epoch, then file paths.
 */
function parseGitLogOutput(output) {
  const commits = [];
  for (const block of output.trim().split(/\n\n+/)) {
    const [sha, epochLine, ...paths] = block.split('\n').filter((line) => line.length > 0);
    // A block needs at least the sha and the timestamp line to be usable.
    if (epochLine === undefined) continue;
    const epoch = Number.parseInt(epochLine, 10);
    if (Number.isNaN(epoch)) continue;
    // Commits that list no files carry no co-change information.
    if (paths.length === 0) continue;
    commits.push({ sha, epoch, files: paths.map((p) => normalizePath(p)) });
  }
  return commits;
}
|
|
68
|
+
|
|
69
|
+
/**
 * Compute co-change pairs from parsed commit data.
 *
 * Every commit contributes one count to each distinct file it touches and one
 * count to each unordered pair of distinct files. Pairs seen fewer than
 * `minSupport` times are dropped; the rest get a Jaccard coefficient
 * `pairCount / (countA + countB - pairCount)`.
 *
 * @param {Array<{sha: string, epoch: number, files: string[]}>} commits
 * @param {object} [opts]
 * @param {number} [opts.minSupport=3] - Minimum number of co-occurrences
 * @param {number} [opts.maxFilesPerCommit=50] - Skip commits with too many files
 * @param {Set<string>} [opts.knownFiles] - If provided, only include pairs where both files are in this set
 * @returns {{
 *   pairs: Map<string, {commitCount: number, jaccard: number, lastEpoch: number}>,
 *   fileCommitCounts: Map<string, number>
 * }} `pairs` is keyed by `"fileA\0fileB"` with fileA < fileB; `fileCommitCounts`
 *   holds the per-file commit totals used for the Jaccard denominator.
 */
export function computeCoChanges(commits, opts = {}) {
  const minSupport = opts.minSupport ?? 3;
  const maxFilesPerCommit = opts.maxFilesPerCommit ?? 50;
  const knownFiles = opts.knownFiles || null;

  const fileCommitCounts = new Map();
  const pairCounts = new Map();
  const pairLastEpoch = new Map();

  for (const commit of commits) {
    // Very large commits (bulk renames, vendoring) are skipped entirely.
    if (commit.files.length > maxFilesPerCommit) continue;

    const candidates = knownFiles
      ? commit.files.filter((f) => knownFiles.has(f))
      : commit.files;

    // De-duplicate BEFORE counting: a path listed twice in one commit is still
    // a single commit for that file. Counting the raw list would inflate the
    // per-file total relative to the pair total and understate Jaccard.
    const distinct = new Set(candidates);

    for (const f of distinct) {
      fileCommitCounts.set(f, (fileCommitCounts.get(f) || 0) + 1);
    }

    // Generate all unique pairs (canonical: a < b)
    const sorted = [...distinct].sort();
    for (let i = 0; i < sorted.length; i++) {
      for (let j = i + 1; j < sorted.length; j++) {
        const key = `${sorted[i]}\0${sorted[j]}`;
        pairCounts.set(key, (pairCounts.get(key) || 0) + 1);
        const prev = pairLastEpoch.get(key) || 0;
        if (commit.epoch > prev) pairLastEpoch.set(key, commit.epoch);
      }
    }
  }

  // Filter by minSupport and compute Jaccard
  const pairs = new Map();
  for (const [key, count] of pairCounts) {
    if (count < minSupport) continue;
    const [fileA, fileB] = key.split('\0');
    const countA = fileCommitCounts.get(fileA) || 0;
    const countB = fileCommitCounts.get(fileB) || 0;
    pairs.set(key, {
      commitCount: count,
      jaccard: count / (countA + countB - count),
      lastEpoch: pairLastEpoch.get(key) || 0,
    });
  }

  return { pairs, fileCommitCounts };
}
|
|
129
|
+
|
|
130
|
+
/**
 * Analyze git history and populate co-change data in the database.
 *
 * The repository root is taken to be the parent of the directory holding the
 * database file. Unless `opts.full` is set, only commits after the stored
 * `last_analyzed_commit` are scanned and their counts are accumulated onto the
 * existing rows. All writes happen in a single transaction so the counts and
 * the `last_analyzed_commit` marker cannot drift apart, and the database
 * handle is closed even when a step throws.
 *
 * @param {string} [customDbPath] - Path to graph.db
 * @param {object} [opts]
 * @param {string} [opts.since] - Git date expression
 * @param {number} [opts.minSupport] - Minimum co-occurrence count
 * @param {number} [opts.maxFilesPerCommit] - Max files per commit
 * @param {boolean} [opts.full] - Force full re-scan
 * @returns {{ pairsFound: number, commitsScanned: number, since: string, minSupport: number } | { error: string }}
 *   Summary of the run, or `{ error }` when the resolved root is not a git repository.
 */
export function analyzeCoChanges(customDbPath, opts = {}) {
  const dbPath = findDbPath(customDbPath);
  const db = openDb(dbPath);

  try {
    initSchema(db);

    const repoRoot = path.resolve(path.dirname(dbPath), '..');

    if (!fs.existsSync(path.join(repoRoot, '.git'))) {
      return { error: `Not a git repository: ${repoRoot}` };
    }

    const since = opts.since || '1 year ago';
    const minSupport = opts.minSupport ?? 3;
    const maxFilesPerCommit = opts.maxFilesPerCommit ?? 50;

    // Check for incremental state
    let afterSha = null;
    if (!opts.full) {
      try {
        const row = db
          .prepare("SELECT value FROM co_change_meta WHERE key = 'last_analyzed_commit'")
          .get();
        if (row) afterSha = row.value;
      } catch {
        /* table may not exist yet */
      }
    }

    // Collect known files from the graph for filtering
    let knownFiles = null;
    try {
      const rows = db.prepare('SELECT DISTINCT file FROM nodes').all();
      knownFiles = new Set(rows.map((r) => r.file));
    } catch {
      /* nodes table may not exist */
    }

    const { commits } = scanGitHistory(repoRoot, { since, afterSha });
    // NOTE(review): computeCoChanges applies minSupport to this batch only, so
    // on incremental runs a pair that co-occurs fewer than minSupport times in
    // the new commits is not added to its stored total. Confirm this is intended.
    const { pairs: coChanges, fileCommitCounts } = computeCoChanges(commits, {
      minSupport,
      maxFilesPerCommit,
      knownFiles,
    });

    // Upsert per-file commit counts so Jaccard can be recomputed from totals
    const fileCountUpsert = db.prepare(`
      INSERT INTO file_commit_counts (file, commit_count) VALUES (?, ?)
      ON CONFLICT(file) DO UPDATE SET commit_count = commit_count + excluded.commit_count
    `);

    // Upsert pair counts (accumulate commit_count, jaccard placeholder — recomputed below)
    const pairUpsert = db.prepare(`
      INSERT INTO co_changes (file_a, file_b, commit_count, jaccard, last_commit_epoch)
      VALUES (?, ?, ?, 0, ?)
      ON CONFLICT(file_a, file_b) DO UPDATE SET
        commit_count = commit_count + excluded.commit_count,
        last_commit_epoch = MAX(co_changes.last_commit_epoch, excluded.last_commit_epoch)
    `);

    const metaUpsert = db.prepare(`
      INSERT INTO co_change_meta (key, value) VALUES (?, ?)
      ON CONFLICT(key) DO UPDATE SET value = excluded.value
    `);

    const persist = db.transaction(() => {
      // If full re-scan, clear existing data
      if (opts.full) {
        db.exec('DELETE FROM co_changes');
        db.exec('DELETE FROM co_change_meta');
        db.exec('DELETE FROM file_commit_counts');
      }

      for (const [file, count] of fileCommitCounts) {
        fileCountUpsert.run(file, count);
      }
      for (const [key, data] of coChanges) {
        const [fileA, fileB] = key.split('\0');
        pairUpsert.run(fileA, fileB, data.commitCount, data.lastEpoch);
      }

      // Recompute Jaccard for all affected pairs from total file commit counts.
      // COALESCE keeps the previous value when the subquery yields NULL (a file
      // with no file_commit_counts row, or a zero denominator), because the
      // jaccard column is declared NOT NULL.
      const affectedFiles = [...fileCommitCounts.keys()];
      if (affectedFiles.length > 0) {
        const ph = affectedFiles.map(() => '?').join(',');
        db.prepare(`
          UPDATE co_changes SET jaccard = COALESCE((
            SELECT CAST(co_changes.commit_count AS REAL) / (
              COALESCE(fa.commit_count, 0) + COALESCE(fb.commit_count, 0) - co_changes.commit_count
            )
            FROM file_commit_counts fa, file_commit_counts fb
            WHERE fa.file = co_changes.file_a AND fb.file = co_changes.file_b
          ), jaccard)
          WHERE file_a IN (${ph}) OR file_b IN (${ph})
        `).run(...affectedFiles, ...affectedFiles);
      }

      // Update metadata. commits[0] is the first commit git log printed.
      if (commits.length > 0) {
        metaUpsert.run('last_analyzed_commit', commits[0].sha);
      }
      metaUpsert.run('analyzed_at', new Date().toISOString());
      metaUpsert.run('since', since);
      metaUpsert.run('min_support', String(minSupport));
    });
    persist();

    const totalPairs = db.prepare('SELECT COUNT(*) as cnt FROM co_changes').get().cnt;

    return {
      pairsFound: totalPairs,
      commitsScanned: commits.length,
      since,
      minSupport,
    };
  } finally {
    db.close();
  }
}
|
|
257
|
+
|
|
258
|
+
/**
 * Query co-change partners for a specific file.
 *
 * The file is resolved against the stored pairs (exact match first, then a
 * suffix match). Partners are ordered by descending Jaccard; test files are
 * filtered out before the limit is applied. The database handle is closed on
 * every path, including when a query throws.
 *
 * @param {string} file - File path (partial match supported)
 * @param {string} [customDbPath]
 * @param {object} [opts]
 * @param {number} [opts.limit=20]
 * @param {number} [opts.minJaccard=0.3]
 * @param {boolean} [opts.noTests]
 * @returns {{ file: string, partners: Array, meta: object } | { error: string }}
 */
export function coChangeData(file, customDbPath, opts = {}) {
  const db = openReadonlyOrFail(customDbPath);
  try {
    const limit = opts.limit || 20;
    const minJaccard = opts.minJaccard ?? 0.3;
    const noTests = opts.noTests || false;

    // Check if co_changes table exists
    try {
      db.prepare('SELECT 1 FROM co_changes LIMIT 1').get();
    } catch {
      return { error: 'No co-change data found. Run `codegraph co-change --analyze` first.' };
    }

    // Resolve file via partial match
    const resolvedFile = resolveCoChangeFile(db, file);
    if (!resolvedFile) {
      return { error: `No co-change data found for file matching "${file}"` };
    }

    const rows = db
      .prepare(
        `SELECT file_a, file_b, commit_count, jaccard, last_commit_epoch
         FROM co_changes
         WHERE (file_a = ? OR file_b = ?) AND jaccard >= ?
         ORDER BY jaccard DESC`,
      )
      .all(resolvedFile, resolvedFile, minJaccard);

    const partners = [];
    for (const row of rows) {
      // The partner is whichever side of the pair is not the queried file.
      const partner = row.file_a === resolvedFile ? row.file_b : row.file_a;
      if (noTests && isTestFile(partner)) continue;
      partners.push({
        file: partner,
        commitCount: row.commit_count,
        jaccard: row.jaccard,
        // Epoch is stored in seconds; render as YYYY-MM-DD.
        lastCommitDate: row.last_commit_epoch
          ? new Date(row.last_commit_epoch * 1000).toISOString().slice(0, 10)
          : null,
      });
      if (partners.length >= limit) break;
    }

    const meta = getCoChangeMeta(db);

    return { file: resolvedFile, partners, meta };
  } finally {
    db.close();
  }
}
|
|
318
|
+
|
|
319
|
+
/**
 * Query top global co-change pairs.
 *
 * Pairs are ordered by descending Jaccard; pairs involving a test file are
 * filtered out (when `noTests` is set) before the limit is applied. The
 * database handle is closed on every path, including when a query throws.
 *
 * @param {string} [customDbPath]
 * @param {object} [opts]
 * @param {number} [opts.limit=20]
 * @param {number} [opts.minJaccard=0.3]
 * @param {boolean} [opts.noTests]
 * @returns {{ pairs: Array, meta: object } | { error: string }}
 */
export function coChangeTopData(customDbPath, opts = {}) {
  const db = openReadonlyOrFail(customDbPath);
  try {
    const limit = opts.limit || 20;
    const minJaccard = opts.minJaccard ?? 0.3;
    const noTests = opts.noTests || false;

    // A failing probe means the co_changes table has not been created yet.
    try {
      db.prepare('SELECT 1 FROM co_changes LIMIT 1').get();
    } catch {
      return { error: 'No co-change data found. Run `codegraph co-change --analyze` first.' };
    }

    const rows = db
      .prepare(
        `SELECT file_a, file_b, commit_count, jaccard, last_commit_epoch
         FROM co_changes
         WHERE jaccard >= ?
         ORDER BY jaccard DESC`,
      )
      .all(minJaccard);

    const pairs = [];
    for (const row of rows) {
      if (noTests && (isTestFile(row.file_a) || isTestFile(row.file_b))) continue;
      pairs.push({
        fileA: row.file_a,
        fileB: row.file_b,
        commitCount: row.commit_count,
        jaccard: row.jaccard,
        // Epoch is stored in seconds; render as YYYY-MM-DD.
        lastCommitDate: row.last_commit_epoch
          ? new Date(row.last_commit_epoch * 1000).toISOString().slice(0, 10)
          : null,
      });
      if (pairs.length >= limit) break;
    }

    const meta = getCoChangeMeta(db);

    return { pairs, meta };
  } finally {
    db.close();
  }
}
|
|
370
|
+
|
|
371
|
+
/**
 * Batch-query co-change partners for a set of files.
 * Takes an already-open readonly DB handle (for diff-impact integration).
 *
 * Pairs whose two files are both in the input set are skipped, as are test
 * files when `noTests` is set. The limit is applied AFTER those filters, so
 * skipped rows do not use up result slots.
 *
 * @param {string[]} files - File paths to query
 * @param {import('better-sqlite3').Database} db - Already-open DB handle
 * @param {object} [opts]
 * @param {number} [opts.minJaccard=0.3]
 * @param {number} [opts.limit=20] - Maximum results; a negative value means no limit
 * @param {boolean} [opts.noTests]
 * @returns {Array<{file: string, coupledWith: string, commitCount: number, jaccard: number}>}
 */
export function coChangeForFiles(files, db, opts = {}) {
  const minJaccard = opts.minJaccard ?? 0.3;
  const limit = opts.limit ?? 20;
  const noTests = opts.noTests || false;
  const inputSet = new Set(files);

  if (inputSet.size === 0) return [];

  // Bind each distinct path once; duplicates would only add parameters.
  const uniqueFiles = [...inputSet];
  const placeholders = uniqueFiles.map(() => '?').join(',');
  const rows = db
    .prepare(
      `SELECT file_a, file_b, commit_count, jaccard
       FROM co_changes
       WHERE (file_a IN (${placeholders}) OR file_b IN (${placeholders}))
         AND jaccard >= ?
       ORDER BY jaccard DESC`,
    )
    .all(...uniqueFiles, ...uniqueFiles, minJaccard);

  // A negative limit keeps the SQL "LIMIT -1" meaning of unlimited.
  const maxResults = limit < 0 ? Number.POSITIVE_INFINITY : limit;

  const results = [];
  for (const row of rows) {
    if (results.length >= maxResults) break;
    const aIsInput = inputSet.has(row.file_a);
    const partner = aIsInput ? row.file_b : row.file_a;
    const source = aIsInput ? row.file_a : row.file_b;
    // Both sides already in the input set: nothing new to report.
    if (inputSet.has(partner)) continue;
    if (noTests && isTestFile(partner)) continue;
    results.push({
      file: partner,
      coupledWith: source,
      commitCount: row.commit_count,
      jaccard: row.jaccard,
    });
  }

  return results;
}
|
|
418
|
+
|
|
419
|
+
/**
 * Render one result row: Jaccard as a whole-number percentage, the commit
 * count, then a caller-supplied label, with the numeric columns right-aligned.
 */
function formatCoChangeRow(jaccard, commitCount, label) {
  const pct = `${(jaccard * 100).toFixed(0)}%`.padStart(4);
  const commits = `${commitCount} commits`.padStart(12);
  return ` ${pct} ${commits} ${label}`;
}

/**
 * Build the trailing "Analyzed … | Window …" line, or null when the metadata
 * carries no analysis timestamp.
 */
function formatCoChangeFooter(meta) {
  if (!meta?.analyzedAt) return null;
  return `\n Analyzed: ${meta.analyzedAt} | Window: ${meta.since || 'all'}`;
}

/**
 * Format co-change data for CLI output (single file).
 *
 * @param {{ error?: string, file?: string, partners?: Array, meta?: object }} data
 *   Result object as produced by `coChangeData`.
 * @returns {string} The error message, a "no partners" notice, or the table.
 */
export function formatCoChange(data) {
  if (data.error) return data.error;
  if (data.partners.length === 0) return `No co-change partners found for ${data.file}`;

  const lines = [`\nCo-change partners for ${data.file}:\n`];
  for (const p of data.partners) {
    lines.push(formatCoChangeRow(p.jaccard, p.commitCount, p.file));
  }
  const footer = formatCoChangeFooter(data.meta);
  if (footer !== null) lines.push(footer);
  return lines.join('\n');
}

/**
 * Format top co-change pairs for CLI output (global view).
 *
 * @param {{ error?: string, pairs?: Array, meta?: object }} data
 *   Result object as produced by `coChangeTopData`.
 * @returns {string} The error message, a "no pairs" notice, or the table.
 */
export function formatCoChangeTop(data) {
  if (data.error) return data.error;
  if (data.pairs.length === 0) return 'No co-change pairs found.';

  const lines = ['\nTop co-change pairs:\n'];
  for (const p of data.pairs) {
    lines.push(formatCoChangeRow(p.jaccard, p.commitCount, `${p.fileA} <-> ${p.fileB}`));
  }
  const footer = formatCoChangeFooter(data.meta);
  if (footer !== null) lines.push(footer);
  return lines.join('\n');
}
|
|
456
|
+
|
|
457
|
+
// ─── Internal Helpers ────────────────────────────────────────────────────
|
|
458
|
+
|
|
459
|
+
/**
 * Resolve a user-supplied path to a file name stored in `co_changes`.
 *
 * Tries an exact match on either side of a pair first, then falls back to a
 * suffix ("ends with") match. LIKE wildcards in the input are escaped so that
 * `_` and `%` in a file name are matched literally.
 *
 * @param {import('better-sqlite3').Database} db - Open DB handle
 * @param {string} file - Full or trailing part of a file path
 * @returns {string|null} The stored file name, or null when nothing matches
 */
function resolveCoChangeFile(db, file) {
  // Exact match first
  const exact = db
    .prepare(
      'SELECT file_a FROM co_changes WHERE file_a = ? UNION SELECT file_b FROM co_changes WHERE file_b = ? LIMIT 1',
    )
    .get(file, file);
  if (exact) return exact.file_a;

  // Partial match (ends with). Escape the LIKE metacharacters and the escape
  // character itself; otherwise "my_file.js" would also match "myXfile.js".
  const suffixPattern = `%${String(file).replace(/[\\%_]/g, '\\$&')}`;
  const partial = db
    .prepare(
      `SELECT file_a AS file FROM co_changes WHERE file_a LIKE ? ESCAPE '\\'
       UNION
       SELECT file_b AS file FROM co_changes WHERE file_b LIKE ? ESCAPE '\\'
       LIMIT 1`,
    )
    .get(suffixPattern, suffixPattern);
  if (partial) return partial.file;

  return null;
}
|
|
481
|
+
|
|
482
|
+
/**
 * Read the key/value rows of `co_change_meta` and expose the known keys under
 * camelCase names. Missing or empty values become null; `min_support` is
 * parsed as a base-10 integer. Returns null if reading the table throws.
 */
function getCoChangeMeta(db) {
  try {
    const stored = new Map();
    db.prepare('SELECT key, value FROM co_change_meta')
      .all()
      .forEach((entry) => {
        stored.set(entry.key, entry.value);
      });

    const rawMinSupport = stored.get('min_support');
    let minSupport = null;
    if (rawMinSupport) {
      minSupport = parseInt(rawMinSupport, 10);
    }

    return {
      analyzedAt: stored.get('analyzed_at') || null,
      since: stored.get('since') || null,
      minSupport,
      lastCommit: stored.get('last_analyzed_commit') || null,
    };
  } catch {
    return null;
  }
}
|
package/src/config.js
CHANGED
|
@@ -24,6 +24,12 @@ export const DEFAULTS = {
|
|
|
24
24
|
llm: { provider: null, model: null, baseUrl: null, apiKey: null, apiKeyCommand: null },
|
|
25
25
|
search: { defaultMinScore: 0.2, rrfK: 60, topK: 15 },
|
|
26
26
|
ci: { failOnCycles: false, impactThreshold: null },
|
|
27
|
+
coChange: {
|
|
28
|
+
since: '1 year ago',
|
|
29
|
+
minSupport: 3,
|
|
30
|
+
minJaccard: 0.3,
|
|
31
|
+
maxFilesPerCommit: 50,
|
|
32
|
+
},
|
|
27
33
|
};
|
|
28
34
|
|
|
29
35
|
/**
|
package/src/db.js
CHANGED
|
@@ -71,6 +71,36 @@ export const MIGRATIONS = [
|
|
|
71
71
|
version: 4,
|
|
72
72
|
up: `ALTER TABLE file_hashes ADD COLUMN size INTEGER DEFAULT 0;`,
|
|
73
73
|
},
|
|
74
|
+
{
|
|
75
|
+
version: 5,
|
|
76
|
+
up: `
|
|
77
|
+
CREATE TABLE IF NOT EXISTS co_changes (
|
|
78
|
+
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
79
|
+
file_a TEXT NOT NULL,
|
|
80
|
+
file_b TEXT NOT NULL,
|
|
81
|
+
commit_count INTEGER NOT NULL,
|
|
82
|
+
jaccard REAL NOT NULL,
|
|
83
|
+
last_commit_epoch INTEGER,
|
|
84
|
+
UNIQUE(file_a, file_b)
|
|
85
|
+
);
|
|
86
|
+
CREATE INDEX IF NOT EXISTS idx_co_changes_file_a ON co_changes(file_a);
|
|
87
|
+
CREATE INDEX IF NOT EXISTS idx_co_changes_file_b ON co_changes(file_b);
|
|
88
|
+
CREATE INDEX IF NOT EXISTS idx_co_changes_jaccard ON co_changes(jaccard DESC);
|
|
89
|
+
CREATE TABLE IF NOT EXISTS co_change_meta (
|
|
90
|
+
key TEXT PRIMARY KEY,
|
|
91
|
+
value TEXT NOT NULL
|
|
92
|
+
);
|
|
93
|
+
`,
|
|
94
|
+
},
|
|
95
|
+
{
|
|
96
|
+
version: 6,
|
|
97
|
+
up: `
|
|
98
|
+
CREATE TABLE IF NOT EXISTS file_commit_counts (
|
|
99
|
+
file TEXT PRIMARY KEY,
|
|
100
|
+
commit_count INTEGER NOT NULL DEFAULT 0
|
|
101
|
+
);
|
|
102
|
+
`,
|
|
103
|
+
},
|
|
74
104
|
];
|
|
75
105
|
|
|
76
106
|
export function openDb(dbPath) {
|
|
@@ -115,6 +145,16 @@ export function initSchema(db) {
|
|
|
115
145
|
} catch {
|
|
116
146
|
/* already exists */
|
|
117
147
|
}
|
|
148
|
+
try {
|
|
149
|
+
db.exec('ALTER TABLE nodes ADD COLUMN role TEXT');
|
|
150
|
+
} catch {
|
|
151
|
+
/* already exists */
|
|
152
|
+
}
|
|
153
|
+
try {
|
|
154
|
+
db.exec('CREATE INDEX IF NOT EXISTS idx_nodes_role ON nodes(role)');
|
|
155
|
+
} catch {
|
|
156
|
+
/* already exists */
|
|
157
|
+
}
|
|
118
158
|
}
|
|
119
159
|
|
|
120
160
|
export function findDbPath(customPath) {
|
package/src/embedder.js
CHANGED
|
@@ -16,6 +16,28 @@ function splitIdentifier(name) {
|
|
|
16
16
|
.trim();
|
|
17
17
|
}
|
|
18
18
|
|
|
19
|
+
/**
 * Match a file path against a glob pattern.
 * Supports *, **, and ? wildcards. Zero dependencies.
 *
 * - `*` matches any run of characters within one path segment
 * - `?` matches exactly one character other than `/`
 * - `**` matches across segments; a whole-segment `**` followed by `/` also
 *   matches zero directories, so a pattern such as `**` + `/*.js` matches
 *   `foo.js` as well as `src/foo.js`
 *
 * Backslashes in both arguments are treated as path separators.
 *
 * @param {string} filePath - Path to test
 * @param {string} pattern - Glob pattern
 * @returns {boolean} True when the whole path matches the pattern
 */
function globMatch(filePath, pattern) {
  // Normalize separators to forward slashes
  const normalized = filePath.replace(/\\/g, '/');
  const glob = pattern.replace(/\\/g, '/');

  // Translate in a single pass so the regex fragments emitted for one
  // wildcard are never re-interpreted as another wildcard.
  let source = '';
  for (let i = 0; i < glob.length; i++) {
    const ch = glob[i];
    if (ch === '*' && glob[i + 1] === '*') {
      const atSegmentStart = i === 0 || glob[i - 1] === '/';
      // Collapse any longer run of stars into one globstar.
      while (glob[i + 1] === '*') i++;
      if (atSegmentStart && glob[i + 1] === '/') {
        // Whole-segment globstar: zero or more leading directories.
        i++;
        source += '(?:.*/)?';
      } else {
        source += '.*';
      }
    } else if (ch === '*') {
      source += '[^/]*';
    } else if (ch === '?') {
      source += '[^/]';
    } else {
      // Escape regex specials so they match literally
      source += ch.replace(/[.+^${}()|[\]\\]/, '\\$&');
    }
  }

  try {
    return new RegExp(`^${source}$`).test(normalized);
  } catch {
    // Malformed pattern — fall back to substring match
    return normalized.includes(pattern);
  }
}
|
|
40
|
+
|
|
19
41
|
// Lazy-load transformers (heavy, optional module)
|
|
20
42
|
let pipeline = null;
|
|
21
43
|
let _cos_sim = null;
|
|
@@ -76,7 +98,7 @@ export const MODELS = {
|
|
|
76
98
|
|
|
77
99
|
export const EMBEDDING_STRATEGIES = ['structured', 'source'];
|
|
78
100
|
|
|
79
|
-
export const DEFAULT_MODEL = '
|
|
101
|
+
export const DEFAULT_MODEL = 'nomic-v1.5';
|
|
80
102
|
const BATCH_SIZE_MAP = {
|
|
81
103
|
minilm: 32,
|
|
82
104
|
'jina-small': 16,
|
|
@@ -216,11 +238,26 @@ async function loadTransformers() {
|
|
|
216
238
|
}
|
|
217
239
|
}
|
|
218
240
|
|
|
241
|
+
/**
 * Dispose the current ONNX session and free memory.
 * Safe to call when no model is loaded (no-op).
 *
 * The module-level references are cleared BEFORE awaiting `dispose()`, so a
 * rejected dispose cannot leave the module pointing at a half-released
 * session, and the session is not handed out again while it is being torn down.
 *
 * @returns {Promise<void>}
 */
export async function disposeModel() {
  const previous = extractor;
  extractor = null;
  activeModel = null;
  if (previous) {
    await previous.dispose();
  }
}
|
|
252
|
+
|
|
219
253
|
async function loadModel(modelKey) {
|
|
220
254
|
const config = getModelConfig(modelKey);
|
|
221
255
|
|
|
222
256
|
if (extractor && activeModel === config.name) return { extractor, config };
|
|
223
257
|
|
|
258
|
+
// Dispose previous model before loading a different one
|
|
259
|
+
await disposeModel();
|
|
260
|
+
|
|
224
261
|
const transformers = await loadTransformers();
|
|
225
262
|
pipeline = transformers.pipeline;
|
|
226
263
|
_cos_sim = transformers.cos_sim;
|
|
@@ -324,6 +361,14 @@ export async function buildEmbeddings(rootDir, modelKey, customDbPath, options =
|
|
|
324
361
|
const strategy = options.strategy || 'structured';
|
|
325
362
|
const dbPath = customDbPath || findDbPath(null);
|
|
326
363
|
|
|
364
|
+
if (!fs.existsSync(dbPath)) {
|
|
365
|
+
console.error(
|
|
366
|
+
`No codegraph database found at ${dbPath}.\n` +
|
|
367
|
+
`Run "codegraph build" first to analyze your codebase.`,
|
|
368
|
+
);
|
|
369
|
+
process.exit(1);
|
|
370
|
+
}
|
|
371
|
+
|
|
327
372
|
const db = new Database(dbPath);
|
|
328
373
|
initEmbeddingsSchema(db);
|
|
329
374
|
|
|
@@ -488,7 +533,8 @@ function _prepareSearch(customDbPath, opts = {}) {
|
|
|
488
533
|
conditions.push('n.kind = ?');
|
|
489
534
|
params.push(opts.kind);
|
|
490
535
|
}
|
|
491
|
-
|
|
536
|
+
const isGlob = opts.filePattern && /[*?[\]]/.test(opts.filePattern);
|
|
537
|
+
if (opts.filePattern && !isGlob) {
|
|
492
538
|
conditions.push('n.file LIKE ?');
|
|
493
539
|
params.push(`%${opts.filePattern}%`);
|
|
494
540
|
}
|
|
@@ -497,6 +543,9 @@ function _prepareSearch(customDbPath, opts = {}) {
|
|
|
497
543
|
}
|
|
498
544
|
|
|
499
545
|
let rows = db.prepare(sql).all(...params);
|
|
546
|
+
if (isGlob) {
|
|
547
|
+
rows = rows.filter((row) => globMatch(row.file, opts.filePattern));
|
|
548
|
+
}
|
|
500
549
|
if (noTests) {
|
|
501
550
|
rows = rows.filter((row) => !TEST_PATTERN.test(row.file));
|
|
502
551
|
}
|
|
@@ -660,6 +709,11 @@ export async function search(query, customDbPath, opts = {}) {
|
|
|
660
709
|
const data = await searchData(singleQuery, customDbPath, opts);
|
|
661
710
|
if (!data) return;
|
|
662
711
|
|
|
712
|
+
if (opts.json) {
|
|
713
|
+
console.log(JSON.stringify(data, null, 2));
|
|
714
|
+
return;
|
|
715
|
+
}
|
|
716
|
+
|
|
663
717
|
console.log(`\nSemantic search: "${singleQuery}"\n`);
|
|
664
718
|
|
|
665
719
|
if (data.results.length === 0) {
|
|
@@ -679,6 +733,11 @@ export async function search(query, customDbPath, opts = {}) {
|
|
|
679
733
|
const data = await multiSearchData(queries, customDbPath, opts);
|
|
680
734
|
if (!data) return;
|
|
681
735
|
|
|
736
|
+
if (opts.json) {
|
|
737
|
+
console.log(JSON.stringify(data, null, 2));
|
|
738
|
+
return;
|
|
739
|
+
}
|
|
740
|
+
|
|
682
741
|
console.log(`\nMulti-query semantic search (RRF, k=${opts.rrfK || 60}):`);
|
|
683
742
|
queries.forEach((q, i) => {
|
|
684
743
|
console.log(` [${i + 1}] "${q}"`);
|