@optave/codegraph 2.3.0 → 2.3.1-dev.1aeea34
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +40 -14
- package/package.json +5 -5
- package/src/builder.js +66 -0
- package/src/cli.js +113 -9
- package/src/cochange.js +498 -0
- package/src/config.js +6 -0
- package/src/db.js +40 -0
- package/src/embedder.js +53 -2
- package/src/export.js +158 -13
- package/src/extractors/helpers.js +2 -1
- package/src/extractors/javascript.js +294 -78
- package/src/index.js +13 -0
- package/src/mcp.js +62 -1
- package/src/parser.js +39 -2
- package/src/queries.js +158 -9
- package/src/registry.js +9 -1
- package/src/structure.js +94 -0
package/src/cochange.js
ADDED
|
@@ -0,0 +1,498 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Git co-change analysis — surfaces files that historically change together.
|
|
3
|
+
*
|
|
4
|
+
* Uses git log to find temporal coupling between files, computes Jaccard
|
|
5
|
+
* similarity coefficients, and stores results in the codegraph database.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import { execFileSync } from 'node:child_process';
|
|
9
|
+
import fs from 'node:fs';
|
|
10
|
+
import path from 'node:path';
|
|
11
|
+
import { normalizePath } from './constants.js';
|
|
12
|
+
import { findDbPath, initSchema, openDb, openReadonlyOrFail } from './db.js';
|
|
13
|
+
import { warn } from './logger.js';
|
|
14
|
+
import { isTestFile } from './queries.js';
|
|
15
|
+
|
|
16
|
+
/**
 * Scan git history and return parsed commit data.
 * @param {string} repoRoot - Absolute path to the git repo root
 * @param {object} [opts]
 * @param {string} [opts.since] - Git date expression (e.g. "1 year ago")
 * @param {string} [opts.afterSha] - Only include commits after this SHA
 * @returns {{ commits: Array<{sha: string, epoch: number, files: string[]}> }}
 */
export function scanGitHistory(repoRoot, opts = {}) {
  const args = [
    'log',
    '--name-only',
    '--pretty=format:%H%n%at',
    '--no-merges',
    '--diff-filter=AMRC', // added/modified/renamed/copied — deletions excluded
  ];
  if (opts.since) args.push(`--since=${opts.since}`);
  if (opts.afterSha) args.push(`${opts.afterSha}..HEAD`);
  args.push('--', '.');

  let output;
  try {
    output = execFileSync('git', args, {
      cwd: repoRoot,
      encoding: 'utf-8',
      maxBuffer: 50 * 1024 * 1024, // large repos can emit tens of MB of history
      stdio: ['pipe', 'pipe', 'pipe'],
    });
  } catch (e) {
    warn(`Failed to scan git history: ${e.message}`);
    return { commits: [] };
  }

  if (!output.trim()) return { commits: [] };

  // Parse line-by-line with a small state machine rather than splitting on
  // blank lines: with `--pretty=format:` git inserts a blank line between the
  // sha/epoch header and the --name-only file list, so blank-line-delimited
  // blocks would separate a commit's header from its files and drop commits.
  // A header is a full hex object name (40 chars for SHA-1, 64 for SHA-256)
  // immediately followed by an all-digits epoch line; any other non-blank
  // line is a file path belonging to the current commit.
  const commits = [];
  const lines = output.split('\n');
  let current = null;
  const flush = () => {
    if (current && current.files.length > 0) commits.push(current);
    current = null;
  };
  for (let i = 0; i < lines.length; i++) {
    const line = lines[i].trim();
    if (line.length === 0) continue;
    const next = i + 1 < lines.length ? lines[i + 1].trim() : '';
    if (/^(?:[0-9a-f]{40}|[0-9a-f]{64})$/.test(line) && /^\d+$/.test(next)) {
      flush();
      current = { sha: line, epoch: parseInt(next, 10), files: [] };
      i++; // consume the epoch line
    } else if (current) {
      current.files.push(normalizePath(lines[i]));
    }
  }
  flush();

  return { commits };
}
|
|
68
|
+
|
|
69
|
+
/**
 * Compute co-change pairs from parsed commit data.
 * @param {Array<{sha: string, epoch: number, files: string[]}>} commits
 * @param {object} [opts]
 * @param {number} [opts.minSupport=3] - Minimum number of co-occurrences
 * @param {number} [opts.maxFilesPerCommit=50] - Skip commits with too many files
 * @param {Set<string>} [opts.knownFiles] - If provided, only include pairs where both files are in this set
 * @returns {{ pairs: Map<string, {commitCount: number, jaccard: number, lastEpoch: number}>, fileCommitCounts: Map<string, number> }}
 *   `pairs` is keyed by "fileA\0fileB" with fileA < fileB (canonical order).
 */
export function computeCoChanges(commits, opts = {}) {
  const minSupport = opts.minSupport ?? 3;
  const maxFilesPerCommit = opts.maxFilesPerCommit ?? 50;
  const knownFiles = opts.knownFiles || null;

  const fileCommitCounts = new Map();
  const pairCounts = new Map();
  const pairLastEpoch = new Map();

  for (const commit of commits) {
    let { files } = commit;
    // Skip bulk commits (mass renames, formatting sweeps) — they create
    // spurious coupling between otherwise unrelated files.
    if (files.length > maxFilesPerCommit) continue;

    if (knownFiles) {
      files = files.filter((f) => knownFiles.has(f));
    }

    // Dedupe so a file listed twice in one commit counts once, keeping the
    // per-file totals consistent with the pair counts below; sort so pair
    // keys are canonical (a < b).
    const sorted = [...new Set(files)].sort();

    // Count per-file commits
    for (const f of sorted) {
      fileCommitCounts.set(f, (fileCommitCounts.get(f) || 0) + 1);
    }

    // Generate all unique pairs
    for (let i = 0; i < sorted.length; i++) {
      for (let j = i + 1; j < sorted.length; j++) {
        const key = `${sorted[i]}\0${sorted[j]}`;
        pairCounts.set(key, (pairCounts.get(key) || 0) + 1);
        const prev = pairLastEpoch.get(key) || 0;
        if (commit.epoch > prev) pairLastEpoch.set(key, commit.epoch);
      }
    }
  }

  // Keep only pairs with enough support, then compute Jaccard similarity:
  // |A ∩ B| / |A ∪ B| over the sets of commits touching each file.
  const results = new Map();
  for (const [key, count] of pairCounts) {
    if (count < minSupport) continue;
    const [fileA, fileB] = key.split('\0');
    const countA = fileCommitCounts.get(fileA) || 0;
    const countB = fileCommitCounts.get(fileB) || 0;
    // Denominator is >= count >= 1 whenever the pair exists, so no /0.
    const jaccard = count / (countA + countB - count);
    results.set(key, {
      commitCount: count,
      jaccard,
      lastEpoch: pairLastEpoch.get(key) || 0,
    });
  }

  return { pairs: results, fileCommitCounts };
}
|
|
129
|
+
|
|
130
|
+
/**
 * Analyze git history and populate co-change data in the database.
 *
 * Incremental by default: resumes from the last analyzed commit recorded in
 * co_change_meta and accumulates counts via UPSERTs; `opts.full` wipes all
 * co-change tables and re-scans the whole window.
 *
 * @param {string} [customDbPath] - Path to graph.db
 * @param {object} [opts]
 * @param {string} [opts.since] - Git date expression
 * @param {number} [opts.minSupport] - Minimum co-occurrence count
 * @param {number} [opts.maxFilesPerCommit] - Max files per commit
 * @param {boolean} [opts.full] - Force full re-scan
 * @returns {{ pairsFound: number, commitsScanned: number, since: string, minSupport: number }}
 *   or `{ error: string }` when the db's parent tree is not a git repository.
 */
export function analyzeCoChanges(customDbPath, opts = {}) {
  const dbPath = findDbPath(customDbPath);
  const db = openDb(dbPath);
  initSchema(db);

  // Assumes the db lives one directory below the repo root
  // (e.g. <repo>/.codegraph/graph.db) — repoRoot is the db dir's parent.
  const repoRoot = path.resolve(path.dirname(dbPath), '..');

  // existsSync is true for both a .git directory and a .git file (worktrees).
  if (!fs.existsSync(path.join(repoRoot, '.git'))) {
    db.close();
    return { error: `Not a git repository: ${repoRoot}` };
  }

  const since = opts.since || '1 year ago';
  const minSupport = opts.minSupport ?? 3;
  const maxFilesPerCommit = opts.maxFilesPerCommit ?? 50;

  // Check for incremental state: scan only commits after the last analyzed
  // SHA unless a full re-scan was requested.
  let afterSha = null;
  if (!opts.full) {
    try {
      const row = db
        .prepare("SELECT value FROM co_change_meta WHERE key = 'last_analyzed_commit'")
        .get();
      if (row) afterSha = row.value;
    } catch {
      /* table may not exist yet */
    }
  }

  // If full re-scan, clear existing data so counts start from zero.
  if (opts.full) {
    db.exec('DELETE FROM co_changes');
    db.exec('DELETE FROM co_change_meta');
    db.exec('DELETE FROM file_commit_counts');
  }

  // Collect known files from the graph so co-change pairs are restricted to
  // files the codegraph actually indexed.
  let knownFiles = null;
  try {
    const rows = db.prepare('SELECT DISTINCT file FROM nodes').all();
    knownFiles = new Set(rows.map((r) => r.file));
  } catch {
    /* nodes table may not exist */
  }

  // NOTE(review): computeCoChanges drops pairs below minSupport within this
  // batch, so an incremental run never accumulates sub-threshold counts
  // across runs — confirm this is intended.
  const { commits } = scanGitHistory(repoRoot, { since, afterSha });
  const { pairs: coChanges, fileCommitCounts } = computeCoChanges(commits, {
    minSupport,
    maxFilesPerCommit,
    knownFiles,
  });

  // Upsert per-file commit counts so Jaccard can be recomputed from totals
  const fileCountUpsert = db.prepare(`
    INSERT INTO file_commit_counts (file, commit_count) VALUES (?, ?)
    ON CONFLICT(file) DO UPDATE SET commit_count = commit_count + excluded.commit_count
  `);

  // Upsert pair counts (accumulate commit_count, jaccard placeholder — recomputed below)
  const pairUpsert = db.prepare(`
    INSERT INTO co_changes (file_a, file_b, commit_count, jaccard, last_commit_epoch)
    VALUES (?, ?, ?, 0, ?)
    ON CONFLICT(file_a, file_b) DO UPDATE SET
      commit_count = commit_count + excluded.commit_count,
      last_commit_epoch = MAX(co_changes.last_commit_epoch, excluded.last_commit_epoch)
  `);

  // Single transaction: both tables move together or not at all.
  const insertMany = db.transaction(() => {
    for (const [file, count] of fileCommitCounts) {
      fileCountUpsert.run(file, count);
    }
    for (const [key, data] of coChanges) {
      // Keys are "fileA\0fileB" in canonical (sorted) order.
      const [fileA, fileB] = key.split('\0');
      pairUpsert.run(fileA, fileB, data.commitCount, data.lastEpoch);
    }
  });
  insertMany();

  // Recompute Jaccard for all affected pairs from total file commit counts,
  // so incremental batches blend correctly with previously stored totals.
  const affectedFiles = [...fileCommitCounts.keys()];
  if (affectedFiles.length > 0) {
    const ph = affectedFiles.map(() => '?').join(',');
    db.prepare(`
      UPDATE co_changes SET jaccard = (
        SELECT CAST(co_changes.commit_count AS REAL) / (
          COALESCE(fa.commit_count, 0) + COALESCE(fb.commit_count, 0) - co_changes.commit_count
        )
        FROM file_commit_counts fa, file_commit_counts fb
        WHERE fa.file = co_changes.file_a AND fb.file = co_changes.file_b
      )
      WHERE file_a IN (${ph}) OR file_b IN (${ph})
    `).run(...affectedFiles, ...affectedFiles);
  }

  // Update metadata (key/value upserts read back by getCoChangeMeta).
  const metaUpsert = db.prepare(`
    INSERT INTO co_change_meta (key, value) VALUES (?, ?)
    ON CONFLICT(key) DO UPDATE SET value = excluded.value
  `);
  if (commits.length > 0) {
    // git log output is newest-first, so commits[0] is the newest analyzed
    // commit — the resume point for the next incremental run.
    metaUpsert.run('last_analyzed_commit', commits[0].sha);
  }
  metaUpsert.run('analyzed_at', new Date().toISOString());
  metaUpsert.run('since', since);
  metaUpsert.run('min_support', String(minSupport));

  const totalPairs = db.prepare('SELECT COUNT(*) as cnt FROM co_changes').get().cnt;

  db.close();

  return {
    pairsFound: totalPairs,
    commitsScanned: commits.length,
    since,
    minSupport,
  };
}
|
|
257
|
+
|
|
258
|
+
/**
 * Query co-change partners for a specific file.
 * @param {string} file - File path (partial match supported)
 * @param {string} [customDbPath]
 * @param {object} [opts]
 * @param {number} [opts.limit=20]
 * @param {number} [opts.minJaccard=0.3]
 * @param {boolean} [opts.noTests]
 * @returns {{ file: string, partners: Array, meta: object }} or { error }
 */
export function coChangeData(file, customDbPath, opts = {}) {
  const db = openReadonlyOrFail(customDbPath);
  const limit = opts.limit || 20;
  const minJaccard = opts.minJaccard ?? 0.3;
  const skipTests = opts.noTests || false;

  // Probe for the co_changes table; absence means analysis was never run.
  try {
    db.prepare('SELECT 1 FROM co_changes LIMIT 1').get();
  } catch {
    db.close();
    return { error: 'No co-change data found. Run `codegraph co-change --analyze` first.' };
  }

  // Resolve user input (possibly a suffix) to a stored path.
  const resolvedFile = resolveCoChangeFile(db, file);
  if (!resolvedFile) {
    db.close();
    return { error: `No co-change data found for file matching "${file}"` };
  }

  const stmt = db.prepare(
    `SELECT file_a, file_b, commit_count, jaccard, last_commit_epoch
       FROM co_changes
       WHERE (file_a = ? OR file_b = ?) AND jaccard >= ?
       ORDER BY jaccard DESC`,
  );

  const partners = [];
  for (const row of stmt.all(resolvedFile, resolvedFile, minJaccard)) {
    if (partners.length >= limit) break;
    // Pairs are stored unordered; report whichever side isn't the query file.
    const other = row.file_a === resolvedFile ? row.file_b : row.file_a;
    if (skipTests && isTestFile(other)) continue;
    partners.push({
      file: other,
      commitCount: row.commit_count,
      jaccard: row.jaccard,
      lastCommitDate: row.last_commit_epoch
        ? new Date(row.last_commit_epoch * 1000).toISOString().slice(0, 10)
        : null,
    });
  }

  const meta = getCoChangeMeta(db);
  db.close();

  return { file: resolvedFile, partners, meta };
}
|
|
318
|
+
|
|
319
|
+
/**
 * Query top global co-change pairs.
 * @param {string} [customDbPath]
 * @param {object} [opts]
 * @param {number} [opts.limit=20]
 * @param {number} [opts.minJaccard=0.3]
 * @param {boolean} [opts.noTests]
 * @returns {{ pairs: Array, meta: object }} or { error }
 */
export function coChangeTopData(customDbPath, opts = {}) {
  const db = openReadonlyOrFail(customDbPath);
  const limit = opts.limit || 20;
  const minJaccard = opts.minJaccard ?? 0.3;
  const excludeTests = opts.noTests || false;

  // Probe for the co_changes table; absence means analysis was never run.
  try {
    db.prepare('SELECT 1 FROM co_changes LIMIT 1').get();
  } catch {
    db.close();
    return { error: 'No co-change data found. Run `codegraph co-change --analyze` first.' };
  }

  const stmt = db.prepare(
    `SELECT file_a, file_b, commit_count, jaccard, last_commit_epoch
       FROM co_changes
       WHERE jaccard >= ?
       ORDER BY jaccard DESC`,
  );

  // Epoch seconds → "YYYY-MM-DD", or null when never recorded.
  const toDate = (epoch) => (epoch ? new Date(epoch * 1000).toISOString().slice(0, 10) : null);

  const pairs = [];
  for (const row of stmt.all(minJaccard)) {
    if (pairs.length >= limit) break;
    if (excludeTests && (isTestFile(row.file_a) || isTestFile(row.file_b))) continue;
    pairs.push({
      fileA: row.file_a,
      fileB: row.file_b,
      commitCount: row.commit_count,
      jaccard: row.jaccard,
      lastCommitDate: toDate(row.last_commit_epoch),
    });
  }

  const meta = getCoChangeMeta(db);
  db.close();

  return { pairs, meta };
}
|
|
370
|
+
|
|
371
|
+
/**
 * Batch-query co-change partners for a set of files.
 * Takes an already-open readonly DB handle (for diff-impact integration).
 * @param {string[]} files - File paths to query
 * @param {import('better-sqlite3').Database} db - Already-open DB handle
 * @param {object} [opts]
 * @param {number} [opts.minJaccard=0.3]
 * @param {number} [opts.limit=20]
 * @param {boolean} [opts.noTests]
 * @returns {Array<{file: string, coupledWith: string, commitCount: number, jaccard: number}>}
 */
export function coChangeForFiles(files, db, opts = {}) {
  // Guard first: nothing to query (and no work) for an empty input.
  if (files.length === 0) return [];

  const minJaccard = opts.minJaccard ?? 0.3;
  const limit = opts.limit ?? 20;
  const noTests = opts.noTests || false;
  const inputSet = new Set(files);

  // No SQL LIMIT here: rows are filtered below (pairs internal to the input
  // set, test files), so truncating in SQL could drop qualifying partners
  // and return fewer than `limit` results even when more exist. The jaccard
  // threshold keeps the candidate set small.
  const placeholders = files.map(() => '?').join(',');
  const rows = db
    .prepare(
      `SELECT file_a, file_b, commit_count, jaccard
       FROM co_changes
       WHERE (file_a IN (${placeholders}) OR file_b IN (${placeholders}))
         AND jaccard >= ?
       ORDER BY jaccard DESC`,
    )
    .all(...files, ...files, minJaccard);

  const results = [];
  for (const row of rows) {
    if (results.length >= limit) break;
    const partner = inputSet.has(row.file_a) ? row.file_b : row.file_a;
    const source = inputSet.has(row.file_a) ? row.file_a : row.file_b;
    // Skip pairs where both sides are in the input set — callers want
    // external coupling, not coupling among the changed files themselves.
    if (inputSet.has(partner)) continue;
    if (noTests && isTestFile(partner)) continue;
    results.push({
      file: partner,
      coupledWith: source,
      commitCount: row.commit_count,
      jaccard: row.jaccard,
    });
  }

  return results;
}
|
|
418
|
+
|
|
419
|
+
/**
 * Format co-change data for CLI output (single file).
 * Error objects pass their message straight through; otherwise returns a
 * printable multi-line report of partners sorted as provided.
 */
export function formatCoChange(data) {
  if (data.error) return data.error;
  if (data.partners.length === 0) return `No co-change partners found for ${data.file}`;

  const header = `\nCo-change partners for ${data.file}:\n`;
  const body = data.partners.map((p) => {
    const pct = `${(p.jaccard * 100).toFixed(0)}%`.padStart(4);
    const commits = `${p.commitCount} commits`.padStart(12);
    return ` ${pct} ${commits} ${p.file}`;
  });
  const footer = data.meta?.analyzedAt
    ? [`\n Analyzed: ${data.meta.analyzedAt} | Window: ${data.meta.since || 'all'}`]
    : [];
  return [header, ...body, ...footer].join('\n');
}
|
|
437
|
+
|
|
438
|
+
/**
 * Format top co-change pairs for CLI output (global view).
 * Error objects pass their message straight through.
 */
export function formatCoChangeTop(data) {
  if (data.error) return data.error;
  if (data.pairs.length === 0) return 'No co-change pairs found.';

  const rows = data.pairs.map((pair) => {
    const pct = `${(pair.jaccard * 100).toFixed(0)}%`.padStart(4);
    const commits = `${pair.commitCount} commits`.padStart(12);
    return ` ${pct} ${commits} ${pair.fileA} <-> ${pair.fileB}`;
  });
  const sections = ['\nTop co-change pairs:\n', ...rows];
  if (data.meta?.analyzedAt) {
    sections.push(`\n Analyzed: ${data.meta.analyzedAt} | Window: ${data.meta.since || 'all'}`);
  }
  return sections.join('\n');
}
|
|
456
|
+
|
|
457
|
+
// ─── Internal Helpers ────────────────────────────────────────────────────
|
|
458
|
+
|
|
459
|
+
/**
 * Resolve a user-supplied path to a path actually stored in co_changes.
 * Tries an exact match first, then a suffix ("ends with") match; returns
 * null when nothing matches.
 */
function resolveCoChangeFile(db, file) {
  const exactRow = db
    .prepare(
      'SELECT file_a FROM co_changes WHERE file_a = ? UNION SELECT file_b FROM co_changes WHERE file_b = ? LIMIT 1',
    )
    .get(file, file);
  if (exactRow) return exactRow.file_a;

  const suffixRow = db
    .prepare(
      `SELECT file_a AS file FROM co_changes WHERE file_a LIKE ?
       UNION
       SELECT file_b AS file FROM co_changes WHERE file_b LIKE ?
       LIMIT 1`,
    )
    .get(`%${file}`, `%${file}`);
  return suffixRow ? suffixRow.file : null;
}
|
|
481
|
+
|
|
482
|
+
/**
 * Read the co_change_meta key/value table into a typed object.
 * Returns null when the table does not exist (analysis never run).
 */
function getCoChangeMeta(db) {
  try {
    const kv = Object.fromEntries(
      db
        .prepare('SELECT key, value FROM co_change_meta')
        .all()
        .map((row) => [row.key, row.value]),
    );
    return {
      analyzedAt: kv.analyzed_at || null,
      since: kv.since || null,
      minSupport: kv.min_support ? Number.parseInt(kv.min_support, 10) : null,
      lastCommit: kv.last_analyzed_commit || null,
    };
  } catch {
    return null;
  }
}
|
package/src/config.js
CHANGED
|
@@ -24,6 +24,12 @@ export const DEFAULTS = {
|
|
|
24
24
|
llm: { provider: null, model: null, baseUrl: null, apiKey: null, apiKeyCommand: null },
|
|
25
25
|
search: { defaultMinScore: 0.2, rrfK: 60, topK: 15 },
|
|
26
26
|
ci: { failOnCycles: false, impactThreshold: null },
|
|
27
|
+
coChange: {
|
|
28
|
+
since: '1 year ago',
|
|
29
|
+
minSupport: 3,
|
|
30
|
+
minJaccard: 0.3,
|
|
31
|
+
maxFilesPerCommit: 50,
|
|
32
|
+
},
|
|
27
33
|
};
|
|
28
34
|
|
|
29
35
|
/**
|
package/src/db.js
CHANGED
|
@@ -71,6 +71,36 @@ export const MIGRATIONS = [
|
|
|
71
71
|
version: 4,
|
|
72
72
|
up: `ALTER TABLE file_hashes ADD COLUMN size INTEGER DEFAULT 0;`,
|
|
73
73
|
},
|
|
74
|
+
{
|
|
75
|
+
version: 5,
|
|
76
|
+
up: `
|
|
77
|
+
CREATE TABLE IF NOT EXISTS co_changes (
|
|
78
|
+
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
79
|
+
file_a TEXT NOT NULL,
|
|
80
|
+
file_b TEXT NOT NULL,
|
|
81
|
+
commit_count INTEGER NOT NULL,
|
|
82
|
+
jaccard REAL NOT NULL,
|
|
83
|
+
last_commit_epoch INTEGER,
|
|
84
|
+
UNIQUE(file_a, file_b)
|
|
85
|
+
);
|
|
86
|
+
CREATE INDEX IF NOT EXISTS idx_co_changes_file_a ON co_changes(file_a);
|
|
87
|
+
CREATE INDEX IF NOT EXISTS idx_co_changes_file_b ON co_changes(file_b);
|
|
88
|
+
CREATE INDEX IF NOT EXISTS idx_co_changes_jaccard ON co_changes(jaccard DESC);
|
|
89
|
+
CREATE TABLE IF NOT EXISTS co_change_meta (
|
|
90
|
+
key TEXT PRIMARY KEY,
|
|
91
|
+
value TEXT NOT NULL
|
|
92
|
+
);
|
|
93
|
+
`,
|
|
94
|
+
},
|
|
95
|
+
{
|
|
96
|
+
version: 6,
|
|
97
|
+
up: `
|
|
98
|
+
CREATE TABLE IF NOT EXISTS file_commit_counts (
|
|
99
|
+
file TEXT PRIMARY KEY,
|
|
100
|
+
commit_count INTEGER NOT NULL DEFAULT 0
|
|
101
|
+
);
|
|
102
|
+
`,
|
|
103
|
+
},
|
|
74
104
|
];
|
|
75
105
|
|
|
76
106
|
export function openDb(dbPath) {
|
|
@@ -115,6 +145,16 @@ export function initSchema(db) {
|
|
|
115
145
|
} catch {
|
|
116
146
|
/* already exists */
|
|
117
147
|
}
|
|
148
|
+
try {
|
|
149
|
+
db.exec('ALTER TABLE nodes ADD COLUMN role TEXT');
|
|
150
|
+
} catch {
|
|
151
|
+
/* already exists */
|
|
152
|
+
}
|
|
153
|
+
try {
|
|
154
|
+
db.exec('CREATE INDEX IF NOT EXISTS idx_nodes_role ON nodes(role)');
|
|
155
|
+
} catch {
|
|
156
|
+
/* already exists */
|
|
157
|
+
}
|
|
118
158
|
}
|
|
119
159
|
|
|
120
160
|
export function findDbPath(customPath) {
|
package/src/embedder.js
CHANGED
|
@@ -16,6 +16,28 @@ function splitIdentifier(name) {
|
|
|
16
16
|
.trim();
|
|
17
17
|
}
|
|
18
18
|
|
|
19
|
+
/**
 * Match a file path against a glob pattern.
 * Supports *, **, and ? wildcards. Zero dependencies.
 */
function globMatch(filePath, pattern) {
  // Normalize separators to forward slashes
  const candidate = filePath.replace(/\\/g, '/');
  // Escape regex metacharacters, leaving the glob wildcards (* and ?) intact.
  let source = pattern.replace(/\\/g, '/').replace(/[.+^${}()|[\]\\]/g, '\\$&');
  // Translate wildcards: ** first (any depth, via a NUL placeholder so the
  // single-* pass doesn't consume it), then * (one segment), then ?.
  source = source
    .replace(/\*\*/g, '\0')
    .replace(/\*/g, '[^/]*')
    .replace(/\0/g, '.*')
    .replace(/\?/g, '[^/]');
  try {
    return new RegExp(`^${source}$`).test(candidate);
  } catch {
    // Malformed pattern — fall back to substring match
    return candidate.includes(pattern);
  }
}
|
|
40
|
+
|
|
19
41
|
// Lazy-load transformers (heavy, optional module)
|
|
20
42
|
let pipeline = null;
|
|
21
43
|
let _cos_sim = null;
|
|
@@ -76,7 +98,7 @@ export const MODELS = {
|
|
|
76
98
|
|
|
77
99
|
export const EMBEDDING_STRATEGIES = ['structured', 'source'];
|
|
78
100
|
|
|
79
|
-
export const DEFAULT_MODEL = '
|
|
101
|
+
export const DEFAULT_MODEL = 'nomic-v1.5';
|
|
80
102
|
const BATCH_SIZE_MAP = {
|
|
81
103
|
minilm: 32,
|
|
82
104
|
'jina-small': 16,
|
|
@@ -216,11 +238,26 @@ async function loadTransformers() {
|
|
|
216
238
|
}
|
|
217
239
|
}
|
|
218
240
|
|
|
241
|
+
/**
 * Dispose the current ONNX session and free memory.
 * Safe to call when no model is loaded (no-op).
 */
export async function disposeModel() {
  if (extractor) {
    // If dispose() rejects, the error propagates and `extractor` /
    // `activeModel` keep their old values (no partial reset).
    await extractor.dispose();
    extractor = null;
  }
  // Cleared even when no extractor existed, so a stale model name can't
  // short-circuit the next loadModel() cache check.
  activeModel = null;
}
|
|
252
|
+
|
|
219
253
|
async function loadModel(modelKey) {
|
|
220
254
|
const config = getModelConfig(modelKey);
|
|
221
255
|
|
|
222
256
|
if (extractor && activeModel === config.name) return { extractor, config };
|
|
223
257
|
|
|
258
|
+
// Dispose previous model before loading a different one
|
|
259
|
+
await disposeModel();
|
|
260
|
+
|
|
224
261
|
const transformers = await loadTransformers();
|
|
225
262
|
pipeline = transformers.pipeline;
|
|
226
263
|
_cos_sim = transformers.cos_sim;
|
|
@@ -496,7 +533,8 @@ function _prepareSearch(customDbPath, opts = {}) {
|
|
|
496
533
|
conditions.push('n.kind = ?');
|
|
497
534
|
params.push(opts.kind);
|
|
498
535
|
}
|
|
499
|
-
|
|
536
|
+
const isGlob = opts.filePattern && /[*?[\]]/.test(opts.filePattern);
|
|
537
|
+
if (opts.filePattern && !isGlob) {
|
|
500
538
|
conditions.push('n.file LIKE ?');
|
|
501
539
|
params.push(`%${opts.filePattern}%`);
|
|
502
540
|
}
|
|
@@ -505,6 +543,9 @@ function _prepareSearch(customDbPath, opts = {}) {
|
|
|
505
543
|
}
|
|
506
544
|
|
|
507
545
|
let rows = db.prepare(sql).all(...params);
|
|
546
|
+
if (isGlob) {
|
|
547
|
+
rows = rows.filter((row) => globMatch(row.file, opts.filePattern));
|
|
548
|
+
}
|
|
508
549
|
if (noTests) {
|
|
509
550
|
rows = rows.filter((row) => !TEST_PATTERN.test(row.file));
|
|
510
551
|
}
|
|
@@ -668,6 +709,11 @@ export async function search(query, customDbPath, opts = {}) {
|
|
|
668
709
|
const data = await searchData(singleQuery, customDbPath, opts);
|
|
669
710
|
if (!data) return;
|
|
670
711
|
|
|
712
|
+
if (opts.json) {
|
|
713
|
+
console.log(JSON.stringify(data, null, 2));
|
|
714
|
+
return;
|
|
715
|
+
}
|
|
716
|
+
|
|
671
717
|
console.log(`\nSemantic search: "${singleQuery}"\n`);
|
|
672
718
|
|
|
673
719
|
if (data.results.length === 0) {
|
|
@@ -687,6 +733,11 @@ export async function search(query, customDbPath, opts = {}) {
|
|
|
687
733
|
const data = await multiSearchData(queries, customDbPath, opts);
|
|
688
734
|
if (!data) return;
|
|
689
735
|
|
|
736
|
+
if (opts.json) {
|
|
737
|
+
console.log(JSON.stringify(data, null, 2));
|
|
738
|
+
return;
|
|
739
|
+
}
|
|
740
|
+
|
|
690
741
|
console.log(`\nMulti-query semantic search (RRF, k=${opts.rrfK || 60}):`);
|
|
691
742
|
queries.forEach((q, i) => {
|
|
692
743
|
console.log(` [${i + 1}] "${q}"`);
|