@optave/codegraph 2.2.3-dev.44e8146 → 2.3.1-dev.1aeea34

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,498 @@
1
+ /**
2
+ * Git co-change analysis — surfaces files that historically change together.
3
+ *
4
+ * Uses git log to find temporal coupling between files, computes Jaccard
5
+ * similarity coefficients, and stores results in the codegraph database.
6
+ */
7
+
8
+ import { execFileSync } from 'node:child_process';
9
+ import fs from 'node:fs';
10
+ import path from 'node:path';
11
+ import { normalizePath } from './constants.js';
12
+ import { findDbPath, initSchema, openDb, openReadonlyOrFail } from './db.js';
13
+ import { warn } from './logger.js';
14
+ import { isTestFile } from './queries.js';
15
+
16
/**
 * Scan git history and return parsed commit data.
 *
 * Runs `git log --name-only` (merge commits excluded, only added / modified /
 * renamed / copied paths listed) and parses its output into one record per
 * commit. Any git failure is logged through `warn` and yields an empty result
 * instead of throwing.
 *
 * @param {string} repoRoot - Absolute path to the git repo root
 * @param {object} [opts]
 * @param {string} [opts.since] - Git date expression (e.g. "1 year ago")
 * @param {string} [opts.afterSha] - Only include commits after this SHA
 * @returns {{ commits: Array<{sha: string, epoch: number, files: string[]}> }}
 *   Commits in the order `git log` printed them; commits without any listed
 *   file are omitted.
 */
export function scanGitHistory(repoRoot, opts = {}) {
  const args = [
    // By default git C-quotes paths containing non-ASCII bytes (e.g.
    // "src/\303\251.js"); such names would never match the paths stored in the
    // graph. Ask git to print those bytes verbatim.
    '-c',
    'core.quotePath=false',
    'log',
    '--name-only',
    '--pretty=format:%H%n%at',
    '--no-merges',
    '--diff-filter=AMRC',
  ];
  if (opts.since) args.push(`--since=${opts.since}`);
  if (opts.afterSha) args.push(`${opts.afterSha}..HEAD`);
  args.push('--', '.');

  let output;
  try {
    output = execFileSync('git', args, {
      cwd: repoRoot,
      encoding: 'utf-8',
      maxBuffer: 50 * 1024 * 1024,
      stdio: ['pipe', 'pipe', 'pipe'],
    });
  } catch (e) {
    warn(`Failed to scan git history: ${e.message}`);
    return { commits: [] };
  }

  if (!output.trim()) return { commits: [] };

  const commits = [];
  // Blank lines separate commits; each block is sha\nepoch\nfile1\nfile2...
  const blocks = output.trim().split(/\n\n+/);
  for (const block of blocks) {
    const lines = block.split('\n').filter((l) => l.length > 0);
    if (lines.length < 2) continue;
    const [sha, epochText, ...rawFiles] = lines;
    const epoch = Number.parseInt(epochText, 10);
    // A block whose second line is not a timestamp is not a commit record.
    if (Number.isNaN(epoch)) continue;
    const files = rawFiles.map((f) => normalizePath(f));
    if (files.length > 0) {
      commits.push({ sha, epoch, files });
    }
  }

  return { commits };
}
68
+
69
/**
 * Compute co-change pairs from parsed commit data.
 *
 * For every commit, each unordered pair of distinct files it touched counts as
 * one co-occurrence. Pairs seen at least `minSupport` times are returned with
 * their Jaccard coefficient: co-occurrences / (commitsA + commitsB - co-occurrences).
 *
 * @param {Array<{sha: string, epoch: number, files: string[]}>} commits
 * @param {object} [opts]
 * @param {number} [opts.minSupport=3] - Minimum number of co-occurrences
 * @param {number} [opts.maxFilesPerCommit=50] - Skip commits with too many files
 * @param {Set<string>} [opts.knownFiles] - If provided, only include pairs where both files are in this set
 * @returns {{
 *   pairs: Map<string, {commitCount: number, jaccard: number, lastEpoch: number}>,
 *   fileCommitCounts: Map<string, number>
 * }} `pairs` is keyed by `fileA + '\0' + fileB` with fileA sorting before fileB;
 *   `fileCommitCounts` maps each counted file to the number of commits touching it.
 */
export function computeCoChanges(commits, opts = {}) {
  const minSupport = opts.minSupport ?? 3;
  const maxFilesPerCommit = opts.maxFilesPerCommit ?? 50;
  const knownFiles = opts.knownFiles || null;

  const fileCommitCounts = new Map();
  const pairCounts = new Map();
  const pairLastEpoch = new Map();

  for (const commit of commits) {
    // The size cut-off is applied to the raw file list, before any filtering.
    if (commit.files.length > maxFilesPerCommit) continue;

    const relevant = knownFiles ? commit.files.filter((f) => knownFiles.has(f)) : commit.files;

    // De-duplicate once so that per-file counts and pair counts agree: a path
    // listed twice in one commit must still count as a single commit for that
    // file, otherwise the Jaccard denominator is inflated.
    const uniqueFiles = new Set(relevant);

    for (const f of uniqueFiles) {
      fileCommitCounts.set(f, (fileCommitCounts.get(f) || 0) + 1);
    }

    // Generate all unique pairs (canonical: a < b)
    const sorted = [...uniqueFiles].sort();
    for (let i = 0; i < sorted.length; i++) {
      for (let j = i + 1; j < sorted.length; j++) {
        const key = `${sorted[i]}\0${sorted[j]}`;
        pairCounts.set(key, (pairCounts.get(key) || 0) + 1);
        const prev = pairLastEpoch.get(key) || 0;
        if (commit.epoch > prev) pairLastEpoch.set(key, commit.epoch);
      }
    }
  }

  // Filter by minSupport and compute Jaccard
  const results = new Map();
  for (const [key, count] of pairCounts) {
    if (count < minSupport) continue;
    const [fileA, fileB] = key.split('\0');
    const countA = fileCommitCounts.get(fileA) || 0;
    const countB = fileCommitCounts.get(fileB) || 0;
    results.set(key, {
      commitCount: count,
      jaccard: count / (countA + countB - count),
      lastEpoch: pairLastEpoch.get(key) || 0,
    });
  }

  return { pairs: results, fileCommitCounts };
}
129
+
130
/**
 * Analyze git history and populate co-change data in the database.
 *
 * Unless `opts.full` is set, the scan resumes after the commit recorded under
 * `last_analyzed_commit` in `co_change_meta`; new counts are added to the
 * stored ones and Jaccard is recomputed from the accumulated totals.
 *
 * @param {string} [customDbPath] - Path to graph.db
 * @param {object} [opts]
 * @param {string} [opts.since] - Git date expression (defaults to "1 year ago")
 * @param {number} [opts.minSupport] - Minimum co-occurrence count (defaults to 3)
 * @param {number} [opts.maxFilesPerCommit] - Max files per commit (defaults to 50)
 * @param {boolean} [opts.full] - Force full re-scan (clears stored co-change data first)
 * @returns {{ pairsFound: number, commitsScanned: number, since: string, minSupport: number } | { error: string }}
 *   Summary of the run, or `{ error }` when no `.git` entry exists at the
 *   derived repository root.
 */
export function analyzeCoChanges(customDbPath, opts = {}) {
  const dbPath = findDbPath(customDbPath);
  const db = openDb(dbPath);

  // try/finally releases the handle on every exit path, including a throwing
  // statement or transaction.
  try {
    initSchema(db);

    // The repository root is taken to be the parent of the directory that
    // holds the database file.
    const repoRoot = path.resolve(path.dirname(dbPath), '..');

    if (!fs.existsSync(path.join(repoRoot, '.git'))) {
      return { error: `Not a git repository: ${repoRoot}` };
    }

    const since = opts.since || '1 year ago';
    const minSupport = opts.minSupport ?? 3;
    const maxFilesPerCommit = opts.maxFilesPerCommit ?? 50;

    // Check for incremental state
    let afterSha = null;
    if (!opts.full) {
      try {
        const row = db
          .prepare("SELECT value FROM co_change_meta WHERE key = 'last_analyzed_commit'")
          .get();
        if (row) afterSha = row.value;
      } catch {
        /* table may not exist yet */
      }
    }

    // If full re-scan, clear existing data
    if (opts.full) {
      db.exec('DELETE FROM co_changes');
      db.exec('DELETE FROM co_change_meta');
      db.exec('DELETE FROM file_commit_counts');
    }

    // Collect known files from the graph for filtering
    let knownFiles = null;
    try {
      const rows = db.prepare('SELECT DISTINCT file FROM nodes').all();
      knownFiles = new Set(rows.map((r) => r.file));
    } catch {
      /* nodes table may not exist */
    }

    const { commits } = scanGitHistory(repoRoot, { since, afterSha });
    const { pairs: coChanges, fileCommitCounts } = computeCoChanges(commits, {
      minSupport,
      maxFilesPerCommit,
      knownFiles,
    });

    // Upsert per-file commit counts so Jaccard can be recomputed from totals
    const fileCountUpsert = db.prepare(`
      INSERT INTO file_commit_counts (file, commit_count) VALUES (?, ?)
      ON CONFLICT(file) DO UPDATE SET commit_count = commit_count + excluded.commit_count
    `);

    // Upsert pair counts (accumulate commit_count, jaccard placeholder — recomputed below)
    const pairUpsert = db.prepare(`
      INSERT INTO co_changes (file_a, file_b, commit_count, jaccard, last_commit_epoch)
      VALUES (?, ?, ?, 0, ?)
      ON CONFLICT(file_a, file_b) DO UPDATE SET
        commit_count = commit_count + excluded.commit_count,
        last_commit_epoch = MAX(co_changes.last_commit_epoch, excluded.last_commit_epoch)
    `);

    const insertMany = db.transaction(() => {
      for (const [file, count] of fileCommitCounts) {
        fileCountUpsert.run(file, count);
      }
      for (const [key, data] of coChanges) {
        const [fileA, fileB] = key.split('\0');
        pairUpsert.run(fileA, fileB, data.commitCount, data.lastEpoch);
      }
    });
    insertMany();

    // Recompute Jaccard for all affected pairs from total file commit counts
    const affectedFiles = [...fileCommitCounts.keys()];
    if (affectedFiles.length > 0) {
      const ph = affectedFiles.map(() => '?').join(',');
      db.prepare(`
        UPDATE co_changes SET jaccard = (
          SELECT CAST(co_changes.commit_count AS REAL) / (
            COALESCE(fa.commit_count, 0) + COALESCE(fb.commit_count, 0) - co_changes.commit_count
          )
          FROM file_commit_counts fa, file_commit_counts fb
          WHERE fa.file = co_changes.file_a AND fb.file = co_changes.file_b
        )
        WHERE file_a IN (${ph}) OR file_b IN (${ph})
      `).run(...affectedFiles, ...affectedFiles);
    }

    // Update metadata
    const metaUpsert = db.prepare(`
      INSERT INTO co_change_meta (key, value) VALUES (?, ?)
      ON CONFLICT(key) DO UPDATE SET value = excluded.value
    `);
    if (commits.length > 0) {
      // The first scanned commit becomes the resume point for the next
      // incremental run.
      metaUpsert.run('last_analyzed_commit', commits[0].sha);
    }
    metaUpsert.run('analyzed_at', new Date().toISOString());
    metaUpsert.run('since', since);
    metaUpsert.run('min_support', String(minSupport));

    const totalPairs = db.prepare('SELECT COUNT(*) as cnt FROM co_changes').get().cnt;

    return {
      pairsFound: totalPairs,
      commitsScanned: commits.length,
      since,
      minSupport,
    };
  } finally {
    db.close();
  }
}
257
+
258
/**
 * Query co-change partners for a specific file.
 *
 * @param {string} file - File path (partial match supported)
 * @param {string} [customDbPath]
 * @param {object} [opts]
 * @param {number} [opts.limit=20] - Maximum number of partners returned
 * @param {number} [opts.minJaccard=0.3] - Minimum Jaccard coefficient
 * @param {boolean} [opts.noTests] - Drop partners for which `isTestFile` is true
 * @returns {{ file: string, partners: Array, meta: object } | { error: string }}
 *   Partners ordered by descending Jaccard, or `{ error }` when the co_changes
 *   table cannot be queried or no stored file matches `file`.
 */
export function coChangeData(file, customDbPath, opts = {}) {
  const db = openReadonlyOrFail(customDbPath);

  // try/finally closes the handle on every exit path, including a throwing query.
  try {
    const limit = opts.limit || 20;
    const minJaccard = opts.minJaccard ?? 0.3;
    const noTests = opts.noTests || false;

    // Check if co_changes table exists
    try {
      db.prepare('SELECT 1 FROM co_changes LIMIT 1').get();
    } catch {
      return { error: 'No co-change data found. Run `codegraph co-change --analyze` first.' };
    }

    // Resolve file via partial match
    const resolvedFile = resolveCoChangeFile(db, file);
    if (!resolvedFile) {
      return { error: `No co-change data found for file matching "${file}"` };
    }

    const rows = db
      .prepare(
        `SELECT file_a, file_b, commit_count, jaccard, last_commit_epoch
         FROM co_changes
         WHERE (file_a = ? OR file_b = ?) AND jaccard >= ?
         ORDER BY jaccard DESC`,
      )
      .all(resolvedFile, resolvedFile, minJaccard);

    const partners = [];
    for (const row of rows) {
      // The partner is whichever side of the pair is not the queried file.
      const partner = row.file_a === resolvedFile ? row.file_b : row.file_a;
      if (noTests && isTestFile(partner)) continue;
      partners.push({
        file: partner,
        commitCount: row.commit_count,
        jaccard: row.jaccard,
        // Epoch seconds -> YYYY-MM-DD (UTC, via toISOString)
        lastCommitDate: row.last_commit_epoch
          ? new Date(row.last_commit_epoch * 1000).toISOString().slice(0, 10)
          : null,
      });
      // The limit is applied after filtering so excluded rows do not use up slots.
      if (partners.length >= limit) break;
    }

    const meta = getCoChangeMeta(db);
    return { file: resolvedFile, partners, meta };
  } finally {
    db.close();
  }
}
318
+
319
/**
 * Query top global co-change pairs.
 *
 * @param {string} [customDbPath]
 * @param {object} [opts]
 * @param {number} [opts.limit=20] - Maximum number of pairs returned
 * @param {number} [opts.minJaccard=0.3] - Minimum Jaccard coefficient
 * @param {boolean} [opts.noTests] - Drop pairs where either file satisfies `isTestFile`
 * @returns {{ pairs: Array, meta: object } | { error: string }}
 *   Pairs ordered by descending Jaccard, or `{ error }` when the co_changes
 *   table cannot be queried.
 */
export function coChangeTopData(customDbPath, opts = {}) {
  const db = openReadonlyOrFail(customDbPath);

  // try/finally closes the handle on every exit path, including a throwing query.
  try {
    const limit = opts.limit || 20;
    const minJaccard = opts.minJaccard ?? 0.3;
    const noTests = opts.noTests || false;

    // Probe the table so a missing migration surfaces as a friendly error.
    try {
      db.prepare('SELECT 1 FROM co_changes LIMIT 1').get();
    } catch {
      return { error: 'No co-change data found. Run `codegraph co-change --analyze` first.' };
    }

    const rows = db
      .prepare(
        `SELECT file_a, file_b, commit_count, jaccard, last_commit_epoch
         FROM co_changes
         WHERE jaccard >= ?
         ORDER BY jaccard DESC`,
      )
      .all(minJaccard);

    const pairs = [];
    for (const row of rows) {
      if (noTests && (isTestFile(row.file_a) || isTestFile(row.file_b))) continue;
      pairs.push({
        fileA: row.file_a,
        fileB: row.file_b,
        commitCount: row.commit_count,
        jaccard: row.jaccard,
        // Epoch seconds -> YYYY-MM-DD (UTC, via toISOString)
        lastCommitDate: row.last_commit_epoch
          ? new Date(row.last_commit_epoch * 1000).toISOString().slice(0, 10)
          : null,
      });
      // The limit is applied after filtering so excluded rows do not use up slots.
      if (pairs.length >= limit) break;
    }

    const meta = getCoChangeMeta(db);
    return { pairs, meta };
  } finally {
    db.close();
  }
}
370
+
371
/**
 * Batch-query co-change partners for a set of files.
 * Takes an already-open readonly DB handle (for diff-impact integration);
 * the handle is not closed here.
 *
 * Pairs whose two files are both in `files` are skipped, since only partners
 * outside the input set are reported.
 *
 * @param {string[]} files - File paths to query
 * @param {import('better-sqlite3').Database} db - Already-open DB handle
 * @param {object} [opts]
 * @param {number} [opts.minJaccard=0.3]
 * @param {number} [opts.limit=20] - Maximum number of results; a negative value means no limit
 * @param {boolean} [opts.noTests] - Drop partners for which `isTestFile` is true
 * @returns {Array<{file: string, coupledWith: string, commitCount: number, jaccard: number}>}
 *   Results ordered by descending Jaccard.
 */
export function coChangeForFiles(files, db, opts = {}) {
  if (files.length === 0) return [];

  const minJaccard = opts.minJaccard ?? 0.3;
  const limit = opts.limit ?? 20;
  const noTests = opts.noTests || false;
  const inputSet = new Set(files);

  const placeholders = files.map(() => '?').join(',');
  // No SQL LIMIT: rows are filtered below, and limiting before filtering
  // would let discarded rows use up result slots.
  const rows = db
    .prepare(
      `SELECT file_a, file_b, commit_count, jaccard
       FROM co_changes
       WHERE (file_a IN (${placeholders}) OR file_b IN (${placeholders}))
         AND jaccard >= ?
       ORDER BY jaccard DESC`,
    )
    .all(...files, ...files, minJaccard);

  const results = [];
  for (const row of rows) {
    // Negative limits keep SQLite's "no limit" meaning.
    if (limit >= 0 && results.length >= limit) break;

    const aIsInput = inputSet.has(row.file_a);
    const partner = aIsInput ? row.file_b : row.file_a;
    const source = aIsInput ? row.file_a : row.file_b;
    if (inputSet.has(partner)) continue;
    if (noTests && isTestFile(partner)) continue;
    results.push({
      file: partner,
      coupledWith: source,
      commitCount: row.commit_count,
      jaccard: row.jaccard,
    });
  }

  return results;
}
418
+
419
/**
 * Render the result of a single-file co-change query as CLI text.
 *
 * An error result is returned as its message; an empty partner list yields a
 * one-line notice. Otherwise one row per partner is printed (percentage,
 * commit count, file), followed by an analysis footer when metadata is present.
 *
 * @param {{ error?: string, file?: string, partners?: Array, meta?: object }} data
 * @returns {string}
 */
export function formatCoChange(data) {
  if (data.error) return data.error;

  const { partners, file, meta } = data;
  if (partners.length === 0) return `No co-change partners found for ${file}`;

  const rows = partners.map((partner) => {
    const percent = `${(partner.jaccard * 100).toFixed(0)}%`.padStart(4);
    const support = `${partner.commitCount} commits`.padStart(12);
    return ` ${percent} ${support} ${partner.file}`;
  });

  const output = [`\nCo-change partners for ${file}:\n`, ...rows];
  if (meta?.analyzedAt) {
    output.push(`\n Analyzed: ${meta.analyzedAt} | Window: ${meta.since || 'all'}`);
  }
  return output.join('\n');
}
437
+
438
/**
 * Render the global top co-change pairs as CLI text.
 *
 * An error result is returned as its message; an empty pair list yields a
 * one-line notice. Otherwise one row per pair is printed (percentage, commit
 * count, both files), followed by an analysis footer when metadata is present.
 *
 * @param {{ error?: string, pairs?: Array, meta?: object }} data
 * @returns {string}
 */
export function formatCoChangeTop(data) {
  if (data.error) return data.error;

  const { pairs, meta } = data;
  if (pairs.length === 0) return 'No co-change pairs found.';

  const rows = pairs.map((pair) => {
    const percent = `${(pair.jaccard * 100).toFixed(0)}%`.padStart(4);
    const support = `${pair.commitCount} commits`.padStart(12);
    return ` ${percent} ${support} ${pair.fileA} <-> ${pair.fileB}`;
  });

  const output = ['\nTop co-change pairs:\n', ...rows];
  if (meta?.analyzedAt) {
    output.push(`\n Analyzed: ${meta.analyzedAt} | Window: ${meta.since || 'all'}`);
  }
  return output.join('\n');
}
456
+
457
+ // ─── Internal Helpers ────────────────────────────────────────────────────
458
+
459
/**
 * Resolve a user-supplied path to a file name stored in co_changes.
 *
 * Tries an exact match on either side of a pair first, then falls back to a
 * suffix ("ends with") match.
 *
 * @param {import('better-sqlite3').Database} db - Open DB handle
 * @param {string} file - Full or trailing part of a file path
 * @returns {string|null} A stored file name, or null when nothing matches
 */
function resolveCoChangeFile(db, file) {
  // Exact match first
  const exact = db
    .prepare(
      'SELECT file_a AS file FROM co_changes WHERE file_a = ? UNION SELECT file_b AS file FROM co_changes WHERE file_b = ? LIMIT 1',
    )
    .get(file, file);
  if (exact) return exact.file;

  // Partial match (ends with). `%` and `_` are LIKE wildcards, and `_` is
  // common in file names, so escape them to keep the user's text literal.
  const suffixPattern = `%${file.replace(/[\\%_]/g, '\\$&')}`;
  const partial = db
    .prepare(
      `SELECT file_a AS file FROM co_changes WHERE file_a LIKE ? ESCAPE '\\'
       UNION
       SELECT file_b AS file FROM co_changes WHERE file_b LIKE ? ESCAPE '\\'
       LIMIT 1`,
    )
    .get(suffixPattern, suffixPattern);
  if (partial) return partial.file;

  return null;
}
481
+
482
/**
 * Read the co_change_meta key/value rows into a camelCased summary object.
 *
 * @param {import('better-sqlite3').Database} db - Open DB handle
 * @returns {{ analyzedAt: string|null, since: string|null, minSupport: number|null, lastCommit: string|null }|null}
 *   Missing or empty values become null; the whole result is null when the
 *   query throws.
 */
function getCoChangeMeta(db) {
  try {
    const stored = new Map();
    for (const entry of db.prepare('SELECT key, value FROM co_change_meta').all()) {
      stored.set(entry.key, entry.value);
    }

    const minSupportText = stored.get('min_support');
    return {
      analyzedAt: stored.get('analyzed_at') || null,
      since: stored.get('since') || null,
      minSupport: minSupportText ? parseInt(minSupportText, 10) : null,
      lastCommit: stored.get('last_analyzed_commit') || null,
    };
  } catch {
    return null;
  }
}
package/src/config.js CHANGED
@@ -24,6 +24,12 @@ export const DEFAULTS = {
24
24
  llm: { provider: null, model: null, baseUrl: null, apiKey: null, apiKeyCommand: null },
25
25
  search: { defaultMinScore: 0.2, rrfK: 60, topK: 15 },
26
26
  ci: { failOnCycles: false, impactThreshold: null },
27
+ coChange: {
28
+ since: '1 year ago',
29
+ minSupport: 3,
30
+ minJaccard: 0.3,
31
+ maxFilesPerCommit: 50,
32
+ },
27
33
  };
28
34
 
29
35
  /**
package/src/db.js CHANGED
@@ -71,6 +71,36 @@ export const MIGRATIONS = [
71
71
  version: 4,
72
72
  up: `ALTER TABLE file_hashes ADD COLUMN size INTEGER DEFAULT 0;`,
73
73
  },
74
+ {
75
+ version: 5,
76
+ up: `
77
+ CREATE TABLE IF NOT EXISTS co_changes (
78
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
79
+ file_a TEXT NOT NULL,
80
+ file_b TEXT NOT NULL,
81
+ commit_count INTEGER NOT NULL,
82
+ jaccard REAL NOT NULL,
83
+ last_commit_epoch INTEGER,
84
+ UNIQUE(file_a, file_b)
85
+ );
86
+ CREATE INDEX IF NOT EXISTS idx_co_changes_file_a ON co_changes(file_a);
87
+ CREATE INDEX IF NOT EXISTS idx_co_changes_file_b ON co_changes(file_b);
88
+ CREATE INDEX IF NOT EXISTS idx_co_changes_jaccard ON co_changes(jaccard DESC);
89
+ CREATE TABLE IF NOT EXISTS co_change_meta (
90
+ key TEXT PRIMARY KEY,
91
+ value TEXT NOT NULL
92
+ );
93
+ `,
94
+ },
95
+ {
96
+ version: 6,
97
+ up: `
98
+ CREATE TABLE IF NOT EXISTS file_commit_counts (
99
+ file TEXT PRIMARY KEY,
100
+ commit_count INTEGER NOT NULL DEFAULT 0
101
+ );
102
+ `,
103
+ },
74
104
  ];
75
105
 
76
106
  export function openDb(dbPath) {
@@ -115,6 +145,16 @@ export function initSchema(db) {
115
145
  } catch {
116
146
  /* already exists */
117
147
  }
148
+ try {
149
+ db.exec('ALTER TABLE nodes ADD COLUMN role TEXT');
150
+ } catch {
151
+ /* already exists */
152
+ }
153
+ try {
154
+ db.exec('CREATE INDEX IF NOT EXISTS idx_nodes_role ON nodes(role)');
155
+ } catch {
156
+ /* already exists */
157
+ }
118
158
  }
119
159
 
120
160
  export function findDbPath(customPath) {
package/src/embedder.js CHANGED
@@ -16,6 +16,28 @@ function splitIdentifier(name) {
16
16
  .trim();
17
17
  }
18
18
 
19
/**
 * Match a file path against a glob pattern.
 * Supports *, **, and ? wildcards. Zero dependencies.
 *
 * - `*` matches any run of characters within one path segment
 * - `?` matches exactly one character other than `/`
 * - `**` matches any run of characters, including `/`
 * - `**` followed by `/` at the start of a segment matches zero or more whole
 *   directories, so a pattern such as "**" + "/*.js" also matches a
 *   root-level `index.js`
 *
 * Backslashes in both the path and the pattern are treated as `/`.
 *
 * @param {string} filePath - Path to test
 * @param {string} pattern - Glob pattern
 * @returns {boolean} True when the whole path matches the pattern
 */
function globMatch(filePath, pattern) {
  // Normalize separators to forward slashes
  const normalized = filePath.replace(/\\/g, '/');
  const glob = pattern.replace(/\\/g, '/');

  // Translate in a single pass so that text produced for one wildcard is never
  // re-interpreted as another wildcard. Longer tokens are listed first.
  const source = glob.replace(/\*\*\/|\*\*|\*|\?|[.+^${}()|[\]\\]/g, (token, offset) => {
    switch (token) {
      case '**/':
        // Only a whole-segment `**` may stand for "zero directories".
        return offset === 0 || glob[offset - 1] === '/' ? '(?:.*/)?' : '.*/';
      case '**':
        return '.*';
      case '*':
        return '[^/]*';
      case '?':
        return '[^/]';
      default:
        // Regex metacharacter that has no glob meaning: match it literally.
        return `\\${token}`;
    }
  });

  try {
    return new RegExp(`^${source}$`).test(normalized);
  } catch {
    // Malformed pattern — fall back to substring match
    return normalized.includes(pattern);
  }
}
40
+
19
41
  // Lazy-load transformers (heavy, optional module)
20
42
  let pipeline = null;
21
43
  let _cos_sim = null;
@@ -76,7 +98,7 @@ export const MODELS = {
76
98
 
77
99
  export const EMBEDDING_STRATEGIES = ['structured', 'source'];
78
100
 
79
- export const DEFAULT_MODEL = 'minilm';
101
+ export const DEFAULT_MODEL = 'nomic-v1.5';
80
102
  const BATCH_SIZE_MAP = {
81
103
  minilm: 32,
82
104
  'jina-small': 16,
@@ -216,11 +238,26 @@ async function loadTransformers() {
216
238
  }
217
239
  }
218
240
 
241
/**
 * Dispose the current ONNX session and free memory.
 * Safe to call when no model is loaded (no-op).
 *
 * Module state is cleared before the session's `dispose()` is awaited. If
 * disposal rejects, the error still propagates, but the module no longer holds
 * a reference to the failed session.
 *
 * @returns {Promise<void>}
 */
export async function disposeModel() {
  const current = extractor;
  extractor = null;
  activeModel = null;
  if (current) {
    await current.dispose();
  }
}
252
+
219
253
  async function loadModel(modelKey) {
220
254
  const config = getModelConfig(modelKey);
221
255
 
222
256
  if (extractor && activeModel === config.name) return { extractor, config };
223
257
 
258
+ // Dispose previous model before loading a different one
259
+ await disposeModel();
260
+
224
261
  const transformers = await loadTransformers();
225
262
  pipeline = transformers.pipeline;
226
263
  _cos_sim = transformers.cos_sim;
@@ -324,6 +361,14 @@ export async function buildEmbeddings(rootDir, modelKey, customDbPath, options =
324
361
  const strategy = options.strategy || 'structured';
325
362
  const dbPath = customDbPath || findDbPath(null);
326
363
 
364
+ if (!fs.existsSync(dbPath)) {
365
+ console.error(
366
+ `No codegraph database found at ${dbPath}.\n` +
367
+ `Run "codegraph build" first to analyze your codebase.`,
368
+ );
369
+ process.exit(1);
370
+ }
371
+
327
372
  const db = new Database(dbPath);
328
373
  initEmbeddingsSchema(db);
329
374
 
@@ -488,7 +533,8 @@ function _prepareSearch(customDbPath, opts = {}) {
488
533
  conditions.push('n.kind = ?');
489
534
  params.push(opts.kind);
490
535
  }
491
- if (opts.filePattern) {
536
+ const isGlob = opts.filePattern && /[*?[\]]/.test(opts.filePattern);
537
+ if (opts.filePattern && !isGlob) {
492
538
  conditions.push('n.file LIKE ?');
493
539
  params.push(`%${opts.filePattern}%`);
494
540
  }
@@ -497,6 +543,9 @@ function _prepareSearch(customDbPath, opts = {}) {
497
543
  }
498
544
 
499
545
  let rows = db.prepare(sql).all(...params);
546
+ if (isGlob) {
547
+ rows = rows.filter((row) => globMatch(row.file, opts.filePattern));
548
+ }
500
549
  if (noTests) {
501
550
  rows = rows.filter((row) => !TEST_PATTERN.test(row.file));
502
551
  }
@@ -660,6 +709,11 @@ export async function search(query, customDbPath, opts = {}) {
660
709
  const data = await searchData(singleQuery, customDbPath, opts);
661
710
  if (!data) return;
662
711
 
712
+ if (opts.json) {
713
+ console.log(JSON.stringify(data, null, 2));
714
+ return;
715
+ }
716
+
663
717
  console.log(`\nSemantic search: "${singleQuery}"\n`);
664
718
 
665
719
  if (data.results.length === 0) {
@@ -679,6 +733,11 @@ export async function search(query, customDbPath, opts = {}) {
679
733
  const data = await multiSearchData(queries, customDbPath, opts);
680
734
  if (!data) return;
681
735
 
736
+ if (opts.json) {
737
+ console.log(JSON.stringify(data, null, 2));
738
+ return;
739
+ }
740
+
682
741
  console.log(`\nMulti-query semantic search (RRF, k=${opts.rrfK || 60}):`);
683
742
  queries.forEach((q, i) => {
684
743
  console.log(` [${i + 1}] "${q}"`);