@optave/codegraph 2.4.0 → 2.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,303 @@
1
+ import path from 'node:path';
2
+ import Graph from 'graphology';
3
+ import louvain from 'graphology-communities-louvain';
4
+ import { openReadonlyOrFail } from './db.js';
5
+ import { isTestFile } from './queries.js';
6
+
7
+ // ─── Graph Construction ───────────────────────────────────────────────
8
+
9
/**
 * Build an undirected graphology graph from the codegraph SQLite database.
 *
 * Two granularities are supported:
 * - file-level (default): nodes are rows of kind 'file', edges are rows of
 *   kind 'imports' or 'imports-type';
 * - function-level (`opts.functions`): nodes are rows of kind 'function',
 *   'method' or 'class', edges are rows of kind 'calls'.
 *
 * Node keys are the stringified database ids. Self-loops, duplicate edges and
 * edges with an endpoint that was not loaded as a node (different kind, or
 * removed by the test-file filter) are skipped.
 *
 * @param {object} db - open better-sqlite3 database (readonly)
 * @param {object} [opts]
 * @param {boolean} [opts.functions] - Function-level instead of file-level
 * @param {boolean} [opts.noTests] - Exclude nodes whose file satisfies isTestFile
 * @returns {Graph}
 */
function buildGraphologyGraph(db, opts = {}) {
  const functionLevel = Boolean(opts.functions);

  // The two modes differ only in which rows are read and which attributes a
  // node carries; everything after that is shared.
  const nodeSql = functionLevel
    ? "SELECT id, name, kind, file FROM nodes WHERE kind IN ('function','method','class')"
    : "SELECT id, name, file FROM nodes WHERE kind = 'file'";
  const edgeSql = functionLevel
    ? "SELECT source_id, target_id FROM edges WHERE kind = 'calls'"
    : "SELECT source_id, target_id FROM edges WHERE kind IN ('imports','imports-type')";
  const toAttributes = functionLevel
    ? (row) => ({ label: row.name, file: row.file, kind: row.kind })
    : (row) => ({ label: row.file, file: row.file });

  const graph = new Graph({ type: 'undirected' });

  // Raw ids (not the string keys) are remembered so edge rows can be checked
  // against them without converting first.
  const loadedIds = new Set();
  for (const row of db.prepare(nodeSql).all()) {
    if (opts.noTests && isTestFile(row.file)) continue;
    graph.addNode(String(row.id), toAttributes(row));
    loadedIds.add(row.id);
  }

  for (const row of db.prepare(edgeSql).all()) {
    if (!loadedIds.has(row.source_id) || !loadedIds.has(row.target_id)) continue;
    const src = String(row.source_id);
    const tgt = String(row.target_id);
    if (src === tgt) continue; // no self-loops
    // Several rows may connect the same pair; keep a single edge per pair.
    if (!graph.hasEdge(src, tgt)) {
      graph.addEdge(src, tgt);
    }
  }

  return graph;
}
73
+
74
+ // ─── Directory Helpers ────────────────────────────────────────────────
75
+
76
/**
 * Return the directory part of a file path, as computed by `path.dirname`.
 *
 * A path with no directory component (dirname of '.') is reported as the
 * literal label '(root)'.
 *
 * @param {string} filePath
 * @returns {string}
 */
function getDirectory(filePath) {
  const parent = path.dirname(filePath);
  if (parent === '.') {
    return '(root)';
  }
  return parent;
}
80
+
81
+ // ─── Core Analysis ────────────────────────────────────────────────────
82
+
83
/**
 * Run Louvain community detection and return structured data.
 *
 * The database is opened, used to build the graph, and closed again before any
 * analysis runs; it is closed even when graph construction throws.
 *
 * Drift analysis compares the detected communities with the directory layout:
 * - split candidates are directories whose members fall into 2+ communities;
 * - merge candidates are communities whose members come from 2+ directories;
 * - the drift score (0-100) is the rounded mean of the share of split
 *   directories and the share of merge communities.
 *
 * A graph with no nodes or no edges yields an empty result with zero scores.
 *
 * @param {string} [customDbPath] - Path to graph.db
 * @param {object} [opts]
 * @param {boolean} [opts.functions] - Function-level instead of file-level
 * @param {number} [opts.resolution] - Louvain resolution (default 1.0)
 * @param {boolean} [opts.noTests] - Exclude test files
 * @param {boolean} [opts.drift] - Drift-only mode (omit community member lists)
 * @param {boolean} [opts.json] - JSON output (used by CLI wrapper only)
 * @returns {{ communities: object[], modularity: number, drift: object, summary: object }}
 */
export function communitiesData(customDbPath, opts = {}) {
  const resolution = opts.resolution ?? 1.0;
  const driftOnly = Boolean(opts.drift);

  const db = openReadonlyOrFail(customDbPath);
  let graph;
  try {
    graph = buildGraphologyGraph(db, {
      functions: opts.functions,
      noTests: opts.noTests,
    });
  } finally {
    // Release the handle even if graph construction fails.
    db.close();
  }

  // Handle empty or trivial graphs
  if (graph.order === 0 || graph.size === 0) {
    return {
      communities: [],
      modularity: 0,
      drift: { splitCandidates: [], mergeCandidates: [] },
      summary: { communityCount: 0, modularity: 0, nodeCount: graph.order, driftScore: 0 },
    };
  }

  // Run Louvain
  const details = louvain.detailed(graph, { resolution });
  const assignments = details.communities; // node → community id
  const modularity = Number(details.modularity.toFixed(4));

  // Group nodes by community
  const communityMap = new Map(); // community id → node keys[]
  graph.forEachNode((key) => {
    const cid = assignments[key];
    const bucket = communityMap.get(cid);
    if (bucket) {
      bucket.push(key);
    } else {
      communityMap.set(cid, [key]);
    }
  });

  // Build community objects and, in the same pass, the directory → communities index
  const communities = [];
  const dirToCommunities = new Map(); // dir → Set<community id>

  for (const [cid, members] of communityMap) {
    // Counted in a Map: directory names are arbitrary strings, and a plain
    // object lookup would hit inherited members for names like 'constructor'.
    const dirCounts = new Map();
    const memberData = [];
    for (const key of members) {
      const attrs = graph.getNodeAttributes(key);
      const dir = getDirectory(attrs.file);
      dirCounts.set(dir, (dirCounts.get(dir) ?? 0) + 1);
      if (!driftOnly) {
        memberData.push({
          name: attrs.label,
          file: attrs.file,
          ...(attrs.kind ? { kind: attrs.kind } : {}),
        });
      }
    }

    const directories = Object.fromEntries(dirCounts);
    for (const dir of Object.keys(directories)) {
      if (!dirToCommunities.has(dir)) dirToCommunities.set(dir, new Set());
      dirToCommunities.get(dir).add(cid);
    }

    communities.push({
      id: cid,
      size: members.length,
      directories,
      ...(driftOnly ? {} : { members: memberData }),
    });
  }

  // Sort by size descending
  communities.sort((a, b) => b.size - a.size);

  // ─── Drift Analysis ─────────────────────────────────────────────

  // Split candidates: directories with members in 2+ communities
  const splitCandidates = [];
  for (const [dir, cids] of dirToCommunities) {
    if (cids.size >= 2) {
      splitCandidates.push({ directory: dir, communityCount: cids.size });
    }
  }
  splitCandidates.sort((a, b) => b.communityCount - a.communityCount);

  // Merge candidates: communities spanning 2+ directories
  const mergeCandidates = [];
  for (const c of communities) {
    const dirNames = Object.keys(c.directories);
    if (dirNames.length >= 2) {
      mergeCandidates.push({
        communityId: c.id,
        size: c.size,
        directoryCount: dirNames.length,
        directories: dirNames,
      });
    }
  }
  mergeCandidates.sort((a, b) => b.directoryCount - a.directoryCount);

  // Drift score: 0-100 based on how much directory structure diverges from communities
  // Higher = more drift (directories don't match communities)
  const totalDirs = dirToCommunities.size;
  const splitRatio = totalDirs > 0 ? splitCandidates.length / totalDirs : 0;
  const totalComms = communities.length;
  const mergeRatio = totalComms > 0 ? mergeCandidates.length / totalComms : 0;
  const driftScore = Math.round(((splitRatio + mergeRatio) / 2) * 100);

  return {
    communities: driftOnly ? [] : communities,
    modularity,
    drift: { splitCandidates, mergeCandidates },
    summary: {
      communityCount: communities.length,
      modularity,
      nodeCount: graph.order,
      driftScore,
    },
  };
}
216
+
217
/**
 * Lightweight summary for stats integration.
 *
 * Delegates to communitiesData with drift-only mode forced on (any `drift`
 * value in `opts` is overridden) and returns just the `summary` part.
 *
 * @param {string} [customDbPath]
 * @param {object} [opts]
 * @param {boolean} [opts.noTests]
 * @returns {{ communityCount: number, modularity: number, driftScore: number }}
 */
export function communitySummaryForStats(customDbPath, opts = {}) {
  const driftOnlyOpts = { ...opts, drift: true };
  const { summary } = communitiesData(customDbPath, driftOnlyOpts);
  return summary;
}
229
+
230
+ // ─── CLI Display ──────────────────────────────────────────────────────
231
+
232
/**
 * CLI entry point: run community detection and print results.
 *
 * With `opts.json` the raw result is printed as indented JSON and nothing
 * else. Otherwise a header line is printed, followed by the per-community
 * listing (skipped when `opts.drift` is set; at most 8 members shown per
 * community) and the drift section (at most 10 entries per list).
 *
 * @param {string} [customDbPath]
 * @param {object} [opts] - forwarded unchanged to communitiesData
 */
export function communities(customDbPath, opts = {}) {
  const MEMBER_LIMIT = 8;
  const CANDIDATE_LIMIT = 10;

  const data = communitiesData(customDbPath, opts);

  if (opts.json) {
    console.log(JSON.stringify(data, null, 2));
    return;
  }

  const { summary } = data;

  if (summary.communityCount === 0) {
    const notice = [
      '\nNo communities detected. The graph may be too small or disconnected.\n',
      'Run "codegraph build" first to populate the graph.\n',
    ].join('');
    console.log(notice);
    return;
  }

  // One community: a headline with directories ordered by member count,
  // then a truncated member list when members are present.
  const printCommunity = (community) => {
    const dirSummary = Object.entries(community.directories)
      .sort((left, right) => right[1] - left[1])
      .map(([dirName, count]) => `${dirName} (${count})`)
      .join(', ');
    console.log(` Community ${community.id} (${community.size} members): ${dirSummary}`);

    const { members } = community;
    if (!members) return;
    for (const member of members.slice(0, MEMBER_LIMIT)) {
      const kindTag = member.kind ? ` [${member.kind}]` : '';
      console.log(` - ${member.name}${kindTag} ${member.file}`);
    }
    if (members.length > MEMBER_LIMIT) {
      console.log(` ... and ${members.length - MEMBER_LIMIT} more`);
    }
  };

  const modeLabel = opts.functions ? 'Function' : 'File';
  console.log(`\n# ${modeLabel}-Level Communities\n`);
  console.log(
    ` ${summary.communityCount} communities | ${summary.nodeCount} nodes | modularity: ${summary.modularity} | drift: ${summary.driftScore}%\n`,
  );

  if (!opts.drift) {
    for (const community of data.communities) {
      printCommunity(community);
    }
  }

  // Drift analysis
  const { splitCandidates, mergeCandidates } = data.drift;
  const nothingToReport = splitCandidates.length === 0 && mergeCandidates.length === 0;
  if (!nothingToReport) {
    console.log(`\n# Drift Analysis (score: ${summary.driftScore}%)\n`);

    if (splitCandidates.length > 0) {
      console.log(' Split candidates (directories spanning multiple communities):');
      for (const split of splitCandidates.slice(0, CANDIDATE_LIMIT)) {
        console.log(` - ${split.directory} → ${split.communityCount} communities`);
      }
    }

    if (mergeCandidates.length > 0) {
      console.log(' Merge candidates (communities spanning multiple directories):');
      for (const merge of mergeCandidates.slice(0, CANDIDATE_LIMIT)) {
        console.log(
          ` - Community ${merge.communityId} (${merge.size} members) → ${merge.directoryCount} dirs: ${merge.directories.join(', ')}`,
        );
      }
    }
  }

  console.log();
}