@ophan/core 0.0.2 → 0.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (76)
  1. package/dist/community-detectors/index.d.ts +20 -0
  2. package/dist/community-detectors/index.d.ts.map +1 -0
  3. package/dist/community-detectors/index.js +45 -0
  4. package/dist/community-detectors/label-prop.d.ts +20 -0
  5. package/dist/community-detectors/label-prop.d.ts.map +1 -0
  6. package/dist/community-detectors/label-prop.js +77 -0
  7. package/dist/community-detectors/leiden.d.ts +22 -0
  8. package/dist/community-detectors/leiden.d.ts.map +1 -0
  9. package/dist/community-detectors/leiden.js +312 -0
  10. package/dist/community-detectors/louvain.d.ts +13 -0
  11. package/dist/community-detectors/louvain.d.ts.map +1 -0
  12. package/dist/community-detectors/louvain.js +29 -0
  13. package/dist/community-detectors/types.d.ts +36 -0
  14. package/dist/community-detectors/types.d.ts.map +1 -0
  15. package/dist/{parsers/__fixtures__/no-functions.js → community-detectors/types.js} +0 -2
  16. package/dist/edge-resolvers/call.d.ts +13 -0
  17. package/dist/edge-resolvers/call.d.ts.map +1 -0
  18. package/dist/edge-resolvers/call.js +40 -0
  19. package/dist/edge-resolvers/co-location.d.ts +16 -0
  20. package/dist/edge-resolvers/co-location.d.ts.map +1 -0
  21. package/dist/edge-resolvers/co-location.js +129 -0
  22. package/dist/edge-resolvers/import.d.ts +16 -0
  23. package/dist/edge-resolvers/import.d.ts.map +1 -0
  24. package/dist/edge-resolvers/import.js +118 -0
  25. package/dist/edge-resolvers/index.d.ts +9 -0
  26. package/dist/edge-resolvers/index.d.ts.map +1 -0
  27. package/dist/edge-resolvers/index.js +29 -0
  28. package/dist/edge-resolvers/jsx-ref.d.ts +13 -0
  29. package/dist/edge-resolvers/jsx-ref.d.ts.map +1 -0
  30. package/dist/edge-resolvers/jsx-ref.js +40 -0
  31. package/dist/edge-resolvers/types.d.ts +40 -0
  32. package/dist/edge-resolvers/types.d.ts.map +1 -0
  33. package/dist/edge-resolvers/types.js +2 -0
  34. package/dist/graph.d.ts +293 -0
  35. package/dist/graph.d.ts.map +1 -0
  36. package/dist/graph.js +1295 -0
  37. package/dist/index.d.ts +37 -8
  38. package/dist/index.d.ts.map +1 -1
  39. package/dist/index.js +385 -183
  40. package/dist/migrations.d.ts +25 -0
  41. package/dist/migrations.d.ts.map +1 -0
  42. package/dist/migrations.js +323 -0
  43. package/dist/module-resolvers/index.d.ts +11 -0
  44. package/dist/module-resolvers/index.d.ts.map +1 -0
  45. package/dist/module-resolvers/index.js +67 -0
  46. package/dist/module-resolvers/javascript.d.ts +18 -0
  47. package/dist/module-resolvers/javascript.d.ts.map +1 -0
  48. package/dist/module-resolvers/javascript.js +130 -0
  49. package/dist/module-resolvers/types.d.ts +18 -0
  50. package/dist/module-resolvers/types.d.ts.map +1 -0
  51. package/dist/module-resolvers/types.js +2 -0
  52. package/dist/parsers/python.d.ts.map +1 -1
  53. package/dist/parsers/python.js +38 -4
  54. package/dist/parsers/typescript.d.ts.map +1 -1
  55. package/dist/parsers/typescript.js +133 -0
  56. package/dist/practices.d.ts +28 -0
  57. package/dist/practices.d.ts.map +1 -0
  58. package/dist/practices.js +95 -0
  59. package/dist/schemas.d.ts +251 -3
  60. package/dist/schemas.d.ts.map +1 -1
  61. package/dist/schemas.js +121 -6
  62. package/dist/shared.d.ts +8 -0
  63. package/dist/shared.d.ts.map +1 -1
  64. package/dist/summarize.d.ts +165 -0
  65. package/dist/summarize.d.ts.map +1 -0
  66. package/dist/summarize.js +1067 -0
  67. package/ophan_logo.png +0 -0
  68. package/package.json +9 -2
  69. package/dist/parsers/__fixtures__/arrow-functions.d.ts +0 -5
  70. package/dist/parsers/__fixtures__/arrow-functions.d.ts.map +0 -1
  71. package/dist/parsers/__fixtures__/arrow-functions.js +0 -16
  72. package/dist/parsers/__fixtures__/class-methods.d.ts +0 -6
  73. package/dist/parsers/__fixtures__/class-methods.d.ts.map +0 -1
  74. package/dist/parsers/__fixtures__/class-methods.js +0 -12
  75. package/dist/parsers/__fixtures__/no-functions.d.ts +0 -9
  76. package/dist/parsers/__fixtures__/no-functions.d.ts.map +0 -1
package/dist/graph.js ADDED
@@ -0,0 +1,1295 @@
1
+ "use strict";
2
+ // Graph analysis module — builds a function relationship graph, runs community detection,
3
+ // and produces hierarchical documentation structure.
4
+ //
5
+ // Architecture:
6
+ // 1. Parsers extract call sites, imports, exports → FunctionInfo with relationship fields
7
+ // 2. This module resolves names to content hashes → function_edges table
8
+ // 3. graphology builds in-memory graph from edges
9
+ // 4. Community detection (configurable algorithm) → communities table
10
+ // 5. (Phase 2) Hierarchical summarization → community_summaries table
11
+ var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
12
+ if (k2 === undefined) k2 = k;
13
+ var desc = Object.getOwnPropertyDescriptor(m, k);
14
+ if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
15
+ desc = { enumerable: true, get: function() { return m[k]; } };
16
+ }
17
+ Object.defineProperty(o, k2, desc);
18
+ }) : (function(o, m, k, k2) {
19
+ if (k2 === undefined) k2 = k;
20
+ o[k2] = m[k];
21
+ }));
22
+ var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
23
+ Object.defineProperty(o, "default", { enumerable: true, value: v });
24
+ }) : function(o, v) {
25
+ o["default"] = v;
26
+ });
27
+ var __importStar = (this && this.__importStar) || (function () {
28
+ var ownKeys = function(o) {
29
+ ownKeys = Object.getOwnPropertyNames || function (o) {
30
+ var ar = [];
31
+ for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
32
+ return ar;
33
+ };
34
+ return ownKeys(o);
35
+ };
36
+ return function (mod) {
37
+ if (mod && mod.__esModule) return mod;
38
+ var result = {};
39
+ if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
40
+ __setModuleDefault(result, mod);
41
+ return result;
42
+ };
43
+ })();
44
+ var __importDefault = (this && this.__importDefault) || function (mod) {
45
+ return (mod && mod.__esModule) ? mod : { "default": mod };
46
+ };
47
+ Object.defineProperty(exports, "__esModule", { value: true });
48
+ exports.DEFAULT_COMPARISONS = exports.DEFAULT_GRAPH_CONFIG = exports.DEFAULT_EDGE_WEIGHTS = void 0;
49
+ exports.computeDirectoryDistance = computeDirectoryDistance;
50
+ exports.computePackage = computePackage;
51
+ exports.buildModuleResolver = buildModuleResolver;
52
+ exports.loadGraphConfig = loadGraphConfig;
53
+ exports.saveGraphConfig = saveGraphConfig;
54
+ exports.buildEdgeResolverContext = buildEdgeResolverContext;
55
+ exports.resolveEdges = resolveEdges;
56
+ exports.addTransitiveEdges = addTransitiveEdges;
57
+ exports.storeEdges = storeEdges;
58
+ exports.storeEdgesIncremental = storeEdgesIncremental;
59
+ exports.loadEdges = loadEdges;
60
+ exports.storeCommunities = storeCommunities;
61
+ exports.loadCommunities = loadCommunities;
62
+ exports.computeCommunityEdges = computeCommunityEdges;
63
+ exports.storeCommunityEdges = storeCommunityEdges;
64
+ exports.loadCommunityEdges = loadCommunityEdges;
65
+ exports.buildGraph = buildGraph;
66
+ exports.computeCentrality = computeCentrality;
67
+ exports.rescueDissolvedNodes = rescueDissolvedNodes;
68
+ exports.detectCommunities = detectCommunities;
69
+ exports.matchCommunities = matchCommunities;
70
+ exports.labelPropagation = labelPropagation;
71
+ exports.computeChangedHashes = computeChangedHashes;
72
+ exports.analyzeGraph = analyzeGraph;
73
+ exports.detectHierarchicalCommunities = detectHierarchicalCommunities;
74
+ exports.computeComparisonMetrics = computeComparisonMetrics;
75
+ exports.runComparison = runComparison;
76
+ const path = __importStar(require("path"));
77
+ const fs = __importStar(require("fs"));
78
+ const graphology_1 = __importDefault(require("graphology"));
79
+ const betweenness_1 = __importDefault(require("graphology-metrics/centrality/betweenness"));
80
+ const community_detectors_1 = require("./community-detectors");
81
+ const edge_resolvers_1 = require("./edge-resolvers");
82
+ /**
83
+ * Default edge weights for graph construction.
84
+ *
85
+ * These control how strongly each relationship type pulls functions into the
86
+ * same community. All weights are relative — what matters is the ratio between
87
+ * them, not the absolute values. Weights from different edge types between the
88
+ * same function pair are summed in the merged graph.
89
+ *
90
+ * Edit these to tune community detection. After changing, re-run:
91
+ * pnpm dev graph --compare --path .
92
+ * to see the effect across all algorithms.
93
+ */
94
+ exports.DEFAULT_EDGE_WEIGHTS = {
95
+ /**
96
+ * Direct function calls: `fnA()` calls `fnB()`.
97
+ * Strongest signal — actual runtime dependency.
98
+ * ↑ increase: call relationships dominate clustering
99
+ * ↓ decrease: proximity and imports become relatively more important
100
+ */
101
+ call: 0.5,
102
+ /**
103
+ * Type references: function A uses a type defined near function B.
104
+ * NOT YET EXTRACTED by parsers — no type_ref edges are currently produced.
105
+ * Reserved for future use.
106
+ */
107
+ type_ref: 0.5,
108
+ /**
109
+ * Import statements: `import { validate } from "./auth"` creates an edge.
110
+ * Includes barrel file fallback (workspace package re-exports).
111
+ * ↑ increase: module boundaries become stronger clustering signal
112
+ * ↓ decrease: imports matter less relative to calls and proximity
113
+ */
114
+ import: 0.2,
115
+ /**
116
+ * File/directory proximity. Two sub-types:
117
+ * Same-file: all pairs connected, weight decays as 1/offset (adjacent=full, distance N=1/N)
118
+ * Same-directory (cross-file): half the same-file weight (hardcoded 0.5x multiplier)
119
+ *
120
+ * This is the main knob for "respect file boundaries". Higher values keep
121
+ * same-file functions together even when they have cross-file call edges.
122
+ *
123
+ * ↑ increase: files/directories cluster together more strongly
124
+ * ↓ decrease: only call/import relationships drive clustering
125
+ * 0: disable proximity entirely (pure topology)
126
+ */
127
+ co_location: 0.7,
128
+ /**
129
+ * JSX component references: `<Button />`, `<Card />`.
130
+ * Separated from calls because UI composition is a weaker domain signal
131
+ * than functional dependency. Without this separation, functions using
132
+ * shared UI components get pulled into the UI component's community.
133
+ *
134
+ * ↑ increase: components rendering the same UI primitives cluster together
135
+ * ↓ decrease: JSX composition becomes noise
136
+ */
137
+ jsx_ref: 0.2,
138
+ /**
139
+ * 2-hop call chains: A→B→C creates A↔C edge.
140
+ * Derived in-memory (not stored in DB). Stacks with direct edges.
141
+ * ↑ increase: transitive relationships pull functions together more
142
+ * ↓ decrease: only direct connections drive clustering
143
+ */
144
+ transitive_call: 0.4,
145
+ /**
146
+ * Shared caller: if A calls both B and C, creates B↔C edge.
147
+ * Derived in-memory (not stored in DB). Callers with >15 callees are skipped.
148
+ * Weakest signal — correlation (same caller) is suggestive but indirect.
149
+ * ↑ increase: functions used by the same caller cluster together more
150
+ * ↓ decrease: only direct relationships matter
151
+ */
152
+ co_caller: 0.4,
153
+ };
154
+ exports.DEFAULT_GRAPH_CONFIG = {
155
+ algorithm: "leiden",
156
+ edgeWeights: { ...exports.DEFAULT_EDGE_WEIGHTS },
157
+ resolution: 3.0,
158
+ minCommunitySize: 3,
159
+ maxCommunitySize: 18,
160
+ autoResolution: true,
161
+ directoryDecay: 0.7,
162
+ };
163
/**
 * Directory-level distance between two files, relative to rootPath.
 * Counts the directory segments on the walk from one file's directory to the
 * other's via their deepest common ancestor.
 *
 * Examples (relative to root):
 *   src/components/Button.tsx vs src/components/Input.tsx → 0 (same dir)
 *   src/components/Button.tsx vs src/utils/format.ts      → 2 (sibling dirs)
 *   packages/core/src/graph.ts vs packages/webapp/src/App.tsx → 4
 *
 * @param {string} filePathA - path of the first file
 * @param {string} filePathB - path of the second file
 * @param {string} rootPath  - root both paths are made relative to
 * @returns {number} number of unique directory segments between the two dirs
 */
function computeDirectoryDistance(filePathA, filePathB, rootPath) {
    // Directory of `file` as root-relative path segments (empty segs dropped).
    const toSegments = (file) => path
        .relative(rootPath, path.dirname(file))
        .split(path.sep)
        .filter(Boolean);
    const left = toSegments(filePathA);
    const right = toSegments(filePathB);
    // Length of the shared prefix = depth of the common ancestor.
    const limit = Math.min(left.length, right.length);
    let shared = 0;
    while (shared < limit && left[shared] === right[shared]) {
        shared += 1;
    }
    // Steps up from A to the ancestor plus steps down to B.
    return (left.length - shared) + (right.length - shared);
}
190
/**
 * Human-readable package label for a file.
 * Prefers the deepest workspace package (from the resolver) whose directory
 * contains the file. Without a resolver match, falls back to the first path
 * segment of the file relative to rootPath, or "root" when the file sits
 * directly in the root directory.
 *
 * @param {string} filePath
 * @param {string} rootPath
 * @param {{workspacePackages: Map<string, {dir: string}>}} [resolver]
 * @returns {string} package name, directory segment, or "root"
 */
function computePackage(filePath, rootPath, resolver) {
    if (resolver) {
        // Longest matching package dir wins, so nested packages beat ancestors.
        let winnerDir = "";
        let winnerName = "root";
        for (const [pkgName, pkg] of resolver.workspacePackages) {
            const contains = filePath.startsWith(pkg.dir + path.sep);
            if (contains && pkg.dir.length > winnerDir.length) {
                winnerDir = pkg.dir;
                winnerName = pkgName;
            }
        }
        if (winnerDir !== "")
            return winnerName;
    }
    const rel = path.relative(rootPath, filePath).replace(/\\/g, "/");
    const segments = rel.split("/");
    return segments.length > 1 ? segments[0] : "root";
}
213
/** Marker files that indicate a package/project root directory */
const PACKAGE_MARKERS = [
    "package.json",
    "pyproject.toml",
    "go.mod",
    "Cargo.toml",
    "setup.py",
    "pom.xml",
];
/**
 * Discover package directories by walking UP from each analyzed file's
 * directory and looking for marker files (see PACKAGE_MARKERS).
 *
 * Technology-agnostic: works on any codebase regardless of directory naming.
 * The `checked` set guarantees every directory is inspected at most once,
 * even when many files share ancestor directories.
 *
 * @param {string} rootPath - walk never leaves this directory
 * @param {Array<{filePath: string}>} functions - analyzed functions
 * @returns {string[]} unique directories that contain a marker file
 */
function discoverPackageDirs(rootPath, functions) {
    const packageDirs = new Set();
    const checked = new Set();
    // True when `dir` holds any recognized marker file.
    const hasMarker = (dir) => PACKAGE_MARKERS.some((marker) => {
        try {
            fs.statSync(path.join(dir, marker));
            return true;
        }
        catch {
            return false; // marker absent — keep looking
        }
    });
    const startDirs = new Set(functions.map((f) => path.dirname(f.filePath)));
    for (const startDir of startDirs) {
        for (let dir = startDir; dir.startsWith(rootPath) && dir !== path.dirname(dir); dir = path.dirname(dir)) {
            if (checked.has(dir))
                break; // an earlier walk already covered this ancestor chain
            checked.add(dir);
            if (hasMarker(dir))
                packageDirs.add(dir);
        }
    }
    // The root may never appear on any walk (e.g. no analyzed files) — check it too.
    if (!checked.has(rootPath) && hasMarker(rootPath)) {
        packageDirs.add(rootPath);
    }
    return [...packageDirs];
}
267
/**
 * Build a ModuleResolver by discovering packages via marker files.
 * Walks up from each analyzed file's directory (see discoverPackageDirs),
 * then reads per-package metadata:
 *   - package.json → workspace package name + string-valued "exports" map,
 *     with "main" used as the "." entry when no explicit "." export exists
 *   - tsconfig.json → path aliases ("paths" plus the resolved "baseUrl")
 * Unreadable or invalid files are skipped silently.
 * Runs once per graph command invocation — not per-edge.
 *
 * @param {string} rootPath
 * @param {Array<{filePath: string}>} [functions] - when omitted, only rootPath is inspected
 * @returns {{workspacePackages: Map, pathAliases: Map}}
 */
function buildModuleResolver(rootPath, functions) {
    const workspacePackages = new Map();
    const pathAliases = new Map();
    const packageDirs = functions
        ? discoverPackageDirs(rootPath, functions)
        : [rootPath]; // no function list → inspect only the root
    const readJson = (file) => JSON.parse(fs.readFileSync(file, "utf-8"));
    for (const dir of packageDirs) {
        // package.json: name + exports
        try {
            const pkgJson = readJson(path.join(dir, "package.json"));
            if (pkgJson.name) {
                const exportMap = {};
                if (pkgJson.exports && typeof pkgJson.exports === "object") {
                    // Keep only simple string targets; conditional exports are ignored.
                    for (const [entry, target] of Object.entries(pkgJson.exports)) {
                        if (typeof target === "string")
                            exportMap[entry] = target;
                    }
                }
                if (!exportMap["."] && pkgJson.main) {
                    exportMap["."] = pkgJson.main;
                }
                workspacePackages.set(pkgJson.name, { dir, exports: exportMap });
            }
        }
        catch {
            /* missing or invalid package.json — not a workspace package, skip */
        }
        // tsconfig.json: path aliases
        try {
            const tsconfig = readJson(path.join(dir, "tsconfig.json"));
            const compilerOptions = tsconfig.compilerOptions;
            if (compilerOptions?.paths) {
                const baseUrl = compilerOptions.baseUrl
                    ? path.resolve(dir, compilerOptions.baseUrl)
                    : dir;
                pathAliases.set(dir, { baseUrl, paths: compilerOptions.paths });
            }
        }
        catch {
            /* missing tsconfig or no paths — nothing to record */
        }
    }
    return { workspacePackages, pathAliases };
}
318
// ============ CONFIG PERSISTENCE ============
/**
 * Load the persisted graph config from the graph_config table, layered over
 * DEFAULT_GRAPH_CONFIG. A missing row or corrupt stored JSON yields a fresh
 * copy of the defaults. edgeWeights are merged key-by-key so a partial stored
 * weight map still inherits defaults for the remaining keys.
 *
 * @param db - database handle (NOTE(review): assumed better-sqlite3-style API)
 * @returns {object} the effective graph config
 */
function loadGraphConfig(db) {
    const row = db
        .prepare("SELECT value FROM graph_config WHERE key = 'config'")
        .get();
    if (!row)
        return { ...exports.DEFAULT_GRAPH_CONFIG };
    try {
        const persisted = JSON.parse(row.value);
        return {
            ...exports.DEFAULT_GRAPH_CONFIG,
            ...persisted,
            edgeWeights: {
                ...exports.DEFAULT_GRAPH_CONFIG.edgeWeights,
                ...(persisted.edgeWeights || {}),
            },
        };
    }
    catch {
        // Corrupt or non-object stored value — ignore it entirely.
        return { ...exports.DEFAULT_GRAPH_CONFIG };
    }
}
340
/**
 * Persist the graph config as a single JSON blob under key 'config'
 * in the graph_config table (upsert via INSERT OR REPLACE).
 *
 * @param db - database handle (NOTE(review): assumed better-sqlite3-style API)
 * @param {object} config - graph config to serialize
 */
function saveGraphConfig(db, config) {
    const payload = JSON.stringify(config);
    db.prepare("INSERT OR REPLACE INTO graph_config (key, value) VALUES ('config', ?)").run(payload);
}
343
// ============ EDGE RESOLUTION ============
/**
 * Build the shared lookup context consumed by every edge resolver.
 * Queries file_functions exactly once and derives all lookup maps from that
 * single row set, so individual resolvers never touch the database.
 *
 * Maps produced:
 *   - nameToHashes:    function name → Set<content hash> (global scope)
 *   - fileToHashes:    file path → Set<content hash> (import target resolution)
 *   - fileNameToHash:  "file::name" → content hash (targeted import edges)
 *   - pkgNameToHashes: package name → (name → Set<hash>); barrel-file fallback —
 *     when imports resolve through a re-exporting index file, the imported name
 *     is defined elsewhere in the package, and this map finds it by name
 *   - fileToSortedFns: file path → functions sorted by start line (proximity /
 *     co-location edges)
 *
 * Called once per resolveEdges() invocation.
 */
function buildEdgeResolverContext(db, functions, config, affectedHashes, resolver) {
    const rows = db
        .prepare("SELECT file_path, function_name, content_hash FROM file_functions")
        .all();
    // Append `value` to the Set stored under `key`, creating the Set on demand.
    const addTo = (map, key, value) => {
        const set = map.get(key) ?? new Set();
        set.add(value);
        map.set(key, set);
    };
    const nameToHashes = new Map();
    const fileToHashes = new Map();
    const fileNameToHash = new Map();
    for (const row of rows) {
        addTo(nameToHashes, row.function_name, row.content_hash);
        addTo(fileToHashes, row.file_path, row.content_hash);
        fileNameToHash.set(`${row.file_path}::${row.function_name}`, row.content_hash);
    }
    // Package-scoped name lookup for the barrel-file import fallback.
    const pkgNameToHashes = new Map();
    if (resolver) {
        for (const [pkgName, pkg] of resolver.workspacePackages) {
            const prefix = pkg.dir + path.sep;
            const nameMap = new Map();
            for (const row of rows) {
                if (row.file_path.startsWith(prefix)) {
                    addTo(nameMap, row.function_name, row.content_hash);
                }
            }
            if (nameMap.size > 0) {
                pkgNameToHashes.set(pkgName, nameMap);
            }
        }
    }
    // Group extracted functions per file, ordered by start line.
    const fileToSortedFns = new Map();
    for (const fn of functions) {
        const bucket = fileToSortedFns.get(fn.filePath);
        if (bucket) {
            bucket.push(fn);
        }
        else {
            fileToSortedFns.set(fn.filePath, [fn]);
        }
    }
    for (const bucket of fileToSortedFns.values()) {
        bucket.sort((a, b) => a.startLine - b.startLine);
    }
    return {
        nameToHashes,
        fileToHashes,
        fileNameToHash,
        pkgNameToHashes,
        fileToSortedFns,
        functions,
        config,
        affectedHashes,
        resolver,
    };
}
421
/**
 * Resolve name-based relationships into content-hash edges by running every
 * registered edge resolver over a shared lookup context (built exactly once).
 *
 * @returns {Array} edges ready to insert into the function_edges table
 */
function resolveEdges(db, functions, config, affectedHashes, resolver) {
    const context = buildEdgeResolverContext(db, functions, config, affectedHashes, resolver);
    return (0, edge_resolvers_1.getEdgeResolvers)().flatMap((r) => r.resolve(context));
}
429
// ============ TRANSITIVE EDGES ============
/** Maximum callees per caller for co-caller edge generation.
 * Hub functions with many callees would create O(n^2) edges — cap to prevent explosion. */
const CO_CALLER_CAP = 15;
/**
 * Derive transitive edges from the call graph to increase connectivity.
 * Pure function — no DB access; the input array is not mutated.
 *
 * Two derived edge types:
 *   1. transitive_call — A→B→C yields an undirected A↔C edge (2-hop chain).
 *   2. co_caller — A→B and A→C yields B↔C; skipped for callers with fewer
 *      than 2 or more than CO_CALLER_CAP callees.
 *
 * Derived edges are deliberately NOT deduped against direct edges — being
 * related in multiple ways intentionally increases connection strength, and
 * buildGraph() later merges parallel edges by summing weights.
 *
 * @param {Array} edges - resolved edges; only edgeType === "call" carries direction
 * @param {{edgeWeights: {transitive_call: number, co_caller: number}}} config
 * @returns {Array} the original edges followed by the derived ones
 */
function addTransitiveEdges(edges, config) {
    // caller hash → Set<callee hash> (only 'call' edges preserve direction)
    const adjacency = new Map();
    for (const e of edges) {
        if (e.edgeType !== "call")
            continue;
        const bucket = adjacency.get(e.sourceHash) ?? new Set();
        bucket.add(e.targetHash);
        adjacency.set(e.sourceHash, bucket);
    }
    if (adjacency.size === 0)
        return edges;
    const derived = [];
    // Emit at most one undirected edge per canonical (a,b) pair per type.
    const makeEmitter = (edgeType, weight) => {
        const seen = new Set();
        return (x, y) => {
            const [a, b] = x < y ? [x, y] : [y, x];
            const pairKey = `${a}|${b}`;
            if (seen.has(pairKey))
                return;
            seen.add(pairKey);
            derived.push({ sourceHash: a, targetHash: b, edgeType, weight });
        };
    };
    // 1. Two-hop chains: A→B→C connects A and C (self-loops skipped).
    const emitTransitive = makeEmitter("transitive_call", config.edgeWeights.transitive_call);
    for (const [caller, callees] of adjacency) {
        for (const mid of callees) {
            for (const far of adjacency.get(mid) ?? []) {
                if (far !== caller)
                    emitTransitive(caller, far);
            }
        }
    }
    // 2. Shared caller: siblings under the same (non-hub) caller connect.
    const emitCoCaller = makeEmitter("co_caller", config.edgeWeights.co_caller);
    for (const callees of adjacency.values()) {
        if (callees.size < 2 || callees.size > CO_CALLER_CAP)
            continue;
        const list = [...callees];
        for (let i = 0; i < list.length; i++) {
            for (let j = i + 1; j < list.length; j++) {
                emitCoCaller(list[i], list[j]);
            }
        }
    }
    return [...edges, ...derived];
}
512
/**
 * Persist resolved edges into function_edges, replacing the previous set.
 * The table is cleared first because edges are rebuilt from source each scan.
 * All inserts run inside a single transaction for atomicity and speed.
 */
function storeEdges(db, edges) {
    db.exec("DELETE FROM function_edges");
    const insert = db.prepare("INSERT OR REPLACE INTO function_edges (source_hash, target_hash, edge_type, weight) VALUES (?, ?, ?, ?)");
    db.transaction(() => {
        for (const e of edges) {
            insert.run(e.sourceHash, e.targetHash, e.edgeType, e.weight);
        }
    })();
}
526
/**
 * Incremental variant of storeEdges(): removes every edge touching an
 * affected hash (as source OR target), one row-pair at a time, then inserts
 * the freshly resolved edges — untouched edges stay in place.
 * Everything runs in a single transaction.
 *
 * @param db database handle
 * @param {Iterable<string>} affectedHashes hashes whose edges are stale
 * @param {Array} newEdges replacement edges to insert
 */
function storeEdgesIncremental(db, affectedHashes, newEdges) {
    const stale = [...affectedHashes];
    const remove = db.prepare("DELETE FROM function_edges WHERE source_hash = ? OR target_hash = ?");
    const insert = db.prepare("INSERT OR REPLACE INTO function_edges (source_hash, target_hash, edge_type, weight) VALUES (?, ?, ?, ?)");
    db.transaction(() => {
        for (const hash of stale) {
            remove.run(hash, hash);
        }
        for (const e of newEdges) {
            insert.run(e.sourceHash, e.targetHash, e.edgeType, e.weight);
        }
    })();
}
545
/**
 * Read every edge from function_edges, mapped to camelCase edge objects.
 * @returns {Array<{sourceHash: string, targetHash: string, edgeType: string, weight: number}>}
 */
function loadEdges(db) {
    const rows = db
        .prepare("SELECT source_hash, target_hash, edge_type, weight FROM function_edges")
        .all();
    const edges = [];
    for (const row of rows) {
        edges.push({
            sourceHash: row.source_hash,
            targetHash: row.target_hash,
            edgeType: row.edge_type,
            weight: row.weight,
        });
    }
    return edges;
}
559
/**
 * Persist community assignments. All rows in one call are assumed to share
 * the algorithm + level of the first element; existing rows for that
 * (algorithm, level) pair are deleted first. No-op on an empty list.
 */
function storeCommunities(db, assignments) {
    if (assignments.length === 0)
        return;
    const { algorithm, level } = assignments[0];
    db.prepare("DELETE FROM communities WHERE algorithm = ? AND level = ?").run(algorithm, level);
    const insert = db.prepare("INSERT INTO communities (content_hash, level, community_id, algorithm) VALUES (?, ?, ?, ?)");
    db.transaction(() => {
        for (const a of assignments) {
            insert.run(a.contentHash, a.level, a.communityId, a.algorithm);
        }
    })();
}
577
/**
 * Load community assignments for one (algorithm, level) pair, mapped to
 * camelCase assignment objects.
 */
function loadCommunities(db, algorithm, level) {
    return db
        .prepare("SELECT content_hash, level, community_id, algorithm FROM communities WHERE algorithm = ? AND level = ?")
        .all(algorithm, level)
        .map((row) => ({
            contentHash: row.content_hash,
            level: row.level,
            communityId: row.community_id,
            algorithm: row.algorithm,
        }));
}
591
// ============ COMMUNITY EDGES ============
/**
 * Aggregate function-level edges into community-level edges.
 * For every pair of distinct L0 communities (dissolved members excluded),
 * sums the weights — and counts — of all function edges crossing between them.
 *
 * @param db database handle (assignments, and edges when not supplied)
 * @param {string} algorithm community algorithm whose L0 assignments to use
 * @param {Array} [edges] optional in-memory edges (includes transitive ones);
 *   falls back to the function_edges table when omitted
 */
function computeCommunityEdges(db, algorithm, edges) {
    const memberRows = db
        .prepare("SELECT content_hash, community_id FROM communities WHERE level = 0 AND algorithm = ? AND community_id != '__dissolved'")
        .all(algorithm);
    const communityOf = new Map(memberRows.map((r) => [r.content_hash, r.community_id]));
    // Normalize to snake_case rows whether edges come from memory or the DB.
    const fnEdges = edges
        ? edges.map((e) => ({
            source_hash: e.sourceHash,
            target_hash: e.targetHash,
            weight: e.weight,
        }))
        : db
            .prepare("SELECT source_hash, target_hash, weight FROM function_edges")
            .all();
    // Accumulate weight/count per canonical cross-community pair.
    const accumulator = new Map();
    for (const edge of fnEdges) {
        const from = communityOf.get(edge.source_hash);
        const to = communityOf.get(edge.target_hash);
        // Skip edges with an unassigned endpoint or both ends in one community.
        if (!from || !to || from === to)
            continue;
        const key = from < to ? `${from}|${to}` : `${to}|${from}`;
        const entry = accumulator.get(key);
        if (entry) {
            entry.weight += edge.weight;
            entry.count += 1;
        }
        else {
            accumulator.set(key, { weight: edge.weight, count: 1 });
        }
    }
    return [...accumulator].map(([key, { weight, count }]) => {
        const [sourceCommunity, targetCommunity] = key.split("|");
        return {
            sourceCommunity,
            targetCommunity,
            algorithm,
            weight,
            edgeCount: count,
        };
    });
}
645
/**
 * Persist community-level edges. All rows in one call are assumed to share
 * the algorithm of the first element; that algorithm's previous rows are
 * deleted first. An empty list clears the whole table
 * (NOTE(review): that path wipes ALL algorithms, not just one — confirm intended).
 */
function storeCommunityEdges(db, edges) {
    if (edges.length === 0) {
        db.exec("DELETE FROM community_edges");
        return;
    }
    db.prepare("DELETE FROM community_edges WHERE algorithm = ?").run(edges[0].algorithm);
    const insert = db.prepare("INSERT INTO community_edges (source_community, target_community, algorithm, weight, edge_count) VALUES (?, ?, ?, ?, ?)");
    db.transaction(() => {
        for (const e of edges) {
            insert.run(e.sourceCommunity, e.targetCommunity, e.algorithm, e.weight, e.edgeCount);
        }
    })();
}
663
/**
 * Load community-level edges for one algorithm, mapped to camelCase objects.
 */
function loadCommunityEdges(db, algorithm) {
    return db
        .prepare("SELECT source_community, target_community, algorithm, weight, edge_count FROM community_edges WHERE algorithm = ?")
        .all(algorithm)
        .map((row) => ({
            sourceCommunity: row.source_community,
            targetCommunity: row.target_community,
            algorithm: row.algorithm,
            weight: row.weight,
            edgeCount: row.edge_count,
        }));
}
678
/**
 * Build an undirected weighted graphology graph from function edges.
 * Every hash in `allHashes` becomes a node (so isolated functions are
 * included); parallel edges between the same pair (different edge types)
 * are merged by summing weights.
 *
 * When hashToFilePath, rootPath and a positive directoryDecay are all
 * supplied, each edge's weight is scaled by exp(-directoryDecay * dist),
 * where dist is the directory distance between the two endpoints — edges
 * spanning far-apart directories attract less.
 */
function buildGraph(edges, hashToFilePath, rootPath, directoryDecay, allHashes) {
    const graph = new graphology_1.default({ type: "undirected" });
    // Seed nodes: every extracted function, then any edge endpoints missing.
    if (allHashes) {
        for (const hash of allHashes) {
            graph.addNode(hash);
        }
    }
    for (const { sourceHash, targetHash } of edges) {
        if (!graph.hasNode(sourceHash))
            graph.addNode(sourceHash);
        if (!graph.hasNode(targetHash))
            graph.addNode(targetHash);
    }
    const decayEnabled = hashToFilePath &&
        rootPath &&
        directoryDecay !== undefined &&
        directoryDecay > 0;
    // hash → root-relative directory segments, computed once per node
    // (avoids repeated path operations per edge).
    const segmentsOf = new Map();
    if (decayEnabled) {
        for (const [hash, filePath] of hashToFilePath) {
            segmentsOf.set(hash, path
                .relative(rootPath, path.dirname(filePath))
                .split(path.sep)
                .filter(Boolean));
        }
    }
    // Merge parallel edges (same canonical pair) by summing decayed weights.
    const merged = new Map();
    for (const edge of edges) {
        const [a, b] = edge.sourceHash < edge.targetHash
            ? [edge.sourceHash, edge.targetHash]
            : [edge.targetHash, edge.sourceHash];
        let weight = edge.weight;
        if (decayEnabled) {
            const segsA = segmentsOf.get(a);
            const segsB = segmentsOf.get(b);
            if (segsA && segsB) {
                let shared = 0;
                while (shared < segsA.length &&
                    shared < segsB.length &&
                    segsA[shared] === segsB[shared]) {
                    shared++;
                }
                const dist = segsA.length - shared + (segsB.length - shared);
                if (dist > 0) {
                    weight *= Math.exp(-directoryDecay * dist);
                }
            }
        }
        const key = `${a}|${b}`;
        merged.set(key, (merged.get(key) ?? 0) + weight);
    }
    for (const [key, weight] of merged) {
        const [a, b] = key.split("|");
        graph.addEdge(a, b, { weight });
    }
    return graph;
}
743
/**
 * Compute betweenness centrality for all nodes in the graph.
 * High-centrality nodes are "bridge" functions that many shortest paths
 * pass through — coupling points between communities.
 *
 * @param graph - graphology Graph (weighted via the "weight" edge attribute).
 * @returns { scores, ranked } — a Map of node → score plus the same scores as
 *   an array of { contentHash, score } sorted descending.
 */
function computeCentrality(graph) {
    // An empty graph has nothing to score.
    if (graph.order === 0)
        return { scores: new Map(), ranked: [] };
    const raw = (0, betweenness_1.default)(graph, {
        getEdgeWeight: "weight",
        normalized: true,
    });
    const entries = Object.entries(raw);
    const scores = new Map(entries);
    const ranked = entries
        .map(([contentHash, score]) => ({ contentHash, score }))
        .sort((x, y) => y.score - x.score);
    return { scores, ranked };
}
// ============ COMMUNITY DETECTION ============
/**
 * Rescue dissolved nodes by assigning each to the community with the strongest
 * total edge weight. Mutates assignments in-place. Returns the number rescued.
 *
 * Nodes with zero connections to any non-dissolved community stay dissolved.
 * Rescued nodes update the membership map so subsequent rescues see them as
 * placed (enabling chained rescue where node B is rescued into a community
 * that node A was just rescued into).
 *
 * @param assignments - Array of { contentHash, communityId } records; entries
 *   whose communityId equals dissolvedMarker are rescue candidates (mutated in place).
 * @param edges - Function edges ({ sourceHash, targetHash, weight }) measuring connection strength.
 * @param dissolvedMarker - Community ID string marking dissolved nodes.
 * @returns Number of nodes rescued.
 */
function rescueDissolvedNodes(assignments, edges, dissolvedMarker) {
    const dissolvedHashes = [];
    for (const a of assignments) {
        if (a.communityId === dissolvedMarker) {
            dissolvedHashes.push(a.contentHash);
        }
    }
    if (dissolvedHashes.length === 0)
        return 0;
    // Build membership map (hash → community) for non-dissolved nodes
    const hashToComm = new Map();
    for (const a of assignments) {
        if (a.communityId !== dissolvedMarker) {
            hashToComm.set(a.contentHash, a.communityId);
        }
    }
    if (hashToComm.size === 0)
        return 0;
    // Index assignments by hash once (first match wins, like Array.find) and
    // build an adjacency index for dissolved nodes in a single pass over edges.
    // This replaces the previous O(dissolved × edges) + O(dissolved × assignments)
    // rescans with O(assignments + edges) setup and O(degree) work per rescue.
    const assignmentByHash = new Map();
    for (const a of assignments) {
        if (!assignmentByHash.has(a.contentHash)) {
            assignmentByHash.set(a.contentHash, a);
        }
    }
    const dissolvedSet = new Set(dissolvedHashes);
    const adjacency = new Map(); // dissolved hash → Array<[neighborHash, weight]>
    for (const edge of edges) {
        // Mirror the original per-node neighbor selection: when sourceHash matches,
        // the neighbor is targetHash — so a self-loop contributes exactly once.
        if (dissolvedSet.has(edge.sourceHash) && edge.targetHash) {
            let list = adjacency.get(edge.sourceHash);
            if (!list)
                adjacency.set(edge.sourceHash, (list = []));
            list.push([edge.targetHash, edge.weight]);
        }
        if (edge.targetHash !== edge.sourceHash &&
            dissolvedSet.has(edge.targetHash) &&
            edge.sourceHash) {
            let list = adjacency.get(edge.targetHash);
            if (!list)
                adjacency.set(edge.targetHash, (list = []));
            list.push([edge.sourceHash, edge.weight]);
        }
    }
    let rescued = 0;
    for (const hash of dissolvedHashes) {
        const neighbors = adjacency.get(hash);
        if (!neighbors)
            continue; // truly isolated — stays dissolved
        // Sum edge weights to each currently-placed community.
        // Neighbor order preserves the edges-array order, so tie-breaking
        // (first community to reach the max weight) is unchanged.
        const commWeights = new Map();
        for (const [neighbor, weight] of neighbors) {
            const comm = hashToComm.get(neighbor);
            if (comm) {
                commWeights.set(comm, (commWeights.get(comm) || 0) + weight);
            }
        }
        // Find the community with strongest total connection
        let bestComm = "";
        let bestWeight = 0;
        for (const [comm, weight] of commWeights) {
            if (weight > bestWeight) {
                bestWeight = weight;
                bestComm = comm;
            }
        }
        if (bestComm) {
            const assignment = assignmentByHash.get(hash);
            if (assignment) {
                assignment.communityId = bestComm;
                hashToComm.set(hash, bestComm); // chained rescue
                rescued++;
            }
        }
    }
    return rescued;
}
/**
 * Run community detection on the given edges using the configured algorithm.
 * Returns assignments ready for storage, plus stats about the detection run.
 *
 * Small communities (< minCommunitySize) are dissolved — their members are
 * assigned to a special "__dissolved" community. Dissolved nodes are then
 * rescued: each is assigned to the community it's most connected to by total
 * edge weight. Truly isolated nodes (zero connections) stay dissolved.
 *
 * @param edges - Function edges ({ sourceHash, targetHash, weight }).
 * @param config - Graph config: algorithm, resolution, autoResolution,
 *   minCommunitySize, maxCommunitySize, directoryDecay.
 * @param hashToFilePath - Optional hash → file path Map (enables directory decay).
 * @param rootPath - Optional repo root for directory-distance computation.
 * @param allHashes - Optional iterable of all function hashes to include as nodes.
 * @returns DetectionResult: { assignments, communityCount, modularity,
 *   nodesInGraph, edgesInGraph, dissolvedCount, effectiveResolution }.
 */
function detectCommunities(edges, config, hashToFilePath, rootPath, allHashes) {
    const graph = buildGraph(edges, hashToFilePath, rootPath, config.directoryDecay, allHashes);
    // Empty graph: nothing to detect (note: no effectiveResolution on this path).
    if (graph.order === 0) {
        return {
            assignments: [],
            communityCount: 0,
            modularity: null,
            nodesInGraph: 0,
            edgesInGraph: 0,
            dissolvedCount: 0,
        };
    }
    // Auto-resolution: scale resolution with codebase size for better granularity
    // (only ever raises the configured resolution, never lowers it).
    let resolution = config.resolution;
    if (config.autoResolution !== false) {
        const nodeCount = graph.order;
        if (nodeCount < 50)
            resolution = Math.max(resolution, 1.0);
        else if (nodeCount < 200)
            resolution = Math.max(resolution, 1.5);
        else
            resolution = Math.max(resolution, 2.0);
    }
    const detector = (0, community_detectors_1.getDetector)(config.algorithm);
    const raw = detector.detect(graph, { resolution, weightAttribute: "weight" });
    const communities = raw.communities;
    const modularity = raw.modularity;
    // Group nodes by community
    const communityMembers = new Map();
    for (const [node, communityId] of Object.entries(communities)) {
        const members = communityMembers.get(communityId) || [];
        members.push(node);
        communityMembers.set(communityId, members);
    }
    // Active splitting: re-run Louvain at higher resolution on oversized communities
    const splitMembers = new Map();
    for (const [communityId, members] of communityMembers) {
        if (members.length > config.maxCommunitySize &&
            detector.supportsResolution) {
            const memberSet = new Set(members);
            // Recurse only on edges fully internal to this community.
            // NOTE(review): hashToFilePath/rootPath/allHashes are not forwarded to the
            // recursive call, so directory decay does not apply inside a split — confirm
            // this is intentional.
            const subEdges = edges.filter((e) => memberSet.has(e.sourceHash) && memberSet.has(e.targetHash));
            if (subEdges.length > 0) {
                const subResult = detectCommunities(subEdges, {
                    ...config,
                    resolution: resolution * 2,
                    autoResolution: false, // prevent recursive auto-scaling
                });
                // Only accept the split if it produced multiple communities without destroying too many
                if (subResult.communityCount > 1 &&
                    subResult.dissolvedCount <= members.length * 0.5) {
                    // Namespace sub-community IDs under the parent (e.g. "3_0", "3_1").
                    for (const a of subResult.assignments) {
                        a.communityId = `${communityId}_${a.communityId}`;
                    }
                    // Rescue dissolved split fragments into nearest sub-community
                    const dissolvedMarker = `${communityId}___dissolved`;
                    rescueDissolvedNodes(subResult.assignments, subEdges, dissolvedMarker);
                    // Register the parent ID (with an empty member list) so the
                    // assignment loop below skips the original oversized community.
                    splitMembers.set(String(communityId), []);
                    for (const a of subResult.assignments) {
                        const list = splitMembers.get(a.communityId) || [];
                        list.push(a.contentHash);
                        splitMembers.set(a.communityId, list);
                    }
                }
            }
        }
    }
    // Build final assignments
    const assignments = [];
    let dissolvedCount = 0;
    for (const [communityId, members] of communityMembers) {
        // If this community was split, use the split assignments instead
        if (splitMembers.has(String(communityId)))
            continue;
        const isTooSmall = members.length < config.minCommunitySize;
        const assignedId = isTooSmall ? "__dissolved" : String(communityId);
        if (isTooSmall)
            dissolvedCount += members.length;
        for (const contentHash of members) {
            assignments.push({
                contentHash,
                level: 0,
                communityId: assignedId,
                algorithm: config.algorithm,
            });
        }
    }
    // Add split community assignments
    for (const [subCommunityId, members] of splitMembers) {
        // Detect dissolved groups from recursive splits (prefixed: "66___dissolved")
        const isDissolvedGroup = subCommunityId.endsWith("___dissolved");
        if (isDissolvedGroup) {
            dissolvedCount += members.length;
        }
        const isTooSmall = members.length < config.minCommunitySize;
        const assignedId = isDissolvedGroup || isTooSmall ? "__dissolved" : subCommunityId;
        // Guard prevents double-counting members already tallied as a dissolved group.
        if (isTooSmall && !isDissolvedGroup)
            dissolvedCount += members.length;
        for (const contentHash of members) {
            assignments.push({
                contentHash,
                level: 0,
                communityId: assignedId,
                algorithm: config.algorithm,
            });
        }
    }
    // Rescue top-level dissolved nodes into nearest community
    const topRescued = rescueDissolvedNodes(assignments, edges, "__dissolved");
    dissolvedCount -= topRescued;
    // Count actual (non-dissolved) communities
    const realCommunities = new Set(assignments
        .filter((a) => a.communityId !== "__dissolved")
        .map((a) => a.communityId));
    return {
        assignments,
        communityCount: realCommunities.size,
        modularity,
        nodesInGraph: graph.order,
        edgesInGraph: graph.size,
        dissolvedCount,
        effectiveResolution: resolution,
    };
}
// ============ STABLE COMMUNITY MATCHING (Gate 1) ============
/**
 * Match new community assignments to old ones by Jaccard similarity.
 * Remaps new community IDs to reuse old IDs where membership overlaps significantly.
 * This prevents summary cache misses caused by Louvain's arbitrary ID renumbering.
 *
 * Algorithm: greedy 1:1 matching — sort all (new, old, Jaccard) triples by score
 * descending, accept matches where J ≥ threshold, each ID used at most once.
 *
 * @param oldAssignments - Previous run's assignments ({ contentHash, communityId }).
 * @param newAssignments - Fresh assignments to be remapped (not mutated).
 * @param threshold - Minimum Jaccard similarity to accept a match (default 0.5).
 * @returns New assignments with matched IDs remapped to the old IDs; the input
 *   array is returned untouched when no matching is possible.
 */
function matchCommunities(oldAssignments, newAssignments, threshold = 0.5) {
    if (oldAssignments.length === 0 || newAssignments.length === 0) {
        return newAssignments;
    }
    // communityId → Set<contentHash>, ignoring dissolved members
    const toGroups = (list) => {
        const groups = new Map();
        for (const { communityId, contentHash } of list) {
            if (communityId === "__dissolved")
                continue;
            let members = groups.get(communityId);
            if (!members) {
                members = new Set();
                groups.set(communityId, members);
            }
            members.add(contentHash);
        }
        return groups;
    };
    const oldGroups = toGroups(oldAssignments);
    const newGroups = toGroups(newAssignments);
    if (oldGroups.size === 0 || newGroups.size === 0) {
        return newAssignments;
    }
    // Score every (new, old) pair; keep only pairs at or above the threshold.
    const candidates = [];
    for (const [newId, newMembers] of newGroups) {
        for (const [oldId, oldMembers] of oldGroups) {
            let intersection = 0;
            for (const hash of newMembers) {
                if (oldMembers.has(hash))
                    intersection++;
            }
            const union = newMembers.size + oldMembers.size - intersection;
            if (union === 0)
                continue;
            const jaccard = intersection / union;
            if (jaccard >= threshold)
                candidates.push({ newId, oldId, jaccard });
        }
    }
    // Greedy 1:1 matching: strongest overlaps claim their IDs first.
    candidates.sort((x, y) => y.jaccard - x.jaccard);
    const takenOld = new Set();
    const takenNew = new Set();
    const remap = new Map(); // newId → oldId
    for (const { newId, oldId } of candidates) {
        if (takenNew.has(newId) || takenOld.has(oldId))
            continue;
        remap.set(newId, oldId);
        takenNew.add(newId);
        takenOld.add(oldId);
    }
    if (remap.size === 0)
        return newAssignments;
    // Rewrite only the matched IDs; unmatched assignments pass through unchanged.
    return newAssignments.map((a) => {
        const mapped = remap.get(a.communityId);
        return mapped ? { ...a, communityId: mapped } : a;
    });
}
/**
 * Label Propagation Algorithm (LPA) for community detection.
 * Delegates to LabelPropDetector — kept as a standalone export for backward compatibility.
 *
 * @param graph - graphology Graph with a "weight" edge attribute.
 * @param maxIterations - Iteration cap passed to the detector (default 100).
 * @returns The detector's node → community mapping.
 */
function labelPropagation(graph, maxIterations = 100) {
    // LPA ignores resolution; 0 is passed purely to satisfy the detector interface.
    const detector = new community_detectors_1.LabelPropDetector(maxIterations);
    return detector.detect(graph, { resolution: 0, weightAttribute: "weight" }).communities;
}
// ============ ORCHESTRATION ============
/**
 * Compute the set of content hashes that changed between old and new function sets.
 * Returns the symmetric difference: hashes that appeared or disappeared.
 *
 * @param oldHashes - Set of hashes from the previous scan.
 * @param newHashes - Set of hashes from the current scan.
 * @returns Set containing every hash present in exactly one of the two inputs.
 */
function computeChangedHashes(oldHashes, newHashes) {
    const appeared = [...newHashes].filter((h) => !oldHashes.has(h));
    const disappeared = [...oldHashes].filter((h) => !newHashes.has(h));
    return new Set([...appeared, ...disappeared]);
}
/**
 * Find edge neighbors of a set of hashes — any hash connected to them via function_edges.
 *
 * @param db - better-sqlite3-style database handle.
 * @param hashes - Iterable of content hashes to look up.
 * @returns Set of every hash appearing on an edge incident to any input hash
 *   (includes the input hashes themselves when they have edges).
 */
function findEdgeNeighbors(db, hashes) {
    const stmt = db.prepare("SELECT source_hash, target_hash FROM function_edges WHERE source_hash = ? OR target_hash = ?");
    const neighbors = new Set();
    for (const hash of hashes) {
        // Both endpoints of each incident edge are collected.
        for (const { source_hash, target_hash } of stmt.all(hash, hash)) {
            neighbors.add(source_hash);
            neighbors.add(target_hash);
        }
    }
    return neighbors;
}
/**
 * Full graph analysis pipeline: resolve edges → build graph → detect communities → store.
 * Call after file scanning to update the relationship graph.
 *
 * When `oldHashes` is provided, uses incremental edge resolution: only recomputes edges
 * for changed functions + their neighbors, keeping the rest of the graph stable.
 *
 * @param db - Database handle used for edge/community persistence.
 * @param functions - Scanned functions; each must expose contentHash and filePath.
 * @param config - Optional graph config; falls back to the config stored in the DB.
 * @param oldHashes - Optional Set of previous-run hashes; enables incremental mode.
 * @param rootPath - Optional repo root; enables import resolution and directory decay.
 * @returns The DetectionResult from detectCommunities (assignments remapped to stable IDs).
 */
function analyzeGraph(db, functions, config, oldHashes, rootPath) {
    const graphConfig = config || loadGraphConfig(db);
    const newHashes = new Set(functions.map((f) => f.contentHash));
    // Build module resolver using marker file discovery for workspace + alias import resolution
    const resolver = rootPath
        ? buildModuleResolver(rootPath, functions)
        : undefined;
    // Determine incremental vs full rebuild
    const changedHashes = oldHashes
        ? computeChangedHashes(oldHashes, newHashes)
        : null;
    // Incremental only pays off when ≤50% of functions changed; beyond that, rebuild.
    const useIncremental = changedHashes !== null &&
        changedHashes.size > 0 &&
        changedHashes.size / Math.max(newHashes.size, 1) <= 0.5;
    let edges;
    if (changedHashes !== null && changedHashes.size === 0) {
        // No-change fast path: skip edge resolution, load existing edges
        edges = loadEdges(db);
    }
    else if (useIncremental) {
        // Incremental mode: only resolve edges for changed functions + their neighbors
        const neighbors = findEdgeNeighbors(db, changedHashes);
        const affectedHashes = new Set([...changedHashes, ...neighbors]);
        // Filter to only hashes that still exist (exclude removed hashes)
        for (const h of affectedHashes) {
            if (!newHashes.has(h))
                affectedHashes.delete(h);
        }
        const newEdges = resolveEdges(db, functions, graphConfig, affectedHashes, resolver);
        storeEdgesIncremental(db, affectedHashes, newEdges);
        // Reload from DB so `edges` reflects the merged (kept + replaced) edge set.
        edges = loadEdges(db);
    }
    else {
        // Full rebuild (first run or >50% changed)
        edges = resolveEdges(db, functions, graphConfig, undefined, resolver);
        storeEdges(db, edges);
    }
    // Add transitive edges (derived from call graph, not stored in DB)
    const edgesWithTransitive = addTransitiveEdges(edges, graphConfig);
    // Build hash→filePath map for directory distance decay
    // (directoryDecay defaults to 0.1 when unset — only skipped when explicitly 0)
    let hashToFilePath;
    if (rootPath && (graphConfig.directoryDecay ?? 0.1) > 0) {
        hashToFilePath = new Map();
        for (const fn of functions) {
            hashToFilePath.set(fn.contentHash, fn.filePath);
        }
    }
    // Detect communities from all edges including transitive
    const result = detectCommunities(edgesWithTransitive, graphConfig, hashToFilePath, rootPath, newHashes);
    // Stable community matching — remap new IDs to old IDs by Jaccard similarity
    const oldAssignments = loadCommunities(db, graphConfig.algorithm, 0);
    const remapped = matchCommunities(oldAssignments, result.assignments);
    result.assignments = remapped;
    // Store community assignments
    storeCommunities(db, result.assignments);
    // Compute and store community edges (using full edge set including transitive)
    const communityEdges = computeCommunityEdges(db, graphConfig.algorithm, edgesWithTransitive);
    storeCommunityEdges(db, communityEdges);
    // Save config used (for reproducibility)
    saveGraphConfig(db, graphConfig);
    return result;
}
/**
 * Detect hierarchical communities: first L0 (function-level), then L1 (community-level).
 *
 * After L0 detection + community edge computation, builds a meta-graph where:
 * - Nodes = L0 community IDs (excluding __dissolved)
 * - Edges = community_edges (weighted by aggregate function edge weight)
 *
 * Runs Louvain on meta-graph to produce L1 group assignments.
 * Stores L1 assignments in the communities table with level=1,
 * where content_hash = L0 community_id.
 *
 * When `oldHashes` is provided, enables incremental edge resolution in the L0 step.
 *
 * @param db - Database handle for persistence.
 * @param functions - Scanned functions passed through to analyzeGraph.
 * @param config - Optional graph config; falls back to the stored config.
 * @param oldHashes - Optional previous-run hash Set (incremental L0 resolution).
 * @param rootPath - Optional repo root passed through to analyzeGraph.
 * @returns { l0, communityEdges, l1Assignments, l1GroupCount }.
 */
function detectHierarchicalCommunities(db, functions, config, oldHashes, rootPath) {
    const graphConfig = config || loadGraphConfig(db);
    // Step 1: Run L0 detection (also computes + stores community edges)
    const l0 = analyzeGraph(db, functions, graphConfig, oldHashes, rootPath);
    // Step 2: Load community edges
    const communityEdges = loadCommunityEdges(db, graphConfig.algorithm);
    // Step 3: Need at least 3 real communities for meaningful grouping
    const realCommunities = new Set(l0.assignments
        .filter((a) => a.communityId !== "__dissolved")
        .map((a) => a.communityId));
    if (realCommunities.size < 3 || communityEdges.length < 2) {
        return { l0, communityEdges, l1Assignments: [], l1GroupCount: 0 };
    }
    // Step 4: Build meta-graph from community edges
    // (edgeType "call" is a placeholder; buildGraph only reads hashes + weight)
    const metaEdges = communityEdges.map((ce) => ({
        sourceHash: ce.sourceCommunity,
        targetHash: ce.targetCommunity,
        edgeType: "call",
        weight: ce.weight,
    }));
    // No decay/allHashes args: meta-graph nodes come solely from community edges.
    const metaGraph = buildGraph(metaEdges);
    if (metaGraph.order < 3) {
        return { l0, communityEdges, l1Assignments: [], l1GroupCount: 0 };
    }
    // Step 5: Run community detection on meta-graph
    const metaDetector = (0, community_detectors_1.getDetector)(graphConfig.algorithm);
    const metaRaw = metaDetector.detect(metaGraph, {
        resolution: graphConfig.resolution,
        weightAttribute: "weight",
    });
    const metaCommunities = metaRaw.communities;
    // Step 6: Build L1 assignments (group L0 community IDs by meta-community)
    const l1Members = new Map();
    for (const [communityId, groupId] of Object.entries(metaCommunities)) {
        const members = l1Members.get(groupId) || [];
        members.push(communityId);
        l1Members.set(groupId, members);
    }
    // Dissolve groups with only 1 community (no useful grouping)
    const l1Assignments = [];
    let realGroupCount = 0;
    for (const [groupId, members] of l1Members) {
        const isTooSmall = members.length < 2;
        if (!isTooSmall)
            realGroupCount++;
        for (const communityId of members) {
            l1Assignments.push({
                // At level 1, content_hash carries the L0 community ID, not a function hash.
                contentHash: communityId,
                level: 1,
                communityId: isTooSmall ? "__dissolved" : String(groupId),
                algorithm: graphConfig.algorithm,
            });
        }
    }
    // Step 6.5: Stable matching for L1 assignments
    const oldL1Assignments = loadCommunities(db, graphConfig.algorithm, 1);
    const remappedL1 = matchCommunities(oldL1Assignments, l1Assignments);
    // Step 7: Store L1 assignments
    if (remappedL1.length > 0) {
        storeCommunities(db, remappedL1);
    }
    return {
        l0,
        communityEdges,
        l1Assignments: remappedL1,
        l1GroupCount: realGroupCount,
    };
}
// Default algorithm/resolution configurations benchmarked by runComparison.
// Resolution 0 for label-propagation is a placeholder — LPA has no resolution parameter.
exports.DEFAULT_COMPARISONS = [
    { label: "louvain@1.0", algorithm: "louvain", resolution: 1.0 },
    { label: "louvain@1.5", algorithm: "louvain", resolution: 1.5 },
    { label: "louvain@2.0", algorithm: "louvain", resolution: 2.0 },
    { label: "louvain@3.0", algorithm: "louvain", resolution: 3.0 },
    { label: "leiden@1.5", algorithm: "leiden", resolution: 1.5 },
    { label: "leiden@2.0", algorithm: "leiden", resolution: 2.0 },
    { label: "label-propagation", algorithm: "label-propagation", resolution: 0 },
];
/**
 * Compute comparison metrics from a DetectionResult.
 * Pure function — no DB or side effects.
 *
 * @param result - DetectionResult (assignments, nodesInGraph, dissolvedCount, …).
 * @param label - Human-readable label for this configuration.
 * @param algorithm - Algorithm name the result came from.
 * @param resolution - Resolution used (or null).
 * @returns Metrics row: sizes (min/median/max), dissolved %, coverage %, modularity, counts.
 */
function computeComparisonMetrics(result, label, algorithm, resolution) {
    const totalNodes = result.nodesInGraph;
    // Tally members per community, ignoring the dissolved bucket.
    const sizeByCommunity = new Map();
    for (const { communityId } of result.assignments) {
        if (communityId === "__dissolved")
            continue;
        sizeByCommunity.set(communityId, (sizeByCommunity.get(communityId) || 0) + 1);
    }
    const sizes = [...sizeByCommunity.values()].sort((x, y) => x - y);
    // Size distribution stats; all zero when there are no real communities.
    let minSize = 0;
    let maxSize = 0;
    let medianSize = 0;
    if (sizes.length > 0) {
        minSize = sizes[0];
        maxSize = sizes[sizes.length - 1];
        const mid = Math.floor(sizes.length / 2);
        medianSize = sizes.length % 2 === 1
            ? sizes[mid]
            : Math.round((sizes[mid - 1] + sizes[mid]) / 2);
    }
    // Percentages guard against division by zero on an empty graph.
    const dissolvedPct = totalNodes > 0
        ? Math.round((result.dissolvedCount / totalNodes) * 100)
        : 0;
    const coverage = totalNodes > 0
        ? Math.round(((totalNodes - result.dissolvedCount) / totalNodes) * 100)
        : 0;
    return {
        label,
        algorithm,
        resolution,
        communityCount: result.communityCount,
        dissolvedCount: result.dissolvedCount,
        dissolvedPct,
        coverage,
        minSize,
        medianSize,
        maxSize,
        modularity: result.modularity,
        nodesInGraph: result.nodesInGraph,
        edgesInGraph: result.edgesInGraph,
    };
}
/**
 * Run community detection at multiple configurations on the same edges.
 * Does NOT write to DB — pure computation for comparison.
 *
 * @param edges - Function edges used for every run.
 * @param baseConfig - Base graph config; algorithm/resolution are overridden per run.
 * @param configurations - Array of { label, algorithm, resolution } to evaluate.
 * @param hashToFilePath - Optional decay map forwarded to detectCommunities.
 * @param rootPath - Optional repo root forwarded to detectCommunities.
 * @param allHashes - Optional full hash set forwarded to detectCommunities.
 * @returns One metrics row per configuration, in input order.
 */
function runComparison(edges, baseConfig, configurations, hashToFilePath, rootPath, allHashes) {
    const results = [];
    for (const cfg of configurations) {
        // Pin the explicit resolution so auto-scaling can't skew the comparison.
        const config = {
            ...baseConfig,
            algorithm: cfg.algorithm,
            resolution: cfg.resolution,
            autoResolution: false,
        };
        const detection = detectCommunities(edges, config, hashToFilePath, rootPath, allHashes);
        results.push(computeComparisonMetrics(detection, cfg.label, cfg.algorithm, cfg.resolution || null));
    }
    return results;
}