@ophan/core 0.0.2 → 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/community-detectors/index.d.ts +20 -0
- package/dist/community-detectors/index.d.ts.map +1 -0
- package/dist/community-detectors/index.js +45 -0
- package/dist/community-detectors/label-prop.d.ts +20 -0
- package/dist/community-detectors/label-prop.d.ts.map +1 -0
- package/dist/community-detectors/label-prop.js +77 -0
- package/dist/community-detectors/leiden.d.ts +22 -0
- package/dist/community-detectors/leiden.d.ts.map +1 -0
- package/dist/community-detectors/leiden.js +312 -0
- package/dist/community-detectors/louvain.d.ts +13 -0
- package/dist/community-detectors/louvain.d.ts.map +1 -0
- package/dist/community-detectors/louvain.js +29 -0
- package/dist/community-detectors/types.d.ts +36 -0
- package/dist/community-detectors/types.d.ts.map +1 -0
- package/dist/{parsers/__fixtures__/no-functions.js → community-detectors/types.js} +0 -2
- package/dist/edge-resolvers/call.d.ts +13 -0
- package/dist/edge-resolvers/call.d.ts.map +1 -0
- package/dist/edge-resolvers/call.js +40 -0
- package/dist/edge-resolvers/co-location.d.ts +16 -0
- package/dist/edge-resolvers/co-location.d.ts.map +1 -0
- package/dist/edge-resolvers/co-location.js +129 -0
- package/dist/edge-resolvers/import.d.ts +16 -0
- package/dist/edge-resolvers/import.d.ts.map +1 -0
- package/dist/edge-resolvers/import.js +118 -0
- package/dist/edge-resolvers/index.d.ts +9 -0
- package/dist/edge-resolvers/index.d.ts.map +1 -0
- package/dist/edge-resolvers/index.js +29 -0
- package/dist/edge-resolvers/jsx-ref.d.ts +13 -0
- package/dist/edge-resolvers/jsx-ref.d.ts.map +1 -0
- package/dist/edge-resolvers/jsx-ref.js +40 -0
- package/dist/edge-resolvers/types.d.ts +40 -0
- package/dist/edge-resolvers/types.d.ts.map +1 -0
- package/dist/edge-resolvers/types.js +2 -0
- package/dist/graph.d.ts +293 -0
- package/dist/graph.d.ts.map +1 -0
- package/dist/graph.js +1295 -0
- package/dist/index.d.ts +37 -8
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +385 -183
- package/dist/migrations.d.ts +25 -0
- package/dist/migrations.d.ts.map +1 -0
- package/dist/migrations.js +323 -0
- package/dist/module-resolvers/index.d.ts +11 -0
- package/dist/module-resolvers/index.d.ts.map +1 -0
- package/dist/module-resolvers/index.js +67 -0
- package/dist/module-resolvers/javascript.d.ts +18 -0
- package/dist/module-resolvers/javascript.d.ts.map +1 -0
- package/dist/module-resolvers/javascript.js +130 -0
- package/dist/module-resolvers/types.d.ts +18 -0
- package/dist/module-resolvers/types.d.ts.map +1 -0
- package/dist/module-resolvers/types.js +2 -0
- package/dist/parsers/python.d.ts.map +1 -1
- package/dist/parsers/python.js +38 -4
- package/dist/parsers/typescript.d.ts.map +1 -1
- package/dist/parsers/typescript.js +133 -0
- package/dist/practices.d.ts +28 -0
- package/dist/practices.d.ts.map +1 -0
- package/dist/practices.js +95 -0
- package/dist/schemas.d.ts +251 -3
- package/dist/schemas.d.ts.map +1 -1
- package/dist/schemas.js +121 -6
- package/dist/shared.d.ts +8 -0
- package/dist/shared.d.ts.map +1 -1
- package/dist/summarize.d.ts +165 -0
- package/dist/summarize.d.ts.map +1 -0
- package/dist/summarize.js +1067 -0
- package/ophan_logo.png +0 -0
- package/package.json +9 -2
- package/dist/parsers/__fixtures__/arrow-functions.d.ts +0 -5
- package/dist/parsers/__fixtures__/arrow-functions.d.ts.map +0 -1
- package/dist/parsers/__fixtures__/arrow-functions.js +0 -16
- package/dist/parsers/__fixtures__/class-methods.d.ts +0 -6
- package/dist/parsers/__fixtures__/class-methods.d.ts.map +0 -1
- package/dist/parsers/__fixtures__/class-methods.js +0 -12
- package/dist/parsers/__fixtures__/no-functions.d.ts +0 -9
- package/dist/parsers/__fixtures__/no-functions.d.ts.map +0 -1
package/dist/graph.js
ADDED
@@ -0,0 +1,1295 @@
"use strict";
// Graph analysis module — builds a function relationship graph, runs community detection,
// and produces hierarchical documentation structure.
//
// Architecture:
// 1. Parsers extract call sites, imports, exports → FunctionInfo with relationship fields
// 2. This module resolves names to content hashes → function_edges table
// 3. graphology builds in-memory graph from edges
// 4. Community detection (configurable algorithm) → communities table
// 5. (Phase 2) Hierarchical summarization → community_summaries table
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
    if (k2 === undefined) k2 = k;
    var desc = Object.getOwnPropertyDescriptor(m, k);
    if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
        desc = { enumerable: true, get: function() { return m[k]; } };
    }
    Object.defineProperty(o, k2, desc);
}) : (function(o, m, k, k2) {
    if (k2 === undefined) k2 = k;
    o[k2] = m[k];
}));
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
    Object.defineProperty(o, "default", { enumerable: true, value: v });
}) : function(o, v) {
    o["default"] = v;
});
var __importStar = (this && this.__importStar) || (function () {
    var ownKeys = function(o) {
        ownKeys = Object.getOwnPropertyNames || function (o) {
            var ar = [];
            for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
            return ar;
        };
        return ownKeys(o);
    };
    return function (mod) {
        if (mod && mod.__esModule) return mod;
        var result = {};
        if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
        __setModuleDefault(result, mod);
        return result;
    };
})();
var __importDefault = (this && this.__importDefault) || function (mod) {
    return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.DEFAULT_COMPARISONS = exports.DEFAULT_GRAPH_CONFIG = exports.DEFAULT_EDGE_WEIGHTS = void 0;
exports.computeDirectoryDistance = computeDirectoryDistance;
exports.computePackage = computePackage;
exports.buildModuleResolver = buildModuleResolver;
exports.loadGraphConfig = loadGraphConfig;
exports.saveGraphConfig = saveGraphConfig;
exports.buildEdgeResolverContext = buildEdgeResolverContext;
exports.resolveEdges = resolveEdges;
exports.addTransitiveEdges = addTransitiveEdges;
exports.storeEdges = storeEdges;
exports.storeEdgesIncremental = storeEdgesIncremental;
exports.loadEdges = loadEdges;
exports.storeCommunities = storeCommunities;
exports.loadCommunities = loadCommunities;
exports.computeCommunityEdges = computeCommunityEdges;
exports.storeCommunityEdges = storeCommunityEdges;
exports.loadCommunityEdges = loadCommunityEdges;
exports.buildGraph = buildGraph;
exports.computeCentrality = computeCentrality;
exports.rescueDissolvedNodes = rescueDissolvedNodes;
exports.detectCommunities = detectCommunities;
exports.matchCommunities = matchCommunities;
exports.labelPropagation = labelPropagation;
exports.computeChangedHashes = computeChangedHashes;
exports.analyzeGraph = analyzeGraph;
exports.detectHierarchicalCommunities = detectHierarchicalCommunities;
exports.computeComparisonMetrics = computeComparisonMetrics;
exports.runComparison = runComparison;
const path = __importStar(require("path"));
const fs = __importStar(require("fs"));
const graphology_1 = __importDefault(require("graphology"));
const betweenness_1 = __importDefault(require("graphology-metrics/centrality/betweenness"));
const community_detectors_1 = require("./community-detectors");
const edge_resolvers_1 = require("./edge-resolvers");
/**
 * Default edge weights for graph construction.
 *
 * These control how strongly each relationship type pulls functions into the
 * same community. All weights are relative — what matters is the ratio between
 * them, not the absolute values. Weights from different edge types between the
 * same function pair are summed in the merged graph.
 *
 * Edit these to tune community detection. After changing, re-run:
 *   pnpm dev graph --compare --path .
 * to see the effect across all algorithms.
 */
exports.DEFAULT_EDGE_WEIGHTS = {
    /**
     * Direct function calls: `fnA()` calls `fnB()`.
     * Strongest signal — actual runtime dependency.
     * ↑ increase: call relationships dominate clustering
     * ↓ decrease: proximity and imports become relatively more important
     */
    call: 0.5,
    /**
     * Type references: function A uses a type defined near function B.
     * NOT YET EXTRACTED by parsers — no type_ref edges are currently produced.
     * Reserved for future use.
     */
    type_ref: 0.5,
    /**
     * Import statements: `import { validate } from "./auth"` creates an edge.
     * Includes barrel file fallback (workspace package re-exports).
     * ↑ increase: module boundaries become stronger clustering signal
     * ↓ decrease: imports matter less relative to calls and proximity
     */
    import: 0.2,
    /**
     * File/directory proximity. Two sub-types:
     *   Same-file: all pairs connected, weight decays as 1/offset (adjacent=full, distance N=1/N)
     *   Same-directory (cross-file): half the same-file weight (hardcoded 0.5x multiplier)
     *
     * This is the main knob for "respect file boundaries". Higher values keep
     * same-file functions together even when they have cross-file call edges.
     *
     * ↑ increase: files/directories cluster together more strongly
     * ↓ decrease: only call/import relationships drive clustering
     * 0: disable proximity entirely (pure topology)
     */
    co_location: 0.7,
    /**
     * JSX component references: `<Button />`, `<Card />`.
     * Separated from calls because UI composition is a weaker domain signal
     * than functional dependency. Without this separation, functions using
     * shared UI components get pulled into the UI component's community.
     *
     * ↑ increase: components rendering the same UI primitives cluster together
     * ↓ decrease: JSX composition becomes noise
     */
    jsx_ref: 0.2,
    /**
     * 2-hop call chains: A→B→C creates A↔C edge.
     * Derived in-memory (not stored in DB). Stacks with direct edges.
     * ↑ increase: transitive relationships pull functions together more
     * ↓ decrease: only direct connections drive clustering
     */
    transitive_call: 0.4,
    /**
     * Shared caller: if A calls both B and C, creates B↔C edge.
     * Derived in-memory (not stored in DB). Callers with >15 callees are skipped.
     * Weakest signal — correlation (same caller) is suggestive but indirect.
     * ↑ increase: functions used by the same caller cluster together more
     * ↓ decrease: only direct relationships matter
     */
    co_caller: 0.4,
};
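// Worked example (hypothetical pair of functions): a direct call edge (0.5)
// plus an adjacent same-file co_location edge (0.7) between fnA and fnB merge
// to a single undirected edge of weight 0.5 + 0.7 = 1.2 in buildGraph(),
// before any directory-distance decay is applied.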
exports.DEFAULT_GRAPH_CONFIG = {
    algorithm: "leiden",
    edgeWeights: { ...exports.DEFAULT_EDGE_WEIGHTS },
    resolution: 3.0,
    minCommunitySize: 3,
    maxCommunitySize: 18,
    autoResolution: true,
    directoryDecay: 0.7,
};
/**
 * Compute directory-level distance between two files relative to rootPath.
 * Distance = number of unique directory segments when traversing from one file's
 * directory to the other via their common ancestor.
 *
 * Examples (relative to root):
 *   src/components/Button.tsx vs src/components/Input.tsx → 0 (same dir)
 *   src/components/Button.tsx vs src/utils/format.ts → 2 (sibling dirs)
 *   packages/core/src/graph.ts vs packages/webapp/src/App.tsx → 4
 */
function computeDirectoryDistance(filePathA, filePathB, rootPath) {
    const segsA = path
        .relative(rootPath, path.dirname(filePathA))
        .split(path.sep)
        .filter(Boolean);
    const segsB = path
        .relative(rootPath, path.dirname(filePathB))
        .split(path.sep)
        .filter(Boolean);
    let common = 0;
    while (common < segsA.length &&
        common < segsB.length &&
        segsA[common] === segsB[common]) {
        common++;
    }
    return segsA.length - common + (segsB.length - common);
}
/**
 * Extract a human-readable package label for a file.
 * Uses the ModuleResolver to find the containing workspace package if available.
 * Falls back to the first directory segment relative to root.
 */
function computePackage(filePath, rootPath, resolver) {
    if (resolver) {
        let bestMatch = "";
        let bestName = "root";
        for (const [name, pkg] of resolver.workspacePackages) {
            if (filePath.startsWith(pkg.dir + path.sep) &&
                pkg.dir.length > bestMatch.length) {
                bestMatch = pkg.dir;
                bestName = name;
            }
        }
        if (bestMatch)
            return bestName;
    }
    const rel = path.relative(rootPath, filePath).replace(/\\/g, "/");
    const parts = rel.split("/");
    return parts.length > 1 ? parts[0] : "root";
}
/** Marker files that indicate a package/project root directory */
const PACKAGE_MARKERS = [
    "package.json",
    "pyproject.toml",
    "go.mod",
    "Cargo.toml",
    "setup.py",
    "pom.xml",
];
/**
 * Discover package directories by walking UP from analyzed file directories
 * and looking for marker files (package.json, pyproject.toml, go.mod, etc.).
 *
 * Technology-agnostic: works on any codebase regardless of directory naming.
 * The checked set ensures each directory is only inspected once.
 */
function discoverPackageDirs(rootPath, functions) {
    const fileDirs = new Set(functions.map((f) => path.dirname(f.filePath)));
    const packageDirs = new Set();
    const checked = new Set();
    for (const dir of fileDirs) {
        let current = dir;
        while (current.startsWith(rootPath) && current !== path.dirname(current)) {
            if (checked.has(current))
                break;
            checked.add(current);
            for (const marker of PACKAGE_MARKERS) {
                try {
                    fs.statSync(path.join(current, marker));
                    packageDirs.add(current);
                    break;
                }
                catch {
                    /* no marker here */
                }
            }
            current = path.dirname(current);
        }
    }
    // Always include root if it has a marker file
    if (!checked.has(rootPath)) {
        for (const marker of PACKAGE_MARKERS) {
            try {
                fs.statSync(path.join(rootPath, marker));
                packageDirs.add(rootPath);
                break;
            }
            catch {
                /* no marker */
            }
        }
    }
    return [...packageDirs];
}
/**
 * Build a ModuleResolver by discovering packages via marker files.
 * Walks up from analyzed file directories to find package.json, pyproject.toml, etc.
 * Reads package.json (for name + exports) and tsconfig.json (for path aliases).
 * Runs once per graph command invocation — not per-edge.
 */
function buildModuleResolver(rootPath, functions) {
    const workspacePackages = new Map();
    const pathAliases = new Map();
    const packageDirs = functions
        ? discoverPackageDirs(rootPath, functions)
        : [rootPath]; // fallback: just root
    for (const dir of packageDirs) {
        // Read package.json for name + exports
        try {
            const pkgJson = JSON.parse(fs.readFileSync(path.join(dir, "package.json"), "utf-8"));
            const name = pkgJson.name;
            if (name) {
                const exports = {};
                if (pkgJson.exports && typeof pkgJson.exports === "object") {
                    for (const [key, value] of Object.entries(pkgJson.exports)) {
                        if (typeof value === "string")
                            exports[key] = value;
                    }
                }
                if (!exports["."] && pkgJson.main) {
                    exports["."] = pkgJson.main;
                }
                workspacePackages.set(name, { dir, exports });
            }
        }
        catch {
            /* skip dirs without valid package.json */
        }
        // Read tsconfig.json for paths
        try {
            const tsconfig = JSON.parse(fs.readFileSync(path.join(dir, "tsconfig.json"), "utf-8"));
            const compilerOptions = tsconfig.compilerOptions;
            if (compilerOptions?.paths) {
                const baseUrl = compilerOptions.baseUrl
                    ? path.resolve(dir, compilerOptions.baseUrl)
                    : dir;
                pathAliases.set(dir, { baseUrl, paths: compilerOptions.paths });
            }
        }
        catch {
            /* skip dirs without tsconfig or paths */
        }
    }
    return { workspacePackages, pathAliases };
}
// ============ CONFIG PERSISTENCE ============
function loadGraphConfig(db) {
    const row = db
        .prepare("SELECT value FROM graph_config WHERE key = 'config'")
        .get();
    if (!row)
        return { ...exports.DEFAULT_GRAPH_CONFIG };
    try {
        const stored = JSON.parse(row.value);
        return {
            ...exports.DEFAULT_GRAPH_CONFIG,
            ...stored,
            edgeWeights: {
                ...exports.DEFAULT_GRAPH_CONFIG.edgeWeights,
                ...(stored.edgeWeights || {}),
            },
        };
    }
    catch {
        return { ...exports.DEFAULT_GRAPH_CONFIG };
    }
}
function saveGraphConfig(db, config) {
    db.prepare("INSERT OR REPLACE INTO graph_config (key, value) VALUES ('config', ?)").run(JSON.stringify(config));
}
// ============ EDGE RESOLUTION ============
/**
 * Given extracted functions with call-site data, resolve name-based calls to
 * content hash edges. Uses the file_functions index for name→hash lookup.
 *
 * Returns edges ready to insert into function_edges table.
 */
/**
 * Build shared lookup context for edge resolvers.
 * Queries file_functions once and constructs all lookup maps that resolvers need.
 * Called once per resolveEdges() invocation.
 */
function buildEdgeResolverContext(db, functions, config, affectedHashes, resolver) {
    // Query all file_functions rows once
    const allDbRows = db
        .prepare("SELECT file_path, function_name, content_hash FROM file_functions")
        .all();
    // Build name→hash lookup
    const nameToHashes = new Map();
    for (const row of allDbRows) {
        const existing = nameToHashes.get(row.function_name) || new Set();
        existing.add(row.content_hash);
        nameToHashes.set(row.function_name, existing);
    }
    // Build file→hashes for import target resolution
    const fileToHashes = new Map();
    for (const row of allDbRows) {
        const existing = fileToHashes.get(row.file_path) || new Set();
        existing.add(row.content_hash);
        fileToHashes.set(row.file_path, existing);
    }
    // Build (file::name) → hash for targeted import edges
    const fileNameToHash = new Map();
    for (const row of allDbRows) {
        fileNameToHash.set(`${row.file_path}::${row.function_name}`, row.content_hash);
    }
    // Build package-scoped name→hashes for fallback import resolution.
    // When imports resolve through barrel files (index.ts with re-exports),
    // the imported name isn't defined in the barrel — it's in another file.
    // This map lets us find the function by name within the package scope.
    const pkgNameToHashes = new Map();
    if (resolver) {
        for (const [pkgName, pkg] of resolver.workspacePackages) {
            const nameMap = new Map();
            for (const row of allDbRows) {
                if (row.file_path.startsWith(pkg.dir + path.sep)) {
                    const existing = nameMap.get(row.function_name) || new Set();
                    existing.add(row.content_hash);
                    nameMap.set(row.function_name, existing);
                }
            }
            if (nameMap.size > 0) {
                pkgNameToHashes.set(pkgName, nameMap);
            }
        }
    }
    // Build file → sorted functions map for proximity-based co-location edges
    const fileToSortedFns = new Map();
    for (const fn of functions) {
        const list = fileToSortedFns.get(fn.filePath) || [];
        list.push(fn);
        fileToSortedFns.set(fn.filePath, list);
    }
    for (const fns of fileToSortedFns.values()) {
        fns.sort((a, b) => a.startLine - b.startLine);
    }
    return {
        nameToHashes,
        fileToHashes,
        fileNameToHash,
        pkgNameToHashes,
        fileToSortedFns,
        functions,
        config,
        affectedHashes,
        resolver,
    };
}
function resolveEdges(db, functions, config, affectedHashes, resolver) {
    const ctx = buildEdgeResolverContext(db, functions, config, affectedHashes, resolver);
    const edges = [];
    for (const edgeResolver of (0, edge_resolvers_1.getEdgeResolvers)()) {
        edges.push(...edgeResolver.resolve(ctx));
    }
    return edges;
}
// ============ TRANSITIVE EDGES ============
/** Maximum callees per caller for co-caller edge generation.
 * Hub functions with many callees would create O(n^2) edges — cap to prevent explosion. */
const CO_CALLER_CAP = 15;
/**
 * Derive transitive edges from the call graph to increase connectivity.
 * Pure function — no DB access.
 *
 * Two derived edge types:
 * 1. **transitive_call** (A→B→C creates A↔C): 2-hop call chain connectivity.
 * 2. **co_caller** (A→B, A→C creates B↔C): Functions called by the same caller.
 *
 * Edges are NOT deduped against direct edges — being related in multiple ways
 * (e.g., direct call + transitive chain) intentionally increases connection strength.
 * buildGraph() merges parallel edges by summing weights.
 */
function addTransitiveEdges(edges, config) {
    // Build directed call adjacency: caller → Set<callee>
    // Only 'call' edges preserve direction (sourceHash=caller, targetHash=callee)
    const callsTo = new Map();
    for (const edge of edges) {
        if (edge.edgeType === "call") {
            let targets = callsTo.get(edge.sourceHash);
            if (!targets) {
                targets = new Set();
                callsTo.set(edge.sourceHash, targets);
            }
            targets.add(edge.targetHash);
        }
    }
    if (callsTo.size === 0)
        return edges;
    const newEdges = [];
    // 1. Transitive call: A→B→C creates A↔C
    const transitiveAdded = new Set();
    for (const [caller, callees] of callsTo) {
        for (const callee of callees) {
            const indirectCallees = callsTo.get(callee);
            if (!indirectCallees)
                continue;
            for (const indirect of indirectCallees) {
                if (indirect === caller)
                    continue; // skip self-loops
                const a = caller < indirect ? caller : indirect;
                const b = caller < indirect ? indirect : caller;
                const key = `${a}|${b}`;
                if (!transitiveAdded.has(key)) {
                    transitiveAdded.add(key);
                    newEdges.push({
                        sourceHash: a,
                        targetHash: b,
                        edgeType: "transitive_call",
                        weight: config.edgeWeights.transitive_call,
                    });
                }
            }
        }
    }
    // 2. Co-caller: A→B, A→C creates B↔C (capped to prevent hub explosion)
    const coCallerAdded = new Set();
    for (const [, callees] of callsTo) {
        if (callees.size < 2 || callees.size > CO_CALLER_CAP)
            continue;
        const calleeList = [...callees];
        for (let i = 0; i < calleeList.length; i++) {
            for (let j = i + 1; j < calleeList.length; j++) {
                const a = calleeList[i] < calleeList[j] ? calleeList[i] : calleeList[j];
                const b = calleeList[i] < calleeList[j] ? calleeList[j] : calleeList[i];
                const key = `${a}|${b}`;
                if (!coCallerAdded.has(key)) {
                    coCallerAdded.add(key);
                    newEdges.push({
                        sourceHash: a,
                        targetHash: b,
                        edgeType: "co_caller",
                        weight: config.edgeWeights.co_caller,
                    });
                }
            }
        }
    }
    return [...edges, ...newEdges];
}
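// Worked example (hypothetical hashes): given call edges A→B, B→C, and A→D,
// addTransitiveEdges derives transitive_call A↔C (weight 0.4 by default) and
// co_caller B↔D (weight 0.4 by default, because A calls both B and D).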
/**
 * Store resolved edges in the function_edges table.
 * Clears existing edges first (they're rebuilt from source each scan).
 */
function storeEdges(db, edges) {
    db.exec("DELETE FROM function_edges");
    const insert = db.prepare("INSERT OR REPLACE INTO function_edges (source_hash, target_hash, edge_type, weight) VALUES (?, ?, ?, ?)");
    const tx = db.transaction(() => {
        for (const edge of edges) {
            insert.run(edge.sourceHash, edge.targetHash, edge.edgeType, edge.weight);
        }
    });
    tx();
}
/**
 * Incremental edge storage: delete edges for affected hashes, then insert new edges.
 * Used when only a subset of functions changed — keeps unaffected edges in place.
 */
function storeEdgesIncremental(db, affectedHashes, newEdges) {
    const hashes = [...affectedHashes];
    // Delete in chunks to stay within SQLite variable limits
    const deleteStmt = db.prepare("DELETE FROM function_edges WHERE source_hash = ? OR target_hash = ?");
    const insertStmt = db.prepare("INSERT OR REPLACE INTO function_edges (source_hash, target_hash, edge_type, weight) VALUES (?, ?, ?, ?)");
    const tx = db.transaction(() => {
        for (const hash of hashes) {
            deleteStmt.run(hash, hash);
        }
        for (const edge of newEdges) {
            insertStmt.run(edge.sourceHash, edge.targetHash, edge.edgeType, edge.weight);
        }
    });
    tx();
}
/**
 * Load all edges from the database.
 */
function loadEdges(db) {
    const rows = db
        .prepare("SELECT source_hash, target_hash, edge_type, weight FROM function_edges")
        .all();
    return rows.map((r) => ({
        sourceHash: r.source_hash,
        targetHash: r.target_hash,
        edgeType: r.edge_type,
        weight: r.weight,
    }));
}
/**
 * Store community assignments in the communities table.
 */
function storeCommunities(db, assignments) {
    // Clear existing assignments for this algorithm + level
    if (assignments.length === 0)
        return;
    const algorithm = assignments[0].algorithm;
    const level = assignments[0].level;
    db.prepare("DELETE FROM communities WHERE algorithm = ? AND level = ?").run(algorithm, level);
    const insert = db.prepare("INSERT INTO communities (content_hash, level, community_id, algorithm) VALUES (?, ?, ?, ?)");
    const tx = db.transaction(() => {
        for (const a of assignments) {
            insert.run(a.contentHash, a.level, a.communityId, a.algorithm);
        }
    });
    tx();
}
/**
 * Load community assignments from the database.
 */
function loadCommunities(db, algorithm, level) {
    const rows = db
        .prepare("SELECT content_hash, level, community_id, algorithm FROM communities WHERE algorithm = ? AND level = ?")
        .all(algorithm, level);
    return rows.map((r) => ({
        contentHash: r.content_hash,
        level: r.level,
        communityId: r.community_id,
        algorithm: r.algorithm,
    }));
}
// ============ COMMUNITY EDGES ============
/**
 * Aggregate function-level edges into community-level edges.
 * For each pair of distinct L0 communities, sums the weights of all
 * function edges that cross between them.
 */
function computeCommunityEdges(db, algorithm, edges) {
    // Load L0 community assignments (excluding dissolved)
    const assignments = db
        .prepare("SELECT content_hash, community_id FROM communities WHERE level = 0 AND algorithm = ? AND community_id != '__dissolved'")
        .all(algorithm);
    const hashToCommunity = new Map();
    for (const a of assignments) {
        hashToCommunity.set(a.content_hash, a.community_id);
    }
    // Use provided edges (includes transitive) or load from DB
    const fnEdges = edges
        ? edges.map((e) => ({
            source_hash: e.sourceHash,
            target_hash: e.targetHash,
            weight: e.weight,
        }))
        : db
            .prepare("SELECT source_hash, target_hash, weight FROM function_edges")
            .all();
    // Accumulate cross-community edges
    const edgeMap = new Map();
    for (const e of fnEdges) {
        const srcComm = hashToCommunity.get(e.source_hash);
        const tgtComm = hashToCommunity.get(e.target_hash);
        if (!srcComm || !tgtComm || srcComm === tgtComm)
            continue;
        // Canonical ordering
        const a = srcComm < tgtComm ? srcComm : tgtComm;
        const b = srcComm < tgtComm ? tgtComm : srcComm;
        const key = `${a}|${b}`;
        const existing = edgeMap.get(key) || { weight: 0, count: 0 };
        existing.weight += e.weight;
        existing.count += 1;
        edgeMap.set(key, existing);
    }
    const result = [];
    for (const [key, data] of edgeMap) {
        const [a, b] = key.split("|");
        result.push({
            sourceCommunity: a,
            targetCommunity: b,
            algorithm,
            weight: data.weight,
            edgeCount: data.count,
        });
    }
    return result;
}
/**
 * Store community edges (full replace per algorithm).
 */
function storeCommunityEdges(db, edges) {
    if (edges.length === 0) {
        db.exec("DELETE FROM community_edges");
        return;
    }
    const algorithm = edges[0].algorithm;
    db.prepare("DELETE FROM community_edges WHERE algorithm = ?").run(algorithm);
    const insert = db.prepare("INSERT INTO community_edges (source_community, target_community, algorithm, weight, edge_count) VALUES (?, ?, ?, ?, ?)");
    const tx = db.transaction(() => {
        for (const edge of edges) {
            insert.run(edge.sourceCommunity, edge.targetCommunity, edge.algorithm, edge.weight, edge.edgeCount);
        }
    });
    tx();
}
/**
 * Load community edges from the database.
 */
function loadCommunityEdges(db, algorithm) {
    const rows = db
        .prepare("SELECT source_community, target_community, algorithm, weight, edge_count FROM community_edges WHERE algorithm = ?")
        .all(algorithm);
    return rows.map((r) => ({
        sourceCommunity: r.source_community,
        targetCommunity: r.target_community,
        algorithm: r.algorithm,
        weight: r.weight,
        edgeCount: r.edge_count,
    }));
}
/**
 * Build a graphology graph from function edges.
 * Creates an undirected weighted graph — multiple edges between the same
 * pair (different edge types) are merged by summing weights.
 */
function buildGraph(edges, hashToFilePath, rootPath, directoryDecay, allHashes) {
    const graph = new graphology_1.default({ type: "undirected" });
    // Add ALL function hashes as nodes (every extracted function is a node)
    if (allHashes) {
        for (const hash of allHashes) {
            graph.addNode(hash);
        }
    }
    // Also add any nodes from edges not already present
    for (const edge of edges) {
        if (!graph.hasNode(edge.sourceHash))
            graph.addNode(edge.sourceHash);
        if (!graph.hasNode(edge.targetHash))
            graph.addNode(edge.targetHash);
    }
    // Pre-compute directory segments for each node (avoids repeated path operations per-edge)
    const dirSegCache = new Map();
    const applyDecay = hashToFilePath &&
        rootPath &&
        directoryDecay !== undefined &&
        directoryDecay > 0;
    if (applyDecay) {
        for (const [hash, fp] of hashToFilePath) {
            dirSegCache.set(hash, path
                .relative(rootPath, path.dirname(fp))
                .split(path.sep)
                .filter(Boolean));
        }
    }
    // Merge parallel edges (same pair, different types) by summing weights
    const mergedWeights = new Map();
    for (const edge of edges) {
        const a = edge.sourceHash < edge.targetHash ? edge.sourceHash : edge.targetHash;
        const b = edge.sourceHash < edge.targetHash ? edge.targetHash : edge.sourceHash;
        const key = `${a}|${b}`;
        let weight = edge.weight;
        if (applyDecay) {
            const segsA = dirSegCache.get(a);
            const segsB = dirSegCache.get(b);
            if (segsA && segsB) {
                let common = 0;
                while (common < segsA.length &&
                    common < segsB.length &&
                    segsA[common] === segsB[common]) {
                    common++;
                }
                const dist = segsA.length - common + (segsB.length - common);
                if (dist > 0) {
                    weight *= Math.exp(-directoryDecay * dist);
                }
            }
        }
        mergedWeights.set(key, (mergedWeights.get(key) || 0) + weight);
    }
    for (const [key, weight] of mergedWeights) {
        const [a, b] = key.split("|");
        graph.addEdge(a, b, { weight });
    }
    return graph;
}
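// Worked example (hypothetical): with the default directoryDecay = 0.7, a
// call edge of weight 0.5 between functions in sibling directories
// (directory distance 2) is scaled to 0.5 * exp(-0.7 * 2) ≈ 0.12 before merging.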
/**
 * Compute betweenness centrality for all nodes in the graph.
 * High-centrality nodes are "bridge" functions that many shortest paths
 * pass through — coupling points between communities.
 */
function computeCentrality(graph) {
    if (graph.order === 0) {
        return { scores: new Map(), ranked: [] };
    }
    const raw = (0, betweenness_1.default)(graph, {
        getEdgeWeight: "weight",
        normalized: true,
    });
    const scores = new Map();
    const ranked = [];
    for (const [node, score] of Object.entries(raw)) {
        scores.set(node, score);
        ranked.push({ contentHash: node, score });
    }
    ranked.sort((a, b) => b.score - a.score);
    return { scores, ranked };
}
// ============ COMMUNITY DETECTION ============
/**
 * Rescue dissolved nodes by assigning each to the community with the strongest
 * total edge weight. Mutates assignments in-place. Returns the number rescued.
 *
 * Nodes with zero connections to any non-dissolved community stay dissolved.
 * Rescued nodes update the membership map so subsequent rescues see them as
 * placed (enabling chained rescue where node B is rescued into a community
 * that node A was just rescued into).
 */
function rescueDissolvedNodes(assignments, edges, dissolvedMarker) {
    const dissolvedHashes = [];
    for (const a of assignments) {
        if (a.communityId === dissolvedMarker) {
            dissolvedHashes.push(a.contentHash);
        }
    }
    if (dissolvedHashes.length === 0)
        return 0;
    // Build membership map (hash → community) for non-dissolved nodes
    const hashToComm = new Map();
    for (const a of assignments) {
        if (a.communityId !== dissolvedMarker) {
            hashToComm.set(a.contentHash, a.communityId);
        }
    }
    if (hashToComm.size === 0)
        return 0;
    let rescued = 0;
    for (const hash of dissolvedHashes) {
        // Sum edge weights to each community
        const commWeights = new Map();
        for (const edge of edges) {
            const neighbor = edge.sourceHash === hash
                ? edge.targetHash
                : edge.targetHash === hash
                    ? edge.sourceHash
                    : null;
            if (!neighbor)
                continue;
            const comm = hashToComm.get(neighbor);
            if (comm) {
                commWeights.set(comm, (commWeights.get(comm) || 0) + edge.weight);
            }
        }
        // Find the community with strongest total connection
        let bestComm = "";
        let bestWeight = 0;
        for (const [comm, weight] of commWeights) {
            if (weight > bestWeight) {
                bestWeight = weight;
                bestComm = comm;
            }
        }
        if (bestComm) {
            const assignment = assignments.find((a) => a.contentHash === hash);
            if (assignment) {
                assignment.communityId = bestComm;
                hashToComm.set(hash, bestComm); // chained rescue
                rescued++;
            }
        }
    }
    return rescued;
}
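// Worked example (hypothetical): a dissolved node with a 0.5-weight edge into
// community "12" and 0.7 + 0.2 = 0.9 total weight into community "7" is
// reassigned to "7"; a node with no edges into any surviving community stays
// dissolved.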
/**
 * Run community detection on the given edges using the configured algorithm.
 * Returns assignments ready for storage, plus stats about the detection run.
 *
 * Small communities (< minCommunitySize) are dissolved — their members are
 * assigned to a special "__dissolved" community. Dissolved nodes are then
 * rescued: each is assigned to the community it's most connected to by total
 * edge weight. Truly isolated nodes (zero connections) stay dissolved.
 */
function detectCommunities(edges, config, hashToFilePath, rootPath, allHashes) {
    const graph = buildGraph(edges, hashToFilePath, rootPath, config.directoryDecay, allHashes);
    if (graph.order === 0) {
        return {
            assignments: [],
            communityCount: 0,
            modularity: null,
            nodesInGraph: 0,
            edgesInGraph: 0,
            dissolvedCount: 0,
        };
    }
    // Auto-resolution: scale resolution with codebase size for better granularity
    let resolution = config.resolution;
    if (config.autoResolution !== false) {
        const nodeCount = graph.order;
        if (nodeCount < 50)
            resolution = Math.max(resolution, 1.0);
        else if (nodeCount < 200)
            resolution = Math.max(resolution, 1.5);
        else
            resolution = Math.max(resolution, 2.0);
    }
    const detector = (0, community_detectors_1.getDetector)(config.algorithm);
    const raw = detector.detect(graph, { resolution, weightAttribute: "weight" });
    const communities = raw.communities;
    const modularity = raw.modularity;
    // Group nodes by community
    const communityMembers = new Map();
    for (const [node, communityId] of Object.entries(communities)) {
        const members = communityMembers.get(communityId) || [];
        members.push(node);
        communityMembers.set(communityId, members);
    }
    // Active splitting: re-run Louvain at higher resolution on oversized communities
    const splitMembers = new Map();
    for (const [communityId, members] of communityMembers) {
        if (members.length > config.maxCommunitySize &&
            detector.supportsResolution) {
            const memberSet = new Set(members);
            const subEdges = edges.filter((e) => memberSet.has(e.sourceHash) && memberSet.has(e.targetHash));
            if (subEdges.length > 0) {
                const subResult = detectCommunities(subEdges, {
                    ...config,
                    resolution: resolution * 2,
                    autoResolution: false, // prevent recursive auto-scaling
                });
                // Only accept the split if it produced multiple communities without destroying too many
                if (subResult.communityCount > 1 &&
                    subResult.dissolvedCount <= members.length * 0.5) {
                    for (const a of subResult.assignments) {
                        a.communityId = `${communityId}_${a.communityId}`;
                    }
                    // Rescue dissolved split fragments into nearest sub-community
                    const dissolvedMarker = `${communityId}___dissolved`;
                    rescueDissolvedNodes(subResult.assignments, subEdges, dissolvedMarker);
                    splitMembers.set(String(communityId), []);
                    for (const a of subResult.assignments) {
                        const list = splitMembers.get(a.communityId) || [];
                        list.push(a.contentHash);
                        splitMembers.set(a.communityId, list);
                    }
                }
            }
        }
    }
    // Build final assignments
    const assignments = [];
    let dissolvedCount = 0;
    for (const [communityId, members] of communityMembers) {
        // If this community was split, use the split assignments instead
        if (splitMembers.has(String(communityId)))
            continue;
        const isTooSmall = members.length < config.minCommunitySize;
        const assignedId = isTooSmall ? "__dissolved" : String(communityId);
        if (isTooSmall)
            dissolvedCount += members.length;
        for (const contentHash of members) {
            assignments.push({
                contentHash,
                level: 0,
                communityId: assignedId,
                algorithm: config.algorithm,
            });
        }
    }
    // Add split community assignments
    for (const [subCommunityId, members] of splitMembers) {
        // Detect dissolved groups from recursive splits (prefixed: "66___dissolved")
        const isDissolvedGroup = subCommunityId.endsWith("___dissolved");
        if (isDissolvedGroup) {
            dissolvedCount += members.length;
        }
        const isTooSmall = members.length < config.minCommunitySize;
        const assignedId = isDissolvedGroup || isTooSmall ? "__dissolved" : subCommunityId;
        if (isTooSmall && !isDissolvedGroup)
            dissolvedCount += members.length;
        for (const contentHash of members) {
            assignments.push({
                contentHash,
                level: 0,
                communityId: assignedId,
                algorithm: config.algorithm,
            });
        }
    }
    // Rescue top-level dissolved nodes into nearest community
    const topRescued = rescueDissolvedNodes(assignments, edges, "__dissolved");
    dissolvedCount -= topRescued;
    // Count actual (non-dissolved) communities
    const realCommunities = new Set(assignments
        .filter((a) => a.communityId !== "__dissolved")
        .map((a) => a.communityId));
    return {
        assignments,
        communityCount: realCommunities.size,
        modularity,
        nodesInGraph: graph.order,
        edgesInGraph: graph.size,
        dissolvedCount,
        effectiveResolution: resolution,
    };
}
// ============ STABLE COMMUNITY MATCHING (Gate 1) ============
/**
 * Match new community assignments to old ones by Jaccard similarity.
 * Remaps new community IDs to reuse old IDs where membership overlaps significantly.
 * This prevents summary cache misses caused by Louvain's arbitrary ID renumbering.
 *
 * Algorithm: greedy 1:1 matching — sort all (new, old, Jaccard) triples by score
 * descending, accept matches where J ≥ threshold, each ID used at most once.
 */
function matchCommunities(oldAssignments, newAssignments, threshold = 0.5) {
    if (oldAssignments.length === 0 || newAssignments.length === 0) {
        return newAssignments;
    }
    // Group by communityId → set of content hashes (skip __dissolved)
    const groupByComm = (assignments) => {
        const map = new Map();
        for (const a of assignments) {
            if (a.communityId === "__dissolved")
                continue;
            const set = map.get(a.communityId) || new Set();
            set.add(a.contentHash);
            map.set(a.communityId, set);
        }
        return map;
    };
    const oldGroups = groupByComm(oldAssignments);
    const newGroups = groupByComm(newAssignments);
    if (oldGroups.size === 0 || newGroups.size === 0) {
        return newAssignments;
    }
    // Compute all Jaccard similarities
    const candidates = [];
    for (const [newId, newMembers] of newGroups) {
        for (const [oldId, oldMembers] of oldGroups) {
            let intersection = 0;
            for (const h of newMembers) {
                if (oldMembers.has(h))
                    intersection++;
            }
            const union = newMembers.size + oldMembers.size - intersection;
            if (union === 0)
                continue;
            const jaccard = intersection / union;
            if (jaccard >= threshold) {
                candidates.push({ newId, oldId, jaccard });
            }
        }
    }
    // Greedy 1:1 matching: best scores first
    candidates.sort((a, b) => b.jaccard - a.jaccard);
    const usedOld = new Set();
    const usedNew = new Set();
    const remap = new Map(); // newId → oldId
    for (const { newId, oldId } of candidates) {
        if (usedNew.has(newId) || usedOld.has(oldId))
            continue;
        remap.set(newId, oldId);
        usedNew.add(newId);
        usedOld.add(oldId);
    }
    if (remap.size === 0) {
        return newAssignments;
    }
    // Apply remapping
    return newAssignments.map((a) => {
        const remappedId = remap.get(a.communityId);
        if (remappedId) {
            return { ...a, communityId: remappedId };
        }
        return a;
    });
}
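// Worked example (hypothetical IDs): old community "4" = {A, B, C, D} and new
// community "17" = {A, B, C, E} give J = 3 / 5 = 0.6 ≥ 0.5, so the new
// community is renamed back to the stable ID "4".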
/**
 * Label Propagation Algorithm (LPA) for community detection.
 * Delegates to LabelPropDetector — kept as a standalone export for backward compatibility.
 */
function labelPropagation(graph, maxIterations = 100) {
    const detector = new community_detectors_1.LabelPropDetector(maxIterations);
    const result = detector.detect(graph, {
        resolution: 0,
        weightAttribute: "weight",
    });
    return result.communities;
}
// ============ ORCHESTRATION ============
/**
 * Compute the set of content hashes that changed between old and new function sets.
 * Returns the symmetric difference: hashes that appeared or disappeared.
 */
function computeChangedHashes(oldHashes, newHashes) {
    const changed = new Set();
    for (const h of newHashes) {
        if (!oldHashes.has(h))
            changed.add(h);
    }
    for (const h of oldHashes) {
        if (!newHashes.has(h))
            changed.add(h);
    }
    return changed;
}
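// Worked example (hypothetical hashes): oldHashes = {a, b, c} and
// newHashes = {b, c, d} yield changed = {a, d}: a disappeared, d appeared,
// and the untouched b and c are excluded.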
/**
 * Find edge neighbors of a set of hashes — any hash connected to them via function_edges.
 */
function findEdgeNeighbors(db, hashes) {
    const neighbors = new Set();
    const stmt = db.prepare("SELECT source_hash, target_hash FROM function_edges WHERE source_hash = ? OR target_hash = ?");
    for (const hash of hashes) {
        const rows = stmt.all(hash, hash);
        for (const row of rows) {
            neighbors.add(row.source_hash);
            neighbors.add(row.target_hash);
        }
    }
    return neighbors;
}
/**
 * Full graph analysis pipeline: resolve edges → build graph → detect communities → store.
 * Call after file scanning to update the relationship graph.
 *
 * When `oldHashes` is provided, uses incremental edge resolution: only recomputes edges
 * for changed functions + their neighbors, keeping the rest of the graph stable.
 */
function analyzeGraph(db, functions, config, oldHashes, rootPath) {
    const graphConfig = config || loadGraphConfig(db);
    const newHashes = new Set(functions.map((f) => f.contentHash));
    // Build module resolver using marker file discovery for workspace + alias import resolution
    const resolver = rootPath
        ? buildModuleResolver(rootPath, functions)
        : undefined;
    // Determine incremental vs full rebuild
    const changedHashes = oldHashes
        ? computeChangedHashes(oldHashes, newHashes)
        : null;
    const useIncremental = changedHashes !== null &&
        changedHashes.size > 0 &&
        changedHashes.size / Math.max(newHashes.size, 1) <= 0.5;
    let edges;
    if (changedHashes !== null && changedHashes.size === 0) {
        // No-change fast path: skip edge resolution, load existing edges
        edges = loadEdges(db);
    }
    else if (useIncremental) {
        // Incremental mode: only resolve edges for changed functions + their neighbors
        const neighbors = findEdgeNeighbors(db, changedHashes);
        const affectedHashes = new Set([...changedHashes, ...neighbors]);
        // Filter to only hashes that still exist (exclude removed hashes)
        for (const h of affectedHashes) {
            if (!newHashes.has(h))
                affectedHashes.delete(h);
        }
        const newEdges = resolveEdges(db, functions, graphConfig, affectedHashes, resolver);
        storeEdgesIncremental(db, affectedHashes, newEdges);
        edges = loadEdges(db);
    }
    else {
        // Full rebuild (first run or >50% changed)
        edges = resolveEdges(db, functions, graphConfig, undefined, resolver);
        storeEdges(db, edges);
    }
    // Add transitive edges (derived from call graph, not stored in DB)
    const edgesWithTransitive = addTransitiveEdges(edges, graphConfig);
    // Build hash→filePath map for directory distance decay
    let hashToFilePath;
    if (rootPath && (graphConfig.directoryDecay ?? 0.1) > 0) {
        hashToFilePath = new Map();
        for (const fn of functions) {
            hashToFilePath.set(fn.contentHash, fn.filePath);
        }
    }
    // Detect communities from all edges including transitive
    const result = detectCommunities(edgesWithTransitive, graphConfig, hashToFilePath, rootPath, newHashes);
    // Stable community matching — remap new IDs to old IDs by Jaccard similarity
    const oldAssignments = loadCommunities(db, graphConfig.algorithm, 0);
    const remapped = matchCommunities(oldAssignments, result.assignments);
    result.assignments = remapped;
    // Store community assignments
    storeCommunities(db, result.assignments);
    // Compute and store community edges (using full edge set including transitive)
    const communityEdges = computeCommunityEdges(db, graphConfig.algorithm, edgesWithTransitive);
    storeCommunityEdges(db, communityEdges);
    // Save config used (for reproducibility)
    saveGraphConfig(db, graphConfig);
    return result;
}
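// Worked example (hypothetical scan): with 1000 functions and 40 changed
// hashes, 40 / 1000 = 4% ≤ 50%, so only the 40 changed functions plus their
// edge neighbors are re-resolved; at 600 changed (60%) the full rebuild runs.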
/**
 * Detect hierarchical communities: first L0 (function-level), then L1 (community-level).
 *
 * After L0 detection + community edge computation, builds a meta-graph where:
 * - Nodes = L0 community IDs (excluding __dissolved)
 * - Edges = community_edges (weighted by aggregate function edge weight)
 *
 * Runs Louvain on meta-graph to produce L1 group assignments.
 * Stores L1 assignments in the communities table with level=1,
 * where content_hash = L0 community_id.
 *
 * When `oldHashes` is provided, enables incremental edge resolution in the L0 step.
 */
function detectHierarchicalCommunities(db, functions, config, oldHashes, rootPath) {
    const graphConfig = config || loadGraphConfig(db);
    // Step 1: Run L0 detection (also computes + stores community edges)
    const l0 = analyzeGraph(db, functions, graphConfig, oldHashes, rootPath);
    // Step 2: Load community edges
    const communityEdges = loadCommunityEdges(db, graphConfig.algorithm);
    // Step 3: Need at least 3 real communities for meaningful grouping
    const realCommunities = new Set(l0.assignments
        .filter((a) => a.communityId !== "__dissolved")
        .map((a) => a.communityId));
    if (realCommunities.size < 3 || communityEdges.length < 2) {
        return { l0, communityEdges, l1Assignments: [], l1GroupCount: 0 };
    }
    // Step 4: Build meta-graph from community edges
    const metaEdges = communityEdges.map((ce) => ({
        sourceHash: ce.sourceCommunity,
        targetHash: ce.targetCommunity,
        edgeType: "call",
        weight: ce.weight,
    }));
    const metaGraph = buildGraph(metaEdges);
    if (metaGraph.order < 3) {
        return { l0, communityEdges, l1Assignments: [], l1GroupCount: 0 };
    }
    // Step 5: Run community detection on meta-graph
    const metaDetector = (0, community_detectors_1.getDetector)(graphConfig.algorithm);
    const metaRaw = metaDetector.detect(metaGraph, {
        resolution: graphConfig.resolution,
        weightAttribute: "weight",
    });
    const metaCommunities = metaRaw.communities;
    // Step 6: Build L1 assignments
    const l1Members = new Map();
    for (const [communityId, groupId] of Object.entries(metaCommunities)) {
        const members = l1Members.get(groupId) || [];
        members.push(communityId);
        l1Members.set(groupId, members);
    }
    // Dissolve groups with only 1 community (no useful grouping)
    const l1Assignments = [];
    let realGroupCount = 0;
    for (const [groupId, members] of l1Members) {
        const isTooSmall = members.length < 2;
        if (!isTooSmall)
            realGroupCount++;
        for (const communityId of members) {
            l1Assignments.push({
                contentHash: communityId,
                level: 1,
                communityId: isTooSmall ? "__dissolved" : String(groupId),
                algorithm: graphConfig.algorithm,
            });
        }
    }
    // Step 6.5: Stable matching for L1 assignments
    const oldL1Assignments = loadCommunities(db, graphConfig.algorithm, 1);
    const remappedL1 = matchCommunities(oldL1Assignments, l1Assignments);
    // Step 7: Store L1 assignments
    if (remappedL1.length > 0) {
        storeCommunities(db, remappedL1);
    }
    return {
        l0,
        communityEdges,
        l1Assignments: remappedL1,
        l1GroupCount: realGroupCount,
    };
}
exports.DEFAULT_COMPARISONS = [
    { label: "louvain@1.0", algorithm: "louvain", resolution: 1.0 },
    { label: "louvain@1.5", algorithm: "louvain", resolution: 1.5 },
    { label: "louvain@2.0", algorithm: "louvain", resolution: 2.0 },
    { label: "louvain@3.0", algorithm: "louvain", resolution: 3.0 },
    { label: "leiden@1.5", algorithm: "leiden", resolution: 1.5 },
    { label: "leiden@2.0", algorithm: "leiden", resolution: 2.0 },
    { label: "label-propagation", algorithm: "label-propagation", resolution: 0 },
];
/**
 * Compute comparison metrics from a DetectionResult.
 * Pure function — no DB or side effects.
 */
function computeComparisonMetrics(result, label, algorithm, resolution) {
    const totalNodes = result.nodesInGraph;
    // Collect community sizes (excluding dissolved)
    const commSizes = new Map();
    for (const a of result.assignments) {
        if (a.communityId === "__dissolved")
            continue;
        commSizes.set(a.communityId, (commSizes.get(a.communityId) || 0) + 1);
    }
    const sizes = [...commSizes.values()].sort((a, b) => a - b);
    const minSize = sizes.length > 0 ? sizes[0] : 0;
    const maxSize = sizes.length > 0 ? sizes[sizes.length - 1] : 0;
    const medianSize = sizes.length > 0
        ? sizes.length % 2 === 1
            ? sizes[Math.floor(sizes.length / 2)]
            : Math.round((sizes[Math.floor(sizes.length / 2) - 1] +
                sizes[Math.floor(sizes.length / 2)]) /
                2)
        : 0;
    const dissolvedPct = totalNodes > 0 ? Math.round((result.dissolvedCount / totalNodes) * 100) : 0;
    const coverage = totalNodes > 0
        ? Math.round(((totalNodes - result.dissolvedCount) / totalNodes) * 100)
        : 0;
    return {
        label,
        algorithm,
        resolution,
        communityCount: result.communityCount,
        dissolvedCount: result.dissolvedCount,
        dissolvedPct,
        coverage,
        minSize,
        medianSize,
        maxSize,
        modularity: result.modularity,
        nodesInGraph: result.nodesInGraph,
        edgesInGraph: result.edgesInGraph,
    };
}
/**
 * Run community detection at multiple configurations on the same edges.
 * Does NOT write to DB — pure computation for comparison.
 */
function runComparison(edges, baseConfig, configurations, hashToFilePath, rootPath, allHashes) {
    return configurations.map((cfg) => {
        const config = {
            ...baseConfig,
            algorithm: cfg.algorithm,
            resolution: cfg.resolution,
            autoResolution: false, // explicit resolution for fair comparison
        };
        const result = detectCommunities(edges, config, hashToFilePath, rootPath, allHashes);
        return computeComparisonMetrics(result, cfg.label, cfg.algorithm, cfg.resolution || null);
    });
}