@ophan/core 0.0.2 → 0.0.3

This diff represents the content of publicly available package versions as released to one of the supported public registries, and is provided for informational purposes only.
Files changed (76)
  1. package/dist/community-detectors/index.d.ts +20 -0
  2. package/dist/community-detectors/index.d.ts.map +1 -0
  3. package/dist/community-detectors/index.js +45 -0
  4. package/dist/community-detectors/label-prop.d.ts +20 -0
  5. package/dist/community-detectors/label-prop.d.ts.map +1 -0
  6. package/dist/community-detectors/label-prop.js +77 -0
  7. package/dist/community-detectors/leiden.d.ts +22 -0
  8. package/dist/community-detectors/leiden.d.ts.map +1 -0
  9. package/dist/community-detectors/leiden.js +312 -0
  10. package/dist/community-detectors/louvain.d.ts +13 -0
  11. package/dist/community-detectors/louvain.d.ts.map +1 -0
  12. package/dist/community-detectors/louvain.js +29 -0
  13. package/dist/community-detectors/types.d.ts +36 -0
  14. package/dist/community-detectors/types.d.ts.map +1 -0
  15. package/dist/{parsers/__fixtures__/no-functions.js → community-detectors/types.js} +0 -2
  16. package/dist/edge-resolvers/call.d.ts +13 -0
  17. package/dist/edge-resolvers/call.d.ts.map +1 -0
  18. package/dist/edge-resolvers/call.js +40 -0
  19. package/dist/edge-resolvers/co-location.d.ts +16 -0
  20. package/dist/edge-resolvers/co-location.d.ts.map +1 -0
  21. package/dist/edge-resolvers/co-location.js +129 -0
  22. package/dist/edge-resolvers/import.d.ts +16 -0
  23. package/dist/edge-resolvers/import.d.ts.map +1 -0
  24. package/dist/edge-resolvers/import.js +118 -0
  25. package/dist/edge-resolvers/index.d.ts +9 -0
  26. package/dist/edge-resolvers/index.d.ts.map +1 -0
  27. package/dist/edge-resolvers/index.js +29 -0
  28. package/dist/edge-resolvers/jsx-ref.d.ts +13 -0
  29. package/dist/edge-resolvers/jsx-ref.d.ts.map +1 -0
  30. package/dist/edge-resolvers/jsx-ref.js +40 -0
  31. package/dist/edge-resolvers/types.d.ts +40 -0
  32. package/dist/edge-resolvers/types.d.ts.map +1 -0
  33. package/dist/edge-resolvers/types.js +2 -0
  34. package/dist/graph.d.ts +293 -0
  35. package/dist/graph.d.ts.map +1 -0
  36. package/dist/graph.js +1295 -0
  37. package/dist/index.d.ts +37 -8
  38. package/dist/index.d.ts.map +1 -1
  39. package/dist/index.js +385 -183
  40. package/dist/migrations.d.ts +25 -0
  41. package/dist/migrations.d.ts.map +1 -0
  42. package/dist/migrations.js +323 -0
  43. package/dist/module-resolvers/index.d.ts +11 -0
  44. package/dist/module-resolvers/index.d.ts.map +1 -0
  45. package/dist/module-resolvers/index.js +67 -0
  46. package/dist/module-resolvers/javascript.d.ts +18 -0
  47. package/dist/module-resolvers/javascript.d.ts.map +1 -0
  48. package/dist/module-resolvers/javascript.js +130 -0
  49. package/dist/module-resolvers/types.d.ts +18 -0
  50. package/dist/module-resolvers/types.d.ts.map +1 -0
  51. package/dist/module-resolvers/types.js +2 -0
  52. package/dist/parsers/python.d.ts.map +1 -1
  53. package/dist/parsers/python.js +38 -4
  54. package/dist/parsers/typescript.d.ts.map +1 -1
  55. package/dist/parsers/typescript.js +133 -0
  56. package/dist/practices.d.ts +28 -0
  57. package/dist/practices.d.ts.map +1 -0
  58. package/dist/practices.js +95 -0
  59. package/dist/schemas.d.ts +251 -3
  60. package/dist/schemas.d.ts.map +1 -1
  61. package/dist/schemas.js +121 -6
  62. package/dist/shared.d.ts +8 -0
  63. package/dist/shared.d.ts.map +1 -1
  64. package/dist/summarize.d.ts +165 -0
  65. package/dist/summarize.d.ts.map +1 -0
  66. package/dist/summarize.js +1067 -0
  67. package/ophan_logo.png +0 -0
  68. package/package.json +9 -2
  69. package/dist/parsers/__fixtures__/arrow-functions.d.ts +0 -5
  70. package/dist/parsers/__fixtures__/arrow-functions.d.ts.map +0 -1
  71. package/dist/parsers/__fixtures__/arrow-functions.js +0 -16
  72. package/dist/parsers/__fixtures__/class-methods.d.ts +0 -6
  73. package/dist/parsers/__fixtures__/class-methods.d.ts.map +0 -1
  74. package/dist/parsers/__fixtures__/class-methods.js +0 -12
  75. package/dist/parsers/__fixtures__/no-functions.d.ts +0 -9
  76. package/dist/parsers/__fixtures__/no-functions.d.ts.map +0 -1
@@ -0,0 +1,1067 @@
+ "use strict";
+ // Community summarization module — generates rich narrative documentation for
+ // detected function communities using Claude.
+ //
+ // Architecture:
+ // 1. Graph module detects communities → communities table
+ // 2. This module gathers context for each community (function analysis + edges)
+ // 3. Sends context to Claude with prompt asking for markdown documentation
+ // 4. Stores results in community_summaries table with input_hash for caching
+ // 5. Hierarchical: L1 (subsystem) → L2 (system) → L3 (architecture)
+ //
+ // Caching: input_hash = SHA256 of sorted member content hashes. Same members
+ // = same hash = skip re-summarization. Changes propagate up: if L1 changes,
+ // L2's input_hash changes too.
+ var __importDefault = (this && this.__importDefault) || function (mod) {
+     return (mod && mod.__esModule) ? mod : { "default": mod };
+ };
+ Object.defineProperty(exports, "__esModule", { value: true });
+ exports.DEFAULT_SUMMARIZE_CONFIG = void 0;
+ exports.computePackageBreakdown = computePackageBreakdown;
+ exports.formatPackageBreakdown = formatPackageBreakdown;
+ exports.computeL1InputHash = computeL1InputHash;
+ exports.computeL2InputHash = computeL2InputHash;
+ exports.computeL3InputHash = computeL3InputHash;
+ exports.computeCCInputHash = computeCCInputHash;
+ exports.storeCommunitySignatures = storeCommunitySignatures;
+ exports.loadCommunitySignatures = loadCommunitySignatures;
+ exports.hasCommunityDrifted = hasCommunityDrifted;
+ exports.storeSummary = storeSummary;
+ exports.loadSummary = loadSummary;
+ exports.loadAllSummaries = loadAllSummaries;
+ exports.cleanupOrphanedSummaries = cleanupOrphanedSummaries;
+ exports.buildL1Context = buildL1Context;
+ exports.buildL1RawContext = buildL1RawContext;
+ exports.summarizeL1 = summarizeL1;
+ exports.summarizeL1Raw = summarizeL1Raw;
+ exports.summarizeL2 = summarizeL2;
+ exports.summarizeL3 = summarizeL3;
+ exports.detectCrossCuttingConcerns = detectCrossCuttingConcerns;
+ exports.summarizeCC = summarizeCC;
+ exports.summarizeCommunities = summarizeCommunities;
+ const sdk_1 = __importDefault(require("@anthropic-ai/sdk"));
+ const p_limit_1 = __importDefault(require("p-limit"));
+ const p_retry_1 = __importDefault(require("p-retry"));
+ const shared_1 = require("./shared");
+ const schemas_1 = require("./schemas");
+ const graph_1 = require("./graph");
+ // ============ PACKAGE BREAKDOWN ============
+ function computePackageBreakdown(members, rootPath, resolver) {
+     const counts = {};
+     for (const m of members) {
+         const pkg = (0, graph_1.computePackage)(m.filePath, rootPath, resolver);
+         counts[pkg] = (counts[pkg] || 0) + 1;
+     }
+     return counts;
+ }
+ function formatPackageBreakdown(breakdown) {
+     const total = Object.values(breakdown).reduce((sum, c) => sum + c, 0);
+     if (total === 0)
+         return "";
+     return Object.entries(breakdown)
+         .sort((a, b) => b[1] - a[1])
+         .map(([pkg, count]) => `${pkg} (${Math.round((count / total) * 100)}%)`)
+         .join(", ");
+ }
+ exports.DEFAULT_SUMMARIZE_CONFIG = {
+     algorithm: "louvain",
+     skipUnanalyzed: true,
+ };
+ // ============ INPUT HASH COMPUTATION ============
+ function computeL1InputHash(memberHashes, rawSource = false) {
+     const sorted = [...memberHashes].sort();
+     const prefix = rawSource ? "RAW:" : "";
+     return (0, shared_1.computeHash)(prefix + sorted.join(","));
+ }
+ function computeL2InputHash(l1InputHashes) {
+     const sorted = [...l1InputHashes].sort();
+     return (0, shared_1.computeHash)("L2:" + sorted.join(","));
+ }
+ function computeL3InputHash(l2InputHashes) {
+     const sorted = [...l2InputHashes].sort();
+     return (0, shared_1.computeHash)("L3:" + sorted.join(","));
+ }
+ function computeCCInputHash(concern) {
+     const sortedHashes = [...concern.bridgeFunctions.map((b) => b.contentHash)].sort();
+     const sortedCommunities = [...concern.affectedCommunities.map((c) => c.communityId)].sort();
+     return (0, shared_1.computeHash)("CC:" + concern.tag + ":" + sortedHashes.join(",") + ":" + sortedCommunities.join(","));
+ }
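// How the cache keys compose (sketch; assumes shared_1.computeHash is the
// SHA-256 helper described in the header comment). Every level sorts its
// inputs before hashing, so member order never matters, and each level hashes
// the level below, so a change propagates L1 -> L2 -> L3:
//
//   computeL1InputHash(["h2", "h1"]) === computeL1InputHash(["h1", "h2"]);
//   const l2 = computeL2InputHash([l1HashA, l1HashB]); // changes iff an L1 hash changes
//   const l3 = computeL3InputHash([l2]);               // ...which then changes L3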
+ function storeCommunitySignatures(db, communityId, algorithm, members) {
+     const signatures = members.map((m) => ({
+         name: m.functionName,
+         paramCount: m.params.length,
+     }));
+     db.prepare(`
+         INSERT OR REPLACE INTO community_signatures (community_id, algorithm, signatures)
+         VALUES (?, ?, ?)
+     `).run(communityId, algorithm, JSON.stringify(signatures));
+ }
+ function loadCommunitySignatures(db, communityId, algorithm) {
+     // Table may not exist in older databases
+     const hasTable = db.prepare("SELECT name FROM sqlite_master WHERE type='table' AND name='community_signatures'").get();
+     if (!hasTable)
+         return null;
+     const row = db.prepare("SELECT signatures FROM community_signatures WHERE community_id = ? AND algorithm = ?").get(communityId, algorithm);
+     if (!row)
+         return null;
+     return JSON.parse(row.signatures);
+ }
+ /**
+  * Determine if a community has meaningfully changed by comparing function signatures.
+  * Returns true if the community should be re-summarized.
+  *
+  * A community is "drifted" if:
+  * - >threshold of members were added or removed (by name)
+  * - >threshold of surviving members changed param count
+  */
+ function hasCommunityDrifted(oldSigs, newMembers, threshold = 0.20) {
+     if (oldSigs.length === 0)
+         return true;
+     const oldNames = new Set(oldSigs.map((s) => s.name));
+     const newNames = new Set(newMembers.map((m) => m.functionName));
+     // Count added/removed
+     let added = 0;
+     for (const name of newNames) {
+         if (!oldNames.has(name))
+             added++;
+     }
+     let removed = 0;
+     for (const name of oldNames) {
+         if (!newNames.has(name))
+             removed++;
+     }
+     const membershipChange = (added + removed) / oldSigs.length;
+     if (membershipChange > threshold)
+         return true;
+     // Count drifted among surviving members (param count changed)
+     const oldMap = new Map(oldSigs.map((s) => [s.name, s]));
+     let driftedCount = 0;
+     for (const member of newMembers) {
+         const old = oldMap.get(member.functionName);
+         if (!old)
+             continue;
+         if (old.paramCount !== member.params.length)
+             driftedCount++;
+     }
+     if (driftedCount / oldSigs.length > threshold)
+         return true;
+     return false;
+ }
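// Worked example of the drift thresholds (hypothetical signatures, default 0.20):
//
//   const oldSigs = [{ name: "parse", paramCount: 2 }, { name: "lex", paramCount: 1 },
//                    { name: "emit", paramCount: 1 }, { name: "link", paramCount: 3 },
//                    { name: "load", paramCount: 1 }];
//   // Rename one of the five members: added = 1, removed = 1,
//   // membershipChange = 2/5 = 0.4 > 0.2   => drifted, re-summarize.
//   // Keep all names but give "parse" a third param: driftedCount = 1,
//   // 1/5 = 0.2 is NOT > 0.2               => not drifted, reuse the summary.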
+ // ============ STORAGE ============
+ function storeSummary(db, communityId, level, algorithm, inputHash, summary, modelVersion) {
+     const now = Math.floor(Date.now() / 1000);
+     db.prepare(`
+         INSERT OR REPLACE INTO community_summaries
+         (community_id, level, algorithm, input_hash, summary, model_version, created_at)
+         VALUES (?, ?, ?, ?, ?, ?, ?)
+     `).run(communityId, level, algorithm, inputHash, JSON.stringify(summary), modelVersion, now);
+ }
+ function loadSummary(db, communityId, level, algorithm) {
+     const row = db.prepare(`
+         SELECT community_id, level, algorithm, input_hash, summary, model_version, created_at
+         FROM community_summaries
+         WHERE community_id = ? AND level = ? AND algorithm = ?
+     `).get(communityId, level, algorithm);
+     if (!row)
+         return null;
+     return {
+         communityId: row.community_id,
+         level: row.level,
+         algorithm: row.algorithm,
+         inputHash: row.input_hash,
+         summary: JSON.parse(row.summary),
+         modelVersion: row.model_version,
+         createdAt: row.created_at,
+     };
+ }
+ function loadAllSummaries(db, algorithm, level) {
+     const rows = db.prepare(`
+         SELECT community_id, level, algorithm, input_hash, summary, model_version, created_at
+         FROM community_summaries
+         WHERE algorithm = ? AND level = ?
+     `).all(algorithm, level);
+     return rows.map((row) => ({
+         communityId: row.community_id,
+         level: row.level,
+         algorithm: row.algorithm,
+         inputHash: row.input_hash,
+         summary: JSON.parse(row.summary),
+         modelVersion: row.model_version,
+         createdAt: row.created_at,
+     }));
+ }
+ /**
+  * Delete community_summaries rows at a given level whose community_id
+  * is NOT in the activeIds set. Called after each summarization level
+  * to remove orphans from previous runs.
+  */
+ function cleanupOrphanedSummaries(db, algorithm, level, activeIds) {
+     if (activeIds.size === 0) {
+         // No active IDs means we didn't process this level at all — delete everything
+         db.prepare("DELETE FROM community_summaries WHERE algorithm = ? AND level = ?").run(algorithm, level);
+         return;
+     }
+     const placeholders = [...activeIds].map(() => "?").join(",");
+     db.prepare(`DELETE FROM community_summaries WHERE algorithm = ? AND level = ? AND community_id NOT IN (${placeholders})`).run(algorithm, level, ...activeIds);
+ }
+ // ============ CONTEXT BUILDING ============
+ /**
+  * Resolve internal edges for a community's members.
+  * Finds all function_edges where both endpoints are in the member set,
+  * and populates callsTo/calledBy arrays on member contexts.
+  */
+ function resolveInternalEdges(db, memberHashSet, memberContexts) {
+     const hashToName = new Map(memberContexts.map((m) => [m.contentHash, m.functionName]));
+     const internalEdges = [];
+     const edges = db.prepare("SELECT source_hash, target_hash, edge_type FROM function_edges").all();
+     for (const edge of edges) {
+         if (memberHashSet.has(edge.source_hash) && memberHashSet.has(edge.target_hash)) {
+             const fromName = hashToName.get(edge.source_hash);
+             const toName = hashToName.get(edge.target_hash);
+             if (fromName && toName) {
+                 internalEdges.push({ from: fromName, to: toName, type: edge.edge_type });
+                 if (edge.edge_type === "call") {
+                     const fromCtx = memberContexts.find((m) => m.contentHash === edge.source_hash);
+                     const toCtx = memberContexts.find((m) => m.contentHash === edge.target_hash);
+                     if (fromCtx && !fromCtx.callsTo.includes(toName))
+                         fromCtx.callsTo.push(toName);
+                     if (toCtx && !toCtx.calledBy.includes(fromName))
+                         toCtx.calledBy.push(fromName);
+                 }
+             }
+         }
+     }
+     return internalEdges;
+ }
+ function buildL1Context(db, communityId, algorithm, rootPath) {
+     const members = db.prepare("SELECT content_hash FROM communities WHERE community_id = ? AND level = 0 AND algorithm = ?").all(communityId, algorithm);
+     if (members.length === 0)
+         return null;
+     const memberHashes = members.map((m) => m.content_hash);
+     const memberHashSet = new Set(memberHashes);
+     const getFnInfo = db.prepare("SELECT function_name, file_path, language, entity_type FROM file_functions WHERE content_hash = ? LIMIT 1");
+     const getAnalysis = db.prepare("SELECT analysis_type, analysis FROM function_analysis WHERE content_hash = ?");
+     const memberContexts = [];
+     for (const hash of memberHashes) {
+         const fnInfo = getFnInfo.get(hash);
+         if (!fnInfo)
+             continue;
+         const analysisRows = getAnalysis.all(hash);
+         let doc = {};
+         let sec = {};
+         for (const row of analysisRows) {
+             const parsed = JSON.parse(row.analysis);
+             if (row.analysis_type === "documentation")
+                 doc = parsed;
+             else if (row.analysis_type === "security")
+                 sec = parsed;
+         }
+         memberContexts.push({
+             contentHash: hash,
+             functionName: fnInfo.function_name,
+             filePath: fnInfo.file_path,
+             language: fnInfo.language,
+             entityType: fnInfo.entity_type,
+             description: doc.description || "",
+             params: doc.params || [],
+             returns: doc.returns || { type: "unknown", description: "" },
+             dataTags: sec.dataTags || [],
+             securityFlags: sec.securityFlags || [],
+             callsTo: [],
+             calledBy: [],
+         });
+     }
+     if (memberContexts.length === 0)
+         return null;
+     const internalEdges = resolveInternalEdges(db, memberHashSet, memberContexts);
+     return {
+         communityId,
+         algorithm,
+         members: memberContexts,
+         inputHash: computeL1InputHash(memberHashes),
+         internalEdges,
+         packageInfo: rootPath
+             ? formatPackageBreakdown(computePackageBreakdown(memberContexts, rootPath))
+             : undefined,
+     };
+ }
+ /**
+  * Build L1 context using raw function source code instead of analysis metadata.
+  * Used when --raw-source flag is set. Source code comes from the sourceMap
+  * (contentHash → sourceCode) which is built from FunctionInfo[] during extraction.
+  */
+ function buildL1RawContext(db, communityId, algorithm, sourceMap, rootPath) {
+     const members = db.prepare("SELECT content_hash FROM communities WHERE community_id = ? AND level = 0 AND algorithm = ?").all(communityId, algorithm);
+     if (members.length === 0)
+         return null;
+     const memberHashes = members.map((m) => m.content_hash);
+     const memberHashSet = new Set(memberHashes);
+     const getFnInfo = db.prepare("SELECT function_name, file_path, language, entity_type FROM file_functions WHERE content_hash = ? LIMIT 1");
+     const getAnalysis = db.prepare("SELECT analysis_type, analysis FROM function_analysis WHERE content_hash = ?");
+     const memberContexts = [];
+     for (const hash of memberHashes) {
+         const fnInfo = getFnInfo.get(hash);
+         if (!fnInfo)
+             continue;
+         const source = sourceMap.get(hash);
+         if (!source)
+             continue;
+         // Load security analysis if available (still useful for meta even in raw-source mode)
+         const analysisRows = getAnalysis.all(hash);
+         let sec = {};
+         for (const row of analysisRows) {
+             if (row.analysis_type === "security")
+                 sec = JSON.parse(row.analysis);
+         }
+         memberContexts.push({
+             contentHash: hash,
+             functionName: fnInfo.function_name,
+             filePath: fnInfo.file_path,
+             language: fnInfo.language,
+             entityType: fnInfo.entity_type,
+             description: source, // Raw source code instead of analysis description
+             params: [],
+             returns: { type: "", description: "" },
+             dataTags: sec.dataTags || [],
+             securityFlags: sec.securityFlags || [],
+             callsTo: [],
+             calledBy: [],
+         });
+     }
+     if (memberContexts.length === 0)
+         return null;
+     const internalEdges = resolveInternalEdges(db, memberHashSet, memberContexts);
+     return {
+         communityId,
+         algorithm,
+         members: memberContexts,
+         inputHash: computeL1InputHash(memberHashes, true),
+         internalEdges,
+         packageInfo: rootPath
+             ? formatPackageBreakdown(computePackageBreakdown(memberContexts, rootPath))
+             : undefined,
+     };
+ }
+ // ============ PROMPT TEMPLATES ============
+ const MODEL_VERSION = "claude-sonnet-4-20250514";
+ function buildL1Prompt(context) {
+     const memberDescriptions = context.members.map((m) => {
+         let desc = `### ${m.functionName} (${m.filePath})`;
+         if (m.description)
+             desc += `\n${m.description}`;
+         if (m.params.length > 0) {
+             desc += `\nParams: ${m.params.map((p) => `${p.name}: ${p.type}`).join(", ")}`;
+         }
+         if (m.returns.type !== "unknown")
+             desc += `\nReturns: ${m.returns.type} — ${m.returns.description}`;
+         if (m.dataTags.length > 0)
+             desc += `\nData tags: ${m.dataTags.join(", ")}`;
+         if (m.securityFlags.length > 0)
+             desc += `\nSecurity flags: ${m.securityFlags.join(", ")}`;
+         if (m.callsTo.length > 0)
+             desc += `\nCalls: ${m.callsTo.join(", ")}`;
+         if (m.calledBy.length > 0)
+             desc += `\nCalled by: ${m.calledBy.join(", ")}`;
+         return desc;
+     }).join("\n\n");
+     const edgesSummary = context.internalEdges.length > 0
+         ? `\nInternal relationships:\n${context.internalEdges.map((e) => ` ${e.from} -> ${e.to} (${e.type})`).join("\n")}`
+         : "";
+     const count = context.members.length;
+     const lengthGuidance = count <= 5
+         ? "This is a small group — keep the documentation concise (150-250 words)."
+         : count <= 10
+             ? "This is a medium group — write moderately detailed documentation (300-600 words)."
+             : "This is a large, complex group — write thorough documentation (500-1000 words).";
+     const packageLine = context.packageInfo
+         ? `\nPackage distribution: ${context.packageInfo}\n`
+         : "";
+     return `You are a senior engineer writing internal documentation for your team. A group of ${count} functions have been identified as a cohesive subsystem based on their call relationships and co-location in code.
+
+ Write documentation that helps a junior engineer understand what this subsystem does, how it works, and what to watch out for. ${lengthGuidance}
+ ${packageLine}
+ ${memberDescriptions}
+ ${edgesSummary}
+
+ Return a JSON object with exactly these fields:
+ - "title": string — short descriptive name for this subsystem (2-5 words, e.g. "Authentication Flow", "Database Connection Pool")
+ - "documentation": string — rich markdown documentation. Use ## for the title, ### for sections like "How It Works", "Key Functions", "Data Flow", "Security Notes" as appropriate. Write in a clear, helpful tone.
+ - "meta": object with:
+   - "securityPosture": string — one sentence about security characteristics (or "" if none)
+   - "dataClassification": string[] — what kind of data this handles (e.g. ["credentials", "pii"])
+   - "boundaries": string[] — external systems or interfaces (e.g. ["database", "external API"])
+   - "keyFunctions": string[] — names of the 2-3 most important functions
+   - "complexity": "low" | "medium" | "high"
+
+ CRITICAL: Return ONLY the raw JSON object. No markdown code fences, no explanation. Just the { ... } object directly.`;
+ }
+ function buildL1RawPrompt(context) {
+     const memberCode = context.members.map((m) => {
+         const lang = m.language || "typescript";
+         return `### ${m.functionName} (${m.filePath})\n\`\`\`${lang}\n${m.description}\n\`\`\``;
+     }).join("\n\n");
+     const edgesSummary = context.internalEdges.length > 0
+         ? `\nInternal call relationships:\n${context.internalEdges.map((e) => ` ${e.from} -> ${e.to} (${e.type})`).join("\n")}`
+         : "";
+     const count = context.members.length;
+     const lengthGuidance = count <= 5
+         ? "This is a small group — keep the documentation concise (200-350 words)."
+         : count <= 10
+             ? "This is a medium group — write moderately detailed documentation (400-700 words)."
+             : "This is a large, complex group — write thorough documentation (600-1200 words).";
+     const packageLine = context.packageInfo
+         ? `\nPackage distribution: ${context.packageInfo}\n`
+         : "";
+     return `You are a senior engineer writing internal documentation for your team. A group of ${count} functions have been identified as a cohesive subsystem based on their call relationships and co-location in code.
+
+ Analyze the source code below and write documentation that helps a junior engineer understand what this subsystem does, how it works, and what to watch out for. ${lengthGuidance}
+ ${packageLine}
+ ${memberCode}
+ ${edgesSummary}
+
+ Return a JSON object with exactly these fields:
+ - "title": string — short descriptive name for this subsystem (2-5 words, e.g. "Authentication Flow", "Database Connection Pool")
+ - "documentation": string — rich markdown documentation. Use ## for the title, ### for sections like "How It Works", "Key Functions", "Data Flow", "Security Notes" as appropriate. Write in a clear, helpful tone.
+ - "meta": object with:
+   - "securityPosture": string — one sentence about security characteristics (or "" if none)
+   - "dataClassification": string[] — what kind of data this handles (e.g. ["credentials", "pii"])
+   - "boundaries": string[] — external systems or interfaces (e.g. ["database", "external API"])
+   - "keyFunctions": string[] — names of the 2-3 most important functions
+   - "complexity": "low" | "medium" | "high"
+
+ CRITICAL: Return ONLY the raw JSON object. No markdown code fences, no explanation. Just the { ... } object directly.`;
+ }
+ function buildL2Prompt(context) {
+     const subsystemDocs = context.l1Summaries.map((s) => {
+         const pkgLine = s.packageInfo ? ` [${s.packageInfo}]` : "";
+         return `### ${s.title}${pkgLine}\n${s.documentation}`;
+     }).join("\n\n---\n\n");
+     const crossEdges = context.crossEdges.length > 0
+         ? `\nCross-subsystem connections:\n${context.crossEdges.map((e) => ` ${e.fromCommunity} <-> ${e.toCommunity} (${e.count} connections)`).join("\n")}`
+         : "";
+     return `You are a senior engineer writing system-level documentation that serves as a **standalone overview page** for a group of related subsystems. ${context.l1Summaries.length} subsystems have been identified as working together to form a larger system.
+
+ This documentation will be displayed as the top-level page for this system group, with links to each subsystem's detailed documentation below it. Write it as a comprehensive overview that orients a reader BEFORE they drill into subsystem details.
+
+ Below is the documentation for each subsystem:
+
+ ${subsystemDocs}
+ ${crossEdges}
+
+ Write documentation that helps a junior engineer understand:
+ 1. What this system group does overall (high-level purpose)
+ 2. How the subsystems relate to each other (which depends on which, data flow between them)
+ 3. Key architectural patterns and boundaries
+ 4. What to understand before diving into any individual subsystem
+
+ Return a JSON object with exactly these fields:
+ - "title": string — short descriptive name for this system (2-5 words)
+ - "documentation": string — rich markdown documentation. Use ## for the title, ### for sections like "Overview", "How Subsystems Connect", "Data Flow", "Key Patterns", "Security Considerations". This should work as a standalone page.
+ - "meta": object with:
+   - "securityPosture": string — system-level security assessment
+   - "dataClassification": string[] — aggregate data types handled
+   - "boundaries": string[] — external interfaces of the system
+   - "keyFunctions": string[] — most important functions across subsystems
+   - "complexity": "low" | "medium" | "high"
+
+ CRITICAL: Return ONLY the raw JSON object. No markdown code fences, no explanation.`;
+ }
+ function buildL3Prompt(l2Summaries) {
+     const systemDocs = l2Summaries.map((s) => {
+         return `### ${s.title}\n${s.documentation}`;
+     }).join("\n\n---\n\n");
+     return `You are a senior engineer writing the **top-level architecture overview page** for a codebase. This will be the first page a new team member reads. The codebase has ${l2Summaries.length} major system(s).
+
+ Below is the documentation for each system:
+
+ ${systemDocs}
+
+ Write an architecture overview that serves as a standalone landing page. It should:
+ 1. Explain what this software does in 1-2 sentences
+ 2. Describe the major systems and how they're organized
+ 3. Explain key architectural decisions and patterns
+ 4. Cover cross-cutting concerns (security, data flow, shared infrastructure)
+ 5. Guide a new team member on where to start reading
+
+ Return a JSON object with exactly these fields:
+ - "title": string — name for this architecture (e.g. "Ophan Security Analysis Platform")
+ - "documentation": string — rich markdown architecture overview. Use ## for the title, ### for sections like "Overview", "Major Systems", "Architecture Decisions", "Cross-Cutting Concerns", "Getting Started". This is a standalone page.
+ - "meta": object with:
+   - "securityPosture": string — overall security assessment
+   - "dataClassification": string[] — all data types handled across the codebase
+   - "boundaries": string[] — all external interfaces
+   - "keyFunctions": string[] — most critical functions in the codebase
+   - "complexity": "low" | "medium" | "high"
+
+ CRITICAL: Return ONLY the raw JSON object. No markdown code fences, no explanation.`;
+ }
+ // ============ CLAUDE CALLERS ============
+ function formatError(err) {
+     if (err instanceof Error)
+         return err.message;
+     if (typeof err === "object" && err !== null) {
+         const obj = err;
+         if (obj.message)
+             return String(obj.message);
+         if (obj.error && typeof obj.error === "object") {
+             const inner = obj.error;
+             return inner.message ? String(inner.message) : JSON.stringify(obj.error);
+         }
+         if (obj.status)
+             return `HTTP ${obj.status}${obj.type ? ` (${obj.type})` : ""}`;
+         return JSON.stringify(err);
+     }
+     return String(err);
+ }
+ async function callClaude(prompt) {
+     const anthropic = new sdk_1.default();
+     const response = await (0, p_retry_1.default)(() => anthropic.messages.create({
+         model: MODEL_VERSION,
+         max_tokens: 4096,
+         messages: [{ role: "user", content: prompt }],
+     }), {
+         retries: 4,
+         minTimeout: 2000,
+         onFailedAttempt: (err) => {
+             const status = err.status;
+             // Retry on rate limit (429) and overloaded (529)
+             if (status !== 429 && status !== 529)
+                 throw err;
+         },
+     });
+     const text = response.content[0].type === "text" ? response.content[0].text : "";
+     return text.replace(/^```(?:json)?\s*\n?/i, "").replace(/\n?```\s*$/i, "").trim();
+ }
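// Behavior notes (a reading of the code above, not new API claims): p-retry
// makes up to 4 extra attempts, but onFailedAttempt rethrows anything that is
// not a 429 (rate limit) or 529 (overloaded), which aborts retries for every
// other error. The trailing replace() calls strip accidental markdown fences
// from the model reply before JSON parsing, e.g.:
//
//   "```json\n{\"title\":\"X\"}\n```"  ->  "{\"title\":\"X\"}"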
+ async function summarizeL1(context) {
+     const text = await callClaude(buildL1Prompt(context));
+     try {
+         return schemas_1.L1Summary.parse(JSON.parse(text));
+     }
+     catch {
+         return schemas_1.L1Summary.parse({});
+     }
+ }
+ async function summarizeL1Raw(context) {
+     const text = await callClaude(buildL1RawPrompt(context));
+     try {
+         return schemas_1.L1Summary.parse(JSON.parse(text));
+     }
+     catch {
+         return schemas_1.L1Summary.parse({});
+     }
+ }
+ async function summarizeL2(context) {
+     const text = await callClaude(buildL2Prompt(context));
+     try {
+         return schemas_1.L2Summary.parse(JSON.parse(text));
+     }
+     catch {
+         return schemas_1.L2Summary.parse({});
+     }
+ }
+ async function summarizeL3(summaries) {
+     const text = await callClaude(buildL3Prompt(summaries));
+     try {
+         return schemas_1.L3Summary.parse(JSON.parse(text));
+     }
+     catch {
+         return schemas_1.L3Summary.parse({});
+     }
+ }
+ // ============ CROSS-EDGE COMPUTATION ============
+ function computeCrossEdges(db, l1Summaries, algorithm) {
+     const hashToCommunity = new Map();
+     for (const { communityId } of l1Summaries) {
+         const members = db.prepare("SELECT content_hash FROM communities WHERE community_id = ? AND level = 0 AND algorithm = ?").all(communityId, algorithm);
+         for (const { content_hash } of members) {
+             hashToCommunity.set(content_hash, communityId);
+         }
+     }
+     const crossCounts = new Map();
+     const edges = db.prepare("SELECT source_hash, target_hash FROM function_edges").all();
+     const communityTitles = new Map(l1Summaries.map((s) => [s.communityId, s.title]));
+     for (const { source_hash, target_hash } of edges) {
+         const fromCommunity = hashToCommunity.get(source_hash);
+         const toCommunity = hashToCommunity.get(target_hash);
+         if (fromCommunity && toCommunity && fromCommunity !== toCommunity) {
+             const key = [fromCommunity, toCommunity].sort().join("|");
+             crossCounts.set(key, (crossCounts.get(key) || 0) + 1);
+         }
+     }
+     return [...crossCounts.entries()].map(([key, count]) => {
+         const [a, b] = key.split("|");
+         return {
+             fromCommunity: communityTitles.get(a) || a,
+             toCommunity: communityTitles.get(b) || b,
+             count,
+         };
+     });
+ }
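// Sketch of the symmetric pair counting above (hypothetical hashes and IDs):
// A->B and B->A edges accumulate under one sorted key, so the returned counts
// are undirected.
//
//   // hashToCommunity: { a1: "A", a2: "A", b1: "B", b2: "B", c1: "C" }
//   // function_edges:  a1 -> b1, b2 -> a2, a1 -> c1
//   // crossCounts:     { "A|B": 2, "A|C": 1 }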
+ // ============ CROSS-CUTTING CONCERN DETECTION ============
+ /**
+  * Detect cross-cutting concerns by analyzing cross-community edges for shared
+  * dataTags/securityFlags. Returns concerns sorted by significance (security first,
+  * then by affected community count).
+  *
+  * Significance filter: only returns concerns with 2+ affected community pairs
+  * OR at least one bridge function above median centrality.
+  */
+ function detectCrossCuttingConcerns(db, algorithm, centralityScores, l1Communities) {
+     if (l1Communities.length < 2)
+         return [];
+     // Build hash→community lookup
+     const hashToCommunity = new Map();
+     for (const { communityId } of l1Communities) {
+         const members = db.prepare("SELECT content_hash FROM communities WHERE community_id = ? AND level = 0 AND algorithm = ?").all(communityId, algorithm);
+         for (const { content_hash } of members) {
+             hashToCommunity.set(content_hash, communityId);
+         }
+     }
+     const communityTitles = new Map(l1Communities.map((c) => [c.communityId, c.title]));
+     // Build hash→{dataTags, securityFlags, name} lookup
+     const hashToMeta = new Map();
+     const allHashes = [...hashToCommunity.keys()];
+     const getAnalysis = db.prepare("SELECT analysis_type, analysis FROM function_analysis WHERE content_hash = ?");
+     const getFnName = db.prepare("SELECT function_name FROM file_functions WHERE content_hash = ? LIMIT 1");
+     for (const hash of allHashes) {
+         const nameRow = getFnName.get(hash);
+         const analysisRows = getAnalysis.all(hash);
+         let dataTags = [];
+         let securityFlags = [];
+         for (const row of analysisRows) {
+             if (row.analysis_type === "security") {
+                 const parsed = JSON.parse(row.analysis);
+                 dataTags = parsed.dataTags || [];
+                 securityFlags = parsed.securityFlags || [];
+             }
+         }
+         hashToMeta.set(hash, {
+             functionName: nameRow?.function_name || hash.slice(0, 8),
+             dataTags,
+             securityFlags,
+         });
+     }
+     // Scan cross-community edges and collect tags/flags per tag
+     const tagToConcern = new Map();
+     const edges = db.prepare("SELECT source_hash, target_hash FROM function_edges").all();
+     for (const { source_hash, target_hash } of edges) {
+         const fromCommunity = hashToCommunity.get(source_hash);
+         const toCommunity = hashToCommunity.get(target_hash);
+         if (!fromCommunity || !toCommunity || fromCommunity === toCommunity)
+             continue;
+         const sourceMeta = hashToMeta.get(source_hash);
+         const targetMeta = hashToMeta.get(target_hash);
+         if (!sourceMeta || !targetMeta)
+             continue;
+         const pairKey = [fromCommunity, toCommunity].sort().join("|");
+         // Collect shared tags from both endpoints
+         const allTags = new Set([...sourceMeta.dataTags, ...targetMeta.dataTags]);
+         const allFlags = new Set([...sourceMeta.securityFlags, ...targetMeta.securityFlags]);
+         for (const tag of allTags) {
+             if (!tagToConcern.has(tag)) {
+                 tagToConcern.set(tag, {
+                     concernType: "data_flow",
+                     bridgeHashes: new Set(),
+                     communityPairs: new Set(),
+                     crossEdgeCount: 0,
+                 });
+             }
+             const c = tagToConcern.get(tag);
+             c.bridgeHashes.add(source_hash);
+             c.bridgeHashes.add(target_hash);
+             c.communityPairs.add(pairKey);
+             c.crossEdgeCount++;
+         }
+         for (const flag of allFlags) {
+             if (!tagToConcern.has(flag)) {
+                 tagToConcern.set(flag, {
+                     concernType: "security",
+                     bridgeHashes: new Set(),
+                     communityPairs: new Set(),
+                     crossEdgeCount: 0,
+                 });
+             }
+             const c = tagToConcern.get(flag);
+             c.bridgeHashes.add(source_hash);
+             c.bridgeHashes.add(target_hash);
+             c.communityPairs.add(pairKey);
+             c.crossEdgeCount++;
+         }
+     }
+     // Compute median centrality for significance filter
+     const centralityValues = [...centralityScores.values()].filter((v) => v > 0);
+     centralityValues.sort((a, b) => a - b);
+     const medianCentrality = centralityValues.length > 0
+         ? centralityValues[Math.floor(centralityValues.length / 2)]
+         : 0;
+     // Filter and build results
+     const concerns = [];
+     for (const [tag, data] of tagToConcern) {
+         // Check significance: 2+ community pairs OR high-centrality bridge
+         const hasHighCentralityBridge = [...data.bridgeHashes].some((hash) => (centralityScores.get(hash) || 0) > medianCentrality);
+         if (data.communityPairs.size < 2 && !hasHighCentralityBridge)
+             continue;
+         // Build bridge functions list with centrality
+         const bridgeFunctions = [...data.bridgeHashes].map((hash) => {
+             const meta = hashToMeta.get(hash);
+             return {
+                 contentHash: hash,
+                 functionName: meta.functionName,
+                 communityId: hashToCommunity.get(hash) || "",
+                 centrality: centralityScores.get(hash) || 0,
+             };
+         }).sort((a, b) => b.centrality - a.centrality);
+         // Build affected communities list
+         const affectedCommunityIds = new Set();
+         for (const pair of data.communityPairs) {
+             const [a, b] = pair.split("|");
+             affectedCommunityIds.add(a);
+             affectedCommunityIds.add(b);
+         }
+         const affectedCommunities = [...affectedCommunityIds].map((id) => ({
+             communityId: id,
+             communityTitle: communityTitles.get(id) || id,
+         }));
+         concerns.push({
+             tag,
+             concernType: data.concernType,
+             bridgeFunctions,
+             affectedCommunities,
+             crossEdgeCount: data.crossEdgeCount,
+         });
+     }
+     // Sort: security concerns first, then by affected community count descending
+     concerns.sort((a, b) => {
+         if (a.concernType !== b.concernType) {
+             return a.concernType === "security" ? -1 : 1;
+         }
+         return b.affectedCommunities.length - a.affectedCommunities.length;
+     });
+     return concerns;
+ }
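// Significance filter, worked through (hypothetical numbers): a tag that spans
// only one community pair still survives when one of its bridge functions sits
// above the median positive centrality; otherwise it is dropped before any
// prompting happens.
//
//   // communityPairs = { "A|B" }            (size 1, so the pair test fails)
//   // bridge centralities = [0.9, 0.1], medianCentrality = 0.4
//   // 0.9 > 0.4  => hasHighCentralityBridge => the concern is kept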
+ // ============ CROSS-CUTTING CONCERN PROMPT + CALLER ============
+ function buildCCPrompt(concern, communityDocs) {
+     const tagLabel = concern.concernType === "security"
+         ? (schemas_1.SECURITY_FLAG_LABELS[concern.tag] || concern.tag)
+         : (schemas_1.DATA_TAG_LABELS[concern.tag] || concern.tag);
+     const bridgeList = concern.bridgeFunctions.slice(0, 10).map((b) => {
+         const communityTitle = concern.affectedCommunities.find((c) => c.communityId === b.communityId)?.communityTitle || b.communityId;
+         return ` - ${b.functionName} (community: ${communityTitle}, centrality: ${b.centrality.toFixed(3)})`;
+     }).join("\n");
+     const communityExcerpts = concern.affectedCommunities.map((c) => {
+         const doc = communityDocs.get(c.communityId);
+         const excerpt = doc ? doc.slice(0, 500) + (doc.length > 500 ? "..." : "") : "(no documentation)";
+         return `### ${c.communityTitle}\n${excerpt}`;
+     }).join("\n\n");
+     return `You are a senior engineer documenting cross-cutting concerns in a codebase. A ${concern.concernType === "security" ? "security vulnerability pattern" : "data flow pattern"} has been detected spanning ${concern.affectedCommunities.length} subsystems.
+
+ Concern: "${tagLabel}" (${concern.concernType})
+ Cross-community edges: ${concern.crossEdgeCount}
+
+ Bridge functions (high-centrality functions connecting subsystems):
+ ${bridgeList}
+
+ Affected subsystems:
+ ${communityExcerpts}
+
+ Write documentation (200-400 words) that helps engineers understand:
+ 1. What this cross-cutting ${concern.concernType === "security" ? "vulnerability" : "data flow"} is and why it matters
+ 2. Which subsystems are affected and how they're connected through this concern
+ 3. What the bridge functions do in this context
+ 4. ${concern.concernType === "security" ? "What remediation steps should be considered" : "How data flows between the subsystems through this concern"}
+
+ Return a JSON object with exactly these fields:
+ - "title": string — descriptive name (e.g. "Cross-System PII Data Flow", "SQL Injection Attack Surface")
+ - "documentation": string — rich markdown documentation with ## title and ### sections
+ - "severity": "low" | "medium" | "high" — based on breadth and security impact
+
+ CRITICAL: Return ONLY the raw JSON object. No markdown code fences, no explanation.`;
+ }
+ async function summarizeCC(concern, communityDocs) {
+     const text = await callClaude(buildCCPrompt(concern, communityDocs));
+     try {
+         const parsed = JSON.parse(text);
+         return schemas_1.CrossCuttingConcernSummary.parse({
+             ...parsed,
+             concernType: concern.concernType,
+             tag: concern.tag,
+             bridgeFunctions: concern.bridgeFunctions,
+             affectedCommunities: concern.affectedCommunities,
+         });
+     }
+     catch {
+         return schemas_1.CrossCuttingConcernSummary.parse({
+             concernType: concern.concernType,
+             tag: concern.tag,
+             bridgeFunctions: concern.bridgeFunctions,
+             affectedCommunities: concern.affectedCommunities,
+         });
+     }
+ }
+ // ============ ORCHESTRATION ============
+ async function summarizeCommunities(db, options = {}) {
+     const config = { ...exports.DEFAULT_SUMMARIZE_CONFIG, ...options.config };
+     const { onProgress } = options;
+     const algorithm = config.algorithm;
+     const rawMode = !!options.sourceMap;
+     const rootPath = options.rootPath;
+     const doL1 = options._summarizeL1 || (rawMode ? summarizeL1Raw : summarizeL1);
+     const doL2 = options._summarizeL2 || summarizeL2;
+     const doL3 = options._summarizeL3 || summarizeL3;
+     const result = {
+         l1Summarized: 0, l1Cached: 0, l1DriftSkipped: 0,
+         l2Summarized: 0, l2Cached: 0,
+         l3Summarized: 0, l3Cached: 0,
+         ccDetected: 0, ccSummarized: 0, ccCached: 0,
+     };
+     // Track active community IDs at each level for orphan cleanup
+     const activeL1Ids = new Set();
+     const activeL2Ids = new Set();
+     const activeCCIds = new Set();
+     // ===== L1: Subsystem summaries =====
+     onProgress?.("Building L1 subsystem summaries...");
+     const l0Communities = db.prepare("SELECT DISTINCT community_id FROM communities WHERE level = 0 AND algorithm = ? AND community_id != '__dissolved'").all(algorithm);
+     const l1Results = [];
+     // Pass 1: build contexts, check cache, collect work
+     const l1Work = [];
+     for (const { community_id } of l0Communities) {
+         const context = rawMode
+             ? buildL1RawContext(db, community_id, algorithm, options.sourceMap, rootPath)
+             : buildL1Context(db, community_id, algorithm, rootPath);
+         if (!context)
+             continue;
+         // Skip communities where no members have descriptions (or source code in raw mode)
+         if (config.skipUnanalyzed && context.members.every((m) => !m.description)) {
+             onProgress?.(` Skipping community ${community_id} (no ${rawMode ? "source code" : "analysis data — run ophan analyze first"})`);
+             continue;
+         }
+         // Gate 1: Check cache (exact input hash match)
+         const existing = loadSummary(db, community_id, 1, algorithm);
+         if (existing && existing.inputHash === context.inputHash) {
+             result.l1Cached++;
+             activeL1Ids.add(community_id);
+             l1Results.push({
+                 communityId: community_id,
+                 summary: schemas_1.L1Summary.parse(existing.summary),
+                 inputHash: context.inputHash,
+                 packageInfo: context.packageInfo,
+             });
+             continue;
+         }
+         // Gate 2: Signature drift check (input hash changed but community may not have meaningfully drifted)
+         if (existing) {
+             const oldSigs = loadCommunitySignatures(db, community_id, algorithm);
+             if (oldSigs && !hasCommunityDrifted(oldSigs, context.members)) {
+                 // Not drifted — reuse old summary, keep OLD inputHash to prevent L2/L3 cascade
+                 result.l1DriftSkipped++;
+                 activeL1Ids.add(community_id);
+                 l1Results.push({
+                     communityId: community_id,
+                     summary: schemas_1.L1Summary.parse(existing.summary),
+                     inputHash: existing.inputHash,
+                     packageInfo: context.packageInfo,
+                 });
+                 onProgress?.(` Community ${community_id}: drift-skipped (reusing summary)`);
+                 continue;
+             }
+         }
+         l1Work.push({ context, communityId: community_id });
+     }
+     // Pass 2: parallel Claude calls for cache misses
+     const l1Limit = (0, p_limit_1.default)(5);
+     await Promise.all(l1Work.map((work) => l1Limit(async () => {
+         onProgress?.(` Summarizing community ${work.communityId} (${work.context.members.length} functions)...`);
+         try {
+             const summary = await doL1(work.context);
+             storeSummary(db, work.communityId, 1, algorithm, work.context.inputHash, summary, MODEL_VERSION);
+             storeCommunitySignatures(db, work.communityId, algorithm, work.context.members);
+             result.l1Summarized++;
+             activeL1Ids.add(work.communityId);
+             l1Results.push({ communityId: work.communityId, summary, inputHash: work.context.inputHash, packageInfo: work.context.packageInfo });
+         }
+         catch (err) {
+             onProgress?.(` ⚠ Failed to summarize community ${work.communityId}: ${formatError(err)}`);
+             result.l1Failed = (result.l1Failed ?? 0) + 1;
+             activeL1Ids.add(work.communityId);
+         }
+     })));
+     // Clean up orphaned L1 summaries
+     cleanupOrphanedSummaries(db, algorithm, 1, activeL1Ids);
+     // ===== L2: System summaries =====
+     if (l1Results.length < 2)
+         return result;
+     onProgress?.("Building L2 system summaries...");
+     // Check for L1 hierarchy assignments
+     const l1HierarchyRows = db.prepare("SELECT content_hash, community_id FROM communities WHERE level = 1 AND algorithm = ? AND community_id != '__dissolved'").all(algorithm);
+     const l0ToGroup = new Map();
+     for (const row of l1HierarchyRows) {
+         l0ToGroup.set(row.content_hash, row.community_id);
+     }
+     // Group L1 summaries by their L1 parent group
+     const distinctGroups = new Set(l0ToGroup.values());
+     if (distinctGroups.size >= 2) {
+         // Per-group L2 summaries
+         const groupedL1s = new Map();
+         for (const l1 of l1Results) {
+             const groupId = l0ToGroup.get(l1.communityId) ?? "ungrouped";
+             const group = groupedL1s.get(groupId) || [];
+             group.push(l1);
+             groupedL1s.set(groupId, group);
+         }
+         // Pass 1: check cache, collect L2 work
+         const l2Work = [];
+         for (const [groupId, groupL1s] of groupedL1s) {
+             if (groupL1s.length < 1)
+                 continue;
+             const l2CommunityId = `group_${groupId}`;
+             const l2InputHash = computeL2InputHash(groupL1s.map((s) => s.inputHash));
+             const existingL2 = loadSummary(db, l2CommunityId, 2, algorithm);
+             if (existingL2 && existingL2.inputHash === l2InputHash) {
+                 result.l2Cached++;
+                 activeL2Ids.add(l2CommunityId);
+                 continue;
+             }
+             const crossEdges = computeCrossEdges(db, groupL1s.map((s) => ({ communityId: s.communityId, title: s.summary.title })), algorithm);
+             const l2Context = {
+                 communityId: l2CommunityId,
+                 algorithm,
+                 l1Summaries: groupL1s.map((s) => ({
+                     communityId: s.communityId,
+                     title: s.summary.title,
+                     documentation: s.summary.documentation,
+                     packageInfo: s.packageInfo,
+                 })),
+                 crossEdges,
+                 inputHash: l2InputHash,
+             };
+             l2Work.push({ groupId, l2Context, l2CommunityId });
+         }
+         // Pass 2: parallel Claude calls for L2 cache misses
+         const l2Limit = (0, p_limit_1.default)(3);
+         await Promise.all(l2Work.map((work) => l2Limit(async () => {
+             onProgress?.(` Summarizing group ${work.groupId} from ${work.l2Context.l1Summaries.length} subsystems...`);
+             try {
+                 const l2Summary = await doL2(work.l2Context);
+                 storeSummary(db, work.l2CommunityId, 2, algorithm, work.l2Context.inputHash, l2Summary, MODEL_VERSION);
+                 result.l2Summarized++;
+                 activeL2Ids.add(work.l2CommunityId);
+             }
+             catch (err) {
+                 onProgress?.(` ⚠ Failed to summarize group ${work.groupId}: ${formatError(err)}`);
+                 activeL2Ids.add(work.l2CommunityId);
+             }
+         })));
+     }
+     else {
+         // Fallback: single system_0 (no hierarchy or only 1 group)
+         const l2InputHash = computeL2InputHash(l1Results.map((s) => s.inputHash));
+         const l2CommunityId = "system_0";
+         const existingL2 = loadSummary(db, l2CommunityId, 2, algorithm);
+         if (existingL2 && existingL2.inputHash === l2InputHash) {
+             result.l2Cached++;
+             activeL2Ids.add(l2CommunityId);
+         }
+         else {
+             const crossEdges = computeCrossEdges(db, l1Results.map((s) => ({ communityId: s.communityId, title: s.summary.title })), algorithm);
+             const l2Context = {
+                 communityId: l2CommunityId,
+                 algorithm,
+                 l1Summaries: l1Results.map((s) => ({
+                     communityId: s.communityId,
+                     title: s.summary.title,
+                     documentation: s.summary.documentation,
+                     packageInfo: s.packageInfo,
+                 })),
+                 crossEdges,
+                 inputHash: l2InputHash,
+             };
+             onProgress?.(` Summarizing system from ${l1Results.length} subsystems...`);
+             try {
+                 const l2Summary = await doL2(l2Context);
+                 storeSummary(db, l2CommunityId, 2, algorithm, l2InputHash, l2Summary, MODEL_VERSION);
+                 result.l2Summarized++;
+                 activeL2Ids.add(l2CommunityId);
+             }
+             catch (err) {
+                 onProgress?.(` ⚠ Failed to summarize system: ${formatError(err)}`);
+                 activeL2Ids.add(l2CommunityId);
+             }
+         }
+     }
+     // ===== L3: Architecture overview =====
+     onProgress?.("Building L3 architecture overview...");
+     const allL2 = loadAllSummaries(db, algorithm, 2);
+     if (allL2.length === 0)
+         return result;
+     const l3InputHash = computeL3InputHash(allL2.map((s) => s.inputHash));
+     const l3CommunityId = "architecture";
+     const existingL3 = loadSummary(db, l3CommunityId, 3, algorithm);
+     if (existingL3 && existingL3.inputHash === l3InputHash) {
+         result.l3Cached++;
+     }
+     else {
+         const l2Summaries = allL2.map((s) => schemas_1.L2Summary.parse(s.summary));
+         onProgress?.(" Generating architecture overview...");
+         try {
+             const l3Summary = await doL3(l2Summaries);
+             storeSummary(db, l3CommunityId, 3, algorithm, l3InputHash, l3Summary, MODEL_VERSION);
+             result.l3Summarized++;
+         }
+         catch (err) {
+             onProgress?.(` ⚠ Failed to generate architecture overview: ${formatError(err)}`);
+         }
+     }
+     // Clean up orphaned L2 summaries
+     cleanupOrphanedSummaries(db, algorithm, 2, activeL2Ids);
+     // ===== Cross-Cutting Concerns =====
+     if (l1Results.length >= 2) {
+         onProgress?.("Detecting cross-cutting concerns...");
+         // Load edges and build graph for centrality
+         const edgeRows = db.prepare("SELECT source_hash, target_hash, edge_type, weight FROM function_edges").all();
+         const graphEdges = edgeRows.map((r) => ({
+             sourceHash: r.source_hash,
+             targetHash: r.target_hash,
+             edgeType: r.edge_type,
+             weight: r.weight,
+         }));
+         const graph = (0, graph_1.buildGraph)(graphEdges);
+         const centrality = (0, graph_1.computeCentrality)(graph);
+         const concerns = detectCrossCuttingConcerns(db, algorithm, centrality.scores, l1Results.map((s) => ({ communityId: s.communityId, title: s.summary.title })));
+         result.ccDetected = concerns.length;
+         if (concerns.length > 0) {
+             const communityDocs = new Map(l1Results.map((r) => [r.communityId, r.summary.documentation]));
+             const doCC = options._summarizeCC || summarizeCC;
+             // Pass 1: check cache, collect CC work
+             const ccWork = [];
+             for (const concern of concerns) {
+                 const inputHash = computeCCInputHash(concern);
+                 const ccId = `concern_${concern.tag}`;
+                 const existing = loadSummary(db, ccId, 10, algorithm);
+                 if (existing && existing.inputHash === inputHash) {
+                     result.ccCached++;
+                     activeCCIds.add(ccId);
+                     continue;
+                 }
+                 ccWork.push({ concern, inputHash, ccId });
+             }
+             // Pass 2: parallel Claude calls for CC cache misses
+             const ccLimit = (0, p_limit_1.default)(5);
+             await Promise.all(ccWork.map((work) => ccLimit(async () => {
+                 onProgress?.(` Documenting concern: ${work.concern.tag} (${work.concern.affectedCommunities.length} subsystems)...`);
+                 try {
+                     const summary = await doCC(work.concern, communityDocs);
+                     storeSummary(db, work.ccId, 10, algorithm, work.inputHash, summary, MODEL_VERSION);
+                     result.ccSummarized++;
+                     activeCCIds.add(work.ccId);
+                 }
+                 catch (err) {
+                     onProgress?.(` ⚠ Failed to document concern ${work.concern.tag}: ${formatError(err)}`);
+                     activeCCIds.add(work.ccId);
+                 }
+             })));
+         }
+     }
+     // Clean up orphaned cross-cutting concern summaries
+     cleanupOrphanedSummaries(db, algorithm, 10, activeCCIds);
+     return result;
+ }
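// End-to-end usage sketch. Everything except the exports above is hypothetical:
// the require subpath, the openDatabase helper, and the file name. The db handle
// must expose better-sqlite3-style prepare()/get()/all()/run(), and the Anthropic
// SDK client reads ANTHROPIC_API_KEY from the environment by default.
//
//   const { summarizeCommunities } = require("@ophan/core/dist/summarize");
//   const db = openDatabase("ophan.db");
//   const result = await summarizeCommunities(db, {
//       config: { algorithm: "louvain", skipUnanalyzed: true },
//       rootPath: process.cwd(),
//       onProgress: (msg) => console.log(msg),
//   });
//   // => { l1Summarized, l1Cached, l1DriftSkipped, l2Summarized, ..., ccCached }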