codesummary 1.2.0 → 1.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,5 +1,10 @@
1
1
  import fs from 'fs-extra';
2
2
  import path from 'path';
3
+ import RepositorySignals from './analysis/repositorySignals.js';
4
+ import GraphEngine from './graph/graphEngine.js';
5
+ import ProviderClient from './ai/providerClient.js';
6
+ import { buildSemanticClustersPrompt } from './ai/promptTemplates.js';
7
+ import { isAiSemanticEnabled } from './ai/featureFlags.js';
3
8
  import { formatFileSize } from './utils.js';
4
9
 
5
10
  /**
@@ -9,6 +14,8 @@ import { formatFileSize } from './utils.js';
9
14
  */
10
15
  export class LlmGenerator {
11
16
  constructor() {
17
+ this.repositorySignals = new RepositorySignals();
18
+ this.graphEngine = new GraphEngine();
12
19
  this.stats = {
13
20
  filesProcessed: 0,
14
21
  filesSkipped: 0,
@@ -22,9 +29,10 @@ export class LlmGenerator {
22
29
  * @param {Array} selectedExtensions - Extensions selected by user
23
30
  * @param {string} outputPath - Output .md file path
24
31
  * @param {string} projectName - Project name
32
+ * @param {object} generationOptions - Optional rendering options
25
33
  * @returns {Promise<object>} Result with outputPath and stats
26
34
  */
27
- async generateLlmOutput(filesByExtension, selectedExtensions, outputPath, projectName) {
35
+ async generateLlmOutput(filesByExtension, selectedExtensions, outputPath, projectName, generationOptions = {}) {
28
36
  this.stats.startTime = Date.now();
29
37
 
30
38
  // Collect and sort all selected files
@@ -35,14 +43,23 @@ export class LlmGenerator {
35
43
  }
36
44
  }
37
45
  allFiles.sort((a, b) => a.relativePath.localeCompare(b.relativePath));
46
+ const projectUnderstanding = await this.buildProjectUnderstanding(allFiles, {
47
+ ...generationOptions,
48
+ projectName
49
+ });
50
+ const renderPlan = this.buildRenderPlan(allFiles, projectUnderstanding, generationOptions);
38
51
 
39
52
  const stream = fs.createWriteStream(outputPath, { encoding: 'utf8' });
40
53
 
41
54
  await this.writeLine(stream, this.buildHeader(projectName, allFiles));
42
- await this.writeLine(stream, this.buildFileTree(allFiles));
55
+ await this.writeLine(stream, this.buildFocusedContextSection(renderPlan));
56
+ await this.writeLine(stream, this.buildProjectSummary(projectUnderstanding));
57
+ await this.writeLine(stream, this.buildDependencySection(projectUnderstanding));
58
+ await this.writeLine(stream, this.buildSemanticSection(projectUnderstanding));
59
+ await this.writeLine(stream, this.buildFileTree(renderPlan.filesToRender, renderPlan.focusQuery ? 'Focused File Tree' : 'File Tree'));
43
60
 
44
- for (const file of allFiles) {
45
- const block = await this.buildFileBlock(file);
61
+ for (const file of renderPlan.filesToRender) {
62
+ const block = await this.buildFileBlock(file, projectUnderstanding.fileContentsByPath.get(file.relativePath));
46
63
  await this.writeLine(stream, block);
47
64
  }
48
65
 
@@ -59,6 +76,7 @@ export class LlmGenerator {
59
76
  totalFiles: this.stats.filesProcessed,
60
77
  skippedFiles: this.stats.filesSkipped,
61
78
  duration,
79
+ summaryData: this.buildLlmSummaryData(projectName, allFiles, projectUnderstanding, renderPlan)
62
80
  };
63
81
  }
64
82
 
@@ -90,14 +108,35 @@ export class LlmGenerator {
90
108
  );
91
109
  }
92
110
 
93
- buildFileTree(allFiles) {
111
+ buildFileTree(allFiles, title = 'File Tree') {
94
112
  const lines = allFiles.map(f => ` ${f.relativePath}`).join('\n');
95
- return `## File Tree\n\n\`\`\`\n${lines}\n\`\`\`\n\n---\n\n`;
113
+ return `## ${title}\n\n\`\`\`\n${lines}\n\`\`\`\n\n---\n\n`;
114
+ }
115
+
116
/**
 * Render the "Focused Context" section describing how the render plan was
 * narrowed by a focus query and/or token budget.
 * Returns '' when neither mechanism is active, so nothing is written.
 * @param {object} renderPlan - Plan produced by buildRenderPlan()
 * @returns {string} Markdown section, or empty string when not applicable
 */
buildFocusedContextSection(renderPlan) {
  const { focusQuery, maxTokens, filesToRender, excludedFiles, estimatedTokens } = renderPlan;
  if (!focusQuery && !maxTokens) return '';

  // NOTE(review): these slices assume filesToRender is relevance-ordered,
  // but buildRenderPlan filters allFiles (path order) — confirm intent.
  const primary = filesToRender.slice(0, 6).map(file => file.relativePath);
  const supporting = filesToRender.slice(6, 16).map(file => file.relativePath);
  // excludedFiles IS score-sorted, so the first entries are the near-misses.
  const excluded = excludedFiles.slice(0, 6);

  return (
    `## Focused Context${focusQuery ? `: ${focusQuery}` : ''}\n\n` +
    `${focusQuery ? `- Query: ${focusQuery}\n` : ''}` +
    `${maxTokens ? `- Max tokens budget: ${maxTokens}\n` : ''}` +
    `- Estimated selected tokens: ${estimatedTokens}\n` +
    `- Selected files: ${filesToRender.length}\n` +
    `${primary.length > 0 ? `- Primary files: ${primary.join(', ')}\n` : ''}` +
    `${supporting.length > 0 ? `- Supporting files: ${supporting.join(', ')}\n` : ''}` +
    `${excluded.length > 0 ? `- Excluded as lower relevance: ${excluded.join(', ')}\n` : ''}` +
    '\n---\n\n'
  );
}
97
136
 
98
- async buildFileBlock(file) {
137
+ async buildFileBlock(file, prefetchedRawContent = null) {
99
138
  try {
100
- const raw = await fs.readFile(file.absolutePath, 'utf8');
139
+ const raw = prefetchedRawContent ?? await fs.readFile(file.absolutePath, 'utf8');
101
140
  const ext = path.extname(file.relativePath).toLowerCase();
102
141
  const optimized = this.optimizeContent(raw, ext);
103
142
  const lang = this.fenceLang(ext);
@@ -111,6 +150,682 @@ export class LlmGenerator {
111
150
  }
112
151
  }
113
152
 
153
/**
 * Build heuristic project-understanding data before rendering: per-file
 * signals, a dependency graph, centrality/hotspot scores, entrypoints,
 * a suggested reading order, and (optionally AI-refined) semantic clusters.
 * @param {Array} allFiles - Selected files
 * @param {object} [generationOptions={}] - May carry projectName and ai options
 * @returns {Promise<object>}
 */
async buildProjectUnderstanding(allFiles, generationOptions = {}) {
  const fileContentsByPath = new Map();
  const fileInfos = [];

  for (const file of allFiles) {
    const ext = path.extname(file.relativePath).toLowerCase();

    try {
      const raw = await fs.readFile(file.absolutePath, 'utf8');
      fileContentsByPath.set(file.relativePath, raw);

      const imports = this.repositorySignals.extractImports(raw, ext);
      const calls = this.repositorySignals.extractCalls(raw);
      const tags = this.repositorySignals.extractFileTags(file.relativePath, ext);
      // Splits on \r\n, \r, or \n; a trailing newline yields one extra
      // (empty) "line", so this is an approximation.
      const lineCount = raw.split(/\r\n|\r|\n/).length;

      fileInfos.push({
        path: file.relativePath,
        ext,
        size: file.size || 0,
        lineCount,
        imports,
        calls,
        tags
      });
    } catch {
      // Keep generation resilient even if single-file reads fail.
    }
  }

  const graphAnalysis = await this.graphEngine.analyze(allFiles, fileContentsByPath);
  const dependencyEdges = graphAnalysis.graph.edges.map(edge => [edge.from, edge.to]);
  const inDegree = graphAnalysis.metrics.inDegree;
  const outDegree = graphAnalysis.metrics.outDegree;

  // Score every file: "complexity" from size/imports/calls, "centrality"
  // from graph degrees; hotspot = sum of both. Weights are heuristic tuning.
  const fileScores = fileInfos.map(info => {
    const indeg = inDegree.get(info.path) || 0;
    const outdeg = outDegree.get(info.path) || 0;
    const complexity = (Math.log2(Math.max(1, info.lineCount)) * 1.2) + (info.imports.length * 1.6) + (info.calls.length * 0.25);
    const centrality = (indeg * 2) + (outdeg * 1.2);
    const hotspotScore = complexity + centrality;

    return { path: info.path, complexity, centrality, hotspotScore, indeg, outdeg };
  });

  // Prefer graph-detected entrypoints; fall back to filename heuristics.
  const fallbackEntrypoints = this.detectFallbackEntrypoints(fileInfos);
  const entrypoints = graphAnalysis.entrypoints.length > 0 ? graphAnalysis.entrypoints : fallbackEntrypoints;
  const coreModules = fileScores
    .slice()
    .sort((a, b) => b.centrality - a.centrality || b.hotspotScore - a.hotspotScore)
    .slice(0, 6);
  const hotspots = fileScores
    .slice()
    .sort((a, b) => b.hotspotScore - a.hotspotScore)
    .slice(0, 6);
  const suggestedReadingOrder = this.buildSuggestedReadingOrder(entrypoints, dependencyEdges, fileScores);
  const semanticClusters = this.buildSemanticClusters(fileInfos);
  const aiSemantic = await this.enrichSemanticClustersWithAi({
    semanticClusters,
    entrypoints,
    coreModules,
    projectName: generationOptions.projectName || path.basename(process.cwd()),
    aiOptions: generationOptions.ai || {}
  });

  return {
    fileInfos,
    // NOTE(review): returns graphAnalysis.fileContentsByPath rather than the
    // locally built map — confirm GraphEngine preserves the entries, since
    // buildFileBlock relies on this map to avoid re-reading files.
    fileContentsByPath: graphAnalysis.fileContentsByPath,
    dependencyEdges,
    entrypoints,
    coreModules,
    hotspots,
    suggestedReadingOrder,
    semanticClusters: aiSemantic.clusters,
    aiSemantic,
    graphMetrics: graphAnalysis.metrics,
    connectedSubmodules: graphAnalysis.connectedSubmodules,
    adapterModes: graphAnalysis.graph.metadata.adapterModes
  };
}
238
+
239
+ async enrichSemanticClustersWithAi({ projectName, semanticClusters, entrypoints, coreModules, aiOptions }) {
240
+ const result = {
241
+ enabled: isAiSemanticEnabled(aiOptions),
242
+ provider: aiOptions?.provider || null,
243
+ model: aiOptions?.model || null,
244
+ used: false,
245
+ error: null,
246
+ errorCode: null,
247
+ retryable: false,
248
+ failureScope: null,
249
+ attempts: 0,
250
+ healthCheckOk: null,
251
+ clusters: semanticClusters
252
+ };
253
+
254
+ if (!result.enabled || semanticClusters.length === 0) {
255
+ return result;
256
+ }
257
+
258
+ try {
259
+ const client = new ProviderClient(aiOptions);
260
+ const health = await client.healthCheck();
261
+ result.healthCheckOk = Boolean(health?.ok);
262
+ if (!health?.ok) {
263
+ const healthErr = health?.error;
264
+ result.error = healthErr?.message || 'AI provider health check failed';
265
+ result.errorCode = healthErr?.code || 'provider_unavailable';
266
+ result.retryable = Boolean(healthErr?.retryable);
267
+ result.failureScope = healthErr?.scope || 'provider';
268
+ return result;
269
+ }
270
+
271
+ const messages = buildSemanticClustersPrompt({
272
+ projectName,
273
+ semanticClusters,
274
+ entrypoints,
275
+ coreModules
276
+ });
277
+
278
+ const response = await client.chat(messages, {
279
+ model: aiOptions.model,
280
+ timeoutMs: aiOptions.timeoutMs,
281
+ maxRetries: aiOptions.maxRetries,
282
+ retryBackoffMs: aiOptions.retryBackoffMs,
283
+ maxBackoffMs: aiOptions.maxBackoffMs
284
+ });
285
+ const parsed = this.safeParseJson(response.content);
286
+ const refined = this.normalizeAiSemanticClusters(parsed?.clusters, semanticClusters);
287
+ result.attempts = Number.isInteger(response?.attempts) ? response.attempts : 1;
288
+ if (refined.length > 0) {
289
+ result.clusters = refined;
290
+ result.used = true;
291
+ } else {
292
+ result.error = 'AI response produced no usable semantic clusters';
293
+ result.errorCode = 'invalid_semantic_payload';
294
+ result.failureScope = 'provider';
295
+ }
296
+ } catch (error) {
297
+ result.error = error.message;
298
+ result.errorCode = error.code || 'ai_error';
299
+ result.retryable = Boolean(error.retryable);
300
+ result.failureScope = error.scope || 'provider';
301
+ result.attempts = Number.isInteger(error?.details?.attempts) ? error.details.attempts : 0;
302
+ }
303
+
304
+ return result;
305
+ }
306
+
307
+ safeParseJson(rawText) {
308
+ if (typeof rawText !== 'string') return null;
309
+ const trimmed = rawText.trim();
310
+ try {
311
+ return JSON.parse(trimmed);
312
+ } catch {
313
+ const fencedMatch = trimmed.match(/```(?:json)?\s*([\s\S]*?)```/i);
314
+ if (!fencedMatch) return null;
315
+ try {
316
+ return JSON.parse(fencedMatch[1].trim());
317
+ } catch {
318
+ return null;
319
+ }
320
+ }
321
+ }
322
+
323
+ normalizeAiSemanticClusters(clusters, fallbackClusters) {
324
+ if (!Array.isArray(clusters)) return fallbackClusters;
325
+ const normalized = clusters
326
+ .filter(cluster => cluster && typeof cluster.name === 'string' && Array.isArray(cluster.files))
327
+ .map(cluster => ({
328
+ name: cluster.name.trim(),
329
+ description: typeof cluster.description === 'string' ? cluster.description.trim() : '',
330
+ files: [...new Set(cluster.files.filter(file => typeof file === 'string' && file.trim().length > 0))]
331
+ }))
332
+ .filter(cluster => cluster.name.length > 0 && cluster.files.length > 0);
333
+
334
+ return normalized.length > 0 ? normalized : fallbackClusters;
335
+ }
336
+
337
/**
 * Decide which files get rendered, honouring an optional focus query and/or
 * token budget from generationOptions.
 * @param {Array} allFiles - All selected files (sorted by relativePath)
 * @param {object} projectUnderstanding - Output of buildProjectUnderstanding()
 * @param {object} generationOptions - May carry `focus` (string) and integer `maxTokens`
 * @returns {object} Plan: focusQuery, focusTerms, maxTokens, filesToRender,
 *   excludedFiles, scoreByPath, estimatedTokens
 */
buildRenderPlan(allFiles, projectUnderstanding, generationOptions) {
  const focusQuery = (generationOptions.focus || '').trim();
  // Non-integer budgets (undefined, NaN, floats, strings) are ignored.
  const maxTokens = Number.isInteger(generationOptions.maxTokens) ? generationOptions.maxTokens : null;
  const fileInfoByPath = new Map(projectUnderstanding.fileInfos.map(info => [info.path, info]));
  const scoreByPath = new Map();
  // Only the top coreModules carry base centrality; all other files start at 0.
  const baseCentralityByPath = new Map(projectUnderstanding.coreModules.map(item => [item.path, item.centrality]));
  const adjacency = this.buildUndirectedAdjacency(projectUnderstanding.dependencyEdges);
  const focusTerms = this.extractFocusTerms(focusQuery);

  for (const file of allFiles) {
    const info = fileInfoByPath.get(file.relativePath);
    const score = this.computeFocusScore(file.relativePath, info, focusQuery, focusTerms, baseCentralityByPath);
    scoreByPath.set(file.relativePath, score);
  }

  // Highest score first; path comparison breaks ties deterministically.
  const rankedPaths = allFiles
    .map(file => file.relativePath)
    .sort((a, b) => (scoreByPath.get(b) || 0) - (scoreByPath.get(a) || 0) || a.localeCompare(b));

  let candidatePaths;
  if (focusQuery) {
    // Seed with files that actually matched the query (score > 0), then pull
    // in their dependency neighbourhood so related code travels with them.
    const primarySeeds = rankedPaths.filter(pathKey => (scoreByPath.get(pathKey) || 0) > 0).slice(0, 8);
    const fallbackSeeds = primarySeeds.length > 0 ? primarySeeds : rankedPaths.slice(0, 5);
    candidatePaths = this.expandFocusNeighborhood(fallbackSeeds, adjacency, 2, 28);
  } else {
    candidatePaths = rankedPaths;
  }

  const selectedPaths = this.applyTokenBudget(candidatePaths, allFiles, projectUnderstanding.fileContentsByPath, maxTokens);
  const selectedSet = new Set(selectedPaths);
  // NOTE: filesToRender keeps allFiles (path) ordering; excludedFiles below
  // is score-ordered so the most relevant exclusions come first.
  const filesToRender = allFiles.filter(file => selectedSet.has(file.relativePath));
  const excludedFiles = allFiles
    .map(file => file.relativePath)
    .filter(pathKey => !selectedSet.has(pathKey))
    .sort((a, b) => (scoreByPath.get(b) || 0) - (scoreByPath.get(a) || 0) || a.localeCompare(b));

  const estimatedTokens = filesToRender.reduce((sum, file) => (
    sum + this.estimateFileTokens(file, projectUnderstanding.fileContentsByPath.get(file.relativePath))
  ), 0);

  return {
    focusQuery,
    focusTerms,
    maxTokens,
    filesToRender,
    excludedFiles,
    scoreByPath,
    estimatedTokens
  };
}
387
+
388
+ extractFocusTerms(focusQuery) {
389
+ if (!focusQuery) return [];
390
+ const stopwords = new Set(['the', 'and', 'for', 'with', 'from', 'into', 'that', 'this', 'how', 'what', 'where', 'when']);
391
+ return focusQuery
392
+ .toLowerCase()
393
+ .split(/[^a-z0-9_]+/)
394
+ .map(term => term.trim())
395
+ .filter(term => term.length >= 2 && !stopwords.has(term));
396
+ }
397
+
398
+ computeFocusScore(filePath, fileInfo, focusQuery, focusTerms, baseCentralityByPath) {
399
+ const lowPath = filePath.toLowerCase();
400
+ const tags = (fileInfo?.tags || []).map(tag => String(tag).toLowerCase());
401
+ const imports = (fileInfo?.imports || []).map(dep => String(dep).toLowerCase());
402
+ const calls = (fileInfo?.calls || []).map(call => String(call).toLowerCase());
403
+ const baseCentrality = baseCentralityByPath.get(filePath) || 0;
404
+
405
+ let score = baseCentrality * 0.1;
406
+
407
+ if (focusQuery) {
408
+ const normalizedQuery = focusQuery.toLowerCase();
409
+ if (lowPath.includes(normalizedQuery)) score += 10;
410
+ }
411
+
412
+ for (const term of focusTerms) {
413
+ if (lowPath.includes(term)) score += 4;
414
+ if (tags.some(tag => tag.includes(term))) score += 3;
415
+ if (imports.some(dep => dep.includes(term))) score += 2;
416
+ if (calls.some(call => call.includes(term))) score += 1;
417
+ }
418
+
419
+ return score;
420
+ }
421
+
422
+ buildUndirectedAdjacency(dependencyEdges) {
423
+ const adjacency = new Map();
424
+ const ensure = key => {
425
+ if (!adjacency.has(key)) adjacency.set(key, new Set());
426
+ return adjacency.get(key);
427
+ };
428
+
429
+ for (const [from, to] of dependencyEdges) {
430
+ ensure(from).add(to);
431
+ ensure(to).add(from);
432
+ }
433
+
434
+ return adjacency;
435
+ }
436
+
437
/**
 * Breadth-first expansion of the focus seed set across the undirected
 * dependency graph, capped by hop depth and total node count.
 * @param {Array<string>} seedPaths - Starting file paths (depth 0)
 * @param {Map<string, Set<string>>} adjacency - Undirected adjacency map
 * @param {number} [maxDepth=2] - Maximum hops away from a seed
 * @param {number} [maxNodes=28] - Upper bound on result size (seeds always kept)
 * @returns {Array<string>} Seeds plus reachable neighbours, in discovery order
 */
expandFocusNeighborhood(seedPaths, adjacency, maxDepth = 2, maxNodes = 28) {
  const seen = new Set(seedPaths);
  const queue = seedPaths.map(pathKey => ({ pathKey, depth: 0 }));

  while (queue.length > 0 && seen.size < maxNodes) {
    const current = queue.shift();
    if (!current) continue;
    // Nodes at maxDepth are kept but not expanded further.
    if (current.depth >= maxDepth) continue;

    // Sorted neighbour order keeps expansion deterministic across runs.
    const neighbors = [...(adjacency.get(current.pathKey) || [])].sort((a, b) => a.localeCompare(b));
    for (const neighbor of neighbors) {
      if (!seen.has(neighbor)) {
        seen.add(neighbor);
        queue.push({ pathKey: neighbor, depth: current.depth + 1 });
      }
      if (seen.size >= maxNodes) break;
    }
  }

  return [...seen];
}
458
+
459
/**
 * Greedily select files in candidate (priority) order until the token
 * budget is exhausted. With no budget, all candidates pass through.
 * @param {Array<string>} candidatePaths - Paths in priority order
 * @param {Array} allFiles - File records, used for path -> file lookup
 * @param {Map<string, string>} fileContentsByPath - Prefetched file contents
 * @param {?number} maxTokens - Budget, or null/0 for unlimited
 * @returns {Array<string>} Selected paths (non-empty when candidates exist)
 */
applyTokenBudget(candidatePaths, allFiles, fileContentsByPath, maxTokens) {
  if (!maxTokens) return candidatePaths;

  const fileByPath = new Map(allFiles.map(file => [file.relativePath, file]));
  let usedTokens = 0;
  const selected = [];

  for (const pathKey of candidatePaths) {
    const file = fileByPath.get(pathKey);
    if (!file) continue;

    const estimated = this.estimateFileTokens(file, fileContentsByPath.get(pathKey));
    // Skip files that would blow the budget — but always admit the first
    // file, even oversized, so the output is never empty.
    if (selected.length > 0 && usedTokens + estimated > maxTokens) continue;

    selected.push(pathKey);
    usedTokens += estimated;
    if (usedTokens >= maxTokens) break;
  }

  // Safety net for the case where no candidate resolved to a file record.
  if (selected.length === 0 && candidatePaths.length > 0) {
    selected.push(candidatePaths[0]);
  }

  return selected;
}
484
+
485
+ estimateFileTokens(file, content) {
486
+ const baseHeaderTokens = 24;
487
+ const text = typeof content === 'string' ? content : '';
488
+ const bodyTokens = Math.ceil(text.length / 4);
489
+ const pathTokens = Math.ceil((file.relativePath || '').length / 4);
490
+ return baseHeaderTokens + bodyTokens + pathTokens;
491
+ }
492
+
493
/**
 * Heuristic entrypoint detection used when the graph engine finds none:
 * score source-like files by well-known path patterns (bin/, index, main,
 * cli, app, server) plus an 'entry' tag bonus, and keep the top six.
 * @param {Array} fileInfos - Per-file signal records
 * @returns {Array<string>} Up to six likely entrypoint paths, best first
 */
detectFallbackEntrypoints(fileInfos) {
  const sourceLikeExtensions = new Set([
    '.js', '.jsx', '.mjs', '.cjs', '.ts', '.tsx',
    '.py', '.java', '.cs', '.cpp', '.c', '.h',
    '.go', '.rs', '.php', '.rb', '.swift', '.kt', '.scala',
    '.sh', '.bash', '.bat', '.ps1'
  ]);

  // Ordered most- to least-specific; earlier patterns score higher below.
  const priorityPatterns = [
    /^bin\//,
    /^src\/index\./,
    /^src\/main\./,
    /^src\/cli\./,
    /\/index\./,
    /\/main\./,
    /\/app\./,
    /\/server\./
  ];

  const scored = fileInfos.map(info => {
    // Non-source files (docs, data, configs) can never be entrypoints here.
    if (!sourceLikeExtensions.has(info.ext)) {
      return { path: info.path, score: 0 };
    }

    const lowPath = info.path.toLowerCase();
    // Best-matching pattern wins: pattern at index i scores (length - i).
    const patternScore = priorityPatterns.reduce((score, pattern, idx) => (
      pattern.test(lowPath) ? Math.max(score, priorityPatterns.length - idx) : score
    ), 0);
    const entryTagScore = info.tags.includes('entry') ? 2 : 0;

    return {
      path: info.path,
      score: patternScore + entryTagScore
    };
  });

  return scored
    .filter(item => item.score > 0)
    .sort((a, b) => b.score - a.score || a.path.localeCompare(b.path))
    .slice(0, 6)
    .map(item => item.path);
}
535
+
536
/**
 * Suggest up to 12 files to read, by BFS over the directed dependency
 * graph starting from the entrypoints; if fewer than 12 are reached,
 * top up with the most central remaining source files.
 * @param {Array<string>} entrypoints - Starting paths for the walk
 * @param {Array<Array<string>>} dependencyEdges - [source, target] pairs
 * @param {Array} fileScores - Per-file centrality/hotspot scores
 * @returns {Array<string>} Ordered reading list (max 12 paths)
 */
buildSuggestedReadingOrder(entrypoints, dependencyEdges, fileScores) {
  const sourceLikeExtensions = new Set([
    '.js', '.jsx', '.mjs', '.cjs', '.ts', '.tsx',
    '.py', '.java', '.cs', '.cpp', '.c', '.h',
    '.go', '.rs', '.php', '.rb', '.swift', '.kt', '.scala',
    '.sh', '.bash', '.bat', '.ps1'
  ]);

  // Only source files are eligible for the centrality fallback below.
  const sourceScores = fileScores.filter(item => {
    const ext = path.extname(item.path).toLowerCase();
    return sourceLikeExtensions.has(ext);
  });

  const adjacency = new Map();
  for (const [source, target] of dependencyEdges) {
    if (!adjacency.has(source)) adjacency.set(source, []);
    adjacency.get(source).push(target);
  }

  const order = [];
  const seen = new Set();
  const queue = [...entrypoints];

  // Breadth-first: entrypoints first, then their dependencies, and so on.
  while (queue.length > 0 && order.length < 12) {
    const current = queue.shift();
    if (!current || seen.has(current)) continue;
    seen.add(current);
    order.push(current);

    // Sorted dependency order keeps the suggestion deterministic.
    const deps = (adjacency.get(current) || []).slice().sort((a, b) => a.localeCompare(b));
    for (const dep of deps) {
      const depExt = path.extname(dep).toLowerCase();
      if (!seen.has(dep) && sourceLikeExtensions.has(depExt)) {
        queue.push(dep);
      }
    }
  }

  // Top up with the most central source files not yet reached by the walk.
  if (order.length < 12) {
    const centralFallback = sourceScores
      .slice()
      .sort((a, b) => b.centrality - a.centrality || b.hotspotScore - a.hotspotScore)
      .map(item => item.path);

    for (const candidate of centralFallback) {
      if (!seen.has(candidate)) {
        seen.add(candidate);
        order.push(candidate);
      }
      if (order.length >= 12) break;
    }
  }

  return order;
}
591
+
592
/**
 * Render the "Project Summary" Markdown section: totals, top file types,
 * entrypoints, core modules, hotspots, graph hubs/isolates, submodules,
 * adapter modes, and the suggested reading order.
 * @param {object} projectUnderstanding - Output of buildProjectUnderstanding()
 * @returns {string} Markdown section terminated by a horizontal rule
 */
buildProjectSummary(projectUnderstanding) {
  const { fileInfos, entrypoints, coreModules, hotspots, suggestedReadingOrder, graphMetrics, connectedSubmodules, adapterModes } = projectUnderstanding;
  const totalLines = fileInfos.reduce((sum, info) => sum + info.lineCount, 0);
  const languageCount = new Map();

  // Count files per extension as a proxy for language distribution.
  for (const info of fileInfos) {
    languageCount.set(info.ext, (languageCount.get(info.ext) || 0) + 1);
  }

  const topExtensions = [...languageCount.entries()]
    .sort((a, b) => b[1] - a[1])
    .slice(0, 5)
    .map(([ext, count]) => `${ext || '(none)'} (${count})`)
    .join(', ');

  // Every list degrades to an explicit "None ..." string so the section
  // never renders empty bullet values.
  const entrypointText = entrypoints.length > 0 ? entrypoints.join(', ') : 'None detected';
  const coreText = coreModules.length > 0
    ? coreModules.map(module => `${module.path} (centrality ${module.centrality.toFixed(1)})`).join(', ')
    : 'None detected';
  const hotspotText = hotspots.length > 0
    ? hotspots.map(module => `${module.path} (score ${module.hotspotScore.toFixed(1)})`).join(', ')
    : 'None detected';
  const readingText = suggestedReadingOrder.length > 0 ? suggestedReadingOrder.join(' -> ') : 'None suggested';
  const hubsText = graphMetrics.hubs.length > 0
    ? graphMetrics.hubs.slice(0, 5).map(item => `${item.id} (${item.degree})`).join(', ')
    : 'None detected';
  const isolatedText = graphMetrics.isolated.length > 0 ? graphMetrics.isolated.slice(0, 5).join(', ') : 'None';
  const adapterText = Object.keys(adapterModes).length > 0
    ? Object.entries(adapterModes).map(([ext, mode]) => `${ext}:${mode}`).join(', ')
    : 'None';
  const submoduleText = connectedSubmodules.length > 0
    ? connectedSubmodules.slice(0, 3).map(group => `[${group.length}] ${group.slice(0, 3).join(', ')}`).join(' | ')
    : 'None detected';

  return (
    '## Project Summary\n\n' +
    `- Total lines (approx): ${totalLines}\n` +
    `- Top file types: ${topExtensions || 'None'}\n` +
    `- Entrypoints: ${entrypointText}\n` +
    `- Core modules: ${coreText}\n` +
    `- Hotspots (complexity + centrality): ${hotspotText}\n` +
    `- Graph hubs (out-degree): ${hubsText}\n` +
    `- Isolated files: ${isolatedText}\n` +
    `- Connected submodules: ${submoduleText}\n` +
    `- Language analysis mode: ${adapterText}\n` +
    `- Suggested reading order: ${readingText}\n\n` +
    '---\n\n'
  );
}
641
+
642
/**
 * Render the "Code Dependency Graph" Markdown section: up to 30 edges
 * (sorted for stable output) plus the top central and hub nodes.
 * @param {object} projectUnderstanding - Output of buildProjectUnderstanding()
 * @returns {string} Markdown section terminated by a horizontal rule
 */
buildDependencySection(projectUnderstanding) {
  const { dependencyEdges, graphMetrics } = projectUnderstanding;
  // Sort by source then target so output is deterministic, then cap at 30.
  const edges = dependencyEdges
    .slice()
    .sort((a, b) => {
      if (a[0] !== b[0]) return a[0].localeCompare(b[0]);
      return a[1].localeCompare(b[1]);
    })
    .slice(0, 30);

  if (edges.length === 0) {
    return '## Code Dependency Graph\n\n- No internal dependency edges detected with current heuristics.\n\n---\n\n';
  }

  const lines = edges.map(([source, target]) => `- ${source} -> ${target}`).join('\n');
  const central = graphMetrics.centralNodes
    .slice(0, 5)
    .map(item => `- ${item.id} (in-degree ${item.degree})`)
    .join('\n');
  const hubs = graphMetrics.hubs
    .slice(0, 5)
    .map(item => `- ${item.id} (out-degree ${item.degree})`)
    .join('\n');

  return (
    '## Code Dependency Graph\n\n' +
    `${lines}\n\n` +
    '### Central Nodes\n\n' +
    `${central || '- None'}\n\n` +
    '### Hub Nodes\n\n' +
    `${hubs || '- None'}\n\n` +
    '---\n\n'
  );
}
676
+
677
/**
 * Group files into named semantic clusters using path substrings and the
 * tags extracted by RepositorySignals. A file may land in several clusters.
 * Returns at most 12 clusters, largest first.
 * @param {Array} fileInfos - Per-file signal records
 * @returns {Array<{name: string, files: Array<string>}>}
 */
buildSemanticClusters(fileInfos) {
  const clusterMap = new Map();
  // Sets dedupe files that match a cluster rule more than once.
  const addToCluster = (name, filePath) => {
    if (!clusterMap.has(name)) clusterMap.set(name, new Set());
    clusterMap.get(name).add(filePath);
  };

  for (const info of fileInfos) {
    const lowPath = info.path.toLowerCase();
    const tags = new Set(info.tags || []);

    // Each rule is independent; matching several puts the file in several clusters.
    if (lowPath.includes('/cli') || tags.has('entry')) addToCluster('CLI orchestration', info.path);
    if (tags.has('config') || lowPath.includes('config')) addToCluster('Configuration', info.path);
    if (tags.has('api') || lowPath.includes('/api/')) addToCluster('API surface', info.path);
    if (tags.has('service')) addToCluster('Service layer', info.path);
    if (tags.has('controller') || lowPath.includes('/controller')) addToCluster('Controllers', info.path);
    if (tags.has('model') || lowPath.includes('/model')) addToCluster('Data models', info.path);
    if (lowPath.includes('pdf') || lowPath.includes('doc') || lowPath.includes('summary')) addToCluster('Document generation', info.path);
    if (tags.has('utility') || lowPath.includes('utils')) addToCluster('Shared utilities', info.path);
    if (tags.has('test')) addToCluster('Testing', info.path);
    if (lowPath.includes('/graph/')) addToCluster('Graph analysis', info.path);
    if (info.ext === '.md' || info.ext === '.txt') addToCluster('Documentation', info.path);
  }

  return [...clusterMap.entries()]
    .map(([name, files]) => ({
      name,
      files: [...files].sort((a, b) => a.localeCompare(b))
    }))
    .sort((a, b) => b.files.length - a.files.length || a.name.localeCompare(b.name))
    .slice(0, 12);
}
709
+
710
/**
 * Render the "Semantic Dependency Graph" Markdown section: one bullet per
 * cluster (max six files shown each), preceded by a note on whether the
 * clusters are AI-refined or a heuristic fallback (with error details).
 * @param {object} projectUnderstanding - Output of buildProjectUnderstanding()
 * @returns {string} Markdown section terminated by a horizontal rule
 */
buildSemanticSection(projectUnderstanding) {
  const { semanticClusters, aiSemantic } = projectUnderstanding;

  if (!semanticClusters || semanticClusters.length === 0) {
    return '## Semantic Dependency Graph\n\n- No semantic clusters detected with current heuristics.\n\n---\n\n';
  }

  // Three states: AI succeeded, AI enabled but fell back (report why),
  // or AI disabled (no note at all).
  const aiNote = aiSemantic?.used
    ? `- Enrichment mode: AI-assisted (${aiSemantic.provider || 'provider'}${aiSemantic.model ? ` / ${aiSemantic.model}` : ''})\n`
    : (aiSemantic?.enabled
      ? `- Enrichment mode: heuristic fallback (${this.describeAiFallbackReason(aiSemantic)})\n` +
        `${aiSemantic.errorCode ? `- AI error code: ${aiSemantic.errorCode}\n` : ''}` +
        `${aiSemantic.error ? `- AI error detail: ${aiSemantic.error}\n` : ''}` +
        `${aiSemantic.attempts ? `- AI attempts: ${aiSemantic.attempts}\n` : ''}`
      : '');

  const lines = semanticClusters
    .map(cluster => `- ${cluster.name}${cluster.description ? ` (${cluster.description})` : ''} -> ${cluster.files.slice(0, 6).join(', ')}`)
    .join('\n');

  return `## Semantic Dependency Graph\n\n${aiNote}${lines}\n\n---\n\n`;
}
732
+
733
/**
 * Assemble the machine-readable summary payload returned alongside the
 * generated Markdown (versioned so consumers can detect schema changes).
 * @param {string} projectName - Project name
 * @param {Array} allFiles - All selected files (for counts/sizes)
 * @param {object} projectUnderstanding - Output of buildProjectUnderstanding()
 * @param {?object} [renderPlan=null] - Focus/budget plan; focus is null without it
 * @returns {object} Structured summary (project, summary, graphs)
 */
buildLlmSummaryData(projectName, allFiles, projectUnderstanding, renderPlan = null) {
  const { entrypoints, coreModules, hotspots, suggestedReadingOrder, dependencyEdges, graphMetrics, adapterModes, semanticClusters, aiSemantic } = projectUnderstanding;
  const generatedAt = new Date().toISOString();
  const totalSize = allFiles.reduce((sum, file) => sum + (file.size || 0), 0);

  const codeGraph = {
    edges: dependencyEdges.map(([from, to]) => ({ from, to })),
    centralNodes: graphMetrics.centralNodes.map(item => ({ path: item.id, inDegree: item.degree })),
    hubNodes: graphMetrics.hubs.map(item => ({ path: item.id, outDegree: item.degree })),
    isolated: graphMetrics.isolated
  };

  return {
    // Schema version for downstream consumers of this payload.
    version: 1,
    project: {
      name: projectName,
      generatedAt,
      fileCount: allFiles.length,
      totalSizeBytes: totalSize
    },
    summary: {
      entrypoints,
      // Scores are rounded to two decimals for stable serialized output.
      coreModules: coreModules.map(item => ({
        path: item.path,
        centrality: Number(item.centrality.toFixed(2)),
        hotspotScore: Number(item.hotspotScore.toFixed(2))
      })),
      hotspots: hotspots.map(item => ({
        path: item.path,
        score: Number(item.hotspotScore.toFixed(2))
      })),
      suggestedReadingOrder,
      languageAnalysisMode: adapterModes,
      aiSemantic: {
        enabled: Boolean(aiSemantic?.enabled),
        used: Boolean(aiSemantic?.used),
        provider: aiSemantic?.provider || null,
        model: aiSemantic?.model || null,
        error: aiSemantic?.error || null,
        errorCode: aiSemantic?.errorCode || null,
        retryable: Boolean(aiSemantic?.retryable),
        failureScope: aiSemantic?.failureScope || null,
        // Only meaningful when AI was enabled but not used.
        fallbackReason: aiSemantic?.used ? null : this.describeAiFallbackReason(aiSemantic),
        attempts: aiSemantic?.attempts || 0,
        healthCheckOk: aiSemantic?.healthCheckOk
      },
      focus: renderPlan ? {
        query: renderPlan.focusQuery || null,
        maxTokens: renderPlan.maxTokens || null,
        estimatedSelectedTokens: renderPlan.estimatedTokens,
        selectedFiles: renderPlan.filesToRender.map(file => file.relativePath)
      } : null
    },
    graphs: {
      code: codeGraph,
      semantic: {
        clusters: semanticClusters.map(cluster => ({
          name: cluster.name,
          files: cluster.files
        }))
      }
    }
  };
}
797
+
798
+ /**
799
+ * Produce a human-readable fallback reason without assuming API downtime for every error.
800
+ */
801
+ describeAiFallbackReason(aiSemantic) {
802
+ if (!aiSemantic?.enabled || aiSemantic?.used) return 'not applicable';
803
+
804
+ const code = String(aiSemantic?.errorCode || '').toLowerCase();
805
+
806
+ if (['network_unavailable', 'timeout', 'server_error'].includes(code)) {
807
+ return 'API unavailable';
808
+ }
809
+
810
+ if (code === 'rate_limited') {
811
+ return 'API rate limited';
812
+ }
813
+
814
+ if (code === 'auth_error') {
815
+ return 'API authentication/configuration error';
816
+ }
817
+
818
+ if (['bad_request', 'unprocessable', 'payload_too_large'].includes(code)) {
819
+ return 'API request rejected';
820
+ }
821
+
822
+ if (['invalid_response', 'invalid_semantic_payload'].includes(code)) {
823
+ return 'AI response not usable';
824
+ }
825
+
826
+ return 'AI enrichment failed';
827
+ }
828
+
114
829
  /**
115
830
  * Apply lossless content optimisations
116
831
  * @param {string} content - Raw file content