devlensio 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (136)
  1. package/LICENSE +674 -0
  2. package/dist/clustering/index.d.ts +27 -0
  3. package/dist/clustering/index.js +149 -0
  4. package/dist/config/index.d.ts +10 -0
  5. package/dist/config/index.js +78 -0
  6. package/dist/config/providers/file.d.ts +19 -0
  7. package/dist/config/providers/file.js +215 -0
  8. package/dist/config/providers/request.d.ts +2 -0
  9. package/dist/config/providers/request.js +72 -0
  10. package/dist/config/types.d.ts +46 -0
  11. package/dist/config/types.js +81 -0
  12. package/dist/config/writer.d.ts +29 -0
  13. package/dist/config/writer.js +103 -0
  14. package/dist/filesystem/appRouter.d.ts +2 -0
  15. package/dist/filesystem/appRouter.js +126 -0
  16. package/dist/filesystem/backendRoutes.d.ts +2 -0
  17. package/dist/filesystem/backendRoutes.js +161 -0
  18. package/dist/filesystem/index.d.ts +2 -0
  19. package/dist/filesystem/index.js +28 -0
  20. package/dist/filesystem/index.test.d.ts +1 -0
  21. package/dist/filesystem/index.test.js +178 -0
  22. package/dist/filesystem/pagesRouter.d.ts +2 -0
  23. package/dist/filesystem/pagesRouter.js +109 -0
  24. package/dist/fingerprint/detectors.d.ts +8 -0
  25. package/dist/fingerprint/detectors.js +174 -0
  26. package/dist/fingerprint/index.d.ts +2 -0
  27. package/dist/fingerprint/index.js +41 -0
  28. package/dist/fingerprint/index.test.d.ts +1 -0
  29. package/dist/fingerprint/index.test.js +148 -0
  30. package/dist/graph/buildLookup.d.ts +10 -0
  31. package/dist/graph/buildLookup.js +32 -0
  32. package/dist/graph/edges/callEdges.d.ts +7 -0
  33. package/dist/graph/edges/callEdges.js +145 -0
  34. package/dist/graph/edges/eventEdges.d.ts +7 -0
  35. package/dist/graph/edges/eventEdges.js +203 -0
  36. package/dist/graph/edges/guardEdges.d.ts +3 -0
  37. package/dist/graph/edges/guardEdges.js +232 -0
  38. package/dist/graph/edges/hookEdges.d.ts +3 -0
  39. package/dist/graph/edges/hookEdges.js +54 -0
  40. package/dist/graph/edges/importEdges.d.ts +8 -0
  41. package/dist/graph/edges/importEdges.js +224 -0
  42. package/dist/graph/edges/propEdges.d.ts +3 -0
  43. package/dist/graph/edges/propEdges.js +142 -0
  44. package/dist/graph/edges/routeEdge.d.ts +3 -0
  45. package/dist/graph/edges/routeEdge.js +124 -0
  46. package/dist/graph/edges/stateEdges.d.ts +3 -0
  47. package/dist/graph/edges/stateEdges.js +206 -0
  48. package/dist/graph/edges/testEdges.d.ts +3 -0
  49. package/dist/graph/edges/testEdges.js +143 -0
  50. package/dist/graph/edges/utils.d.ts +2 -0
  51. package/dist/graph/edges/utils.js +25 -0
  52. package/dist/graph/index.d.ts +6 -0
  53. package/dist/graph/index.js +65 -0
  54. package/dist/graph/index.test.d.ts +1 -0
  55. package/dist/graph/index.test.js +542 -0
  56. package/dist/graph/thirdPartyLibs.d.ts +8 -0
  57. package/dist/graph/thirdPartyLibs.js +162 -0
  58. package/dist/index.d.ts +15 -0
  59. package/dist/index.js +15 -0
  60. package/dist/jobs/index.d.ts +5 -0
  61. package/dist/jobs/index.js +11 -0
  62. package/dist/jobs/queue/interface.d.ts +13 -0
  63. package/dist/jobs/queue/interface.js +1 -0
  64. package/dist/jobs/queue/memory.d.ts +24 -0
  65. package/dist/jobs/queue/memory.js +291 -0
  66. package/dist/jobs/runner.d.ts +3 -0
  67. package/dist/jobs/runner.js +136 -0
  68. package/dist/jobs/types.d.ts +112 -0
  69. package/dist/jobs/types.js +33 -0
  70. package/dist/parser/directives.d.ts +4 -0
  71. package/dist/parser/directives.js +31 -0
  72. package/dist/parser/extractors/components.d.ts +5 -0
  73. package/dist/parser/extractors/components.js +240 -0
  74. package/dist/parser/extractors/functions.d.ts +4 -0
  75. package/dist/parser/extractors/functions.js +240 -0
  76. package/dist/parser/extractors/hooks.d.ts +4 -0
  77. package/dist/parser/extractors/hooks.js +128 -0
  78. package/dist/parser/extractors/stores.d.ts +3 -0
  79. package/dist/parser/extractors/stores.js +181 -0
  80. package/dist/parser/index.d.ts +14 -0
  81. package/dist/parser/index.js +168 -0
  82. package/dist/parser/index.test.d.ts +1 -0
  83. package/dist/parser/index.test.js +319 -0
  84. package/dist/parser/typeUtils.d.ts +9 -0
  85. package/dist/parser/typeUtils.js +46 -0
  86. package/dist/pipeline/index.d.ts +50 -0
  87. package/dist/pipeline/index.js +249 -0
  88. package/dist/scoring/connectionCounter.d.ts +28 -0
  89. package/dist/scoring/connectionCounter.js +134 -0
  90. package/dist/scoring/fileScorer.d.ts +2 -0
  91. package/dist/scoring/fileScorer.js +44 -0
  92. package/dist/scoring/index.d.ts +22 -0
  93. package/dist/scoring/index.js +130 -0
  94. package/dist/scoring/index.test.d.ts +1 -0
  95. package/dist/scoring/index.test.js +453 -0
  96. package/dist/scoring/nodeScorer.d.ts +3 -0
  97. package/dist/scoring/nodeScorer.js +108 -0
  98. package/dist/scoring/noiseFilter.d.ts +18 -0
  99. package/dist/scoring/noiseFilter.js +92 -0
  100. package/dist/storage/fileStorage.d.ts +117 -0
  101. package/dist/storage/fileStorage.js +616 -0
  102. package/dist/storage/index.d.ts +4 -0
  103. package/dist/storage/index.js +2 -0
  104. package/dist/storage/interface.d.ts +27 -0
  105. package/dist/storage/interface.js +1 -0
  106. package/dist/summarizer/checkpoint.d.ts +15 -0
  107. package/dist/summarizer/checkpoint.js +110 -0
  108. package/dist/summarizer/index.d.ts +2 -0
  109. package/dist/summarizer/index.js +281 -0
  110. package/dist/summarizer/mapreduce.d.ts +4 -0
  111. package/dist/summarizer/mapreduce.js +87 -0
  112. package/dist/summarizer/prompts.d.ts +22 -0
  113. package/dist/summarizer/prompts.js +205 -0
  114. package/dist/summarizer/providers/anthropic.d.ts +9 -0
  115. package/dist/summarizer/providers/anthropic.js +78 -0
  116. package/dist/summarizer/providers/gemini.d.ts +9 -0
  117. package/dist/summarizer/providers/gemini.js +79 -0
  118. package/dist/summarizer/providers/index.d.ts +3 -0
  119. package/dist/summarizer/providers/index.js +43 -0
  120. package/dist/summarizer/providers/ollama.d.ts +9 -0
  121. package/dist/summarizer/providers/ollama.js +23 -0
  122. package/dist/summarizer/providers/openRouter.d.ts +9 -0
  123. package/dist/summarizer/providers/openRouter.js +19 -0
  124. package/dist/summarizer/providers/openai.d.ts +9 -0
  125. package/dist/summarizer/providers/openai.js +72 -0
  126. package/dist/summarizer/providers/types.d.ts +32 -0
  127. package/dist/summarizer/providers/types.js +1 -0
  128. package/dist/summarizer/retry.d.ts +7 -0
  129. package/dist/summarizer/retry.js +51 -0
  130. package/dist/summarizer/topological.d.ts +3 -0
  131. package/dist/summarizer/topological.js +105 -0
  132. package/dist/summarizer/types.d.ts +57 -0
  133. package/dist/summarizer/types.js +17 -0
  134. package/dist/types.d.ts +78 -0
  135. package/dist/types.js +1 -0
  136. package/package.json +48 -0
@@ -0,0 +1,110 @@
1
+ // Purely a read/write utility — no LLM calls, no complex logic.
2
+ //
3
+ // createCheckpoint() — called at start of fresh summarization run
4
+ // loadCheckpoint() — called on resume
5
+ // saveCheckpoint() — called after every level/group completes
6
+ // deleteCheckpoint() — called on cancel or completion
7
+ // getResumePoint() — returns { phase, levelIndex } — where to continue from
8
+ import fs from "fs";
9
+ import { getCheckpointPath } from "../storage/fileStorage.js";
10
+ // ─── Load / Save / Delete ─────────────────────────────────────────────────────
11
/**
 * Reads the checkpoint stored for a graph/commit pair.
 *
 * @param graphId    Graph identifier passed to getCheckpointPath.
 * @param commitHash Commit identifier passed to getCheckpointPath.
 * @returns The parsed checkpoint object, or `undefined` when the file does not
 *          exist or cannot be read/parsed (read and parse errors are swallowed).
 */
export function loadCheckpoint(graphId, commitHash) {
    const checkpointPath = getCheckpointPath(graphId, commitHash);
    let parsed;
    if (fs.existsSync(checkpointPath)) {
        try {
            const raw = fs.readFileSync(checkpointPath, "utf-8");
            parsed = JSON.parse(raw);
        }
        catch {
            // Unreadable or corrupt checkpoint is treated the same as "no checkpoint".
            parsed = undefined;
        }
    }
    return parsed;
}
22
/**
 * Persists a checkpoint to its file, stamping `updatedAt` with the current time.
 *
 * The JSON is first written to a sibling `<file>.tmp` and then renamed over the
 * real file, so a crash mid-write leaves the previous checkpoint untouched.
 *
 * @param checkpoint Checkpoint object; its `updatedAt` field is mutated in place.
 */
export function saveCheckpoint(checkpoint) {
    const { graphId, commitHash } = checkpoint;
    const target = getCheckpointPath(graphId, commitHash);
    checkpoint.updatedAt = new Date().toISOString();
    const staging = `${target}.tmp`;
    const serialized = JSON.stringify(checkpoint, null, 2);
    fs.writeFileSync(staging, serialized, "utf-8");
    fs.renameSync(staging, target);
}
30
/**
 * Removes the checkpoint file for a graph/commit pair, if one exists.
 *
 * @param graphId    Graph identifier passed to getCheckpointPath.
 * @param commitHash Commit identifier passed to getCheckpointPath.
 */
export function deleteCheckpoint(graphId, commitHash) {
    const checkpointPath = getCheckpointPath(graphId, commitHash);
    if (!fs.existsSync(checkpointPath)) {
        return;
    }
    fs.unlinkSync(checkpointPath);
}
35
+ // ─── Create ───────────────────────────────────────────────────────────────────
36
+ //
37
+ // Called once at the start of a fresh summarization run.
38
+ // nodeOrder is now string[][] — each inner array is one parallel level.
39
+ // On resume we load this file and never redo the topo sort.
40
/**
 * Builds a fresh checkpoint for a summarization run and writes it to disk.
 *
 * @param graphId     Graph identifier.
 * @param commitHash  Commit identifier.
 * @param nodeOrder   Array of levels; each level is an array whose length is counted.
 * @param cycleGroups Array of groups; each group's `size` is counted.
 * @param fileNodes   Array of file nodes; its length is counted.
 * @returns The checkpoint object that was saved (status "running", all three
 *          progress cursors at -1, completedNodes at 0).
 */
export function createCheckpoint(graphId, commitHash, nodeOrder, cycleGroups, fileNodes) {
    const timestamp = new Date().toISOString();
    // Total work = regular nodes across all levels + cycle-group members + file nodes.
    let regularCount = 0;
    for (const level of nodeOrder) {
        regularCount += level.length;
    }
    let cycleCount = 0;
    for (const group of cycleGroups) {
        cycleCount += group.size;
    }
    const checkpoint = {
        graphId,
        commitHash,
        status: "running",
        createdAt: timestamp,
        updatedAt: timestamp,
        nodeOrder,
        cycleGroups,
        fileNodes,
        // -1 means "nothing completed yet" for each of the three phases
        lastCompletedLevel: -1,
        lastCompletedCycleGroup: -1,
        lastCompletedFileNode: -1,
        totalNodes: regularCount + cycleCount + fileNodes.length,
        completedNodes: 0,
    };
    saveCheckpoint(checkpoint);
    return checkpoint;
}
64
+ export function getResumePoint(checkpoint) {
65
+ // Phase 1 — regular nodes (level by level)
66
+ if (checkpoint.lastCompletedLevel < checkpoint.nodeOrder.length - 1) {
67
+ return {
68
+ phase: "nodes",
69
+ index: checkpoint.lastCompletedLevel + 1,
70
+ };
71
+ }
72
+ // Phase 2 — cycle groups
73
+ if (checkpoint.lastCompletedCycleGroup < checkpoint.cycleGroups.length - 1) {
74
+ return {
75
+ phase: "cycles",
76
+ index: checkpoint.lastCompletedCycleGroup + 1,
77
+ };
78
+ }
79
+ // Phase 3 — file nodes
80
+ if (checkpoint.lastCompletedFileNode < checkpoint.fileNodes.length - 1) {
81
+ return {
82
+ phase: "files",
83
+ index: checkpoint.lastCompletedFileNode + 1,
84
+ };
85
+ }
86
+ return { phase: "done", index: -1 };
87
+ }
88
+ // ─── Progress update helpers ──────────────────────────────────────────────────
89
+ //
90
+ // Called by the batch loop after each level/group/file completes.
91
+ // Marks an entire level as completed — levels are atomic.
92
+ export function markLevelCompleted(checkpoint, levelIndex) {
93
+ checkpoint.lastCompletedLevel = levelIndex;
94
+ checkpoint.completedNodes += checkpoint.nodeOrder[levelIndex].length;
95
+ }
96
+ export function markCycleGroupCompleted(checkpoint, groupIndex) {
97
+ checkpoint.lastCompletedCycleGroup = groupIndex;
98
+ checkpoint.completedNodes += checkpoint.cycleGroups[groupIndex].size;
99
+ }
100
+ export function markFileNodeCompleted(checkpoint, index) {
101
+ checkpoint.lastCompletedFileNode = index;
102
+ checkpoint.completedNodes++;
103
+ }
104
+ // Marks a batch of file nodes as completed.
105
+ // batchEnd = index of the LAST node in the batch (inclusive).
106
+ // count = how many nodes were actually in the batch (may be < batchSize at end).
107
+ export function markFileNodeBatchCompleted(checkpoint, batchEnd, count) {
108
+ checkpoint.lastCompletedFileNode = batchEnd;
109
+ checkpoint.completedNodes += count;
110
+ }
@@ -0,0 +1,2 @@
1
+ import { type SummarizationInput } from "./types.js";
2
/**
 * Runs the summarization phase for the commit described by `input`.
 * The promise resolves with no value; in the implementation, completion,
 * pause, cancel and error outcomes are reported through `input.callbacks`.
 */
+ export declare function runSummarization(input: SummarizationInput): Promise<void>;
@@ -0,0 +1,281 @@
1
+ // ─── Summarization Architecture ───────────────────────────────────────────────
2
+ //
3
+ // Triggered after Phase 1 (analysis) completes for a job.
4
+ //
5
+ // SMART REUSE
6
+ // Before summarizing, we check if the commit was already summarized (skip entirely),
7
+ // or if a previous summarized commit exists — nodes whose codeHash hasn't changed
8
+ // get their summaries copied for free without any LLM call.
9
+ //
10
+ // JOB-SCOPED INDEXES
11
+ // edgeIndex, routeIndex, systemPrompt, and allNodesMap are built ONCE per job
12
+ // before the batch loop starts. A job is tied to a single commit — nodes, edges,
13
+ // and routes never change mid-job even if the user edits code. Building indexes
14
+ // once and reusing them across all batches avoids O(n×e) redundant work.
15
+ //
16
+ // THREE PHASES (in order)
17
+ // Phase 1 — nodeOrder[][] topo-sorted levels, each level runs in parallel
18
+ // Phase 2 — cycleGroups[] nodes with circular deps, grouped or individual
19
+ // Phase 3 — fileNodes[] FILE nodes last — use child summaries as context
20
+ //
21
+ // PAUSE / CANCEL
22
+ // Signals are checked between levels/groups — never mid-level.
23
+ // Levels complete atomically so resume always starts at a clean boundary.
24
+ // Checkpoint is saved after every level/group — O(1) resume via lastCompletedLevel.
25
+ //
26
+ // MAPREDUCE
27
+ // Nodes whose rawCode exceeds MAPREDUCE_TOKEN_THRESHOLD are split into chunks,
28
+ // each chunk summarized in parallel (map), then reduced into one final summary.
29
+ import { storage } from "../storage/index.js";
30
+ import { resolveConfig } from "../config/index.js";
31
+ import { FILE_BATCH_SIZE } from "./types.js";
32
+ import { buildTopologicalOrder } from "./topological.js";
33
+ import { createCheckpoint, loadCheckpoint, saveCheckpoint, deleteCheckpoint, getResumePoint, markLevelCompleted, markCycleGroupCompleted, markFileNodeBatchCompleted, } from "./checkpoint.js";
34
+ import { buildEdgeIndex, buildRouteIndex, buildSystemPrompt, buildPrompt, buildCycleGroupPrompt, } from "./prompts.js";
35
+ import { createLLMClient } from "./providers/index.js";
36
+ import { exceedsThreshold, mapreduceSummarize } from "./mapreduce.js";
37
+ import { withRetry } from "./retry.js";
38
+ import { MAX_GROUP_SUMMARY_SIZE } from "./types.js";
39
+ // ─── runSummarization ─────────────────────────────────────────────────────────
40
+ //
41
+ // Main entry point — called by runner.ts after Phase 1 completes.
42
+ // Handles fresh runs and resumes from checkpoint transparently.
43
+ //
44
+ // Flow:
45
+ // 1. Load commit data (nodes, edges, fingerprint, routes)
46
+ // 2. Check if already summarized → skip
47
+ // 3. Copy summaries from previous commit where codeHash matches → free reuse
48
+ // 4. Build indexes once (edges, routes, system prompt, allNodes map)
49
+ // 5. Build or load checkpoint
50
+ // 6. Run three phases: nodeOrder levels → cycleGroups → fileNodes
51
+ // 7. Save summaries to storage after every level/group
52
+ // 8. Check pause/cancel signals between levels
53
/**
 * Summarizes every node of one commit with the configured LLM, resuming from a
 * checkpoint when one exists.
 *
 * Flow:
 *   1. Load the commit graph from storage; call callbacks.onError if it is missing.
 *   2. If the commit is already marked summarized, call callbacks.onComplete and stop.
 *   3. Unless job.forceSummarize is set, copy summaries from
 *      input.previousCommitHash onto nodes whose codeHash is unchanged.
 *   4. Resolve config, create the LLM client, build the node/edge/route indexes
 *      and the system prompt once, then validate the LLM connection.
 *   5. Load the checkpoint, or build the topological order and create one.
 *   6. Run three phases in order — nodeOrder levels, cycleGroups, fileNodes —
 *      saving summaries and the checkpoint after every level/group/batch.
 *   7. After each unit, honour cancelRequested / pauseRequested on the queued job.
 *   8. Mark the commit summarized, delete the checkpoint, call callbacks.onComplete.
 *
 * Errors that escape withRetry / client.summarize are not caught here and
 * reject the returned promise.
 *
 * @param input Job context; the fields read are job, queue, graphId, commitHash,
 *              routes, callbacks and (optionally) previousCommitHash.
 * @returns Promise that settles once the run completes, pauses, cancels, or has
 *          reported an error through callbacks.onError.
 */
export async function runSummarization(input) {
    const { job, queue, graphId, commitHash, routes, callbacks } = input;
    // ── Step 1: Load commit data ───────────────────────────────────────────────
    const result = storage.getGraph(graphId, commitHash);
    if (!result) {
        callbacks.onError(`Commit data not found: ${graphId}/${commitHash}`);
        return;
    }
    // ── Step 2: Skip if already summarized ────────────────────────────────────
    if (storage.isCommitSummarized(graphId, commitHash)) {
        callbacks.onComplete();
        console.log("Already Summarized!");
        return;
    }
    // ── Step 3: Copy summaries from previous commit where codeHash matches ────
    // Skipped when forceSummarize is set so every node gets a fresh LLM call.
    if (input.previousCommitHash && !job.forceSummarize) {
        const prevResult = storage.getGraph(graphId, input.previousCommitHash);
        if (prevResult) {
            const prevById = new Map(prevResult.allNodes.map(n => [n.id, n]));
            let duplicateNodes = 0;
            for (const node of result.allNodes) {
                const prev = prevById.get(node.id);
                if (prev &&
                    prev.technicalSummary &&
                    node.codeHash &&
                    node.codeHash === prev.codeHash) {
                    node.technicalSummary = prev.technicalSummary;
                    node.businessSummary = prev.businessSummary;
                    node.security = prev.security;
                    node.summaryModel = prev.summaryModel;
                    node.summarizedAt = prev.summarizedAt;
                    duplicateNodes++;
                }
            }
            const toSummarize = result.allNodes.filter(n => !n.technicalSummary).length;
            console.log(`${duplicateNodes} copied, ${toSummarize} to summarize`);
            console.log(duplicateNodes, "Nodes' Summaries were copied from previous Hash", input.previousCommitHash);
        }
    }
    // ── Step 4: Build job-scoped indexes — built once, reused every batch ─────
    const config = await resolveConfig();
    const client = createLLMClient(config.summarization);
    const allNodesMap = new Map(result.allNodes.map(n => [n.id, n]));
    const edgeIndex = buildEdgeIndex(result.allEdges);
    const routeIndex = buildRouteIndex(routes);
    const systemPrompt = buildSystemPrompt(result.fingerprint);
    // Validate LLM connection before starting — fail fast
    try {
        await client.validateConnection();
    }
    catch (err) {
        const msg = err instanceof Error ? err.message : "LLM connection failed";
        callbacks.onError(msg);
        return;
    }
    // ── Step 5: Build or load checkpoint ──────────────────────────────────────
    let checkpoint = loadCheckpoint(graphId, commitHash);
    const isResume = !!checkpoint;
    if (!checkpoint) {
        // Fresh run — build topo order and create checkpoint
        const { nodeOrder, cycleGroups, fileNodes } = buildTopologicalOrder(result.allNodes, result.allEdges);
        checkpoint = createCheckpoint(graphId, commitHash, nodeOrder, cycleGroups, fileNodes);
    }
    // On resume, levels that are already complete are skipped by the loops below,
    // so saveBatch is never called for them. Flush every node that already carries
    // a summary in one pass so those summaries are persisted for this commit.
    if (isResume) {
        const alreadySummarized = result.allNodes.filter(n => n.technicalSummary);
        if (alreadySummarized.length > 0) {
            const updates = new Map(alreadySummarized.map(n => [n.id, {
                    technicalSummary: n.technicalSummary,
                    businessSummary: n.businessSummary ?? "",
                    security: n.security ?? { severity: "none", summary: "" },
                    summaryModel: n.summaryModel ?? "",
                    summarizedAt: n.summarizedAt ?? new Date().toISOString(),
                }]));
            storage.saveNodeSummaries(graphId, commitHash, updates);
            console.log(`♻️ Re-persisted ${alreadySummarized.length} summaries from pre-crash levels`);
        }
    }
    // Notify caller of total work
    callbacks.onStarted(checkpoint.totalNodes);
    // ── Helper: copy one LLM output onto a node in memory ─────────────────────
    function applySummary(node, output) {
        node.technicalSummary = output.technicalSummary;
        node.businessSummary = output.businessSummary;
        node.security = output.security;
        node.summaryModel = client.model;
        node.summarizedAt = new Date().toISOString();
    }
    // ── Helper: summarize one node ────────────────────────────────────────────
    async function summarizeNode(node) {
        // Skip if already summarized (e.g. copied from previous commit)
        if (node.technicalSummary) {
            console.log("Summary already exists, skipping for node", node.id);
            return;
        }
        console.log(` Starting summarization for node "${node.id}"`);
        // Oversized nodes go through map-reduce; everything else is a single call.
        const output = await withRetry(() => exceedsThreshold(node)
            ? mapreduceSummarize(node, client, systemPrompt)
            : client.summarize({
                messages: buildPrompt({ node, allNodes: allNodesMap, edgeIndex, routeIndex, systemPrompt }),
                temperature: 0,
            }), undefined, node.id);
        applySummary(node, output);
    }
    // ── Helper: save a batch of node updates to disk ──────────────────────────
    function saveBatch(nodes) {
        const updates = new Map(nodes.map(n => [n.id, {
                technicalSummary: n.technicalSummary,
                businessSummary: n.businessSummary ?? "",
                security: n.security ?? { severity: "none", summary: "" },
                summaryModel: n.summaryModel ?? client.model,
                summarizedAt: n.summarizedAt ?? new Date().toISOString(),
            }]));
        storage.saveNodeSummaries(graphId, commitHash, updates);
    }
    // ── Helper: check pause/cancel signals ────────────────────────────────────
    function shouldPause() { return queue.getJob(job.jobId)?.pauseRequested ?? false; }
    function shouldCancel() { return queue.getJob(job.jobId)?.cancelRequested ?? false; }
    // Handles a pending cancel (checked first) or pause request.
    // Returns true when the run must stop; the caller then returns immediately.
    function stopIfRequested() {
        if (shouldCancel()) {
            deleteCheckpoint(graphId, commitHash);
            callbacks.onCancel(true);
            return true;
        }
        if (shouldPause()) {
            checkpoint.status = "paused";
            saveCheckpoint(checkpoint);
            callbacks.onPause();
            return true;
        }
        return false;
    }
    // ── Derive start indexes for all three phases ─────────────────────────────
    // A phase that is already behind the resume point starts at its length,
    // which makes its loop body never run.
    const resumePoint = getResumePoint(checkpoint);
    const levelStart = resumePoint.phase === "nodes"
        ? resumePoint.index
        : checkpoint.nodeOrder.length;
    const cycleStart = resumePoint.phase === "cycles"
        ? resumePoint.index
        : resumePoint.phase === "nodes"
            ? 0
            : checkpoint.cycleGroups.length;
    const fileStart = resumePoint.phase === "files"
        ? resumePoint.index
        : resumePoint.phase === "done"
            ? checkpoint.fileNodes.length
            : 0;
    // ── Step 6: Phase 1 — nodeOrder levels ────────────────────────────────────
    // All nodes of a level run in parallel; the checkpoint advances only after
    // the whole level has been saved.
    for (let lvl = levelStart; lvl < checkpoint.nodeOrder.length; lvl++) {
        const level = checkpoint.nodeOrder[lvl];
        const nodes = level.map(id => allNodesMap.get(id)).filter(Boolean);
        const newInLevel = nodes.filter(n => !n.technicalSummary).length;
        console.log(`🔍 Level ${lvl}: ${nodes.length} nodes, ${newInLevel} new to summarize (skipped ${nodes.length - newInLevel})`);
        await Promise.all(nodes.map(summarizeNode));
        saveBatch(nodes);
        markLevelCompleted(checkpoint, lvl);
        saveCheckpoint(checkpoint);
        callbacks.onProgress(checkpoint.completedNodes, checkpoint.totalNodes, `level ${lvl}`);
        if (stopIfRequested()) {
            return;
        }
    }
    // ── Step 7: Phase 2 — cycleGroups ─────────────────────────────────────────
    for (let gi = cycleStart; gi < checkpoint.cycleGroups.length; gi++) {
        const group = checkpoint.cycleGroups[gi];
        const nodes = group.nodeIds.map(id => allNodesMap.get(id)).filter(Boolean);
        if (group.size <= MAX_GROUP_SUMMARY_SIZE) {
            // Small cycle — one grouped LLM call shared by every node in the group.
            // Skip the call when no resolved node still needs a summary, matching
            // the skip rule summarizeNode applies to individual nodes.
            if (nodes.some(n => !n.technicalSummary)) {
                console.log(` Starting grouped LLM summary for cycle group ${gi} (${group.nodeIds.length} nodes)`);
                const messages = buildCycleGroupPrompt(group.nodeIds, { allNodes: allNodesMap, edgeIndex, routeIndex, systemPrompt });
                // Same retry wrapper as single-node calls; the third argument is
                // passed the way summarizeNode passes node.id (presumably a label).
                const output = await withRetry(() => client.summarize({ messages, temperature: 0 }), undefined, `cycle-group-${gi}`);
                for (const node of nodes) {
                    applySummary(node, output);
                }
            }
            else {
                console.log("Summary already exists, skipping for cycle group", gi);
            }
        }
        else {
            // Large cycle — summarize individually
            await Promise.all(nodes.map(summarizeNode));
        }
        saveBatch(nodes);
        markCycleGroupCompleted(checkpoint, gi);
        saveCheckpoint(checkpoint);
        callbacks.onProgress(checkpoint.completedNodes, checkpoint.totalNodes, `cycle group ${gi}`);
        if (stopIfRequested()) {
            return;
        }
    }
    // ── Step 8: Phase 3 — fileNodes ───────────────────────────────────────────
    // FILE nodes run last, in batches of FILE_BATCH_SIZE.
    for (let fi = fileStart; fi < checkpoint.fileNodes.length; fi += FILE_BATCH_SIZE) {
        const batchEnd = Math.min(fi + FILE_BATCH_SIZE - 1, checkpoint.fileNodes.length - 1);
        const batchIds = checkpoint.fileNodes.slice(fi, batchEnd + 1);
        const batchNodes = batchIds.map(id => allNodesMap.get(id)).filter(Boolean);
        const newInBatch = batchNodes.filter(n => !n.technicalSummary).length;
        console.log(`🔍 File batch ${fi}-${batchEnd}: ${batchNodes.length} nodes, ${newInBatch} new to summarize (skipped ${batchNodes.length - newInBatch})`);
        await Promise.all(batchNodes.map(summarizeNode));
        saveBatch(batchNodes);
        markFileNodeBatchCompleted(checkpoint, batchEnd, batchNodes.length);
        saveCheckpoint(checkpoint);
        callbacks.onProgress(checkpoint.completedNodes, checkpoint.totalNodes, `file batch ${fi}-${batchEnd}`);
        if (stopIfRequested()) {
            return;
        }
    }
    // ── Done ──────────────────────────────────────────────────────────────────
    storage.markCommitSummarized(graphId, commitHash);
    deleteCheckpoint(graphId, commitHash);
    callbacks.onComplete();
}
@@ -0,0 +1,4 @@
1
+ import type { CodeNode } from "../types.js";
2
+ import type { LLMClient, NodeSummaryOutput } from "./providers/types.js";
3
/**
 * Reports whether a node is large enough to need map-reduce summarization.
 * The implementation returns false when `node.rawCode` is empty or missing, and
 * otherwise compares an estimate of ~4 characters per token against
 * MAPREDUCE_TOKEN_THRESHOLD.
 */
+ export declare function exceedsThreshold(node: CodeNode): boolean;
4
/**
 * Summarizes a large node by splitting its rawCode into chunks, summarizing the
 * chunks in parallel with `client`, then asking `client` to combine the chunk
 * summaries into one final output. `tokensUsed` on the result is the sum over
 * all chunk calls plus the final call.
 */
+ export declare function mapreduceSummarize(node: CodeNode, client: LLMClient, systemPrompt: string): Promise<NodeSummaryOutput>;
@@ -0,0 +1,87 @@
1
+ import { MAPREDUCE_TOKEN_THRESHOLD } from "./types.js";
2
+ // ─── Token Estimation ─────────────────────────────────────────────────────────
3
+ //
4
+ // 1 token ≈ 4 characters — standard rule of thumb, accurate enough for
5
+ // threshold checks. Avoids adding a full tokenizer dependency.
6
/**
 * Rough token count for a piece of text, using 1 token ≈ 4 characters.
 *
 * @param text String to measure.
 * @returns Character count divided by four, rounded up.
 */
function estimateTokens(text) {
    const { length: charCount } = text;
    const approxTokens = charCount / 4;
    return Math.ceil(approxTokens);
}
9
/**
 * Decides whether a node's code is too large for a single summarization call.
 *
 * @param node Node whose `rawCode` is measured.
 * @returns `false` when `rawCode` is empty or missing; otherwise whether the
 *          estimated token count is strictly above MAPREDUCE_TOKEN_THRESHOLD.
 */
export function exceedsThreshold(node) {
    const code = node.rawCode;
    return code
        ? estimateTokens(code) > MAPREDUCE_TOKEN_THRESHOLD
        : false;
}
14
+ // ─── Chunking ─────────────────────────────────────────────────────────────────
15
+ //
16
+ // Splits rawCode into chunks by line — never cuts mid-line.
17
+ // Each chunk targets MAPREDUCE_TOKEN_THRESHOLD tokens.
18
+ // Overlap of 10 lines between chunks preserves context at boundaries.
19
+ const CHUNK_OVERLAP_LINES = 10;
20
+ function chunkCode(rawCode) {
21
+ const lines = rawCode.split("\n");
22
+ const chunks = [];
23
+ const targetLines = Math.floor(MAPREDUCE_TOKEN_THRESHOLD * 4 / 50); // ~50 chars avg per line
24
+ let start = 0;
25
+ while (start < lines.length) {
26
+ const end = Math.min(start + targetLines, lines.length);
27
+ const chunk = lines.slice(start, end).join("\n");
28
+ chunks.push(chunk);
29
+ if (end === lines.length)
30
+ break;
31
+ start = end - CHUNK_OVERLAP_LINES; // overlap for context continuity
32
+ }
33
+ return chunks;
34
+ }
35
+ // ─── Map Phase ────────────────────────────────────────────────────────────────
36
+ //
37
+ // Summarizes each chunk individually.
38
+ // Chunk summaries are purely technical — no business/security analysis yet.
39
+ // That happens in the reduce phase where the full picture is available.
40
/**
 * Builds the chat messages for summarizing one chunk in the map phase.
 *
 * @param chunk        Source text of the chunk.
 * @param chunkIndex   Zero-based position of the chunk (shown to the model as 1-based).
 * @param totalChunks  Total number of chunks for the node.
 * @param nodeName     Name of the node the chunk belongs to.
 * @param systemPrompt Content of the system message.
 * @returns Two messages: the system prompt, then a user message containing the
 *          instructions followed by the chunk in a fenced code block.
 */
function buildChunkMessages(chunk, chunkIndex, totalChunks, nodeName, systemPrompt) {
    const humanIndex = chunkIndex + 1;
    const fence = "```";
    const userContent = [
        `You are summarizing chunk ${humanIndex} of ${totalChunks} from a large code node named "${nodeName}".`,
        `Provide a concise technical summary of what this chunk does. No business or security analysis yet.`,
        ``,
        fence,
        `${chunk}`,
        fence,
    ].join("\n");
    const systemMessage = { role: "system", content: systemPrompt };
    const userMessage = { role: "user", content: userContent };
    return [systemMessage, userMessage];
}
50
+ // ─── Reduce Phase ─────────────────────────────────────────────────────────────
51
+ //
52
+ // Takes all chunk summaries and produces the final NodeSummaryOutput.
53
+ // Uses the same XML format as single-node summarization — consistent parsing.
54
/**
 * Builds the chat messages for the reduce phase, which turns the per-chunk
 * summaries into one final summary.
 *
 * @param chunkSummaries Summaries from the map phase, in chunk order.
 * @param nodeName       Name of the node being summarized.
 * @param systemPrompt   Content of the system message.
 * @returns Two messages: the system prompt, then a user message listing each
 *          summary under a "Chunk N:" heading.
 */
function buildReduceMessages(chunkSummaries, nodeName, systemPrompt) {
    const describeChunk = (summary, position) => `Chunk ${position + 1}:\n${summary}`;
    const numberedSummaries = chunkSummaries.map(describeChunk).join("\n\n");
    const userContent = `The following are chunk summaries of a large code node named "${nodeName}".\n` +
        `Based on these summaries, produce the final complete summary in the required XML format.\n\n` +
        numberedSummaries;
    const systemMessage = { role: "system", content: systemPrompt };
    const userMessage = { role: "user", content: userContent };
    return [systemMessage, userMessage];
}
67
+ // ─── Public API ───────────────────────────────────────────────────────────────
68
+ // Summarizes a node that exceeds the token threshold via map-reduce.
69
+ // Called from the batch loop instead of buildPrompt when exceedsThreshold() is true.
70
/**
 * Summarizes a node whose code is too large for one call, via map-reduce.
 *
 * Map: the node's rawCode (empty string when absent) is chunked and every chunk
 * is sent to `client.summarize` in parallel. Reduce: the chunks' technical
 * summaries are combined by one more `client.summarize` call.
 *
 * @param node         Node to summarize; `rawCode` and `name` are read.
 * @param client       LLM client exposing `summarize({ messages, temperature })`.
 * @param systemPrompt System prompt used for every call.
 * @returns The reduce call's output, with `tokensUsed` replaced by the total
 *          across all map calls plus the reduce call.
 */
export async function mapreduceSummarize(node, client, systemPrompt) {
    const pieces = chunkCode(node.rawCode ?? "");
    // ── Map phase — every chunk is summarized concurrently ────────
    const pendingMaps = pieces.map((piece, position) => client.summarize({
        messages: buildChunkMessages(piece, position, pieces.length, node.name, systemPrompt),
        temperature: 0,
    }));
    const mapOutputs = await Promise.all(pendingMaps);
    const partialSummaries = [];
    let tokensSpent = 0;
    for (const mapOutput of mapOutputs) {
        partialSummaries.push(mapOutput.technicalSummary);
        tokensSpent += mapOutput.tokensUsed;
    }
    // ── Reduce phase — fold the chunk summaries into the final output ──
    const reduced = await client.summarize({
        messages: buildReduceMessages(partialSummaries, node.name, systemPrompt),
        temperature: 0,
    });
    return { ...reduced, tokensUsed: tokensSpent + reduced.tokensUsed };
}
@@ -0,0 +1,22 @@
1
+ import type { CodeNode, CodeEdge, RouteNode, BackendRouteNode, ProjectFingerprint } from "../types.js";
2
+ import type { LLMMessage } from "./providers/types.js";
3
/**
 * Lookup structure over graph edges, produced by buildEdgeIndex.
 * NOTE(review): the nested keys are not visible in this declaration —
 * presumably node id → edge kind → connected node ids; confirm against prompts.js.
 */
+ export interface EdgeIndex {
4
+ outgoing: Map<string, Map<string, string[]>>;
5
+ incoming: Map<string, Map<string, string[]>>;
6
+ }
7
/** Lookup of route nodes (frontend or backend) keyed by file path, produced by buildRouteIndex. */
+ export interface RouteIndex {
8
+ byFilePath: Map<string, RouteNode | BackendRouteNode>;
9
+ }
10
/** Builds an EdgeIndex from a list of edges. */
+ export declare function buildEdgeIndex(edges: CodeEdge[]): EdgeIndex;
11
/** Builds a RouteIndex from a list of frontend or backend route nodes. */
+ export declare function buildRouteIndex(routes: RouteNode[] | BackendRouteNode[]): RouteIndex;
12
/** Everything buildPrompt needs to produce the messages for one node. */
+ export interface PromptContext {
13
+ node: CodeNode;
14
+ allNodes: Map<string, CodeNode>;
15
+ edgeIndex: EdgeIndex;
16
+ routeIndex: RouteIndex;
17
+ systemPrompt: string;
18
+ }
19
/** Builds the system prompt string from the project fingerprint. */
+ export declare function buildSystemPrompt(fingerprint: ProjectFingerprint): string;
20
/** String-to-string label table; presumably edge kind → human-readable label — confirm in prompts.js. */
+ export declare const EDGE_LABELS: Record<string, string>;
21
/** Builds the LLM messages for summarizing the single node in `ctx.node`. */
+ export declare function buildPrompt(ctx: PromptContext): LLMMessage[];
22
/** Builds the LLM messages for summarizing a group of nodes together; takes the same context as buildPrompt minus `node`. */
+ export declare function buildCycleGroupPrompt(nodeIds: string[], ctx: Omit<PromptContext, "node">): LLMMessage[];