devlensio 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +674 -0
- package/dist/clustering/index.d.ts +27 -0
- package/dist/clustering/index.js +149 -0
- package/dist/config/index.d.ts +10 -0
- package/dist/config/index.js +78 -0
- package/dist/config/providers/file.d.ts +19 -0
- package/dist/config/providers/file.js +215 -0
- package/dist/config/providers/request.d.ts +2 -0
- package/dist/config/providers/request.js +72 -0
- package/dist/config/types.d.ts +46 -0
- package/dist/config/types.js +81 -0
- package/dist/config/writer.d.ts +29 -0
- package/dist/config/writer.js +103 -0
- package/dist/filesystem/appRouter.d.ts +2 -0
- package/dist/filesystem/appRouter.js +126 -0
- package/dist/filesystem/backendRoutes.d.ts +2 -0
- package/dist/filesystem/backendRoutes.js +161 -0
- package/dist/filesystem/index.d.ts +2 -0
- package/dist/filesystem/index.js +28 -0
- package/dist/filesystem/index.test.d.ts +1 -0
- package/dist/filesystem/index.test.js +178 -0
- package/dist/filesystem/pagesRouter.d.ts +2 -0
- package/dist/filesystem/pagesRouter.js +109 -0
- package/dist/fingerprint/detectors.d.ts +8 -0
- package/dist/fingerprint/detectors.js +174 -0
- package/dist/fingerprint/index.d.ts +2 -0
- package/dist/fingerprint/index.js +41 -0
- package/dist/fingerprint/index.test.d.ts +1 -0
- package/dist/fingerprint/index.test.js +148 -0
- package/dist/graph/buildLookup.d.ts +10 -0
- package/dist/graph/buildLookup.js +32 -0
- package/dist/graph/edges/callEdges.d.ts +7 -0
- package/dist/graph/edges/callEdges.js +145 -0
- package/dist/graph/edges/eventEdges.d.ts +7 -0
- package/dist/graph/edges/eventEdges.js +203 -0
- package/dist/graph/edges/guardEdges.d.ts +3 -0
- package/dist/graph/edges/guardEdges.js +232 -0
- package/dist/graph/edges/hookEdges.d.ts +3 -0
- package/dist/graph/edges/hookEdges.js +54 -0
- package/dist/graph/edges/importEdges.d.ts +8 -0
- package/dist/graph/edges/importEdges.js +224 -0
- package/dist/graph/edges/propEdges.d.ts +3 -0
- package/dist/graph/edges/propEdges.js +142 -0
- package/dist/graph/edges/routeEdge.d.ts +3 -0
- package/dist/graph/edges/routeEdge.js +124 -0
- package/dist/graph/edges/stateEdges.d.ts +3 -0
- package/dist/graph/edges/stateEdges.js +206 -0
- package/dist/graph/edges/testEdges.d.ts +3 -0
- package/dist/graph/edges/testEdges.js +143 -0
- package/dist/graph/edges/utils.d.ts +2 -0
- package/dist/graph/edges/utils.js +25 -0
- package/dist/graph/index.d.ts +6 -0
- package/dist/graph/index.js +65 -0
- package/dist/graph/index.test.d.ts +1 -0
- package/dist/graph/index.test.js +542 -0
- package/dist/graph/thirdPartyLibs.d.ts +8 -0
- package/dist/graph/thirdPartyLibs.js +162 -0
- package/dist/index.d.ts +15 -0
- package/dist/index.js +15 -0
- package/dist/jobs/index.d.ts +5 -0
- package/dist/jobs/index.js +11 -0
- package/dist/jobs/queue/interface.d.ts +13 -0
- package/dist/jobs/queue/interface.js +1 -0
- package/dist/jobs/queue/memory.d.ts +24 -0
- package/dist/jobs/queue/memory.js +291 -0
- package/dist/jobs/runner.d.ts +3 -0
- package/dist/jobs/runner.js +136 -0
- package/dist/jobs/types.d.ts +112 -0
- package/dist/jobs/types.js +33 -0
- package/dist/parser/directives.d.ts +4 -0
- package/dist/parser/directives.js +31 -0
- package/dist/parser/extractors/components.d.ts +5 -0
- package/dist/parser/extractors/components.js +240 -0
- package/dist/parser/extractors/functions.d.ts +4 -0
- package/dist/parser/extractors/functions.js +240 -0
- package/dist/parser/extractors/hooks.d.ts +4 -0
- package/dist/parser/extractors/hooks.js +128 -0
- package/dist/parser/extractors/stores.d.ts +3 -0
- package/dist/parser/extractors/stores.js +181 -0
- package/dist/parser/index.d.ts +14 -0
- package/dist/parser/index.js +168 -0
- package/dist/parser/index.test.d.ts +1 -0
- package/dist/parser/index.test.js +319 -0
- package/dist/parser/typeUtils.d.ts +9 -0
- package/dist/parser/typeUtils.js +46 -0
- package/dist/pipeline/index.d.ts +50 -0
- package/dist/pipeline/index.js +249 -0
- package/dist/scoring/connectionCounter.d.ts +28 -0
- package/dist/scoring/connectionCounter.js +134 -0
- package/dist/scoring/fileScorer.d.ts +2 -0
- package/dist/scoring/fileScorer.js +44 -0
- package/dist/scoring/index.d.ts +22 -0
- package/dist/scoring/index.js +130 -0
- package/dist/scoring/index.test.d.ts +1 -0
- package/dist/scoring/index.test.js +453 -0
- package/dist/scoring/nodeScorer.d.ts +3 -0
- package/dist/scoring/nodeScorer.js +108 -0
- package/dist/scoring/noiseFilter.d.ts +18 -0
- package/dist/scoring/noiseFilter.js +92 -0
- package/dist/storage/fileStorage.d.ts +117 -0
- package/dist/storage/fileStorage.js +616 -0
- package/dist/storage/index.d.ts +4 -0
- package/dist/storage/index.js +2 -0
- package/dist/storage/interface.d.ts +27 -0
- package/dist/storage/interface.js +1 -0
- package/dist/summarizer/checkpoint.d.ts +15 -0
- package/dist/summarizer/checkpoint.js +110 -0
- package/dist/summarizer/index.d.ts +2 -0
- package/dist/summarizer/index.js +281 -0
- package/dist/summarizer/mapreduce.d.ts +4 -0
- package/dist/summarizer/mapreduce.js +87 -0
- package/dist/summarizer/prompts.d.ts +22 -0
- package/dist/summarizer/prompts.js +205 -0
- package/dist/summarizer/providers/anthropic.d.ts +9 -0
- package/dist/summarizer/providers/anthropic.js +78 -0
- package/dist/summarizer/providers/gemini.d.ts +9 -0
- package/dist/summarizer/providers/gemini.js +79 -0
- package/dist/summarizer/providers/index.d.ts +3 -0
- package/dist/summarizer/providers/index.js +43 -0
- package/dist/summarizer/providers/ollama.d.ts +9 -0
- package/dist/summarizer/providers/ollama.js +23 -0
- package/dist/summarizer/providers/openRouter.d.ts +9 -0
- package/dist/summarizer/providers/openRouter.js +19 -0
- package/dist/summarizer/providers/openai.d.ts +9 -0
- package/dist/summarizer/providers/openai.js +72 -0
- package/dist/summarizer/providers/types.d.ts +32 -0
- package/dist/summarizer/providers/types.js +1 -0
- package/dist/summarizer/retry.d.ts +7 -0
- package/dist/summarizer/retry.js +51 -0
- package/dist/summarizer/topological.d.ts +3 -0
- package/dist/summarizer/topological.js +105 -0
- package/dist/summarizer/types.d.ts +57 -0
- package/dist/summarizer/types.js +17 -0
- package/dist/types.d.ts +78 -0
- package/dist/types.js +1 -0
- package/package.json +48 -0
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
// Purely a read/write utility — no LLM calls, no complex logic.
|
|
2
|
+
//
|
|
3
|
+
// createCheckpoint() — called at start of fresh summarization run
|
|
4
|
+
// loadCheckpoint() — called on resume
|
|
5
|
+
// saveCheckpoint() — called after every level/group completes
|
|
6
|
+
// deleteCheckpoint() — called on cancel or completion
|
|
7
|
+
// getResumePoint() — returns { phase, index } — the phase and position to continue from
|
|
8
|
+
import fs from "fs";
|
|
9
|
+
import { getCheckpointPath } from "../storage/fileStorage.js";
|
|
10
|
+
// ─── Load / Save / Delete ─────────────────────────────────────────────────────
|
|
11
|
+
// Reads the checkpoint stored for (graphId, commitHash).
//
// Returns the parsed checkpoint object, or undefined when the file is missing,
// unreadable, not valid JSON, or lacks the three array fields (nodeOrder,
// cycleGroups, fileNodes) that getResumePoint() dereferences. Returning
// undefined for a malformed file keeps a damaged checkpoint from crashing the
// resume path later on.
export function loadCheckpoint(graphId, commitHash) {
    const file = getCheckpointPath(graphId, commitHash);
    let parsed;
    try {
        // A missing file makes readFileSync throw, which lands in the catch
        // below — no separate existsSync() probe is required.
        parsed = JSON.parse(fs.readFileSync(file, "utf-8"));
    }
    catch {
        return undefined;
    }
    // Minimal shape check: only the fields written by createCheckpoint() and
    // read unconditionally by getResumePoint() are verified.
    const usable = parsed !== null &&
        typeof parsed === "object" &&
        Array.isArray(parsed.nodeOrder) &&
        Array.isArray(parsed.cycleGroups) &&
        Array.isArray(parsed.fileNodes);
    return usable ? parsed : undefined;
}
|
|
22
|
+
// Persists the checkpoint to its file and stamps checkpoint.updatedAt with the
// current time (the passed-in object is mutated).
//
// The JSON is first written to a sibling "<file>.tmp" and then renamed over the
// real file, so the checkpoint file is replaced in a single rename step rather
// than being rewritten in place.
export function saveCheckpoint(checkpoint) {
    const { graphId, commitHash } = checkpoint;
    const destination = getCheckpointPath(graphId, commitHash);
    checkpoint.updatedAt = new Date().toISOString();
    const staging = `${destination}.tmp`;
    const serialized = JSON.stringify(checkpoint, null, 2);
    fs.writeFileSync(staging, serialized, "utf-8");
    fs.renameSync(staging, destination);
}
|
|
30
|
+
// Removes the checkpoint file for (graphId, commitHash).
//
// A checkpoint that does not exist is not an error: the unlink is attempted
// directly and an ENOENT failure is ignored. This avoids the window between a
// separate existsSync() check and the unlink in which the file could vanish.
// Any other filesystem error is rethrown.
export function deleteCheckpoint(graphId, commitHash) {
    const file = getCheckpointPath(graphId, commitHash);
    try {
        fs.unlinkSync(file);
    }
    catch (err) {
        if (err?.code !== "ENOENT")
            throw err;
    }
}
|
|
35
|
+
// ─── Create ───────────────────────────────────────────────────────────────────
|
|
36
|
+
//
|
|
37
|
+
// Called once at the start of a fresh summarization run.
|
|
38
|
+
// nodeOrder is now string[][] — each inner array is one parallel level.
|
|
39
|
+
// On resume we load this file and never redo the topo sort.
|
|
40
|
+
// Builds the initial checkpoint for a fresh run, writes it via saveCheckpoint(),
// and returns it.
//
//   nodeOrder   — array of levels; each level's length is counted toward totalNodes
//   cycleGroups — array of groups; each group's `size` is counted toward totalNodes
//   fileNodes   — array whose length is counted toward totalNodes
//
// All three "lastCompleted*" cursors start at -1 (nothing done yet) and
// completedNodes starts at 0.
export function createCheckpoint(graphId, commitHash, nodeOrder, cycleGroups, fileNodes) {
    const timestamp = new Date().toISOString();
    let regularCount = 0;
    nodeOrder.forEach((level) => {
        regularCount += level.length;
    });
    let cycleCount = 0;
    cycleGroups.forEach((group) => {
        cycleCount += group.size;
    });
    const checkpoint = {
        graphId,
        commitHash,
        status: "running",
        createdAt: timestamp,
        updatedAt: timestamp,
        nodeOrder,
        cycleGroups,
        fileNodes,
        lastCompletedLevel: -1,
        lastCompletedCycleGroup: -1,
        lastCompletedFileNode: -1,
        totalNodes: regularCount + cycleCount + fileNodes.length,
        completedNodes: 0,
    };
    saveCheckpoint(checkpoint);
    return checkpoint;
}
|
|
64
|
+
// Determines where a run should continue.
//
// The three phases are inspected in order — "nodes" (nodeOrder levels),
// "cycles" (cycleGroups), "files" (fileNodes). The first phase whose
// lastCompleted* cursor is still short of its final index wins, and the result
// is { phase, index } with index = cursor + 1. When every phase is finished
// (or empty) the result is { phase: "done", index: -1 }.
export function getResumePoint(checkpoint) {
    // Accessors are lazy so a later phase's fields are only touched once the
    // earlier phases are known to be complete.
    const phases = [
        { phase: "nodes", cursor: () => checkpoint.lastCompletedLevel, items: () => checkpoint.nodeOrder },
        { phase: "cycles", cursor: () => checkpoint.lastCompletedCycleGroup, items: () => checkpoint.cycleGroups },
        { phase: "files", cursor: () => checkpoint.lastCompletedFileNode, items: () => checkpoint.fileNodes },
    ];
    for (const { phase, cursor, items } of phases) {
        if (cursor() < items().length - 1) {
            return { phase, index: cursor() + 1 };
        }
    }
    return { phase: "done", index: -1 };
}
|
|
88
|
+
// ─── Progress update helpers ──────────────────────────────────────────────────
|
|
89
|
+
//
|
|
90
|
+
// Called by the batch loop after each level/group/file completes.
|
|
91
|
+
// Marks an entire level as completed — levels are atomic.
|
|
92
|
+
// Records that level `levelIndex` of checkpoint.nodeOrder is finished: moves the
// level cursor to it and adds that level's node count to completedNodes.
// Mutates the checkpoint in memory only — nothing is written to disk here.
export function markLevelCompleted(checkpoint, levelIndex) {
    checkpoint.lastCompletedLevel = levelIndex;
    const finishedLevel = checkpoint.nodeOrder[levelIndex];
    checkpoint.completedNodes = checkpoint.completedNodes + finishedLevel.length;
}
|
|
96
|
+
// Records that cycle group `groupIndex` is finished: moves the cycle-group
// cursor to it and adds the group's `size` to completedNodes.
// Mutates the checkpoint in memory only.
export function markCycleGroupCompleted(checkpoint, groupIndex) {
    checkpoint.lastCompletedCycleGroup = groupIndex;
    const finishedGroup = checkpoint.cycleGroups[groupIndex];
    checkpoint.completedNodes = checkpoint.completedNodes + finishedGroup.size;
}
|
|
100
|
+
// Records that the file node at position `index` is finished: moves the
// file-node cursor to it and bumps completedNodes by one.
// Mutates the checkpoint in memory only.
export function markFileNodeCompleted(checkpoint, index) {
    checkpoint.lastCompletedFileNode = index;
    ++checkpoint.completedNodes;
}
|
|
104
|
+
// Marks a batch of file nodes as completed.
|
|
105
|
+
// batchEnd = index of the LAST node in the batch (inclusive).
|
|
106
|
+
// count = how many nodes were actually in the batch (may be < batchSize at end).
|
|
107
|
+
// Records a finished batch of file nodes.
//   batchEnd — index of the last file node in the batch (inclusive); becomes
//              the new file-node cursor
//   count    — number added to completedNodes for this batch
// Mutates the checkpoint in memory only.
export function markFileNodeBatchCompleted(checkpoint, batchEnd, count) {
    checkpoint.lastCompletedFileNode = batchEnd;
    checkpoint.completedNodes = checkpoint.completedNodes + count;
}
|
|
@@ -0,0 +1,281 @@
|
|
|
1
|
+
// ─── Summarization Architecture ───────────────────────────────────────────────
|
|
2
|
+
//
|
|
3
|
+
// Triggered after Phase 1 (analysis) completes for a job.
|
|
4
|
+
//
|
|
5
|
+
// SMART REUSE
|
|
6
|
+
// Before summarizing, we check if the commit was already summarized (skip entirely),
|
|
7
|
+
// or if a previous summarized commit exists — nodes whose codeHash hasn't changed
|
|
8
|
+
// get their summaries copied for free without any LLM call.
|
|
9
|
+
//
|
|
10
|
+
// JOB-SCOPED INDEXES
|
|
11
|
+
// edgeIndex, routeIndex, systemPrompt, and allNodesMap are built ONCE per job
|
|
12
|
+
// before the batch loop starts. A job is tied to a single commit — nodes, edges,
|
|
13
|
+
// and routes never change mid-job even if the user edits code. Building indexes
|
|
14
|
+
// once and reusing them across all batches avoids O(n×e) redundant work.
|
|
15
|
+
//
|
|
16
|
+
// THREE PHASES (in order)
|
|
17
|
+
// Phase 1 — nodeOrder[][] topo-sorted levels, each level runs in parallel
|
|
18
|
+
// Phase 2 — cycleGroups[] nodes with circular deps, grouped or individual
|
|
19
|
+
// Phase 3 — fileNodes[] FILE nodes last — use child summaries as context
|
|
20
|
+
//
|
|
21
|
+
// PAUSE / CANCEL
|
|
22
|
+
// Signals are checked between levels/groups — never mid-level.
|
|
23
|
+
// Levels complete atomically so resume always starts at a clean boundary.
|
|
24
|
+
// Checkpoint is saved after every level/group — O(1) resume via lastCompletedLevel.
|
|
25
|
+
//
|
|
26
|
+
// MAPREDUCE
|
|
27
|
+
// Nodes whose rawCode exceeds MAPREDUCE_TOKEN_THRESHOLD are split into chunks,
|
|
28
|
+
// each chunk summarized in parallel (map), then reduced into one final summary.
|
|
29
|
+
import { storage } from "../storage/index.js";
|
|
30
|
+
import { resolveConfig } from "../config/index.js";
|
|
31
|
+
import { FILE_BATCH_SIZE } from "./types.js";
|
|
32
|
+
import { buildTopologicalOrder } from "./topological.js";
|
|
33
|
+
import { createCheckpoint, loadCheckpoint, saveCheckpoint, deleteCheckpoint, getResumePoint, markLevelCompleted, markCycleGroupCompleted, markFileNodeBatchCompleted, } from "./checkpoint.js";
|
|
34
|
+
import { buildEdgeIndex, buildRouteIndex, buildSystemPrompt, buildPrompt, buildCycleGroupPrompt, } from "./prompts.js";
|
|
35
|
+
import { createLLMClient } from "./providers/index.js";
|
|
36
|
+
import { exceedsThreshold, mapreduceSummarize } from "./mapreduce.js";
|
|
37
|
+
import { withRetry } from "./retry.js";
|
|
38
|
+
import { MAX_GROUP_SUMMARY_SIZE } from "./types.js";
|
|
39
|
+
// ─── runSummarization ─────────────────────────────────────────────────────────
|
|
40
|
+
//
|
|
41
|
+
// Main entry point — called by runner.ts after Phase 1 completes.
|
|
42
|
+
// Handles fresh runs and resumes from checkpoint transparently.
|
|
43
|
+
//
|
|
44
|
+
// Flow:
|
|
45
|
+
// 1. Load commit data (nodes, edges, fingerprint, routes)
|
|
46
|
+
// 2. Check if already summarized → skip
|
|
47
|
+
// 3. Copy summaries from previous commit where codeHash matches → free reuse
|
|
48
|
+
// 4. Build indexes once (edges, routes, system prompt, allNodes map)
|
|
49
|
+
// 5. Build or load checkpoint
|
|
50
|
+
// 6. Run three phases: nodeOrder levels → cycleGroups → fileNodes
|
|
51
|
+
// 7. Save summaries to storage after every level/group
|
|
52
|
+
// 8. Check pause/cancel signals between levels
|
|
53
|
+
// Summarizes every node of one commit, resuming from a checkpoint when one exists.
//
// input fields read here:
//   job                — uses job.jobId (signal lookup) and job.forceSummarize
//   queue              — queue.getJob(jobId) exposes pauseRequested / cancelRequested
//   graphId, commitHash — identify the commit in storage and the checkpoint file
//   routes             — passed to buildRouteIndex()
//   previousCommitHash — optional; source for copying unchanged summaries
//   callbacks          — onError, onComplete, onStarted, onProgress, onCancel, onPause
//
// Outcomes are reported through callbacks; the returned promise resolves with no
// value. Errors thrown by the LLM calls during the three phases are not caught
// here, so they reject the returned promise.
export async function runSummarization(input) {
    const { job, queue, graphId, commitHash, routes, callbacks } = input;
    // ── Step 1: Load commit data ───────────────────────────────────────────────
    const result = storage.getGraph(graphId, commitHash);
    if (!result) {
        callbacks.onError(`Commit data not found: ${graphId}/${commitHash}`);
        return;
    }
    // ── Step 2: Skip if already summarized ────────────────────────────────────
    if (storage.isCommitSummarized(graphId, commitHash)) {
        callbacks.onComplete();
        console.log("Already Summarized!");
        return;
    }
    // ── Step 3: Copy summaries from previous commit where codeHash matches ────
    // A node whose codeHash equals the previous commit's node with the same id
    // reuses that node's summary fields instead of costing an LLM call.
    // Skipped when forceSummarize is set so every node gets a fresh summary.
    if (input.previousCommitHash && !job.forceSummarize) {
        const prevResult = storage.getGraph(graphId, input.previousCommitHash);
        if (prevResult) {
            const prevById = new Map(prevResult.allNodes.map(n => [n.id, n]));
            let duplicateNodes = 0;
            for (const node of result.allNodes) {
                const prev = prevById.get(node.id);
                if (prev &&
                    prev.technicalSummary &&
                    node.codeHash &&
                    node.codeHash === prev.codeHash) {
                    node.technicalSummary = prev.technicalSummary;
                    node.businessSummary = prev.businessSummary;
                    node.security = prev.security;
                    node.summaryModel = prev.summaryModel;
                    node.summarizedAt = prev.summarizedAt;
                    duplicateNodes++;
                }
            }
            const toSummarize = result.allNodes.filter(n => !n.technicalSummary).length;
            console.log(`${duplicateNodes} copied, ${toSummarize} to summarize`);
            console.log(duplicateNodes, "Nodes' Summaries were copied from previous Hash", input.previousCommitHash);
        }
    }
    // ── Step 4: Build job-scoped indexes — built once, reused every batch ─────
    const config = await resolveConfig();
    const client = createLLMClient(config.summarization);
    const allNodesMap = new Map(result.allNodes.map(n => [n.id, n]));
    const edgeIndex = buildEdgeIndex(result.allEdges);
    const routeIndex = buildRouteIndex(routes);
    const systemPrompt = buildSystemPrompt(result.fingerprint);
    // Validate LLM connection before starting — fail fast
    try {
        await client.validateConnection();
    }
    catch (err) {
        const msg = err instanceof Error ? err.message : "LLM connection failed";
        callbacks.onError(msg);
        return;
    }
    // ── Helper: node list → storage update map ────────────────────────────────
    // Shared by the resume flush and saveBatch(); only the fallback used for a
    // missing summaryModel differs between the two call sites.
    function toSummaryUpdates(nodes, fallbackModel) {
        return new Map(nodes.map(n => [n.id, {
                technicalSummary: n.technicalSummary,
                businessSummary: n.businessSummary ?? "",
                security: n.security ?? { severity: "none", summary: "" },
                summaryModel: n.summaryModel ?? fallbackModel,
                summarizedAt: n.summarizedAt ?? new Date().toISOString(),
            }]));
    }
    // ── Step 5: Build or load checkpoint ──────────────────────────────────────
    let checkpoint = loadCheckpoint(graphId, commitHash);
    const isResume = !!checkpoint;
    if (!checkpoint) {
        // Fresh run — build topo order and create checkpoint
        const { nodeOrder, cycleGroups, fileNodes } = buildTopologicalOrder(result.allNodes, result.allEdges);
        checkpoint = createCheckpoint(graphId, commitHash, nodeOrder, cycleGroups, fileNodes);
    }
    if (isResume) {
        // A checkpoint written by a pause still says "paused"; the run is active
        // again, so later saves should record "running".
        checkpoint.status = "running";
        // Levels finished before the restart are skipped below, so saveBatch is
        // never called for them. Flush every node that already carries a summary
        // in one pass so those summaries are persisted for this commit.
        const alreadySummarized = result.allNodes.filter(n => n.technicalSummary);
        if (alreadySummarized.length > 0) {
            storage.saveNodeSummaries(graphId, commitHash, toSummaryUpdates(alreadySummarized, ""));
            console.log(`♻️ Re-persisted ${alreadySummarized.length} summaries from pre-crash levels`);
        }
    }
    // Notify caller of total work
    callbacks.onStarted(checkpoint.totalNodes);
    // ── Helper: summarize one node ────────────────────────────────────────────
    async function summarizeNode(node) {
        // Skip if already summarized (copied from previous commit)
        if (node.technicalSummary) {
            console.log("Summary already exists, skipping for node", node.id);
            return;
        }
        console.log(` Starting summarization for node "${node.id}"`);
        const output = await withRetry(() => exceedsThreshold(node)
            ? mapreduceSummarize(node, client, systemPrompt)
            : client.summarize({
                messages: buildPrompt({ node, allNodes: allNodesMap, edgeIndex, routeIndex, systemPrompt }),
                temperature: 0,
            }), undefined, node.id);
        // Write summary back onto node in memory
        node.technicalSummary = output.technicalSummary;
        node.businessSummary = output.businessSummary;
        node.security = output.security;
        node.summaryModel = client.model;
        node.summarizedAt = new Date().toISOString();
    }
    // ── Helper: save a batch of node updates to disk ──────────────────────────
    function saveBatch(nodes) {
        storage.saveNodeSummaries(graphId, commitHash, toSummaryUpdates(nodes, client.model));
    }
    // ── Helper: check pause/cancel signals ────────────────────────────────────
    function shouldPause() { return queue.getJob(job.jobId)?.pauseRequested ?? false; }
    function shouldCancel() { return queue.getJob(job.jobId)?.cancelRequested ?? false; }
    // Runs after each completed level/group/batch. Returns true when the run
    // must stop; cancel is checked before pause. Cancel removes the checkpoint,
    // pause keeps it with status "paused".
    function stopRequested() {
        if (shouldCancel()) {
            deleteCheckpoint(graphId, commitHash);
            callbacks.onCancel(true);
            return true;
        }
        if (shouldPause()) {
            checkpoint.status = "paused";
            saveCheckpoint(checkpoint);
            callbacks.onPause();
            return true;
        }
        return false;
    }
    // ── Derive start indexes for all three phases ─────────────────────────────
    // A phase that lies before the resume point starts at its length (loop is
    // skipped); a phase that lies after it starts at 0.
    const resumePoint = getResumePoint(checkpoint);
    const levelStart = resumePoint.phase === "nodes"
        ? resumePoint.index
        : checkpoint.nodeOrder.length;
    const cycleStart = resumePoint.phase === "cycles"
        ? resumePoint.index
        : resumePoint.phase === "nodes" ? 0 : checkpoint.cycleGroups.length;
    const fileStart = resumePoint.phase === "files"
        ? resumePoint.index
        : resumePoint.phase === "done" ? checkpoint.fileNodes.length : 0;
    // ── Step 6: Phase 1 — nodeOrder levels ────────────────────────────────────
    // All nodes of a level are summarized in parallel; the checkpoint is saved
    // once the whole level has finished.
    for (let lvl = levelStart; lvl < checkpoint.nodeOrder.length; lvl++) {
        const level = checkpoint.nodeOrder[lvl];
        const nodes = level.map(id => allNodesMap.get(id)).filter(Boolean);
        const newInLevel = nodes.filter(n => !n.technicalSummary).length;
        console.log(`🔍 Level ${lvl}: ${nodes.length} nodes, ${newInLevel} new to summarize (skipped ${nodes.length - newInLevel})`);
        await Promise.all(nodes.map(summarizeNode));
        saveBatch(nodes);
        markLevelCompleted(checkpoint, lvl);
        saveCheckpoint(checkpoint);
        callbacks.onProgress(checkpoint.completedNodes, checkpoint.totalNodes, `level ${lvl}`);
        if (stopRequested())
            return;
    }
    // ── Step 7: Phase 2 — cycleGroups ─────────────────────────────────────────
    for (let gi = cycleStart; gi < checkpoint.cycleGroups.length; gi++) {
        const group = checkpoint.cycleGroups[gi];
        const nodes = group.nodeIds.map(id => allNodesMap.get(id)).filter(Boolean);
        if (group.size <= MAX_GROUP_SUMMARY_SIZE) {
            // Small cycle — one grouped LLM call, but only if at least one node
            // still lacks a summary (mirrors the skip in summarizeNode).
            if (nodes.some(n => !n.technicalSummary)) {
                console.log(` Starting grouped LLM summary for cycle group ${gi} (${group.nodeIds.length} nodes)`);
                const messages = buildCycleGroupPrompt(group.nodeIds, { allNodes: allNodesMap, edgeIndex, routeIndex, systemPrompt });
                // Retried like the per-node calls. The third argument follows the
                // per-node call, which passes the node id there — presumably a
                // label; confirm against retry.js.
                const output = await withRetry(() => client.summarize({ messages, temperature: 0 }), undefined, `cycle-group-${gi}`);
                // The grouped call yields one summary — apply it to every node in the group
                for (const node of nodes) {
                    node.technicalSummary = output.technicalSummary;
                    node.businessSummary = output.businessSummary;
                    node.security = output.security;
                    node.summaryModel = client.model;
                    node.summarizedAt = new Date().toISOString();
                }
            }
            else {
                console.log(`Summaries already exist, skipping cycle group ${gi}`);
            }
        }
        else {
            // Large cycle — summarize individually
            await Promise.all(nodes.map(summarizeNode));
        }
        saveBatch(nodes);
        markCycleGroupCompleted(checkpoint, gi);
        saveCheckpoint(checkpoint);
        callbacks.onProgress(checkpoint.completedNodes, checkpoint.totalNodes, `cycle group ${gi}`);
        if (stopRequested())
            return;
    }
    // ── Step 8: Phase 3 — fileNodes ───────────────────────────────────────────
    // FILE nodes run last, in batches of FILE_BATCH_SIZE.
    for (let fi = fileStart; fi < checkpoint.fileNodes.length; fi += FILE_BATCH_SIZE) {
        const batchEnd = Math.min(fi + FILE_BATCH_SIZE - 1, checkpoint.fileNodes.length - 1);
        const batchIds = checkpoint.fileNodes.slice(fi, batchEnd + 1);
        const batchNodes = batchIds.map(id => allNodesMap.get(id)).filter(Boolean);
        const newInBatch = batchNodes.filter(n => !n.technicalSummary).length;
        console.log(`🔍 File batch ${fi}-${batchEnd}: ${batchNodes.length} nodes, ${newInBatch} new to summarize (skipped ${batchNodes.length - newInBatch})`);
        await Promise.all(batchNodes.map(summarizeNode));
        saveBatch(batchNodes);
        // Count ids, not resolved nodes: totalNodes was computed from
        // fileNodes.length, so counting only resolved nodes could leave the
        // progress short of the total forever.
        markFileNodeBatchCompleted(checkpoint, batchEnd, batchIds.length);
        saveCheckpoint(checkpoint);
        callbacks.onProgress(checkpoint.completedNodes, checkpoint.totalNodes, `file batch ${fi}-${batchEnd}`);
        if (stopRequested())
            return;
    }
    // ── Done ──────────────────────────────────────────────────────────────────
    storage.markCommitSummarized(graphId, commitHash);
    deleteCheckpoint(graphId, commitHash);
    callbacks.onComplete();
}
|
|
@@ -0,0 +1,4 @@
|
|
|
1
|
+
import type { CodeNode } from "../types.js";
|
|
2
|
+
import type { LLMClient, NodeSummaryOutput } from "./providers/types.js";
|
|
3
|
+
/**
 * Tells whether a node is large enough to need map-reduce summarization.
 *
 * @returns `false` when `node.rawCode` is missing or empty; otherwise `true`
 *   when the estimated token count of `rawCode` (roughly one token per four
 *   characters) is greater than `MAPREDUCE_TOKEN_THRESHOLD`.
 */
export declare function exceedsThreshold(node: CodeNode): boolean;
|
|
4
|
+
/**
 * Summarizes a large node via map-reduce: `node.rawCode` is split into
 * line-based chunks, every chunk is summarized with its own `client.summarize`
 * call (all started in parallel), and one final call reduces the chunk
 * summaries into the result.
 *
 * @param systemPrompt - Sent as the system message of every call.
 * @returns The reduce call's output, with `tokensUsed` replaced by the sum over
 *   all chunk calls plus the reduce call.
 */
export declare function mapreduceSummarize(node: CodeNode, client: LLMClient, systemPrompt: string): Promise<NodeSummaryOutput>;
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
import { MAPREDUCE_TOKEN_THRESHOLD } from "./types.js";
|
|
2
|
+
// ─── Token Estimation ─────────────────────────────────────────────────────────
|
|
3
|
+
//
|
|
4
|
+
// 1 token ≈ 4 characters — standard rule of thumb, accurate enough for
|
|
5
|
+
// threshold checks. Avoids adding a full tokenizer dependency.
|
|
6
|
+
// Rough token estimate for a piece of text: one token per four characters,
// rounded up. Cheap enough for threshold checks without a real tokenizer.
function estimateTokens(text) {
    const CHARS_PER_TOKEN = 4;
    const { length } = text;
    return Math.ceil(length / CHARS_PER_TOKEN);
}
|
|
9
|
+
// True when the node's rawCode is estimated to be larger than
// MAPREDUCE_TOKEN_THRESHOLD tokens. A node without rawCode never exceeds it.
export function exceedsThreshold(node) {
    const code = node.rawCode;
    return code ? estimateTokens(code) > MAPREDUCE_TOKEN_THRESHOLD : false;
}
|
|
14
|
+
// ─── Chunking ─────────────────────────────────────────────────────────────────
|
|
15
|
+
//
|
|
16
|
+
// Splits rawCode into chunks by line — never cuts mid-line.
|
|
17
|
+
// Each chunk targets MAPREDUCE_TOKEN_THRESHOLD tokens.
|
|
18
|
+
// Overlap of 10 lines between chunks preserves context at boundaries.
|
|
19
|
+
// Number of trailing lines of one chunk that are repeated at the start of the next.
const CHUNK_OVERLAP_LINES = 10;
// Splits rawCode into chunks of whole lines.
//
//   rawCode      — source text; split on "\n"
//   targetLines  — lines per chunk. Defaults to the line count derived from
//                  MAPREDUCE_TOKEN_THRESHOLD (4 chars per token, ~50 chars per line).
//   overlapLines — lines shared between consecutive chunks. Defaults to
//                  CHUNK_OVERLAP_LINES.
//
// Returns the chunks in order; the last chunk always ends on the last line.
//
// The chunk size is clamped to at least 1 and the overlap to at most
// (chunk size - 1), so the start position advances on every iteration. Without
// that clamp a target size at or below the overlap would never move forward
// (or would step to a negative index) and the loop would not terminate.
function chunkCode(rawCode, targetLines = Math.floor(MAPREDUCE_TOKEN_THRESHOLD * 4 / 50), overlapLines = CHUNK_OVERLAP_LINES) {
    const lines = rawCode.split("\n");
    const chunkSize = Math.max(1, Math.floor(targetLines) || 1);
    const overlap = Math.min(Math.max(0, Math.floor(overlapLines) || 0), chunkSize - 1);
    const step = chunkSize - overlap; // always >= 1
    const chunks = [];
    for (let start = 0; start < lines.length; start += step) {
        const end = Math.min(start + chunkSize, lines.length);
        chunks.push(lines.slice(start, end).join("\n"));
        if (end === lines.length)
            break;
    }
    return chunks;
}
|
|
35
|
+
// ─── Map Phase ────────────────────────────────────────────────────────────────
|
|
36
|
+
//
|
|
37
|
+
// Summarizes each chunk individually.
|
|
38
|
+
// Chunk summaries are purely technical — no business/security analysis yet.
|
|
39
|
+
// That happens in the reduce phase where the full picture is available.
|
|
40
|
+
// Builds the two-message prompt (system + user) for one chunk in the map phase.
//   chunk        — the chunk's source text, placed inside a fenced block
//   chunkIndex   — zero-based position; shown to the model as chunkIndex + 1
//   totalChunks  — total number of chunks for the node
//   nodeName     — name of the node the chunk belongs to
//   systemPrompt — content of the system message
function buildChunkMessages(chunk, chunkIndex, totalChunks, nodeName, systemPrompt) {
    const fence = "```";
    const userContent = [
        `You are summarizing chunk ${chunkIndex + 1} of ${totalChunks} from a large code node named "${nodeName}".`,
        `Provide a concise technical summary of what this chunk does. No business or security analysis yet.`,
        ``,
        fence,
        `${chunk}`,
        fence,
    ].join("\n");
    const systemMessage = { role: "system", content: systemPrompt };
    const userMessage = { role: "user", content: userContent };
    return [systemMessage, userMessage];
}
|
|
50
|
+
// ─── Reduce Phase ─────────────────────────────────────────────────────────────
|
|
51
|
+
//
|
|
52
|
+
// Takes all chunk summaries and produces the final NodeSummaryOutput.
|
|
53
|
+
// Uses the same XML format as single-node summarization — consistent parsing.
|
|
54
|
+
// Builds the two-message prompt (system + user) for the reduce phase.
// Each chunk summary is listed as "Chunk N:" followed by its text, with a blank
// line between entries, after a short instruction that asks for the final
// summary in the required XML format.
function buildReduceMessages(chunkSummaries, nodeName, systemPrompt) {
    const summaryList = chunkSummaries
        .map((text, position) => `Chunk ${position + 1}:\n${text}`)
        .join("\n\n");
    const userContent = [
        `The following are chunk summaries of a large code node named "${nodeName}".`,
        `Based on these summaries, produce the final complete summary in the required XML format.`,
        ``,
        `${summaryList}`,
    ].join("\n");
    const systemMessage = { role: "system", content: systemPrompt };
    const userMessage = { role: "user", content: userContent };
    return [systemMessage, userMessage];
}
|
|
67
|
+
// ─── Public API ───────────────────────────────────────────────────────────────
|
|
68
|
+
// Summarizes a node that exceeds the token threshold via map-reduce.
|
|
69
|
+
// Called from the batch loop instead of buildPrompt when exceedsThreshold() is true.
|
|
70
|
+
// Map-reduce summarization for a node too large for a single prompt.
//
// Map:    node.rawCode (empty string when absent) is chunked and every chunk is
//         sent to client.summarize() at temperature 0; all calls are started
//         together and awaited as a group.
// Reduce: the chunks' technicalSummary texts are combined into one prompt and
//         summarized once more.
//
// Resolves to the reduce output with tokensUsed set to the total across the
// map calls and the reduce call.
export async function mapreduceSummarize(node, client, systemPrompt) {
    const chunks = chunkCode(node.rawCode ?? "");
    // ── Map phase ──────────────────────────────────────────────────
    const pendingChunkCalls = chunks.map((chunk, position) => client.summarize({
        messages: buildChunkMessages(chunk, position, chunks.length, node.name, systemPrompt),
        temperature: 0,
    }));
    const chunkResults = await Promise.all(pendingChunkCalls);
    const chunkSummaries = [];
    let mapTokens = 0;
    for (const partial of chunkResults) {
        chunkSummaries.push(partial.technicalSummary);
        mapTokens = mapTokens + partial.tokensUsed;
    }
    // ── Reduce phase ───────────────────────────────────────────────
    const finalResult = await client.summarize({
        messages: buildReduceMessages(chunkSummaries, node.name, systemPrompt),
        temperature: 0,
    });
    return Object.assign({}, finalResult, { tokensUsed: mapTokens + finalResult.tokensUsed });
}
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
import type { CodeNode, CodeEdge, RouteNode, BackendRouteNode, ProjectFingerprint } from "../types.js";
|
|
2
|
+
import type { LLMMessage } from "./providers/types.js";
|
|
3
|
+
/**
 * Lookup structure produced by `buildEdgeIndex` and carried in `PromptContext`.
 * Both directions share the same nested-map shape.
 * NOTE(review): the meaning of the outer and inner string keys (presumably
 * node id, then edge type) is not visible in this declaration — confirm
 * against prompts.js.
 */
export interface EdgeIndex {
|
|
4
|
+
/** Nested map for the outgoing direction. */
outgoing: Map<string, Map<string, string[]>>;
|
|
5
|
+
/** Nested map for the incoming direction. */
incoming: Map<string, Map<string, string[]>>;
|
|
6
|
+
}
|
|
7
|
+
/** Lookup structure produced by `buildRouteIndex` and carried in `PromptContext`. */
export interface RouteIndex {
|
|
8
|
+
/** Route entries keyed by a string — presumably the route's file path, going by the field name; confirm in prompts.js. */
byFilePath: Map<string, RouteNode | BackendRouteNode>;
|
|
9
|
+
}
|
|
10
|
+
/** Builds an `EdgeIndex` from a list of edges. The summarizer calls this once per job and reuses the result for every prompt. */
export declare function buildEdgeIndex(edges: CodeEdge[]): EdgeIndex;
|
|
11
|
+
/** Builds a `RouteIndex` from a list of frontend or backend routes. The summarizer calls this once per job and reuses the result for every prompt. */
export declare function buildRouteIndex(routes: RouteNode[] | BackendRouteNode[]): RouteIndex;
|
|
12
|
+
/**
 * Everything `buildPrompt` receives for one node. `buildCycleGroupPrompt`
 * takes the same object without `node`.
 */
export interface PromptContext {
|
|
13
|
+
/** The node the prompt is being built for. */
node: CodeNode;
|
|
14
|
+
/** All nodes of the commit; the summarizer builds this map keyed by node id. */
allNodes: Map<string, CodeNode>;
|
|
15
|
+
/** Index returned by `buildEdgeIndex`. */
edgeIndex: EdgeIndex;
|
|
16
|
+
/** Index returned by `buildRouteIndex`. */
routeIndex: RouteIndex;
|
|
17
|
+
/** System prompt text returned by `buildSystemPrompt`. */
systemPrompt: string;
|
|
18
|
+
}
|
|
19
|
+
/** Builds the system prompt text from the project fingerprint. The summarizer calls this once per job and sends the result as the system message of its LLM calls. */
export declare function buildSystemPrompt(fingerprint: ProjectFingerprint): string;
|
|
20
|
+
/** String-to-string lookup table. NOTE(review): presumably maps an edge type to the label used in prompts — confirm in prompts.js. */
export declare const EDGE_LABELS: Record<string, string>;
|
|
21
|
+
/** Builds the message list for summarizing a single node (`ctx.node`). The summarizer passes the result as `messages` to the LLM client. */
export declare function buildPrompt(ctx: PromptContext): LLMMessage[];
|
|
22
|
+
/**
 * Builds the message list for summarizing several nodes with one LLM call.
 * The summarizer uses it for cycle groups whose size is at most
 * `MAX_GROUP_SUMMARY_SIZE` and applies the single resulting summary to every
 * node in the group.
 *
 * @param nodeIds - Ids of the nodes in the group.
 * @param ctx - A `PromptContext` without the `node` field.
 */
export declare function buildCycleGroupPrompt(nodeIds: string[], ctx: Omit<PromptContext, "node">): LLMMessage[];
|