@pruddiman/hem 0.0.1-beta-5671db0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/dist/agents/arbiter-agent.d.ts +72 -0
- package/dist/agents/arbiter-agent.js +149 -0
- package/dist/agents/architecture-agent.d.ts +148 -0
- package/dist/agents/architecture-agent.js +459 -0
- package/dist/agents/base-agent.d.ts +44 -0
- package/dist/agents/base-agent.js +57 -0
- package/dist/agents/crossref-agent.d.ts +140 -0
- package/dist/agents/crossref-agent.js +560 -0
- package/dist/agents/crossref-arbiter-agent.d.ts +72 -0
- package/dist/agents/crossref-arbiter-agent.js +147 -0
- package/dist/agents/documentation-agent.d.ts +55 -0
- package/dist/agents/documentation-agent.js +159 -0
- package/dist/agents/exploration-agent.d.ts +58 -0
- package/dist/agents/exploration-agent.js +102 -0
- package/dist/agents/grouping-agent.d.ts +167 -0
- package/dist/agents/grouping-agent.js +557 -0
- package/dist/agents/index-agent.d.ts +86 -0
- package/dist/agents/index-agent.js +360 -0
- package/dist/agents/organization-agent.d.ts +144 -0
- package/dist/agents/organization-agent.js +607 -0
- package/dist/auth.d.ts +372 -0
- package/dist/auth.js +1072 -0
- package/dist/broadcast-mcp.d.ts +21 -0
- package/dist/broadcast-mcp.js +59 -0
- package/dist/changelog.d.ts +85 -0
- package/dist/changelog.js +223 -0
- package/dist/decision-queue.d.ts +173 -0
- package/dist/decision-queue.js +265 -0
- package/dist/diff-scope.d.ts +24 -0
- package/dist/diff-scope.js +28 -0
- package/dist/discovery.d.ts +54 -0
- package/dist/discovery.js +405 -0
- package/dist/grouping.d.ts +37 -0
- package/dist/grouping.js +343 -0
- package/dist/helpers/format.d.ts +5 -0
- package/dist/helpers/format.js +13 -0
- package/dist/helpers/index.d.ts +11 -0
- package/dist/helpers/index.js +11 -0
- package/dist/helpers/parsing.d.ts +52 -0
- package/dist/helpers/parsing.js +128 -0
- package/dist/helpers/paths.d.ts +41 -0
- package/dist/helpers/paths.js +67 -0
- package/dist/helpers/strings.d.ts +45 -0
- package/dist/helpers/strings.js +97 -0
- package/dist/index.d.ts +135 -0
- package/dist/index.js +1087 -0
- package/dist/merge-utils.d.ts +22 -0
- package/dist/merge-utils.js +34 -0
- package/dist/orchestrator.d.ts +194 -0
- package/dist/orchestrator.js +1169 -0
- package/dist/output.d.ts +106 -0
- package/dist/output.js +243 -0
- package/dist/progress.d.ts +228 -0
- package/dist/progress.js +644 -0
- package/dist/providers/copilot.d.ts +247 -0
- package/dist/providers/copilot.js +598 -0
- package/dist/providers/index.d.ts +15 -0
- package/dist/providers/index.js +12 -0
- package/dist/providers/opencode.d.ts +156 -0
- package/dist/providers/opencode.js +416 -0
- package/dist/providers/types.d.ts +156 -0
- package/dist/providers/types.js +16 -0
- package/dist/resources.d.ts +76 -0
- package/dist/resources.js +151 -0
- package/dist/search-index.d.ts +71 -0
- package/dist/search-index.js +187 -0
- package/dist/search-mcp.d.ts +25 -0
- package/dist/search-mcp.js +100 -0
- package/dist/server-utils.d.ts +56 -0
- package/dist/server-utils.js +135 -0
- package/dist/session.d.ts +227 -0
- package/dist/session.js +370 -0
- package/dist/types.d.ts +272 -0
- package/dist/types.js +5 -0
- package/dist/worktree.d.ts +82 -0
- package/dist/worktree.js +187 -0
- package/package.json +45 -0
|
@@ -0,0 +1,360 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* LLM-assisted index page generation agent for Hem.
|
|
3
|
+
*
|
|
4
|
+
* Post-processing agent that runs AFTER the architecture agent.
|
|
5
|
+
* Uses an OpenCode session to generate a rich `index.md` with a
|
|
6
|
+
* narrative introduction, project overview, and reading guide.
|
|
7
|
+
*
|
|
8
|
+
* The agent writes `index.md` with a `<!-- TOC -->` placeholder
|
|
9
|
+
* that the pipeline replaces with the procedurally generated
|
|
10
|
+
* table of contents link list.
|
|
11
|
+
*
|
|
12
|
+
* Architecture:
|
|
13
|
+
* - Reuses the `hem-arch` agent permissions (no new agent entry).
|
|
14
|
+
* - The pipeline reads back `index.md`, replaces the placeholder,
|
|
15
|
+
* then writes the final version.
|
|
16
|
+
* - Falls back to procedural TOC if the agent fails.
|
|
17
|
+
*/
|
|
18
|
+
import pLimit from "p-limit";
|
|
19
|
+
import { extractJSON } from "../helpers/parsing.js";
|
|
20
|
+
import { BaseAgent } from "./base-agent.js";
|
|
21
|
+
import { computeMaxConcurrency } from "../resources.js";
|
|
22
|
+
import { AuthExpiredError } from "../auth.js";
|
|
23
|
+
// ── Types ───────────────────────────────────────────────────────────────
|
|
24
|
+
/**
 * Prompt size limit, in characters, for the index agent. A full prompt
 * longer than this is split into chunks instead of being sent in one session.
 */
export const INDEX_CHUNK_THRESHOLD = 80_000;
|
|
26
|
+
// ── Agent ───────────────────────────────────────────────────────────────
|
|
27
|
+
/**
|
|
28
|
+
* An agent that uses an LLM to generate a rich index page (`index.md`)
|
|
29
|
+
* for a project's documentation.
|
|
30
|
+
*
|
|
31
|
+
* Writes `index.md` with a `<!-- TOC -->` placeholder that the pipeline
|
|
32
|
+
* replaces with the procedural table of contents link list.
|
|
33
|
+
*/
|
|
34
|
+
export class IndexAgent extends BaseAgent {
|
|
35
|
+
constructor(provider) {
|
|
36
|
+
super(provider);
|
|
37
|
+
}
|
|
38
|
+
/**
|
|
39
|
+
* Run the index page generation pipeline.
|
|
40
|
+
*
|
|
41
|
+
* @param params - All inputs needed for the index prompt.
|
|
42
|
+
* @param verbose - Optional logging callback (writes to stderr).
|
|
43
|
+
* @throws If session creation or prompting fails.
|
|
44
|
+
*/
|
|
45
|
+
async run(params, verbose) {
|
|
46
|
+
const tag = "index-agent";
|
|
47
|
+
// 1. Build the full prompt to measure size
|
|
48
|
+
const fullPrompt = IndexAgent.buildPrompt(params);
|
|
49
|
+
if (verbose) {
|
|
50
|
+
verbose(`[${tag}] Full prompt: ${fullPrompt.length.toLocaleString()} chars`);
|
|
51
|
+
}
|
|
52
|
+
// 2. If the prompt fits in a single context window, use the simple path
|
|
53
|
+
if (fullPrompt.length <= INDEX_CHUNK_THRESHOLD) {
|
|
54
|
+
const sessionId = await this.createSession("Hem: index overview");
|
|
55
|
+
if (verbose) {
|
|
56
|
+
verbose(`[${tag}] Session created: ${sessionId}`);
|
|
57
|
+
}
|
|
58
|
+
await this.provider.prompt(sessionId, fullPrompt, { agent: "hem-index" });
|
|
59
|
+
if (verbose) {
|
|
60
|
+
verbose(`[${tag}] Agent completed`);
|
|
61
|
+
}
|
|
62
|
+
return;
|
|
63
|
+
}
|
|
64
|
+
// 3. Chunked path — split inputs across multiple sessions
|
|
65
|
+
const chunks = IndexAgent.chunkInputs(params);
|
|
66
|
+
if (verbose) {
|
|
67
|
+
verbose(`[${tag}] Chunking: ${chunks.length} chunks for ${params.allFindings.length} findings + ${params.allGroupSummaries.length} groups`);
|
|
68
|
+
}
|
|
69
|
+
// Run chunk sessions in parallel (bounded by resource concurrency)
|
|
70
|
+
const concurrency = computeMaxConcurrency();
|
|
71
|
+
const limit = pLimit(concurrency);
|
|
72
|
+
const settled = await Promise.allSettled(chunks.map((chunk, i) => limit(async () => {
|
|
73
|
+
const chunkPrompt = IndexAgent.buildChunkPrompt(params, chunk.findings, chunk.summaries, i + 1, chunks.length);
|
|
74
|
+
if (verbose) {
|
|
75
|
+
verbose(`[${tag}] Chunk ${i + 1}/${chunks.length}: ${chunkPrompt.length.toLocaleString()} chars`);
|
|
76
|
+
}
|
|
77
|
+
const sessionId = await this.createSession(`Hem: index chunk ${i + 1}/${chunks.length}`);
|
|
78
|
+
if (verbose) {
|
|
79
|
+
verbose(`[${tag}] Chunk session created: ${sessionId}`);
|
|
80
|
+
}
|
|
81
|
+
const responseText = await this.provider.prompt(sessionId, chunkPrompt, { agent: "hem-index" }) ?? "";
|
|
82
|
+
try {
|
|
83
|
+
const jsonStr = extractJSON(responseText);
|
|
84
|
+
const parsed = JSON.parse(jsonStr);
|
|
85
|
+
return {
|
|
86
|
+
narrative: parsed.narrative ?? "",
|
|
87
|
+
keyConcepts: Array.isArray(parsed.keyConcepts) ? parsed.keyConcepts : [],
|
|
88
|
+
readingGuideEntries: Array.isArray(parsed.readingGuideEntries) ? parsed.readingGuideEntries : [],
|
|
89
|
+
};
|
|
90
|
+
}
|
|
91
|
+
catch {
|
|
92
|
+
if (verbose) {
|
|
93
|
+
verbose(`[${tag}] Chunk ${i + 1} response was not valid JSON, using fallback`);
|
|
94
|
+
}
|
|
95
|
+
return {
|
|
96
|
+
narrative: responseText.slice(0, 500),
|
|
97
|
+
keyConcepts: [],
|
|
98
|
+
readingGuideEntries: [],
|
|
99
|
+
};
|
|
100
|
+
}
|
|
101
|
+
})));
|
|
102
|
+
// Collect results; use empty fallback for failed chunks so synthesis still runs.
|
|
103
|
+
// Re-throw AuthExpiredError immediately.
|
|
104
|
+
const chunkSummaries = [];
|
|
105
|
+
for (let i = 0; i < settled.length; i++) {
|
|
106
|
+
const result = settled[i];
|
|
107
|
+
if (result.status === "fulfilled") {
|
|
108
|
+
chunkSummaries.push(result.value);
|
|
109
|
+
}
|
|
110
|
+
else {
|
|
111
|
+
if (result.reason instanceof AuthExpiredError) {
|
|
112
|
+
throw result.reason;
|
|
113
|
+
}
|
|
114
|
+
if (verbose) {
|
|
115
|
+
const msg = result.reason instanceof Error ? result.reason.message : String(result.reason);
|
|
116
|
+
verbose(`[${tag}] chunk ${i + 1}/${chunks.length} failed: ${msg}`);
|
|
117
|
+
}
|
|
118
|
+
chunkSummaries.push({ narrative: "", keyConcepts: [], readingGuideEntries: [] });
|
|
119
|
+
}
|
|
120
|
+
}
|
|
121
|
+
// 4. Synthesis session — combine chunk summaries into the final index.md
|
|
122
|
+
const synthesisPrompt = IndexAgent.buildSynthesisPrompt(params, chunkSummaries);
|
|
123
|
+
if (verbose) {
|
|
124
|
+
verbose(`[${tag}] Synthesis prompt: ${synthesisPrompt.length.toLocaleString()} chars`);
|
|
125
|
+
}
|
|
126
|
+
const synthesisSessionId = await this.createSession("Hem: index synthesis");
|
|
127
|
+
if (verbose) {
|
|
128
|
+
verbose(`[${tag}] Synthesis session created: ${synthesisSessionId}`);
|
|
129
|
+
}
|
|
130
|
+
await this.provider.prompt(synthesisSessionId, synthesisPrompt, { agent: "hem-index" });
|
|
131
|
+
if (verbose) {
|
|
132
|
+
verbose(`[${tag}] Agent completed (chunked: ${chunks.length} chunks + synthesis)`);
|
|
133
|
+
}
|
|
134
|
+
}
|
|
135
|
+
// ── Static helpers (pure functions, easy to unit test) ───────────────
|
|
136
|
+
/**
|
|
137
|
+
* Builds the index page prompt from exploration findings, group
|
|
138
|
+
* summaries, and the list of generated documentation files.
|
|
139
|
+
*/
|
|
140
|
+
static buildPrompt(params) {
|
|
141
|
+
const { projectName, destinationPath, allFindings, allGroupSummaries, allDocFiles, } = params;
|
|
142
|
+
const parts = [];
|
|
143
|
+
// 1. System-level instructions
|
|
144
|
+
parts.push(`Generate a rich, narrative \`index.md\` that serves as the landing page`, `for the project's documentation. It should orient readers with a project`, `description, explain what the documentation covers, and provide a reading`, `guide.`, "", `**Write files directly using the edit tool.** Do NOT return Markdown content`, `in your response text. Instead, use the edit tool to create \`index.md\``, `in the destination directory. When you are done, stop.`, "");
|
|
145
|
+
// 2. Where to write
|
|
146
|
+
parts.push("## Destination", "", `Write the index page to: \`${destinationPath}/index.md\``, "");
|
|
147
|
+
// 3. TOC placeholder instruction
|
|
148
|
+
parts.push("## Table of contents placeholder", "", `The \`index.md\` MUST include the exact placeholder \`<!-- TOC -->\` on its`, `own line. The pipeline will replace this placeholder with an auto-generated,`, `navigable table of contents linking to every documentation page. Do NOT`, `generate your own links to individual doc pages — the placeholder handles that.`, "", "Example:", "", "```markdown", `# ${projectName} Documentation`, "", "Brief project description...", "", "## Reading guide", "", "How to navigate this documentation...", "", "<!-- TOC -->", "```", "");
|
|
149
|
+
// 4. Content guidelines
|
|
150
|
+
parts.push("## What to include", "", `1. **Project title as H1**: \`# ${projectName} Documentation\``, "", `2. **Project description**: 1-3 paragraphs explaining what ${projectName} is,`, ` what problem it solves, and who its audience is. Derive this from the`, ` exploration findings and generated documentation.`, "", `3. **Key concepts / terminology**: If the project uses domain-specific terms,`, ` briefly define the most important ones.`, "", `4. **Reading guide**: Help readers navigate the documentation. For example:`, ` - "New to the project? Start with [Architecture Overview](./architecture.md)."`, ` - Group documentation into logical themes and explain what each covers.`, "", `5. **The \`<!-- TOC -->\` placeholder**: Place this where you want the`, ` auto-generated table of contents to appear. Typically after your narrative`, ` content and before any closing remarks.`, "");
|
|
151
|
+
// 5. Quality standards
|
|
152
|
+
parts.push("## Quality standards", "", `- **Google Markdown style guide**: ATX headings, single H1, informative links.`, `- **No verbatim source code**: This is a documentation index, not a code reference.`, `- **Concise but informative**: Aim for a landing page that can be read in under 2 minutes.`, `- **Link to architecture.md**: If it exists, reference it in your reading guide.`, "");
|
|
153
|
+
// 6. Generated doc files for context
|
|
154
|
+
if (allDocFiles.length > 0) {
|
|
155
|
+
parts.push("## Generated documentation files", "");
|
|
156
|
+
parts.push("These documentation files have already been generated. Read them using tools", "to understand what the project is about and what content exists. Use this to", "write an informed project description and reading guide.", "");
|
|
157
|
+
for (const docFile of allDocFiles) {
|
|
158
|
+
parts.push(`- \`${destinationPath}/${docFile}\``);
|
|
159
|
+
}
|
|
160
|
+
parts.push("");
|
|
161
|
+
}
|
|
162
|
+
// 7. Exploration findings
|
|
163
|
+
parts.push("## Exploration findings", "");
|
|
164
|
+
if (allFindings.length > 0) {
|
|
165
|
+
parts.push("The exploration phase discovered these findings across all file groups.", "Use them to understand what the project does:", "");
|
|
166
|
+
for (const findings of allFindings) {
|
|
167
|
+
parts.push(`### Group: ${findings.groupId}`);
|
|
168
|
+
parts.push("");
|
|
169
|
+
parts.push(findings.text);
|
|
170
|
+
parts.push("");
|
|
171
|
+
}
|
|
172
|
+
}
|
|
173
|
+
else {
|
|
174
|
+
parts.push("No exploration findings available. Use tools to read the generated", "documentation files for context.", "");
|
|
175
|
+
}
|
|
176
|
+
// 8. File groups
|
|
177
|
+
parts.push("## File groups in this project", "");
|
|
178
|
+
if (allGroupSummaries.length > 0) {
|
|
179
|
+
for (const group of allGroupSummaries) {
|
|
180
|
+
parts.push(`- **${group.label}** (${group.id})`);
|
|
181
|
+
}
|
|
182
|
+
}
|
|
183
|
+
else {
|
|
184
|
+
parts.push("No file groups defined.");
|
|
185
|
+
}
|
|
186
|
+
parts.push("");
|
|
187
|
+
// 9. How to work
|
|
188
|
+
parts.push("## How to work", "");
|
|
189
|
+
parts.push(`1. **Read generated docs**: Use tools to read the generated documentation files`, ` to understand what the project is about.`, `2. **Read architecture.md**: If it exists, read it for the system overview.`, `3. **Write index.md**: Use the edit tool to write the index page.`, `4. **Include the TOC placeholder**: Remember to include \`<!-- TOC -->\` exactly once.`, "");
|
|
190
|
+
// 10. Done
|
|
191
|
+
parts.push("## When you are done", "");
|
|
192
|
+
parts.push(`After writing \`index.md\` using the edit tool, simply stop.`, `Do NOT return a JSON manifest or any other structured output.`, `The pipeline will read back \`index.md\` and replace the \`<!-- TOC -->\``, `placeholder with the auto-generated table of contents.`);
|
|
193
|
+
return parts.join("\n");
|
|
194
|
+
}
|
|
195
|
+
/**
|
|
196
|
+
* Splits exploration findings and group summaries into chunks that
|
|
197
|
+
* each produce a prompt under {@link INDEX_CHUNK_THRESHOLD} characters.
|
|
198
|
+
*
|
|
199
|
+
* The doc file list is NOT chunked — it is always included in the
|
|
200
|
+
* synthesis prompt (it is just a list of paths and stays small).
|
|
201
|
+
*/
|
|
202
|
+
static chunkInputs(params) {
|
|
203
|
+
const { allFindings, allGroupSummaries } = params;
|
|
204
|
+
// Estimate the character cost of each finding and summary
|
|
205
|
+
const findingSizes = allFindings.map((f) => JSON.stringify(f).length);
|
|
206
|
+
const summarySizes = allGroupSummaries.map((s) => JSON.stringify(s).length);
|
|
207
|
+
const totalVariableSize = findingSizes.reduce((a, b) => a + b, 0) +
|
|
208
|
+
summarySizes.reduce((a, b) => a + b, 0);
|
|
209
|
+
// Base prompt overhead (system instructions, destination, TOC, etc.)
|
|
210
|
+
const baseOverhead = 3_000;
|
|
211
|
+
const budgetPerChunk = INDEX_CHUNK_THRESHOLD - baseOverhead;
|
|
212
|
+
// Calculate how many chunks we need
|
|
213
|
+
const numChunks = Math.max(1, Math.ceil(totalVariableSize / budgetPerChunk));
|
|
214
|
+
// Build a paired list sorted by size descending for balanced distribution
|
|
215
|
+
const items = [
|
|
216
|
+
...findingSizes.map((size, index) => ({
|
|
217
|
+
type: "finding",
|
|
218
|
+
index,
|
|
219
|
+
size,
|
|
220
|
+
})),
|
|
221
|
+
...summarySizes.map((size, index) => ({
|
|
222
|
+
type: "summary",
|
|
223
|
+
index,
|
|
224
|
+
size,
|
|
225
|
+
})),
|
|
226
|
+
];
|
|
227
|
+
items.sort((a, b) => b.size - a.size);
|
|
228
|
+
// Greedy bin-packing: assign each item to the lightest chunk
|
|
229
|
+
const chunkBins = Array.from({ length: numChunks }, () => ({
|
|
230
|
+
findings: [],
|
|
231
|
+
summaries: [],
|
|
232
|
+
size: 0,
|
|
233
|
+
}));
|
|
234
|
+
for (const item of items) {
|
|
235
|
+
// Find the chunk with the smallest current size
|
|
236
|
+
let minIdx = 0;
|
|
237
|
+
for (let j = 1; j < chunkBins.length; j++) {
|
|
238
|
+
if (chunkBins[j].size < chunkBins[minIdx].size) {
|
|
239
|
+
minIdx = j;
|
|
240
|
+
}
|
|
241
|
+
}
|
|
242
|
+
const target = chunkBins[minIdx];
|
|
243
|
+
if (item.type === "finding") {
|
|
244
|
+
target.findings.push(allFindings[item.index]);
|
|
245
|
+
}
|
|
246
|
+
else {
|
|
247
|
+
target.summaries.push(allGroupSummaries[item.index]);
|
|
248
|
+
}
|
|
249
|
+
target.size += item.size;
|
|
250
|
+
}
|
|
251
|
+
// Filter out empty chunks (can happen if numChunks > items.length)
|
|
252
|
+
return chunkBins
|
|
253
|
+
.filter((c) => c.findings.length > 0 || c.summaries.length > 0)
|
|
254
|
+
.map(({ findings, summaries }) => ({ findings, summaries }));
|
|
255
|
+
}
|
|
256
|
+
/**
|
|
257
|
+
* Builds a prompt for a single chunk session.
|
|
258
|
+
*
|
|
259
|
+
* The chunk agent reads a subset of findings and group summaries and
|
|
260
|
+
* returns a JSON summary — it does NOT write `index.md`.
|
|
261
|
+
*/
|
|
262
|
+
static buildChunkPrompt(params, chunkFindings, chunkSummaries, chunkIndex, totalChunks) {
|
|
263
|
+
const { projectName, destinationPath, allDocFiles } = params;
|
|
264
|
+
const parts = [];
|
|
265
|
+
parts.push(`Process chunk ${chunkIndex} of ${totalChunks} for the project **${projectName}**.`, `Analyze the exploration findings and file group summaries below, then return`, `a JSON summary.`, "", `**Do NOT write any files.** Do NOT use the edit tool. Return ONLY a JSON object`, `in your response.`, "");
|
|
266
|
+
// Context: doc files exist (for awareness, not for reading)
|
|
267
|
+
if (allDocFiles.length > 0) {
|
|
268
|
+
parts.push("## Documentation files (for context)", "");
|
|
269
|
+
parts.push(`These ${allDocFiles.length} documentation files have been generated.`, `You do not need to read them — just be aware they exist:`, "");
|
|
270
|
+
// Only list first 20 to keep the chunk prompt lean
|
|
271
|
+
const listed = allDocFiles.slice(0, 20);
|
|
272
|
+
for (const f of listed) {
|
|
273
|
+
parts.push(`- \`${destinationPath}/${f}\``);
|
|
274
|
+
}
|
|
275
|
+
if (allDocFiles.length > 20) {
|
|
276
|
+
parts.push(`- ... and ${allDocFiles.length - 20} more files`);
|
|
277
|
+
}
|
|
278
|
+
parts.push("");
|
|
279
|
+
}
|
|
280
|
+
// Findings for this chunk
|
|
281
|
+
parts.push("## Exploration findings (this chunk)", "");
|
|
282
|
+
if (chunkFindings.length > 0) {
|
|
283
|
+
for (const findings of chunkFindings) {
|
|
284
|
+
parts.push(`### Group: ${findings.groupId}`);
|
|
285
|
+
parts.push("");
|
|
286
|
+
parts.push(findings.text);
|
|
287
|
+
parts.push("");
|
|
288
|
+
}
|
|
289
|
+
}
|
|
290
|
+
else {
|
|
291
|
+
parts.push("No exploration findings in this chunk.", "");
|
|
292
|
+
}
|
|
293
|
+
// Group summaries for this chunk
|
|
294
|
+
parts.push("## File groups (this chunk)", "");
|
|
295
|
+
if (chunkSummaries.length > 0) {
|
|
296
|
+
for (const group of chunkSummaries) {
|
|
297
|
+
parts.push(`- **${group.label}** (${group.id})`);
|
|
298
|
+
}
|
|
299
|
+
}
|
|
300
|
+
else {
|
|
301
|
+
parts.push("No file groups in this chunk.");
|
|
302
|
+
}
|
|
303
|
+
parts.push("");
|
|
304
|
+
// Response format
|
|
305
|
+
parts.push("## Response format", "", "Return a JSON object with this structure:", "", "```json", "{", ' "narrative": "1-2 paragraphs describing what this chunk of the project does",', ' "keyConcepts": ["concept1", "concept2"],', ' "readingGuideEntries": ["Start with X to understand Y", "See Z for details on W"]', "}", "```", "", "Focus on extracting the most important themes and concepts from the", "findings and groups assigned to this chunk.");
|
|
306
|
+
return parts.join("\n");
|
|
307
|
+
}
|
|
308
|
+
/**
|
|
309
|
+
* Builds the synthesis prompt that combines chunk summaries into the
|
|
310
|
+
* final `index.md`. This prompt instructs the agent to write the file.
|
|
311
|
+
*/
|
|
312
|
+
static buildSynthesisPrompt(params, chunkSummaries) {
|
|
313
|
+
const { projectName, destinationPath, allDocFiles } = params;
|
|
314
|
+
const parts = [];
|
|
315
|
+
// 1. System instructions (same as original buildPrompt)
|
|
316
|
+
parts.push(`Generate a rich, narrative \`index.md\` that serves as the landing page`, `for the project's documentation. It should orient readers with a project`, `description, explain what the documentation covers, and provide a reading`, `guide.`, "", `**Write files directly using the edit tool.** Do NOT return Markdown content`, `in your response text. Instead, use the edit tool to create \`index.md\``, `in the destination directory. When you are done, stop.`, "");
|
|
317
|
+
// 2. Where to write
|
|
318
|
+
parts.push("## Destination", "", `Write the index page to: \`${destinationPath}/index.md\``, "");
|
|
319
|
+
// 3. TOC placeholder (same as original)
|
|
320
|
+
parts.push("## Table of contents placeholder", "", `The \`index.md\` MUST include the exact placeholder \`<!-- TOC -->\` on its`, `own line. The pipeline will replace this placeholder with an auto-generated,`, `navigable table of contents linking to every documentation page. Do NOT`, `generate your own links to individual doc pages — the placeholder handles that.`, "", "Example:", "", "```markdown", `# ${projectName} Documentation`, "", "Brief project description...", "", "## Reading guide", "", "How to navigate this documentation...", "", "<!-- TOC -->", "```", "");
|
|
321
|
+
// 4. Content guidelines (same as original)
|
|
322
|
+
parts.push("## What to include", "", `1. **Project title as H1**: \`# ${projectName} Documentation\``, "", `2. **Project description**: 1-3 paragraphs explaining what ${projectName} is,`, ` what problem it solves, and who its audience is. Synthesize from the`, ` chunk summaries below.`, "", `3. **Key concepts / terminology**: Combine the key concepts from all chunks.`, "", `4. **Reading guide**: Combine the reading guide entries from all chunks into`, ` a coherent navigation guide.`, "", `5. **The \`<!-- TOC -->\` placeholder**: Place this where you want the`, ` auto-generated table of contents to appear.`, "");
|
|
323
|
+
// 5. Quality standards (same as original)
|
|
324
|
+
parts.push("## Quality standards", "", `- **Google Markdown style guide**: ATX headings, single H1, informative links.`, `- **No verbatim source code**: This is a documentation index, not a code reference.`, `- **Concise but informative**: Aim for a landing page that can be read in under 2 minutes.`, `- **Link to architecture.md**: If it exists, reference it in your reading guide.`, "");
|
|
325
|
+
// 6. Chunk summaries (the main input for synthesis)
|
|
326
|
+
parts.push("## Analysis summaries from exploration", "");
|
|
327
|
+
parts.push(`The project was analyzed in ${chunkSummaries.length} chunks. Here are the`, `summaries from each chunk. Synthesize these into a coherent project description`, `and reading guide:`, "");
|
|
328
|
+
for (let i = 0; i < chunkSummaries.length; i++) {
|
|
329
|
+
const cs = chunkSummaries[i];
|
|
330
|
+
parts.push(`### Chunk ${i + 1}`, "");
|
|
331
|
+
parts.push(`**Narrative**: ${cs.narrative}`, "");
|
|
332
|
+
if (cs.keyConcepts.length > 0) {
|
|
333
|
+
parts.push(`**Key concepts**: ${cs.keyConcepts.join(", ")}`, "");
|
|
334
|
+
}
|
|
335
|
+
if (cs.readingGuideEntries.length > 0) {
|
|
336
|
+
parts.push("**Reading guide entries**:");
|
|
337
|
+
for (const entry of cs.readingGuideEntries) {
|
|
338
|
+
parts.push(`- ${entry}`);
|
|
339
|
+
}
|
|
340
|
+
parts.push("");
|
|
341
|
+
}
|
|
342
|
+
}
|
|
343
|
+
// 7. Generated doc files (full list for context)
|
|
344
|
+
if (allDocFiles.length > 0) {
|
|
345
|
+
parts.push("## Generated documentation files", "");
|
|
346
|
+
parts.push("These documentation files have been generated. You may read them using", "tools for additional context:", "");
|
|
347
|
+
for (const docFile of allDocFiles) {
|
|
348
|
+
parts.push(`- \`${destinationPath}/${docFile}\``);
|
|
349
|
+
}
|
|
350
|
+
parts.push("");
|
|
351
|
+
}
|
|
352
|
+
// 8. How to work
|
|
353
|
+
parts.push("## How to work", "");
|
|
354
|
+
parts.push(`1. **Review the chunk summaries above**: They contain the synthesized analysis.`, `2. **Optionally read generated docs**: For additional context.`, `3. **Write index.md**: Use the edit tool to write the index page.`, `4. **Include the TOC placeholder**: Remember to include \`<!-- TOC -->\` exactly once.`, "");
|
|
355
|
+
// 9. Done
|
|
356
|
+
parts.push("## When you are done", "");
|
|
357
|
+
parts.push(`After writing \`index.md\` using the edit tool, simply stop.`, `Do NOT return a JSON manifest or any other structured output.`, `The pipeline will read back \`index.md\` and replace the \`<!-- TOC -->\``, `placeholder with the auto-generated table of contents.`);
|
|
358
|
+
return parts.join("\n");
|
|
359
|
+
}
|
|
360
|
+
}
|
|
@@ -0,0 +1,144 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* LLM-assisted organization agent for Hem.
|
|
3
|
+
*
|
|
4
|
+
* Post-processing agent that runs AFTER all doc agents complete.
|
|
5
|
+
* Reviews all written documentation files, restructures/renames/deduplicates
|
|
6
|
+
* for consistency.
|
|
7
|
+
*
|
|
8
|
+
* Architecture (v3 — parallel workers with broadcast):
|
|
9
|
+
* - Absorbs the old DeduplicationAgent's responsibility.
|
|
10
|
+
* - For large file sets (>8 files), splits work across N parallel workers.
|
|
11
|
+
* - Workers communicate via an MCP broadcast tool + prompt injection.
|
|
12
|
+
* - The orchestrator intercepts broadcast tool calls via SSE and relays
|
|
13
|
+
* messages to all peer workers + their active subagents.
|
|
14
|
+
* - For small file sets (≤8 files), falls back to the single-agent path.
|
|
15
|
+
* - The pipeline discovers the final file set by scanning disk afterward.
|
|
16
|
+
*/
|
|
17
|
+
import type { Provider } from "../providers/types.js";
|
|
18
|
+
import { BaseAgent } from "./base-agent.js";
|
|
19
|
+
/**
 * File-count cut-off for the organization pass: parallel workers are used
 * above this number of files, the single agent at or below it.
 */
export declare const PARALLEL_THRESHOLD = 8;
/**
 * Upper limit on the number of parallel org workers. The effective count is
 * `min(MAX_WORKERS, computeOrgWorkers(fileCount), computeMaxConcurrency())`.
 * The arbiter does not count toward this limit (it is lightweight).
 */
export declare const MAX_WORKERS = 7;
/**
 * Name of the broadcast MCP tool as it appears in SSE events, in the form
 * "{mcp-server-name}_{tool-name}".
 */
export declare const BROADCAST_TOOL_NAME = "hem-broadcast_broadcast";
|
|
32
|
+
/** Inputs for the single-agent organization prompt. */
export interface OrgPromptParams {
    /** Name of the project. */
    projectName: string;
    /** Destination directory, as an absolute path. */
    destinationPath: string;
    /** Every generated documentation file, as a relative path. */
    allDocFiles: string[];
}
|
|
41
|
+
/**
 * Inputs for a parallel org worker prompt.
 *
 * Carries the same `projectName`, `destinationPath`, and `allDocFiles` fields
 * as {@link OrgPromptParams}; here `allDocFiles` is the full list across all
 * workers, given for cross-reference awareness.
 */
export interface OrgWorkerPromptParams extends OrgPromptParams {
    /** Relative paths of the files assigned to THIS worker. */
    assignedFiles: string[];
    /** This worker's label, e.g. "org-worker-1". */
    workerLabel: string;
    /** How many parallel workers exist in total. */
    totalWorkers: number;
}
|
|
56
|
+
/** One worker's assignment: a worker label paired with its file list. */
export interface WorkerAssignment {
    /** Worker label. */
    label: string;
    /** Files assigned to that worker. */
    files: string[];
}
|
|
61
|
+
/**
 * Agent that reviews the generated documentation and restructures it for
 * consistency: deduplicating, renaming, and reorganizing files.
 *
 * Files are written and edited directly through the edit tool; the pipeline
 * learns the final file set by scanning disk afterward.
 */
export declare class OrganizationAgent extends BaseAgent {
    constructor(provider: Provider);
    /**
     * Runs the organization pass over all generated documentation, choosing
     * single-agent or parallel mode from the file count.
     *
     * @param params - Organization parameters including file paths.
     * @param verbose - Optional logging callback (writes to stderr).
     * @throws If session creation or prompting fails.
     */
    run(params: OrgPromptParams, verbose?: (msg: string) => void): Promise<void>;
    /**
     * Single-agent organization pass (the original behavior), used when the
     * file count is ≤ PARALLEL_THRESHOLD.
     */
    runSingle(params: OrgPromptParams, verbose?: (msg: string) => void): Promise<void>;
    /**
     * Parallel organization pass: multiple workers coordinated by an arbiter.
     *
     * Steps:
     * 1. Compute the worker count from resource limits (arbiter excluded).
     * 2. Assign files to workers round-robin.
     * 3. Subscribe to SSE events to intercept broadcasts.
     * 4. Create the arbiter session (long-lived coordinator).
     * 5. Create N worker sessions in parallel.
     * 6. Relay broadcasts with targeted routing:
     *    - Worker → arbiter only.
     *    - Arbiter → @tagged worker(s) only (or all if @all-workers / no tag).
     *    - Completed sessions are excluded from relay.
     * 7. Kill each worker session as soon as it completes (abort + delete).
     * 8. Intercept RECALL: broadcasts to respawn a worker for fixes.
     * 9. Send a final prompt to the arbiter so it can wrap up.
     * 10. Kill the arbiter session.
     */
    runParallel(params: OrgPromptParams, verbose?: (msg: string) => void): Promise<void>;
    /**
     * Kills a session by aborting any running work and then deleting it.
     * Best-effort: failures are logged, not thrown.
     */
    private killSession;
    /**
     * Spawns a recalled worker session to apply specific fixes.
     *
     * Invoked when the arbiter broadcasts `RECALL: @org-worker-N <instructions>`.
     * A new session is created with the worker's original file assignment and
     * a focused prompt containing the fix instructions, and is killed
     * immediately after it completes.
     */
    private runRecalledWorker;
    /**
     * Builds the single-agent organization prompt (original behavior).
     */
    static buildPrompt(params: OrgPromptParams): string;
    /**
     * Builds the prompt for one parallel org worker.
     *
     * The prompt gives the worker:
     * - a scoped identity (e.g. "org-worker-1 of 3"),
     * - its assigned file subset,
     * - the full file list for cross-reference awareness,
     * - instructions for using the broadcast tool,
     * - a scoped task list (only edit YOUR files).
     */
    static buildWorkerPrompt(params: OrgWorkerPromptParams): string;
}
|
|
132
|
+
/**
 * Distributes documentation files across workers round-robin.
 *
 * The files are first sorted alphabetically, so files from the same
 * directory tend to end up on adjacent workers and some locality is kept.
 * They are then dealt out in order (file 0 → worker 1, file 1 → worker 2,
 * and so on, wrapping around), which keeps workloads balanced to within
 * one file.
 *
 * @param files - Relative file paths (e.g. "auth/overview.md").
 * @param maxWorkers - Maximum number of workers to create.
 * @returns One assignment per worker, each with a label and a file list.
 */
export declare function assignFilesToWorkers(files: string[], maxWorkers: number): WorkerAssignment[];
|