@pruddiman/hem 0.0.1-beta-5671db0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78) hide show
  1. package/LICENSE +21 -0
  2. package/dist/agents/arbiter-agent.d.ts +72 -0
  3. package/dist/agents/arbiter-agent.js +149 -0
  4. package/dist/agents/architecture-agent.d.ts +148 -0
  5. package/dist/agents/architecture-agent.js +459 -0
  6. package/dist/agents/base-agent.d.ts +44 -0
  7. package/dist/agents/base-agent.js +57 -0
  8. package/dist/agents/crossref-agent.d.ts +140 -0
  9. package/dist/agents/crossref-agent.js +560 -0
  10. package/dist/agents/crossref-arbiter-agent.d.ts +72 -0
  11. package/dist/agents/crossref-arbiter-agent.js +147 -0
  12. package/dist/agents/documentation-agent.d.ts +55 -0
  13. package/dist/agents/documentation-agent.js +159 -0
  14. package/dist/agents/exploration-agent.d.ts +58 -0
  15. package/dist/agents/exploration-agent.js +102 -0
  16. package/dist/agents/grouping-agent.d.ts +167 -0
  17. package/dist/agents/grouping-agent.js +557 -0
  18. package/dist/agents/index-agent.d.ts +86 -0
  19. package/dist/agents/index-agent.js +360 -0
  20. package/dist/agents/organization-agent.d.ts +144 -0
  21. package/dist/agents/organization-agent.js +607 -0
  22. package/dist/auth.d.ts +372 -0
  23. package/dist/auth.js +1072 -0
  24. package/dist/broadcast-mcp.d.ts +21 -0
  25. package/dist/broadcast-mcp.js +59 -0
  26. package/dist/changelog.d.ts +85 -0
  27. package/dist/changelog.js +223 -0
  28. package/dist/decision-queue.d.ts +173 -0
  29. package/dist/decision-queue.js +265 -0
  30. package/dist/diff-scope.d.ts +24 -0
  31. package/dist/diff-scope.js +28 -0
  32. package/dist/discovery.d.ts +54 -0
  33. package/dist/discovery.js +405 -0
  34. package/dist/grouping.d.ts +37 -0
  35. package/dist/grouping.js +343 -0
  36. package/dist/helpers/format.d.ts +5 -0
  37. package/dist/helpers/format.js +13 -0
  38. package/dist/helpers/index.d.ts +11 -0
  39. package/dist/helpers/index.js +11 -0
  40. package/dist/helpers/parsing.d.ts +52 -0
  41. package/dist/helpers/parsing.js +128 -0
  42. package/dist/helpers/paths.d.ts +41 -0
  43. package/dist/helpers/paths.js +67 -0
  44. package/dist/helpers/strings.d.ts +45 -0
  45. package/dist/helpers/strings.js +97 -0
  46. package/dist/index.d.ts +135 -0
  47. package/dist/index.js +1087 -0
  48. package/dist/merge-utils.d.ts +22 -0
  49. package/dist/merge-utils.js +34 -0
  50. package/dist/orchestrator.d.ts +194 -0
  51. package/dist/orchestrator.js +1169 -0
  52. package/dist/output.d.ts +106 -0
  53. package/dist/output.js +243 -0
  54. package/dist/progress.d.ts +228 -0
  55. package/dist/progress.js +644 -0
  56. package/dist/providers/copilot.d.ts +247 -0
  57. package/dist/providers/copilot.js +598 -0
  58. package/dist/providers/index.d.ts +15 -0
  59. package/dist/providers/index.js +12 -0
  60. package/dist/providers/opencode.d.ts +156 -0
  61. package/dist/providers/opencode.js +416 -0
  62. package/dist/providers/types.d.ts +156 -0
  63. package/dist/providers/types.js +16 -0
  64. package/dist/resources.d.ts +76 -0
  65. package/dist/resources.js +151 -0
  66. package/dist/search-index.d.ts +71 -0
  67. package/dist/search-index.js +187 -0
  68. package/dist/search-mcp.d.ts +25 -0
  69. package/dist/search-mcp.js +100 -0
  70. package/dist/server-utils.d.ts +56 -0
  71. package/dist/server-utils.js +135 -0
  72. package/dist/session.d.ts +227 -0
  73. package/dist/session.js +370 -0
  74. package/dist/types.d.ts +272 -0
  75. package/dist/types.js +5 -0
  76. package/dist/worktree.d.ts +82 -0
  77. package/dist/worktree.js +187 -0
  78. package/package.json +45 -0
@@ -0,0 +1,360 @@
1
+ /**
2
+ * LLM-assisted index page generation agent for Hem.
3
+ *
4
+ * Post-processing agent that runs AFTER the architecture agent.
5
+ * Uses an OpenCode session to generate a rich `index.md` with a
6
+ * narrative introduction, project overview, and reading guide.
7
+ *
8
+ * The agent writes `index.md` with a `<!-- TOC -->` placeholder
9
+ * that the pipeline replaces with the procedurally generated
10
+ * table of contents link list.
11
+ *
12
+ * Architecture:
13
+ * - Prompts are sent as the `hem-index` agent (NOTE(review): confirm whether it still reuses the `hem-arch` agent permissions).
14
+ * - The pipeline reads back `index.md`, replaces the placeholder,
15
+ * then writes the final version.
16
+ * - Falls back to procedural TOC if the agent fails.
17
+ */
18
+ import pLimit from "p-limit";
19
+ import { extractJSON } from "../helpers/parsing.js";
20
+ import { BaseAgent } from "./base-agent.js";
21
+ import { computeMaxConcurrency } from "../resources.js";
22
+ import { AuthExpiredError } from "../auth.js";
23
+ // ── Types ───────────────────────────────────────────────────────────────
24
/**
 * Prompt length, in characters, up to which the index agent sends everything
 * in a single session; longer prompts are split into chunks.
 */
export const INDEX_CHUNK_THRESHOLD = 80000;
26
+ // ── Agent ───────────────────────────────────────────────────────────────
27
+ /**
28
+ * An agent that uses an LLM to generate a rich index page (`index.md`)
29
+ * for a project's documentation.
30
+ *
31
+ * Writes `index.md` with a `<!-- TOC -->` placeholder that the pipeline
32
+ * replaces with the procedural table of contents link list.
33
+ */
34
+ export class IndexAgent extends BaseAgent {
35
+ constructor(provider) {
36
+ super(provider);
37
+ }
38
+ /**
39
+ * Run the index page generation pipeline.
40
+ *
41
+ * @param params - All inputs needed for the index prompt.
42
+ * @param verbose - Optional logging callback (writes to stderr).
43
+ * @throws If session creation or prompting fails.
44
+ */
45
+ async run(params, verbose) {
46
+ const tag = "index-agent";
47
+ // 1. Build the full prompt to measure size
48
+ const fullPrompt = IndexAgent.buildPrompt(params);
49
+ if (verbose) {
50
+ verbose(`[${tag}] Full prompt: ${fullPrompt.length.toLocaleString()} chars`);
51
+ }
52
+ // 2. If the prompt fits in a single context window, use the simple path
53
+ if (fullPrompt.length <= INDEX_CHUNK_THRESHOLD) {
54
+ const sessionId = await this.createSession("Hem: index overview");
55
+ if (verbose) {
56
+ verbose(`[${tag}] Session created: ${sessionId}`);
57
+ }
58
+ await this.provider.prompt(sessionId, fullPrompt, { agent: "hem-index" });
59
+ if (verbose) {
60
+ verbose(`[${tag}] Agent completed`);
61
+ }
62
+ return;
63
+ }
64
+ // 3. Chunked path — split inputs across multiple sessions
65
+ const chunks = IndexAgent.chunkInputs(params);
66
+ if (verbose) {
67
+ verbose(`[${tag}] Chunking: ${chunks.length} chunks for ${params.allFindings.length} findings + ${params.allGroupSummaries.length} groups`);
68
+ }
69
+ // Run chunk sessions in parallel (bounded by resource concurrency)
70
+ const concurrency = computeMaxConcurrency();
71
+ const limit = pLimit(concurrency);
72
+ const settled = await Promise.allSettled(chunks.map((chunk, i) => limit(async () => {
73
+ const chunkPrompt = IndexAgent.buildChunkPrompt(params, chunk.findings, chunk.summaries, i + 1, chunks.length);
74
+ if (verbose) {
75
+ verbose(`[${tag}] Chunk ${i + 1}/${chunks.length}: ${chunkPrompt.length.toLocaleString()} chars`);
76
+ }
77
+ const sessionId = await this.createSession(`Hem: index chunk ${i + 1}/${chunks.length}`);
78
+ if (verbose) {
79
+ verbose(`[${tag}] Chunk session created: ${sessionId}`);
80
+ }
81
+ const responseText = await this.provider.prompt(sessionId, chunkPrompt, { agent: "hem-index" }) ?? "";
82
+ try {
83
+ const jsonStr = extractJSON(responseText);
84
+ const parsed = JSON.parse(jsonStr);
85
+ return {
86
+ narrative: parsed.narrative ?? "",
87
+ keyConcepts: Array.isArray(parsed.keyConcepts) ? parsed.keyConcepts : [],
88
+ readingGuideEntries: Array.isArray(parsed.readingGuideEntries) ? parsed.readingGuideEntries : [],
89
+ };
90
+ }
91
+ catch {
92
+ if (verbose) {
93
+ verbose(`[${tag}] Chunk ${i + 1} response was not valid JSON, using fallback`);
94
+ }
95
+ return {
96
+ narrative: responseText.slice(0, 500),
97
+ keyConcepts: [],
98
+ readingGuideEntries: [],
99
+ };
100
+ }
101
+ })));
102
+ // Collect results; use empty fallback for failed chunks so synthesis still runs.
103
+ // Re-throw AuthExpiredError immediately.
104
+ const chunkSummaries = [];
105
+ for (let i = 0; i < settled.length; i++) {
106
+ const result = settled[i];
107
+ if (result.status === "fulfilled") {
108
+ chunkSummaries.push(result.value);
109
+ }
110
+ else {
111
+ if (result.reason instanceof AuthExpiredError) {
112
+ throw result.reason;
113
+ }
114
+ if (verbose) {
115
+ const msg = result.reason instanceof Error ? result.reason.message : String(result.reason);
116
+ verbose(`[${tag}] chunk ${i + 1}/${chunks.length} failed: ${msg}`);
117
+ }
118
+ chunkSummaries.push({ narrative: "", keyConcepts: [], readingGuideEntries: [] });
119
+ }
120
+ }
121
+ // 4. Synthesis session — combine chunk summaries into the final index.md
122
+ const synthesisPrompt = IndexAgent.buildSynthesisPrompt(params, chunkSummaries);
123
+ if (verbose) {
124
+ verbose(`[${tag}] Synthesis prompt: ${synthesisPrompt.length.toLocaleString()} chars`);
125
+ }
126
+ const synthesisSessionId = await this.createSession("Hem: index synthesis");
127
+ if (verbose) {
128
+ verbose(`[${tag}] Synthesis session created: ${synthesisSessionId}`);
129
+ }
130
+ await this.provider.prompt(synthesisSessionId, synthesisPrompt, { agent: "hem-index" });
131
+ if (verbose) {
132
+ verbose(`[${tag}] Agent completed (chunked: ${chunks.length} chunks + synthesis)`);
133
+ }
134
+ }
135
+ // ── Static helpers (pure functions, easy to unit test) ───────────────
136
+ /**
137
+ * Builds the index page prompt from exploration findings, group
138
+ * summaries, and the list of generated documentation files.
139
+ */
140
+ static buildPrompt(params) {
141
+ const { projectName, destinationPath, allFindings, allGroupSummaries, allDocFiles, } = params;
142
+ const parts = [];
143
+ // 1. System-level instructions
144
+ parts.push(`Generate a rich, narrative \`index.md\` that serves as the landing page`, `for the project's documentation. It should orient readers with a project`, `description, explain what the documentation covers, and provide a reading`, `guide.`, "", `**Write files directly using the edit tool.** Do NOT return Markdown content`, `in your response text. Instead, use the edit tool to create \`index.md\``, `in the destination directory. When you are done, stop.`, "");
145
+ // 2. Where to write
146
+ parts.push("## Destination", "", `Write the index page to: \`${destinationPath}/index.md\``, "");
147
+ // 3. TOC placeholder instruction
148
+ parts.push("## Table of contents placeholder", "", `The \`index.md\` MUST include the exact placeholder \`<!-- TOC -->\` on its`, `own line. The pipeline will replace this placeholder with an auto-generated,`, `navigable table of contents linking to every documentation page. Do NOT`, `generate your own links to individual doc pages — the placeholder handles that.`, "", "Example:", "", "```markdown", `# ${projectName} Documentation`, "", "Brief project description...", "", "## Reading guide", "", "How to navigate this documentation...", "", "<!-- TOC -->", "```", "");
149
+ // 4. Content guidelines
150
+ parts.push("## What to include", "", `1. **Project title as H1**: \`# ${projectName} Documentation\``, "", `2. **Project description**: 1-3 paragraphs explaining what ${projectName} is,`, ` what problem it solves, and who its audience is. Derive this from the`, ` exploration findings and generated documentation.`, "", `3. **Key concepts / terminology**: If the project uses domain-specific terms,`, ` briefly define the most important ones.`, "", `4. **Reading guide**: Help readers navigate the documentation. For example:`, ` - "New to the project? Start with [Architecture Overview](./architecture.md)."`, ` - Group documentation into logical themes and explain what each covers.`, "", `5. **The \`<!-- TOC -->\` placeholder**: Place this where you want the`, ` auto-generated table of contents to appear. Typically after your narrative`, ` content and before any closing remarks.`, "");
151
+ // 5. Quality standards
152
+ parts.push("## Quality standards", "", `- **Google Markdown style guide**: ATX headings, single H1, informative links.`, `- **No verbatim source code**: This is a documentation index, not a code reference.`, `- **Concise but informative**: Aim for a landing page that can be read in under 2 minutes.`, `- **Link to architecture.md**: If it exists, reference it in your reading guide.`, "");
153
+ // 6. Generated doc files for context
154
+ if (allDocFiles.length > 0) {
155
+ parts.push("## Generated documentation files", "");
156
+ parts.push("These documentation files have already been generated. Read them using tools", "to understand what the project is about and what content exists. Use this to", "write an informed project description and reading guide.", "");
157
+ for (const docFile of allDocFiles) {
158
+ parts.push(`- \`${destinationPath}/${docFile}\``);
159
+ }
160
+ parts.push("");
161
+ }
162
+ // 7. Exploration findings
163
+ parts.push("## Exploration findings", "");
164
+ if (allFindings.length > 0) {
165
+ parts.push("The exploration phase discovered these findings across all file groups.", "Use them to understand what the project does:", "");
166
+ for (const findings of allFindings) {
167
+ parts.push(`### Group: ${findings.groupId}`);
168
+ parts.push("");
169
+ parts.push(findings.text);
170
+ parts.push("");
171
+ }
172
+ }
173
+ else {
174
+ parts.push("No exploration findings available. Use tools to read the generated", "documentation files for context.", "");
175
+ }
176
+ // 8. File groups
177
+ parts.push("## File groups in this project", "");
178
+ if (allGroupSummaries.length > 0) {
179
+ for (const group of allGroupSummaries) {
180
+ parts.push(`- **${group.label}** (${group.id})`);
181
+ }
182
+ }
183
+ else {
184
+ parts.push("No file groups defined.");
185
+ }
186
+ parts.push("");
187
+ // 9. How to work
188
+ parts.push("## How to work", "");
189
+ parts.push(`1. **Read generated docs**: Use tools to read the generated documentation files`, ` to understand what the project is about.`, `2. **Read architecture.md**: If it exists, read it for the system overview.`, `3. **Write index.md**: Use the edit tool to write the index page.`, `4. **Include the TOC placeholder**: Remember to include \`<!-- TOC -->\` exactly once.`, "");
190
+ // 10. Done
191
+ parts.push("## When you are done", "");
192
+ parts.push(`After writing \`index.md\` using the edit tool, simply stop.`, `Do NOT return a JSON manifest or any other structured output.`, `The pipeline will read back \`index.md\` and replace the \`<!-- TOC -->\``, `placeholder with the auto-generated table of contents.`);
193
+ return parts.join("\n");
194
+ }
195
+ /**
196
+ * Splits exploration findings and group summaries into chunks that
197
+ * each produce a prompt under {@link INDEX_CHUNK_THRESHOLD} characters.
198
+ *
199
+ * The doc file list is NOT chunked — it is always included in the
200
+ * synthesis prompt (it is just a list of paths and stays small).
201
+ */
202
+ static chunkInputs(params) {
203
+ const { allFindings, allGroupSummaries } = params;
204
+ // Estimate the character cost of each finding and summary
205
+ const findingSizes = allFindings.map((f) => JSON.stringify(f).length);
206
+ const summarySizes = allGroupSummaries.map((s) => JSON.stringify(s).length);
207
+ const totalVariableSize = findingSizes.reduce((a, b) => a + b, 0) +
208
+ summarySizes.reduce((a, b) => a + b, 0);
209
+ // Base prompt overhead (system instructions, destination, TOC, etc.)
210
+ const baseOverhead = 3_000;
211
+ const budgetPerChunk = INDEX_CHUNK_THRESHOLD - baseOverhead;
212
+ // Calculate how many chunks we need
213
+ const numChunks = Math.max(1, Math.ceil(totalVariableSize / budgetPerChunk));
214
+ // Build a paired list sorted by size descending for balanced distribution
215
+ const items = [
216
+ ...findingSizes.map((size, index) => ({
217
+ type: "finding",
218
+ index,
219
+ size,
220
+ })),
221
+ ...summarySizes.map((size, index) => ({
222
+ type: "summary",
223
+ index,
224
+ size,
225
+ })),
226
+ ];
227
+ items.sort((a, b) => b.size - a.size);
228
+ // Greedy bin-packing: assign each item to the lightest chunk
229
+ const chunkBins = Array.from({ length: numChunks }, () => ({
230
+ findings: [],
231
+ summaries: [],
232
+ size: 0,
233
+ }));
234
+ for (const item of items) {
235
+ // Find the chunk with the smallest current size
236
+ let minIdx = 0;
237
+ for (let j = 1; j < chunkBins.length; j++) {
238
+ if (chunkBins[j].size < chunkBins[minIdx].size) {
239
+ minIdx = j;
240
+ }
241
+ }
242
+ const target = chunkBins[minIdx];
243
+ if (item.type === "finding") {
244
+ target.findings.push(allFindings[item.index]);
245
+ }
246
+ else {
247
+ target.summaries.push(allGroupSummaries[item.index]);
248
+ }
249
+ target.size += item.size;
250
+ }
251
+ // Filter out empty chunks (can happen if numChunks > items.length)
252
+ return chunkBins
253
+ .filter((c) => c.findings.length > 0 || c.summaries.length > 0)
254
+ .map(({ findings, summaries }) => ({ findings, summaries }));
255
+ }
256
+ /**
257
+ * Builds a prompt for a single chunk session.
258
+ *
259
+ * The chunk agent reads a subset of findings and group summaries and
260
+ * returns a JSON summary — it does NOT write `index.md`.
261
+ */
262
+ static buildChunkPrompt(params, chunkFindings, chunkSummaries, chunkIndex, totalChunks) {
263
+ const { projectName, destinationPath, allDocFiles } = params;
264
+ const parts = [];
265
+ parts.push(`Process chunk ${chunkIndex} of ${totalChunks} for the project **${projectName}**.`, `Analyze the exploration findings and file group summaries below, then return`, `a JSON summary.`, "", `**Do NOT write any files.** Do NOT use the edit tool. Return ONLY a JSON object`, `in your response.`, "");
266
+ // Context: doc files exist (for awareness, not for reading)
267
+ if (allDocFiles.length > 0) {
268
+ parts.push("## Documentation files (for context)", "");
269
+ parts.push(`These ${allDocFiles.length} documentation files have been generated.`, `You do not need to read them — just be aware they exist:`, "");
270
+ // Only list first 20 to keep the chunk prompt lean
271
+ const listed = allDocFiles.slice(0, 20);
272
+ for (const f of listed) {
273
+ parts.push(`- \`${destinationPath}/${f}\``);
274
+ }
275
+ if (allDocFiles.length > 20) {
276
+ parts.push(`- ... and ${allDocFiles.length - 20} more files`);
277
+ }
278
+ parts.push("");
279
+ }
280
+ // Findings for this chunk
281
+ parts.push("## Exploration findings (this chunk)", "");
282
+ if (chunkFindings.length > 0) {
283
+ for (const findings of chunkFindings) {
284
+ parts.push(`### Group: ${findings.groupId}`);
285
+ parts.push("");
286
+ parts.push(findings.text);
287
+ parts.push("");
288
+ }
289
+ }
290
+ else {
291
+ parts.push("No exploration findings in this chunk.", "");
292
+ }
293
+ // Group summaries for this chunk
294
+ parts.push("## File groups (this chunk)", "");
295
+ if (chunkSummaries.length > 0) {
296
+ for (const group of chunkSummaries) {
297
+ parts.push(`- **${group.label}** (${group.id})`);
298
+ }
299
+ }
300
+ else {
301
+ parts.push("No file groups in this chunk.");
302
+ }
303
+ parts.push("");
304
+ // Response format
305
+ parts.push("## Response format", "", "Return a JSON object with this structure:", "", "```json", "{", ' "narrative": "1-2 paragraphs describing what this chunk of the project does",', ' "keyConcepts": ["concept1", "concept2"],', ' "readingGuideEntries": ["Start with X to understand Y", "See Z for details on W"]', "}", "```", "", "Focus on extracting the most important themes and concepts from the", "findings and groups assigned to this chunk.");
306
+ return parts.join("\n");
307
+ }
308
+ /**
309
+ * Builds the synthesis prompt that combines chunk summaries into the
310
+ * final `index.md`. This prompt instructs the agent to write the file.
311
+ */
312
+ static buildSynthesisPrompt(params, chunkSummaries) {
313
+ const { projectName, destinationPath, allDocFiles } = params;
314
+ const parts = [];
315
+ // 1. System instructions (same as original buildPrompt)
316
+ parts.push(`Generate a rich, narrative \`index.md\` that serves as the landing page`, `for the project's documentation. It should orient readers with a project`, `description, explain what the documentation covers, and provide a reading`, `guide.`, "", `**Write files directly using the edit tool.** Do NOT return Markdown content`, `in your response text. Instead, use the edit tool to create \`index.md\``, `in the destination directory. When you are done, stop.`, "");
317
+ // 2. Where to write
318
+ parts.push("## Destination", "", `Write the index page to: \`${destinationPath}/index.md\``, "");
319
+ // 3. TOC placeholder (same as original)
320
+ parts.push("## Table of contents placeholder", "", `The \`index.md\` MUST include the exact placeholder \`<!-- TOC -->\` on its`, `own line. The pipeline will replace this placeholder with an auto-generated,`, `navigable table of contents linking to every documentation page. Do NOT`, `generate your own links to individual doc pages — the placeholder handles that.`, "", "Example:", "", "```markdown", `# ${projectName} Documentation`, "", "Brief project description...", "", "## Reading guide", "", "How to navigate this documentation...", "", "<!-- TOC -->", "```", "");
321
+ // 4. Content guidelines (same as original)
322
+ parts.push("## What to include", "", `1. **Project title as H1**: \`# ${projectName} Documentation\``, "", `2. **Project description**: 1-3 paragraphs explaining what ${projectName} is,`, ` what problem it solves, and who its audience is. Synthesize from the`, ` chunk summaries below.`, "", `3. **Key concepts / terminology**: Combine the key concepts from all chunks.`, "", `4. **Reading guide**: Combine the reading guide entries from all chunks into`, ` a coherent navigation guide.`, "", `5. **The \`<!-- TOC -->\` placeholder**: Place this where you want the`, ` auto-generated table of contents to appear.`, "");
323
+ // 5. Quality standards (same as original)
324
+ parts.push("## Quality standards", "", `- **Google Markdown style guide**: ATX headings, single H1, informative links.`, `- **No verbatim source code**: This is a documentation index, not a code reference.`, `- **Concise but informative**: Aim for a landing page that can be read in under 2 minutes.`, `- **Link to architecture.md**: If it exists, reference it in your reading guide.`, "");
325
+ // 6. Chunk summaries (the main input for synthesis)
326
+ parts.push("## Analysis summaries from exploration", "");
327
+ parts.push(`The project was analyzed in ${chunkSummaries.length} chunks. Here are the`, `summaries from each chunk. Synthesize these into a coherent project description`, `and reading guide:`, "");
328
+ for (let i = 0; i < chunkSummaries.length; i++) {
329
+ const cs = chunkSummaries[i];
330
+ parts.push(`### Chunk ${i + 1}`, "");
331
+ parts.push(`**Narrative**: ${cs.narrative}`, "");
332
+ if (cs.keyConcepts.length > 0) {
333
+ parts.push(`**Key concepts**: ${cs.keyConcepts.join(", ")}`, "");
334
+ }
335
+ if (cs.readingGuideEntries.length > 0) {
336
+ parts.push("**Reading guide entries**:");
337
+ for (const entry of cs.readingGuideEntries) {
338
+ parts.push(`- ${entry}`);
339
+ }
340
+ parts.push("");
341
+ }
342
+ }
343
+ // 7. Generated doc files (full list for context)
344
+ if (allDocFiles.length > 0) {
345
+ parts.push("## Generated documentation files", "");
346
+ parts.push("These documentation files have been generated. You may read them using", "tools for additional context:", "");
347
+ for (const docFile of allDocFiles) {
348
+ parts.push(`- \`${destinationPath}/${docFile}\``);
349
+ }
350
+ parts.push("");
351
+ }
352
+ // 8. How to work
353
+ parts.push("## How to work", "");
354
+ parts.push(`1. **Review the chunk summaries above**: They contain the synthesized analysis.`, `2. **Optionally read generated docs**: For additional context.`, `3. **Write index.md**: Use the edit tool to write the index page.`, `4. **Include the TOC placeholder**: Remember to include \`<!-- TOC -->\` exactly once.`, "");
355
+ // 9. Done
356
+ parts.push("## When you are done", "");
357
+ parts.push(`After writing \`index.md\` using the edit tool, simply stop.`, `Do NOT return a JSON manifest or any other structured output.`, `The pipeline will read back \`index.md\` and replace the \`<!-- TOC -->\``, `placeholder with the auto-generated table of contents.`);
358
+ return parts.join("\n");
359
+ }
360
+ }
@@ -0,0 +1,144 @@
1
+ /**
2
+ * LLM-assisted organization agent for Hem.
3
+ *
4
+ * Post-processing agent that runs AFTER all doc agents complete.
5
+ * Reviews all written documentation files, restructures/renames/deduplicates
6
+ * for consistency.
7
+ *
8
+ * Architecture (v3 — parallel workers with broadcast):
9
+ * - Absorbs the old DeduplicationAgent's responsibility.
10
+ * - For large file sets (>8 files), splits work across N parallel workers.
11
+ * - Workers communicate via an MCP broadcast tool + prompt injection.
12
+ * - The orchestrator intercepts broadcast tool calls via SSE and relays
13
+ * messages to all peer workers + their active subagents.
14
+ * - For small file sets (≤8 files), falls back to the single-agent path.
15
+ * - The pipeline discovers the final file set by scanning disk afterward.
16
+ */
17
+ import type { Provider } from "../providers/types.js";
18
+ import { BaseAgent } from "./base-agent.js";
19
/**
 * File-count cutoff for the organization pass: parallel workers are used
 * when there are more files than this, the single-agent path when the count
 * is at or below it.
 */
export declare const PARALLEL_THRESHOLD: 8;
21
/**
 * Upper bound on the number of parallel org workers. The effective count is
 * `min(MAX_WORKERS, computeOrgWorkers(fileCount), computeMaxConcurrency())`.
 * The arbiter does not count toward this cap (it is lightweight).
 */
export declare const MAX_WORKERS: 7;
27
/**
 * Name of the broadcast MCP tool as it shows up in SSE events.
 * The format is "{mcp-server-name}_{tool-name}".
 */
export declare const BROADCAST_TOOL_NAME: "hem-broadcast_broadcast";
32
/** Inputs for the single-agent organization prompt. */
export interface OrgPromptParams {
    /** Name of the project being documented. */
    projectName: string;
    /** Destination directory, as an absolute path. */
    destinationPath: string;
    /** Every generated documentation file, as relative paths. */
    allDocFiles: Array<string>;
}
41
/** Inputs for the prompt of one parallel org worker. */
export interface OrgWorkerPromptParams {
    /** Name of the project being documented. */
    projectName: string;
    /** Destination directory, as an absolute path. */
    destinationPath: string;
    /** Relative paths of the files THIS worker is responsible for. */
    assignedFiles: Array<string>;
    /** Every documentation file across all workers (for cross-reference awareness). */
    allDocFiles: Array<string>;
    /** This worker's label, e.g. "org-worker-1". */
    workerLabel: string;
    /** How many parallel workers exist in total. */
    totalWorkers: number;
}
56
/** One worker's assignment: a label paired with the files it handles. */
export interface WorkerAssignment {
    /** Label identifying the worker. */
    label: string;
    /** Files assigned to that worker. */
    files: Array<string>;
}
61
/**
 * Agent that reviews the generated documentation as a whole and
 * restructures it for consistency: deduplicating, renaming and reorganizing
 * files.
 *
 * Files are written and edited directly through the edit tool; the pipeline
 * learns the final file set by scanning the disk afterward.
 */
export declare class OrganizationAgent extends BaseAgent {
    constructor(provider: Provider);
    /**
     * Runs the organization pass over all generated documentation, picking
     * single-agent or parallel mode automatically from the file count.
     *
     * @param params - Organization parameters, including the file paths.
     * @param verbose - Optional logging callback (writes to stderr).
     * @throws If session creation or prompting fails.
     */
    run(params: OrgPromptParams, verbose?: (msg: string) => void): Promise<void>;
    /**
     * Organization pass handled by one agent (the original behavior).
     * Chosen when the file count is ≤ PARALLEL_THRESHOLD.
     */
    runSingle(params: OrgPromptParams, verbose?: (msg: string) => void): Promise<void>;
    /**
     * Organization pass split across several workers coordinated by an arbiter.
     *
     * Steps:
     * 1. Derive the worker count from resource limits (arbiter excluded).
     * 2. Distribute files to workers round-robin.
     * 3. Subscribe to SSE events so broadcasts can be intercepted.
     * 4. Create the arbiter session (a long-lived coordinator).
     * 5. Create the N worker sessions in parallel.
     * 6. Relay broadcasts with targeted routing:
     *    - Worker → arbiter only.
     *    - Arbiter → @tagged worker(s) only (or all if @all-workers / no tag).
     *    - Sessions that already completed receive nothing.
     * 7. Kill each worker session as soon as it completes (abort + delete).
     * 8. Intercept `RECALL:` broadcasts to respawn a worker for fixes.
     * 9. Send the arbiter a final prompt so it can wrap up.
     * 10. Kill the arbiter session.
     */
    runParallel(params: OrgPromptParams, verbose?: (msg: string) => void): Promise<void>;
    /**
     * Kills a session by aborting any running work and then deleting it.
     * Best-effort: failures are logged, never thrown.
     */
    private killSession;
    /**
     * Spawns a recalled worker session that applies specific fixes.
     *
     * Triggered when the arbiter broadcasts
     * `RECALL: @org-worker-N <instructions>`. A new session is created with
     * the worker's original file assignment and a focused prompt carrying the
     * fix instructions; the session is killed right after it completes.
     */
    private runRecalledWorker;
    /**
     * Builds the prompt for the single-agent organization pass (the original
     * behavior).
     */
    static buildPrompt(params: OrgPromptParams): string;
    /**
     * Builds the prompt for one parallel org worker.
     *
     * The prompt gives the worker:
     * - A scoped identity (e.g. "org-worker-1 of 3")
     * - The subset of files assigned to it
     * - The full file list, for cross-reference awareness
     * - Instructions for using the broadcast tool
     * - A scoped task list (only edit YOUR files)
     */
    static buildWorkerPrompt(params: OrgWorkerPromptParams): string;
}
132
/**
 * Distributes documentation files across workers round-robin.
 *
 * The files are first sorted alphabetically so that files from the same
 * directory tend to land on adjacent workers, preserving some locality.
 * They are then dealt out in order (file 0 → worker 1, file 1 → worker 2,
 * …), wrapping around, which keeps workloads balanced to within one file.
 *
 * @param files - Relative file paths (e.g. "auth/overview.md").
 * @param maxWorkers - Maximum number of workers to create.
 * @returns One assignment per worker, each carrying a label and its files.
 */
export declare function assignFilesToWorkers(files: Array<string>, maxWorkers: number): Array<WorkerAssignment>;