@pruddiman/hem 0.0.1-beta-5671db0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78) hide show
  1. package/LICENSE +21 -0
  2. package/dist/agents/arbiter-agent.d.ts +72 -0
  3. package/dist/agents/arbiter-agent.js +149 -0
  4. package/dist/agents/architecture-agent.d.ts +148 -0
  5. package/dist/agents/architecture-agent.js +459 -0
  6. package/dist/agents/base-agent.d.ts +44 -0
  7. package/dist/agents/base-agent.js +57 -0
  8. package/dist/agents/crossref-agent.d.ts +140 -0
  9. package/dist/agents/crossref-agent.js +560 -0
  10. package/dist/agents/crossref-arbiter-agent.d.ts +72 -0
  11. package/dist/agents/crossref-arbiter-agent.js +147 -0
  12. package/dist/agents/documentation-agent.d.ts +55 -0
  13. package/dist/agents/documentation-agent.js +159 -0
  14. package/dist/agents/exploration-agent.d.ts +58 -0
  15. package/dist/agents/exploration-agent.js +102 -0
  16. package/dist/agents/grouping-agent.d.ts +167 -0
  17. package/dist/agents/grouping-agent.js +557 -0
  18. package/dist/agents/index-agent.d.ts +86 -0
  19. package/dist/agents/index-agent.js +360 -0
  20. package/dist/agents/organization-agent.d.ts +144 -0
  21. package/dist/agents/organization-agent.js +607 -0
  22. package/dist/auth.d.ts +372 -0
  23. package/dist/auth.js +1072 -0
  24. package/dist/broadcast-mcp.d.ts +21 -0
  25. package/dist/broadcast-mcp.js +59 -0
  26. package/dist/changelog.d.ts +85 -0
  27. package/dist/changelog.js +223 -0
  28. package/dist/decision-queue.d.ts +173 -0
  29. package/dist/decision-queue.js +265 -0
  30. package/dist/diff-scope.d.ts +24 -0
  31. package/dist/diff-scope.js +28 -0
  32. package/dist/discovery.d.ts +54 -0
  33. package/dist/discovery.js +405 -0
  34. package/dist/grouping.d.ts +37 -0
  35. package/dist/grouping.js +343 -0
  36. package/dist/helpers/format.d.ts +5 -0
  37. package/dist/helpers/format.js +13 -0
  38. package/dist/helpers/index.d.ts +11 -0
  39. package/dist/helpers/index.js +11 -0
  40. package/dist/helpers/parsing.d.ts +52 -0
  41. package/dist/helpers/parsing.js +128 -0
  42. package/dist/helpers/paths.d.ts +41 -0
  43. package/dist/helpers/paths.js +67 -0
  44. package/dist/helpers/strings.d.ts +45 -0
  45. package/dist/helpers/strings.js +97 -0
  46. package/dist/index.d.ts +135 -0
  47. package/dist/index.js +1087 -0
  48. package/dist/merge-utils.d.ts +22 -0
  49. package/dist/merge-utils.js +34 -0
  50. package/dist/orchestrator.d.ts +194 -0
  51. package/dist/orchestrator.js +1169 -0
  52. package/dist/output.d.ts +106 -0
  53. package/dist/output.js +243 -0
  54. package/dist/progress.d.ts +228 -0
  55. package/dist/progress.js +644 -0
  56. package/dist/providers/copilot.d.ts +247 -0
  57. package/dist/providers/copilot.js +598 -0
  58. package/dist/providers/index.d.ts +15 -0
  59. package/dist/providers/index.js +12 -0
  60. package/dist/providers/opencode.d.ts +156 -0
  61. package/dist/providers/opencode.js +416 -0
  62. package/dist/providers/types.d.ts +156 -0
  63. package/dist/providers/types.js +16 -0
  64. package/dist/resources.d.ts +76 -0
  65. package/dist/resources.js +151 -0
  66. package/dist/search-index.d.ts +71 -0
  67. package/dist/search-index.js +187 -0
  68. package/dist/search-mcp.d.ts +25 -0
  69. package/dist/search-mcp.js +100 -0
  70. package/dist/server-utils.d.ts +56 -0
  71. package/dist/server-utils.js +135 -0
  72. package/dist/session.d.ts +227 -0
  73. package/dist/session.js +370 -0
  74. package/dist/types.d.ts +272 -0
  75. package/dist/types.js +5 -0
  76. package/dist/worktree.d.ts +82 -0
  77. package/dist/worktree.js +187 -0
  78. package/package.json +45 -0
@@ -0,0 +1,557 @@
1
+ /**
2
+ * LLM-assisted file grouping agent for Hem.
3
+ *
4
+ * Uses an OpenCode session to analyze source files and produce semantically
5
+ * meaningful documentation groups. Falls back gracefully — returns `null`
6
+ * on any failure so the caller can use heuristic grouping instead.
7
+ *
8
+ * The agent:
9
+ * 1. Reads file contents and extracts import dependencies (programmatic).
10
+ * 2. Builds a structured prompt with file summaries + dependency graph.
11
+ * 3. Sends the prompt to an OpenCode session.
12
+ * 4. Parses the strongly-typed JSON response.
13
+ * 5. Maps results to `FileGroup[]` with hallucination guards.
14
+ */
15
+ import { readFile, writeFile, mkdir } from "node:fs/promises";
16
+ import { join } from "node:path";
17
+ import { createHash } from "node:crypto";
18
+ import { extractJSON } from "../helpers/parsing.js";
19
+ import { commonDirectory } from "../grouping.js";
20
+ import { toKebabCase } from "../helpers/strings.js";
21
+ import { AuthExpiredError } from "../auth.js";
22
+ import { BaseAgent } from "./base-agent.js";
23
+ import pLimit from "p-limit";
24
+ import { computeMaxConcurrency } from "../resources.js";
25
/** Name of the JSON file, inside the cache directory, that stores grouping results. */
export const GROUPING_CACHE_FILE = "grouping-cache.json";
/**
 * Chunking threshold, measured in files. Inputs with MORE files than this
 * are grouped in batches (parallel LLM sessions whose results are merged);
 * inputs at or below it are sent as a single prompt.
 */
export const GROUPING_CHUNK_THRESHOLD = 200;
/**
 * Target number of files per batch on the chunked path. Each file
 * contributes at most its first 40 lines to the prompt, so a batch is
 * roughly 100 × 40 ≈ 4000 lines of context — intended to sit comfortably
 * inside typical 128K-200K token windows.
 */
export const FILES_PER_CHUNK = 100;
39
+ // ── Agent ───────────────────────────────────────────────────────────────
40
+ /**
41
+ * An agent that uses an LLM to group source files into cohesive
42
+ * documentation topics.
43
+ */
44
+ export class GroupingAgent extends BaseAgent {
45
/** Project name; interpolated into session titles and the grouping prompt. */
projectName;
/**
 * @param provider - LLM provider, forwarded to the `BaseAgent` constructor.
 * @param projectName - Name of the project whose files are being grouped.
 */
constructor(provider, projectName) {
    super(provider);
    this.projectName = projectName;
}
50
+ /**
51
+ * Run the full grouping pipeline: read → analyze → prompt → parse → map.
52
+ *
53
+ * @param files - Discovered source files to group.
54
+ * @param verbose - Optional logging callback (writes to stderr).
55
+ * @param cacheDir - Optional directory for the grouping result cache
56
+ * (typically the `.hem` directory in the project root).
57
+ * When provided, a cache hit skips the LLM call entirely.
58
+ * @returns `FileGroup[]` on success, `null` on any failure.
59
+ */
60
+ async run(files, verbose, cacheDir) {
61
+ try {
62
+ // ── Cache check (fast path) ────────────────────────────────────
63
+ const filesHash = GroupingAgent.computeFilesHash(files);
64
+ if (cacheDir) {
65
+ const cached = await GroupingAgent.loadGroupingCache(cacheDir, filesHash);
66
+ if (cached) {
67
+ if (verbose) {
68
+ verbose(`[grouping-agent] Cache hit (${filesHash.slice(0, 8)}…): skipping LLM call`);
69
+ }
70
+ const filesByPath = new Map(files.map((f) => [f.path, f]));
71
+ const groups = GroupingAgent.mapToFileGroups(cached, filesByPath);
72
+ if (groups.length > 0)
73
+ return groups;
74
+ if (verbose) {
75
+ verbose(`[grouping-agent] Cached results produced no valid groups; re-running LLM`);
76
+ }
77
+ }
78
+ }
79
+ // 1. Read file contents
80
+ const fileContents = await GroupingAgent.readFiles(files);
81
+ if (verbose) {
82
+ const totalBytes = fileContents.reduce((sum, f) => sum + f.content.length, 0);
83
+ verbose(`[grouping-agent] Read ${fileContents.length} files (${totalBytes.toLocaleString()} bytes)`);
84
+ }
85
+ // 2. Analyze imports
86
+ const imports = GroupingAgent.analyzeImports(fileContents);
87
+ if (verbose) {
88
+ const totalImports = [...imports.values()].reduce((sum, deps) => sum + deps.length, 0);
89
+ verbose(`[grouping-agent] Found ${totalImports} import relationships`);
90
+ }
91
+ // 3. Run LLM — chunked path for large projects, single-prompt otherwise
92
+ let rawResults;
93
+ if (fileContents.length > GROUPING_CHUNK_THRESHOLD) {
94
+ if (verbose) {
95
+ verbose(`[grouping-agent] File count (${fileContents.length}) exceeds threshold (${GROUPING_CHUNK_THRESHOLD}), using chunked grouping`);
96
+ }
97
+ const chunkedResults = await this.runChunkedRaw(fileContents, imports, verbose);
98
+ if (!chunkedResults)
99
+ return null;
100
+ rawResults = chunkedResults;
101
+ }
102
+ else {
103
+ // Single-prompt path
104
+ const prompt = GroupingAgent.buildPrompt(this.projectName, fileContents, imports);
105
+ if (verbose) {
106
+ verbose(`[grouping-agent] Prompt: ${prompt.length.toLocaleString()} chars`);
107
+ }
108
+ const sessionId = await this.createSession(`Hem: grouping — ${this.projectName}`);
109
+ if (verbose) {
110
+ verbose(`[grouping-agent] Session created: ${sessionId}`);
111
+ }
112
+ const response = await this.provider.prompt(sessionId, prompt, { agent: "hem-group" }) ?? "";
113
+ if (verbose) {
114
+ verbose(`[grouping-agent] Response: ${response.length.toLocaleString()} chars`);
115
+ }
116
+ const parsed = GroupingAgent.parseResponse(response);
117
+ if (!parsed) {
118
+ if (verbose) {
119
+ verbose(`[grouping-agent] Failed to parse JSON from response. First 500 chars: ${response.slice(0, 500)}`);
120
+ }
121
+ return null;
122
+ }
123
+ if (verbose) {
124
+ verbose(`[grouping-agent] Parsed ${parsed.length} groups from LLM`);
125
+ }
126
+ rawResults = parsed;
127
+ }
128
+ // 4. Save to cache (best-effort — never fail the pipeline on write errors)
129
+ if (cacheDir) {
130
+ GroupingAgent.saveGroupingCache(cacheDir, filesHash, rawResults).catch(() => { });
131
+ }
132
+ // 5. Map to FileGroup[]
133
+ const filesByPath = new Map(files.map((f) => [f.path, f]));
134
+ const groups = GroupingAgent.mapToFileGroups(rawResults, filesByPath);
135
+ if (groups.length === 0) {
136
+ if (verbose) {
137
+ verbose(`[grouping-agent] No valid groups after mapping (all file paths unrecognized?)`);
138
+ }
139
+ return null;
140
+ }
141
+ if (verbose) {
142
+ for (const g of groups) {
143
+ verbose(`[grouping-agent] Group "${g.label}" (${g.type}): ${g.files.map((f) => f.path).join(", ")}`);
144
+ }
145
+ }
146
+ return groups;
147
+ }
148
+ catch (err) {
149
+ // Re-throw auth errors — those should not be swallowed.
150
+ if (err instanceof AuthExpiredError) {
151
+ throw err;
152
+ }
153
+ if (verbose) {
154
+ const msg = err instanceof Error ? err.message : String(err);
155
+ verbose(`[grouping-agent] Error: ${msg}`);
156
+ }
157
+ return null;
158
+ }
159
+ }
160
+ /**
161
+ * Chunked grouping for large file sets.
162
+ *
163
+ * Splits files into batches that fit within context limits, runs
164
+ * multiple grouping sessions in parallel, and merges the results.
165
+ * Returns raw {@link GroupingResult}[] (before `mapToFileGroups`).
166
+ */
167
+ async runChunkedRaw(fileContents, imports, verbose) {
168
+ // 1. Split files into chunks
169
+ const chunks = GroupingAgent.splitIntoChunks(fileContents, FILES_PER_CHUNK);
170
+ if (verbose) {
171
+ verbose(`[grouping-agent] Split ${fileContents.length} files into ${chunks.length} chunks`);
172
+ }
173
+ // 2. Run grouping sessions in parallel, respecting resource limits
174
+ const concurrency = Math.max(1, computeMaxConcurrency());
175
+ const limit = pLimit(concurrency);
176
+ if (verbose) {
177
+ verbose(`[grouping-agent] Running ${chunks.length} chunk sessions (concurrency: ${concurrency})`);
178
+ }
179
+ const chunkResults = await Promise.allSettled(chunks.map((chunk, idx) => limit(async () => {
180
+ const chunkLabel = `chunk-${idx + 1}/${chunks.length}`;
181
+ if (verbose) {
182
+ verbose(`[grouping-agent] Starting ${chunkLabel} (${chunk.length} files)`);
183
+ }
184
+ // Build a per-chunk import map (only imports relevant to this chunk's files)
185
+ const chunkPaths = new Set(chunk.map((f) => f.path));
186
+ const chunkImports = new Map();
187
+ for (const [file, deps] of imports) {
188
+ if (chunkPaths.has(file)) {
189
+ chunkImports.set(file, deps);
190
+ }
191
+ }
192
+ const prompt = GroupingAgent.buildPrompt(this.projectName, chunk, chunkImports);
193
+ if (verbose) {
194
+ verbose(`[grouping-agent] ${chunkLabel} prompt: ${prompt.length.toLocaleString()} chars`);
195
+ }
196
+ const sessionId = await this.createSession(`Hem: grouping ${chunkLabel} — ${this.projectName}`);
197
+ const response = await this.provider.prompt(sessionId, prompt, { agent: "hem-group" }) ?? "";
198
+ const results = GroupingAgent.parseResponse(response);
199
+ if (!results) {
200
+ if (verbose) {
201
+ verbose(`[grouping-agent] ${chunkLabel} failed to parse JSON. First 500 chars: ${response.slice(0, 500)}`);
202
+ }
203
+ return [];
204
+ }
205
+ if (verbose) {
206
+ verbose(`[grouping-agent] ${chunkLabel} produced ${results.length} groups`);
207
+ }
208
+ return results;
209
+ })));
210
+ // 3. Collect successful results, log failures
211
+ const allResults = [];
212
+ for (let i = 0; i < chunkResults.length; i++) {
213
+ const result = chunkResults[i];
214
+ if (result.status === "fulfilled") {
215
+ allResults.push(...result.value);
216
+ }
217
+ else {
218
+ // Re-throw auth errors
219
+ if (result.reason instanceof AuthExpiredError) {
220
+ throw result.reason;
221
+ }
222
+ if (verbose) {
223
+ const msg = result.reason instanceof Error
224
+ ? result.reason.message
225
+ : String(result.reason);
226
+ verbose(`[grouping-agent] chunk-${i + 1}/${chunks.length} failed: ${msg}`);
227
+ }
228
+ }
229
+ }
230
+ if (allResults.length === 0) {
231
+ if (verbose) {
232
+ verbose(`[grouping-agent] All chunks failed or produced no groups`);
233
+ }
234
+ return null;
235
+ }
236
+ // 4. Merge overlapping groups from different chunks
237
+ const merged = GroupingAgent.mergeGroupingResults(allResults);
238
+ if (verbose) {
239
+ verbose(`[grouping-agent] Merged ${allResults.length} groups into ${merged.length} groups`);
240
+ }
241
+ return merged;
242
+ }
243
+ // ── Static helpers (pure functions, easy to unit test) ───────────────
244
+ /**
245
+ * Read file contents for all files. Files that fail to read get an
246
+ * error placeholder.
247
+ */
248
+ static async readFiles(files) {
249
+ const results = [];
250
+ for (const file of files) {
251
+ try {
252
+ const content = await readFile(file.absolutePath, "utf-8");
253
+ results.push({ path: file.path, content, size: file.size });
254
+ }
255
+ catch {
256
+ results.push({
257
+ path: file.path,
258
+ content: "[Error: unable to read file]",
259
+ size: file.size,
260
+ });
261
+ }
262
+ }
263
+ return results;
264
+ }
265
+ /**
266
+ * Extract import dependencies from file contents via regex.
267
+ *
268
+ * Handles:
269
+ * - `import ... from "./foo.js"`
270
+ * - `import ... from "../bar/baz.js"`
271
+ * - `import("./dynamic.js")`
272
+ * - `require("./cjs.js")`
273
+ *
274
+ * Only tracks local (relative) imports — ignores node_modules.
275
+ *
276
+ * @returns Map from file path → array of imported file paths.
277
+ */
278
+ static analyzeImports(fileContents) {
279
+ const imports = new Map();
280
+ // Build a set of known file basenames (without extension) for resolution
281
+ const knownFiles = new Set(fileContents.map((f) => f.path));
282
+ for (const file of fileContents) {
283
+ const deps = [];
284
+ // Match: import ... from "./path" or import ... from "../path"
285
+ const staticImportPattern = /(?:import|export)\s+.*?\s+from\s+["'](\.[^"']+)["']/g;
286
+ // Match: import("./path") or require("./path")
287
+ const dynamicImportPattern = /(?:import|require)\s*\(\s*["'](\.[^"']+)["']\s*\)/g;
288
+ for (const pattern of [staticImportPattern, dynamicImportPattern]) {
289
+ let match;
290
+ while ((match = pattern.exec(file.content)) !== null) {
291
+ const specifier = match[1];
292
+ // Resolve relative to the file's directory
293
+ const resolved = resolveImportPath(file.path, specifier, knownFiles);
294
+ if (resolved) {
295
+ deps.push(resolved);
296
+ }
297
+ }
298
+ }
299
+ if (deps.length > 0) {
300
+ imports.set(file.path, [...new Set(deps)]);
301
+ }
302
+ }
303
+ return imports;
304
+ }
305
+ /**
306
+ * Build the grouping prompt from file summaries and import graph.
307
+ *
308
+ * Includes the first 40 lines of each file plus the dependency graph
309
+ * so the LLM can make informed grouping decisions.
310
+ */
311
+ static buildPrompt(projectName, fileContents, imports) {
312
+ const parts = [];
313
+ // System instructions
314
+ parts.push(`Analyze source files for the "${projectName}" project and group them for documentation.`, "");
315
+ // File summaries (first 40 lines each)
316
+ parts.push("## File Summaries", "");
317
+ for (const file of fileContents) {
318
+ const lines = file.content.split("\n");
319
+ const summary = lines.slice(0, 40).join("\n");
320
+ const truncated = lines.length > 40 ? ` (showing 40/${lines.length} lines)` : "";
321
+ parts.push(`### \`${file.path}\` (${file.size.toLocaleString()} bytes${truncated})`, "", "```", summary, "```", "");
322
+ }
323
+ // Import dependencies
324
+ if (imports.size > 0) {
325
+ parts.push("## Import Dependencies", "");
326
+ for (const [file, deps] of imports) {
327
+ parts.push(`- ${file} → ${deps.join(", ")}`);
328
+ }
329
+ parts.push("");
330
+ }
331
+ // Task
332
+ parts.push("## Task", "", "Group these files into cohesive documentation topics. Each group becomes one documentation page.", "");
333
+ // Rules
334
+ parts.push("## Rules", "", "- 2-6 files per group", "- Files MAY appear in multiple groups when they are central to multiple features", "- Prefer feature-based (\"vertical\") groups that bundle related functionality across layers", "- Use \"horizontal\" type only for cross-cutting concerns (e.g., shared utilities, type definitions)", "- Every file must appear in at least one group", "");
335
+ // Output format
336
+ parts.push("## Output", "", "Respond with ONLY a JSON array inside a ```json code fence. No other text before or after the fence.", "", "Each element must match this exact schema:", "```json", JSON.stringify({
337
+ id: "kebab-case-identifier",
338
+ label: "Human Readable Name",
339
+ type: "vertical | horizontal",
340
+ files: ["relative/path.ts"],
341
+ rationale: "1-sentence explanation",
342
+ }, null, 2), "```");
343
+ return parts.join("\n");
344
+ }
345
+ /**
346
+ * Extract and validate JSON from the LLM response.
347
+ *
348
+ * Tries to find a fenced ```json block first, then falls back to
349
+ * parsing the entire response as JSON.
350
+ *
351
+ * @returns Validated array of `GroupingResult`, or `null` if invalid.
352
+ */
353
+ static parseResponse(response) {
354
+ try {
355
+ // Extract JSON from LLM response (handles fenced blocks, preamble text)
356
+ const raw = extractJSON(response);
357
+ const parsed = JSON.parse(raw);
358
+ if (!Array.isArray(parsed))
359
+ return null;
360
+ // Validate each element
361
+ for (const item of parsed) {
362
+ if (typeof item !== "object" ||
363
+ item === null ||
364
+ typeof item.id !== "string" ||
365
+ typeof item.label !== "string" ||
366
+ typeof item.type !== "string" ||
367
+ !Array.isArray(item.files) ||
368
+ typeof item.rationale !== "string") {
369
+ return null;
370
+ }
371
+ if (!["vertical", "horizontal"].includes(item.type))
372
+ return null;
373
+ if (item.files.length === 0)
374
+ return null;
375
+ if (!item.files.every((f) => typeof f === "string")) {
376
+ return null;
377
+ }
378
+ }
379
+ return parsed;
380
+ }
381
+ catch {
382
+ return null;
383
+ }
384
+ }
385
+ /**
386
+ * Map validated LLM results to `FileGroup[]`.
387
+ *
388
+ * Resolves file paths against the known file set, skipping any
389
+ * paths the LLM hallucinated. Computes `directory` via
390
+ * `commonDirectory()` from `grouping.ts`.
391
+ */
392
+ static mapToFileGroups(results, filesByPath) {
393
+ const groups = [];
394
+ for (const result of results) {
395
+ // Resolve file paths — skip any the LLM hallucinated
396
+ const resolvedFiles = [];
397
+ for (const filePath of result.files) {
398
+ const info = filesByPath.get(filePath);
399
+ if (info) {
400
+ resolvedFiles.push(info);
401
+ }
402
+ }
403
+ // Skip groups with fewer than 2 resolved files
404
+ if (resolvedFiles.length < 2)
405
+ continue;
406
+ groups.push({
407
+ id: toKebabCase(result.id),
408
+ label: result.label,
409
+ type: result.type,
410
+ files: resolvedFiles.sort((a, b) => a.path.localeCompare(b.path)),
411
+ directory: commonDirectory(resolvedFiles),
412
+ });
413
+ }
414
+ return groups;
415
+ }
416
+ /**
417
+ * Split file contents into batches of approximately `chunkSize` files.
418
+ *
419
+ * Files are split sequentially — no shuffling — so files that are
420
+ * close in the directory listing stay together, improving the LLM's
421
+ * ability to detect intra-chunk relationships.
422
+ */
423
+ static splitIntoChunks(fileContents, chunkSize) {
424
+ const chunks = [];
425
+ for (let i = 0; i < fileContents.length; i += chunkSize) {
426
+ chunks.push(fileContents.slice(i, i + chunkSize));
427
+ }
428
+ return chunks;
429
+ }
430
+ /**
431
+ * Merge grouping results from multiple chunks.
432
+ *
433
+ * Groups from different chunks may overlap — e.g., two chunks may each
434
+ * produce an "Authentication" group containing different files. This
435
+ * method merges groups with the same `id` (after kebab-case normalization)
436
+ * by combining their file lists and deduplicating.
437
+ *
438
+ * When groups share the same normalized ID:
439
+ * - File lists are unioned (deduplicated).
440
+ * - The label and type from the first occurrence are kept.
441
+ * - Rationales are concatenated.
442
+ *
443
+ * @param results - All `GroupingResult[]` from every chunk (flattened).
444
+ * @returns Merged array of `GroupingResult`.
445
+ */
446
+ static mergeGroupingResults(results) {
447
+ const merged = new Map();
448
+ for (const result of results) {
449
+ const normalizedId = toKebabCase(result.id);
450
+ const existing = merged.get(normalizedId);
451
+ if (existing) {
452
+ // Merge file lists (deduplicate)
453
+ const fileSet = new Set([...existing.files, ...result.files]);
454
+ existing.files = [...fileSet];
455
+ // Append rationale if different
456
+ if (!existing.rationale.includes(result.rationale)) {
457
+ existing.rationale = `${existing.rationale}; ${result.rationale}`;
458
+ }
459
+ }
460
+ else {
461
+ // First occurrence — clone to avoid mutating input
462
+ merged.set(normalizedId, {
463
+ id: result.id,
464
+ label: result.label,
465
+ type: result.type,
466
+ files: [...result.files],
467
+ rationale: result.rationale,
468
+ });
469
+ }
470
+ }
471
+ return [...merged.values()];
472
+ }
473
+ // ── Grouping cache helpers ────────────────────────────────────────────
474
+ /**
475
+ * Compute a stable hash of the file list for cache invalidation.
476
+ * Uses path + size so renames and size changes invalidate the cache.
477
+ */
478
+ static computeFilesHash(files) {
479
+ const key = files
480
+ .map((f) => `${f.path}:${f.size}`)
481
+ .sort()
482
+ .join("\n");
483
+ return createHash("sha256").update(key).digest("hex");
484
+ }
485
+ /**
486
+ * Attempt to load a valid cache entry from `cacheDir`.
487
+ * Returns `null` on miss, parse error, or hash mismatch.
488
+ */
489
+ static async loadGroupingCache(cacheDir, filesHash) {
490
+ try {
491
+ const cacheFile = join(cacheDir, GROUPING_CACHE_FILE);
492
+ const raw = await readFile(cacheFile, "utf-8");
493
+ const parsed = JSON.parse(raw);
494
+ if (typeof parsed !== "object" ||
495
+ parsed === null ||
496
+ parsed.version !== 1 ||
497
+ parsed.filesHash !== filesHash ||
498
+ !Array.isArray(parsed.results)) {
499
+ return null;
500
+ }
501
+ return parsed.results;
502
+ }
503
+ catch {
504
+ return null;
505
+ }
506
+ }
507
+ /**
508
+ * Write grouping results to the cache file (best-effort, never throws).
509
+ */
510
+ static async saveGroupingCache(cacheDir, filesHash, results) {
511
+ const cache = { version: 1, filesHash, results };
512
+ await mkdir(cacheDir, { recursive: true });
513
+ await writeFile(join(cacheDir, GROUPING_CACHE_FILE), JSON.stringify(cache, null, 2), "utf-8");
514
+ }
515
+ }
516
+ // ── Internal helpers ────────────────────────────────────────────────────
517
/**
 * Resolve a relative import specifier to a known file path.
 *
 * The specifier is joined onto the importing file's directory and
 * normalized (`.` and empty segments dropped, `..` consuming one
 * segment). A `..` that would climb above the root makes the import
 * unresolvable: the target lies outside the known file set and must not
 * be mistaken for a same-named file inside it.
 *
 * Candidates are tried in this order; the first one present in
 * `knownFiles` wins:
 * 1. the normalized path as written
 * 2. `.js` replaced by `.ts` / `.tsx` (TypeScript sources import `.js`)
 * 3. `.ts` / `.tsx` appended
 * 4. `.jsx` → `.tsx`, `.mjs` → `.mts`, `.cjs` → `.cts`
 * 5. `.js` / `.jsx` appended (extensionless JavaScript imports)
 * 6. directory imports: `<path>/index` with `.ts`, `.tsx`, `.js`, `.jsx`
 *
 * @param fromFile - Path of the importing file, `/`-separated.
 * @param specifier - Relative specifier as written in the import.
 * @param knownFiles - Set of all file paths in the project.
 * @returns The matching known path, or `null` when nothing matches.
 */
function resolveImportPath(fromFile, specifier, knownFiles) {
    // Directory of the importing file ("" for a file at the root).
    const fromParts = fromFile.split("/");
    fromParts.pop();
    const dir = fromParts.join("/");
    // Normalize directory + specifier segment by segment.
    const resolved = [];
    for (const seg of (dir ? dir + "/" + specifier : specifier).split("/")) {
        if (seg === "." || seg === "")
            continue;
        if (seg !== "..") {
            resolved.push(seg);
            continue;
        }
        if (resolved.length === 0) {
            // Climbs above the root — not one of our files.
            return null;
        }
        resolved.pop();
    }
    const resolvedPath = resolved.join("/");
    const withoutJs = resolvedPath.replace(/\.js$/, "");
    const indexBase = resolvedPath ? resolvedPath + "/index" : "index";
    const candidates = [
        resolvedPath,
        withoutJs + ".ts",
        withoutJs + ".tsx",
        resolvedPath + ".ts",
        resolvedPath + ".tsx",
        // A replace() that does not match yields `resolvedPath` again,
        // which was already tried first, so these entries are harmless
        // when the extension differs.
        resolvedPath.replace(/\.jsx$/, ".tsx"),
        resolvedPath.replace(/\.mjs$/, ".mts"),
        resolvedPath.replace(/\.cjs$/, ".cts"),
        resolvedPath + ".js",
        resolvedPath + ".jsx",
        indexBase + ".ts",
        indexBase + ".tsx",
        indexBase + ".js",
        indexBase + ".jsx",
    ];
    return candidates.find((candidate) => knownFiles.has(candidate)) ?? null;
}
@@ -0,0 +1,86 @@
1
+ /**
2
+ * LLM-assisted index page generation agent for Hem.
3
+ *
4
+ * Post-processing agent that runs AFTER the architecture agent.
5
+ * Uses an OpenCode session to generate a rich `index.md` with a
6
+ * narrative introduction, project overview, and reading guide.
7
+ *
8
+ * The agent writes `index.md` with a `<!-- TOC -->` placeholder
9
+ * that the pipeline replaces with the procedurally generated
10
+ * table of contents link list.
11
+ *
12
+ * Architecture:
13
+ * - Reuses the `hem-arch` agent permissions (no new agent entry).
14
+ * - The pipeline reads back `index.md`, replaces the placeholder,
15
+ * then writes the final version.
16
+ * - Falls back to procedural TOC if the agent fails.
17
+ */
18
+ import type { Provider } from "../providers/types.js";
19
+ import type { ExplorationFindings } from "../types.js";
20
+ import { BaseAgent } from "./base-agent.js";
21
+ import type { ArchGroupSummary } from "./architecture-agent.js";
22
+ /** Character-count threshold above which the index agent chunks its input. */
23
+ export declare const INDEX_CHUNK_THRESHOLD = 80000;
24
+ /** Summary produced by a single chunk session for later synthesis. */
25
+ export interface IndexChunkSummary {
26
+ /** Short narrative describing what was found in this chunk. */
27
+ narrative: string;
28
+ /** Key concepts or terminology discovered. */
29
+ keyConcepts: string[];
30
+ /** Suggested reading guide entries. */
31
+ readingGuideEntries: string[];
32
+ }
33
+ /** Parameters for the index overview prompt. */
34
+ export interface IndexPromptParams {
35
+ projectName: string;
36
+ destinationPath: string;
37
+ allDocFiles: string[];
38
+ allFindings: ExplorationFindings[];
39
+ allGroupSummaries: ArchGroupSummary[];
40
+ }
41
+ /**
42
+ * An agent that uses an LLM to generate a rich index page (`index.md`)
43
+ * for a project's documentation.
44
+ *
45
+ * Writes `index.md` with a `<!-- TOC -->` placeholder that the pipeline
46
+ * replaces with the procedural table of contents link list.
47
+ */
48
+ export declare class IndexAgent extends BaseAgent {
49
+ constructor(provider: Provider);
50
+ /**
51
+ * Run the index page generation pipeline.
52
+ *
53
+ * @param params - All inputs needed for the index prompt.
54
+ * @param verbose - Optional logging callback (writes to stderr).
55
+ * @throws If session creation or prompting fails.
56
+ */
57
+ run(params: IndexPromptParams, verbose?: (msg: string) => void): Promise<void>;
58
+ /**
59
+ * Builds the index page prompt from exploration findings, group
60
+ * summaries, and the list of generated documentation files.
61
+ */
62
+ static buildPrompt(params: IndexPromptParams): string;
63
+ /**
64
+ * Splits exploration findings and group summaries into chunks that
65
+ * each produce a prompt under {@link INDEX_CHUNK_THRESHOLD} characters.
66
+ *
67
+ * The doc file list is NOT chunked — it is always included in the
68
+ * synthesis prompt (it is just a list of paths and stays small).
69
+ */
70
+ static chunkInputs(params: IndexPromptParams): Array<{
71
+ findings: ExplorationFindings[];
72
+ summaries: ArchGroupSummary[];
73
+ }>;
74
+ /**
75
+ * Builds a prompt for a single chunk session.
76
+ *
77
+ * The chunk agent reads a subset of findings and group summaries and
78
+ * returns a JSON summary — it does NOT write `index.md`.
79
+ */
80
+ static buildChunkPrompt(params: IndexPromptParams, chunkFindings: ExplorationFindings[], chunkSummaries: ArchGroupSummary[], chunkIndex: number, totalChunks: number): string;
81
+ /**
82
+ * Builds the synthesis prompt that combines chunk summaries into the
83
+ * final `index.md`. This prompt instructs the agent to write the file.
84
+ */
85
+ static buildSynthesisPrompt(params: IndexPromptParams, chunkSummaries: IndexChunkSummary[]): string;
86
+ }