@pruddiman/hem 0.0.1-beta-5671db0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/dist/agents/arbiter-agent.d.ts +72 -0
- package/dist/agents/arbiter-agent.js +149 -0
- package/dist/agents/architecture-agent.d.ts +148 -0
- package/dist/agents/architecture-agent.js +459 -0
- package/dist/agents/base-agent.d.ts +44 -0
- package/dist/agents/base-agent.js +57 -0
- package/dist/agents/crossref-agent.d.ts +140 -0
- package/dist/agents/crossref-agent.js +560 -0
- package/dist/agents/crossref-arbiter-agent.d.ts +72 -0
- package/dist/agents/crossref-arbiter-agent.js +147 -0
- package/dist/agents/documentation-agent.d.ts +55 -0
- package/dist/agents/documentation-agent.js +159 -0
- package/dist/agents/exploration-agent.d.ts +58 -0
- package/dist/agents/exploration-agent.js +102 -0
- package/dist/agents/grouping-agent.d.ts +167 -0
- package/dist/agents/grouping-agent.js +557 -0
- package/dist/agents/index-agent.d.ts +86 -0
- package/dist/agents/index-agent.js +360 -0
- package/dist/agents/organization-agent.d.ts +144 -0
- package/dist/agents/organization-agent.js +607 -0
- package/dist/auth.d.ts +372 -0
- package/dist/auth.js +1072 -0
- package/dist/broadcast-mcp.d.ts +21 -0
- package/dist/broadcast-mcp.js +59 -0
- package/dist/changelog.d.ts +85 -0
- package/dist/changelog.js +223 -0
- package/dist/decision-queue.d.ts +173 -0
- package/dist/decision-queue.js +265 -0
- package/dist/diff-scope.d.ts +24 -0
- package/dist/diff-scope.js +28 -0
- package/dist/discovery.d.ts +54 -0
- package/dist/discovery.js +405 -0
- package/dist/grouping.d.ts +37 -0
- package/dist/grouping.js +343 -0
- package/dist/helpers/format.d.ts +5 -0
- package/dist/helpers/format.js +13 -0
- package/dist/helpers/index.d.ts +11 -0
- package/dist/helpers/index.js +11 -0
- package/dist/helpers/parsing.d.ts +52 -0
- package/dist/helpers/parsing.js +128 -0
- package/dist/helpers/paths.d.ts +41 -0
- package/dist/helpers/paths.js +67 -0
- package/dist/helpers/strings.d.ts +45 -0
- package/dist/helpers/strings.js +97 -0
- package/dist/index.d.ts +135 -0
- package/dist/index.js +1087 -0
- package/dist/merge-utils.d.ts +22 -0
- package/dist/merge-utils.js +34 -0
- package/dist/orchestrator.d.ts +194 -0
- package/dist/orchestrator.js +1169 -0
- package/dist/output.d.ts +106 -0
- package/dist/output.js +243 -0
- package/dist/progress.d.ts +228 -0
- package/dist/progress.js +644 -0
- package/dist/providers/copilot.d.ts +247 -0
- package/dist/providers/copilot.js +598 -0
- package/dist/providers/index.d.ts +15 -0
- package/dist/providers/index.js +12 -0
- package/dist/providers/opencode.d.ts +156 -0
- package/dist/providers/opencode.js +416 -0
- package/dist/providers/types.d.ts +156 -0
- package/dist/providers/types.js +16 -0
- package/dist/resources.d.ts +76 -0
- package/dist/resources.js +151 -0
- package/dist/search-index.d.ts +71 -0
- package/dist/search-index.js +187 -0
- package/dist/search-mcp.d.ts +25 -0
- package/dist/search-mcp.js +100 -0
- package/dist/server-utils.d.ts +56 -0
- package/dist/server-utils.js +135 -0
- package/dist/session.d.ts +227 -0
- package/dist/session.js +370 -0
- package/dist/types.d.ts +272 -0
- package/dist/types.js +5 -0
- package/dist/worktree.d.ts +82 -0
- package/dist/worktree.js +187 -0
- package/package.json +45 -0
|
@@ -0,0 +1,557 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* LLM-assisted file grouping agent for Hem.
|
|
3
|
+
*
|
|
4
|
+
* Uses an OpenCode session to analyze source files and produce semantically
|
|
5
|
+
* meaningful documentation groups. Falls back gracefully — returns `null`
|
|
6
|
+
* on any failure so the caller can use heuristic grouping instead.
|
|
7
|
+
*
|
|
8
|
+
* The agent:
|
|
9
|
+
* 1. Reads file contents and extracts import dependencies (programmatic).
|
|
10
|
+
* 2. Builds a structured prompt with file summaries + dependency graph.
|
|
11
|
+
* 3. Sends the prompt to an OpenCode session.
|
|
12
|
+
* 4. Parses the strongly-typed JSON response.
|
|
13
|
+
* 5. Maps results to `FileGroup[]` with hallucination guards.
|
|
14
|
+
*/
|
|
15
|
+
import { readFile, writeFile, mkdir } from "node:fs/promises";
|
|
16
|
+
import { join } from "node:path";
|
|
17
|
+
import { createHash } from "node:crypto";
|
|
18
|
+
import { extractJSON } from "../helpers/parsing.js";
|
|
19
|
+
import { commonDirectory } from "../grouping.js";
|
|
20
|
+
import { toKebabCase } from "../helpers/strings.js";
|
|
21
|
+
import { AuthExpiredError } from "../auth.js";
|
|
22
|
+
import { BaseAgent } from "./base-agent.js";
|
|
23
|
+
import pLimit from "p-limit";
|
|
24
|
+
import { computeMaxConcurrency } from "../resources.js";
|
|
25
|
+
/** File name for the on-disk grouping result cache. */
|
|
26
|
+
export const GROUPING_CACHE_FILE = "grouping-cache.json";
|
|
27
|
+
/**
|
|
28
|
+
* File count threshold: when there are more files than this, use chunked
|
|
29
|
+
* grouping (split into batches → parallel LLM sessions → merge results).
|
|
30
|
+
* Below this threshold, the original single-prompt approach is used.
|
|
31
|
+
*/
|
|
32
|
+
export const GROUPING_CHUNK_THRESHOLD = 200;
|
|
33
|
+
/**
|
|
34
|
+
* Target number of files per batch when chunking. Batches are sized to
|
|
35
|
+
* stay well within LLM context limits (~100 files × 40 lines each ≈ 4000
|
|
36
|
+
* lines of context, comfortably under typical 128K-200K token windows).
|
|
37
|
+
*/
|
|
38
|
+
export const FILES_PER_CHUNK = 100;
|
|
39
|
+
// ── Agent ───────────────────────────────────────────────────────────────
|
|
40
|
+
/**
|
|
41
|
+
* An agent that uses an LLM to group source files into cohesive
|
|
42
|
+
* documentation topics.
|
|
43
|
+
*/
|
|
44
|
+
export class GroupingAgent extends BaseAgent {
|
|
45
|
+
projectName;
|
|
46
|
+
constructor(provider, projectName) {
|
|
47
|
+
super(provider);
|
|
48
|
+
this.projectName = projectName;
|
|
49
|
+
}
|
|
50
|
+
/**
|
|
51
|
+
* Run the full grouping pipeline: read → analyze → prompt → parse → map.
|
|
52
|
+
*
|
|
53
|
+
* @param files - Discovered source files to group.
|
|
54
|
+
* @param verbose - Optional logging callback (writes to stderr).
|
|
55
|
+
* @param cacheDir - Optional directory for the grouping result cache
|
|
56
|
+
* (typically the `.hem` directory in the project root).
|
|
57
|
+
* When provided, a cache hit skips the LLM call entirely.
|
|
58
|
+
* @returns `FileGroup[]` on success, `null` on any failure.
|
|
59
|
+
*/
|
|
60
|
+
async run(files, verbose, cacheDir) {
|
|
61
|
+
try {
|
|
62
|
+
// ── Cache check (fast path) ────────────────────────────────────
|
|
63
|
+
const filesHash = GroupingAgent.computeFilesHash(files);
|
|
64
|
+
if (cacheDir) {
|
|
65
|
+
const cached = await GroupingAgent.loadGroupingCache(cacheDir, filesHash);
|
|
66
|
+
if (cached) {
|
|
67
|
+
if (verbose) {
|
|
68
|
+
verbose(`[grouping-agent] Cache hit (${filesHash.slice(0, 8)}…): skipping LLM call`);
|
|
69
|
+
}
|
|
70
|
+
const filesByPath = new Map(files.map((f) => [f.path, f]));
|
|
71
|
+
const groups = GroupingAgent.mapToFileGroups(cached, filesByPath);
|
|
72
|
+
if (groups.length > 0)
|
|
73
|
+
return groups;
|
|
74
|
+
if (verbose) {
|
|
75
|
+
verbose(`[grouping-agent] Cached results produced no valid groups; re-running LLM`);
|
|
76
|
+
}
|
|
77
|
+
}
|
|
78
|
+
}
|
|
79
|
+
// 1. Read file contents
|
|
80
|
+
const fileContents = await GroupingAgent.readFiles(files);
|
|
81
|
+
if (verbose) {
|
|
82
|
+
const totalBytes = fileContents.reduce((sum, f) => sum + f.content.length, 0);
|
|
83
|
+
verbose(`[grouping-agent] Read ${fileContents.length} files (${totalBytes.toLocaleString()} bytes)`);
|
|
84
|
+
}
|
|
85
|
+
// 2. Analyze imports
|
|
86
|
+
const imports = GroupingAgent.analyzeImports(fileContents);
|
|
87
|
+
if (verbose) {
|
|
88
|
+
const totalImports = [...imports.values()].reduce((sum, deps) => sum + deps.length, 0);
|
|
89
|
+
verbose(`[grouping-agent] Found ${totalImports} import relationships`);
|
|
90
|
+
}
|
|
91
|
+
// 3. Run LLM — chunked path for large projects, single-prompt otherwise
|
|
92
|
+
let rawResults;
|
|
93
|
+
if (fileContents.length > GROUPING_CHUNK_THRESHOLD) {
|
|
94
|
+
if (verbose) {
|
|
95
|
+
verbose(`[grouping-agent] File count (${fileContents.length}) exceeds threshold (${GROUPING_CHUNK_THRESHOLD}), using chunked grouping`);
|
|
96
|
+
}
|
|
97
|
+
const chunkedResults = await this.runChunkedRaw(fileContents, imports, verbose);
|
|
98
|
+
if (!chunkedResults)
|
|
99
|
+
return null;
|
|
100
|
+
rawResults = chunkedResults;
|
|
101
|
+
}
|
|
102
|
+
else {
|
|
103
|
+
// Single-prompt path
|
|
104
|
+
const prompt = GroupingAgent.buildPrompt(this.projectName, fileContents, imports);
|
|
105
|
+
if (verbose) {
|
|
106
|
+
verbose(`[grouping-agent] Prompt: ${prompt.length.toLocaleString()} chars`);
|
|
107
|
+
}
|
|
108
|
+
const sessionId = await this.createSession(`Hem: grouping — ${this.projectName}`);
|
|
109
|
+
if (verbose) {
|
|
110
|
+
verbose(`[grouping-agent] Session created: ${sessionId}`);
|
|
111
|
+
}
|
|
112
|
+
const response = await this.provider.prompt(sessionId, prompt, { agent: "hem-group" }) ?? "";
|
|
113
|
+
if (verbose) {
|
|
114
|
+
verbose(`[grouping-agent] Response: ${response.length.toLocaleString()} chars`);
|
|
115
|
+
}
|
|
116
|
+
const parsed = GroupingAgent.parseResponse(response);
|
|
117
|
+
if (!parsed) {
|
|
118
|
+
if (verbose) {
|
|
119
|
+
verbose(`[grouping-agent] Failed to parse JSON from response. First 500 chars: ${response.slice(0, 500)}`);
|
|
120
|
+
}
|
|
121
|
+
return null;
|
|
122
|
+
}
|
|
123
|
+
if (verbose) {
|
|
124
|
+
verbose(`[grouping-agent] Parsed ${parsed.length} groups from LLM`);
|
|
125
|
+
}
|
|
126
|
+
rawResults = parsed;
|
|
127
|
+
}
|
|
128
|
+
// 4. Save to cache (best-effort — never fail the pipeline on write errors)
|
|
129
|
+
if (cacheDir) {
|
|
130
|
+
GroupingAgent.saveGroupingCache(cacheDir, filesHash, rawResults).catch(() => { });
|
|
131
|
+
}
|
|
132
|
+
// 5. Map to FileGroup[]
|
|
133
|
+
const filesByPath = new Map(files.map((f) => [f.path, f]));
|
|
134
|
+
const groups = GroupingAgent.mapToFileGroups(rawResults, filesByPath);
|
|
135
|
+
if (groups.length === 0) {
|
|
136
|
+
if (verbose) {
|
|
137
|
+
verbose(`[grouping-agent] No valid groups after mapping (all file paths unrecognized?)`);
|
|
138
|
+
}
|
|
139
|
+
return null;
|
|
140
|
+
}
|
|
141
|
+
if (verbose) {
|
|
142
|
+
for (const g of groups) {
|
|
143
|
+
verbose(`[grouping-agent] Group "${g.label}" (${g.type}): ${g.files.map((f) => f.path).join(", ")}`);
|
|
144
|
+
}
|
|
145
|
+
}
|
|
146
|
+
return groups;
|
|
147
|
+
}
|
|
148
|
+
catch (err) {
|
|
149
|
+
// Re-throw auth errors — those should not be swallowed.
|
|
150
|
+
if (err instanceof AuthExpiredError) {
|
|
151
|
+
throw err;
|
|
152
|
+
}
|
|
153
|
+
if (verbose) {
|
|
154
|
+
const msg = err instanceof Error ? err.message : String(err);
|
|
155
|
+
verbose(`[grouping-agent] Error: ${msg}`);
|
|
156
|
+
}
|
|
157
|
+
return null;
|
|
158
|
+
}
|
|
159
|
+
}
|
|
160
|
+
/**
|
|
161
|
+
* Chunked grouping for large file sets.
|
|
162
|
+
*
|
|
163
|
+
* Splits files into batches that fit within context limits, runs
|
|
164
|
+
* multiple grouping sessions in parallel, and merges the results.
|
|
165
|
+
* Returns raw {@link GroupingResult}[] (before `mapToFileGroups`).
|
|
166
|
+
*/
|
|
167
|
+
async runChunkedRaw(fileContents, imports, verbose) {
|
|
168
|
+
// 1. Split files into chunks
|
|
169
|
+
const chunks = GroupingAgent.splitIntoChunks(fileContents, FILES_PER_CHUNK);
|
|
170
|
+
if (verbose) {
|
|
171
|
+
verbose(`[grouping-agent] Split ${fileContents.length} files into ${chunks.length} chunks`);
|
|
172
|
+
}
|
|
173
|
+
// 2. Run grouping sessions in parallel, respecting resource limits
|
|
174
|
+
const concurrency = Math.max(1, computeMaxConcurrency());
|
|
175
|
+
const limit = pLimit(concurrency);
|
|
176
|
+
if (verbose) {
|
|
177
|
+
verbose(`[grouping-agent] Running ${chunks.length} chunk sessions (concurrency: ${concurrency})`);
|
|
178
|
+
}
|
|
179
|
+
const chunkResults = await Promise.allSettled(chunks.map((chunk, idx) => limit(async () => {
|
|
180
|
+
const chunkLabel = `chunk-${idx + 1}/${chunks.length}`;
|
|
181
|
+
if (verbose) {
|
|
182
|
+
verbose(`[grouping-agent] Starting ${chunkLabel} (${chunk.length} files)`);
|
|
183
|
+
}
|
|
184
|
+
// Build a per-chunk import map (only imports relevant to this chunk's files)
|
|
185
|
+
const chunkPaths = new Set(chunk.map((f) => f.path));
|
|
186
|
+
const chunkImports = new Map();
|
|
187
|
+
for (const [file, deps] of imports) {
|
|
188
|
+
if (chunkPaths.has(file)) {
|
|
189
|
+
chunkImports.set(file, deps);
|
|
190
|
+
}
|
|
191
|
+
}
|
|
192
|
+
const prompt = GroupingAgent.buildPrompt(this.projectName, chunk, chunkImports);
|
|
193
|
+
if (verbose) {
|
|
194
|
+
verbose(`[grouping-agent] ${chunkLabel} prompt: ${prompt.length.toLocaleString()} chars`);
|
|
195
|
+
}
|
|
196
|
+
const sessionId = await this.createSession(`Hem: grouping ${chunkLabel} — ${this.projectName}`);
|
|
197
|
+
const response = await this.provider.prompt(sessionId, prompt, { agent: "hem-group" }) ?? "";
|
|
198
|
+
const results = GroupingAgent.parseResponse(response);
|
|
199
|
+
if (!results) {
|
|
200
|
+
if (verbose) {
|
|
201
|
+
verbose(`[grouping-agent] ${chunkLabel} failed to parse JSON. First 500 chars: ${response.slice(0, 500)}`);
|
|
202
|
+
}
|
|
203
|
+
return [];
|
|
204
|
+
}
|
|
205
|
+
if (verbose) {
|
|
206
|
+
verbose(`[grouping-agent] ${chunkLabel} produced ${results.length} groups`);
|
|
207
|
+
}
|
|
208
|
+
return results;
|
|
209
|
+
})));
|
|
210
|
+
// 3. Collect successful results, log failures
|
|
211
|
+
const allResults = [];
|
|
212
|
+
for (let i = 0; i < chunkResults.length; i++) {
|
|
213
|
+
const result = chunkResults[i];
|
|
214
|
+
if (result.status === "fulfilled") {
|
|
215
|
+
allResults.push(...result.value);
|
|
216
|
+
}
|
|
217
|
+
else {
|
|
218
|
+
// Re-throw auth errors
|
|
219
|
+
if (result.reason instanceof AuthExpiredError) {
|
|
220
|
+
throw result.reason;
|
|
221
|
+
}
|
|
222
|
+
if (verbose) {
|
|
223
|
+
const msg = result.reason instanceof Error
|
|
224
|
+
? result.reason.message
|
|
225
|
+
: String(result.reason);
|
|
226
|
+
verbose(`[grouping-agent] chunk-${i + 1}/${chunks.length} failed: ${msg}`);
|
|
227
|
+
}
|
|
228
|
+
}
|
|
229
|
+
}
|
|
230
|
+
if (allResults.length === 0) {
|
|
231
|
+
if (verbose) {
|
|
232
|
+
verbose(`[grouping-agent] All chunks failed or produced no groups`);
|
|
233
|
+
}
|
|
234
|
+
return null;
|
|
235
|
+
}
|
|
236
|
+
// 4. Merge overlapping groups from different chunks
|
|
237
|
+
const merged = GroupingAgent.mergeGroupingResults(allResults);
|
|
238
|
+
if (verbose) {
|
|
239
|
+
verbose(`[grouping-agent] Merged ${allResults.length} groups into ${merged.length} groups`);
|
|
240
|
+
}
|
|
241
|
+
return merged;
|
|
242
|
+
}
|
|
243
|
+
// ── Static helpers (pure functions, easy to unit test) ───────────────
|
|
244
|
+
/**
|
|
245
|
+
* Read file contents for all files. Files that fail to read get an
|
|
246
|
+
* error placeholder.
|
|
247
|
+
*/
|
|
248
|
+
static async readFiles(files) {
|
|
249
|
+
const results = [];
|
|
250
|
+
for (const file of files) {
|
|
251
|
+
try {
|
|
252
|
+
const content = await readFile(file.absolutePath, "utf-8");
|
|
253
|
+
results.push({ path: file.path, content, size: file.size });
|
|
254
|
+
}
|
|
255
|
+
catch {
|
|
256
|
+
results.push({
|
|
257
|
+
path: file.path,
|
|
258
|
+
content: "[Error: unable to read file]",
|
|
259
|
+
size: file.size,
|
|
260
|
+
});
|
|
261
|
+
}
|
|
262
|
+
}
|
|
263
|
+
return results;
|
|
264
|
+
}
|
|
265
|
+
/**
|
|
266
|
+
* Extract import dependencies from file contents via regex.
|
|
267
|
+
*
|
|
268
|
+
* Handles:
|
|
269
|
+
* - `import ... from "./foo.js"`
|
|
270
|
+
* - `import ... from "../bar/baz.js"`
|
|
271
|
+
* - `import("./dynamic.js")`
|
|
272
|
+
* - `require("./cjs.js")`
|
|
273
|
+
*
|
|
274
|
+
* Only tracks local (relative) imports — ignores node_modules.
|
|
275
|
+
*
|
|
276
|
+
* @returns Map from file path → array of imported file paths.
|
|
277
|
+
*/
|
|
278
|
+
static analyzeImports(fileContents) {
|
|
279
|
+
const imports = new Map();
|
|
280
|
+
// Build a set of known file basenames (without extension) for resolution
|
|
281
|
+
const knownFiles = new Set(fileContents.map((f) => f.path));
|
|
282
|
+
for (const file of fileContents) {
|
|
283
|
+
const deps = [];
|
|
284
|
+
// Match: import ... from "./path" or import ... from "../path"
|
|
285
|
+
const staticImportPattern = /(?:import|export)\s+.*?\s+from\s+["'](\.[^"']+)["']/g;
|
|
286
|
+
// Match: import("./path") or require("./path")
|
|
287
|
+
const dynamicImportPattern = /(?:import|require)\s*\(\s*["'](\.[^"']+)["']\s*\)/g;
|
|
288
|
+
for (const pattern of [staticImportPattern, dynamicImportPattern]) {
|
|
289
|
+
let match;
|
|
290
|
+
while ((match = pattern.exec(file.content)) !== null) {
|
|
291
|
+
const specifier = match[1];
|
|
292
|
+
// Resolve relative to the file's directory
|
|
293
|
+
const resolved = resolveImportPath(file.path, specifier, knownFiles);
|
|
294
|
+
if (resolved) {
|
|
295
|
+
deps.push(resolved);
|
|
296
|
+
}
|
|
297
|
+
}
|
|
298
|
+
}
|
|
299
|
+
if (deps.length > 0) {
|
|
300
|
+
imports.set(file.path, [...new Set(deps)]);
|
|
301
|
+
}
|
|
302
|
+
}
|
|
303
|
+
return imports;
|
|
304
|
+
}
|
|
305
|
+
/**
|
|
306
|
+
* Build the grouping prompt from file summaries and import graph.
|
|
307
|
+
*
|
|
308
|
+
* Includes the first 40 lines of each file plus the dependency graph
|
|
309
|
+
* so the LLM can make informed grouping decisions.
|
|
310
|
+
*/
|
|
311
|
+
static buildPrompt(projectName, fileContents, imports) {
|
|
312
|
+
const parts = [];
|
|
313
|
+
// System instructions
|
|
314
|
+
parts.push(`Analyze source files for the "${projectName}" project and group them for documentation.`, "");
|
|
315
|
+
// File summaries (first 40 lines each)
|
|
316
|
+
parts.push("## File Summaries", "");
|
|
317
|
+
for (const file of fileContents) {
|
|
318
|
+
const lines = file.content.split("\n");
|
|
319
|
+
const summary = lines.slice(0, 40).join("\n");
|
|
320
|
+
const truncated = lines.length > 40 ? ` (showing 40/${lines.length} lines)` : "";
|
|
321
|
+
parts.push(`### \`${file.path}\` (${file.size.toLocaleString()} bytes${truncated})`, "", "```", summary, "```", "");
|
|
322
|
+
}
|
|
323
|
+
// Import dependencies
|
|
324
|
+
if (imports.size > 0) {
|
|
325
|
+
parts.push("## Import Dependencies", "");
|
|
326
|
+
for (const [file, deps] of imports) {
|
|
327
|
+
parts.push(`- ${file} → ${deps.join(", ")}`);
|
|
328
|
+
}
|
|
329
|
+
parts.push("");
|
|
330
|
+
}
|
|
331
|
+
// Task
|
|
332
|
+
parts.push("## Task", "", "Group these files into cohesive documentation topics. Each group becomes one documentation page.", "");
|
|
333
|
+
// Rules
|
|
334
|
+
parts.push("## Rules", "", "- 2-6 files per group", "- Files MAY appear in multiple groups when they are central to multiple features", "- Prefer feature-based (\"vertical\") groups that bundle related functionality across layers", "- Use \"horizontal\" type only for cross-cutting concerns (e.g., shared utilities, type definitions)", "- Every file must appear in at least one group", "");
|
|
335
|
+
// Output format
|
|
336
|
+
parts.push("## Output", "", "Respond with ONLY a JSON array inside a ```json code fence. No other text before or after the fence.", "", "Each element must match this exact schema:", "```json", JSON.stringify({
|
|
337
|
+
id: "kebab-case-identifier",
|
|
338
|
+
label: "Human Readable Name",
|
|
339
|
+
type: "vertical | horizontal",
|
|
340
|
+
files: ["relative/path.ts"],
|
|
341
|
+
rationale: "1-sentence explanation",
|
|
342
|
+
}, null, 2), "```");
|
|
343
|
+
return parts.join("\n");
|
|
344
|
+
}
|
|
345
|
+
/**
|
|
346
|
+
* Extract and validate JSON from the LLM response.
|
|
347
|
+
*
|
|
348
|
+
* Tries to find a fenced ```json block first, then falls back to
|
|
349
|
+
* parsing the entire response as JSON.
|
|
350
|
+
*
|
|
351
|
+
* @returns Validated array of `GroupingResult`, or `null` if invalid.
|
|
352
|
+
*/
|
|
353
|
+
static parseResponse(response) {
|
|
354
|
+
try {
|
|
355
|
+
// Extract JSON from LLM response (handles fenced blocks, preamble text)
|
|
356
|
+
const raw = extractJSON(response);
|
|
357
|
+
const parsed = JSON.parse(raw);
|
|
358
|
+
if (!Array.isArray(parsed))
|
|
359
|
+
return null;
|
|
360
|
+
// Validate each element
|
|
361
|
+
for (const item of parsed) {
|
|
362
|
+
if (typeof item !== "object" ||
|
|
363
|
+
item === null ||
|
|
364
|
+
typeof item.id !== "string" ||
|
|
365
|
+
typeof item.label !== "string" ||
|
|
366
|
+
typeof item.type !== "string" ||
|
|
367
|
+
!Array.isArray(item.files) ||
|
|
368
|
+
typeof item.rationale !== "string") {
|
|
369
|
+
return null;
|
|
370
|
+
}
|
|
371
|
+
if (!["vertical", "horizontal"].includes(item.type))
|
|
372
|
+
return null;
|
|
373
|
+
if (item.files.length === 0)
|
|
374
|
+
return null;
|
|
375
|
+
if (!item.files.every((f) => typeof f === "string")) {
|
|
376
|
+
return null;
|
|
377
|
+
}
|
|
378
|
+
}
|
|
379
|
+
return parsed;
|
|
380
|
+
}
|
|
381
|
+
catch {
|
|
382
|
+
return null;
|
|
383
|
+
}
|
|
384
|
+
}
|
|
385
|
+
/**
|
|
386
|
+
* Map validated LLM results to `FileGroup[]`.
|
|
387
|
+
*
|
|
388
|
+
* Resolves file paths against the known file set, skipping any
|
|
389
|
+
* paths the LLM hallucinated. Computes `directory` via
|
|
390
|
+
* `commonDirectory()` from `grouping.ts`.
|
|
391
|
+
*/
|
|
392
|
+
static mapToFileGroups(results, filesByPath) {
|
|
393
|
+
const groups = [];
|
|
394
|
+
for (const result of results) {
|
|
395
|
+
// Resolve file paths — skip any the LLM hallucinated
|
|
396
|
+
const resolvedFiles = [];
|
|
397
|
+
for (const filePath of result.files) {
|
|
398
|
+
const info = filesByPath.get(filePath);
|
|
399
|
+
if (info) {
|
|
400
|
+
resolvedFiles.push(info);
|
|
401
|
+
}
|
|
402
|
+
}
|
|
403
|
+
// Skip groups with fewer than 2 resolved files
|
|
404
|
+
if (resolvedFiles.length < 2)
|
|
405
|
+
continue;
|
|
406
|
+
groups.push({
|
|
407
|
+
id: toKebabCase(result.id),
|
|
408
|
+
label: result.label,
|
|
409
|
+
type: result.type,
|
|
410
|
+
files: resolvedFiles.sort((a, b) => a.path.localeCompare(b.path)),
|
|
411
|
+
directory: commonDirectory(resolvedFiles),
|
|
412
|
+
});
|
|
413
|
+
}
|
|
414
|
+
return groups;
|
|
415
|
+
}
|
|
416
|
+
/**
|
|
417
|
+
* Split file contents into batches of approximately `chunkSize` files.
|
|
418
|
+
*
|
|
419
|
+
* Files are split sequentially — no shuffling — so files that are
|
|
420
|
+
* close in the directory listing stay together, improving the LLM's
|
|
421
|
+
* ability to detect intra-chunk relationships.
|
|
422
|
+
*/
|
|
423
|
+
static splitIntoChunks(fileContents, chunkSize) {
|
|
424
|
+
const chunks = [];
|
|
425
|
+
for (let i = 0; i < fileContents.length; i += chunkSize) {
|
|
426
|
+
chunks.push(fileContents.slice(i, i + chunkSize));
|
|
427
|
+
}
|
|
428
|
+
return chunks;
|
|
429
|
+
}
|
|
430
|
+
/**
|
|
431
|
+
* Merge grouping results from multiple chunks.
|
|
432
|
+
*
|
|
433
|
+
* Groups from different chunks may overlap — e.g., two chunks may each
|
|
434
|
+
* produce an "Authentication" group containing different files. This
|
|
435
|
+
* method merges groups with the same `id` (after kebab-case normalization)
|
|
436
|
+
* by combining their file lists and deduplicating.
|
|
437
|
+
*
|
|
438
|
+
* When groups share the same normalized ID:
|
|
439
|
+
* - File lists are unioned (deduplicated).
|
|
440
|
+
* - The label and type from the first occurrence are kept.
|
|
441
|
+
* - Rationales are concatenated.
|
|
442
|
+
*
|
|
443
|
+
* @param results - All `GroupingResult[]` from every chunk (flattened).
|
|
444
|
+
* @returns Merged array of `GroupingResult`.
|
|
445
|
+
*/
|
|
446
|
+
static mergeGroupingResults(results) {
|
|
447
|
+
const merged = new Map();
|
|
448
|
+
for (const result of results) {
|
|
449
|
+
const normalizedId = toKebabCase(result.id);
|
|
450
|
+
const existing = merged.get(normalizedId);
|
|
451
|
+
if (existing) {
|
|
452
|
+
// Merge file lists (deduplicate)
|
|
453
|
+
const fileSet = new Set([...existing.files, ...result.files]);
|
|
454
|
+
existing.files = [...fileSet];
|
|
455
|
+
// Append rationale if different
|
|
456
|
+
if (!existing.rationale.includes(result.rationale)) {
|
|
457
|
+
existing.rationale = `${existing.rationale}; ${result.rationale}`;
|
|
458
|
+
}
|
|
459
|
+
}
|
|
460
|
+
else {
|
|
461
|
+
// First occurrence — clone to avoid mutating input
|
|
462
|
+
merged.set(normalizedId, {
|
|
463
|
+
id: result.id,
|
|
464
|
+
label: result.label,
|
|
465
|
+
type: result.type,
|
|
466
|
+
files: [...result.files],
|
|
467
|
+
rationale: result.rationale,
|
|
468
|
+
});
|
|
469
|
+
}
|
|
470
|
+
}
|
|
471
|
+
return [...merged.values()];
|
|
472
|
+
}
|
|
473
|
+
// ── Grouping cache helpers ────────────────────────────────────────────
|
|
474
|
+
/**
|
|
475
|
+
* Compute a stable hash of the file list for cache invalidation.
|
|
476
|
+
* Uses path + size so renames and size changes invalidate the cache.
|
|
477
|
+
*/
|
|
478
|
+
static computeFilesHash(files) {
|
|
479
|
+
const key = files
|
|
480
|
+
.map((f) => `${f.path}:${f.size}`)
|
|
481
|
+
.sort()
|
|
482
|
+
.join("\n");
|
|
483
|
+
return createHash("sha256").update(key).digest("hex");
|
|
484
|
+
}
|
|
485
|
+
/**
|
|
486
|
+
* Attempt to load a valid cache entry from `cacheDir`.
|
|
487
|
+
* Returns `null` on miss, parse error, or hash mismatch.
|
|
488
|
+
*/
|
|
489
|
+
static async loadGroupingCache(cacheDir, filesHash) {
|
|
490
|
+
try {
|
|
491
|
+
const cacheFile = join(cacheDir, GROUPING_CACHE_FILE);
|
|
492
|
+
const raw = await readFile(cacheFile, "utf-8");
|
|
493
|
+
const parsed = JSON.parse(raw);
|
|
494
|
+
if (typeof parsed !== "object" ||
|
|
495
|
+
parsed === null ||
|
|
496
|
+
parsed.version !== 1 ||
|
|
497
|
+
parsed.filesHash !== filesHash ||
|
|
498
|
+
!Array.isArray(parsed.results)) {
|
|
499
|
+
return null;
|
|
500
|
+
}
|
|
501
|
+
return parsed.results;
|
|
502
|
+
}
|
|
503
|
+
catch {
|
|
504
|
+
return null;
|
|
505
|
+
}
|
|
506
|
+
}
|
|
507
|
+
/**
|
|
508
|
+
* Write grouping results to the cache file (best-effort, never throws).
|
|
509
|
+
*/
|
|
510
|
+
static async saveGroupingCache(cacheDir, filesHash, results) {
|
|
511
|
+
const cache = { version: 1, filesHash, results };
|
|
512
|
+
await mkdir(cacheDir, { recursive: true });
|
|
513
|
+
await writeFile(join(cacheDir, GROUPING_CACHE_FILE), JSON.stringify(cache, null, 2), "utf-8");
|
|
514
|
+
}
|
|
515
|
+
}
|
|
516
|
+
// ── Internal helpers ────────────────────────────────────────────────────
|
|
517
|
+
/**
 * Resolve a relative import specifier to a known file path.
 *
 * The specifier is joined onto the directory of `fromFile` and normalized
 * (`.` and empty segments dropped, `..` applied). A specifier that climbs
 * above the root of the path space cannot name a known file and yields
 * `null`. Candidates are then tried in this order, first hit wins:
 *
 * 1. the normalized path itself;
 * 2. a JavaScript extension swapped for its TypeScript source extension
 *    (`.js` → `.ts`, `.tsx`; `.jsx` → `.tsx`; `.mjs` → `.mts`; `.cjs` → `.cts`);
 * 3. the path with a source extension appended (`.ts`, `.tsx`, `.js`, ...);
 * 4. an `index` file inside the path, treating it as a directory.
 *
 * @param fromFile - Path of the importing file, `/`-separated.
 * @param specifier - Relative import specifier as written in the source.
 * @param knownFiles - Set of all known file paths.
 * @returns The matching known path, or `null` when nothing matches.
 */
function resolveImportPath(fromFile, specifier, knownFiles) {
    // Directory of the importing file, as path segments.
    const baseSegments = fromFile.split("/");
    baseSegments.pop(); // remove filename
    const resolved = [];
    for (const seg of [...baseSegments, ...specifier.split("/")]) {
        if (seg === "" || seg === ".") {
            continue;
        }
        if (seg !== "..") {
            resolved.push(seg);
            continue;
        }
        // ".." with nothing left to pop would escape the root.
        if (resolved.length === 0) {
            return null;
        }
        resolved.pop();
    }
    const resolvedPath = resolved.join("/");
    // Emitted JavaScript extension → extensions of the source it may come from.
    const sourceExtensions = new Map([
        [".js", [".ts", ".tsx"]],
        [".jsx", [".tsx"]],
        [".mjs", [".mts"]],
        [".cjs", [".cts"]],
    ]);
    // Extensions tried for extensionless specifiers and for index files.
    const appendable = [".ts", ".tsx", ".js", ".jsx", ".mts", ".cts", ".mjs", ".cjs"];
    const candidates = [];
    if (resolvedPath !== "") {
        candidates.push(resolvedPath);
        const jsExtension = /\.[cm]?jsx?$/.exec(resolvedPath)?.[0];
        if (jsExtension) {
            const stem = resolvedPath.slice(0, -jsExtension.length);
            for (const ext of sourceExtensions.get(jsExtension) ?? []) {
                candidates.push(stem + ext);
            }
        }
        for (const ext of appendable) {
            candidates.push(resolvedPath + ext);
        }
    }
    // Directory import: "./util" → "util/index.ts" and so on.
    const indexStem = resolvedPath === "" ? "index" : `${resolvedPath}/index`;
    for (const ext of appendable) {
        candidates.push(indexStem + ext);
    }
    return candidates.find((candidate) => knownFiles.has(candidate)) ?? null;
}
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* LLM-assisted index page generation agent for Hem.
|
|
3
|
+
*
|
|
4
|
+
* Post-processing agent that runs AFTER the architecture agent.
|
|
5
|
+
* Uses an OpenCode session to generate a rich `index.md` with a
|
|
6
|
+
* narrative introduction, project overview, and reading guide.
|
|
7
|
+
*
|
|
8
|
+
* The agent writes `index.md` with a `<!-- TOC -->` placeholder
|
|
9
|
+
* that the pipeline replaces with the procedurally generated
|
|
10
|
+
* table of contents link list.
|
|
11
|
+
*
|
|
12
|
+
* Architecture:
|
|
13
|
+
* - Reuses the `hem-arch` agent permissions (no new agent entry).
|
|
14
|
+
* - The pipeline reads back `index.md`, replaces the placeholder,
|
|
15
|
+
* then writes the final version.
|
|
16
|
+
* - Falls back to procedural TOC if the agent fails.
|
|
17
|
+
*/
|
|
18
|
+
import type { Provider } from "../providers/types.js";
|
|
19
|
+
import type { ExplorationFindings } from "../types.js";
|
|
20
|
+
import { BaseAgent } from "./base-agent.js";
|
|
21
|
+
import type { ArchGroupSummary } from "./architecture-agent.js";
|
|
22
|
+
/** Character-count threshold above which the index agent chunks its input. */
|
|
23
|
+
export declare const INDEX_CHUNK_THRESHOLD = 80000;
|
|
24
|
+
/** Summary produced by a single chunk session for later synthesis. */
|
|
25
|
+
export interface IndexChunkSummary {
|
|
26
|
+
/** Short narrative describing what was found in this chunk. */
|
|
27
|
+
narrative: string;
|
|
28
|
+
/** Key concepts or terminology discovered. */
|
|
29
|
+
keyConcepts: string[];
|
|
30
|
+
/** Suggested reading guide entries. */
|
|
31
|
+
readingGuideEntries: string[];
|
|
32
|
+
}
|
|
33
|
+
/** Parameters for the index overview prompt. */
|
|
34
|
+
export interface IndexPromptParams {
|
|
35
|
+
projectName: string;
|
|
36
|
+
destinationPath: string;
|
|
37
|
+
allDocFiles: string[];
|
|
38
|
+
allFindings: ExplorationFindings[];
|
|
39
|
+
allGroupSummaries: ArchGroupSummary[];
|
|
40
|
+
}
|
|
41
|
+
/**
|
|
42
|
+
* An agent that uses an LLM to generate a rich index page (`index.md`)
|
|
43
|
+
* for a project's documentation.
|
|
44
|
+
*
|
|
45
|
+
* Writes `index.md` with a `<!-- TOC -->` placeholder that the pipeline
|
|
46
|
+
* replaces with the procedural table of contents link list.
|
|
47
|
+
*/
|
|
48
|
+
export declare class IndexAgent extends BaseAgent {
|
|
49
|
+
constructor(provider: Provider);
|
|
50
|
+
/**
|
|
51
|
+
* Run the index page generation pipeline.
|
|
52
|
+
*
|
|
53
|
+
* @param params - All inputs needed for the index prompt.
|
|
54
|
+
* @param verbose - Optional logging callback (writes to stderr).
|
|
55
|
+
* @throws If session creation or prompting fails.
|
|
56
|
+
*/
|
|
57
|
+
run(params: IndexPromptParams, verbose?: (msg: string) => void): Promise<void>;
|
|
58
|
+
/**
|
|
59
|
+
* Builds the index page prompt from exploration findings, group
|
|
60
|
+
* summaries, and the list of generated documentation files.
|
|
61
|
+
*/
|
|
62
|
+
static buildPrompt(params: IndexPromptParams): string;
|
|
63
|
+
/**
|
|
64
|
+
* Splits exploration findings and group summaries into chunks that
|
|
65
|
+
* each produce a prompt under {@link INDEX_CHUNK_THRESHOLD} characters.
|
|
66
|
+
*
|
|
67
|
+
* The doc file list is NOT chunked — it is always included in the
|
|
68
|
+
* synthesis prompt (it is just a list of paths and stays small).
|
|
69
|
+
*/
|
|
70
|
+
static chunkInputs(params: IndexPromptParams): Array<{
|
|
71
|
+
findings: ExplorationFindings[];
|
|
72
|
+
summaries: ArchGroupSummary[];
|
|
73
|
+
}>;
|
|
74
|
+
/**
|
|
75
|
+
* Builds a prompt for a single chunk session.
|
|
76
|
+
*
|
|
77
|
+
* The chunk agent reads a subset of findings and group summaries and
|
|
78
|
+
* returns a JSON summary — it does NOT write `index.md`.
|
|
79
|
+
*/
|
|
80
|
+
static buildChunkPrompt(params: IndexPromptParams, chunkFindings: ExplorationFindings[], chunkSummaries: ArchGroupSummary[], chunkIndex: number, totalChunks: number): string;
|
|
81
|
+
/**
|
|
82
|
+
* Builds the synthesis prompt that combines chunk summaries into the
|
|
83
|
+
* final `index.md`. This prompt instructs the agent to write the file.
|
|
84
|
+
*/
|
|
85
|
+
static buildSynthesisPrompt(params: IndexPromptParams, chunkSummaries: IndexChunkSummary[]): string;
|
|
86
|
+
}
|