@pruddiman/hem 0.0.1-beta-5671db0 → 0.0.1-beta-72c22cf

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,54 @@
1
+ /**
2
+ * Structural priors for the deterministic grouping phase.
3
+ *
4
+ * The grouping pipeline respects two priors before falling back to derived
5
+ * clustering:
6
+ *
7
+ * 1. **Existing docs-folder scaffolding** — top-level subfolders of the
8
+ * destination directory (e.g. `docs/auth/`, `docs/api/`) become
9
+ * candidate group labels. Source files flow into the matching bucket.
10
+ * 2. **Src top-level structure** — depth-1 directories in the source tree
11
+ * that contain enough files are promoted as group labels, preserving
12
+ * parent-child nesting.
13
+ *
14
+ * This module handles prior #1 (destination scan) and exports matching
15
+ * utilities that `src/grouping.ts` uses for both priors.
16
+ */
17
+ /** A candidate group label derived from an existing docs-folder. */
18
+ export interface DocFolderPrior {
19
+ /** Folder name as it appears on disk (display label, original casing). */
20
+ name: string;
21
+ /** Lowercased key used for matching source files. */
22
+ key: string;
23
+ }
24
+ /** Collected priors from the destination directory. */
25
+ export interface GroupingPriors {
26
+ /**
27
+ * Top-level doc subfolders that should anchor grouping. Empty when the
28
+ * destination doesn't exist yet or has no subfolders.
29
+ */
30
+ docFolders: DocFolderPrior[];
31
+ }
32
+ /**
33
+ * Scan the destination directory for top-level subfolders and return them
34
+ * as grouping priors. When the destination doesn't exist or contains no
35
+ * subdirectories, returns empty priors.
36
+ *
37
+ * Folders prefixed with `.` or `_` are ignored (treated as metadata).
38
+ */
39
+ export declare function loadGroupingPriors(destinationPath: string): Promise<GroupingPriors>;
40
+ /**
41
+ * Test whether a file should be assigned to a doc-folder prior.
42
+ *
43
+ * Three match modes (any one counts as a hit):
44
+ * 1. The folder key appears as a directory segment in the file's path.
45
+ * 2. The file's stem contains the folder key as a word-boundary token.
46
+ * 3. The file's stem contains a multi-word folder key (e.g. `user-auth`) as a hyphen-delimited run.
47
+ */
48
+ export declare function matchDocFolder(filePath: string, priors: readonly DocFolderPrior[]): DocFolderPrior | null;
49
+ /**
50
+ * Normalize a folder name to a matching key. Lowercases and collapses
51
+ * underscores/spaces to hyphens so `User_Auth/` and `user-auth/` share a
52
+ * key.
53
+ */
54
+ export declare function normalizeFolderKey(name: string): string;
@@ -0,0 +1,130 @@
1
+ /**
2
+ * Structural priors for the deterministic grouping phase.
3
+ *
4
+ * The grouping pipeline respects two priors before falling back to derived
5
+ * clustering:
6
+ *
7
+ * 1. **Existing docs-folder scaffolding** — top-level subfolders of the
8
+ * destination directory (e.g. `docs/auth/`, `docs/api/`) become
9
+ * candidate group labels. Source files flow into the matching bucket.
10
+ * 2. **Src top-level structure** — depth-1 directories in the source tree
11
+ * that contain enough files are promoted as group labels, preserving
12
+ * parent-child nesting.
13
+ *
14
+ * This module handles prior #1 (destination scan) and exports matching
15
+ * utilities that `src/grouping.ts` uses for both priors.
16
+ */
17
+ import { readdir, stat } from "node:fs/promises";
18
+ import { join } from "node:path";
19
+ // ── Public API ──────────────────────────────────────────────────────────
20
/**
 * Scan the destination directory for top-level subfolders and return them
 * as grouping priors.
 *
 * Entries whose name starts with `.` or `_` are ignored (treated as
 * metadata), as are entries that cannot be `stat`-ed or are not
 * directories. When the destination cannot be read at all (for example it
 * does not exist yet), empty priors are returned instead of throwing.
 *
 * The result is sorted by key, then by on-disk name, using plain
 * code-unit comparison so the order does not depend on the runtime locale
 * or on the order in which `readdir` happens to list entries.
 *
 * @param destinationPath - Directory whose immediate subfolders become priors.
 * @returns Priors object; `docFolders` is empty when nothing qualifies.
 */
export async function loadGroupingPriors(destinationPath) {
    let entries;
    try {
        entries = await readdir(destinationPath);
    }
    catch {
        // Missing or unreadable destination: there is nothing to anchor on.
        return { docFolders: [] };
    }
    const candidates = entries.filter((entry) => !entry.startsWith(".") && !entry.startsWith("_"));
    // The stat calls are independent of each other, so issue them together.
    // `Promise.all` preserves input order; the final order is fixed by the
    // sort below anyway.
    const directories = await Promise.all(candidates.map(async (entry) => {
        try {
            const stats = await stat(join(destinationPath, entry));
            return stats.isDirectory() ? entry : null;
        }
        catch {
            // Entry vanished or is unreadable: skip it, best effort.
            return null;
        }
    }));
    const docFolders = [];
    for (const entry of directories) {
        if (entry !== null) {
            docFolders.push({ name: entry, key: normalizeFolderKey(entry) });
        }
    }
    // Locale-independent ordering. The name tie-breaker matters when two
    // folders normalize to the same key (e.g. `User_Auth` and `user-auth`).
    const byCodeUnit = (x, y) => (x < y ? -1 : x > y ? 1 : 0);
    docFolders.sort((a, b) => byCodeUnit(a.key, b.key) || byCodeUnit(a.name, b.name));
    return { docFolders };
}
55
/**
 * Test whether a file should be assigned to a doc-folder prior.
 *
 * Each prior is scored against the file path (see `scoreMatch`) and the
 * highest-scoring prior wins. Match tiers, strongest first:
 *   1. The folder key equals one of the file's directory segments (after
 *      the segment is normalized the same way folder names are).
 *   2. The folder key equals one word of the file's stem, where words are
 *      split on hyphens, underscores, whitespace and camelCase boundaries.
 *   3. A multi-word folder key (e.g. `user-auth`) appears as a run of
 *      consecutive words in the file's stem.
 *
 * Within a tier, longer keys score higher. On an exact tie the prior that
 * appears first in `priors` is kept.
 *
 * @param filePath - `/`-separated path of the source file.
 * @param priors - Candidate doc-folder priors; `key` must be normalized.
 * @returns The best-matching prior, or `null` when nothing matches.
 */
export function matchDocFolder(filePath, priors) {
    if (priors.length === 0)
        return null;
    const rawSegments = filePath.split("/");
    // `split` always yields at least one element; the last one is the basename.
    const basename = rawSegments[rawSegments.length - 1] ?? "";
    // Normalize directory names like folder keys so `user_auth/` in the
    // source tree can match the key `user-auth`.
    const dirKeys = rawSegments.slice(0, -1).map(normalizeFolderKey);
    // Tokenize the stem with its original casing: lowercasing first would
    // erase the camelCase boundaries that `splitWordBoundaries` relies on.
    const stemTokens = splitWordBoundaries(extractStem(basename));
    // Prefer the prior with the longest key match to avoid `auth` stealing
    // files that should go to `authentication`.
    let best = null;
    for (const prior of priors) {
        const score = scoreMatch(dirKeys, stemTokens, prior.key);
        if (score > 0 && (best === null || score > best.score)) {
            best = { prior, score };
        }
    }
    return best === null ? null : best.prior;
}
/**
 * Normalize a folder name to a matching key. Lowercases, turns runs of
 * whitespace/underscores into a single hyphen, collapses repeated hyphens
 * and trims a leading/trailing hyphen, so `User_Auth/` and `user-auth/`
 * share a key.
 *
 * @param name - Folder (or directory segment) name as found on disk.
 * @returns The normalized key; may be empty if `name` has no other characters.
 */
export function normalizeFolderKey(name) {
    return name
        .toLowerCase()
        .replace(/[\s_]+/g, "-")
        .replace(/-+/g, "-")
        .replace(/^-|-$/g, "");
}
// ── Internal helpers ────────────────────────────────────────────────────
/**
 * Return the part of `basename` before its first `.`, with the original
 * casing preserved (`userAuth.service.ts` → `userAuth`). A basename with
 * no dot is returned unchanged; a dotfile such as `.env` yields `""`.
 */
function extractStem(basename) {
    const dotIndex = basename.indexOf(".");
    return dotIndex === -1 ? basename : basename.substring(0, dotIndex);
}
/**
 * Score how well `key` matches a file.
 *
 *   - 3 when `key` equals a normalized directory segment (strongest signal).
 *   - 2 when `key` equals a single word of the file stem.
 *   - 1 when `key` (typically multi-word) appears as a run of consecutive
 *     stem words joined by hyphens.
 *   - 0 for no match, or when `key` is empty.
 *
 * A fractional bonus based on key length is added so that longer keys win
 * within a tier; the caller picks the maximum.
 *
 * @param dirKeys - Normalized directory segments of the file path.
 * @param stemTokens - Lowercased words of the file stem.
 * @param key - Normalized folder key to test.
 */
function scoreMatch(dirKeys, stemTokens, key) {
    if (key.length === 0)
        return 0;
    // Capped below 1 so a very long key can never outrank a stronger tier.
    const lengthBonus = Math.min(key.length, 99) / 100;
    if (dirKeys.includes(key))
        return 3 + lengthBonus;
    if (stemTokens.includes(key))
        return 2 + lengthBonus;
    // Re-join the words with hyphens so the check is independent of which
    // delimiter (hyphen, underscore, camelCase) the file name used.
    const paddedStem = "-" + stemTokens.join("-") + "-";
    if (paddedStem.includes("-" + key + "-"))
        return 1 + lengthBonus;
    return 0;
}
/**
 * Split an identifier-like string into lowercase words. Boundaries are
 * hyphens, underscores, whitespace, and lower/digit → upper camelCase
 * transitions (`userAuth_service` → `["user", "auth", "service"]`).
 */
function splitWordBoundaries(s) {
    return s
        .replace(/([a-z0-9])([A-Z])/g, "$1-$2")
        .toLowerCase()
        .split(/[-_\s]+/)
        .filter((t) => t.length > 0);
}
@@ -1,14 +1,22 @@
1
1
  /**
2
2
  * File grouping module for Hem.
3
3
  *
4
- * Analyses discovered source files and groups them by feature vertical
5
- * (e.g., "user", "order") or architectural layer (e.g., controllers,
6
- * services). Each file appears in at most one group; feature grouping
7
- * takes priority over layer grouping.
4
+ * Analyses discovered source files and groups them in this priority order:
8
5
  *
9
- * Reference: FR-003, data-model.md lines 93-108.
6
+ * 1. Existing docs-folder priors — folders already present in the
7
+ * destination directory anchor matching files (stable across runs).
8
+ * 2. Src top-level promotion — depth-1 source directories with ≥3 files
9
+ * become vertical groups, preserving parent-child nesting.
10
+ * 3. Legacy feature extraction — deepest non-layer directory as a label.
11
+ * 4. Architectural layer heuristic — suffix/dir pattern matching.
12
+ * 5. Import-graph connected components — clusters for files the above
13
+ * passes miss.
14
+ * 6. Catch-all "Other" group.
15
+ *
16
+ * Each file appears in exactly one group.
10
17
  */
11
18
  import type { FileInfo, FileGroup } from "./types.js";
19
+ import { type GroupingPriors } from "./grouping-priors.js";
12
20
  /**
13
21
  * Finds the common parent directory for a set of file paths.
14
22
  *
@@ -16,22 +24,21 @@ import type { FileInfo, FileGroup } from "./types.js";
16
24
  * @returns The common parent directory (relative path), or `"."` for root.
17
25
  */
18
26
  export declare function commonDirectory(files: FileInfo[]): string;
27
+ /** Optional inputs that steer deterministic grouping. */
28
+ export interface GroupingOptions {
29
+ /** Priors derived from the destination directory (if it exists). */
30
+ priors?: GroupingPriors;
31
+ /**
32
+ * Local-import adjacency map produced by `buildImportGraph`. When
33
+ * provided, files left unassigned after heuristic passes are clustered
34
+ * by connected components.
35
+ */
36
+ localEdges?: Map<string, string[]>;
37
+ }
19
38
  /**
20
- * Groups discovered files by feature vertical or architectural layer.
21
- *
22
- * Grouping strategy:
23
- * 1. Filter out binary files.
24
- * 2. Attempt to assign each file to a **feature vertical** group
25
- * based on its directory structure (e.g., files under `user/` →
26
- * "User" feature group).
27
- * 3. Files not assigned to a feature group are checked for
28
- * **architectural layer** membership based on file name suffixes
29
- * (e.g., `.controller.ts` → "Controllers" layer) or containing
30
- * directory (e.g., `services/` → "Services").
31
- * 4. Remaining files go into a catch-all "Other" group.
32
- * 5. Each file appears in at most one group.
39
+ * Groups discovered files. See module docstring for the priority order.
33
40
  *
34
- * @param files - Discovered files (may include binary files).
35
- * @returns Array of `FileGroup` objects.
41
+ * @param files - Discovered files (may include binary files).
42
+ * @param options - Optional priors and precomputed import graph.
36
43
  */
37
- export declare function groupFiles(files: FileInfo[]): FileGroup[];
44
+ export declare function groupFiles(files: FileInfo[], options?: GroupingOptions): FileGroup[];
package/dist/grouping.js CHANGED
@@ -1,15 +1,24 @@
1
1
  /**
2
2
  * File grouping module for Hem.
3
3
  *
4
- * Analyses discovered source files and groups them by feature vertical
5
- * (e.g., "user", "order") or architectural layer (e.g., controllers,
6
- * services). Each file appears in at most one group; feature grouping
7
- * takes priority over layer grouping.
4
+ * Analyses discovered source files and groups them in this priority order:
8
5
  *
9
- * Reference: FR-003, data-model.md lines 93-108.
6
+ * 1. Existing docs-folder priors — folders already present in the
7
+ * destination directory anchor matching files (stable across runs).
8
+ * 2. Src top-level promotion — depth-1 source directories with ≥3 files
9
+ * become vertical groups, preserving parent-child nesting.
10
+ * 3. Legacy feature extraction — deepest non-layer directory as a label.
11
+ * 4. Architectural layer heuristic — suffix/dir pattern matching.
12
+ * 5. Import-graph connected components — clusters for files the above
13
+ * passes miss.
14
+ * 6. Catch-all "Other" group.
15
+ *
16
+ * Each file appears in exactly one group.
10
17
  */
11
18
  import { dirname } from "node:path";
12
19
  import { toKebabCase } from "./helpers/strings.js";
20
+ import { connectedComponents } from "./import-graph.js";
21
+ import { matchDocFolder, } from "./grouping-priors.js";
13
22
  // ── Layer detection ─────────────────────────────────────────────────────
14
23
  /**
15
24
  * Maps well-known file name suffixes to their architectural layer label.
@@ -216,99 +225,138 @@ function toDisplayLabel(name) {
216
225
  .replace(/\b\w/g, (ch) => ch.toUpperCase());
217
226
  }
218
227
  // ── Main ────────────────────────────────────────────────────────────────
228
+ /** Minimum files a top-level src directory needs before it's promoted. */
229
+ const TOP_LEVEL_PROMOTION_THRESHOLD = 3;
230
+ /** Minimum size of an import-graph connected component to become a group. */
231
+ const MIN_COMPONENT_SIZE = 2;
232
+ /** Components larger than this split along directory boundaries. */
233
+ const MAX_COMPONENT_SIZE = 6;
219
234
  /**
220
- * Groups discovered files by feature vertical or architectural layer.
221
- *
222
- * Grouping strategy:
223
- * 1. Filter out binary files.
224
- * 2. Attempt to assign each file to a **feature vertical** group
225
- * based on its directory structure (e.g., files under `user/` →
226
- * "User" feature group).
227
- * 3. Files not assigned to a feature group are checked for
228
- * **architectural layer** membership based on file name suffixes
229
- * (e.g., `.controller.ts` → "Controllers" layer) or containing
230
- * directory (e.g., `services/` → "Services").
231
- * 4. Remaining files go into a catch-all "Other" group.
232
- * 5. Each file appears in at most one group.
235
+ * Groups discovered files. See module docstring for the priority order.
233
236
  *
234
- * @param files - Discovered files (may include binary files).
235
- * @returns Array of `FileGroup` objects.
237
+ * @param files - Discovered files (may include binary files).
238
+ * @param options - Optional priors and precomputed import graph.
236
239
  */
237
- export function groupFiles(files) {
238
- // Step 1: filter to non-binary files only
240
+ export function groupFiles(files, options = {}) {
239
241
  const textFiles = files.filter((f) => !f.isBinary);
240
242
  if (textFiles.length === 0)
241
243
  return [];
242
- // Step 2: Build feature and layer buckets
244
+ const priors = options.priors?.docFolders ?? [];
245
+ const localEdges = options.localEdges;
246
+ /** Key → display label for vertical groups. */
247
+ const featureLabels = new Map();
248
+ /** Key → bucket of FileInfo for vertical groups. */
243
249
  const featureBuckets = new Map();
250
+ /** Keys that came from priors and should not be demoted. */
251
+ const pinnedKeys = new Set();
252
+ /** Layer buckets, keyed by display label. */
244
253
  const layerBuckets = new Map();
245
- const ungrouped = [];
246
- const assigned = new Set(); // track by relative path
247
- // First pass: try to assign every file to a feature vertical
254
+ /** Paths that already belong to a bucket. */
255
+ const assigned = new Set();
256
+ const addFeature = (key, label, file) => {
257
+ if (!featureBuckets.has(key)) {
258
+ featureBuckets.set(key, []);
259
+ featureLabels.set(key, label);
260
+ }
261
+ featureBuckets.get(key).push(file);
262
+ assigned.add(file.path);
263
+ };
264
+ // ── Pass 1: existing docs-folder priors ──
265
+ for (const file of textFiles) {
266
+ const match = matchDocFolder(file.path, priors);
267
+ if (!match)
268
+ continue;
269
+ addFeature(match.key, toDisplayLabel(match.name), file);
270
+ pinnedKeys.add(match.key);
271
+ }
272
+ // ── Pass 2: src top-level promotion (≥3 files) ──
273
+ const topLevelCounts = countTopLevelDirs(textFiles);
274
+ for (const file of textFiles) {
275
+ if (assigned.has(file.path))
276
+ continue;
277
+ const top = topLevelDir(file.path);
278
+ if (!top)
279
+ continue;
280
+ if (LAYER_DIRECTORIES.has(top.toLowerCase()))
281
+ continue;
282
+ const count = topLevelCounts.get(top) ?? 0;
283
+ if (count < TOP_LEVEL_PROMOTION_THRESHOLD)
284
+ continue;
285
+ addFeature(top.toLowerCase(), toDisplayLabel(top), file);
286
+ }
287
+ // ── Pass 3: legacy feature extraction (deepest non-layer dir) ──
248
288
  for (const file of textFiles) {
289
+ if (assigned.has(file.path))
290
+ continue;
249
291
  const feature = extractFeatureName(file.path);
250
- if (feature) {
251
- const key = feature.toLowerCase();
252
- if (!featureBuckets.has(key)) {
253
- featureBuckets.set(key, []);
254
- }
255
- featureBuckets.get(key).push(file);
256
- assigned.add(file.path);
257
- }
292
+ if (!feature)
293
+ continue;
294
+ addFeature(feature.toLowerCase(), toDisplayLabel(feature), file);
258
295
  }
259
- // Promote single-file features back to unassigned features with
260
- // only one file aren't meaningful groups by themselves. They'll get
261
- // a chance to be grouped by layer instead.
296
+ // Demote single-file feature buckets (unless pinned by a prior).
262
297
  for (const [key, bucket] of featureBuckets) {
298
+ if (pinnedKeys.has(key))
299
+ continue;
263
300
  if (bucket.length < 2) {
264
- for (const file of bucket) {
301
+ for (const file of bucket)
265
302
  assigned.delete(file.path);
266
- }
267
303
  featureBuckets.delete(key);
304
+ featureLabels.delete(key);
268
305
  }
269
306
  }
270
- // Second pass: unassigned files → try layer grouping
307
+ // ── Pass 4: architectural layers ──
271
308
  for (const file of textFiles) {
272
309
  if (assigned.has(file.path))
273
310
  continue;
274
311
  const layer = detectLayer(file.path) ?? detectLayerByDirectory(file.path);
275
- if (layer) {
276
- if (!layerBuckets.has(layer)) {
277
- layerBuckets.set(layer, []);
278
- }
279
- layerBuckets.get(layer).push(file);
280
- assigned.add(file.path);
281
- }
312
+ if (!layer)
313
+ continue;
314
+ if (!layerBuckets.has(layer))
315
+ layerBuckets.set(layer, []);
316
+ layerBuckets.get(layer).push(file);
317
+ assigned.add(file.path);
282
318
  }
283
- // Promote single-file layers back to ungrouped
319
+ // Demote single-file layer buckets.
284
320
  for (const [key, bucket] of layerBuckets) {
285
321
  if (bucket.length < 2) {
286
- for (const file of bucket) {
322
+ for (const file of bucket)
287
323
  assigned.delete(file.path);
288
- }
289
324
  layerBuckets.delete(key);
290
325
  }
291
326
  }
292
- // Third pass: remaining "Other"
293
- for (const file of textFiles) {
294
- if (!assigned.has(file.path)) {
295
- ungrouped.push(file);
327
+ // ── Pass 5: import-graph connected components ──
328
+ if (localEdges) {
329
+ const remaining = textFiles.filter((f) => !assigned.has(f.path));
330
+ const remainingPaths = remaining.map((f) => f.path);
331
+ const byPath = new Map(remaining.map((f) => [f.path, f]));
332
+ const components = connectedComponents(remainingPaths, localEdges);
333
+ const componentGroups = buildComponentGroups(components, byPath);
334
+ for (const cg of componentGroups) {
335
+ const key = cg.key;
336
+ if (!featureBuckets.has(key)) {
337
+ featureBuckets.set(key, []);
338
+ featureLabels.set(key, cg.label);
339
+ }
340
+ for (const file of cg.files) {
341
+ featureBuckets.get(key).push(file);
342
+ assigned.add(file.path);
343
+ }
296
344
  }
297
345
  }
298
- // Step 3: Convert buckets to FileGroup objects
346
+ // ── Pass 6: catch-all "Other" ──
347
+ const ungrouped = textFiles.filter((f) => !assigned.has(f.path));
348
+ // ── Build FileGroup objects ──
299
349
  const groups = [];
300
- // Feature (vertical) groups
301
350
  for (const [key, bucket] of featureBuckets) {
302
- const displayLabel = toDisplayLabel(key);
351
+ const label = featureLabels.get(key) ?? toDisplayLabel(key);
303
352
  groups.push({
304
- id: toKebabCase(displayLabel) + "-feature",
305
- label: displayLabel,
353
+ id: toKebabCase(label) + "-feature",
354
+ label,
306
355
  type: "vertical",
307
356
  files: bucket.sort((a, b) => a.path.localeCompare(b.path)),
308
357
  directory: commonDirectory(bucket),
309
358
  });
310
359
  }
311
- // Layer (horizontal) groups
312
360
  for (const [label, bucket] of layerBuckets) {
313
361
  groups.push({
314
362
  id: toKebabCase(label) + "-layer",
@@ -318,7 +366,6 @@ export function groupFiles(files) {
318
366
  directory: commonDirectory(bucket),
319
367
  });
320
368
  }
321
- // Catch-all "Other" group
322
369
  if (ungrouped.length > 0) {
323
370
  groups.push({
324
371
  id: "other",
@@ -328,11 +375,9 @@ export function groupFiles(files) {
328
375
  directory: commonDirectory(ungrouped),
329
376
  });
330
377
  }
331
- // Sort groups: verticals first, then horizontals, alphabetically within
332
378
  groups.sort((a, b) => {
333
379
  if (a.type !== b.type)
334
380
  return a.type === "vertical" ? -1 : 1;
335
- // "Other" always last within its type
336
381
  if (a.id === "other")
337
382
  return 1;
338
383
  if (b.id === "other")
@@ -341,3 +386,86 @@ export function groupFiles(files) {
341
386
  });
342
387
  return groups;
343
388
  }
389
+ // ── Internal pass helpers ───────────────────────────────────────────────
390
/**
 * Extract the leading directory segment of a file path.
 *
 * @param relativePath - `/`-separated file path.
 * @returns The first segment of the file's directory, or `null` when the
 *   file has no parent directory (it sits at the source root) or the
 *   first segment is empty.
 */
function topLevelDir(relativePath) {
    const parent = dirname(relativePath);
    const atRoot = parent === "." || parent === "";
    if (atRoot) {
        return null;
    }
    // Everything before the first slash is the top-level directory.
    const slashAt = parent.indexOf("/");
    const first = slashAt === -1 ? parent : parent.slice(0, slashAt);
    return first.length === 0 ? null : first;
}
401
/**
 * Tally how many files live under each top-level directory.
 *
 * Files for which `topLevelDir` returns `null` are not counted.
 *
 * @param files - Files exposing a `path` property.
 * @returns Map from top-level directory name to number of files beneath it.
 */
function countTopLevelDirs(files) {
    return files.reduce((tally, { path }) => {
        const dirName = topLevelDir(path);
        if (dirName) {
            tally.set(dirName, (tally.get(dirName) ?? 0) + 1);
        }
        return tally;
    }, new Map());
}
411
/**
 * Convert import-graph connected components into vertical-group candidates.
 *
 * Components are resolved to file objects through `byPath`; paths with no
 * entry are ignored. A component with fewer than `MIN_COMPONENT_SIZE`
 * resolved files is discarded. One with at most `MAX_COMPONENT_SIZE` files
 * becomes a single group; a larger one is split with `bisectByDirectory`,
 * and each resulting part of sufficient size becomes its own group.
 *
 * @param components - Lists of file paths, one list per component.
 * @param byPath - Lookup from file path to file object.
 * @returns Groups as produced by `componentToGroup`.
 */
function buildComponentGroups(components, byPath) {
    const groups = [];
    const bigEnough = (list) => list.length >= MIN_COMPONENT_SIZE;
    for (const paths of components) {
        if (!bigEnough(paths)) {
            continue;
        }
        // Keep only the paths we actually have a file object for.
        const members = [];
        for (const p of paths) {
            const info = byPath.get(p);
            if (info) {
                members.push(info);
            }
        }
        if (!bigEnough(members)) {
            continue;
        }
        const parts = members.length > MAX_COMPONENT_SIZE
            ? bisectByDirectory(members).filter(bigEnough)
            : [members];
        for (const part of parts) {
            groups.push(componentToGroup(part));
        }
    }
    return groups;
}
438
/**
 * Derive a group descriptor (bucket key, display label, files) for one
 * cluster of files.
 *
 * The name comes from the last segment of the files' common directory.
 * When there is no usable segment — the common directory is the root
 * (`"."`) or empty — the generic name `"cluster"` is used instead, so a
 * root-level cluster does not end up labelled `"."`.
 *
 * The key is the lowercased name plus a `-cluster` suffix, which keeps it
 * apart from directory-derived feature keys. Two clusters that share a
 * directory name therefore share a key.
 *
 * @param files - Files belonging to the cluster.
 * @returns `{ key, label, files }` for the vertical buckets.
 */
function componentToGroup(files) {
    const commonDir = commonDirectory(files);
    // Drop empty segments and the "." root marker so the fallback applies.
    const segments = commonDir.split("/").filter((s) => s.length > 0 && s !== ".");
    const basename = segments.pop() ?? "cluster";
    const label = toDisplayLabel(basename);
    const key = basename.toLowerCase() + "-cluster";
    return { key, label, files };
}
446
/**
 * Partition a large component by the first directory segment of each
 * file's path (files directly at the root share the empty segment).
 *
 * Partitions holding at least `MIN_COMPONENT_SIZE` files are returned as
 * they are, in first-seen order. Files from smaller partitions are pooled
 * into one trailing "mixed" bucket, which is appended only if the pool
 * itself reaches `MIN_COMPONENT_SIZE`.
 *
 * @param files - Files of a single connected component.
 * @returns Array of file buckets.
 */
function bisectByDirectory(files) {
    const grouped = new Map();
    for (const file of files) {
        const parent = dirname(file.path);
        const head = parent === "." ? "" : (parent.split("/")[0] ?? "");
        const members = grouped.get(head);
        if (members) {
            members.push(file);
        }
        else {
            grouped.set(head, [file]);
        }
    }
    const partitions = [...grouped.values()];
    const result = partitions.filter((members) => members.length >= MIN_COMPONENT_SIZE);
    // Undersized partitions are merged so their files are not scattered.
    const leftovers = partitions
        .filter((members) => members.length < MIN_COMPONENT_SIZE)
        .flat();
    if (leftovers.length >= MIN_COMPONENT_SIZE) {
        result.push(leftovers);
    }
    return result;
}
@@ -0,0 +1,74 @@
1
+ /**
2
+ * Import graph extraction for Hem.
3
+ *
4
+ * Produces two views of a file set, both built from regex-based import
5
+ * scanning (no AST / compiler API):
6
+ *
7
+ * - A **local** graph of resolved relative imports between known files,
8
+ * used by grouping for connected-component clustering.
9
+ * - An **external** map of bare-specifier imports per file (with line
10
+ * numbers), used by exploration to match packages against the
11
+ * integration catalog with file:line citations.
12
+ */
13
+ import type { FileInfo } from "./types.js";
14
+ /** One observation of an external import in a file. */
15
+ export interface ImportOccurrence {
16
+ /** The bare specifier as written (e.g., `"@azure/storage-blob"`, `"react"`). */
17
+ specifier: string;
18
+ /** 1-based line number where the import appears. */
19
+ line: number;
20
+ }
21
+ /** Two views of the import graph. */
22
+ export interface ImportAnalysis {
23
+ /**
24
+ * Undirected-friendly adjacency: file path → list of resolved local file
25
+ * paths it imports. Only includes edges where the target is a known file
26
+ * in the input set.
27
+ */
28
+ localEdges: Map<string, string[]>;
29
+ /**
30
+ * External imports per file (npm / PyPI / Go module / etc. bare
31
+ * specifiers). Each entry carries the original specifier and the line
32
+ * number it was found on so catalog hits can produce file:line citations.
33
+ */
34
+ externalImports: Map<string, ImportOccurrence[]>;
35
+ }
36
+ /**
37
+ * Build the import graph for a set of files. Files that fail to read are
38
+ * silently skipped (they contribute no edges). Reads run in parallel with
39
+ * a bounded concurrency to keep wall-clock time low on large projects.
40
+ */
41
+ export declare function buildImportGraph(files: FileInfo[]): Promise<ImportAnalysis>;
42
+ /**
43
+ * Union-find based connected-components over an undirected view of the
44
+ * local graph. `universe` is the set of files under consideration;
45
+ * isolated nodes appear as singleton components.
46
+ */
47
+ export declare function connectedComponents(universe: readonly string[], localEdges: Map<string, string[]>): string[][];
48
+ /**
49
+ * Compute fan-in (how many files import this file) and fan-out (how many
50
+ * files this file imports) for every node in `universe`.
51
+ */
52
+ export declare function computeDegrees(universe: readonly string[], localEdges: Map<string, string[]>): Map<string, {
53
+ fanIn: number;
54
+ fanOut: number;
55
+ }>;
56
+ /**
57
+ * Identify files participating in an import cycle. Uses iterative Tarjan's
58
+ * SCC; any SCC with ≥2 members or a self-loop marks its members as cyclic.
59
+ */
60
+ export declare function nodesInCycles(localEdges: Map<string, string[]>): Set<string>;
61
+ /**
62
+ * Yield every import specifier found in `content` along with the 1-based
63
+ * line number it appears on.
64
+ */
65
+ export declare function extractImports(content: string): Generator<{
66
+ specifier: string;
67
+ line: number;
68
+ }>;
69
+ /**
70
+ * Resolve a relative specifier from `fromFile` against the set of known
71
+ * file paths. Handles the common `.js` → `.ts`/`.tsx` mapping used in
72
+ * TypeScript sources.
73
+ */
74
+ export declare function resolveRelative(fromFile: string, specifier: string, knownFiles: ReadonlySet<string>): string | null;
@@ -0,0 +1,330 @@
1
+ /**
2
+ * Import graph extraction for Hem.
3
+ *
4
+ * Produces two views of a file set, both built from regex-based import
5
+ * scanning (no AST / compiler API):
6
+ *
7
+ * - A **local** graph of resolved relative imports between known files,
8
+ * used by grouping for connected-component clustering.
9
+ * - An **external** map of bare-specifier imports per file (with line
10
+ * numbers), used by exploration to match packages against the
11
+ * integration catalog with file:line citations.
12
+ */
13
+ import { readFile } from "node:fs/promises";
14
+ import pLimit from "p-limit";
15
/**
 * Size ceiling, in bytes, for files considered by the import scan (2 MB).
 * Anything larger is skipped: oversized generated artifacts (lockfiles,
 * bundled output) rarely carry useful import edges, and reading them can
 * stall the pipeline for minutes.
 */
const MAX_FILE_BYTES = 2 * 1024 ** 2;
/** Upper bound on simultaneous file reads while the graph is being built. */
const READ_CONCURRENCY = 32;
// ── Regexes ─────────────────────────────────────────────────────────────
// Both patterns are global (`g`) and capture the specifier in group 1.
// Static form: `import ... from "x"` or `export ... from "x"`.
const STATIC_RE = /(?:import|export)\s+[^;'"`]*?\s+from\s+["']([^"']+)["']/g;
// Call form: `import("x")` or `require("x")`.
const DYNAMIC_RE = /(?:import|require)\s*\(\s*["']([^"']+)["']\s*\)/g;
28
+ // ── Public API ──────────────────────────────────────────────────────────
29
/**
 * Build the import graph for a set of files.
 *
 * Each file is read (at most `READ_CONCURRENCY` reads in flight) and scanned
 * with `extractImports`. Relative specifiers that resolve to another known
 * file become local edges; bare specifiers are recorded, with their line
 * numbers, as external imports. Absolute filesystem paths and URL-like
 * specifiers are ignored.
 *
 * Files flagged `isBinary`, files larger than `MAX_FILE_BYTES`, and files
 * that fail to read are silently skipped (they contribute no edges).
 *
 * Both result maps are populated in the order of `files`, not in the order
 * the reads happen to complete, so map iteration order is reproducible from
 * run to run.
 *
 * @param files - Files to scan; each entry provides `path`, `absolutePath`,
 *   `size` and `isBinary`.
 * @returns `localEdges` (file path → de-duplicated resolved local imports)
 *   and `externalImports` (file path → bare-specifier occurrences). Files
 *   with no entries of a kind are absent from the corresponding map.
 */
export async function buildImportGraph(files) {
    const known = new Set(files.map((f) => f.path));
    const limit = pLimit(READ_CONCURRENCY);
    // Promise.all yields results in input order regardless of which read
    // finishes first, which is what keeps the maps below deterministic.
    const scans = await Promise.all(files.map((file) => limit(() => scanFileImports(file, known))));
    const localEdges = new Map();
    const externalImports = new Map();
    files.forEach((file, i) => {
        const scan = scans[i];
        if (!scan)
            return;
        if (scan.local.length > 0) {
            localEdges.set(file.path, scan.local);
        }
        if (scan.external.length > 0) {
            externalImports.set(file.path, scan.external);
        }
    });
    return { localEdges, externalImports };
}
/**
 * Read one file and classify its import specifiers. Returns `null` when the
 * file is skipped (binary, oversized, or unreadable); otherwise returns the
 * de-duplicated resolved local targets and the external occurrences.
 */
async function scanFileImports(file, known) {
    if (file.isBinary || file.size > MAX_FILE_BYTES)
        return null;
    let content;
    try {
        content = await readFile(file.absolutePath, "utf-8");
    }
    catch {
        // Best-effort scan: an unreadable file simply contributes nothing.
        return null;
    }
    const local = [];
    const external = [];
    for (const { specifier, line } of extractImports(content)) {
        if (isRelativeSpecifier(specifier)) {
            const resolved = resolveRelative(file.path, specifier, known);
            // Self-imports are dropped so a file never has an edge to itself.
            if (resolved && resolved !== file.path) {
                local.push(resolved);
            }
        }
        else if (isBareSpecifier(specifier)) {
            external.push({ specifier, line });
        }
        // Absolute filesystem paths and URL-like specifiers are ignored.
    }
    return { local: dedupe(local), external };
}
74
/**
 * Partition `universe` into connected components, treating every local
 * import edge as undirected. Implemented with a disjoint-set forest
 * (union by rank, path halving on lookup).
 *
 * Edges with an endpoint outside `universe` are ignored, and nodes without
 * any qualifying edge come back as singleton components.
 *
 * @param universe - The files under consideration.
 * @param localEdges - File → imported files.
 * @returns One array per component, ordered by the first appearance of any
 *   of its members in `universe`; members are sorted with `localeCompare`.
 */
export function connectedComponents(universe, localEdges) {
    // Every member starts out as the root of its own one-element tree.
    const parentOf = new Map();
    const rankOf = new Map();
    for (const member of universe) {
        parentOf.set(member, member);
        rankOf.set(member, 0);
    }
    const rootOf = (node) => {
        let at = node;
        for (let up = parentOf.get(at); up !== at; up = parentOf.get(at)) {
            // Path halving: re-point `at` to its grandparent, then hop there.
            parentOf.set(at, parentOf.get(up));
            at = parentOf.get(at);
        }
        return at;
    };
    const merge = (a, b) => {
        const rootA = rootOf(a);
        const rootB = rootOf(b);
        if (rootA === rootB)
            return;
        const heightA = rankOf.get(rootA);
        const heightB = rankOf.get(rootB);
        if (heightA < heightB) {
            parentOf.set(rootA, rootB);
            return;
        }
        // rootA is at least as tall: it absorbs rootB, growing only on a tie.
        parentOf.set(rootB, rootA);
        if (heightA === heightB) {
            rankOf.set(rootA, heightA + 1);
        }
    };
    const members = new Set(universe);
    for (const [source, targets] of localEdges) {
        if (!members.has(source))
            continue;
        for (const target of targets) {
            if (members.has(target)) {
                merge(source, target);
            }
        }
    }
    // Bucket members by root; Map insertion order gives first-seen ordering.
    const byRoot = new Map();
    for (const member of universe) {
        const root = rootOf(member);
        const bucket = byRoot.get(root);
        if (bucket) {
            bucket.push(member);
        }
        else {
            byRoot.set(root, [member]);
        }
    }
    return Array.from(byRoot.values(), (bucket) => [...bucket].sort((x, y) => x.localeCompare(y)));
}
133
/**
 * Tally import degrees for every node in `universe`.
 *
 * - `fanOut` is the length of the node's edge list in `localEdges`
 *   (targets outside `universe` are included in that count).
 * - `fanIn` is the number of edges pointing at the node whose source is
 *   itself a member of `universe`.
 *
 * @param universe - The files to report on.
 * @param localEdges - File → imported files.
 * @returns A map with one `{ fanIn, fanOut }` record per universe member.
 */
export function computeDegrees(universe, localEdges) {
    const table = new Map();
    for (const member of universe) {
        table.set(member, { fanIn: 0, fanOut: 0 });
    }
    localEdges.forEach((targets, source) => {
        const own = table.get(source);
        // Edges leaving a file outside the universe are not counted at all.
        if (own === undefined)
            return;
        own.fanOut = targets.length;
        for (const target of targets) {
            const entry = table.get(target);
            if (entry !== undefined) {
                entry.fanIn += 1;
            }
        }
    });
    return table;
}
155
/**
 * Collect every file that takes part in an import cycle.
 *
 * Runs Tarjan's strongly-connected-components algorithm with an explicit
 * frame stack instead of recursion. A component marks its members as cyclic
 * when it has at least two members, or when its single member lists itself
 * among its own imports.
 *
 * @param localEdges - File → imported files.
 * @returns The set of files that belong to some cycle.
 */
export function nodesInCycles(localEdges) {
    const order = new Map(); // discovery index per node
    const low = new Map(); // smallest discovery index known reachable
    const sccStack = [];
    const active = new Set(); // nodes currently on sccStack
    const cyclic = new Set();
    let nextOrder = 0;
    const successors = (node) => localEdges.get(node) ?? [];
    // First visit: number the node, put it on the SCC stack, make its frame.
    const open = (node) => {
        order.set(node, nextOrder);
        low.set(node, nextOrder);
        nextOrder += 1;
        sccStack.push(node);
        active.add(node);
        return { node, targets: successors(node), cursor: 0 };
    };
    // `node` is an SCC root: unwind its component and record it if cyclic.
    const close = (node) => {
        const members = [];
        let popped;
        do {
            popped = sccStack.pop();
            active.delete(popped);
            members.push(popped);
        } while (popped !== node);
        if (members.length >= 2 || successors(node).includes(node)) {
            members.forEach((m) => cyclic.add(m));
        }
    };
    const explore = (root) => {
        const pending = [open(root)];
        while (pending.length > 0) {
            const top = pending[pending.length - 1];
            if (top.cursor < top.targets.length) {
                const target = top.targets[top.cursor];
                top.cursor += 1;
                if (!order.has(target)) {
                    // Tree edge: descend into the unvisited target.
                    pending.push(open(target));
                }
                else if (active.has(target)) {
                    // Edge back into the current stack: tighten our lowlink.
                    low.set(top.node, Math.min(low.get(top.node), order.get(target)));
                }
                continue;
            }
            // All edges of top.node handled — is it the root of an SCC?
            if (low.get(top.node) === order.get(top.node)) {
                close(top.node);
            }
            pending.pop();
            // Hand the finished node's lowlink up to its parent frame.
            const above = pending[pending.length - 1];
            if (above !== undefined) {
                low.set(above.node, Math.min(low.get(above.node), low.get(top.node)));
            }
        }
    };
    // Gather every node mentioned as a source or a target, in edge order.
    const everyNode = new Set();
    localEdges.forEach((targets, source) => {
        everyNode.add(source);
        targets.forEach((target) => everyNode.add(target));
    });
    for (const node of everyNode) {
        if (!order.has(node)) {
            explore(node);
        }
    }
    return cyclic;
}
237
+ // ── Internal helpers ────────────────────────────────────────────────────
238
/**
 * Lazily scan `content` for import specifiers.
 *
 * All matches of the static pattern (`import/export ... from "x"`) are
 * yielded first, followed by all matches of the call pattern
 * (`import("x")` / `require("x")`). Each item carries the captured
 * specifier and the 1-based line number on which the match begins.
 */
export function* extractImports(content) {
    for (const pattern of [STATIC_RE, DYNAMIC_RE]) {
        // The patterns are shared, stateful `g` regexes: rewind before use.
        pattern.lastIndex = 0;
        for (let hit = pattern.exec(content); hit !== null; hit = pattern.exec(content)) {
            yield { specifier: hit[1], line: lineNumberAt(content, hit.index) };
        }
    }
}
254
/**
 * Convert a character offset into a 1-based line number by counting the
 * `\n` characters located strictly before `offset`. Offsets past the end
 * of `content` report the last line; offsets at or below zero report 1.
 */
function lineNumberAt(content, offset) {
    let line = 1;
    let newlineAt = content.indexOf("\n");
    while (newlineAt !== -1 && newlineAt < offset) {
        line += 1;
        newlineAt = content.indexOf("\n", newlineAt + 1);
    }
    return line;
}
262
/**
 * A relative specifier is `.`, `..`, or anything starting with `./` or
 * `../`.
 */
function isRelativeSpecifier(spec) {
    return spec.startsWith("./") || spec.startsWith("../") || spec === "." || spec === "..";
}
/**
 * A bare specifier is an npm/Go/PyPI-style package reference: not relative,
 * not an absolute filesystem path, not a URL, not an alias starting with
 * `@/` or `~`.
 *
 * @param spec - The raw specifier text captured from an import statement.
 * @returns `true` when `spec` looks like a package reference.
 */
function isBareSpecifier(spec) {
    if (spec.length === 0)
        return false;
    if (isRelativeSpecifier(spec))
        return false;
    if (spec.startsWith("/"))
        return false; // absolute filesystem path
    if (spec.startsWith("~"))
        return false; // `~/...` style alias
    if (/^[a-z][a-z0-9+.-]*:\/\//i.test(spec))
        return false; // URL
    // `@/foo` is a bundler alias, not a scoped package: the scope name
    // between `@` and the first `/` is empty. This has to be tested
    // explicitly, because `@/foo` does contain a slash after the `@` and
    // would otherwise pass the scoped-package check below.
    if (spec.startsWith("@/"))
        return false;
    // A scoped package must have a package segment after the scope
    // (`@scope/pkg`); a lone `@scope` is not a package reference.
    if (spec.startsWith("@") && !spec.slice(1).includes("/"))
        return false;
    return true;
}
288
/**
 * Resolve a relative specifier from `fromFile` against the set of known
 * file paths. Handles the common `.js` → `.ts`/`.tsx` mapping used in
 * TypeScript sources.
 *
 * Candidates are tried in this order, and the first one present in
 * `knownFiles` wins:
 *
 * 1. the normalized path exactly as written;
 * 2. the path with a trailing `.js` replaced by `.ts`, then `.tsx`;
 * 3. the path with `.ts`, then `.tsx`, appended;
 * 4. the path as a directory: `<path>/index.ts`, then `<path>/index.tsx`.
 *
 * A specifier whose `..` segments climb above the root of the known path
 * space cannot refer to a known file and resolves to `null`, rather than
 * being folded back onto an unrelated in-root path. A specifier that
 * resolves to the root directory itself is treated as a directory import
 * of `index.ts` / `index.tsx`.
 *
 * @param fromFile - Slash-separated path of the importing file.
 * @param specifier - The relative specifier as written in the source.
 * @param knownFiles - All file paths the result may refer to.
 * @returns The matching known path, or `null` when nothing matches.
 */
export function resolveRelative(fromFile, specifier, knownFiles) {
    const parts = fromFile.split("/");
    parts.pop(); // drop the file name, keep its directory
    const dir = parts.join("/");
    const segments = (dir ? dir + "/" + specifier : specifier).split("/");
    const resolved = [];
    for (const seg of segments) {
        if (seg === "." || seg === "")
            continue;
        if (seg === "..") {
            // Nothing left to pop means the specifier escapes the root.
            if (resolved.length === 0)
                return null;
            resolved.pop();
        }
        else {
            resolved.push(seg);
        }
    }
    const resolvedPath = resolved.join("/");
    let candidates;
    if (resolvedPath === "") {
        // The specifier names the root directory: only an index file fits,
        // and it must be spelled without a leading slash.
        candidates = ["index.ts", "index.tsx"];
    }
    else {
        const withoutJs = resolvedPath.replace(/\.js$/, "");
        candidates = [
            resolvedPath,
            withoutJs + ".ts",
            withoutJs + ".tsx",
            resolvedPath + ".ts",
            resolvedPath + ".tsx",
            // Directory imports — try `index.ts` / `index.tsx`.
            resolvedPath + "/index.ts",
            resolvedPath + "/index.tsx",
        ];
    }
    for (const candidate of candidates) {
        if (knownFiles.has(candidate))
            return candidate;
    }
    return null;
}
328
/** Return the distinct values of `xs`, keeping first-occurrence order. */
function dedupe(xs) {
    return Array.from(new Set(xs));
}
package/dist/index.js CHANGED
@@ -33,7 +33,8 @@ import { createOpencode } from "@opencode-ai/sdk";
33
33
  import { findFreePort, trackServer, untrackServer, startWithRetry } from "./server-utils.js";
34
34
  import { discoverFiles, detectProjectName } from "./discovery.js";
35
35
  import { groupFiles } from "./grouping.js";
36
- import { GroupingAgent } from "./agents/grouping-agent.js";
36
+ import { buildImportGraph } from "./import-graph.js";
37
+ import { loadGroupingPriors } from "./grouping-priors.js";
37
38
  import { DocumentationAgent } from "./agents/documentation-agent.js";
38
39
  import { ArchitectureAgent } from "./agents/architecture-agent.js";
39
40
  import { IndexAgent } from "./agents/index-agent.js";
@@ -511,15 +512,27 @@ export async function handleGenerate(opts, deps = defaultDeps) {
511
512
  await waitUntilExit();
512
513
  return null;
513
514
  }
514
- // ── Step 10c: Grouping phase (LLM with heuristic fallback) ─────────
515
- const groupingAgent = new GroupingAgent(provider, projectName);
516
- let groups = await groupingAgent.run(textFiles, verboseLog, PROJECT_CONFIG_DIR);
517
- if (!groups) {
518
- if (cliOptions.verbose) {
519
- verboseLog(`[grouping] LLM grouping unavailable, using heuristic fallback`);
520
- }
521
- groups = deps.groupFiles(textFiles);
515
+ // ── Step 10c: Deterministic grouping (priors + import graph) ──────
516
+ const priors = await loadGroupingPriors(absoluteDestination);
517
+ if (cliOptions.verbose && priors.docFolders.length > 0) {
518
+ verboseLog(`[grouping] ${priors.docFolders.length} doc-folder prior(s): ${priors.docFolders
519
+ .map((p) => p.name)
520
+ .join(", ")}`);
521
+ }
522
+ if (cliOptions.verbose) {
523
+ verboseLog(`[grouping] building import graph from ${textFiles.length} files...`);
522
524
  }
525
+ const importGraphStart = Date.now();
526
+ const importAnalysis = await buildImportGraph(textFiles);
527
+ if (cliOptions.verbose) {
528
+ const elapsed = ((Date.now() - importGraphStart) / 1000).toFixed(1);
529
+ verboseLog(`[grouping] import graph built in ${elapsed}s: ${importAnalysis.localEdges.size} files with local edges, ` +
530
+ `${importAnalysis.externalImports.size} with external imports`);
531
+ }
532
+ const groups = deps.groupFiles(textFiles, {
533
+ priors,
534
+ localEdges: importAnalysis.localEdges,
535
+ });
523
536
  const featureGroups = groups.filter((g) => g.type === "vertical").length;
524
537
  const layerGroups = groups.filter((g) => g.type === "horizontal").length;
525
538
  if (cliOptions.verbose) {
@@ -16,6 +16,7 @@
16
16
  *
17
17
  * Reference: FR-005, FR-006, FR-007.
18
18
  */
19
+ import { type LimitFunction } from "p-limit";
19
20
  import type { ModelSelection, CLIOptions, FileGroup, GenerationContext, GenerationResult, ProgressState, ExplorationFindings } from "./types.js";
20
21
  import { createOpencode } from "@opencode-ai/sdk";
21
22
  import { DocumentationAgent } from "./agents/documentation-agent.js";
@@ -142,9 +143,15 @@ export declare function filterRelevantDocs(allDocs: Array<{
142
143
  * @param onGroupComplete - Optional callback invoked each time a group's
143
144
  * exploration finishes successfully. Used by the
144
145
  * streaming pipeline to launch doc agents eagerly.
146
+ * @param sharedLimit - Optional p-limit instance to share with the
147
+ * downstream documentation phase. When omitted, a
148
+ * local instance sized to `computeMaxConcurrency`
149
+ * is created. Passing a shared limit is how the
150
+ * real pipeline prevents exploration + generation
151
+ * from each spawning their own resource budget.
145
152
  * @returns All successful `ExplorationFindings[]`.
146
153
  */
147
- export declare function runExploration(explorationAgent: ExplorationAgent, groups: FileGroup[], options: CLIOptions, onProgress: (state: Partial<ProgressState>) => void, onGroupComplete?: (groupId: string, findings: ExplorationFindings) => void): Promise<ExplorationFindings[]>;
154
+ export declare function runExploration(explorationAgent: ExplorationAgent, groups: FileGroup[], options: CLIOptions, onProgress: (state: Partial<ProgressState>) => void, onGroupComplete?: (groupId: string, findings: ExplorationFindings) => void, sharedLimit?: LimitFunction): Promise<ExplorationFindings[]>;
148
155
  /**
149
156
  * Runs the doc agent for a single file group.
150
157
  *
@@ -390,9 +390,15 @@ async function resolveRelevantDocs(searchIndex, destinationPath, existingDocs, g
390
390
  * @param onGroupComplete - Optional callback invoked each time a group's
391
391
  * exploration finishes successfully. Used by the
392
392
  * streaming pipeline to launch doc agents eagerly.
393
+ * @param sharedLimit - Optional p-limit instance to share with the
394
+ * downstream documentation phase. When omitted, a
395
+ * local instance sized to `computeMaxConcurrency`
396
+ * is created. Passing a shared limit is how the
397
+ * real pipeline prevents exploration + generation
398
+ * from each spawning their own resource budget.
393
399
  * @returns All successful `ExplorationFindings[]`.
394
400
  */
395
- export async function runExploration(explorationAgent, groups, options, onProgress, onGroupComplete) {
401
+ export async function runExploration(explorationAgent, groups, options, onProgress, onGroupComplete, sharedLimit) {
396
402
  const sourceRoot = resolve(options.source);
397
403
  const verbose = options.verbose
398
404
  ? (msg) => {
@@ -403,7 +409,8 @@ export async function runExploration(explorationAgent, groups, options, onProgre
403
409
  const effectiveConcurrency = computeMaxConcurrency(options.concurrency);
404
410
  if (verbose) {
405
411
  verbose(`[orchestrator] Resource limits: ${describeResourceLimits(options.concurrency)}`);
406
- verbose(`[orchestrator] Starting exploration: ${groups.length} groups, concurrency=${effectiveConcurrency}`);
412
+ verbose(`[orchestrator] Starting exploration: ${groups.length} groups, concurrency=${effectiveConcurrency}` +
413
+ (sharedLimit ? " (shared with generation)" : ""));
407
414
  }
408
415
  // Build allGroups summary for cross-group awareness
409
416
  const allGroups = groups.map((group) => ({
@@ -429,7 +436,7 @@ export async function runExploration(explorationAgent, groups, options, onProgre
429
436
  phase: "exploration",
430
437
  explorationStatuses: [...explorationStatuses],
431
438
  });
432
- const limit = pLimit(effectiveConcurrency);
439
+ const limit = sharedLimit ?? pLimit(effectiveConcurrency);
433
440
  // ── Single-agent path (existing behavior, totalFiles < threshold) ──
434
441
  if (!isMultiAgent) {
435
442
  const results = await Promise.allSettled(groups.map((group, i) => limit(async () => {
@@ -756,6 +763,14 @@ export async function generateDocumentation(agent, groups, options, onProgress,
756
763
  }
757
764
  // Accumulated findings — grows as explorations complete.
758
765
  const allFindings = [];
766
+ // ── Shared concurrency limit ─────────────────────────────────────
767
+ // Exploration and documentation run concurrently via the streaming
768
+ // gate pattern below. Without a shared p-limit each phase would spin
769
+ // up its own resource budget, doubling the effective concurrency and
770
+ // OOMing Node on large projects. One semaphore spans both phases so
771
+ // `computeMaxConcurrency` actually caps total in-flight LLM sessions.
772
+ const sharedConcurrency = computeMaxConcurrency(options.concurrency);
773
+ const sharedLimit = pLimit(sharedConcurrency);
759
774
  // ── Launch exploration + existingDocs scan in parallel ───────────
760
775
  let explorationPromise;
761
776
  let existingDocsPromise;
@@ -768,7 +783,7 @@ export async function generateDocumentation(agent, groups, options, onProgress,
768
783
  (groupId, findings) => {
769
784
  allFindings.push(findings);
770
785
  groupGates.get(groupId)?.resolve(findings);
771
- });
786
+ }, sharedLimit);
772
787
  // When exploration fully settles, resolve any remaining gates for groups
773
788
  // whose exploration failed so their doc agents can proceed without findings.
774
789
  // On AuthExpiredError, reject all remaining gates to abort doc agents.
@@ -842,16 +857,15 @@ export async function generateDocumentation(agent, groups, options, onProgress,
842
857
  completedSessions: 0,
843
858
  failedSessions: 0,
844
859
  });
845
- const docConcurrency = computeMaxConcurrency(options.concurrency);
846
860
  // ── Multi-agent documentation detection ──────────────────────────
847
861
  const totalFiles = groups.reduce((sum, g) => sum + g.files.length, 0);
848
862
  const docAgentsPerGroup = computeAgentsPerGroup(totalFiles);
849
863
  const isMultiAgentDoc = docAgentsPerGroup > 1;
850
864
  if (verbose) {
851
- verbose(`[orchestrator] Starting documentation: ${groups.length} groups, concurrency=${docConcurrency}` +
865
+ verbose(`[orchestrator] Starting documentation: ${groups.length} groups, concurrency=${sharedConcurrency} (shared with exploration)` +
852
866
  (isMultiAgentDoc ? `, multi-agent=${docAgentsPerGroup} agents/group` : ""));
853
867
  }
854
- const limit = pLimit(docConcurrency);
868
+ const limit = sharedLimit;
855
869
  const results = await Promise.allSettled(groups.map((group, i) => {
856
870
  if (!isMultiAgentDoc) {
857
871
  // ── Single-agent path (existing behavior) ──────────────────
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@pruddiman/hem",
3
- "version": "0.0.1-beta-5671db0",
3
+ "version": "0.0.1-beta-72c22cf",
4
4
  "type": "module",
5
5
  "bin": {
6
6
  "hem": "./dist/index.js"