@pruddiman/hem 0.0.1-beta-5671db0 → 0.0.1-beta-72c22cf
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/grouping-priors.d.ts +54 -0
- package/dist/grouping-priors.js +130 -0
- package/dist/grouping.d.ts +28 -21
- package/dist/grouping.js +191 -63
- package/dist/import-graph.d.ts +74 -0
- package/dist/import-graph.js +330 -0
- package/dist/index.js +22 -9
- package/dist/orchestrator.d.ts +8 -1
- package/dist/orchestrator.js +21 -7
- package/package.json +1 -1
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Structural priors for the deterministic grouping phase.
|
|
3
|
+
*
|
|
4
|
+
* The grouping pipeline respects two priors before falling back to derived
|
|
5
|
+
* clustering:
|
|
6
|
+
*
|
|
7
|
+
* 1. **Existing docs-folder scaffolding** — top-level subfolders of the
|
|
8
|
+
* destination directory (e.g. `docs/auth/`, `docs/api/`) become
|
|
9
|
+
* candidate group labels. Source files flow into the matching bucket.
|
|
10
|
+
* 2. **Src top-level structure** — depth-1 directories in the source tree
|
|
11
|
+
* that contain enough files are promoted as group labels, preserving
|
|
12
|
+
* parent-child nesting.
|
|
13
|
+
*
|
|
14
|
+
* This module handles prior #1 (destination scan) and exports matching
|
|
15
|
+
* utilities that `src/grouping.ts` uses for both priors.
|
|
16
|
+
*/
|
|
17
|
+
/** A candidate group label derived from an existing docs-folder. */
|
|
18
|
+
export interface DocFolderPrior {
|
|
19
|
+
/** Folder name as it appears on disk (display label, original casing). */
|
|
20
|
+
name: string;
|
|
21
|
+
/** Lowercased key used for matching source files. */
|
|
22
|
+
key: string;
|
|
23
|
+
}
|
|
24
|
+
/** Collected priors from the destination directory. */
|
|
25
|
+
export interface GroupingPriors {
|
|
26
|
+
/**
|
|
27
|
+
* Top-level doc subfolders that should anchor grouping. Empty when the
|
|
28
|
+
* destination doesn't exist yet or has no subfolders.
|
|
29
|
+
*/
|
|
30
|
+
docFolders: DocFolderPrior[];
|
|
31
|
+
}
|
|
32
|
+
/**
|
|
33
|
+
* Scan the destination directory for top-level subfolders and return them
|
|
34
|
+
* as grouping priors. When the destination doesn't exist or contains no
|
|
35
|
+
* subdirectories, returns empty priors.
|
|
36
|
+
*
|
|
37
|
+
* Folders prefixed with `.` or `_` are ignored (treated as metadata).
|
|
38
|
+
*/
|
|
39
|
+
export declare function loadGroupingPriors(destinationPath: string): Promise<GroupingPriors>;
|
|
40
|
+
/**
|
|
41
|
+
* Test whether a file should be assigned to a doc-folder prior.
|
|
42
|
+
*
|
|
43
|
+
* Three match modes (any one counts as a hit):
|
|
44
|
+
* 1. The folder key appears as a directory segment in the file's path.
|
|
45
|
+
* 2. The file's stem contains the folder key as a word-boundary token.
|
|
46
|
+
* 3. The file's stem contains the folder key as a hyphen-delimited substring.
|
|
47
|
+
*/
|
|
48
|
+
export declare function matchDocFolder(filePath: string, priors: readonly DocFolderPrior[]): DocFolderPrior | null;
|
|
49
|
+
/**
|
|
50
|
+
* Normalize a folder name to a matching key. Lowercases and collapses
|
|
51
|
+
* underscores/spaces to hyphens so `User_Auth/` and `user-auth/` share a
|
|
52
|
+
* key.
|
|
53
|
+
*/
|
|
54
|
+
export declare function normalizeFolderKey(name: string): string;
|
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Structural priors for the deterministic grouping phase.
|
|
3
|
+
*
|
|
4
|
+
* The grouping pipeline respects two priors before falling back to derived
|
|
5
|
+
* clustering:
|
|
6
|
+
*
|
|
7
|
+
* 1. **Existing docs-folder scaffolding** — top-level subfolders of the
|
|
8
|
+
* destination directory (e.g. `docs/auth/`, `docs/api/`) become
|
|
9
|
+
* candidate group labels. Source files flow into the matching bucket.
|
|
10
|
+
* 2. **Src top-level structure** — depth-1 directories in the source tree
|
|
11
|
+
* that contain enough files are promoted as group labels, preserving
|
|
12
|
+
* parent-child nesting.
|
|
13
|
+
*
|
|
14
|
+
* This module handles prior #1 (destination scan) and exports matching
|
|
15
|
+
* utilities that `src/grouping.ts` uses for both priors.
|
|
16
|
+
*/
|
|
17
|
+
import { readdir, stat } from "node:fs/promises";
|
|
18
|
+
import { join } from "node:path";
|
|
19
|
+
// ── Public API ──────────────────────────────────────────────────────────
|
|
20
|
+
/**
 * Collect grouping priors from the top level of the destination directory.
 *
 * Every immediate child that `stat` reports as a directory becomes a
 * `{ name, key }` prior, where `key` comes from `normalizeFolderKey`.
 * Names starting with `.` or `_` are skipped (treated as metadata). A
 * destination that cannot be listed yields empty priors, and an entry
 * that cannot be stat'ed is ignored rather than reported.
 *
 * @param destinationPath - Directory whose direct children are inspected.
 * @returns Priors sorted by key; `docFolders` is empty when nothing qualifies.
 */
export async function loadGroupingPriors(destinationPath) {
    // Any stat() failure is treated the same as "not a directory".
    const isDirectory = async (path) => {
        try {
            return (await stat(path)).isDirectory();
        }
        catch {
            return false;
        }
    };
    let names;
    try {
        names = await readdir(destinationPath);
    }
    catch {
        return { docFolders: [] };
    }
    const found = [];
    for (const name of names) {
        const isMetadata = name.startsWith(".") || name.startsWith("_");
        if (isMetadata || !(await isDirectory(join(destinationPath, name)))) {
            continue;
        }
        found.push({ name, key: normalizeFolderKey(name) });
    }
    // Sort by key so the result does not depend on readdir() ordering.
    found.sort((left, right) => left.key.localeCompare(right.key));
    return { docFolders: found };
}
|
|
55
|
+
/**
 * Pick the doc-folder prior that best matches a source file.
 *
 * Match tiers, strongest first (see `scoreMatch`):
 *   1. A directory segment of the path normalizes to the folder key.
 *   2. The file's stem contains the key as a single word token
 *      (camelCase humps, `-` and `_` all count as word boundaries).
 *   3. The stem contains the key as a hyphen-delimited run of whole
 *      words, which is what lets multi-word keys such as `user-auth`
 *      match `user_auth_service` or `userAuthService`.
 *
 * Within a tier the longer key wins, so `auth` does not steal files that
 * should go to `authentication`. On an exact tie the prior that appears
 * first in `priors` is kept.
 *
 * @param filePath - `/`-separated path of the source file.
 * @param priors - Candidate doc folders; keys are expected to be
 *   `normalizeFolderKey` output.
 * @returns The best-matching prior, or `null` when nothing matches.
 */
export function matchDocFolder(filePath, priors) {
    if (priors.length === 0)
        return null;
    const rawSegments = filePath.split("/");
    const segments = rawSegments.map((s) => s.toLowerCase());
    // Take the stem from the un-lowercased basename: lowercasing first
    // would erase the camelCase boundaries that scoreMatch relies on.
    const fileStem = extractStem(rawSegments[rawSegments.length - 1] ?? "");
    let best = null;
    for (const prior of priors) {
        const score = scoreMatch(segments, fileStem, prior.key);
        if (score > 0 && (!best || score > best.score)) {
            best = { prior, score };
        }
    }
    return best?.prior ?? null;
}
/**
 * Normalize a folder name to a matching key. Lowercases, turns runs of
 * whitespace/underscores into a single hyphen, collapses repeated hyphens
 * and trims a leading/trailing hyphen, so `User_Auth/` and `user-auth/`
 * share a key.
 *
 * @param name - Folder (or path segment) name in any casing.
 * @returns The normalized key; may be empty if `name` had no other characters.
 */
export function normalizeFolderKey(name) {
    return name
        .toLowerCase()
        .replace(/[\s_]+/g, "-")
        .replace(/-+/g, "-")
        .replace(/^-|-$/g, "");
}
// ── Internal helpers ────────────────────────────────────────────────────
/**
 * Return the part of a basename before its FIRST dot, casing preserved
 * (`authService.test.ts` → `authService`). A name that starts with a dot
 * therefore has an empty stem.
 */
function extractStem(basename) {
    const dotIndex = basename.indexOf(".");
    return dotIndex === -1 ? basename : basename.substring(0, dotIndex);
}
/**
 * Score how well `key` matches a file.
 *
 *   - 3 for a directory-segment match (strongest signal).
 *   - 2 for a single-word match in the file stem.
 *   - 1 for a hyphen-delimited containment match in the stem
 *       (the only tier a multi-word key can reach via the stem).
 *   - 0 for no match.
 *
 * A fractional bonus proportional to the key length is added so that the
 * longer key wins inside a tier. The bonus is capped below 1 so it can
 * never lift a match into the next tier.
 *
 * @param segments - Lowercased path segments; the last one is the file name.
 * @param fileStem - File stem with its original casing.
 * @param key - Normalized folder key.
 */
function scoreMatch(segments, fileStem, key) {
    if (key.length === 0)
        return 0;
    const bonus = Math.min(key.length, 99) / 100;
    // 3: directory segment. Compare both the raw and the normalized form so
    // `user_auth/` matches the key `user-auth`, while hand-built keys that
    // are not normalized keep matching exactly as before.
    for (let i = 0; i < segments.length - 1; i++) {
        const segment = segments[i];
        if (segment === key || normalizeFolderKey(segment) === key)
            return 3 + bonus;
    }
    // Two tokenizations: one on separators only (keeps `webHooks` → `webhooks`
    // matching) and one that also splits camelCase humps.
    const lowerStem = fileStem.toLowerCase();
    const tokenizations = [
        splitWordBoundaries(lowerStem),
        splitWordBoundaries(fileStem),
    ];
    // 2: the key is exactly one word of the stem.
    if (tokenizations.some((tokens) => tokens.includes(key)))
        return 2 + bonus;
    // 1: the key appears as a hyphen-delimited run, either in the stem as
    // written or in the stem re-joined with hyphens between words.
    const paddedKey = "-" + key + "-";
    const paddedStems = [
        "-" + lowerStem + "-",
        ...tokenizations.map((tokens) => "-" + tokens.join("-") + "-"),
    ];
    if (paddedStems.some((stem) => stem.includes(paddedKey)))
        return 1 + bonus;
    return 0;
}
/**
 * Split a stem into lowercase words at `-`, `_` and lower→Upper camelCase
 * transitions (a digit followed by an uppercase letter also counts).
 * Empty pieces are dropped.
 */
function splitWordBoundaries(s) {
    return s
        .replace(/([a-z0-9])([A-Z])/g, "$1-$2")
        .toLowerCase()
        .split(/[-_]/)
        .filter((t) => t.length > 0);
}
|
package/dist/grouping.d.ts
CHANGED
|
@@ -1,14 +1,22 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* File grouping module for Hem.
|
|
3
3
|
*
|
|
4
|
-
* Analyses discovered source files and groups them
|
|
5
|
-
* (e.g., "user", "order") or architectural layer (e.g., controllers,
|
|
6
|
-
* services). Each file appears in at most one group; feature grouping
|
|
7
|
-
* takes priority over layer grouping.
|
|
4
|
+
* Analyses discovered source files and groups them in this priority order:
|
|
8
5
|
*
|
|
9
|
-
*
|
|
6
|
+
* 1. Existing docs-folder priors — folders already present in the
|
|
7
|
+
* destination directory anchor matching files (stable across runs).
|
|
8
|
+
* 2. Src top-level promotion — depth-1 source directories with ≥3 files
|
|
9
|
+
* become vertical groups, preserving parent-child nesting.
|
|
10
|
+
* 3. Legacy feature extraction — deepest non-layer directory as a label.
|
|
11
|
+
* 4. Architectural layer heuristic — suffix/dir pattern matching.
|
|
12
|
+
* 5. Import-graph connected components — clusters for files the above
|
|
13
|
+
* passes miss.
|
|
14
|
+
* 6. Catch-all "Other" group.
|
|
15
|
+
*
|
|
16
|
+
* Each file appears in exactly one group.
|
|
10
17
|
*/
|
|
11
18
|
import type { FileInfo, FileGroup } from "./types.js";
|
|
19
|
+
import { type GroupingPriors } from "./grouping-priors.js";
|
|
12
20
|
/**
|
|
13
21
|
* Finds the common parent directory for a set of file paths.
|
|
14
22
|
*
|
|
@@ -16,22 +24,21 @@ import type { FileInfo, FileGroup } from "./types.js";
|
|
|
16
24
|
* @returns The common parent directory (relative path), or `"."` for root.
|
|
17
25
|
*/
|
|
18
26
|
export declare function commonDirectory(files: FileInfo[]): string;
|
|
27
|
+
/** Optional inputs that steer deterministic grouping. */
|
|
28
|
+
export interface GroupingOptions {
|
|
29
|
+
/** Priors derived from the destination directory (if it exists). */
|
|
30
|
+
priors?: GroupingPriors;
|
|
31
|
+
/**
|
|
32
|
+
* Local-import adjacency map produced by `buildImportGraph`. When
|
|
33
|
+
* provided, files left unassigned after heuristic passes are clustered
|
|
34
|
+
* by connected components.
|
|
35
|
+
*/
|
|
36
|
+
localEdges?: Map<string, string[]>;
|
|
37
|
+
}
|
|
19
38
|
/**
|
|
20
|
-
* Groups discovered files
|
|
21
|
-
*
|
|
22
|
-
* Grouping strategy:
|
|
23
|
-
* 1. Filter out binary files.
|
|
24
|
-
* 2. Attempt to assign each file to a **feature vertical** group
|
|
25
|
-
* based on its directory structure (e.g., files under `user/` →
|
|
26
|
-
* "User" feature group).
|
|
27
|
-
* 3. Files not assigned to a feature group are checked for
|
|
28
|
-
* **architectural layer** membership based on file name suffixes
|
|
29
|
-
* (e.g., `.controller.ts` → "Controllers" layer) or containing
|
|
30
|
-
* directory (e.g., `services/` → "Services").
|
|
31
|
-
* 4. Remaining files go into a catch-all "Other" group.
|
|
32
|
-
* 5. Each file appears in at most one group.
|
|
39
|
+
* Groups discovered files. See module docstring for the priority order.
|
|
33
40
|
*
|
|
34
|
-
* @param files
|
|
35
|
-
* @
|
|
41
|
+
* @param files - Discovered files (may include binary files).
|
|
42
|
+
* @param options - Optional priors and precomputed import graph.
|
|
36
43
|
*/
|
|
37
|
-
export declare function groupFiles(files: FileInfo[]): FileGroup[];
|
|
44
|
+
export declare function groupFiles(files: FileInfo[], options?: GroupingOptions): FileGroup[];
|
package/dist/grouping.js
CHANGED
|
@@ -1,15 +1,24 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* File grouping module for Hem.
|
|
3
3
|
*
|
|
4
|
-
* Analyses discovered source files and groups them
|
|
5
|
-
* (e.g., "user", "order") or architectural layer (e.g., controllers,
|
|
6
|
-
* services). Each file appears in at most one group; feature grouping
|
|
7
|
-
* takes priority over layer grouping.
|
|
4
|
+
* Analyses discovered source files and groups them in this priority order:
|
|
8
5
|
*
|
|
9
|
-
*
|
|
6
|
+
* 1. Existing docs-folder priors — folders already present in the
|
|
7
|
+
* destination directory anchor matching files (stable across runs).
|
|
8
|
+
* 2. Src top-level promotion — depth-1 source directories with ≥3 files
|
|
9
|
+
* become vertical groups, preserving parent-child nesting.
|
|
10
|
+
* 3. Legacy feature extraction — deepest non-layer directory as a label.
|
|
11
|
+
* 4. Architectural layer heuristic — suffix/dir pattern matching.
|
|
12
|
+
* 5. Import-graph connected components — clusters for files the above
|
|
13
|
+
* passes miss.
|
|
14
|
+
* 6. Catch-all "Other" group.
|
|
15
|
+
*
|
|
16
|
+
* Each file appears in exactly one group.
|
|
10
17
|
*/
|
|
11
18
|
import { dirname } from "node:path";
|
|
12
19
|
import { toKebabCase } from "./helpers/strings.js";
|
|
20
|
+
import { connectedComponents } from "./import-graph.js";
|
|
21
|
+
import { matchDocFolder, } from "./grouping-priors.js";
|
|
13
22
|
// ── Layer detection ─────────────────────────────────────────────────────
|
|
14
23
|
/**
|
|
15
24
|
* Maps well-known file name suffixes to their architectural layer label.
|
|
@@ -216,99 +225,138 @@ function toDisplayLabel(name) {
|
|
|
216
225
|
.replace(/\b\w/g, (ch) => ch.toUpperCase());
|
|
217
226
|
}
|
|
218
227
|
// ── Main ────────────────────────────────────────────────────────────────
|
|
228
|
+
/** Minimum files a top-level src directory needs before it's promoted. */
|
|
229
|
+
const TOP_LEVEL_PROMOTION_THRESHOLD = 3;
|
|
230
|
+
/** Minimum size of an import-graph connected component to become a group. */
|
|
231
|
+
const MIN_COMPONENT_SIZE = 2;
|
|
232
|
+
/** Components larger than this split along directory boundaries. */
|
|
233
|
+
const MAX_COMPONENT_SIZE = 6;
|
|
219
234
|
/**
|
|
220
|
-
* Groups discovered files
|
|
221
|
-
*
|
|
222
|
-
* Grouping strategy:
|
|
223
|
-
* 1. Filter out binary files.
|
|
224
|
-
* 2. Attempt to assign each file to a **feature vertical** group
|
|
225
|
-
* based on its directory structure (e.g., files under `user/` →
|
|
226
|
-
* "User" feature group).
|
|
227
|
-
* 3. Files not assigned to a feature group are checked for
|
|
228
|
-
* **architectural layer** membership based on file name suffixes
|
|
229
|
-
* (e.g., `.controller.ts` → "Controllers" layer) or containing
|
|
230
|
-
* directory (e.g., `services/` → "Services").
|
|
231
|
-
* 4. Remaining files go into a catch-all "Other" group.
|
|
232
|
-
* 5. Each file appears in at most one group.
|
|
235
|
+
* Groups discovered files. See module docstring for the priority order.
|
|
233
236
|
*
|
|
234
|
-
* @param files
|
|
235
|
-
* @
|
|
237
|
+
* @param files - Discovered files (may include binary files).
|
|
238
|
+
* @param options - Optional priors and precomputed import graph.
|
|
236
239
|
*/
|
|
237
|
-
export function groupFiles(files) {
|
|
238
|
-
// Step 1: filter to non-binary files only
|
|
240
|
+
export function groupFiles(files, options = {}) {
|
|
239
241
|
const textFiles = files.filter((f) => !f.isBinary);
|
|
240
242
|
if (textFiles.length === 0)
|
|
241
243
|
return [];
|
|
242
|
-
|
|
244
|
+
const priors = options.priors?.docFolders ?? [];
|
|
245
|
+
const localEdges = options.localEdges;
|
|
246
|
+
/** Key → display label for vertical groups. */
|
|
247
|
+
const featureLabels = new Map();
|
|
248
|
+
/** Key → bucket of FileInfo for vertical groups. */
|
|
243
249
|
const featureBuckets = new Map();
|
|
250
|
+
/** Keys that came from priors and should not be demoted. */
|
|
251
|
+
const pinnedKeys = new Set();
|
|
252
|
+
/** Layer buckets, keyed by display label. */
|
|
244
253
|
const layerBuckets = new Map();
|
|
245
|
-
|
|
246
|
-
const assigned = new Set();
|
|
247
|
-
|
|
254
|
+
/** Paths that already belong to a bucket. */
|
|
255
|
+
const assigned = new Set();
|
|
256
|
+
const addFeature = (key, label, file) => {
|
|
257
|
+
if (!featureBuckets.has(key)) {
|
|
258
|
+
featureBuckets.set(key, []);
|
|
259
|
+
featureLabels.set(key, label);
|
|
260
|
+
}
|
|
261
|
+
featureBuckets.get(key).push(file);
|
|
262
|
+
assigned.add(file.path);
|
|
263
|
+
};
|
|
264
|
+
// ── Pass 1: existing docs-folder priors ──
|
|
265
|
+
for (const file of textFiles) {
|
|
266
|
+
const match = matchDocFolder(file.path, priors);
|
|
267
|
+
if (!match)
|
|
268
|
+
continue;
|
|
269
|
+
addFeature(match.key, toDisplayLabel(match.name), file);
|
|
270
|
+
pinnedKeys.add(match.key);
|
|
271
|
+
}
|
|
272
|
+
// ── Pass 2: src top-level promotion (≥3 files) ──
|
|
273
|
+
const topLevelCounts = countTopLevelDirs(textFiles);
|
|
274
|
+
for (const file of textFiles) {
|
|
275
|
+
if (assigned.has(file.path))
|
|
276
|
+
continue;
|
|
277
|
+
const top = topLevelDir(file.path);
|
|
278
|
+
if (!top)
|
|
279
|
+
continue;
|
|
280
|
+
if (LAYER_DIRECTORIES.has(top.toLowerCase()))
|
|
281
|
+
continue;
|
|
282
|
+
const count = topLevelCounts.get(top) ?? 0;
|
|
283
|
+
if (count < TOP_LEVEL_PROMOTION_THRESHOLD)
|
|
284
|
+
continue;
|
|
285
|
+
addFeature(top.toLowerCase(), toDisplayLabel(top), file);
|
|
286
|
+
}
|
|
287
|
+
// ── Pass 3: legacy feature extraction (deepest non-layer dir) ──
|
|
248
288
|
for (const file of textFiles) {
|
|
289
|
+
if (assigned.has(file.path))
|
|
290
|
+
continue;
|
|
249
291
|
const feature = extractFeatureName(file.path);
|
|
250
|
-
if (feature)
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
featureBuckets.set(key, []);
|
|
254
|
-
}
|
|
255
|
-
featureBuckets.get(key).push(file);
|
|
256
|
-
assigned.add(file.path);
|
|
257
|
-
}
|
|
292
|
+
if (!feature)
|
|
293
|
+
continue;
|
|
294
|
+
addFeature(feature.toLowerCase(), toDisplayLabel(feature), file);
|
|
258
295
|
}
|
|
259
|
-
//
|
|
260
|
-
// only one file aren't meaningful groups by themselves. They'll get
|
|
261
|
-
// a chance to be grouped by layer instead.
|
|
296
|
+
// Demote single-file feature buckets (unless pinned by a prior).
|
|
262
297
|
for (const [key, bucket] of featureBuckets) {
|
|
298
|
+
if (pinnedKeys.has(key))
|
|
299
|
+
continue;
|
|
263
300
|
if (bucket.length < 2) {
|
|
264
|
-
for (const file of bucket)
|
|
301
|
+
for (const file of bucket)
|
|
265
302
|
assigned.delete(file.path);
|
|
266
|
-
}
|
|
267
303
|
featureBuckets.delete(key);
|
|
304
|
+
featureLabels.delete(key);
|
|
268
305
|
}
|
|
269
306
|
}
|
|
270
|
-
//
|
|
307
|
+
// ── Pass 4: architectural layers ──
|
|
271
308
|
for (const file of textFiles) {
|
|
272
309
|
if (assigned.has(file.path))
|
|
273
310
|
continue;
|
|
274
311
|
const layer = detectLayer(file.path) ?? detectLayerByDirectory(file.path);
|
|
275
|
-
if (layer)
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
}
|
|
312
|
+
if (!layer)
|
|
313
|
+
continue;
|
|
314
|
+
if (!layerBuckets.has(layer))
|
|
315
|
+
layerBuckets.set(layer, []);
|
|
316
|
+
layerBuckets.get(layer).push(file);
|
|
317
|
+
assigned.add(file.path);
|
|
282
318
|
}
|
|
283
|
-
//
|
|
319
|
+
// Demote single-file layer buckets.
|
|
284
320
|
for (const [key, bucket] of layerBuckets) {
|
|
285
321
|
if (bucket.length < 2) {
|
|
286
|
-
for (const file of bucket)
|
|
322
|
+
for (const file of bucket)
|
|
287
323
|
assigned.delete(file.path);
|
|
288
|
-
}
|
|
289
324
|
layerBuckets.delete(key);
|
|
290
325
|
}
|
|
291
326
|
}
|
|
292
|
-
//
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
327
|
+
// ── Pass 5: import-graph connected components ──
|
|
328
|
+
if (localEdges) {
|
|
329
|
+
const remaining = textFiles.filter((f) => !assigned.has(f.path));
|
|
330
|
+
const remainingPaths = remaining.map((f) => f.path);
|
|
331
|
+
const byPath = new Map(remaining.map((f) => [f.path, f]));
|
|
332
|
+
const components = connectedComponents(remainingPaths, localEdges);
|
|
333
|
+
const componentGroups = buildComponentGroups(components, byPath);
|
|
334
|
+
for (const cg of componentGroups) {
|
|
335
|
+
const key = cg.key;
|
|
336
|
+
if (!featureBuckets.has(key)) {
|
|
337
|
+
featureBuckets.set(key, []);
|
|
338
|
+
featureLabels.set(key, cg.label);
|
|
339
|
+
}
|
|
340
|
+
for (const file of cg.files) {
|
|
341
|
+
featureBuckets.get(key).push(file);
|
|
342
|
+
assigned.add(file.path);
|
|
343
|
+
}
|
|
296
344
|
}
|
|
297
345
|
}
|
|
298
|
-
//
|
|
346
|
+
// ── Pass 6: catch-all "Other" ──
|
|
347
|
+
const ungrouped = textFiles.filter((f) => !assigned.has(f.path));
|
|
348
|
+
// ── Build FileGroup objects ──
|
|
299
349
|
const groups = [];
|
|
300
|
-
// Feature (vertical) groups
|
|
301
350
|
for (const [key, bucket] of featureBuckets) {
|
|
302
|
-
const
|
|
351
|
+
const label = featureLabels.get(key) ?? toDisplayLabel(key);
|
|
303
352
|
groups.push({
|
|
304
|
-
id: toKebabCase(
|
|
305
|
-
label
|
|
353
|
+
id: toKebabCase(label) + "-feature",
|
|
354
|
+
label,
|
|
306
355
|
type: "vertical",
|
|
307
356
|
files: bucket.sort((a, b) => a.path.localeCompare(b.path)),
|
|
308
357
|
directory: commonDirectory(bucket),
|
|
309
358
|
});
|
|
310
359
|
}
|
|
311
|
-
// Layer (horizontal) groups
|
|
312
360
|
for (const [label, bucket] of layerBuckets) {
|
|
313
361
|
groups.push({
|
|
314
362
|
id: toKebabCase(label) + "-layer",
|
|
@@ -318,7 +366,6 @@ export function groupFiles(files) {
|
|
|
318
366
|
directory: commonDirectory(bucket),
|
|
319
367
|
});
|
|
320
368
|
}
|
|
321
|
-
// Catch-all "Other" group
|
|
322
369
|
if (ungrouped.length > 0) {
|
|
323
370
|
groups.push({
|
|
324
371
|
id: "other",
|
|
@@ -328,11 +375,9 @@ export function groupFiles(files) {
|
|
|
328
375
|
directory: commonDirectory(ungrouped),
|
|
329
376
|
});
|
|
330
377
|
}
|
|
331
|
-
// Sort groups: verticals first, then horizontals, alphabetically within
|
|
332
378
|
groups.sort((a, b) => {
|
|
333
379
|
if (a.type !== b.type)
|
|
334
380
|
return a.type === "vertical" ? -1 : 1;
|
|
335
|
-
// "Other" always last within its type
|
|
336
381
|
if (a.id === "other")
|
|
337
382
|
return 1;
|
|
338
383
|
if (b.id === "other")
|
|
@@ -341,3 +386,86 @@ export function groupFiles(files) {
|
|
|
341
386
|
});
|
|
342
387
|
return groups;
|
|
343
388
|
}
|
|
389
|
+
// ── Internal pass helpers ───────────────────────────────────────────────
|
|
390
|
+
/**
 * First directory segment of a `/`-separated path, or `null` when the
 * file has no parent directory (it sits at the source root) or when that
 * first segment is empty.
 */
function topLevelDir(relativePath) {
    const parent = dirname(relativePath);
    const atRoot = parent === "." || parent === "";
    if (atRoot)
        return null;
    const slash = parent.indexOf("/");
    const first = slash === -1 ? parent : parent.slice(0, slash);
    return first.length > 0 ? first : null;
}
|
|
401
|
+
/**
 * Tally how many of `files` fall under each first-level directory.
 * Files for which `topLevelDir` yields `null` are not counted.
 *
 * @returns Map from top-level directory name (as written in the path) to
 *   the number of files beneath it.
 */
function countTopLevelDirs(files) {
    return files.reduce((tally, { path }) => {
        const bucket = topLevelDir(path);
        if (bucket) {
            tally.set(bucket, (tally.get(bucket) ?? 0) + 1);
        }
        return tally;
    }, new Map());
}
|
|
411
|
+
/**
 * Convert connected components (lists of paths) into vertical-group
 * descriptors.
 *
 * Components with fewer than MIN_COMPONENT_SIZE resolvable files are
 * dropped. Components of up to MAX_COMPONENT_SIZE files become one group;
 * anything larger is split with `bisectByDirectory`, and each resulting
 * part that is still large enough becomes its own group.
 *
 * @param components - Path lists, one per connected component.
 * @param byPath - Lookup from path to file record; unknown paths are skipped.
 */
function buildComponentGroups(components, byPath) {
    const groups = [];
    const bigEnough = (list) => list.length >= MIN_COMPONENT_SIZE;
    for (const paths of components) {
        if (!bigEnough(paths))
            continue;
        const members = [];
        for (const p of paths) {
            const info = byPath.get(p);
            if (info)
                members.push(info);
        }
        if (!bigEnough(members))
            continue;
        const parts = members.length > MAX_COMPONENT_SIZE
            ? bisectByDirectory(members)
            : [members];
        for (const part of parts) {
            if (bigEnough(part))
                groups.push(componentToGroup(part));
        }
    }
    return groups;
}
|
|
438
|
+
/**
 * Describe one import-graph cluster as a vertical group.
 *
 * The group is named after the last segment of the files' common
 * directory. When the files only share the source root there is no such
 * segment and the name falls back to `"cluster"`.
 *
 * The key is `<name>-cluster`. It is NOT unique per component: clusters
 * that resolve to the same directory name share a key.
 *
 * @param files - Files belonging to the cluster.
 * @returns `{ key, label, files }` for the cluster.
 */
function componentToGroup(files) {
    const commonDir = commonDirectory(files);
    // The common directory may be "." (source root); treat that like an
    // empty segment so the "cluster" fallback applies instead of labelling
    // the group ".".
    const basename = commonDir
        .split("/")
        .filter((s) => s.length > 0 && s !== ".")
        .pop() ?? "cluster";
    const label = toDisplayLabel(basename);
    // The "-cluster" suffix keeps these keys apart from plain directory keys.
    const key = basename.toLowerCase() + "-cluster";
    return { key, label, files };
}
|
|
446
|
+
/**
 * Partition a large component by the first directory segment of each
 * file's path (root-level files share the empty segment).
 *
 * Partitions with at least MIN_COMPONENT_SIZE files are returned as-is, in
 * first-seen order. Files from smaller partitions are pooled into one
 * "mixed" bucket, which is appended last and only if it reaches
 * MIN_COMPONENT_SIZE itself; otherwise those files are left out.
 *
 * Note: when every file shares the same first segment, the result is a
 * single bucket containing all of them.
 */
function bisectByDirectory(files) {
    const byFirstSegment = new Map();
    for (const file of files) {
        const parent = dirname(file.path);
        const head = parent === "." ? "" : parent.split("/")[0] ?? "";
        const members = byFirstSegment.get(head);
        if (members) {
            members.push(file);
        }
        else {
            byFirstSegment.set(head, [file]);
        }
    }
    const result = [];
    const leftovers = [];
    byFirstSegment.forEach((members) => {
        if (members.length < MIN_COMPONENT_SIZE) {
            leftovers.push(...members);
        }
        else {
            result.push(members);
        }
    });
    if (leftovers.length >= MIN_COMPONENT_SIZE) {
        result.push(leftovers);
    }
    return result;
}
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Import graph extraction for Hem.
|
|
3
|
+
*
|
|
4
|
+
* Produces two views of a file set, both built from regex-based import
|
|
5
|
+
* scanning (no AST / compiler API):
|
|
6
|
+
*
|
|
7
|
+
* - A **local** graph of resolved relative imports between known files,
|
|
8
|
+
* used by grouping for connected-component clustering.
|
|
9
|
+
* - An **external** map of bare-specifier imports per file (with line
|
|
10
|
+
* numbers), used by exploration to match packages against the
|
|
11
|
+
* integration catalog with file:line citations.
|
|
12
|
+
*/
|
|
13
|
+
import type { FileInfo } from "./types.js";
|
|
14
|
+
/** One observation of an external import in a file. */
|
|
15
|
+
export interface ImportOccurrence {
|
|
16
|
+
/** The bare specifier as written (e.g., `"@azure/storage-blob"`, `"react"`). */
|
|
17
|
+
specifier: string;
|
|
18
|
+
/** 1-based line number where the import appears. */
|
|
19
|
+
line: number;
|
|
20
|
+
}
|
|
21
|
+
/** Two views of the import graph. */
|
|
22
|
+
export interface ImportAnalysis {
|
|
23
|
+
/**
|
|
24
|
+
* Undirected-friendly adjacency: file path → list of resolved local file
|
|
25
|
+
* paths it imports. Only includes edges where the target is a known file
|
|
26
|
+
* in the input set.
|
|
27
|
+
*/
|
|
28
|
+
localEdges: Map<string, string[]>;
|
|
29
|
+
/**
|
|
30
|
+
* External imports per file (npm / PyPI / Go module / etc. bare
|
|
31
|
+
* specifiers). Each entry carries the original specifier and the line
|
|
32
|
+
* number it was found on so catalog hits can produce file:line citations.
|
|
33
|
+
*/
|
|
34
|
+
externalImports: Map<string, ImportOccurrence[]>;
|
|
35
|
+
}
|
|
36
|
+
/**
|
|
37
|
+
* Build the import graph for a set of files. Files that fail to read are
|
|
38
|
+
* silently skipped (they contribute no edges). Reads run in parallel with
|
|
39
|
+
* a bounded concurrency to keep wall-clock time low on large projects.
|
|
40
|
+
*/
|
|
41
|
+
export declare function buildImportGraph(files: FileInfo[]): Promise<ImportAnalysis>;
|
|
42
|
+
/**
|
|
43
|
+
* Union-find based connected-components over an undirected view of the
|
|
44
|
+
* local graph. `universe` is the set of files under consideration;
|
|
45
|
+
* isolated nodes appear as singleton components.
|
|
46
|
+
*/
|
|
47
|
+
export declare function connectedComponents(universe: readonly string[], localEdges: Map<string, string[]>): string[][];
|
|
48
|
+
/**
|
|
49
|
+
* Compute fan-in (how many files import this file) and fan-out (how many
|
|
50
|
+
* files this file imports) for every node in `universe`.
|
|
51
|
+
*/
|
|
52
|
+
export declare function computeDegrees(universe: readonly string[], localEdges: Map<string, string[]>): Map<string, {
|
|
53
|
+
fanIn: number;
|
|
54
|
+
fanOut: number;
|
|
55
|
+
}>;
|
|
56
|
+
/**
|
|
57
|
+
* Identify files participating in an import cycle. Uses iterative Tarjan's
|
|
58
|
+
* SCC; any SCC with ≥2 members or a self-loop marks its members as cyclic.
|
|
59
|
+
*/
|
|
60
|
+
export declare function nodesInCycles(localEdges: Map<string, string[]>): Set<string>;
|
|
61
|
+
/**
|
|
62
|
+
* Yield every import specifier found in `content` along with the 1-based
|
|
63
|
+
* line number it appears on.
|
|
64
|
+
*/
|
|
65
|
+
export declare function extractImports(content: string): Generator<{
|
|
66
|
+
specifier: string;
|
|
67
|
+
line: number;
|
|
68
|
+
}>;
|
|
69
|
+
/**
|
|
70
|
+
* Resolve a relative specifier from `fromFile` against the set of known
|
|
71
|
+
* file paths. Handles the common `.js` → `.ts`/`.tsx` mapping used in
|
|
72
|
+
* TypeScript sources.
|
|
73
|
+
*/
|
|
74
|
+
export declare function resolveRelative(fromFile: string, specifier: string, knownFiles: ReadonlySet<string>): string | null;
|
|
@@ -0,0 +1,330 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Import graph extraction for Hem.
|
|
3
|
+
*
|
|
4
|
+
* Produces two views of a file set, both built from regex-based import
|
|
5
|
+
* scanning (no AST / compiler API):
|
|
6
|
+
*
|
|
7
|
+
* - A **local** graph of resolved relative imports between known files,
|
|
8
|
+
* used by grouping for connected-component clustering.
|
|
9
|
+
* - An **external** map of bare-specifier imports per file (with line
|
|
10
|
+
* numbers), used by exploration to match packages against the
|
|
11
|
+
* integration catalog with file:line citations.
|
|
12
|
+
*/
|
|
13
|
+
import { readFile } from "node:fs/promises";
|
|
14
|
+
import pLimit from "p-limit";
|
|
15
|
+
/**
|
|
16
|
+
* Files larger than this byte count are skipped when building the import
|
|
17
|
+
* graph. Huge generated files (lockfiles, bundled output) rarely contain
|
|
18
|
+
* useful import edges and reading them can stall the pipeline for minutes.
|
|
19
|
+
*/
|
|
20
|
+
const MAX_FILE_BYTES = 2 * 1024 * 1024; // 2 MB
|
|
21
|
+
/** Parallel file reads when building the graph. */
|
|
22
|
+
const READ_CONCURRENCY = 32;
|
|
23
|
+
// ── Regexes ─────────────────────────────────────────────────────────────
|
|
24
|
+
// Static: `import ... from "x"` or `export ... from "x"`
|
|
25
|
+
const STATIC_RE = /(?:import|export)\s+[^;'"`]*?\s+from\s+["']([^"']+)["']/g;
|
|
26
|
+
// Dynamic: `import("x")` or `require("x")`
|
|
27
|
+
const DYNAMIC_RE = /(?:import|require)\s*\(\s*["']([^"']+)["']\s*\)/g;
|
|
28
|
+
// ── Public API ──────────────────────────────────────────────────────────
|
|
29
|
+
/**
 * Scan every file for import specifiers and build both import views.
 *
 * Binary files, files larger than MAX_FILE_BYTES and files that cannot be
 * read are skipped without error and contribute nothing. Relative
 * specifiers that resolve to another known file become local edges
 * (self-edges are dropped, duplicates removed); bare specifiers are
 * recorded with their line number. Any other specifier is ignored.
 * At most READ_CONCURRENCY files are processed at a time.
 *
 * @param files - Files to scan; their `path` values define the set of
 *   known local import targets.
 * @returns `localEdges` and `externalImports`, each keyed by file path and
 *   containing only files that produced at least one entry.
 */
export async function buildImportGraph(files) {
    const knownPaths = new Set(files.map((f) => f.path));
    const localEdges = new Map();
    const externalImports = new Map();
    const scanFile = async (file) => {
        if (file.isBinary || file.size > MAX_FILE_BYTES)
            return;
        let text;
        try {
            text = await readFile(file.absolutePath, "utf-8");
        }
        catch {
            return;
        }
        const localTargets = [];
        const externalHits = [];
        for (const { specifier, line } of extractImports(text)) {
            if (isRelativeSpecifier(specifier)) {
                const target = resolveRelative(file.path, specifier, knownPaths);
                if (target && target !== file.path) {
                    localTargets.push(target);
                }
                continue;
            }
            // Absolute filesystem paths and URL-like specifiers are ignored.
            if (isBareSpecifier(specifier)) {
                externalHits.push({ specifier, line });
            }
        }
        if (localTargets.length > 0) {
            localEdges.set(file.path, dedupe(localTargets));
        }
        if (externalHits.length > 0) {
            externalImports.set(file.path, externalHits);
        }
    };
    const limit = pLimit(READ_CONCURRENCY);
    await Promise.all(files.map((file) => limit(() => scanFile(file))));
    return { localEdges, externalImports };
}
|
|
74
|
+
/**
 * Connected components of the local import graph, treating every edge as
 * undirected (union-find with union-by-rank and path compression).
 *
 * @param universe - Iterable of the files under consideration. It is
 *   consumed exactly once and de-duplicated, so one-shot iterables and
 *   repeated entries are safe.
 * @param localEdges - Map of file → imported files. Edges with either end
 *   outside `universe` are ignored.
 * @returns One array per component, members sorted with `localeCompare`.
 *   Components appear in the order their first member occurs in `universe`;
 *   isolated files come back as singleton components.
 */
export function connectedComponents(universe, localEdges) {
    // Materialise once: the nodes are needed for set-up, membership checks
    // and the final grouping pass.
    const nodes = [...new Set(universe)];
    const parent = new Map(nodes.map((node) => [node, node]));
    const rank = new Map(nodes.map((node) => [node, 0]));
    const find = (x) => {
        let cur = x;
        // Path halving: point each visited node at its grandparent.
        while (parent.get(cur) !== cur) {
            const grandparent = parent.get(parent.get(cur));
            parent.set(cur, grandparent);
            cur = grandparent;
        }
        return cur;
    };
    const union = (a, b) => {
        const ra = find(a);
        const rb = find(b);
        if (ra === rb) {
            return;
        }
        const rankA = rank.get(ra);
        const rankB = rank.get(rb);
        if (rankA < rankB) {
            parent.set(ra, rb);
            return;
        }
        parent.set(rb, ra);
        if (rankA === rankB) {
            rank.set(ra, rankA + 1);
        }
    };
    for (const [from, tos] of localEdges) {
        if (!parent.has(from)) {
            continue;
        }
        for (const to of tos) {
            if (parent.has(to)) {
                union(from, to);
            }
        }
    }
    const byRoot = new Map();
    for (const node of nodes) {
        const root = find(node);
        const members = byRoot.get(root);
        if (members) {
            members.push(node);
        }
        else {
            byRoot.set(root, [node]);
        }
    }
    return [...byRoot.values()].map((members) => members.sort((a, b) => a.localeCompare(b)));
}
|
|
133
|
+
/**
 * Compute fan-in (how many files in `universe` import this file) and
 * fan-out (how many files in `universe` this file imports) for every node
 * in `universe`.
 *
 * Both degrees are measured over the same edge set: an edge counts only
 * when its source and its target are both in `universe`, and repeated
 * targets in one edge list count once. This keeps total fan-in equal to
 * total fan-out.
 *
 * @param universe - Iterable of files to report on.
 * @param localEdges - Map of file → imported files.
 * @returns Map of file → `{ fanIn, fanOut }`, one entry per universe node.
 */
export function computeDegrees(universe, localEdges) {
    const degrees = new Map();
    for (const node of universe) {
        degrees.set(node, { fanIn: 0, fanOut: 0 });
    }
    for (const [from, tos] of localEdges) {
        const source = degrees.get(from);
        if (!source) {
            continue;
        }
        for (const to of new Set(tos)) {
            const target = degrees.get(to);
            if (!target) {
                // Target is outside the universe: not an edge of this subgraph.
                continue;
            }
            source.fanOut += 1;
            target.fanIn += 1;
        }
    }
    return degrees;
}
|
|
155
|
+
/**
 * Collect every file that takes part in an import cycle.
 *
 * Runs Tarjan's strongly-connected-components algorithm with an explicit
 * frame stack instead of recursion. Members of any SCC with two or more
 * nodes are cyclic, as is a single node that lists itself as a target.
 *
 * @param localEdges - Map of file → imported files.
 * @returns Set of files that belong to at least one cycle.
 */
export function nodesInCycles(localEdges) {
    const discovery = new Map(); // node → discovery index
    const low = new Map(); // node → lowest index reachable
    const pending = []; // Tarjan's node stack
    const isPending = new Set(); // membership mirror of `pending`
    const cyclic = new Set();
    let nextIndex = 0;
    const targetsOf = (node) => localEdges.get(node) ?? [];
    // First visit of a node: number it, stack it, and start iterating its edges.
    const open = (node, frames) => {
        discovery.set(node, nextIndex);
        low.set(node, nextIndex);
        nextIndex += 1;
        pending.push(node);
        isPending.add(node);
        frames.push({ node, iter: targetsOf(node)[Symbol.iterator]() });
    };
    const explore = (root) => {
        const frames = [];
        open(root, frames);
        while (frames.length > 0) {
            const top = frames[frames.length - 1];
            const step = top.iter.next();
            if (!step.done) {
                const target = step.value;
                if (!discovery.has(target)) {
                    // Tree edge: descend.
                    open(target, frames);
                }
                else if (isPending.has(target)) {
                    // Edge back into the current stack: tighten the lowlink.
                    low.set(top.node, Math.min(low.get(top.node), discovery.get(target)));
                }
                continue;
            }
            // All edges of top.node are processed; it roots an SCC when its
            // lowlink still equals its own discovery index.
            if (low.get(top.node) === discovery.get(top.node)) {
                const members = [];
                for (;;) {
                    const popped = pending.pop();
                    isPending.delete(popped);
                    members.push(popped);
                    if (popped === top.node) {
                        break;
                    }
                }
                if (members.length >= 2 || targetsOf(top.node).includes(top.node)) {
                    members.forEach((member) => cyclic.add(member));
                }
            }
            frames.pop();
            // Hand the finished node's lowlink up to the frame that opened it.
            const caller = frames[frames.length - 1];
            if (caller !== undefined) {
                low.set(caller.node, Math.min(low.get(caller.node), low.get(top.node)));
            }
        }
    };
    // Every node that appears as a source or a target is a potential start.
    const everyNode = new Set();
    for (const [from, tos] of localEdges) {
        everyNode.add(from);
        for (const to of tos) {
            everyNode.add(to);
        }
    }
    for (const node of everyNode) {
        if (!discovery.has(node)) {
            explore(node);
        }
    }
    return cyclic;
}
|
|
237
|
+
// ── Internal helpers ────────────────────────────────────────────────────
|
|
238
|
+
/**
 * Yield every import specifier found in `content` along with the 1-based
 * line number on which the matching statement starts.
 *
 * All `STATIC_RE` matches are yielded first, then all `DYNAMIC_RE` matches;
 * within each group matches are in source order.
 *
 * Each scan runs on a private copy of the pattern, so the shared
 * module-level regexes are never mutated and several generators can be
 * consumed in an interleaved fashion without disturbing one another.
 *
 * @param content - Source text to scan.
 * @returns Generator of `{ specifier, line }`.
 */
export function* extractImports(content) {
    const lineOf = createLineLookup(content);
    for (const pattern of [STATIC_RE, DYNAMIC_RE]) {
        // Fresh regex per scan: `lastIndex` state stays local to this generator.
        const scanner = new RegExp(pattern.source, pattern.flags);
        for (const match of content.matchAll(scanner)) {
            yield { specifier: match[1], line: lineOf(match.index) };
        }
    }
}
/**
 * Index the newline positions of `content` once and return a function that
 * maps a character offset to its 1-based line number by binary search.
 * This replaces a rescan from offset 0 for every match.
 */
function createLineLookup(content) {
    const newlineOffsets = [];
    for (let at = content.indexOf("\n"); at !== -1; at = content.indexOf("\n", at + 1)) {
        newlineOffsets.push(at);
    }
    return (offset) => {
        // Count the newlines located strictly before `offset`.
        let lo = 0;
        let hi = newlineOffsets.length;
        while (lo < hi) {
            const mid = (lo + hi) >>> 1;
            if (newlineOffsets[mid] < offset) {
                lo = mid + 1;
            }
            else {
                hi = mid;
            }
        }
        return lo + 1;
    };
}
|
|
262
|
+
/**
 * True when `spec` is a relative module specifier: exactly `.` or `..`, or
 * anything beginning with `./` or `../`.
 */
function isRelativeSpecifier(spec) {
    // One or two leading dots, followed by a slash or the end of the string.
    return /^\.{1,2}(?:\/|$)/.test(spec);
}
|
|
265
|
+
/**
 * A bare specifier is an npm/Go/PyPI-style package reference: not relative,
 * not an absolute filesystem path, not a URL, and not a bundler alias
 * starting with `@/` or `~`.
 *
 * @param spec - Import specifier as written in source.
 * @returns `true` when `spec` should be treated as an external package.
 */
function isBareSpecifier(spec) {
    if (spec.length === 0) {
        return false;
    }
    if (isRelativeSpecifier(spec)) {
        return false;
    }
    if (spec.startsWith("/") || spec.startsWith("~")) {
        return false;
    }
    if (/^[a-z][a-z0-9+.-]*:\/\//i.test(spec)) {
        return false; // URL
    }
    if (spec.startsWith("@")) {
        // A scoped package is `@scope/name` with a non-empty scope and a
        // non-empty name. `@/alias` has an empty scope (slash at index 1) and
        // `@thing` has no slash at all; neither is a package.
        const slash = spec.indexOf("/");
        if (slash <= 1 || slash === spec.length - 1) {
            return false;
        }
    }
    return true;
}
|
|
288
|
+
/**
 * Resolve a relative specifier from `fromFile` against the set of known
 * file paths. Handles the common `.js` → `.ts`/`.tsx` mapping used in
 * TypeScript sources, extensionless imports, and directory imports that
 * land on `index.ts` / `index.tsx`.
 *
 * Paths are `/`-separated and relative to the same root as `knownFiles`.
 * A specifier that climbs above that root (more `..` segments than there
 * are directories to leave) cannot name a known file and yields `null`.
 *
 * @param fromFile - Path of the importing file.
 * @param specifier - Relative specifier as written in source.
 * @param knownFiles - Set of candidate target paths.
 * @returns The matching known path, or `null` when nothing matches.
 */
export function resolveRelative(fromFile, specifier, knownFiles) {
    const lastSlash = fromFile.lastIndexOf("/");
    const dir = lastSlash === -1 ? "" : fromFile.slice(0, lastSlash);
    const joined = dir ? dir + "/" + specifier : specifier;
    const resolved = [];
    for (const seg of joined.split("/")) {
        if (seg === "." || seg === "") {
            continue;
        }
        if (seg === "..") {
            if (resolved.length === 0) {
                // Escapes the root: clamping here would alias an outside path
                // onto an unrelated file inside the root.
                return null;
            }
            resolved.pop();
            continue;
        }
        resolved.push(seg);
    }
    const resolvedPath = resolved.join("/");
    const candidates = [];
    if (resolvedPath !== "") {
        const withoutJs = resolvedPath.replace(/\.js$/, "");
        candidates.push(resolvedPath, withoutJs + ".ts", withoutJs + ".tsx", resolvedPath + ".ts", resolvedPath + ".tsx");
    }
    // Directory imports — try `index.ts` / `index.tsx`. When the import lands
    // on the root itself there is no directory prefix to prepend.
    const indexPrefix = resolvedPath === "" ? "" : resolvedPath + "/";
    candidates.push(indexPrefix + "index.ts", indexPrefix + "index.tsx");
    for (const candidate of candidates) {
        if (knownFiles.has(candidate)) {
            return candidate;
        }
    }
    return null;
}
|
|
328
|
+
/** Return a new array holding the distinct items of `xs`, in first-seen order. */
function dedupe(xs) {
    const unique = new Set(xs);
    return Array.from(unique);
}
|
package/dist/index.js
CHANGED
|
@@ -33,7 +33,8 @@ import { createOpencode } from "@opencode-ai/sdk";
|
|
|
33
33
|
import { findFreePort, trackServer, untrackServer, startWithRetry } from "./server-utils.js";
|
|
34
34
|
import { discoverFiles, detectProjectName } from "./discovery.js";
|
|
35
35
|
import { groupFiles } from "./grouping.js";
|
|
36
|
-
import {
|
|
36
|
+
import { buildImportGraph } from "./import-graph.js";
|
|
37
|
+
import { loadGroupingPriors } from "./grouping-priors.js";
|
|
37
38
|
import { DocumentationAgent } from "./agents/documentation-agent.js";
|
|
38
39
|
import { ArchitectureAgent } from "./agents/architecture-agent.js";
|
|
39
40
|
import { IndexAgent } from "./agents/index-agent.js";
|
|
@@ -511,15 +512,27 @@ export async function handleGenerate(opts, deps = defaultDeps) {
|
|
|
511
512
|
await waitUntilExit();
|
|
512
513
|
return null;
|
|
513
514
|
}
|
|
514
|
-
// ── Step 10c:
|
|
515
|
-
const
|
|
516
|
-
|
|
517
|
-
|
|
518
|
-
|
|
519
|
-
|
|
520
|
-
|
|
521
|
-
|
|
515
|
+
// ── Step 10c: Deterministic grouping (priors + import graph) ──────
|
|
516
|
+
const priors = await loadGroupingPriors(absoluteDestination);
|
|
517
|
+
if (cliOptions.verbose && priors.docFolders.length > 0) {
|
|
518
|
+
verboseLog(`[grouping] ${priors.docFolders.length} doc-folder prior(s): ${priors.docFolders
|
|
519
|
+
.map((p) => p.name)
|
|
520
|
+
.join(", ")}`);
|
|
521
|
+
}
|
|
522
|
+
if (cliOptions.verbose) {
|
|
523
|
+
verboseLog(`[grouping] building import graph from ${textFiles.length} files...`);
|
|
522
524
|
}
|
|
525
|
+
const importGraphStart = Date.now();
|
|
526
|
+
const importAnalysis = await buildImportGraph(textFiles);
|
|
527
|
+
if (cliOptions.verbose) {
|
|
528
|
+
const elapsed = ((Date.now() - importGraphStart) / 1000).toFixed(1);
|
|
529
|
+
verboseLog(`[grouping] import graph built in ${elapsed}s: ${importAnalysis.localEdges.size} files with local edges, ` +
|
|
530
|
+
`${importAnalysis.externalImports.size} with external imports`);
|
|
531
|
+
}
|
|
532
|
+
const groups = deps.groupFiles(textFiles, {
|
|
533
|
+
priors,
|
|
534
|
+
localEdges: importAnalysis.localEdges,
|
|
535
|
+
});
|
|
523
536
|
const featureGroups = groups.filter((g) => g.type === "vertical").length;
|
|
524
537
|
const layerGroups = groups.filter((g) => g.type === "horizontal").length;
|
|
525
538
|
if (cliOptions.verbose) {
|
package/dist/orchestrator.d.ts
CHANGED
|
@@ -16,6 +16,7 @@
|
|
|
16
16
|
*
|
|
17
17
|
* Reference: FR-005, FR-006, FR-007.
|
|
18
18
|
*/
|
|
19
|
+
import { type LimitFunction } from "p-limit";
|
|
19
20
|
import type { ModelSelection, CLIOptions, FileGroup, GenerationContext, GenerationResult, ProgressState, ExplorationFindings } from "./types.js";
|
|
20
21
|
import { createOpencode } from "@opencode-ai/sdk";
|
|
21
22
|
import { DocumentationAgent } from "./agents/documentation-agent.js";
|
|
@@ -142,9 +143,15 @@ export declare function filterRelevantDocs(allDocs: Array<{
|
|
|
142
143
|
* @param onGroupComplete - Optional callback invoked each time a group's
|
|
143
144
|
* exploration finishes successfully. Used by the
|
|
144
145
|
* streaming pipeline to launch doc agents eagerly.
|
|
146
|
+
* @param sharedLimit - Optional p-limit instance to share with the
|
|
147
|
+
* downstream documentation phase. When omitted, a
|
|
148
|
+
* local instance sized to `computeMaxConcurrency`
|
|
149
|
+
* is created. Passing a shared limit is how the
|
|
150
|
+
* real pipeline prevents exploration + generation
|
|
151
|
+
* from each spawning their own resource budget.
|
|
145
152
|
* @returns All successful `ExplorationFindings[]`.
|
|
146
153
|
*/
|
|
147
|
-
export declare function runExploration(explorationAgent: ExplorationAgent, groups: FileGroup[], options: CLIOptions, onProgress: (state: Partial<ProgressState>) => void, onGroupComplete?: (groupId: string, findings: ExplorationFindings) => void): Promise<ExplorationFindings[]>;
|
|
154
|
+
export declare function runExploration(explorationAgent: ExplorationAgent, groups: FileGroup[], options: CLIOptions, onProgress: (state: Partial<ProgressState>) => void, onGroupComplete?: (groupId: string, findings: ExplorationFindings) => void, sharedLimit?: LimitFunction): Promise<ExplorationFindings[]>;
|
|
148
155
|
/**
|
|
149
156
|
* Runs the doc agent for a single file group.
|
|
150
157
|
*
|
package/dist/orchestrator.js
CHANGED
|
@@ -390,9 +390,15 @@ async function resolveRelevantDocs(searchIndex, destinationPath, existingDocs, g
|
|
|
390
390
|
* @param onGroupComplete - Optional callback invoked each time a group's
|
|
391
391
|
* exploration finishes successfully. Used by the
|
|
392
392
|
* streaming pipeline to launch doc agents eagerly.
|
|
393
|
+
* @param sharedLimit - Optional p-limit instance to share with the
|
|
394
|
+
* downstream documentation phase. When omitted, a
|
|
395
|
+
* local instance sized to `computeMaxConcurrency`
|
|
396
|
+
* is created. Passing a shared limit is how the
|
|
397
|
+
* real pipeline prevents exploration + generation
|
|
398
|
+
* from each spawning their own resource budget.
|
|
393
399
|
* @returns All successful `ExplorationFindings[]`.
|
|
394
400
|
*/
|
|
395
|
-
export async function runExploration(explorationAgent, groups, options, onProgress, onGroupComplete) {
|
|
401
|
+
export async function runExploration(explorationAgent, groups, options, onProgress, onGroupComplete, sharedLimit) {
|
|
396
402
|
const sourceRoot = resolve(options.source);
|
|
397
403
|
const verbose = options.verbose
|
|
398
404
|
? (msg) => {
|
|
@@ -403,7 +409,8 @@ export async function runExploration(explorationAgent, groups, options, onProgre
|
|
|
403
409
|
const effectiveConcurrency = computeMaxConcurrency(options.concurrency);
|
|
404
410
|
if (verbose) {
|
|
405
411
|
verbose(`[orchestrator] Resource limits: ${describeResourceLimits(options.concurrency)}`);
|
|
406
|
-
verbose(`[orchestrator] Starting exploration: ${groups.length} groups, concurrency=${effectiveConcurrency}`
|
|
412
|
+
verbose(`[orchestrator] Starting exploration: ${groups.length} groups, concurrency=${effectiveConcurrency}` +
|
|
413
|
+
(sharedLimit ? " (shared with generation)" : ""));
|
|
407
414
|
}
|
|
408
415
|
// Build allGroups summary for cross-group awareness
|
|
409
416
|
const allGroups = groups.map((group) => ({
|
|
@@ -429,7 +436,7 @@ export async function runExploration(explorationAgent, groups, options, onProgre
|
|
|
429
436
|
phase: "exploration",
|
|
430
437
|
explorationStatuses: [...explorationStatuses],
|
|
431
438
|
});
|
|
432
|
-
const limit = pLimit(effectiveConcurrency);
|
|
439
|
+
const limit = sharedLimit ?? pLimit(effectiveConcurrency);
|
|
433
440
|
// ── Single-agent path (existing behavior, totalFiles < threshold) ──
|
|
434
441
|
if (!isMultiAgent) {
|
|
435
442
|
const results = await Promise.allSettled(groups.map((group, i) => limit(async () => {
|
|
@@ -756,6 +763,14 @@ export async function generateDocumentation(agent, groups, options, onProgress,
|
|
|
756
763
|
}
|
|
757
764
|
// Accumulated findings — grows as explorations complete.
|
|
758
765
|
const allFindings = [];
|
|
766
|
+
// ── Shared concurrency limit ─────────────────────────────────────
|
|
767
|
+
// Exploration and documentation run concurrently via the streaming
|
|
768
|
+
// gate pattern below. Without a shared p-limit each phase would spin
|
|
769
|
+
// up its own resource budget, doubling the effective concurrency and
|
|
770
|
+
// OOMing Node on large projects. One semaphore spans both phases so
|
|
771
|
+
// `computeMaxConcurrency` actually caps total in-flight LLM sessions.
|
|
772
|
+
const sharedConcurrency = computeMaxConcurrency(options.concurrency);
|
|
773
|
+
const sharedLimit = pLimit(sharedConcurrency);
|
|
759
774
|
// ── Launch exploration + existingDocs scan in parallel ───────────
|
|
760
775
|
let explorationPromise;
|
|
761
776
|
let existingDocsPromise;
|
|
@@ -768,7 +783,7 @@ export async function generateDocumentation(agent, groups, options, onProgress,
|
|
|
768
783
|
(groupId, findings) => {
|
|
769
784
|
allFindings.push(findings);
|
|
770
785
|
groupGates.get(groupId)?.resolve(findings);
|
|
771
|
-
});
|
|
786
|
+
}, sharedLimit);
|
|
772
787
|
// When exploration fully settles, resolve any remaining gates for groups
|
|
773
788
|
// whose exploration failed so their doc agents can proceed without findings.
|
|
774
789
|
// On AuthExpiredError, reject all remaining gates to abort doc agents.
|
|
@@ -842,16 +857,15 @@ export async function generateDocumentation(agent, groups, options, onProgress,
|
|
|
842
857
|
completedSessions: 0,
|
|
843
858
|
failedSessions: 0,
|
|
844
859
|
});
|
|
845
|
-
const docConcurrency = computeMaxConcurrency(options.concurrency);
|
|
846
860
|
// ── Multi-agent documentation detection ──────────────────────────
|
|
847
861
|
const totalFiles = groups.reduce((sum, g) => sum + g.files.length, 0);
|
|
848
862
|
const docAgentsPerGroup = computeAgentsPerGroup(totalFiles);
|
|
849
863
|
const isMultiAgentDoc = docAgentsPerGroup > 1;
|
|
850
864
|
if (verbose) {
|
|
851
|
-
verbose(`[orchestrator] Starting documentation: ${groups.length} groups, concurrency=${
|
|
865
|
+
verbose(`[orchestrator] Starting documentation: ${groups.length} groups, concurrency=${sharedConcurrency} (shared with exploration)` +
|
|
852
866
|
(isMultiAgentDoc ? `, multi-agent=${docAgentsPerGroup} agents/group` : ""));
|
|
853
867
|
}
|
|
854
|
-
const limit =
|
|
868
|
+
const limit = sharedLimit;
|
|
855
869
|
const results = await Promise.allSettled(groups.map((group, i) => {
|
|
856
870
|
if (!isMultiAgentDoc) {
|
|
857
871
|
// ── Single-agent path (existing behavior) ──────────────────
|