gitnexushub 0.4.5 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/api.d.ts +90 -1
- package/dist/api.js +34 -0
- package/dist/index.js +27 -0
- package/dist/install-ci-command.d.ts +176 -0
- package/dist/install-ci-command.js +680 -0
- package/dist/wiki/claude.d.ts +11 -5
- package/dist/wiki/claude.js +8 -3
- package/dist/wiki/compose-overview.d.ts +29 -0
- package/dist/wiki/compose-overview.js +48 -0
- package/dist/wiki/concurrency.d.ts +20 -0
- package/dist/wiki/concurrency.js +91 -0
- package/dist/wiki/helpers.d.ts +102 -0
- package/dist/wiki/helpers.js +308 -0
- package/dist/wiki/incremental.d.ts +72 -0
- package/dist/wiki/incremental.js +214 -0
- package/dist/wiki/index.js +37 -0
- package/dist/wiki/session.d.ts +10 -0
- package/dist/wiki/session.js +89 -9
- package/dist/wiki/upload-command.d.ts +12 -0
- package/dist/wiki/upload-command.js +384 -53
- package/hooks/gitnexus-enterprise-hook.cjs +134 -0
- package/package.json +1 -1
- package/skills/gitnexus-debugging.md +89 -89
- package/skills/gitnexus-exploring.md +78 -78
- package/skills/gitnexus-impact-analysis.md +99 -99
- package/skills/gitnexus-pr-review.md +161 -161
package/dist/wiki/claude.d.ts
CHANGED
|
@@ -1,12 +1,18 @@
|
|
|
1
1
|
export interface ClaudeGenerationResult {
|
|
2
2
|
text: string;
|
|
3
|
+
structuredOutput?: unknown;
|
|
3
4
|
durationMs: number;
|
|
4
5
|
}
|
|
6
|
+
export interface ClaudeRunOptions {
|
|
7
|
+
cwd: string;
|
|
8
|
+
model?: string;
|
|
9
|
+
allowedTools?: string[];
|
|
10
|
+
outputFormat?: {
|
|
11
|
+
type: 'json_schema';
|
|
12
|
+
schema: Record<string, unknown>;
|
|
13
|
+
};
|
|
14
|
+
}
|
|
5
15
|
export interface ClaudeRunner {
|
|
6
|
-
run(prompt: string, opts:
|
|
7
|
-
cwd: string;
|
|
8
|
-
model?: string;
|
|
9
|
-
allowedTools?: string[];
|
|
10
|
-
}): Promise<ClaudeGenerationResult>;
|
|
16
|
+
run(prompt: string, opts: ClaudeRunOptions): Promise<ClaudeGenerationResult>;
|
|
11
17
|
}
|
|
12
18
|
export declare function createClaudeRunner(): ClaudeRunner;
|
package/dist/wiki/claude.js
CHANGED
|
@@ -14,6 +14,7 @@ export function createClaudeRunner() {
|
|
|
14
14
|
async run(prompt, opts) {
|
|
15
15
|
const start = Date.now();
|
|
16
16
|
let finalText = '';
|
|
17
|
+
let structuredOutput;
|
|
17
18
|
try {
|
|
18
19
|
for await (const msg of query({
|
|
19
20
|
prompt,
|
|
@@ -21,11 +22,15 @@ export function createClaudeRunner() {
|
|
|
21
22
|
cwd: opts.cwd,
|
|
22
23
|
model: opts.model,
|
|
23
24
|
allowedTools: opts.allowedTools ?? DEFAULT_ALLOWED_TOOLS,
|
|
25
|
+
outputFormat: opts.outputFormat,
|
|
24
26
|
},
|
|
25
27
|
})) {
|
|
26
28
|
const m = msg;
|
|
27
|
-
if (m.type === 'result'
|
|
28
|
-
|
|
29
|
+
if (m.type === 'result') {
|
|
30
|
+
if (typeof m.result === 'string')
|
|
31
|
+
finalText = m.result;
|
|
32
|
+
if (m.structured_output !== undefined)
|
|
33
|
+
structuredOutput = m.structured_output;
|
|
29
34
|
}
|
|
30
35
|
}
|
|
31
36
|
}
|
|
@@ -42,7 +47,7 @@ export function createClaudeRunner() {
|
|
|
42
47
|
});
|
|
43
48
|
}
|
|
44
49
|
const durationMs = Date.now() - start;
|
|
45
|
-
return { text: finalText, durationMs };
|
|
50
|
+
return { text: finalText, structuredOutput, durationMs };
|
|
46
51
|
},
|
|
47
52
|
};
|
|
48
53
|
}
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Deterministic wiki overview composer (connect-package mirror of
|
|
3
|
+
* gitnexus/src/core/wiki/compose-overview.ts).
|
|
4
|
+
*
|
|
5
|
+
* Skips the per-repo LLM "synthesize an overview" call by assembling:
|
|
6
|
+
* - Repo title heading
|
|
7
|
+
* - Auto-generated boilerplate
|
|
8
|
+
* - Table of contents linking to module slugs
|
|
9
|
+
* - Per-module section with the module's first-paragraph summary
|
|
10
|
+
* - Optional Mermaid architecture diagram
|
|
11
|
+
*
|
|
12
|
+
* Saves ~1 minute per generation. Quality is comparable because the LLM's
|
|
13
|
+
* overview was largely rephrasing leaf summaries anyway.
|
|
14
|
+
*
|
|
15
|
+
* Mirrored separately rather than imported across packages because
|
|
16
|
+
* gitnexus-connect is a distributable CLI that should not transitively pull
|
|
17
|
+
* in the OSS gitnexus runtime.
|
|
18
|
+
*/
|
|
19
|
+
/** A single module entry rendered into the composed overview page. */
export interface OverviewModule {
    /** Link target emitted for the module in the table of contents and its section heading. */
    slug: string;
    /** Link text shown for the module. */
    title: string;
    /** Summary text placed under the module heading; blank values get a placeholder. */
    summary: string;
}
/** Input for {@link composeOverview}. */
export interface ComposeOverviewParams {
    /** Used as the top-level `#` heading of the page. */
    repoName: string;
    /** Modules listed in the table of contents and in the "Modules" section, in order. */
    modules: OverviewModule[];
    /** Optional Mermaid source; when non-blank it is emitted in a fenced `mermaid` block. */
    architectureMermaid?: string;
}
/**
 * Assemble the overview markdown from the repo name, the module list and an
 * optional architecture diagram, without calling an LLM.
 */
export declare function composeOverview(p: ComposeOverviewParams): string;
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Deterministic wiki overview composer (connect-package mirror of
|
|
3
|
+
* gitnexus/src/core/wiki/compose-overview.ts).
|
|
4
|
+
*
|
|
5
|
+
* Skips the per-repo LLM "synthesize an overview" call by assembling:
|
|
6
|
+
* - Repo title heading
|
|
7
|
+
* - Auto-generated boilerplate
|
|
8
|
+
* - Table of contents linking to module slugs
|
|
9
|
+
* - Per-module section with the module's first-paragraph summary
|
|
10
|
+
* - Optional Mermaid architecture diagram
|
|
11
|
+
*
|
|
12
|
+
* Saves ~1 minute per generation. Quality is comparable because the LLM's
|
|
13
|
+
* overview was largely rephrasing leaf summaries anyway.
|
|
14
|
+
*
|
|
15
|
+
* Mirrored separately rather than imported across packages because
|
|
16
|
+
* gitnexus-connect is a distributable CLI that should not transitively pull
|
|
17
|
+
* in the OSS gitnexus runtime.
|
|
18
|
+
*/
|
|
19
|
+
/**
 * Build the wiki overview page as markdown.
 *
 * Layout: repo heading, boilerplate line, table of contents, one section per
 * module (summary or a placeholder), then an optional Mermaid diagram.
 * Runs of three or more newlines are collapsed to a single blank line.
 *
 * @param p - repo name, modules and optional Mermaid source.
 * @returns the composed markdown document.
 */
export function composeOverview(p) {
    const tocLines = [];
    const sectionLines = [];
    // Single pass over the modules: one TOC bullet and one section each.
    for (const mod of p.modules) {
        const link = `[${mod.title}](${mod.slug})`;
        tocLines.push(`- ${link}`);
        const body = (mod.summary || '').trim() || '_(no summary available)_';
        sectionLines.push(`### ${link}`, '', body, '');
    }
    const diagramLines = [];
    if (p.architectureMermaid && p.architectureMermaid.trim().length > 0) {
        diagramLines.push('## Architecture Diagram', '', '```mermaid', p.architectureMermaid.trim(), '```', '');
    }
    const page = [
        `# ${p.repoName}`,
        '',
        'Auto-generated wiki overview.',
        '',
        '## Table of Contents',
        '',
        ...tocLines,
        '',
        '## Modules',
        '',
        ...sectionLines,
        ...diagramLines,
    ];
    // Collapse accidental multi-blank-line gaps (e.g. an empty module list).
    return page.join('\n').replace(/\n{3,}/g, '\n\n');
}
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Bounded concurrency with adaptive 429 backoff.
|
|
3
|
+
*
|
|
4
|
+
* Ports the Hub's runParallel pattern (gitnexus/src/core/wiki/generator.ts:1004-1056)
|
|
5
|
+
* to the client. Runs up to `concurrency` tasks in parallel; on any task that
|
|
6
|
+
* rejects with an Error whose message contains "429", the active concurrency
|
|
7
|
+
* drops by one (floor 1), the task is requeued, and the scheduler pauses for
|
|
8
|
+
* 5 seconds before retrying. All other errors propagate normally.
|
|
9
|
+
*
|
|
10
|
+
* Returns an array of results in the SAME ORDER as the input items.
|
|
11
|
+
*/
|
|
12
|
+
/** Options accepted by {@link runConcurrent}. */
export interface RunConcurrentOptions {
    /** Upper bound on simultaneously running tasks; values below 1 are rejected. */
    concurrency: number;
    /** Optional abort signal: once aborted no new tasks start, running ones are allowed to settle. */
    abortSignal?: AbortSignal;
    /** Optional hook invoked with the reduced concurrency each time a "429" error lowers it. */
    onRateLimit?: (newConcurrency: number) => void;
}
/**
 * Run `fn` over `items` with bounded parallelism and "429" backoff.
 * The resolved array is index-aligned with `items`.
 */
export declare function runConcurrent<T, R>(items: T[], fn: (item: T, index: number) => Promise<R>, opts: RunConcurrentOptions): Promise<R[]>;
|
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Bounded concurrency with adaptive 429 backoff.
|
|
3
|
+
*
|
|
4
|
+
* Ports the Hub's runParallel pattern (gitnexus/src/core/wiki/generator.ts:1004-1056)
|
|
5
|
+
* to the client. Runs up to `concurrency` tasks in parallel; on any task that
|
|
6
|
+
* rejects with an Error whose message contains "429", the active concurrency
|
|
7
|
+
* drops by one (floor 1), the task is requeued, and the scheduler pauses for
|
|
8
|
+
* 5 seconds before retrying. All other errors propagate normally.
|
|
9
|
+
*
|
|
10
|
+
* Returns an array of results in the SAME ORDER as the input items.
|
|
11
|
+
*/
|
|
12
|
+
/** How long the scheduler stops starting work after a rate-limit ("429") failure. */
const RATE_LIMIT_DELAY_MS = 5000;
/**
 * Run `fn` over `items` with at most `opts.concurrency` tasks in flight.
 *
 * Rate limiting: when a task rejects with an error whose message contains
 * "429", the allowed concurrency drops by one (never below 1), the item is
 * requeued, and no new task is started for RATE_LIMIT_DELAY_MS. Tasks already
 * in flight keep running during the pause.
 *
 * Failure: the first non-429 rejection (or an abort) stops new work; the
 * returned promise rejects with that reason once every in-flight task has
 * settled.
 *
 * @param items - inputs; the result array is index-aligned with this array.
 * @param fn - async worker, called as `fn(item, index)`.
 * @param opts - `concurrency` (>= 1), optional `abortSignal`, optional `onRateLimit` callback.
 * @returns results in the same order as `items`.
 * @throws Error when `opts.concurrency` is not a number >= 1 (including NaN/undefined).
 */
export async function runConcurrent(items, fn, opts) {
    // `!(x >= 1)` also rejects NaN/undefined, which would otherwise start no
    // task and leave the promise pending forever.
    if (!(opts.concurrency >= 1)) {
        throw new Error(`runConcurrent: concurrency must be >= 1, got ${opts.concurrency}`);
    }
    if (items.length === 0)
        return [];
    const signal = opts.abortSignal;
    const results = new Array(items.length);
    let activeConcurrency = opts.concurrency;
    let running = 0;
    let nextIdx = 0;
    // Re-queued indices (from 429 backoff). Drained before picking new work.
    const requeue = [];
    // A dedicated flag instead of a null sentinel, so a task that rejects with
    // null/undefined is still recorded as a failure.
    let failed = false;
    let firstError;
    let settled = false;
    // True while the scheduler is inside a rate-limit pause.
    let paused = false;
    let pauseTimer = null;
    return new Promise((resolve, reject) => {
        const recordFailure = (err) => {
            if (!failed) {
                failed = true;
                firstError = err;
            }
        };
        const onAbort = () => tick();
        const cleanup = () => {
            if (pauseTimer !== null) {
                clearTimeout(pauseTimer);
                pauseTimer = null;
            }
            signal?.removeEventListener?.('abort', onAbort);
        };
        const finishIfDone = () => {
            if (settled)
                return;
            if (failed) {
                // Wait until every in-flight task settles before rejecting.
                if (running === 0) {
                    settled = true;
                    cleanup();
                    reject(firstError);
                }
                return;
            }
            if (running === 0 && nextIdx >= items.length && requeue.length === 0) {
                settled = true;
                cleanup();
                resolve(results);
            }
        };
        const startPause = () => {
            paused = true;
            if (pauseTimer !== null)
                clearTimeout(pauseTimer);
            // Deliberately NOT unref'd: the requeued work depends on this timer,
            // so it must keep the event loop alive until the retry happens.
            pauseTimer = setTimeout(() => {
                pauseTimer = null;
                paused = false;
                tick();
            }, RATE_LIMIT_DELAY_MS);
        };
        const launch = (idx) => {
            running++;
            Promise.resolve()
                .then(() => fn(items[idx], idx))
                // Two-argument then(): a throw in the success path must not be
                // routed into the failure path and decrement `running` twice.
                .then((value) => {
                results[idx] = value;
                running--;
                tick();
            }, (err) => {
                running--;
                const msg = err instanceof Error ? err.message : String(err);
                if (!failed && msg.includes('429')) {
                    activeConcurrency = Math.max(1, activeConcurrency - 1);
                    opts.onRateLimit?.(activeConcurrency);
                    requeue.push(idx);
                    startPause();
                }
                else {
                    // Non-429 error, or a 429 arriving after a failure was
                    // already recorded: never requeue, just try to finish.
                    recordFailure(err);
                }
                tick();
            });
        };
        const tick = () => {
            if (settled)
                return;
            if (!failed && signal?.aborted) {
                recordFailure(signal.reason ?? new Error('aborted'));
            }
            if (!failed && !paused) {
                while (running < activeConcurrency) {
                    let idx;
                    if (requeue.length > 0) {
                        idx = requeue.shift();
                    }
                    else if (nextIdx < items.length) {
                        idx = nextIdx++;
                    }
                    else {
                        break;
                    }
                    launch(idx);
                }
            }
            finishIfDone();
        };
        // React to an abort promptly, even in the middle of a rate-limit pause.
        signal?.addEventListener?.('abort', onAbort, { once: true });
        tick();
    });
}
|
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Pure helpers for client-side wiki generation, ported from the
|
|
3
|
+
* retired server-side WikiGenerator (gitnexus/src/core/wiki/generator.ts).
|
|
4
|
+
*/
|
|
5
|
+
/**
 * Derive a short summary from a generated page.
 * Strategies, in order: text before a `<!-- summary-end -->` marker, text up
 * to the first `##` heading, then truncation near `maxLength` at a sentence
 * boundary when one is available.
 */
export declare function extractSummary(content: string, maxLength?: number): string;
/**
 * Approximate token count, assuming roughly 4 characters per token.
 * Matches gitnexus/src/core/wiki/llm-client.ts:estimateTokens.
 */
export declare function estimateTokens(text: string): number;
/**
 * Total estimated tokens for a module's files, read from disk relative to
 * `repoRoot`. Files that cannot be read count as zero — mirrors Hub's
 * generator.ts:estimateModuleTokens behavior.
 */
export declare function estimateModuleTokens(filePaths: string[], repoRoot: string): Promise<number>;
/**
 * Produce a human-readable project description from the repo root: the first
 * readable config file from a fixed allowlist plus a README excerpt, returned
 * as a multi-line string. Unreadable files are skipped, leaving at least the
 * `Project: <dirname>` line.
 */
export declare function readProjectInfo(repoRoot: string): Promise<string>;
/** Registry record describing one module that other pages may link to. */
export interface ModuleRegistryEntry {
    name: string;
    slug: string;
    symbols: string[];
}
/**
 * Repair broken `[text](slug.md)` links in generated markdown.
 * Resolution order: valid slug > name match > substring match > strip link.
 * Only `[text](slug.md)` patterns are touched — external URLs are left alone.
 */
export declare function validateAndFixCrossReferences(markdown: string, registry: Map<string, ModuleRegistryEntry>): string;
/** A source file together with the symbols it exports. */
export interface FileWithExports {
    filePath: string;
    symbols: Array<{
        name: string;
        type: string;
    }>;
}
/**
 * Guarantee that every file reported by the Hub belongs to some module.
 *
 * The LLM can silently drop files when producing the module tree (especially
 * on longer file lists). Hub's generator.ts:499-502 swept these into an
 * "Other" bucket; this mirrors that. When an "Other" module already exists
 * (e.g., from fallback grouping), the unassigned files are merged into it.
 *
 * When `filesWithExportsRaw` is empty (older Hub that doesn't expose the
 * field), the call is a no-op.
 */
export declare function ensureAllFilesAssigned<T extends {
    slug: string;
    title: string;
    files?: string[];
}>(modules: T[], filesWithExportsRaw: FileWithExports[], makeNode: (slug: string, title: string, files: string[]) => T): T[];
/** Minimal shape of a module-tree node consumed by {@link buildModuleRegistry}. */
interface TreeNodeLike {
    name: string;
    slug: string;
    files?: string[];
    children?: TreeNodeLike[];
}
/**
 * Build a slug→{name,slug,symbols} registry from the module tree and the
 * per-file exported-symbol metadata. At most 10 symbol names are kept per module.
 */
export declare function buildModuleRegistry(tree: TreeNodeLike[], filesWithExports: FileWithExports[]): Map<string, ModuleRegistryEntry>;
/**
 * Render the module registry as a markdown bullet list for prompt injection.
 * `currentSlug`, when given, is left out so a module doesn't link to itself.
 */
export declare function formatModuleRegistry(registry: Map<string, ModuleRegistryEntry>, currentSlug?: string): string;
/**
 * Fallback grouping used when the LLM's module-tree response is malformed.
 * Groups by graph community when a mapping is supplied, otherwise by
 * top-level directory.
 */
export declare function fallbackGrouping(files: FileWithExports[], communityMapping?: Array<{
    label: string;
    files: string[];
}>): Record<string, string[]>;
/** One sub-module produced by {@link splitByCommunity}. */
export interface SplitNode {
    name: string;
    slug: string;
    files: string[];
}
/**
 * Split a module's files into sub-modules by community membership.
 * Each file goes to the community it appears in most.
 * Returns null when the split doesn't yield more than one bucket — the
 * caller should fall back to subdirectory-based splitting.
 */
export declare function splitByCommunity(parentName: string, files: string[], communityMapping: Array<{
    label: string;
    files: string[];
}>): SplitNode[] | null;
export {};
|
|
@@ -0,0 +1,308 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Pure helpers for client-side wiki generation, ported from the
|
|
3
|
+
* retired server-side WikiGenerator (gitnexus/src/core/wiki/generator.ts).
|
|
4
|
+
*/
|
|
5
|
+
/**
 * Pull a short summary out of a generated page.
 *
 * 1. If a `<!-- summary-end -->` marker exists past position 0, return the
 *    trimmed text before it.
 * 2. Otherwise take everything up to the first `##` heading that follows the
 *    `# ` title; use it when it is longer than 20 and at most `maxLength` chars.
 * 3. Otherwise cut `content` at `maxLength`, preferring the last ". " sentence
 *    break when it lies in the second half of the window.
 *
 * @param content - full markdown of the page.
 * @param maxLength - upper bound for the summary length (default 800).
 * @returns the extracted summary, trimmed.
 */
export function extractSummary(content, maxLength = 800) {
    const markerAt = content.indexOf('<!-- summary-end -->');
    if (markerAt > 0) {
        return content.slice(0, markerAt).trim();
    }
    // Gather lines up to (not including) the first section heading after the title.
    const kept = [];
    let seenTitle = false;
    for (const row of content.split('\n')) {
        if (!seenTitle && row.startsWith('# ')) {
            seenTitle = true;
        }
        else if (seenTitle && /^##\s/.test(row)) {
            break;
        }
        kept.push(row);
    }
    const intro = kept.join('\n').trim();
    const introUsable = intro.length > 20 && intro.length <= maxLength;
    if (introUsable) {
        return intro;
    }
    // No usable intro: fall back to a plain truncation of the raw content.
    const head = content.slice(0, maxLength);
    const sentenceEnd = head.lastIndexOf('. ');
    const clipped = sentenceEnd > maxLength * 0.5 ? head.slice(0, sentenceEnd + 1) : head;
    return clipped.trim();
}
|
|
41
|
+
import fs from 'fs/promises';
|
|
42
|
+
import path from 'path';
|
|
43
|
+
/**
 * Cheap token estimate: one token per ~4 characters, rounded up.
 * Matches gitnexus/src/core/wiki/llm-client.ts:estimateTokens.
 *
 * @param text - text to measure.
 * @returns estimated token count (0 for an empty string).
 */
export function estimateTokens(text) {
    const charsPerToken = 4;
    const charCount = text.length;
    return Math.ceil(charCount / charsPerToken);
}
|
|
50
|
+
/**
 * Add up the estimated token counts of a module's files.
 * Files are read one at a time as UTF-8 relative to `repoRoot`; any file that
 * cannot be read contributes zero — mirrors Hub's
 * generator.ts:estimateModuleTokens behavior.
 *
 * @param filePaths - paths relative to `repoRoot`.
 * @param repoRoot - directory the paths are resolved against.
 * @returns the summed token estimate.
 */
export async function estimateModuleTokens(filePaths, repoRoot) {
    // Token estimate for one file, or 0 when it cannot be read.
    const tokensFor = async (relativePath) => {
        try {
            const text = await fs.readFile(path.join(repoRoot, relativePath), 'utf-8');
            return estimateTokens(text);
        }
        catch {
            return 0;
        }
    };
    let sum = 0;
    for (const relativePath of filePaths) {
        const fileTokens = await tokensFor(relativePath);
        sum += fileTokens;
    }
    return sum;
}
|
|
68
|
+
/**
 * Assemble a human-readable project description from files in the repo root.
 *
 * Always starts with `Project: <dirname>`. Then the first readable config file
 * from the allowlist is used: `package.json` contributes name, description and
 * script names (an unparsable package.json is skipped and the next candidate is
 * tried); any other file contributes its first 500 characters. Finally the
 * first readable README variant contributes its first 1000 characters.
 * Unreadable files are skipped silently.
 *
 * @param repoRoot - path of the repository root.
 * @returns the description as a newline-joined string.
 */
export async function readProjectInfo(repoRoot) {
    // Read a root-level file as text; null means "not readable".
    const readText = async (name) => {
        try {
            return await fs.readFile(path.join(repoRoot, name), 'utf-8');
        }
        catch {
            return null;
        }
    };
    const info = [`Project: ${path.basename(repoRoot)}`];
    const manifests = ['package.json', 'Cargo.toml', 'pyproject.toml', 'go.mod', 'pom.xml', 'build.gradle'];
    for (const manifest of manifests) {
        const text = await readText(manifest);
        if (text === null) {
            continue;
        }
        if (manifest !== 'package.json') {
            info.push(`\n${manifest}:\n${text.slice(0, 500)}`);
            break;
        }
        try {
            const pkg = JSON.parse(text);
            if (pkg.name) {
                info.push(`Name: ${pkg.name}`);
            }
            if (pkg.description) {
                info.push(`Description: ${pkg.description}`);
            }
            if (pkg.scripts && typeof pkg.scripts === 'object') {
                info.push(`Scripts: ${Object.keys(pkg.scripts).join(', ')}`);
            }
        }
        catch {
            // Malformed package.json: fall through to the next manifest.
            continue;
        }
        break;
    }
    for (const readme of ['README.md', 'readme.md', 'README.txt']) {
        const text = await readText(readme);
        if (text !== null) {
            info.push(`\nREADME excerpt:\n${text.slice(0, 1000)}`);
            break;
        }
    }
    return info.join('\n');
}
|
|
125
|
+
/**
 * Repair `[text](slug.md)` links whose slug is not in the registry.
 *
 * For each such link, in order: keep it if the slug is valid; otherwise use the
 * slug of the module whose lower-cased name equals the lower-cased link text;
 * otherwise use the first registered slug that contains, or is contained in,
 * the broken slug; otherwise drop the link and keep only its text.
 * Only lower-case `slug.md` targets match — external URLs are untouched.
 *
 * @param markdown - generated page content.
 * @param registry - known modules keyed by slug.
 * @returns the markdown with links repaired or stripped.
 */
export function validateAndFixCrossReferences(markdown, registry) {
    const entries = [...registry.values()];
    const knownSlugs = new Set(entries.map((e) => e.slug));
    const slugForName = new Map(entries.map((e) => [e.name.toLowerCase(), e.slug]));
    const asLink = (label, target) => `[${label}](${target}.md)`;
    const repair = (whole, label, target) => {
        if (knownSlugs.has(target)) {
            return whole;
        }
        const byName = slugForName.get(String(label).toLowerCase());
        if (byName) {
            return asLink(label, byName);
        }
        const nearest = [...knownSlugs].find((candidate) => candidate.includes(target) || target.includes(candidate));
        return nearest !== undefined ? asLink(label, nearest) : String(label);
    };
    return markdown.replace(/\[([^\]]+)\]\(([a-z0-9][a-z0-9_-]*)\.md\)/g, repair);
}
|
|
151
|
+
/**
 * Make sure every file listed in `filesWithExportsRaw` appears in some module.
 *
 * The LLM can silently drop files when producing the module tree. Files not
 * listed in any module's `files` are swept into a module with slug "other":
 * an existing one is extended in place (and the same `modules` array is
 * returned), otherwise a new node is created via `makeNode` and appended to a
 * copy of the array.
 *
 * When `filesWithExportsRaw` is empty (older Hub that doesn't expose the
 * field) or nothing is missing, `modules` is returned untouched.
 *
 * @param modules - current top-level modules.
 * @param filesWithExportsRaw - every file the Hub reported.
 * @param makeNode - factory for the new "Other" node: `(slug, title, files)`.
 * @returns the module list including all files.
 */
export function ensureAllFilesAssigned(modules, filesWithExportsRaw, makeNode) {
    if (filesWithExportsRaw.length === 0) {
        return modules;
    }
    const covered = new Set(modules.flatMap((mod) => mod.files ?? []));
    const orphans = filesWithExportsRaw
        .map((entry) => entry.filePath)
        .filter((filePath) => !covered.has(filePath));
    if (orphans.length === 0) {
        return modules;
    }
    const otherBucket = modules.find((mod) => mod.slug === 'other');
    if (!otherBucket) {
        return [...modules, makeNode('other', 'Other', orphans)];
    }
    otherBucket.files = [...(otherBucket.files ?? []), ...orphans];
    return modules;
}
|
|
184
|
+
/**
 * Build a slug→{name,slug,symbols} registry from the module tree and the
 * per-file exported-symbol metadata.
 *
 * Every node of the tree (parents and descendants) gets an entry. A node with
 * at least one child takes its symbols from its direct children's files; a
 * node without children — including one carrying an empty `children` array —
 * takes them from its own files. At most 10 distinct symbol names are kept per
 * module, in file order.
 *
 * @param tree - top-level module nodes (`name`, `slug`, optional `files`/`children`).
 * @param filesWithExports - exported symbols per file path.
 * @returns map from slug to registry entry, in tree traversal order.
 */
export function buildModuleRegistry(tree, filesWithExports) {
    const MAX_SYMBOLS = 10;
    const exportsByPath = new Map(filesWithExports.map((f) => [f.filePath, f]));
    const registry = new Map();
    const addNode = (node) => {
        const children = node.children ?? [];
        // An empty `children` array is still a leaf: fall back to the node's
        // own files instead of ending up with no files (and no symbols).
        const nodeFiles = children.length > 0
            ? children.flatMap((c) => c.files ?? [])
            : (node.files ?? []);
        const symbols = [];
        collect: for (const fp of nodeFiles) {
            const fileEntry = exportsByPath.get(fp);
            if (!fileEntry)
                continue;
            for (const sym of fileEntry.symbols) {
                // Cap reached: no later file can add anything.
                if (symbols.length >= MAX_SYMBOLS)
                    break collect;
                if (!symbols.includes(sym.name))
                    symbols.push(sym.name);
            }
        }
        registry.set(node.slug, { name: node.name, slug: node.slug, symbols });
        for (const child of children)
            addNode(child);
    };
    for (const node of tree)
        addNode(node);
    return registry;
}
|
|
215
|
+
/**
 * Render the module registry as a markdown bullet list for prompt injection.
 * Each line is `- [name](slug.md)`, followed by ` — exports: …` when the module
 * has symbols. The entry whose slug equals `currentSlug` is left out so a
 * module doesn't link to itself.
 *
 * @param registry - modules keyed by slug.
 * @param currentSlug - optional slug to exclude.
 * @returns the list, or a fixed placeholder sentence when nothing is listed.
 */
export function formatModuleRegistry(registry, currentSlug) {
    const placeholder = 'No module registry available.';
    if (registry.size === 0) {
        return placeholder;
    }
    const bullets = [...registry.values()]
        .filter((entry) => entry.slug !== currentSlug)
        .map(({ name, slug, symbols }) => {
        const exportsNote = symbols.length > 0 ? ` — exports: ${symbols.join(', ')}` : '';
        return `- [${name}](${slug}.md)${exportsNote}`;
    });
    if (bullets.length === 0) {
        return placeholder;
    }
    return bullets.join('\n');
}
|
|
231
|
+
/**
 * Fallback grouping for when the LLM's module-tree response is malformed.
 *
 * With a non-empty `communityMapping`, each non-empty community becomes a group
 * keyed by its label; communities sharing a label are merged. Files from
 * `files` that belong to no community go into "Other" (merged with a community
 * of that name when one exists). If that yields no group at all, or no mapping
 * is given, files are grouped by top-level directory, with files at the
 * repository root under "Root".
 *
 * @param files - all files, each with a `filePath`.
 * @param communityMapping - optional graph communities (`label`, `files`).
 * @returns group label → file paths.
 */
export function fallbackGrouping(files, communityMapping) {
    if (communityMapping && communityMapping.length > 0) {
        const byLabel = new Map();
        const assigned = new Set();
        // Append to the label's bucket instead of overwriting it, so files of a
        // repeated label (or of a community literally named "Other") survive.
        const append = (label, paths) => {
            const bucket = byLabel.get(label);
            if (bucket) {
                for (const p of paths)
                    bucket.push(p);
            }
            else {
                byLabel.set(label, [...paths]);
            }
        };
        for (const group of communityMapping) {
            if (group.files.length === 0)
                continue;
            append(group.label, group.files);
            for (const f of group.files)
                assigned.add(f);
        }
        const unassigned = files.map((f) => f.filePath).filter((fp) => !assigned.has(fp));
        if (unassigned.length > 0)
            append('Other', unassigned);
        if (byLabel.size > 0)
            return Object.fromEntries(byLabel);
    }
    const groups = new Map();
    for (const f of files) {
        // Normalise Windows separators before taking the first path segment.
        const parts = f.filePath.replace(/\\/g, '/').split('/');
        const topDir = parts.length > 1 ? parts[0] : 'Root';
        let group = groups.get(topDir);
        if (!group) {
            group = [];
            groups.set(topDir, group);
        }
        group.push(f.filePath);
    }
    return Object.fromEntries(groups);
}
|
|
265
|
+
/**
 * Turn an arbitrary label into a slug: lower-case, runs of non-alphanumerics
 * become a single "-", at most 60 characters, no leading or trailing "-".
 */
function slugify(s) {
    return s
        .toLowerCase()
        .replace(/[^a-z0-9]+/g, '-')
        .replace(/^-/, '')
        .slice(0, 60)
        // Trim AFTER truncating: the 60-char cut can land right after a dash.
        .replace(/-$/, '');
}
/**
 * Split a module's files into sub-modules by community membership.
 *
 * Each file goes to the community whose `files` list mentions it most often
 * (the earliest community wins ties). Returns null when the mapping is empty
 * or the split doesn't yield more than one bucket — the caller should then
 * fall back to subdirectory-based splitting.
 *
 * NOTE(review): files that appear in no community are not part of any returned
 * sub-module — confirm the caller re-homes them.
 *
 * @param parentName - name of the module being split; prefixes each sub-module name and slug.
 * @param files - the module's file paths.
 * @param communityMapping - graph communities (`label`, `files`).
 * @returns one node per non-empty community bucket, or null.
 */
export function splitByCommunity(parentName, files, communityMapping) {
    if (communityMapping.length === 0)
        return null;
    // Count occurrences per community once, instead of rescanning every
    // community's file list for every file.
    const communities = communityMapping.map((group) => {
        const counts = new Map();
        for (const f of group.files)
            counts.set(f, (counts.get(f) ?? 0) + 1);
        return { label: group.label, counts };
    });
    const subGroups = new Map();
    for (const fp of files) {
        let bestLabel = '';
        let bestCount = 0;
        for (const community of communities) {
            const count = community.counts.get(fp) ?? 0;
            if (count > bestCount) {
                bestCount = count;
                bestLabel = community.label;
            }
        }
        // Unmatched files (and matches on an empty label) are skipped.
        if (!bestLabel)
            continue;
        const bucket = subGroups.get(bestLabel);
        if (bucket) {
            bucket.push(fp);
        }
        else {
            subGroups.set(bestLabel, [fp]);
        }
    }
    if (subGroups.size <= 1)
        return null;
    return Array.from(subGroups.entries()).map(([label, subFiles]) => ({
        name: `${parentName} — ${label}`,
        slug: slugify(`${parentName}-${label}`),
        files: subFiles,
    }));
}
|