@pugi/cli 0.1.0-beta.23 → 0.1.0-beta.25
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/core/auto-update/channels.js +122 -0
- package/dist/core/auto-update/checker.js +241 -0
- package/dist/core/auto-update/state.js +235 -0
- package/dist/core/engine/compaction-hook.js +154 -0
- package/dist/core/engine/native-pugi.js +67 -3
- package/dist/core/engine/tool-bridge.js +123 -3
- package/dist/core/hooks/events.js +44 -0
- package/dist/core/hooks/index.js +15 -0
- package/dist/core/hooks/registry.js +213 -0
- package/dist/core/hooks/runner.js +236 -0
- package/dist/core/init/scaffold.js +195 -0
- package/dist/core/lsp/cache.js +105 -0
- package/dist/core/lsp/language-detect.js +66 -0
- package/dist/core/lsp/post-edit-diagnostics.js +171 -0
- package/dist/core/repl/codebase-survey.js +308 -0
- package/dist/core/repl/init-interview.js +457 -0
- package/dist/core/repl/onboarding-state.js +297 -0
- package/dist/core/repl/session.js +84 -0
- package/dist/core/repl/slash-commands.js +25 -0
- package/dist/core/repo-map/build.js +125 -0
- package/dist/core/repo-map/cache.js +185 -0
- package/dist/core/repo-map/extractor.js +254 -0
- package/dist/core/repo-map/formatter.js +145 -0
- package/dist/core/repo-map/scanner.js +211 -0
- package/dist/core/session.js +44 -0
- package/dist/core/settings.js +9 -0
- package/dist/runtime/cli.js +170 -0
- package/dist/runtime/commands/hooks.js +184 -0
- package/dist/runtime/commands/lsp.js +25 -23
- package/dist/runtime/commands/repo-map.js +95 -0
- package/dist/runtime/commands/update.js +289 -0
- package/dist/runtime/version.js +1 -1
- package/dist/tui/repl-splash-mascot.js +19 -7
- package/package.json +3 -3
|
@@ -0,0 +1,185 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Repo-map cache — Leak L28 (2026-05-27).
|
|
3
|
+
*
|
|
4
|
+
* Persists the result of the scan + extract passes to
|
|
5
|
+
* `.pugi/repo-map.json` so subsequent boots reuse the symbol table
|
|
6
|
+
* without re-walking the workspace. The cache key is `(mtimeMs, size)`
|
|
7
|
+
* per file, matching the heuristic used by Node's own native
|
|
8
|
+
* `node:fs.statSync` cache and Git's index format. A file whose mtime
|
|
9
|
+
* AND size match the cached entry is presumed unchanged; otherwise the
|
|
10
|
+
* extractor re-runs against the fresh contents.
|
|
11
|
+
*
|
|
12
|
+
* Why not a content hash:
|
|
13
|
+
*
|
|
14
|
+
* The α6.5 file-cache module already hashes by content for the
|
|
15
|
+
* working-set heuristic, and the L28 use case is different — repo-
|
|
16
|
+
* map invalidation is "should we re-parse" not "is this content
|
|
17
|
+
* identical to the last cached version". A content-hash sweep would
|
|
18
|
+
* re-read every source file on every boot, defeating the purpose of
|
|
19
|
+
* a cache. mtime + size matches the cost profile (one stat call per
|
|
20
|
+
* file, no read) while catching every realistic edit pattern
|
|
21
|
+
* (editors universally update mtime; even `truncate` updates size).
|
|
22
|
+
*
|
|
23
|
+
* Why JSON not SQLite:
|
|
24
|
+
*
|
|
25
|
+
* The α6.5 index-store ships as a flat JSON blob and parses in <50 ms
|
|
26
|
+
* for typical repos (~2000 files); we match the format so the
|
|
27
|
+
* doctor probe + cabinet sync tools can read repo-map.json without
|
|
28
|
+
* spinning up a SQLite driver. The blob is gzip-friendly if a future
|
|
29
|
+
* sprint wants to ship it across the wire.
|
|
30
|
+
*
|
|
31
|
+
* Schema versioning: every cache entry carries `schemaVersion`. When
|
|
32
|
+
* the extractor surface changes (new symbol kinds, new summary
|
|
33
|
+
* format), bump the constant and existing caches are dropped on the
|
|
34
|
+
* next boot — same pattern as the migration runner.
|
|
35
|
+
*
|
|
36
|
+
* Pure-ish surface: reads / writes use `node:fs` sync, no logging.
|
|
37
|
+
* Errors are converted to structured results so the caller can decide
|
|
38
|
+
* whether to surface them or fall back to a cold rebuild.
|
|
39
|
+
*/
|
|
40
|
+
import { existsSync, mkdirSync, readFileSync, renameSync, unlinkSync, writeFileSync } from 'node:fs';
import { dirname, join } from 'node:path';
|
|
42
|
+
/**
|
|
43
|
+
* Cache format version. Bump when:
|
|
44
|
+
* - `RepoMapSymbol` adds / renames a field
|
|
45
|
+
* - `RepoMapFileExtract` adds / renames a field
|
|
46
|
+
* - The mtime + size invalidation contract changes
|
|
47
|
+
*
|
|
48
|
+
* Old caches with a mismatched version are dropped on read.
|
|
49
|
+
*/
|
|
50
|
+
export const REPO_MAP_CACHE_VERSION = 1; // compared with a parsed cache's `schemaVersion`; any other value reads as 'version-mismatch'
|
|
51
|
+
/**
 * Resolve the conventional location of the repo-map cache for a
 * workspace: `<workspaceRoot>/.pugi/repo-map.json`.
 *
 * @param {string} workspaceRoot Directory the cache belongs to.
 * @returns {string} The cache file path, assembled with `path.join`.
 */
export function defaultCachePath(workspaceRoot) {
    const segments = ['.pugi', 'repo-map.json'];
    return join(workspaceRoot, ...segments);
}
|
|
58
|
+
/**
 * Load and validate the cache file at `path`. Never throws; every
 * outcome is reported through the returned verdict object.
 *
 * Failure reasons:
 *   - 'missing'          — `existsSync(path)` is false.
 *   - 'parse-error'      — the file could not be read, is not valid
 *                          JSON, or does not pass `isCacheShape`.
 *   - 'version-mismatch' — `schemaVersion` differs from
 *                          `REPO_MAP_CACHE_VERSION`.
 *
 * @param {string} path Location of the cache file.
 * @returns {{ ok: true, cache: object } | { ok: false, reason: string }}
 */
export function readRepoMapCache(path) {
    const failure = (reason) => ({ ok: false, reason });
    if (!existsSync(path)) {
        return failure('missing');
    }
    let parsed;
    try {
        // A read failure and a JSON syntax error are reported the same way,
        // so both steps share one guard.
        parsed = JSON.parse(readFileSync(path, 'utf8'));
    }
    catch {
        return failure('parse-error');
    }
    if (!isCacheShape(parsed)) {
        return failure('parse-error');
    }
    if (parsed.schemaVersion !== REPO_MAP_CACHE_VERSION) {
        return failure('version-mismatch');
    }
    return { ok: true, cache: parsed };
}
|
|
91
|
+
/**
 * Persist `cache` as pretty-printed JSON using write-then-rename: the
 * payload is written to `<path>.tmp` and then renamed over `path`, so a
 * reader of `path` sees either the previous file or the complete new one.
 *
 * Never throws. On failure the temp file is removed (best effort) so a
 * failed write does not leave `<path>.tmp` behind, and the error message
 * is returned to the caller.
 *
 * @param {string} path Destination cache file; its parent directory is
 *   created when it does not exist.
 * @param {object} cache Value to serialise with `JSON.stringify`.
 * @returns {{ ok: true } | { ok: false, error: string }}
 */
export function writeRepoMapCache(path, cache) {
    // Stays null until the temp path has been computed, so the cleanup in
    // the catch branch only runs when there may be something to remove.
    let tmp = null;
    try {
        const dir = dirname(path);
        if (!existsSync(dir)) {
            mkdirSync(dir, { recursive: true });
        }
        const body = JSON.stringify(cache, null, 2) + '\n';
        tmp = path + '.tmp';
        writeFileSync(tmp, body, { encoding: 'utf8' });
        // The temp file sits next to the destination, so the rename never
        // crosses a volume boundary.
        renameSync(tmp, path);
        return { ok: true };
    }
    catch (error) {
        if (tmp !== null) {
            try {
                unlinkSync(tmp);
            }
            catch {
                // Temp file was never created or is already gone.
            }
        }
        return {
            ok: false,
            error: error instanceof Error ? error.message : String(error),
        };
    }
}
|
|
120
|
+
/**
 * Compare a fresh scan with a previously loaded cache and decide, per
 * file, whether the cached entry can be kept.
 *
 * A scanned file is reusable only when the prior cache has an entry for
 * its `relPath` whose `mtimeMs` and `sizeBytes` both equal the scanned
 * values. Prior entries whose path is absent from the scan are dropped.
 *
 * @param {{ entries: object } | null | undefined} prior Previously loaded cache, if any.
 * @param {Array<{ relPath: string, mtimeMs: number, sizeBytes: number }>} scanned
 * @returns {{ toRebuild: Array, toDrop: string[], reuse: string[] }}
 *   `toRebuild` holds scanned file objects; `toDrop` and `reuse` hold paths.
 */
export function diffCacheAgainstScan(prior, scanned) {
    const scannedPaths = new Set();
    const toRebuild = [];
    const reuse = [];
    for (const file of scanned) {
        const { relPath } = file;
        scannedPaths.add(relPath);
        const cached = prior?.entries[relPath];
        const unchanged = Boolean(cached)
            && cached.mtimeMs === file.mtimeMs
            && cached.sizeBytes === file.sizeBytes;
        if (unchanged) {
            reuse.push(relPath);
        }
        else {
            toRebuild.push(file);
        }
    }
    const toDrop = prior
        ? Object.keys(prior.entries).filter((key) => !scannedPaths.has(key))
        : [];
    return { toRebuild, toDrop, reuse };
}
|
|
145
|
+
/**
 * Build the next cache object from a scan. Pure: nothing is written to
 * disk here.
 *
 * For every scanned file, a fresh extract (looked up by `relPath` in
 * `freshExtracts`) wins and is stored with the scanned `mtimeMs` /
 * `sizeBytes`. Otherwise the prior entry for that path is carried over
 * unchanged. Files with neither are left out, as are prior entries whose
 * path was not scanned.
 *
 * @param {object} args
 * @param {string} args.root Stored verbatim as `root`.
 * @param {{ entries: object } | null | undefined} args.prior
 * @param {Array<{ relPath: string, mtimeMs: number, sizeBytes: number }>} args.scanned
 * @param {Map<string, object>} args.freshExtracts
 * @param {number} [args.nowMs] Overrides `Date.now()` for `builtAtMs`.
 * @returns {{ schemaVersion: number, root: string, builtAtMs: number, entries: object }}
 */
export function mergeCache(args) {
    const { root, prior, scanned, freshExtracts } = args;
    const entries = {};
    for (const { relPath, mtimeMs, sizeBytes } of scanned) {
        const extract = freshExtracts.get(relPath);
        const next = extract
            ? { mtimeMs, sizeBytes, extract }
            : prior?.entries[relPath];
        if (next) {
            entries[relPath] = next;
        }
    }
    const builtAtMs = args.nowMs ?? Date.now();
    return {
        schemaVersion: REPO_MAP_CACHE_VERSION,
        root,
        builtAtMs,
        entries,
    };
}
|
|
175
|
+
/**
 * Structural check for a parsed cache file: a non-array object carrying
 * a numeric `schemaVersion`, a string `root`, a numeric `builtAtMs` and
 * a non-array object `entries`. Individual entries are not inspected.
 *
 * Arrays are rejected explicitly because `typeof [] === 'object'`; an
 * `entries` array is not a path-keyed table.
 *
 * @param {unknown} value Result of `JSON.parse`.
 * @returns {boolean}
 */
function isCacheShape(value) {
    const isRecord = (candidate) => typeof candidate === 'object'
        && candidate !== null
        && !Array.isArray(candidate);
    if (!isRecord(value)) {
        return false;
    }
    return (typeof value.schemaVersion === 'number'
        && typeof value.root === 'string'
        && typeof value.builtAtMs === 'number'
        && isRecord(value.entries));
}
|
|
185
|
+
//# sourceMappingURL=cache.js.map
|
|
@@ -0,0 +1,254 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Maximum symbols carried per file. The formatter further truncates to
|
|
3
|
+
* the 2 KB injection budget, but capping per-file here keeps a single
|
|
4
|
+
* giant `index.ts` from monopolising the map.
|
|
5
|
+
*/
|
|
6
|
+
export const MAX_SYMBOLS_PER_FILE = 50; // checked while matches are collected and again via slice() on the final TS/JS symbol list
|
|
7
|
+
/**
 * Extract symbols + summary from a single file, choosing the extractor
 * by `file.ext`. The extension is matched exactly as given (no case
 * folding happens here):
 *
 *   - `.ts` `.tsx` `.js` `.jsx` `.mjs` `.cjs` → TS/JS extraction
 *   - `.md` `.mdx`                            → Markdown extraction
 *   - anything else → empty symbol list with `summary: null`
 *
 * @param {{ relPath: string, ext: string }} file Scanned file descriptor.
 * @param {string} source File contents.
 * @returns {{ relPath: string, ext: string, summary: string | null, symbols: Array }}
 */
export function extractFromFile(file, source) {
    const { ext } = file;
    const isTsLike = ext === '.ts' || ext === '.tsx'
        || ext === '.js' || ext === '.jsx'
        || ext === '.mjs' || ext === '.cjs';
    if (isTsLike) {
        return extractFromTsLike(file, source);
    }
    if (ext === '.md' || ext === '.mdx') {
        return extractFromMarkdown(file, source);
    }
    // Unrecognised extension: report the file with nothing extracted.
    return {
        relPath: file.relPath,
        ext: file.ext,
        summary: null,
        symbols: [],
    };
}
|
|
35
|
+
/* ------------------------- TS / JS extraction ------------------------- */
|
|
36
|
+
/**
|
|
37
|
+
* Identifier pattern. We use the ASCII subset (letters/digits/`$`/`_`)
|
|
38
|
+
* rather than the full unicode ID start/continue range because the
|
|
39
|
+
* unicode tables would inflate the bundle by ~50 KB for zero benefit
|
|
40
|
+
* — Pugi's customers are typing English identifiers. Unicode names
|
|
41
|
+
* in source still PARSE (they just do not surface in the repo-map);
|
|
42
|
+
* the formatter degrades gracefully.
|
|
43
|
+
*/
|
|
44
|
+
const IDENT = '[A-Za-z_$][A-Za-z0-9_$]*'; // regex source fragment, interpolated into the TS_*_RE patterns below
|
|
45
|
+
/**
|
|
46
|
+
* Top-level declaration shapes:
|
|
47
|
+
*
|
|
48
|
+
* export? (default)? class Foo { ... }
|
|
49
|
+
* export? (default)? function foo() { ... }
|
|
50
|
+
* export? (default)? async function foo() { ... }
|
|
51
|
+
* export? (const|let|var) foo = (args) => { ... }
|
|
52
|
+
* export? (const|let|var) foo = async (args) => { ... }
|
|
53
|
+
* export? (const|let|var) foo = function (args) { ... }
|
|
54
|
+
* export? interface Foo { ... }
|
|
55
|
+
* export? type Foo = ...
|
|
56
|
+
* export? enum Foo { ... }
|
|
57
|
+
*
|
|
58
|
+
* The patterns anchor on start-of-line (`^` with the `m` flag) so they
|
|
59
|
+
* never match nested declarations inside a class body or a closure.
|
|
60
|
+
* That intentionally loses precision for module-level IIFEs (e.g.
|
|
61
|
+
* `;(function init() {})()`), but the L28 budget already drops nested
|
|
62
|
+
* symbols, so the loss is invisible to the operator.
|
|
63
|
+
*/
|
|
64
|
+
// Class declarations. Group 1 = `export` / `export default` prefix (absent
// for non-exported classes), group 2 = class name. `abstract` is matched
// outside the export group so a non-exported `abstract class Foo` is found.
const TS_CLASS_RE = new RegExp(`^(export\\s+(?:default\\s+)?)?(?:abstract\\s+)?class\\s+(${IDENT})`, 'gm');
// Function declarations, optionally `async` and/or generators. Group 1 =
// export prefix, group 2 = function name. The generator star may sit on
// either side of the whitespace: `function* foo`, `function *foo`,
// `function * foo` and `function*foo` all match; a plain function still
// requires whitespace between the keyword and the name.
const TS_FUNCTION_RE = new RegExp(`^(export\\s+(?:default\\s+)?)?(?:async\\s+)?function(?:\\s*\\*\\s*|\\s+)(${IDENT})`, 'gm');
|
|
66
|
+
/**
|
|
67
|
+
* Arrow / function-expression `const|let|var foo = (...) => ...` shape.
|
|
68
|
+
* The optional type annotation between the identifier and the `=` is
|
|
69
|
+
* non-trivial because TS allows `(...) => ...` IN the annotation
|
|
70
|
+
* itself ("export const x: () => number = () => 1"). We allow `=>`
|
|
71
|
+
* as a unit inside the annotation by matching the char class
|
|
72
|
+
* `(?:=>|[^=\\n])*?` which consumes either an arrow token OR a
|
|
73
|
+
* non-`=` char; the trailing assignment `=` is then the first
|
|
74
|
+
* standalone `=` (`(?!>)`) on the line. The match tail (`=>` arrow,
|
|
75
|
+
* parenthesised arg list, generic, or `function` keyword) anchors
|
|
76
|
+
* the RHS so plain `const x = 1` does not surface as a function.
|
|
77
|
+
*/
|
|
78
|
+
// `const|let|var name [: annotation] = <function-like RHS>`. Group 1 =
// export prefix, group 2 = binding name. The name is closed with a negative
// lookahead on identifier characters instead of `\b`: `\b` does not hold
// after a trailing `$`, which made the engine backtrack and report a
// truncated name (`foo$` surfaced as `foo`).
const TS_ARROW_RE = new RegExp(`^(export\\s+)?(?:const|let|var)\\s+(${IDENT})(?![A-Za-z0-9_$])(?:=>|[^=\\n])*?=(?!>)\\s*(?:async\\s*)?(?:\\(|<|function\\b)`, 'gm');
// `interface Name`. Group 1 = export prefix, group 2 = name.
const TS_INTERFACE_RE = new RegExp(`^(export\\s+)?interface\\s+(${IDENT})`, 'gm');
// `type Name =` or `type Name<`. Requiring `=` / `<` after the name keeps
// other uses of the word `type` at line start from matching.
const TS_TYPE_RE = new RegExp(`^(export\\s+)?type\\s+(${IDENT})\\s*[=<]`, 'gm');
// `enum Name` and `const enum Name`. Group 1 = export prefix, group 2 = name.
const TS_ENUM_RE = new RegExp(`^(export\\s+)?(?:const\\s+)?enum\\s+(${IDENT})`, 'gm');
|
|
82
|
+
/**
|
|
83
|
+
* Lead JSDoc / TSDoc block (slash-star-star ... star-slash) at the start of the file or
|
|
84
|
+
* preceded only by whitespace + import statements. We pick the first
|
|
85
|
+
* non-empty narrative line — the convention across Pugi's own codebase
|
|
86
|
+
* is that the headline sentence sits at the top of the block.
|
|
87
|
+
*/
|
|
88
|
+
const LEAD_DOC_RE = /\/\*\*([\s\S]*?)\*\//; // unanchored: finds the first doc-block opener anywhere in the searched text; group 1 is the block body
|
|
89
|
+
/**
 * Collect line-anchored declarations from a TS/JS source string.
 *
 * The declaration patterns run in a fixed order (class, function,
 * arrow/function-expression binding, interface, type, enum). Each match
 * contributes `{ name, kind, exported, line }`, with `line` 1-based.
 * Collection stops adding symbols once `MAX_SYMBOLS_PER_FILE` have been
 * gathered, so the cap is applied in pass order, before de-duplication
 * and before the final sort by line.
 *
 * Symbols sharing both kind and name are then collapsed to one entry:
 * the lowest line is kept and `exported` is true if any duplicate was
 * exported.
 *
 * @param {{ relPath: string, ext: string }} file
 * @param {string} source
 * @returns {{ relPath: string, ext: string, summary: string | null, symbols: Array }}
 */
function extractFromTsLike(file, source) {
    // One pass over the text up front; each match offset is then mapped to
    // a line number by binary search.
    const lineStarts = computeLineStarts(source);
    const passes = [
        [TS_CLASS_RE, 'class'],
        [TS_FUNCTION_RE, 'function'],
        [TS_ARROW_RE, 'const'],
        [TS_INTERFACE_RE, 'interface'],
        [TS_TYPE_RE, 'type'],
        [TS_ENUM_RE, 'enum'],
    ];
    const collected = [];
    for (const [pattern, kind] of passes) {
        // The patterns are shared, global regexes; always scan from offset 0.
        pattern.lastIndex = 0;
        for (const hit of source.matchAll(pattern)) {
            if (collected.length >= MAX_SYMBOLS_PER_FILE) {
                break;
            }
            // Group 1 is the optional export prefix, group 2 the name.
            const name = hit[2];
            if (!name) {
                continue;
            }
            collected.push({
                name,
                kind,
                exported: Boolean(hit[1]),
                line: binarySearchLine(lineStarts, hit.index),
            });
        }
    }
    // Collapse repeats of the same kind + name.
    const byKey = new Map();
    for (const sym of collected) {
        const key = `${sym.kind}::${sym.name}`;
        const kept = byKey.get(key);
        if (!kept) {
            byKey.set(key, sym);
            continue;
        }
        if (sym.line < kept.line) {
            byKey.set(key, { ...sym, exported: kept.exported || sym.exported });
            continue;
        }
        if (sym.exported && !kept.exported) {
            byKey.set(key, { ...kept, exported: true });
        }
    }
    const ordered = [...byKey.values()].sort((a, b) => a.line - b.line);
    return {
        relPath: file.relPath,
        ext: file.ext,
        summary: extractLeadDocSummary(source),
        symbols: ordered.slice(0, MAX_SYMBOLS_PER_FILE),
    };
}
|
|
147
|
+
/**
 * Pull a one-line summary out of the first doc block matched by
 * `LEAD_DOC_RE` within the first 4096 characters of `source`.
 *
 * Each line of the block is stripped of its leading `*` decoration and
 * trimmed; the first line that is neither empty nor a tag line (starting
 * with `@`) is the summary. Summaries longer than 120 characters are cut
 * to 117 characters plus `...`.
 *
 * @param {string} source File contents.
 * @returns {string | null} The summary, or null when no block is found
 *   in the window or the block has no narrative line.
 */
export function extractLeadDocSummary(source) {
    const head = source.slice(0, 4096);
    const found = LEAD_DOC_RE.exec(head);
    if (found === null) {
        return null;
    }
    const headline = (found[1] ?? '')
        .split('\n')
        .map((rawLine) => rawLine.replace(/^\s*\*\s?/u, '').trim())
        // Skip blank lines and `@param` / `@returns` style tag lines.
        .find((text) => text.length > 0 && !text.startsWith('@'));
    if (headline === undefined) {
        return null;
    }
    return headline.length > 120 ? headline.slice(0, 117) + '...' : headline;
}
|
|
172
|
+
/* -------------------------- Markdown extraction -------------------------- */
|
|
173
|
+
// ATX heading: 1-6 `#`, whitespace, heading text, optional closing `#` run.
const MD_HEADING_RE = /^(#{1,6})\s+(.+?)\s*#*\s*$/gm;
/**
 * Collect the 1-based line numbers that belong to fenced code blocks
 * (opening and closing fence lines included). A fence opens on a line
 * starting with three or more backticks or tildes and closes on a line
 * made of at least as many of the same character; an unclosed fence runs
 * to the end of the text.
 */
function collectFencedLines(source) {
    const fenced = new Set();
    let openMarker = null;
    source.split('\n').forEach((raw, index) => {
        const text = raw.trim();
        const marker = /^(`{3,}|~{3,})/.exec(text)?.[1] ?? null;
        const rest = marker === null ? '' : text.slice(marker.length);
        if (openMarker !== null) {
            fenced.add(index + 1);
            const closes = marker !== null
                && marker[0] === openMarker[0]
                && marker.length >= openMarker.length
                && rest.trim() === '';
            if (closes) {
                openMarker = null;
            }
            return;
        }
        // A backtick run followed by more backticks on the same line is
        // inline code, not a fence opener.
        if (marker !== null && !(marker[0] === '`' && rest.includes('`'))) {
            openMarker = marker;
            fenced.add(index + 1);
        }
    });
    return fenced;
}
/**
 * Extract H1/H2 headings from a Markdown source as `heading` symbols
 * (always `exported: true`, 1-based `line`), up to `MAX_SYMBOLS_PER_FILE`.
 *
 * Lines inside fenced code blocks are ignored, so `# comment` lines in
 * shell or config snippets are not reported as headings.
 *
 * @param {{ relPath: string, ext: string }} file
 * @param {string} source
 * @returns {{ relPath: string, ext: string, summary: string | null, symbols: Array }}
 */
function extractFromMarkdown(file, source) {
    const symbols = [];
    const lineStarts = computeLineStarts(source);
    const fencedLines = collectFencedLines(source);
    // Shared global regex: always scan from offset 0.
    MD_HEADING_RE.lastIndex = 0;
    let match;
    while ((match = MD_HEADING_RE.exec(source)) !== null) {
        if (symbols.length >= MAX_SYMBOLS_PER_FILE)
            break;
        const level = match[1]?.length ?? 0;
        // Only H1 + H2 surface; the map answers "what is in this file",
        // not "what is the full table of contents".
        if (level > 2)
            continue;
        const line = binarySearchLine(lineStarts, match.index);
        if (fencedLines.has(line))
            continue;
        const name = (match[2] ?? '').trim();
        if (!name)
            continue;
        symbols.push({
            name,
            kind: 'heading',
            exported: true,
            line,
        });
    }
    return {
        relPath: file.relPath,
        ext: file.ext,
        summary: extractMarkdownSummary(source),
        symbols,
    };
}
|
|
205
|
+
/**
 * First body line that follows a heading, looking at the first 200 lines
 * of the Markdown source. Truncated to 120 characters (117 + `...`) so
 * the formatter row stays single-line.
 *
 * Skipped while searching: blank lines, heading lines, `---` delimiters,
 * lines starting with an HTML comment opener, any text that appears
 * before the first heading, and fenced code blocks (fence lines and
 * their contents). Skipping fences keeps a line such as "```bash" or a
 * shell `# comment` from being taken as the summary or as a heading.
 *
 * @param {string} source Markdown file contents.
 * @returns {string | null} The summary line, or null when none qualifies.
 */
export function extractMarkdownSummary(source) {
    const lines = source.split('\n').slice(0, 200);
    let sawHeading = false;
    // Fence marker (run of backticks or tildes) of the block we are inside,
    // or null when outside any fenced block.
    let openFence = null;
    for (const raw of lines) {
        const line = raw.trim();
        if (line.length === 0)
            continue;
        const marker = /^(`{3,}|~{3,})/.exec(line)?.[1] ?? null;
        const rest = marker === null ? '' : line.slice(marker.length);
        if (openFence !== null) {
            const closes = marker !== null
                && marker[0] === openFence[0]
                && marker.length >= openFence.length
                && rest.trim() === '';
            if (closes)
                openFence = null;
            continue;
        }
        // A backtick run followed by more backticks on the same line is
        // inline code, not a fence opener.
        if (marker !== null && !(marker[0] === '`' && rest.includes('`'))) {
            openFence = marker;
            continue;
        }
        if (line.startsWith('#')) {
            sawHeading = true;
            continue;
        }
        // Skip front-matter delimiters and HTML comments.
        if (line === '---' || line.startsWith('<!--'))
            continue;
        if (!sawHeading) {
            // Pre-heading body (typically front-matter content): the summary
            // is the first paragraph line after a heading.
            continue;
        }
        return line.length > 120 ? line.slice(0, 117) + '...' : line;
    }
    return null;
}
|
|
232
|
+
/* ----------------------------- helpers ----------------------------- */
|
|
233
|
+
/**
 * Offsets at which each line of `source` begins: always 0 for the first
 * line, then the index just past every `\n`.
 *
 * @param {string} source
 * @returns {number[]} Ascending start offsets, one per line.
 */
function computeLineStarts(source) {
    const starts = [0];
    let newlineAt = source.indexOf('\n');
    while (newlineAt !== -1) {
        starts.push(newlineAt + 1);
        newlineAt = source.indexOf('\n', newlineAt + 1);
    }
    return starts;
}
|
|
241
|
+
/**
 * Map a character offset to its 1-based line number, given the ascending
 * line-start offsets for the text.
 *
 * The answer is one more than the index of the last start that is
 * `<= offset`; offsets before the first start resolve to line 1.
 *
 * @param {number[]} starts Ascending line-start offsets.
 * @param {number} offset Character offset into the text.
 * @returns {number} 1-based line number.
 */
function binarySearchLine(starts, offset) {
    // Half-open search over [low, high) for the first start beyond `offset`.
    // Index 0 is never probed: the first line is the floor of the answer.
    let low = 1;
    let high = starts.length;
    while (low < high) {
        const probe = (low + high) >>> 1;
        if (starts[probe] <= offset) {
            low = probe + 1;
        }
        else {
            high = probe;
        }
    }
    return low;
}
|
|
254
|
+
//# sourceMappingURL=extractor.js.map
|
|
@@ -0,0 +1,145 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Default byte cap for the engine system-prompt injection. The L28
|
|
3
|
+
* spec calls for 2000 tokens; conservatively that is ~8 KB of UTF-8
|
|
4
|
+
* (rough Claude tokeniser ratio: ~4 chars per token). We cap at 8 KB
|
|
5
|
+
* so the formatted block stays under the token budget across every
|
|
6
|
+
* supported model family without per-model accounting.
|
|
7
|
+
*/
|
|
8
|
+
export const DEFAULT_FORMAT_BYTES_CAP = 8 * 1024; // 8192 bytes; used when `options.maxBytes` is null or undefined
|
|
9
|
+
/**
|
|
10
|
+
* Maximum symbols per row. The engine row format is:
|
|
11
|
+
*
|
|
12
|
+
* `- path/to/file.ts — summary line — exports: Foo(class), bar(fn)`
|
|
13
|
+
*
|
|
14
|
+
* Beyond 6 symbols the row grows past the readable column budget and
|
|
15
|
+
* the additional names rarely move the needle for the model — the
|
|
16
|
+
* exports tail is signal-bearing only for the first few entries
|
|
17
|
+
* anyway (`index.ts` re-exports tend to pile up).
|
|
18
|
+
*/
|
|
19
|
+
export const MAX_SYMBOLS_PER_ROW = 6; // renderRow lists at most this many exported symbols per file
|
|
20
|
+
/**
 * Render the repo-map text within a byte budget.
 *
 * Stages:
 *   - `prioritise(...)` orders the extracts (pure sort).
 *   - `renderRow(...)` produces one row per file.
 *   - The loop below appends rows after the header until the next row
 *     would overflow the cap, then adds a `... N more files` footer.
 *
 * Sizes are UTF-8 byte counts. When the header alone does not fit, the
 * result is empty text. The footer is appended only when it fits too, so
 * `text` never exceeds `maxBytes`; `truncated` reports omitted files
 * either way.
 *
 * @param {Array<{ relPath: string, summary: string | null, symbols: Array }>} extracts
 * @param {{ maxBytes?: number, omitHeader?: boolean }} [options]
 *   `maxBytes` defaults to `DEFAULT_FORMAT_BYTES_CAP`; `omitHeader: true`
 *   drops the `## Repo map` header.
 * @returns {{ text: string, filesIncluded: number, filesTotal: number, bytes: number, truncated: boolean }}
 */
export function formatRepoMap(extracts, options = {}) {
    const maxBytes = options.maxBytes ?? DEFAULT_FORMAT_BYTES_CAP;
    const omitHeader = options.omitHeader === true;
    const prioritised = prioritise(extracts);
    const header = omitHeader
        ? ''
        : `## Repo map\n\n${prioritised.length} source files indexed.\n\n`;
    const headerBytes = byteLength(header);
    if (headerBytes >= maxBytes) {
        // Cap is smaller than even the header: emit nothing rather than a
        // cut-off header.
        return {
            text: '',
            filesIncluded: 0,
            filesTotal: extracts.length,
            bytes: 0,
            truncated: extracts.length > 0,
        };
    }
    const rows = [];
    let bytesUsed = headerBytes;
    let filesIncluded = 0;
    let truncated = false;
    for (let i = 0; i < prioritised.length; i += 1) {
        const row = renderRow(prioritised[i]);
        const rowBytes = byteLength(row);
        // While more files follow, keep 64 bytes free for the footer. That
        // overestimates the footer, whose length varies only with the
        // digits of the omitted-file count.
        const footerReserve = i + 1 < prioritised.length ? 64 : 0;
        if (bytesUsed + rowBytes + footerReserve > maxBytes) {
            truncated = true;
            break;
        }
        rows.push(row);
        bytesUsed += rowBytes;
        filesIncluded += 1;
    }
    let text = header + rows.join('');
    if (truncated) {
        const omitted = prioritised.length - filesIncluded;
        const footer = `\n... ${omitted} more file${omitted === 1 ? '' : 's'}\n`;
        // The reserve above guarantees room once at least one row was
        // admitted; this check covers the case where not even the first
        // row fit and the cap is too tight for the footer as well.
        if (bytesUsed + byteLength(footer) <= maxBytes) {
            text += footer;
        }
    }
    return {
        text,
        filesIncluded,
        filesTotal: extracts.length,
        bytes: byteLength(text),
        truncated,
    };
}
|
|
83
|
+
/* ----------------------------- helpers ----------------------------- */
|
|
84
|
+
/**
 * Return a sorted copy of `extracts`: files with more exported symbols
 * come first, ties are broken by ascending `relPath`. The input array is
 * not modified.
 *
 * @param {Array<{ relPath: string, symbols: Array<{ exported: boolean }> }>} extracts
 * @returns {Array} New array in priority order.
 */
export function prioritise(extracts) {
    const byExportsThenPath = (left, right) => {
        const delta = countExports(right) - countExports(left);
        if (delta !== 0) {
            return delta;
        }
        if (left.relPath < right.relPath) {
            return -1;
        }
        return left.relPath > right.relPath ? 1 : 0;
    };
    const ordered = [...extracts];
    ordered.sort(byExportsThenPath);
    return ordered;
}
/** Number of symbols in `extract.symbols` whose `exported` flag is truthy. */
function countExports(extract) {
    let total = 0;
    for (const { exported } of extract.symbols) {
        if (exported) {
            total += 1;
        }
    }
    return total;
}
|
|
105
|
+
/**
 * Render one file as a single newline-terminated row:
 *
 *   `- path/to/file.ts — summary — exports: Foo(class), bar(fn)`
 *
 * The summary segment is omitted when `extract.summary` is falsy. The
 * `exports:` segment is omitted when the file has no exported symbols;
 * otherwise it lists at most `MAX_SYMBOLS_PER_ROW` of them.
 *
 * @param {{ relPath: string, summary: string | null, symbols: Array }} extract
 * @returns {string}
 */
export function renderRow(extract) {
    const exportedSymbols = extract.symbols.filter((sym) => sym.exported);
    const segments = [`- ${extract.relPath}`];
    if (extract.summary) {
        segments.push(` — ${extract.summary}`);
    }
    if (exportedSymbols.length > 0) {
        const listed = exportedSymbols.slice(0, MAX_SYMBOLS_PER_ROW);
        segments.push(` — exports: ${formatSymbolList(listed)}`);
    }
    return `${segments.join('')}\n`;
}
|
|
121
|
+
/**
 * Join symbols as `name(kind)` pairs separated by `, `, using the short
 * kind label from `shortKind`.
 *
 * @param {Array<{ name: string, kind: string }>} symbols
 * @returns {string} Empty string for an empty list.
 */
function formatSymbolList(symbols) {
    return symbols.map((s) => `${s.name}(${shortKind(s.kind)})`).join(', ');
}
/**
 * Short label for a symbol kind as shown in a row. Kinds without a
 * dedicated label are returned unchanged, so an unexpected kind renders
 * as `name(kind)` instead of `name(undefined)`.
 *
 * @param {string} kind
 * @returns {string}
 */
function shortKind(kind) {
    switch (kind) {
        case 'function':
            return 'fn';
        case 'class':
            return 'class';
        case 'interface':
            return 'iface';
        case 'type':
            return 'type';
        case 'enum':
            return 'enum';
        case 'const':
            return 'const';
        case 'heading':
            return 'h';
        default:
            return kind;
    }
}
|
|
142
|
+
/**
 * Size of `s` in bytes when encoded as UTF-8 (not its character count).
 *
 * @param {string} s
 * @returns {number}
 */
function byteLength(s) {
    const encoding = 'utf8';
    return Buffer.byteLength(s, encoding);
}
|
|
145
|
+
//# sourceMappingURL=formatter.js.map
|