@pugi/cli 0.1.0-beta.22 → 0.1.0-beta.23
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/core/auth/env-provider.js +238 -0
- package/dist/core/diagnostics/probes/pugi-md.js +89 -0
- package/dist/core/engine/native-pugi.js +34 -1
- package/dist/core/pugi-md/context-injector.js +76 -0
- package/dist/core/pugi-md/walk-up.js +207 -0
- package/dist/core/release-notes/parser.js +241 -0
- package/dist/core/release-notes/state.js +116 -0
- package/dist/core/repl/session.js +107 -0
- package/dist/core/repl/slash-commands.js +35 -0
- package/dist/core/theme/context.js +91 -0
- package/dist/core/theme/presets.js +228 -0
- package/dist/core/theme/state.js +181 -0
- package/dist/core/vim/keymap.js +288 -0
- package/dist/core/vim/state.js +92 -0
- package/dist/runtime/cli.js +217 -14
- package/dist/runtime/commands/doctor.js +13 -0
- package/dist/runtime/commands/release-notes.js +229 -0
- package/dist/runtime/commands/theme.js +196 -0
- package/dist/runtime/commands/vim.js +140 -0
- package/dist/runtime/version.js +1 -1
- package/dist/tui/doctor-table.js +32 -17
- package/dist/tui/repl-render.js +17 -2
- package/dist/tui/repl.js +9 -1
- package/dist/tui/style-table.js +9 -3
- package/dist/tui/theme-table.js +29 -0
- package/dist/tui/vim-input.js +267 -0
- package/package.json +2 -2
- package/dist/core/engine/compaction-hook.js +0 -154
- package/dist/core/init/scaffold.js +0 -195
- package/dist/core/repl/codebase-survey.js +0 -308
- package/dist/core/repl/init-interview.js +0 -457
- package/dist/core/repl/onboarding-state.js +0 -297
|
@@ -0,0 +1,238 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* `pugi login --provider env` — env-var auth path (Leak L35).
|
|
3
|
+
*
|
|
4
|
+
* Claude Code, Codex CLI, and gh CLI all ship a way to authenticate via
|
|
5
|
+
* an environment variable so CI / container / scripted contexts can
|
|
6
|
+
* skip the device flow entirely. This module backs that path:
|
|
7
|
+
*
|
|
8
|
+
* 1. Resolve the candidate token (explicit `--key` flag beats
|
|
9
|
+
* `PUGI_API_KEY` env — same precedence as `gh auth login --token`).
|
|
10
|
+
* 2. Run a cheap local format check so an obviously malformed key
|
|
11
|
+
* (empty, whitespace, suspiciously short) fails fast WITHOUT
|
|
12
|
+
* shipping it to the server (no observability leak into the
|
|
13
|
+
* Anvil access log).
|
|
14
|
+
* 3. Call `GET /api/pugi/health` with `Authorization: Bearer <key>`
|
|
15
|
+
* so an expired / revoked / typo'd token surfaces immediately
|
|
16
|
+
* and the credential file never lands on disk for a dead key.
|
|
17
|
+
* 4. Map response to typed outcome the CLI dispatcher can render.
|
|
18
|
+
*
|
|
19
|
+
* The module is intentionally pure — fetch + reading env are injected,
|
|
20
|
+
* the writer is a separate concern. The CLI dispatcher composes
|
|
21
|
+
* `resolveEnvCandidateToken` + `assertTokenFormat` + `validateTokenAgainstHealth`
|
|
22
|
+
* and then writes the credential via `storeApiKey` on success.
|
|
23
|
+
*
|
|
24
|
+
* Failure modes are explicit so the dispatcher can pick the user-facing
|
|
25
|
+
* remediation string without re-parsing strings:
|
|
26
|
+
*
|
|
27
|
+
* - `missing` → no token in env or --key, halt with hint
|
|
28
|
+
* - `invalid-format` → token failed local format check, halt
|
|
29
|
+
* - `unauthorized` → server rejected the token (401 / 403)
|
|
30
|
+
* - `network-error` → fetch threw (DNS, refused, TLS)
|
|
31
|
+
* - `server-error` → server returned 5xx (transient — operator
|
|
32
|
+
* may want to retry once)
|
|
33
|
+
* - `unexpected-status`→ anything else non-2xx (treat as failure)
|
|
34
|
+
*
|
|
35
|
+
* NEVER log the raw token. Memory hits
|
|
36
|
+
* `feedback_no_claude_attribution_anywhere_hard_rule` plus the
|
|
37
|
+
* CSO bearer-leak sweep apply here. Use `maskApiKey` from
|
|
38
|
+
* `core/credentials.ts` when the dispatcher needs to surface the key
|
|
39
|
+
* to the operator.
|
|
40
|
+
*/
|
|
41
|
+
/**
|
|
42
|
+
* The minimum length below which we refuse to even ship the token to
|
|
43
|
+
* the server. Pugi-issued PATs are 48+ chars (`pugi_<32 base32>`), JWTs
|
|
44
|
+
* issued by the device flow are ~250 chars, legacy `sk-*` PATs we
|
|
45
|
+
* accept for compatibility are 32+. 16 is well below all three real
|
|
46
|
+
* shapes so it only catches obvious paste mistakes.
|
|
47
|
+
*/
|
|
48
|
+
export const MIN_TOKEN_LENGTH = 16;
|
|
49
|
+
/**
|
|
50
|
+
* The set of prefixes we recognise as plausibly-real Pugi-shaped
|
|
51
|
+
* tokens. Loose by design — the real validator is the server-side
|
|
52
|
+
* health probe. We just want to catch an operator who pasted the
|
|
53
|
+
* wrong string entirely (a username, a URL, a placeholder like
|
|
54
|
+
* "<your-key>") before it reaches the network.
|
|
55
|
+
*
|
|
56
|
+
* Three-segment JWTs are also accepted via the `looksLikeJwt`
|
|
57
|
+
* predicate so device-flow tokens copied out of `~/.pugi/credentials.json`
|
|
58
|
+
* on a different machine work.
|
|
59
|
+
*/
|
|
60
|
+
export const RECOGNISED_TOKEN_PREFIXES = ['pugi_', 'sk_', 'sk-', 'pat_'];
|
|
61
|
+
/**
 * Pick the token the operator most plausibly meant to use.
 *
 * An explicit `--key` value outranks the ambient `PUGI_API_KEY`
 * variable (most-specific operator intent wins over ambient env).
 * Each candidate is trimmed first; a candidate that is absent or
 * empty after trimming is skipped.
 *
 * @param {{ explicitKey?: string, env?: Record<string, string | undefined> }} input
 *   `env` falls back to `process.env` when omitted.
 * @returns {string | null} The trimmed token, or `null` when neither
 *   source yields a non-empty value.
 */
export function resolveEnvCandidateToken(input) {
  const flagValue = input.explicitKey?.trim();
  if (flagValue) {
    return flagValue;
  }
  // The environment is only consulted when the flag produced nothing.
  const envValue = (input.env ?? process.env).PUGI_API_KEY?.trim();
  return envValue ? envValue : null;
}
|
|
78
|
+
/**
 * Local-only format check. Returns `null` on accept, a human-readable
 * error string on reject. Deliberately lenient — the server-side
 * health probe is the source of truth. Only obvious paste mistakes are
 * caught here: empty, whitespace-laden, placeholder, URL, too short.
 *
 * Check order matters: the shape-specific checks (placeholder, URL)
 * run BEFORE the length check. Otherwise short inputs such as the
 * canonical `<your-key>` placeholder (10 chars) would always be
 * reported as "too short" and the more helpful hint would never fire.
 *
 * Tokens with an unrecognised prefix are accepted on purpose: rejecting
 * them would block legacy keys, and the server probe rejects genuinely
 * unknown keys anyway. `RECOGNISED_TOKEN_PREFIXES` and `looksLikeJwt`
 * stay exported so a caller can surface a soft warning if it wants to.
 *
 * @param {string} token Candidate token (already trimmed by the caller).
 * @returns {string | null} Error description, or `null` when the token
 *   passes the local gate.
 */
export function assertTokenFormat(token) {
  if (!token) {
    return 'Token is empty';
  }
  if (/\s/.test(token)) {
    return 'Token contains whitespace — check for shell quoting issues or a stray newline';
  }
  if (token.startsWith('<') && token.endsWith('>')) {
    return 'Token looks like a placeholder (`<your-key>`) — replace with the actual key';
  }
  if (/^https?:\/\//i.test(token)) {
    return 'Token looks like a URL — did you mean --api-url?';
  }
  if (token.length < MIN_TOKEN_LENGTH) {
    return `Token too short (${token.length} chars; Pugi tokens are >= ${MIN_TOKEN_LENGTH})`;
  }
  return null;
}
|
|
117
|
+
/**
 * Shape-only JWT detector: exactly three dot-separated segments, each
 * made of one or more base64url characters (`A-Z a-z 0-9 _ -`).
 *
 * The signature is NOT verified and the segments are not decoded; this
 * only recognises the outline of a token.
 *
 * @param {string} token
 * @returns {boolean} `true` when the token has the three-segment shape.
 */
export function looksLikeJwt(token) {
  const segments = token.split('.');
  if (segments.length !== 3) {
    return false;
  }
  for (const segment of segments) {
    if (segment.length === 0 || !/^[A-Za-z0-9_-]+$/.test(segment)) {
      return false;
    }
  }
  return true;
}
|
|
128
|
+
/**
 * Call `GET /api/pugi/health` with the candidate token and translate
 * the result into a typed outcome the CLI dispatcher can map directly
 * to an exit code + remediation string.
 *
 * Status mapping implemented below:
 *   - 200            → `{ kind: 'ok', latencyMs }`
 *   - 401 / 403      → `{ kind: 'unauthorized', status, message }`
 *   - >= 500         → `{ kind: 'server-error', status, message }`
 *   - anything else  → `{ kind: 'unexpected-status', status, message }`
 *   - fetch rejects  → `{ kind: 'network-error', message, cause }`
 *     (DNS, refused, TLS, or the timeout below)
 *
 * The response body is never read — the status code is the contract.
 *
 * The probe is bounded by a timeout so a black-holed host cannot hang
 * a CI job forever. When the timer fires the request is aborted and
 * reported as `network-error` with a "timed out" cause.
 *
 * @param {object} input
 * @param {string} input.apiUrl Base URL; trailing slash is tolerated.
 * @param {string} input.apiKey Token sent as `Authorization: Bearer`.
 *   Never logged or echoed into any returned message.
 * @param {Function} [input.fetchImpl] Fetch-compatible function; defaults
 *   to the global `fetch`.
 * @param {() => number} [input.now] Clock used for latency; defaults to
 *   `Date.now`.
 * @param {number} [input.timeoutMs] Abort the probe after this many
 *   milliseconds. Defaults to 30000. A non-finite or non-positive value
 *   disables the timeout.
 * @returns {Promise<object>} One of the outcome shapes listed above.
 */
export async function validateTokenAgainstHealth(input) {
  const fetchImpl = input.fetchImpl ?? fetch;
  const now = input.now ?? Date.now;
  const timeoutMs = input.timeoutMs ?? 30000;
  const url = `${stripTrailingSlash(input.apiUrl)}/api/pugi/health`;

  const timeoutEnabled = Number.isFinite(timeoutMs) && timeoutMs > 0;
  const controller = timeoutEnabled ? new AbortController() : undefined;
  const timer = controller
    ? setTimeout(() => {
        controller.abort(new Error(`Health probe timed out after ${timeoutMs} ms`));
      }, timeoutMs)
    : undefined;

  const started = now();
  let response;
  try {
    response = await fetchImpl(url, {
      method: 'GET',
      headers: {
        Authorization: `Bearer ${input.apiKey}`,
        Accept: 'application/json',
      },
      ...(controller ? { signal: controller.signal } : {}),
    });
  } catch (error) {
    // fetch rejected before a status code was observable (DNS failure,
    // ECONNREFUSED, TLS handshake, or our own timeout abort). The
    // message echoes the operator-supplied `apiUrl` exactly as given;
    // the low-level reason is kept separately in `cause` so the
    // dispatcher decides whether to print it.
    const cause = error instanceof Error ? error.message : String(error);
    return {
      kind: 'network-error',
      message: `Cannot reach ${input.apiUrl}; check your connection`,
      cause,
    };
  } finally {
    // Always release the timer so a fast response does not keep the
    // process alive until the timeout elapses.
    clearTimeout(timer);
  }

  const latencyMs = now() - started;
  const { status } = response;
  if (status === 200) {
    return { kind: 'ok', latencyMs };
  }
  if (status === 401 || status === 403) {
    return {
      kind: 'unauthorized',
      status,
      message: status === 401
        ? 'Token invalid or expired — run `pugi login --provider device` to get a fresh one'
        : 'Token recognised but the account is suspended — check `pugi whoami` on a working machine or contact support',
    };
  }
  if (status >= 500) {
    return {
      kind: 'server-error',
      status,
      message: `${input.apiUrl} returned ${status}; retry in a moment`,
    };
  }
  return {
    kind: 'unexpected-status',
    status,
    message: `Unexpected ${status} from /api/pugi/health; treat as login failure`,
  };
}
|
|
200
|
+
/**
 * End-to-end env login pipeline: resolve the candidate token, run the
 * local format gate, then (unless skipped) probe the health endpoint.
 *
 * Outcomes:
 *   - no token found        → `{ kind: 'missing', message }`
 *   - format gate rejected  → `{ kind: 'invalid-format', message }`
 *   - probe succeeded       → `{ kind: 'ok', token, latencyMs }`
 *   - probe failed          → the probe's own outcome object, unchanged
 *
 * @param {object} input
 * @param {string} [input.explicitKey] Value of the `--key` flag.
 * @param {object} [input.env] Environment map forwarded to the resolver.
 * @param {boolean} [input.skipValidate] Skip the network probe and
 *   report success with `latencyMs: 0`.
 * @param {string} input.apiUrl Forwarded to the health probe.
 * @param {Function} [input.fetchImpl] Forwarded to the health probe.
 * @param {() => number} [input.now] Forwarded to the health probe.
 * @returns {Promise<object>} One of the outcome shapes above.
 */
export async function resolveAndValidateEnvLogin(input) {
  const { explicitKey, env, skipValidate, apiUrl, fetchImpl, now } = input;

  const token = resolveEnvCandidateToken({ explicitKey, env });
  if (!token) {
    return {
      kind: 'missing',
      message: 'pugi login --provider env requires a token. Export PUGI_API_KEY in the current shell or pass --key <value>.',
    };
  }

  const formatError = assertTokenFormat(token);
  if (formatError) {
    return {
      kind: 'invalid-format',
      message: `pugi login --provider env: ${formatError}`,
    };
  }

  // Escape hatch for the `login-variants.spec.ts` regression suite so
  // the test plane needs no live network; production always validates.
  if (skipValidate) {
    return { kind: 'ok', token, latencyMs: 0 };
  }

  const probe = await validateTokenAgainstHealth({
    apiUrl,
    apiKey: token,
    fetchImpl,
    now,
  });
  // Success is re-wrapped so the caller also receives the token;
  // failures pass through untouched.
  return probe.kind === 'ok'
    ? { kind: 'ok', token, latencyMs: probe.latencyMs }
    : probe;
}
|
|
235
|
+
/**
 * Remove every trailing `/` from a base URL so a path can be appended
 * with exactly one separator.
 *
 * Stripping only a single slash would turn `https://host//` into
 * `https://host//api/...`. A plain index scan is used instead of a
 * regex so the cost stays linear on any input.
 *
 * @param {string} url
 * @returns {string} `url` without trailing slashes (the same string
 *   when there were none).
 */
function stripTrailingSlash(url) {
  let end = url.length;
  while (end > 0 && url.charCodeAt(end - 1) === 47 /* '/' */) {
    end -= 1;
  }
  return end === url.length ? url : url.slice(0, end);
}
|
|
238
|
+
//# sourceMappingURL=env-provider.js.map
|
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* PUGI.md hierarchy probe — Leak L32 (2026-05-27).
|
|
3
|
+
*
|
|
4
|
+
* Surfaces how many ambient `PUGI.md` / `CLAUDE.md` files were
|
|
5
|
+
* discovered by the cwd → homedir walk-up. Operators triaging "why
|
|
6
|
+
* is the model not following my project conventions" can run
|
|
7
|
+
* `pugi doctor` and immediately see whether the hierarchy walk
|
|
8
|
+
* loaded the file they expected.
|
|
9
|
+
*
|
|
10
|
+
* Status semantics:
|
|
11
|
+
* - `skipped` when bare mode is active (the walk is deliberately
|
|
12
|
+
* disabled). The row still renders so the JSON schema stays
|
|
13
|
+
* stable for downstream consumers.
|
|
14
|
+
* - `skipped` when zero files were found. This is the default
|
|
15
|
+
* state on a clean machine and is NOT an error — most operators
|
|
16
|
+
* do not maintain a `~/PUGI.md`.
|
|
17
|
+
* - `ok` when one or more files were discovered. The detail names
|
|
18
|
+
* the closest file and the total count.
|
|
19
|
+
*
|
|
20
|
+
* Side effects:
|
|
21
|
+
* - One filesystem walk from cwd to homedir (bounded by the walker's
|
|
22
|
+
* depth cap). Each level performs at most 2 `existsSync` calls.
|
|
23
|
+
* Cost is single-digit ms even on cold cache; well inside the
|
|
24
|
+
* doctor probe wall-clock budget.
|
|
25
|
+
*
|
|
26
|
+
* Wired into `buildDefaultProbes` in `runtime/commands/doctor.ts`.
|
|
27
|
+
*/
|
|
28
|
+
import { isBareMode } from '../../bare-mode/index.js';
|
|
29
|
+
import { walkUpPugiMd } from '../../pugi-md/walk-up.js';
|
|
30
|
+
export const PUGI_MD_DOCTOR_LABEL = 'PUGI.md HIERARCHY';
|
|
31
|
+
/** Detail string emitted when bare mode disables the walk. */
|
|
32
|
+
export const PUGI_MD_BARE_SKIP_DETAIL = 'skipped (--bare)';
|
|
33
|
+
/** Detail string emitted when the walk ran but found nothing. */
|
|
34
|
+
export const PUGI_MD_EMPTY_DETAIL = 'no PUGI.md / CLAUDE.md found in cwd → homedir';
|
|
35
|
+
/**
 * Doctor probe for the ambient `PUGI.md` / `CLAUDE.md` hierarchy.
 *
 * Produces one `{ name, status, detail }` row:
 *   - `skipped` + bare-mode detail when `isBareMode(env)` is true
 *   - `warn` when the walker itself throws
 *   - `skipped` + empty detail when the walker returns no files
 *   - `ok` with the file count and the first (closest-to-cwd) path
 *
 * @param {object} [input]
 * @param {object} [input.env] Defaults to `process.env`.
 * @param {string} [input.cwd] Defaults to `process.cwd()`.
 * @param {string} [input.homedir] Forwarded to the walker only when set.
 * @param {Function} [input.walkImpl] Walker override `(cwd, opts) => files`;
 *   defaults to `walkUpPugiMd`.
 * @returns {{ name: string, status: string, detail: string }}
 */
export function probePugiMdHierarchy(input = {}) {
  const row = (status, detail) => ({ name: PUGI_MD_DOCTOR_LABEL, status, detail });

  if (isBareMode(input.env ?? process.env)) {
    return row('skipped', PUGI_MD_BARE_SKIP_DETAIL);
  }

  const startDir = input.cwd ?? process.cwd();
  const walker = input.walkImpl ?? ((c, o) => walkUpPugiMd(c, o));
  let found;
  try {
    found = walker(startDir, input.homedir !== undefined ? { homedir: input.homedir } : {});
  } catch {
    // The walker already swallows per-file fs errors, so a throw here
    // signals bad input. Report a `warn` row instead of crashing the
    // whole doctor sweep.
    return row('warn', 'walk-up failed (see logs)');
  }

  if (found.length === 0) {
    return row('skipped', PUGI_MD_EMPTY_DETAIL);
  }
  // The walker returns cwd first, so index 0 is the file closest to
  // the working directory. The extra falsy guard covers a hole at
  // index 0 even though the length check above normally rules it out.
  const nearest = found[0];
  if (!nearest) {
    return row('skipped', PUGI_MD_EMPTY_DETAIL);
  }

  const total = found.length;
  const plural = total === 1 ? '' : 's';
  const extra = total === 1 ? '' : ` (+${total - 1} more)`;
  return row('ok', `${total} file${plural}: ${nearest.path}${extra}`);
}
|
|
89
|
+
//# sourceMappingURL=pugi-md.js.map
|
|
@@ -18,6 +18,8 @@ import { buildContextPrefix, spliceContextPrefix } from './context-prefix.js';
|
|
|
18
18
|
import { applyIntentMarker, classifyIntent } from './intent.js';
|
|
19
19
|
import { loadTraversedMarkdown } from '../context/markdown-traverse.js';
|
|
20
20
|
import { isBareMode } from '../bare-mode/index.js';
|
|
21
|
+
import { walkUpPugiMd } from '../pugi-md/walk-up.js';
|
|
22
|
+
import { renderAmbientContext } from '../pugi-md/context-injector.js';
|
|
21
23
|
// α7 L11 (2026-05-27): per-session DenialTrackingState. One instance
|
|
22
24
|
// per `run()` so denials cluster by (tool, args) within the same
|
|
23
25
|
// command but do NOT leak across CLI invocations.
|
|
@@ -234,6 +236,30 @@ export class NativePugiEngineAdapter {
|
|
|
234
236
|
// accurate; the REPL session retains the launch cwd for the
|
|
235
237
|
// lifetime of the session which is what the operator expects.
|
|
236
238
|
const cwdForTraverse = process.cwd();
|
|
239
|
+
// Leak L32 (2026-05-27): cwd → homedir walk-up that picks up every
|
|
240
|
+
// ambient `PUGI.md` (or `CLAUDE.md` as a fallback) the operator
|
|
241
|
+
// has placed above their workspace. This is the cross-project
|
|
242
|
+
// hierarchy walk — distinct from the workspace-bounded
|
|
243
|
+
// `loadTraversedMarkdown` below which only sees files INSIDE the
|
|
244
|
+
// workspace root. Render the concatenation once at session boot
|
|
245
|
+
// and prepend to the system prompt so the model treats the
|
|
246
|
+
// operator's personal guidance as ambient context for the whole
|
|
247
|
+
// session. `--bare` (Leak L22) skips this walk entirely.
|
|
248
|
+
let ambientContextBlock = '';
|
|
249
|
+
if (!isBareMode()) {
|
|
250
|
+
try {
|
|
251
|
+
const hierarchy = walkUpPugiMd(cwdForTraverse);
|
|
252
|
+
ambientContextBlock = renderAmbientContext(hierarchy);
|
|
253
|
+
}
|
|
254
|
+
catch {
|
|
255
|
+
// Pure FS surface — if it throws (programmer error in the
|
|
256
|
+
// walker, not a per-file fs error which is already swallowed
|
|
257
|
+
// inside) we drop ambient context for this session rather
|
|
258
|
+
// than crashing the engine loop. Doctor probe still surfaces
|
|
259
|
+
// the hierarchy state for operator triage.
|
|
260
|
+
ambientContextBlock = '';
|
|
261
|
+
}
|
|
262
|
+
}
|
|
237
263
|
let traverseResult;
|
|
238
264
|
// Leak L22 (2026-05-27): `--bare` skips the parent-dir PUGI.md /
|
|
239
265
|
// AGENTS.md / CLAUDE.md / GEMINI.md walk-up. The engine sees only
|
|
@@ -548,7 +574,14 @@ export class NativePugiEngineAdapter {
|
|
|
548
574
|
// pattern instead of re-issuing the same refused call.
|
|
549
575
|
denialTracking,
|
|
550
576
|
}),
|
|
551
|
-
|
|
577
|
+
// Leak L32 (2026-05-27): ambient `PUGI.md` hierarchy block
|
|
578
|
+
// prepended once at session boot. When the walk found
|
|
579
|
+
// nothing OR bare mode is on, `ambientContextBlock === ''`
|
|
580
|
+
// and the system prompt is unchanged — no leading blank
|
|
581
|
+
// line, no empty wrapper tag.
|
|
582
|
+
systemPrompt: ambientContextBlock
|
|
583
|
+
? `${ambientContextBlock}\n\n${systemPromptFor(kind)}`
|
|
584
|
+
: systemPromptFor(kind),
|
|
552
585
|
// β5a R5+R6+P1: per-turn `<context>` prefix + intent marker
|
|
553
586
|
// applied above. Falls back to verbatim `task.prompt` when
|
|
554
587
|
// both the prefix block is empty AND the intent classifier
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Aggregate byte cap on the full rendered block. 96 KB = 3 files at
|
|
3
|
+
* the per-file cap, which is enough for cwd + parent + homedir while
|
|
4
|
+
* leaving plenty of prompt budget for the rest of the system prompt.
|
|
5
|
+
* Anything beyond is replaced with a truncation marker.
|
|
6
|
+
*/
|
|
7
|
+
export const MAX_INJECT_BYTES = 96 * 1024;
|
|
8
|
+
/**
|
|
9
|
+
* Marker line emitted when the aggregate cap is hit. Visible to the
|
|
10
|
+
* model so it knows ambient context was clipped; visible to the
|
|
11
|
+
* operator via the doctor probe so they can decide whether to trim
|
|
12
|
+
* their `PUGI.md` hierarchy.
|
|
13
|
+
*/
|
|
14
|
+
export const TRUNCATION_MARKER = '<ambient-context-truncated reason="aggregate-cap" />';
|
|
15
|
+
/**
 * Turn a list of hierarchy files into the block that is prepended to
 * the system prompt.
 *
 * Every file is rendered through `renderStanza`; stanzas are joined
 * with a single `\n`. An empty list yields `''`.
 *
 * Budget: each stanza costs its UTF-8 byte length plus one byte for
 * the joining newline. As soon as a stanza does not fit in what is
 * left of `MAX_INJECT_BYTES`, rendering stops (later files are not
 * considered) and `TRUNCATION_MARKER` is appended as the final line.
 *
 * Output depends only on the input, so identical input produces
 * byte-identical output.
 *
 * @param {Array<object>} files Hierarchy rows as produced by the walker.
 * @returns {string} The rendered block, possibly ending in the marker.
 */
export function renderAmbientContext(files) {
  if (files.length === 0) {
    return '';
  }
  const rendered = [];
  let budgetLeft = MAX_INJECT_BYTES;
  for (const entry of files) {
    const block = renderStanza(entry);
    const cost = Buffer.byteLength(block, 'utf8') + 1; // +1 for the join newline
    if (cost > budgetLeft) {
      rendered.push(TRUNCATION_MARKER);
      break;
    }
    budgetLeft -= cost;
    rendered.push(block);
  }
  return rendered.join('\n');
}
|
|
43
|
+
/**
 * Render one hierarchy file as an `<ambient-context>` element.
 *
 * Layout is three newline-separated parts: the opening tag carrying
 * `source` (the file path, attribute-escaped) and `level`, the file
 * content with trailing whitespace removed, and the closing tag.
 * No newline follows the closing tag — the caller's join supplies the
 * separator between stanzas.
 *
 * @param {{ path: string, level: number, content: string }} file
 * @returns {string} The rendered stanza.
 */
function renderStanza(file) {
  const source = escapeAttr(file.path);
  const level = String(file.level);
  // Dropping trailing whitespace keeps the closing tag directly under
  // the last content line when the assembled prompt is dumped.
  const body = file.content.trimEnd();
  return `<ambient-context source="${source}" level="${level}">\n${body}\n</ambient-context>`;
}
|
|
63
|
+
/**
 * Escape a string for use inside a double-quoted XML attribute value.
 *
 * `&`, `"`, `<` and `>` are replaced by their named entities. The
 * ampersand is handled FIRST so the `&` introduced by the later
 * replacements is not escaped a second time.
 *
 * Paths are operator-controlled rather than adversarial, but these
 * characters are legal in file names, and an unescaped `"` would end
 * the `source` attribute early.
 *
 * @param {string} value Raw attribute text (a file path here).
 * @returns {string} The escaped text.
 */
function escapeAttr(value) {
  return value
    .replace(/&/g, '&amp;')
    .replace(/"/g, '&quot;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;');
}
|
|
76
|
+
//# sourceMappingURL=context-injector.js.map
|
|
@@ -0,0 +1,207 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Leak L32 (2026-05-27) — `PUGI.md` hierarchy walk-up to `$HOME`.
|
|
3
|
+
*
|
|
4
|
+
* Claude Code walks from `cwd` upward toward the user's homedir and
|
|
5
|
+
* concatenates every `CLAUDE.md` it finds at each intermediate level
|
|
6
|
+
* (deepest overrides shallowest). Pugi parity: same walk, looking for
|
|
7
|
+
* `PUGI.md` first at each level and accepting `CLAUDE.md` as a fallback
|
|
8
|
+
* — operators often have a leftover `~/CLAUDE.md` or a parent-dir
|
|
9
|
+
* `CLAUDE.md` from a previous Claude Code session and we want their
|
|
10
|
+
* ambient guidance picked up automatically without a migration step.
|
|
11
|
+
*
|
|
12
|
+
* Why this is a separate module from `core/context/markdown-traverse.ts`:
|
|
13
|
+
*
|
|
14
|
+
* - `markdown-traverse.ts` is the *workspace-bounded* walk (cwd → up
|
|
15
|
+
* to but NOT including `workspaceRoot`). It guards every read by
|
|
16
|
+
* `realpathSync` containment against the workspace root and
|
|
17
|
+
* refuses to escape — by design, because the per-dir markdown is
|
|
18
|
+
* part of the project's first-party context.
|
|
19
|
+
*
|
|
20
|
+
* - This module is the *home-bounded* walk (cwd → up to `homedir()`,
|
|
21
|
+
* OR until depth limit). It picks up the operator's personal /
|
|
22
|
+
* global guidance that lives ABOVE the workspace root. The two
|
|
23
|
+
* surfaces are complementary: workspace markdown encodes project
|
|
24
|
+
* conventions; this hierarchy walk encodes operator-level taste
|
|
25
|
+
* (preferred libraries, "always run prettier", style guides).
|
|
26
|
+
*
|
|
27
|
+
* Contract:
|
|
28
|
+
*
|
|
29
|
+
* - Walks from `cwd` upward. At each directory checks `PUGI.md`
|
|
30
|
+
* (preferred); when absent falls back to `CLAUDE.md`. Only ONE
|
|
31
|
+
* file per level is loaded — preferred wins.
|
|
32
|
+
* - Stops at `homedir()` INCLUSIVE — the file at `~/PUGI.md` or
|
|
33
|
+
* `~/CLAUDE.md` IS loaded (Claude Code parity: a `~/CLAUDE.md`
|
|
34
|
+
* applies to every project the operator opens).
|
|
35
|
+
* - Hard depth cap of `MAX_WALK_DEPTH` (20) directories regardless
|
|
36
|
+
* of how far cwd is from homedir; defense against symlinked or
|
|
37
|
+
* malicious cwd values.
|
|
38
|
+
* - Per-file byte cap `MAX_FILE_BYTES` (32 KB); over-cap files are
|
|
39
|
+
* truncated, not rejected, so a runaway `PUGI.md` does not break
|
|
40
|
+
* the prompt budget.
|
|
41
|
+
* - Returns shallow-to-deep order (cwd FIRST, homedir LAST). The
|
|
42
|
+
* caller is responsible for rendering precedence — Claude Code's
|
|
43
|
+
* rule is "deeper overrides shallower", which means the LAST
|
|
44
|
+
* entry in the rendered system prompt wins. Our order matches
|
|
45
|
+
* that convention so the context injector can splice directly.
|
|
46
|
+
*
|
|
47
|
+
* Safety:
|
|
48
|
+
*
|
|
49
|
+
* - No `realpath` on the directories themselves: the operator's
|
|
50
|
+
* cwd may live under a workspace symlink (common with macOS
|
|
51
|
+
* `/private/var/...`) and we want to honor what the operator
|
|
52
|
+
* sees in their shell. We DO resolve the candidate file via
|
|
53
|
+
* `realpathSync` before reading, but only to defeat
|
|
54
|
+
* symlinks-pointing-outside-homedir attacks; an off-tree symlink
|
|
55
|
+
* is skipped silently.
|
|
56
|
+
* - Catch + skip every fs error per file. The walk-up surface MUST
|
|
57
|
+
* NEVER break engine boot — missing read perms on a parent dir
|
|
58
|
+
* is the common case (e.g. `/etc` on a corp laptop) and the
|
|
59
|
+
* fallback is "no ambient context", not a crash.
|
|
60
|
+
*
|
|
61
|
+
* Pure module: no logging, no network, no fs writes.
|
|
62
|
+
*/
|
|
63
|
+
import { existsSync, readFileSync, realpathSync, statSync } from 'node:fs';
import { homedir } from 'node:os';
import { dirname, resolve } from 'node:path';
|
|
65
|
+
/**
|
|
66
|
+
* Hard ceiling on parent-dir traversal depth. 20 is generous — even
|
|
67
|
+
* deep monorepo layouts rarely sit more than 8-10 levels below the
|
|
68
|
+
* homedir on a developer's laptop. The cap exists so a misconfigured
|
|
69
|
+
* cwd (e.g. cwd outside the user's home filesystem entirely) cannot
|
|
70
|
+
* cause a multi-second fs scan of unrelated directories.
|
|
71
|
+
*/
|
|
72
|
+
export const MAX_WALK_DEPTH = 20;
|
|
73
|
+
/**
|
|
74
|
+
* Per-file byte cap. 32 KB matches the per-dir markdown traverse
|
|
75
|
+
* aggregate budget — generous enough for a fully written-out
|
|
76
|
+
* project / personal `PUGI.md` (~8000 words) while keeping any one
|
|
77
|
+
* file from blowing the prompt budget on its own.
|
|
78
|
+
*/
|
|
79
|
+
export const MAX_FILE_BYTES = 32 * 1024;
|
|
80
|
+
/**
|
|
81
|
+
* Filenames consulted at each level, in lookup order. `PUGI.md` is
|
|
82
|
+
* preferred — when both files coexist in a directory the Pugi-native
|
|
83
|
+
* file wins and the Claude Code shim is ignored. This is the same
|
|
84
|
+
* precedence used by `markdown-traverse.ts` for workspace-bounded
|
|
85
|
+
* walks; keeping the two surfaces consistent removes the "why does
|
|
86
|
+
* Pugi sometimes read CLAUDE.md and sometimes PUGI.md?" foot-gun.
|
|
87
|
+
*/
|
|
88
|
+
export const HIERARCHY_SOURCES = ['PUGI.md', 'CLAUDE.md'];
|
|
89
|
+
/**
 * Walk from `cwd` upward, collecting at most one ambient `PUGI.md` /
 * `CLAUDE.md` per directory, until the home directory (inclusive), the
 * filesystem root, or the depth limit is reached.
 *
 * The home boundary defaults to the current user's home directory when
 * `opts.homedir` is not supplied. Without that default the walk would
 * never stop at home for callers that pass no options, and files that
 * live ABOVE the home directory would be picked up.
 *
 * When `cwd` is outside the home tree the boundary is never met and
 * the walk ends at the filesystem root or the depth limit.
 *
 * @param {string} cwd Starting directory (resolved to an absolute path).
 * @param {object} [opts]
 * @param {string} [opts.homedir] Directory at which the walk stops
 *   (inclusive). Defaults to `os.homedir()`.
 * @param {number} [opts.limit] Maximum number of parent hops; clamped
 *   by `clampLimit`. Level 0 (`cwd` itself) is always visited.
 * @returns {Array<object>} Rows from `tryLoadDirectory` in walk order:
 *   `cwd` first, then each ancestor, the home directory last. `[]` when
 *   nothing was found or `cwd` cannot be resolved.
 */
export function walkUpPugiMd(cwd, opts = {}) {
  const limit = clampLimit(opts.limit);
  let absCwd;
  try {
    absCwd = resolve(cwd);
  } catch {
    return [];
  }
  const absHome = resolveHomeBoundary(opts.homedir);

  const results = [];
  const visited = new Set();
  let current = absCwd;
  let level = 0;
  while (level <= limit) {
    if (visited.has(current)) {
      break; // defensive loop guard; should not trigger for plain dirname() hops
    }
    visited.add(current);

    const found = tryLoadDirectory(current, level);
    if (found) {
      results.push(found);
    }
    // Inclusive home boundary: the home directory itself was just
    // examined above, so stop before climbing any further.
    if (absHome && current === absHome) {
      break;
    }
    const parent = dirname(current);
    if (parent === current) {
      break; // reached the filesystem root without meeting the boundary
    }
    current = parent;
    level += 1;
  }
  return results;
}

/**
 * Absolute path of the directory where the walk must stop. Prefers the
 * caller-supplied value; otherwise asks the OS. Returns `undefined`
 * (no boundary) when the OS lookup fails, because the walk-up surface
 * must never break engine boot.
 */
function resolveHomeBoundary(explicitHome) {
  if (explicitHome) {
    return resolve(explicitHome);
  }
  try {
    const detected = homedir();
    return detected ? resolve(detected) : undefined;
  } catch {
    return undefined;
  }
}
|
|
134
|
+
/**
 * Load the first usable file from `HIERARCHY_SOURCES` found in `dir`.
 *
 * For each candidate name, in order: skip it if it does not exist;
 * otherwise resolve symlinks, stat and read it. Any fs error on a
 * candidate (broken link, permissions, path is a directory, ...) skips
 * that candidate and moves on to the next name. NEVER throws for fs
 * errors.
 *
 * Content larger than `MAX_FILE_BYTES` is cut down to at most that
 * many UTF-8 BYTES, ending on a character boundary, and flagged
 * `truncated`. (Slicing by string index is not enough: multi-byte text
 * can be far over the byte cap while being under it in characters.)
 *
 * NOTE(review): the file path is canonicalised with `realpathSync`,
 * but no check is made here that the resolved target stays inside the
 * home directory — a symlink pointing elsewhere is followed and read.
 * Confirm whether a containment check is required.
 *
 * @param {string} dir Directory to inspect (used as given, not realpath'd).
 * @param {number} level Hop count from the starting directory.
 * @returns {{ path: string, content: string, level: number, source: string,
 *   truncated: boolean, rawBytes: number } | undefined} Row for the
 *   loaded file (`path` is the resolved real path, `rawBytes` the
 *   on-disk size), or `undefined` when nothing could be loaded.
 */
function tryLoadDirectory(dir, level) {
  for (const source of HIERARCHY_SOURCES) {
    const candidate = resolve(dir, source);
    if (!existsSync(candidate)) {
      continue;
    }
    let realPath;
    let rawBytes;
    let content;
    try {
      realPath = realpathSync(candidate);
      rawBytes = statSync(realPath).size;
      content = readFileSync(realPath, 'utf8');
    } catch {
      // Unreadable at this name — fall through to the next source.
      continue;
    }
    let truncated = false;
    if (Buffer.byteLength(content, 'utf8') > MAX_FILE_BYTES) {
      content = truncateUtf8(content, MAX_FILE_BYTES);
      truncated = true;
    }
    return {
      path: realPath,
      content,
      level,
      source,
      truncated,
      rawBytes,
    };
  }
  return undefined;
}

/**
 * Cut `text` to at most `maxBytes` of UTF-8 without splitting a
 * multi-byte character.
 */
function truncateUtf8(text, maxBytes) {
  const encoded = Buffer.from(text, 'utf8');
  if (encoded.length <= maxBytes) {
    return text;
  }
  let end = maxBytes;
  // `encoded[end]` is the first byte that would be dropped. If it is a
  // continuation byte (10xxxxxx) the cut is mid-character, so back up
  // to the start of that character.
  while (end > 0 && (encoded[end] & 0xc0) === 0x80) {
    end -= 1;
  }
  return encoded.toString('utf8', 0, end);
}
|
|
193
|
+
/**
 * Normalise a caller-supplied depth limit into `[0, MAX_WALK_DEPTH]`.
 *
 *   - not a finite number (missing, NaN, ±Infinity, wrong type)
 *     → `MAX_WALK_DEPTH`
 *   - above `MAX_WALK_DEPTH` → `MAX_WALK_DEPTH`
 *   - negative → `0`
 *   - otherwise → the value rounded down
 *
 * A result of `0` means "starting directory only": level 0 is always
 * examined by the walker.
 *
 * @param {unknown} limit
 * @returns {number} The clamped limit.
 */
function clampLimit(limit) {
  const isUsableNumber = typeof limit === 'number' && Number.isFinite(limit);
  if (!isUsableNumber || limit > MAX_WALK_DEPTH) {
    return MAX_WALK_DEPTH;
  }
  return limit < 0 ? 0 : Math.floor(limit);
}
|
|
207
|
+
//# sourceMappingURL=walk-up.js.map
|