@pugi/cli 0.1.0-beta.22 → 0.1.0-beta.23

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,238 @@
1
+ /**
2
+ * `pugi login --provider env` — env-var auth path (Leak L35).
3
+ *
4
+ * Claude Code, Codex CLI, and gh CLI all ship a way to authenticate via
5
+ * an environment variable so CI / container / scripted contexts can
6
+ * skip the device flow entirely. This module backs that path:
7
+ *
8
+ * 1. Resolve the candidate token (explicit `--key` flag beats
9
+ * `PUGI_API_KEY` env — same precedence as `gh auth login --token`).
10
+ * 2. Run a cheap local format check so an obviously malformed key
11
+ * (empty, whitespace, suspiciously short) fails fast WITHOUT
12
+ * shipping it to the server (no observability leak into the
13
+ * Anvil access log).
14
+ * 3. Call `GET /api/pugi/health` with `Authorization: Bearer <key>`
15
+ * so an expired / revoked / typo'd token surfaces immediately
16
+ * and the credential file never lands on disk for a dead key.
17
+ * 4. Map response to typed outcome the CLI dispatcher can render.
18
+ *
19
+ * The module is intentionally pure — fetch + reading env are injected,
20
+ * the writer is a separate concern. The CLI dispatcher composes
21
+ * `resolveEnvCandidateToken` + `assertTokenFormat` + `validateTokenAgainstHealth`
22
+ * and then writes the credential via `storeApiKey` on success.
23
+ *
24
+ * Failure modes are explicit so the dispatcher can pick the user-facing
25
+ * remediation string without re-parsing strings:
26
+ *
27
+ * - `missing` → no token in env or --key, halt with hint
28
+ * - `invalid-format` → token failed local format check, halt
29
+ * - `unauthorized` → server rejected the token (401 / 403)
30
+ * - `network-error` → fetch threw (DNS, refused, TLS)
31
+ * - `server-error` → server returned 5xx (transient — operator
32
+ * may want to retry once)
33
+ * - `unexpected-status`→ anything else non-2xx (treat as failure)
34
+ *
35
+ * NEVER log the raw token. Memory hits
36
+ * `feedback_no_claude_attribution_anywhere_hard_rule` plus the
37
+ * CSO bearer-leak sweep apply here. Use `maskApiKey` from
38
+ * `core/credentials.ts` when the dispatcher needs to surface the key
39
+ * to the operator.
40
+ */
41
/**
 * The minimum length below which we refuse to even ship the token to
 * the server. Pugi-issued PATs are 48+ chars (`pugi_<32 base32>`), JWTs
 * issued by the device flow are ~250 chars, legacy `sk-*` PATs we
 * accept for compatibility are 32+. 16 is well below all three real
 * shapes so it only catches obvious paste mistakes.
 *
 * The comparison is made against `String.prototype.length`, i.e. UTF-16
 * code units.
 */
export const MIN_TOKEN_LENGTH = 16;
/**
 * The set of prefixes we recognise as plausibly-real Pugi-shaped
 * tokens. Loose by design — the real validator is the server-side
 * health probe. We just want to catch an operator who pasted the
 * wrong string entirely (a username, a URL, a placeholder like
 * "<your-key>") before it reaches the network.
 *
 * Three-segment JWTs are also accepted via the `looksLikeJwt`
 * predicate so device-flow tokens copied out of `~/.pugi/credentials.json`
 * on a different machine work.
 *
 * Frozen: this array is exported module state, so an importer that
 * pushed to or spliced it would silently change the recognised set for
 * every other consumer in the process.
 */
export const RECOGNISED_TOKEN_PREFIXES = Object.freeze(['pugi_', 'sk_', 'sk-', 'pat_']);
61
/**
 * Pick the token the operator most specifically asked for.
 *
 * Lookup order:
 *   1. `input.explicitKey` (the `--key` flag), trimmed.
 *   2. `PUGI_API_KEY` read from `input.env`, or from `process.env`
 *      when no env object was injected, trimmed.
 *
 * A value that is missing or trims down to the empty string is treated
 * as absent, so a blank flag falls through to the environment.
 *
 * @param {{ explicitKey?: string, env?: Record<string, string | undefined> }} input
 * @returns {string | null} The trimmed token, or `null` when neither
 *   source produced a non-empty value.
 */
export function resolveEnvCandidateToken(input) {
    const fromFlag = input.explicitKey?.trim();
    if (fromFlag) {
        return fromFlag;
    }
    // Only consult the environment once the flag has been ruled out.
    const environment = input.env ?? process.env;
    const fromEnvironment = environment.PUGI_API_KEY?.trim();
    return fromEnvironment ? fromEnvironment : null;
}
78
/**
 * Local-only format check. Returns `null` on accept, a human-readable
 * error string on reject. Deliberately lenient — the server-side
 * health probe is the source of truth. We only catch obvious paste
 * mistakes (empty, whitespace-laden, looks like a placeholder or a
 * URL, too short).
 *
 * Check order matters for the quality of the message: the placeholder
 * and URL diagnoses run BEFORE the length gate, because the typical
 * placeholder (`<your-key>`) and many short URLs are themselves below
 * `MIN_TOKEN_LENGTH` and would otherwise only ever be reported as
 * "too short". Every input rejected before is still rejected; only
 * the wording for those overlapping cases becomes more specific.
 *
 * A token with an unrecognised prefix that is not JWT-shaped is
 * accepted on purpose: blocking it would reject legacy keys, and the
 * server probe catches genuinely unknown keys. No prefix inspection is
 * performed here because it could not change the outcome.
 *
 * @param {string} token Candidate token (expected to be pre-trimmed).
 * @returns {string | null} `null` when the token may be sent to the
 *   server, otherwise the reason it was refused.
 */
export function assertTokenFormat(token) {
    if (!token) {
        return 'Token is empty';
    }
    if (/\s/.test(token)) {
        return 'Token contains whitespace — check for shell quoting issues or a stray newline';
    }
    if (token.startsWith('<') && token.endsWith('>')) {
        return 'Token looks like a placeholder (`<your-key>`) — replace with the actual key';
    }
    if (/^https?:\/\//i.test(token)) {
        return 'Token looks like a URL — did you mean --api-url?';
    }
    if (token.length < MIN_TOKEN_LENGTH) {
        return `Token too short (${token.length} chars; Pugi tokens are >= ${MIN_TOKEN_LENGTH})`;
    }
    return null;
}
117
/**
 * Shape-only JWT detector: exactly three dot-separated segments, each
 * non-empty and made up solely of base64url characters
 * (`A-Z`, `a-z`, `0-9`, `_`, `-`).
 *
 * Nothing is decoded and no signature is verified — the point is only
 * to let device-flow tokens copied between machines pass the local
 * format gate.
 *
 * @param {string} token
 * @returns {boolean} `true` when the string has the three-segment shape.
 */
export function looksLikeJwt(token) {
    const segments = token.split('.');
    if (segments.length !== 3) {
        return false;
    }
    for (const segment of segments) {
        const isBase64Url = /^[A-Za-z0-9_-]+$/.test(segment);
        if (!isBase64Url || segment.length === 0) {
            return false;
        }
    }
    return true;
}
128
/**
 * Call `GET /api/pugi/health` with the candidate token. Returns a
 * typed outcome that the CLI dispatcher can map directly to an exit
 * code + remediation string.
 *
 * Health endpoint conventions (see apps/admin-api):
 *   - 200 → token is valid, account is active
 *   - 401 → token unknown / malformed at the server boundary
 *   - 403 → token recognised but the account is suspended / paused
 *   - 5xx → server-side issue, operator can retry
 *   - network throw → DNS, refused, TLS — operator's connectivity issue
 *
 * Only the status code is inspected. Because the body is never read,
 * it is explicitly cancelled once the status has been captured so the
 * underlying connection is released promptly and not left waiting
 * for garbage collection.
 *
 * @param {object} input
 * @param {string} input.apiUrl Base URL; trailing slash handling is
 *   delegated to `stripTrailingSlash`.
 * @param {string} input.apiKey Token sent as `Authorization: Bearer …`.
 * @param {typeof fetch} [input.fetchImpl] Injected fetch; defaults to
 *   the global `fetch`.
 * @param {() => number} [input.now] Injected clock; defaults to
 *   `Date.now`.
 * @returns {Promise<object>} One of:
 *   `{ kind: 'ok', latencyMs }`,
 *   `{ kind: 'unauthorized', status, message }`,
 *   `{ kind: 'server-error', status, message }`,
 *   `{ kind: 'unexpected-status', status, message }`,
 *   `{ kind: 'network-error', message, cause }`.
 *   Never rejects for a fetch failure — that is reported as
 *   `network-error`.
 */
export async function validateTokenAgainstHealth(input) {
    const fetchImpl = input.fetchImpl ?? fetch;
    const now = input.now ?? Date.now;
    const url = `${stripTrailingSlash(input.apiUrl)}/api/pugi/health`;
    const started = now();
    let response;
    try {
        response = await fetchImpl(url, {
            method: 'GET',
            headers: {
                Authorization: `Bearer ${input.apiKey}`,
                Accept: 'application/json',
            },
        });
    }
    catch (error) {
        // DNS failure, ECONNREFUSED, TLS handshake — anything that makes
        // fetch throw before a status code is observable.
        // NOTE(review): the message interpolates `input.apiUrl` verbatim.
        // If leaking a self-hosted hostname into a public CI log is a
        // concern, confirm the dispatcher redacts it before printing.
        const cause = error instanceof Error ? error.message : String(error);
        return {
            kind: 'network-error',
            message: `Cannot reach ${input.apiUrl}; check your connection`,
            cause,
        };
    }
    // Latency is measured up to the arrival of the status line, before
    // any body clean-up.
    const latencyMs = now() - started;
    const { status } = response;
    await discardResponseBody(response);
    if (status === 200) {
        return { kind: 'ok', latencyMs };
    }
    if (status === 401 || status === 403) {
        return {
            kind: 'unauthorized',
            status,
            message: status === 401
                ? 'Token invalid or expired — run `pugi login --provider device` to get a fresh one'
                : 'Token recognised but the account is suspended — check `pugi whoami` on a working machine or contact support',
        };
    }
    if (status >= 500) {
        return {
            kind: 'server-error',
            status,
            message: `${input.apiUrl} returned ${status}; retry in a moment`,
        };
    }
    return {
        kind: 'unexpected-status',
        status,
        message: `Unexpected ${status} from /api/pugi/health; treat as login failure`,
    };
}
/**
 * Best-effort release of an unread response body. Tolerates responses
 * without a body (bodyless statuses, minimal test doubles) and a
 * `cancel()` that rejects — clean-up must never change the probe
 * outcome.
 */
async function discardResponseBody(response) {
    try {
        await response.body?.cancel();
    }
    catch {
        // Deliberately ignored: the status code has already been captured
        // and is the only thing the caller relies on.
    }
}
200
/**
 * End-to-end env-provider login check: resolve the token, run the
 * local format gate, then (unless skipped) probe the health endpoint.
 *
 * Stages, in order — the first one that fails decides the result:
 *   1. `resolveEnvCandidateToken` → `{ kind: 'missing', message }`
 *   2. `assertTokenFormat`        → `{ kind: 'invalid-format', message }`
 *   3. `validateTokenAgainstHealth` → its non-ok outcome, returned as-is
 *
 * On success the result is `{ kind: 'ok', token, latencyMs }`.
 *
 * @param {object} input
 * @param {string} [input.explicitKey] Value of `--key`, if given.
 * @param {object} [input.env] Environment to read `PUGI_API_KEY` from.
 * @param {string} input.apiUrl Base URL forwarded to the health probe.
 * @param {Function} [input.fetchImpl] Forwarded to the health probe.
 * @param {Function} [input.now] Forwarded to the health probe.
 * @param {boolean} [input.skipValidate] When truthy, the network probe
 *   is bypassed and `latencyMs` is reported as `0`.
 * @returns {Promise<object>} The typed outcome described above.
 */
export async function resolveAndValidateEnvLogin(input) {
    const token = resolveEnvCandidateToken({
        explicitKey: input.explicitKey,
        env: input.env,
    });
    if (!token) {
        const message = 'pugi login --provider env requires a token. Export PUGI_API_KEY in the current shell or pass --key <value>.';
        return { kind: 'missing', message };
    }

    const formatError = assertTokenFormat(token);
    if (formatError) {
        const message = `pugi login --provider env: ${formatError}`;
        return { kind: 'invalid-format', message };
    }

    // Used by the existing `login-variants.spec.ts` regression suite
    // so the test plane does not require a live network. Production
    // path always validates.
    if (input.skipValidate) {
        return { kind: 'ok', token, latencyMs: 0 };
    }

    const probe = await validateTokenAgainstHealth({
        apiUrl: input.apiUrl,
        apiKey: token,
        fetchImpl: input.fetchImpl,
        now: input.now,
    });
    // A successful probe is re-shaped to carry the token; every failure
    // outcome is handed back untouched.
    return probe.kind === 'ok'
        ? { kind: 'ok', token, latencyMs: probe.latencyMs }
        : probe;
}
235
/**
 * Remove every trailing `/` from a base URL so that appending an
 * absolute path (`/api/...`) never produces a double slash.
 *
 * Stripping only a single slash would leave `https://host//` as
 * `https://host/`, and the composed request URL would then contain
 * `//api/...`. A manual scan is used so the cost stays linear
 * regardless of input.
 *
 * @param {string} url
 * @returns {string} `url` without trailing slashes; returned unchanged
 *   when it has none.
 */
function stripTrailingSlash(url) {
    let end = url.length;
    while (end > 0 && url[end - 1] === '/') {
        end -= 1;
    }
    return end === url.length ? url : url.slice(0, end);
}
238
+ //# sourceMappingURL=env-provider.js.map
@@ -0,0 +1,89 @@
1
+ /**
2
+ * PUGI.md hierarchy probe — Leak L32 (2026-05-27).
3
+ *
4
+ * Surfaces how many ambient `PUGI.md` / `CLAUDE.md` files were
5
+ * discovered by the cwd → homedir walk-up. Operators triaging "why
6
+ * is the model not following my project conventions" can run
7
+ * `pugi doctor` and immediately see whether the hierarchy walk
8
+ * loaded the file they expected.
9
+ *
10
+ * Status semantics:
11
+ * - `skipped` when bare mode is active (the walk is deliberately
12
+ * disabled). The row still renders so the JSON schema stays
13
+ * stable for downstream consumers.
14
+ * - `skipped` when zero files were found. This is the default
15
+ * state on a clean machine and is NOT an error — most operators
16
+ * do not maintain a `~/PUGI.md`.
17
+ * - `ok` when one or more files were discovered. The detail names
18
+ * the closest file and the total count.
19
+ *
20
+ * Side effects:
21
+ * - One filesystem walk from cwd to homedir (bounded by the walker's
22
+ * depth cap). Each level performs at most 2 `existsSync` calls.
23
+ * Cost is single-digit ms even on cold cache; well inside the
24
+ * doctor probe wall-clock budget.
25
+ *
26
+ * Wired into `buildDefaultProbes` in `runtime/commands/doctor.ts`.
27
+ */
28
+ import { isBareMode } from '../../bare-mode/index.js';
29
+ import { walkUpPugiMd } from '../../pugi-md/walk-up.js';
30
/** Row name shown for this probe in the doctor table. */
export const PUGI_MD_DOCTOR_LABEL = 'PUGI.md HIERARCHY';
/** Detail text used when bare mode turns the walk off. */
export const PUGI_MD_BARE_SKIP_DETAIL = 'skipped (--bare)';
/** Detail text used when the walk completed without finding a file. */
export const PUGI_MD_EMPTY_DETAIL = 'no PUGI.md / CLAUDE.md found in cwd → homedir';
/**
 * Doctor probe reporting which ambient `PUGI.md` / `CLAUDE.md` files
 * the walk-up discovered.
 *
 * Outcomes (every row carries `name: PUGI_MD_DOCTOR_LABEL`):
 *   - `skipped` + bare detail  — `isBareMode(env)` is true; the walk
 *     is not run at all.
 *   - `warn`                   — the walk implementation threw.
 *   - `skipped` + empty detail — the walk returned no usable entry.
 *   - `ok`                     — detail is `<n> file(s): <first path>`
 *     followed by ` (+<n-1> more)` when more than one was found.
 *
 * @param {object} [input]
 * @param {object} [input.env] Environment handed to `isBareMode`;
 *   defaults to `process.env`.
 * @param {string} [input.cwd] Start directory; defaults to
 *   `process.cwd()`.
 * @param {string} [input.homedir] Forwarded to the walker as
 *   `{ homedir }` only when it is not `undefined`.
 * @param {Function} [input.walkImpl] Walker override `(cwd, opts)`;
 *   defaults to `walkUpPugiMd`.
 * @returns {{ name: string, status: string, detail: string }}
 */
export function probePugiMdHierarchy(input = {}) {
    const row = (status, detail) => ({ name: PUGI_MD_DOCTOR_LABEL, status, detail });

    if (isBareMode(input.env ?? process.env)) {
        return row('skipped', PUGI_MD_BARE_SKIP_DETAIL);
    }

    const startDir = input.cwd ?? process.cwd();
    const runWalk = input.walkImpl ?? ((dir, options) => walkUpPugiMd(dir, options));
    let discovered;
    try {
        const walkOptions = input.homedir !== undefined ? { homedir: input.homedir } : {};
        discovered = runWalk(startDir, walkOptions);
    }
    catch {
        // A throw from the walker is downgraded to a warning row so one
        // failing probe cannot abort the whole doctor sweep.
        return row('warn', 'walk-up failed (see logs)');
    }

    // The first entry is the one reported by path; an empty result, or a
    // result whose first slot is falsy, is reported as "nothing found".
    const total = discovered.length;
    const nearest = total === 0 ? undefined : discovered[0];
    if (!nearest) {
        return row('skipped', PUGI_MD_EMPTY_DETAIL);
    }

    if (total === 1) {
        return row('ok', `${total} file: ${nearest.path}`);
    }
    return row('ok', `${total} files: ${nearest.path} (+${total - 1} more)`);
}
89
+ //# sourceMappingURL=pugi-md.js.map
@@ -18,6 +18,8 @@ import { buildContextPrefix, spliceContextPrefix } from './context-prefix.js';
18
18
  import { applyIntentMarker, classifyIntent } from './intent.js';
19
19
  import { loadTraversedMarkdown } from '../context/markdown-traverse.js';
20
20
  import { isBareMode } from '../bare-mode/index.js';
21
+ import { walkUpPugiMd } from '../pugi-md/walk-up.js';
22
+ import { renderAmbientContext } from '../pugi-md/context-injector.js';
21
23
  // α7 L11 (2026-05-27): per-session DenialTrackingState. One instance
22
24
  // per `run()` so denials cluster by (tool, args) within the same
23
25
  // command but do NOT leak across CLI invocations.
@@ -234,6 +236,30 @@ export class NativePugiEngineAdapter {
234
236
  // accurate; the REPL session retains the launch cwd for the
235
237
  // lifetime of the session which is what the operator expects.
236
238
  const cwdForTraverse = process.cwd();
239
+ // Leak L32 (2026-05-27): cwd → homedir walk-up that picks up every
240
+ // ambient `PUGI.md` (or `CLAUDE.md` as a fallback) the operator
241
+ // has placed above their workspace. This is the cross-project
242
+ // hierarchy walk — distinct from the workspace-bounded
243
+ // `loadTraversedMarkdown` below which only sees files INSIDE the
244
+ // workspace root. Render the concatenation once at session boot
245
+ // and prepend to the system prompt so the model treats the
246
+ // operator's personal guidance as ambient context for the whole
247
+ // session. `--bare` (Leak L22) skips this walk entirely.
248
+ let ambientContextBlock = '';
249
+ if (!isBareMode()) {
250
+ try {
251
+ const hierarchy = walkUpPugiMd(cwdForTraverse);
252
+ ambientContextBlock = renderAmbientContext(hierarchy);
253
+ }
254
+ catch {
255
+ // Pure FS surface — if it throws (programmer error in the
256
+ // walker, not a per-file fs error which is already swallowed
257
+ // inside) we drop ambient context for this session rather
258
+ // than crashing the engine loop. Doctor probe still surfaces
259
+ // the hierarchy state for operator triage.
260
+ ambientContextBlock = '';
261
+ }
262
+ }
237
263
  let traverseResult;
238
264
  // Leak L22 (2026-05-27): `--bare` skips the parent-dir PUGI.md /
239
265
  // AGENTS.md / CLAUDE.md / GEMINI.md walk-up. The engine sees only
@@ -548,7 +574,14 @@ export class NativePugiEngineAdapter {
548
574
  // pattern instead of re-issuing the same refused call.
549
575
  denialTracking,
550
576
  }),
551
- systemPrompt: systemPromptFor(kind),
577
+ // Leak L32 (2026-05-27): ambient `PUGI.md` hierarchy block
578
+ // prepended once at session boot. When the walk found
579
+ // nothing OR bare mode is on, `ambientContextBlock === ''`
580
+ // and the system prompt is unchanged — no leading blank
581
+ // line, no empty wrapper tag.
582
+ systemPrompt: ambientContextBlock
583
+ ? `${ambientContextBlock}\n\n${systemPromptFor(kind)}`
584
+ : systemPromptFor(kind),
552
585
  // β5a R5+R6+P1: per-turn `<context>` prefix + intent marker
553
586
  // applied above. Falls back to verbatim `task.prompt` when
554
587
  // both the prefix block is empty AND the intent classifier
@@ -0,0 +1,76 @@
1
/**
 * Upper bound, in bytes, on the stanzas accumulated into the rendered
 * block (96 KB). Each stanza is charged its UTF-8 size plus one byte
 * for the joining newline; a stanza that would push the running total
 * past this value is not emitted.
 *
 * NOTE(review): the budget is charged per rendered stanza, which
 * includes the wrapper tags — confirm whether three files at the
 * per-file cap are still expected to fit.
 */
export const MAX_INJECT_BYTES = 96 * 1024;
/**
 * Line appended in place of the first stanza that does not fit, so a
 * reader of the assembled prompt can tell ambient context was clipped.
 */
export const TRUNCATION_MARKER = '<ambient-context-truncated reason="aggregate-cap" />';
/**
 * Turn a list of hierarchy files into the block that is prepended to
 * the system prompt.
 *
 * Files are rendered in the order given via `renderStanza` and joined
 * with a single `\n`. As soon as one stanza would exceed
 * `MAX_INJECT_BYTES`, that stanza and every later file are dropped
 * and `TRUNCATION_MARKER` is appended.
 *
 * The output depends only on the input, so identical input yields
 * byte-identical output.
 *
 * @param {Array<object>} files Hierarchy files to render.
 * @returns {string} The joined block, or `''` for an empty list.
 */
export function renderAmbientContext(files) {
    if (files.length === 0) {
        return '';
    }
    const pieces = [];
    let usedBytes = 0;
    for (const file of files) {
        const rendered = renderStanza(file);
        // +1 accounts for the newline that joins this stanza to its
        // neighbour.
        const cost = Buffer.byteLength(rendered, 'utf8') + 1;
        if (usedBytes + cost > MAX_INJECT_BYTES) {
            pieces.push(TRUNCATION_MARKER);
            break;
        }
        usedBytes += cost;
        pieces.push(rendered);
    }
    return pieces.join('\n');
}
43
/**
 * Build a single `<ambient-context>` stanza for one HierarchyFile.
 *
 * Layout (three parts joined by `\n`):
 *   1. opening tag carrying `source` (the file path, attribute-escaped
 *      via `escapeAttr`) and `level` (stringified),
 *   2. the file content with trailing whitespace removed,
 *   3. the closing tag.
 *
 * The content itself is inserted verbatim — it is NOT escaped.
 * NOTE(review): a file containing a literal `</ambient-context>` would
 * close the wrapper early; confirm this is acceptable for files picked
 * up from directories the operator does not author.
 *
 * @param {{ path: string, level: number, content: string }} file
 * @returns {string} The rendered stanza, without a trailing newline.
 */
function renderStanza(file) {
    const sourceAttr = escapeAttr(file.path);
    const levelAttr = String(file.level);
    // No trailing newline inside the content — the caller's join adds
    // one between stanzas. `trimEnd()` strips exactly the characters
    // `\s` matches, but in a single linear pass; the regex form
    // `/\s+$/` backtracks quadratically on a long whitespace run that is
    // not at the very end of the file.
    const trimmed = file.content.trimEnd();
    return [
        `<ambient-context source="${sourceAttr}" level="${levelAttr}">`,
        trimmed,
        `</ambient-context>`,
    ].join('\n');
}
63
/**
 * Make a string safe to place inside a double-quoted XML attribute.
 *
 * Four characters are rewritten to their entity form: `&` → `&amp;`,
 * `"` → `&quot;`, `<` → `&lt;`, `>` → `&gt;`. Everything else passes
 * through unchanged. Already-escaped input is escaped again (its `&`
 * becomes `&amp;`), so the function is not idempotent.
 *
 * @param {string} value Raw attribute value (here: a file path).
 * @returns {string} The escaped value.
 */
function escapeAttr(value) {
    const entityFor = {
        '&': '&amp;',
        '"': '&quot;',
        '<': '&lt;',
        '>': '&gt;',
    };
    // Single pass over the input: each special character is looked up
    // once, so the inserted entities are never re-scanned.
    return value.replace(/[&"<>]/g, (ch) => entityFor[ch]);
}
76
+ //# sourceMappingURL=context-injector.js.map
@@ -0,0 +1,207 @@
1
+ /**
2
+ * Leak L32 (2026-05-27) — `PUGI.md` hierarchy walk-up to `$HOME`.
3
+ *
4
+ * Claude Code walks from `cwd` upward toward the user's homedir and
5
+ * concatenates every `CLAUDE.md` it finds at each intermediate level
6
+ * (deepest overrides shallowest). Pugi parity: same walk, looking for
7
+ * `PUGI.md` first at each level and accepting `CLAUDE.md` as a fallback
8
+ * — operators often have a leftover `~/CLAUDE.md` or a parent-dir
9
+ * `CLAUDE.md` from a previous Claude Code session and we want their
10
+ * ambient guidance picked up automatically without a migration step.
11
+ *
12
+ * Why this is a separate module from `core/context/markdown-traverse.ts`:
13
+ *
14
+ * - `markdown-traverse.ts` is the *workspace-bounded* walk (cwd → up
15
+ * to but NOT including `workspaceRoot`). It guards every read by
16
+ * `realpathSync` containment against the workspace root and
17
+ * refuses to escape — by design, because the per-dir markdown is
18
+ * part of the project's first-party context.
19
+ *
20
+ * - This module is the *home-bounded* walk (cwd → up to `homedir()`,
21
+ * OR until depth limit). It picks up the operator's personal /
22
+ * global guidance that lives ABOVE the workspace root. The two
23
+ * surfaces are complementary: workspace markdown encodes project
24
+ * conventions; this hierarchy walk encodes operator-level taste
25
+ * (preferred libraries, "always run prettier", style guides).
26
+ *
27
+ * Contract:
28
+ *
29
+ * - Walks from `cwd` upward. At each directory checks `PUGI.md`
30
+ * (preferred); when absent falls back to `CLAUDE.md`. Only ONE
31
+ * file per level is loaded — preferred wins.
32
+ * - Stops at `homedir()` INCLUSIVE — the file at `~/PUGI.md` or
33
+ * `~/CLAUDE.md` IS loaded (Claude Code parity: a `~/CLAUDE.md`
34
+ * applies to every project the operator opens).
35
+ * - Hard depth cap of `MAX_WALK_DEPTH` (20) directories regardless
36
+ * of how far cwd is from homedir; defense against symlinked or
37
+ * malicious cwd values.
38
+ * - Per-file byte cap `MAX_FILE_BYTES` (32 KB); over-cap files are
39
+ * truncated, not rejected, so a runaway `PUGI.md` does not break
40
+ * the prompt budget.
41
+ * - Returns shallow-to-deep order (cwd FIRST, homedir LAST). The
42
+ * caller is responsible for rendering precedence — Claude Code's
43
+ * rule is "deeper overrides shallower", which means the LAST
44
+ * entry in the rendered system prompt wins. Our order matches
45
+ * that convention so the context injector can splice directly.
46
+ *
47
+ * Safety:
48
+ *
49
+ * - No `realpath` on the directories themselves: the operator's
50
+ * cwd may live under a workspace symlink (common with macOS
51
+ * `/private/var/...`) and we want to honor what the operator
52
+ * sees in their shell. We DO resolve the candidate file via
53
+ * `realpathSync` before reading, so the reported `path` is the
54
+ * link target and a broken symlink is skipped silently. No
55
+ * containment check is applied to the resolved path.
56
+ * - Catch + skip every fs error per file. The walk-up surface MUST
57
+ * NEVER break engine boot — missing read perms on a parent dir
58
+ * is the common case (e.g. `/etc` on a corp laptop) and the
59
+ * fallback is "no ambient context", not a crash.
60
+ *
61
+ * Pure module: no logging, no network, no fs writes.
62
+ */
63
import { existsSync, readFileSync, realpathSync, statSync } from 'node:fs';
import { homedir } from 'node:os';
import { dirname, resolve } from 'node:path';
65
/**
 * Hard ceiling on parent-dir traversal depth. 20 is generous — even
 * deep monorepo layouts rarely sit more than 8-10 levels below the
 * homedir on a developer's laptop. The cap exists so a misconfigured
 * cwd (e.g. cwd outside the user's home filesystem entirely) cannot
 * cause a multi-second fs scan of unrelated directories.
 */
export const MAX_WALK_DEPTH = 20;
/**
 * Per-file byte cap. 32 KB matches the per-dir markdown traverse
 * aggregate budget — generous enough for a fully written-out
 * project / personal `PUGI.md` (~8000 words) while keeping any one
 * file from blowing the prompt budget on its own.
 */
export const MAX_FILE_BYTES = 32 * 1024;
/**
 * Filenames consulted at each level, in lookup order. `PUGI.md` is
 * preferred — when both files coexist in a directory the Pugi-native
 * file wins and the Claude Code shim is ignored. This is the same
 * precedence used by `markdown-traverse.ts` for workspace-bounded
 * walks; keeping the two surfaces consistent removes the "why does
 * Pugi sometimes read CLAUDE.md and sometimes PUGI.md?" foot-gun.
 *
 * Frozen: the array order IS the precedence rule, and the array is
 * exported, so an importer that mutated it would change which file
 * wins for every walk in the process.
 */
export const HIERARCHY_SOURCES = Object.freeze(['PUGI.md', 'CLAUDE.md']);
89
/**
 * Walk from `cwd` upward, collecting at most one ambient file per
 * directory (via `tryLoadDirectory`) until the home directory has been
 * processed, the filesystem root is reached, or the depth limit is
 * exhausted.
 *
 * Home boundary: `opts.homedir` when provided, otherwise
 * `os.homedir()`. Without that default the boundary was only enforced
 * for callers that passed `homedir` explicitly, and a bare
 * `walkUpPugiMd(cwd)` kept climbing past `$HOME` to the filesystem
 * root. Passing an empty string disables the boundary (the walk then
 * ends at the root or the depth limit).
 *
 * When `cwd` is not inside the home tree the boundary is never hit and
 * the walk ends at the filesystem root.
 *
 * Directories are compared lexically after `path.resolve`; they are
 * not realpath'd.
 *
 * @param {string} cwd Directory to start from.
 * @param {{ limit?: number, homedir?: string }} [opts]
 *   `limit` is normalised by `clampLimit`; level 0 (the start
 *   directory) is always examined.
 * @returns {Array<object>} Rows in visit order — start directory
 *   first, home (or root) last. Empty when nothing was found or `cwd`
 *   could not be resolved.
 */
export function walkUpPugiMd(cwd, opts = {}) {
    const limit = clampLimit(opts.limit);
    let absCwd;
    try {
        absCwd = resolve(cwd);
    }
    catch {
        return [];
    }
    const home = opts.homedir ?? defaultHomeDir();
    const absHome = home ? resolve(home) : undefined;
    const results = [];
    let current = absCwd;
    // `dirname` strictly shortens an absolute path until it reaches the
    // root, where it returns its input — that fixed point is the only
    // termination check needed besides the home boundary and the limit.
    for (let level = 0; level <= limit; level += 1) {
        const found = tryLoadDirectory(current, level);
        if (found) {
            results.push(found);
        }
        // Inclusive home boundary: load home if we are here, then stop.
        if (absHome && current === absHome) {
            break;
        }
        const parent = dirname(current);
        if (parent === current) {
            break; // hit filesystem root before homedir
        }
        current = parent;
    }
    return results;
}
/**
 * `os.homedir()` guarded against the rare environments where it
 * throws; in that case no home boundary is applied.
 */
function defaultHomeDir() {
    try {
        return homedir();
    }
    catch {
        return undefined;
    }
}
134
/**
 * Pick the first matching file in `dir`, read + cap it, and produce
 * a HierarchyFile row. Returns `undefined` when no file in
 * `HIERARCHY_SOURCES` exists or every candidate fails to resolve,
 * stat, or read. Filesystem errors are swallowed per candidate so one
 * unreadable file simply falls through to the next source.
 *
 * Truncation is byte-accurate: the file is read as raw bytes and, when
 * it exceeds `MAX_FILE_BYTES`, cut at the last UTF-8 character
 * boundary at or below the cap before decoding. Slicing the decoded
 * string by character count would let multi-byte content (roughly
 * three bytes per character for CJK text) overshoot the byte cap
 * several times over.
 *
 * NOTE(review): the resolved real path is not checked for containment
 * under any directory — a symlink pointing elsewhere is followed and
 * read.
 *
 * @param {string} dir Directory to inspect.
 * @param {number} level Distance from the start directory, copied
 *   into the row.
 * @returns {{ path: string, content: string, level: number,
 *   source: string, truncated: boolean, rawBytes: number } | undefined}
 *   `path` is the realpath of the file; `rawBytes` is the on-disk size
 *   reported by `statSync`.
 */
function tryLoadDirectory(dir, level) {
    for (const source of HIERARCHY_SOURCES) {
        const candidate = resolve(dir, source);
        if (!existsSync(candidate)) {
            continue;
        }
        // Realpath the FILE, not the directory — the walk honours the
        // directory path the operator sees in their shell.
        let realPath;
        try {
            realPath = realpathSync(candidate);
        }
        catch {
            // Broken symlink or perms issue on the link itself. Skip this
            // file and try the next source in the same directory.
            continue;
        }
        let rawBytes;
        try {
            rawBytes = statSync(realPath).size;
        }
        catch {
            continue;
        }
        let bytes;
        try {
            bytes = readFileSync(realPath);
        }
        catch {
            continue;
        }
        const truncated = bytes.length > MAX_FILE_BYTES;
        const content = truncated
            ? decodeUtf8Prefix(bytes, MAX_FILE_BYTES)
            : bytes.toString('utf8');
        return {
            path: realPath,
            content,
            level,
            source,
            truncated,
            rawBytes,
        };
    }
    return undefined;
}
/**
 * Decode at most `maxBytes` bytes of `bytes` as UTF-8 without cutting
 * a multi-byte character in half. If the first excluded byte is a
 * continuation byte (`10xxxxxx`), the cut is moved back to the start
 * of that character. The back-off is bounded to three steps — the
 * maximum number of continuation bytes in one UTF-8 sequence — so
 * malformed input cannot shrink the prefix arbitrarily.
 */
function decodeUtf8Prefix(bytes, maxBytes) {
    let end = Math.min(maxBytes, bytes.length);
    let backoff = 0;
    while (end > 0 && end < bytes.length && backoff < 3 && (bytes[end] & 0xc0) === 0x80) {
        end -= 1;
        backoff += 1;
    }
    return bytes.toString('utf8', 0, end);
}
193
/**
 * Normalise a caller-supplied depth limit.
 *
 *   - not a finite number (missing, `NaN`, `±Infinity`, wrong type)
 *     → `MAX_WALK_DEPTH`
 *   - above `MAX_WALK_DEPTH` → `MAX_WALK_DEPTH`
 *   - negative → `0`
 *   - otherwise → the value rounded down to an integer
 *
 * A result of `0` still lets the start directory be examined, so
 * `limit: 0` means "current directory only".
 *
 * @param {unknown} limit
 * @returns {number} The normalised limit.
 */
function clampLimit(limit) {
    const isUsable = typeof limit === 'number' && Number.isFinite(limit);
    if (!isUsable || limit > MAX_WALK_DEPTH) {
        return MAX_WALK_DEPTH;
    }
    return limit < 0 ? 0 : Math.floor(limit);
}
207
+ //# sourceMappingURL=walk-up.js.map