@pugi/cli 0.1.0-beta.50 → 0.1.0-beta.52

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,194 @@
1
+ /**
2
+ * Tenant-wide JSONL audit trail (Wave 7 Sprint 1 #21, 2026-05-29).
3
+ *
4
+ * Pugi already records every tool_call / tool_result in two places:
5
+ *
6
+ * 1. The global per-workspace log at `<workspace>/.pugi/events.jsonl`
7
+ * (audit-replay source of truth; see `core/session.ts`).
8
+ * 2. The per-session mirror at
9
+ * `<workspace>/.pugi/sessions/<sessionId>/events.jsonl`
10
+ * (operator-friendly per-run copy, see `native-pugi.ts`).
11
+ *
12
+ * Both live under the workspace directory and disappear when the
13
+ * operator wipes the workspace or runs many ephemeral sandboxes.
14
+ * What's missing is a TENANT-wide structured audit log: a single
15
+ * append-only NDJSON stream per (tenant, workspace) pair that the
16
+ * operator (or a SOC pipeline) can tail across every session over
17
+ * the lifetime of the host.
18
+ *
19
+ * Spec (Wave 7 #21):
20
+ *
21
+ * - Path: `~/.pugi/audit/<tenant>/<workspace-slug>-<hash>.jsonl`
22
+ * - One JSON line per event with shared shape:
23
+ * `{ ts, tenant, workspace, workspaceHash, event, sessionId, data }`
24
+ * - Events covered: `tool_call`, `tool_result`, `dispatch_start`,
25
+ * `dispatch_end`, `permission_denied`, `auto_compact`,
26
+ * `budget_exhausted`.
27
+ * - Append-only — no rotation logic. Operators wire `logrotate`
28
+ * themselves if they want size caps.
29
+ * - Opt-out: `PUGI_AUDIT_TRAIL_DISABLE=1`.
30
+ * - Failures NEVER throw. Audit MUST NOT break a dispatch.
31
+ * - Tenant fallback: when `PUGI_API_KEY` is unset, tenant is `local`.
32
+ *
33
+ * Why duplicate the per-session log on disk:
34
+ *
35
+ * The per-session mirror clusters by `sessionId` (one dir per run).
36
+ * To answer "what did this tenant DO across every session this week
37
+ * from this workspace" an operator otherwise has to glob hundreds of
38
+ * session dirs and merge by timestamp. The audit trail flattens that
39
+ * into one tail-able stream per (tenant, workspace) — same shape an
40
+ * ops pipeline would expect from a hosted log surface.
41
+ */
42
+ import { appendFileSync, mkdirSync } from 'node:fs';
43
+ import { createHash } from 'node:crypto';
44
+ import { homedir } from 'node:os';
45
+ import { basename, dirname, join, resolve } from 'node:path';
46
/**
 * Name of the environment variable that opts out of the tenant-wide
 * audit trail. Meant for operators whose sandbox already captures audit
 * events upstream and who want to skip the duplicate copy. Follows the
 * same env-flag convention as other Pugi switches (`PUGI_BARE`,
 * `PUGI_AGENTMEMORY_RECALL_ENABLED=false`, etc.).
 */
export const PUGI_AUDIT_TRAIL_DISABLE_VAR = 'PUGI_AUDIT_TRAIL_DISABLE';

/**
 * Tenant slug used when `PUGI_API_KEY` is not exported. Events then land
 * under `~/.pugi/audit/local/...`, so a single-user workstation still
 * gets a useful forensic log without any API-key plumbing.
 */
export const LOCAL_TENANT_FALLBACK = 'local';
60
/**
 * Turn the workspace directory name into a filesystem-safe slug.
 *
 * The basename of the resolved path is lowercased and reduced to runs of
 * `a-z` / `0-9` joined by single dashes. Every other character (and any
 * leading, trailing, or repeated dash) acts purely as a separator. A
 * basename with no usable characters, such as the filesystem root,
 * yields the literal `workspace`.
 *
 * The slug is only the human-readable half of the audit file name; two
 * workspaces that share a basename are told apart by the hash that
 * `resolveAuditPath` appends separately.
 *
 * @param {string} workspaceRoot - Workspace directory (absolute or relative).
 * @returns {string} Non-empty slug made of `a-z`, `0-9`, and `-`.
 */
export function sanitizeWorkspaceSlug(workspaceRoot) {
  const directoryName = basename(resolve(workspaceRoot)).toLowerCase();
  // Splitting on every non-alphanumeric run (dashes included) and dropping
  // the empty edge pieces collapses repeats and trims the ends in one pass.
  const words = directoryName.split(/[^a-z0-9]+/).filter((word) => word !== '');
  if (words.length === 0) {
    return 'workspace';
  }
  return words.join('-');
}
79
/**
 * Short, stable identifier for a workspace: the first 8 hex characters
 * (32 bits) of the SHA-256 digest of the resolved workspace path.
 *
 * The digest tells apart same-named workspaces that live in different
 * parent directories (`~/code/foo` vs `~/other/foo`) without exposing
 * the absolute path in the audit file name.
 *
 * Note: `path.resolve` only normalizes the path lexically (makes it
 * absolute and folds `.` / `..` segments). It does not dereference
 * symlinks, so a symlinked path and its target produce different hashes.
 *
 * @param {string} workspaceRoot - Workspace directory (absolute or relative).
 * @returns {string} 8 lowercase hex characters.
 */
export function computeWorkspaceHash(workspaceRoot) {
  const absoluteRoot = resolve(workspaceRoot);
  const hasher = createHash('sha256');
  hasher.update(absoluteRoot);
  return hasher.digest('hex').substring(0, 8);
}
94
/**
 * Derive the tenant slug used in audit paths from `PUGI_API_KEY`.
 *
 * The raw key is never emitted: the slug is the first 12 hex characters
 * (48 bits) of the key's SHA-256 digest. The audit trail is a plaintext
 * file on the local filesystem and the slug appears in every path under
 * `~/.pugi/audit/`, so a truncated hash keeps the key from leaking if an
 * operator archives `~/.pugi` for support. 12 hex characters are enough
 * to separate tenants on one host while staying short enough to eyeball
 * in `ls ~/.pugi/audit/`.
 *
 * The slug is a CLI-local clustering key only.
 *
 * @param {Record<string, string | undefined>} [env=process.env]
 *   Environment to read `PUGI_API_KEY` from.
 * @returns {string} 12 hex characters, or `LOCAL_TENANT_FALLBACK` when
 *   the key is unset or blank after trimming.
 */
export function resolveTenant(env = process.env) {
  const apiKey = env.PUGI_API_KEY?.trim();
  if (apiKey) {
    const fingerprint = createHash('sha256').update(apiKey).digest('hex');
    return fingerprint.substring(0, 12);
  }
  return LOCAL_TENANT_FALLBACK;
}
114
/**
 * Build the audit file path for a (tenant, workspace) pair:
 * `<home>/.pugi/audit/<tenant>/<slug>-<hash>.jsonl`.
 *
 * Pure path arithmetic, nothing is created on disk. The caller is
 * responsible for creating the parent directory before appending
 * (`writeAuditEvent` does this).
 *
 * `tenant` is used verbatim as a path segment, so callers must pass a
 * value that is already safe as a directory name.
 *
 * @param {string} workspaceRoot - Workspace directory.
 * @param {string} tenant - Tenant slug (directory name under `audit/`).
 * @param {string} [home=homedir()] - Home directory override.
 * @returns {string} Path of the JSONL audit file.
 */
export function resolveAuditPath(workspaceRoot, tenant, home = homedir()) {
  const fileStem = [
    sanitizeWorkspaceSlug(workspaceRoot),
    computeWorkspaceHash(workspaceRoot),
  ].join('-');
  return join(home, '.pugi', 'audit', tenant, fileStem + '.jsonl');
}
124
/**
 * Report whether the audit trail has been switched off through the
 * `PUGI_AUDIT_TRAIL_DISABLE_VAR` environment variable.
 *
 * After trimming and lowercasing, only `1`, `true`, and `yes` count as
 * "disabled". Every other value, including `0`, `false`, the empty
 * string, and an absent variable, leaves the trail enabled.
 *
 * @param {Record<string, string | undefined>} [env=process.env]
 *   Environment to inspect.
 * @returns {boolean} `true` when the operator opted out.
 */
export function isAuditDisabled(env = process.env) {
  const flag = env[PUGI_AUDIT_TRAIL_DISABLE_VAR]?.trim().toLowerCase();
  // An unset or empty flag is simply not in the list.
  return ['1', 'true', 'yes'].includes(flag);
}
138
/**
 * Append one audit event to the per-tenant, per-workspace NDJSON trail.
 *
 * Contract: this function never throws. Every failure mode (opt-out,
 * missing or malformed `input`, unserializable `data`, unwritable
 * filesystem) ends in a silent no-op so a misconfigured audit surface
 * cannot break a dispatch. The whole body, including the reads of
 * `input` and the opt-out check, therefore runs inside the guard.
 *
 * Each successful call appends exactly one JSON line shaped as
 * `{ ts, tenant, workspace, workspaceHash, event, sessionId, data }`.
 * There is no rotation and no truncation; operators wire `logrotate`
 * themselves if they want size caps.
 *
 * The parent directory is created on every call (`recursive: true`), so
 * removing `~/.pugi/audit/<tenant>/` between runs does not turn the next
 * append into ENOENT. Directories are requested with mode `0o700` and
 * the file with `0o600`; these modes take effect when the entry is
 * created.
 *
 * @param {object} input
 * @param {string} input.workspaceRoot - Workspace directory for the event.
 * @param {string} input.event - Event name, e.g. `tool_call`.
 * @param {string} [input.sessionId] - Session the event belongs to.
 * @param {unknown} [input.data] - Event payload; must be JSON-serializable.
 * @param {string} [input.tenant] - Tenant override; blank values fall
 *   back to `resolveTenant(env)`.
 * @param {string} [input.home] - Home directory override.
 * @param {() => string} [input.now] - Timestamp provider; defaults to
 *   the current time as an ISO-8601 string.
 * @param {Record<string, string | undefined>} [input.env] - Environment
 *   override; defaults to `process.env`.
 * @returns {void}
 */
export function writeAuditEvent(input) {
  try {
    const env = input.env ?? process.env;
    if (isAuditDisabled(env)) {
      return;
    }
    const tenant = (input.tenant?.trim() || resolveTenant(env)) || LOCAL_TENANT_FALLBACK;
    const home = input.home ?? homedir();
    const path = resolveAuditPath(input.workspaceRoot, tenant, home);
    const now = input.now ? input.now() : new Date().toISOString();
    const envelope = {
      ts: now,
      tenant,
      workspace: sanitizeWorkspaceSlug(input.workspaceRoot),
      workspaceHash: computeWorkspaceHash(input.workspaceRoot),
      event: input.event,
      sessionId: input.sessionId,
      data: input.data,
    };
    try {
      mkdirSync(dirname(path), { recursive: true, mode: 0o700 });
    } catch {
      // Ignore mkdir errors here and still attempt the write. If the
      // directory is truly unusable the append below fails and the
      // outer catch swallows that error.
    }
    appendFileSync(path, `${JSON.stringify(envelope)}\n`, {
      encoding: 'utf8',
      mode: 0o600,
    });
  } catch {
    // Audit failures must NEVER break a dispatch. The session log and the
    // per-session mirror under `<workspace>/.pugi/` remain as redundant
    // surfaces, so a silent no-op is the contract.
  }
}
194
+ //# sourceMappingURL=audit-trail.js.map
@@ -0,0 +1,46 @@
1
/** Prompt sent to the engine; the reply is expected to contain "OK". */
const LIVE_PROMPT = 'Reply with the single word OK.';
/** Hard cap on the round-trip, enforced through an abort signal. */
const TIMEOUT_MS = 15_000;
/**
 * Matches the expected token as a whole word ("OK", "ok.", "Okay").
 * A plain substring check would also accept unrelated replies such as
 * "broken" or "token", which merely contain the letters "ok".
 */
const OK_TOKEN_PATTERN = /\bok(?:ay)?\b/i;

/**
 * Doctor probe: perform one real round-trip against the engine endpoint
 * (`<apiUrl>/api/pugi/engine`) and report how it went.
 *
 * Outcomes:
 * - `skipped`: no API key could be resolved.
 * - `error`: the request failed, returned a non-2xx status, or returned
 *   a 2xx whose body is not usable JSON.
 * - `warn`: the round-trip worked but the reply lacks the expected token.
 * - `ok`: the round-trip worked and the reply contains the token.
 *
 * The probe never throws; failures are folded into an `error` result.
 *
 * @param {{ env: object }} ctx - Probe context; only `env` is read.
 * @param {object} deps - Injected collaborators.
 * @param {(env: object) => string | undefined} deps.resolveApiKey
 * @param {(env: object) => string} deps.resolveApiUrl
 * @param {() => number} deps.now - Clock used for latency measurement.
 * @param {Function} deps.fetchImpl - `fetch`-compatible function.
 * @returns {Promise<{name: string, status: string, detail: string, latencyMs?: number}>}
 */
export async function probeEngineLive(ctx, deps) {
  const apiKey = deps.resolveApiKey(ctx.env);
  if (!apiKey) {
    return { name: 'ENGINE LIVE', status: 'skipped', detail: 'no API key (run `pugi login` or set PUGI_API_KEY)' };
  }
  const apiUrl = deps.resolveApiUrl(ctx.env);
  const startedAt = deps.now();
  // Strip every trailing slash so a base URL like `https://host//` does
  // not produce a double-slash path.
  const url = `${apiUrl.replace(/\/+$/, '')}/api/pugi/engine`;
  try {
    const res = await deps.fetchImpl(url, {
      method: 'POST',
      signal: AbortSignal.timeout(TIMEOUT_MS),
      headers: { 'content-type': 'application/json', authorization: `Bearer ${apiKey}` },
      body: JSON.stringify({
        personaSlug: 'main',
        command: 'explain',
        messages: [{ role: 'user', content: LIVE_PROMPT }],
        tools: [],
        temperature: 0,
        maxTokens: 32,
      }),
    });
    const latencyMs = deps.now() - startedAt;
    if (!res.ok) {
      // Best effort: include a short excerpt of the error body if readable.
      const body = await res.text().catch(() => '');
      return { name: 'ENGINE LIVE', status: 'error', detail: `engine returned HTTP ${res.status}${body ? `: ${body.slice(0, 200)}` : ''}`, latencyMs };
    }
    const json = await res.json().catch(() => null);
    if (!json) {
      return { name: 'ENGINE LIVE', status: 'error', detail: 'engine returned 2xx but body is not JSON', latencyMs };
    }
    const model = typeof json['model'] === 'string' ? json['model'] : 'unknown';
    const content = typeof json['content'] === 'string' ? json['content'] : '';
    if (!OK_TOKEN_PATTERN.test(content)) {
      return { name: 'ENGINE LIVE', status: 'warn', detail: `round-trip OK via ${model} (${latencyMs}ms) but reply did not contain expected token; got "${content.slice(0, 80)}"`, latencyMs };
    }
    return { name: 'ENGINE LIVE', status: 'ok', detail: `round-trip OK via ${model} (${latencyMs}ms)`, latencyMs };
  } catch (error) {
    const latencyMs = deps.now() - startedAt;
    const message = error instanceof Error ? error.message : String(error);
    return { name: 'ENGINE LIVE', status: 'error', detail: `engine round-trip failed: ${message}`, latencyMs };
  }
}
46
+ //# sourceMappingURL=engine-live.js.map
@@ -0,0 +1,179 @@
1
/**
 * Crude token estimate for a transcript: total character count divided
 * by four, rounded up. This mirrors the fallback accounting used by
 * `runEngineLoop`. The CLI has no real tokenizer before a turn is sent,
 * and the runtime's `usage.totalTokens` only arrives with the server
 * response, which is too late for a pre-turn gate.
 *
 * Counted per message: the length of `content`, plus the `name` and
 * `arguments` length of every entry in `toolCalls`. Missing or null
 * fields count as zero so that a message carrying only tool calls
 * (no content) cannot crash the gate.
 *
 * @param {Array<{content?: string | null, toolCalls?: Array<{name?: string, arguments?: string}>}>} messages
 * @returns {number} Estimated token count (non-negative integer).
 */
export function estimateTranscriptTokens(messages) {
  let chars = 0;
  for (const message of messages) {
    chars += message.content?.length ?? 0;
    for (const call of message.toolCalls ?? []) {
      chars += (call.name?.length ?? 0) + (call.arguments?.length ?? 0);
    }
  }
  return Math.ceil(chars / 4);
}
20
/** Tool names whose arguments are inspected for a file path. */
const FILE_TOOL_NAMES = new Set([
  'read',
  'write',
  'edit',
  'multi_edit',
  'multiEdit',
]);

/**
 * Collect tool-call statistics from the slice of messages about to be
 * dropped from the transcript.
 *
 * Only messages with role `assistant` contribute tool calls. `bash`
 * calls are tallied separately; calls to a file tool contribute their
 * path to a de-duplicated set. Argument parsing is best effort: a call
 * whose arguments cannot be parsed still counts as a tool call but adds
 * no file.
 *
 * @param {Array<{role: string, toolCalls?: Array<{name: string, arguments: string}>}>} dropped
 * @returns {{toolCalls: number, fileCount: number, bashCalls: number, messagesDropped: number}}
 */
export function summarizeDroppedTurns(dropped) {
  const calls = dropped
    .filter((message) => message.role === 'assistant')
    .flatMap((message) => message.toolCalls ?? []);
  const touchedFiles = new Set();
  let bashCalls = 0;
  for (const { name, arguments: rawArgs } of calls) {
    if (name === 'bash') {
      bashCalls += 1;
    } else if (FILE_TOOL_NAMES.has(name)) {
      const filePath = extractPath(rawArgs);
      if (filePath !== null) {
        touchedFiles.add(filePath);
      }
    }
  }
  return {
    toolCalls: calls.length,
    fileCount: touchedFiles.size,
    bashCalls,
    messagesDropped: dropped.length,
  };
}

/**
 * Pull a file path out of a tool call's raw JSON arguments.
 *
 * Reads `path`, falling back to `filePath` only when `path` is null or
 * absent. Returns `null` for empty input, invalid JSON, a non-object
 * payload, or a path that is not a non-empty string.
 *
 * @param {string} rawArgs - JSON-encoded arguments object.
 * @returns {string | null}
 */
function extractPath(rawArgs) {
  if (!rawArgs) {
    return null;
  }
  let parsed;
  try {
    parsed = JSON.parse(rawArgs);
  } catch {
    return null;
  }
  const isRecord = parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed);
  if (!isRecord) {
    return null;
  }
  const candidate = parsed['path'] ?? parsed['filePath'];
  return typeof candidate === 'string' && candidate.length > 0 ? candidate : null;
}
78
/**
 * Render the deterministic gist placed in the synthetic system message
 * that replaces dropped turns. The wording is fixed so that spec
 * assertions and operator logs stay stable from turn to turn; only the
 * three counters vary.
 *
 * @param {{toolCalls: number, fileCount: number, bashCalls: number}} stats
 * @returns {string} Single-line sentinel starting with `[auto-compact]`.
 */
export function renderAutoCompactSentinel(stats) {
  const { toolCalls, fileCount, bashCalls } = stats;
  const tally = `(${toolCalls} tool calls, ${fileCount} files read, ${bashCalls} bash commands)`;
  return [
    '[auto-compact] Earlier turns',
    tally,
    'summarized to free transcript headroom.',
    'Recent turns and the original task remain in context;',
    're-read any earlier file by name if you need its contents again.',
  ].join(' ');
}
90
/**
 * Minimum transcript length (in messages) before compaction is allowed.
 * Compaction always retains the first two messages (system + user) and
 * the last two, so a transcript of four or fewer messages has nothing
 * in the middle to drop. Compacting a 4-message transcript would either
 * be a no-op or accidentally drop the user's original task.
 */
export const MIN_COMPACT_TRANSCRIPT_LENGTH = 5;
98
/**
 * Pure gate deciding whether the transcript should be compacted now.
 *
 * Checks run in this order and the first failing one wins:
 *   1. `config.enabled` is false             -> skip, reason `disabled`
 *   2. fewer than MIN_COMPACT_TRANSCRIPT_LENGTH
 *      messages                              -> skip, reason `transcript-too-short`
 *   3. estimated tokens below
 *      floor(thresholdRatio * maxTokens)     -> skip, reason `below-threshold`
 * Otherwise the decision is `compact`.
 *
 * @param {{transcript: Array<object>, maxTokens: number, config: {enabled: boolean, thresholdRatio: number}}} input
 * @returns {{kind: 'skip', reason: string, usedTokens: number} | {kind: 'compact', usedTokens: number, thresholdTokens: number}}
 */
export function evaluateAutoCompactDecision(input) {
  const { transcript, config, maxTokens } = input;
  // The estimate is reported on every outcome, so compute it up front.
  const usedTokens = estimateTranscriptTokens(transcript);
  const skip = (reason) => ({ kind: 'skip', reason, usedTokens });
  if (!config.enabled) {
    return skip('disabled');
  }
  if (transcript.length < MIN_COMPACT_TRANSCRIPT_LENGTH) {
    return skip('transcript-too-short');
  }
  const thresholdTokens = Math.floor(config.thresholdRatio * maxTokens);
  return usedTokens < thresholdTokens
    ? skip('below-threshold')
    : { kind: 'compact', usedTokens, thresholdTokens };
}
118
/**
 * Rewrite a transcript so it fits back under the token budget.
 *
 * Layout of the result: the first two messages (system prompt and the
 * original user task), one synthetic `system` message holding the gist
 * of what was removed, then the last two messages unchanged. Everything
 * between the head and the tail is dropped.
 *
 * Callers are expected to have checked that the gate decided `compact`.
 * As a safety net, a transcript shorter than
 * MIN_COMPACT_TRANSCRIPT_LENGTH is returned as a shallow copy with zeroed
 * statistics, so a careless caller cannot corrupt the prefix.
 *
 * NOTE(review): if the retained tail starts with a tool-result message
 * whose originating assistant tool call sits in the dropped middle, the
 * result is kept without its call. Confirm the runtime accepts that.
 *
 * @param {Array<object>} transcript - Full message list; not mutated.
 * @returns {{transcript: Array<object>, droppedCount: number, gist: string,
 *   stats: {toolCalls: number, fileCount: number, bashCalls: number, messagesDropped: number},
 *   preUsedTokens: number, postUsedTokens: number}}
 */
export function compactTranscript(transcript) {
  const preUsedTokens = estimateTranscriptTokens(transcript);
  const tooShort = transcript.length < MIN_COMPACT_TRANSCRIPT_LENGTH;
  if (tooShort) {
    // Identity result: nothing dropped, token estimate unchanged.
    return {
      transcript: transcript.slice(),
      droppedCount: 0,
      gist: '',
      stats: { toolCalls: 0, fileCount: 0, bashCalls: 0, messagesDropped: 0 },
      preUsedTokens,
      postUsedTokens: preUsedTokens,
    };
  }
  // Keep index 0 (system) and index 1 (original user task) up front and
  // the two most recent messages at the end; summarize the rest.
  const keepFront = 2;
  const keepBack = 2;
  const middle = transcript.slice(keepFront, -keepBack);
  const stats = summarizeDroppedTurns(middle);
  const gist = renderAutoCompactSentinel(stats);
  const next = [
    ...transcript.slice(0, keepFront),
    { role: 'system', content: gist },
    ...transcript.slice(-keepBack),
  ];
  return {
    transcript: next,
    droppedCount: middle.length,
    gist,
    stats,
    preUsedTokens,
    postUsedTokens: estimateTranscriptTokens(next),
  };
}
164
/**
 * Convenience composer used by `runEngineLoop`: evaluate the gate and,
 * when it says `compact`, rewrite the transcript in one step.
 *
 * Returns `null` when the gate decided `skip`, so the loop driver can
 * branch cheaply without destructuring two layers of records.
 *
 * @param {Array<object>} transcript - Current message list.
 * @param {number} maxTokens - Token budget for the command.
 * @param {{enabled: boolean, thresholdRatio: number}} config - Resolved auto-compact settings.
 * @returns {object | null} The `compactTranscript` result, or `null` on skip.
 */
export function maybeCompact(transcript, maxTokens, config) {
  const { kind } = evaluateAutoCompactDecision({ transcript, maxTokens, config });
  return kind === 'skip' ? null : compactTranscript(transcript);
}
179
+ //# sourceMappingURL=auto-compact.js.map
@@ -1,3 +1,60 @@
1
/**
 * Auto-compact (mid-loop transcript summarization) default trip point as
 * a fraction of the per-command `maxTokens` envelope. CEO P1 #14 (CC
 * parity): when the char-count token estimate of the transcript crosses
 * 75% of the budget, the engine loop drops the middle turns and inserts
 * a deterministic `[auto-compact]` sentinel so the loop can continue
 * without the model tripping the `budget_exhausted` terminal status
 * mid-build.
 *
 * Empirically, `pugi code "big refactor"` hits the 80k cap on turn 4-5
 * and refuses to finish; `pugi fix` does the same at 50k. Auto-compact
 * keeps the recent turns plus a one-line gist of the dropped tool calls
 * so the model retains the most recent state without paying for the
 * full prefix.
 *
 * Operators can opt out or retune via `.pugi/settings.json`:
 *
 *   {
 *     "autoCompact": { "enabled": true, "thresholdRatio": 0.75 }
 *   }
 *
 * Bad values fall back silently to the default; the engine loop never
 * crashes on a malformed settings field (mirrors `resolveBudget`).
 */
export const AUTO_COMPACT_THRESHOLD_RATIO = 0.75;

/** Configuration used when settings are absent or malformed. */
export const DEFAULT_AUTO_COMPACT_CONFIG = {
  enabled: true,
  thresholdRatio: AUTO_COMPACT_THRESHOLD_RATIO,
};

/**
 * Read the auto-compact override from parsed `.pugi/settings.json`.
 *
 * Uses the same defensive-cast approach as `readSettingsBudget`: any
 * field with an unexpected shape silently falls back to its default,
 * because a malformed settings line must not break the engine loop.
 *
 * - `enabled` is honored only when it is a boolean.
 * - `thresholdRatio` is honored only when it is a finite number in the
 *   range (0, 1].
 *
 * The result is always a fresh object. The shared
 * `DEFAULT_AUTO_COMPACT_CONFIG` is never handed out by reference, so a
 * caller that mutates its config cannot change the defaults seen by
 * later calls.
 *
 * @param {{autoCompact?: unknown} | null | undefined} settings
 * @returns {{enabled: boolean, thresholdRatio: number}} Merged config; never `undefined`.
 */
export function resolveAutoCompactConfig(settings) {
  if (!settings) {
    return { ...DEFAULT_AUTO_COMPACT_CONFIG };
  }
  const root = settings.autoCompact;
  if (!root || typeof root !== 'object' || Array.isArray(root)) {
    return { ...DEFAULT_AUTO_COMPACT_CONFIG };
  }
  const enabledRaw = root['enabled'];
  const thresholdRaw = root['thresholdRatio'];
  const enabled = typeof enabledRaw === 'boolean'
    ? enabledRaw
    : DEFAULT_AUTO_COMPACT_CONFIG.enabled;
  const thresholdIsUsable = typeof thresholdRaw === 'number'
    && Number.isFinite(thresholdRaw)
    && thresholdRaw > 0
    && thresholdRaw <= 1;
  const thresholdRatio = thresholdIsUsable
    ? thresholdRaw
    : DEFAULT_AUTO_COMPACT_CONFIG.thresholdRatio;
  return { enabled, thresholdRatio };
}
1
58
  /**
2
59
  * β1 defaults. Source of truth for the per-command budget envelope.
3
60
  * The runtime is allowed to look these up directly (no need to round
@@ -6,7 +6,9 @@ import { FileReadCache } from '../file-cache.js';
6
6
  import { loadSettings } from '../settings.js';
7
7
  import { openSession, recordToolCall, recordToolResult } from '../session.js';
8
8
  import { prewarmRealDispatch } from '../subagents/dispatcher.js';
9
- import { resolveBudget } from './budgets.js';
9
+ import { resolveAutoCompactConfig, resolveBudget } from './budgets.js';
10
+ import { maybeCompact } from './auto-compact.js';
11
+ import { writeAuditEvent } from '../audit/audit-trail.js';
10
12
  import { buildExecutor, buildToolsSchema } from './tool-bridge.js';
11
13
  import { personaSlugFor, systemPromptFor } from './prompts.js';
12
14
  import { CancellationToken } from '../repl/cancellation.js';
@@ -188,6 +190,13 @@ export class NativePugiEngineAdapter {
188
190
  // budget so a careless caller cannot disable the call-count
189
191
  // guard by setting only token count.
190
192
  const budget = resolveBudget(kind, settings, task.budget?.tokens ? { maxTokens: task.budget.tokens } : undefined);
193
+ // CEO P1 #14 (auto-compact, 2026-05-29): resolve the per-workspace
194
+ // override of the 75% threshold gate. Default is `{ enabled: true,
195
+ // thresholdRatio: 0.75 }`; operators kill it via
196
+ // `.pugi/settings.json::autoCompact.enabled = false` or retune the
197
+ // ratio. The resolved config is captured by the closure that
198
+ // `runEngineLoop` invokes pre-send on every turn.
199
+ const autoCompactConfig = resolveAutoCompactConfig(settings);
191
200
  // β3 streaming: pre-build the typed stream event queue so the hook
192
201
  // callbacks below can push live events that this async generator
193
202
  // yields IMMEDIATELY (instead of buffering until `runEngineLoop`
@@ -220,6 +229,23 @@ export class NativePugiEngineAdapter {
220
229
  type: 'status',
221
230
  message: `Pugi engine starting: kind=${kind} budget=${budget.maxToolCalls} calls / ${budget.maxTokens} tokens`,
222
231
  });
232
+ // Wave 7 Sprint 1 #21 (2026-05-29): emit `dispatch_start` to the
233
+ // tenant-wide audit trail at `~/.pugi/audit/<tenant>/<slug>-<hash>
234
+ // .jsonl`. Append-only, never throws — a misconfigured audit
235
+ // surface must not block a dispatch. The per-session mirror under
236
+ // `.pugi/sessions/<id>/events.jsonl` remains as a redundant copy.
237
+ writeAuditEvent({
238
+ event: 'dispatch_start',
239
+ sessionId: session.id,
240
+ workspaceRoot: root,
241
+ data: {
242
+ kind,
243
+ promptLength: task.prompt.length,
244
+ maxToolCalls: budget.maxToolCalls,
245
+ maxTokens: budget.maxTokens,
246
+ model: this.options.model ?? null,
247
+ },
248
+ });
223
249
  // β5a R1+R4+R5+R6+P1 (2026-05-26): build the per-turn `<context>`
224
250
  // prefix and apply the intent marker so the model sees:
225
251
  // 1. cwd + open-files + per-dir-conventions block (R5+R6)
@@ -364,6 +390,39 @@ export class NativePugiEngineAdapter {
364
390
  // per-run log for operators and the cabinet UI (Sprint 2B).
365
391
  const sessionEventsPath = openSessionMirror(root, session.id);
366
392
  const hooks = {
393
+ // CEO P1 #14 (auto-compact, 2026-05-29): single operator-visible
394
+ // line on stderr — keep parity with Claude Code's
395
+ // `Compacted N turns into Y tokens; continuing.` message. We mirror
396
+ // the event into the session log + stream emitter as a `status`
397
+ // frame so that admin-api SSE consumers + the cabinet UI render
398
+ // it without a schema change.
399
+ onAutoCompact: (event) => {
400
+ const pct = Math.round((event.preUsedTokens / Math.max(1, event.maxTokens)) * 100);
401
+ const line = `engine: auto-compacted ${event.droppedCount} turns at ${event.preUsedTokens}/${event.maxTokens} (${pct}%)`;
402
+ // Single-line stderr write — operator-visible per spec.
403
+ process.stderr.write(`${line}\n`);
404
+ emitStream({ type: 'status', message: line });
405
+ appendSessionMirror(sessionEventsPath, {
406
+ type: 'auto_compact',
407
+ droppedCount: event.droppedCount,
408
+ preUsedTokens: event.preUsedTokens,
409
+ postUsedTokens: event.postUsedTokens,
410
+ maxTokens: event.maxTokens,
411
+ gist: event.gist,
412
+ });
413
+ // Wave 7 #21: tenant-wide audit trail mirror.
414
+ writeAuditEvent({
415
+ event: 'auto_compact',
416
+ sessionId: session.id,
417
+ workspaceRoot: root,
418
+ data: {
419
+ droppedCount: event.droppedCount,
420
+ preUsedTokens: event.preUsedTokens,
421
+ postUsedTokens: event.postUsedTokens,
422
+ maxTokens: event.maxTokens,
423
+ },
424
+ });
425
+ },
367
426
  onTurnStart: (turnIndex, messageCount) => {
368
427
  const msg = `turn ${turnIndex + 1}: requesting model (transcript=${messageCount} messages)`;
369
428
  emitStream({ type: 'status', message: msg });
@@ -453,6 +512,20 @@ export class NativePugiEngineAdapter {
453
512
  callId: call.id,
454
513
  argsPreview: call.arguments.slice(0, 200),
455
514
  });
515
+ // Wave 7 #21: tenant-wide audit trail mirror. Same payload
516
+ // shape as the session mirror but flattened so a `jq` query
517
+ // across all sessions for one (tenant, workspace) reads
518
+ // cleanly.
519
+ writeAuditEvent({
520
+ event: 'tool_call',
521
+ sessionId: session.id,
522
+ workspaceRoot: root,
523
+ data: {
524
+ tool: call.name,
525
+ callId: call.id,
526
+ argsPreview: call.arguments.slice(0, 200),
527
+ },
528
+ });
456
529
  },
457
530
  onToolResult: (call, result) => {
458
531
  const auditId = this.engineToolCallIds.get(call.id);
@@ -492,6 +565,18 @@ export class NativePugiEngineAdapter {
492
565
  ok: result.ok,
493
566
  summary: result.ok ? result.content.slice(0, 200) : result.error.slice(0, 200),
494
567
  });
568
+ // Wave 7 #21: tenant-wide audit trail mirror.
569
+ writeAuditEvent({
570
+ event: 'tool_result',
571
+ sessionId: session.id,
572
+ workspaceRoot: root,
573
+ data: {
574
+ tool: call.name,
575
+ callId: call.id,
576
+ ok: result.ok,
577
+ summary: result.ok ? result.content.slice(0, 200) : result.error.slice(0, 200),
578
+ },
579
+ });
495
580
  },
496
581
  };
497
582
  // β1b r1 (--allow-fetch / --allow-search wiring, 2026-05-26):
@@ -671,6 +756,14 @@ export class NativePugiEngineAdapter {
671
756
  command: kind,
672
757
  tag: dispatchTagFor(kind),
673
758
  model: this.options.model,
759
+ // CEO P1 #14 (auto-compact, 2026-05-29): pluggable compactor
760
+ // hook. The SDK driver invokes this pre-`client.send` on every
761
+ // turn. `maybeCompact` returns `null` below the 75% threshold
762
+ // or when the transcript is too short to drop history — the
763
+ // loop continues unchanged on the cold path. When it returns
764
+ // a result, the driver swaps the transcript + fires the
765
+ // `onAutoCompact` hook above which emits the stderr line.
766
+ autoCompact: ({ transcript, maxTokens }) => maybeCompact(transcript, maxTokens, autoCompactConfig),
674
767
  });
675
768
  }
676
769
  catch (err) {
@@ -696,6 +789,19 @@ export class NativePugiEngineAdapter {
696
789
  await loopPromise;
697
790
  if (loopError !== null) {
698
791
  const message = loopError instanceof Error ? loopError.message : String(loopError);
792
+ // Wave 7 #21: surface the crash to the audit trail before
793
+ // returning. Mirrors the `failed` arm of the structured path
794
+ // below so a SOC pipeline sees one `dispatch_end` per dispatch
795
+ // regardless of which code path produced it.
796
+ writeAuditEvent({
797
+ event: 'dispatch_end',
798
+ sessionId: session.id,
799
+ workspaceRoot: root,
800
+ data: {
801
+ status: 'crashed',
802
+ error: message,
803
+ },
804
+ });
699
805
  yield {
700
806
  type: 'result',
701
807
  result: {
@@ -714,6 +820,12 @@ export class NativePugiEngineAdapter {
714
820
  if (finalOutcome === null) {
715
821
  // Defensive — should never hit. `runEngineLoop` always either
716
822
  // resolves with an outcome or throws (and we catch that above).
823
+ writeAuditEvent({
824
+ event: 'dispatch_end',
825
+ sessionId: session.id,
826
+ workspaceRoot: root,
827
+ data: { status: 'no_outcome' },
828
+ });
717
829
  yield {
718
830
  type: 'result',
719
831
  result: {
@@ -756,6 +868,36 @@ export class NativePugiEngineAdapter {
756
868
  filesChanged: filesChangedList,
757
869
  reason: finalOutcome.reason,
758
870
  });
871
+ // Wave 7 #21: emit `dispatch_end` to the tenant-wide audit trail.
872
+ // When the loop tripped the per-command budget we ALSO emit a
873
+ // dedicated `budget_exhausted` row so a SOC query can filter on
874
+ // event type alone without parsing the `data.status` payload.
875
+ if (finalOutcome.status === 'budget_exhausted') {
876
+ writeAuditEvent({
877
+ event: 'budget_exhausted',
878
+ sessionId: session.id,
879
+ workspaceRoot: root,
880
+ data: {
881
+ toolCallCount: finalOutcome.toolCallCount,
882
+ turnsUsed: finalOutcome.turnsUsed,
883
+ tokensUsed: finalOutcome.tokensUsed,
884
+ reason: finalOutcome.reason ?? null,
885
+ },
886
+ });
887
+ }
888
+ writeAuditEvent({
889
+ event: 'dispatch_end',
890
+ sessionId: session.id,
891
+ workspaceRoot: root,
892
+ data: {
893
+ status: finalOutcome.status,
894
+ toolCallCount: finalOutcome.toolCallCount,
895
+ turnsUsed: finalOutcome.turnsUsed,
896
+ tokensUsed: finalOutcome.tokensUsed,
897
+ filesChangedCount: filesChangedList.length,
898
+ reason: finalOutcome.reason ?? null,
899
+ },
900
+ });
759
901
  yield {
760
902
  type: 'result',
761
903
  result: {
@@ -0,0 +1,410 @@
1
+ /**
2
+ * Hook chains — `PostToolUseFailure` + `TaskCompleted` first-class events.
3
+ *
4
+ * Wave 7 #24 (CEO P1, 2026-05-29). Pugi already had primitives for
5
+ * `PostToolUseFailure` in the legacy `core/hooks.ts` registry but no
6
+ * way to declare a **fallback chain** that fires automatically when a
7
+ * tool dispatch fails or when an entire `pugi <command>` dispatch
8
+ * completes. Claude Code exposes both events as first-class hook
9
+ * sources; this module wires them on the Pugi side.
10
+ *
11
+ * Why a fresh module rather than extending `core/hooks.ts`:
12
+ * - The legacy registry reads flat `hooks: [{event, match, run}]`
13
+ * arrays from `~/.pugi/hooks.json` + `.pugi/hooks.json`. Chains
14
+ * are declared in a nested `hooks: { EventName: [{matcher, run}] }`
15
+ * shape from `.pugi/settings.json` — a different file, different
16
+ * shape, and importantly a different opt-out grammar (per-chain
17
+ * `enabled: false`). Mixing the two readers would force every
18
+ * legacy caller to learn the chain shape.
19
+ * - Chains have richer payloads (TaskCompleted ships durationMs,
20
+ * toolCalls, filesChanged). Stuffing them through the legacy
21
+ * stdin/env contract would silently break v1 scripts that key on
22
+ * the existing payload shape.
23
+ * - Chain failures MUST NOT crash the dispatch (the model already
24
+ * finished). The legacy registry's `onFailure: 'block'` semantics
25
+ * would propagate the error — chains explicitly swallow it and
26
+ * log instead.
27
+ *
28
+ * The chain runner is intentionally self-contained: no dependency on
29
+ * `HookRegistry`, no trust-ledger gating (project settings are already
30
+ * trusted by virtue of being in the workspace's `.pugi/` dir, same as
31
+ * persona prompts), and a single `firePostToolUseFailureChain` /
32
+ * `fireTaskCompletedChain` entry point per event.
33
+ *
34
+ * Brand voice: ASCII only, no emoji, no em-dashes.
35
+ */
36
+ import { spawn } from 'node:child_process';
37
+ import { z } from 'zod';
38
+ import { loadSettings } from './settings.js';
39
/** Timeout applied to each command when an entry does not set `timeoutMs`. */
const DEFAULT_TIMEOUT_MS = 10_000;
/** Grace period between the SIGTERM and the follow-up SIGKILL. */
const SIGKILL_GRACE_MS = 2_000;
/** Output budget for one command; the runner checks stdout + stderr combined. */
const STREAM_CAP_BYTES = 256 * 1024;
/**
 * Matcher attached to a chain entry. Both keys are optional; unknown
 * keys are rejected by `.strict()`.
 *
 *   - `tool` is compared against the failing tool name
 *     (PostToolUseFailure).
 *   - `command` is compared against the completed command name
 *     (TaskCompleted).
 *
 * An entry without a matcher, or with `matcher: {}`, fires on every
 * event of its kind.
 */
const chainMatcherSchema = z
    .object({
        tool: z.string().min(1).optional(),
        command: z.string().min(1).optional(),
    })
    .strict();
/**
 * One chain entry.
 *
 *   - `run`: non-empty list of non-empty shell command strings, so an
 *     author can express "do A, then B, then C" without shell `&&`.
 *     The documented contract is sequential execution that stops at
 *     the first non-zero exit inside this entry; later entries of the
 *     chain are still attempted.
 *   - `timeoutMs`: optional positive integer, capped at 60_000.
 *     When omitted the runner uses DEFAULT_TIMEOUT_MS (10s).
 *   - `matcher`: optional, see `chainMatcherSchema`.
 */
const chainEntrySchema = z
    .object({
        matcher: chainMatcherSchema.optional(),
        run: z.array(z.string().min(1)).min(1),
        timeoutMs: z.number().int().positive().max(60_000).optional(),
    })
    .strict();
/**
 * Canonical per-event chain config.
 *
 *   - `enabled` defaults to true; `enabled: false` switches the whole
 *     chain off without deleting its entries.
 *   - `entries` defaults to an empty list, so `{ enabled: false }` on
 *     its own is a valid config.
 */
const chainConfigSchema = z
    .object({
        enabled: z.boolean().default(true),
        entries: z.array(chainEntrySchema).default([]),
    })
    .strict();
/**
 * A per-event value as the operator may type it: either the canonical
 * `{ enabled, entries }` object or the shorthand bare array of
 * entries. Readers normalise the shorthand to the canonical form.
 */
const chainOrShorthandSchema = z.union([chainConfigSchema, z.array(chainEntrySchema)]);
/**
 * Settings-level `hooks` block. Only the two chain events are
 * accepted (`.strict()` rejects any other key) and the whole block is
 * optional:
 *
 *   hooks: { PostToolUseFailure: { enabled, entries: [...] } }
 *   hooks: { PostToolUseFailure: [...entries] }
 */
const settingsHooksShape = z
    .object({
        PostToolUseFailure: chainOrShorthandSchema.optional(),
        TaskCompleted: chainOrShorthandSchema.optional(),
    })
    .strict()
    .optional();
112
/**
 * Resolve the chain config for one event from a loaded settings object.
 *
 * `settings.hooks` is validated against `settingsHooksShape` on every
 * call, so edits picked up by a settings reload apply without any
 * caching to invalidate. Both the canonical `{ enabled, entries }`
 * object and the shorthand bare array are accepted; the shorthand is
 * returned as an enabled chain.
 *
 * @param {object} settings Loaded settings; only `settings.hooks` is read.
 * @param {string} event Event key, e.g. `PostToolUseFailure` or `TaskCompleted`.
 * @returns {{enabled: boolean, entries: Array}} The declared chain, or
 *   an enabled chain with no entries when the `hooks` block is missing,
 *   fails validation, or does not declare `event`.
 */
export function resolveChain(settings, event) {
    const emptyChain = () => ({ enabled: true, entries: [] });
    const validation = settingsHooksShape.safeParse(settings.hooks);
    if (!validation.success || !validation.data) {
        return emptyChain();
    }
    const declared = validation.data[event];
    if (!declared) {
        return emptyChain();
    }
    // Shorthand array form: wrap it so callers always see one shape.
    return Array.isArray(declared) ? { enabled: true, entries: declared } : declared;
}
135
/**
 * Normalise a raw `hooks` settings value into the canonical shape.
 *
 * The result always carries both events, each in `{ enabled, entries }`
 * form, so callers can inspect the structure without running Zod
 * themselves. A value that fails validation (or is absent) yields two
 * enabled chains with no entries.
 *
 * @param {unknown} rawHooks The `hooks` section of a settings JSON value.
 * @returns {{PostToolUseFailure: object, TaskCompleted: object}}
 */
export function parseHookChains(rawHooks) {
    const toCanonical = (declared) => {
        if (!declared) {
            return { enabled: true, entries: [] };
        }
        return Array.isArray(declared) ? { enabled: true, entries: declared } : declared;
    };
    const validation = settingsHooksShape.safeParse(rawHooks);
    const hooks = validation.success && validation.data ? validation.data : {};
    return {
        PostToolUseFailure: toCanonical(hooks.PostToolUseFailure),
        TaskCompleted: toCanonical(hooks.TaskCompleted),
    };
}
159
/**
 * Decide whether a chain entry applies to a `PostToolUseFailure` payload.
 *
 * Only `matcher.tool` takes part in the comparison. `matcher.command`
 * has no meaning for this event and is ignored, so an entry that
 * declares both keys still matches on the tool alone.
 *
 * @param {{matcher?: {tool?: string, command?: string}}} entry Chain entry.
 * @param {{toolName: string}} payload Failure payload.
 * @returns {boolean} True when the entry should fire.
 */
function matchesPostToolUseFailure(entry, payload) {
    const matcher = entry.matcher;
    // No matcher, or a matcher with neither key set, fires on everything.
    const isUnconstrained = !matcher || !(matcher.tool || matcher.command);
    if (isUnconstrained) {
        return true;
    }
    return matcher.tool === undefined || matcher.tool === payload.toolName;
}
177
/**
 * Decide whether a chain entry applies to a `TaskCompleted` payload.
 *
 * Only `matcher.command` takes part in the comparison; an entry that
 * sets just `matcher.tool` matches every completed command.
 *
 * @param {{matcher?: {tool?: string, command?: string}}} entry Chain entry.
 * @param {{command: string}} payload Completion payload.
 * @returns {boolean} True when the entry should fire.
 */
function matchesTaskCompleted(entry, payload) {
    const matcher = entry.matcher;
    // No matcher, or a matcher with neither key set, fires on everything.
    const isUnconstrained = !matcher || !(matcher.tool || matcher.command);
    if (isUnconstrained) {
        return true;
    }
    return matcher.command === undefined || matcher.command === payload.command;
}
188
/**
 * Fire the `PostToolUseFailure` chain for one failed tool call.
 *
 * Settings come from `settingsOverride` when given, otherwise they are
 * loaded for `workspaceRoot` via `safeLoadSettings`. Every entry of the
 * chain produces one element in the returned `entries` list, in
 * declaration order: `{ matched: false, commands: [] }` for entries
 * whose matcher rejects the payload, or `{ matched: true, commands }`
 * with the per-command results from `runChainEntry`.
 *
 * Each hook command receives these extra environment variables:
 * PUGI_HOOK_EVENT, PUGI_HOOK_PAYLOAD (the payload as JSON),
 * PUGI_HOOK_TOOL and PUGI_HOOK_EXIT_CODE.
 *
 * @param {string} workspaceRoot Workspace whose settings declare the chain.
 * @param {{toolName: string, exitCode: number}} payload Failure details.
 * @param {object} [settingsOverride] Pre-loaded settings (used instead of loading).
 * @returns {Promise<{event: string, enabled: boolean, entries: Array}>}
 *   `enabled: false` with no entries when the chain is switched off.
 */
export async function firePostToolUseFailureChain(workspaceRoot, payload, settingsOverride) {
    const event = 'PostToolUseFailure';
    const settings = settingsOverride ?? safeLoadSettings(workspaceRoot);
    const chain = resolveChain(settings, event);
    if (!chain.enabled) {
        return { event, enabled: false, entries: [] };
    }
    const outcomes = [];
    for (const entry of chain.entries) {
        if (!matchesPostToolUseFailure(entry, payload)) {
            outcomes.push({ matched: false, commands: [] });
            continue;
        }
        const hookEnv = {
            PUGI_HOOK_EVENT: 'PostToolUseFailure',
            PUGI_HOOK_PAYLOAD: JSON.stringify(payload),
            PUGI_HOOK_TOOL: payload.toolName,
            PUGI_HOOK_EXIT_CODE: String(payload.exitCode),
        };
        const commands = await runChainEntry(entry, hookEnv);
        outcomes.push({ matched: true, commands });
    }
    return { event, enabled: true, entries: outcomes };
}
225
/**
 * Fire the `TaskCompleted` chain for one finished dispatch.
 *
 * Settings come from `settingsOverride` when given, otherwise they are
 * loaded for `workspaceRoot` via `safeLoadSettings`. Every entry of the
 * chain produces one element in the returned `entries` list, in
 * declaration order: `{ matched: false, commands: [] }` for entries
 * whose matcher rejects the payload, or `{ matched: true, commands }`
 * with the per-command results from `runChainEntry`.
 *
 * Each hook command receives these extra environment variables:
 * PUGI_HOOK_EVENT, PUGI_HOOK_PAYLOAD (the payload as JSON),
 * PUGI_HOOK_COMMAND, PUGI_HOOK_EXIT_CODE, PUGI_HOOK_DURATION_MS and
 * PUGI_HOOK_TOOL_CALLS. The payload carries `exitCode`, so a hook can
 * branch on success versus failure.
 *
 * @param {string} workspaceRoot Workspace whose settings declare the chain.
 * @param {{command: string, exitCode: number, durationMs: number, toolCalls: number}} payload
 *   Completion details.
 * @param {object} [settingsOverride] Pre-loaded settings (used instead of loading).
 * @returns {Promise<{event: string, enabled: boolean, entries: Array}>}
 *   `enabled: false` with no entries when the chain is switched off.
 */
export async function fireTaskCompletedChain(workspaceRoot, payload, settingsOverride) {
    const event = 'TaskCompleted';
    const settings = settingsOverride ?? safeLoadSettings(workspaceRoot);
    const chain = resolveChain(settings, event);
    if (!chain.enabled) {
        return { event, enabled: false, entries: [] };
    }
    const outcomes = [];
    for (const entry of chain.entries) {
        if (!matchesTaskCompleted(entry, payload)) {
            outcomes.push({ matched: false, commands: [] });
            continue;
        }
        const hookEnv = {
            PUGI_HOOK_EVENT: 'TaskCompleted',
            PUGI_HOOK_PAYLOAD: JSON.stringify(payload),
            PUGI_HOOK_COMMAND: payload.command,
            PUGI_HOOK_EXIT_CODE: String(payload.exitCode),
            PUGI_HOOK_DURATION_MS: String(payload.durationMs),
            PUGI_HOOK_TOOL_CALLS: String(payload.toolCalls),
        };
        const commands = await runChainEntry(entry, hookEnv);
        outcomes.push({ matched: true, commands });
    }
    return { event, enabled: true, entries: outcomes };
}
261
/**
 * Run the commands of ONE chain entry sequentially.
 *
 * Commands execute in declaration order and the entry stops at the
 * first command that does not exit 0, which is the documented contract
 * of a chain entry ("do A, then B, then C" with `&&` semantics). Only
 * the commands that actually ran appear in the result. Stopping is
 * scoped to this entry; the caller still attempts the next entry of
 * the chain.
 *
 * A spawn failure that surfaces as a thrown/rejected error is recorded
 * as a synthetic result with `exitCode: -1` instead of propagating, so
 * a broken hook can never crash the dispatch.
 *
 * @param {{run: string[], timeoutMs?: number}} entry Chain entry to execute.
 * @param {Record<string, string>} baseEnv Extra environment for every command.
 * @returns {Promise<Array<{command: string, exitCode: number, durationMs: number,
 *   stdout: string, stderr: string, timedOut: boolean}>>} One result per
 *   command that ran, in order.
 */
async function runChainEntry(entry, baseEnv) {
    const timeoutMs = entry.timeoutMs ?? DEFAULT_TIMEOUT_MS;
    const results = [];
    for (const command of entry.run) {
        let result;
        try {
            result = await executeOne(command, timeoutMs, baseEnv);
        }
        catch (error) {
            // Anything can be thrown, not only Error instances.
            const detail = error instanceof Error ? error.message : String(error);
            result = {
                command,
                exitCode: -1,
                durationMs: 0,
                stdout: '',
                stderr: `chain spawn error: ${detail}`,
                timedOut: false,
            };
        }
        results.push(result);
        if (result.exitCode !== 0) {
            // Short-circuit: later commands in this entry depend on this one.
            break;
        }
    }
    return results;
}
287
/**
 * Spawn ONE shell command via `/bin/sh -c` and capture its result.
 *
 * The returned promise settles exactly once, from the child's `close`
 * event or from its `error` event, whichever arrives first. If
 * `spawn` itself throws synchronously the promise rejects; the caller
 * is expected to catch that.
 *
 * Termination policy:
 *   - After `timeoutMs` the child gets SIGTERM.
 *   - When stdout + stderr together exceed STREAM_CAP_BYTES (measured
 *     in bytes) the child gets SIGTERM and further output is dropped.
 *   - If the child is still running SIGKILL_GRACE_MS after the first
 *     SIGTERM it gets SIGKILL. Liveness is read from
 *     `exitCode`/`signalCode`; `child.killed` only says that a signal
 *     was sent, so it cannot be used to decide whether to escalate.
 *
 * @param {string} command Shell command line.
 * @param {number} timeoutMs Time budget before SIGTERM.
 * @param {Record<string, string>} baseEnv Variables layered over `process.env`.
 * @returns {Promise<{command: string, exitCode: number, durationMs: number,
 *   stdout: string, stderr: string, timedOut: boolean}>} `exitCode` is the
 *   real exit code, -15 / -9 when terminated by SIGTERM / SIGKILL, or
 *   -1 for any other signal or an `error` event. `timedOut` is true for
 *   both a timeout kill and a stream-cap kill.
 */
function executeOne(command, timeoutMs, baseEnv) {
    return new Promise((resolvePromise) => {
        const startedAt = Date.now();
        const child = spawn('/bin/sh', ['-c', command], {
            env: { ...process.env, ...baseEnv },
            stdio: ['pipe', 'pipe', 'pipe'],
        });
        // Raw chunks are kept and decoded once at the end so a multi-byte
        // UTF-8 sequence split across two chunks is not corrupted.
        const stdoutChunks = [];
        const stderrChunks = [];
        let capturedBytes = 0;
        let killedForTimeout = false;
        let killedForStreamCap = false;
        let settled = false;
        let sigKillTimer;
        const decode = (chunks) => Buffer.concat(chunks).toString('utf8');
        const isStillRunning = () => child.exitCode === null && child.signalCode === null;
        // SIGTERM now, SIGKILL after the grace period if the child ignored it.
        // The grace timer is armed once, by whichever kill reason comes first.
        const terminate = () => {
            child.kill('SIGTERM');
            if (sigKillTimer) {
                return;
            }
            sigKillTimer = setTimeout(() => {
                if (isStillRunning()) {
                    child.kill('SIGKILL');
                }
            }, SIGKILL_GRACE_MS);
            if (sigKillTimer.unref) {
                sigKillTimer.unref();
            }
        };
        const captureInto = (chunks) => (chunk) => {
            if (killedForStreamCap) {
                return;
            }
            const bytes = Buffer.isBuffer(chunk) ? chunk : Buffer.from(String(chunk), 'utf8');
            chunks.push(bytes);
            capturedBytes += bytes.length;
            if (capturedBytes > STREAM_CAP_BYTES) {
                killedForStreamCap = true;
                terminate();
            }
        };
        child.stdout?.on('data', captureInto(stdoutChunks));
        child.stderr?.on('data', captureInto(stderrChunks));
        // Close stdin so commands that block on read (cat, jq) do not hang
        // the chain. Errors are swallowed because the child may have
        // already exited before the stream is ended.
        if (child.stdin) {
            child.stdin.on('error', () => {
                /* ignore EPIPE */
            });
            child.stdin.end();
        }
        const timer = setTimeout(() => {
            killedForTimeout = true;
            terminate();
        }, timeoutMs);
        if (timer.unref) {
            timer.unref();
        }
        // `error` and `close` can both fire for one child; settle only once.
        const settle = (result) => {
            if (settled) {
                return;
            }
            settled = true;
            clearTimeout(timer);
            if (sigKillTimer) {
                clearTimeout(sigKillTimer);
            }
            resolvePromise(result);
        };
        child.on('error', (error) => {
            settle({
                command,
                exitCode: -1,
                durationMs: Date.now() - startedAt,
                stdout: decode(stdoutChunks),
                stderr: decode(stderrChunks) || `hook spawn error: ${error.message}`,
                timedOut: false,
            });
        });
        child.on('close', (code, signal) => {
            let exitCode;
            if (code !== null) {
                exitCode = code;
            }
            else if (signal === 'SIGTERM') {
                exitCode = -15;
            }
            else if (signal === 'SIGKILL') {
                exitCode = -9;
            }
            else {
                exitCode = -1;
            }
            settle({
                command,
                exitCode,
                durationMs: Date.now() - startedAt,
                stdout: decode(stdoutChunks),
                stderr: decode(stderrChunks),
                timedOut: killedForTimeout || killedForStreamCap,
            });
        });
    });
}
391
/**
 * Load settings for `root` without ever throwing.
 *
 * `loadSettings` throws when a settings file is outright broken.
 * Chains MUST NOT crash the dispatch, so the failure is logged to
 * stderr and a fallback is returned instead:
 *
 *   1. Settings loaded for a path that does not exist, which yields
 *      the defaults `loadSettings` produces for a missing file.
 *   2. If that also throws (NOTE(review): possible if `loadSettings`
 *      reads any layer outside `root` - confirm), an empty object.
 *      `resolveChain` treats a missing `hooks` block as "no chains".
 *
 * @param {string} root Workspace root to load settings for.
 * @returns {object} Loaded settings, default settings, or `{}`.
 */
function safeLoadSettings(root) {
    const describe = (error) => (error instanceof Error ? error.message : String(error));
    try {
        return loadSettings(root);
    }
    catch (error) {
        process.stderr.write(`[pugi hook-chains] settings load failed: ${describe(error)}\n`);
    }
    try {
        return loadSettings('/this-path-does-not-exist-fallback');
    }
    catch (error) {
        process.stderr.write(`[pugi hook-chains] fallback settings load failed: ${describe(error)}\n`);
        return {};
    }
}
410
+ //# sourceMappingURL=hook-chains.js.map
@@ -1318,6 +1318,7 @@ function parseArgs(argv) {
1318
1318
  // TTY). PUGI_NO_AUTO_* env vars provide a per-shell escape hatch
1319
1319
  // without needing to thread the flag through every invocation.
1320
1320
  noInit: process.env.PUGI_NO_AUTO_INIT === '1',
1321
+ live: false,
1321
1322
  noLogin: process.env.PUGI_NO_AUTO_LOGIN === '1',
1322
1323
  decompose: false,
1323
1324
  // β-headless: --no-tools default OFF so existing flag-free invocations
@@ -1430,6 +1431,9 @@ function parseArgs(argv) {
1430
1431
  // at the global level for consistency with --no-splash / --no-tool-stream.
1431
1432
  flags.noDefaults = true;
1432
1433
  }
1434
+ else if (arg === '--live') {
1435
+ flags.live = true;
1436
+ }
1433
1437
  else if (arg === '--ascii-only') {
1434
1438
  // Leak L33 — `pugi stickers --ascii-only` skips the Ink boxed
1435
1439
  // renderer. Parsed globally so the dispatcher can pass the flag
@@ -2150,6 +2154,7 @@ async function doctor(_args, flags, _session) {
2150
2154
  home: defaultDoctorHome(),
2151
2155
  env: process.env,
2152
2156
  json: flags.json,
2157
+ live: flags.live,
2153
2158
  writeOutput: (payload, text) => writeOutput(flags, payload, text),
2154
2159
  });
2155
2160
  }
@@ -53,6 +53,7 @@ import { probeBareMode } from '../../core/diagnostics/probes/bare-mode.js';
53
53
  import { probePugiMdHierarchy } from '../../core/diagnostics/probes/pugi-md.js';
54
54
  import { probeSandbox } from '../../core/diagnostics/probes/sandbox.js';
55
55
  import { probeHooks } from '../../core/diagnostics/probes/hooks.js';
56
+ import { probeEngineLive } from '../../core/diagnostics/probes/engine-live.js';
56
57
  /**
57
58
  * Default API URL when no PUGI_API_URL env override is set. Mirrors
58
59
  * the constant in `core/credentials.ts` (kept local to avoid an
@@ -250,6 +251,27 @@ export function buildDefaultProbes(ctx, options = {}) {
250
251
  }),
251
252
  },
252
253
  ];
254
+ // CEO P1 #22 (2026-05-29): ENGINE LIVE probe — end-to-end smoke
255
+ // against api.pugi.io. Only fires when `--live` is set so the
256
+ // default `pugi doctor` stays offline-safe.
257
+ if (options.live) {
258
+ probes.push({
259
+ name: 'ENGINE LIVE',
260
+ run: async () => probeEngineLive(ctx, {
261
+ resolveApiUrl: (env) => env['PUGI_API_URL'] ?? 'https://api.pugi.io',
262
+ resolveApiKey: (env) => {
263
+ const fromEnv = env['PUGI_API_KEY'];
264
+ if (fromEnv && fromEnv.length > 0)
265
+ return fromEnv;
266
+ const credential = resolveActiveCredential(env, ctx.home);
267
+ return credential?.apiKey ?? null;
268
+ },
269
+ fetchImpl,
270
+ now,
271
+ }),
272
+ timeoutMs: 20_000,
273
+ });
274
+ }
253
275
  return probes;
254
276
  }
255
277
  /**
@@ -266,6 +288,7 @@ export async function runDoctorCommand(ctx) {
266
288
  const probes = buildDefaultProbes(probeCtx, {
267
289
  ...(ctx.liveSessionId ? { liveSessionId: ctx.liveSessionId } : {}),
268
290
  ...(ctx.denialTracking ? { denialTracking: ctx.denialTracking } : {}),
291
+ ...(ctx.live ? { live: ctx.live } : {}),
269
292
  });
270
293
  const report = await runProbes(probes);
271
294
  // Defensive recompute: even though runProbes already computed the
@@ -44,7 +44,7 @@ export function sanitizeSemver(raw) {
44
44
  * during import). When bumping the CLI version BOTH literals must be
45
45
  * updated; the release smoke-test (`pack:smoke`) verifies they agree.
46
46
  */
47
- export const PUGI_CLI_VERSION = sanitizeSemver('0.1.0-beta.50');
47
+ export const PUGI_CLI_VERSION = sanitizeSemver('0.1.0-beta.52');
48
48
  /**
49
49
  * Outbound: the CLI's installed semver. Read at request time by
50
50
  * `version-interceptor.ts` and injected on every `fetch` call.
@@ -33,6 +33,7 @@ import { isAbsolute, join, resolve } from 'node:path';
33
33
  import { spawn, spawnSync } from 'node:child_process';
34
34
  import { classifyBash } from '../core/bash-classifier.js';
35
35
  import { evaluateBashPermission } from '../core/permission.js';
36
+ import { writeAuditEvent } from '../core/audit/audit-trail.js';
36
37
  import { getJobRegistry, } from '../core/jobs/registry.js';
37
38
  import { recordToolCall, recordToolResult } from '../core/session.js';
38
39
  export const BASH_OUTPUT_CAP_BYTES = 32 * 1024;
@@ -95,6 +96,22 @@ export async function bashTool(input, ctx) {
95
96
  if (decision.decision !== 'allow') {
96
97
  const reason = `Permission ${decision.decision}: ${decision.reason}`;
97
98
  recordToolResult(ctx.session, toolCallId, 'error', reason);
99
+ // Wave 7 Sprint 1 #21 (2026-05-29): emit `permission_denied` to
100
+ // the tenant-wide audit trail. Truncate the cmd preview to 200
101
+ // chars so a long here-doc does not bloat the JSONL row; the
102
+ // session log keeps the full text for forensic replay.
103
+ writeAuditEvent({
104
+ event: 'permission_denied',
105
+ sessionId: ctx.session.id,
106
+ workspaceRoot: ctx.root,
107
+ data: {
108
+ tool: 'bash',
109
+ source,
110
+ decision: decision.decision,
111
+ reason: decision.reason,
112
+ cmdPreview: cmd.slice(0, 200),
113
+ },
114
+ });
98
115
  return {
99
116
  stdout: '',
100
117
  stderr: `Permission denied: ${decision.reason}`,
@@ -800,6 +817,20 @@ export function bashToolSync(input, ctx) {
800
817
  if (decision.decision !== 'allow') {
801
818
  const reason = `Permission ${decision.decision}: ${decision.reason}`;
802
819
  recordToolResult(ctx.session, toolCallId, 'error', reason);
820
+ // Wave 7 #21: mirror the async-path emission so sync callers
821
+ // (spawnSync fallback) produce the same tenant-wide audit trail.
822
+ writeAuditEvent({
823
+ event: 'permission_denied',
824
+ sessionId: ctx.session.id,
825
+ workspaceRoot: ctx.root,
826
+ data: {
827
+ tool: 'bash',
828
+ source,
829
+ decision: decision.decision,
830
+ reason: decision.reason,
831
+ cmdPreview: cmd.slice(0, 200),
832
+ },
833
+ });
803
834
  return {
804
835
  stdout: '',
805
836
  stderr: `Permission denied: ${decision.reason}`,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@pugi/cli",
3
- "version": "0.1.0-beta.50",
3
+ "version": "0.1.0-beta.52",
4
4
  "description": "Pugi CLI - terminal-native software execution system",
5
5
  "homepage": "https://pugi.io",
6
6
  "repository": {
@@ -54,8 +54,8 @@
54
54
  "turndown": "^7.2.4",
55
55
  "undici": "^8.3.0",
56
56
  "zod": "^3.23.0",
57
- "@pugi/personas": "0.1.2",
58
- "@pugi/sdk": "0.1.0-beta.50"
57
+ "@pugi/sdk": "0.1.0-beta.52",
58
+ "@pugi/personas": "0.1.2"
59
59
  },
60
60
  "devDependencies": {
61
61
  "@types/node": "^22.0.0",