npm - @pugi/cli - Versions diffs - 0.1.0-beta.50 → 0.1.0-beta.52 - Mend

@pugi/cli 0.1.0-beta.50 → 0.1.0-beta.52

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11) hide show

package/dist/core/audit/audit-trail.js +194 -0
package/dist/core/diagnostics/probes/engine-live.js +46 -0
package/dist/core/engine/auto-compact.js +179 -0
package/dist/core/engine/budgets.js +57 -0
package/dist/core/engine/native-pugi.js +143 -1
package/dist/core/hook-chains.js +410 -0
package/dist/runtime/cli.js +5 -0
package/dist/runtime/commands/doctor.js +23 -0
package/dist/runtime/version.js +1 -1
package/dist/tools/bash.js +31 -0
package/package.json +3 -3

package/dist/core/audit/audit-trail.js ADDED Viewed

@@ -0,0 +1,194 @@
+/**
+ * Tenant-wide JSONL audit trail (Wave 7 Sprint 1 #21, 2026-05-29).
+ *
+ * Pugi already records every tool_call / tool_result in two places:
+ *
+ *   1. The global per-workspace log at `<workspace>/.pugi/events.jsonl`
+ *      (audit-replay source of truth; see `core/session.ts`).
+ *   2. The per-session mirror at
+ *      `<workspace>/.pugi/sessions/<sessionId>/events.jsonl`
+ *      (operator-friendly per-run copy, see `native-pugi.ts`).
+ *
+ * Both live under the workspace directory and disappear when the
+ * operator wipes the workspace or runs many ephemeral sandboxes.
+ * What's missing is a TENANT-wide structured audit log: a single
+ * append-only NDJSON stream per (tenant, workspace) pair that the
+ * operator (or a SOC pipeline) can tail across every session over
+ * the lifetime of the host.
+ *
+ * Spec (Wave 7 #21):
+ *
+ *   - Path: `~/.pugi/audit/<tenant>/<workspace-slug>-<hash>.jsonl`
+ *   - One JSON line per event with shared shape:
+ *     `{ ts, tenant, workspace, workspaceHash, event, sessionId, data }`
+ *   - Events covered: `tool_call`, `tool_result`, `dispatch_start`,
+ *     `dispatch_end`, `permission_denied`, `auto_compact`,
+ *     `budget_exhausted`.
+ *   - Append-only — no rotation logic. Operators wire `logrotate`
+ *     themselves if they want size caps.
+ *   - Opt-out: `PUGI_AUDIT_TRAIL_DISABLE=1`.
+ *   - Failures NEVER throw. Audit MUST NOT break a dispatch.
+ *   - Tenant fallback: when `PUGI_API_KEY` is unset, tenant is `local`.
+ *
+ * Why duplicate the per-session log on disk:
+ *
+ *   The per-session mirror clusters by `sessionId` (one dir per run).
+ *   To answer "what did this tenant DO across every session this week
+ *   from this workspace" an operator otherwise has to glob hundreds of
+ *   session dirs and merge by timestamp. The audit trail flattens that
+ *   into one tail-able stream per (tenant, workspace) — same shape an
+ *   ops pipeline would expect from a hosted log surface.
+ */
+import { appendFileSync, mkdirSync } from 'node:fs';
+import { createHash } from 'node:crypto';
+import { homedir } from 'node:os';
+import { basename, dirname, join, resolve } from 'node:path';
+/**
+ * Opt-out env var. Mirrors the convention every other Pugi feature uses
+ * (`PUGI_BARE`, `PUGI_AGENTMEMORY_RECALL_ENABLED=false`, etc.).
+ * Operators set this when they pipe the CLI through a sandbox that
+ * already captures audit upstream and they want to skip the duplicate.
+ *
+ * `isAuditDisabled` treats `1`, `true` and `yes` (trimmed,
+ * case-insensitive) as "disabled"; any other value keeps the trail on.
+ */
+export const PUGI_AUDIT_TRAIL_DISABLE_VAR = 'PUGI_AUDIT_TRAIL_DISABLE';
+/**
+ * Tenant fallback used when the operator has not exported
+ * `PUGI_API_KEY` (or exported it empty / whitespace-only). The audit
+ * trail still flows — it just lives under `~/.pugi/audit/local/...` so
+ * a single-user workstation gets a useful forensic log without needing
+ * API-key plumbing.
+ */
+export const LOCAL_TENANT_FALLBACK = 'local';
+/**
+ * Sanitize the workspace basename to a safe filesystem slug:
+ * lowercase a-z + 0-9 + `-`. Every run of other characters collapses
+ * to a single `-`, and one leading / trailing `-` is stripped. We avoid
+ * the empty case (root workspace, or a basename with no ASCII
+ * alphanumerics at all) by falling back to `workspace`.
+ *
+ * Why not a hash here too: the hash is appended separately so two
+ * workspaces with the same basename (e.g. two clones of the same repo
+ * sitting in different parent dirs) get distinct files. The slug is
+ * the human-readable half operators eyeball at `ls ~/.pugi/audit/...`.
+ *
+ * @param workspaceRoot Workspace directory; it is passed through
+ *   `path.resolve` before the basename is taken.
+ * @returns The sanitized slug, or `'workspace'` when nothing is left.
+ */
+export function sanitizeWorkspaceSlug(workspaceRoot) {
+    const base = basename(resolve(workspaceRoot));
+    const sanitized = base
+        .toLowerCase()
+        .replace(/[^a-z0-9-]+/g, '-')
+        .replace(/-+/g, '-')
+        .replace(/^-|-$/g, '');
+    return sanitized.length > 0 ? sanitized : 'workspace';
+}
+/**
+ * Stable, anonymous workspace handle. We use the FIRST 8 hex of
+ * sha256(workspaceRoot). 8 hex = 32 bits = ~4 billion buckets, more
+ * than enough to disambiguate `~/code/foo` from `~/other/foo` on the
+ * same host without leaking the absolute path through the file name.
+ *
+ * The hash is taken over the output of `path.resolve`, i.e. the
+ * absolute, normalized spelling of the path, so relative and absolute
+ * references to the same directory produce the same handle.
+ *
+ * NOTE(review): `path.resolve` does not dereference symlinks, so a
+ * workspace reached through a symlink hashes differently from its real
+ * location. If symlink-stable handles are required this would need
+ * `fs.realpathSync` — confirm the intended behavior.
+ *
+ * @param workspaceRoot Workspace directory (relative or absolute).
+ * @returns 8 lowercase hex characters.
+ */
+export function computeWorkspaceHash(workspaceRoot) {
+    return createHash('sha256')
+        .update(resolve(workspaceRoot))
+        .digest('hex')
+        .slice(0, 8);
+}
+/**
+ * Derive the tenant slug from `PUGI_API_KEY`. We hash the key (sha256,
+ * 12 hex prefix) rather than emitting the raw key — the audit trail is
+ * a plaintext file on the local FS and the tenant slug shows up in
+ * every path under `~/.pugi/audit/`. A truncated hash is enough to
+ * cluster every (tenant, workspace) over time without leaking the key
+ * if the operator accidentally `tar`s their `~/.pugi` for support.
+ *
+ * The hash is purely a CLI-local clustering key — the runtime backend
+ * has its own (different) tenant identifier and never sees this.
+ *
+ * @param env Environment map to read `PUGI_API_KEY` from; defaults to
+ *   `process.env`. The key is trimmed before hashing.
+ * @returns 12 lowercase hex characters, or `LOCAL_TENANT_FALLBACK`
+ *   when the key is missing or blank.
+ */
+export function resolveTenant(env = process.env) {
+    const key = env.PUGI_API_KEY?.trim();
+    if (!key)
+        return LOCAL_TENANT_FALLBACK;
+    // 12 hex = 48 bits — enough disambiguation for any realistic per-host
+    // tenant cardinality; still short enough for operators to eyeball at
+    // `ls ~/.pugi/audit/`.
+    return createHash('sha256').update(key).digest('hex').slice(0, 12);
+}
+/**
+ * Resolve the audit file path for a given (tenant, workspace) pair:
+ * `<home>/.pugi/audit/<tenant>/<slug>-<hash>.jsonl`.
+ * Pure path arithmetic — the caller is responsible for `mkdir -p`
+ * before append (handled inside `writeAuditEvent`).
+ *
+ * NOTE(review): `tenant` is joined into the path verbatim. A value
+ * containing path separators or `..` would place the file outside
+ * `<home>/.pugi/audit/`; values from `resolveTenant` are hex-only, but
+ * caller-supplied tenants are not validated here — confirm callers.
+ *
+ * @param workspaceRoot Workspace directory used for the slug and hash.
+ * @param tenant Directory name under `.pugi/audit/`.
+ * @param home Home directory; defaults to `os.homedir()`.
+ * @returns The audit file path.
+ */
+export function resolveAuditPath(workspaceRoot, tenant, home = homedir()) {
+    const slug = sanitizeWorkspaceSlug(workspaceRoot);
+    const hash = computeWorkspaceHash(workspaceRoot);
+    return join(home, '.pugi', 'audit', tenant, `${slug}-${hash}.jsonl`);
+}
+/**
+ * Predicate: is the audit trail disabled via env opt-out?
+ *
+ * Accept `1`, `true`, `yes` (case-insensitive, surrounding whitespace
+ * ignored) as positive; anything else — including `0`, `false`, `''`,
+ * and the var being absent — keeps the trail enabled. Mirrors the
+ * convention used in `bare-mode/` and elsewhere in the CLI.
+ *
+ * @param env Environment map to read the opt-out variable from;
+ *   defaults to `process.env`.
+ * @returns `true` only when the variable holds one of the positive values.
+ */
+export function isAuditDisabled(env = process.env) {
+    const raw = env[PUGI_AUDIT_TRAIL_DISABLE_VAR]?.trim().toLowerCase();
+    if (!raw)
+        return false;
+    return raw === '1' || raw === 'true' || raw === 'yes';
+}
+/**
+ * Append a single audit event to the per-tenant per-workspace NDJSON
+ * trail. Failures inside the `try` (FS unwritable, unserializable
+ * `data`, bad `workspaceRoot`) are silently swallowed so a
+ * misconfigured audit surface cannot break a dispatch. The engine
+ * adapter's existing per-session mirror remains intact as a redundant
+ * copy.
+ *
+ * NOTE(review): the opt-out check runs before the `try`, so a `null` /
+ * `undefined` `input` (or an `env` whose property access throws) would
+ * still raise — confirm every caller passes an object.
+ *
+ * Append-only: every call writes exactly one line. No rotation, no
+ * truncation. Operators wire `logrotate` if they want size caps.
+ *
+ * macOS hardening: we `mkdir -p` the parent dir on every call (cheap
+ * in practice — Node short-circuits when the dir exists) so a manual
+ * `rm -rf ~/.pugi/audit/<tenant>/` between runs does not turn the next
+ * append into ENOENT. The requested mode is `0o700` for directories
+ * and `0o600` for the JSONL file so curious users on a shared host
+ * cannot read another tenant's trail. Both modes only take effect when
+ * the directory / file is created by this call; pre-existing entries
+ * keep whatever permissions they already have.
+ *
+ * Fields read from `input`:
+ *   - `workspaceRoot` — workspace directory (drives slug, hash, path).
+ *   - `event`, `sessionId`, `data` — copied into the envelope as-is.
+ *   - `tenant` (optional) — trimmed; when blank, `resolveTenant(env)`.
+ *   - `env` (optional) — defaults to `process.env`.
+ *   - `home` (optional) — defaults to `os.homedir()`.
+ *   - `now` (optional) — function whose return value becomes `ts`;
+ *     otherwise the current time as an ISO-8601 string.
+ *
+ * @returns Nothing.
+ */
+export function writeAuditEvent(input) {
+    const env = input.env ?? process.env;
+    if (isAuditDisabled(env))
+        return;
+    try {
+        const tenant = (input.tenant?.trim() || resolveTenant(env)) || LOCAL_TENANT_FALLBACK;
+        const home = input.home ?? homedir();
+        const path = resolveAuditPath(input.workspaceRoot, tenant, home);
+        const now = input.now ? input.now() : new Date().toISOString();
+        const envelope = {
+            ts: now,
+            tenant,
+            workspace: sanitizeWorkspaceSlug(input.workspaceRoot),
+            workspaceHash: computeWorkspaceHash(input.workspaceRoot),
+            event: input.event,
+            sessionId: input.sessionId,
+            data: input.data,
+        };
+        try {
+            mkdirSync(dirname(path), { recursive: true, mode: 0o700 });
+        }
+        catch {
+            // mkdir failure is deliberately ignored: if the directory is truly
+            // unusable the appendFileSync below throws and the outer catch
+            // swallows it. NOTE(review): with `recursive: true` an existing
+            // directory should not raise EEXIST, so only real failures land here.
+        }
+        appendFileSync(path, `${JSON.stringify(envelope)}\n`, {
+            encoding: 'utf8',
+            mode: 0o600,
+        });
+    }
+    catch {
+        // Audit failures must NEVER break a dispatch. The session log + the
+        // per-session mirror under `<workspace>/.pugi/` remain as redundant
+        // surfaces. A future telemetry pass can surface the failure count
+        // via the doctor probe; for now silent no-op is the contract.
+    }
+}
+//# sourceMappingURL=audit-trail.js.map

package/dist/core/diagnostics/probes/engine-live.js ADDED Viewed

@@ -0,0 +1,46 @@
+/** Fixed prompt sent to the engine; the reply is checked for the text "ok". */
+const LIVE_PROMPT = 'Reply with the single word OK.';
+/** The probe request is aborted after this many milliseconds. */
+const TIMEOUT_MS = 15_000;
+/**
+ * Doctor probe: POST one minimal request to `<apiUrl>/api/pugi/engine`
+ * and report whether the engine answered.
+ *
+ * Every result carries `name: 'ENGINE LIVE'` plus one of these statuses:
+ *   - `skipped` — `deps.resolveApiKey(ctx.env)` returned nothing; no
+ *     request is sent.
+ *   - `error` — `res.ok` is false (up to 200 chars of the body are
+ *     quoted), the success body is not JSON, or the request threw
+ *     (including the timeout abort).
+ *   - `warn` — the round-trip worked but `content` lacks "ok".
+ *   - `ok` — the round-trip worked and `content` contains "ok".
+ *
+ * Errors raised by the request itself are converted into an `error`
+ * result. The calls made before the `try` (`resolveApiKey`,
+ * `resolveApiUrl`, the first `now`) are not guarded.
+ *
+ * @param ctx Probe context; only `ctx.env` is read here.
+ * @param deps Injected helpers: `resolveApiKey`, `resolveApiUrl`,
+ *   `now` (presumably a millisecond clock — the difference of two calls
+ *   is reported as `latencyMs`) and `fetchImpl` (called like `fetch`).
+ * @returns A promise for the probe result object.
+ */
+export async function probeEngineLive(ctx, deps) {
+    const apiKey = deps.resolveApiKey(ctx.env);
+    if (!apiKey) {
+        return { name: 'ENGINE LIVE', status: 'skipped', detail: 'no API key (run `pugi login` or set PUGI_API_KEY)' };
+    }
+    const apiUrl = deps.resolveApiUrl(ctx.env);
+    const startedAt = deps.now();
+    // Strip a single trailing slash so the joined URL has no `//`.
+    const url = (apiUrl.endsWith('/') ? apiUrl.slice(0, -1) : apiUrl) + '/api/pugi/engine';
+    try {
+        const res = await deps.fetchImpl(url, {
+            method: 'POST',
+            signal: AbortSignal.timeout(TIMEOUT_MS),
+            headers: { 'content-type': 'application/json', authorization: `Bearer ${apiKey}` },
+            body: JSON.stringify({
+                personaSlug: 'main',
+                command: 'explain',
+                messages: [{ role: 'user', content: LIVE_PROMPT }],
+                tools: [],
+                temperature: 0,
+                maxTokens: 32,
+            }),
+        });
+        const latencyMs = deps.now() - startedAt;
+        if (!res.ok) {
+            const body = await res.text().catch(() => '');
+            return { name: 'ENGINE LIVE', status: 'error', detail: `engine returned HTTP ${res.status}${body ? `: ${body.slice(0, 200)}` : ''}`, latencyMs };
+        }
+        const json = (await res.json().catch(() => null));
+        if (!json)
+            return { name: 'ENGINE LIVE', status: 'error', detail: 'engine returned 2xx but body is not JSON', latencyMs };
+        const model = typeof json['model'] === 'string' ? json['model'] : 'unknown';
+        const content = typeof json['content'] === 'string' ? json['content'] : '';
+        // NOTE(review): substring match — replies such as "token" or
+        // "broken" also count as containing "ok".
+        if (!content.toLowerCase().includes('ok')) {
+            return { name: 'ENGINE LIVE', status: 'warn', detail: `round-trip OK via ${model} (${latencyMs}ms) but reply did not contain expected token; got "${content.slice(0, 80)}"`, latencyMs };
+        }
+        return { name: 'ENGINE LIVE', status: 'ok', detail: `round-trip OK via ${model} (${latencyMs}ms)`, latencyMs };
+    }
+    catch (error) {
+        const latencyMs = deps.now() - startedAt;
+        const message = error instanceof Error ? error.message : String(error);
+        return { name: 'ENGINE LIVE', status: 'error', detail: `engine round-trip failed: ${message}`, latencyMs };
+    }
+}
+//# sourceMappingURL=engine-live.js.map

package/dist/core/engine/auto-compact.js ADDED Viewed

@@ -0,0 +1,179 @@
+/**
+ * Crude token-count heuristic mirroring `runEngineLoop`'s fallback
+ * accounting (transcript char count / 4). The CLI does not have access
+ * to a real tokenizer pre-flight — the runtime returns `usage.totalTokens`
+ * only on the server response, which is too late for our pre-turn gate.
+ * char/4 is in the right order of magnitude for English/TS and matches
+ * what the loop's own fallback uses on `tokensUsed === 0` upstream.
+ *
+ * Counted per message: `content.length` plus, for each entry of
+ * `toolCalls`, `name.length + arguments.length`.
+ *
+ * NOTE(review): assumes `content`, `name` and `arguments` are always
+ * strings; a message with `content: null` would throw here — confirm
+ * against the transcript type.
+ *
+ * @param messages Transcript messages to measure.
+ * @returns Estimated token count, rounded up.
+ */
+export function estimateTranscriptTokens(messages) {
+    let chars = 0;
+    for (const m of messages) {
+        chars += m.content.length;
+        const calls = m.toolCalls ?? [];
+        for (const c of calls) {
+            chars += c.name.length + c.arguments.length;
+        }
+    }
+    return Math.ceil(chars / 4);
+}
+/**
+ * Tool names whose `path` / `filePath` argument is collected into the
+ * file count by `summarizeDroppedTurns`. Both `multi_edit` and
+ * `multiEdit` spellings are listed.
+ */
+const FILE_TOOL_NAMES = new Set([
+    'read',
+    'write',
+    'edit',
+    'multi_edit',
+    'multiEdit',
+]);
+/**
+ * Walk the dropped slice and pull out tool-call metadata. Only
+ * messages with `role === 'assistant'` contribute tool calls. We parse
+ * the `arguments` JSON best-effort — a bad parse is harmless here
+ * because the executor surfaced the canonical error to the model
+ * already; the call still counts toward `toolCalls`, it just does not
+ * contribute a path to `fileCount`.
+ *
+ * @param dropped Messages being removed from the transcript.
+ * @returns `toolCalls` (all tool calls, bash included), `fileCount`
+ *   (distinct paths touched by the tools in `FILE_TOOL_NAMES`),
+ *   `bashCalls`, and `messagesDropped` (length of `dropped`).
+ */
+export function summarizeDroppedTurns(dropped) {
+    let toolCalls = 0;
+    let bashCalls = 0;
+    const files = new Set();
+    for (const m of dropped) {
+        if (m.role === 'assistant') {
+            const calls = m.toolCalls ?? [];
+            toolCalls += calls.length;
+            for (const c of calls) {
+                if (c.name === 'bash') {
+                    bashCalls += 1;
+                    continue;
+                }
+                if (FILE_TOOL_NAMES.has(c.name)) {
+                    const p = extractPath(c.arguments);
+                    if (p)
+                        files.add(p);
+                }
+            }
+        }
+    }
+    return {
+        toolCalls,
+        fileCount: files.size,
+        bashCalls,
+        messagesDropped: dropped.length,
+    };
+}
+/**
+ * Best-effort read of the file path from a tool call's raw JSON
+ * arguments. Looks at `path` first, then `filePath`.
+ *
+ * @param rawArgs JSON text of the tool-call arguments.
+ * @returns The non-empty string path, or `null` when the input is
+ *   empty, not valid JSON, not a plain object, or has no usable path.
+ */
+function extractPath(rawArgs) {
+    if (!rawArgs)
+        return null;
+    try {
+        const parsed = JSON.parse(rawArgs);
+        if (parsed && typeof parsed === 'object' && !Array.isArray(parsed)) {
+            const obj = parsed;
+            const path = obj['path'] ?? obj['filePath'];
+            if (typeof path === 'string' && path.length > 0)
+                return path;
+        }
+    }
+    catch {
+        return null;
+    }
+    return null;
+}
+/**
+ * Format the deterministic gist string spliced into the synthetic
+ * system message. Stable shape so spec assertions and operator
+ * logs do not drift turn-over-turn.
+ *
+ * NOTE(review): the text says "files read", but `stats.fileCount` as
+ * produced by `summarizeDroppedTurns` also counts paths touched by
+ * write / edit tools — confirm whether the label should be broader.
+ *
+ * @param stats Object with `toolCalls`, `fileCount` and `bashCalls`.
+ * @returns The single-line `[auto-compact] …` sentinel text.
+ */
+export function renderAutoCompactSentinel(stats) {
+    return (`[auto-compact] Earlier turns ` +
+        `(${stats.toolCalls} tool calls, ${stats.fileCount} files read, ${stats.bashCalls} bash commands) ` +
+        `summarized to free transcript headroom. ` +
+        `Recent turns and the original task remain in context; ` +
+        `re-read any earlier file by name if you need its contents again.`);
+}
+/**
+ * Minimum transcript length (in messages) before compact is allowed.
+ * We always retain `system + user` (the first 2) + the last 2 messages,
+ * so anything <= 4 messages has nothing in the middle to drop.
+ * Compacting a 4-message transcript would either be a no-op or
+ * accidentally drop the user's original task.
+ */
+export const MIN_COMPACT_TRANSCRIPT_LENGTH = 5;
+/**
+ * Pure gate. Returns `compact` when ALL of:
+ *   - `config.enabled` is true
+ *   - transcript length >= `MIN_COMPACT_TRANSCRIPT_LENGTH` (need
+ *     history to drop)
+ *   - estimated transcript tokens >=
+ *     `floor(thresholdRatio * maxTokens)`
+ *
+ * Otherwise returns `skip` with the first failing reason, checked in
+ * this order: `disabled`, `transcript-too-short`, `below-threshold`.
+ * `usedTokens` is computed up front and included in every result.
+ *
+ * NOTE(review): if `maxTokens` or `thresholdRatio` is not a number the
+ * threshold becomes NaN, the `<` comparison is false, and the result
+ * is `compact` — confirm callers always pass validated numbers.
+ *
+ * @param input Object with `transcript`, `maxTokens` and `config`
+ *   (`enabled`, `thresholdRatio`).
+ * @returns A `skip` or `compact` decision record.
+ */
+export function evaluateAutoCompactDecision(input) {
+    const usedTokens = estimateTranscriptTokens(input.transcript);
+    if (!input.config.enabled) {
+        return { kind: 'skip', reason: 'disabled', usedTokens };
+    }
+    if (input.transcript.length < MIN_COMPACT_TRANSCRIPT_LENGTH) {
+        return { kind: 'skip', reason: 'transcript-too-short', usedTokens };
+    }
+    const thresholdTokens = Math.floor(input.config.thresholdRatio * input.maxTokens);
+    if (usedTokens < thresholdTokens) {
+        return { kind: 'skip', reason: 'below-threshold', usedTokens };
+    }
+    return { kind: 'compact', usedTokens, thresholdTokens };
+}
+/**
+ * Rewrite the transcript: keep the first two messages (expected to be
+ * system + user task), drop the middle (assistant + tool turns),
+ * insert a synthetic system sentinel summarizing what was dropped,
+ * then re-append the last 2 messages so the model has the most-recent
+ * tool result + its own last reply in full fidelity. The input array
+ * is not mutated; a new array is returned.
+ *
+ * Precondition: caller has already checked the decision is `compact`
+ * (length >= MIN_COMPACT_TRANSCRIPT_LENGTH). The function still guards
+ * with a defensive identity-return on shorter transcripts so a careless
+ * caller cannot corrupt the prefix.
+ *
+ * NOTE(review): the split is purely positional — message roles are
+ * not inspected. If the first retained tail message is a tool result,
+ * the assistant message that issued the call may be in the dropped
+ * middle; confirm the engine API tolerates that. A sentinel left by an
+ * earlier compaction sits at index 2 and is therefore dropped and
+ * replaced, so its counts are not carried into the new gist.
+ *
+ * @param transcript Full message list.
+ * @returns `transcript` (new list), `droppedCount`, `gist`, `stats`,
+ *   and the estimated token counts before / after (`preUsedTokens`,
+ *   `postUsedTokens`).
+ */
+export function compactTranscript(transcript) {
+    const preUsedTokens = estimateTranscriptTokens(transcript);
+    if (transcript.length < MIN_COMPACT_TRANSCRIPT_LENGTH) {
+        return {
+            transcript: transcript.slice(),
+            droppedCount: 0,
+            gist: '',
+            stats: { toolCalls: 0, fileCount: 0, bashCalls: 0, messagesDropped: 0 },
+            preUsedTokens,
+            postUsedTokens: preUsedTokens,
+        };
+    }
+    // Always retain: index 0 and index 1 (expected: system prompt and the
+    // original user task) + last 2 messages. The middle gets summarised.
+    const head = transcript.slice(0, 2);
+    const tail = transcript.slice(-2);
+    const middle = transcript.slice(2, -2);
+    const stats = summarizeDroppedTurns(middle);
+    const gist = renderAutoCompactSentinel(stats);
+    const sentinelMessage = {
+        role: 'system',
+        content: gist,
+    };
+    const next = [...head, sentinelMessage, ...tail];
+    const postUsedTokens = estimateTranscriptTokens(next);
+    return {
+        transcript: next,
+        droppedCount: middle.length,
+        gist,
+        stats,
+        preUsedTokens,
+        postUsedTokens,
+    };
+}
+/**
+ * Convenience composer used by `runEngineLoop`: evaluate → compact in
+ * one shot. Returns `null` when the decision was `skip` so the loop
+ * driver can branch cheaply without destructuring two layers of records.
+ *
+ * @param transcript Full message list.
+ * @param maxTokens Token budget the threshold ratio is applied to.
+ * @param config Auto-compact config (`enabled`, `thresholdRatio`).
+ * @returns The `compactTranscript` result, or `null` on `skip`.
+ */
+export function maybeCompact(transcript, maxTokens, config) {
+    const decision = evaluateAutoCompactDecision({
+        transcript,
+        maxTokens,
+        config,
+    });
+    if (decision.kind === 'skip')
+        return null;
+    return compactTranscript(transcript);
+}
+//# sourceMappingURL=auto-compact.js.map

package/dist/core/engine/budgets.js CHANGED Viewed

@@ -1,3 +1,60 @@
+/**
+ * Auto-compact (mid-loop transcript summarization) default trip point as
+ * a fraction of the per-command `maxTokens` envelope. CEO P1 #14 (CC
+ * parity): when transcript char-count tokens cross 75% of the budget,
+ * the engine loop drops the middle turns and inserts a deterministic
+ * `[auto-compact]` sentinel so the loop can continue without the model
+ * tripping the `budget_exhausted` terminal status mid-build.
+ *
+ * Empirically — `pugi code "big refactor"` hits the 80k cap on turn 4-5
+ * and refuses to finish; `pugi fix` does the same at 50k. Auto-compact
+ * keeps the recent N turns + a one-line gist of the dropped tool calls
+ * so the model retains the most recent state without paying for the
+ * full prefix.
+ *
+ * Operators can opt out / retune via `.pugi/settings.json`:
+ *
+ *   {
+ *     "autoCompact": { "enabled": true, "thresholdRatio": 0.75 }
+ *   }
+ *
+ * Bad values fall back silently to the default — the engine loop never
+ * crashes on a malformed settings field (mirrors `resolveBudget`).
+ */
+export const AUTO_COMPACT_THRESHOLD_RATIO = 0.75;
+export const DEFAULT_AUTO_COMPACT_CONFIG = {
+    enabled: true,
+    thresholdRatio: AUTO_COMPACT_THRESHOLD_RATIO,
+};
+/**
+ * Pull the auto-compact override from `.pugi/settings.json`. Uses the
+ * same defensive-cast pattern as `readSettingsBudget` so an unknown
+ * field shape silently falls back to defaults (the gate is a comfort
+ * feature; a malformed settings line must not break the engine loop).
+ *
+ * Field rules: `enabled` is honored only when it is a boolean;
+ * `thresholdRatio` only when it is a finite number in the range
+ * (0, 1]. Each field falls back to its default independently.
+ *
+ * Returns the merged config — caller never sees `undefined`. When
+ * `settings` or `settings.autoCompact` is missing or not a plain
+ * object, the shared `DEFAULT_AUTO_COMPACT_CONFIG` object itself is
+ * returned (not a copy), so callers should treat the result as
+ * read-only.
+ *
+ * @param settings Parsed settings object, or a nullish value.
+ * @returns `{ enabled, thresholdRatio }`.
+ */
+export function resolveAutoCompactConfig(settings) {
+    if (!settings)
+        return DEFAULT_AUTO_COMPACT_CONFIG;
+    const root = settings.autoCompact;
+    if (!root || typeof root !== 'object' || Array.isArray(root)) {
+        return DEFAULT_AUTO_COMPACT_CONFIG;
+    }
+    const r = root;
+    const enabledRaw = r['enabled'];
+    const thresholdRaw = r['thresholdRatio'];
+    const enabled = typeof enabledRaw === 'boolean'
+        ? enabledRaw
+        : DEFAULT_AUTO_COMPACT_CONFIG.enabled;
+    let thresholdRatio = DEFAULT_AUTO_COMPACT_CONFIG.thresholdRatio;
+    if (typeof thresholdRaw === 'number' && Number.isFinite(thresholdRaw)) {
+        if (thresholdRaw > 0 && thresholdRaw <= 1) {
+            thresholdRatio = thresholdRaw;
+        }
+    }
+    return { enabled, thresholdRatio };
+}
 /**
  * β1 defaults. Source of truth for the per-command budget envelope.
  * The runtime is allowed to look these up directly (no need to round

package/dist/core/engine/native-pugi.js CHANGED Viewed

@@ -6,7 +6,9 @@ import { FileReadCache } from '../file-cache.js';
 import { loadSettings } from '../settings.js';
 import { openSession, recordToolCall, recordToolResult } from '../session.js';
 import { prewarmRealDispatch } from '../subagents/dispatcher.js';
-import { resolveBudget } from './budgets.js';
+import { resolveAutoCompactConfig, resolveBudget } from './budgets.js';
+import { maybeCompact } from './auto-compact.js';
+import { writeAuditEvent } from '../audit/audit-trail.js';
 import { buildExecutor, buildToolsSchema } from './tool-bridge.js';
 import { personaSlugFor, systemPromptFor } from './prompts.js';
 import { CancellationToken } from '../repl/cancellation.js';
@@ -188,6 +190,13 @@ export class NativePugiEngineAdapter {
             // budget so a careless caller cannot disable the call-count
             // guard by setting only token count.
             const budget = resolveBudget(kind, settings, task.budget?.tokens ? { maxTokens: task.budget.tokens } : undefined);
+            // CEO P1 #14 (auto-compact, 2026-05-29): resolve the per-workspace
+            // override of the 75% threshold gate. Default is `{ enabled: true,
+            // thresholdRatio: 0.75 }`; operators kill it via
+            // `.pugi/settings.json::autoCompact.enabled = false` or retune the
+            // ratio. The resolved config is captured by the closure that
+            // `runEngineLoop` invokes pre-send on every turn.
+            const autoCompactConfig = resolveAutoCompactConfig(settings);
             // β3 streaming: pre-build the typed stream event queue so the hook
             // callbacks below can push live events that this async generator
             // yields IMMEDIATELY (instead of buffering until `runEngineLoop`
@@ -220,6 +229,23 @@ export class NativePugiEngineAdapter {
                 type: 'status',
                 message: `Pugi engine starting: kind=${kind} budget=${budget.maxToolCalls} calls / ${budget.maxTokens} tokens`,
             });
+            // Wave 7 Sprint 1 #21 (2026-05-29): emit `dispatch_start` to the
+            // tenant-wide audit trail at `~/.pugi/audit/<tenant>/<slug>-<hash>
+            // .jsonl`. Append-only, never throws — a misconfigured audit
+            // surface must not block a dispatch. The per-session mirror under
+            // `.pugi/sessions/<id>/events.jsonl` remains as a redundant copy.
+            writeAuditEvent({
+                event: 'dispatch_start',
+                sessionId: session.id,
+                workspaceRoot: root,
+                data: {
+                    kind,
+                    promptLength: task.prompt.length,
+                    maxToolCalls: budget.maxToolCalls,
+                    maxTokens: budget.maxTokens,
+                    model: this.options.model ?? null,
+                },
+            });
             // β5a R1+R4+R5+R6+P1 (2026-05-26): build the per-turn `<context>`
             // prefix and apply the intent marker so the model sees:
             //   1. cwd + open-files + per-dir-conventions block (R5+R6)
@@ -364,6 +390,39 @@ export class NativePugiEngineAdapter {
             // per-run log for operators and the cabinet UI (Sprint 2B).
             const sessionEventsPath = openSessionMirror(root, session.id);
             const hooks = {
+                // CEO P1 #14 (auto-compact, 2026-05-29): single operator-visible
+                // line on stderr — keep parity with Claude Code's
+                // `Compacted N turns into Y tokens; continuing.` message. We mirror
+                // the event into the session log + stream emitter as a `status`
+                // frame so that admin-api SSE consumers + the cabinet UI render
+                // it without a schema change.
+                onAutoCompact: (event) => {
+                    const pct = Math.round((event.preUsedTokens / Math.max(1, event.maxTokens)) * 100);
+                    const line = `engine: auto-compacted ${event.droppedCount} turns at ${event.preUsedTokens}/${event.maxTokens} (${pct}%)`;
+                    // Single-line stderr write — operator-visible per spec.
+                    process.stderr.write(`${line}\n`);
+                    emitStream({ type: 'status', message: line });
+                    appendSessionMirror(sessionEventsPath, {
+                        type: 'auto_compact',
+                        droppedCount: event.droppedCount,
+                        preUsedTokens: event.preUsedTokens,
+                        postUsedTokens: event.postUsedTokens,
+                        maxTokens: event.maxTokens,
+                        gist: event.gist,
+                    });
+                    // Wave 7 #21: tenant-wide audit trail mirror.
+                    writeAuditEvent({
+                        event: 'auto_compact',
+                        sessionId: session.id,
+                        workspaceRoot: root,
+                        data: {
+                            droppedCount: event.droppedCount,
+                            preUsedTokens: event.preUsedTokens,
+                            postUsedTokens: event.postUsedTokens,
+                            maxTokens: event.maxTokens,
+                        },
+                    });
+                },
                 onTurnStart: (turnIndex, messageCount) => {
                     const msg = `turn ${turnIndex + 1}: requesting model (transcript=${messageCount} messages)`;
                     emitStream({ type: 'status', message: msg });
@@ -453,6 +512,20 @@ export class NativePugiEngineAdapter {
                         callId: call.id,
                         argsPreview: call.arguments.slice(0, 200),
                     });
+                    // Wave 7 #21: tenant-wide audit trail mirror. Same payload
+                    // shape as the session mirror but flattened so a `jq` query
+                    // across all sessions for one (tenant, workspace) reads
+                    // cleanly.
+                    writeAuditEvent({
+                        event: 'tool_call',
+                        sessionId: session.id,
+                        workspaceRoot: root,
+                        data: {
+                            tool: call.name,
+                            callId: call.id,
+                            argsPreview: call.arguments.slice(0, 200),
+                        },
+                    });
                 },
                 onToolResult: (call, result) => {
                     const auditId = this.engineToolCallIds.get(call.id);
@@ -492,6 +565,18 @@ export class NativePugiEngineAdapter {
                         ok: result.ok,
                         summary: result.ok ? result.content.slice(0, 200) : result.error.slice(0, 200),
                     });
+                    // Wave 7 #21: tenant-wide audit trail mirror.
+                    writeAuditEvent({
+                        event: 'tool_result',
+                        sessionId: session.id,
+                        workspaceRoot: root,
+                        data: {
+                            tool: call.name,
+                            callId: call.id,
+                            ok: result.ok,
+                            summary: result.ok ? result.content.slice(0, 200) : result.error.slice(0, 200),
+                        },
+                    });
                 },
             };
             // β1b r1 (--allow-fetch / --allow-search wiring, 2026-05-26):
@@ -671,6 +756,14 @@ export class NativePugiEngineAdapter {
                         command: kind,
                         tag: dispatchTagFor(kind),
                         model: this.options.model,
+                        // CEO P1 #14 (auto-compact, 2026-05-29): pluggable compactor
+                        // hook. The SDK driver invokes this pre-`client.send` on every
+                        // turn. `maybeCompact` returns `null` below the 75% threshold
+                        // or when the transcript is too short to drop history — the
+                        // loop continues unchanged on the cold path. When it returns
+                        // a result, the driver swaps the transcript + fires the
+                        // `onAutoCompact` hook above which emits the stderr line.
+                        autoCompact: ({ transcript, maxTokens }) => maybeCompact(transcript, maxTokens, autoCompactConfig),
                     });
                 }
                 catch (err) {
@@ -696,6 +789,19 @@ export class NativePugiEngineAdapter {
             await loopPromise;
             if (loopError !== null) {
                 const message = loopError instanceof Error ? loopError.message : String(loopError);
+                // Wave 7 #21: surface the crash to the audit trail before
+                // returning. Mirrors the `failed` arm of the structured path
+                // below so a SOC pipeline sees one `dispatch_end` per dispatch
+                // regardless of which code path produced it.
+                writeAuditEvent({
+                    event: 'dispatch_end',
+                    sessionId: session.id,
+                    workspaceRoot: root,
+                    data: {
+                        status: 'crashed',
+                        error: message,
+                    },
+                });
                 yield {
                     type: 'result',
                     result: {
@@ -714,6 +820,12 @@ export class NativePugiEngineAdapter {
             if (finalOutcome === null) {
                 // Defensive — should never hit. `runEngineLoop` always either
                 // resolves with an outcome or throws (and we catch that above).
+                writeAuditEvent({
+                    event: 'dispatch_end',
+                    sessionId: session.id,
+                    workspaceRoot: root,
+                    data: { status: 'no_outcome' },
+                });
                 yield {
                     type: 'result',
                     result: {
@@ -756,6 +868,36 @@ export class NativePugiEngineAdapter {
                 filesChanged: filesChangedList,
                 reason: finalOutcome.reason,
             });
+            // Wave 7 #21: emit `dispatch_end` to the tenant-wide audit trail.
+            // When the loop tripped the per-command budget we ALSO emit a
+            // dedicated `budget_exhausted` row so a SOC query can filter on
+            // event type alone without parsing the `data.status` payload.
+            if (finalOutcome.status === 'budget_exhausted') {
+                writeAuditEvent({
+                    event: 'budget_exhausted',
+                    sessionId: session.id,
+                    workspaceRoot: root,
+                    data: {
+                        toolCallCount: finalOutcome.toolCallCount,
+                        turnsUsed: finalOutcome.turnsUsed,
+                        tokensUsed: finalOutcome.tokensUsed,
+                        reason: finalOutcome.reason ?? null,
+                    },
+                });
+            }
+            writeAuditEvent({
+                event: 'dispatch_end',
+                sessionId: session.id,
+                workspaceRoot: root,
+                data: {
+                    status: finalOutcome.status,
+                    toolCallCount: finalOutcome.toolCallCount,
+                    turnsUsed: finalOutcome.turnsUsed,
+                    tokensUsed: finalOutcome.tokensUsed,
+                    filesChangedCount: filesChangedList.length,
+                    reason: finalOutcome.reason ?? null,
+                },
+            });
             yield {
                 type: 'result',
                 result: {

package/dist/core/hook-chains.js ADDED Viewed

@@ -0,0 +1,410 @@
+/**
+ * Hook chains — `PostToolUseFailure` + `TaskCompleted` first-class events.
+ *
+ * Wave 7 #24 (CEO P1, 2026-05-29). Pugi already had primitives for
+ * `PostToolUseFailure` in the legacy `core/hooks.ts` registry but no
+ * way to declare a **fallback chain** that fires automatically when a
+ * tool dispatch fails or when an entire `pugi <command>` dispatch
+ * completes. Claude Code exposes both events as first-class hook
+ * sources; this module wires them on the Pugi side.
+ *
+ * Why a fresh module rather than extending `core/hooks.ts`:
+ *   - The legacy registry reads flat `hooks: [{event, match, run}]`
+ *     arrays from `~/.pugi/hooks.json` + `.pugi/hooks.json`. Chains
+ *     are declared in a nested `hooks: { EventName: [{matcher, run}] }`
+ *     shape from `.pugi/settings.json` — a different file, different
+ *     shape, and importantly a different opt-out grammar (per-chain
+ *     `enabled: false`). Mixing the two readers would force every
+ *     legacy caller to learn the chain shape.
+ *   - Chains have richer payloads (TaskCompleted ships durationMs,
+ *     toolCalls, filesChanged). Stuffing them through the legacy
+ *     stdin/env contract would silently break v1 scripts that key on
+ *     the existing payload shape.
+ *   - Chain failures MUST NOT crash the dispatch (the model already
+ *     finished). The legacy registry's `onFailure: 'block'` semantics
+ *     would propagate the error — chains explicitly swallow it and
+ *     log instead.
+ *
+ * The chain runner is intentionally self-contained: no dependency on
+ * `HookRegistry`, no trust-ledger gating (project settings are already
+ * trusted by virtue of being in the workspace's `.pugi/` dir, same as
+ * persona prompts), and a single `firePostToolUseFailureChain` /
+ * `fireTaskCompletedChain` entry point per event.
+ *
+ * Brand voice: ASCII only, no emoji, no em-dashes.
+ */
+import { spawn } from 'node:child_process';
+import { z } from 'zod';
+import { loadSettings } from './settings.js';
+/** Shared building block: any string with at least one character. */
+const nonEmptyString = z.string().min(1);
+/**
+ * Matcher carried by one chain entry. Both keys are optional and
+ * `.strict()` rejects any key other than `tool` / `command`. How the
+ * keys are compared against a payload is decided by the per-event
+ * `matches*` helpers in this file.
+ */
+const chainMatcherSchema = z
+    .object({
+    // Tool name to compare against (read by the PostToolUseFailure matcher).
+    tool: nonEmptyString.optional(),
+    // Command name to compare against (read by the TaskCompleted matcher).
+    command: nonEmptyString.optional(),
+})
+    .strict();
+/**
+ * One chain entry.
+ *   - `matcher`: optional filter, see `chainMatcherSchema`.
+ *   - `run`: non-empty list of non-empty shell command strings; the
+ *     runner executes them in list order.
+ *   - `timeoutMs`: optional positive integer, capped at 60_000. When
+ *     omitted the runner falls back to `DEFAULT_TIMEOUT_MS`.
+ */
+const chainEntrySchema = z
+    .object({
+    matcher: chainMatcherSchema.optional(),
+    run: z.array(nonEmptyString).min(1),
+    timeoutMs: z.number().int().positive().max(60_000).optional(),
+})
+    .strict();
+/**
+ * Canonical per-event chain config. `enabled` defaults to true and
+ * `entries` defaults to an empty list, so `{ enabled: false }` alone is
+ * a valid way to switch a chain off without deleting its entries.
+ */
+const chainConfigSchema = z
+    .object({
+    enabled: z.boolean().default(true),
+    entries: z.array(chainEntrySchema).default([]),
+})
+    .strict();
+/**
+ * Value accepted under each event key of the settings `hooks` block:
+ * either the canonical `{ enabled, entries }` object or the shorthand
+ * bare array of entries. The key itself may be omitted.
+ */
+const chainOrEntryListSchema = z
+    .union([chainConfigSchema, z.array(chainEntrySchema)])
+    .optional();
+/**
+ * Settings-level `hooks` block. Only the two event keys below are
+ * allowed (`.strict()`), and the whole block is optional. Readers in
+ * this file normalise the shorthand array form to the canonical
+ * `{ enabled, entries }` form before handing it to callers.
+ */
+const settingsHooksShape = z
+    .object({
+    PostToolUseFailure: chainOrEntryListSchema,
+    TaskCompleted: chainOrEntryListSchema,
+})
+    .strict()
+    .optional();
+// Per-command timeout used when an entry does not set `timeoutMs`.
+const DEFAULT_TIMEOUT_MS = 10_000;
+// Delay between the SIGTERM and the follow-up SIGKILL.
+const SIGKILL_GRACE_MS = 2_000;
+// Combined stdout + stderr capture limit per command (256 KiB).
+const STREAM_CAP_BYTES = 256 * 1024;
+/**
+ * Look up the chain config for one event on a loaded settings object.
+ *
+ * `settings.hooks` is validated against `settingsHooksShape` on every
+ * call. When validation fails, the block is absent, or the event has
+ * no config, an enabled chain with no entries is returned. The
+ * shorthand array form is wrapped into `{ enabled: true, entries }`;
+ * the canonical object form is returned as parsed.
+ *
+ * @param settings - settings object; only its `hooks` property is read.
+ * @param event - `'PostToolUseFailure'` or `'TaskCompleted'`.
+ * @returns `{ enabled, entries }` for the requested event.
+ */
+export function resolveChain(settings, event) {
+    const validation = settingsHooksShape.safeParse(settings.hooks);
+    const hooksBlock = validation.success ? validation.data : undefined;
+    const configured = hooksBlock ? hooksBlock[event] : undefined;
+    if (!configured) {
+        return { enabled: true, entries: [] };
+    }
+    return Array.isArray(configured)
+        ? { enabled: true, entries: configured }
+        : configured;
+}
+/**
+ * Normalise a raw `hooks` settings value into a shape where both
+ * events are always present in canonical `{ enabled, entries }` form.
+ *
+ * An input that fails validation (or is undefined) yields two enabled,
+ * empty chains. A shorthand array is wrapped as
+ * `{ enabled: true, entries }`; a canonical object is passed through
+ * as parsed.
+ *
+ * @param rawHooks - the untyped `hooks` value from a settings file.
+ * @returns `{ PostToolUseFailure, TaskCompleted }`, each `{ enabled, entries }`.
+ */
+export function parseHookChains(rawHooks) {
+    const toCanonical = (value) => {
+        if (!value)
+            return { enabled: true, entries: [] };
+        return Array.isArray(value) ? { enabled: true, entries: value } : value;
+    };
+    const validation = settingsHooksShape.safeParse(rawHooks);
+    const hooksBlock = validation.success && validation.data ? validation.data : {};
+    return {
+        PostToolUseFailure: toCanonical(hooksBlock.PostToolUseFailure),
+        TaskCompleted: toCanonical(hooksBlock.TaskCompleted),
+    };
+}
+/**
+ * Decide whether a chain entry applies to a `PostToolUseFailure`
+ * payload.
+ *
+ * An entry with no matcher, or a matcher where both `tool` and
+ * `command` are empty/absent, always matches. Otherwise only `tool`
+ * is compared (against `payload.toolName`); a `command` key is
+ * deliberately ignored for this event, so a matcher that sets only
+ * `command` still matches.
+ */
+function matchesPostToolUseFailure(entry, payload) {
+    const matcher = entry.matcher;
+    if (!matcher)
+        return true;
+    const { tool, command } = matcher;
+    if (!tool && !command)
+        return true;
+    return tool === undefined || tool === payload.toolName;
+}
+/**
+ * Decide whether a chain entry applies to a `TaskCompleted` payload.
+ *
+ * An entry with no matcher, or a matcher where both `tool` and
+ * `command` are empty/absent, always matches. Otherwise only
+ * `command` is compared (against `payload.command`); a matcher that
+ * sets only `tool` therefore still matches.
+ */
+function matchesTaskCompleted(entry, payload) {
+    const matcher = entry.matcher;
+    if (!matcher)
+        return true;
+    const { tool, command } = matcher;
+    if (!tool && !command)
+        return true;
+    return command === undefined || command === payload.command;
+}
+/**
+ * Run the `PostToolUseFailure` chain for one failed tool call.
+ *
+ * Settings come from `settingsOverride` when given, otherwise from
+ * `safeLoadSettings(workspaceRoot)`. A disabled chain returns
+ * immediately with no entries. Otherwise every configured entry is
+ * visited in order: non-matching entries are recorded as
+ * `{ matched: false, commands: [] }`, matching entries are executed
+ * via `runChainEntry` and recorded with their command results.
+ *
+ * Each command receives these environment variables:
+ * `PUGI_HOOK_EVENT`, `PUGI_HOOK_PAYLOAD` (the payload as JSON),
+ * `PUGI_HOOK_TOOL` and `PUGI_HOOK_EXIT_CODE`.
+ *
+ * @param workspaceRoot - workspace directory used to load settings.
+ * @param payload - failure payload; `toolName` and `exitCode` are read.
+ * @param settingsOverride - optional pre-loaded settings object.
+ * @returns `{ event, enabled, entries }` describing what ran.
+ */
+export async function firePostToolUseFailureChain(workspaceRoot, payload, settingsOverride) {
+    const settings = settingsOverride ?? safeLoadSettings(workspaceRoot);
+    const chain = resolveChain(settings, 'PostToolUseFailure');
+    if (!chain.enabled) {
+        return { event: 'PostToolUseFailure', enabled: false, entries: [] };
+    }
+    const report = {
+        event: 'PostToolUseFailure',
+        enabled: true,
+        entries: [],
+    };
+    // Built lazily per matching entry so the payload is only serialised
+    // when a command is actually about to run.
+    const buildHookEnv = () => ({
+        PUGI_HOOK_EVENT: 'PostToolUseFailure',
+        PUGI_HOOK_PAYLOAD: JSON.stringify(payload),
+        PUGI_HOOK_TOOL: payload.toolName,
+        PUGI_HOOK_EXIT_CODE: String(payload.exitCode),
+    });
+    for (const entry of chain.entries) {
+        if (matchesPostToolUseFailure(entry, payload)) {
+            const commands = await runChainEntry(entry, buildHookEnv());
+            report.entries.push({ matched: true, commands });
+        }
+        else {
+            report.entries.push({ matched: false, commands: [] });
+        }
+    }
+    return report;
+}
+/**
+ * Run the `TaskCompleted` chain for one finished dispatch.
+ *
+ * Settings come from `settingsOverride` when given, otherwise from
+ * `safeLoadSettings(workspaceRoot)`. A disabled chain returns
+ * immediately with no entries. Otherwise every configured entry is
+ * visited in order: non-matching entries are recorded as
+ * `{ matched: false, commands: [] }`, matching entries are executed
+ * via `runChainEntry` and recorded with their command results.
+ *
+ * Each command receives these environment variables:
+ * `PUGI_HOOK_EVENT`, `PUGI_HOOK_PAYLOAD` (the payload as JSON),
+ * `PUGI_HOOK_COMMAND`, `PUGI_HOOK_EXIT_CODE`, `PUGI_HOOK_DURATION_MS`
+ * and `PUGI_HOOK_TOOL_CALLS`.
+ *
+ * @param workspaceRoot - workspace directory used to load settings.
+ * @param payload - completion payload; `command`, `exitCode`,
+ *   `durationMs` and `toolCalls` are read.
+ * @param settingsOverride - optional pre-loaded settings object.
+ * @returns `{ event, enabled, entries }` describing what ran.
+ */
+export async function fireTaskCompletedChain(workspaceRoot, payload, settingsOverride) {
+    const settings = settingsOverride ?? safeLoadSettings(workspaceRoot);
+    const chain = resolveChain(settings, 'TaskCompleted');
+    if (!chain.enabled) {
+        return { event: 'TaskCompleted', enabled: false, entries: [] };
+    }
+    const report = {
+        event: 'TaskCompleted',
+        enabled: true,
+        entries: [],
+    };
+    // Built lazily per matching entry so the payload is only serialised
+    // when a command is actually about to run.
+    const buildHookEnv = () => ({
+        PUGI_HOOK_EVENT: 'TaskCompleted',
+        PUGI_HOOK_PAYLOAD: JSON.stringify(payload),
+        PUGI_HOOK_COMMAND: payload.command,
+        PUGI_HOOK_EXIT_CODE: String(payload.exitCode),
+        PUGI_HOOK_DURATION_MS: String(payload.durationMs),
+        PUGI_HOOK_TOOL_CALLS: String(payload.toolCalls),
+    });
+    for (const entry of chain.entries) {
+        if (matchesTaskCompleted(entry, payload)) {
+            const commands = await runChainEntry(entry, buildHookEnv());
+            report.entries.push({ matched: true, commands });
+        }
+        else {
+            report.entries.push({ matched: false, commands: [] });
+        }
+    }
+    return report;
+}
+/**
+ * Execute each command of one chain entry, one after another, and
+ * collect one result record per command.
+ *
+ * The per-command timeout is `entry.timeoutMs`, falling back to
+ * `DEFAULT_TIMEOUT_MS`. If `executeOne` rejects, the failure is turned
+ * into a synthetic result (`exitCode: -1`, `durationMs: 0`) and the
+ * loop carries on with the next command.
+ *
+ * NOTE(review): every command in `entry.run` is executed regardless of
+ * the previous command's exit code. The `chainEntrySchema` comment in
+ * this file describes a short-circuit on the first non-zero exit;
+ * confirm which of the two is the intended contract.
+ */
+async function runChainEntry(entry, baseEnv) {
+    const perCommandTimeout = entry.timeoutMs ?? DEFAULT_TIMEOUT_MS;
+    const outcomes = [];
+    for (const shellCommand of entry.run) {
+        let outcome;
+        try {
+            outcome = await executeOne(shellCommand, perCommandTimeout, baseEnv);
+        }
+        catch (spawnFailure) {
+            // Record the failure instead of propagating it, so one broken
+            // command cannot abort the rest of the entry.
+            outcome = {
+                command: shellCommand,
+                exitCode: -1,
+                durationMs: 0,
+                stdout: '',
+                stderr: `chain spawn error: ${spawnFailure.message}`,
+                timedOut: false,
+            };
+        }
+        outcomes.push(outcome);
+    }
+    return outcomes;
+}
+/**
+ * Spawn ONE shell command through `/bin/sh -c` and capture its result.
+ *
+ * The child is sent SIGTERM when it exceeds `timeoutMs` or when the
+ * combined captured stdout + stderr exceeds `STREAM_CAP_BYTES`. If it
+ * is still running `SIGKILL_GRACE_MS` later it is sent SIGKILL.
+ *
+ * @param {string} command - shell command line handed to `/bin/sh -c`.
+ * @param {number} timeoutMs - wall-clock budget before SIGTERM is sent.
+ * @param {object} baseEnv - variables layered on top of `process.env`.
+ * @returns {Promise<object>} resolves with
+ *   `{ command, exitCode, durationMs, stdout, stderr, timedOut }`.
+ *   `exitCode` is the child's exit code when it exited normally, -15
+ *   or -9 when it died from SIGTERM or SIGKILL, and -1 for any other
+ *   signal or for a spawn `error` event. `timedOut` is true when the
+ *   child was terminated for the timeout OR for the stream cap.
+ */
+function executeOne(command, timeoutMs, baseEnv) {
+    return new Promise((resolvePromise) => {
+        const startedAt = Date.now();
+        const child = spawn('/bin/sh', ['-c', command], {
+            env: { ...process.env, ...baseEnv },
+            stdio: ['pipe', 'pipe', 'pipe'],
+        });
+        let stdout = '';
+        let stderr = '';
+        let killedForTimeout = false;
+        let killedForStreamCap = false;
+        let sigKillTimer;
+        // `child.killed` only says a signal was SENT, so it is already
+        // true right after the SIGTERM below and cannot be used to decide
+        // whether SIGKILL is still needed. The child is gone only once
+        // `exitCode` or `signalCode` has been set.
+        const isStillRunning = () => child.exitCode === null && child.signalCode === null;
+        // Ask politely first, then force-kill after the grace period.
+        // Only one escalation timer is ever armed, even if both the
+        // stream cap and the timeout trip for the same child.
+        const terminate = () => {
+            child.kill('SIGTERM');
+            if (sigKillTimer)
+                return;
+            sigKillTimer = setTimeout(() => {
+                if (isStillRunning())
+                    child.kill('SIGKILL');
+            }, SIGKILL_GRACE_MS);
+            if (sigKillTimer.unref)
+                sigKillTimer.unref();
+        };
+        const enforceStreamCap = () => {
+            if (killedForStreamCap)
+                return;
+            if (stdout.length + stderr.length <= STREAM_CAP_BYTES)
+                return;
+            killedForStreamCap = true;
+            terminate();
+        };
+        child.stdout?.on('data', (chunk) => {
+            // Stop accumulating once the cap tripped; the child is being
+            // torn down and further output is discarded.
+            if (killedForStreamCap)
+                return;
+            stdout += chunk.toString('utf8');
+            enforceStreamCap();
+        });
+        child.stderr?.on('data', (chunk) => {
+            if (killedForStreamCap)
+                return;
+            stderr += chunk.toString('utf8');
+            enforceStreamCap();
+        });
+        // Close stdin so commands that block on read (cat, jq) do not hang
+        // the chain. Errors are swallowed because the child may have
+        // already exited before the stream is ended.
+        if (child.stdin) {
+            child.stdin.on('error', () => {
+                /* ignore EPIPE */
+            });
+            child.stdin.end();
+        }
+        const timer = setTimeout(() => {
+            killedForTimeout = true;
+            terminate();
+        }, timeoutMs);
+        if (timer.unref)
+            timer.unref();
+        const clearTimers = () => {
+            clearTimeout(timer);
+            if (sigKillTimer)
+                clearTimeout(sigKillTimer);
+        };
+        child.on('error', (error) => {
+            clearTimers();
+            resolvePromise({
+                command,
+                exitCode: -1,
+                durationMs: Date.now() - startedAt,
+                stdout,
+                stderr: stderr || `hook spawn error: ${error.message}`,
+                timedOut: false,
+            });
+        });
+        child.on('close', (code, signal) => {
+            clearTimers();
+            const durationMs = Date.now() - startedAt;
+            let exitCode;
+            if (code !== null) {
+                exitCode = code;
+            }
+            else if (signal === 'SIGTERM') {
+                exitCode = -15;
+            }
+            else if (signal === 'SIGKILL') {
+                exitCode = -9;
+            }
+            else {
+                exitCode = -1;
+            }
+            resolvePromise({
+                command,
+                exitCode,
+                durationMs,
+                stdout,
+                stderr,
+                timedOut: killedForTimeout || killedForStreamCap,
+            });
+        });
+    });
+}
+/**
+ * Load settings for `root` without ever throwing.
+ *
+ * `loadSettings` can throw (for example on a schema-invalid settings
+ * file). Chains MUST NOT crash the dispatch, so a failure is logged
+ * to stderr and a fallback is returned instead:
+ *   1. the result of `loadSettings` for a path that does not exist
+ *      (presumably the defaults; verify against `loadSettings`), or
+ *   2. if that call throws as well, an empty object. `resolveChain`
+ *      only reads `settings.hooks`, so an empty object resolves to an
+ *      enabled chain with no entries.
+ *
+ * @param root - workspace directory whose settings should be loaded.
+ * @returns the loaded settings, or a fallback settings object.
+ */
+function safeLoadSettings(root) {
+    // Thrown values are not guaranteed to be Error instances.
+    const describe = (error) => (error instanceof Error ? error.message : String(error));
+    try {
+        return loadSettings(root);
+    }
+    catch (error) {
+        process.stderr.write(`[pugi hook-chains] settings load failed: ${describe(error)}\n`);
+        try {
+            return loadSettings('/this-path-does-not-exist-fallback');
+        }
+        catch (fallbackError) {
+            // The fallback load is not guaranteed to succeed either; log
+            // it and hand back a value the chain resolver can still read.
+            process.stderr.write(`[pugi hook-chains] fallback settings load failed: ${describe(fallbackError)}\n`);
+            return {};
+        }
+    }
+}
+//# sourceMappingURL=hook-chains.js.map

package/dist/runtime/cli.js CHANGED Viewed

@@ -1318,6 +1318,7 @@ function parseArgs(argv) {
         // TTY). PUGI_NO_AUTO_* env vars provide a per-shell escape hatch
         // without needing to thread the flag through every invocation.
         noInit: process.env.PUGI_NO_AUTO_INIT === '1',
+        live: false,
         noLogin: process.env.PUGI_NO_AUTO_LOGIN === '1',
         decompose: false,
         // β-headless: --no-tools default OFF so existing flag-free invocations
@@ -1430,6 +1431,9 @@ function parseArgs(argv) {
             // at the global level for consistency with --no-splash / --no-tool-stream.
             flags.noDefaults = true;
         }
+        else if (arg === '--live') {
+            flags.live = true;
+        }
         else if (arg === '--ascii-only') {
             // Leak L33 — `pugi stickers --ascii-only` skips the Ink boxed
             // renderer. Parsed globally so the dispatcher can pass the flag
@@ -2150,6 +2154,7 @@ async function doctor(_args, flags, _session) {
         home: defaultDoctorHome(),
         env: process.env,
         json: flags.json,
+        live: flags.live,
         writeOutput: (payload, text) => writeOutput(flags, payload, text),
     });
 }

package/dist/runtime/commands/doctor.js CHANGED Viewed

@@ -53,6 +53,7 @@ import { probeBareMode } from '../../core/diagnostics/probes/bare-mode.js';
 import { probePugiMdHierarchy } from '../../core/diagnostics/probes/pugi-md.js';
 import { probeSandbox } from '../../core/diagnostics/probes/sandbox.js';
 import { probeHooks } from '../../core/diagnostics/probes/hooks.js';
+import { probeEngineLive } from '../../core/diagnostics/probes/engine-live.js';
 /**
  * Default API URL when no PUGI_API_URL env override is set. Mirrors
  * the constant in `core/credentials.ts` (kept local to avoid an
@@ -250,6 +251,27 @@ export function buildDefaultProbes(ctx, options = {}) {
             }),
         },
     ];
+    // CEO P1 #22 (2026-05-29): ENGINE LIVE probe — end-to-end smoke
+    // against api.pugi.io. Only fires when `--live` is set so the
+    // default `pugi doctor` stays offline-safe.
+    if (options.live) {
+        probes.push({
+            name: 'ENGINE LIVE',
+            run: async () => probeEngineLive(ctx, {
+                resolveApiUrl: (env) => env['PUGI_API_URL'] ?? 'https://api.pugi.io',
+                resolveApiKey: (env) => {
+                    const fromEnv = env['PUGI_API_KEY'];
+                    if (fromEnv && fromEnv.length > 0)
+                        return fromEnv;
+                    const credential = resolveActiveCredential(env, ctx.home);
+                    return credential?.apiKey ?? null;
+                },
+                fetchImpl,
+                now,
+            }),
+            timeoutMs: 20_000,
+        });
+    }
     return probes;
 }
 /**
@@ -266,6 +288,7 @@ export async function runDoctorCommand(ctx) {
     const probes = buildDefaultProbes(probeCtx, {
         ...(ctx.liveSessionId ? { liveSessionId: ctx.liveSessionId } : {}),
         ...(ctx.denialTracking ? { denialTracking: ctx.denialTracking } : {}),
+        ...(ctx.live ? { live: ctx.live } : {}),
     });
     const report = await runProbes(probes);
     // Defensive recompute: even though runProbes already computed the

package/dist/runtime/version.js CHANGED Viewed

@@ -44,7 +44,7 @@ export function sanitizeSemver(raw) {
  * during import). When bumping the CLI version BOTH literals must be
  * updated; the release smoke-test (`pack:smoke`) verifies they agree.
  */
-export const PUGI_CLI_VERSION = sanitizeSemver('0.1.0-beta.50');
+export const PUGI_CLI_VERSION = sanitizeSemver('0.1.0-beta.52');
 /**
  * Outbound: the CLI's installed semver. Read at request time by
  * `version-interceptor.ts` and injected on every `fetch` call.

package/dist/tools/bash.js CHANGED Viewed

@@ -33,6 +33,7 @@ import { isAbsolute, join, resolve } from 'node:path';
 import { spawn, spawnSync } from 'node:child_process';
 import { classifyBash } from '../core/bash-classifier.js';
 import { evaluateBashPermission } from '../core/permission.js';
+import { writeAuditEvent } from '../core/audit/audit-trail.js';
 import { getJobRegistry, } from '../core/jobs/registry.js';
 import { recordToolCall, recordToolResult } from '../core/session.js';
 export const BASH_OUTPUT_CAP_BYTES = 32 * 1024;
@@ -95,6 +96,22 @@ export async function bashTool(input, ctx) {
     if (decision.decision !== 'allow') {
         const reason = `Permission ${decision.decision}: ${decision.reason}`;
         recordToolResult(ctx.session, toolCallId, 'error', reason);
+        // Wave 7 Sprint 1 #21 (2026-05-29): emit `permission_denied` to
+        // the tenant-wide audit trail. Truncate the cmd preview to 200
+        // chars so a long here-doc does not bloat the JSONL row; the
+        // session log keeps the full text for forensic replay.
+        writeAuditEvent({
+            event: 'permission_denied',
+            sessionId: ctx.session.id,
+            workspaceRoot: ctx.root,
+            data: {
+                tool: 'bash',
+                source,
+                decision: decision.decision,
+                reason: decision.reason,
+                cmdPreview: cmd.slice(0, 200),
+            },
+        });
         return {
             stdout: '',
             stderr: `Permission denied: ${decision.reason}`,
@@ -800,6 +817,20 @@ export function bashToolSync(input, ctx) {
     if (decision.decision !== 'allow') {
         const reason = `Permission ${decision.decision}: ${decision.reason}`;
         recordToolResult(ctx.session, toolCallId, 'error', reason);
+        // Wave 7 #21: mirror the async-path emission so sync callers
+        // (spawnSync fallback) produce the same tenant-wide audit trail.
+        writeAuditEvent({
+            event: 'permission_denied',
+            sessionId: ctx.session.id,
+            workspaceRoot: ctx.root,
+            data: {
+                tool: 'bash',
+                source,
+                decision: decision.decision,
+                reason: decision.reason,
+                cmdPreview: cmd.slice(0, 200),
+            },
+        });
         return {
             stdout: '',
             stderr: `Permission denied: ${decision.reason}`,

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@pugi/cli",
-  "version": "0.1.0-beta.50",
+  "version": "0.1.0-beta.52",
   "description": "Pugi CLI - terminal-native software execution system",
   "homepage": "https://pugi.io",
   "repository": {
@@ -54,8 +54,8 @@
     "turndown": "^7.2.4",
     "undici": "^8.3.0",
     "zod": "^3.23.0",
-    "@pugi/personas": "0.1.2",
-    "@pugi/sdk": "0.1.0-beta.50"
+    "@pugi/sdk": "0.1.0-beta.52",
+    "@pugi/personas": "0.1.2"
   },
   "devDependencies": {
     "@types/node": "^22.0.0",