npm - @pugi/cli - Versions diffs - 0.1.0-beta.50 → 0.1.0-beta.51 - Mend

@pugi/cli 0.1.0-beta.50 → 0.1.0-beta.51

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5) hide show

package/dist/core/engine/auto-compact.js +179 -0
package/dist/core/engine/budgets.js +57 -0
package/dist/core/engine/native-pugi.js +38 -1
package/dist/runtime/version.js +1 -1
package/package.json +3 -3

package/dist/core/engine/auto-compact.js ADDED Viewed

@@ -0,0 +1,179 @@
+/**
+ * Crude token-count heuristic mirroring `runEngineLoop`'s fallback
+ * accounting (transcript char count / 4). The CLI does not have access
+ * to a real tokenizer pre-flight — the runtime returns `usage.totalTokens`
+ * only on the server response, which is too late for our pre-turn gate.
+ * char/4 is in the right order of magnitude for English/TS and matches
+ * what the loop's own fallback uses on `tokensUsed === 0` upstream.
+ *
+ * Counted characters: each message's `content`, plus the `name` and
+ * `arguments` of every entry in its optional `toolCalls` list (a missing
+ * `toolCalls` is treated as an empty list).
+ *
+ * NOTE(review): `.length` is read directly, so this assumes `content`,
+ * `name` and `arguments` are always strings; a null/undefined `content`
+ * would throw here. Confirm against the transcript message type.
+ *
+ * @param messages Iterable of transcript messages.
+ * @returns Estimated token count: total counted characters / 4, rounded up.
+ */
+export function estimateTranscriptTokens(messages) {
+    let chars = 0;
+    for (const m of messages) {
+        chars += m.content.length;
+        const calls = m.toolCalls ?? [];
+        for (const c of calls) {
+            chars += c.name.length + c.arguments.length;
+        }
+    }
+    return Math.ceil(chars / 4);
+}
+const FILE_TOOL_NAMES = new Set([ // tool names that `summarizeDroppedTurns` treats as file operations
+    'read',
+    'write',
+    'edit',
+    'multi_edit',
+    'multiEdit', // camelCase spelling listed alongside `multi_edit`
+]);
+/**
+ * Walk the dropped slice and pull out tool-call metadata. We parse the
+ * `arguments` JSON best-effort — a bad parse is harmless here because
+ * the executor surfaced the canonical error to the model already; the
+ * gist just under-counts that one call.
+ *
+ * Only messages with `role === 'assistant'` are inspected. `toolCalls`
+ * counts every call on those messages (bash included), so a failed
+ * argument parse affects `fileCount` only. File paths are collected in a
+ * Set, so the same path touched several times is counted once.
+ *
+ * @param dropped Array of transcript messages being removed.
+ * @returns `{ toolCalls, fileCount, bashCalls, messagesDropped }`, where
+ *   `messagesDropped` is `dropped.length` regardless of message role.
+ */
+export function summarizeDroppedTurns(dropped) {
+    let toolCalls = 0;
+    let bashCalls = 0;
+    const files = new Set();
+    for (const m of dropped) {
+        if (m.role === 'assistant') {
+            const calls = m.toolCalls ?? [];
+            toolCalls += calls.length;
+            for (const c of calls) {
+                if (c.name === 'bash') {
+                    bashCalls += 1;
+                    continue; // bash is tallied separately and skips the file-path lookup below
+                }
+                if (FILE_TOOL_NAMES.has(c.name)) {
+                    const p = extractPath(c.arguments);
+                    if (p)
+                        files.add(p);
+                }
+            }
+        }
+    }
+    return {
+        toolCalls,
+        fileCount: files.size,
+        bashCalls,
+        messagesDropped: dropped.length,
+    };
+}
+function extractPath(rawArgs) { // best-effort: returns the `path`/`filePath` string from a JSON arguments payload, else null
+    if (!rawArgs)
+        return null;
+    try {
+        const parsed = JSON.parse(rawArgs);
+        if (parsed && typeof parsed === 'object' && !Array.isArray(parsed)) { // only a non-null, non-array object is searched for a path key
+            const obj = parsed;
+            const path = obj['path'] ?? obj['filePath']; // `path` wins; `filePath` is consulted only when `path` is null/undefined
+            if (typeof path === 'string' && path.length > 0)
+                return path;
+        }
+    }
+    catch { // malformed JSON is deliberately ignored; the caller just sees "no path"
+        return null;
+    }
+    return null;
+}
+/**
+ * Format the deterministic gist string spliced into the synthetic
+ * system message. Stable shape so spec assertions and operator
+ * logs do not drift turn-over-turn.
+ *
+ * NOTE(review): the text labels `stats.fileCount` as "files read", but
+ * when the stats come from `summarizeDroppedTurns` that count also
+ * includes paths from write/edit tool calls. Confirm the wording is
+ * intentional.
+ *
+ * @param stats Object providing `toolCalls`, `fileCount` and `bashCalls`.
+ * @returns The single-line `[auto-compact] ...` sentinel text.
+ */
+export function renderAutoCompactSentinel(stats) {
+    return (`[auto-compact] Earlier turns ` +
+        `(${stats.toolCalls} tool calls, ${stats.fileCount} files read, ${stats.bashCalls} bash commands) ` +
+        `summarized to free transcript headroom. ` +
+        `Recent turns and the original task remain in context; ` +
+        `re-read any earlier file by name if you need its contents again.`);
+}
+/**
+ * Minimum transcript length (in messages) before compact is allowed.
+ * We always retain `system + user` (the first 2) + the last 2 messages,
+ * so anything <= 4 messages has nothing in the middle to drop.
+ * Compacting a 4-message transcript would either be a no-op or
+ * accidentally drop the user's original task.
+ */
+export const MIN_COMPACT_TRANSCRIPT_LENGTH = 5;
+/**
+ * Pure gate. Returns `compact` when ALL of:
+ *   - `config.enabled` is true
+ *   - estimated transcript tokens >= `thresholdRatio * maxTokens`
+ *     (the product is floored to an integer before comparing)
+ *   - transcript length >= 5 (need history to drop)
+ *
+ * Checks run in the order: disabled, transcript-too-short,
+ * below-threshold; the first failing check decides the `reason`.
+ * `usedTokens` is estimated up front so every result carries it,
+ * including the `skip` results.
+ *
+ * @param input `{ transcript, maxTokens, config }`, where `config`
+ *   provides `enabled` and `thresholdRatio`.
+ * @returns `{ kind: 'skip', reason, usedTokens }` or
+ *   `{ kind: 'compact', usedTokens, thresholdTokens }`.
+ */
+export function evaluateAutoCompactDecision(input) {
+    const usedTokens = estimateTranscriptTokens(input.transcript);
+    if (!input.config.enabled) {
+        return { kind: 'skip', reason: 'disabled', usedTokens };
+    }
+    if (input.transcript.length < MIN_COMPACT_TRANSCRIPT_LENGTH) {
+        return { kind: 'skip', reason: 'transcript-too-short', usedTokens };
+    }
+    const thresholdTokens = Math.floor(input.config.thresholdRatio * input.maxTokens);
+    if (usedTokens < thresholdTokens) {
+        return { kind: 'skip', reason: 'below-threshold', usedTokens };
+    }
+    return { kind: 'compact', usedTokens, thresholdTokens };
+}
+/**
+ * Rewrite the transcript: keep the first two messages (system + user
+ * task), drop the middle (assistant + tool turns), insert a synthetic
+ * system sentinel summarizing what was dropped, then re-append the
+ * last 2 messages so the model has the most-recent tool result + its
+ * own last reply in full fidelity.
+ *
+ * Precondition: caller has already checked the decision is `compact`
+ * (length >= MIN_COMPACT_TRANSCRIPT_LENGTH). The function still guards
+ * with a defensive identity-return on shorter transcripts so a careless
+ * caller cannot corrupt the prefix.
+ *
+ * The input array is not mutated: both paths build a new array via
+ * `slice` / spread, though the message objects themselves are shared.
+ *
+ * NOTE(review): the head/middle/tail split is purely positional. If an
+ * assistant message carrying tool calls falls in the dropped middle
+ * while its tool result sits in the retained tail, the result is kept
+ * without its originating call. Confirm the runtime tolerates that.
+ *
+ * @param transcript Array of transcript messages.
+ * @returns `{ transcript, droppedCount, gist, stats, preUsedTokens,
+ *   postUsedTokens }`. On the short-transcript guard path the transcript
+ *   is a shallow copy, `droppedCount` is 0, `gist` is empty and the two
+ *   token figures are equal.
+ */
+export function compactTranscript(transcript) {
+    const preUsedTokens = estimateTranscriptTokens(transcript);
+    if (transcript.length < MIN_COMPACT_TRANSCRIPT_LENGTH) {
+        return {
+            transcript: transcript.slice(),
+            droppedCount: 0,
+            gist: '',
+            stats: { toolCalls: 0, fileCount: 0, bashCalls: 0, messagesDropped: 0 },
+            preUsedTokens,
+            postUsedTokens: preUsedTokens,
+        };
+    }
+    // Always retain: index 0 (system) + index 1 (original user task) +
+    // last 2 messages. The middle slice is what gets summarised.
+    const head = transcript.slice(0, 2);
+    const tail = transcript.slice(-2);
+    const middle = transcript.slice(2, -2);
+    const stats = summarizeDroppedTurns(middle);
+    const gist = renderAutoCompactSentinel(stats);
+    const sentinelMessage = {
+        role: 'system',
+        content: gist,
+    };
+    const next = [...head, sentinelMessage, ...tail];
+    const postUsedTokens = estimateTranscriptTokens(next);
+    return {
+        transcript: next,
+        droppedCount: middle.length,
+        gist,
+        stats,
+        preUsedTokens,
+        postUsedTokens,
+    };
+}
+/**
+ * Convenience composer used by `runEngineLoop`: evaluate → compact in
+ * one shot. Returns `null` when the decision was `skip` so the loop
+ * driver can branch cheaply without destructuring two layers of records.
+ *
+ * @param transcript Array of transcript messages.
+ * @param maxTokens Token budget the threshold ratio is applied to.
+ * @param config Auto-compact config providing `enabled` and `thresholdRatio`.
+ * @returns The `compactTranscript` result, or `null` when the gate says skip.
+ */
+export function maybeCompact(transcript, maxTokens, config) {
+    const decision = evaluateAutoCompactDecision({
+        transcript,
+        maxTokens,
+        config,
+    });
+    if (decision.kind === 'skip')
+        return null;
+    return compactTranscript(transcript);
+}
+//# sourceMappingURL=auto-compact.js.map

package/dist/core/engine/budgets.js CHANGED Viewed

@@ -1,3 +1,60 @@
+/**
+ * Auto-compact (mid-loop transcript summarization) default trip point as
+ * a fraction of the per-command `maxTokens` envelope. CEO P1 #14 (CC
+ * parity): when transcript char-count tokens cross 75% of the budget,
+ * the engine loop drops the middle turns and inserts a deterministic
+ * `[auto-compact]` sentinel so the loop can continue without the model
+ * tripping the `budget_exhausted` terminal status mid-build.
+ *
+ * Empirically — `pugi code "big refactor"` hits the 80k cap on turn 4-5
+ * and refuses to finish; `pugi fix` does the same at 50k. Auto-compact
+ * keeps the recent N turns + a one-line gist of the dropped tool calls
+ * so the model retains the most recent state without paying for the
+ * full prefix.
+ *
+ * Operators can opt out / retune via `.pugi/settings.json`:
+ *
+ *   {
+ *     "autoCompact": { "enabled": true, "thresholdRatio": 0.75 }
+ *   }
+ *
+ * Bad values fall back silently to the default — the engine loop never
+ * crashes on a malformed settings field (mirrors `resolveBudget`).
+ */
+export const AUTO_COMPACT_THRESHOLD_RATIO = 0.75;
+export const DEFAULT_AUTO_COMPACT_CONFIG = {
+    enabled: true,
+    thresholdRatio: AUTO_COMPACT_THRESHOLD_RATIO,
+};
+/**
+ * Pull the auto-compact override from `.pugi/settings.json`. Uses the
+ * same defensive-cast pattern as `readSettingsBudget` so an unknown
+ * field shape silently falls back to defaults (the gate is a comfort
+ * feature; a malformed settings line must not break the engine loop).
+ *
+ * Returns the merged config — caller never sees `undefined`.
+ *
+ * Each field is validated independently: `enabled` is taken only when
+ * it is a boolean, and `thresholdRatio` only when it is a finite number
+ * in the range (0, 1]. Anything else keeps that field's default.
+ *
+ * NOTE(review): when `settings` or `settings.autoCompact` is missing or
+ * not a plain object, the shared `DEFAULT_AUTO_COMPACT_CONFIG` object is
+ * returned by reference rather than copied; callers should treat the
+ * result as read-only.
+ *
+ * @param settings Parsed settings object, or a falsy value when absent.
+ * @returns `{ enabled, thresholdRatio }`.
+ */
+export function resolveAutoCompactConfig(settings) {
+    if (!settings)
+        return DEFAULT_AUTO_COMPACT_CONFIG;
+    const root = settings.autoCompact;
+    if (!root || typeof root !== 'object' || Array.isArray(root)) {
+        return DEFAULT_AUTO_COMPACT_CONFIG;
+    }
+    const r = root;
+    const enabledRaw = r['enabled'];
+    const thresholdRaw = r['thresholdRatio'];
+    const enabled = typeof enabledRaw === 'boolean'
+        ? enabledRaw
+        : DEFAULT_AUTO_COMPACT_CONFIG.enabled;
+    let thresholdRatio = DEFAULT_AUTO_COMPACT_CONFIG.thresholdRatio;
+    if (typeof thresholdRaw === 'number' && Number.isFinite(thresholdRaw)) {
+        if (thresholdRaw > 0 && thresholdRaw <= 1) {
+            thresholdRatio = thresholdRaw;
+        }
+    }
+    return { enabled, thresholdRatio };
+}
 /**
  * β1 defaults. Source of truth for the per-command budget envelope.
  * The runtime is allowed to look these up directly (no need to round

package/dist/core/engine/native-pugi.js CHANGED Viewed

@@ -6,7 +6,8 @@ import { FileReadCache } from '../file-cache.js';
 import { loadSettings } from '../settings.js';
 import { openSession, recordToolCall, recordToolResult } from '../session.js';
 import { prewarmRealDispatch } from '../subagents/dispatcher.js';
-import { resolveBudget } from './budgets.js';
+import { resolveAutoCompactConfig, resolveBudget } from './budgets.js';
+import { maybeCompact } from './auto-compact.js';
 import { buildExecutor, buildToolsSchema } from './tool-bridge.js';
 import { personaSlugFor, systemPromptFor } from './prompts.js';
 import { CancellationToken } from '../repl/cancellation.js';
@@ -188,6 +189,13 @@ export class NativePugiEngineAdapter {
             // budget so a careless caller cannot disable the call-count
             // guard by setting only token count.
             const budget = resolveBudget(kind, settings, task.budget?.tokens ? { maxTokens: task.budget.tokens } : undefined);
+            // CEO P1 #14 (auto-compact, 2026-05-29): resolve the per-workspace
+            // override of the 75% threshold gate. Default is `{ enabled: true,
+            // thresholdRatio: 0.75 }`; operators kill it via
+            // `.pugi/settings.json::autoCompact.enabled = false` or retune the
+            // ratio. The resolved config is captured by the closure that
+            // `runEngineLoop` invokes pre-send on every turn.
+            const autoCompactConfig = resolveAutoCompactConfig(settings);
             // β3 streaming: pre-build the typed stream event queue so the hook
             // callbacks below can push live events that this async generator
             // yields IMMEDIATELY (instead of buffering until `runEngineLoop`
@@ -364,6 +372,27 @@ export class NativePugiEngineAdapter {
             // per-run log for operators and the cabinet UI (Sprint 2B).
             const sessionEventsPath = openSessionMirror(root, session.id);
             const hooks = {
+                // CEO P1 #14 (auto-compact, 2026-05-29): single operator-visible
+                // line on stderr — keep parity with Claude Code's
+                // `Compacted N turns into Y tokens; continuing.` message. We mirror
+                // the event into the session log + stream emitter as a `status`
+                // frame so that admin-api SSE consumers + the cabinet UI render
+                // it without a schema change.
+                onAutoCompact: (event) => {
+                    const pct = Math.round((event.preUsedTokens / Math.max(1, event.maxTokens)) * 100);
+                    const line = `engine: auto-compacted ${event.droppedCount} turns at ${event.preUsedTokens}/${event.maxTokens} (${pct}%)`;
+                    // Single-line stderr write — operator-visible per spec.
+                    process.stderr.write(`${line}\n`);
+                    emitStream({ type: 'status', message: line });
+                    appendSessionMirror(sessionEventsPath, {
+                        type: 'auto_compact',
+                        droppedCount: event.droppedCount,
+                        preUsedTokens: event.preUsedTokens,
+                        postUsedTokens: event.postUsedTokens,
+                        maxTokens: event.maxTokens,
+                        gist: event.gist,
+                    });
+                },
                 onTurnStart: (turnIndex, messageCount) => {
                     const msg = `turn ${turnIndex + 1}: requesting model (transcript=${messageCount} messages)`;
                     emitStream({ type: 'status', message: msg });
@@ -671,6 +700,14 @@ export class NativePugiEngineAdapter {
                         command: kind,
                         tag: dispatchTagFor(kind),
                         model: this.options.model,
+                        // CEO P1 #14 (auto-compact, 2026-05-29): pluggable compactor
+                        // hook. The SDK driver invokes this pre-`client.send` on every
+                        // turn. `maybeCompact` returns `null` below the 75% threshold
+                        // or when the transcript is too short to drop history — the
+                        // loop continues unchanged on the cold path. When it returns
+                        // a result, the driver swaps the transcript + fires the
+                        // `onAutoCompact` hook above which emits the stderr line.
+                        autoCompact: ({ transcript, maxTokens }) => maybeCompact(transcript, maxTokens, autoCompactConfig),
                     });
                 }
                 catch (err) {

package/dist/runtime/version.js CHANGED Viewed

@@ -44,7 +44,7 @@ export function sanitizeSemver(raw) {
  * during import). When bumping the CLI version BOTH literals must be
  * updated; the release smoke-test (`pack:smoke`) verifies they agree.
  */
-export const PUGI_CLI_VERSION = sanitizeSemver('0.1.0-beta.50');
+export const PUGI_CLI_VERSION = sanitizeSemver('0.1.0-beta.51');
 /**
  * Outbound: the CLI's installed semver. Read at request time by
  * `version-interceptor.ts` and injected on every `fetch` call.

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@pugi/cli",
-  "version": "0.1.0-beta.50",
+  "version": "0.1.0-beta.51",
   "description": "Pugi CLI - terminal-native software execution system",
   "homepage": "https://pugi.io",
   "repository": {
@@ -54,8 +54,8 @@
     "turndown": "^7.2.4",
     "undici": "^8.3.0",
     "zod": "^3.23.0",
-    "@pugi/personas": "0.1.2",
-    "@pugi/sdk": "0.1.0-beta.50"
+    "@pugi/sdk": "0.1.0-beta.51",
+    "@pugi/personas": "0.1.2"
   },
   "devDependencies": {
     "@types/node": "^22.0.0",