npm - @pugi/cli - Versions diffs - 0.1.0-beta.97 → 0.1.0-beta.99 - Mend

@pugi/cli 0.1.0-beta.97 → 0.1.0-beta.99

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6) hide show

package/dist/core/engine/budgets.js +15 -6
package/dist/core/engine/intensity.js +11 -6
package/dist/core/repl/session.js +72 -1
package/dist/core/subagents/dispatcher.js +14 -9
package/dist/runtime/version.js +1 -1
package/package.json +3 -3

package/dist/core/engine/budgets.js CHANGED Viewed

@@ -75,12 +75,21 @@ export const beta1DefaultBudgets = {
     // real per-call token use is ~30-40% lower than legacy. Bump headroom
     // so multi-file refactors no longer trip the cap. Anvil clamps per-call
     // max_tokens to 128k (PR) so the engine envelope is still safe.
-    fix: { maxTokens: 80_000, maxToolCalls: 20 },
-    code: { maxTokens: 120_000, maxToolCalls: 25 },
-    build: { maxTokens: 200_000, maxToolCalls: 30 },
-    plan: { maxTokens: 200_000, maxToolCalls: 8 },
-    explain: { maxTokens: 60_000, maxToolCalls: 10 },
-    review_triple: { maxTokens: 100_000, maxToolCalls: 10 },
+    // CEO escalation 2026-06-05 round 2: "почему другие модели пишут
+    // днями? ты ограничиваешь токены?" — the upstream / Codex / Aider have
+    // NO per-task budget cap. They auto-compact at ~75% threshold and
+    // continue indefinitely. Pugi's 400K cap was still a regression vs
+    // industry expectation. Raise code/build to the HARD_MAX_TOKENS ceiling
+    // (5M), and fix to 2M, so the engine loop effectively runs until
+    // completion or until the operator aborts. Auto-compact (#14, shipped)
+    // reclaims headroom mid-loop. Token cost is still tracked and tier-gated
+    // upstream; this lifts only the in-process loop ceiling.
+    fix: { maxTokens: 2_000_000, maxToolCalls: 200 },
+    code: { maxTokens: 5_000_000, maxToolCalls: 500 },
+    build: { maxTokens: 5_000_000, maxToolCalls: 500 },
+    plan: { maxTokens: 500_000, maxToolCalls: 30 },
+    explain: { maxTokens: 200_000, maxToolCalls: 20 },
+    review_triple: { maxTokens: 300_000, maxToolCalls: 20 },
 };
 /**
  * Hard upper bounds. Anything above this is treated as user error

package/dist/core/engine/intensity.js CHANGED Viewed

@@ -49,26 +49,31 @@ const PROFILES = {
         allowParallelAgents: false,
         maxParallelAgents: 0,
     },
+    // CEO 2026-06-05: 80K standard exhausted React multi-file build mid-
+    // turn (120K hardcoded budget). Customers compare to the upstream = 200K
+    // context per session. Bump standard to 200K so the default REPL doesn't
+    // trip mid-build; deep to 500K for complex multi-file refactors;
+    // marathon to 1.5M for long-running autonomous work.
     standard: {
         level: 'standard',
-        maxTurns: 15,
-        budgetTokens: 80_000,
+        maxTurns: 30,
+        budgetTokens: 200_000,
         modelTag: 'standard',
         allowParallelAgents: false,
         maxParallelAgents: 0,
     },
     deep: {
         level: 'deep',
-        maxTurns: 50,
-        budgetTokens: 200_000,
+        maxTurns: 80,
+        budgetTokens: 500_000,
         modelTag: 'standard',
         allowParallelAgents: true,
         maxParallelAgents: 3,
     },
     marathon: {
         level: 'marathon',
-        maxTurns: 200,
-        budgetTokens: 800_000,
+        maxTurns: 300,
+        budgetTokens: 1_500_000,
         modelTag: 'heavy',
         allowParallelAgents: true,
         maxParallelAgents: 3,

package/dist/core/repl/session.js CHANGED Viewed

@@ -2831,6 +2831,24 @@ export class ReplSession {
         try {
             if (useDirectEngine) {
                 const persona = personaSlugFor('code');
+                // PR C (PUGI-538-FU): thread the recent conversation
+                // into the engine prompt so multi-turn refinements work. Without
+                // this, the engine sees only the literal current brief — a
+                // follow-up like "react" after "сделай крестики нолики" arrives
+                // as a bare "react" with no prior context, and the engine ships
+                // arbitrary nonsense or asks again ("нет конкретного feature
+                // request"). The CEO reproduction 2026-06-05 (Python tic-tac-toe
+                // shipped when the customer wanted a React browser game, then the
+                // engine claimed "нет feature request" on the correction turn) is
+                // exactly this gap.
+                //
+                // Display channels (system line, transcript) keep using the bare
+                // `brief` for UX cleanliness; only the engine's task.prompt gets
+                // the full conversational context via the new `enginePrompt`
+                // field. Engine-bridge falls back to brief when enginePrompt is
+                // undefined (server-emitted parser-built tags), preserving the
+                // legacy behaviour for those surfaces.
+                const enginePrompt = this.buildEnginePromptWithContext(brief);
                 const tag = {
                     command: 'code',
                     brief,
@@ -2842,6 +2860,7 @@ export class ReplSession {
                     signature: signatureForToolRoute('code', persona, brief),
                     start: 0,
                     end: 0,
+                    ...(enginePrompt !== brief ? { enginePrompt } : {}),
                 };
                 await this.runEngineBridge(tag);
             }
@@ -2862,6 +2881,54 @@ export class ReplSession {
             this.markDispatchFailed('post_brief_failed');
         }
     }
+    /**
+     * PR C (PUGI-538-FU): build the engine prompt with recent
+     * conversation context prepended. The current brief is preserved as
+     * the explicit "Current request:" terminal so the engine knows what
+     * the user is asking right now, while the prior turns give it the
+     * stack/framework/format hints from earlier in the dialog.
+     *
+     * Returns `brief` unchanged when there is no prior conversation —
+     * the empty preamble would just waste tokens.
+     *
+     * Window policy: last 4 conversational exchanges (operator + persona
+     * pairs), text truncated to 400 chars per row. Drops the trailing
+     * operator row if it matches `brief` (which has already been appended
+     * to the transcript by `appendOperatorLine` earlier in `dispatchBrief`
+     * and would otherwise be duplicated inside the prompt).
+     *
+     * Doc strings stay in English per repo convention; the rendered
+     * preamble uses neutral English labels ("User", "Pugi") so the
+     * engine's model treats it as standard transcript context rather
+     * than a localized field name.
+     */
+    buildEnginePromptWithContext(brief) {
+        const MAX_TURNS = 4;
+        const MAX_ROW_CHARS = 400;
+        const conversational = this.state.transcript.filter((r) => r.source === 'operator' || r.source === 'persona');
+        if (conversational.length === 0)
+            return brief;
+        // Take the last MAX_TURNS * 2 rows (each turn = 1 operator + 1 persona).
+        const recent = conversational.slice(-(MAX_TURNS * 2));
+        // Drop trailing operator row when it equals the brief we're about
+        // to dispatch — the brief is the "current request" and already
+        // landed in the transcript via `appendOperatorLine` earlier in
+        // `dispatchBrief`. Including it twice would confuse the engine.
+        const lastRow = recent[recent.length - 1];
+        const trimmed = lastRow && lastRow.source === 'operator' && lastRow.text === brief
+            ? recent.slice(0, -1)
+            : recent;
+        if (trimmed.length === 0)
+            return brief;
+        const lines = trimmed.map((r) => {
+            const role = r.source === 'operator' ? 'User' : 'Pugi';
+            const truncated = r.text.length > MAX_ROW_CHARS
+                ? r.text.slice(0, MAX_ROW_CHARS) + '...'
+                : r.text;
+            return `- ${role}: ${truncated}`;
+        });
+        return `Recent conversation:\n${lines.join('\n')}\n\nCurrent request: ${brief}`;
+    }
     /**
      * Reset the FSM to `idle` after a terminal transition so the
      * next brief can start. The FSM does not allow direct
@@ -4137,7 +4204,11 @@ export class ReplSession {
             result = await bridge({
                 command: tag.command,
                 persona: tag.persona,
-                brief: tag.brief,
+                // PR C (PUGI-538-FU): prefer the contextualized
+                // engine prompt when the direct-engine path set it. Falls back
+                // to the bare brief for parser-built tags from the server-emitted
+                // envelope path (no conversation context available there).
+                brief: tag.enginePrompt ?? tag.brief,
                 bridgeId,
                 signal: abort.signal,
                 onEvent,

package/dist/core/subagents/dispatcher.js CHANGED Viewed

@@ -142,16 +142,21 @@ const DENY_ALL_WRITES_READONLY = Object.freeze([
 /* ------------------------------------------------------------------ */
 /* Default budgets                                                   */
 /* ------------------------------------------------------------------ */
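+// CEO escalation 2026-06-05: 120K coder budget exhausted mid-React-
+// build (120214 > 120000). Match the engine-level `code` task bump
+// (apps/pugi-cli/src/core/engine/budgets.ts:149 — 400K). Subagent
+// dispatches inherit the upstream caller's headroom, so this needs
+// to track the engine envelope.
+// NOTE(review): the engine-level `code` budget has since been raised
+// above 400K (5M in engine/budgets.js) — confirm whether these
+// subagent defaults are meant to follow it.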
 const DEFAULT_BUDGETS = Object.freeze({
-    orchestrator: { tokens: 200_000, dollars: 5, wallClockMs: 600_000 },
-    architect: { tokens: 80_000, dollars: 2, wallClockMs: 300_000 },
-    coder: { tokens: 120_000, dollars: 3, wallClockMs: 600_000 },
-    verifier: { tokens: 60_000, dollars: 2, wallClockMs: 300_000 },
-    reviewer: { tokens: 80_000, dollars: 2, wallClockMs: 300_000 },
-    researcher: { tokens: 60_000, dollars: 1.5, wallClockMs: 300_000 },
-    release: { tokens: 40_000, dollars: 1, wallClockMs: 180_000 },
-    devops: { tokens: 60_000, dollars: 2, wallClockMs: 300_000 },
-    design_qa: { tokens: 60_000, dollars: 1.5, wallClockMs: 300_000 },
+    orchestrator: { tokens: 400_000, dollars: 8, wallClockMs: 900_000 },
+    architect: { tokens: 200_000, dollars: 4, wallClockMs: 600_000 },
+    coder: { tokens: 400_000, dollars: 8, wallClockMs: 900_000 },
+    verifier: { tokens: 150_000, dollars: 3, wallClockMs: 600_000 },
+    reviewer: { tokens: 200_000, dollars: 4, wallClockMs: 600_000 },
+    researcher: { tokens: 150_000, dollars: 3, wallClockMs: 600_000 },
+    release: { tokens: 80_000, dollars: 2, wallClockMs: 300_000 },
+    devops: { tokens: 150_000, dollars: 3, wallClockMs: 600_000 },
+    design_qa: { tokens: 150_000, dollars: 3, wallClockMs: 600_000 },
 });
 /**
  * Resolve the effective budget for a dispatch by merging task overrides

package/dist/runtime/version.js CHANGED Viewed

@@ -44,7 +44,7 @@ export function sanitizeSemver(raw) {
  * during import). When bumping the CLI version BOTH literals must be
  * updated; the release smoke-test (`pack:smoke`) verifies they agree.
  */
-export const PUGI_CLI_VERSION = sanitizeSemver('0.1.0-beta.97');
+export const PUGI_CLI_VERSION = sanitizeSemver('0.1.0-beta.99');
 /**
  * Outbound: the CLI's installed semver. Read at request time by
  * `version-interceptor.ts` and injected on every `fetch` call.

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@pugi/cli",
-  "version": "0.1.0-beta.97",
+  "version": "0.1.0-beta.99",
   "description": "Pugi CLI - terminal-native software execution system",
   "homepage": "https://pugi.io",
   "repository": {
@@ -62,8 +62,8 @@
     "undici": "^8.3.0",
     "which": "^6.0.0",
     "zod": "^3.23.0",
-    "@pugi/personas": "0.1.2",
-    "@pugi/sdk": "0.1.0-beta.97"
+    "@pugi/sdk": "0.1.0-beta.99",
+    "@pugi/personas": "0.1.2"
   },
   "devDependencies": {
     "@types/node": "^22.0.0",