@pugi/cli 0.1.0-beta.97 → 0.1.0-beta.99

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -75,12 +75,21 @@ export const beta1DefaultBudgets = {
75
75
  // real per-call token use is ~30-40% lower than legacy. Bump headroom
76
76
  // so multi-file refactors no longer trip the cap. Anvil clamps per-call
77
77
  // max_tokens to 128k (PR) so the engine envelope is still safe.
78
- fix: { maxTokens: 80_000, maxToolCalls: 20 },
79
- code: { maxTokens: 120_000, maxToolCalls: 25 },
80
- build: { maxTokens: 200_000, maxToolCalls: 30 },
81
- plan: { maxTokens: 200_000, maxToolCalls: 8 },
82
- explain: { maxTokens: 60_000, maxToolCalls: 10 },
83
- review_triple: { maxTokens: 100_000, maxToolCalls: 10 },
78
+ // CEO escalation 2026-06-05 round 2: "why do other models write for
79
+ // days? are you limiting tokens?" The upstream / Codex / Aider have
80
+ // NO per-task budget cap. They auto-compact at ~75% threshold and
81
+ // continue indefinitely. Pugi's 400K cap was still a regression vs
82
+ // industry expectation. Raise to the HARD_MAX_TOKENS ceiling (5M) for
83
+ // code/build (2M for fix) so the engine loop effectively runs until completion
84
+ // or until the operator aborts. Auto-compact (#14, shipped) reclaims
85
+ // headroom mid-loop. Token cost still tracked + tier-gated upstream;
86
+ // this lifts only the in-process loop ceiling.
87
+ fix: { maxTokens: 2_000_000, maxToolCalls: 200 },
88
+ code: { maxTokens: 5_000_000, maxToolCalls: 500 },
89
+ build: { maxTokens: 5_000_000, maxToolCalls: 500 },
90
+ plan: { maxTokens: 500_000, maxToolCalls: 30 },
91
+ explain: { maxTokens: 200_000, maxToolCalls: 20 },
92
+ review_triple: { maxTokens: 300_000, maxToolCalls: 20 },
84
93
  };
85
94
  /**
86
95
  * Hard upper bounds. Anything above this is treated as user error
@@ -49,26 +49,31 @@ const PROFILES = {
49
49
  allowParallelAgents: false,
50
50
  maxParallelAgents: 0,
51
51
  },
52
+ // CEO 2026-06-05: 80K standard exhausted React multi-file build mid-
53
+ // turn (120K hardcoded budget). Customers compare to the upstream = 200K
54
+ // context per session. Bump standard to 200K so default REPL doesn't
55
+ // trip mid-build; deep to 500K for complex multi-file refactors;
56
+ // marathon to 1.5M for long-running autonomous work.
52
57
  standard: {
53
58
  level: 'standard',
54
- maxTurns: 15,
55
- budgetTokens: 80_000,
59
+ maxTurns: 30,
60
+ budgetTokens: 200_000,
56
61
  modelTag: 'standard',
57
62
  allowParallelAgents: false,
58
63
  maxParallelAgents: 0,
59
64
  },
60
65
  deep: {
61
66
  level: 'deep',
62
- maxTurns: 50,
63
- budgetTokens: 200_000,
67
+ maxTurns: 80,
68
+ budgetTokens: 500_000,
64
69
  modelTag: 'standard',
65
70
  allowParallelAgents: true,
66
71
  maxParallelAgents: 3,
67
72
  },
68
73
  marathon: {
69
74
  level: 'marathon',
70
- maxTurns: 200,
71
- budgetTokens: 800_000,
75
+ maxTurns: 300,
76
+ budgetTokens: 1_500_000,
72
77
  modelTag: 'heavy',
73
78
  allowParallelAgents: true,
74
79
  maxParallelAgents: 3,
@@ -2831,6 +2831,24 @@ export class ReplSession {
2831
2831
  try {
2832
2832
  if (useDirectEngine) {
2833
2833
  const persona = personaSlugFor('code');
2834
+ // PR C (PUGI-538-FU): thread the recent conversation
2835
+ // into the engine prompt so multi-turn refinements work. Without
2836
+ // this, the engine sees only the literal current brief — a
2837
+ // follow-up like "react" after "сделай крестики нолики" arrives
2838
+ // as a bare "react" with no prior context, and the engine ships
2839
+ // arbitrary nonsense or asks again ("нет конкретного feature
2840
+ // request"). The CEO reproduction 2026-06-05 (Python tic-tac-toe
2841
+ // shipped when the customer wanted a React browser game, then engine
2842
+ // claimed "нет feature request" on the correction turn) is
2843
+ // exactly this gap.
2844
+ //
2845
+ // Display channels (system line, transcript) keep using the bare
2846
+ // `brief` for UX cleanliness; only the engine's task.prompt gets
2847
+ // the full conversational context via the new `enginePrompt`
2848
+ // field. Engine-bridge falls back to brief when enginePrompt is
2849
+ // undefined (server-emitted parser-built tags), preserving the
2850
+ // legacy behaviour for those surfaces.
2851
+ const enginePrompt = this.buildEnginePromptWithContext(brief);
2834
2852
  const tag = {
2835
2853
  command: 'code',
2836
2854
  brief,
@@ -2842,6 +2860,7 @@ export class ReplSession {
2842
2860
  signature: signatureForToolRoute('code', persona, brief),
2843
2861
  start: 0,
2844
2862
  end: 0,
2863
+ ...(enginePrompt !== brief ? { enginePrompt } : {}),
2845
2864
  };
2846
2865
  await this.runEngineBridge(tag);
2847
2866
  }
@@ -2862,6 +2881,54 @@ export class ReplSession {
2862
2881
  this.markDispatchFailed('post_brief_failed');
2863
2882
  }
2864
2883
  }
2884
+ /**
2885
+ * PR C (PUGI-538-FU): build the engine prompt with recent
2886
+ * conversation context prepended. The current brief is preserved as
2887
+ * the explicit "Current request:" terminal so the engine knows what
2888
+ * the user is asking right now, while the prior turns give it the
2889
+ * stack/framework/format hints from earlier in the dialog.
2890
+ *
2891
+ * Returns `brief` unchanged when there is no prior conversation —
2892
+ * the empty preamble would just waste tokens.
2893
+ *
2894
+ * Window policy: last 4 conversational exchanges (operator + persona
2895
+ * pairs), text truncated to 400 chars per row. Drops the trailing
2896
+ * operator row if it matches `brief` (which has already been appended
2897
+ * to the transcript by `appendOperatorLine` earlier in `dispatchBrief` and
2898
+ * would otherwise duplicate inside the prompt).
2899
+ *
2900
+ * Doc strings stay in English per repo convention; the rendered
2901
+ * preamble uses neutral English labels ("User", "Pugi") so the
2902
+ * engine's model treats it as standard transcript context rather
2903
+ * than a localized field name.
2904
+ */
2905
+ buildEnginePromptWithContext(brief) {
2906
+ const MAX_TURNS = 4;
2907
+ const MAX_ROW_CHARS = 400;
2908
+ const conversational = this.state.transcript.filter((r) => r.source === 'operator' || r.source === 'persona');
2909
+ if (conversational.length === 0)
2910
+ return brief;
2911
+ // Take the last MAX_TURNS * 2 rows (each turn = 1 operator + 1 persona).
2912
+ const recent = conversational.slice(-(MAX_TURNS * 2));
2913
+ // Drop trailing operator row when it equals the brief we're about
2914
+ // to dispatch — the brief is the "current request" and already
2915
+ // landed in the transcript via `appendOperatorLine` earlier in
2916
+ // `dispatchBrief`. Including it twice would confuse the engine.
2917
+ const lastRow = recent[recent.length - 1];
2918
+ const trimmed = lastRow && lastRow.source === 'operator' && lastRow.text === brief
2919
+ ? recent.slice(0, -1)
2920
+ : recent;
2921
+ if (trimmed.length === 0)
2922
+ return brief;
2923
+ const lines = trimmed.map((r) => {
2924
+ const role = r.source === 'operator' ? 'User' : 'Pugi';
2925
+ const truncated = r.text.length > MAX_ROW_CHARS
2926
+ ? r.text.slice(0, MAX_ROW_CHARS) + '...'
2927
+ : r.text;
2928
+ return `- ${role}: ${truncated}`;
2929
+ });
2930
+ return `Recent conversation:\n${lines.join('\n')}\n\nCurrent request: ${brief}`;
2931
+ }
2865
2932
  /**
2866
2933
  * Reset the FSM to `idle` after a terminal transition so the
2867
2934
  * next brief can start. The FSM does not allow direct
@@ -4137,7 +4204,11 @@ export class ReplSession {
4137
4204
  result = await bridge({
4138
4205
  command: tag.command,
4139
4206
  persona: tag.persona,
4140
- brief: tag.brief,
4207
+ // PR C (PUGI-538-FU): prefer the contextualized
4208
+ // engine prompt when the direct-engine path set it. Falls back
4209
+ // to the bare brief for parser-built tags from the server-emitted
4210
+ // envelope path (no conversation context available there).
4211
+ brief: tag.enginePrompt ?? tag.brief,
4141
4212
  bridgeId,
4142
4213
  signal: abort.signal,
4143
4214
  onEvent,
@@ -142,16 +142,21 @@ const DENY_ALL_WRITES_READONLY = Object.freeze([
142
142
  /* ------------------------------------------------------------------ */
143
143
  /* Default budgets */
144
144
  /* ------------------------------------------------------------------ */
145
+ // CEO escalation 2026-06-05: 120K coder budget exhausted mid-React-
146
+ // build (120214 > 120000). Match the engine-level `code` task bump
147
+ // (apps/pugi-cli/src/core/engine/budgets.ts:149 — 400K). Subagent
148
+ // dispatches inherit the upstream caller's headroom, so this needs
149
+ // to track the engine envelope.
145
150
  const DEFAULT_BUDGETS = Object.freeze({
146
- orchestrator: { tokens: 200_000, dollars: 5, wallClockMs: 600_000 },
147
- architect: { tokens: 80_000, dollars: 2, wallClockMs: 300_000 },
148
- coder: { tokens: 120_000, dollars: 3, wallClockMs: 600_000 },
149
- verifier: { tokens: 60_000, dollars: 2, wallClockMs: 300_000 },
150
- reviewer: { tokens: 80_000, dollars: 2, wallClockMs: 300_000 },
151
- researcher: { tokens: 60_000, dollars: 1.5, wallClockMs: 300_000 },
152
- release: { tokens: 40_000, dollars: 1, wallClockMs: 180_000 },
153
- devops: { tokens: 60_000, dollars: 2, wallClockMs: 300_000 },
154
- design_qa: { tokens: 60_000, dollars: 1.5, wallClockMs: 300_000 },
151
+ orchestrator: { tokens: 400_000, dollars: 8, wallClockMs: 900_000 },
152
+ architect: { tokens: 200_000, dollars: 4, wallClockMs: 600_000 },
153
+ coder: { tokens: 400_000, dollars: 8, wallClockMs: 900_000 },
154
+ verifier: { tokens: 150_000, dollars: 3, wallClockMs: 600_000 },
155
+ reviewer: { tokens: 200_000, dollars: 4, wallClockMs: 600_000 },
156
+ researcher: { tokens: 150_000, dollars: 3, wallClockMs: 600_000 },
157
+ release: { tokens: 80_000, dollars: 2, wallClockMs: 300_000 },
158
+ devops: { tokens: 150_000, dollars: 3, wallClockMs: 600_000 },
159
+ design_qa: { tokens: 150_000, dollars: 3, wallClockMs: 600_000 },
155
160
  });
156
161
  /**
157
162
  * Resolve the effective budget for a dispatch by merging task overrides
@@ -44,7 +44,7 @@ export function sanitizeSemver(raw) {
44
44
  * during import). When bumping the CLI version BOTH literals must be
45
45
  * updated; the release smoke-test (`pack:smoke`) verifies they agree.
46
46
  */
47
- export const PUGI_CLI_VERSION = sanitizeSemver('0.1.0-beta.97');
47
+ export const PUGI_CLI_VERSION = sanitizeSemver('0.1.0-beta.99');
48
48
  /**
49
49
  * Outbound: the CLI's installed semver. Read at request time by
50
50
  * `version-interceptor.ts` and injected on every `fetch` call.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@pugi/cli",
3
- "version": "0.1.0-beta.97",
3
+ "version": "0.1.0-beta.99",
4
4
  "description": "Pugi CLI - terminal-native software execution system",
5
5
  "homepage": "https://pugi.io",
6
6
  "repository": {
@@ -62,8 +62,8 @@
62
62
  "undici": "^8.3.0",
63
63
  "which": "^6.0.0",
64
64
  "zod": "^3.23.0",
65
- "@pugi/personas": "0.1.2",
66
- "@pugi/sdk": "0.1.0-beta.97"
65
+ "@pugi/sdk": "0.1.0-beta.99",
66
+ "@pugi/personas": "0.1.2"
67
67
  },
68
68
  "devDependencies": {
69
69
  "@types/node": "^22.0.0",