@pugi/cli 0.1.0-beta.97 → 0.1.0-beta.99
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -75,12 +75,21 @@ export const beta1DefaultBudgets = {
|
|
|
75
75
|
// real per-call token use is ~30-40% lower than legacy. Bump headroom
|
|
76
76
|
// so multi-file refactors no longer trip the cap. Anvil clamps per-call
|
|
77
77
|
// max_tokens to 128k (PR) so the engine envelope is still safe.
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
78
|
+
// CEO escalation 2026-06-05 round 2: "почему другие модели пишут
|
|
79
|
+
// днями? ты ограничиваешь токены?" — the upstream / Codex / Aider have
|
|
80
|
+
// NO per-task budget cap. They auto-compact at ~75% threshold and
|
|
81
|
+
// continue indefinitely. Pugi's 400K cap was still a regression vs
|
|
82
|
+
// industry expectation. Raise to the HARD_MAX_TOKENS ceiling (5M) for
|
|
83
|
+
// code/fix/build so the engine loop effectively runs until completion
|
|
84
|
+
// or until the operator aborts. Auto-compact (#14, shipped) reclaims
|
|
85
|
+
// headroom mid-loop. Token cost still tracked + tier-gated upstream;
|
|
86
|
+
// this lifts only the in-process loop ceiling.
|
|
87
|
+
fix: { maxTokens: 2_000_000, maxToolCalls: 200 },
|
|
88
|
+
code: { maxTokens: 5_000_000, maxToolCalls: 500 },
|
|
89
|
+
build: { maxTokens: 5_000_000, maxToolCalls: 500 },
|
|
90
|
+
plan: { maxTokens: 500_000, maxToolCalls: 30 },
|
|
91
|
+
explain: { maxTokens: 200_000, maxToolCalls: 20 },
|
|
92
|
+
review_triple: { maxTokens: 300_000, maxToolCalls: 20 },
|
|
84
93
|
};
|
|
85
94
|
/**
|
|
86
95
|
* Hard upper bounds. Anything above this is treated as user error
|
|
@@ -49,26 +49,31 @@ const PROFILES = {
|
|
|
49
49
|
allowParallelAgents: false,
|
|
50
50
|
maxParallelAgents: 0,
|
|
51
51
|
},
|
|
52
|
+
// CEO 2026-06-05: 80K standard exhausted React multi-file build mid-
|
|
53
|
+
// turn (120K hardcoded budget). Customers compare to the upstream = 200K
|
|
54
|
+
// context per session. Bump standard to 200K so default REPL doesn't
|
|
55
|
+
// trip mid-build; deep to 500K for complex multi-file refactors;
|
|
56
|
+
// marathon to 1.5M for long-running autonomous work.
|
|
52
57
|
standard: {
|
|
53
58
|
level: 'standard',
|
|
54
|
-
maxTurns:
|
|
55
|
-
budgetTokens:
|
|
59
|
+
maxTurns: 30,
|
|
60
|
+
budgetTokens: 200_000,
|
|
56
61
|
modelTag: 'standard',
|
|
57
62
|
allowParallelAgents: false,
|
|
58
63
|
maxParallelAgents: 0,
|
|
59
64
|
},
|
|
60
65
|
deep: {
|
|
61
66
|
level: 'deep',
|
|
62
|
-
maxTurns:
|
|
63
|
-
budgetTokens:
|
|
67
|
+
maxTurns: 80,
|
|
68
|
+
budgetTokens: 500_000,
|
|
64
69
|
modelTag: 'standard',
|
|
65
70
|
allowParallelAgents: true,
|
|
66
71
|
maxParallelAgents: 3,
|
|
67
72
|
},
|
|
68
73
|
marathon: {
|
|
69
74
|
level: 'marathon',
|
|
70
|
-
maxTurns:
|
|
71
|
-
budgetTokens:
|
|
75
|
+
maxTurns: 300,
|
|
76
|
+
budgetTokens: 1_500_000,
|
|
72
77
|
modelTag: 'heavy',
|
|
73
78
|
allowParallelAgents: true,
|
|
74
79
|
maxParallelAgents: 3,
|
|
@@ -2831,6 +2831,24 @@ export class ReplSession {
|
|
|
2831
2831
|
try {
|
|
2832
2832
|
if (useDirectEngine) {
|
|
2833
2833
|
const persona = personaSlugFor('code');
|
|
2834
|
+
// PR C (PUGI-538-FU): thread the recent conversation
|
|
2835
|
+
// into the engine prompt so multi-turn refinements work. Without
|
|
2836
|
+
// this, the engine sees only the literal current brief — a
|
|
2837
|
+
// follow-up like "react" after "сделай крестики нолики" arrives
|
|
2838
|
+
// as a bare "react" with no prior context, and the engine ships
|
|
2839
|
+
// arbitrary nonsense or asks again ("нет конкретного feature
|
|
2840
|
+
// request"). The CEO reproduction 2026-06-05 (Python tic-tac-toe
|
|
2841
|
+
// shipped when the customer wanted a React browser game, then engine
|
|
2842
|
+
// claimed "нет feature request" on the correction turn) is
|
|
2843
|
+
// exactly this gap.
|
|
2844
|
+
//
|
|
2845
|
+
// Display channels (system line, transcript) keep using the bare
|
|
2846
|
+
// `brief` for UX cleanliness; only the engine's task.prompt gets
|
|
2847
|
+
// the full conversational context via the new `enginePrompt`
|
|
2848
|
+
// field. Engine-bridge falls back to brief when enginePrompt is
|
|
2849
|
+
// undefined (server-emitted parser-built tags), preserving the
|
|
2850
|
+
// legacy behaviour for those surfaces.
|
|
2851
|
+
const enginePrompt = this.buildEnginePromptWithContext(brief);
|
|
2834
2852
|
const tag = {
|
|
2835
2853
|
command: 'code',
|
|
2836
2854
|
brief,
|
|
@@ -2842,6 +2860,7 @@ export class ReplSession {
|
|
|
2842
2860
|
signature: signatureForToolRoute('code', persona, brief),
|
|
2843
2861
|
start: 0,
|
|
2844
2862
|
end: 0,
|
|
2863
|
+
...(enginePrompt !== brief ? { enginePrompt } : {}),
|
|
2845
2864
|
};
|
|
2846
2865
|
await this.runEngineBridge(tag);
|
|
2847
2866
|
}
|
|
@@ -2862,6 +2881,54 @@ export class ReplSession {
|
|
|
2862
2881
|
this.markDispatchFailed('post_brief_failed');
|
|
2863
2882
|
}
|
|
2864
2883
|
}
|
|
2884
|
+
/**
|
|
2885
|
+
* PR C (PUGI-538-FU): build the engine prompt with recent
|
|
2886
|
+
* conversation context prepended. The current brief is preserved as
|
|
2887
|
+
* the explicit "Current request:" terminal so the engine knows what
|
|
2888
|
+
* the user is asking right now, while the prior turns give it the
|
|
2889
|
+
* stack/framework/format hints from earlier in the dialog.
|
|
2890
|
+
*
|
|
2891
|
+
* Returns `brief` unchanged when there is no prior conversation —
|
|
2892
|
+
* the empty preamble would just waste tokens.
|
|
2893
|
+
*
|
|
2894
|
+
* Window policy: last 4 conversational exchanges (operator + persona
|
|
2895
|
+
* pairs), text truncated to 400 chars per row. Drops the trailing
|
|
2896
|
+
* operator row if it matches `brief` (which has already been appended
|
|
2897
|
+
* to the transcript by `appendOperatorLine` earlier in `dispatchBrief` and
|
|
2898
|
+
* would otherwise duplicate inside the prompt).
|
|
2899
|
+
*
|
|
2900
|
+
* Doc strings stay in English per repo convention; the rendered
|
|
2901
|
+
* preamble uses neutral English labels ("User", "Pugi") so the
|
|
2902
|
+
* engine's model treats it as standard transcript context rather
|
|
2903
|
+
* than a localized field name.
|
|
2904
|
+
*/
|
|
2905
|
+
buildEnginePromptWithContext(brief) {
|
|
2906
|
+
const MAX_TURNS = 4;
|
|
2907
|
+
const MAX_ROW_CHARS = 400;
|
|
2908
|
+
const conversational = this.state.transcript.filter((r) => r.source === 'operator' || r.source === 'persona');
|
|
2909
|
+
if (conversational.length === 0)
|
|
2910
|
+
return brief;
|
|
2911
|
+
// Take the last MAX_TURNS * 2 rows (each turn = 1 operator + 1 persona).
|
|
2912
|
+
const recent = conversational.slice(-(MAX_TURNS * 2));
|
|
2913
|
+
// Drop trailing operator row when it equals the brief we're about
|
|
2914
|
+
// to dispatch — the brief is the "current request" and already
|
|
2915
|
+
// landed in the transcript via `appendOperatorLine` earlier in
|
|
2916
|
+
// `dispatchBrief`. Including it twice would confuse the engine.
|
|
2917
|
+
const lastRow = recent[recent.length - 1];
|
|
2918
|
+
const trimmed = lastRow && lastRow.source === 'operator' && lastRow.text === brief
|
|
2919
|
+
? recent.slice(0, -1)
|
|
2920
|
+
: recent;
|
|
2921
|
+
if (trimmed.length === 0)
|
|
2922
|
+
return brief;
|
|
2923
|
+
const lines = trimmed.map((r) => {
|
|
2924
|
+
const role = r.source === 'operator' ? 'User' : 'Pugi';
|
|
2925
|
+
const truncated = r.text.length > MAX_ROW_CHARS
|
|
2926
|
+
? r.text.slice(0, MAX_ROW_CHARS) + '...'
|
|
2927
|
+
: r.text;
|
|
2928
|
+
return `- ${role}: ${truncated}`;
|
|
2929
|
+
});
|
|
2930
|
+
return `Recent conversation:\n${lines.join('\n')}\n\nCurrent request: ${brief}`;
|
|
2931
|
+
}
|
|
2865
2932
|
/**
|
|
2866
2933
|
* Reset the FSM to `idle` after a terminal transition so the
|
|
2867
2934
|
* next brief can start. The FSM does not allow direct
|
|
@@ -4137,7 +4204,11 @@ export class ReplSession {
|
|
|
4137
4204
|
result = await bridge({
|
|
4138
4205
|
command: tag.command,
|
|
4139
4206
|
persona: tag.persona,
|
|
4140
|
-
|
|
4207
|
+
// PR C (PUGI-538-FU): prefer the contextualized
|
|
4208
|
+
// engine prompt when the direct-engine path set it. Falls back
|
|
4209
|
+
// to the bare brief for parser-built tags from the server-emitted
|
|
4210
|
+
// envelope path (no conversation context available there).
|
|
4211
|
+
brief: tag.enginePrompt ?? tag.brief,
|
|
4141
4212
|
bridgeId,
|
|
4142
4213
|
signal: abort.signal,
|
|
4143
4214
|
onEvent,
|
|
@@ -142,16 +142,21 @@ const DENY_ALL_WRITES_READONLY = Object.freeze([
|
|
|
142
142
|
/* ------------------------------------------------------------------ */
|
|
143
143
|
/* Default budgets */
|
|
144
144
|
/* ------------------------------------------------------------------ */
|
|
145
|
+
// CEO escalation 2026-06-05: 120K coder budget exhausted mid-React-
|
|
146
|
+
// build (120214 > 120000). Match the engine-level `code` task bump
|
|
147
|
+
// (apps/pugi-cli/src/core/engine/budgets.ts:149 — 400K). Subagent
|
|
148
|
+
// dispatches inherit the upstream caller's headroom, so this needs
|
|
149
|
+
// to track the engine envelope.
|
|
145
150
|
const DEFAULT_BUDGETS = Object.freeze({
|
|
146
|
-
orchestrator: { tokens:
|
|
147
|
-
architect: { tokens:
|
|
148
|
-
coder: { tokens:
|
|
149
|
-
verifier: { tokens:
|
|
150
|
-
reviewer: { tokens:
|
|
151
|
-
researcher: { tokens:
|
|
152
|
-
release: { tokens:
|
|
153
|
-
devops: { tokens:
|
|
154
|
-
design_qa: { tokens:
|
|
151
|
+
orchestrator: { tokens: 400_000, dollars: 8, wallClockMs: 900_000 },
|
|
152
|
+
architect: { tokens: 200_000, dollars: 4, wallClockMs: 600_000 },
|
|
153
|
+
coder: { tokens: 400_000, dollars: 8, wallClockMs: 900_000 },
|
|
154
|
+
verifier: { tokens: 150_000, dollars: 3, wallClockMs: 600_000 },
|
|
155
|
+
reviewer: { tokens: 200_000, dollars: 4, wallClockMs: 600_000 },
|
|
156
|
+
researcher: { tokens: 150_000, dollars: 3, wallClockMs: 600_000 },
|
|
157
|
+
release: { tokens: 80_000, dollars: 2, wallClockMs: 300_000 },
|
|
158
|
+
devops: { tokens: 150_000, dollars: 3, wallClockMs: 600_000 },
|
|
159
|
+
design_qa: { tokens: 150_000, dollars: 3, wallClockMs: 600_000 },
|
|
155
160
|
});
|
|
156
161
|
/**
|
|
157
162
|
* Resolve the effective budget for a dispatch by merging task overrides
|
package/dist/runtime/version.js
CHANGED
|
@@ -44,7 +44,7 @@ export function sanitizeSemver(raw) {
|
|
|
44
44
|
* during import). When bumping the CLI version BOTH literals must be
|
|
45
45
|
* updated; the release smoke-test (`pack:smoke`) verifies they agree.
|
|
46
46
|
*/
|
|
47
|
-
export const PUGI_CLI_VERSION = sanitizeSemver('0.1.0-beta.
|
|
47
|
+
export const PUGI_CLI_VERSION = sanitizeSemver('0.1.0-beta.99');
|
|
48
48
|
/**
|
|
49
49
|
* Outbound: the CLI's installed semver. Read at request time by
|
|
50
50
|
* `version-interceptor.ts` and injected on every `fetch` call.
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@pugi/cli",
|
|
3
|
-
"version": "0.1.0-beta.
|
|
3
|
+
"version": "0.1.0-beta.99",
|
|
4
4
|
"description": "Pugi CLI - terminal-native software execution system",
|
|
5
5
|
"homepage": "https://pugi.io",
|
|
6
6
|
"repository": {
|
|
@@ -62,8 +62,8 @@
|
|
|
62
62
|
"undici": "^8.3.0",
|
|
63
63
|
"which": "^6.0.0",
|
|
64
64
|
"zod": "^3.23.0",
|
|
65
|
-
"@pugi/
|
|
66
|
-
"@pugi/
|
|
65
|
+
"@pugi/sdk": "0.1.0-beta.99",
|
|
66
|
+
"@pugi/personas": "0.1.2"
|
|
67
67
|
},
|
|
68
68
|
"devDependencies": {
|
|
69
69
|
"@types/node": "^22.0.0",
|