@pugi/cli 0.1.0-beta.50 → 0.1.0-beta.51

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,179 @@
1
/**
 * Crude token-count heuristic mirroring `runEngineLoop`'s fallback
 * accounting (transcript char count / 4). The CLI does not have access
 * to a real tokenizer pre-flight — the runtime returns `usage.totalTokens`
 * only on the server response, which is too late for our pre-turn gate.
 * char/4 is in the right order of magnitude for English/TS and matches
 * what the loop's own fallback uses on `tokensUsed === 0` upstream.
 *
 * Missing pieces are counted as zero characters instead of throwing: a
 * message without `content` (or with `content: null`) and a tool call
 * without `name` / `arguments` simply contribute nothing to the estimate.
 *
 * @param {Iterable<object>} messages - Transcript messages. Each may carry
 *   `content` and an optional `toolCalls` array of `{ name, arguments }`.
 * @returns {number} Estimated token count: `ceil(totalChars / 4)`.
 */
export function estimateTranscriptTokens(messages) {
    let chars = 0;
    for (const m of messages) {
        // `?.length ?? 0` keeps the original count for any value that has
        // a length and treats null/undefined as empty.
        chars += m.content?.length ?? 0;
        for (const c of m.toolCalls ?? []) {
            chars += (c.name?.length ?? 0) + (c.arguments?.length ?? 0);
        }
    }
    return Math.ceil(chars / 4);
}
20
/** Names of the tools whose arguments are inspected for a file path. */
const FILE_TOOL_NAMES = new Set(['read', 'write', 'edit', 'multi_edit', 'multiEdit']);
/**
 * Tally tool-call metadata across the slice of messages being dropped.
 * Only messages with `role === 'assistant'` are inspected. `bash` calls
 * are counted separately, and file tools contribute their target path to
 * a de-duplicated set. Argument JSON is parsed best-effort: a call whose
 * arguments fail to parse still counts as a tool call but adds no file.
 *
 * @param {Array<object>} dropped - Messages removed from the transcript.
 * @returns {{toolCalls: number, fileCount: number, bashCalls: number, messagesDropped: number}}
 */
export function summarizeDroppedTurns(dropped) {
    const touchedFiles = new Set();
    let totalCalls = 0;
    let shellCalls = 0;
    for (const message of dropped) {
        if (message.role !== 'assistant') {
            continue;
        }
        const invocations = message.toolCalls ?? [];
        totalCalls += invocations.length;
        for (const invocation of invocations) {
            if (invocation.name === 'bash') {
                shellCalls += 1;
            }
            else if (FILE_TOOL_NAMES.has(invocation.name)) {
                const target = extractPath(invocation.arguments);
                if (target) {
                    touchedFiles.add(target);
                }
            }
        }
    }
    return {
        toolCalls: totalCalls,
        fileCount: touchedFiles.size,
        bashCalls: shellCalls,
        messagesDropped: dropped.length,
    };
}
/**
 * Pull the file path out of a tool call's raw JSON argument string.
 * Looks at `path` first and falls back to `filePath` when `path` is
 * null or absent.
 *
 * @param {string} rawArgs - JSON-encoded tool arguments.
 * @returns {string|null} A non-empty path, or `null` when the input is
 *   empty, is not valid JSON, is not a plain object, or has no usable path.
 */
function extractPath(rawArgs) {
    if (!rawArgs) {
        return null;
    }
    let decoded;
    try {
        decoded = JSON.parse(rawArgs);
    }
    catch {
        return null;
    }
    const isRecord = Boolean(decoded) && typeof decoded === 'object' && !Array.isArray(decoded);
    if (!isRecord) {
        return null;
    }
    const candidate = decoded['path'] ?? decoded['filePath'];
    return typeof candidate === 'string' && candidate.length > 0 ? candidate : null;
}
78
/**
 * Build the deterministic gist text that goes into the synthetic system
 * message. The wording is fixed and only the three counters vary, so
 * spec assertions and operator logs see the same shape every turn.
 *
 * @param {{toolCalls: number, fileCount: number, bashCalls: number}} stats
 *   Counters describing the dropped turns.
 * @returns {string} The single-line `[auto-compact]` sentinel.
 */
export function renderAutoCompactSentinel(stats) {
    const { toolCalls, fileCount, bashCalls } = stats;
    const tally = `(${toolCalls} tool calls, ${fileCount} files read, ${bashCalls} bash commands)`;
    // Every segment is separated by exactly one space in the final string.
    return [
        '[auto-compact] Earlier turns',
        tally,
        'summarized to free transcript headroom.',
        'Recent turns and the original task remain in context;',
        're-read any earlier file by name if you need its contents again.',
    ].join(' ');
}
90
/**
 * Minimum transcript length (in messages) before compact is allowed.
 * We always retain `system + user` (the first 2) + the last 2 turns,
 * so anything <= 4 messages has nothing in the middle to drop.
 * Compacting a 4-message transcript would either be a no-op or
 * accidentally drop the user's original task.
 */
export const MIN_COMPACT_TRANSCRIPT_LENGTH = 5;
/**
 * Pure gate. Returns `compact` when ALL of:
 *   - `config.enabled` is true
 *   - transcript length >= 5 (need history to drop)
 *   - `thresholdRatio * maxTokens` floors to a finite, positive number
 *   - estimated transcript tokens >= that threshold
 *
 * Otherwise returns `skip` with a `reason` of `disabled`,
 * `transcript-too-short`, `invalid-budget`, or `below-threshold`.
 *
 * @param {{transcript: Array<object>, maxTokens: number, config: {enabled: boolean, thresholdRatio: number}}} input
 * @returns {{kind: 'skip', reason: string, usedTokens: number} | {kind: 'compact', usedTokens: number, thresholdTokens: number}}
 */
export function evaluateAutoCompactDecision(input) {
    const usedTokens = estimateTranscriptTokens(input.transcript);
    if (!input.config.enabled) {
        return { kind: 'skip', reason: 'disabled', usedTokens };
    }
    if (input.transcript.length < MIN_COMPACT_TRANSCRIPT_LENGTH) {
        return { kind: 'skip', reason: 'transcript-too-short', usedTokens };
    }
    const thresholdTokens = Math.floor(input.config.thresholdRatio * input.maxTokens);
    // A missing or zero budget yields NaN or 0 here. `usedTokens < NaN` is
    // always false, so without this guard the gate would fire on every turn.
    if (!Number.isFinite(thresholdTokens) || thresholdTokens <= 0) {
        return { kind: 'skip', reason: 'invalid-budget', usedTokens };
    }
    if (usedTokens < thresholdTokens) {
        return { kind: 'skip', reason: 'below-threshold', usedTokens };
    }
    return { kind: 'compact', usedTokens, thresholdTokens };
}
118
/**
 * Produce a compacted copy of the transcript. The first two messages
 * (system + user task) and the last two messages are kept verbatim.
 * Everything in between is replaced by one synthetic `system` message
 * whose content is the gist describing what was dropped.
 *
 * Callers are expected to have checked that the decision is `compact`
 * (length >= MIN_COMPACT_TRANSCRIPT_LENGTH). As a safety net, a shorter
 * transcript is returned as an unchanged shallow copy with zeroed stats,
 * so the retained prefix can never be corrupted.
 *
 * NOTE(review): the two retained tail messages are taken purely by
 * position. Confirm with the driver that a tail message can never depend
 * on a dropped message.
 *
 * @param {Array<object>} transcript - Full message list; not mutated.
 * @returns {{transcript: Array<object>, droppedCount: number, gist: string, stats: object, preUsedTokens: number, postUsedTokens: number}}
 */
export function compactTranscript(transcript) {
    const preUsedTokens = estimateTranscriptTokens(transcript);
    if (transcript.length < MIN_COMPACT_TRANSCRIPT_LENGTH) {
        const emptyStats = { toolCalls: 0, fileCount: 0, bashCalls: 0, messagesDropped: 0 };
        return {
            transcript: transcript.slice(),
            droppedCount: 0,
            gist: '',
            stats: emptyStats,
            preUsedTokens,
            postUsedTokens: preUsedTokens,
        };
    }
    // Two messages are pinned at each end; only the span between is summarised.
    const keepHead = 2;
    const keepTail = 2;
    const dropped = transcript.slice(keepHead, -keepTail);
    const stats = summarizeDroppedTurns(dropped);
    const gist = renderAutoCompactSentinel(stats);
    const compacted = [
        ...transcript.slice(0, keepHead),
        { role: 'system', content: gist },
        ...transcript.slice(-keepTail),
    ];
    return {
        transcript: compacted,
        droppedCount: dropped.length,
        gist,
        stats,
        preUsedTokens,
        postUsedTokens: estimateTranscriptTokens(compacted),
    };
}
164
/**
 * One-shot helper for `runEngineLoop`: run the gate, then compact if it
 * says so. Yields `null` whenever the gate decides to skip, so the loop
 * driver can branch cheaply without destructuring two layers of records.
 *
 * @param {Array<object>} transcript - Current message list.
 * @param {number} maxTokens - Token budget the threshold ratio applies to.
 * @param {{enabled: boolean, thresholdRatio: number}} config - Resolved auto-compact settings.
 * @returns {object|null} The `compactTranscript` result, or `null` on skip.
 */
export function maybeCompact(transcript, maxTokens, config) {
    const { kind } = evaluateAutoCompactDecision({ transcript, maxTokens, config });
    return kind === 'skip' ? null : compactTranscript(transcript);
}
179
+ //# sourceMappingURL=auto-compact.js.map
@@ -1,3 +1,60 @@
1
/**
 * Auto-compact (mid-loop transcript summarization) default trip point as
 * a fraction of the per-command `maxTokens` envelope. CEO P1 #14 (CC
 * parity): when transcript char-count tokens cross 75% of the budget,
 * the engine loop drops the middle turns and inserts a deterministic
 * `[auto-compact]` sentinel so the loop can continue without the model
 * tripping the `budget_exhausted` terminal status mid-build.
 *
 * Empirically — `pugi code "big refactor"` hits the 80k cap on turn 4-5
 * and refuses to finish; `pugi fix` does the same at 50k. Auto-compact
 * keeps the recent N turns + a one-line gist of the dropped tool calls
 * so the model retains the most recent state without paying for the
 * full prefix.
 *
 * Operators can opt out / retune via `.pugi/settings.json`:
 *
 *   {
 *     "autoCompact": { "enabled": true, "thresholdRatio": 0.75 }
 *   }
 *
 * Bad values fall back silently to the default — the engine loop never
 * crashes on a malformed settings field (mirrors `resolveBudget`).
 */
export const AUTO_COMPACT_THRESHOLD_RATIO = 0.75;
export const DEFAULT_AUTO_COMPACT_CONFIG = {
    enabled: true,
    thresholdRatio: AUTO_COMPACT_THRESHOLD_RATIO,
};
/**
 * Pull the auto-compact override from `.pugi/settings.json`. Uses the
 * same defensive-cast pattern as `readSettingsBudget` so an unknown
 * field shape silently falls back to defaults (the gate is a comfort
 * feature; a malformed settings line must not break the engine loop).
 *
 * Each field is validated on its own: `enabled` must be a boolean, and
 * `thresholdRatio` must be a finite number in `(0, 1]`. A field that
 * fails its check takes the default while the other is still honoured.
 *
 * The result is always a fresh object, so a caller that mutates its
 * resolved config cannot alter `DEFAULT_AUTO_COMPACT_CONFIG`.
 *
 * @param {object|null|undefined} settings - Parsed settings, if any.
 * @returns {{enabled: boolean, thresholdRatio: number}} The merged
 *   config — caller never sees `undefined`.
 */
export function resolveAutoCompactConfig(settings) {
    const root = settings?.autoCompact;
    if (!root || typeof root !== 'object' || Array.isArray(root)) {
        // Copy rather than alias the exported default.
        return { ...DEFAULT_AUTO_COMPACT_CONFIG };
    }
    const enabledRaw = root['enabled'];
    const thresholdRaw = root['thresholdRatio'];
    const enabled = typeof enabledRaw === 'boolean'
        ? enabledRaw
        : DEFAULT_AUTO_COMPACT_CONFIG.enabled;
    const thresholdIsValid = typeof thresholdRaw === 'number'
        && Number.isFinite(thresholdRaw)
        && thresholdRaw > 0
        && thresholdRaw <= 1;
    const thresholdRatio = thresholdIsValid
        ? thresholdRaw
        : DEFAULT_AUTO_COMPACT_CONFIG.thresholdRatio;
    return { enabled, thresholdRatio };
}
1
58
  /**
2
59
  * β1 defaults. Source of truth for the per-command budget envelope.
3
60
  * The runtime is allowed to look these up directly (no need to round
@@ -6,7 +6,8 @@ import { FileReadCache } from '../file-cache.js';
6
6
  import { loadSettings } from '../settings.js';
7
7
  import { openSession, recordToolCall, recordToolResult } from '../session.js';
8
8
  import { prewarmRealDispatch } from '../subagents/dispatcher.js';
9
- import { resolveBudget } from './budgets.js';
9
+ import { resolveAutoCompactConfig, resolveBudget } from './budgets.js';
10
+ import { maybeCompact } from './auto-compact.js';
10
11
  import { buildExecutor, buildToolsSchema } from './tool-bridge.js';
11
12
  import { personaSlugFor, systemPromptFor } from './prompts.js';
12
13
  import { CancellationToken } from '../repl/cancellation.js';
@@ -188,6 +189,13 @@ export class NativePugiEngineAdapter {
188
189
  // budget so a careless caller cannot disable the call-count
189
190
  // guard by setting only token count.
190
191
  const budget = resolveBudget(kind, settings, task.budget?.tokens ? { maxTokens: task.budget.tokens } : undefined);
192
+ // CEO P1 #14 (auto-compact, 2026-05-29): resolve the per-workspace
193
+ // override of the 75% threshold gate. Default is `{ enabled: true,
194
+ // thresholdRatio: 0.75 }`; operators kill it via
195
+ // `.pugi/settings.json::autoCompact.enabled = false` or retune the
196
+ // ratio. The resolved config is captured by the closure that
197
+ // `runEngineLoop` invokes pre-send on every turn.
198
+ const autoCompactConfig = resolveAutoCompactConfig(settings);
191
199
  // β3 streaming: pre-build the typed stream event queue so the hook
192
200
  // callbacks below can push live events that this async generator
193
201
  // yields IMMEDIATELY (instead of buffering until `runEngineLoop`
@@ -364,6 +372,27 @@ export class NativePugiEngineAdapter {
364
372
  // per-run log for operators and the cabinet UI (Sprint 2B).
365
373
  const sessionEventsPath = openSessionMirror(root, session.id);
366
374
  const hooks = {
375
+ // CEO P1 #14 (auto-compact, 2026-05-29): single operator-visible
376
+ // line on stderr — keep parity with Claude Code's
377
+ // `Compacted N turns into Y tokens; continuing.` message. We mirror
378
+ // the event into the session log + stream emitter as a `status`
379
+ // frame so that admin-api SSE consumers + the cabinet UI render
380
+ // it without a schema change.
381
+ onAutoCompact: (event) => {
382
+ const pct = Math.round((event.preUsedTokens / Math.max(1, event.maxTokens)) * 100);
383
+ const line = `engine: auto-compacted ${event.droppedCount} turns at ${event.preUsedTokens}/${event.maxTokens} (${pct}%)`;
384
+ // Single-line stderr write — operator-visible per spec.
385
+ process.stderr.write(`${line}\n`);
386
+ emitStream({ type: 'status', message: line });
387
+ appendSessionMirror(sessionEventsPath, {
388
+ type: 'auto_compact',
389
+ droppedCount: event.droppedCount,
390
+ preUsedTokens: event.preUsedTokens,
391
+ postUsedTokens: event.postUsedTokens,
392
+ maxTokens: event.maxTokens,
393
+ gist: event.gist,
394
+ });
395
+ },
367
396
  onTurnStart: (turnIndex, messageCount) => {
368
397
  const msg = `turn ${turnIndex + 1}: requesting model (transcript=${messageCount} messages)`;
369
398
  emitStream({ type: 'status', message: msg });
@@ -671,6 +700,14 @@ export class NativePugiEngineAdapter {
671
700
  command: kind,
672
701
  tag: dispatchTagFor(kind),
673
702
  model: this.options.model,
703
+ // CEO P1 #14 (auto-compact, 2026-05-29): pluggable compactor
704
+ // hook. The SDK driver invokes this pre-`client.send` on every
705
+ // turn. `maybeCompact` returns `null` below the 75% threshold
706
+ // or when the transcript is too short to drop history — the
707
+ // loop continues unchanged on the cold path. When it returns
708
+ // a result, the driver swaps the transcript + fires the
709
+ // `onAutoCompact` hook above which emits the stderr line.
710
+ autoCompact: ({ transcript, maxTokens }) => maybeCompact(transcript, maxTokens, autoCompactConfig),
674
711
  });
675
712
  }
676
713
  catch (err) {
@@ -44,7 +44,7 @@ export function sanitizeSemver(raw) {
44
44
  * during import). When bumping the CLI version BOTH literals must be
45
45
  * updated; the release smoke-test (`pack:smoke`) verifies they agree.
46
46
  */
47
- export const PUGI_CLI_VERSION = sanitizeSemver('0.1.0-beta.50');
47
+ export const PUGI_CLI_VERSION = sanitizeSemver('0.1.0-beta.51');
48
48
  /**
49
49
  * Outbound: the CLI's installed semver. Read at request time by
50
50
  * `version-interceptor.ts` and injected on every `fetch` call.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@pugi/cli",
3
- "version": "0.1.0-beta.50",
3
+ "version": "0.1.0-beta.51",
4
4
  "description": "Pugi CLI - terminal-native software execution system",
5
5
  "homepage": "https://pugi.io",
6
6
  "repository": {
@@ -54,8 +54,8 @@
54
54
  "turndown": "^7.2.4",
55
55
  "undici": "^8.3.0",
56
56
  "zod": "^3.23.0",
57
- "@pugi/personas": "0.1.2",
58
- "@pugi/sdk": "0.1.0-beta.50"
57
+ "@pugi/sdk": "0.1.0-beta.51",
58
+ "@pugi/personas": "0.1.2"
59
59
  },
60
60
  "devDependencies": {
61
61
  "@types/node": "^22.0.0",