@oh-my-pi/pi-coding-agent 15.2.4 → 15.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. package/CHANGELOG.md +20 -0
  2. package/dist/types/config/model-registry.d.ts +26 -0
  3. package/dist/types/config/settings-schema.d.ts +34 -1
  4. package/dist/types/config/settings.d.ts +6 -0
  5. package/dist/types/discovery/helpers.d.ts +1 -0
  6. package/dist/types/goals/runtime.d.ts +4 -0
  7. package/dist/types/modes/components/status-line/types.d.ts +10 -0
  8. package/dist/types/modes/components/status-line.d.ts +16 -0
  9. package/dist/types/modes/interactive-mode.d.ts +3 -1
  10. package/dist/types/modes/types.d.ts +3 -1
  11. package/dist/types/modes/utils/context-usage.d.ts +17 -0
  12. package/dist/types/modes/utils/ui-helpers.d.ts +5 -1
  13. package/dist/types/session/agent-session.d.ts +9 -0
  14. package/dist/types/session/session-manager.d.ts +10 -0
  15. package/dist/types/task/executor.d.ts +3 -1
  16. package/dist/types/task/types.d.ts +35 -0
  17. package/dist/types/tools/bash-command-fixup.d.ts +0 -5
  18. package/dist/types/utils/clipboard.d.ts +3 -1
  19. package/dist/types/utils/image-resize.d.ts +4 -1
  20. package/package.json +7 -7
  21. package/src/config/model-registry.ts +46 -21
  22. package/src/config/settings-schema.ts +29 -1
  23. package/src/config/settings.ts +19 -0
  24. package/src/discovery/helpers.ts +5 -1
  25. package/src/extensibility/plugins/legacy-pi-compat.ts +27 -5
  26. package/src/goals/runtime.ts +35 -13
  27. package/src/hashline/parser.ts +6 -1
  28. package/src/internal-urls/docs-index.generated.ts +2 -1
  29. package/src/main.ts +1 -1
  30. package/src/modes/components/model-selector.ts +53 -22
  31. package/src/modes/components/status-line/segments.ts +53 -0
  32. package/src/modes/components/status-line/types.ts +4 -0
  33. package/src/modes/components/status-line.ts +243 -15
  34. package/src/modes/controllers/command-controller.ts +9 -0
  35. package/src/modes/controllers/event-controller.ts +8 -0
  36. package/src/modes/interactive-mode.ts +23 -8
  37. package/src/modes/theme/theme.ts +1 -1
  38. package/src/modes/types.ts +1 -1
  39. package/src/modes/utils/context-usage.ts +42 -8
  40. package/src/modes/utils/ui-helpers.ts +11 -1
  41. package/src/prompts/agents/frontmatter.md +1 -0
  42. package/src/sdk.ts +24 -0
  43. package/src/session/agent-session.ts +70 -0
  44. package/src/session/session-manager.ts +119 -1
  45. package/src/slash-commands/builtin-registry.ts +15 -0
  46. package/src/task/executor.ts +50 -1
  47. package/src/task/index.ts +11 -0
  48. package/src/task/render.ts +26 -2
  49. package/src/task/types.ts +35 -0
  50. package/src/tools/bash-command-fixup.ts +0 -10
  51. package/src/tools/bash.ts +1 -9
  52. package/src/utils/clipboard.ts +79 -3
  53. package/src/utils/image-resize.ts +78 -30
  54. package/dist/types/modes/components/status-line-segment-editor.d.ts +0 -24
  55. package/src/modes/components/status-line-segment-editor.ts +0 -359
@@ -1,4 +1,6 @@
1
1
  import * as fs from "node:fs";
2
+ import type { AgentMessage } from "@oh-my-pi/pi-agent-core";
3
+ import { estimateTokens } from "@oh-my-pi/pi-agent-core/compaction";
2
4
  import { type Component, truncateToWidth, visibleWidth } from "@oh-my-pi/pi-tui";
3
5
  import { formatCount, getProjectDir } from "@oh-my-pi/pi-utils";
4
6
  import { $ } from "bun";
@@ -9,7 +11,7 @@ import type { AgentSession } from "../../session/agent-session";
9
11
  import * as git from "../../utils/git";
10
12
  import { getSessionAccentAnsi, getSessionAccentHex } from "../../utils/session-color";
11
13
  import { sanitizeStatusText } from "../shared";
12
- import { computeContextBreakdown } from "../utils/context-usage";
14
+ import { computeNonMessageTokens } from "../utils/context-usage";
13
15
  import {
14
16
  canReuseCachedPr,
15
17
  createPrCacheContext,
@@ -39,9 +41,102 @@ export interface StatusLineSettings {
39
41
  }
40
42
 
41
43
  // ═══════════════════════════════════════════════════════════════════════════
42
- // Rendering Helpers
44
+ // Per-message token cache
43
45
  // ═══════════════════════════════════════════════════════════════════════════
44
46
 
47
+ /**
48
+ * Symbol-keyed sidecar tagged onto each `AgentMessage` to memoize its
49
+ * `estimateTokens` result. Keyed by message identity (the object itself);
50
+ * a cheap content fingerprint detects in-place mutations (post-hoc error
51
+ * attachment, retry-truncated branch rebuild, etc.) and forces recompute.
52
+ *
53
+ * Cache lives on the message — multiple `StatusLineComponent` instances
54
+ * share it for free, and entries collect with the message itself when the
55
+ * conversation is replaced or compacted.
56
+ */
57
+ const kTokenCache = Symbol("statusLine.tokenCache");
58
+ interface TaggedMessage {
59
+ [kTokenCache]?: { fingerprint: string; tokens: number };
60
+ }
61
+
62
+ /**
63
+ * Cheap structural fingerprint mirroring `estimateTokens`'s content walk.
64
+ * O(blocks) — reads string `.length` and primitives; the only serialization is
65
+ * `JSON.stringify` on tool-call arguments, used to measure their length. An
66
+ * in-place mutation that changes any of the length sums or block counts captured
67
+ * here forces the cached `estimateTokens` value to be recomputed (same-length edits are not detected).
68
+ */
69
+ function messageFingerprint(msg: AgentMessage): string {
70
+ const role = (msg as { role?: string }).role ?? "";
71
+ const ts = (msg as { timestamp?: number }).timestamp ?? 0;
72
+ let textLen = 0;
73
+ let blocks = 0;
74
+ let images = 0;
75
+ if (role === "bashExecution") {
76
+ const b = msg as { command?: unknown; output?: unknown };
77
+ if (typeof b.command === "string") textLen += b.command.length;
78
+ if (typeof b.output === "string") textLen += b.output.length;
79
+ } else if (role === "user") {
80
+ const content = (msg as { content?: unknown }).content;
81
+ if (typeof content === "string") {
82
+ textLen += content.length;
83
+ } else if (Array.isArray(content)) {
84
+ blocks = content.length;
85
+ for (const block of content) {
86
+ if (block?.type === "text" && typeof block.text === "string") textLen += block.text.length;
87
+ }
88
+ }
89
+ } else if (role === "assistant") {
90
+ const content = (msg as { content?: unknown }).content;
91
+ if (Array.isArray(content)) {
92
+ blocks = content.length;
93
+ for (const block of content) {
94
+ if (!block || typeof block !== "object") continue;
95
+ const b = block as { type?: string; text?: string; thinking?: string; name?: string; arguments?: unknown };
96
+ if (b.type === "text" && typeof b.text === "string") textLen += b.text.length;
97
+ else if (b.type === "thinking" && typeof b.thinking === "string") textLen += b.thinking.length;
98
+ else if (b.type === "toolCall") {
99
+ if (typeof b.name === "string") textLen += b.name.length;
100
+ // Argument bytes vary; a length proxy is enough to detect in-place edits.
101
+ textLen += b.arguments === undefined ? 0 : JSON.stringify(b.arguments).length;
102
+ }
103
+ }
104
+ }
105
+ } else if (role === "toolResult" || role === "hookMessage") {
106
+ const content = (msg as { content?: unknown }).content;
107
+ if (typeof content === "string") {
108
+ textLen += content.length;
109
+ } else if (Array.isArray(content)) {
110
+ blocks = content.length;
111
+ for (const block of content) {
112
+ if (!block || typeof block !== "object") continue;
113
+ const b = block as { type?: string; text?: string };
114
+ if (b.type === "text" && typeof b.text === "string") textLen += b.text.length;
115
+ else if (b.type === "image") images++;
116
+ }
117
+ }
118
+ } else if (role === "branchSummary" || role === "compactionSummary") {
119
+ const s = (msg as { summary?: unknown }).summary;
120
+ if (typeof s === "string") textLen += s.length;
121
+ }
122
+ return `${role}:${ts}:${textLen}:${blocks}:${images}`;
123
+ }
124
+
125
+ /**
126
+ * Token count for a single message, using the per-message sidecar cache.
127
+ * The caller MUST skip caching for the last message during streaming —
128
+ * it may still be growing, so its tokens must be recomputed on each refresh.
129
+ */
130
+ function tokensForMessage(msg: AgentMessage): number {
131
+ const fp = messageFingerprint(msg);
132
+ const tagged = msg as TaggedMessage;
133
+ const cached = tagged[kTokenCache];
134
+ if (cached && cached.fingerprint === fp) return cached.tokens;
135
+ const tokens = estimateTokens(msg);
136
+ tagged[kTokenCache] = { fingerprint: fp, tokens };
137
+ return tokens;
138
+ }
139
+
45
140
  // ═══════════════════════════════════════════════════════════════════════════
46
141
  // StatusLineComponent
47
142
  // ═══════════════════════════════════════════════════════════════════════════
@@ -73,9 +168,27 @@ export class StatusLineComponent implements Component {
73
168
  #lastTokensPerSecond: number | null = null;
74
169
  #lastTokensPerSecondTimestamp: number | null = null;
75
170
 
76
- // Context breakdown caching (2s TTL — aligns with /context command output)
77
- #cachedBreakdown: { usedTokens: number; contextWindow: number } | null = null;
78
- #breakdownFetchedAt = 0;
171
+ // Anthropic usage caching (5-min TTL, OAuth/sub only)
172
+ #cachedUsage: {
173
+ fiveHour?: { percent: number; resetMinutes?: number };
174
+ sevenDay?: { percent: number; resetHours?: number };
175
+ } | null = null;
176
+ #usageFetchedAt = 0;
177
+ #usageInFlight = false;
178
+ // Context breakdown — incremental cache. Replaces the previous 2-second
179
+ // TTL design (which re-walked every message on each refresh and produced
180
+ // ~1.1 s sync freezes on 2,000+ message sessions because `updateEditorTopBorder`
181
+ // is called on every agent event in event-controller). The new scheme
182
+ // caches by message-object identity (a Symbol-keyed sidecar on each
183
+ // message) plus a cheap content fingerprint, so in-place mutations of
184
+ // an existing message (post-hoc error attachment, retry-truncated
185
+ // branch rebuild, replaceMessages with the same length) are detected
186
+ // and recomputed.
187
+ // Cached non-message total (system prompt + tools + skills). Invalidated
188
+ // when the inputs-identity fingerprint changes (model swap, skill toggle,
189
+ // tool registration).
190
+ #nonMessageTokensCache: number | undefined;
191
+ #nonMessageInputsKey: string | undefined;
79
192
 
80
193
  constructor(private readonly session: AgentSession) {
81
194
  this.#settings = {
@@ -309,22 +422,136 @@ export class StatusLineComponent implements Component {
309
422
  return null;
310
423
  }
311
424
 
312
- #getCachedContextBreakdown(): { usedTokens: number; contextWindow: number } {
425
+ /**
426
+ * Background-refresh the Anthropic OAuth quota report. Guarded by a 5-min
427
+ * TTL on both success (cache lifetime) and error (backoff). Exposed
428
+ * (non-private) so unit tests can verify the backoff invariant.
429
+ */
430
+ refreshUsageInBackground(): void {
313
431
  const now = Date.now();
314
- if (!this.#cachedBreakdown || now - this.#breakdownFetchedAt > 2_000) {
315
- const breakdown = computeContextBreakdown(this.session);
316
- this.#cachedBreakdown = {
317
- usedTokens: breakdown.usedTokens,
318
- contextWindow: breakdown.contextWindow,
319
- };
320
- this.#breakdownFetchedAt = now;
432
+ if (this.#usageInFlight) return;
433
+ if (this.#usageFetchedAt > 0 && now - this.#usageFetchedAt < 5 * 60_000) return;
434
+ const fetcher = (this.session as { fetchUsageReports?: () => Promise<unknown> }).fetchUsageReports;
435
+ if (typeof fetcher !== "function") return;
436
+ this.#usageInFlight = true;
437
+ void fetcher
438
+ .call(this.session)
439
+ .then(reports => {
440
+ this.#cachedUsage = this.#normalizeUsageReports(reports);
441
+ this.#usageFetchedAt = Date.now();
442
+ })
443
+ .catch(() => {
444
+ // Backoff on error: stamp the fetch time so the 5-min TTL guard
445
+ // also acts as an error budget. Without this, every render
446
+ // kicks off another fetch (gated only by #usageInFlight),
447
+ // which hammers the endpoint during a network outage / 5xx.
448
+ this.#usageFetchedAt = Date.now();
449
+ })
450
+ .finally(() => {
451
+ this.#usageInFlight = false;
452
+ });
453
+ }
454
+
455
+ #normalizeUsageReports(reports: unknown): {
456
+ fiveHour?: { percent: number; resetMinutes?: number };
457
+ sevenDay?: { percent: number; resetHours?: number };
458
+ } | null {
459
+ if (!Array.isArray(reports)) return null;
460
+ let fiveHour: { percent: number; resetMinutes?: number } | undefined;
461
+ let sevenDay: { percent: number; resetHours?: number } | undefined;
462
+ const now = Date.now();
463
+ for (const report of reports) {
464
+ if (!report || typeof report !== "object") continue;
465
+ const limits = (report as { limits?: unknown }).limits;
466
+ if (!Array.isArray(limits)) continue;
467
+ for (const limit of limits) {
468
+ if (!limit || typeof limit !== "object") continue;
469
+ const l = limit as {
470
+ scope?: { windowId?: string; tier?: string };
471
+ window?: { resetsAt?: number };
472
+ amount?: { usedFraction?: number };
473
+ };
474
+ const fraction = l.amount?.usedFraction;
475
+ if (typeof fraction !== "number") continue;
476
+ const windowId = l.scope?.windowId;
477
+ const tier = l.scope?.tier;
478
+ const resetsAt = l.window?.resetsAt;
479
+ if (windowId === "5h" && !tier && !fiveHour) {
480
+ fiveHour = {
481
+ percent: fraction * 100,
482
+ resetMinutes:
483
+ typeof resetsAt === "number" ? Math.max(0, Math.round((resetsAt - now) / 60_000)) : undefined,
484
+ };
485
+ } else if (windowId === "7d" && !tier && !sevenDay) {
486
+ sevenDay = {
487
+ percent: fraction * 100,
488
+ resetHours:
489
+ typeof resetsAt === "number" ? Math.max(0, Math.round((resetsAt - now) / 3_600_000)) : undefined,
490
+ };
491
+ }
492
+ }
321
493
  }
322
- return this.#cachedBreakdown;
494
+ if (!fiveHour && !sevenDay) return null;
495
+ return { fiveHour, sevenDay };
496
+ }
497
+
498
+ /**
499
+ * Compute the (cached) used-tokens / context-window totals for the
500
+ * status-line context% segment. Exposed (non-private) so unit tests can
501
+ * verify the incremental-cache invariants; not part of any external
502
+ * API.
503
+ */
504
+ getCachedContextBreakdown(): { usedTokens: number; contextWindow: number } {
505
+ const messages = this.session.messages ?? [];
506
+ const contextWindow = this.session.model?.contextWindow ?? 0;
507
+
508
+ // 1) Non-message tokens (system prompt + tools + skills). Refresh only
509
+ // when the inputs identity fingerprint changes — usually never
510
+ // during a streaming turn. ~10-30 ms when it does refresh.
511
+ const inputsKey = this.#computeNonMessageInputsKey();
512
+ if (this.#nonMessageTokensCache === undefined || this.#nonMessageInputsKey !== inputsKey) {
513
+ this.#nonMessageTokensCache = computeNonMessageTokens(this.session);
514
+ this.#nonMessageInputsKey = inputsKey;
515
+ }
516
+
517
+ // 2) Message tokens — incremental. The sidecar cache lives on the
518
+ // message object itself (Symbol-keyed), keyed by identity and
519
+ // validated by a cheap content fingerprint. Mutations that
520
+ // replace messages (replaceMessages, branch rebuild, compaction)
521
+ // yield fresh objects → cache miss → recompute. In-place
522
+ // mutations on the same object are caught by fingerprint
523
+ // mismatch. The LAST message is always recomputed because it
524
+ // may still be growing during streaming.
525
+ let messagesTokens = 0;
526
+ const lastIdx = messages.length - 1;
527
+ for (let i = 0; i < messages.length; i++) {
528
+ messagesTokens += i === lastIdx ? estimateTokens(messages[i]) : tokensForMessage(messages[i]);
529
+ }
530
+
531
+ const usedTokens = this.#nonMessageTokensCache + messagesTokens;
532
+ return { usedTokens, contextWindow };
533
+ }
534
+
535
+ /**
536
+ * Build an identity fingerprint for the non-message inputs (system prompt,
537
+ * tools, skills). When this changes, the non-message token cache must be
538
+ * recomputed. Cheap: just lengths + first-string-length. Doesn't need to
539
+ * be cryptographically unique — only stable for the same inputs.
540
+ */
541
+ #computeNonMessageInputsKey(): string {
542
+ const sp = this.session.systemPrompt ?? [];
543
+ const tools = this.session.agent?.state?.tools ?? [];
544
+ const skills = this.session.skills ?? [];
545
+ const modelId = this.session.model?.id ?? "";
546
+ return `${modelId}|${sp.length}:${sp[0]?.length ?? 0}|${tools.length}|${skills.length}`;
323
547
  }
324
548
 
325
549
  #buildSegmentContext(width: number): SegmentContext {
326
550
  const state = this.session.state;
327
551
 
552
+ // Trigger background fetch (5-min TTL); render uses cached value
553
+ this.refreshUsageInBackground();
554
+
328
555
  // Get usage statistics
329
556
  const aggregateUsageStats = this.session.sessionManager?.getUsageStatistics() ?? {
330
557
  input: 0,
@@ -340,7 +567,7 @@ export class StatusLineComponent implements Component {
340
567
  };
341
568
 
342
569
  // Context usage — aligned with /context command so both surfaces report the same value
343
- const breakdown = this.#getCachedContextBreakdown();
570
+ const breakdown = this.getCachedContextBreakdown();
344
571
  const contextTokens = breakdown.usedTokens;
345
572
  const contextWindow = breakdown.contextWindow || state.model?.contextWindow || 0;
346
573
  const contextPercent = contextWindow > 0 ? (contextTokens / contextWindow) * 100 : 0;
@@ -363,6 +590,7 @@ export class StatusLineComponent implements Component {
363
590
  status: this.#getGitStatus(),
364
591
  pr: this.#lookupPr(),
365
592
  },
593
+ usage: this.#cachedUsage,
366
594
  };
367
595
  }
368
596
 
@@ -395,6 +395,15 @@ export class CommandController {
395
395
  info += `${theme.fg("dim", "Tool Calls:")} ${stats.toolCalls}\n`;
396
396
  info += `${theme.fg("dim", "Tool Results:")} ${stats.toolResults}\n`;
397
397
  info += `${theme.fg("dim", "Total:")} ${stats.totalMessages}\n\n`;
398
+ // Append-only context
399
+ {
400
+ const setting = this.ctx.settings.get("provider.appendOnlyContext") ?? "auto";
401
+ const provider = this.ctx.session.model?.provider;
402
+ const mode = setting === "on" ? true : setting === "off" ? false : provider === "deepseek";
403
+ const activeLabel = mode ? theme.fg("success", "active") : theme.fg("dim", "inactive");
404
+ const settingLabel = setting === "auto" ? `${setting} (${provider ?? "?"})` : setting;
405
+ info += `${theme.fg("dim", "Append-Only:")} ${activeLabel} (setting: ${settingLabel})\n`;
406
+ }
398
407
  info += `${theme.bold("Tokens")}\n`;
399
408
  info += `${theme.fg("dim", "Input:")} ${stats.tokens.input.toLocaleString()}\n`;
400
409
  info += `${theme.fg("dim", "Output:")} ${stats.tokens.output.toLocaleString()}\n`;
@@ -760,6 +760,14 @@ export class EventController {
760
760
  if (this.ctx.isBackgrounded === false) return;
761
761
  const notify = settings.get("completion.notify");
762
762
  if (notify === "off") return;
763
+
764
+ // Skip when the turn was aborted (e.g. ask cancelled with Ctrl+C) or
765
+ // errored — those are not "Task complete" events. Mirrors the gate
766
+ // already used by #currentContextTokens, #handleMessageEnd, and the
767
+ // retry / TTSR / compaction skip paths across agent-session.ts.
768
+ const last = this.ctx.session.getLastAssistantMessage?.();
769
+ if (last?.stopReason === "aborted" || last?.stopReason === "error") return;
770
+
763
771
  const title = this.ctx.sessionManager.getSessionName();
764
772
  const message = title ? `${title}: Complete` : "Complete";
765
773
  TERMINAL.sendNotification(message);
@@ -691,7 +691,7 @@ export class InteractiveMode implements InteractiveModeContext {
691
691
  }
692
692
 
693
693
  #isLoopAutoSubmitBlocked(): boolean {
694
- return this.session.isStreaming || this.session.isCompacting;
694
+ return this.session.isStreaming || this.session.isCompacting || this.session.hasPostPromptWork;
695
695
  }
696
696
 
697
697
  #submitLoopPromptWhenReady(prompt: string): void {
@@ -1876,12 +1876,23 @@ export class InteractiveMode implements InteractiveModeContext {
1876
1876
  }
1877
1877
  }
1878
1878
 
1879
- async #handleGoalSetSubcommand(rest: string): Promise<void> {
1880
- if (this.goalModeEnabled) {
1881
- this.showStatus("Goal mode is already active. Use /goal drop to start over.");
1882
- return;
1879
+ async #replaceGoalFromObjective(objective: string): Promise<void> {
1880
+ const state = await this.session.goalRuntime.replaceGoal({ objective });
1881
+ this.session.setGoalModeState(state);
1882
+ this.goalModeEnabled = true;
1883
+ this.goalModePaused = false;
1884
+ this.#resetGoalContinuationSuppression();
1885
+ this.#updateGoalModeStatus();
1886
+ if (this.session.isStreaming) {
1887
+ await this.session.sendGoalModeContext({ deliverAs: "steer" });
1883
1888
  }
1884
- if (this.#getPausedGoalState()) {
1889
+ if (this.onInputCallback) {
1890
+ this.onInputCallback(this.startPendingSubmission({ text: objective }));
1891
+ }
1892
+ }
1893
+
1894
+ async #handleGoalSetSubcommand(rest: string): Promise<void> {
1895
+ if (!this.goalModeEnabled && this.#getPausedGoalState()) {
1885
1896
  this.showWarning("Resume the current goal first, or drop it before setting a new objective.");
1886
1897
  return;
1887
1898
  }
@@ -1889,6 +1900,10 @@ export class InteractiveMode implements InteractiveModeContext {
1889
1900
  ? rest.trim()
1890
1901
  : (await this.showHookEditor("Goal objective", undefined, undefined, { promptStyle: true }))?.trim();
1891
1902
  if (!objective) return;
1903
+ if (this.goalModeEnabled) {
1904
+ await this.#replaceGoalFromObjective(objective);
1905
+ return;
1906
+ }
1892
1907
  await this.#startGoalFromObjective(objective);
1893
1908
  }
1894
1909
 
@@ -2312,8 +2327,8 @@ export class InteractiveMode implements InteractiveModeContext {
2312
2327
  this.#uiHelpers.renderSessionContext(sessionContext, options);
2313
2328
  }
2314
2329
 
2315
- renderInitialMessages(prebuiltContext?: SessionContext): void {
2316
- this.#uiHelpers.renderInitialMessages(prebuiltContext);
2330
+ renderInitialMessages(prebuiltContext?: SessionContext, options?: { preserveExistingChat?: boolean }): void {
2331
+ this.#uiHelpers.renderInitialMessages(prebuiltContext, options);
2317
2332
  }
2318
2333
 
2319
2334
  getUserMessageText(message: Message): string {
@@ -295,7 +295,7 @@ const UNICODE_SYMBOLS: SymbolMap = {
295
295
  "thinking.low": "◑ low",
296
296
  "thinking.medium": "◒ med",
297
297
  "thinking.high": "◕ high",
298
- "thinking.xhigh": "◉ xhi",
298
+ "thinking.xhigh": "◉ xhigh",
299
299
  // Checkboxes
300
300
  "checkbox.checked": "☑",
301
301
  "checkbox.unchecked": "☐",
@@ -186,7 +186,7 @@ export interface InteractiveModeContext {
186
186
  sessionContext: SessionContext,
187
187
  options?: { updateFooter?: boolean; populateHistory?: boolean },
188
188
  ): void;
189
- renderInitialMessages(prebuiltContext?: SessionContext): void;
189
+ renderInitialMessages(prebuiltContext?: SessionContext, options?: { preserveExistingChat?: boolean }): void;
190
190
  getUserMessageText(message: Message): string;
191
191
  findLastAssistantMessage(): AssistantMessage | undefined;
192
192
  extractAssistantText(message: AssistantMessage): string;
@@ -37,7 +37,7 @@ export interface ContextBreakdown {
37
37
  freeTokens: number;
38
38
  }
39
39
 
40
- function estimateSkillsTokens(skills: readonly Skill[]): number {
40
+ export function estimateSkillsTokens(skills: readonly Skill[]): number {
41
41
  const fragments: string[] = [];
42
42
  for (const skill of skills) {
43
43
  // "- name: description\n" wire framing tokenizes ~identically to the
@@ -47,7 +47,9 @@ function estimateSkillsTokens(skills: readonly Skill[]): number {
47
47
  return countTokens(fragments);
48
48
  }
49
49
 
50
- function estimateToolSchemaTokens(tools: ReadonlyArray<Pick<Tool, "name" | "description" | "parameters">>): number {
50
+ export function estimateToolSchemaTokens(
51
+ tools: ReadonlyArray<Pick<Tool, "name" | "description" | "parameters">>,
52
+ ): number {
51
53
  const fragments: string[] = [];
52
54
  for (const tool of tools) {
53
55
  fragments.push(tool.name, tool.description);
@@ -60,6 +62,43 @@ function estimateToolSchemaTokens(tools: ReadonlyArray<Pick<Tool, "name" | "desc
60
62
  return countTokens(fragments);
61
63
  }
62
64
 
65
+ /**
66
+ * Compute just the NON-MESSAGE token total: system prompt (with its skills
67
+ * section subtracted, since skills are tokenized separately) + system context
68
+ * (the rest of the system-prompt array) + tools + skills.
69
+ *
70
+ * Exposed so callers like `StatusLineComponent` can cache the non-message
71
+ * total separately from the message total. Non-message inputs (skills,
72
+ * tools, system prompt) change rarely; the message list grows on every
73
+ * streaming turn. Splitting the two lets the caller refresh each on its own
74
+ * cadence — non-message recomputed only when the inputs identity changes,
75
+ * messages walked incrementally as new entries append.
76
+ */
77
+ export function computeNonMessageTokens(session: AgentSession): number {
78
+ const parts = computeNonMessageBreakdown(session);
79
+ return parts.systemPromptTokens + parts.systemContextTokens + parts.toolsTokens + parts.skillsTokens;
80
+ }
81
+
82
+ /**
83
+ * Shared helper for the four non-message token totals. Single source of truth
84
+ * for both `computeNonMessageTokens` (status-line incremental cache) and
85
+ * `computeContextBreakdown` (/context panel). The split avoids drift between
86
+ * the two surfaces — they MUST report the same numbers.
87
+ */
88
+ function computeNonMessageBreakdown(session: AgentSession): {
89
+ skillsTokens: number;
90
+ toolsTokens: number;
91
+ systemContextTokens: number;
92
+ systemPromptTokens: number;
93
+ } {
94
+ const skillsTokens = estimateSkillsTokens(session.skills ?? []);
95
+ const toolsTokens = estimateToolSchemaTokens(session.agent?.state?.tools ?? []);
96
+ const systemPromptParts = session.systemPrompt ?? [];
97
+ const systemContextTokens = countTokens(systemPromptParts.slice(1));
98
+ const systemPromptTokens = Math.max(0, countTokens(systemPromptParts[0] ?? "") - skillsTokens);
99
+ return { skillsTokens, toolsTokens, systemContextTokens, systemPromptTokens };
100
+ }
101
+
63
102
  /**
64
103
  * Compute a breakdown of estimated context usage by category for the active
65
104
  * session and model.
@@ -68,9 +107,6 @@ export function computeContextBreakdown(session: AgentSession): ContextBreakdown
68
107
  const model = session.model;
69
108
  const contextWindow = model?.contextWindow ?? 0;
70
109
 
71
- const skillsTokens = estimateSkillsTokens(session.skills ?? []);
72
- const toolsTokens = estimateToolSchemaTokens(session.agent?.state?.tools ?? []);
73
-
74
110
  let messagesTokens = 0;
75
111
  const convo = session.messages;
76
112
  if (convo) {
@@ -85,9 +121,7 @@ export function computeContextBreakdown(session: AgentSession): ContextBreakdown
85
121
  // Tools = JSON tool schema sent separately on the wire
86
122
  // Skills = the skill list embedded in the system prompt
87
123
  // Messages = conversation messages
88
- const systemPromptParts = session.systemPrompt;
89
- const systemPromptTokens = Math.max(0, countTokens(systemPromptParts?.[0] ?? "") - skillsTokens);
90
- const systemContextTokens = countTokens(systemPromptParts?.slice(1) ?? []);
124
+ const { skillsTokens, toolsTokens, systemContextTokens, systemPromptTokens } = computeNonMessageBreakdown(session);
91
125
 
92
126
  const categories: CategoryInfo[] = [
93
127
  { id: "systemPrompt", label: "System prompt", tokens: systemPromptTokens, color: "accent", glyph: CELL_FILLED },
@@ -29,6 +29,9 @@ import type { SessionContext } from "../../session/session-manager";
29
29
  import { formatBytes, formatDuration } from "../../tools/render-utils";
30
30
 
31
31
  type TextBlock = { type: "text"; text: string };
32
+ interface RenderInitialMessagesOptions {
33
+ preserveExistingChat?: boolean;
34
+ }
32
35
 
33
36
  type QueuedMessages = {
34
37
  steering: string[];
@@ -459,9 +462,10 @@ export class UiHelpers {
459
462
  this.ctx.ui.requestRender();
460
463
  }
461
464
 
462
- renderInitialMessages(prebuiltContext?: SessionContext): void {
465
+ renderInitialMessages(prebuiltContext?: SessionContext, options: RenderInitialMessagesOptions = {}): void {
463
466
  // This path is used to rebuild the visible chat transcript (e.g. after custom/debug UI).
464
467
  // Clear existing rendered chat first to avoid duplicating the full session in the container.
468
+ const preservedChatChildren = options.preserveExistingChat ? this.ctx.chatContainer.children : undefined;
465
469
  this.ctx.chatContainer.clear();
466
470
  this.ctx.pendingMessagesContainer.clear();
467
471
  this.ctx.pendingBashComponents = [];
@@ -486,6 +490,12 @@ export class UiHelpers {
486
490
  const times = compactionCount === 1 ? "1 time" : `${compactionCount} times`;
487
491
  this.ctx.showStatus(`Session compacted ${times}`);
488
492
  }
493
+ if (preservedChatChildren && preservedChatChildren.length > 0) {
494
+ for (const child of preservedChatChildren) {
495
+ this.ctx.chatContainer.addChild(child);
496
+ }
497
+ this.ctx.ui.requestRender();
498
+ }
489
499
  }
490
500
 
491
501
  clearEditor(): void {
@@ -6,5 +6,6 @@ description: {{jsonStringify description}}
6
6
  {{/if}}{{#if model}}model: {{jsonStringify model}}
7
7
  {{/if}}{{#if thinkingLevel}}thinking-level: {{jsonStringify thinkingLevel}}
8
8
  {{/if}}{{#if blocking}}blocking: true
9
+ {{/if}}{{#if autoloadSkills}}autoloadSkills: {{jsonStringify autoloadSkills}}
9
10
  {{/if}}---
10
11
  {{body}}
package/src/sdk.ts CHANGED
@@ -4,6 +4,7 @@ import {
4
4
  type AgentMessage,
5
5
  type AgentTelemetryConfig,
6
6
  type AgentTool,
7
+ AppendOnlyContextManager,
7
8
  INTENT_FIELD,
8
9
  type ThinkingLevel,
9
10
  } from "@oh-my-pi/pi-agent-core";
@@ -589,6 +590,24 @@ function registerPythonCleanup(): void {
589
590
  postmortem.register("python-cleanup", disposeAllKernelSessions);
590
591
  }
591
592
 
593
+ /**
594
+ * Resolve whether to enable append-only context mode based on the setting and provider.
595
+ *
596
+ * - `"on"` → always enable
597
+ * - `"off"` → never enable
598
+ * - `"auto"` → enable for DeepSeek (prefix-caching provider)
599
+ */
600
+ function resolveAppendOnlyMode(setting: "auto" | "on" | "off" | undefined, provider: string): boolean {
601
+ switch (setting ?? "auto") {
602
+ case "on":
603
+ return true;
604
+ case "off":
605
+ return false;
606
+ default:
607
+ return provider === "deepseek";
608
+ }
609
+ }
610
+
592
611
  function customToolToDefinition(tool: CustomTool): ToolDefinition {
593
612
  const definition: ToolDefinition & { [TOOL_DEFINITION_MARKER]: true } = {
594
613
  name: tool.name,
@@ -1897,6 +1916,11 @@ export async function createAgentSession(options: CreateAgentSessionOptions = {}
1897
1916
  intentTracing: !!intentField,
1898
1917
  getToolChoice: () => session?.nextToolChoice(),
1899
1918
  telemetry: options.telemetry,
1919
+ appendOnlyContext: model
1920
+ ? resolveAppendOnlyMode(settings.get("provider.appendOnlyContext"), model.provider)
1921
+ ? new AppendOnlyContextManager()
1922
+ : undefined
1923
+ : undefined,
1900
1924
  });
1901
1925
 
1902
1926
  cursorEventEmitter = event => agent.emitExternalEvent(event);