npm - @oh-my-pi/pi-coding-agent - Versions diffs - 15.2.4 → 15.3.1 - Mend

@oh-my-pi/pi-coding-agent 15.2.4 → 15.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (55)

package/CHANGELOG.md +20 -0
package/dist/types/config/model-registry.d.ts +26 -0
package/dist/types/config/settings-schema.d.ts +34 -1
package/dist/types/config/settings.d.ts +6 -0
package/dist/types/discovery/helpers.d.ts +1 -0
package/dist/types/goals/runtime.d.ts +4 -0
package/dist/types/modes/components/status-line/types.d.ts +10 -0
package/dist/types/modes/components/status-line.d.ts +16 -0
package/dist/types/modes/interactive-mode.d.ts +3 -1
package/dist/types/modes/types.d.ts +3 -1
package/dist/types/modes/utils/context-usage.d.ts +17 -0
package/dist/types/modes/utils/ui-helpers.d.ts +5 -1
package/dist/types/session/agent-session.d.ts +9 -0
package/dist/types/session/session-manager.d.ts +10 -0
package/dist/types/task/executor.d.ts +3 -1
package/dist/types/task/types.d.ts +35 -0
package/dist/types/tools/bash-command-fixup.d.ts +0 -5
package/dist/types/utils/clipboard.d.ts +3 -1
package/dist/types/utils/image-resize.d.ts +4 -1
package/package.json +7 -7
package/src/config/model-registry.ts +46 -21
package/src/config/settings-schema.ts +29 -1
package/src/config/settings.ts +19 -0
package/src/discovery/helpers.ts +5 -1
package/src/extensibility/plugins/legacy-pi-compat.ts +27 -5
package/src/goals/runtime.ts +35 -13
package/src/hashline/parser.ts +6 -1
package/src/internal-urls/docs-index.generated.ts +2 -1
package/src/main.ts +1 -1
package/src/modes/components/model-selector.ts +53 -22
package/src/modes/components/status-line/segments.ts +53 -0
package/src/modes/components/status-line/types.ts +4 -0
package/src/modes/components/status-line.ts +243 -15
package/src/modes/controllers/command-controller.ts +9 -0
package/src/modes/controllers/event-controller.ts +8 -0
package/src/modes/interactive-mode.ts +23 -8
package/src/modes/theme/theme.ts +1 -1
package/src/modes/types.ts +1 -1
package/src/modes/utils/context-usage.ts +42 -8
package/src/modes/utils/ui-helpers.ts +11 -1
package/src/prompts/agents/frontmatter.md +1 -0
package/src/sdk.ts +24 -0
package/src/session/agent-session.ts +70 -0
package/src/session/session-manager.ts +119 -1
package/src/slash-commands/builtin-registry.ts +15 -0
package/src/task/executor.ts +50 -1
package/src/task/index.ts +11 -0
package/src/task/render.ts +26 -2
package/src/task/types.ts +35 -0
package/src/tools/bash-command-fixup.ts +0 -10
package/src/tools/bash.ts +1 -9
package/src/utils/clipboard.ts +79 -3
package/src/utils/image-resize.ts +78 -30
package/dist/types/modes/components/status-line-segment-editor.d.ts +0 -24
package/src/modes/components/status-line-segment-editor.ts +0 -359

package/src/modes/components/status-line.ts CHANGED Viewed

@@ -1,4 +1,6 @@
 import * as fs from "node:fs";
+import type { AgentMessage } from "@oh-my-pi/pi-agent-core";
+import { estimateTokens } from "@oh-my-pi/pi-agent-core/compaction";
 import { type Component, truncateToWidth, visibleWidth } from "@oh-my-pi/pi-tui";
 import { formatCount, getProjectDir } from "@oh-my-pi/pi-utils";
 import { $ } from "bun";
@@ -9,7 +11,7 @@ import type { AgentSession } from "../../session/agent-session";
 import * as git from "../../utils/git";
 import { getSessionAccentAnsi, getSessionAccentHex } from "../../utils/session-color";
 import { sanitizeStatusText } from "../shared";
-import { computeContextBreakdown } from "../utils/context-usage";
+import { computeNonMessageTokens } from "../utils/context-usage";
 import {
 	canReuseCachedPr,
 	createPrCacheContext,
@@ -39,9 +41,102 @@ export interface StatusLineSettings {
 }
 // ═══════════════════════════════════════════════════════════════════════════
-// Rendering Helpers
+// Per-message token cache
 // ═══════════════════════════════════════════════════════════════════════════
+/**
+ * Symbol-keyed sidecar tagged onto each `AgentMessage` to memoize its
+ * `estimateTokens` result. Keyed by message identity (the object itself);
+ * a cheap content fingerprint detects in-place mutations (post-hoc error
+ * attachment, retry-truncated branch rebuild, etc.) and forces recompute.
+ *
+ * Cache lives on the message — multiple `StatusLineComponent` instances
+ * share it for free, and entries collect with the message itself when the
+ * conversation is replaced or compacted.
+ */
+const kTokenCache = Symbol("statusLine.tokenCache");
+interface TaggedMessage {
+	[kTokenCache]?: { fingerprint: string; tokens: number };
+}
+/**
+ * Cheap structural fingerprint mirroring `estimateTokens`'s content walk.
+ * O(blocks) — only reads string `.length` and primitives, never copies or
+ * serializes content. Any in-place mutation that alters total tokenized
+ * content also alters one of the byte-length sums or block counts captured
+ * here, forcing the cached `estimateTokens` value to be recomputed.
+ */
+function messageFingerprint(msg: AgentMessage): string {
+	const role = (msg as { role?: string }).role ?? "";
+	const ts = (msg as { timestamp?: number }).timestamp ?? 0;
+	let textLen = 0;
+	let blocks = 0;
+	let images = 0;
+	if (role === "bashExecution") {
+		const b = msg as { command?: unknown; output?: unknown };
+		if (typeof b.command === "string") textLen += b.command.length;
+		if (typeof b.output === "string") textLen += b.output.length;
+	} else if (role === "user") {
+		const content = (msg as { content?: unknown }).content;
+		if (typeof content === "string") {
+			textLen += content.length;
+		} else if (Array.isArray(content)) {
+			blocks = content.length;
+			for (const block of content) {
+				if (block?.type === "text" && typeof block.text === "string") textLen += block.text.length;
+			}
+		}
+	} else if (role === "assistant") {
+		const content = (msg as { content?: unknown }).content;
+		if (Array.isArray(content)) {
+			blocks = content.length;
+			for (const block of content) {
+				if (!block || typeof block !== "object") continue;
+				const b = block as { type?: string; text?: string; thinking?: string; name?: string; arguments?: unknown };
+				if (b.type === "text" && typeof b.text === "string") textLen += b.text.length;
+				else if (b.type === "thinking" && typeof b.thinking === "string") textLen += b.thinking.length;
+				else if (b.type === "toolCall") {
+					if (typeof b.name === "string") textLen += b.name.length;
+					// Argument bytes vary; a length proxy is enough to detect in-place edits.
+					textLen += b.arguments === undefined ? 0 : JSON.stringify(b.arguments).length;
+				}
+			}
+		}
+	} else if (role === "toolResult" || role === "hookMessage") {
+		const content = (msg as { content?: unknown }).content;
+		if (typeof content === "string") {
+			textLen += content.length;
+		} else if (Array.isArray(content)) {
+			blocks = content.length;
+			for (const block of content) {
+				if (!block || typeof block !== "object") continue;
+				const b = block as { type?: string; text?: string };
+				if (b.type === "text" && typeof b.text === "string") textLen += b.text.length;
+				else if (b.type === "image") images++;
+			}
+		}
+	} else if (role === "branchSummary" || role === "compactionSummary") {
+		const s = (msg as { summary?: unknown }).summary;
+		if (typeof s === "string") textLen += s.length;
+	}
+	return `${role}:${ts}:${textLen}:${blocks}:${images}`;
+}
+/**
+ * Token count for a single message, using the per-message sidecar cache.
+ * The caller MUST skip caching for the last message during streaming —
+ * it may still be growing and its tokens belong recomputed each refresh.
+ */
+function tokensForMessage(msg: AgentMessage): number {
+	const fp = messageFingerprint(msg);
+	const tagged = msg as TaggedMessage;
+	const cached = tagged[kTokenCache];
+	if (cached && cached.fingerprint === fp) return cached.tokens;
+	const tokens = estimateTokens(msg);
+	tagged[kTokenCache] = { fingerprint: fp, tokens };
+	return tokens;
+}
 // ═══════════════════════════════════════════════════════════════════════════
 // StatusLineComponent
 // ═══════════════════════════════════════════════════════════════════════════
@@ -73,9 +168,27 @@ export class StatusLineComponent implements Component {
 	#lastTokensPerSecond: number | null = null;
 	#lastTokensPerSecondTimestamp: number | null = null;
-	// Context breakdown caching (2s TTL — aligns with /context command output)
-	#cachedBreakdown: { usedTokens: number; contextWindow: number } | null = null;
-	#breakdownFetchedAt = 0;
+	// Anthropic usage caching (5-min TTL, OAuth/sub only)
+	#cachedUsage: {
+		fiveHour?: { percent: number; resetMinutes?: number };
+		sevenDay?: { percent: number; resetHours?: number };
+	} | null = null;
+	#usageFetchedAt = 0;
+	#usageInFlight = false;
+	// Context breakdown — incremental cache. Replaces the previous 2-second
+	// TTL design (which re-walked every message on each refresh and produced
+	// ~1.1 s sync freezes on 2,000+ message sessions because `updateEditorTopBorder`
+	// is called on every agent event in event-controller). The new scheme
+	// caches by message-object identity (a Symbol-keyed sidecar on each
+	// message) plus a cheap content fingerprint, so in-place mutations of
+	// an existing message (post-hoc error attachment, retry-truncated
+	// branch rebuild, replaceMessages with the same length) are detected
+	// and recomputed.
+	// Cached non-message total (system prompt + tools + skills). Invalidated
+	// when the inputs-identity fingerprint changes (model swap, skill toggle,
+	// tool registration).
+	#nonMessageTokensCache: number | undefined;
+	#nonMessageInputsKey: string | undefined;
 	constructor(private readonly session: AgentSession) {
 		this.#settings = {
@@ -309,22 +422,136 @@ export class StatusLineComponent implements Component {
 		return null;
 	}
-	#getCachedContextBreakdown(): { usedTokens: number; contextWindow: number } {
+	/**
+	 * Background-refresh the Anthropic OAuth quota report. Guarded by a 5-min
+	 * TTL on both success (cache lifetime) and error (backoff). Exposed
+	 * (non-private) so unit tests can verify the backoff invariant.
+	 */
+	refreshUsageInBackground(): void {
 		const now = Date.now();
-		if (!this.#cachedBreakdown || now - this.#breakdownFetchedAt > 2_000) {
-			const breakdown = computeContextBreakdown(this.session);
-			this.#cachedBreakdown = {
-				usedTokens: breakdown.usedTokens,
-				contextWindow: breakdown.contextWindow,
-			};
-			this.#breakdownFetchedAt = now;
+		if (this.#usageInFlight) return;
+		if (this.#usageFetchedAt > 0 && now - this.#usageFetchedAt < 5 * 60_000) return;
+		const fetcher = (this.session as { fetchUsageReports?: () => Promise<unknown> }).fetchUsageReports;
+		if (typeof fetcher !== "function") return;
+		this.#usageInFlight = true;
+		void fetcher
+			.call(this.session)
+			.then(reports => {
+				this.#cachedUsage = this.#normalizeUsageReports(reports);
+				this.#usageFetchedAt = Date.now();
+			})
+			.catch(() => {
+				// Backoff on error: stamp the fetch time so the 5-min TTL guard
+				// also acts as an error budget. Without this, every render
+				// kicks off another fetch (gated only by #usageInFlight),
+				// which hammers the endpoint during a network outage / 5xx.
+				this.#usageFetchedAt = Date.now();
+			})
+			.finally(() => {
+				this.#usageInFlight = false;
+			});
+	}
+	#normalizeUsageReports(reports: unknown): {
+		fiveHour?: { percent: number; resetMinutes?: number };
+		sevenDay?: { percent: number; resetHours?: number };
+	} | null {
+		if (!Array.isArray(reports)) return null;
+		let fiveHour: { percent: number; resetMinutes?: number } | undefined;
+		let sevenDay: { percent: number; resetHours?: number } | undefined;
+		const now = Date.now();
+		for (const report of reports) {
+			if (!report || typeof report !== "object") continue;
+			const limits = (report as { limits?: unknown }).limits;
+			if (!Array.isArray(limits)) continue;
+			for (const limit of limits) {
+				if (!limit || typeof limit !== "object") continue;
+				const l = limit as {
+					scope?: { windowId?: string; tier?: string };
+					window?: { resetsAt?: number };
+					amount?: { usedFraction?: number };
+				};
+				const fraction = l.amount?.usedFraction;
+				if (typeof fraction !== "number") continue;
+				const windowId = l.scope?.windowId;
+				const tier = l.scope?.tier;
+				const resetsAt = l.window?.resetsAt;
+				if (windowId === "5h" && !tier && !fiveHour) {
+					fiveHour = {
+						percent: fraction * 100,
+						resetMinutes:
+							typeof resetsAt === "number" ? Math.max(0, Math.round((resetsAt - now) / 60_000)) : undefined,
+					};
+				} else if (windowId === "7d" && !tier && !sevenDay) {
+					sevenDay = {
+						percent: fraction * 100,
+						resetHours:
+							typeof resetsAt === "number" ? Math.max(0, Math.round((resetsAt - now) / 3_600_000)) : undefined,
+					};
+				}
+			}
 		}
-		return this.#cachedBreakdown;
+		if (!fiveHour && !sevenDay) return null;
+		return { fiveHour, sevenDay };
+	}
+	/**
+	 * Compute the (cached) used-tokens / context-window totals for the
+	 * status-line context% segment. Exposed (non-private) so unit tests can
+	 * verify the incremental-cache invariants; not part of any external
+	 * API.
+	 */
+	getCachedContextBreakdown(): { usedTokens: number; contextWindow: number } {
+		const messages = this.session.messages ?? [];
+		const contextWindow = this.session.model?.contextWindow ?? 0;
+		// 1) Non-message tokens (system prompt + tools + skills). Refresh only
+		//    when the inputs identity fingerprint changes — usually never
+		//    during a streaming turn. ~10-30 ms when it does refresh.
+		const inputsKey = this.#computeNonMessageInputsKey();
+		if (this.#nonMessageTokensCache === undefined || this.#nonMessageInputsKey !== inputsKey) {
+			this.#nonMessageTokensCache = computeNonMessageTokens(this.session);
+			this.#nonMessageInputsKey = inputsKey;
+		}
+		// 2) Message tokens — incremental. The sidecar cache lives on the
+		//    message object itself (Symbol-keyed), keyed by identity and
+		//    validated by a cheap content fingerprint. Mutations that
+		//    replace messages (replaceMessages, branch rebuild, compaction)
+		//    yield fresh objects → cache miss → recompute. In-place
+		//    mutations on the same object are caught by fingerprint
+		//    mismatch. The LAST message is always recomputed because it
+		//    may still be growing during streaming.
+		let messagesTokens = 0;
+		const lastIdx = messages.length - 1;
+		for (let i = 0; i < messages.length; i++) {
+			messagesTokens += i === lastIdx ? estimateTokens(messages[i]) : tokensForMessage(messages[i]);
+		}
+		const usedTokens = this.#nonMessageTokensCache + messagesTokens;
+		return { usedTokens, contextWindow };
+	}
+	/**
+	 * Build an identity fingerprint for the non-message inputs (system prompt,
+	 * tools, skills). When this changes, the non-message token cache must be
+	 * recomputed. Cheap: just lengths + first-string-length. Doesn't need to
+	 * be cryptographically unique — only stable for the same inputs.
+	 */
+	#computeNonMessageInputsKey(): string {
+		const sp = this.session.systemPrompt ?? [];
+		const tools = this.session.agent?.state?.tools ?? [];
+		const skills = this.session.skills ?? [];
+		const modelId = this.session.model?.id ?? "";
+		return `${modelId}|${sp.length}:${sp[0]?.length ?? 0}|${tools.length}|${skills.length}`;
 	}
 	#buildSegmentContext(width: number): SegmentContext {
 		const state = this.session.state;
+		// Trigger background fetch (5-min TTL); render uses cached value
+		this.refreshUsageInBackground();
 		// Get usage statistics
 		const aggregateUsageStats = this.session.sessionManager?.getUsageStatistics() ?? {
 			input: 0,
@@ -340,7 +567,7 @@ export class StatusLineComponent implements Component {
 		};
 		// Context usage — aligned with /context command so both surfaces report the same value
-		const breakdown = this.#getCachedContextBreakdown();
+		const breakdown = this.getCachedContextBreakdown();
 		const contextTokens = breakdown.usedTokens;
 		const contextWindow = breakdown.contextWindow || state.model?.contextWindow || 0;
 		const contextPercent = contextWindow > 0 ? (contextTokens / contextWindow) * 100 : 0;
@@ -363,6 +590,7 @@ export class StatusLineComponent implements Component {
 				status: this.#getGitStatus(),
 				pr: this.#lookupPr(),
 			},
+			usage: this.#cachedUsage,
 		};
 	}

package/src/modes/controllers/command-controller.ts CHANGED Viewed

@@ -395,6 +395,15 @@ export class CommandController {
 		info += `${theme.fg("dim", "Tool Calls:")} ${stats.toolCalls}\n`;
 		info += `${theme.fg("dim", "Tool Results:")} ${stats.toolResults}\n`;
 		info += `${theme.fg("dim", "Total:")} ${stats.totalMessages}\n\n`;
+		// Append-only context
+		{
+			const setting = this.ctx.settings.get("provider.appendOnlyContext") ?? "auto";
+			const provider = this.ctx.session.model?.provider;
+			const mode = setting === "on" ? true : setting === "off" ? false : provider === "deepseek";
+			const activeLabel = mode ? theme.fg("success", "active") : theme.fg("dim", "inactive");
+			const settingLabel = setting === "auto" ? `${setting} (${provider ?? "?"})` : setting;
+			info += `${theme.fg("dim", "Append-Only:")} ${activeLabel} (setting: ${settingLabel})\n`;
+		}
 		info += `${theme.bold("Tokens")}\n`;
 		info += `${theme.fg("dim", "Input:")} ${stats.tokens.input.toLocaleString()}\n`;
 		info += `${theme.fg("dim", "Output:")} ${stats.tokens.output.toLocaleString()}\n`;

package/src/modes/controllers/event-controller.ts CHANGED Viewed

@@ -760,6 +760,14 @@ export class EventController {
 		if (this.ctx.isBackgrounded === false) return;
 		const notify = settings.get("completion.notify");
 		if (notify === "off") return;
+		// Skip when the turn was aborted (e.g. ask cancelled with Ctrl+C) or
+		// errored — those are not "Task complete" events. Mirrors the gate
+		// already used by #currentContextTokens, #handleMessageEnd, and the
+		// retry / TTSR / compaction skip paths across agent-session.ts.
+		const last = this.ctx.session.getLastAssistantMessage?.();
+		if (last?.stopReason === "aborted" || last?.stopReason === "error") return;
 		const title = this.ctx.sessionManager.getSessionName();
 		const message = title ? `${title}: Complete` : "Complete";
 		TERMINAL.sendNotification(message);

package/src/modes/interactive-mode.ts CHANGED Viewed

@@ -691,7 +691,7 @@ export class InteractiveMode implements InteractiveModeContext {
 	}
 	#isLoopAutoSubmitBlocked(): boolean {
-		return this.session.isStreaming || this.session.isCompacting;
+		return this.session.isStreaming || this.session.isCompacting || this.session.hasPostPromptWork;
 	}
 	#submitLoopPromptWhenReady(prompt: string): void {
@@ -1876,12 +1876,23 @@ export class InteractiveMode implements InteractiveModeContext {
 		}
 	}
-	async #handleGoalSetSubcommand(rest: string): Promise<void> {
-		if (this.goalModeEnabled) {
-			this.showStatus("Goal mode is already active. Use /goal drop to start over.");
-			return;
+	async #replaceGoalFromObjective(objective: string): Promise<void> {
+		const state = await this.session.goalRuntime.replaceGoal({ objective });
+		this.session.setGoalModeState(state);
+		this.goalModeEnabled = true;
+		this.goalModePaused = false;
+		this.#resetGoalContinuationSuppression();
+		this.#updateGoalModeStatus();
+		if (this.session.isStreaming) {
+			await this.session.sendGoalModeContext({ deliverAs: "steer" });
 		}
-		if (this.#getPausedGoalState()) {
+		if (this.onInputCallback) {
+			this.onInputCallback(this.startPendingSubmission({ text: objective }));
+		}
+	}
+	async #handleGoalSetSubcommand(rest: string): Promise<void> {
+		if (!this.goalModeEnabled && this.#getPausedGoalState()) {
 			this.showWarning("Resume the current goal first, or drop it before setting a new objective.");
 			return;
 		}
@@ -1889,6 +1900,10 @@ export class InteractiveMode implements InteractiveModeContext {
 			? rest.trim()
 			: (await this.showHookEditor("Goal objective", undefined, undefined, { promptStyle: true }))?.trim();
 		if (!objective) return;
+		if (this.goalModeEnabled) {
+			await this.#replaceGoalFromObjective(objective);
+			return;
+		}
 		await this.#startGoalFromObjective(objective);
 	}
@@ -2312,8 +2327,8 @@ export class InteractiveMode implements InteractiveModeContext {
 		this.#uiHelpers.renderSessionContext(sessionContext, options);
 	}
-	renderInitialMessages(prebuiltContext?: SessionContext): void {
-		this.#uiHelpers.renderInitialMessages(prebuiltContext);
+	renderInitialMessages(prebuiltContext?: SessionContext, options?: { preserveExistingChat?: boolean }): void {
+		this.#uiHelpers.renderInitialMessages(prebuiltContext, options);
 	}
 	getUserMessageText(message: Message): string {

package/src/modes/theme/theme.ts CHANGED Viewed

@@ -295,7 +295,7 @@ const UNICODE_SYMBOLS: SymbolMap = {
 	"thinking.low": "◑ low",
 	"thinking.medium": "◒ med",
 	"thinking.high": "◕ high",
-	"thinking.xhigh": "◉ xhi",
+	"thinking.xhigh": "◉ xhigh",
 	// Checkboxes
 	"checkbox.checked": "☑",
 	"checkbox.unchecked": "☐",

package/src/modes/types.ts CHANGED Viewed

@@ -186,7 +186,7 @@ export interface InteractiveModeContext {
 		sessionContext: SessionContext,
 		options?: { updateFooter?: boolean; populateHistory?: boolean },
 	): void;
-	renderInitialMessages(prebuiltContext?: SessionContext): void;
+	renderInitialMessages(prebuiltContext?: SessionContext, options?: { preserveExistingChat?: boolean }): void;
 	getUserMessageText(message: Message): string;
 	findLastAssistantMessage(): AssistantMessage | undefined;
 	extractAssistantText(message: AssistantMessage): string;

package/src/modes/utils/context-usage.ts CHANGED Viewed

@@ -37,7 +37,7 @@ export interface ContextBreakdown {
 	freeTokens: number;
 }
-function estimateSkillsTokens(skills: readonly Skill[]): number {
+export function estimateSkillsTokens(skills: readonly Skill[]): number {
 	const fragments: string[] = [];
 	for (const skill of skills) {
 		// "- name: description\n" wire framing tokenizes ~identically to the
@@ -47,7 +47,9 @@ function estimateSkillsTokens(skills: readonly Skill[]): number {
 	return countTokens(fragments);
 }
-function estimateToolSchemaTokens(tools: ReadonlyArray<Pick<Tool, "name" | "description" | "parameters">>): number {
+export function estimateToolSchemaTokens(
+	tools: ReadonlyArray<Pick<Tool, "name" | "description" | "parameters">>,
+): number {
 	const fragments: string[] = [];
 	for (const tool of tools) {
 		fragments.push(tool.name, tool.description);
@@ -60,6 +62,43 @@ function estimateToolSchemaTokens(tools: ReadonlyArray<Pick<Tool, "name" | "desc
 	return countTokens(fragments);
 }
+/**
+ * Compute just the NON-MESSAGE token total: system prompt (with its skills
+ * section subtracted, since skills are tokenized separately) + system context
+ * (the rest of the system-prompt array) + tools + skills.
+ *
+ * Exposed so callers like `StatusLineComponent` can cache the non-message
+ * total separately from the message total. Non-message inputs (skills,
+ * tools, system prompt) change rarely; the message list grows on every
+ * streaming turn. Splitting the two lets the caller refresh each on its own
+ * cadence — non-message recomputed only when the inputs identity changes,
+ * messages walked incrementally as new entries append.
+ */
+export function computeNonMessageTokens(session: AgentSession): number {
+	const parts = computeNonMessageBreakdown(session);
+	return parts.systemPromptTokens + parts.systemContextTokens + parts.toolsTokens + parts.skillsTokens;
+}
+/**
+ * Shared helper for the four non-message token totals. Single source of truth
+ * for both `computeNonMessageTokens` (status-line incremental cache) and
+ * `computeContextBreakdown` (/context panel). The split avoids drift between
+ * the two surfaces — they MUST report the same numbers.
+ */
+function computeNonMessageBreakdown(session: AgentSession): {
+	skillsTokens: number;
+	toolsTokens: number;
+	systemContextTokens: number;
+	systemPromptTokens: number;
+} {
+	const skillsTokens = estimateSkillsTokens(session.skills ?? []);
+	const toolsTokens = estimateToolSchemaTokens(session.agent?.state?.tools ?? []);
+	const systemPromptParts = session.systemPrompt ?? [];
+	const systemContextTokens = countTokens(systemPromptParts.slice(1));
+	const systemPromptTokens = Math.max(0, countTokens(systemPromptParts[0] ?? "") - skillsTokens);
+	return { skillsTokens, toolsTokens, systemContextTokens, systemPromptTokens };
+}
 /**
  * Compute a breakdown of estimated context usage by category for the active
  * session and model.
@@ -68,9 +107,6 @@ export function computeContextBreakdown(session: AgentSession): ContextBreakdown
 	const model = session.model;
 	const contextWindow = model?.contextWindow ?? 0;
-	const skillsTokens = estimateSkillsTokens(session.skills ?? []);
-	const toolsTokens = estimateToolSchemaTokens(session.agent?.state?.tools ?? []);
 	let messagesTokens = 0;
 	const convo = session.messages;
 	if (convo) {
@@ -85,9 +121,7 @@ export function computeContextBreakdown(session: AgentSession): ContextBreakdown
 	//   Tools         = JSON tool schema sent separately on the wire
 	//   Skills        = the skill list embedded in the system prompt
 	//   Messages      = conversation messages
-	const systemPromptParts = session.systemPrompt;
-	const systemPromptTokens = Math.max(0, countTokens(systemPromptParts?.[0] ?? "") - skillsTokens);
-	const systemContextTokens = countTokens(systemPromptParts?.slice(1) ?? []);
+	const { skillsTokens, toolsTokens, systemContextTokens, systemPromptTokens } = computeNonMessageBreakdown(session);
 	const categories: CategoryInfo[] = [
 		{ id: "systemPrompt", label: "System prompt", tokens: systemPromptTokens, color: "accent", glyph: CELL_FILLED },

package/src/modes/utils/ui-helpers.ts CHANGED Viewed

@@ -29,6 +29,9 @@ import type { SessionContext } from "../../session/session-manager";
 import { formatBytes, formatDuration } from "../../tools/render-utils";
 type TextBlock = { type: "text"; text: string };
+interface RenderInitialMessagesOptions {
+	preserveExistingChat?: boolean;
+}
 type QueuedMessages = {
 	steering: string[];
@@ -459,9 +462,10 @@ export class UiHelpers {
 		this.ctx.ui.requestRender();
 	}
-	renderInitialMessages(prebuiltContext?: SessionContext): void {
+	renderInitialMessages(prebuiltContext?: SessionContext, options: RenderInitialMessagesOptions = {}): void {
 		// This path is used to rebuild the visible chat transcript (e.g. after custom/debug UI).
 		// Clear existing rendered chat first to avoid duplicating the full session in the container.
+		const preservedChatChildren = options.preserveExistingChat ? this.ctx.chatContainer.children : undefined;
 		this.ctx.chatContainer.clear();
 		this.ctx.pendingMessagesContainer.clear();
 		this.ctx.pendingBashComponents = [];
@@ -486,6 +490,12 @@ export class UiHelpers {
 			const times = compactionCount === 1 ? "1 time" : `${compactionCount} times`;
 			this.ctx.showStatus(`Session compacted ${times}`);
 		}
+		if (preservedChatChildren && preservedChatChildren.length > 0) {
+			for (const child of preservedChatChildren) {
+				this.ctx.chatContainer.addChild(child);
+			}
+			this.ctx.ui.requestRender();
+		}
 	}
 	clearEditor(): void {

package/src/prompts/agents/frontmatter.md CHANGED Viewed

@@ -6,5 +6,6 @@ description: {{jsonStringify description}}
 {{/if}}{{#if model}}model: {{jsonStringify model}}
 {{/if}}{{#if thinkingLevel}}thinking-level: {{jsonStringify thinkingLevel}}
 {{/if}}{{#if blocking}}blocking: true
+{{/if}}{{#if autoloadSkills}}autoloadSkills: {{jsonStringify autoloadSkills}}
 {{/if}}---
 {{body}}

package/src/sdk.ts CHANGED Viewed

@@ -4,6 +4,7 @@ import {
 	type AgentMessage,
 	type AgentTelemetryConfig,
 	type AgentTool,
+	AppendOnlyContextManager,
 	INTENT_FIELD,
 	type ThinkingLevel,
 } from "@oh-my-pi/pi-agent-core";
@@ -589,6 +590,24 @@ function registerPythonCleanup(): void {
 	postmortem.register("python-cleanup", disposeAllKernelSessions);
 }
+/**
+ * Resolve whether to enable append-only context mode based on the setting and provider.
+ *
+ * - `"on"` → always enable
+ * - `"off"` → never enable
+ * - `"auto"` → enable for DeepSeek (prefix-caching provider)
+ */
+function resolveAppendOnlyMode(setting: "auto" | "on" | "off" | undefined, provider: string): boolean {
+	switch (setting ?? "auto") {
+		case "on":
+			return true;
+		case "off":
+			return false;
+		default:
+			return provider === "deepseek";
+	}
+}
 function customToolToDefinition(tool: CustomTool): ToolDefinition {
 	const definition: ToolDefinition & { [TOOL_DEFINITION_MARKER]: true } = {
 		name: tool.name,
@@ -1897,6 +1916,11 @@ export async function createAgentSession(options: CreateAgentSessionOptions = {}
 			intentTracing: !!intentField,
 			getToolChoice: () => session?.nextToolChoice(),
 			telemetry: options.telemetry,
+			appendOnlyContext: model
+				? resolveAppendOnlyMode(settings.get("provider.appendOnlyContext"), model.provider)
+					? new AppendOnlyContextManager()
+					: undefined
+				: undefined,
 		});
 		cursorEventEmitter = event => agent.emitExternalEvent(event);