@oh-my-pi/pi-coding-agent 15.3.0 → 15.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,4 +1,5 @@
1
1
  import * as fs from "node:fs";
2
+ import type { AgentMessage } from "@oh-my-pi/pi-agent-core";
2
3
  import { estimateTokens } from "@oh-my-pi/pi-agent-core/compaction";
3
4
  import { type Component, truncateToWidth, visibleWidth } from "@oh-my-pi/pi-tui";
4
5
  import { formatCount, getProjectDir } from "@oh-my-pi/pi-utils";
@@ -40,9 +41,102 @@ export interface StatusLineSettings {
40
41
  }
41
42
 
42
43
  // ═══════════════════════════════════════════════════════════════════════════
43
- // Rendering Helpers
44
+ // Per-message token cache
44
45
  // ═══════════════════════════════════════════════════════════════════════════
45
46
 
47
+ /**
48
+ * Symbol-keyed sidecar tagged onto each `AgentMessage` to memoize its
49
+ * `estimateTokens` result. Keyed by message identity (the object itself);
50
+ * a cheap content fingerprint detects in-place mutations (post-hoc error
51
+ * attachment, retry-truncated branch rebuild, etc.) and forces recompute.
52
+ *
53
+ * Cache lives on the message — multiple `StatusLineComponent` instances
54
+ * share it for free, and entries collect with the message itself when the
55
+ * conversation is replaced or compacted.
56
+ */
57
+ const kTokenCache = Symbol("statusLine.tokenCache");
58
+ interface TaggedMessage {
59
+ [kTokenCache]?: { fingerprint: string; tokens: number };
60
+ }
61
+
62
/**
 * Cheap structural fingerprint used to validate a cached `estimateTokens`
 * result for a message.
 *
 * The result has the shape `role:timestamp:textLen:blocks:images`:
 * - `textLen` sums the `.length` of every string visited for the message's role
 *   (command/output, text and thinking blocks, tool-call names, summaries) plus
 *   the serialized length of tool-call arguments;
 * - `blocks` is the length of an array-valued `content`;
 * - `images` counts image blocks, for `toolResult` / `hookMessage` only.
 *
 * The walk is intended to mirror the one in `estimateTokens`; unknown roles
 * contribute only their role and timestamp.
 *
 * Cost: string-length reads are O(blocks). Tool-call arguments are the one
 * exception — they are measured with `JSON.stringify`, so that part costs
 * O(serialized argument size).
 *
 * Limitation: the fingerprint is length-based, so an in-place edit that keeps
 * every length and count unchanged is not detected.
 *
 * @param msg - Message to fingerprint; only plain property reads are performed.
 * @returns A deterministic string; never throws for unserializable arguments.
 */
function messageFingerprint(msg: AgentMessage): string {
	const view = msg as {
		role?: string;
		timestamp?: number;
		content?: unknown;
		command?: unknown;
		output?: unknown;
		summary?: unknown;
	};
	const role = view.role ?? "";
	const ts = view.timestamp ?? 0;
	let textLen = 0;
	let blocks = 0;
	let images = 0;

	switch (role) {
		case "bashExecution":
			textLen += fingerprintStringLength(view.command) + fingerprintStringLength(view.output);
			break;

		case "user": {
			const content = view.content;
			if (typeof content === "string") {
				textLen += content.length;
			} else if (Array.isArray(content)) {
				blocks = content.length;
				for (const block of content) {
					if (block?.type === "text") textLen += fingerprintStringLength(block.text);
				}
			}
			break;
		}

		case "assistant": {
			const content = view.content;
			if (!Array.isArray(content)) break;
			blocks = content.length;
			for (const block of content) {
				if (!block || typeof block !== "object") continue;
				const b = block as {
					type?: string;
					text?: unknown;
					thinking?: unknown;
					name?: unknown;
					arguments?: unknown;
				};
				if (b.type === "text") {
					textLen += fingerprintStringLength(b.text);
				} else if (b.type === "thinking") {
					textLen += fingerprintStringLength(b.thinking);
				} else if (b.type === "toolCall") {
					// Argument bytes vary; a length proxy is enough to detect in-place edits.
					textLen += fingerprintStringLength(b.name) + fingerprintArgumentsLength(b.arguments);
				}
			}
			break;
		}

		case "toolResult":
		case "hookMessage": {
			const content = view.content;
			if (typeof content === "string") {
				textLen += content.length;
			} else if (Array.isArray(content)) {
				blocks = content.length;
				for (const block of content) {
					if (!block || typeof block !== "object") continue;
					const b = block as { type?: string; text?: unknown };
					if (b.type === "text") textLen += fingerprintStringLength(b.text);
					else if (b.type === "image") images++;
				}
			}
			break;
		}

		case "branchSummary":
		case "compactionSummary":
			textLen += fingerprintStringLength(view.summary);
			break;
	}

	return `${role}:${ts}:${textLen}:${blocks}:${images}`;
}

/** Length of `value` when it is a string, otherwise 0. */
function fingerprintStringLength(value: unknown): number {
	return typeof value === "string" ? value.length : 0;
}

/**
 * Serialized length of tool-call arguments, or 0 when they cannot be measured.
 * `JSON.stringify` returns `undefined` for functions and symbols and throws on
 * circular structures and BigInt values; neither case may escape, because the
 * fingerprint is computed while rendering.
 */
function fingerprintArgumentsLength(args: unknown): number {
	if (args === undefined) return 0;
	try {
		return JSON.stringify(args)?.length ?? 0;
	} catch {
		return 0;
	}
}
124
+
125
/**
 * Token count for a single message, memoized in the Symbol-keyed sidecar
 * stored on the message object itself.
 *
 * A cached value is reused only while the message's current fingerprint equals
 * the one recorded alongside it; otherwise `estimateTokens` runs again and the
 * sidecar is refreshed.
 *
 * The caller MUST NOT use this for the last message while streaming: that
 * message may still be growing, so its tokens must be recomputed on every
 * refresh.
 *
 * @param msg - Message whose token estimate is wanted.
 * @returns The cached or freshly computed `estimateTokens(msg)` value.
 */
function tokensForMessage(msg: AgentMessage): number {
	const fingerprint = messageFingerprint(msg);
	const cached = (msg as TaggedMessage)[kTokenCache];
	if (cached !== undefined && cached.fingerprint === fingerprint) return cached.tokens;

	const tokens = estimateTokens(msg);
	try {
		// Non-enumerable so the sidecar is not copied by object spread /
		// `Object.assign` and does not show up in deep-equality comparisons.
		// Configurable + writable so later refreshes can replace it.
		Object.defineProperty(msg, kTokenCache, {
			value: { fingerprint, tokens },
			writable: true,
			enumerable: false,
			configurable: true,
		});
	} catch {
		// Frozen or non-extensible message: it simply cannot be tagged. The
		// estimate is still correct; it is recomputed on the next call.
	}
	return tokens;
}
139
+
46
140
  // ═══════════════════════════════════════════════════════════════════════════
47
141
  // StatusLineComponent
48
142
  // ═══════════════════════════════════════════════════════════════════════════
@@ -85,14 +179,11 @@ export class StatusLineComponent implements Component {
85
179
  // TTL design (which re-walked every message on each refresh and produced
86
180
  // ~1.1 s sync freezes on 2,000+ message sessions because `updateEditorTopBorder`
87
181
  // is called on every agent event in event-controller). The new scheme
88
- // exploits the fact that `session.messages` is append-only during a turn
89
- // and only shrinks on compaction.
90
- #cachedBreakdown: { usedTokens: number; contextWindow: number } | null = null;
91
- // Per-message token counts indexed by `session.messages` position. Entries
92
- // here are immutable: a message at index `i` is finalized (its content
93
- // no longer mutates) once index `i+1` exists. We therefore cache all but
94
- // the LAST message (which may still be growing during streaming).
95
- #messageTokenCache: number[] = [];
182
+ // caches by message-object identity (a Symbol-keyed sidecar on each
183
+ // message) plus a cheap content fingerprint, so in-place mutations of
184
+ // an existing message (post-hoc error attachment, retry-truncated
185
+ // branch rebuild, replaceMessages with the same length) are detected
186
+ // and recomputed.
96
187
  // Cached non-message total (system prompt + tools + skills). Invalidated
97
188
  // when the inputs-identity fingerprint changes (model swap, skill toggle,
98
189
  // tool registration).
@@ -331,7 +422,12 @@ export class StatusLineComponent implements Component {
331
422
  return null;
332
423
  }
333
424
 
334
- #refreshUsageInBackground(): void {
425
+ /**
426
+ * Background-refresh the Anthropic OAuth quota report. Guarded by a 5-min
427
+ * TTL on both success (cache lifetime) and error (backoff). Exposed
428
+ * (non-private) so unit tests can verify the backoff invariant.
429
+ */
430
+ refreshUsageInBackground(): void {
335
431
  const now = Date.now();
336
432
  if (this.#usageInFlight) return;
337
433
  if (this.#usageFetchedAt > 0 && now - this.#usageFetchedAt < 5 * 60_000) return;
@@ -345,7 +441,11 @@ export class StatusLineComponent implements Component {
345
441
  this.#usageFetchedAt = Date.now();
346
442
  })
347
443
  .catch(() => {
348
- /* keep last known data on error */
444
+ // Backoff on error: stamp the fetch time so the 5-min TTL guard
445
+ // also acts as an error budget. Without this, every render
446
+ // kicks off another fetch (gated only by #usageInFlight),
447
+ // which hammers the endpoint during a network outage / 5xx.
448
+ this.#usageFetchedAt = Date.now();
349
449
  })
350
450
  .finally(() => {
351
451
  this.#usageInFlight = false;
@@ -414,29 +514,22 @@ export class StatusLineComponent implements Component {
414
514
  this.#nonMessageInputsKey = inputsKey;
415
515
  }
416
516
 
417
- // 2) Message tokens — incremental.
418
- // Compaction handling: if messages.length shrank, the array was
419
- // truncated. Reset cache; the next iteration rebuilds from scratch.
420
- if (this.#messageTokenCache.length > Math.max(0, messages.length - 1)) {
421
- this.#messageTokenCache.length = 0;
422
- }
423
- // Cache all but the last message. The last message may still be
424
- // growing during streaming (assistant delta blocks append to the
425
- // existing message); recomputing it each refresh is one
426
- // `estimateTokens` call (~0.5 ms) and stays correct.
427
- while (this.#messageTokenCache.length < Math.max(0, messages.length - 1)) {
428
- const idx = this.#messageTokenCache.length;
429
- this.#messageTokenCache.push(estimateTokens(messages[idx]));
430
- }
517
+ // 2) Message tokens — incremental. The sidecar cache lives on the
518
+ // message object itself (Symbol-keyed), keyed by identity and
519
+ // validated by a cheap content fingerprint. Mutations that
520
+ // replace messages (replaceMessages, branch rebuild, compaction)
521
+ // yield fresh objects → cache miss → recompute. In-place
522
+ // mutations on the same object are caught by fingerprint
523
+ // mismatch. The LAST message is always recomputed because it
524
+ // may still be growing during streaming.
431
525
  let messagesTokens = 0;
432
- for (const t of this.#messageTokenCache) messagesTokens += t;
433
- if (messages.length > 0) {
434
- messagesTokens += estimateTokens(messages[messages.length - 1]);
526
+ const lastIdx = messages.length - 1;
527
+ for (let i = 0; i < messages.length; i++) {
528
+ messagesTokens += i === lastIdx ? estimateTokens(messages[i]) : tokensForMessage(messages[i]);
435
529
  }
436
530
 
437
531
  const usedTokens = this.#nonMessageTokensCache + messagesTokens;
438
- this.#cachedBreakdown = { usedTokens, contextWindow };
439
- return this.#cachedBreakdown;
532
+ return { usedTokens, contextWindow };
440
533
  }
441
534
 
442
535
  /**
@@ -457,7 +550,7 @@ export class StatusLineComponent implements Component {
457
550
  const state = this.session.state;
458
551
 
459
552
  // Trigger background fetch (5-min TTL); render uses cached value
460
- this.#refreshUsageInBackground();
553
+ this.refreshUsageInBackground();
461
554
 
462
555
  // Get usage statistics
463
556
  const aggregateUsageStats = this.session.sessionManager?.getUsageStatistics() ?? {
@@ -75,12 +75,28 @@ export function estimateToolSchemaTokens(
75
75
  * messages walked incrementally as new entries append.
76
76
  */
77
77
export function computeNonMessageTokens(session: AgentSession): number {
	// Delegate to the shared breakdown and collapse its four parts into one total.
	const { systemPromptTokens, systemContextTokens, toolsTokens, skillsTokens } =
		computeNonMessageBreakdown(session);
	return systemPromptTokens + systemContextTokens + toolsTokens + skillsTokens;
}
81
+
82
/**
 * Computes the four non-message token totals in one place so that
 * `computeNonMessageTokens` (status-line incremental cache) and
 * `computeContextBreakdown` (/context panel) cannot drift apart — both
 * surfaces MUST report the same numbers.
 *
 * @param session - Session whose skills, tool schemas and system prompt parts
 *   are measured; missing pieces are treated as empty.
 * @returns The skills, tools, system-context and system-prompt token counts.
 */
function computeNonMessageBreakdown(session: AgentSession): {
	skillsTokens: number;
	toolsTokens: number;
	systemContextTokens: number;
	systemPromptTokens: number;
} {
	const skillsTokens = estimateSkillsTokens(session.skills ?? []);
	const toolsTokens = estimateToolSchemaTokens(session.agent?.state?.tools ?? []);

	const promptParts = session.systemPrompt ?? [];
	// Everything after the first part counts as system context.
	const systemContextTokens = countTokens(promptParts.slice(1));
	// The skill list is embedded in the first part; subtract it so it is not
	// counted twice, clamping at zero.
	const firstPartTokens = countTokens(promptParts[0] ?? "");
	const systemPromptTokens = Math.max(0, firstPartTokens - skillsTokens);

	return { skillsTokens, toolsTokens, systemContextTokens, systemPromptTokens };
}
85
101
 
86
102
  /**
@@ -91,9 +107,6 @@ export function computeContextBreakdown(session: AgentSession): ContextBreakdown
91
107
  const model = session.model;
92
108
  const contextWindow = model?.contextWindow ?? 0;
93
109
 
94
- const skillsTokens = estimateSkillsTokens(session.skills ?? []);
95
- const toolsTokens = estimateToolSchemaTokens(session.agent?.state?.tools ?? []);
96
-
97
110
  let messagesTokens = 0;
98
111
  const convo = session.messages;
99
112
  if (convo) {
@@ -108,9 +121,7 @@ export function computeContextBreakdown(session: AgentSession): ContextBreakdown
108
121
  // Tools = JSON tool schema sent separately on the wire
109
122
  // Skills = the skill list embedded in the system prompt
110
123
  // Messages = conversation messages
111
- const systemPromptParts = session.systemPrompt;
112
- const systemPromptTokens = Math.max(0, countTokens(systemPromptParts?.[0] ?? "") - skillsTokens);
113
- const systemContextTokens = countTokens(systemPromptParts?.slice(1) ?? []);
124
+ const { skillsTokens, toolsTokens, systemContextTokens, systemPromptTokens } = computeNonMessageBreakdown(session);
114
125
 
115
126
  const categories: CategoryInfo[] = [
116
127
  { id: "systemPrompt", label: "System prompt", tokens: systemPromptTokens, color: "accent", glyph: CELL_FILLED },
package/src/sdk.ts CHANGED
@@ -1893,7 +1893,10 @@ export async function createAgentSession(options: CreateAgentSessionOptions = {}
1893
1893
  streamSimple(streamModel, context, {
1894
1894
  ...streamOptions,
1895
1895
  onAuthError: async (provider, oldKey, error) => {
1896
- await modelRegistry.authStorage.invalidateCredentialMatching(provider, oldKey, streamOptions?.signal);
1896
+ await modelRegistry.authStorage.invalidateCredentialMatching(provider, oldKey, {
1897
+ signal: streamOptions?.signal,
1898
+ sessionId: agent.sessionId,
1899
+ });
1897
1900
  logger.debug("Retrying provider request after credential invalidation", {
1898
1901
  provider,
1899
1902
  error: error instanceof Error ? error.message : String(error),
@@ -751,6 +751,9 @@ export class AgentSession {
751
751
 
752
752
  // Event subscription state
753
753
  #unsubscribeAgent?: () => void;
754
+ #unsubscribeAppendOnly?: () => void;
755
+ /** Last (enable, providerId) tuple resolved by `#syncAppendOnlyContext` — used to skip no-op invalidations. */
756
+ #lastAppendOnlyResolution?: { enable: boolean; providerId: string | undefined };
754
757
  #eventListeners: AgentSessionEventListener[] = [];
755
758
 
756
759
  /** Tracks pending steering messages for UI display. Removed when delivered.
@@ -1141,7 +1144,7 @@ export class AgentSession {
1141
1144
  // (session persistence, hooks, auto-compaction, retry logic)
1142
1145
  this.#unsubscribeAgent = this.agent.subscribe(this.#handleAgentEvent);
1143
1146
  // Re-evaluate append-only context mode when the setting changes at runtime.
1144
- onAppendOnlyModeChanged(_value => this.#syncAppendOnlyContext(this.model));
1147
+ this.#unsubscribeAppendOnly = onAppendOnlyModeChanged(_value => this.#syncAppendOnlyContext(this.model));
1145
1148
  }
1146
1149
 
1147
1150
  /** Model registry for API key resolution and model discovery */
@@ -2785,6 +2788,10 @@ export class AgentSession {
2785
2788
  await hindsightState?.flushRetainQueue();
2786
2789
  hindsightState?.dispose();
2787
2790
  this.#disconnectFromAgent();
2791
+ if (this.#unsubscribeAppendOnly) {
2792
+ this.#unsubscribeAppendOnly();
2793
+ this.#unsubscribeAppendOnly = undefined;
2794
+ }
2788
2795
  this.#eventListeners = [];
2789
2796
  }
2790
2797
 
@@ -5980,7 +5987,12 @@ export class AgentSession {
5980
5987
  */
5981
5988
  #syncAppendOnlyContext(model: Model | null | undefined): void {
5982
5989
  const setting = this.settings.get("provider.appendOnlyContext") ?? "auto";
5983
- const enable = setting === "on" || (setting === "auto" && model?.provider === "deepseek");
5990
+ const providerId = model?.provider;
5991
+ const enable = setting === "on" || (setting === "auto" && providerId === "deepseek");
5992
+ const prev = this.#lastAppendOnlyResolution;
5993
+ if (prev && prev.enable === enable && prev.providerId === providerId) return;
5994
+ this.#lastAppendOnlyResolution = { enable, providerId };
5995
+
5984
5996
  if (enable && !this.agent.appendOnlyContext) {
5985
5997
  this.agent.setAppendOnlyContext(new AppendOnlyContextManager());
5986
5998
  } else if (enable && this.agent.appendOnlyContext) {
@@ -942,12 +942,71 @@ function extractFirstUserPrompt(entries: Array<Record<string, unknown>>): string
942
942
  return undefined;
943
943
  }
944
944
 
945
/**
 * Promote orphaned session backups created by `#replaceSessionFileAfterEperm`
 * back to their primary path when the primary is missing. This runs once per
 * session-dir scan, before the main `*.jsonl` glob, so a crash between the two
 * renames in the EPERM-rewrite path does not leave the user's last good state
 * stranded outside the loader's view.
 *
 * Two backup name shapes are recognized:
 * - `<basename>.jsonl.<snowflake>.bak` — the current scheme;
 * - `.<basename>.jsonl.<snowflake>.bak` — the scheme used before the leading
 *   dot was dropped. The dot is stripped so such a backup is restored to the
 *   real session file rather than to a hidden `.<basename>.jsonl`.
 *
 * When several backups map to the same primary, the one with the highest mtime
 * is used. The whole pass is best-effort: listing, stat, existence-check and
 * rename failures are skipped or logged, never thrown.
 *
 * Exported for testing.
 *
 * @param sessionDir - Directory scanned for `*.bak` files.
 * @param storage - Storage backend used for listing, stat, existence checks and rename.
 */
export async function recoverOrphanedBackups(sessionDir: string, storage: SessionStorage): Promise<void> {
	let backups: string[];
	try {
		backups = storage.listFilesSync(sessionDir, "*.bak");
	} catch {
		return;
	}
	if (backups.length === 0) return;

	// For each primary path, pick the newest backup (highest mtime) as the recovery source.
	const candidates = new Map<string, { backup: string; mtimeMs: number }>();
	for (const backup of backups) {
		const primaryName = primaryNameForBackup(path.basename(backup));
		if (primaryName === undefined) continue;
		const primaryPath = path.join(sessionDir, primaryName);

		let mtimeMs: number;
		try {
			mtimeMs = storage.statSync(backup).mtimeMs;
		} catch {
			// Backup vanished or is unreadable; it cannot be a recovery source.
			continue;
		}

		const existing = candidates.get(primaryPath);
		if (!existing || mtimeMs > existing.mtimeMs) {
			candidates.set(primaryPath, { backup, mtimeMs });
		}
	}

	for (const [primaryPath, { backup }] of candidates) {
		try {
			// Inside the try so a throwing backend cannot abort the session scan.
			if (storage.existsSync(primaryPath)) continue;
			await storage.rename(backup, primaryPath);
			logger.warn("Recovered orphaned session backup", {
				sessionFile: primaryPath,
				backupPath: backup,
			});
		} catch (err) {
			logger.warn("Failed to recover orphaned session backup", {
				sessionFile: primaryPath,
				backupPath: backup,
				error: toError(err).message,
			});
		}
	}
}

/**
 * Derive the primary session file name from a backup file name, or `undefined`
 * when the name is not a session backup. Expects
 * `[.]<primary>.<snowflake>.bak` where `<primary>` ends in `.jsonl`.
 */
function primaryNameForBackup(backupName: string): string | undefined {
	if (!backupName.endsWith(".bak")) return undefined;
	const stem = backupName.slice(0, -".bak".length);
	const dotIdx = stem.lastIndexOf(".");
	if (dotIdx <= 0) return undefined;

	let primaryName = stem.slice(0, dotIdx);
	// Older backups carried a leading dot in front of the primary's basename.
	if (primaryName.startsWith(".")) primaryName = primaryName.slice(1);
	return primaryName.endsWith(".jsonl") ? primaryName : undefined;
}
1002
+
945
1003
  /**
946
1004
  * Reads all session files from the directory and returns them sorted by mtime (newest first).
947
1005
  * Uses low-level file I/O to efficiently read only the first 4KB of each file
948
1006
  * to extract the JSON header and first user message without loading entire session logs into memory.
949
1007
  */
950
1008
  async function getSortedSessions(sessionDir: string, storage: SessionStorage): Promise<RecentSessionInfo[]> {
1009
+ await recoverOrphanedBackups(sessionDir, storage);
951
1010
  try {
952
1011
  const files: string[] = storage.listFilesSync(sessionDir, "*.jsonl");
953
1012
  const sessions: RecentSessionInfo[] = [];
@@ -2149,10 +2208,14 @@ export class SessionManager {
2149
2208
  }
2150
2209
  // Windows can reject overwrite-style rename with EPERM even after our own writer is closed.
2151
2210
  // Move the old session file aside first so a failed retry can roll back to the last good file.
2211
+ // The backup uses a plain `<basename>.<snowflake>.bak` name (no leading dot) so that if the
2212
+ // process crashes between the two renames, `recoverOrphanedBackups` can find it via the
2213
+ // shared `*.bak` glob on both real and in-memory storage backends and promote it back to
2214
+ // the primary on the next session-dir scan.
2152
2215
 
2153
2216
  async #replaceSessionFileAfterEperm(tempPath: string, targetPath: string, renameError: unknown): Promise<void> {
2154
2217
  const dir = path.resolve(targetPath, "..");
2155
- const backupPath = path.join(dir, `.${path.basename(targetPath)}.${Snowflake.next()}.bak`);
2218
+ const backupPath = path.join(dir, `${path.basename(targetPath)}.${Snowflake.next()}.bak`);
2156
2219
  try {
2157
2220
  await this.storage.rename(targetPath, backupPath);
2158
2221
  } catch (err) {
@@ -2167,13 +2230,14 @@ export class SessionManager {
2167
2230
  await this.storage.rename(tempPath, targetPath);
2168
2231
  } catch (err) {
2169
2232
  const replaceError = toError(err);
2233
+ const originalError = toError(renameError);
2170
2234
  try {
2171
2235
  await this.storage.rename(backupPath, targetPath);
2172
2236
  } catch (rollbackErr) {
2173
2237
  const rollbackError = toError(rollbackErr);
2174
2238
  throw new Error(
2175
- `Failed to replace session file after EPERM (${replaceError.message}); rollback from ${backupPath} also failed: ${rollbackError.message}`,
2176
- { cause: replaceError },
2239
+ `Failed to replace session file after EPERM (original: ${originalError.message}; retry: ${replaceError.message}); rollback from ${backupPath} also failed: ${rollbackError.message}`,
2240
+ { cause: originalError },
2177
2241
  );
2178
2242
  }
2179
2243
  throw replaceError;
@@ -3244,6 +3308,7 @@ export class SessionManager {
3244
3308
  ): Promise<SessionInfo[]> {
3245
3309
  const dir = sessionDir ?? SessionManager.getDefaultSessionDir(cwd, undefined, storage);
3246
3310
  try {
3311
+ await recoverOrphanedBackups(dir, storage);
3247
3312
  const files = storage.listFilesSync(dir, "*.jsonl");
3248
3313
  return await collectSessionsFromFiles(files, storage);
3249
3314
  } catch {
@@ -73,9 +73,13 @@ const BUILTIN_SLASH_COMMAND_REGISTRY: ReadonlyArray<SlashCommandSpec> = [
73
73
  allowArgs: true,
74
74
  handleTui: async (command, runtime) => {
75
75
  const hadArgs = !!command.args;
76
+ // Capture state BEFORE the call: when plan mode is already active,
77
+ // handlePlanModeCommand may exit it (on confirmed exit) or leave it on (on cancel
78
+ // or warning). In every "already active" case the typed args are NOT consumed,
79
+ // so preserve them in history regardless of the user's confirm/cancel choice.
80
+ const wasPlanModeEnabled = runtime.ctx.planModeEnabled;
76
81
  await runtime.ctx.handlePlanModeCommand(command.args || undefined);
77
- if (hadArgs && runtime.ctx.planModeEnabled) {
78
- // plan was already active — preserve the typed command in input history
82
+ if (hadArgs && wasPlanModeEnabled) {
79
83
  runtime.ctx.editor.addToHistory(command.text);
80
84
  }
81
85
  runtime.ctx.editor.setText("");
@@ -96,9 +100,10 @@ const BUILTIN_SLASH_COMMAND_REGISTRY: ReadonlyArray<SlashCommandSpec> = [
96
100
  allowArgs: true,
97
101
  handleTui: async (command, runtime) => {
98
102
  const hadArgs = !!command.args;
103
+ // Capture state BEFORE the call (see /plan above for rationale).
104
+ const wasGoalModeEnabled = runtime.ctx.goalModeEnabled;
99
105
  await runtime.ctx.handleGoalModeCommand(command.args || undefined);
100
- if (hadArgs && runtime.ctx.goalModeEnabled) {
101
- // goal was already active — preserve the typed command in input history
106
+ if (hadArgs && wasGoalModeEnabled) {
102
107
  runtime.ctx.editor.addToHistory(command.text);
103
108
  }
104
109
  runtime.ctx.editor.setText("");
@@ -49,6 +49,7 @@ import {
49
49
  TASK_SUBAGENT_EVENT_CHANNEL,
50
50
  TASK_SUBAGENT_LIFECYCLE_CHANNEL,
51
51
  TASK_SUBAGENT_PROGRESS_CHANNEL,
52
+ type TaskToolDetails,
52
53
  } from "./types";
53
54
 
54
55
  const MCP_CALL_TIMEOUT_MS = 60_000;
@@ -909,6 +910,11 @@ export async function runSubprocess(options: ExecutorOptions): Promise<SingleRes
909
910
  if (intent) {
910
911
  progress.lastIntent = intent;
911
912
  }
913
+ // Reset any prior in-flight task snapshot so we don't show stale
914
+ // nested progress when the agent enters a fresh `task` call.
915
+ if (event.toolName === "task") {
916
+ progress.inflightTaskDetails = undefined;
917
+ }
912
918
  break;
913
919
  }
914
920
 
@@ -927,6 +933,12 @@ export async function runSubprocess(options: ExecutorOptions): Promise<SingleRes
927
933
  progress.currentTool = undefined;
928
934
  progress.currentToolArgs = undefined;
929
935
  progress.currentToolStartMs = undefined;
936
+ // The finalized TaskToolDetails will be captured below into
937
+ // `extractedToolData.task`; drop the in-flight snapshot so the
938
+ // renderer doesn't double-count it against the final entry.
939
+ if (event.toolName === "task") {
940
+ progress.inflightTaskDetails = undefined;
941
+ }
930
942
 
931
943
  // Check for registered subagent tool handler
932
944
  const handler = subprocessToolRegistry.getHandler(event.toolName);
@@ -979,6 +991,23 @@ export async function runSubprocess(options: ExecutorOptions): Promise<SingleRes
979
991
  break;
980
992
  }
981
993
 
994
+ case "tool_execution_update": {
995
+ // Surface nested-subagent progress mid-flight. The child task
996
+ // tool emits incremental `onUpdate` calls carrying its current
997
+ // `TaskToolDetails` (results + progress); we stash the latest
998
+ // snapshot so the parent UI can render the in-flight subtree
999
+ // without waiting for the call to finish.
1000
+ if (event.toolName === "task") {
1001
+ const partial = (event as { partialResult?: { details?: unknown } }).partialResult;
1002
+ const details = partial && typeof partial === "object" ? partial.details : undefined;
1003
+ if (details && typeof details === "object" && "results" in (details as TaskToolDetails)) {
1004
+ progress.inflightTaskDetails = details as TaskToolDetails;
1005
+ flushProgress = true;
1006
+ }
1007
+ }
1008
+ break;
1009
+ }
1010
+
982
1011
  case "message_update": {
983
1012
  if (event.message?.role !== "assistant") break;
984
1013
  const assistantEvent = (
@@ -639,7 +639,8 @@ function renderAgentProgress(
639
639
  }
640
640
  }
641
641
 
642
- for (const [toolName, dataArray] of Object.entries(progress.extractedToolData)) {
642
+ for (const toolName in progress.extractedToolData) {
643
+ const dataArray = progress.extractedToolData[toolName];
643
644
  // Handle report_finding with tree formatting
644
645
  if (toolName === "report_finding") {
645
646
  const findings = normalizeReportFindings(dataArray);
@@ -649,6 +650,11 @@ function renderAgentProgress(
649
650
  continue;
650
651
  }
651
652
 
653
+ // Nested `task` data has its own dedicated tree renderer below that
654
+ // also merges in the in-flight snapshot — skip the generic inline
655
+ // path so we don't render twice.
656
+ if (toolName === "task") continue;
657
+
652
658
  const handler = subprocessToolRegistry.getHandler(toolName);
653
659
  if (handler?.renderInline) {
654
660
  const displayCount = expanded ? (dataArray as unknown[]).length : 3;
@@ -671,6 +677,20 @@ function renderAgentProgress(
671
677
  }
672
678
  }
673
679
 
680
+ // Nested `task` tree: completed sub-calls from `extractedToolData.task` plus
681
+ // the in-flight snapshot (if any). Surfacing this in the live view means
682
+ // the user sees deep-tree progress without waiting for this agent to finish
683
+ // its own turn.
684
+ const completedTaskCalls = (progress.extractedToolData?.task as TaskToolDetails[] | undefined) ?? [];
685
+ const inflight = progress.inflightTaskDetails;
686
+ if (completedTaskCalls.length > 0 || inflight) {
687
+ const snapshots = inflight ? [...completedTaskCalls, inflight] : completedTaskCalls;
688
+ const nestedLines = renderNestedTaskTree(snapshots, expanded, theme, spinnerFrame);
689
+ for (const line of nestedLines) {
690
+ lines.push(`${continuePrefix}${line}`);
691
+ }
692
+ }
693
+
674
694
  // Expanded view: recent output and tools
675
695
  if (expanded && progress.status === "running") {
676
696
  const output = progress.recentOutput.join("\n");
@@ -1067,6 +1087,38 @@ function renderNestedTaskResults(detailsList: TaskToolDetails[], expanded: boole
1067
1087
  return lines;
1068
1088
  }
1069
1089
 
1090
/**
 * Flatten a list of `TaskToolDetails` snapshots into tree lines for the live
 * progress view, so nested subagent activity is visible while this agent is
 * still running.
 *
 * Per snapshot: when it has at least one entry in `results`, only those
 * results are rendered (via `renderAgentResult`); otherwise its `progress`
 * entries, if any, are rendered (via `renderAgentProgress`). The final entry
 * of each snapshot is flagged as the last sibling.
 *
 * @param detailsList - Snapshots to render, in display order.
 * @param expanded - Forwarded to the per-entry renderers.
 * @param theme - Forwarded to the per-entry renderers.
 * @param spinnerFrame - Forwarded to `renderAgentProgress` for in-flight entries.
 * @returns The concatenated lines of every rendered entry.
 */
function renderNestedTaskTree(
	detailsList: TaskToolDetails[],
	expanded: boolean,
	theme: Theme,
	spinnerFrame?: number,
): string[] {
	const rendered: string[] = [];

	for (const snapshot of detailsList) {
		const finished = snapshot.results;
		if (finished && finished.length > 0) {
			const lastFinished = finished.length - 1;
			for (const [position, result] of finished.entries()) {
				rendered.push(...renderAgentResult(result, position === lastFinished, expanded, theme));
			}
			continue;
		}

		const running = snapshot.progress;
		if (!running || running.length === 0) continue;

		const lastRunning = running.length - 1;
		for (const [position, entry] of running.entries()) {
			rendered.push(...renderAgentProgress(entry, position === lastRunning, expanded, theme, spinnerFrame));
		}
	}

	return rendered;
}
1121
+
1070
1122
  subprocessToolRegistry.register<TaskToolDetails>("task", {
1071
1123
  extractData: event => {
1072
1124
  const details = event.result?.details;
package/src/task/types.ts CHANGED
@@ -236,6 +236,14 @@ export interface AgentProgress {
236
236
  attempt: number;
237
237
  errorMessage: string;
238
238
  };
239
+ /**
240
+ * Snapshot of the most recent `task` tool call's in-flight `TaskToolDetails`,
241
+ * captured from `tool_execution_update`. Lets the parent UI surface live
242
+ * nested-subagent progress while this agent is still inside its own `task`
243
+ * call. Cleared when the call ends — finalized data lives in
244
+ * `extractedToolData.task` after that.
245
+ */
246
+ inflightTaskDetails?: TaskToolDetails;
239
247
  }
240
248
 
241
249
  /** Result from a single agent execution */
@@ -180,7 +180,11 @@ function normalizeMixedSchemaNode(schema: unknown): unknown {
180
180
  }
181
181
 
182
182
  if (isJTDSchema(schema)) {
183
- return normalizeMixedSchemaNode(convertSchema(schema));
183
+ // `convertSchema` is itself fully recursive and emits pure JSON Schema, so
184
+ // re-walking the result with `normalizeMixedSchemaNode` is unnecessary and
185
+ // unsafe: it would treat user-named properties whose keys happen to be JTD
186
+ // keywords (e.g. `ref`, `elements`) as nested JTD forms (#1345).
187
+ return convertSchema(schema);
184
188
  }
185
189
 
186
190
  const normalized: Record<string, unknown> = {};