@oh-my-pi/pi-coding-agent 16.0.4 → 16.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (270)
  1. package/CHANGELOG.md +94 -0
  2. package/dist/cli.js +2027 -1396
  3. package/dist/types/advisor/advise-tool.d.ts +31 -19
  4. package/dist/types/autoresearch/tools/init-experiment.d.ts +13 -17
  5. package/dist/types/autoresearch/tools/log-experiment.d.ts +17 -19
  6. package/dist/types/autoresearch/tools/run-experiment.d.ts +3 -4
  7. package/dist/types/autoresearch/tools/update-notes.d.ts +4 -5
  8. package/dist/types/cli/args.d.ts +1 -0
  9. package/dist/types/cli/bench-cli.d.ts +6 -0
  10. package/dist/types/cli/ttsr-cli.d.ts +39 -0
  11. package/dist/types/commands/launch.d.ts +3 -0
  12. package/dist/types/commands/ttsr.d.ts +57 -0
  13. package/dist/types/commit/agentic/tools/analyze-file.d.ts +4 -5
  14. package/dist/types/commit/agentic/tools/git-file-diff.d.ts +4 -5
  15. package/dist/types/commit/agentic/tools/git-hunk.d.ts +5 -6
  16. package/dist/types/commit/agentic/tools/git-overview.d.ts +4 -5
  17. package/dist/types/commit/agentic/tools/propose-changelog.d.ts +23 -24
  18. package/dist/types/commit/agentic/tools/propose-commit.d.ts +11 -32
  19. package/dist/types/commit/agentic/tools/recent-commits.d.ts +3 -4
  20. package/dist/types/commit/agentic/tools/schemas.d.ts +6 -27
  21. package/dist/types/commit/agentic/tools/split-commit.d.ts +28 -49
  22. package/dist/types/commit/changelog/generate.d.ts +12 -13
  23. package/dist/types/commit/shared-llm.d.ts +10 -37
  24. package/dist/types/config/config-file.d.ts +4 -4
  25. package/dist/types/config/keybindings.d.ts +5 -0
  26. package/dist/types/config/models-config-schema.d.ts +625 -990
  27. package/dist/types/config/models-config.d.ts +229 -217
  28. package/dist/types/config/settings-schema.d.ts +144 -25
  29. package/dist/types/edit/hashline/params.d.ts +7 -11
  30. package/dist/types/edit/index.d.ts +2 -1
  31. package/dist/types/edit/modes/apply-patch.d.ts +4 -5
  32. package/dist/types/edit/modes/patch.d.ts +15 -24
  33. package/dist/types/edit/modes/replace.d.ts +16 -17
  34. package/dist/types/eval/js/index.d.ts +1 -0
  35. package/dist/types/extensibility/custom-commands/types.d.ts +6 -3
  36. package/dist/types/extensibility/custom-tools/types.d.ts +8 -5
  37. package/dist/types/extensibility/extensions/runner.d.ts +5 -2
  38. package/dist/types/extensibility/extensions/types.d.ts +14 -10
  39. package/dist/types/extensibility/hooks/types.d.ts +7 -4
  40. package/dist/types/extensibility/legacy-pi-ai-shim.d.ts +13 -5
  41. package/dist/types/extensibility/legacy-pi-coding-agent-shim.d.ts +17 -0
  42. package/dist/types/extensibility/shared-events.d.ts +22 -1
  43. package/dist/types/extensibility/typebox.d.ts +80 -58
  44. package/dist/types/goals/tools/goal-tool.d.ts +11 -24
  45. package/dist/types/index.d.ts +2 -0
  46. package/dist/types/lsp/index.d.ts +11 -26
  47. package/dist/types/lsp/types.d.ts +12 -28
  48. package/dist/types/main.d.ts +1 -0
  49. package/dist/types/mcp/client.d.ts +8 -0
  50. package/dist/types/modes/components/btw-panel.d.ts +1 -0
  51. package/dist/types/modes/components/custom-editor.d.ts +3 -1
  52. package/dist/types/modes/components/status-line/component.d.ts +1 -1
  53. package/dist/types/modes/components/status-line/context-thresholds.d.ts +0 -1
  54. package/dist/types/modes/controllers/btw-controller.d.ts +2 -0
  55. package/dist/types/modes/controllers/input-controller.d.ts +1 -0
  56. package/dist/types/modes/interactive-mode.d.ts +3 -0
  57. package/dist/types/modes/rpc/rpc-types.d.ts +1 -1
  58. package/dist/types/modes/setup-wizard/index.d.ts +1 -0
  59. package/dist/types/modes/setup-wizard/startup-splash.d.ts +7 -0
  60. package/dist/types/modes/theme/theme.d.ts +1 -1
  61. package/dist/types/modes/types.d.ts +3 -0
  62. package/dist/types/modes/utils/context-usage.d.ts +12 -0
  63. package/dist/types/sdk.d.ts +8 -1
  64. package/dist/types/session/agent-session.d.ts +24 -0
  65. package/dist/types/session/session-persistence.d.ts +4 -0
  66. package/dist/types/startup-splash.d.ts +12 -0
  67. package/dist/types/task/types.d.ts +47 -48
  68. package/dist/types/tools/ask.d.ts +26 -27
  69. package/dist/types/tools/ast-edit.d.ts +17 -17
  70. package/dist/types/tools/ast-grep.d.ts +12 -13
  71. package/dist/types/tools/bash.d.ts +20 -17
  72. package/dist/types/tools/browser.d.ts +46 -71
  73. package/dist/types/tools/checkpoint.d.ts +14 -15
  74. package/dist/types/tools/debug.d.ts +82 -145
  75. package/dist/types/tools/eval.d.ts +30 -40
  76. package/dist/types/tools/find.d.ts +17 -18
  77. package/dist/types/tools/gh.d.ts +49 -78
  78. package/dist/types/tools/image-gen.d.ts +20 -36
  79. package/dist/types/tools/inspect-image.d.ts +10 -11
  80. package/dist/types/tools/irc.d.ts +22 -33
  81. package/dist/types/tools/job.d.ts +11 -12
  82. package/dist/types/tools/learn.d.ts +21 -28
  83. package/dist/types/tools/manage-skill.d.ts +13 -22
  84. package/dist/types/tools/memory-edit.d.ts +15 -24
  85. package/dist/types/tools/memory-recall.d.ts +7 -8
  86. package/dist/types/tools/memory-reflect.d.ts +9 -10
  87. package/dist/types/tools/memory-retain.d.ts +13 -14
  88. package/dist/types/tools/read.d.ts +8 -8
  89. package/dist/types/tools/resolve.d.ts +11 -18
  90. package/dist/types/tools/review.d.ts +9 -15
  91. package/dist/types/tools/search-tool-bm25.d.ts +9 -10
  92. package/dist/types/tools/search.d.ts +16 -17
  93. package/dist/types/tools/ssh.d.ts +14 -15
  94. package/dist/types/tools/todo.d.ts +27 -43
  95. package/dist/types/tools/tts.d.ts +8 -9
  96. package/dist/types/tools/write.d.ts +9 -10
  97. package/dist/types/tui/code-cell.d.ts +2 -0
  98. package/dist/types/tui/index.d.ts +1 -0
  99. package/dist/types/tui/width-aware-text.d.ts +23 -0
  100. package/dist/types/utils/image-vision-fallback.d.ts +28 -0
  101. package/dist/types/utils/markit.d.ts +10 -1
  102. package/dist/types/web/search/index.d.ts +17 -28
  103. package/dist/types/web/search/providers/base.d.ts +1 -0
  104. package/dist/types/web/search/providers/gemini.d.ts +1 -0
  105. package/dist/types/web/search/providers/perplexity.d.ts +0 -2
  106. package/dist/types/web/search/types.d.ts +32 -26
  107. package/package.json +14 -13
  108. package/scripts/omp +1 -1
  109. package/src/advisor/__tests__/advisor.test.ts +103 -1
  110. package/src/advisor/advise-tool.ts +47 -11
  111. package/src/autoresearch/tools/init-experiment.ts +13 -16
  112. package/src/autoresearch/tools/log-experiment.ts +15 -18
  113. package/src/autoresearch/tools/run-experiment.ts +3 -3
  114. package/src/autoresearch/tools/update-notes.ts +4 -4
  115. package/src/cli/args.ts +1 -0
  116. package/src/cli/bench-cli.ts +30 -7
  117. package/src/cli/flag-tables.ts +8 -0
  118. package/src/cli/ttsr-cli.ts +995 -0
  119. package/src/cli-commands.ts +1 -0
  120. package/src/cli.ts +7 -1
  121. package/src/collab/host.ts +2 -2
  122. package/src/commands/launch.ts +3 -0
  123. package/src/commands/ttsr.ts +125 -0
  124. package/src/commit/agentic/tools/analyze-file.ts +4 -4
  125. package/src/commit/agentic/tools/git-file-diff.ts +4 -4
  126. package/src/commit/agentic/tools/git-hunk.ts +7 -5
  127. package/src/commit/agentic/tools/git-overview.ts +4 -4
  128. package/src/commit/agentic/tools/propose-changelog.ts +18 -15
  129. package/src/commit/agentic/tools/propose-commit.ts +6 -6
  130. package/src/commit/agentic/tools/recent-commits.ts +3 -3
  131. package/src/commit/agentic/tools/schemas.ts +8 -20
  132. package/src/commit/agentic/tools/split-commit.ts +19 -23
  133. package/src/commit/analysis/summary.ts +7 -5
  134. package/src/commit/changelog/generate.ts +15 -11
  135. package/src/commit/shared-llm.ts +17 -24
  136. package/src/config/config-file.ts +13 -15
  137. package/src/config/keybindings.ts +6 -0
  138. package/src/config/models-config-schema.ts +206 -179
  139. package/src/config/settings-schema.ts +118 -2
  140. package/src/discovery/builtin-rules/index.ts +2 -0
  141. package/src/discovery/builtin-rules/ts-import-type.md +2 -2
  142. package/src/discovery/builtin-rules/ts-no-any.md +11 -2
  143. package/src/discovery/builtin-rules/ts-no-inline-cast-access.md +55 -0
  144. package/src/edit/hashline/params.ts +12 -11
  145. package/src/edit/index.ts +5 -4
  146. package/src/edit/modes/apply-patch.ts +4 -4
  147. package/src/edit/modes/patch.ts +15 -18
  148. package/src/edit/modes/replace.ts +13 -17
  149. package/src/edit/renderer.ts +0 -1
  150. package/src/eval/agent-bridge.ts +11 -13
  151. package/src/eval/completion-bridge.ts +25 -17
  152. package/src/eval/js/context-manager.ts +17 -2
  153. package/src/eval/js/index.ts +1 -1
  154. package/src/eval/py/executor.ts +2 -2
  155. package/src/eval/py/runner.py +44 -0
  156. package/src/extensibility/custom-commands/loader.ts +5 -3
  157. package/src/extensibility/custom-commands/types.ts +6 -3
  158. package/src/extensibility/custom-tools/loader.ts +4 -2
  159. package/src/extensibility/custom-tools/types.ts +8 -5
  160. package/src/extensibility/extensions/loader.ts +4 -2
  161. package/src/extensibility/extensions/runner.ts +20 -2
  162. package/src/extensibility/extensions/types.ts +22 -8
  163. package/src/extensibility/hooks/loader.ts +5 -2
  164. package/src/extensibility/hooks/types.ts +7 -4
  165. package/src/extensibility/legacy-pi-ai-shim.ts +42 -5
  166. package/src/extensibility/legacy-pi-coding-agent-shim.ts +113 -0
  167. package/src/extensibility/plugins/legacy-pi-compat.ts +13 -13
  168. package/src/extensibility/shared-events.ts +24 -0
  169. package/src/extensibility/tool-proxy.ts +4 -1
  170. package/src/extensibility/typebox.ts +778 -251
  171. package/src/goals/guided-setup.ts +12 -3
  172. package/src/goals/tools/goal-tool.ts +6 -6
  173. package/src/index.ts +2 -0
  174. package/src/internal-urls/docs-index.generated.ts +15 -13
  175. package/src/lsp/types.ts +13 -27
  176. package/src/main.ts +29 -21
  177. package/src/mcp/client.ts +38 -13
  178. package/src/mcp/render.ts +102 -89
  179. package/src/modes/components/agent-hub.ts +11 -4
  180. package/src/modes/components/branch-summary-message.ts +1 -0
  181. package/src/modes/components/btw-panel.ts +5 -1
  182. package/src/modes/components/collab-prompt-message.ts +9 -7
  183. package/src/modes/components/compaction-summary-message.ts +1 -0
  184. package/src/modes/components/custom-editor.ts +18 -0
  185. package/src/modes/components/custom-message.ts +1 -0
  186. package/src/modes/components/footer.ts +6 -5
  187. package/src/modes/components/hook-message.ts +1 -0
  188. package/src/modes/components/read-tool-group.ts +9 -3
  189. package/src/modes/components/skill-message.ts +1 -0
  190. package/src/modes/components/status-line/component.ts +139 -15
  191. package/src/modes/components/status-line/context-thresholds.ts +0 -1
  192. package/src/modes/components/todo-reminder.ts +1 -0
  193. package/src/modes/components/tool-execution.ts +17 -10
  194. package/src/modes/components/ttsr-notification.ts +1 -0
  195. package/src/modes/components/user-message.ts +6 -6
  196. package/src/modes/controllers/btw-controller.ts +69 -1
  197. package/src/modes/controllers/event-controller.ts +2 -7
  198. package/src/modes/controllers/input-controller.ts +29 -0
  199. package/src/modes/controllers/selector-controller.ts +10 -3
  200. package/src/modes/interactive-mode.ts +42 -10
  201. package/src/modes/rpc/rpc-types.ts +1 -1
  202. package/src/modes/setup-wizard/index.ts +1 -0
  203. package/src/modes/setup-wizard/scenes/sign-in.ts +77 -5
  204. package/src/modes/setup-wizard/startup-splash.ts +107 -0
  205. package/src/modes/theme/theme.ts +133 -143
  206. package/src/modes/types.ts +3 -0
  207. package/src/modes/utils/context-usage.ts +37 -20
  208. package/src/modes/utils/hotkeys-markdown.ts +1 -0
  209. package/src/prompts/system/system-prompt.md +1 -0
  210. package/src/prompts/tools/image-attachment-describe-system.md +8 -0
  211. package/src/prompts/tools/image-attachment-describe.md +10 -0
  212. package/src/sdk.ts +35 -22
  213. package/src/session/agent-session.ts +715 -255
  214. package/src/session/session-history-format.ts +11 -2
  215. package/src/session/session-loader.ts +19 -32
  216. package/src/session/session-persistence.ts +27 -11
  217. package/src/session/snapcompact-inline.ts +1 -1
  218. package/src/slash-commands/builtin-registry.ts +4 -11
  219. package/src/ssh/connection-manager.ts +3 -2
  220. package/src/startup-splash.ts +19 -0
  221. package/src/task/executor.ts +12 -7
  222. package/src/task/types.ts +44 -41
  223. package/src/tool-discovery/tool-index.ts +17 -4
  224. package/src/tools/ask.ts +14 -14
  225. package/src/tools/ast-edit.ts +17 -14
  226. package/src/tools/ast-grep.ts +10 -9
  227. package/src/tools/bash.ts +15 -10
  228. package/src/tools/browser/launch.ts +13 -0
  229. package/src/tools/browser.ts +26 -32
  230. package/src/tools/checkpoint.ts +7 -7
  231. package/src/tools/debug.ts +72 -69
  232. package/src/tools/eval.ts +18 -19
  233. package/src/tools/find.ts +20 -13
  234. package/src/tools/gh.ts +29 -49
  235. package/src/tools/image-gen.ts +94 -57
  236. package/src/tools/inspect-image.ts +8 -9
  237. package/src/tools/irc.ts +12 -12
  238. package/src/tools/job.ts +6 -6
  239. package/src/tools/learn.ts +11 -14
  240. package/src/tools/manage-skill.ts +19 -23
  241. package/src/tools/memory-edit.ts +8 -8
  242. package/src/tools/memory-recall.ts +4 -4
  243. package/src/tools/memory-reflect.ts +5 -5
  244. package/src/tools/memory-retain.ts +9 -11
  245. package/src/tools/puppeteer/02_stealth_hairline.txt +1 -1
  246. package/src/tools/puppeteer/04_stealth_iframe.txt +4 -4
  247. package/src/tools/puppeteer/05_stealth_webgl.txt +1 -1
  248. package/src/tools/puppeteer/10_stealth_plugins.txt +6 -4
  249. package/src/tools/puppeteer/12_stealth_codecs.txt +2 -2
  250. package/src/tools/puppeteer/13_stealth_worker.txt +1 -1
  251. package/src/tools/read.ts +197 -19
  252. package/src/tools/report-tool-issue.ts +6 -6
  253. package/src/tools/resolve.ts +6 -6
  254. package/src/tools/review.ts +10 -12
  255. package/src/tools/search-tool-bm25.ts +5 -5
  256. package/src/tools/search.ts +20 -29
  257. package/src/tools/ssh.ts +8 -8
  258. package/src/tools/todo.ts +16 -19
  259. package/src/tools/tts.ts +16 -15
  260. package/src/tools/write.ts +5 -5
  261. package/src/tui/code-cell.ts +44 -3
  262. package/src/tui/index.ts +1 -0
  263. package/src/tui/width-aware-text.ts +58 -0
  264. package/src/utils/image-vision-fallback.ts +197 -0
  265. package/src/utils/markit.ts +17 -2
  266. package/src/web/search/index.ts +21 -9
  267. package/src/web/search/providers/base.ts +1 -0
  268. package/src/web/search/providers/gemini.ts +56 -18
  269. package/src/web/search/providers/perplexity.ts +373 -126
  270. package/src/web/search/types.ts +28 -48
@@ -18,6 +18,7 @@ import * as os from "node:os";
18
18
  import * as path from "node:path";
19
19
  import { scheduler } from "node:timers/promises";
20
20
  import { isPromise } from "node:util/types";
21
+
21
22
  import type { InMemorySnapshotStore } from "@oh-my-pi/hashline";
22
23
  import {
23
24
  type AfterToolCallContext,
@@ -31,11 +32,11 @@ import {
31
32
  AppendOnlyContextManager,
32
33
  type AsideMessage,
33
34
  type CompactionSummaryMessage,
35
+ countTokens,
34
36
  resolveTelemetry,
35
37
  STREAM_INTERRUPTED_AFTER_CONTENT_STOP_DETAIL,
36
38
  ThinkingLevel,
37
39
  } from "@oh-my-pi/pi-agent-core";
38
-
39
40
  import {
40
41
  AGGRESSIVE_SHAKE_CONFIG,
41
42
  AUTO_HANDOFF_THRESHOLD_FOCUS,
@@ -103,7 +104,7 @@ import {
103
104
  } from "@oh-my-pi/pi-ai";
104
105
  import { getSupportedEfforts } from "@oh-my-pi/pi-catalog/model-thinking";
105
106
  import { modelsAreEqual } from "@oh-my-pi/pi-catalog/models";
106
- import { countTokens, MacOSPowerAssertion } from "@oh-my-pi/pi-natives";
107
+ import { MacOSPowerAssertion } from "@oh-my-pi/pi-natives";
107
108
  import {
108
109
  extractRetryHint,
109
110
  formatDuration,
@@ -126,6 +127,8 @@ import {
126
127
  AdvisorRuntime,
127
128
  type AdvisorSeverity,
128
129
  formatAdvisorBatchContent,
130
+ isAdvisorInterruptImmuneTurnActive,
131
+ isInterruptingSeverity,
129
132
  resolveAdvisorDeliveryChannel,
130
133
  } from "../advisor";
131
134
  import { type AsyncJob, type AsyncJobDeliveryState, AsyncJobManager } from "../async";
@@ -147,7 +150,7 @@ import {
147
150
  resolveModelRoleValue,
148
151
  resolveRoleSelection,
149
152
  } from "../config/model-resolver";
150
- import { MODEL_ROLE_IDS } from "../config/model-roles";
153
+ import { MODEL_ROLE_IDS, MODEL_ROLES } from "../config/model-roles";
151
154
  import { expandPromptTemplate, type PromptTemplate } from "../config/prompt-templates";
152
155
  import type { Settings, SkillsSettings } from "../config/settings";
153
156
  import { onAppendOnlyModeChanged } from "../config/settings";
@@ -178,6 +181,7 @@ import type {
178
181
  SessionBeforeCompactResult,
179
182
  SessionBeforeSwitchResult,
180
183
  SessionBeforeTreeResult,
184
+ SessionStopEventResult,
181
185
  ToolExecutionEndEvent,
182
186
  ToolExecutionStartEvent,
183
187
  ToolExecutionUpdateEvent,
@@ -202,7 +206,7 @@ import { containsOrchestrate, ORCHESTRATE_NOTICE } from "../modes/orchestrate";
202
206
  import { getCurrentThemeName, theme } from "../modes/theme/theme";
203
207
  import { parseTurnBudget } from "../modes/turn-budget";
204
208
  import { containsUltrathink, ULTRATHINK_NOTICE } from "../modes/ultrathink";
205
- import { computeNonMessageTokens } from "../modes/utils/context-usage";
209
+ import { computeNonMessageBreakdown, computeNonMessageTokens } from "../modes/utils/context-usage";
206
210
  import { containsWorkflow, WORKFLOW_NOTICE } from "../modes/workflow";
207
211
  import { createPlanReadMatcher } from "../plan-mode/plan-protection";
208
212
  import type { PlanModeState } from "../plan-mode/state";
@@ -261,6 +265,7 @@ import { type EditMode, resolveEditMode } from "../utils/edit-mode";
261
265
  import { resolveFileDisplayMode } from "../utils/file-display-mode";
262
266
  import { extractFileMentions, generateFileMentionMessages } from "../utils/file-mentions";
263
267
  import { normalizeModelContextImages } from "../utils/image-loading";
268
+ import { describeAttachedImagesForTextModel } from "../utils/image-vision-fallback";
264
269
  import { buildNamedToolChoice, isToolChoiceActive } from "../utils/tool-choice";
265
270
  import type { AuthStorage } from "./auth-storage";
266
271
  import type { ClientBridge, ClientBridgePermissionOption, ClientBridgePermissionOutcome } from "./client-bridge";
@@ -295,6 +300,8 @@ import { ToolChoiceQueue } from "./tool-choice-queue";
295
300
  import { classifyUnexpectedStop, isUnexpectedStopCandidate } from "./unexpected-stop-classifier";
296
301
  import { YieldQueue } from "./yield-queue";
297
302
 
303
+ const SESSION_STOP_CONTINUATION_CAP = 8;
304
+
298
305
  /** Session-specific events that extend the core AgentEvent */
299
306
  export type AgentSessionEvent =
300
307
  | AgentEvent
@@ -338,6 +345,24 @@ const UNEXPECTED_STOP_MAX_RETRIES = 3;
338
345
  const UNEXPECTED_STOP_TIMEOUT_MS = 4000;
339
346
  const EMPTY_STOP_MAX_RETRIES = 3;
340
347
  const RETRY_BACKOFF_MAX_DELAY_MS = 8_000;
348
+
349
+ type CompactionCheckResult = Readonly<{
350
+ deferredHandoff: boolean;
351
+ continuationScheduled: boolean;
352
+ }>;
353
+
354
+ const COMPACTION_CHECK_NONE: CompactionCheckResult = {
355
+ deferredHandoff: false,
356
+ continuationScheduled: false,
357
+ };
358
+ const COMPACTION_CHECK_DEFERRED_HANDOFF: CompactionCheckResult = {
359
+ deferredHandoff: true,
360
+ continuationScheduled: true,
361
+ };
362
+ const COMPACTION_CHECK_CONTINUATION: CompactionCheckResult = {
363
+ deferredHandoff: false,
364
+ continuationScheduled: true,
365
+ };
341
366
  export type CommandMetadataChangedListener = () => void | Promise<void>;
342
367
  export type AsyncJobSnapshotItem = Pick<AsyncJob, "id" | "type" | "status" | "label" | "startTime">;
343
368
 
@@ -555,6 +580,17 @@ export interface RoleModelCycle {
555
580
  currentIndex: number;
556
581
  }
557
582
 
583
+ export interface ContextUsageBreakdown {
584
+ contextWindow: number;
585
+ anchored: boolean;
586
+ usedTokens: number;
587
+ systemPromptTokens: number;
588
+ systemToolsTokens: number;
589
+ systemContextTokens: number;
590
+ skillsTokens: number;
591
+ messagesTokens: number;
592
+ }
593
+
558
594
  /** Session statistics for /session command */
559
595
  export interface SessionStats {
560
596
  sessionFile: string | undefined;
@@ -976,6 +1012,10 @@ const MAGIC_KEYWORD_NOTICE_TYPES: ReadonlySet<string> = new Set([
976
1012
  "workflow-notice",
977
1013
  ]);
978
1014
 
1015
+ /** Custom-message type of the hidden companion carrying vision descriptions of image
1016
+ * attachments sent to a text-only model (see `#buildImageDescriptionNotice`). */
1017
+ const IMAGE_ATTACHMENT_DESCRIPTION_TYPE = "image-attachment-description";
1018
+
979
1019
  /**
980
1020
  * A hidden, user-attributed companion of a queued user prompt: the magic-keyword
981
1021
  * notices (`ultrathink`/`orchestrate`/`workflow`) enqueued alongside the user
@@ -989,7 +1029,7 @@ function isHiddenUserCompanion(message: AgentMessage): boolean {
989
1029
  message.role === "custom" &&
990
1030
  message.attribution === "user" &&
991
1031
  message.display === false &&
992
- MAGIC_KEYWORD_NOTICE_TYPES.has(message.customType)
1032
+ (MAGIC_KEYWORD_NOTICE_TYPES.has(message.customType) || message.customType === IMAGE_ATTACHMENT_DESCRIPTION_TYPE)
993
1033
  );
994
1034
  }
995
1035
 
@@ -1044,6 +1084,8 @@ export class AgentSession {
1044
1084
  * suppresses advisor concern/blocker auto-resume until the user next resumes.
1045
1085
  * Advisor advice is still recorded into the transcript, just not auto-run. */
1046
1086
  #advisorAutoResumeSuppressed = false;
1087
+ #advisorPrimaryTurnsCompleted = 0;
1088
+ #advisorInterruptImmuneTurnStart: number | undefined;
1047
1089
  #planModeState: PlanModeState | undefined;
1048
1090
  #goalModeState: GoalModeState | undefined;
1049
1091
  #goalRuntime: GoalRuntime;
@@ -1224,15 +1266,20 @@ export class AgentSession {
1224
1266
  #unexpectedStopRetryCount = 0;
1225
1267
  #promptGeneration = 0;
1226
1268
  #pendingAgentEndEmit: AgentSessionEvent | undefined;
1227
- #pendingProviderRequestNonMessageTokens: number | undefined = undefined;
1228
- #lastProviderUsageNonMessage:
1269
+ #pendingContextSnapshot:
1229
1270
  | {
1230
- provider: AssistantMessage["provider"];
1231
- model: AssistantMessage["model"];
1232
- timestamp: AssistantMessage["timestamp"];
1233
- tokens: number;
1271
+ promptTokens: number;
1272
+ nonMessageTokens: number;
1273
+ cutoffCount: number;
1234
1274
  }
1235
- | undefined;
1275
+ | undefined = undefined;
1276
+ #sessionStopContinuationCount = 0;
1277
+ #sessionStopHookActive = false;
1278
+ // Bumped whenever the pending in-flight snapshot is set/cleared. The
1279
+ // status-line context memo includes this so clearing the snapshot on
1280
+ // turn-end/abort invalidates the cache even though the message list is
1281
+ // unchanged — otherwise a mid-turn estimate would survive into idle.
1282
+ #contextUsageRevision = 0;
1236
1283
  #obfuscator: SecretObfuscator | undefined;
1237
1284
  #checkpointState: CheckpointState | undefined = undefined;
1238
1285
  #pendingRewindReport: string | undefined = undefined;
@@ -1476,6 +1523,7 @@ export class AgentSession {
1476
1523
  this.agent.setRawSseEventInterceptor(this.#onSseEvent);
1477
1524
  this.agent.setOnTurnEnd(async (messages, signal) => {
1478
1525
  if (signal?.aborted) return;
1526
+ this.#advisorPrimaryTurnsCompleted++;
1479
1527
  if (this.#advisorRuntime && !this.#advisorRuntime.disposed) {
1480
1528
  this.#advisorRuntime.onTurnEnd(messages);
1481
1529
  const syncBacklog = this.settings.get("advisor.syncBacklog");
@@ -1608,6 +1656,27 @@ export class AgentSession {
1608
1656
  // -------------------------------------------------------------------------
1609
1657
  // Advisor runtime lifecycle
1610
1658
  // -------------------------------------------------------------------------
1659
+ #advisorImmuneTurnLimit(): number {
1660
+ const immuneTurns = this.settings.get("advisor.immuneTurns") as number;
1661
+ if (!Number.isFinite(immuneTurns) || immuneTurns <= 0) return 0;
1662
+ return Math.trunc(immuneTurns);
1663
+ }
1664
+
1665
+ #isAdvisorInterruptImmuneTurnActive(): boolean {
1666
+ return isAdvisorInterruptImmuneTurnActive({
1667
+ completedTurns: this.#advisorPrimaryTurnsCompleted,
1668
+ immuneTurnStart: this.#advisorInterruptImmuneTurnStart,
1669
+ immuneTurns: this.#advisorImmuneTurnLimit(),
1670
+ });
1671
+ }
1672
+
1673
+ // The next primary turn number starts the immune-turn window. While the
1674
+ // interrupting steer is still in flight, completedTurns is lower than this
1675
+ // start, so duplicate concern/blocker advice is also downgraded.
1676
+ #recordAdvisorInterruptDelivered(): void {
1677
+ this.#advisorInterruptImmuneTurnStart = this.#advisorPrimaryTurnsCompleted + 1;
1678
+ }
1679
+
1611
1680
  #buildAdvisorRuntime(seedToCurrent = false): boolean {
1612
1681
  if (this.#isDisposed) return false;
1613
1682
  if (this.#advisorRuntime) return true;
@@ -1637,6 +1706,7 @@ export class AgentSession {
1637
1706
  // strand the advice and dump the backlog as one burst at the next prompt. A
1638
1707
  // plain nit always rides the non-interrupting YieldQueue aside.
1639
1708
  const enqueueAdvice = (note: string, severity?: AdvisorSeverity) => {
1709
+ const interrupting = isInterruptingSeverity(severity);
1640
1710
  const channel = resolveAdvisorDeliveryChannel({
1641
1711
  severity,
1642
1712
  autoResumeSuppressed: this.#advisorAutoResumeSuppressed,
@@ -1647,6 +1717,7 @@ export class AgentSession {
1647
1717
  // auto-resume it despite the user's interrupt.
1648
1718
  streaming: this.agent.state.isStreaming,
1649
1719
  aborting: this.#abortInProgress,
1720
+ interruptImmuneTurnActive: interrupting && this.#isAdvisorInterruptImmuneTurnActive(),
1650
1721
  });
1651
1722
  if (channel === "aside") {
1652
1723
  this.yieldQueue.enqueue("advisor", { note, severity });
@@ -1667,6 +1738,7 @@ export class AgentSession {
1667
1738
  });
1668
1739
  return;
1669
1740
  }
1741
+ this.#recordAdvisorInterruptDelivered();
1670
1742
  void this.sendCustomMessage(
1671
1743
  { customType: "advisor", content, display: true, attribution: "agent", details },
1672
1744
  { deliverAs: "steer", triggerTurn: true },
@@ -1682,6 +1754,25 @@ export class AgentSession {
1682
1754
  if (this.#advisorWatchdogPrompt) {
1683
1755
  systemPrompt.push(this.#advisorWatchdogPrompt);
1684
1756
  }
1757
+ const advisorSessionId = this.sessionId ? `${this.sessionId}-advisor` : undefined;
1758
+
1759
+ // Thread the primary's telemetry into the advisor loop so the advisor
1760
+ // model's GenAI spans + usage/cost hooks fire like every other model call,
1761
+ // stamped with the advisor's own identity. `conversationId` is cleared so
1762
+ // the advisor loop falls back to its own `-advisor` session id for
1763
+ // `gen_ai.conversation.id` instead of inheriting the primary's
1764
+ // conversation; undefined telemetry stays undefined (zero-overhead no-op).
1765
+ const advisorTelemetry = this.agent.telemetry
1766
+ ? {
1767
+ ...this.agent.telemetry,
1768
+ agent: {
1769
+ id: advisorSessionId,
1770
+ name: MODEL_ROLES.advisor.name,
1771
+ description: formatModelString(advisorSel.model),
1772
+ },
1773
+ conversationId: undefined,
1774
+ }
1775
+ : undefined;
1685
1776
  const advisorAgent = new Agent({
1686
1777
  initialState: {
1687
1778
  systemPrompt,
@@ -1690,16 +1781,10 @@ export class AgentSession {
1690
1781
  tools: [adviseTool, ...advisorReadOnlyTools],
1691
1782
  },
1692
1783
  appendOnlyContext,
1693
- sessionId: this.sessionId ? `${this.sessionId}-advisor` : undefined,
1694
- getApiKey: async provider => {
1695
- const key = await this.#modelRegistry.getApiKeyForProvider(
1696
- provider,
1697
- this.sessionId ? `${this.sessionId}-advisor` : undefined,
1698
- );
1699
- if (!key) throw new Error(`No API key for advisor provider "${provider}"`);
1700
- return key;
1701
- },
1784
+ sessionId: advisorSessionId,
1785
+ getApiKey: requestModel => this.#modelRegistry.resolver(requestModel, advisorSessionId),
1702
1786
  intentTracing: false,
1787
+ telemetry: advisorTelemetry,
1703
1788
  });
1704
1789
  advisorAgent.setDisableReasoning(shouldDisableReasoning(advisorThinkingLevel));
1705
1790
 
@@ -1875,24 +1960,26 @@ export class AgentSession {
1875
1960
 
1876
1961
  let compactResult: CompactionResult | undefined;
1877
1962
  let lastError: unknown;
1963
+ const advisorSessionId = this.sessionId ? `${this.sessionId}-advisor` : undefined;
1964
+ // Instrument the advisor's overflow-compaction one-shot like the primary
1965
+ // compaction path so the advisor model's maintenance call also emits spans.
1966
+ const telemetry = resolveTelemetry(advisor.telemetry, advisorSessionId);
1878
1967
 
1879
1968
  for (const candidate of candidates) {
1880
- const apiKey = await this.#modelRegistry.getApiKey(
1881
- candidate,
1882
- this.sessionId ? `${this.sessionId}-advisor` : undefined,
1883
- );
1969
+ const apiKey = await this.#modelRegistry.getApiKey(candidate, advisorSessionId);
1884
1970
  if (!apiKey) continue;
1885
1971
 
1886
1972
  try {
1887
1973
  compactResult = await compact(
1888
1974
  preparation,
1889
1975
  candidate,
1890
- this.#modelRegistry.resolver(candidate, this.sessionId ? `${this.sessionId}-advisor` : undefined),
1976
+ this.#modelRegistry.resolver(candidate, advisorSessionId),
1891
1977
  undefined,
1892
1978
  undefined,
1893
1979
  {
1894
1980
  thinkingLevel: advisorCompactionThinkingLevel,
1895
1981
  convertToLlm: messages => this.#convertToLlmForSideRequest(messages),
1982
+ telemetry,
1896
1983
  },
1897
1984
  );
1898
1985
  break;
@@ -2354,6 +2441,15 @@ export class AgentSession {
2354
2441
  event.message.role === "fileMention"
2355
2442
  ) {
2356
2443
  // Regular LLM message - persist as SessionMessageEntry
2444
+ if (event.message.role === "assistant") {
2445
+ const assistantMsg = event.message as AssistantMessage;
2446
+ if (assistantMsg.stopReason !== "aborted" && assistantMsg.stopReason !== "error" && assistantMsg.usage) {
2447
+ assistantMsg.contextSnapshot = {
2448
+ promptTokens: calculatePromptTokens(assistantMsg.usage),
2449
+ nonMessageTokens: this.#pendingContextSnapshot?.nonMessageTokens ?? computeNonMessageTokens(this),
2450
+ };
2451
+ }
2452
+ }
2357
2453
  this.sessionManager.appendMessage(event.message);
2358
2454
  }
2359
2455
  // Other message types (bashExecution, compactionSummary, branchSummary) are persisted elsewhere
@@ -2362,14 +2458,6 @@ export class AgentSession {
2362
2458
  if (event.message.role === "assistant") {
2363
2459
  this.#lastAssistantMessage = event.message;
2364
2460
  const assistantMsg = event.message as AssistantMessage;
2365
- if (assistantMsg.stopReason !== "aborted" && assistantMsg.stopReason !== "error" && assistantMsg.usage) {
2366
- this.#lastProviderUsageNonMessage = {
2367
- provider: assistantMsg.provider,
2368
- model: assistantMsg.model,
2369
- timestamp: assistantMsg.timestamp,
2370
- tokens: this.#pendingProviderRequestNonMessageTokens ?? computeNonMessageTokens(this),
2371
- };
2372
- }
2373
2461
  const currentGrantsAnthropicPriority =
2374
2462
  this.serviceTier === "priority" || this.serviceTier === "claude-only";
2375
2463
  if (assistantMsg.disabledFeatures?.includes("priority") && currentGrantsAnthropicPriority) {
@@ -2412,7 +2500,6 @@ export class AgentSession {
2412
2500
  this.#retryAttempt = 0;
2413
2501
  }
2414
2502
  }
2415
-
2416
2503
  if (event.message.role === "toolResult") {
2417
2504
  const { toolName, details, isError, content } = event.message as {
2418
2505
  toolName?: string;
@@ -2472,6 +2559,9 @@ export class AgentSession {
2472
2559
 
2473
2560
  // Check auto-retry and auto-compaction after agent completes
2474
2561
  if (event.type === "agent_end") {
2562
+ const emitAgentEndNotification = async () => {
2563
+ await this.#emitAgentEndNotification(event.messages);
2564
+ };
2475
2565
  const usage = this.getSessionStats().tokens;
2476
2566
  await this.#goalRuntime.onAgentEnd({
2477
2567
  currentUsage: {
@@ -2488,6 +2578,7 @@ export class AgentSession {
2488
2578
  this.#lastAssistantMessage = undefined;
2489
2579
  if (!msg) {
2490
2580
  this.#lastSuccessfulYieldToolCallId = undefined;
2581
+ await emitAgentEndNotification();
2491
2582
  return;
2492
2583
  }
2493
2584
 
@@ -2504,60 +2595,81 @@ export class AgentSession {
2504
2595
  if (this.#skipPostTurnMaintenanceAssistantTimestamp === msg.timestamp) {
2505
2596
  this.#skipPostTurnMaintenanceAssistantTimestamp = undefined;
2506
2597
  this.#lastSuccessfulYieldToolCallId = undefined;
2598
+ await emitAgentEndNotification();
2507
2599
  return;
2508
2600
  }
2509
2601
 
2510
2602
  if (this.#assistantEndedWithSuccessfulYield(msg)) {
2511
2603
  this.#lastSuccessfulYieldToolCallId = undefined;
2604
+ await emitAgentEndNotification();
2512
2605
  return;
2513
2606
  }
2514
2607
  this.#lastSuccessfulYieldToolCallId = undefined;
2515
2608
 
2516
2609
  if (await this.#handleEmptyAssistantStop(msg)) {
2610
+ await emitAgentEndNotification();
2517
2611
  return;
2518
2612
  }
2519
2613
  if (await this.#handleUnexpectedAssistantStop(msg)) {
2614
+ await emitAgentEndNotification();
2520
2615
  return;
2521
2616
  }
2522
2617
 
2523
2618
  if (this.#isRetryableReasonlessAbort(msg)) {
2524
2619
  const didRetry = await this.#handleRetryableError(msg, { allowModelFallback: false });
2525
- if (didRetry) return;
2620
+ if (didRetry) {
2621
+ await emitAgentEndNotification();
2622
+ return;
2623
+ }
2526
2624
  }
2527
2625
 
2528
2626
  // A deliberate abort should settle the current turn, not trigger queued continuations.
2529
2627
  if (msg.stopReason === "aborted") {
2530
2628
  this.#resolveRetry();
2629
+ this.#resetSessionStopContinuationState();
2630
+ await emitAgentEndNotification();
2531
2631
  return;
2532
2632
  }
2533
2633
  // Check for retryable errors first (overloaded, rate limit, server errors)
2534
2634
  if (this.#isRetryableError(msg)) {
2535
2635
  const didRetry = await this.#handleRetryableError(msg);
2536
- if (didRetry) return; // Retry was initiated, don't proceed to compaction
2636
+ if (didRetry) {
2637
+ await emitAgentEndNotification();
2638
+ return;
2639
+ }
2537
2640
  }
2538
2641
  this.#resolveRetry();
2539
2642
 
2540
2643
  const compactionTask = this.#checkCompaction(msg);
2541
2644
  this.#trackPostPromptTask(compactionTask);
2542
- const compactionDeferredHandoff = await compactionTask;
2645
+ const compactionResult = await compactionTask;
2543
2646
  // Check for incomplete todos only after a final assistant stop, not intermediate tool-use turns.
2544
2647
  const hasToolCalls = msg.content.some(content => content.type === "toolCall");
2545
2648
  if (hasToolCalls) {
2649
+ await emitAgentEndNotification();
2546
2650
  return;
2547
2651
  }
2548
- // When checkCompaction scheduled a deferred handoff, skip the rewind/todo passes:
2549
- // any reminder we append here would race the handoff's session reset, and
2550
- // #scheduleAgentContinue would start a fresh streaming turn alongside the handoff
2551
- // LLM call (visible as "Auto-handoff" loader + an assistant message still streaming).
2552
- if (compactionDeferredHandoff) {
2652
+ // When compaction queued recovery, skip the rewind/todo/session_stop passes:
2653
+ // any reminder or hook continuation we append here would race the handoff,
2654
+ // retry, auto-continue prompt, or queued-message drain that already owns the
2655
+ // next turn.
2656
+ if (compactionResult.deferredHandoff || compactionResult.continuationScheduled) {
2657
+ await emitAgentEndNotification();
2553
2658
  return;
2554
2659
  }
2555
2660
  if (msg.stopReason !== "error") {
2556
2661
  if (this.#enforceRewindBeforeYield()) {
2662
+ await emitAgentEndNotification();
2663
+ return;
2664
+ }
2665
+ const todoContinuationScheduled = await this.#checkTodoCompletion();
2666
+ if (todoContinuationScheduled) {
2667
+ await emitAgentEndNotification();
2557
2668
  return;
2558
2669
  }
2559
- await this.#checkTodoCompletion();
2560
2670
  }
2671
+ await this.#emitSessionStopEvent(event.messages);
2672
+ await emitAgentEndNotification();
2561
2673
  }
2562
2674
  };
2563
2675
 
@@ -3513,6 +3625,83 @@ export class AgentSession {
3513
3625
  }
3514
3626
  }
3515
3627
 
3628
+ #resetSessionStopContinuationState(): void {
3629
+ this.#sessionStopContinuationCount = 0;
3630
+ this.#sessionStopHookActive = false;
3631
+ }
3632
+
3633
+ #clearPendingSessionStopContinuations(): void {
3634
+ if (!this.#pendingNextTurnMessages.some(message => message.customType === "session-stop-continuation")) {
3635
+ return;
3636
+ }
3637
+ this.#pendingNextTurnMessages = this.#pendingNextTurnMessages.filter(
3638
+ message => message.customType !== "session-stop-continuation",
3639
+ );
3640
+ }
3641
+
3642
+ #sessionStopContinuationContext(result: SessionStopEventResult | undefined): string | undefined {
3643
+ if (!result) return undefined;
3644
+ const additionalContext =
3645
+ typeof result.additionalContext === "string" && result.additionalContext.length > 0
3646
+ ? result.additionalContext
3647
+ : undefined;
3648
+ const reason = typeof result.reason === "string" && result.reason.length > 0 ? result.reason : undefined;
3649
+ if (result.continue === true) {
3650
+ return additionalContext ?? reason;
3651
+ }
3652
+ if (result.decision === "block") {
3653
+ return reason ?? additionalContext;
3654
+ }
3655
+ return undefined;
3656
+ }
3657
+
3658
+ async #emitAgentEndNotification(messages: AgentMessage[]): Promise<void> {
3659
+ await this.#extensionRunner?.emit({ type: "agent_end", messages });
3660
+ }
3661
+
3662
+ async #emitSessionStopEvent(messages: AgentMessage[]): Promise<void> {
3663
+ if (this.#agentKind === "sub" || !this.#extensionRunner?.hasHandlers("session_stop")) return;
3664
+ const generation = this.#promptGeneration;
3665
+ const result = await this.#extensionRunner.emitSessionStop({
3666
+ messages,
3667
+ turn_id: Math.max(0, this.#turnIndex - 1),
3668
+ last_assistant_message: this.getLastAssistantMessage(),
3669
+ session_id: this.sessionId,
3670
+ session_file: this.sessionFile,
3671
+ stop_hook_active: this.#sessionStopHookActive,
3672
+ });
3673
+ if (this.#promptGeneration !== generation || this.#abortInProgress || this.#isDisposed) {
3674
+ this.#resetSessionStopContinuationState();
3675
+ return;
3676
+ }
3677
+ const additionalContext = this.#sessionStopContinuationContext(result);
3678
+ if (!additionalContext) {
3679
+ this.#resetSessionStopContinuationState();
3680
+ return;
3681
+ }
3682
+ if (this.#sessionStopContinuationCount >= SESSION_STOP_CONTINUATION_CAP) {
3683
+ logger.warn("session_stop continuation cap reached", {
3684
+ sessionId: this.sessionId,
3685
+ cap: SESSION_STOP_CONTINUATION_CAP,
3686
+ });
3687
+ this.#resetSessionStopContinuationState();
3688
+ return;
3689
+ }
3690
+ this.#sessionStopContinuationCount++;
3691
+ this.#sessionStopHookActive = true;
3692
+ this.#queueHiddenNextTurnMessage(
3693
+ {
3694
+ role: "custom",
3695
+ customType: "session-stop-continuation",
3696
+ content: additionalContext,
3697
+ display: false,
3698
+ attribution: "agent",
3699
+ timestamp: Date.now(),
3700
+ },
3701
+ true,
3702
+ );
3703
+ }
3704
+
3516
3705
  /** Emit extension events based on session events */
3517
3706
  async #emitExtensionEvent(event: AgentSessionEvent): Promise<void> {
3518
3707
  if (!this.#extensionRunner) return;
@@ -3520,7 +3709,9 @@ export class AgentSession {
3520
3709
  this.#turnIndex = 0;
3521
3710
  await this.#extensionRunner.emit({ type: "agent_start" });
3522
3711
  } else if (event.type === "agent_end") {
3523
- await this.#extensionRunner.emit({ type: "agent_end", messages: event.messages });
3712
+ // `agent_end` extension notification is emitted from the settled
3713
+ // agent_end maintenance path so `session_stop` control hooks are not
3714
+ // blocked by unrelated notification-only work.
3524
3715
  } else if (event.type === "turn_start") {
3525
3716
  const hookEvent: TurnStartEvent = {
3526
3717
  type: "turn_start",
@@ -4252,7 +4443,7 @@ export class AgentSession {
4252
4443
  }
4253
4444
  return new Proxy(tool, {
4254
4445
  get: (target, prop) => {
4255
- if (prop !== "execute") return Reflect.get(target, prop, target);
4446
+ if (prop !== "execute") return target[prop as keyof T];
4256
4447
  return async (
4257
4448
  toolCallId: string,
4258
4449
  args: unknown,
@@ -4791,11 +4982,19 @@ export class AgentSession {
4791
4982
  openrouterRoutingPreset !== "default" && options.openrouterVariant === undefined
4792
4983
  ? openrouterRoutingPreset
4793
4984
  : undefined;
4794
- if (!sessionOnPayload && !sessionOnResponse && !sessionMetadata && !sessionOnSseEvent && !openrouterVariant)
4795
- return options;
4985
+ const antigravityEndpointMode =
4986
+ provider === "google-antigravity" ? this.settings.get("providers.antigravityEndpoint") : undefined;
4796
4987
 
4797
- const preparedOptions: SimpleStreamOptions =
4798
- openrouterVariant === undefined ? { ...options } : { ...options, openrouterVariant };
4988
+ const preparedOptions: SimpleStreamOptions = {
4989
+ ...options,
4990
+ ...(openrouterVariant !== undefined && { openrouterVariant }),
4991
+ ...(antigravityEndpointMode !== undefined && { antigravityEndpointMode }),
4992
+ loopGuard: {
4993
+ enabled: this.settings.get("model.loopGuard.enabled"),
4994
+ checkAssistantContent: this.settings.get("model.loopGuard.checkAssistantContent"),
4995
+ ...options.loopGuard,
4996
+ },
4997
+ };
4799
4998
 
4800
4999
  // Stamp session metadata (e.g. user_id={session_id}) onto direct-call requests so
4801
5000
  // they share the same session bucket as Agent.prompt-routed requests on Anthropic
@@ -5114,6 +5313,62 @@ export class AgentSession {
5114
5313
  return normalizeModelContextImages(images, { model: this.model });
5115
5314
  }
5116
5315
 
5316
+ /**
5317
+ * Build a hidden companion message describing image attachments for a text-only
5318
+ * model. Each image is saved under local:// and a vision-capable model describes
5319
+ * it; the descriptions are returned as a `display: false` custom message (so the
5320
+ * model reads them but the TUI does not render the blob) carrying one
5321
+ * `<image path="local://…">…</image>` block per image. Returns `undefined` when
5322
+ * the active model already accepts images, the feature is disabled, or no
5323
+ * description could be produced. Never throws.
5324
+ */
5325
+ async #buildImageDescriptionNotice(
5326
+ normalizedImages: ImageContent[],
5327
+ signal?: AbortSignal,
5328
+ ): Promise<CustomMessage | undefined> {
5329
+ const model = this.model;
5330
+ const shouldDescribe =
5331
+ !!model &&
5332
+ !model.input.includes("image") &&
5333
+ !this.settings.get("images.blockImages") &&
5334
+ this.settings.get("images.describeForTextModels");
5335
+ if (!shouldDescribe || !model) {
5336
+ return undefined;
5337
+ }
5338
+ let blocks: TextContent[];
5339
+ try {
5340
+ blocks = await describeAttachedImagesForTextModel(
5341
+ normalizedImages,
5342
+ {
5343
+ activeModel: model,
5344
+ modelRegistry: this.#modelRegistry,
5345
+ settings: this.settings,
5346
+ localProtocolOptions: this.#localProtocolOptions(),
5347
+ activeModelString: formatModelString(model),
5348
+ telemetryConfig: this.agent.telemetry,
5349
+ sessionId: this.sessionId,
5350
+ },
5351
+ signal,
5352
+ );
5353
+ } catch (err) {
5354
+ logger.warn("image attachment vision fallback failed; image left undescribed", {
5355
+ error: err instanceof Error ? err.message : String(err),
5356
+ });
5357
+ return undefined;
5358
+ }
5359
+ if (blocks.length === 0) {
5360
+ return undefined;
5361
+ }
5362
+ return {
5363
+ role: "custom",
5364
+ customType: IMAGE_ATTACHMENT_DESCRIPTION_TYPE,
5365
+ content: blocks,
5366
+ display: false,
5367
+ attribution: "user",
5368
+ timestamp: Date.now(),
5369
+ };
5370
+ }
5371
+
5117
5372
  async #normalizeMessageContentImages(
5118
5373
  content: string | (TextContent | ImageContent)[],
5119
5374
  ): Promise<string | (TextContent | ImageContent)[]> {
@@ -5261,9 +5516,14 @@ export class AgentSession {
5261
5516
  const normalizedImages = await this.#normalizeImagesForModel(options?.images);
5262
5517
 
5263
5518
  const userContent: (TextContent | ImageContent)[] = [{ type: "text", text: expandedText }];
5264
- if (normalizedImages) {
5519
+ if (normalizedImages?.length) {
5265
5520
  userContent.push(...normalizedImages);
5266
5521
  }
5522
+ // Text-only model + image attachment: describe via a vision model and inject the
5523
+ // description as a hidden companion (the image stays in the visible user message).
5524
+ const imageDescriptionNotice = normalizedImages?.length
5525
+ ? await this.#buildImageDescriptionNotice(normalizedImages)
5526
+ : undefined;
5267
5527
 
5268
5528
  const promptAttribution = options?.attribution ?? (options?.synthetic ? "agent" : "user");
5269
5529
  const message = options?.synthetic
@@ -5288,8 +5548,8 @@ export class AgentSession {
5288
5548
  ...options,
5289
5549
  images: normalizedImages,
5290
5550
  prependMessages:
5291
- preludeMessages.length > 0 || keywordNotices.length > 0
5292
- ? [...preludeMessages, ...keywordNotices]
5551
+ preludeMessages.length > 0 || keywordNotices.length > 0 || imageDescriptionNotice
5552
+ ? [...preludeMessages, ...keywordNotices, ...(imageDescriptionNotice ? [imageDescriptionNotice] : [])]
5293
5553
  : undefined,
5294
5554
  });
5295
5555
  } finally {
@@ -5510,11 +5770,23 @@ export class AgentSession {
5510
5770
  }
5511
5771
 
5512
5772
  const agentPromptOptions = options?.toolChoice ? { toolChoice: options.toolChoice } : undefined;
5513
- this.#pendingProviderRequestNonMessageTokens = computeNonMessageTokens(this);
5773
+ const nonMessageTokens = computeNonMessageTokens(this);
5774
+ const contextWindow = this.model?.contextWindow ?? 0;
5775
+ const breakdown = this.getContextBreakdown({ contextWindow, pendingMessages: messages });
5776
+ const promptTokens =
5777
+ breakdown?.usedTokens ??
5778
+ nonMessageTokens +
5779
+ this.messages.reduce((sum, msg) => sum + estimateTokens(msg), 0) +
5780
+ messages.reduce((sum, msg) => sum + estimateTokens(msg), 0);
5781
+ this.#setPendingContextSnapshot({
5782
+ promptTokens,
5783
+ nonMessageTokens,
5784
+ cutoffCount: this.messages.length + messages.length,
5785
+ });
5514
5786
  try {
5515
5787
  await this.#promptAgentWithIdleRetry(messages, agentPromptOptions);
5516
5788
  } finally {
5517
- this.#pendingProviderRequestNonMessageTokens = undefined;
5789
+ this.#setPendingContextSnapshot(undefined);
5518
5790
  }
5519
5791
  if (!options?.skipPostPromptRecoveryWait) {
5520
5792
  await this.#waitForPostPromptRecovery(generation);
@@ -5699,7 +5971,13 @@ export class AgentSession {
5699
5971
  if (normalizedImages?.length) {
5700
5972
  content.push(...normalizedImages);
5701
5973
  }
5974
+ // Text-only model + image attachment: describe via a vision model and enqueue the
5975
+ // description as a hidden companion immediately before the user message.
5976
+ const imageDescriptionNotice = normalizedImages?.length
5977
+ ? await this.#buildImageDescriptionNotice(normalizedImages)
5978
+ : undefined;
5702
5979
  if (mode === "followUp") {
5980
+ if (imageDescriptionNotice) this.agent.followUp(imageDescriptionNotice);
5703
5981
  this.agent.followUp({
5704
5982
  role: "user",
5705
5983
  content,
@@ -5707,6 +5985,7 @@ export class AgentSession {
5707
5985
  timestamp: Date.now(),
5708
5986
  });
5709
5987
  } else {
5988
+ if (imageDescriptionNotice) this.agent.steer(imageDescriptionNotice);
5710
5989
  this.agent.steer({
5711
5990
  role: "user",
5712
5991
  content,
@@ -5857,6 +6136,16 @@ export class AgentSession {
5857
6136
  }
5858
6137
  }
5859
6138
 
6139
+ async #promptAgentInitiatedMessage(message: CustomMessage): Promise<void> {
6140
+ this.#beginInFlight();
6141
+ try {
6142
+ await this.agent.prompt(message);
6143
+ await this.#waitForPostPromptRecovery();
6144
+ } finally {
6145
+ this.#endInFlight();
6146
+ }
6147
+ }
6148
+
5860
6149
  /**
5861
6150
  * Send a custom message to the session. Creates a CustomMessageEntry.
5862
6151
  *
@@ -5916,7 +6205,7 @@ export class AgentSession {
5916
6205
  this.#queueHiddenNextTurnMessage(normalizedAppMessage, false);
5917
6206
  return false;
5918
6207
  }
5919
- await this.agent.prompt(normalizedAppMessage);
6208
+ await this.#promptAgentInitiatedMessage(normalizedAppMessage);
5920
6209
  return true;
5921
6210
  }
5922
6211
  this.agent.appendMessage(normalizedAppMessage);
@@ -5935,7 +6224,7 @@ export class AgentSession {
5935
6224
  this.#queueHiddenNextTurnMessage(normalizedAppMessage, false);
5936
6225
  return false;
5937
6226
  }
5938
- await this.agent.prompt(normalizedAppMessage);
6227
+ await this.#promptAgentInitiatedMessage(normalizedAppMessage);
5939
6228
  return true;
5940
6229
  }
5941
6230
 
@@ -6158,6 +6447,8 @@ export class AgentSession {
6158
6447
  // block runs, but nested prompt setup/finalizers may still be unwinding. Without this,
6159
6448
  // a subsequent prompt() can incorrectly observe the session as busy after an abort.
6160
6449
  this.#resetInFlight();
6450
+ this.#resetSessionStopContinuationState();
6451
+ this.#clearPendingSessionStopContinuations();
6161
6452
  // Safety net: if the agent loop aborted without producing an assistant
6162
6453
  // message (e.g. failed before the first stream), the in-flight yield was
6163
6454
  // never resolved or rejected by the normal message_end path. Reject it now
@@ -7458,39 +7749,12 @@ export class AgentSession {
7458
7749
  }
7459
7750
  }
7460
7751
 
7461
- #estimatePendingPromptTokens(messages: AgentMessage[]): number {
7462
- let tokens = computeNonMessageTokens(this);
7463
- for (const message of this.messages) {
7464
- tokens += estimateTokens(message);
7465
- }
7466
- for (const message of messages) {
7467
- tokens += estimateTokens(message);
7468
- }
7469
- return tokens;
7470
- }
7471
-
7472
7752
  #estimatePrePromptContextTokens(messages: AgentMessage[], contextWindow: number): number {
7473
- const currentUsage = this.getContextUsage({ contextWindow });
7474
- if (typeof currentUsage?.tokens !== "number" || !Number.isFinite(currentUsage.tokens)) {
7475
- return this.#estimatePendingPromptTokens(messages);
7476
- }
7477
-
7478
- const currentEstimate = this.#estimateContextTokens();
7479
- if (!currentEstimate.providerAnchored) {
7480
- return this.#estimatePendingPromptTokens(messages);
7481
- }
7482
-
7483
- let tokens = currentUsage.tokens;
7484
- const previousNonMessageTokens = currentEstimate.providerNonMessageTokens;
7485
- if (previousNonMessageTokens !== undefined) {
7486
- const currentNonMessageTokens = computeNonMessageTokens(this);
7487
- const nonMessageTokenGrowth = Math.max(0, currentNonMessageTokens - previousNonMessageTokens);
7488
- tokens += nonMessageTokenGrowth;
7489
- }
7490
- for (const message of messages) {
7491
- tokens += estimateTokens(message);
7492
- }
7493
- return tokens;
7753
+ const breakdown = this.getContextBreakdown({ contextWindow, pendingMessages: messages });
7754
+ return (
7755
+ breakdown?.usedTokens ??
7756
+ computeNonMessageTokens(this) + messages.reduce((sum, msg) => sum + estimateTokens(msg), 0)
7757
+ );
7494
7758
  }
7495
7759
 
7496
7760
  async #runPrePromptCompactionIfNeeded(messages: AgentMessage[]): Promise<void> {
@@ -7544,19 +7808,19 @@ export class AgentSession {
7544
7808
  * on the pre-prompt path (where the next agent turn is about to start) set it to false
7545
7809
  * to avoid racing the deferred handoff against the new turn.
7546
7810
  * @param autoContinue Whether maintenance may schedule the agent-authored continuation prompt.
7547
- * @returns true when a deferred handoff was scheduled. Callers MUST then skip any
7548
- * subsequent `#scheduleAgentContinue` / reminder appends for this turn the
7549
- * handoff will replace session state and a concurrent `agent.continue()` would
7550
- * stream into the soon-to-be-discarded session.
7811
+ * @returns whether compaction/recovery scheduled a handoff, retry, auto-continue, or
7812
+ * queued-message drain that already owns the next turn. Callers MUST skip
7813
+ * `session_stop` and other agent continuations when `continuationScheduled`
7814
+ * is true.
7551
7815
  */
7552
7816
  async #checkCompaction(
7553
7817
  assistantMessage: AssistantMessage,
7554
7818
  skipAbortedCheck = true,
7555
7819
  allowDefer = true,
7556
7820
  autoContinue = true,
7557
- ): Promise<boolean> {
7821
+ ): Promise<CompactionCheckResult> {
7558
7822
  // Skip if message was aborted (user cancelled) - unless skipAbortedCheck is false
7559
- if (skipAbortedCheck && assistantMessage.stopReason === "aborted") return false;
7823
+ if (skipAbortedCheck && assistantMessage.stopReason === "aborted") return COMPACTION_CHECK_NONE;
7560
7824
  const contextWindow = this.model?.contextWindow ?? 0;
7561
7825
  const generation = this.#promptGeneration;
7562
7826
  // Skip overflow check if the message came from a different model.
@@ -7585,15 +7849,15 @@ export class AgentSession {
7585
7849
  if (promoted) {
7586
7850
  // Retry on the promoted (larger) model without compacting
7587
7851
  this.#scheduleAgentContinue({ delayMs: 100, generation });
7588
- return false;
7852
+ return COMPACTION_CHECK_CONTINUATION;
7589
7853
  }
7590
7854
 
7591
7855
  // No promotion target available fall through to compaction
7592
7856
  const compactionSettings = this.settings.getGroup("compaction");
7593
7857
  if (compactionSettings.enabled && compactionSettings.strategy !== "off") {
7594
- await this.#runAutoCompaction("overflow", true, false, allowDefer, { autoContinue });
7858
+ return await this.#runAutoCompaction("overflow", true, false, allowDefer, { autoContinue });
7595
7859
  }
7596
- return false;
7860
+ return COMPACTION_CHECK_NONE;
7597
7861
  }
7598
7862
 
7599
7863
  // Case 3: Output-side incomplete — `response.incomplete` from OpenAI Responses
@@ -7614,7 +7878,7 @@ export class AgentSession {
7614
7878
  from: `${assistantMessage.provider}/${assistantMessage.model}`,
7615
7879
  });
7616
7880
  this.#scheduleAgentContinue({ delayMs: 100, generation });
7617
- return false;
7881
+ return COMPACTION_CHECK_CONTINUATION;
7618
7882
  }
7619
7883
 
7620
7884
  const incompleteCompactionSettings = this.settings.getGroup("compaction");
@@ -7623,18 +7887,17 @@ export class AgentSession {
7623
7887
  model: `${assistantMessage.provider}/${assistantMessage.model}`,
7624
7888
  strategy: incompleteCompactionSettings.strategy,
7625
7889
  });
7626
- await this.#runAutoCompaction("incomplete", true, false, allowDefer, {
7890
+ return await this.#runAutoCompaction("incomplete", true, false, allowDefer, {
7627
7891
  autoContinue,
7628
7892
  triggerContextTokens: calculateContextTokens(assistantMessage.usage),
7629
7893
  });
7630
- } else {
7631
- // Neither promotion nor compaction is available — surface the dead-end so
7632
- // the user understands why the turn yielded with nothing.
7633
- logger.warn("response.incomplete with no recovery path (promotion + compaction both unavailable)", {
7634
- model: `${assistantMessage.provider}/${assistantMessage.model}`,
7635
- });
7636
7894
  }
7637
- return false;
7895
+ // Neither promotion nor compaction is available — surface the dead-end so
7896
+ // the user understands why the turn yielded with nothing.
7897
+ logger.warn("response.incomplete with no recovery path (promotion + compaction both unavailable)", {
7898
+ model: `${assistantMessage.provider}/${assistantMessage.model}`,
7899
+ });
7900
+ return COMPACTION_CHECK_NONE;
7638
7901
  }
7639
7902
 
7640
7903
  // Stale-result pass runs every turn, before any threshold gating: it is
@@ -7643,11 +7906,11 @@ export class AgentSession {
7643
7906
  const supersedeResult = await this.#pruneStaleToolResults();
7644
7907
 
7645
7908
  const compactionSettings = this.settings.getGroup("compaction");
7646
- if (!compactionSettings.enabled || compactionSettings.strategy === "off") return false;
7909
+ if (!compactionSettings.enabled || compactionSettings.strategy === "off") return COMPACTION_CHECK_NONE;
7647
7910
 
7648
7911
  // Case 4: Threshold - turn succeeded but context is getting large
7649
7912
  // Skip if this was an error (non-overflow errors don't have usage data)
7650
- if (assistantMessage.stopReason === "error") return false;
7913
+ if (assistantMessage.stopReason === "error") return COMPACTION_CHECK_NONE;
7651
7914
  const pruneResult = await this.#pruneToolOutputs();
7652
7915
  let contextTokens = calculateContextTokens(assistantMessage.usage);
7653
7916
  if (supersedeResult) {
@@ -7666,7 +7929,7 @@ export class AgentSession {
7666
7929
  });
7667
7930
  }
7668
7931
  }
7669
- return false;
7932
+ return COMPACTION_CHECK_NONE;
7670
7933
  }
7671
7934
  #assistantEndedWithSuccessfulYield(assistantMessage: AssistantMessage): boolean {
7672
7935
  const toolCallId = this.#lastSuccessfulYieldToolCallId;
@@ -7706,7 +7969,7 @@ export class AgentSession {
7706
7969
  if (assistantMessage.stopReason === "toolUse") {
7707
7970
  this.#removeEmptyStopFromActiveContext(assistantMessage);
7708
7971
  }
7709
- return true;
7972
+ return false;
7710
7973
  }
7711
7974
  this.#removeEmptyStopFromActiveContext(assistantMessage);
7712
7975
  this.agent.appendMessage({
@@ -8081,12 +8344,12 @@ export class AgentSession {
8081
8344
  /**
8082
8345
  * Check if agent stopped with incomplete todos and prompt to continue.
8083
8346
  */
8084
- async #checkTodoCompletion(): Promise<void> {
8347
+ async #checkTodoCompletion(): Promise<boolean> {
8085
8348
  // Skip todo reminders when the most recent turn was driven by an explicit user force —
8086
8349
  // the user wanted exactly that tool, not a follow-up nag about incomplete todos.
8087
8350
  const lastServedLabel = this.#toolChoiceQueue.consumeLastServedLabel();
8088
8351
  if (lastServedLabel === "user-force") {
8089
- return;
8352
+ return false;
8090
8353
  }
8091
8354
 
8092
8355
  // Suppress within a self-continuation chain: if the agent's last turn was driven by a
@@ -8097,7 +8360,7 @@ export class AgentSession {
8097
8360
  logger.debug("Todo completion: prior reminder still awaiting agent action; staying silent", {
8098
8361
  attempt: this.#todoReminderCount,
8099
8362
  });
8100
- return;
8363
+ return false;
8101
8364
  }
8102
8365
 
8103
8366
  const remindersEnabled = this.settings.get("todo.reminders");
@@ -8105,20 +8368,20 @@ export class AgentSession {
8105
8368
  if (!remindersEnabled || !todosEnabled) {
8106
8369
  this.#todoReminderCount = 0;
8107
8370
  this.#todoReminderAwaitingProgress = false;
8108
- return;
8371
+ return false;
8109
8372
  }
8110
8373
 
8111
8374
  const remindersMax = this.settings.get("todo.reminders.max");
8112
8375
  if (this.#todoReminderCount >= remindersMax) {
8113
8376
  logger.debug("Todo completion: max reminders reached", { count: this.#todoReminderCount });
8114
- return;
8377
+ return false;
8115
8378
  }
8116
8379
 
8117
8380
  const phases = this.getTodoPhases();
8118
8381
  if (phases.length === 0) {
8119
8382
  this.#todoReminderCount = 0;
8120
8383
  this.#todoReminderAwaitingProgress = false;
8121
- return;
8384
+ return false;
8122
8385
  }
8123
8386
 
8124
8387
  const incompleteByPhase = phases
@@ -8136,7 +8399,7 @@ export class AgentSession {
8136
8399
  if (incomplete.length === 0) {
8137
8400
  this.#todoReminderCount = 0;
8138
8401
  this.#todoReminderAwaitingProgress = false;
8139
- return;
8402
+ return false;
8140
8403
  }
8141
8404
 
8142
8405
  // Build reminder message
@@ -8176,6 +8439,7 @@ export class AgentSession {
8176
8439
  this.agent.appendMessage(reminderMessage);
8177
8440
  this.sessionManager.appendMessage(reminderMessage);
8178
8441
  this.#scheduleAgentContinue({ generation: this.#promptGeneration });
8442
+ return true;
8179
8443
  }
8180
8444
 
8181
8445
  /**
@@ -8461,9 +8725,13 @@ export class AgentSession {
8461
8725
  }
8462
8726
 
8463
8727
  #didSessionMessagesChange(previousMessages: AgentMessage[], nextMessages: AgentMessage[]): boolean {
8464
- return (
8465
- JSON.stringify(previousMessages.map(message => this.#normalizeSessionMessageForProviderReplay(message))) !==
8466
- JSON.stringify(nextMessages.map(message => this.#normalizeSessionMessageForProviderReplay(message)))
8728
+ if (previousMessages.length !== nextMessages.length) return true;
8729
+ return previousMessages.some(
8730
+ (message, i) =>
8731
+ !Bun.deepEquals(
8732
+ this.#normalizeSessionMessageForProviderReplay(message),
8733
+ this.#normalizeSessionMessageForProviderReplay(nextMessages[i]),
8734
+ ),
8467
8735
  );
8468
8736
  }
8469
8737
 
@@ -8709,14 +8977,14 @@ export class AgentSession {
8709
8977
  * Internal: Run auto-compaction with events.
8710
8978
  *
8711
8979
  * @param allowDefer If true (default), threshold-driven handoff strategy is allowed to
8712
- * schedule itself as a deferred post-prompt task and return `true` immediately. The
8713
- * caller MUST treat that as "compaction will happen async — do not also schedule
8714
- * `agent.continue()` for this turn", otherwise the deferred handoff races a fresh
8715
- * streaming turn (the symptom: "Auto-handoff" loader + assistant message still
8716
- * streaming). Callers on a path that is about to start a new agent turn (e.g.
8717
- * the pre-prompt check in `#promptWithMessage`) pass `false` to force inline
8718
- * execution so the handoff completes before the new turn begins.
8719
- * @returns true when a deferred handoff was scheduled. Inline runs always return false.
8980
+ * schedule itself as a deferred post-prompt task and return a deferred-handoff result
8981
+ * immediately. The caller MUST treat that as "compaction will happen async — do not
8982
+ * also schedule `agent.continue()` for this turn", otherwise the deferred handoff
8983
+ * races a fresh streaming turn (the symptom: "Auto-handoff" loader + assistant
8984
+ * message still streaming). Callers on a path that is about to start a new agent
8985
+ * turn (e.g. the pre-prompt check in `#promptWithMessage`) pass `false` to force
8986
+ * inline execution so the handoff completes before the new turn begins.
8987
+ * @returns whether auto-compaction scheduled a follow-up turn.
8720
8988
  */
8721
8989
  async #runAutoCompaction(
8722
8990
  reason: "overflow" | "threshold" | "idle" | "incomplete",
@@ -8724,10 +8992,10 @@ export class AgentSession {
8724
8992
  deferred = false,
8725
8993
  allowDefer = true,
8726
8994
  options: { autoContinue?: boolean; triggerContextTokens?: number } = {},
8727
- ): Promise<boolean> {
8995
+ ): Promise<CompactionCheckResult> {
8728
8996
  const compactionSettings = this.settings.getGroup("compaction");
8729
- if (compactionSettings.strategy === "off") return false;
8730
- if (reason !== "idle" && !compactionSettings.enabled) return false;
8997
+ if (compactionSettings.strategy === "off") return COMPACTION_CHECK_NONE;
8998
+ if (reason !== "idle" && !compactionSettings.enabled) return COMPACTION_CHECK_NONE;
8731
8999
  const generation = this.#promptGeneration;
8732
9000
  const shouldAutoContinue = options.autoContinue !== false && compactionSettings.autoContinue !== false;
8733
9001
  // Shake runs inline (cheap, no remote LLM). On overflow recovery, if shake
@@ -8741,7 +9009,7 @@ export class AgentSession {
8741
9009
  shouldAutoContinue,
8742
9010
  options.triggerContextTokens,
8743
9011
  );
8744
- if (outcome !== "fallback") return false;
9012
+ if (outcome !== "fallback") return outcome;
8745
9013
  }
8746
9014
  // "overflow" and "incomplete" force inline execution because they are recovery
8747
9015
  // paths the caller wants resolved before scheduling the next turn. "idle" is
@@ -8762,7 +9030,7 @@ export class AgentSession {
8762
9030
  },
8763
9031
  { generation },
8764
9032
  );
8765
- return true;
9033
+ return COMPACTION_CHECK_DEFERRED_HANDOFF;
8766
9034
  }
8767
9035
 
8768
9036
  // "overflow" forces context-full because the input itself is broken — a handoff
@@ -8810,7 +9078,7 @@ export class AgentSession {
8810
9078
  aborted: true,
8811
9079
  willRetry: false,
8812
9080
  });
8813
- return false;
9081
+ return COMPACTION_CHECK_NONE;
8814
9082
  }
8815
9083
  logger.warn("Auto-handoff returned no document; falling back to context-full maintenance", {
8816
9084
  reason,
@@ -8825,10 +9093,11 @@ export class AgentSession {
8825
9093
  aborted: false,
8826
9094
  willRetry: false,
8827
9095
  });
8828
- if (!autoCompactionSignal.aborted && reason !== "idle" && shouldAutoContinue) {
9096
+ const continuationScheduled = !autoCompactionSignal.aborted && reason !== "idle" && shouldAutoContinue;
9097
+ if (continuationScheduled) {
8829
9098
  this.#scheduleAutoContinuePrompt(generation);
8830
9099
  }
8831
- return false;
9100
+ return continuationScheduled ? COMPACTION_CHECK_CONTINUATION : COMPACTION_CHECK_NONE;
8832
9101
  }
8833
9102
  }
8834
9103
 
@@ -8841,7 +9110,7 @@ export class AgentSession {
8841
9110
  willRetry: false,
8842
9111
  skipped: true,
8843
9112
  });
8844
- return false;
9113
+ return COMPACTION_CHECK_NONE;
8845
9114
  }
8846
9115
 
8847
9116
  const availableModels = this.#modelRegistry.getAvailable();
@@ -8854,7 +9123,7 @@ export class AgentSession {
8854
9123
  willRetry: false,
8855
9124
  skipped: true,
8856
9125
  });
8857
- return false;
9126
+ return COMPACTION_CHECK_NONE;
8858
9127
  }
8859
9128
 
8860
9129
  const pathEntries = this.sessionManager.getBranch();
@@ -8875,8 +9144,9 @@ export class AgentSession {
8875
9144
  generation,
8876
9145
  shouldContinue: () => this.agent.hasQueuedMessages(),
8877
9146
  });
9147
+ return COMPACTION_CHECK_CONTINUATION;
8878
9148
  }
8879
- return false;
9149
+ return COMPACTION_CHECK_NONE;
8880
9150
  }
8881
9151
 
8882
9152
  let hookCompaction: CompactionResult | undefined;
@@ -8900,7 +9170,7 @@ export class AgentSession {
8900
9170
  aborted: true,
8901
9171
  willRetry: false,
8902
9172
  });
8903
- return false;
9173
+ return COMPACTION_CHECK_NONE;
8904
9174
  }
8905
9175
 
8906
9176
  if (hookResult?.compaction) {
@@ -8971,7 +9241,9 @@ export class AgentSession {
8971
9241
  let compactResult: CompactionResult | undefined;
8972
9242
  let lastError: unknown;
8973
9243
 
8974
- for (const candidate of candidates) {
9244
+ for (let candidateIndex = 0; candidateIndex < candidates.length; candidateIndex++) {
9245
+ const candidate = candidates[candidateIndex];
9246
+ const hasMoreCandidates = candidateIndex < candidates.length - 1;
8975
9247
  const apiKey = await this.#modelRegistry.getApiKey(candidate, this.sessionId);
8976
9248
  if (!apiKey) continue;
8977
9249
 
@@ -9010,6 +9282,20 @@ export class AgentSession {
9010
9282
  lastError = this.#buildCompactionAuthError();
9011
9283
  break;
9012
9284
  }
9285
+ if (this.#isCompactionSummarizationTimeoutMessage(message)) {
9286
+ logger.warn(
9287
+ hasMoreCandidates
9288
+ ? "Auto-compaction summarization timed out, trying next model"
9289
+ : "Auto-compaction summarization timed out, not retrying same model",
9290
+ {
9291
+ error: message,
9292
+ model: `${candidate.provider}/${candidate.id}`,
9293
+ },
9294
+ );
9295
+ lastError = error;
9296
+ break;
9297
+ }
9298
+
9013
9299
  const retryAfterMs = this.#parseRetryAfterMsFromError(message);
9014
9300
  const shouldRetry =
9015
9301
  retrySettings.enabled &&
@@ -9027,19 +9313,15 @@ export class AgentSession {
9027
9313
 
9028
9314
  // If retry delay is too long (>30s), try next candidate instead of waiting
9029
9315
  const maxAcceptableDelayMs = 30_000;
9030
- if (delayMs > maxAcceptableDelayMs) {
9031
- const hasMoreCandidates = candidates.indexOf(candidate) < candidates.length - 1;
9032
- if (hasMoreCandidates) {
9033
- logger.warn("Auto-compaction retry delay too long, trying next model", {
9034
- delayMs,
9035
- retryAfterMs,
9036
- error: message,
9037
- model: `${candidate.provider}/${candidate.id}`,
9038
- });
9039
- lastError = error;
9040
- break; // Exit retry loop, continue to next candidate
9041
- }
9042
- // No more candidates - we have to wait
9316
+ if (delayMs > maxAcceptableDelayMs && hasMoreCandidates) {
9317
+ logger.warn("Auto-compaction retry delay too long, trying next model", {
9318
+ delayMs,
9319
+ retryAfterMs,
9320
+ error: message,
9321
+ model: `${candidate.provider}/${candidate.id}`,
9322
+ });
9323
+ lastError = error;
9324
+ break; // Exit retry loop, continue to next candidate
9043
9325
  }
9044
9326
 
9045
9327
  attempt++;
@@ -9083,7 +9365,7 @@ export class AgentSession {
9083
9365
  aborted: true,
9084
9366
  willRetry: false,
9085
9367
  });
9086
- return false;
9368
+ return COMPACTION_CHECK_NONE;
9087
9369
  }
9088
9370
 
9089
9371
  this.sessionManager.appendCompaction(
@@ -9125,8 +9407,10 @@ export class AgentSession {
9125
9407
  };
9126
9408
  await this.#emitSessionEvent({ type: "auto_compaction_end", action, result, aborted: false, willRetry });
9127
9409
 
9410
+ let continuationScheduled = false;
9128
9411
  if (!willRetry && reason !== "idle" && shouldAutoContinue) {
9129
9412
  this.#scheduleAutoContinuePrompt(generation);
9413
+ continuationScheduled = true;
9130
9414
  }
9131
9415
 
9132
9416
  if (willRetry) {
@@ -9147,6 +9431,7 @@ export class AgentSession {
9147
9431
  }
9148
9432
 
9149
9433
  this.#scheduleAgentContinue({ delayMs: 100, generation });
9434
+ continuationScheduled = true;
9150
9435
  } else if (this.agent.hasQueuedMessages()) {
9151
9436
  // Auto-compaction can complete while follow-up/steering/custom messages are waiting.
9152
9437
  // Kick the loop so queued messages are actually delivered.
@@ -9155,7 +9440,9 @@ export class AgentSession {
9155
9440
  generation,
9156
9441
  shouldContinue: () => this.agent.hasQueuedMessages(),
9157
9442
  });
9443
+ continuationScheduled = true;
9158
9444
  }
9445
+ return continuationScheduled ? COMPACTION_CHECK_CONTINUATION : COMPACTION_CHECK_NONE;
9159
9446
  } catch (error) {
9160
9447
  if (autoCompactionSignal.aborted) {
9161
9448
  await this.#emitSessionEvent({
@@ -9165,7 +9452,7 @@ export class AgentSession {
9165
9452
  aborted: true,
9166
9453
  willRetry: false,
9167
9454
  });
9168
- return false;
9455
+ return COMPACTION_CHECK_NONE;
9169
9456
  }
9170
9457
  const errorMessage = error instanceof Error ? error.message : "compaction failed";
9171
9458
  await this.#emitSessionEvent({
@@ -9186,7 +9473,7 @@ export class AgentSession {
9186
9473
  this.#autoCompactionAbortController = undefined;
9187
9474
  }
9188
9475
  }
9189
- return false;
9476
+ return COMPACTION_CHECK_NONE;
9190
9477
  }
9191
9478
 
9192
9479
  /**
@@ -9205,7 +9492,7 @@ export class AgentSession {
9205
9492
  generation: number,
9206
9493
  autoContinue: boolean,
9207
9494
  triggerContextTokens?: number,
9208
- ): Promise<"handled" | "fallback"> {
9495
+ ): Promise<CompactionCheckResult | "fallback"> {
9209
9496
  const action = "shake";
9210
9497
  await this.#emitSessionEvent({ type: "auto_compaction_start", reason, action });
9211
9498
  this.#autoCompactionAbortController?.abort();
@@ -9222,7 +9509,7 @@ export class AgentSession {
9222
9509
  aborted: true,
9223
9510
  willRetry: false,
9224
9511
  });
9225
- return "handled";
9512
+ return COMPACTION_CHECK_NONE;
9226
9513
  }
9227
9514
  const reclaimed = result.toolResultsDropped + result.blocksDropped > 0;
9228
9515
  // Detect the dead-loop reported in issues #2119/#2275: the threshold check
@@ -9254,7 +9541,7 @@ export class AgentSession {
9254
9541
  const recoveryBand = Math.floor(thresholdTokens * SHAKE_RECOVERY_BAND);
9255
9542
  stillOverThreshold = correctedTokens > recoveryBand;
9256
9543
  } else {
9257
- const postShakeTokens = this.#estimatePendingPromptTokens([]);
9544
+ const postShakeTokens = this.getContextUsage({ contextWindow })?.tokens ?? 0;
9258
9545
  stillOverThreshold = shouldCompact(postShakeTokens, contextWindow, compactionSettings);
9259
9546
  }
9260
9547
  }
@@ -9283,8 +9570,10 @@ export class AgentSession {
9283
9570
  skipped: !reclaimed,
9284
9571
  });
9285
9572
 
9573
+ let continuationScheduled = false;
9286
9574
  if (!willRetry && reason !== "idle" && autoContinue) {
9287
9575
  this.#scheduleAutoContinuePrompt(generation);
9576
+ continuationScheduled = true;
9288
9577
  }
9289
9578
  if (willRetry) {
9290
9579
  // The shake rebuild replays every entry, so a trailing error/length
@@ -9300,14 +9589,16 @@ export class AgentSession {
9300
9589
  if (shouldDrop) this.agent.replaceMessages(messages.slice(0, -1));
9301
9590
  }
9302
9591
  this.#scheduleAgentContinue({ delayMs: 100, generation });
9592
+ continuationScheduled = true;
9303
9593
  } else if (this.agent.hasQueuedMessages()) {
9304
9594
  this.#scheduleAgentContinue({
9305
9595
  delayMs: 100,
9306
9596
  generation,
9307
9597
  shouldContinue: () => this.agent.hasQueuedMessages(),
9308
9598
  });
9599
+ continuationScheduled = true;
9309
9600
  }
9310
- return "handled";
9601
+ return continuationScheduled ? COMPACTION_CHECK_CONTINUATION : COMPACTION_CHECK_NONE;
9311
9602
  } catch (error) {
9312
9603
  if (signal.aborted) {
9313
9604
  await this.#emitSessionEvent({
@@ -9317,7 +9608,7 @@ export class AgentSession {
9317
9608
  aborted: true,
9318
9609
  willRetry: false,
9319
9610
  });
9320
- return "handled";
9611
+ return COMPACTION_CHECK_NONE;
9321
9612
  }
9322
9613
  const message = error instanceof Error ? error.message : "shake failed";
9323
9614
  await this.#emitSessionEvent({
@@ -9329,7 +9620,7 @@ export class AgentSession {
9329
9620
  errorMessage: `Auto-shake failed: ${message}`,
9330
9621
  });
9331
9622
  // Overflow still needs recovery even if shake threw.
9332
- return reason === "overflow" ? "fallback" : "handled";
9623
+ return reason === "overflow" ? "fallback" : COMPACTION_CHECK_NONE;
9333
9624
  } finally {
9334
9625
  if (this.#autoCompactionAbortController === controller) {
9335
9626
  this.#autoCompactionAbortController = undefined;
@@ -9391,6 +9682,7 @@ export class AgentSession {
9391
9682
  if (isContextOverflow(message, contextWindow)) return false;
9392
9683
 
9393
9684
  if (this.#isClassifierRefusal(message)) return true;
9685
+ if (this.#isProviderErrorFinishReasonBeforeToolUse(message)) return true;
9394
9686
  if (this.#streamInterruptedAfterObservableOutput(message)) return false;
9395
9687
  if (this.#isStaleOpenAIResponsesReplayError(message)) return true;
9396
9688
 
@@ -9435,6 +9727,12 @@ export class AgentSession {
9435
9727
  return stopType === "refusal" || stopType === "sensitive";
9436
9728
  }
9437
9729
 
9730
+ #isProviderErrorFinishReasonBeforeToolUse(message: AssistantMessage): boolean {
9731
+ if (!message.errorMessage) return false;
9732
+ if (message.content.some(block => block.type === "toolCall")) return false;
9733
+ return /\bProvider (?:returned error finish_reason|finish_reason:\s*error)\b/i.test(message.errorMessage);
9734
+ }
9735
+
9438
9736
  #isTransientErrorMessage(errorMessage: string): boolean {
9439
9737
  return (
9440
9738
  this.#isTransientEnvelopeErrorMessage(errorMessage) || this.#isTransientTransportErrorMessage(errorMessage)
@@ -9446,6 +9744,10 @@ export class AgentSession {
9446
9744
  return /anthropic stream envelope error:/i.test(errorMessage) && /before message_start/i.test(errorMessage);
9447
9745
  }
9448
9746
 
9747
+ #isCompactionSummarizationTimeoutMessage(errorMessage: string): boolean {
9748
+ return /\b(?:operation\s+)?timed?\s*out\b|\btimeout\b|\bstream stall\b/i.test(errorMessage);
9749
+ }
9750
+
9449
9751
  #isTransientTransportErrorMessage(errorMessage: string): boolean {
9450
9752
  // Match: overloaded_error, provider returned error, rate limit, 429, 500, 502, 503, 504,
9451
9753
  // service unavailable, provider-suggested retry, network/connection/socket errors, fetch failed,
@@ -10446,11 +10748,7 @@ export class AgentSession {
10446
10748
  if (!model) {
10447
10749
  throw new Error("No active model on session");
10448
10750
  }
10449
- const apiKey = await this.#modelRegistry.getApiKey(model, this.sessionId);
10450
- if (!apiKey) {
10451
- throw new Error(`No API key for ${model.provider}/${model.id}`);
10452
- }
10453
-
10751
+ const cacheSessionId = this.sessionId;
10454
10752
  const snapshot = this.#buildEphemeralSnapshot(args.promptText);
10455
10753
  const llmMessages = await this.convertMessagesToLlm(snapshot, args.signal);
10456
10754
  const context: Context = {
@@ -10462,10 +10760,9 @@ export class AgentSession {
10462
10760
  // removes the surface entirely.
10463
10761
  tools: [],
10464
10762
  };
10465
- const cacheSessionId = this.sessionId;
10466
10763
  const options = this.prepareSimpleStreamOptions(
10467
10764
  {
10468
- apiKey,
10765
+ apiKey: this.#modelRegistry.resolver(model, cacheSessionId),
10469
10766
  // Side-channel turns must not share OpenAI/Codex append-only
10470
10767
  // conversation state with the main agent turn: IRC and /btw can run
10471
10768
  // while the main turn is mid-tool-call. Keep the prompt-cache key
@@ -10899,6 +11196,94 @@ export class AgentSession {
10899
11196
  return { selectedText, cancelled: false };
10900
11197
  }
10901
11198
 
11199
+ async branchFromBtw(
11200
+ question: string,
11201
+ assistantMessage: AssistantMessage,
11202
+ ): Promise<{ cancelled: boolean; sessionFile: string | undefined }> {
11203
+ const previousSessionFile = this.sessionFile;
11204
+ if (!this.sessionManager.getSessionFile()) {
11205
+ throw new Error("Cannot branch /btw: session is not persisted");
11206
+ }
11207
+
11208
+ const leafId = this.sessionManager.getLeafId();
11209
+ if (!leafId) {
11210
+ throw new Error("Cannot branch /btw: current session has no leaf");
11211
+ }
11212
+
11213
+ if (
11214
+ this.isBashRunning ||
11215
+ this.isEvalRunning ||
11216
+ this.isCompacting ||
11217
+ this.isGeneratingHandoff ||
11218
+ this.isRetrying
11219
+ ) {
11220
+ throw new Error("Cannot branch /btw while session maintenance or user work is still running");
11221
+ }
11222
+
11223
+ if (this.#extensionRunner?.hasHandlers("session_before_branch")) {
11224
+ const result = (await this.#extensionRunner.emit({
11225
+ type: "session_before_branch",
11226
+ entryId: leafId,
11227
+ })) as SessionBeforeBranchResult | undefined;
11228
+
11229
+ if (result?.cancel) {
11230
+ return { cancelled: true, sessionFile: previousSessionFile };
11231
+ }
11232
+ }
11233
+
11234
+ await this.#cancelPostPromptTasks();
11235
+ if (
11236
+ this.isBashRunning ||
11237
+ this.isEvalRunning ||
11238
+ this.isCompacting ||
11239
+ this.isGeneratingHandoff ||
11240
+ this.isRetrying
11241
+ ) {
11242
+ throw new Error("Cannot branch /btw while session maintenance or user work is still running");
11243
+ }
11244
+
11245
+ this.#pendingNextTurnMessages = [];
11246
+ this.#scheduledHiddenNextTurnGeneration = undefined;
11247
+ this.agent.replaceQueues([], []);
11248
+ if (this.isStreaming) {
11249
+ await this.abort({ goalReason: "internal", reason: "branching /btw" });
11250
+ this.agent.replaceQueues([], []);
11251
+ }
11252
+ await this.sessionManager.flush();
11253
+ this.#cancelOwnAsyncJobs();
11254
+
11255
+ this.sessionManager.createBranchedSession(leafId);
11256
+ this.sessionManager.appendMessage({
11257
+ role: "user",
11258
+ content: [{ type: "text", text: question }],
11259
+ timestamp: Date.now(),
11260
+ });
11261
+ this.sessionManager.appendMessage(assistantMessage);
11262
+ this.#syncTodoPhasesFromBranch();
11263
+ this.#freshProviderSessionId = undefined;
11264
+ this.#syncAgentSessionId();
11265
+ this.#rekeyHindsightMemoryForCurrentSessionId();
11266
+ this.#rekeyMnemopiMemoryForCurrentSessionId();
11267
+ this.#resetHindsightConversationTrackingIfHindsight();
11268
+ this.#resetMnemopiConversationTrackingIfMnemopi();
11269
+
11270
+ const sessionContext = this.buildDisplaySessionContext();
11271
+ await this.#restoreMCPSelectionsForSessionContext(sessionContext);
11272
+
11273
+ if (this.#extensionRunner) {
11274
+ await this.#extensionRunner.emit({
11275
+ type: "session_branch",
11276
+ previousSessionFile,
11277
+ });
11278
+ }
11279
+
11280
+ this.agent.replaceMessages(sessionContext.messages);
11281
+ this.#advisorRuntime?.reset();
11282
+ this.#closeCodexProviderSessionsForHistoryRewrite();
11283
+
11284
+ return { cancelled: false, sessionFile: this.sessionFile };
11285
+ }
11286
+
10902
11287
  // =========================================================================
10903
11288
  // Tree Navigation
10904
11289
  // =========================================================================
@@ -11188,50 +11573,173 @@ export class AgentSession {
11188
11573
  * Uses the last assistant message's usage data when available,
11189
11574
  * otherwise estimates tokens for all messages.
11190
11575
  */
11191
- getContextUsage(options?: { contextWindow?: number }): ContextUsage | undefined {
11576
+ getContextBreakdown(options?: {
11577
+ contextWindow?: number;
11578
+ pendingMessages?: AgentMessage[];
11579
+ }): ContextUsageBreakdown | undefined {
11192
11580
  const model = this.model;
11193
11581
  const contextWindow = options?.contextWindow ?? model?.contextWindow ?? 0;
11194
11582
  if (!Number.isFinite(contextWindow) || contextWindow <= 0) return undefined;
11195
11583
 
11196
- // After compaction, the last assistant usage reflects pre-compaction context size.
11197
- // We can only trust usage from an assistant that responded after the latest compaction.
11198
- // If no such assistant exists, context token count is unknown until the next LLM response.
11584
+ const { skillsTokens, toolsTokens, systemContextTokens, systemPromptTokens } = computeNonMessageBreakdown(this);
11585
+ const categoryNonMessageTokens = skillsTokens + toolsTokens + systemContextTokens + systemPromptTokens;
11586
+ const currentNonMessageTokens = computeNonMessageTokens(this);
11587
+
11199
11588
  const branchEntries = this.sessionManager.getBranch();
11200
11589
  const latestCompaction = getLatestCompactionEntry(branchEntries);
11590
+ const compactionIndex = latestCompaction ? branchEntries.lastIndexOf(latestCompaction) : -1;
11201
11591
 
11202
- if (latestCompaction) {
11203
- // Check if there's a valid assistant usage after the compaction boundary
11204
- const compactionIndex = branchEntries.lastIndexOf(latestCompaction);
11205
- let hasPostCompactionUsage = false;
11206
- for (let i = branchEntries.length - 1; i > compactionIndex; i--) {
11207
- const entry = branchEntries[i];
11208
- if (entry.type === "message" && entry.message.role === "assistant") {
11209
- const assistant = entry.message;
11210
- if (assistant.stopReason !== "aborted" && assistant.stopReason !== "error") {
11211
- const contextTokens = calculateContextTokens(assistant.usage);
11212
- if (contextTokens > 0) {
11213
- hasPostCompactionUsage = true;
11214
- }
11215
- break;
11216
- }
11592
+ let usedTokens = 0;
11593
+ let anchored = false;
11594
+
11595
+ const pendingMessages = options?.pendingMessages ?? [];
11596
+
11597
+ const pending = this.#pendingContextSnapshot;
11598
+
11599
+ // Always locate the latest real assistant-usage anchor after the last
11600
+ // compaction. Its provider-reported promptTokens is ground truth for
11601
+ // everything up to that point; only the tail after it is estimated.
11602
+ let anchorEntry: SessionMessageEntry | undefined;
11603
+ for (let i = branchEntries.length - 1; i > compactionIndex; i--) {
11604
+ const entry = branchEntries[i];
11605
+ if (entry.type === "message" && entry.message.role === "assistant") {
11606
+ const assistant = entry.message;
11607
+ if (assistant.stopReason !== "aborted" && assistant.stopReason !== "error" && assistant.usage) {
11608
+ anchorEntry = entry;
11609
+ break;
11217
11610
  }
11218
11611
  }
11612
+ }
11613
+
11614
+ const resolvedActiveMessages = this.messages;
11615
+ let resolvedAnchorIndex = -1;
11616
+ let anchorAssistant: AssistantMessage | undefined;
11617
+ if (anchorEntry) {
11618
+ const a = anchorEntry.message as AssistantMessage;
11619
+ anchorAssistant = a;
11620
+ resolvedAnchorIndex = resolvedActiveMessages.indexOf(a);
11621
+ if (resolvedAnchorIndex === -1) {
11622
+ resolvedAnchorIndex = resolvedActiveMessages.findIndex(
11623
+ msg => msg.role === "assistant" && msg.timestamp === a.timestamp,
11624
+ );
11625
+ }
11626
+ }
11219
11627
 
11220
- if (!hasPostCompactionUsage) {
11221
- return { tokens: null, contextWindow, percent: null };
11628
+ // A real anchor supersedes the in-flight estimate only once a step of the
11629
+ // CURRENT turn has produced provider usage — i.e. it resolves at or after
11630
+ // the pending cutoff. While the turn's first response is still pending (or
11631
+ // the newest real anchor predates this turn) the pending snapshot is the
11632
+ // only thing accounting for the just-submitted prompt, so it wins. This
11633
+ // keeps a long tool turn from stacking an estimate of the entire tail on
11634
+ // top of a stale turn-start prompt.
11635
+ const useAnchor =
11636
+ anchorAssistant !== undefined &&
11637
+ resolvedAnchorIndex !== -1 &&
11638
+ (!pending || resolvedAnchorIndex >= pending.cutoffCount);
11639
+
11640
+ if (useAnchor && anchorAssistant) {
11641
+ const promptTokens =
11642
+ anchorAssistant.contextSnapshot?.promptTokens ?? calculatePromptTokens(anchorAssistant.usage);
11643
+ const nonMessageTokens = anchorAssistant.contextSnapshot?.nonMessageTokens ?? computeNonMessageTokens(this);
11644
+ anchored = true;
11645
+ let tailTokens = 0;
11646
+ for (let i = resolvedAnchorIndex + 1; i < resolvedActiveMessages.length; i++) {
11647
+ tailTokens += estimateTokens(resolvedActiveMessages[i]);
11648
+ }
11649
+ usedTokens =
11650
+ promptTokens +
11651
+ Math.max(0, currentNonMessageTokens - nonMessageTokens) +
11652
+ tailTokens +
11653
+ pendingMessages.reduce((sum, msg) => sum + estimateTokens(msg), 0);
11654
+ } else if (pending) {
11655
+ anchored = true;
11656
+ let tailTokens = 0;
11657
+ if (resolvedActiveMessages.length > pending.cutoffCount) {
11658
+ for (let i = pending.cutoffCount; i < resolvedActiveMessages.length; i++) {
11659
+ tailTokens += estimateTokens(resolvedActiveMessages[i]);
11660
+ }
11222
11661
  }
11662
+ usedTokens =
11663
+ pending.promptTokens +
11664
+ Math.max(0, currentNonMessageTokens - pending.nonMessageTokens) +
11665
+ tailTokens +
11666
+ pendingMessages.reduce((sum, msg) => sum + estimateTokens(msg), 0);
11223
11667
  }
11224
11668
 
11225
- const estimate = this.#estimateContextTokens();
11226
- const percent = (estimate.tokens / contextWindow) * 100;
11669
+ if (!anchored && !pending && branchEntries.length === 0) {
11670
+ // Fallback: look for the latest assistant message with usage/snapshot in this.messages (for branchless/fake sessions in tests)
11671
+ for (let i = resolvedActiveMessages.length - 1; i >= 0; i--) {
11672
+ const msg = resolvedActiveMessages[i];
11673
+ if (msg.role === "assistant" && msg.stopReason !== "aborted" && msg.stopReason !== "error" && msg.usage) {
11674
+ const promptTokens = msg.contextSnapshot?.promptTokens ?? calculatePromptTokens(msg.usage);
11675
+ const nonMessageTokens = msg.contextSnapshot?.nonMessageTokens ?? computeNonMessageTokens(this);
11676
+
11677
+ let tailTokens = 0;
11678
+ for (let j = i + 1; j < resolvedActiveMessages.length; j++) {
11679
+ tailTokens += estimateTokens(resolvedActiveMessages[j]);
11680
+ }
11681
+
11682
+ usedTokens =
11683
+ promptTokens +
11684
+ Math.max(0, currentNonMessageTokens - nonMessageTokens) +
11685
+ tailTokens +
11686
+ pendingMessages.reduce((sum, msg) => sum + estimateTokens(msg), 0);
11687
+ anchored = true;
11688
+ break;
11689
+ }
11690
+ }
11691
+ }
11692
+ if (!anchored) {
11693
+ let messagesTokens = 0;
11694
+ for (const msg of resolvedActiveMessages) {
11695
+ messagesTokens += estimateTokens(msg);
11696
+ }
11697
+ usedTokens =
11698
+ currentNonMessageTokens +
11699
+ messagesTokens +
11700
+ pendingMessages.reduce((sum, msg) => sum + estimateTokens(msg), 0);
11701
+ }
11702
+
11703
+ const messagesTokens = Math.max(0, usedTokens - categoryNonMessageTokens);
11227
11704
 
11228
11705
  return {
11229
- tokens: estimate.tokens,
11230
11706
  contextWindow,
11231
- percent,
11707
+ anchored,
11708
+ usedTokens,
11709
+ systemPromptTokens,
11710
+ systemToolsTokens: toolsTokens,
11711
+ systemContextTokens,
11712
+ skillsTokens,
11713
+ messagesTokens,
11714
+ };
11715
+ }
11716
+
11717
+ getContextUsage(options?: { contextWindow?: number }): ContextUsage | undefined {
11718
+ const breakdown = this.getContextBreakdown(options);
11719
+ if (!breakdown) return undefined;
11720
+ return {
11721
+ tokens: breakdown.usedTokens,
11722
+ contextWindow: breakdown.contextWindow,
11723
+ percent: breakdown.contextWindow > 0 ? (breakdown.usedTokens / breakdown.contextWindow) * 100 : 0,
11232
11724
  };
11233
11725
  }
11234
11726
 
11727
+ /**
11728
+ * Monotonic counter that changes whenever the in-flight pending context
11729
+ * snapshot is set or cleared. Status-line context memoization keys on this so
11730
+ * a value computed mid-turn cannot persist after the turn ends/aborts.
11731
+ */
11732
+ get contextUsageRevision(): number {
11733
+ return this.#contextUsageRevision;
11734
+ }
11735
+
11736
+ #setPendingContextSnapshot(
11737
+ snapshot: { promptTokens: number; nonMessageTokens: number; cutoffCount: number } | undefined,
11738
+ ): void {
11739
+ this.#pendingContextSnapshot = snapshot;
11740
+ this.#contextUsageRevision++;
11741
+ }
11742
+
11235
11743
  #ingestProviderUsageHeaders(response: ProviderResponseMetadata, model?: Model): void {
11236
11744
  if (model?.provider !== "anthropic") return;
11237
11745
  this.#modelRegistry.authStorage.ingestUsageHeaders("anthropic", response.headers, {
@@ -11244,7 +11752,17 @@ export class AgentSession {
11244
11752
  const authStorage = this.#modelRegistry.authStorage;
11245
11753
  if (!authStorage.fetchUsageReports) return null;
11246
11754
  return authStorage.fetchUsageReports({
11247
- baseUrlResolver: provider => this.#modelRegistry.getProviderBaseUrl?.(provider),
11755
+ baseUrlResolver: provider => {
11756
+ if (provider === "google-antigravity") {
11757
+ const mode = this.settings.get("providers.antigravityEndpoint");
11758
+ if (mode === "sandbox") {
11759
+ return "https://daily-cloudcode-pa.sandbox.googleapis.com";
11760
+ } else if (mode === "production") {
11761
+ return "https://daily-cloudcode-pa.googleapis.com";
11762
+ }
11763
+ }
11764
+ return this.#modelRegistry.getProviderBaseUrl?.(provider);
11765
+ },
11248
11766
  signal,
11249
11767
  });
11250
11768
  }
@@ -11412,64 +11930,6 @@ export class AgentSession {
11412
11930
  return run;
11413
11931
  }
11414
11932
 
11415
- /**
11416
- * Estimate context tokens from messages, using the last assistant usage when available.
11417
- */
11418
- #estimateContextTokens(): {
11419
- tokens: number;
11420
- providerAnchored: boolean;
11421
- providerNonMessageTokens?: number;
11422
- } {
11423
- const messages = this.messages;
11424
-
11425
- // Find last assistant message with valid usage.
11426
- let lastUsageIndex: number | null = null;
11427
- let lastUsage: Usage | undefined;
11428
- for (let i = messages.length - 1; i >= 0; i--) {
11429
- const msg = messages[i];
11430
- if (msg.role === "assistant") {
11431
- const assistantMsg = msg as AssistantMessage;
11432
- if (assistantMsg.stopReason !== "aborted" && assistantMsg.stopReason !== "error" && assistantMsg.usage) {
11433
- lastUsage = assistantMsg.usage;
11434
- lastUsageIndex = i;
11435
- break;
11436
- }
11437
- }
11438
- }
11439
-
11440
- if (!lastUsage || lastUsageIndex === null) {
11441
- // No usage data - estimate all messages
11442
- let estimated = 0;
11443
- for (const message of messages) {
11444
- estimated += estimateTokens(message);
11445
- }
11446
- return {
11447
- tokens: estimated,
11448
- providerAnchored: false,
11449
- };
11450
- }
11451
-
11452
- const usageTokens = calculatePromptTokens(lastUsage);
11453
- const providerNonMessage =
11454
- this.#lastProviderUsageNonMessage &&
11455
- messages[lastUsageIndex]?.role === "assistant" &&
11456
- this.#lastProviderUsageNonMessage.provider === (messages[lastUsageIndex] as AssistantMessage).provider &&
11457
- this.#lastProviderUsageNonMessage.model === (messages[lastUsageIndex] as AssistantMessage).model &&
11458
- this.#lastProviderUsageNonMessage.timestamp === (messages[lastUsageIndex] as AssistantMessage).timestamp
11459
- ? this.#lastProviderUsageNonMessage.tokens
11460
- : undefined;
11461
- let trailingTokens = 0;
11462
- for (let i = lastUsageIndex + 1; i < messages.length; i++) {
11463
- trailingTokens += estimateTokens(messages[i]);
11464
- }
11465
-
11466
- return {
11467
- tokens: usageTokens + trailingTokens,
11468
- providerAnchored: true,
11469
- providerNonMessageTokens: providerNonMessage,
11470
- };
11471
- }
11472
-
11473
11933
  /**
11474
11934
  * Export session to HTML.
11475
11935
  * @param outputPath Optional output path (defaults to session directory)