@oh-my-pi/pi-coding-agent 16.0.4 → 16.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71) hide show
  1. package/CHANGELOG.md +41 -0
  2. package/dist/cli.js +341 -261
  3. package/dist/types/advisor/advise-tool.d.ts +9 -0
  4. package/dist/types/cli/args.d.ts +1 -0
  5. package/dist/types/cli/bench-cli.d.ts +6 -0
  6. package/dist/types/commands/launch.d.ts +3 -0
  7. package/dist/types/config/settings-schema.d.ts +91 -2
  8. package/dist/types/extensibility/extensions/runner.d.ts +5 -2
  9. package/dist/types/extensibility/extensions/types.d.ts +8 -7
  10. package/dist/types/extensibility/shared-events.d.ts +22 -1
  11. package/dist/types/main.d.ts +1 -0
  12. package/dist/types/modes/components/status-line/component.d.ts +1 -1
  13. package/dist/types/modes/components/status-line/context-thresholds.d.ts +0 -1
  14. package/dist/types/modes/rpc/rpc-types.d.ts +1 -1
  15. package/dist/types/modes/utils/context-usage.d.ts +12 -0
  16. package/dist/types/sdk.d.ts +3 -1
  17. package/dist/types/session/agent-session.d.ts +20 -0
  18. package/dist/types/session/session-persistence.d.ts +4 -0
  19. package/dist/types/tools/read.d.ts +1 -0
  20. package/dist/types/tui/code-cell.d.ts +2 -0
  21. package/dist/types/utils/image-vision-fallback.d.ts +28 -0
  22. package/dist/types/web/search/providers/base.d.ts +1 -0
  23. package/dist/types/web/search/providers/gemini.d.ts +1 -0
  24. package/package.json +12 -12
  25. package/src/advisor/__tests__/advisor.test.ts +59 -0
  26. package/src/advisor/advise-tool.ts +13 -0
  27. package/src/cli/args.ts +1 -0
  28. package/src/cli/bench-cli.ts +30 -7
  29. package/src/cli/flag-tables.ts +8 -0
  30. package/src/collab/host.ts +2 -2
  31. package/src/commands/launch.ts +3 -0
  32. package/src/config/settings-schema.ts +84 -2
  33. package/src/eval/py/runner.py +44 -0
  34. package/src/extensibility/extensions/runner.ts +20 -2
  35. package/src/extensibility/extensions/types.ts +16 -5
  36. package/src/extensibility/shared-events.ts +24 -0
  37. package/src/internal-urls/docs-index.generated.ts +7 -7
  38. package/src/main.ts +12 -5
  39. package/src/modes/components/branch-summary-message.ts +1 -0
  40. package/src/modes/components/collab-prompt-message.ts +9 -7
  41. package/src/modes/components/compaction-summary-message.ts +1 -0
  42. package/src/modes/components/custom-message.ts +1 -0
  43. package/src/modes/components/footer.ts +6 -5
  44. package/src/modes/components/hook-message.ts +1 -0
  45. package/src/modes/components/read-tool-group.ts +9 -3
  46. package/src/modes/components/skill-message.ts +1 -0
  47. package/src/modes/components/status-line/component.ts +131 -14
  48. package/src/modes/components/status-line/context-thresholds.ts +0 -1
  49. package/src/modes/components/todo-reminder.ts +1 -0
  50. package/src/modes/components/ttsr-notification.ts +1 -0
  51. package/src/modes/components/user-message.ts +6 -6
  52. package/src/modes/controllers/event-controller.ts +2 -7
  53. package/src/modes/controllers/selector-controller.ts +10 -3
  54. package/src/modes/interactive-mode.ts +4 -2
  55. package/src/modes/rpc/rpc-types.ts +1 -1
  56. package/src/modes/utils/context-usage.ts +28 -15
  57. package/src/prompts/tools/image-attachment-describe-system.md +8 -0
  58. package/src/prompts/tools/image-attachment-describe.md +10 -0
  59. package/src/sdk.ts +14 -18
  60. package/src/session/agent-session.ts +564 -231
  61. package/src/session/session-loader.ts +19 -32
  62. package/src/session/session-persistence.ts +27 -11
  63. package/src/ssh/connection-manager.ts +3 -2
  64. package/src/task/executor.ts +1 -1
  65. package/src/tools/image-gen.ts +67 -25
  66. package/src/tools/read.ts +28 -6
  67. package/src/tui/code-cell.ts +44 -3
  68. package/src/utils/image-vision-fallback.ts +197 -0
  69. package/src/web/search/index.ts +12 -0
  70. package/src/web/search/providers/base.ts +1 -0
  71. package/src/web/search/providers/gemini.ts +56 -18
@@ -18,6 +18,7 @@ import * as os from "node:os";
18
18
  import * as path from "node:path";
19
19
  import { scheduler } from "node:timers/promises";
20
20
  import { isPromise } from "node:util/types";
21
+
21
22
  import type { InMemorySnapshotStore } from "@oh-my-pi/hashline";
22
23
  import {
23
24
  type AfterToolCallContext,
@@ -126,6 +127,8 @@ import {
126
127
  AdvisorRuntime,
127
128
  type AdvisorSeverity,
128
129
  formatAdvisorBatchContent,
130
+ isAdvisorInterruptImmuneTurnActive,
131
+ isInterruptingSeverity,
129
132
  resolveAdvisorDeliveryChannel,
130
133
  } from "../advisor";
131
134
  import { type AsyncJob, type AsyncJobDeliveryState, AsyncJobManager } from "../async";
@@ -178,6 +181,7 @@ import type {
178
181
  SessionBeforeCompactResult,
179
182
  SessionBeforeSwitchResult,
180
183
  SessionBeforeTreeResult,
184
+ SessionStopEventResult,
181
185
  ToolExecutionEndEvent,
182
186
  ToolExecutionStartEvent,
183
187
  ToolExecutionUpdateEvent,
@@ -202,7 +206,7 @@ import { containsOrchestrate, ORCHESTRATE_NOTICE } from "../modes/orchestrate";
202
206
  import { getCurrentThemeName, theme } from "../modes/theme/theme";
203
207
  import { parseTurnBudget } from "../modes/turn-budget";
204
208
  import { containsUltrathink, ULTRATHINK_NOTICE } from "../modes/ultrathink";
205
- import { computeNonMessageTokens } from "../modes/utils/context-usage";
209
+ import { computeNonMessageBreakdown, computeNonMessageTokens } from "../modes/utils/context-usage";
206
210
  import { containsWorkflow, WORKFLOW_NOTICE } from "../modes/workflow";
207
211
  import { createPlanReadMatcher } from "../plan-mode/plan-protection";
208
212
  import type { PlanModeState } from "../plan-mode/state";
@@ -261,6 +265,7 @@ import { type EditMode, resolveEditMode } from "../utils/edit-mode";
261
265
  import { resolveFileDisplayMode } from "../utils/file-display-mode";
262
266
  import { extractFileMentions, generateFileMentionMessages } from "../utils/file-mentions";
263
267
  import { normalizeModelContextImages } from "../utils/image-loading";
268
+ import { describeAttachedImagesForTextModel } from "../utils/image-vision-fallback";
264
269
  import { buildNamedToolChoice, isToolChoiceActive } from "../utils/tool-choice";
265
270
  import type { AuthStorage } from "./auth-storage";
266
271
  import type { ClientBridge, ClientBridgePermissionOption, ClientBridgePermissionOutcome } from "./client-bridge";
@@ -295,6 +300,8 @@ import { ToolChoiceQueue } from "./tool-choice-queue";
295
300
  import { classifyUnexpectedStop, isUnexpectedStopCandidate } from "./unexpected-stop-classifier";
296
301
  import { YieldQueue } from "./yield-queue";
297
302
 
303
+ const SESSION_STOP_CONTINUATION_CAP = 8;
304
+
298
305
  /** Session-specific events that extend the core AgentEvent */
299
306
  export type AgentSessionEvent =
300
307
  | AgentEvent
@@ -338,6 +345,24 @@ const UNEXPECTED_STOP_MAX_RETRIES = 3;
338
345
  const UNEXPECTED_STOP_TIMEOUT_MS = 4000;
339
346
  const EMPTY_STOP_MAX_RETRIES = 3;
340
347
  const RETRY_BACKOFF_MAX_DELAY_MS = 8_000;
348
+
349
+ type CompactionCheckResult = Readonly<{
350
+ deferredHandoff: boolean;
351
+ continuationScheduled: boolean;
352
+ }>;
353
+
354
+ const COMPACTION_CHECK_NONE: CompactionCheckResult = {
355
+ deferredHandoff: false,
356
+ continuationScheduled: false,
357
+ };
358
+ const COMPACTION_CHECK_DEFERRED_HANDOFF: CompactionCheckResult = {
359
+ deferredHandoff: true,
360
+ continuationScheduled: true,
361
+ };
362
+ const COMPACTION_CHECK_CONTINUATION: CompactionCheckResult = {
363
+ deferredHandoff: false,
364
+ continuationScheduled: true,
365
+ };
341
366
  export type CommandMetadataChangedListener = () => void | Promise<void>;
342
367
  export type AsyncJobSnapshotItem = Pick<AsyncJob, "id" | "type" | "status" | "label" | "startTime">;
343
368
 
@@ -555,6 +580,17 @@ export interface RoleModelCycle {
555
580
  currentIndex: number;
556
581
  }
557
582
 
583
+ export interface ContextUsageBreakdown {
584
+ contextWindow: number;
585
+ anchored: boolean;
586
+ usedTokens: number;
587
+ systemPromptTokens: number;
588
+ systemToolsTokens: number;
589
+ systemContextTokens: number;
590
+ skillsTokens: number;
591
+ messagesTokens: number;
592
+ }
593
+
558
594
  /** Session statistics for /session command */
559
595
  export interface SessionStats {
560
596
  sessionFile: string | undefined;
@@ -976,6 +1012,10 @@ const MAGIC_KEYWORD_NOTICE_TYPES: ReadonlySet<string> = new Set([
976
1012
  "workflow-notice",
977
1013
  ]);
978
1014
 
1015
+ /** Custom-message type of the hidden companion carrying vision descriptions of image
1016
+ * attachments sent to a text-only model (see `#buildImageDescriptionNotice`). */
1017
+ const IMAGE_ATTACHMENT_DESCRIPTION_TYPE = "image-attachment-description";
1018
+
979
1019
  /**
980
1020
  * A hidden, user-attributed companion of a queued user prompt: the magic-keyword
981
1021
  * notices (`ultrathink`/`orchestrate`/`workflow`) enqueued alongside the user
@@ -989,7 +1029,7 @@ function isHiddenUserCompanion(message: AgentMessage): boolean {
989
1029
  message.role === "custom" &&
990
1030
  message.attribution === "user" &&
991
1031
  message.display === false &&
992
- MAGIC_KEYWORD_NOTICE_TYPES.has(message.customType)
1032
+ (MAGIC_KEYWORD_NOTICE_TYPES.has(message.customType) || message.customType === IMAGE_ATTACHMENT_DESCRIPTION_TYPE)
993
1033
  );
994
1034
  }
995
1035
 
@@ -1044,6 +1084,8 @@ export class AgentSession {
1044
1084
  * suppresses advisor concern/blocker auto-resume until the user next resumes.
1045
1085
  * Advisor advice is still recorded into the transcript, just not auto-run. */
1046
1086
  #advisorAutoResumeSuppressed = false;
1087
+ #advisorPrimaryTurnsCompleted = 0;
1088
+ #advisorInterruptImmuneTurnStart: number | undefined;
1047
1089
  #planModeState: PlanModeState | undefined;
1048
1090
  #goalModeState: GoalModeState | undefined;
1049
1091
  #goalRuntime: GoalRuntime;
@@ -1224,15 +1266,20 @@ export class AgentSession {
1224
1266
  #unexpectedStopRetryCount = 0;
1225
1267
  #promptGeneration = 0;
1226
1268
  #pendingAgentEndEmit: AgentSessionEvent | undefined;
1227
- #pendingProviderRequestNonMessageTokens: number | undefined = undefined;
1228
- #lastProviderUsageNonMessage:
1269
+ #pendingContextSnapshot:
1229
1270
  | {
1230
- provider: AssistantMessage["provider"];
1231
- model: AssistantMessage["model"];
1232
- timestamp: AssistantMessage["timestamp"];
1233
- tokens: number;
1271
+ promptTokens: number;
1272
+ nonMessageTokens: number;
1273
+ cutoffCount: number;
1234
1274
  }
1235
- | undefined;
1275
+ | undefined = undefined;
1276
+ #sessionStopContinuationCount = 0;
1277
+ #sessionStopHookActive = false;
1278
+ // Bumped whenever the pending in-flight snapshot is set/cleared. The
1279
+ // status-line context memo includes this so clearing the snapshot on
1280
+ // turn-end/abort invalidates the cache even though the message list is
1281
+ // unchanged — otherwise a mid-turn estimate would survive into idle.
1282
+ #contextUsageRevision = 0;
1236
1283
  #obfuscator: SecretObfuscator | undefined;
1237
1284
  #checkpointState: CheckpointState | undefined = undefined;
1238
1285
  #pendingRewindReport: string | undefined = undefined;
@@ -1476,6 +1523,7 @@ export class AgentSession {
1476
1523
  this.agent.setRawSseEventInterceptor(this.#onSseEvent);
1477
1524
  this.agent.setOnTurnEnd(async (messages, signal) => {
1478
1525
  if (signal?.aborted) return;
1526
+ this.#advisorPrimaryTurnsCompleted++;
1479
1527
  if (this.#advisorRuntime && !this.#advisorRuntime.disposed) {
1480
1528
  this.#advisorRuntime.onTurnEnd(messages);
1481
1529
  const syncBacklog = this.settings.get("advisor.syncBacklog");
@@ -1608,6 +1656,27 @@ export class AgentSession {
1608
1656
  // -------------------------------------------------------------------------
1609
1657
  // Advisor runtime lifecycle
1610
1658
  // -------------------------------------------------------------------------
1659
+ #advisorImmuneTurnLimit(): number {
1660
+ const immuneTurns = this.settings.get("advisor.immuneTurns") as number;
1661
+ if (!Number.isFinite(immuneTurns) || immuneTurns <= 0) return 0;
1662
+ return Math.trunc(immuneTurns);
1663
+ }
1664
+
1665
+ #isAdvisorInterruptImmuneTurnActive(): boolean {
1666
+ return isAdvisorInterruptImmuneTurnActive({
1667
+ completedTurns: this.#advisorPrimaryTurnsCompleted,
1668
+ immuneTurnStart: this.#advisorInterruptImmuneTurnStart,
1669
+ immuneTurns: this.#advisorImmuneTurnLimit(),
1670
+ });
1671
+ }
1672
+
1673
+ // The next primary turn number starts the immune-turn window. While the
1674
+ // interrupting steer is still in flight, completedTurns is lower than this
1675
+ // start, so duplicate concern/blocker advice is also downgraded.
1676
+ #recordAdvisorInterruptDelivered(): void {
1677
+ this.#advisorInterruptImmuneTurnStart = this.#advisorPrimaryTurnsCompleted + 1;
1678
+ }
1679
+
1611
1680
  #buildAdvisorRuntime(seedToCurrent = false): boolean {
1612
1681
  if (this.#isDisposed) return false;
1613
1682
  if (this.#advisorRuntime) return true;
@@ -1637,6 +1706,7 @@ export class AgentSession {
1637
1706
  // strand the advice and dump the backlog as one burst at the next prompt. A
1638
1707
  // plain nit always rides the non-interrupting YieldQueue aside.
1639
1708
  const enqueueAdvice = (note: string, severity?: AdvisorSeverity) => {
1709
+ const interrupting = isInterruptingSeverity(severity);
1640
1710
  const channel = resolveAdvisorDeliveryChannel({
1641
1711
  severity,
1642
1712
  autoResumeSuppressed: this.#advisorAutoResumeSuppressed,
@@ -1647,6 +1717,7 @@ export class AgentSession {
1647
1717
  // auto-resume it despite the user's interrupt.
1648
1718
  streaming: this.agent.state.isStreaming,
1649
1719
  aborting: this.#abortInProgress,
1720
+ interruptImmuneTurnActive: interrupting && this.#isAdvisorInterruptImmuneTurnActive(),
1650
1721
  });
1651
1722
  if (channel === "aside") {
1652
1723
  this.yieldQueue.enqueue("advisor", { note, severity });
@@ -1667,6 +1738,7 @@ export class AgentSession {
1667
1738
  });
1668
1739
  return;
1669
1740
  }
1741
+ this.#recordAdvisorInterruptDelivered();
1670
1742
  void this.sendCustomMessage(
1671
1743
  { customType: "advisor", content, display: true, attribution: "agent", details },
1672
1744
  { deliverAs: "steer", triggerTurn: true },
@@ -1682,6 +1754,7 @@ export class AgentSession {
1682
1754
  if (this.#advisorWatchdogPrompt) {
1683
1755
  systemPrompt.push(this.#advisorWatchdogPrompt);
1684
1756
  }
1757
+ const advisorSessionId = this.sessionId ? `${this.sessionId}-advisor` : undefined;
1685
1758
  const advisorAgent = new Agent({
1686
1759
  initialState: {
1687
1760
  systemPrompt,
@@ -1690,15 +1763,8 @@ export class AgentSession {
1690
1763
  tools: [adviseTool, ...advisorReadOnlyTools],
1691
1764
  },
1692
1765
  appendOnlyContext,
1693
- sessionId: this.sessionId ? `${this.sessionId}-advisor` : undefined,
1694
- getApiKey: async provider => {
1695
- const key = await this.#modelRegistry.getApiKeyForProvider(
1696
- provider,
1697
- this.sessionId ? `${this.sessionId}-advisor` : undefined,
1698
- );
1699
- if (!key) throw new Error(`No API key for advisor provider "${provider}"`);
1700
- return key;
1701
- },
1766
+ sessionId: advisorSessionId,
1767
+ getApiKey: requestModel => this.#modelRegistry.resolver(requestModel, advisorSessionId),
1702
1768
  intentTracing: false,
1703
1769
  });
1704
1770
  advisorAgent.setDisableReasoning(shouldDisableReasoning(advisorThinkingLevel));
@@ -2354,6 +2420,15 @@ export class AgentSession {
2354
2420
  event.message.role === "fileMention"
2355
2421
  ) {
2356
2422
  // Regular LLM message - persist as SessionMessageEntry
2423
+ if (event.message.role === "assistant") {
2424
+ const assistantMsg = event.message as AssistantMessage;
2425
+ if (assistantMsg.stopReason !== "aborted" && assistantMsg.stopReason !== "error" && assistantMsg.usage) {
2426
+ assistantMsg.contextSnapshot = {
2427
+ promptTokens: calculatePromptTokens(assistantMsg.usage),
2428
+ nonMessageTokens: this.#pendingContextSnapshot?.nonMessageTokens ?? computeNonMessageTokens(this),
2429
+ };
2430
+ }
2431
+ }
2357
2432
  this.sessionManager.appendMessage(event.message);
2358
2433
  }
2359
2434
  // Other message types (bashExecution, compactionSummary, branchSummary) are persisted elsewhere
@@ -2362,14 +2437,6 @@ export class AgentSession {
2362
2437
  if (event.message.role === "assistant") {
2363
2438
  this.#lastAssistantMessage = event.message;
2364
2439
  const assistantMsg = event.message as AssistantMessage;
2365
- if (assistantMsg.stopReason !== "aborted" && assistantMsg.stopReason !== "error" && assistantMsg.usage) {
2366
- this.#lastProviderUsageNonMessage = {
2367
- provider: assistantMsg.provider,
2368
- model: assistantMsg.model,
2369
- timestamp: assistantMsg.timestamp,
2370
- tokens: this.#pendingProviderRequestNonMessageTokens ?? computeNonMessageTokens(this),
2371
- };
2372
- }
2373
2440
  const currentGrantsAnthropicPriority =
2374
2441
  this.serviceTier === "priority" || this.serviceTier === "claude-only";
2375
2442
  if (assistantMsg.disabledFeatures?.includes("priority") && currentGrantsAnthropicPriority) {
@@ -2412,7 +2479,6 @@ export class AgentSession {
2412
2479
  this.#retryAttempt = 0;
2413
2480
  }
2414
2481
  }
2415
-
2416
2482
  if (event.message.role === "toolResult") {
2417
2483
  const { toolName, details, isError, content } = event.message as {
2418
2484
  toolName?: string;
@@ -2472,6 +2538,9 @@ export class AgentSession {
2472
2538
 
2473
2539
  // Check auto-retry and auto-compaction after agent completes
2474
2540
  if (event.type === "agent_end") {
2541
+ const emitAgentEndNotification = async () => {
2542
+ await this.#emitAgentEndNotification(event.messages);
2543
+ };
2475
2544
  const usage = this.getSessionStats().tokens;
2476
2545
  await this.#goalRuntime.onAgentEnd({
2477
2546
  currentUsage: {
@@ -2488,6 +2557,7 @@ export class AgentSession {
2488
2557
  this.#lastAssistantMessage = undefined;
2489
2558
  if (!msg) {
2490
2559
  this.#lastSuccessfulYieldToolCallId = undefined;
2560
+ await emitAgentEndNotification();
2491
2561
  return;
2492
2562
  }
2493
2563
 
@@ -2504,60 +2574,81 @@ export class AgentSession {
2504
2574
  if (this.#skipPostTurnMaintenanceAssistantTimestamp === msg.timestamp) {
2505
2575
  this.#skipPostTurnMaintenanceAssistantTimestamp = undefined;
2506
2576
  this.#lastSuccessfulYieldToolCallId = undefined;
2577
+ await emitAgentEndNotification();
2507
2578
  return;
2508
2579
  }
2509
2580
 
2510
2581
  if (this.#assistantEndedWithSuccessfulYield(msg)) {
2511
2582
  this.#lastSuccessfulYieldToolCallId = undefined;
2583
+ await emitAgentEndNotification();
2512
2584
  return;
2513
2585
  }
2514
2586
  this.#lastSuccessfulYieldToolCallId = undefined;
2515
2587
 
2516
2588
  if (await this.#handleEmptyAssistantStop(msg)) {
2589
+ await emitAgentEndNotification();
2517
2590
  return;
2518
2591
  }
2519
2592
  if (await this.#handleUnexpectedAssistantStop(msg)) {
2593
+ await emitAgentEndNotification();
2520
2594
  return;
2521
2595
  }
2522
2596
 
2523
2597
  if (this.#isRetryableReasonlessAbort(msg)) {
2524
2598
  const didRetry = await this.#handleRetryableError(msg, { allowModelFallback: false });
2525
- if (didRetry) return;
2599
+ if (didRetry) {
2600
+ await emitAgentEndNotification();
2601
+ return;
2602
+ }
2526
2603
  }
2527
2604
 
2528
2605
  // A deliberate abort should settle the current turn, not trigger queued continuations.
2529
2606
  if (msg.stopReason === "aborted") {
2530
2607
  this.#resolveRetry();
2608
+ this.#resetSessionStopContinuationState();
2609
+ await emitAgentEndNotification();
2531
2610
  return;
2532
2611
  }
2533
2612
  // Check for retryable errors first (overloaded, rate limit, server errors)
2534
2613
  if (this.#isRetryableError(msg)) {
2535
2614
  const didRetry = await this.#handleRetryableError(msg);
2536
- if (didRetry) return; // Retry was initiated, don't proceed to compaction
2615
+ if (didRetry) {
2616
+ await emitAgentEndNotification();
2617
+ return;
2618
+ }
2537
2619
  }
2538
2620
  this.#resolveRetry();
2539
2621
 
2540
2622
  const compactionTask = this.#checkCompaction(msg);
2541
2623
  this.#trackPostPromptTask(compactionTask);
2542
- const compactionDeferredHandoff = await compactionTask;
2624
+ const compactionResult = await compactionTask;
2543
2625
  // Check for incomplete todos only after a final assistant stop, not intermediate tool-use turns.
2544
2626
  const hasToolCalls = msg.content.some(content => content.type === "toolCall");
2545
2627
  if (hasToolCalls) {
2628
+ await emitAgentEndNotification();
2546
2629
  return;
2547
2630
  }
2548
- // When checkCompaction scheduled a deferred handoff, skip the rewind/todo passes:
2549
- // any reminder we append here would race the handoff's session reset, and
2550
- // #scheduleAgentContinue would start a fresh streaming turn alongside the handoff
2551
- // LLM call (visible as "Auto-handoff" loader + an assistant message still streaming).
2552
- if (compactionDeferredHandoff) {
2631
+ // When compaction queued recovery, skip the rewind/todo/session_stop passes:
2632
+ // any reminder or hook continuation we append here would race the handoff,
2633
+ // retry, auto-continue prompt, or queued-message drain that already owns the
2634
+ // next turn.
2635
+ if (compactionResult.deferredHandoff || compactionResult.continuationScheduled) {
2636
+ await emitAgentEndNotification();
2553
2637
  return;
2554
2638
  }
2555
2639
  if (msg.stopReason !== "error") {
2556
2640
  if (this.#enforceRewindBeforeYield()) {
2641
+ await emitAgentEndNotification();
2642
+ return;
2643
+ }
2644
+ const todoContinuationScheduled = await this.#checkTodoCompletion();
2645
+ if (todoContinuationScheduled) {
2646
+ await emitAgentEndNotification();
2557
2647
  return;
2558
2648
  }
2559
- await this.#checkTodoCompletion();
2560
2649
  }
2650
+ await this.#emitSessionStopEvent(event.messages);
2651
+ await emitAgentEndNotification();
2561
2652
  }
2562
2653
  };
2563
2654
 
@@ -3513,6 +3604,83 @@ export class AgentSession {
3513
3604
  }
3514
3605
  }
3515
3606
 
3607
+ #resetSessionStopContinuationState(): void {
3608
+ this.#sessionStopContinuationCount = 0;
3609
+ this.#sessionStopHookActive = false;
3610
+ }
3611
+
3612
+ #clearPendingSessionStopContinuations(): void {
3613
+ if (!this.#pendingNextTurnMessages.some(message => message.customType === "session-stop-continuation")) {
3614
+ return;
3615
+ }
3616
+ this.#pendingNextTurnMessages = this.#pendingNextTurnMessages.filter(
3617
+ message => message.customType !== "session-stop-continuation",
3618
+ );
3619
+ }
3620
+
3621
+ #sessionStopContinuationContext(result: SessionStopEventResult | undefined): string | undefined {
3622
+ if (!result) return undefined;
3623
+ const additionalContext =
3624
+ typeof result.additionalContext === "string" && result.additionalContext.length > 0
3625
+ ? result.additionalContext
3626
+ : undefined;
3627
+ const reason = typeof result.reason === "string" && result.reason.length > 0 ? result.reason : undefined;
3628
+ if (result.continue === true) {
3629
+ return additionalContext ?? reason;
3630
+ }
3631
+ if (result.decision === "block") {
3632
+ return reason ?? additionalContext;
3633
+ }
3634
+ return undefined;
3635
+ }
3636
+
3637
+ async #emitAgentEndNotification(messages: AgentMessage[]): Promise<void> {
3638
+ await this.#extensionRunner?.emit({ type: "agent_end", messages });
3639
+ }
3640
+
3641
+ async #emitSessionStopEvent(messages: AgentMessage[]): Promise<void> {
3642
+ if (this.#agentKind === "sub" || !this.#extensionRunner?.hasHandlers("session_stop")) return;
3643
+ const generation = this.#promptGeneration;
3644
+ const result = await this.#extensionRunner.emitSessionStop({
3645
+ messages,
3646
+ turn_id: Math.max(0, this.#turnIndex - 1),
3647
+ last_assistant_message: this.getLastAssistantMessage(),
3648
+ session_id: this.sessionId,
3649
+ session_file: this.sessionFile,
3650
+ stop_hook_active: this.#sessionStopHookActive,
3651
+ });
3652
+ if (this.#promptGeneration !== generation || this.#abortInProgress || this.#isDisposed) {
3653
+ this.#resetSessionStopContinuationState();
3654
+ return;
3655
+ }
3656
+ const additionalContext = this.#sessionStopContinuationContext(result);
3657
+ if (!additionalContext) {
3658
+ this.#resetSessionStopContinuationState();
3659
+ return;
3660
+ }
3661
+ if (this.#sessionStopContinuationCount >= SESSION_STOP_CONTINUATION_CAP) {
3662
+ logger.warn("session_stop continuation cap reached", {
3663
+ sessionId: this.sessionId,
3664
+ cap: SESSION_STOP_CONTINUATION_CAP,
3665
+ });
3666
+ this.#resetSessionStopContinuationState();
3667
+ return;
3668
+ }
3669
+ this.#sessionStopContinuationCount++;
3670
+ this.#sessionStopHookActive = true;
3671
+ this.#queueHiddenNextTurnMessage(
3672
+ {
3673
+ role: "custom",
3674
+ customType: "session-stop-continuation",
3675
+ content: additionalContext,
3676
+ display: false,
3677
+ attribution: "agent",
3678
+ timestamp: Date.now(),
3679
+ },
3680
+ true,
3681
+ );
3682
+ }
3683
+
3516
3684
  /** Emit extension events based on session events */
3517
3685
  async #emitExtensionEvent(event: AgentSessionEvent): Promise<void> {
3518
3686
  if (!this.#extensionRunner) return;
@@ -3520,7 +3688,9 @@ export class AgentSession {
3520
3688
  this.#turnIndex = 0;
3521
3689
  await this.#extensionRunner.emit({ type: "agent_start" });
3522
3690
  } else if (event.type === "agent_end") {
3523
- await this.#extensionRunner.emit({ type: "agent_end", messages: event.messages });
3691
+ // `agent_end` extension notification is emitted from the settled
3692
+ // agent_end maintenance path so `session_stop` control hooks are not
3693
+ // blocked by unrelated notification-only work.
3524
3694
  } else if (event.type === "turn_start") {
3525
3695
  const hookEvent: TurnStartEvent = {
3526
3696
  type: "turn_start",
@@ -4791,11 +4961,24 @@ export class AgentSession {
4791
4961
  openrouterRoutingPreset !== "default" && options.openrouterVariant === undefined
4792
4962
  ? openrouterRoutingPreset
4793
4963
  : undefined;
4794
- if (!sessionOnPayload && !sessionOnResponse && !sessionMetadata && !sessionOnSseEvent && !openrouterVariant)
4964
+ const antigravityEndpointMode =
4965
+ provider === "google-antigravity" ? this.settings.get("providers.antigravityEndpoint") : undefined;
4966
+
4967
+ if (
4968
+ !sessionOnPayload &&
4969
+ !sessionOnResponse &&
4970
+ !sessionMetadata &&
4971
+ !sessionOnSseEvent &&
4972
+ !openrouterVariant &&
4973
+ !antigravityEndpointMode
4974
+ )
4795
4975
  return options;
4796
4976
 
4797
- const preparedOptions: SimpleStreamOptions =
4798
- openrouterVariant === undefined ? { ...options } : { ...options, openrouterVariant };
4977
+ const preparedOptions: SimpleStreamOptions = {
4978
+ ...options,
4979
+ ...(openrouterVariant !== undefined && { openrouterVariant }),
4980
+ ...(antigravityEndpointMode !== undefined && { antigravityEndpointMode }),
4981
+ };
4799
4982
 
4800
4983
  // Stamp session metadata (e.g. user_id={session_id}) onto direct-call requests so
4801
4984
  // they share the same session bucket as Agent.prompt-routed requests on Anthropic
@@ -5114,6 +5297,62 @@ export class AgentSession {
5114
5297
  return normalizeModelContextImages(images, { model: this.model });
5115
5298
  }
5116
5299
 
5300
+ /**
5301
+ * Build a hidden companion message describing image attachments for a text-only
5302
+ * model. Each image is saved under local:// and a vision-capable model describes
5303
+ * it; the descriptions are returned as a `display: false` custom message (so the
5304
+ * model reads them but the TUI does not render the blob) carrying one
5305
+ * `<image path="local://…">…</image>` block per image. Returns `undefined` when
5306
+ * the active model already accepts images, the feature is disabled, or no
5307
+ * description could be produced. Never throws.
5308
+ */
5309
+ async #buildImageDescriptionNotice(
5310
+ normalizedImages: ImageContent[],
5311
+ signal?: AbortSignal,
5312
+ ): Promise<CustomMessage | undefined> {
5313
+ const model = this.model;
5314
+ const shouldDescribe =
5315
+ !!model &&
5316
+ !model.input.includes("image") &&
5317
+ !this.settings.get("images.blockImages") &&
5318
+ this.settings.get("images.describeForTextModels");
5319
+ if (!shouldDescribe || !model) {
5320
+ return undefined;
5321
+ }
5322
+ let blocks: TextContent[];
5323
+ try {
5324
+ blocks = await describeAttachedImagesForTextModel(
5325
+ normalizedImages,
5326
+ {
5327
+ activeModel: model,
5328
+ modelRegistry: this.#modelRegistry,
5329
+ settings: this.settings,
5330
+ localProtocolOptions: this.#localProtocolOptions(),
5331
+ activeModelString: formatModelString(model),
5332
+ telemetryConfig: this.agent.telemetry,
5333
+ sessionId: this.sessionId,
5334
+ },
5335
+ signal,
5336
+ );
5337
+ } catch (err) {
5338
+ logger.warn("image attachment vision fallback failed; image left undescribed", {
5339
+ error: err instanceof Error ? err.message : String(err),
5340
+ });
5341
+ return undefined;
5342
+ }
5343
+ if (blocks.length === 0) {
5344
+ return undefined;
5345
+ }
5346
+ return {
5347
+ role: "custom",
5348
+ customType: IMAGE_ATTACHMENT_DESCRIPTION_TYPE,
5349
+ content: blocks,
5350
+ display: false,
5351
+ attribution: "user",
5352
+ timestamp: Date.now(),
5353
+ };
5354
+ }
5355
+
5117
5356
  async #normalizeMessageContentImages(
5118
5357
  content: string | (TextContent | ImageContent)[],
5119
5358
  ): Promise<string | (TextContent | ImageContent)[]> {
@@ -5261,9 +5500,14 @@ export class AgentSession {
5261
5500
  const normalizedImages = await this.#normalizeImagesForModel(options?.images);
5262
5501
 
5263
5502
  const userContent: (TextContent | ImageContent)[] = [{ type: "text", text: expandedText }];
5264
- if (normalizedImages) {
5503
+ if (normalizedImages?.length) {
5265
5504
  userContent.push(...normalizedImages);
5266
5505
  }
5506
+ // Text-only model + image attachment: describe via a vision model and inject the
5507
+ // description as a hidden companion (the image stays in the visible user message).
5508
+ const imageDescriptionNotice = normalizedImages?.length
5509
+ ? await this.#buildImageDescriptionNotice(normalizedImages)
5510
+ : undefined;
5267
5511
 
5268
5512
  const promptAttribution = options?.attribution ?? (options?.synthetic ? "agent" : "user");
5269
5513
  const message = options?.synthetic
@@ -5288,8 +5532,8 @@ export class AgentSession {
5288
5532
  ...options,
5289
5533
  images: normalizedImages,
5290
5534
  prependMessages:
5291
- preludeMessages.length > 0 || keywordNotices.length > 0
5292
- ? [...preludeMessages, ...keywordNotices]
5535
+ preludeMessages.length > 0 || keywordNotices.length > 0 || imageDescriptionNotice
5536
+ ? [...preludeMessages, ...keywordNotices, ...(imageDescriptionNotice ? [imageDescriptionNotice] : [])]
5293
5537
  : undefined,
5294
5538
  });
5295
5539
  } finally {
@@ -5510,11 +5754,23 @@ export class AgentSession {
5510
5754
  }
5511
5755
 
5512
5756
  const agentPromptOptions = options?.toolChoice ? { toolChoice: options.toolChoice } : undefined;
5513
- this.#pendingProviderRequestNonMessageTokens = computeNonMessageTokens(this);
5757
+ const nonMessageTokens = computeNonMessageTokens(this);
5758
+ const contextWindow = this.model?.contextWindow ?? 0;
5759
+ const breakdown = this.getContextBreakdown({ contextWindow, pendingMessages: messages });
5760
+ const promptTokens =
5761
+ breakdown?.usedTokens ??
5762
+ nonMessageTokens +
5763
+ this.messages.reduce((sum, msg) => sum + estimateTokens(msg), 0) +
5764
+ messages.reduce((sum, msg) => sum + estimateTokens(msg), 0);
5765
+ this.#setPendingContextSnapshot({
5766
+ promptTokens,
5767
+ nonMessageTokens,
5768
+ cutoffCount: this.messages.length + messages.length,
5769
+ });
5514
5770
  try {
5515
5771
  await this.#promptAgentWithIdleRetry(messages, agentPromptOptions);
5516
5772
  } finally {
5517
- this.#pendingProviderRequestNonMessageTokens = undefined;
5773
+ this.#setPendingContextSnapshot(undefined);
5518
5774
  }
5519
5775
  if (!options?.skipPostPromptRecoveryWait) {
5520
5776
  await this.#waitForPostPromptRecovery(generation);
@@ -5699,7 +5955,13 @@ export class AgentSession {
5699
5955
  if (normalizedImages?.length) {
5700
5956
  content.push(...normalizedImages);
5701
5957
  }
5958
+ // Text-only model + image attachment: describe via a vision model and enqueue the
5959
+ // description as a hidden companion immediately before the user message.
5960
+ const imageDescriptionNotice = normalizedImages?.length
5961
+ ? await this.#buildImageDescriptionNotice(normalizedImages)
5962
+ : undefined;
5702
5963
  if (mode === "followUp") {
5964
+ if (imageDescriptionNotice) this.agent.followUp(imageDescriptionNotice);
5703
5965
  this.agent.followUp({
5704
5966
  role: "user",
5705
5967
  content,
@@ -5707,6 +5969,7 @@ export class AgentSession {
5707
5969
  timestamp: Date.now(),
5708
5970
  });
5709
5971
  } else {
5972
+ if (imageDescriptionNotice) this.agent.steer(imageDescriptionNotice);
5710
5973
  this.agent.steer({
5711
5974
  role: "user",
5712
5975
  content,
@@ -5857,6 +6120,16 @@ export class AgentSession {
5857
6120
  }
5858
6121
  }
5859
6122
 
6123
+ async #promptAgentInitiatedMessage(message: CustomMessage): Promise<void> {
6124
+ this.#beginInFlight();
6125
+ try {
6126
+ await this.agent.prompt(message);
6127
+ await this.#waitForPostPromptRecovery();
6128
+ } finally {
6129
+ this.#endInFlight();
6130
+ }
6131
+ }
6132
+
5860
6133
  /**
5861
6134
  * Send a custom message to the session. Creates a CustomMessageEntry.
5862
6135
  *
@@ -5916,7 +6189,7 @@ export class AgentSession {
5916
6189
  this.#queueHiddenNextTurnMessage(normalizedAppMessage, false);
5917
6190
  return false;
5918
6191
  }
5919
- await this.agent.prompt(normalizedAppMessage);
6192
+ await this.#promptAgentInitiatedMessage(normalizedAppMessage);
5920
6193
  return true;
5921
6194
  }
5922
6195
  this.agent.appendMessage(normalizedAppMessage);
@@ -5935,7 +6208,7 @@ export class AgentSession {
5935
6208
  this.#queueHiddenNextTurnMessage(normalizedAppMessage, false);
5936
6209
  return false;
5937
6210
  }
5938
- await this.agent.prompt(normalizedAppMessage);
6211
+ await this.#promptAgentInitiatedMessage(normalizedAppMessage);
5939
6212
  return true;
5940
6213
  }
5941
6214
 
@@ -6158,6 +6431,8 @@ export class AgentSession {
6158
6431
  // block runs, but nested prompt setup/finalizers may still be unwinding. Without this,
6159
6432
  // a subsequent prompt() can incorrectly observe the session as busy after an abort.
6160
6433
  this.#resetInFlight();
6434
+ this.#resetSessionStopContinuationState();
6435
+ this.#clearPendingSessionStopContinuations();
6161
6436
  // Safety net: if the agent loop aborted without producing an assistant
6162
6437
  // message (e.g. failed before the first stream), the in-flight yield was
6163
6438
  // never resolved or rejected by the normal message_end path. Reject it now
@@ -7458,39 +7733,12 @@ export class AgentSession {
7458
7733
  }
7459
7734
  }
7460
7735
 
7461
- #estimatePendingPromptTokens(messages: AgentMessage[]): number {
7462
- let tokens = computeNonMessageTokens(this);
7463
- for (const message of this.messages) {
7464
- tokens += estimateTokens(message);
7465
- }
7466
- for (const message of messages) {
7467
- tokens += estimateTokens(message);
7468
- }
7469
- return tokens;
7470
- }
7471
-
7472
7736
  #estimatePrePromptContextTokens(messages: AgentMessage[], contextWindow: number): number {
7473
- const currentUsage = this.getContextUsage({ contextWindow });
7474
- if (typeof currentUsage?.tokens !== "number" || !Number.isFinite(currentUsage.tokens)) {
7475
- return this.#estimatePendingPromptTokens(messages);
7476
- }
7477
-
7478
- const currentEstimate = this.#estimateContextTokens();
7479
- if (!currentEstimate.providerAnchored) {
7480
- return this.#estimatePendingPromptTokens(messages);
7481
- }
7482
-
7483
- let tokens = currentUsage.tokens;
7484
- const previousNonMessageTokens = currentEstimate.providerNonMessageTokens;
7485
- if (previousNonMessageTokens !== undefined) {
7486
- const currentNonMessageTokens = computeNonMessageTokens(this);
7487
- const nonMessageTokenGrowth = Math.max(0, currentNonMessageTokens - previousNonMessageTokens);
7488
- tokens += nonMessageTokenGrowth;
7489
- }
7490
- for (const message of messages) {
7491
- tokens += estimateTokens(message);
7492
- }
7493
- return tokens;
7737
+ const breakdown = this.getContextBreakdown({ contextWindow, pendingMessages: messages });
7738
+ return (
7739
+ breakdown?.usedTokens ??
7740
+ computeNonMessageTokens(this) + messages.reduce((sum, msg) => sum + estimateTokens(msg), 0)
7741
+ );
7494
7742
  }
7495
7743
 
7496
7744
  async #runPrePromptCompactionIfNeeded(messages: AgentMessage[]): Promise<void> {
@@ -7544,19 +7792,19 @@ export class AgentSession {
7544
7792
  * on the pre-prompt path (where the next agent turn is about to start) set it to false
7545
7793
  * to avoid racing the deferred handoff against the new turn.
7546
7794
  * @param autoContinue Whether maintenance may schedule the agent-authored continuation prompt.
7547
- * @returns true when a deferred handoff was scheduled. Callers MUST then skip any
7548
- * subsequent `#scheduleAgentContinue` / reminder appends for this turn the
7549
- * handoff will replace session state and a concurrent `agent.continue()` would
7550
- * stream into the soon-to-be-discarded session.
7795
+ * @returns whether compaction/recovery scheduled a handoff, retry, auto-continue, or
7796
+ * queued-message drain that already owns the next turn. Callers MUST skip
7797
+ * `session_stop` and other agent continuations when `continuationScheduled`
7798
+ * is true.
7551
7799
  */
7552
7800
  async #checkCompaction(
7553
7801
  assistantMessage: AssistantMessage,
7554
7802
  skipAbortedCheck = true,
7555
7803
  allowDefer = true,
7556
7804
  autoContinue = true,
7557
- ): Promise<boolean> {
7805
+ ): Promise<CompactionCheckResult> {
7558
7806
  // Skip if message was aborted (user cancelled) - unless skipAbortedCheck is false
7559
- if (skipAbortedCheck && assistantMessage.stopReason === "aborted") return false;
7807
+ if (skipAbortedCheck && assistantMessage.stopReason === "aborted") return COMPACTION_CHECK_NONE;
7560
7808
  const contextWindow = this.model?.contextWindow ?? 0;
7561
7809
  const generation = this.#promptGeneration;
7562
7810
  // Skip overflow check if the message came from a different model.
@@ -7585,15 +7833,15 @@ export class AgentSession {
7585
7833
  if (promoted) {
7586
7834
  // Retry on the promoted (larger) model without compacting
7587
7835
  this.#scheduleAgentContinue({ delayMs: 100, generation });
7588
- return false;
7836
+ return COMPACTION_CHECK_CONTINUATION;
7589
7837
  }
7590
7838
 
7591
7839
  // No promotion target available fall through to compaction
7592
7840
  const compactionSettings = this.settings.getGroup("compaction");
7593
7841
  if (compactionSettings.enabled && compactionSettings.strategy !== "off") {
7594
- await this.#runAutoCompaction("overflow", true, false, allowDefer, { autoContinue });
7842
+ return await this.#runAutoCompaction("overflow", true, false, allowDefer, { autoContinue });
7595
7843
  }
7596
- return false;
7844
+ return COMPACTION_CHECK_NONE;
7597
7845
  }
7598
7846
 
7599
7847
  // Case 3: Output-side incomplete — `response.incomplete` from OpenAI Responses
@@ -7614,7 +7862,7 @@ export class AgentSession {
7614
7862
  from: `${assistantMessage.provider}/${assistantMessage.model}`,
7615
7863
  });
7616
7864
  this.#scheduleAgentContinue({ delayMs: 100, generation });
7617
- return false;
7865
+ return COMPACTION_CHECK_CONTINUATION;
7618
7866
  }
7619
7867
 
7620
7868
  const incompleteCompactionSettings = this.settings.getGroup("compaction");
@@ -7623,18 +7871,17 @@ export class AgentSession {
7623
7871
  model: `${assistantMessage.provider}/${assistantMessage.model}`,
7624
7872
  strategy: incompleteCompactionSettings.strategy,
7625
7873
  });
7626
- await this.#runAutoCompaction("incomplete", true, false, allowDefer, {
7874
+ return await this.#runAutoCompaction("incomplete", true, false, allowDefer, {
7627
7875
  autoContinue,
7628
7876
  triggerContextTokens: calculateContextTokens(assistantMessage.usage),
7629
7877
  });
7630
- } else {
7631
- // Neither promotion nor compaction is available — surface the dead-end so
7632
- // the user understands why the turn yielded with nothing.
7633
- logger.warn("response.incomplete with no recovery path (promotion + compaction both unavailable)", {
7634
- model: `${assistantMessage.provider}/${assistantMessage.model}`,
7635
- });
7636
7878
  }
7637
- return false;
7879
+ // Neither promotion nor compaction is available — surface the dead-end so
7880
+ // the user understands why the turn yielded with nothing.
7881
+ logger.warn("response.incomplete with no recovery path (promotion + compaction both unavailable)", {
7882
+ model: `${assistantMessage.provider}/${assistantMessage.model}`,
7883
+ });
7884
+ return COMPACTION_CHECK_NONE;
7638
7885
  }
7639
7886
 
7640
7887
  // Stale-result pass runs every turn, before any threshold gating: it is
@@ -7643,11 +7890,11 @@ export class AgentSession {
7643
7890
  const supersedeResult = await this.#pruneStaleToolResults();
7644
7891
 
7645
7892
  const compactionSettings = this.settings.getGroup("compaction");
7646
- if (!compactionSettings.enabled || compactionSettings.strategy === "off") return false;
7893
+ if (!compactionSettings.enabled || compactionSettings.strategy === "off") return COMPACTION_CHECK_NONE;
7647
7894
 
7648
7895
  // Case 4: Threshold - turn succeeded but context is getting large
7649
7896
  // Skip if this was an error (non-overflow errors don't have usage data)
7650
- if (assistantMessage.stopReason === "error") return false;
7897
+ if (assistantMessage.stopReason === "error") return COMPACTION_CHECK_NONE;
7651
7898
  const pruneResult = await this.#pruneToolOutputs();
7652
7899
  let contextTokens = calculateContextTokens(assistantMessage.usage);
7653
7900
  if (supersedeResult) {
@@ -7666,7 +7913,7 @@ export class AgentSession {
7666
7913
  });
7667
7914
  }
7668
7915
  }
7669
- return false;
7916
+ return COMPACTION_CHECK_NONE;
7670
7917
  }
7671
7918
  #assistantEndedWithSuccessfulYield(assistantMessage: AssistantMessage): boolean {
7672
7919
  const toolCallId = this.#lastSuccessfulYieldToolCallId;
@@ -7706,7 +7953,7 @@ export class AgentSession {
7706
7953
  if (assistantMessage.stopReason === "toolUse") {
7707
7954
  this.#removeEmptyStopFromActiveContext(assistantMessage);
7708
7955
  }
7709
- return true;
7956
+ return false;
7710
7957
  }
7711
7958
  this.#removeEmptyStopFromActiveContext(assistantMessage);
7712
7959
  this.agent.appendMessage({
@@ -8081,12 +8328,12 @@ export class AgentSession {
8081
8328
  /**
8082
8329
  * Check if agent stopped with incomplete todos and prompt to continue.
8083
8330
  */
8084
- async #checkTodoCompletion(): Promise<void> {
8331
+ async #checkTodoCompletion(): Promise<boolean> {
8085
8332
  // Skip todo reminders when the most recent turn was driven by an explicit user force —
8086
8333
  // the user wanted exactly that tool, not a follow-up nag about incomplete todos.
8087
8334
  const lastServedLabel = this.#toolChoiceQueue.consumeLastServedLabel();
8088
8335
  if (lastServedLabel === "user-force") {
8089
- return;
8336
+ return false;
8090
8337
  }
8091
8338
 
8092
8339
  // Suppress within a self-continuation chain: if the agent's last turn was driven by a
@@ -8097,7 +8344,7 @@ export class AgentSession {
8097
8344
  logger.debug("Todo completion: prior reminder still awaiting agent action; staying silent", {
8098
8345
  attempt: this.#todoReminderCount,
8099
8346
  });
8100
- return;
8347
+ return false;
8101
8348
  }
8102
8349
 
8103
8350
  const remindersEnabled = this.settings.get("todo.reminders");
@@ -8105,20 +8352,20 @@ export class AgentSession {
8105
8352
  if (!remindersEnabled || !todosEnabled) {
8106
8353
  this.#todoReminderCount = 0;
8107
8354
  this.#todoReminderAwaitingProgress = false;
8108
- return;
8355
+ return false;
8109
8356
  }
8110
8357
 
8111
8358
  const remindersMax = this.settings.get("todo.reminders.max");
8112
8359
  if (this.#todoReminderCount >= remindersMax) {
8113
8360
  logger.debug("Todo completion: max reminders reached", { count: this.#todoReminderCount });
8114
- return;
8361
+ return false;
8115
8362
  }
8116
8363
 
8117
8364
  const phases = this.getTodoPhases();
8118
8365
  if (phases.length === 0) {
8119
8366
  this.#todoReminderCount = 0;
8120
8367
  this.#todoReminderAwaitingProgress = false;
8121
- return;
8368
+ return false;
8122
8369
  }
8123
8370
 
8124
8371
  const incompleteByPhase = phases
@@ -8136,7 +8383,7 @@ export class AgentSession {
8136
8383
  if (incomplete.length === 0) {
8137
8384
  this.#todoReminderCount = 0;
8138
8385
  this.#todoReminderAwaitingProgress = false;
8139
- return;
8386
+ return false;
8140
8387
  }
8141
8388
 
8142
8389
  // Build reminder message
@@ -8176,6 +8423,7 @@ export class AgentSession {
8176
8423
  this.agent.appendMessage(reminderMessage);
8177
8424
  this.sessionManager.appendMessage(reminderMessage);
8178
8425
  this.#scheduleAgentContinue({ generation: this.#promptGeneration });
8426
+ return true;
8179
8427
  }
8180
8428
 
8181
8429
  /**
@@ -8461,9 +8709,13 @@ export class AgentSession {
8461
8709
  }
8462
8710
 
8463
8711
  #didSessionMessagesChange(previousMessages: AgentMessage[], nextMessages: AgentMessage[]): boolean {
8464
- return (
8465
- JSON.stringify(previousMessages.map(message => this.#normalizeSessionMessageForProviderReplay(message))) !==
8466
- JSON.stringify(nextMessages.map(message => this.#normalizeSessionMessageForProviderReplay(message)))
8712
+ if (previousMessages.length !== nextMessages.length) return true;
8713
+ return previousMessages.some(
8714
+ (message, i) =>
8715
+ !Bun.deepEquals(
8716
+ this.#normalizeSessionMessageForProviderReplay(message),
8717
+ this.#normalizeSessionMessageForProviderReplay(nextMessages[i]),
8718
+ ),
8467
8719
  );
8468
8720
  }
8469
8721
 
@@ -8709,14 +8961,14 @@ export class AgentSession {
8709
8961
  * Internal: Run auto-compaction with events.
8710
8962
  *
8711
8963
  * @param allowDefer If true (default), threshold-driven handoff strategy is allowed to
8712
- * schedule itself as a deferred post-prompt task and return `true` immediately. The
8713
- * caller MUST treat that as "compaction will happen async — do not also schedule
8714
- * `agent.continue()` for this turn", otherwise the deferred handoff races a fresh
8715
- * streaming turn (the symptom: "Auto-handoff" loader + assistant message still
8716
- * streaming). Callers on a path that is about to start a new agent turn (e.g.
8717
- * the pre-prompt check in `#promptWithMessage`) pass `false` to force inline
8718
- * execution so the handoff completes before the new turn begins.
8719
- * @returns true when a deferred handoff was scheduled. Inline runs always return false.
8964
+ * schedule itself as a deferred post-prompt task and return a deferred-handoff result
8965
+ * immediately. The caller MUST treat that as "compaction will happen async — do not
8966
+ * also schedule `agent.continue()` for this turn", otherwise the deferred handoff
8967
+ * races a fresh streaming turn (the symptom: "Auto-handoff" loader + assistant
8968
+ * message still streaming). Callers on a path that is about to start a new agent
8969
+ * turn (e.g. the pre-prompt check in `#promptWithMessage`) pass `false` to force
8970
+ * inline execution so the handoff completes before the new turn begins.
8971
+ * @returns whether auto-compaction scheduled a follow-up turn.
8720
8972
  */
8721
8973
  async #runAutoCompaction(
8722
8974
  reason: "overflow" | "threshold" | "idle" | "incomplete",
@@ -8724,10 +8976,10 @@ export class AgentSession {
8724
8976
  deferred = false,
8725
8977
  allowDefer = true,
8726
8978
  options: { autoContinue?: boolean; triggerContextTokens?: number } = {},
8727
- ): Promise<boolean> {
8979
+ ): Promise<CompactionCheckResult> {
8728
8980
  const compactionSettings = this.settings.getGroup("compaction");
8729
- if (compactionSettings.strategy === "off") return false;
8730
- if (reason !== "idle" && !compactionSettings.enabled) return false;
8981
+ if (compactionSettings.strategy === "off") return COMPACTION_CHECK_NONE;
8982
+ if (reason !== "idle" && !compactionSettings.enabled) return COMPACTION_CHECK_NONE;
8731
8983
  const generation = this.#promptGeneration;
8732
8984
  const shouldAutoContinue = options.autoContinue !== false && compactionSettings.autoContinue !== false;
8733
8985
  // Shake runs inline (cheap, no remote LLM). On overflow recovery, if shake
@@ -8741,7 +8993,7 @@ export class AgentSession {
8741
8993
  shouldAutoContinue,
8742
8994
  options.triggerContextTokens,
8743
8995
  );
8744
- if (outcome !== "fallback") return false;
8996
+ if (outcome !== "fallback") return outcome;
8745
8997
  }
8746
8998
  // "overflow" and "incomplete" force inline execution because they are recovery
8747
8999
  // paths the caller wants resolved before scheduling the next turn. "idle" is
@@ -8762,7 +9014,7 @@ export class AgentSession {
8762
9014
  },
8763
9015
  { generation },
8764
9016
  );
8765
- return true;
9017
+ return COMPACTION_CHECK_DEFERRED_HANDOFF;
8766
9018
  }
8767
9019
 
8768
9020
  // "overflow" forces context-full because the input itself is broken — a handoff
@@ -8810,7 +9062,7 @@ export class AgentSession {
8810
9062
  aborted: true,
8811
9063
  willRetry: false,
8812
9064
  });
8813
- return false;
9065
+ return COMPACTION_CHECK_NONE;
8814
9066
  }
8815
9067
  logger.warn("Auto-handoff returned no document; falling back to context-full maintenance", {
8816
9068
  reason,
@@ -8825,10 +9077,11 @@ export class AgentSession {
8825
9077
  aborted: false,
8826
9078
  willRetry: false,
8827
9079
  });
8828
- if (!autoCompactionSignal.aborted && reason !== "idle" && shouldAutoContinue) {
9080
+ const continuationScheduled = !autoCompactionSignal.aborted && reason !== "idle" && shouldAutoContinue;
9081
+ if (continuationScheduled) {
8829
9082
  this.#scheduleAutoContinuePrompt(generation);
8830
9083
  }
8831
- return false;
9084
+ return continuationScheduled ? COMPACTION_CHECK_CONTINUATION : COMPACTION_CHECK_NONE;
8832
9085
  }
8833
9086
  }
8834
9087
 
@@ -8841,7 +9094,7 @@ export class AgentSession {
8841
9094
  willRetry: false,
8842
9095
  skipped: true,
8843
9096
  });
8844
- return false;
9097
+ return COMPACTION_CHECK_NONE;
8845
9098
  }
8846
9099
 
8847
9100
  const availableModels = this.#modelRegistry.getAvailable();
@@ -8854,7 +9107,7 @@ export class AgentSession {
8854
9107
  willRetry: false,
8855
9108
  skipped: true,
8856
9109
  });
8857
- return false;
9110
+ return COMPACTION_CHECK_NONE;
8858
9111
  }
8859
9112
 
8860
9113
  const pathEntries = this.sessionManager.getBranch();
@@ -8875,8 +9128,9 @@ export class AgentSession {
8875
9128
  generation,
8876
9129
  shouldContinue: () => this.agent.hasQueuedMessages(),
8877
9130
  });
9131
+ return COMPACTION_CHECK_CONTINUATION;
8878
9132
  }
8879
- return false;
9133
+ return COMPACTION_CHECK_NONE;
8880
9134
  }
8881
9135
 
8882
9136
  let hookCompaction: CompactionResult | undefined;
@@ -8900,7 +9154,7 @@ export class AgentSession {
8900
9154
  aborted: true,
8901
9155
  willRetry: false,
8902
9156
  });
8903
- return false;
9157
+ return COMPACTION_CHECK_NONE;
8904
9158
  }
8905
9159
 
8906
9160
  if (hookResult?.compaction) {
@@ -9083,7 +9337,7 @@ export class AgentSession {
9083
9337
  aborted: true,
9084
9338
  willRetry: false,
9085
9339
  });
9086
- return false;
9340
+ return COMPACTION_CHECK_NONE;
9087
9341
  }
9088
9342
 
9089
9343
  this.sessionManager.appendCompaction(
@@ -9125,8 +9379,10 @@ export class AgentSession {
9125
9379
  };
9126
9380
  await this.#emitSessionEvent({ type: "auto_compaction_end", action, result, aborted: false, willRetry });
9127
9381
 
9382
+ let continuationScheduled = false;
9128
9383
  if (!willRetry && reason !== "idle" && shouldAutoContinue) {
9129
9384
  this.#scheduleAutoContinuePrompt(generation);
9385
+ continuationScheduled = true;
9130
9386
  }
9131
9387
 
9132
9388
  if (willRetry) {
@@ -9147,6 +9403,7 @@ export class AgentSession {
9147
9403
  }
9148
9404
 
9149
9405
  this.#scheduleAgentContinue({ delayMs: 100, generation });
9406
+ continuationScheduled = true;
9150
9407
  } else if (this.agent.hasQueuedMessages()) {
9151
9408
  // Auto-compaction can complete while follow-up/steering/custom messages are waiting.
9152
9409
  // Kick the loop so queued messages are actually delivered.
@@ -9155,7 +9412,9 @@ export class AgentSession {
9155
9412
  generation,
9156
9413
  shouldContinue: () => this.agent.hasQueuedMessages(),
9157
9414
  });
9415
+ continuationScheduled = true;
9158
9416
  }
9417
+ return continuationScheduled ? COMPACTION_CHECK_CONTINUATION : COMPACTION_CHECK_NONE;
9159
9418
  } catch (error) {
9160
9419
  if (autoCompactionSignal.aborted) {
9161
9420
  await this.#emitSessionEvent({
@@ -9165,7 +9424,7 @@ export class AgentSession {
9165
9424
  aborted: true,
9166
9425
  willRetry: false,
9167
9426
  });
9168
- return false;
9427
+ return COMPACTION_CHECK_NONE;
9169
9428
  }
9170
9429
  const errorMessage = error instanceof Error ? error.message : "compaction failed";
9171
9430
  await this.#emitSessionEvent({
@@ -9186,7 +9445,7 @@ export class AgentSession {
9186
9445
  this.#autoCompactionAbortController = undefined;
9187
9446
  }
9188
9447
  }
9189
- return false;
9448
+ return COMPACTION_CHECK_NONE;
9190
9449
  }
9191
9450
 
9192
9451
  /**
@@ -9205,7 +9464,7 @@ export class AgentSession {
9205
9464
  generation: number,
9206
9465
  autoContinue: boolean,
9207
9466
  triggerContextTokens?: number,
9208
- ): Promise<"handled" | "fallback"> {
9467
+ ): Promise<CompactionCheckResult | "fallback"> {
9209
9468
  const action = "shake";
9210
9469
  await this.#emitSessionEvent({ type: "auto_compaction_start", reason, action });
9211
9470
  this.#autoCompactionAbortController?.abort();
@@ -9222,7 +9481,7 @@ export class AgentSession {
9222
9481
  aborted: true,
9223
9482
  willRetry: false,
9224
9483
  });
9225
- return "handled";
9484
+ return COMPACTION_CHECK_NONE;
9226
9485
  }
9227
9486
  const reclaimed = result.toolResultsDropped + result.blocksDropped > 0;
9228
9487
  // Detect the dead-loop reported in issues #2119/#2275: the threshold check
@@ -9254,7 +9513,7 @@ export class AgentSession {
9254
9513
  const recoveryBand = Math.floor(thresholdTokens * SHAKE_RECOVERY_BAND);
9255
9514
  stillOverThreshold = correctedTokens > recoveryBand;
9256
9515
  } else {
9257
- const postShakeTokens = this.#estimatePendingPromptTokens([]);
9516
+ const postShakeTokens = this.getContextUsage({ contextWindow })?.tokens ?? 0;
9258
9517
  stillOverThreshold = shouldCompact(postShakeTokens, contextWindow, compactionSettings);
9259
9518
  }
9260
9519
  }
@@ -9283,8 +9542,10 @@ export class AgentSession {
9283
9542
  skipped: !reclaimed,
9284
9543
  });
9285
9544
 
9545
+ let continuationScheduled = false;
9286
9546
  if (!willRetry && reason !== "idle" && autoContinue) {
9287
9547
  this.#scheduleAutoContinuePrompt(generation);
9548
+ continuationScheduled = true;
9288
9549
  }
9289
9550
  if (willRetry) {
9290
9551
  // The shake rebuild replays every entry, so a trailing error/length
@@ -9300,14 +9561,16 @@ export class AgentSession {
9300
9561
  if (shouldDrop) this.agent.replaceMessages(messages.slice(0, -1));
9301
9562
  }
9302
9563
  this.#scheduleAgentContinue({ delayMs: 100, generation });
9564
+ continuationScheduled = true;
9303
9565
  } else if (this.agent.hasQueuedMessages()) {
9304
9566
  this.#scheduleAgentContinue({
9305
9567
  delayMs: 100,
9306
9568
  generation,
9307
9569
  shouldContinue: () => this.agent.hasQueuedMessages(),
9308
9570
  });
9571
+ continuationScheduled = true;
9309
9572
  }
9310
- return "handled";
9573
+ return continuationScheduled ? COMPACTION_CHECK_CONTINUATION : COMPACTION_CHECK_NONE;
9311
9574
  } catch (error) {
9312
9575
  if (signal.aborted) {
9313
9576
  await this.#emitSessionEvent({
@@ -9317,7 +9580,7 @@ export class AgentSession {
9317
9580
  aborted: true,
9318
9581
  willRetry: false,
9319
9582
  });
9320
- return "handled";
9583
+ return COMPACTION_CHECK_NONE;
9321
9584
  }
9322
9585
  const message = error instanceof Error ? error.message : "shake failed";
9323
9586
  await this.#emitSessionEvent({
@@ -9329,7 +9592,7 @@ export class AgentSession {
9329
9592
  errorMessage: `Auto-shake failed: ${message}`,
9330
9593
  });
9331
9594
  // Overflow still needs recovery even if shake threw.
9332
- return reason === "overflow" ? "fallback" : "handled";
9595
+ return reason === "overflow" ? "fallback" : COMPACTION_CHECK_NONE;
9333
9596
  } finally {
9334
9597
  if (this.#autoCompactionAbortController === controller) {
9335
9598
  this.#autoCompactionAbortController = undefined;
@@ -10446,11 +10709,7 @@ export class AgentSession {
10446
10709
  if (!model) {
10447
10710
  throw new Error("No active model on session");
10448
10711
  }
10449
- const apiKey = await this.#modelRegistry.getApiKey(model, this.sessionId);
10450
- if (!apiKey) {
10451
- throw new Error(`No API key for ${model.provider}/${model.id}`);
10452
- }
10453
-
10712
+ const cacheSessionId = this.sessionId;
10454
10713
  const snapshot = this.#buildEphemeralSnapshot(args.promptText);
10455
10714
  const llmMessages = await this.convertMessagesToLlm(snapshot, args.signal);
10456
10715
  const context: Context = {
@@ -10462,10 +10721,9 @@ export class AgentSession {
10462
10721
  // removes the surface entirely.
10463
10722
  tools: [],
10464
10723
  };
10465
- const cacheSessionId = this.sessionId;
10466
10724
  const options = this.prepareSimpleStreamOptions(
10467
10725
  {
10468
- apiKey,
10726
+ apiKey: this.#modelRegistry.resolver(model, cacheSessionId),
10469
10727
  // Side-channel turns must not share OpenAI/Codex append-only
10470
10728
  // conversation state with the main agent turn: IRC and /btw can run
10471
10729
  // while the main turn is mid-tool-call. Keep the prompt-cache key
@@ -11188,50 +11446,173 @@ export class AgentSession {
11188
11446
  * Uses the last assistant message's usage data when available,
11189
11447
  * otherwise estimates tokens for all messages.
11190
11448
  */
11191
- getContextUsage(options?: { contextWindow?: number }): ContextUsage | undefined {
11449
+ getContextBreakdown(options?: {
11450
+ contextWindow?: number;
11451
+ pendingMessages?: AgentMessage[];
11452
+ }): ContextUsageBreakdown | undefined {
11192
11453
  const model = this.model;
11193
11454
  const contextWindow = options?.contextWindow ?? model?.contextWindow ?? 0;
11194
11455
  if (!Number.isFinite(contextWindow) || contextWindow <= 0) return undefined;
11195
11456
 
11196
- // After compaction, the last assistant usage reflects pre-compaction context size.
11197
- // We can only trust usage from an assistant that responded after the latest compaction.
11198
- // If no such assistant exists, context token count is unknown until the next LLM response.
11457
+ const { skillsTokens, toolsTokens, systemContextTokens, systemPromptTokens } = computeNonMessageBreakdown(this);
11458
+ const categoryNonMessageTokens = skillsTokens + toolsTokens + systemContextTokens + systemPromptTokens;
11459
+ const currentNonMessageTokens = computeNonMessageTokens(this);
11460
+
11199
11461
  const branchEntries = this.sessionManager.getBranch();
11200
11462
  const latestCompaction = getLatestCompactionEntry(branchEntries);
11463
+ const compactionIndex = latestCompaction ? branchEntries.lastIndexOf(latestCompaction) : -1;
11201
11464
 
11202
- if (latestCompaction) {
11203
- // Check if there's a valid assistant usage after the compaction boundary
11204
- const compactionIndex = branchEntries.lastIndexOf(latestCompaction);
11205
- let hasPostCompactionUsage = false;
11206
- for (let i = branchEntries.length - 1; i > compactionIndex; i--) {
11207
- const entry = branchEntries[i];
11208
- if (entry.type === "message" && entry.message.role === "assistant") {
11209
- const assistant = entry.message;
11210
- if (assistant.stopReason !== "aborted" && assistant.stopReason !== "error") {
11211
- const contextTokens = calculateContextTokens(assistant.usage);
11212
- if (contextTokens > 0) {
11213
- hasPostCompactionUsage = true;
11214
- }
11215
- break;
11216
- }
11465
+ let usedTokens = 0;
11466
+ let anchored = false;
11467
+
11468
+ const pendingMessages = options?.pendingMessages ?? [];
11469
+
11470
+ const pending = this.#pendingContextSnapshot;
11471
+
11472
+ // Always locate the latest real assistant-usage anchor after the last
11473
+ // compaction. Its provider-reported promptTokens is ground truth for
11474
+ // everything up to that point; only the tail after it is estimated.
11475
+ let anchorEntry: SessionMessageEntry | undefined;
11476
+ for (let i = branchEntries.length - 1; i > compactionIndex; i--) {
11477
+ const entry = branchEntries[i];
11478
+ if (entry.type === "message" && entry.message.role === "assistant") {
11479
+ const assistant = entry.message;
11480
+ if (assistant.stopReason !== "aborted" && assistant.stopReason !== "error" && assistant.usage) {
11481
+ anchorEntry = entry;
11482
+ break;
11483
+ }
11484
+ }
11485
+ }
11486
+
11487
+ const resolvedActiveMessages = this.messages;
11488
+ let resolvedAnchorIndex = -1;
11489
+ let anchorAssistant: AssistantMessage | undefined;
11490
+ if (anchorEntry) {
11491
+ const a = anchorEntry.message as AssistantMessage;
11492
+ anchorAssistant = a;
11493
+ resolvedAnchorIndex = resolvedActiveMessages.indexOf(a);
11494
+ if (resolvedAnchorIndex === -1) {
11495
+ resolvedAnchorIndex = resolvedActiveMessages.findIndex(
11496
+ msg => msg.role === "assistant" && msg.timestamp === a.timestamp,
11497
+ );
11498
+ }
11499
+ }
11500
+
11501
+ // A real anchor supersedes the in-flight estimate only once a step of the
11502
+ // CURRENT turn has produced provider usage — i.e. it resolves at or after
11503
+ // the pending cutoff. While the turn's first response is still pending (or
11504
+ // the newest real anchor predates this turn) the pending snapshot is the
11505
+ // only thing accounting for the just-submitted prompt, so it wins. This
11506
+ // keeps a long tool turn from stacking an estimate of the entire tail on
11507
+ // top of a stale turn-start prompt.
11508
+ const useAnchor =
11509
+ anchorAssistant !== undefined &&
11510
+ resolvedAnchorIndex !== -1 &&
11511
+ (!pending || resolvedAnchorIndex >= pending.cutoffCount);
11512
+
11513
+ if (useAnchor && anchorAssistant) {
11514
+ const promptTokens =
11515
+ anchorAssistant.contextSnapshot?.promptTokens ?? calculatePromptTokens(anchorAssistant.usage);
11516
+ const nonMessageTokens = anchorAssistant.contextSnapshot?.nonMessageTokens ?? computeNonMessageTokens(this);
11517
+ anchored = true;
11518
+ let tailTokens = 0;
11519
+ for (let i = resolvedAnchorIndex + 1; i < resolvedActiveMessages.length; i++) {
11520
+ tailTokens += estimateTokens(resolvedActiveMessages[i]);
11521
+ }
11522
+ usedTokens =
11523
+ promptTokens +
11524
+ Math.max(0, currentNonMessageTokens - nonMessageTokens) +
11525
+ tailTokens +
11526
+ pendingMessages.reduce((sum, msg) => sum + estimateTokens(msg), 0);
11527
+ } else if (pending) {
11528
+ anchored = true;
11529
+ let tailTokens = 0;
11530
+ if (resolvedActiveMessages.length > pending.cutoffCount) {
11531
+ for (let i = pending.cutoffCount; i < resolvedActiveMessages.length; i++) {
11532
+ tailTokens += estimateTokens(resolvedActiveMessages[i]);
11217
11533
  }
11218
11534
  }
11535
+ usedTokens =
11536
+ pending.promptTokens +
11537
+ Math.max(0, currentNonMessageTokens - pending.nonMessageTokens) +
11538
+ tailTokens +
11539
+ pendingMessages.reduce((sum, msg) => sum + estimateTokens(msg), 0);
11540
+ }
11541
+
11542
+ if (!anchored && !pending && branchEntries.length === 0) {
11543
+ // Fallback: look for the latest assistant message with usage/snapshot in this.messages (for branchless/fake sessions in tests)
11544
+ for (let i = resolvedActiveMessages.length - 1; i >= 0; i--) {
11545
+ const msg = resolvedActiveMessages[i];
11546
+ if (msg.role === "assistant" && msg.stopReason !== "aborted" && msg.stopReason !== "error" && msg.usage) {
11547
+ const promptTokens = msg.contextSnapshot?.promptTokens ?? calculatePromptTokens(msg.usage);
11548
+ const nonMessageTokens = msg.contextSnapshot?.nonMessageTokens ?? computeNonMessageTokens(this);
11219
11549
 
11220
- if (!hasPostCompactionUsage) {
11221
- return { tokens: null, contextWindow, percent: null };
11550
+ let tailTokens = 0;
11551
+ for (let j = i + 1; j < resolvedActiveMessages.length; j++) {
11552
+ tailTokens += estimateTokens(resolvedActiveMessages[j]);
11553
+ }
11554
+
11555
+ usedTokens =
11556
+ promptTokens +
11557
+ Math.max(0, currentNonMessageTokens - nonMessageTokens) +
11558
+ tailTokens +
11559
+ pendingMessages.reduce((sum, msg) => sum + estimateTokens(msg), 0);
11560
+ anchored = true;
11561
+ break;
11562
+ }
11222
11563
  }
11223
11564
  }
11565
+ if (!anchored) {
11566
+ let messagesTokens = 0;
11567
+ for (const msg of resolvedActiveMessages) {
11568
+ messagesTokens += estimateTokens(msg);
11569
+ }
11570
+ usedTokens =
11571
+ currentNonMessageTokens +
11572
+ messagesTokens +
11573
+ pendingMessages.reduce((sum, msg) => sum + estimateTokens(msg), 0);
11574
+ }
11224
11575
 
11225
- const estimate = this.#estimateContextTokens();
11226
- const percent = (estimate.tokens / contextWindow) * 100;
11576
+ const messagesTokens = Math.max(0, usedTokens - categoryNonMessageTokens);
11227
11577
 
11228
11578
  return {
11229
- tokens: estimate.tokens,
11230
11579
  contextWindow,
11231
- percent,
11580
+ anchored,
11581
+ usedTokens,
11582
+ systemPromptTokens,
11583
+ systemToolsTokens: toolsTokens,
11584
+ systemContextTokens,
11585
+ skillsTokens,
11586
+ messagesTokens,
11587
+ };
11588
+ }
11589
+
11590
+ getContextUsage(options?: { contextWindow?: number }): ContextUsage | undefined {
11591
+ const breakdown = this.getContextBreakdown(options);
11592
+ if (!breakdown) return undefined;
11593
+ return {
11594
+ tokens: breakdown.usedTokens,
11595
+ contextWindow: breakdown.contextWindow,
11596
+ percent: breakdown.contextWindow > 0 ? (breakdown.usedTokens / breakdown.contextWindow) * 100 : 0,
11232
11597
  };
11233
11598
  }
11234
11599
 
11600
+ /**
11601
+ * Monotonic counter that changes whenever the in-flight pending context
11602
+ * snapshot is set or cleared. Status-line context memoization keys on this so
11603
+ * a value computed mid-turn cannot persist after the turn ends/aborts.
11604
+ */
11605
+ get contextUsageRevision(): number {
11606
+ return this.#contextUsageRevision;
11607
+ }
11608
+
11609
+ #setPendingContextSnapshot(
11610
+ snapshot: { promptTokens: number; nonMessageTokens: number; cutoffCount: number } | undefined,
11611
+ ): void {
11612
+ this.#pendingContextSnapshot = snapshot;
11613
+ this.#contextUsageRevision++;
11614
+ }
11615
+
11235
11616
  #ingestProviderUsageHeaders(response: ProviderResponseMetadata, model?: Model): void {
11236
11617
  if (model?.provider !== "anthropic") return;
11237
11618
  this.#modelRegistry.authStorage.ingestUsageHeaders("anthropic", response.headers, {
@@ -11244,7 +11625,17 @@ export class AgentSession {
11244
11625
  const authStorage = this.#modelRegistry.authStorage;
11245
11626
  if (!authStorage.fetchUsageReports) return null;
11246
11627
  return authStorage.fetchUsageReports({
11247
- baseUrlResolver: provider => this.#modelRegistry.getProviderBaseUrl?.(provider),
11628
+ baseUrlResolver: provider => {
11629
+ if (provider === "google-antigravity") {
11630
+ const mode = this.settings.get("providers.antigravityEndpoint");
11631
+ if (mode === "sandbox") {
11632
+ return "https://daily-cloudcode-pa.sandbox.googleapis.com";
11633
+ } else if (mode === "production") {
11634
+ return "https://daily-cloudcode-pa.googleapis.com";
11635
+ }
11636
+ }
11637
+ return this.#modelRegistry.getProviderBaseUrl?.(provider);
11638
+ },
11248
11639
  signal,
11249
11640
  });
11250
11641
  }
@@ -11412,64 +11803,6 @@ export class AgentSession {
11412
11803
  return run;
11413
11804
  }
11414
11805
 
11415
- /**
11416
- * Estimate context tokens from messages, using the last assistant usage when available.
11417
- */
11418
- #estimateContextTokens(): {
11419
- tokens: number;
11420
- providerAnchored: boolean;
11421
- providerNonMessageTokens?: number;
11422
- } {
11423
- const messages = this.messages;
11424
-
11425
- // Find last assistant message with valid usage.
11426
- let lastUsageIndex: number | null = null;
11427
- let lastUsage: Usage | undefined;
11428
- for (let i = messages.length - 1; i >= 0; i--) {
11429
- const msg = messages[i];
11430
- if (msg.role === "assistant") {
11431
- const assistantMsg = msg as AssistantMessage;
11432
- if (assistantMsg.stopReason !== "aborted" && assistantMsg.stopReason !== "error" && assistantMsg.usage) {
11433
- lastUsage = assistantMsg.usage;
11434
- lastUsageIndex = i;
11435
- break;
11436
- }
11437
- }
11438
- }
11439
-
11440
- if (!lastUsage || lastUsageIndex === null) {
11441
- // No usage data - estimate all messages
11442
- let estimated = 0;
11443
- for (const message of messages) {
11444
- estimated += estimateTokens(message);
11445
- }
11446
- return {
11447
- tokens: estimated,
11448
- providerAnchored: false,
11449
- };
11450
- }
11451
-
11452
- const usageTokens = calculatePromptTokens(lastUsage);
11453
- const providerNonMessage =
11454
- this.#lastProviderUsageNonMessage &&
11455
- messages[lastUsageIndex]?.role === "assistant" &&
11456
- this.#lastProviderUsageNonMessage.provider === (messages[lastUsageIndex] as AssistantMessage).provider &&
11457
- this.#lastProviderUsageNonMessage.model === (messages[lastUsageIndex] as AssistantMessage).model &&
11458
- this.#lastProviderUsageNonMessage.timestamp === (messages[lastUsageIndex] as AssistantMessage).timestamp
11459
- ? this.#lastProviderUsageNonMessage.tokens
11460
- : undefined;
11461
- let trailingTokens = 0;
11462
- for (let i = lastUsageIndex + 1; i < messages.length; i++) {
11463
- trailingTokens += estimateTokens(messages[i]);
11464
- }
11465
-
11466
- return {
11467
- tokens: usageTokens + trailingTokens,
11468
- providerAnchored: true,
11469
- providerNonMessageTokens: providerNonMessage,
11470
- };
11471
- }
11472
-
11473
11806
  /**
11474
11807
  * Export session to HTML.
11475
11808
  * @param outputPath Optional output path (defaults to session directory)