@xortex/xcode 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (934)
  1. package/LICENSE +21 -0
  2. package/README.md +171 -0
  3. package/bin/xcode +127 -0
  4. package/bin/xcode-test +84 -0
  5. package/bin/xcode.cmd +31 -0
  6. package/constants/apiLimits.ts +94 -0
  7. package/constants/betas.ts +52 -0
  8. package/constants/common.ts +33 -0
  9. package/constants/cyberRiskInstruction.ts +24 -0
  10. package/constants/errorIds.ts +15 -0
  11. package/constants/figures.ts +45 -0
  12. package/constants/files.ts +156 -0
  13. package/constants/github-app.ts +144 -0
  14. package/constants/keys.ts +11 -0
  15. package/constants/messages.ts +1 -0
  16. package/constants/oauth.ts +234 -0
  17. package/constants/outputStyles.ts +216 -0
  18. package/constants/product.ts +76 -0
  19. package/constants/prompts.ts +939 -0
  20. package/constants/spinnerVerbs.ts +204 -0
  21. package/constants/system.ts +95 -0
  22. package/constants/systemPromptSections.ts +68 -0
  23. package/constants/toolLimits.ts +56 -0
  24. package/constants/tools.ts +112 -0
  25. package/constants/turnCompletionVerbs.ts +12 -0
  26. package/constants/xml.ts +86 -0
  27. package/entrypoints/agentSdkTypes.ts +443 -0
  28. package/entrypoints/cli.tsx +307 -0
  29. package/entrypoints/init.ts +340 -0
  30. package/entrypoints/mcp.ts +196 -0
  31. package/entrypoints/sandboxTypes.ts +156 -0
  32. package/entrypoints/sdk/controlSchemas.ts +663 -0
  33. package/entrypoints/sdk/coreSchemas.ts +1889 -0
  34. package/entrypoints/sdk/coreTypes.generated.ts +2 -0
  35. package/entrypoints/sdk/coreTypes.ts +62 -0
  36. package/entrypoints/sdk/runtimeTypes.ts +140 -0
  37. package/entrypoints/sdk/sdkUtilityTypes.ts +3 -0
  38. package/entrypoints/sdk/toolTypes.ts +90 -0
  39. package/main.tsx +4686 -0
  40. package/package.json +120 -0
  41. package/services/AgentSummary/agentSummary.ts +179 -0
  42. package/services/MagicDocs/magicDocs.ts +254 -0
  43. package/services/MagicDocs/prompts.ts +127 -0
  44. package/services/PromptSuggestion/promptSuggestion.ts +523 -0
  45. package/services/PromptSuggestion/speculation.ts +991 -0
  46. package/services/SessionMemory/prompts.ts +324 -0
  47. package/services/SessionMemory/sessionMemory.ts +495 -0
  48. package/services/SessionMemory/sessionMemoryUtils.ts +207 -0
  49. package/services/analytics/config.ts +38 -0
  50. package/services/analytics/datadog.ts +307 -0
  51. package/services/analytics/firstPartyEventLogger.ts +449 -0
  52. package/services/analytics/firstPartyEventLoggingExporter.ts +806 -0
  53. package/services/analytics/growthbook.ts +1155 -0
  54. package/services/analytics/index.ts +173 -0
  55. package/services/analytics/metadata.ts +973 -0
  56. package/services/analytics/sink.ts +114 -0
  57. package/services/analytics/sinkKillswitch.ts +25 -0
  58. package/services/api/adminRequests.ts +119 -0
  59. package/services/api/bootstrap.ts +141 -0
  60. package/services/api/claude.ts +3422 -0
  61. package/services/api/client.ts +406 -0
  62. package/services/api/dumpPrompts.ts +226 -0
  63. package/services/api/emptyUsage.ts +22 -0
  64. package/services/api/errorUtils.ts +260 -0
  65. package/services/api/errors.ts +1207 -0
  66. package/services/api/filesApi.ts +748 -0
  67. package/services/api/firstTokenDate.ts +60 -0
  68. package/services/api/gemini.ts +359 -0
  69. package/services/api/geminiAdapter.ts +123 -0
  70. package/services/api/geminiClient.ts +291 -0
  71. package/services/api/grove.ts +357 -0
  72. package/services/api/logging.ts +788 -0
  73. package/services/api/metricsOptOut.ts +159 -0
  74. package/services/api/openRouterClient.ts +453 -0
  75. package/services/api/overageCreditGrant.ts +137 -0
  76. package/services/api/promptCacheBreakDetection.ts +727 -0
  77. package/services/api/referral.ts +281 -0
  78. package/services/api/sessionIngress.ts +514 -0
  79. package/services/api/ultrareviewQuota.ts +38 -0
  80. package/services/api/usage.ts +63 -0
  81. package/services/api/withRetry.ts +822 -0
  82. package/services/autoDream/autoDream.ts +324 -0
  83. package/services/autoDream/config.ts +21 -0
  84. package/services/autoDream/consolidationLock.ts +140 -0
  85. package/services/autoDream/consolidationPrompt.ts +65 -0
  86. package/services/awaySummary.ts +74 -0
  87. package/services/claudeAiLimits.ts +515 -0
  88. package/services/claudeAiLimitsHook.ts +23 -0
  89. package/services/compact/apiMicrocompact.ts +153 -0
  90. package/services/compact/autoCompact.ts +351 -0
  91. package/services/compact/compact.ts +1705 -0
  92. package/services/compact/compactWarningHook.ts +16 -0
  93. package/services/compact/compactWarningState.ts +18 -0
  94. package/services/compact/grouping.ts +63 -0
  95. package/services/compact/microCompact.ts +530 -0
  96. package/services/compact/postCompactCleanup.ts +77 -0
  97. package/services/compact/prompt.ts +374 -0
  98. package/services/compact/sessionMemoryCompact.ts +630 -0
  99. package/services/compact/timeBasedMCConfig.ts +43 -0
  100. package/services/diagnosticTracking.ts +397 -0
  101. package/services/extractMemories/extractMemories.ts +517 -0
  102. package/services/extractMemories/prompts.ts +154 -0
  103. package/services/internalLogging.ts +90 -0
  104. package/services/lsp/LSPClient.ts +447 -0
  105. package/services/lsp/LSPDiagnosticRegistry.ts +386 -0
  106. package/services/lsp/LSPServerInstance.ts +511 -0
  107. package/services/lsp/LSPServerManager.ts +420 -0
  108. package/services/lsp/config.ts +79 -0
  109. package/services/lsp/manager.ts +289 -0
  110. package/services/lsp/passiveFeedback.ts +328 -0
  111. package/services/mcp/InProcessTransport.ts +63 -0
  112. package/services/mcp/MCPConnectionManager.tsx +73 -0
  113. package/services/mcp/SdkControlTransport.ts +136 -0
  114. package/services/mcp/auth.ts +2465 -0
  115. package/services/mcp/channelAllowlist.ts +76 -0
  116. package/services/mcp/channelNotification.ts +316 -0
  117. package/services/mcp/channelPermissions.ts +240 -0
  118. package/services/mcp/claudeai.ts +164 -0
  119. package/services/mcp/client.ts +3348 -0
  120. package/services/mcp/config.ts +1578 -0
  121. package/services/mcp/elicitationHandler.ts +313 -0
  122. package/services/mcp/envExpansion.ts +38 -0
  123. package/services/mcp/headersHelper.ts +138 -0
  124. package/services/mcp/mcpStringUtils.ts +106 -0
  125. package/services/mcp/normalization.ts +23 -0
  126. package/services/mcp/oauthPort.ts +78 -0
  127. package/services/mcp/officialRegistry.ts +72 -0
  128. package/services/mcp/types.ts +258 -0
  129. package/services/mcp/useManageMCPConnections.ts +1141 -0
  130. package/services/mcp/utils.ts +575 -0
  131. package/services/mcp/vscodeSdkMcp.ts +112 -0
  132. package/services/mcp/xaa.ts +511 -0
  133. package/services/mcp/xaaIdpLogin.ts +487 -0
  134. package/services/mcpServerApproval.tsx +41 -0
  135. package/services/mockRateLimits.ts +882 -0
  136. package/services/notifier.ts +156 -0
  137. package/services/oauth/auth-code-listener.ts +211 -0
  138. package/services/oauth/client.ts +566 -0
  139. package/services/oauth/crypto.ts +23 -0
  140. package/services/oauth/getOauthProfile.ts +53 -0
  141. package/services/oauth/index.ts +198 -0
  142. package/services/plugins/PluginInstallationManager.ts +184 -0
  143. package/services/plugins/pluginCliCommands.ts +344 -0
  144. package/services/plugins/pluginOperations.ts +1088 -0
  145. package/services/policyLimits/index.ts +663 -0
  146. package/services/policyLimits/types.ts +27 -0
  147. package/services/preventSleep.ts +165 -0
  148. package/services/rateLimitMessages.ts +344 -0
  149. package/services/rateLimitMocking.ts +144 -0
  150. package/services/remoteManagedSettings/index.ts +638 -0
  151. package/services/remoteManagedSettings/securityCheck.tsx +74 -0
  152. package/services/remoteManagedSettings/syncCache.ts +112 -0
  153. package/services/remoteManagedSettings/syncCacheState.ts +96 -0
  154. package/services/remoteManagedSettings/types.ts +31 -0
  155. package/services/settingsSync/index.ts +581 -0
  156. package/services/settingsSync/types.ts +67 -0
  157. package/services/teamMemorySync/index.ts +1256 -0
  158. package/services/teamMemorySync/secretScanner.ts +324 -0
  159. package/services/teamMemorySync/teamMemSecretGuard.ts +44 -0
  160. package/services/teamMemorySync/types.ts +156 -0
  161. package/services/teamMemorySync/watcher.ts +387 -0
  162. package/services/tips/tipHistory.ts +17 -0
  163. package/services/tips/tipRegistry.ts +686 -0
  164. package/services/tips/tipScheduler.ts +58 -0
  165. package/services/tokenEstimation.ts +495 -0
  166. package/services/toolUseSummary/toolUseSummaryGenerator.ts +112 -0
  167. package/services/tools/StreamingToolExecutor.ts +530 -0
  168. package/services/tools/toolExecution.ts +1745 -0
  169. package/services/tools/toolHooks.ts +650 -0
  170. package/services/tools/toolOrchestration.ts +188 -0
  171. package/services/vcr.ts +406 -0
  172. package/services/voice.ts +525 -0
  173. package/services/voiceKeyterms.ts +106 -0
  174. package/services/voiceStreamSTT.ts +544 -0
  175. package/tools/AgentTool/AgentTool.tsx +1398 -0
  176. package/tools/AgentTool/UI.tsx +872 -0
  177. package/tools/AgentTool/agentColorManager.ts +66 -0
  178. package/tools/AgentTool/agentDisplay.ts +104 -0
  179. package/tools/AgentTool/agentMemory.ts +177 -0
  180. package/tools/AgentTool/agentMemorySnapshot.ts +197 -0
  181. package/tools/AgentTool/agentToolUtils.ts +686 -0
  182. package/tools/AgentTool/built-in/claudeCodeGuideAgent.ts +205 -0
  183. package/tools/AgentTool/built-in/exploreAgent.ts +83 -0
  184. package/tools/AgentTool/built-in/generalPurposeAgent.ts +34 -0
  185. package/tools/AgentTool/built-in/planAgent.ts +92 -0
  186. package/tools/AgentTool/built-in/statuslineSetup.ts +144 -0
  187. package/tools/AgentTool/built-in/verificationAgent.ts +152 -0
  188. package/tools/AgentTool/builtInAgents.ts +72 -0
  189. package/tools/AgentTool/constants.ts +12 -0
  190. package/tools/AgentTool/forkSubagent.ts +210 -0
  191. package/tools/AgentTool/loadAgentsDir.ts +755 -0
  192. package/tools/AgentTool/prompt.ts +287 -0
  193. package/tools/AgentTool/resumeAgent.ts +265 -0
  194. package/tools/AgentTool/runAgent.ts +973 -0
  195. package/tools/AskUserQuestionTool/AskUserQuestionTool.tsx +266 -0
  196. package/tools/AskUserQuestionTool/prompt.ts +44 -0
  197. package/tools/BashTool/BashTool.tsx +1144 -0
  198. package/tools/BashTool/BashToolResultMessage.tsx +191 -0
  199. package/tools/BashTool/UI.tsx +185 -0
  200. package/tools/BashTool/bashCommandHelpers.ts +265 -0
  201. package/tools/BashTool/bashPermissions.ts +2621 -0
  202. package/tools/BashTool/bashSecurity.ts +2592 -0
  203. package/tools/BashTool/commandSemantics.ts +140 -0
  204. package/tools/BashTool/commentLabel.ts +13 -0
  205. package/tools/BashTool/destructiveCommandWarning.ts +102 -0
  206. package/tools/BashTool/modeValidation.ts +115 -0
  207. package/tools/BashTool/pathValidation.ts +1303 -0
  208. package/tools/BashTool/prompt.ts +369 -0
  209. package/tools/BashTool/readOnlyValidation.ts +1990 -0
  210. package/tools/BashTool/sedEditParser.ts +322 -0
  211. package/tools/BashTool/sedValidation.ts +684 -0
  212. package/tools/BashTool/shouldUseSandbox.ts +153 -0
  213. package/tools/BashTool/toolName.ts +2 -0
  214. package/tools/BashTool/utils.ts +223 -0
  215. package/tools/BriefTool/BriefTool.ts +204 -0
  216. package/tools/BriefTool/UI.tsx +101 -0
  217. package/tools/BriefTool/attachments.ts +110 -0
  218. package/tools/BriefTool/prompt.ts +22 -0
  219. package/tools/BriefTool/upload.ts +174 -0
  220. package/tools/ConfigTool/ConfigTool.ts +467 -0
  221. package/tools/ConfigTool/UI.tsx +38 -0
  222. package/tools/ConfigTool/constants.ts +1 -0
  223. package/tools/ConfigTool/prompt.ts +93 -0
  224. package/tools/ConfigTool/supportedSettings.ts +211 -0
  225. package/tools/EnterPlanModeTool/EnterPlanModeTool.ts +126 -0
  226. package/tools/EnterPlanModeTool/UI.tsx +33 -0
  227. package/tools/EnterPlanModeTool/constants.ts +1 -0
  228. package/tools/EnterPlanModeTool/prompt.ts +170 -0
  229. package/tools/EnterWorktreeTool/EnterWorktreeTool.ts +127 -0
  230. package/tools/EnterWorktreeTool/UI.tsx +20 -0
  231. package/tools/EnterWorktreeTool/constants.ts +1 -0
  232. package/tools/EnterWorktreeTool/prompt.ts +30 -0
  233. package/tools/ExitPlanModeTool/ExitPlanModeV2Tool.ts +493 -0
  234. package/tools/ExitPlanModeTool/UI.tsx +82 -0
  235. package/tools/ExitPlanModeTool/constants.ts +2 -0
  236. package/tools/ExitPlanModeTool/prompt.ts +29 -0
  237. package/tools/ExitWorktreeTool/ExitWorktreeTool.ts +329 -0
  238. package/tools/ExitWorktreeTool/UI.tsx +25 -0
  239. package/tools/ExitWorktreeTool/constants.ts +1 -0
  240. package/tools/ExitWorktreeTool/prompt.ts +32 -0
  241. package/tools/FileEditTool/FileEditTool.ts +625 -0
  242. package/tools/FileEditTool/UI.tsx +289 -0
  243. package/tools/FileEditTool/constants.ts +11 -0
  244. package/tools/FileEditTool/prompt.ts +28 -0
  245. package/tools/FileEditTool/types.ts +85 -0
  246. package/tools/FileEditTool/utils.ts +775 -0
  247. package/tools/FileReadTool/FileReadTool.ts +1183 -0
  248. package/tools/FileReadTool/UI.tsx +185 -0
  249. package/tools/FileReadTool/imageProcessor.ts +94 -0
  250. package/tools/FileReadTool/limits.ts +92 -0
  251. package/tools/FileReadTool/prompt.ts +49 -0
  252. package/tools/FileWriteTool/FileWriteTool.ts +434 -0
  253. package/tools/FileWriteTool/UI.tsx +405 -0
  254. package/tools/FileWriteTool/prompt.ts +18 -0
  255. package/tools/GlobTool/GlobTool.ts +198 -0
  256. package/tools/GlobTool/UI.tsx +63 -0
  257. package/tools/GlobTool/prompt.ts +7 -0
  258. package/tools/GrepTool/GrepTool.ts +577 -0
  259. package/tools/GrepTool/UI.tsx +201 -0
  260. package/tools/GrepTool/prompt.ts +18 -0
  261. package/tools/LSPTool/LSPTool.ts +860 -0
  262. package/tools/LSPTool/UI.tsx +228 -0
  263. package/tools/LSPTool/formatters.ts +592 -0
  264. package/tools/LSPTool/prompt.ts +21 -0
  265. package/tools/LSPTool/schemas.ts +215 -0
  266. package/tools/LSPTool/symbolContext.ts +90 -0
  267. package/tools/ListMcpResourcesTool/ListMcpResourcesTool.ts +123 -0
  268. package/tools/ListMcpResourcesTool/UI.tsx +29 -0
  269. package/tools/ListMcpResourcesTool/prompt.ts +20 -0
  270. package/tools/MCPTool/MCPTool.ts +77 -0
  271. package/tools/MCPTool/UI.tsx +403 -0
  272. package/tools/MCPTool/classifyForCollapse.ts +604 -0
  273. package/tools/MCPTool/prompt.ts +3 -0
  274. package/tools/McpAuthTool/McpAuthTool.ts +215 -0
  275. package/tools/NotebookEditTool/NotebookEditTool.ts +490 -0
  276. package/tools/NotebookEditTool/UI.tsx +93 -0
  277. package/tools/NotebookEditTool/constants.ts +2 -0
  278. package/tools/NotebookEditTool/prompt.ts +3 -0
  279. package/tools/PowerShellTool/PowerShellTool.tsx +1001 -0
  280. package/tools/PowerShellTool/UI.tsx +131 -0
  281. package/tools/PowerShellTool/clmTypes.ts +211 -0
  282. package/tools/PowerShellTool/commandSemantics.ts +142 -0
  283. package/tools/PowerShellTool/commonParameters.ts +30 -0
  284. package/tools/PowerShellTool/destructiveCommandWarning.ts +109 -0
  285. package/tools/PowerShellTool/gitSafety.ts +176 -0
  286. package/tools/PowerShellTool/modeValidation.ts +404 -0
  287. package/tools/PowerShellTool/pathValidation.ts +2049 -0
  288. package/tools/PowerShellTool/powershellPermissions.ts +1648 -0
  289. package/tools/PowerShellTool/powershellSecurity.ts +1090 -0
  290. package/tools/PowerShellTool/prompt.ts +145 -0
  291. package/tools/PowerShellTool/readOnlyValidation.ts +1823 -0
  292. package/tools/PowerShellTool/toolName.ts +2 -0
  293. package/tools/REPLTool/constants.ts +46 -0
  294. package/tools/REPLTool/primitiveTools.ts +39 -0
  295. package/tools/ReadMcpResourceTool/ReadMcpResourceTool.ts +158 -0
  296. package/tools/ReadMcpResourceTool/UI.tsx +37 -0
  297. package/tools/ReadMcpResourceTool/prompt.ts +16 -0
  298. package/tools/RemoteTriggerTool/RemoteTriggerTool.ts +161 -0
  299. package/tools/RemoteTriggerTool/UI.tsx +17 -0
  300. package/tools/RemoteTriggerTool/prompt.ts +15 -0
  301. package/tools/ScheduleCronTool/CronCreateTool.ts +157 -0
  302. package/tools/ScheduleCronTool/CronDeleteTool.ts +95 -0
  303. package/tools/ScheduleCronTool/CronListTool.ts +97 -0
  304. package/tools/ScheduleCronTool/UI.tsx +60 -0
  305. package/tools/ScheduleCronTool/prompt.ts +135 -0
  306. package/tools/SendMessageTool/SendMessageTool.ts +917 -0
  307. package/tools/SendMessageTool/UI.tsx +31 -0
  308. package/tools/SendMessageTool/constants.ts +1 -0
  309. package/tools/SendMessageTool/prompt.ts +49 -0
  310. package/tools/SkillTool/SkillTool.ts +1108 -0
  311. package/tools/SkillTool/UI.tsx +128 -0
  312. package/tools/SkillTool/constants.ts +1 -0
  313. package/tools/SkillTool/prompt.ts +241 -0
  314. package/tools/SleepTool/prompt.ts +17 -0
  315. package/tools/SyntheticOutputTool/SyntheticOutputTool.ts +163 -0
  316. package/tools/TaskCreateTool/TaskCreateTool.ts +138 -0
  317. package/tools/TaskCreateTool/constants.ts +1 -0
  318. package/tools/TaskCreateTool/prompt.ts +56 -0
  319. package/tools/TaskGetTool/TaskGetTool.ts +128 -0
  320. package/tools/TaskGetTool/constants.ts +1 -0
  321. package/tools/TaskGetTool/prompt.ts +24 -0
  322. package/tools/TaskListTool/TaskListTool.ts +116 -0
  323. package/tools/TaskListTool/constants.ts +1 -0
  324. package/tools/TaskListTool/prompt.ts +49 -0
  325. package/tools/TaskOutputTool/TaskOutputTool.tsx +584 -0
  326. package/tools/TaskOutputTool/constants.ts +1 -0
  327. package/tools/TaskStopTool/TaskStopTool.ts +131 -0
  328. package/tools/TaskStopTool/UI.tsx +41 -0
  329. package/tools/TaskStopTool/prompt.ts +8 -0
  330. package/tools/TaskUpdateTool/TaskUpdateTool.ts +406 -0
  331. package/tools/TaskUpdateTool/constants.ts +1 -0
  332. package/tools/TaskUpdateTool/prompt.ts +77 -0
  333. package/tools/TeamCreateTool/TeamCreateTool.ts +240 -0
  334. package/tools/TeamCreateTool/UI.tsx +6 -0
  335. package/tools/TeamCreateTool/constants.ts +1 -0
  336. package/tools/TeamCreateTool/prompt.ts +113 -0
  337. package/tools/TeamDeleteTool/TeamDeleteTool.ts +139 -0
  338. package/tools/TeamDeleteTool/UI.tsx +20 -0
  339. package/tools/TeamDeleteTool/constants.ts +1 -0
  340. package/tools/TeamDeleteTool/prompt.ts +16 -0
  341. package/tools/TodoWriteTool/TodoWriteTool.ts +115 -0
  342. package/tools/TodoWriteTool/constants.ts +1 -0
  343. package/tools/TodoWriteTool/prompt.ts +184 -0
  344. package/tools/ToolSearchTool/ToolSearchTool.ts +471 -0
  345. package/tools/ToolSearchTool/constants.ts +1 -0
  346. package/tools/ToolSearchTool/prompt.ts +121 -0
  347. package/tools/TungstenTool/TungstenTool.ts +4 -0
  348. package/tools/WebFetchTool/UI.tsx +72 -0
  349. package/tools/WebFetchTool/WebFetchTool.ts +318 -0
  350. package/tools/WebFetchTool/preapproved.ts +166 -0
  351. package/tools/WebFetchTool/prompt.ts +46 -0
  352. package/tools/WebFetchTool/utils.ts +530 -0
  353. package/tools/WebSearchTool/UI.tsx +101 -0
  354. package/tools/WebSearchTool/WebSearchTool.ts +435 -0
  355. package/tools/WebSearchTool/prompt.ts +34 -0
  356. package/tools/WorkflowTool/constants.ts +2 -0
  357. package/tools/XMemIngestTool/XMemIngestTool.ts +140 -0
  358. package/tools/XMemIngestTool/prompt.ts +13 -0
  359. package/tools/XMemRetrieveTool/XMemRetrieveTool.ts +177 -0
  360. package/tools/XMemRetrieveTool/prompt.ts +16 -0
  361. package/tools/XMemSearchTool/XMemSearchTool.ts +172 -0
  362. package/tools/XMemSearchTool/prompt.ts +11 -0
  363. package/tools/shared/gitOperationTracking.ts +277 -0
  364. package/tools/shared/spawnMultiAgent.ts +1093 -0
  365. package/tools/testing/TestingPermissionTool.tsx +74 -0
  366. package/tools/utils.ts +40 -0
  367. package/utils/CircularBuffer.ts +84 -0
  368. package/utils/Cursor.ts +1530 -0
  369. package/utils/QueryGuard.ts +121 -0
  370. package/utils/Shell.ts +474 -0
  371. package/utils/ShellCommand.ts +465 -0
  372. package/utils/abortController.ts +99 -0
  373. package/utils/activityManager.ts +164 -0
  374. package/utils/advisor.ts +145 -0
  375. package/utils/agentContext.ts +178 -0
  376. package/utils/agentId.ts +99 -0
  377. package/utils/agentSwarmsEnabled.ts +44 -0
  378. package/utils/agenticSessionSearch.ts +307 -0
  379. package/utils/analyzeContext.ts +1382 -0
  380. package/utils/ansiToPng.ts +334 -0
  381. package/utils/ansiToSvg.ts +272 -0
  382. package/utils/api.ts +718 -0
  383. package/utils/apiPreconnect.ts +71 -0
  384. package/utils/appleTerminalBackup.ts +124 -0
  385. package/utils/argumentSubstitution.ts +145 -0
  386. package/utils/array.ts +13 -0
  387. package/utils/asciicast.ts +239 -0
  388. package/utils/attachments.ts +4091 -0
  389. package/utils/attribution.ts +393 -0
  390. package/utils/auth.ts +2002 -0
  391. package/utils/authFileDescriptor.ts +196 -0
  392. package/utils/authPortable.ts +19 -0
  393. package/utils/autoModeDenials.ts +26 -0
  394. package/utils/autoRunIssue.tsx +122 -0
  395. package/utils/autoUpdater.ts +561 -0
  396. package/utils/aws.ts +74 -0
  397. package/utils/awsAuthStatusManager.ts +81 -0
  398. package/utils/axios.ts +8 -0
  399. package/utils/background/remote/preconditions.ts +235 -0
  400. package/utils/background/remote/remoteSession.ts +98 -0
  401. package/utils/backgroundHousekeeping.ts +94 -0
  402. package/utils/bash/ParsedCommand.ts +318 -0
  403. package/utils/bash/ShellSnapshot.ts +582 -0
  404. package/utils/bash/ast.ts +2679 -0
  405. package/utils/bash/bashParser.ts +4436 -0
  406. package/utils/bash/bashPipeCommand.ts +294 -0
  407. package/utils/bash/commands.ts +1339 -0
  408. package/utils/bash/heredoc.ts +733 -0
  409. package/utils/bash/parser.ts +230 -0
  410. package/utils/bash/prefix.ts +204 -0
  411. package/utils/bash/registry.ts +53 -0
  412. package/utils/bash/shellCompletion.ts +259 -0
  413. package/utils/bash/shellPrefix.ts +28 -0
  414. package/utils/bash/shellQuote.ts +304 -0
  415. package/utils/bash/shellQuoting.ts +128 -0
  416. package/utils/bash/specs/alias.ts +14 -0
  417. package/utils/bash/specs/index.ts +18 -0
  418. package/utils/bash/specs/nohup.ts +13 -0
  419. package/utils/bash/specs/pyright.ts +91 -0
  420. package/utils/bash/specs/sleep.ts +13 -0
  421. package/utils/bash/specs/srun.ts +31 -0
  422. package/utils/bash/specs/time.ts +13 -0
  423. package/utils/bash/specs/timeout.ts +20 -0
  424. package/utils/bash/treeSitterAnalysis.ts +506 -0
  425. package/utils/betas.ts +434 -0
  426. package/utils/billing.ts +78 -0
  427. package/utils/binaryCheck.ts +53 -0
  428. package/utils/browser.ts +68 -0
  429. package/utils/bufferedWriter.ts +100 -0
  430. package/utils/bundledMode.ts +22 -0
  431. package/utils/caCerts.ts +115 -0
  432. package/utils/caCertsConfig.ts +88 -0
  433. package/utils/cachePaths.ts +38 -0
  434. package/utils/classifierApprovals.ts +88 -0
  435. package/utils/classifierApprovalsHook.ts +17 -0
  436. package/utils/claudeCodeHints.ts +193 -0
  437. package/utils/claudeDesktop.ts +152 -0
  438. package/utils/claudeInChrome/chromeNativeHost.ts +527 -0
  439. package/utils/claudeInChrome/common.ts +540 -0
  440. package/utils/claudeInChrome/mcpServer.ts +292 -0
  441. package/utils/claudeInChrome/prompt.ts +83 -0
  442. package/utils/claudeInChrome/setup.ts +400 -0
  443. package/utils/claudeInChrome/setupPortable.ts +233 -0
  444. package/utils/claudeInChrome/toolRendering.tsx +262 -0
  445. package/utils/claudemd.ts +1479 -0
  446. package/utils/cleanup.ts +602 -0
  447. package/utils/cleanupRegistry.ts +25 -0
  448. package/utils/cliArgs.ts +60 -0
  449. package/utils/cliHighlight.ts +54 -0
  450. package/utils/codeIndexing.ts +206 -0
  451. package/utils/collapseBackgroundBashNotifications.ts +84 -0
  452. package/utils/collapseHookSummaries.ts +59 -0
  453. package/utils/collapseReadSearch.ts +1109 -0
  454. package/utils/collapseTeammateShutdowns.ts +55 -0
  455. package/utils/color-diff-mock.ts +27 -0
  456. package/utils/combinedAbortSignal.ts +47 -0
  457. package/utils/commandLifecycle.ts +21 -0
  458. package/utils/commitAttribution.ts +961 -0
  459. package/utils/completionCache.ts +166 -0
  460. package/utils/computerUse/appNames.ts +196 -0
  461. package/utils/computerUse/cleanup.ts +86 -0
  462. package/utils/computerUse/common.ts +61 -0
  463. package/utils/computerUse/computerUseLock.ts +215 -0
  464. package/utils/computerUse/drainRunLoop.ts +79 -0
  465. package/utils/computerUse/escHotkey.ts +54 -0
  466. package/utils/computerUse/executor.ts +658 -0
  467. package/utils/computerUse/gates.ts +72 -0
  468. package/utils/computerUse/hostAdapter.ts +69 -0
  469. package/utils/computerUse/inputLoader.ts +30 -0
  470. package/utils/computerUse/mcpServer.ts +106 -0
  471. package/utils/computerUse/setup.ts +53 -0
  472. package/utils/computerUse/swiftLoader.ts +23 -0
  473. package/utils/computerUse/toolRendering.tsx +125 -0
  474. package/utils/computerUse/wrapper.tsx +336 -0
  475. package/utils/concurrentSessions.ts +204 -0
  476. package/utils/config.ts +1817 -0
  477. package/utils/configConstants.ts +21 -0
  478. package/utils/contentArray.ts +51 -0
  479. package/utils/context.ts +221 -0
  480. package/utils/contextAnalysis.ts +272 -0
  481. package/utils/contextSuggestions.ts +235 -0
  482. package/utils/controlMessageCompat.ts +32 -0
  483. package/utils/conversationRecovery.ts +597 -0
  484. package/utils/cron.ts +308 -0
  485. package/utils/cronJitterConfig.ts +75 -0
  486. package/utils/cronScheduler.ts +565 -0
  487. package/utils/cronTasks.ts +458 -0
  488. package/utils/cronTasksLock.ts +195 -0
  489. package/utils/crossProjectResume.ts +75 -0
  490. package/utils/crypto.ts +13 -0
  491. package/utils/cwd.ts +32 -0
  492. package/utils/debug.ts +268 -0
  493. package/utils/debugFilter.ts +157 -0
  494. package/utils/deepLink/banner.ts +123 -0
  495. package/utils/deepLink/parseDeepLink.ts +170 -0
  496. package/utils/deepLink/protocolHandler.ts +136 -0
  497. package/utils/deepLink/registerProtocol.ts +348 -0
  498. package/utils/deepLink/terminalLauncher.ts +557 -0
  499. package/utils/deepLink/terminalPreference.ts +54 -0
  500. package/utils/desktopDeepLink.ts +236 -0
  501. package/utils/detectRepository.ts +178 -0
  502. package/utils/diagLogs.ts +94 -0
  503. package/utils/diff.ts +177 -0
  504. package/utils/directMemberMessage.ts +69 -0
  505. package/utils/displayTags.ts +51 -0
  506. package/utils/doctorContextWarnings.ts +265 -0
  507. package/utils/doctorDiagnostic.ts +625 -0
  508. package/utils/dxt/helpers.ts +88 -0
  509. package/utils/dxt/zip.ts +226 -0
  510. package/utils/earlyInput.ts +191 -0
  511. package/utils/editor.ts +183 -0
  512. package/utils/effort.ts +329 -0
  513. package/utils/embeddedTools.ts +29 -0
  514. package/utils/env.ts +347 -0
  515. package/utils/envDynamic.ts +151 -0
  516. package/utils/envUtils.ts +183 -0
  517. package/utils/envValidation.ts +38 -0
  518. package/utils/errorLogSink.ts +235 -0
  519. package/utils/errors.ts +238 -0
  520. package/utils/exampleCommands.ts +184 -0
  521. package/utils/execFileNoThrow.ts +150 -0
  522. package/utils/execFileNoThrowPortable.ts +89 -0
  523. package/utils/execSyncWrapper.ts +38 -0
  524. package/utils/exportRenderer.tsx +98 -0
  525. package/utils/extraUsage.ts +23 -0
  526. package/utils/fastMode.ts +532 -0
  527. package/utils/file.ts +584 -0
  528. package/utils/fileHistory.ts +1115 -0
  529. package/utils/fileOperationAnalytics.ts +71 -0
  530. package/utils/filePersistence/filePersistence.ts +287 -0
  531. package/utils/filePersistence/outputsScanner.ts +126 -0
  532. package/utils/fileRead.ts +102 -0
  533. package/utils/fileReadCache.ts +96 -0
  534. package/utils/fileStateCache.ts +142 -0
  535. package/utils/findExecutable.ts +17 -0
  536. package/utils/fingerprint.ts +76 -0
  537. package/utils/forkedAgent.ts +689 -0
  538. package/utils/format.ts +308 -0
  539. package/utils/formatBriefTimestamp.ts +81 -0
  540. package/utils/fpsTracker.ts +47 -0
  541. package/utils/frontmatterParser.ts +370 -0
  542. package/utils/fsOperations.ts +770 -0
  543. package/utils/fullscreen.ts +202 -0
  544. package/utils/generatedFiles.ts +136 -0
  545. package/utils/generators.ts +88 -0
  546. package/utils/genericProcessUtils.ts +184 -0
  547. package/utils/getWorktreePaths.ts +70 -0
  548. package/utils/getWorktreePathsPortable.ts +27 -0
  549. package/utils/ghPrStatus.ts +106 -0
  550. package/utils/git/gitConfigParser.ts +277 -0
  551. package/utils/git/gitFilesystem.ts +699 -0
  552. package/utils/git/gitignore.ts +99 -0
  553. package/utils/git.ts +926 -0
  554. package/utils/gitDiff.ts +532 -0
  555. package/utils/gitSettings.ts +18 -0
  556. package/utils/github/ghAuthStatus.ts +29 -0
  557. package/utils/githubRepoPathMapping.ts +162 -0
  558. package/utils/glob.ts +130 -0
  559. package/utils/gracefulShutdown.ts +529 -0
  560. package/utils/groupToolUses.ts +182 -0
  561. package/utils/handlePromptSubmit.ts +610 -0
  562. package/utils/hash.ts +46 -0
  563. package/utils/headlessProfiler.ts +178 -0
  564. package/utils/heapDumpService.ts +303 -0
  565. package/utils/heatmap.ts +198 -0
  566. package/utils/highlightMatch.tsx +28 -0
  567. package/utils/hooks/AsyncHookRegistry.ts +309 -0
  568. package/utils/hooks/apiQueryHookHelper.ts +141 -0
  569. package/utils/hooks/execAgentHook.ts +339 -0
  570. package/utils/hooks/execHttpHook.ts +242 -0
  571. package/utils/hooks/execPromptHook.ts +211 -0
  572. package/utils/hooks/fileChangedWatcher.ts +191 -0
  573. package/utils/hooks/hookEvents.ts +192 -0
  574. package/utils/hooks/hookHelpers.ts +83 -0
  575. package/utils/hooks/hooksConfigManager.ts +400 -0
  576. package/utils/hooks/hooksConfigSnapshot.ts +133 -0
  577. package/utils/hooks/hooksSettings.ts +271 -0
  578. package/utils/hooks/postSamplingHooks.ts +70 -0
  579. package/utils/hooks/registerFrontmatterHooks.ts +67 -0
  580. package/utils/hooks/registerSkillHooks.ts +64 -0
  581. package/utils/hooks/sessionHooks.ts +447 -0
  582. package/utils/hooks/skillImprovement.ts +267 -0
  583. package/utils/hooks/ssrfGuard.ts +294 -0
  584. package/utils/hooks.ts +5022 -0
  585. package/utils/horizontalScroll.ts +137 -0
  586. package/utils/http.ts +136 -0
  587. package/utils/hyperlink.ts +39 -0
  588. package/utils/iTermBackup.ts +73 -0
  589. package/utils/ide.ts +1494 -0
  590. package/utils/idePathConversion.ts +90 -0
  591. package/utils/idleTimeout.ts +53 -0
  592. package/utils/imagePaste.ts +416 -0
  593. package/utils/imageResizer.ts +880 -0
  594. package/utils/imageStore.ts +167 -0
  595. package/utils/imageValidation.ts +104 -0
  596. package/utils/immediateCommand.ts +15 -0
  597. package/utils/inProcessTeammateHelpers.ts +102 -0
  598. package/utils/ink.ts +26 -0
  599. package/utils/intl.ts +94 -0
  600. package/utils/jetbrains.ts +191 -0
  601. package/utils/json.ts +277 -0
  602. package/utils/jsonRead.ts +16 -0
  603. package/utils/keyboardShortcuts.ts +14 -0
  604. package/utils/lazySchema.ts +8 -0
  605. package/utils/listSessionsImpl.ts +454 -0
  606. package/utils/localInstaller.ts +162 -0
  607. package/utils/lockfile.ts +43 -0
  608. package/utils/log.ts +362 -0
  609. package/utils/logoV2Utils.ts +347 -0
  610. package/utils/mailbox.ts +73 -0
  611. package/utils/managedEnv.ts +199 -0
  612. package/utils/managedEnvConstants.ts +191 -0
  613. package/utils/markdown.ts +381 -0
  614. package/utils/markdownConfigLoader.ts +600 -0
  615. package/utils/mcp/dateTimeParser.ts +121 -0
  616. package/utils/mcp/elicitationValidation.ts +336 -0
  617. package/utils/mcpInstructionsDelta.ts +130 -0
  618. package/utils/mcpOutputStorage.ts +189 -0
  619. package/utils/mcpValidation.ts +208 -0
  620. package/utils/mcpWebSocketTransport.ts +200 -0
  621. package/utils/memoize.ts +269 -0
  622. package/utils/memory/types.ts +12 -0
  623. package/utils/memory/versions.ts +8 -0
  624. package/utils/memoryFileDetection.ts +289 -0
  625. package/utils/messagePredicates.ts +8 -0
  626. package/utils/messageQueueManager.ts +547 -0
  627. package/utils/messages/mappers.ts +290 -0
  628. package/utils/messages/systemInit.ts +96 -0
  629. package/utils/messages.ts +5520 -0
  630. package/utils/model/agent.ts +157 -0
  631. package/utils/model/aliases.ts +35 -0
  632. package/utils/model/antModels.ts +64 -0
  633. package/utils/model/bedrock.ts +265 -0
  634. package/utils/model/check1mAccess.ts +72 -0
  635. package/utils/model/configs.ts +158 -0
  636. package/utils/model/contextWindowUpgradeCheck.ts +47 -0
  637. package/utils/model/deprecation.ts +101 -0
  638. package/utils/model/model.ts +654 -0
  639. package/utils/model/modelAllowlist.ts +170 -0
  640. package/utils/model/modelCapabilities.ts +118 -0
  641. package/utils/model/modelOptions.ts +589 -0
  642. package/utils/model/modelStrings.ts +170 -0
  643. package/utils/model/modelSupportOverrides.ts +50 -0
  644. package/utils/model/providers.ts +42 -0
  645. package/utils/model/validateModel.ts +159 -0
  646. package/utils/modelCost.ts +231 -0
  647. package/utils/modifiers.ts +36 -0
  648. package/utils/mtls.ts +179 -0
  649. package/utils/nativeInstaller/download.ts +523 -0
  650. package/utils/nativeInstaller/index.ts +18 -0
  651. package/utils/nativeInstaller/installer.ts +1708 -0
  652. package/utils/nativeInstaller/packageManagers.ts +336 -0
  653. package/utils/nativeInstaller/pidLock.ts +433 -0
  654. package/utils/notebook.ts +224 -0
  655. package/utils/objectGroupBy.ts +18 -0
  656. package/utils/pasteStore.ts +104 -0
  657. package/utils/path.ts +155 -0
  658. package/utils/pdf.ts +300 -0
  659. package/utils/pdfUtils.ts +70 -0
  660. package/utils/peerAddress.ts +21 -0
  661. package/utils/permissions/PermissionMode.ts +141 -0
  662. package/utils/permissions/PermissionPromptToolResultSchema.ts +127 -0
  663. package/utils/permissions/PermissionResult.ts +35 -0
  664. package/utils/permissions/PermissionRule.ts +40 -0
  665. package/utils/permissions/PermissionUpdate.ts +389 -0
  666. package/utils/permissions/PermissionUpdateSchema.ts +78 -0
  667. package/utils/permissions/autoModeState.ts +39 -0
  668. package/utils/permissions/bashClassifier.ts +61 -0
  669. package/utils/permissions/bypassPermissionsKillswitch.ts +155 -0
  670. package/utils/permissions/classifierDecision.ts +98 -0
  671. package/utils/permissions/classifierShared.ts +39 -0
  672. package/utils/permissions/dangerousPatterns.ts +80 -0
  673. package/utils/permissions/denialTracking.ts +45 -0
  674. package/utils/permissions/filesystem.ts +1777 -0
  675. package/utils/permissions/getNextPermissionMode.ts +101 -0
  676. package/utils/permissions/pathValidation.ts +485 -0
  677. package/utils/permissions/permissionExplainer.ts +250 -0
  678. package/utils/permissions/permissionRuleParser.ts +198 -0
  679. package/utils/permissions/permissionSetup.ts +1532 -0
  680. package/utils/permissions/permissions.ts +1486 -0
  681. package/utils/permissions/permissionsLoader.ts +296 -0
  682. package/utils/permissions/shadowedRuleDetection.ts +234 -0
  683. package/utils/permissions/shellRuleMatching.ts +228 -0
  684. package/utils/permissions/yoloClassifier.ts +1495 -0
  685. package/utils/planModeV2.ts +95 -0
  686. package/utils/plans.ts +397 -0
  687. package/utils/platform.ts +150 -0
  688. package/utils/plugins/addDirPluginSettings.ts +71 -0
  689. package/utils/plugins/cacheUtils.ts +196 -0
  690. package/utils/plugins/dependencyResolver.ts +305 -0
  691. package/utils/plugins/fetchTelemetry.ts +135 -0
  692. package/utils/plugins/gitAvailability.ts +69 -0
  693. package/utils/plugins/headlessPluginInstall.ts +174 -0
  694. package/utils/plugins/hintRecommendation.ts +164 -0
  695. package/utils/plugins/installCounts.ts +292 -0
  696. package/utils/plugins/installedPluginsManager.ts +1268 -0
  697. package/utils/plugins/loadPluginAgents.ts +348 -0
  698. package/utils/plugins/loadPluginCommands.ts +946 -0
  699. package/utils/plugins/loadPluginHooks.ts +287 -0
  700. package/utils/plugins/loadPluginOutputStyles.ts +178 -0
  701. package/utils/plugins/lspPluginIntegration.ts +387 -0
  702. package/utils/plugins/lspRecommendation.ts +374 -0
  703. package/utils/plugins/managedPlugins.ts +27 -0
  704. package/utils/plugins/marketplaceHelpers.ts +592 -0
  705. package/utils/plugins/marketplaceManager.ts +2643 -0
  706. package/utils/plugins/mcpPluginIntegration.ts +634 -0
  707. package/utils/plugins/mcpbHandler.ts +968 -0
  708. package/utils/plugins/officialMarketplace.ts +25 -0
  709. package/utils/plugins/officialMarketplaceGcs.ts +216 -0
  710. package/utils/plugins/officialMarketplaceStartupCheck.ts +439 -0
  711. package/utils/plugins/orphanedPluginFilter.ts +114 -0
  712. package/utils/plugins/parseMarketplaceInput.ts +162 -0
  713. package/utils/plugins/performStartupChecks.tsx +70 -0
  714. package/utils/plugins/pluginAutoupdate.ts +284 -0
  715. package/utils/plugins/pluginBlocklist.ts +127 -0
  716. package/utils/plugins/pluginDirectories.ts +178 -0
  717. package/utils/plugins/pluginFlagging.ts +208 -0
  718. package/utils/plugins/pluginIdentifier.ts +123 -0
  719. package/utils/plugins/pluginInstallationHelpers.ts +595 -0
  720. package/utils/plugins/pluginLoader.ts +3302 -0
  721. package/utils/plugins/pluginOptionsStorage.ts +400 -0
  722. package/utils/plugins/pluginPolicy.ts +20 -0
  723. package/utils/plugins/pluginStartupCheck.ts +341 -0
  724. package/utils/plugins/pluginVersioning.ts +157 -0
  725. package/utils/plugins/reconciler.ts +265 -0
  726. package/utils/plugins/refresh.ts +215 -0
  727. package/utils/plugins/schemas.ts +1681 -0
  728. package/utils/plugins/validatePlugin.ts +903 -0
  729. package/utils/plugins/walkPluginMarkdown.ts +69 -0
  730. package/utils/plugins/zipCache.ts +406 -0
  731. package/utils/plugins/zipCacheAdapters.ts +164 -0
  732. package/utils/powershell/dangerousCmdlets.ts +185 -0
  733. package/utils/powershell/parser.ts +1804 -0
  734. package/utils/powershell/staticPrefix.ts +316 -0
  735. package/utils/preflightChecks.tsx +151 -0
  736. package/utils/privacyLevel.ts +55 -0
  737. package/utils/process.ts +68 -0
  738. package/utils/processUserInput/processBashCommand.tsx +140 -0
  739. package/utils/processUserInput/processSlashCommand.tsx +922 -0
  740. package/utils/processUserInput/processTextPrompt.ts +100 -0
  741. package/utils/processUserInput/processUserInput.ts +605 -0
  742. package/utils/profilerBase.ts +46 -0
  743. package/utils/promptCategory.ts +49 -0
  744. package/utils/promptEditor.ts +188 -0
  745. package/utils/promptShellExecution.ts +183 -0
  746. package/utils/proxy.ts +426 -0
  747. package/utils/queryContext.ts +179 -0
  748. package/utils/queryHelpers.ts +552 -0
  749. package/utils/queryProfiler.ts +301 -0
  750. package/utils/queueProcessor.ts +95 -0
  751. package/utils/readEditContext.ts +227 -0
  752. package/utils/readFileInRange.ts +383 -0
  753. package/utils/releaseNotes.ts +360 -0
  754. package/utils/renderOptions.ts +113 -0
  755. package/utils/ripgrep.ts +679 -0
  756. package/utils/sandbox/sandbox-adapter.ts +985 -0
  757. package/utils/sandbox/sandbox-ui-utils.ts +12 -0
  758. package/utils/sanitization.ts +91 -0
  759. package/utils/screenshotClipboard.ts +121 -0
  760. package/utils/sdkEventQueue.ts +134 -0
  761. package/utils/secureStorage/fallbackStorage.ts +70 -0
  762. package/utils/secureStorage/index.ts +17 -0
  763. package/utils/secureStorage/keychainPrefetch.ts +116 -0
  764. package/utils/secureStorage/macOsKeychainHelpers.ts +111 -0
  765. package/utils/secureStorage/macOsKeychainStorage.ts +231 -0
  766. package/utils/secureStorage/plainTextStorage.ts +84 -0
  767. package/utils/semanticBoolean.ts +29 -0
  768. package/utils/semanticNumber.ts +36 -0
  769. package/utils/semver.ts +59 -0
  770. package/utils/sequential.ts +56 -0
  771. package/utils/sessionActivity.ts +133 -0
  772. package/utils/sessionEnvVars.ts +22 -0
  773. package/utils/sessionEnvironment.ts +166 -0
  774. package/utils/sessionFileAccessHooks.ts +250 -0
  775. package/utils/sessionIngressAuth.ts +140 -0
  776. package/utils/sessionRestore.ts +551 -0
  777. package/utils/sessionStart.ts +232 -0
  778. package/utils/sessionState.ts +150 -0
  779. package/utils/sessionStorage.ts +5105 -0
  780. package/utils/sessionStoragePortable.ts +793 -0
  781. package/utils/sessionTitle.ts +129 -0
  782. package/utils/sessionUrl.ts +64 -0
  783. package/utils/set.ts +53 -0
  784. package/utils/settings/allErrors.ts +32 -0
  785. package/utils/settings/applySettingsChange.ts +92 -0
  786. package/utils/settings/changeDetector.ts +488 -0
  787. package/utils/settings/constants.ts +202 -0
  788. package/utils/settings/internalWrites.ts +37 -0
  789. package/utils/settings/managedPath.ts +34 -0
  790. package/utils/settings/mdm/constants.ts +81 -0
  791. package/utils/settings/mdm/rawRead.ts +130 -0
  792. package/utils/settings/mdm/settings.ts +316 -0
  793. package/utils/settings/permissionValidation.ts +262 -0
  794. package/utils/settings/pluginOnlyPolicy.ts +60 -0
  795. package/utils/settings/schemaOutput.ts +8 -0
  796. package/utils/settings/settings.ts +1015 -0
  797. package/utils/settings/settingsCache.ts +80 -0
  798. package/utils/settings/toolValidationConfig.ts +103 -0
  799. package/utils/settings/types.ts +1149 -0
  800. package/utils/settings/validateEditTool.ts +45 -0
  801. package/utils/settings/validation.ts +265 -0
  802. package/utils/settings/validationTips.ts +164 -0
  803. package/utils/shell/bashProvider.ts +255 -0
  804. package/utils/shell/outputLimits.ts +14 -0
  805. package/utils/shell/powershellDetection.ts +107 -0
  806. package/utils/shell/powershellProvider.ts +123 -0
  807. package/utils/shell/prefix.ts +367 -0
  808. package/utils/shell/readOnlyCommandValidation.ts +1893 -0
  809. package/utils/shell/resolveDefaultShell.ts +14 -0
  810. package/utils/shell/shellProvider.ts +33 -0
  811. package/utils/shell/shellToolUtils.ts +22 -0
  812. package/utils/shell/specPrefix.ts +241 -0
  813. package/utils/shellConfig.ts +167 -0
  814. package/utils/sideQuery.ts +222 -0
  815. package/utils/sideQuestion.ts +155 -0
  816. package/utils/signal.ts +43 -0
  817. package/utils/sinks.ts +16 -0
  818. package/utils/skills/skillChangeDetector.ts +311 -0
  819. package/utils/slashCommandParsing.ts +60 -0
  820. package/utils/sleep.ts +84 -0
  821. package/utils/sliceAnsi.ts +91 -0
  822. package/utils/slowOperations.ts +286 -0
  823. package/utils/standaloneAgent.ts +23 -0
  824. package/utils/startupProfiler.ts +194 -0
  825. package/utils/staticRender.tsx +116 -0
  826. package/utils/stats.ts +1061 -0
  827. package/utils/statsCache.ts +434 -0
  828. package/utils/status.tsx +362 -0
  829. package/utils/statusNoticeDefinitions.tsx +198 -0
  830. package/utils/statusNoticeHelpers.ts +20 -0
  831. package/utils/stream.ts +76 -0
  832. package/utils/streamJsonStdoutGuard.ts +123 -0
  833. package/utils/streamlinedTransform.ts +201 -0
  834. package/utils/stringUtils.ts +235 -0
  835. package/utils/subprocessEnv.ts +99 -0
  836. package/utils/suggestions/commandSuggestions.ts +567 -0
  837. package/utils/suggestions/directoryCompletion.ts +263 -0
  838. package/utils/suggestions/shellHistoryCompletion.ts +119 -0
  839. package/utils/suggestions/skillUsageTracking.ts +55 -0
  840. package/utils/suggestions/slackChannelSuggestions.ts +209 -0
  841. package/utils/swarm/It2SetupPrompt.tsx +380 -0
  842. package/utils/swarm/backends/ITermBackend.ts +370 -0
  843. package/utils/swarm/backends/InProcessBackend.ts +339 -0
  844. package/utils/swarm/backends/PaneBackendExecutor.ts +354 -0
  845. package/utils/swarm/backends/TmuxBackend.ts +764 -0
  846. package/utils/swarm/backends/detection.ts +128 -0
  847. package/utils/swarm/backends/it2Setup.ts +245 -0
  848. package/utils/swarm/backends/registry.ts +464 -0
  849. package/utils/swarm/backends/teammateModeSnapshot.ts +87 -0
  850. package/utils/swarm/backends/types.ts +311 -0
  851. package/utils/swarm/constants.ts +33 -0
  852. package/utils/swarm/inProcessRunner.ts +1552 -0
  853. package/utils/swarm/leaderPermissionBridge.ts +54 -0
  854. package/utils/swarm/permissionSync.ts +928 -0
  855. package/utils/swarm/reconnection.ts +119 -0
  856. package/utils/swarm/spawnInProcess.ts +328 -0
  857. package/utils/swarm/spawnUtils.ts +146 -0
  858. package/utils/swarm/teamHelpers.ts +683 -0
  859. package/utils/swarm/teammateInit.ts +129 -0
  860. package/utils/swarm/teammateLayoutManager.ts +107 -0
  861. package/utils/swarm/teammateModel.ts +10 -0
  862. package/utils/swarm/teammatePromptAddendum.ts +18 -0
  863. package/utils/systemDirectories.ts +74 -0
  864. package/utils/systemPrompt.ts +123 -0
  865. package/utils/systemPromptType.ts +14 -0
  866. package/utils/systemTheme.ts +119 -0
  867. package/utils/taggedId.ts +54 -0
  868. package/utils/task/TaskOutput.ts +390 -0
  869. package/utils/task/diskOutput.ts +451 -0
  870. package/utils/task/framework.ts +308 -0
  871. package/utils/task/outputFormatting.ts +38 -0
  872. package/utils/task/sdkProgress.ts +36 -0
  873. package/utils/tasks.ts +862 -0
  874. package/utils/teamDiscovery.ts +81 -0
  875. package/utils/teamMemoryOps.ts +88 -0
  876. package/utils/teammate.ts +292 -0
  877. package/utils/teammateContext.ts +96 -0
  878. package/utils/teammateMailbox.ts +1183 -0
  879. package/utils/telemetry/betaSessionTracing.ts +491 -0
  880. package/utils/telemetry/bigqueryExporter.ts +252 -0
  881. package/utils/telemetry/events.ts +75 -0
  882. package/utils/telemetry/instrumentation.ts +825 -0
  883. package/utils/telemetry/logger.ts +26 -0
  884. package/utils/telemetry/perfettoTracing.ts +1120 -0
  885. package/utils/telemetry/pluginTelemetry.ts +289 -0
  886. package/utils/telemetry/sessionTracing.ts +927 -0
  887. package/utils/telemetry/skillLoadedEvent.ts +39 -0
  888. package/utils/telemetryAttributes.ts +71 -0
  889. package/utils/teleport/api.ts +466 -0
  890. package/utils/teleport/environmentSelection.ts +77 -0
  891. package/utils/teleport/environments.ts +120 -0
  892. package/utils/teleport/gitBundle.ts +292 -0
  893. package/utils/teleport.tsx +1226 -0
  894. package/utils/tempfile.ts +31 -0
  895. package/utils/terminal.ts +131 -0
  896. package/utils/terminalPanel.ts +191 -0
  897. package/utils/textHighlighting.ts +166 -0
  898. package/utils/theme.ts +639 -0
  899. package/utils/thinking.ts +162 -0
  900. package/utils/timeouts.ts +39 -0
  901. package/utils/tmuxSocket.ts +427 -0
  902. package/utils/todo/types.ts +18 -0
  903. package/utils/tokenBudget.ts +73 -0
  904. package/utils/tokens.ts +261 -0
  905. package/utils/toolErrors.ts +132 -0
  906. package/utils/toolPool.ts +79 -0
  907. package/utils/toolResultStorage.ts +1040 -0
  908. package/utils/toolSchemaCache.ts +26 -0
  909. package/utils/toolSearch.ts +756 -0
  910. package/utils/transcriptSearch.ts +202 -0
  911. package/utils/treeify.ts +170 -0
  912. package/utils/truncate.ts +179 -0
  913. package/utils/ultraplan/ccrSession.ts +349 -0
  914. package/utils/ultraplan/keyword.ts +127 -0
  915. package/utils/ultraplan/prompt.txt +1 -0
  916. package/utils/unaryLogging.ts +39 -0
  917. package/utils/undercover.ts +89 -0
  918. package/utils/user.ts +194 -0
  919. package/utils/userAgent.ts +10 -0
  920. package/utils/userPromptKeywords.ts +27 -0
  921. package/utils/uuid.ts +27 -0
  922. package/utils/warningHandler.ts +121 -0
  923. package/utils/which.ts +82 -0
  924. package/utils/windowsPaths.ts +173 -0
  925. package/utils/withResolvers.ts +13 -0
  926. package/utils/words.ts +800 -0
  927. package/utils/workloadContext.ts +57 -0
  928. package/utils/worktree.ts +1519 -0
  929. package/utils/worktreeModeEnabled.ts +11 -0
  930. package/utils/xdg.ts +65 -0
  931. package/utils/xmem.ts +6 -0
  932. package/utils/xml.ts +16 -0
  933. package/utils/yaml.ts +15 -0
  934. package/utils/zodToJsonSchema.ts +23 -0
@@ -0,0 +1,3422 @@
1
+ import type {
2
+ BetaContentBlock,
3
+ BetaContentBlockParam,
4
+ BetaImageBlockParam,
5
+ BetaJSONOutputFormat,
6
+ BetaMessage,
7
+ BetaMessageDeltaUsage,
8
+ BetaMessageStreamParams,
9
+ BetaOutputConfig,
10
+ BetaRawMessageStreamEvent,
11
+ BetaRequestDocumentBlock,
12
+ BetaStopReason,
13
+ BetaToolChoiceAuto,
14
+ BetaToolChoiceTool,
15
+ BetaToolResultBlockParam,
16
+ BetaToolUnion,
17
+ BetaUsage,
18
+ BetaMessageParam as MessageParam,
19
+ } from '@anthropic-ai/sdk/resources/beta/messages/messages.mjs'
20
+ import type { TextBlockParam } from '@anthropic-ai/sdk/resources/index.mjs'
21
+ import type { Stream } from '@anthropic-ai/sdk/streaming.mjs'
22
+ import { randomUUID } from 'crypto'
23
+ import {
24
+ getAPIProvider,
25
+ isFirstPartyAnthropicBaseUrl,
26
+ } from 'src/utils/model/providers.js'
27
+ import {
28
+ getAttributionHeader,
29
+ getCLISyspromptPrefix,
30
+ } from '../../constants/system.js'
31
+ import {
32
+ getEmptyToolPermissionContext,
33
+ type QueryChainTracking,
34
+ type Tool,
35
+ type ToolPermissionContext,
36
+ type Tools,
37
+ toolMatchesName,
38
+ } from '../../Tool.js'
39
+ import type { AgentDefinition } from '../../tools/AgentTool/loadAgentsDir.js'
40
+ import {
41
+ type ConnectorTextBlock,
42
+ type ConnectorTextDelta,
43
+ isConnectorTextBlock,
44
+ } from '../../types/connectorText.js'
45
+ import type {
46
+ AssistantMessage,
47
+ Message,
48
+ StreamEvent,
49
+ SystemAPIErrorMessage,
50
+ UserMessage,
51
+ } from '../../types/message.js'
52
+ import {
53
+ type CacheScope,
54
+ logAPIPrefix,
55
+ splitSysPromptPrefix,
56
+ toolToAPISchema,
57
+ } from '../../utils/api.js'
58
+ import { getOauthAccountInfo } from '../../utils/auth.js'
59
+ import {
60
+ getBedrockExtraBodyParamsBetas,
61
+ getMergedBetas,
62
+ getModelBetas,
63
+ } from '../../utils/betas.js'
64
+ import { getOrCreateUserID } from '../../utils/config.js'
65
+ import {
66
+ CAPPED_DEFAULT_MAX_TOKENS,
67
+ getModelMaxOutputTokens,
68
+ getSonnet1mExpTreatmentEnabled,
69
+ } from '../../utils/context.js'
70
+ import { resolveAppliedEffort } from '../../utils/effort.js'
71
+ import { isEnvTruthy } from '../../utils/envUtils.js'
72
+ import { errorMessage } from '../../utils/errors.js'
73
+ import { computeFingerprintFromMessages } from '../../utils/fingerprint.js'
74
+ import { captureAPIRequest, logError } from '../../utils/log.js'
75
+ import {
76
+ createAssistantAPIErrorMessage,
77
+ createUserMessage,
78
+ ensureToolResultPairing,
79
+ normalizeContentFromAPI,
80
+ normalizeMessagesForAPI,
81
+ stripAdvisorBlocks,
82
+ stripCallerFieldFromAssistantMessage,
83
+ stripToolReferenceBlocksFromUserMessage,
84
+ } from '../../utils/messages.js'
85
+ import {
86
+ getDefaultOpusModel,
87
+ getDefaultSonnetModel,
88
+ getSmallFastModel,
89
+ isNonCustomOpusModel,
90
+ } from '../../utils/model/model.js'
91
+ import {
92
+ asSystemPrompt,
93
+ type SystemPrompt,
94
+ } from '../../utils/systemPromptType.js'
95
+ import { tokenCountFromLastAPIResponse } from '../../utils/tokens.js'
96
+ import { getDynamicConfig_BLOCKS_ON_INIT } from '../analytics/growthbook.js'
97
+ import {
98
+ currentLimits,
99
+ extractQuotaStatusFromError,
100
+ extractQuotaStatusFromHeaders,
101
+ } from '../claudeAiLimits.js'
102
+ import { getAPIContextManagement } from '../compact/apiMicrocompact.js'
103
+
104
+ /* eslint-disable @typescript-eslint/no-require-imports */
105
+ const autoModeStateModule = feature('TRANSCRIPT_CLASSIFIER')
106
+ ? (require('../../utils/permissions/autoModeState.js') as typeof import('../../utils/permissions/autoModeState.js'))
107
+ : null
108
+
109
+ import { feature } from 'bun:bundle'
110
+ import type { ClientOptions } from '@anthropic-ai/sdk'
111
+ import {
112
+ APIConnectionTimeoutError,
113
+ APIError,
114
+ APIUserAbortError,
115
+ } from '@anthropic-ai/sdk/error'
116
+ import {
117
+ getAfkModeHeaderLatched,
118
+ getCacheEditingHeaderLatched,
119
+ getFastModeHeaderLatched,
120
+ getLastApiCompletionTimestamp,
121
+ getPromptCache1hAllowlist,
122
+ getPromptCache1hEligible,
123
+ getSessionId,
124
+ getThinkingClearLatched,
125
+ setAfkModeHeaderLatched,
126
+ setCacheEditingHeaderLatched,
127
+ setFastModeHeaderLatched,
128
+ setLastMainRequestId,
129
+ setPromptCache1hAllowlist,
130
+ setPromptCache1hEligible,
131
+ setThinkingClearLatched,
132
+ } from 'src/bootstrap/state.js'
133
+ import {
134
+ AFK_MODE_BETA_HEADER,
135
+ CONTEXT_1M_BETA_HEADER,
136
+ CONTEXT_MANAGEMENT_BETA_HEADER,
137
+ EFFORT_BETA_HEADER,
138
+ FAST_MODE_BETA_HEADER,
139
+ PROMPT_CACHING_SCOPE_BETA_HEADER,
140
+ REDACT_THINKING_BETA_HEADER,
141
+ STRUCTURED_OUTPUTS_BETA_HEADER,
142
+ TASK_BUDGETS_BETA_HEADER,
143
+ } from 'src/constants/betas.js'
144
+ import type { QuerySource } from 'src/constants/querySource.js'
145
+ import type { Notification } from 'src/context/notifications.js'
146
+ import { addToTotalSessionCost } from 'src/cost-tracker.js'
147
+ import { getFeatureValue_CACHED_MAY_BE_STALE } from 'src/services/analytics/growthbook.js'
148
+ import type { AgentId } from 'src/types/ids.js'
149
+ import {
150
+ ADVISOR_TOOL_INSTRUCTIONS,
151
+ getExperimentAdvisorModels,
152
+ isAdvisorEnabled,
153
+ isValidAdvisorModel,
154
+ modelSupportsAdvisor,
155
+ } from 'src/utils/advisor.js'
156
+ import { getAgentContext } from 'src/utils/agentContext.js'
157
+ import { isClaudeAISubscriber } from 'src/utils/auth.js'
158
+ import {
159
+ getToolSearchBetaHeader,
160
+ modelSupportsStructuredOutputs,
161
+ shouldIncludeFirstPartyOnlyBetas,
162
+ shouldUseGlobalCacheScope,
163
+ } from 'src/utils/betas.js'
164
+ import { CLAUDE_IN_CHROME_MCP_SERVER_NAME } from 'src/utils/claudeInChrome/common.js'
165
+ import { CHROME_TOOL_SEARCH_INSTRUCTIONS } from 'src/utils/claudeInChrome/prompt.js'
166
+ import { getMaxThinkingTokensForModel } from 'src/utils/context.js'
167
+ import { logForDebugging } from 'src/utils/debug.js'
168
+ import { logForDiagnosticsNoPII } from 'src/utils/diagLogs.js'
169
+ import { type EffortValue, modelSupportsEffort } from 'src/utils/effort.js'
170
+ import {
171
+ isFastModeAvailable,
172
+ isFastModeCooldown,
173
+ isFastModeEnabled,
174
+ isFastModeSupportedByModel,
175
+ } from 'src/utils/fastMode.js'
176
+ import { returnValue } from 'src/utils/generators.js'
177
+ import { headlessProfilerCheckpoint } from 'src/utils/headlessProfiler.js'
178
+ import { isMcpInstructionsDeltaEnabled } from 'src/utils/mcpInstructionsDelta.js'
179
+ import { calculateUSDCost } from 'src/utils/modelCost.js'
180
+ import { endQueryProfile, queryCheckpoint } from 'src/utils/queryProfiler.js'
181
+ import {
182
+ modelSupportsAdaptiveThinking,
183
+ modelSupportsThinking,
184
+ type ThinkingConfig,
185
+ } from 'src/utils/thinking.js'
186
+ import {
187
+ extractDiscoveredToolNames,
188
+ isDeferredToolsDeltaEnabled,
189
+ isToolSearchEnabled,
190
+ } from 'src/utils/toolSearch.js'
191
+ import { API_MAX_MEDIA_PER_REQUEST } from '../../constants/apiLimits.js'
192
+ import { ADVISOR_BETA_HEADER } from '../../constants/betas.js'
193
+ import {
194
+ formatDeferredToolLine,
195
+ isDeferredTool,
196
+ TOOL_SEARCH_TOOL_NAME,
197
+ } from '../../tools/ToolSearchTool/prompt.js'
198
+ import { count } from '../../utils/array.js'
199
+ import { insertBlockAfterToolResults } from '../../utils/contentArray.js'
200
+ import { validateBoundedIntEnvVar } from '../../utils/envValidation.js'
201
+ import { safeParseJSON } from '../../utils/json.js'
202
+ import { getInferenceProfileBackingModel } from '../../utils/model/bedrock.js'
203
+ import {
204
+ normalizeModelStringForAPI,
205
+ parseUserSpecifiedModel,
206
+ } from '../../utils/model/model.js'
207
+ import {
208
+ startSessionActivity,
209
+ stopSessionActivity,
210
+ } from '../../utils/sessionActivity.js'
211
+ import { jsonStringify } from '../../utils/slowOperations.js'
212
+ import {
213
+ isBetaTracingEnabled,
214
+ type LLMRequestNewContext,
215
+ startLLMRequestSpan,
216
+ } from '../../utils/telemetry/sessionTracing.js'
217
+ /* eslint-enable @typescript-eslint/no-require-imports */
218
+ import {
219
+ type AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
220
+ logEvent,
221
+ } from '../analytics/index.js'
222
+ import {
223
+ consumePendingCacheEdits,
224
+ getPinnedCacheEdits,
225
+ markToolsSentToAPIState,
226
+ pinCacheEdits,
227
+ } from '../compact/microCompact.js'
228
+ import { getInitializationStatus } from '../lsp/manager.js'
229
+ import { isToolFromMcpServer } from '../mcp/utils.js'
230
+ import { withStreamingVCR, withVCR } from '../vcr.js'
231
+ import { CLIENT_REQUEST_ID_HEADER, getAnthropicClient } from './client.js'
232
+ import {
233
+ API_ERROR_MESSAGE_PREFIX,
234
+ CUSTOM_OFF_SWITCH_MESSAGE,
235
+ getAssistantMessageFromError,
236
+ getErrorMessageIfRefusal,
237
+ } from './errors.js'
238
+ import {
239
+ EMPTY_USAGE,
240
+ type GlobalCacheStrategy,
241
+ logAPIError,
242
+ logAPIQuery,
243
+ logAPISuccessAndDuration,
244
+ type NonNullableUsage,
245
+ } from './logging.js'
246
+ import {
247
+ CACHE_TTL_1HOUR_MS,
248
+ checkResponseForCacheBreak,
249
+ recordPromptState,
250
+ } from './promptCacheBreakDetection.js'
251
+ import {
252
+ CannotRetryError,
253
+ FallbackTriggeredError,
254
+ is529Error,
255
+ type RetryContext,
256
+ withRetry,
257
+ } from './withRetry.js'
258
+
259
/**
 * Recursive model of a valid JSON value: a primitive, `null`, an array of
 * JSON values, or a string-keyed object whose members are JSON values.
 */
type JsonArray = Array<JsonValue>
type JsonObject = { [key: string]: JsonValue }
type JsonValue = string | number | boolean | null | JsonObject | JsonArray
263
+
264
/**
 * Assemble the extra body parameters for the API request, based on the
 * CLAUDE_CODE_EXTRA_BODY environment variable if present and on any beta
 * headers (primarily for Bedrock requests).
 *
 * The returned object never shares array or object identity with its inputs:
 * the parsed env value is shallow-cloned and `anthropic_beta` is always a
 * freshly built array, so later mutation of `betaHeaders` by the caller (or of
 * the result by a consumer) cannot leak across.
 *
 * @param betaHeaders - An array of beta headers to include in the request.
 * @returns A JSON object representing the extra body parameters.
 */
export function getExtraBodyParams(betaHeaders?: string[]): JsonObject {
  let result: JsonObject = {}

  // Parse user's extra body parameters first
  const extraBodyStr = process.env.CLAUDE_CODE_EXTRA_BODY
  if (extraBodyStr) {
    try {
      // Parse as JSON, which can be null, boolean, number, string, array or object
      const parsed = safeParseJSON(extraBodyStr)
      // We expect an object with key-value pairs to spread into API parameters
      if (parsed && typeof parsed === 'object' && !Array.isArray(parsed)) {
        // Shallow clone — safeParseJSON is LRU-cached and returns the same
        // object reference for the same string. Mutating `result` below
        // would poison the cache, causing stale values to persist.
        result = { ...(parsed as JsonObject) }
      } else {
        logForDebugging(
          `CLAUDE_CODE_EXTRA_BODY env var must be a JSON object, but was given ${extraBodyStr}`,
          { level: 'error' },
        )
      }
    } catch (error) {
      logForDebugging(
        `Error parsing CLAUDE_CODE_EXTRA_BODY: ${errorMessage(error)}`,
        { level: 'error' },
      )
    }
  }

  // Anti-distillation: send fake_tools opt-in for 1P CLI only
  if (
    feature('ANTI_DISTILLATION_CC')
      ? process.env.CLAUDE_CODE_ENTRYPOINT === 'cli' &&
        shouldIncludeFirstPartyOnlyBetas() &&
        getFeatureValue_CACHED_MAY_BE_STALE(
          'tengu_anti_distill_fake_tool_injection',
          false,
        )
      : false
  ) {
    result.anti_distillation = ['fake_tools']
  }

  // Merge beta headers if provided. A user-supplied `anthropic_beta` array is
  // kept first and only headers it does not already contain are appended; a
  // missing or non-array value is replaced outright. Either way a new array is
  // created rather than storing the caller's `betaHeaders` by reference.
  if (betaHeaders && betaHeaders.length > 0) {
    const existingHeaders = Array.isArray(result.anthropic_beta)
      ? (result.anthropic_beta as string[])
      : []
    result.anthropic_beta = [
      ...existingHeaders,
      ...betaHeaders.filter(header => !existingHeaders.includes(header)),
    ]
  }

  return result
}
332
+
333
/**
 * Decide whether prompt caching is enabled for `model`.
 *
 * Caching is on unless DISABLE_PROMPT_CACHING is truthy, or one of the
 * per-model switches (DISABLE_PROMPT_CACHING_HAIKU / _SONNET / _OPUS) is
 * truthy and `model` equals the corresponding small-fast / default Sonnet /
 * default Opus model string.
 *
 * @param model - Model identifier the request is about to use.
 * @returns `false` when an applicable opt-out is set, otherwise `true`.
 */
export function getPromptCachingEnabled(model: string): boolean {
  // Global disable takes precedence
  if (isEnvTruthy(process.env.DISABLE_PROMPT_CACHING)) {
    return false
  }

  // Each entry pairs an opt-out flag with the resolver for the model it
  // targets; the resolver only runs when its flag is truthy.
  const perModelOptOuts: Array<[string | undefined, () => string]> = [
    [process.env.DISABLE_PROMPT_CACHING_HAIKU, getSmallFastModel],
    [process.env.DISABLE_PROMPT_CACHING_SONNET, getDefaultSonnetModel],
    [process.env.DISABLE_PROMPT_CACHING_OPUS, getDefaultOpusModel],
  ]

  const disabledForModel = perModelOptOuts.some(
    ([flag, resolveModel]) => isEnvTruthy(flag) && model === resolveModel(),
  )
  return !disabledForModel
}
357
+
358
/**
 * Build the cache-control descriptor for a request block.
 *
 * The result always has `type: 'ephemeral'`. `ttl: '1h'` is added when
 * `should1hCacheTTL(querySource)` is true, and `scope` is added only when the
 * requested scope is `'global'`.
 *
 * @param scope - Optional cache scope; only `'global'` is forwarded.
 * @param querySource - Optional query source used for the 1h TTL decision.
 * @returns The cache-control object.
 */
export function getCacheControl({
  scope,
  querySource,
}: {
  scope?: CacheScope
  querySource?: QuerySource
} = {}): {
  type: 'ephemeral'
  ttl?: '1h'
  scope?: CacheScope
} {
  const cacheControl: {
    type: 'ephemeral'
    ttl?: '1h'
    scope?: CacheScope
  } = { type: 'ephemeral' }

  if (should1hCacheTTL(querySource)) {
    cacheControl.ttl = '1h'
  }
  if (scope === 'global') {
    cacheControl.scope = scope
  }

  return cacheControl
}
375
+
376
/**
 * Decide whether prompt caching should use the 1h TTL.
 *
 * Returns true when either:
 *  - the provider is Bedrock and ENABLE_PROMPT_CACHING_1H_BEDROCK is truthy, or
 *  - the user is eligible (ant, or subscriber not using overage) AND the
 *    query source matches a pattern in the GrowthBook allowlist.
 *
 * GrowthBook config shape: { allowlist: string[] }
 * Patterns support a trailing '*' for prefix matching, e.g.
 *  - { allowlist: ["repl_main_thread*", "sdk"] } — main thread + SDK only
 *  - { allowlist: ["repl_main_thread*", "sdk", "agent:*"] } — also subagents
 *  - { allowlist: ["*"] } — all sources
 *
 * Both the eligibility flag and the allowlist are latched in bootstrap state
 * the first time they are computed, so the answer stays stable for the
 * session and TTLs are not mixed mid-request.
 */
function should1hCacheTTL(querySource?: QuerySource): boolean {
  // 3P Bedrock users opt in via env var — they manage their own billing, and
  // have no GrowthBook configured, so no further gating applies.
  const bedrockOptIn =
    getAPIProvider() === 'bedrock' &&
    isEnvTruthy(process.env.ENABLE_PROMPT_CACHING_1H_BEDROCK)
  if (bedrockOptIn) {
    return true
  }

  // Latch eligibility so a mid-session overage flip cannot change the
  // cache_control TTL and bust the server-side prompt cache
  // (~20K tokens per flip).
  let eligible = getPromptCache1hEligible()
  if (eligible === null) {
    const isAnt = process.env.USER_TYPE === 'ant'
    eligible =
      isAnt || (isClaudeAISubscriber() && !currentLimits.isUsingOverage)
    setPromptCache1hEligible(eligible)
  }
  if (!eligible) {
    return false
  }

  // Latch the allowlist as well, so an update to GrowthBook's disk cache
  // mid-request cannot produce mixed TTLs.
  let patterns = getPromptCache1hAllowlist()
  if (patterns === null) {
    const gbConfig = getFeatureValue_CACHED_MAY_BE_STALE<{
      allowlist?: string[]
    }>('tengu_prompt_cache_1h_config', {})
    patterns = gbConfig.allowlist ?? []
    setPromptCache1hAllowlist(patterns)
  }

  if (querySource === undefined) {
    return false
  }
  for (const pattern of patterns) {
    const matches = pattern.endsWith('*')
      ? querySource.startsWith(pattern.slice(0, -1))
      : querySource === pattern
    if (matches) {
      return true
    }
  }
  return false
}
435
+
436
/**
 * Applies the requested effort setting to an outgoing API request by mutating
 * the supplied request pieces in place.
 *
 * Nothing is changed when the model does not support effort, or when
 * `outputConfig` already has an `effort` key. Otherwise:
 * - `undefined`: only the effort beta header is added.
 * - string: the level is copied as-is to `outputConfig.effort` and the effort
 *   beta header is added.
 * - any other value (numeric override): honoured only when
 *   `process.env.USER_TYPE === 'ant'`; it is merged into
 *   `extraBodyParams.anthropic_internal` as `effort_override`. No beta header
 *   is added on this path.
 *
 * The beta header is appended at most once, so calling this with a `betas`
 * list that already contains it does not produce a duplicate entry.
 *
 * @param effortValue Requested effort, or `undefined` when none was given.
 * @param outputConfig Output config of the request; may receive `effort`.
 * @param extraBodyParams Extra body params; may receive `anthropic_internal`.
 * @param betas Beta header list of the request; may receive the effort header.
 * @param model Model identifier used for the support check.
 */
function configureEffortParams(
  effortValue: EffortValue | undefined,
  outputConfig: BetaOutputConfig,
  extraBodyParams: Record<string, unknown>,
  betas: string[],
  model: string,
): void {
  if (!modelSupportsEffort(model) || 'effort' in outputConfig) {
    return
  }

  // Guarded push: mirrors the dedupe used for the task-budget beta header.
  const addEffortBeta = (): void => {
    if (!betas.includes(EFFORT_BETA_HEADER)) {
      betas.push(EFFORT_BETA_HEADER)
    }
  }

  if (effortValue === undefined) {
    addEffortBeta()
    return
  }

  if (typeof effortValue === 'string') {
    // Send string effort level as is
    outputConfig.effort = effortValue
    addEffortBeta()
    return
  }

  if (process.env.USER_TYPE !== 'ant') {
    return
  }

  // Numeric effort override - ant-only (uses anthropic_internal).
  // Only merge into an existing value when it is a plain object; spreading a
  // string or array here would leak index keys into the request body.
  const current = extraBodyParams.anthropic_internal
  const existingInternal =
    typeof current === 'object' && current !== null && !Array.isArray(current)
      ? (current as Record<string, unknown>)
      : {}
  extraBodyParams.anthropic_internal = {
    ...existingInternal,
    effort_override: effortValue,
  }
}
467
+
468
// Wire shape of output_config.task_budget, which gives the model API-side
// awareness of its token budget. The Stainless SDK types don't yet include
// task_budget on BetaOutputConfig, so the shape is declared locally and the
// parameter type below is widened with it. The API validates on receipt; see
// api/api/schemas/messages/request/output_config.py:12-39 in the monorepo.
// Beta: task-budgets-2026-03-13 (EAP, claude-strudel-eap only as of Mar 2026).
type TaskBudgetParam = {
  type: 'tokens'
  total: number
  remaining?: number
}

/**
 * Copies the caller's task budget onto `outputConfig.task_budget` and makes
 * sure the task-budgets beta header is listed in `betas`.
 *
 * Does nothing when no budget was supplied, when `outputConfig` already has a
 * `task_budget` key, or when first-party-only betas should not be included.
 * `remaining` is only written when the caller provided it.
 */
export function configureTaskBudgetParams(
  taskBudget: Options['taskBudget'],
  outputConfig: BetaOutputConfig & { task_budget?: TaskBudgetParam },
  betas: string[],
): void {
  if (!taskBudget) {
    return
  }
  if ('task_budget' in outputConfig) {
    return
  }
  if (!shouldIncludeFirstPartyOnlyBetas()) {
    return
  }

  const wireBudget: TaskBudgetParam = {
    type: 'tokens',
    total: taskBudget.total,
  }
  if (taskBudget.remaining !== undefined) {
    wireBudget.remaining = taskBudget.remaining
  }
  outputConfig.task_budget = wireBudget

  const alreadyListed = betas.includes(TASK_BUDGETS_BETA_HEADER)
  if (!alreadyListed) {
    betas.push(TASK_BUDGETS_BETA_HEADER)
  }
}
502
+
503
/**
 * Builds the `metadata` object sent with API requests. Its single `user_id`
 * field is a JSON string combining the device id, the OAuth account UUID
 * (empty string when no OAuth account info is available) and the session id.
 *
 * Extra keys may be supplied through the CLAUDE_CODE_EXTRA_METADATA env var,
 * which must hold a JSON object; any other value is reported via
 * logForDebugging and ignored. The built-in keys are written after the extra
 * ones, so they win on a name clash.
 */
export function getAPIMetadata() {
  // https://docs.google.com/document/d/1dURO9ycXXQCBS0V4Vhl4poDBRgkelFc5t2BNPoEgH5Q/edit?tab=t.0#heading=h.5g7nec5b09w5
  const extraStr = process.env.CLAUDE_CODE_EXTRA_METADATA
  let extra: JsonObject = {}

  if (extraStr) {
    const parsed = safeParseJSON(extraStr, false)
    const isPlainObject =
      Boolean(parsed) && typeof parsed === 'object' && !Array.isArray(parsed)
    if (isPlainObject) {
      extra = parsed as JsonObject
    } else {
      logForDebugging(
        `CLAUDE_CODE_EXTRA_METADATA env var must be a JSON object, but was given ${extraStr}`,
        { level: 'error' },
      )
    }
  }

  const identity = {
    device_id: getOrCreateUserID(),
    account_uuid: getOauthAccountInfo()?.accountUuid ?? '',
    session_id: getSessionId(),
  }

  return {
    user_id: jsonStringify({ ...extra, ...identity }),
  }
}
529
+
530
/**
 * Checks an API key by sending a minimal one-token request with the small
 * fast model.
 *
 * @param apiKey Key to verify.
 * @param isNonInteractiveSession When true (print mode) the check is skipped
 *   and the key is reported as valid.
 * @returns `true` when the request succeeds (or the check is skipped),
 *   `false` when the failure message contains the API's "invalid x-api-key"
 *   authentication error payload.
 * @throws Any other failure, after logging it. A CannotRetryError is
 *   unwrapped to its original error first.
 */
export async function verifyApiKey(
  apiKey: string,
  isNonInteractiveSession: boolean,
): Promise<boolean> {
  // Skip API verification if running in print mode (isNonInteractiveSession)
  if (isNonInteractiveSession) return true

  // Exact payload fragment that identifies a rejected key.
  const invalidKeyPayload =
    '{"type":"error","error":{"type":"authentication_error","message":"invalid x-api-key"}}'

  try {
    // WARNING: if you change this to use a non-Haiku model, this request will fail in 1P unless it uses getCLISyspromptPrefix.
    const model = getSmallFastModel()
    const betas = getModelBetas(model)
    const attempts = withRetry(
      () =>
        getAnthropicClient({
          apiKey,
          maxRetries: 3,
          model,
          source: 'verify_api_key',
        }),
      async anthropic => {
        const messages: MessageParam[] = [{ role: 'user', content: 'test' }]
        // biome-ignore lint/plugin: API key verification is intentionally a minimal direct call
        await anthropic.beta.messages.create({
          model,
          max_tokens: 1,
          messages,
          temperature: 1,
          ...(betas.length > 0 && { betas }),
          metadata: getAPIMetadata(),
          ...getExtraBodyParams(),
        })
        return true
      },
      // Use fewer retries for API key verification
      { maxRetries: 2, model, thinkingConfig: { type: 'disabled' } },
    )
    return await returnValue(attempts)
  } catch (caught) {
    const error =
      caught instanceof CannotRetryError ? caught.originalError : caught
    logError(error)

    // Check for authentication error
    const keyRejected =
      error instanceof Error && error.message.includes(invalidKeyPayload)
    if (keyRejected) {
      return false
    }
    throw error
  }
}
587
+
588
/**
 * Converts an internal user message into an API `MessageParam`.
 *
 * With `addCache` set, the result always has array content: string content is
 * wrapped in a single text block, array content is shallow-copied block by
 * block. When `enablePromptCaching` is also set, the last block receives a
 * `cache_control` entry from getCacheControl for the given `querySource`.
 *
 * Without `addCache`, the content is passed through; array content is cloned
 * (shallowly) so later in-place edits of the returned array do not touch the
 * original message.
 */
export function userMessageToMessageParam(
  message: UserMessage,
  addCache = false,
  enablePromptCaching: boolean,
  querySource?: QuerySource,
): MessageParam {
  const { content } = message.message

  if (!addCache) {
    // Clone array content to prevent in-place mutations (e.g., insertCacheEditsBlock's
    // splice) from contaminating the original message. Without cloning, multiple calls
    // to addCacheBreakpoints share the same array and each splices in duplicate cache_edits.
    return {
      role: 'user',
      content: Array.isArray(content) ? [...content] : content,
    }
  }

  if (typeof content === 'string') {
    return {
      role: 'user',
      content: [
        enablePromptCaching
          ? {
              type: 'text',
              text: content,
              cache_control: getCacheControl({ querySource }),
            }
          : { type: 'text', text: content },
      ],
    }
  }

  const lastIndex = content.length - 1
  return {
    role: 'user',
    content: content.map((block, index) =>
      index === lastIndex && enablePromptCaching
        ? { ...block, cache_control: getCacheControl({ querySource }) }
        : { ...block },
    ),
  }
}
632
+
633
/**
 * Converts an internal assistant message into an API `MessageParam`.
 *
 * Without `addCache` the content is passed through unchanged (same
 * reference). With `addCache`, string content is wrapped in a single text
 * block and array content is shallow-copied block by block. When
 * `enablePromptCaching` is also set, the last block receives `cache_control`
 * from getCacheControl, unless that block is a `thinking` or
 * `redacted_thinking` block, or (with the CONNECTOR_TEXT feature on) a
 * connector text block.
 */
export function assistantMessageToMessageParam(
  message: AssistantMessage,
  addCache = false,
  enablePromptCaching: boolean,
  querySource?: QuerySource,
): MessageParam {
  const { content } = message.message

  if (!addCache) {
    return { role: 'assistant', content }
  }

  if (typeof content === 'string') {
    return {
      role: 'assistant',
      content: [
        enablePromptCaching
          ? {
              type: 'text',
              text: content,
              cache_control: getCacheControl({ querySource }),
            }
          : { type: 'text', text: content },
      ],
    }
  }

  const lastIndex = content.length - 1
  return {
    role: 'assistant',
    content: content.map((block, index) => {
      // Only the final block is a candidate, and only if its type may carry
      // a cache breakpoint.
      const isCacheableTail =
        index === lastIndex &&
        block.type !== 'thinking' &&
        block.type !== 'redacted_thinking' &&
        (feature('CONNECTOR_TEXT') ? !isConnectorTextBlock(block) : true)
      if (isCacheableTail && enablePromptCaching) {
        return { ...block, cache_control: getCacheControl({ querySource }) }
      }
      return { ...block }
    }),
  }
}
675
+
676
/**
 * Per-request options accepted by the query functions in this module.
 */
export type Options = {
  // Model selection and sampling
  model: string
  fallbackModel?: string
  advisorModel?: string
  fastMode?: boolean
  effortValue?: EffortValue
  temperatureOverride?: number
  maxOutputTokensOverride?: number
  outputFormat?: BetaJSONOutputFormat

  // Tools and agents
  getToolPermissionContext: () => Promise<ToolPermissionContext>
  toolChoice?: BetaToolChoiceTool | BetaToolChoiceAuto | undefined
  extraToolSchemas?: BetaToolUnion[]
  mcpTools: Tools
  hasPendingMcpServers?: boolean
  agents: AgentDefinition[]
  allowedAgentTypes?: string[]
  agentId?: AgentId // Only set for subagents

  // Session and request origin
  querySource: QuerySource
  queryTracking?: QueryChainTracking
  isNonInteractiveSession: boolean
  hasAppendSystemPrompt: boolean

  // Prompt caching
  enablePromptCaching?: boolean
  skipCacheWrite?: boolean

  // Transport overrides and callbacks
  fetchOverride?: ClientOptions['fetch']
  onStreamingFallback?: () => void
  addNotification?: (notif: Notification) => void

  // API-side task budget (output_config.task_budget). Distinct from the
  // tokenBudget.ts +500k auto-continue feature — this one is sent to the API
  // so the model can pace itself. `remaining` is computed by the caller
  // (query.ts decrements across the agentic loop).
  taskBudget?: { total: number; remaining?: number }
}
708
+
709
/**
 * Runs a model query to completion and resolves with the last assistant
 * message it produced.
 *
 * @throws APIUserAbortError when no assistant message was produced and the
 *   signal is aborted.
 * @throws Error('No assistant message found') when no assistant message was
 *   produced for any other reason.
 */
export async function queryModelWithoutStreaming({
  messages,
  systemPrompt,
  thinkingConfig,
  tools,
  signal,
  options,
}: {
  messages: Message[]
  systemPrompt: SystemPrompt
  thinkingConfig: ThinkingConfig
  tools: Tools
  signal: AbortSignal
  options: Options
}): Promise<AssistantMessage> {
  const stream = withStreamingVCR(messages, async function* () {
    yield* queryModel(
      messages,
      systemPrompt,
      thinkingConfig,
      tools,
      signal,
      options,
    )
  })

  // Remember the assistant message but keep draining the generator to the
  // end, so that logAPISuccessAndDuration (which runs after all yields) is
  // still reached.
  let lastAssistant: AssistantMessage | undefined
  for await (const event of stream) {
    if (event.type !== 'assistant') continue
    lastAssistant = event
  }

  if (lastAssistant) {
    return lastAssistant
  }
  // Surface an abort as APIUserAbortError rather than a generic error so
  // callers can handle abort scenarios gracefully.
  if (signal.aborted) {
    throw new APIUserAbortError()
  }
  throw new Error('No assistant message found')
}
751
+
752
/**
 * Streaming entry point: forwards everything queryModel yields (stream
 * events, assistant messages, system API error messages), routed through
 * withStreamingVCR.
 */
export async function* queryModelWithStreaming({
  messages,
  systemPrompt,
  thinkingConfig,
  tools,
  signal,
  options,
}: {
  messages: Message[]
  systemPrompt: SystemPrompt
  thinkingConfig: ThinkingConfig
  tools: Tools
  signal: AbortSignal
  options: Options
}): AsyncGenerator<
  StreamEvent | AssistantMessage | SystemAPIErrorMessage,
  void
> {
  const stream = withStreamingVCR(messages, async function* () {
    yield* queryModel(
      messages,
      systemPrompt,
      thinkingConfig,
      tools,
      signal,
      options,
    )
  })
  return yield* stream
}
781
+
782
/**
 * Tells whether an LSP tool should be sent deferred (defer_loading: true)
 * because LSP initialization has not finished yet. Tools without a truthy
 * `isLsp` property are never deferred by this check.
 */
function shouldDeferLspTool(tool: Tool): boolean {
  const isLspTool = 'isLsp' in tool && Boolean(tool.isLsp)
  if (!isLspTool) {
    return false
  }

  // Defer when pending or not started
  const { status } = getInitializationStatus()
  return status === 'pending' || status === 'not-started'
}
794
+
795
/**
 * Per-attempt timeout for non-streaming fallback requests, in milliseconds.
 *
 * Reads API_TIMEOUT_MS when set so slow backends and the streaming path
 * share the same ceiling. The override is used only when it parses (base 10)
 * to a finite, positive number; a missing, non-numeric, zero or negative
 * value falls through to the defaults below instead of being handed to the
 * client as a timeout.
 *
 * Remote sessions (CLAUDE_CODE_REMOTE truthy) default to 120s to stay under
 * CCR's container idle-kill (~5min), so a hung fallback to a wedged backend
 * surfaces a clean APIConnectionTimeoutError instead of stalling past
 * SIGKILL.
 *
 * Otherwise defaults to 300s — long enough for slow backends without
 * approaching the API's 10-minute non-streaming boundary.
 *
 * @returns The timeout in milliseconds; always a positive number.
 */
function getNonstreamingFallbackTimeoutMs(): number {
  const override = Number.parseInt(process.env.API_TIMEOUT_MS ?? '', 10)
  if (Number.isFinite(override) && override > 0) {
    return override
  }
  return isEnvTruthy(process.env.CLAUDE_CODE_REMOTE) ? 120_000 : 300_000
}
812
+
813
/**
 * Generator that performs a non-streaming API request under withRetry.
 *
 * It wraps the shared pattern of building a withRetry generator, forwarding
 * the system messages that generator emits, and finally returning the
 * resulting BetaMessage.
 *
 * For every attempt the request params are rebuilt from the retry context,
 * reported through `captureRequest` and `onAttempt`, adjusted for
 * non-streaming use, and sent with the fallback timeout and the caller's
 * abort signal. Failures other than user aborts are logged before being
 * re-thrown.
 */
export async function* executeNonStreamingRequest(
  clientOptions: {
    model: string
    fetchOverride?: Options['fetchOverride']
    source: string
  },
  retryOptions: {
    model: string
    fallbackModel?: string
    thinkingConfig: ThinkingConfig
    fastMode?: boolean
    signal: AbortSignal
    initialConsecutive529Errors?: number
    querySource?: QuerySource
  },
  paramsFromContext: (context: RetryContext) => BetaMessageStreamParams,
  onAttempt: (attempt: number, start: number, maxOutputTokens: number) => void,
  captureRequest: (params: BetaMessageStreamParams) => void,
  /**
   * Request ID of the failed streaming attempt this fallback is recovering
   * from. Emitted in tengu_nonstreaming_fallback_error for funnel correlation.
   */
  originatingRequestId?: string | null,
): AsyncGenerator<SystemAPIErrorMessage, BetaMessage> {
  const timeoutMs = getNonstreamingFallbackTimeoutMs()

  const retrying = withRetry(
    () =>
      getAnthropicClient({
        maxRetries: 0,
        model: clientOptions.model,
        fetchOverride: clientOptions.fetchOverride,
        source: clientOptions.source,
      }),
    async (anthropic, attempt, context) => {
      const startedAt = Date.now()
      const attemptParams = paramsFromContext(context)
      captureRequest(attemptParams)
      onAttempt(attempt, startedAt, attemptParams.max_tokens)

      const nonStreamingParams = adjustParamsForNonStreaming(
        attemptParams,
        MAX_NON_STREAMING_TOKENS,
      )

      try {
        // biome-ignore lint/plugin: non-streaming API call
        return await anthropic.beta.messages.create(
          {
            ...nonStreamingParams,
            model: normalizeModelStringForAPI(nonStreamingParams.model),
          },
          {
            signal: retryOptions.signal,
            timeout: timeoutMs,
          },
        )
      } catch (err) {
        // User aborts are not errors — re-throw immediately without logging
        if (err instanceof APIUserAbortError) {
          throw err
        }

        // Instrumentation: record when the non-streaming request errors (including
        // timeouts). Lets us distinguish "fallback hung past container kill"
        // (no event) from "fallback hit the bounded timeout" (this event).
        logForDiagnosticsNoPII('error', 'cli_nonstreaming_fallback_error')
        const errorName =
          err instanceof Error
            ? (err.name as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS)
            : ('unknown' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS)
        logEvent('tengu_nonstreaming_fallback_error', {
          model:
            clientOptions.model as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
          error: errorName,
          attempt,
          timeout_ms: timeoutMs,
          request_id: (originatingRequestId ??
            'unknown') as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
        })
        throw err
      }
    },
    {
      model: retryOptions.model,
      fallbackModel: retryOptions.fallbackModel,
      thinkingConfig: retryOptions.thinkingConfig,
      ...(isFastModeEnabled() ? { fastMode: retryOptions.fastMode } : {}),
      signal: retryOptions.signal,
      initialConsecutive529Errors: retryOptions.initialConsecutive529Errors,
      querySource: retryOptions.querySource,
    },
  )

  // Drive the retry generator by hand: forward its system messages and keep
  // the value it finishes with.
  let step = await retrying.next()
  while (!step.done) {
    if (step.value.type === 'system') {
      yield step.value
    }
    step = await retrying.next()
  }

  return step.value as BetaMessage
}
918
+
919
/**
 * Returns the request ID of the most recent assistant message that has one,
 * or `undefined` when there is none. The ID links consecutive API requests in
 * analytics so they can be joined for cache-hit-rate analysis and incremental
 * token tracking.
 *
 * Because the value is derived from the message array instead of global
 * state, each query chain (main thread, subagent, teammate) follows its own
 * request chain, and rollback/undo updates the value naturally.
 */
function getPreviousRequestIdFromMessages(
  messages: Message[],
): string | undefined {
  // Walk backwards so the newest matching message wins.
  let index = messages.length
  while (index-- > 0) {
    const candidate = messages[index]!
    if (candidate.type !== 'assistant') continue
    if (candidate.requestId) {
      return candidate.requestId
    }
  }
  return undefined
}
939
+
940
/** Type guard: true for `image` and `document` content blocks. */
function isMedia(
  block: BetaContentBlockParam,
): block is BetaImageBlockParam | BetaRequestDocumentBlock {
  switch (block.type) {
    case 'image':
    case 'document':
      return true
    default:
      return false
  }
}
945
+
946
/** Type guard: true for `tool_result` content blocks. */
function isToolResult(
  block: BetaContentBlockParam,
): block is BetaToolResultBlockParam {
  const { type } = block
  return type === 'tool_result'
}
951
+
952
/**
 * Ensures messages contain at most `limit` media items (images + documents),
 * counting both top-level blocks and blocks nested in tool_result content.
 *
 * Oldest media is dropped first: messages are visited in order, and within a
 * message the media nested inside tool_results is removed before top-level
 * media. The input is not mutated; messages and blocks that lose nothing are
 * returned as the same object, and when the total is already within the
 * limit the original array itself is returned.
 */
export function stripExcessMediaItems(
  messages: (UserMessage | AssistantMessage)[],
  limit: number,
): (UserMessage | AssistantMessage)[] {
  // Pass 1: count every media item in the conversation.
  let mediaCount = 0
  for (const { message } of messages) {
    const blocks = message.content
    if (!Array.isArray(blocks)) continue
    for (const block of blocks) {
      if (isMedia(block)) mediaCount += 1
      if (!isToolResult(block) || !Array.isArray(block.content)) continue
      for (const nested of block.content) {
        if (isMedia(nested)) mediaCount += 1
      }
    }
  }

  let excess = mediaCount - limit
  if (excess <= 0) {
    return messages
  }

  // Filter predicate: drops media while there is still excess to remove.
  const keepUnlessExcessMedia = (candidate: BetaContentBlockParam): boolean => {
    if (excess > 0 && isMedia(candidate)) {
      excess -= 1
      return false
    }
    return true
  }

  // Pass 2: rebuild only the messages that actually lose something.
  const rebuilt: unknown[] = []
  for (const msg of messages) {
    const content = msg.message.content
    if (excess <= 0 || !Array.isArray(content)) {
      rebuilt.push(msg)
      continue
    }

    const excessBefore = excess

    // Nested media inside tool_results goes first...
    const nestedStripped = content.map(block => {
      if (excess > 0 && isToolResult(block) && Array.isArray(block.content)) {
        const keptNested = block.content.filter(keepUnlessExcessMedia)
        if (keptNested.length !== block.content.length) {
          return { ...block, content: keptNested }
        }
      }
      return block
    })
    // ...then top-level media of the same message.
    const stripped = nestedStripped.filter(keepUnlessExcessMedia)

    rebuilt.push(
      excess === excessBefore
        ? msg
        : {
            ...msg,
            message: { ...msg.message, content: stripped },
          },
    )
  }

  return rebuilt as (UserMessage | AssistantMessage)[]
}
1016
+
1017
+ async function* queryModel(
1018
+ messages: Message[],
1019
+ systemPrompt: SystemPrompt,
1020
+ thinkingConfig: ThinkingConfig,
1021
+ tools: Tools,
1022
+ signal: AbortSignal,
1023
+ options: Options,
1024
+ ): AsyncGenerator<
1025
+ StreamEvent | AssistantMessage | SystemAPIErrorMessage,
1026
+ void
1027
+ > {
1028
+ // Check cheap conditions first — the off-switch await blocks on GrowthBook
1029
+ // init (~10ms). For non-Opus models (haiku, sonnet) this skips the await
1030
+ // entirely. Subscribers don't hit this path at all.
1031
+ if (
1032
+ !isClaudeAISubscriber() &&
1033
+ isNonCustomOpusModel(options.model) &&
1034
+ (
1035
+ await getDynamicConfig_BLOCKS_ON_INIT<{ activated: boolean }>(
1036
+ 'tengu-off-switch',
1037
+ {
1038
+ activated: false,
1039
+ },
1040
+ )
1041
+ ).activated
1042
+ ) {
1043
+ logEvent('tengu_off_switch_query', {})
1044
+ yield getAssistantMessageFromError(
1045
+ new Error(CUSTOM_OFF_SWITCH_MESSAGE),
1046
+ options.model,
1047
+ )
1048
+ return
1049
+ }
1050
+
1051
+ // Derive previous request ID from the last assistant message in this query chain.
1052
+ // This is scoped per message array (main thread, subagent, teammate each have their own),
1053
+ // so concurrent agents don't clobber each other's request chain tracking.
1054
+ // Also naturally handles rollback/undo since removed messages won't be in the array.
1055
+ const previousRequestId = getPreviousRequestIdFromMessages(messages)
1056
+
1057
+ const resolvedModel =
1058
+ getAPIProvider() === 'bedrock' &&
1059
+ options.model.includes('application-inference-profile')
1060
+ ? ((await getInferenceProfileBackingModel(options.model)) ??
1061
+ options.model)
1062
+ : options.model
1063
+
1064
+ queryCheckpoint('query_tool_schema_build_start')
1065
+ const isAgenticQuery =
1066
+ options.querySource.startsWith('repl_main_thread') ||
1067
+ options.querySource.startsWith('agent:') ||
1068
+ options.querySource === 'sdk' ||
1069
+ options.querySource === 'hook_agent' ||
1070
+ options.querySource === 'verification_agent'
1071
+ const betas = getMergedBetas(options.model, { isAgenticQuery })
1072
+
1073
+ // Always send the advisor beta header when advisor is enabled, so
1074
+ // non-agentic queries (compact, side_question, extract_memories, etc.)
1075
+ // can parse advisor server_tool_use blocks already in the conversation history.
1076
+ if (isAdvisorEnabled()) {
1077
+ betas.push(ADVISOR_BETA_HEADER)
1078
+ }
1079
+
1080
+ let advisorModel: string | undefined
1081
+ if (isAgenticQuery && isAdvisorEnabled()) {
1082
+ let advisorOption = options.advisorModel
1083
+
1084
+ const advisorExperiment = getExperimentAdvisorModels()
1085
+ if (advisorExperiment !== undefined) {
1086
+ if (
1087
+ normalizeModelStringForAPI(advisorExperiment.baseModel) ===
1088
+ normalizeModelStringForAPI(options.model)
1089
+ ) {
1090
+ // Override the advisor model if the base model matches. We
1091
+ // should only have experiment models if the user cannot
1092
+ // configure it themselves.
1093
+ advisorOption = advisorExperiment.advisorModel
1094
+ }
1095
+ }
1096
+
1097
+ if (advisorOption) {
1098
+ const normalizedAdvisorModel = normalizeModelStringForAPI(
1099
+ parseUserSpecifiedModel(advisorOption),
1100
+ )
1101
+ if (!modelSupportsAdvisor(options.model)) {
1102
+ logForDebugging(
1103
+ `[AdvisorTool] Skipping advisor - base model ${options.model} does not support advisor`,
1104
+ )
1105
+ } else if (!isValidAdvisorModel(normalizedAdvisorModel)) {
1106
+ logForDebugging(
1107
+ `[AdvisorTool] Skipping advisor - ${normalizedAdvisorModel} is not a valid advisor model`,
1108
+ )
1109
+ } else {
1110
+ advisorModel = normalizedAdvisorModel
1111
+ logForDebugging(
1112
+ `[AdvisorTool] Server-side tool enabled with ${advisorModel} as the advisor model`,
1113
+ )
1114
+ }
1115
+ }
1116
+ }
1117
+
1118
+ // Check if tool search is enabled (checks mode, model support, and threshold for auto mode)
1119
+ // This is async because it may need to calculate MCP tool description sizes for TstAuto mode
1120
+ let useToolSearch = await isToolSearchEnabled(
1121
+ options.model,
1122
+ tools,
1123
+ options.getToolPermissionContext,
1124
+ options.agents,
1125
+ 'query',
1126
+ )
1127
+
1128
+ // Precompute once — isDeferredTool does 2 GrowthBook lookups per call
1129
+ const deferredToolNames = new Set<string>()
1130
+ if (useToolSearch) {
1131
+ for (const t of tools) {
1132
+ if (isDeferredTool(t)) deferredToolNames.add(t.name)
1133
+ }
1134
+ }
1135
+
1136
+ // Even if tool search mode is enabled, skip if there are no deferred tools
1137
+ // AND no MCP servers are still connecting. When servers are pending, keep
1138
+ // ToolSearch available so the model can discover tools after they connect.
1139
+ if (
1140
+ useToolSearch &&
1141
+ deferredToolNames.size === 0 &&
1142
+ !options.hasPendingMcpServers
1143
+ ) {
1144
+ logForDebugging(
1145
+ 'Tool search disabled: no deferred tools available to search',
1146
+ )
1147
+ useToolSearch = false
1148
+ }
1149
+
1150
+ // Filter out ToolSearchTool if tool search is not enabled for this model
1151
+ // ToolSearchTool returns tool_reference blocks which unsupported models can't handle
1152
+ let filteredTools: Tools
1153
+
1154
+ if (useToolSearch) {
1155
+ // Dynamic tool loading: Only include deferred tools that have been discovered
1156
+ // via tool_reference blocks in the message history. This eliminates the need
1157
+ // to predeclare all deferred tools upfront and removes limits on tool quantity.
1158
+ const discoveredToolNames = extractDiscoveredToolNames(messages)
1159
+
1160
+ filteredTools = tools.filter(tool => {
1161
+ // Always include non-deferred tools
1162
+ if (!deferredToolNames.has(tool.name)) return true
1163
+ // Always include ToolSearchTool (so it can discover more tools)
1164
+ if (toolMatchesName(tool, TOOL_SEARCH_TOOL_NAME)) return true
1165
+ // Only include deferred tools that have been discovered
1166
+ return discoveredToolNames.has(tool.name)
1167
+ })
1168
+ } else {
1169
+ filteredTools = tools.filter(
1170
+ t => !toolMatchesName(t, TOOL_SEARCH_TOOL_NAME),
1171
+ )
1172
+ }
1173
+
1174
+ // Add tool search beta header if enabled - required for defer_loading to be accepted
1175
+ // Header differs by provider: 1P/Foundry use advanced-tool-use, Vertex/Bedrock use tool-search-tool
1176
+ // For Bedrock, this header must go in extraBodyParams, not the betas array
1177
+ const toolSearchHeader = useToolSearch ? getToolSearchBetaHeader() : null
1178
+ if (toolSearchHeader && getAPIProvider() !== 'bedrock') {
1179
+ if (!betas.includes(toolSearchHeader)) {
1180
+ betas.push(toolSearchHeader)
1181
+ }
1182
+ }
1183
+
1184
+ // Determine if cached microcompact is enabled for this model.
1185
+ // Computed once here (in async context) and captured by paramsFromContext.
1186
+ // The beta header is also captured here to avoid a top-level import of the
1187
+ // ant-only CACHE_EDITING_BETA_HEADER constant.
1188
+ let cachedMCEnabled = false
1189
+ let cacheEditingBetaHeader = ''
1190
+ if (feature('CACHED_MICROCOMPACT')) {
1191
+ const {
1192
+ isCachedMicrocompactEnabled,
1193
+ isModelSupportedForCacheEditing,
1194
+ getCachedMCConfig,
1195
+ } = await import('../compact/cachedMicrocompact.js')
1196
+ const betas = await import('src/constants/betas.js')
1197
+ cacheEditingBetaHeader = betas.CACHE_EDITING_BETA_HEADER
1198
+ const featureEnabled = isCachedMicrocompactEnabled()
1199
+ const modelSupported = isModelSupportedForCacheEditing(options.model)
1200
+ cachedMCEnabled = featureEnabled && modelSupported
1201
+ const config = getCachedMCConfig()
1202
+ logForDebugging(
1203
+ `Cached MC gate: enabled=${featureEnabled} modelSupported=${modelSupported} model=${options.model} supportedModels=${jsonStringify(config.supportedModels)}`,
1204
+ )
1205
+ }
1206
+
1207
+ const useGlobalCacheFeature = shouldUseGlobalCacheScope()
1208
+ const willDefer = (t: Tool) =>
1209
+ useToolSearch && (deferredToolNames.has(t.name) || shouldDeferLspTool(t))
1210
+ // MCP tools are per-user → dynamic tool section → can't globally cache.
1211
+ // Only gate when an MCP tool will actually render (not defer_loading).
1212
+ const needsToolBasedCacheMarker =
1213
+ useGlobalCacheFeature &&
1214
+ filteredTools.some(t => t.isMcp === true && !willDefer(t))
1215
+
1216
+ // Ensure prompt_caching_scope beta header is present when global cache is enabled.
1217
+ if (
1218
+ useGlobalCacheFeature &&
1219
+ !betas.includes(PROMPT_CACHING_SCOPE_BETA_HEADER)
1220
+ ) {
1221
+ betas.push(PROMPT_CACHING_SCOPE_BETA_HEADER)
1222
+ }
1223
+
1224
+ // Determine global cache strategy for logging
1225
+ const globalCacheStrategy: GlobalCacheStrategy = useGlobalCacheFeature
1226
+ ? needsToolBasedCacheMarker
1227
+ ? 'none'
1228
+ : 'system_prompt'
1229
+ : 'none'
1230
+
1231
+ // Build tool schemas, adding defer_loading for MCP tools when tool search is enabled
1232
+ // Note: We pass the full `tools` list (not filteredTools) to toolToAPISchema so that
1233
+ // ToolSearchTool's prompt can list ALL available MCP tools. The filtering only affects
1234
+ // which tools are actually sent to the API, not what the model sees in tool descriptions.
1235
+ const toolSchemas = await Promise.all(
1236
+ filteredTools.map(tool =>
1237
+ toolToAPISchema(tool, {
1238
+ getToolPermissionContext: options.getToolPermissionContext,
1239
+ tools,
1240
+ agents: options.agents,
1241
+ allowedAgentTypes: options.allowedAgentTypes,
1242
+ model: options.model,
1243
+ deferLoading: willDefer(tool),
1244
+ }),
1245
+ ),
1246
+ )
1247
+
1248
+ if (useToolSearch) {
1249
+ const includedDeferredTools = count(filteredTools, t =>
1250
+ deferredToolNames.has(t.name),
1251
+ )
1252
+ logForDebugging(
1253
+ `Dynamic tool loading: ${includedDeferredTools}/${deferredToolNames.size} deferred tools included`,
1254
+ )
1255
+ }
1256
+
1257
+ queryCheckpoint('query_tool_schema_build_end')
1258
+
1259
+ // Normalize messages before building system prompt (needed for fingerprinting)
1260
+ // Instrumentation: Track message count before normalization
1261
+ logEvent('tengu_api_before_normalize', {
1262
+ preNormalizedMessageCount: messages.length,
1263
+ })
1264
+
1265
+ queryCheckpoint('query_message_normalization_start')
1266
+ let messagesForAPI = normalizeMessagesForAPI(messages, filteredTools)
1267
+ queryCheckpoint('query_message_normalization_end')
1268
+
1269
+ // Model-specific post-processing: strip tool-search-specific fields if the
1270
+ // selected model doesn't support tool search.
1271
+ //
1272
+ // Why is this needed in addition to normalizeMessagesForAPI?
1273
+ // - normalizeMessagesForAPI uses isToolSearchEnabledNoModelCheck() because it's
1274
+ // called from ~20 places (analytics, feedback, sharing, etc.), many of which
1275
+ // don't have model context. Adding model to its signature would be a large refactor.
1276
+ // - This post-processing uses the model-aware isToolSearchEnabled() check
1277
+ // - This handles mid-conversation model switching (e.g., Sonnet → Haiku) where
1278
+ // stale tool-search fields from the previous model would cause 400 errors
1279
+ //
1280
+ // Note: For assistant messages, normalizeMessagesForAPI already normalized the
1281
+ // tool inputs, so stripCallerFieldFromAssistantMessage only needs to remove the
1282
+ // 'caller' field (not re-normalize inputs).
1283
+ if (!useToolSearch) {
1284
+ messagesForAPI = messagesForAPI.map(msg => {
1285
+ switch (msg.type) {
1286
+ case 'user':
1287
+ // Strip tool_reference blocks from tool_result content
1288
+ return stripToolReferenceBlocksFromUserMessage(msg)
1289
+ case 'assistant':
1290
+ // Strip 'caller' field from tool_use blocks
1291
+ return stripCallerFieldFromAssistantMessage(msg)
1292
+ default:
1293
+ return msg
1294
+ }
1295
+ })
1296
+ }
1297
+
1298
+ // Repair tool_use/tool_result pairing mismatches that can occur when resuming
1299
+ // remote/teleport sessions. Inserts synthetic error tool_results for orphaned
1300
+ // tool_uses and strips orphaned tool_results referencing non-existent tool_uses.
1301
+ messagesForAPI = ensureToolResultPairing(messagesForAPI)
1302
+
1303
+ // Strip advisor blocks — the API rejects them without the beta header.
1304
+ if (!betas.includes(ADVISOR_BETA_HEADER)) {
1305
+ messagesForAPI = stripAdvisorBlocks(messagesForAPI)
1306
+ }
1307
+
1308
+ // Strip excess media items before making the API call.
1309
+ // The API rejects requests with >100 media items but returns a confusing error.
1310
+ // Rather than erroring (which is hard to recover from in Cowork/CCD), we
1311
+ // silently drop the oldest media items to stay within the limit.
1312
+ messagesForAPI = stripExcessMediaItems(
1313
+ messagesForAPI,
1314
+ API_MAX_MEDIA_PER_REQUEST,
1315
+ )
1316
+
1317
+ // Instrumentation: Track message count after normalization
1318
+ logEvent('tengu_api_after_normalize', {
1319
+ postNormalizedMessageCount: messagesForAPI.length,
1320
+ })
1321
+
1322
+ // Compute fingerprint from first user message for attribution.
1323
+ // Must run BEFORE injecting synthetic messages (e.g. deferred tool names)
1324
+ // so the fingerprint reflects the actual user input.
1325
+ const fingerprint = computeFingerprintFromMessages(messagesForAPI)
1326
+
1327
+ // When the delta attachment is enabled, deferred tools are announced
1328
+ // via persisted deferred_tools_delta attachments instead of this
1329
+ // ephemeral prepend (which busts cache whenever the pool changes).
1330
+ if (useToolSearch && !isDeferredToolsDeltaEnabled()) {
1331
+ const deferredToolList = tools
1332
+ .filter(t => deferredToolNames.has(t.name))
1333
+ .map(formatDeferredToolLine)
1334
+ .sort()
1335
+ .join('\n')
1336
+ if (deferredToolList) {
1337
+ messagesForAPI = [
1338
+ createUserMessage({
1339
+ content: `<available-deferred-tools>\n${deferredToolList}\n</available-deferred-tools>`,
1340
+ isMeta: true,
1341
+ }),
1342
+ ...messagesForAPI,
1343
+ ]
1344
+ }
1345
+ }
1346
+
1347
+ // Chrome tool-search instructions: when the delta attachment is enabled,
1348
+ // these are carried as a client-side block in mcp_instructions_delta
1349
+ // (attachments.ts) instead of here. This per-request sys-prompt append
1350
+ // busts the prompt cache when chrome connects late.
1351
+ const hasChromeTools = filteredTools.some(t =>
1352
+ isToolFromMcpServer(t.name, CLAUDE_IN_CHROME_MCP_SERVER_NAME),
1353
+ )
1354
+ const injectChromeHere =
1355
+ useToolSearch && hasChromeTools && !isMcpInstructionsDeltaEnabled()
1356
+
1357
+ // filter(Boolean) works by converting each element to a boolean - empty strings become false and are filtered out.
1358
+ systemPrompt = asSystemPrompt(
1359
+ [
1360
+ getAttributionHeader(fingerprint),
1361
+ getCLISyspromptPrefix({
1362
+ isNonInteractive: options.isNonInteractiveSession,
1363
+ hasAppendSystemPrompt: options.hasAppendSystemPrompt,
1364
+ }),
1365
+ ...systemPrompt,
1366
+ ...(advisorModel ? [ADVISOR_TOOL_INSTRUCTIONS] : []),
1367
+ ...(injectChromeHere ? [CHROME_TOOL_SEARCH_INSTRUCTIONS] : []),
1368
+ ].filter(Boolean),
1369
+ )
1370
+
1371
+ // Prepend system prompt block for easy API identification
1372
+ logAPIPrefix(systemPrompt)
1373
+
1374
+ const enablePromptCaching =
1375
+ options.enablePromptCaching ?? getPromptCachingEnabled(options.model)
1376
+ const system = buildSystemPromptBlocks(systemPrompt, enablePromptCaching, {
1377
+ skipGlobalCacheForSystemPrompt: needsToolBasedCacheMarker,
1378
+ querySource: options.querySource,
1379
+ })
1380
+ const useBetas = betas.length > 0
1381
+
1382
+ // Build minimal context for detailed tracing (when beta tracing is enabled)
1383
+ // Note: The actual new_context message extraction is done in sessionTracing.ts using
1384
+ // hash-based tracking per querySource (agent) from the messagesForAPI array
1385
+ const extraToolSchemas = [...(options.extraToolSchemas ?? [])]
1386
+ if (advisorModel) {
1387
+ // Server tools must be in the tools array by API contract. Appended after
1388
+ // toolSchemas (which carries the cache_control marker) so toggling /advisor
1389
+ // only churns the small suffix, not the cached prefix.
1390
+ extraToolSchemas.push({
1391
+ type: 'advisor_20260301',
1392
+ name: 'advisor',
1393
+ model: advisorModel,
1394
+ } as unknown as BetaToolUnion)
1395
+ }
1396
+ const allTools = [...toolSchemas, ...extraToolSchemas]
1397
+
1398
+ const isFastMode =
1399
+ isFastModeEnabled() &&
1400
+ isFastModeAvailable() &&
1401
+ !isFastModeCooldown() &&
1402
+ isFastModeSupportedByModel(options.model) &&
1403
+ !!options.fastMode
1404
+
1405
+ // Sticky-on latches for dynamic beta headers. Each header, once first
1406
+ // sent, keeps being sent for the rest of the session so mid-session
1407
+ // toggles don't change the server-side cache key and bust ~50-70K tokens.
1408
+ // Latches are cleared on /clear and /compact via clearBetaHeaderLatches().
1409
+ // Per-call gates (isAgenticQuery, querySource===repl_main_thread) stay
1410
+ // per-call so non-agentic queries keep their own stable header set.
1411
+
1412
+ let afkHeaderLatched = getAfkModeHeaderLatched() === true
1413
+ if (feature('TRANSCRIPT_CLASSIFIER')) {
1414
+ if (
1415
+ !afkHeaderLatched &&
1416
+ isAgenticQuery &&
1417
+ shouldIncludeFirstPartyOnlyBetas() &&
1418
+ (autoModeStateModule?.isAutoModeActive() ?? false)
1419
+ ) {
1420
+ afkHeaderLatched = true
1421
+ setAfkModeHeaderLatched(true)
1422
+ }
1423
+ }
1424
+
1425
+ let fastModeHeaderLatched = getFastModeHeaderLatched() === true
1426
+ if (!fastModeHeaderLatched && isFastMode) {
1427
+ fastModeHeaderLatched = true
1428
+ setFastModeHeaderLatched(true)
1429
+ }
1430
+
1431
+ let cacheEditingHeaderLatched = getCacheEditingHeaderLatched() === true
1432
+ if (feature('CACHED_MICROCOMPACT')) {
1433
+ if (
1434
+ !cacheEditingHeaderLatched &&
1435
+ cachedMCEnabled &&
1436
+ getAPIProvider() === 'firstParty' &&
1437
+ options.querySource === 'repl_main_thread'
1438
+ ) {
1439
+ cacheEditingHeaderLatched = true
1440
+ setCacheEditingHeaderLatched(true)
1441
+ }
1442
+ }
1443
+
1444
+ // Only latch from agentic queries so a classifier call doesn't flip the
1445
+ // main thread's context_management mid-turn.
1446
+ let thinkingClearLatched = getThinkingClearLatched() === true
1447
+ if (!thinkingClearLatched && isAgenticQuery) {
1448
+ const lastCompletion = getLastApiCompletionTimestamp()
1449
+ if (
1450
+ lastCompletion !== null &&
1451
+ Date.now() - lastCompletion > CACHE_TTL_1HOUR_MS
1452
+ ) {
1453
+ thinkingClearLatched = true
1454
+ setThinkingClearLatched(true)
1455
+ }
1456
+ }
1457
+
1458
+ const effort = resolveAppliedEffort(options.model, options.effortValue)
1459
+
1460
+ if (feature('PROMPT_CACHE_BREAK_DETECTION')) {
1461
+ // Exclude defer_loading tools from the hash -- the API strips them from the
1462
+ // prompt, so they never affect the actual cache key. Including them creates
1463
+ // false-positive "tool schemas changed" breaks when tools are discovered or
1464
+ // MCP servers reconnect.
1465
+ const toolsForCacheDetection = allTools.filter(
1466
+ t => !('defer_loading' in t && t.defer_loading),
1467
+ )
1468
+ // Capture everything that could affect the server-side cache key.
1469
+ // Pass latched header values (not live state) so break detection
1470
+ // reflects what we actually send, not what the user toggled.
1471
+ recordPromptState({
1472
+ system,
1473
+ toolSchemas: toolsForCacheDetection,
1474
+ querySource: options.querySource,
1475
+ model: options.model,
1476
+ agentId: options.agentId,
1477
+ fastMode: fastModeHeaderLatched,
1478
+ globalCacheStrategy,
1479
+ betas,
1480
+ autoModeActive: afkHeaderLatched,
1481
+ isUsingOverage: currentLimits.isUsingOverage ?? false,
1482
+ cachedMCEnabled: cacheEditingHeaderLatched,
1483
+ effortValue: effort,
1484
+ extraBodyParams: getExtraBodyParams(),
1485
+ })
1486
+ }
1487
+
1488
+ const newContext: LLMRequestNewContext | undefined = isBetaTracingEnabled()
1489
+ ? {
1490
+ systemPrompt: systemPrompt.join('\n\n'),
1491
+ querySource: options.querySource,
1492
+ tools: jsonStringify(allTools),
1493
+ }
1494
+ : undefined
1495
+
1496
+ // Capture the span so we can pass it to endLLMRequestSpan later
1497
+ // This ensures responses are matched to the correct request when multiple requests run in parallel
1498
+ const llmSpan = startLLMRequestSpan(
1499
+ options.model,
1500
+ newContext,
1501
+ messagesForAPI,
1502
+ isFastMode,
1503
+ )
1504
+
1505
+ const startIncludingRetries = Date.now()
1506
+ let start = Date.now()
1507
+ let attemptNumber = 0
1508
+ const attemptStartTimes: number[] = []
1509
+ let stream: Stream<BetaRawMessageStreamEvent> | undefined = undefined
1510
+ let streamRequestId: string | null | undefined = undefined
1511
+ let clientRequestId: string | undefined = undefined
1512
+ // eslint-disable-next-line eslint-plugin-n/no-unsupported-features/node-builtins -- Response is available in Node 18+ and is used by the SDK
1513
+ let streamResponse: Response | undefined = undefined
1514
+
1515
+ // Release all stream resources to prevent native memory leaks.
1516
+ // The Response object holds native TLS/socket buffers that live outside the
1517
+ // V8 heap (observed on the Node.js/npm path; see GH #32920), so we must
1518
+ // explicitly cancel and release it regardless of how the generator exits.
1519
+ function releaseStreamResources(): void { // Takes no arguments and returns nothing; acts on the enclosing `stream` and `streamResponse` variables.
1520
+ cleanupStream(stream) // `stream` is declared as possibly undefined, so it is passed through as-is
1521
+ stream = undefined // drop our reference to the stream
1522
+ if (streamResponse) {
1523
+ streamResponse.body?.cancel().catch(() => {}) // best-effort: not awaited, and a rejected cancel() is deliberately ignored; `?.` covers a response without a body
1524
+ streamResponse = undefined // drop our reference to the Response
1525
+ }
1526
+ }
1527
+
1528
+ // Consume pending cache edits ONCE before paramsFromContext is defined.
1529
+ // paramsFromContext is called multiple times (logging, retries), so consuming
1530
+ // inside it would cause the first call to steal edits from subsequent calls.
1531
+ const consumedCacheEdits = cachedMCEnabled ? consumePendingCacheEdits() : null
1532
+ const consumedPinnedEdits = cachedMCEnabled ? getPinnedCacheEdits() : []
1533
+
1534
+ // Capture the betas sent in the last API request, including the ones that
1535
+ // were dynamically added, so we can log and send it to telemetry.
1536
+ let lastRequestBetas: string[] | undefined
1537
+
1538
+ const paramsFromContext = (retryContext: RetryContext) => { // Builds the request params object for one call; reads retryContext.model / .maxTokensOverride / .fastMode plus the enclosing request state.
1539
+ const betasParams = [...betas] // copy, so the pushes below never mutate the shared `betas` array
1540
+
1541
+ // Append 1M beta dynamically for the Sonnet 1M experiment.
1542
+ if (
1543
+ !betasParams.includes(CONTEXT_1M_BETA_HEADER) &&
1544
+ getSonnet1mExpTreatmentEnabled(retryContext.model)
1545
+ ) {
1546
+ betasParams.push(CONTEXT_1M_BETA_HEADER)
1547
+ }
1548
+
1549
+ // For Bedrock, include both model-based betas and dynamically-added tool search header
1550
+ const bedrockBetas =
1551
+ getAPIProvider() === 'bedrock'
1552
+ ? [
1553
+ ...getBedrockExtraBodyParamsBetas(retryContext.model),
1554
+ ...(toolSearchHeader ? [toolSearchHeader] : []),
1555
+ ]
1556
+ : []
1557
+ const extraBodyParams = getExtraBodyParams(bedrockBetas)
1558
+
1559
+ const outputConfig: BetaOutputConfig = { // shallow copy of any output_config that came in through the extra body params
1560
+ ...((extraBodyParams.output_config as BetaOutputConfig) ?? {}),
1561
+ }
1562
+
1563
+ configureEffortParams(
1564
+ effort,
1565
+ outputConfig,
1566
+ extraBodyParams,
1567
+ betasParams,
1568
+ options.model,
1569
+ )
1570
+
1571
+ configureTaskBudgetParams(
1572
+ options.taskBudget,
1573
+ outputConfig as BetaOutputConfig & { task_budget?: TaskBudgetParam },
1574
+ betasParams,
1575
+ )
1576
+
1577
+ // Merge outputFormat into extraBodyParams.output_config alongside effort
1578
+ // Requires structured-outputs beta header per SDK (see parse() in messages.mjs)
1579
+ if (options.outputFormat && !('format' in outputConfig)) { // a `format` already present in outputConfig takes precedence over options.outputFormat
1580
+ outputConfig.format = options.outputFormat as BetaJSONOutputFormat
1581
+ // Add beta header if not already present and provider supports it
1582
+ if (
1583
+ modelSupportsStructuredOutputs(options.model) &&
1584
+ !betasParams.includes(STRUCTURED_OUTPUTS_BETA_HEADER)
1585
+ ) {
1586
+ betasParams.push(STRUCTURED_OUTPUTS_BETA_HEADER)
1587
+ }
1588
+ }
1589
+
1590
+ // Retry context gets preference because it tries to course correct if we exceed the context window limit
1591
+ const maxOutputTokens =
1592
+ retryContext?.maxTokensOverride || // `||` rather than `??`: a 0 override falls through to the next source
1593
+ options.maxOutputTokensOverride ||
1594
+ getMaxOutputTokensForModel(options.model)
1595
+
1596
+ const hasThinking =
1597
+ thinkingConfig.type !== 'disabled' &&
1598
+ !isEnvTruthy(process.env.CLAUDE_CODE_DISABLE_THINKING)
1599
+ let thinking: BetaMessageStreamParams['thinking'] | undefined = undefined
1600
+
1601
+ // IMPORTANT: Do not change the adaptive-vs-budget thinking selection below
1602
+ // without notifying the model launch DRI and research. This is a sensitive
1603
+ // setting that can greatly affect model quality and bashing.
1604
+ if (hasThinking && modelSupportsThinking(options.model)) {
1605
+ if (
1606
+ !isEnvTruthy(process.env.CLAUDE_CODE_DISABLE_ADAPTIVE_THINKING) &&
1607
+ modelSupportsAdaptiveThinking(options.model)
1608
+ ) {
1609
+ // For models that support adaptive thinking, always use adaptive
1610
+ // thinking without a budget.
1611
+ thinking = {
1612
+ type: 'adaptive',
1613
+ } satisfies BetaMessageStreamParams['thinking']
1614
+ } else {
1615
+ // For models that do not support adaptive thinking, use the default
1616
+ // thinking budget unless explicitly specified.
1617
+ let thinkingBudget = getMaxThinkingTokensForModel(options.model)
1618
+ if (
1619
+ thinkingConfig.type === 'enabled' &&
1620
+ thinkingConfig.budgetTokens !== undefined
1621
+ ) {
1622
+ thinkingBudget = thinkingConfig.budgetTokens
1623
+ }
1624
+ thinkingBudget = Math.min(maxOutputTokens - 1, thinkingBudget) // cap the budget strictly below max_tokens
1625
+ thinking = {
1626
+ budget_tokens: thinkingBudget,
1627
+ type: 'enabled',
1628
+ } satisfies BetaMessageStreamParams['thinking']
1629
+ }
1630
+ }
1631
+
1632
+ // Get API context management strategies if enabled
1633
+ const contextManagement = getAPIContextManagement({
1634
+ hasThinking,
1635
+ isRedactThinkingActive: betasParams.includes(REDACT_THINKING_BETA_HEADER),
1636
+ clearAllThinking: thinkingClearLatched,
1637
+ })
1638
+
1639
+ const enablePromptCaching =
1640
+ options.enablePromptCaching ?? getPromptCachingEnabled(retryContext.model)
1641
+
1642
+ // Fast mode: header is latched session-stable (cache-safe), but
1643
+ // `speed='fast'` stays dynamic so cooldown still suppresses the actual
1644
+ // fast-mode request without changing the cache key.
1645
+ let speed: BetaMessageStreamParams['speed']
1646
+ const isFastModeForRetry = // recomputed on every call from retryContext.fastMode instead of reusing the outer isFastMode
1647
+ isFastModeEnabled() &&
1648
+ isFastModeAvailable() &&
1649
+ !isFastModeCooldown() &&
1650
+ isFastModeSupportedByModel(options.model) &&
1651
+ !!retryContext.fastMode
1652
+ if (isFastModeForRetry) {
1653
+ speed = 'fast'
1654
+ }
1655
+ if (fastModeHeaderLatched && !betasParams.includes(FAST_MODE_BETA_HEADER)) {
1656
+ betasParams.push(FAST_MODE_BETA_HEADER)
1657
+ }
1658
+
1659
+ // AFK mode beta: latched once auto mode is first activated. Still gated
1660
+ // by isAgenticQuery per-call so classifiers/compaction don't get it.
1661
+ if (feature('TRANSCRIPT_CLASSIFIER')) {
1662
+ if (
1663
+ afkHeaderLatched &&
1664
+ shouldIncludeFirstPartyOnlyBetas() &&
1665
+ isAgenticQuery &&
1666
+ !betasParams.includes(AFK_MODE_BETA_HEADER)
1667
+ ) {
1668
+ betasParams.push(AFK_MODE_BETA_HEADER)
1669
+ }
1670
+ }
1671
+
1672
+ // Cache editing beta: header is latched session-stable; useCachedMC
1673
+ // (controls cache_edits body behavior) stays live so edits stop when
1674
+ // the feature disables but the header doesn't flip.
1675
+ const useCachedMC =
1676
+ cachedMCEnabled &&
1677
+ getAPIProvider() === 'firstParty' &&
1678
+ options.querySource === 'repl_main_thread'
1679
+ if (
1680
+ cacheEditingHeaderLatched &&
1681
+ getAPIProvider() === 'firstParty' &&
1682
+ options.querySource === 'repl_main_thread' &&
1683
+ !betasParams.includes(cacheEditingBetaHeader)
1684
+ ) {
1685
+ betasParams.push(cacheEditingBetaHeader)
1686
+ logForDebugging(
1687
+ 'Cache editing beta header enabled for cached microcompact',
1688
+ )
1689
+ }
1690
+
1691
+ // Only send temperature when thinking is disabled — the API requires
1692
+ // temperature: 1 when thinking is enabled, which is already the default.
1693
+ const temperature = !hasThinking
1694
+ ? (options.temperatureOverride ?? 1)
1695
+ : undefined
1696
+
1697
+ lastRequestBetas = betasParams // recorded on every call, including when `useBetas` is false and the list is not sent
1698
+
1699
+ return {
1700
+ model: normalizeModelStringForAPI(options.model),
1701
+ messages: addCacheBreakpoints(
1702
+ messagesForAPI,
1703
+ enablePromptCaching,
1704
+ options.querySource,
1705
+ useCachedMC,
1706
+ consumedCacheEdits,
1707
+ consumedPinnedEdits,
1708
+ options.skipCacheWrite,
1709
+ ),
1710
+ system,
1711
+ tools: allTools,
1712
+ tool_choice: options.toolChoice,
1713
+ ...(useBetas && { betas: betasParams }), // NOTE(review): `useBetas` is derived from the base `betas` list, so headers pushed above are not sent when `betas` is empty — confirm intended
1714
+ metadata: getAPIMetadata(),
1715
+ max_tokens: maxOutputTokens,
1716
+ thinking,
1717
+ ...(temperature !== undefined && { temperature }),
1718
+ ...(contextManagement &&
1719
+ useBetas &&
1720
+ betasParams.includes(CONTEXT_MANAGEMENT_BETA_HEADER) && {
1721
+ context_management: contextManagement,
1722
+ }),
1723
+ ...extraBodyParams, // spread after the fields above, so extra body params win on key collisions; output_config and speed below are applied last
1724
+ ...(Object.keys(outputConfig).length > 0 && {
1725
+ output_config: outputConfig,
1726
+ }),
1727
+ ...(speed !== undefined && { speed }),
1728
+ }
1729
+ }
1730
+
1731
+ // Compute log scalars synchronously so the fire-and-forget .then() closure
1732
+ // captures only primitives instead of paramsFromContext's full closure scope
1733
+ // (messagesForAPI, system, allTools, betas — the entire request-building
1734
+ // context), which would otherwise be pinned until the promise resolves.
1735
+ {
1736
+ const queryParams = paramsFromContext({
1737
+ model: options.model,
1738
+ thinkingConfig,
1739
+ })
1740
+ const logMessagesLength = queryParams.messages.length
1741
+ const logBetas = useBetas ? (queryParams.betas ?? []) : []
1742
+ const logThinkingType = queryParams.thinking?.type ?? 'disabled'
1743
+ const logEffortValue = queryParams.output_config?.effort
1744
+ void options.getToolPermissionContext().then(permissionContext => {
1745
+ logAPIQuery({
1746
+ model: options.model,
1747
+ messagesLength: logMessagesLength,
1748
+ temperature: options.temperatureOverride ?? 1,
1749
+ betas: logBetas,
1750
+ permissionMode: permissionContext.mode,
1751
+ querySource: options.querySource,
1752
+ queryTracking: options.queryTracking,
1753
+ thinkingType: logThinkingType,
1754
+ effortValue: logEffortValue,
1755
+ fastMode: isFastMode,
1756
+ previousRequestId,
1757
+ })
1758
+ })
1759
+ }
1760
+
1761
+ const newMessages: AssistantMessage[] = []
1762
+ let ttftMs = 0
1763
+ let partialMessage: BetaMessage | undefined = undefined
1764
+ const contentBlocks: (BetaContentBlock | ConnectorTextBlock)[] = []
1765
+ let usage: NonNullableUsage = EMPTY_USAGE
1766
+ let costUSD = 0
1767
+ let stopReason: BetaStopReason | null = null
1768
+ let didFallBackToNonStreaming = false
1769
+ let fallbackMessage: AssistantMessage | undefined
1770
+ let maxOutputTokens = 0
1771
+ let responseHeaders: globalThis.Headers | undefined = undefined
1772
+ let research: unknown = undefined
1773
+ let isFastModeRequest = isFastMode // Keep separate state as it may change if falling back
1774
+ let isAdvisorInProgress = false
1775
+
1776
+ try {
1777
+ queryCheckpoint('query_client_creation_start')
1778
+ const generator = withRetry(
1779
+ () =>
1780
+ getAnthropicClient({
1781
+ maxRetries: 0, // Disabled auto-retry in favor of manual implementation
1782
+ model: options.model,
1783
+ fetchOverride: options.fetchOverride,
1784
+ source: options.querySource,
1785
+ }),
1786
+ async (anthropic, attempt, context) => {
1787
+ attemptNumber = attempt
1788
+ isFastModeRequest = context.fastMode ?? false
1789
+ start = Date.now()
1790
+ attemptStartTimes.push(start)
1791
+ // Client has been created by withRetry's getClient() call. This fires
1792
+ // once per attempt; on retries the client is usually cached (withRetry
1793
+ // only calls getClient() again after auth errors), so the delta from
1794
+ // client_creation_start is meaningful on attempt 1.
1795
+ queryCheckpoint('query_client_creation_end')
1796
+
1797
+ const params = paramsFromContext(context)
1798
+ captureAPIRequest(params, options.querySource) // Capture for bug reports
1799
+
1800
+ maxOutputTokens = params.max_tokens
1801
+
1802
+ // Fire immediately before the fetch is dispatched. .withResponse() below
1803
+ // awaits until response headers arrive, so this MUST be before the await
1804
+ // or the "Network TTFB" phase measurement is wrong.
1805
+ queryCheckpoint('query_api_request_sent')
1806
+ if (!options.agentId) {
1807
+ headlessProfilerCheckpoint('api_request_sent')
1808
+ }
1809
+
1810
+ // Generate and track client request ID so timeouts (which return no
1811
+ // server request ID) can still be correlated with server logs.
1812
+ // First-party only — 3P providers don't log it (inc-4029 class).
1813
+ clientRequestId =
1814
+ getAPIProvider() === 'firstParty' && isFirstPartyAnthropicBaseUrl()
1815
+ ? randomUUID()
1816
+ : undefined
1817
+
1818
+ // Use raw stream instead of BetaMessageStream to avoid O(n²) partial JSON parsing
1819
+ // BetaMessageStream calls partialParse() on every input_json_delta, which we don't need
1820
+ // since we handle tool input accumulation ourselves
1821
+ // biome-ignore lint/plugin: main conversation loop handles attribution separately
1822
+
1823
+ // Use stream() method which has withResponse() support
1824
+ const result = await anthropic.beta.messages
1825
+ .stream(
1826
+ params,
1827
+ {
1828
+ signal,
1829
+ ...(clientRequestId && {
1830
+ headers: { [CLIENT_REQUEST_ID_HEADER]: clientRequestId },
1831
+ }),
1832
+ },
1833
+ )
1834
+ .withResponse()
1835
+
1836
+ queryCheckpoint('query_response_headers_received')
1837
+ streamRequestId = result.request_id
1838
+ streamResponse = result.response
1839
+ return result.data
1840
+ },
1841
+ {
1842
+ model: options.model,
1843
+ fallbackModel: options.fallbackModel,
1844
+ thinkingConfig,
1845
+ ...(isFastModeEnabled() ? { fastMode: isFastMode } : false),
1846
+ signal,
1847
+ querySource: options.querySource,
1848
+ },
1849
+ )
1850
+
1851
+ let e
1852
+ do {
1853
+ e = await generator.next()
1854
+
1855
+ // yield API error messages (the stream has a 'controller' property, error messages don't)
1856
+ if (!('controller' in e.value)) {
1857
+ yield e.value
1858
+ }
1859
+ } while (!e.done)
1860
+ stream = e.value as Stream<BetaRawMessageStreamEvent>
1861
+
1862
+ // reset state
1863
+ newMessages.length = 0
1864
+ ttftMs = 0
1865
+ partialMessage = undefined
1866
+ contentBlocks.length = 0
1867
+ usage = EMPTY_USAGE
1868
+ stopReason = null
1869
+ isAdvisorInProgress = false
1870
+
1871
+ // Streaming idle timeout watchdog: abort the stream if no chunks arrive
1872
+ // for STREAM_IDLE_TIMEOUT_MS. Unlike the stall detection below (which only
1873
+ // fires when the *next* chunk arrives), this uses setTimeout to actively
1874
+ // kill hung streams. Without this, a silently dropped connection can hang
1875
+ // the session indefinitely since the SDK's request timeout only covers the
1876
+ // initial fetch(), not the streaming body.
1877
+ const streamWatchdogEnabled = isEnvTruthy(
1878
+ process.env.CLAUDE_ENABLE_STREAM_WATCHDOG,
1879
+ )
1880
+ const STREAM_IDLE_TIMEOUT_MS =
1881
+ parseInt(process.env.CLAUDE_STREAM_IDLE_TIMEOUT_MS || '', 10) || 90_000
1882
+ const STREAM_IDLE_WARNING_MS = STREAM_IDLE_TIMEOUT_MS / 2
1883
+ let streamIdleAborted = false
1884
+ // performance.now() snapshot when watchdog fires, for measuring abort propagation delay
1885
+ let streamWatchdogFiredAt: number | null = null
1886
+ let streamIdleWarningTimer: ReturnType<typeof setTimeout> | null = null
1887
+ let streamIdleTimer: ReturnType<typeof setTimeout> | null = null
1888
+ function clearStreamIdleTimers(): void { // Cancels the idle warning timer and the idle abort timer if either is pending, and resets both handles to null.
1889
+ if (streamIdleWarningTimer !== null) {
1890
+ clearTimeout(streamIdleWarningTimer)
1891
+ streamIdleWarningTimer = null // null marks "no warning timer scheduled"
1892
+ }
1893
+ if (streamIdleTimer !== null) {
1894
+ clearTimeout(streamIdleTimer)
1895
+ streamIdleTimer = null // null marks "no abort timer scheduled"
1896
+ }
1897
+ }
1898
+ function resetStreamIdleTimer(): void { // Re-arms the idle watchdog: cancels pending timers, then (only if the watchdog is enabled) schedules a warning timer and an abort timer.
1899
+ clearStreamIdleTimers() // always cancel first, so pending timers are cleared even when the watchdog is disabled
1900
+ if (!streamWatchdogEnabled) {
1901
+ return
1902
+ }
1903
+ streamIdleWarningTimer = setTimeout(
1904
+ warnMs => { // warnMs receives the extra setTimeout argument passed below
1905
+ logForDebugging(
1906
+ `Streaming idle warning: no chunks received for ${warnMs / 1000}s`,
1907
+ { level: 'warn' },
1908
+ )
1909
+ logForDiagnosticsNoPII('warn', 'cli_streaming_idle_warning')
1910
+ },
1911
+ STREAM_IDLE_WARNING_MS, // delay before the warning fires
1912
+ STREAM_IDLE_WARNING_MS, // forwarded to the callback as warnMs
1913
+ )
1914
+ streamIdleTimer = setTimeout(() => {
1915
+ streamIdleAborted = true // flag and timestamp are set before the stream is released
1916
+ streamWatchdogFiredAt = performance.now()
1917
+ logForDebugging(
1918
+ `Streaming idle timeout: no chunks received for ${STREAM_IDLE_TIMEOUT_MS / 1000}s, aborting stream`,
1919
+ { level: 'error' },
1920
+ )
1921
+ logForDiagnosticsNoPII('error', 'cli_streaming_idle_timeout')
1922
+ logEvent('tengu_streaming_idle_timeout', {
1923
+ model:
1924
+ options.model as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
1925
+ request_id: (streamRequestId ?? // falls back to 'unknown' when no request id was captured
1926
+ 'unknown') as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
1927
+ timeout_ms: STREAM_IDLE_TIMEOUT_MS,
1928
+ })
1929
+ releaseStreamResources() // cleans up the stream and cancels the response body
1930
+ }, STREAM_IDLE_TIMEOUT_MS)
1931
+ }
1932
+ resetStreamIdleTimer()
1933
+
1934
+ startSessionActivity('api_call')
1935
+ try {
1936
+ // stream in and accumulate state
1937
+ let isFirstChunk = true
1938
+ let lastEventTime: number | null = null // Set after first chunk to avoid measuring TTFB as a stall
1939
+ const STALL_THRESHOLD_MS = 30_000 // 30 seconds
1940
+ let totalStallTime = 0
1941
+ let stallCount = 0
1942
+
1943
+ for await (const part of stream) {
1944
+ resetStreamIdleTimer()
1945
+ const now = Date.now()
1946
+
1947
+ // Detect and log streaming stalls (only after first event to avoid counting TTFB)
1948
+ if (lastEventTime !== null) {
1949
+ const timeSinceLastEvent = now - lastEventTime
1950
+ if (timeSinceLastEvent > STALL_THRESHOLD_MS) {
1951
+ stallCount++
1952
+ totalStallTime += timeSinceLastEvent
1953
+ logForDebugging(
1954
+ `Streaming stall detected: ${(timeSinceLastEvent / 1000).toFixed(1)}s gap between events (stall #${stallCount})`,
1955
+ { level: 'warn' },
1956
+ )
1957
+ logEvent('tengu_streaming_stall', {
1958
+ stall_duration_ms: timeSinceLastEvent,
1959
+ stall_count: stallCount,
1960
+ total_stall_time_ms: totalStallTime,
1961
+ event_type:
1962
+ part.type as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
1963
+ model:
1964
+ options.model as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
1965
+ request_id: (streamRequestId ??
1966
+ 'unknown') as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
1967
+ })
1968
+ }
1969
+ }
1970
+ lastEventTime = now
1971
+
1972
+ if (isFirstChunk) {
1973
+ logForDebugging('Stream started - received first chunk')
1974
+ queryCheckpoint('query_first_chunk_received')
1975
+ if (!options.agentId) {
1976
+ headlessProfilerCheckpoint('first_chunk')
1977
+ }
1978
+ endQueryProfile()
1979
+ isFirstChunk = false
1980
+ }
1981
+
1982
+ switch (part.type) {
1983
+ case 'message_start': {
1984
+ partialMessage = part.message
1985
+ ttftMs = Date.now() - start
1986
+ usage = updateUsage(usage, part.message?.usage)
1987
+ // Capture research from message_start if available (internal only).
1988
+ // Always overwrite with the latest value.
1989
+ if (
1990
+ process.env.USER_TYPE === 'ant' &&
1991
+ 'research' in (part.message as unknown as Record<string, unknown>)
1992
+ ) {
1993
+ research = (part.message as unknown as Record<string, unknown>)
1994
+ .research
1995
+ }
1996
+ break
1997
+ }
1998
+ case 'content_block_start':
1999
+ switch (part.content_block.type) {
2000
+ case 'tool_use':
2001
+ contentBlocks[part.index] = {
2002
+ ...part.content_block,
2003
+ input: '',
2004
+ }
2005
+ break
2006
+ case 'server_tool_use':
2007
+ contentBlocks[part.index] = {
2008
+ ...part.content_block,
2009
+ input: '' as unknown as { [key: string]: unknown },
2010
+ }
2011
+ if ((part.content_block.name as string) === 'advisor') {
2012
+ isAdvisorInProgress = true
2013
+ logForDebugging(`[AdvisorTool] Advisor tool called`)
2014
+ logEvent('tengu_advisor_tool_call', {
2015
+ model:
2016
+ options.model as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2017
+ advisor_model: (advisorModel ??
2018
+ 'unknown') as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2019
+ })
2020
+ }
2021
+ break
2022
+ case 'text':
2023
+ contentBlocks[part.index] = {
2024
+ ...part.content_block,
2025
+ // awkwardly, the sdk sometimes returns text as part of a
2026
+ // content_block_start message, then returns the same text
2027
+ // again in a content_block_delta message. we ignore it here
2028
+ // since there doesn't seem to be a way to detect when a
2029
+ // content_block_delta message duplicates the text.
2030
+ text: '',
2031
+ }
2032
+ break
2033
+ case 'thinking':
2034
+ contentBlocks[part.index] = {
2035
+ ...part.content_block,
2036
+ // also awkward
2037
+ thinking: '',
2038
+ // initialize signature to ensure field exists even if signature_delta never arrives
2039
+ signature: '',
2040
+ }
2041
+ break
2042
+ default:
2043
+ // even more awkwardly, the sdk mutates the contents of text blocks
2044
+ // as it works. we want the blocks to be immutable, so that we can
2045
+ // accumulate state ourselves.
2046
+ contentBlocks[part.index] = { ...part.content_block }
2047
+ if (
2048
+ (part.content_block.type as string) === 'advisor_tool_result'
2049
+ ) {
2050
+ isAdvisorInProgress = false
2051
+ logForDebugging(`[AdvisorTool] Advisor tool result received`)
2052
+ }
2053
+ break
2054
+ }
2055
+ break
2056
+ case 'content_block_delta': {
2057
+ const contentBlock = contentBlocks[part.index]
2058
+ const delta = part.delta as typeof part.delta | ConnectorTextDelta
2059
+ if (!contentBlock) {
2060
+ logEvent('tengu_streaming_error', {
2061
+ error_type:
2062
+ 'content_block_not_found_delta' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2063
+ part_type:
2064
+ part.type as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2065
+ part_index: part.index,
2066
+ })
2067
+ throw new RangeError('Content block not found')
2068
+ }
2069
+ if (
2070
+ feature('CONNECTOR_TEXT') &&
2071
+ delta.type === 'connector_text_delta'
2072
+ ) {
2073
+ if (contentBlock.type !== 'connector_text') {
2074
+ logEvent('tengu_streaming_error', {
2075
+ error_type:
2076
+ 'content_block_type_mismatch_connector_text' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2077
+ expected_type:
2078
+ 'connector_text' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2079
+ actual_type:
2080
+ contentBlock.type as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2081
+ })
2082
+ throw new Error('Content block is not a connector_text block')
2083
+ }
2084
+ contentBlock.connector_text += delta.connector_text
2085
+ } else {
2086
+ switch (delta.type) {
2087
+ case 'citations_delta':
2088
+ // TODO: handle citations
2089
+ break
2090
+ case 'input_json_delta':
2091
+ if (
2092
+ contentBlock.type !== 'tool_use' &&
2093
+ contentBlock.type !== 'server_tool_use'
2094
+ ) {
2095
+ logEvent('tengu_streaming_error', {
2096
+ error_type:
2097
+ 'content_block_type_mismatch_input_json' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2098
+ expected_type:
2099
+ 'tool_use' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2100
+ actual_type:
2101
+ contentBlock.type as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2102
+ })
2103
+ throw new Error('Content block is not a input_json block')
2104
+ }
2105
+ if (typeof contentBlock.input !== 'string') {
2106
+ logEvent('tengu_streaming_error', {
2107
+ error_type:
2108
+ 'content_block_input_not_string' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2109
+ input_type:
2110
+ typeof contentBlock.input as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2111
+ })
2112
+ throw new Error('Content block input is not a string')
2113
+ }
2114
+ contentBlock.input += delta.partial_json
2115
+ break
2116
+ case 'text_delta':
2117
+ if (contentBlock.type !== 'text') {
2118
+ logEvent('tengu_streaming_error', {
2119
+ error_type:
2120
+ 'content_block_type_mismatch_text' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2121
+ expected_type:
2122
+ 'text' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2123
+ actual_type:
2124
+ contentBlock.type as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2125
+ })
2126
+ throw new Error('Content block is not a text block')
2127
+ }
2128
+ contentBlock.text += delta.text
2129
+ break
2130
+ case 'signature_delta':
2131
+ if (
2132
+ feature('CONNECTOR_TEXT') &&
2133
+ contentBlock.type === 'connector_text'
2134
+ ) {
2135
+ contentBlock.signature = delta.signature
2136
+ break
2137
+ }
2138
+ if (contentBlock.type !== 'thinking') {
2139
+ logEvent('tengu_streaming_error', {
2140
+ error_type:
2141
+ 'content_block_type_mismatch_thinking_signature' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2142
+ expected_type:
2143
+ 'thinking' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2144
+ actual_type:
2145
+ contentBlock.type as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2146
+ })
2147
+ throw new Error('Content block is not a thinking block')
2148
+ }
2149
+ contentBlock.signature = delta.signature
2150
+ break
2151
+ case 'thinking_delta':
2152
+ if (contentBlock.type !== 'thinking') {
2153
+ logEvent('tengu_streaming_error', {
2154
+ error_type:
2155
+ 'content_block_type_mismatch_thinking_delta' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2156
+ expected_type:
2157
+ 'thinking' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2158
+ actual_type:
2159
+ contentBlock.type as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2160
+ })
2161
+ throw new Error('Content block is not a thinking block')
2162
+ }
2163
+ contentBlock.thinking += delta.thinking
2164
+ break
2165
+ }
2166
+ }
2167
+ // Capture research from content_block_delta if available (internal only).
2168
+ // Always overwrite with the latest value.
2169
+ if (process.env.USER_TYPE === 'ant' && 'research' in part) {
2170
+ research = (part as { research: unknown }).research
2171
+ }
2172
+ break
2173
+ }
2174
+ case 'content_block_stop': {
2175
+ const contentBlock = contentBlocks[part.index]
2176
+ if (!contentBlock) {
2177
+ logEvent('tengu_streaming_error', {
2178
+ error_type:
2179
+ 'content_block_not_found_stop' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2180
+ part_type:
2181
+ part.type as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2182
+ part_index: part.index,
2183
+ })
2184
+ throw new RangeError('Content block not found')
2185
+ }
2186
+ if (!partialMessage) {
2187
+ logEvent('tengu_streaming_error', {
2188
+ error_type:
2189
+ 'partial_message_not_found' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2190
+ part_type:
2191
+ part.type as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2192
+ })
2193
+ throw new Error('Message not found')
2194
+ }
2195
+ const m: AssistantMessage = {
2196
+ message: {
2197
+ ...partialMessage,
2198
+ content: normalizeContentFromAPI(
2199
+ [contentBlock] as BetaContentBlock[],
2200
+ tools,
2201
+ options.agentId,
2202
+ ),
2203
+ },
2204
+ requestId: streamRequestId ?? undefined,
2205
+ type: 'assistant',
2206
+ uuid: randomUUID(),
2207
+ timestamp: new Date().toISOString(),
2208
+ ...(process.env.USER_TYPE === 'ant' &&
2209
+ research !== undefined && { research }),
2210
+ ...(advisorModel && { advisorModel }),
2211
+ }
2212
+ newMessages.push(m)
2213
+ yield m
2214
+ break
2215
+ }
2216
+ case 'message_delta': {
2217
+ usage = updateUsage(usage, part.usage)
2218
+ // Capture research from message_delta if available (internal only).
2219
+ // Always overwrite with the latest value. Also write back to
2220
+ // already-yielded messages since message_delta arrives after
2221
+ // content_block_stop.
2222
+ if (
2223
+ process.env.USER_TYPE === 'ant' &&
2224
+ 'research' in (part as unknown as Record<string, unknown>)
2225
+ ) {
2226
+ research = (part as unknown as Record<string, unknown>).research
2227
+ for (const msg of newMessages) {
2228
+ msg.research = research
2229
+ }
2230
+ }
2231
+
2232
+ // Write final usage and stop_reason back to the last yielded
2233
+ // message. Messages are created at content_block_stop from
2234
+ // partialMessage, which was set at message_start before any tokens
2235
+ // were generated (output_tokens: 0, stop_reason: null).
2236
+ // message_delta arrives after content_block_stop with the real
2237
+ // values.
2238
+ //
2239
+ // IMPORTANT: Use direct property mutation, not object replacement.
2240
+ // The transcript write queue holds a reference to message.message
2241
+ // and serializes it lazily (100ms flush interval). Object
2242
+ // replacement ({ ...lastMsg.message, usage }) would disconnect
2243
+ // the queued reference; direct mutation ensures the transcript
2244
+ // captures the final values.
2245
+ stopReason = part.delta.stop_reason
2246
+
2247
+ const lastMsg = newMessages.at(-1)
2248
+ if (lastMsg) {
2249
+ lastMsg.message.usage = usage
2250
+ lastMsg.message.stop_reason = stopReason
2251
+ }
2252
+
2253
+ // Update cost
2254
+ const costUSDForPart = calculateUSDCost(resolvedModel, usage)
2255
+ costUSD += addToTotalSessionCost(
2256
+ costUSDForPart,
2257
+ usage,
2258
+ options.model,
2259
+ )
2260
+
2261
+ const refusalMessage = getErrorMessageIfRefusal(
2262
+ part.delta.stop_reason,
2263
+ options.model,
2264
+ )
2265
+ if (refusalMessage) {
2266
+ yield refusalMessage
2267
+ }
2268
+
2269
+ if (stopReason === 'max_tokens') {
2270
+ logEvent('tengu_max_tokens_reached', {
2271
+ max_tokens: maxOutputTokens,
2272
+ })
2273
+ yield createAssistantAPIErrorMessage({
2274
+ content: `${API_ERROR_MESSAGE_PREFIX}: Claude's response exceeded the ${
2275
+ maxOutputTokens
2276
+ } output token maximum. To configure this behavior, set the CLAUDE_CODE_MAX_OUTPUT_TOKENS environment variable.`,
2277
+ apiError: 'max_output_tokens',
2278
+ error: 'max_output_tokens',
2279
+ })
2280
+ }
2281
+
2282
+ if (stopReason === 'model_context_window_exceeded') {
2283
+ logEvent('tengu_context_window_exceeded', {
2284
+ max_tokens: maxOutputTokens,
2285
+ output_tokens: usage.output_tokens,
2286
+ })
2287
+ // Reuse the max_output_tokens recovery path — from the model's
2288
+ // perspective, both mean "response was cut off, continue from
2289
+ // where you left off."
2290
+ yield createAssistantAPIErrorMessage({
2291
+ content: `${API_ERROR_MESSAGE_PREFIX}: The model has reached its context window limit.`,
2292
+ apiError: 'max_output_tokens',
2293
+ error: 'max_output_tokens',
2294
+ })
2295
+ }
2296
+ break
2297
+ }
2298
+ case 'message_stop':
2299
+ break
2300
+ }
2301
+
2302
+ yield {
2303
+ type: 'stream_event',
2304
+ event: part,
2305
+ ...(part.type === 'message_start' ? { ttftMs } : undefined),
2306
+ }
2307
+ }
2308
+ // Clear the idle timeout watchdog now that the stream loop has exited
2309
+ clearStreamIdleTimers()
2310
+
2311
+ // If the stream was aborted by our idle timeout watchdog, fall back to
2312
+ // non-streaming retry rather than treating it as a completed stream.
2313
+ if (streamIdleAborted) {
2314
+ // Instrumentation: proves the for-await exited after the watchdog fired
2315
+ // (vs. hung forever). exit_delay_ms measures abort propagation latency:
2316
+ // 0-10ms = abort worked; >>1000ms = something else woke the loop.
2317
+ const exitDelayMs =
2318
+ streamWatchdogFiredAt !== null
2319
+ ? Math.round(performance.now() - streamWatchdogFiredAt)
2320
+ : -1
2321
+ logForDiagnosticsNoPII(
2322
+ 'info',
2323
+ 'cli_stream_loop_exited_after_watchdog_clean',
2324
+ )
2325
+ logEvent('tengu_stream_loop_exited_after_watchdog', {
2326
+ request_id: (streamRequestId ??
2327
+ 'unknown') as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2328
+ exit_delay_ms: exitDelayMs,
2329
+ exit_path:
2330
+ 'clean' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2331
+ model:
2332
+ options.model as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2333
+ })
2334
+ // Prevent double-emit: this throw lands in the catch block below,
2335
+ // whose exit_path='error' probe guards on streamWatchdogFiredAt.
2336
+ streamWatchdogFiredAt = null
2337
+ throw new Error('Stream idle timeout - no chunks received')
2338
+ }
2339
+
2340
+ // Detect when the stream completed without producing any assistant messages.
2341
+ // This covers two proxy failure modes:
2342
+ // 1. No events at all (!partialMessage): proxy returned 200 with non-SSE body
2343
+ // 2. Partial events (partialMessage set but no content blocks completed AND
2344
+ // no stop_reason received): proxy returned message_start but stream ended
2345
+ // before content_block_stop and before message_delta with stop_reason
2346
+ // BetaMessageStream had the first check in _endRequest() but the raw Stream
2347
+ // does not - without it the generator silently returns no assistant messages,
2348
+ // causing "Execution error" in -p mode.
2349
+ // Note: We must check stopReason to avoid false positives. For example, with
2350
+ // structured output (--json-schema), the model calls a StructuredOutput tool
2351
+ // on turn 1, then on turn 2 responds with end_turn and no content blocks.
2352
+ // That's a legitimate empty response, not an incomplete stream.
2353
+ if (!partialMessage || (newMessages.length === 0 && !stopReason)) {
2354
+ logForDebugging(
2355
+ !partialMessage
2356
+ ? 'Stream completed without receiving message_start event - triggering non-streaming fallback'
2357
+ : 'Stream completed with message_start but no content blocks completed - triggering non-streaming fallback',
2358
+ { level: 'error' },
2359
+ )
2360
+ logEvent('tengu_stream_no_events', {
2361
+ model:
2362
+ options.model as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2363
+ request_id: (streamRequestId ??
2364
+ 'unknown') as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2365
+ })
2366
+ throw new Error('Stream ended without receiving any events')
2367
+ }
2368
+
2369
+ // Log summary if any stalls occurred during streaming
2370
+ if (stallCount > 0) {
2371
+ logForDebugging(
2372
+ `Streaming completed with ${stallCount} stall(s), total stall time: ${(totalStallTime / 1000).toFixed(1)}s`,
2373
+ { level: 'warn' },
2374
+ )
2375
+ logEvent('tengu_streaming_stall_summary', {
2376
+ stall_count: stallCount,
2377
+ total_stall_time_ms: totalStallTime,
2378
+ model:
2379
+ options.model as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2380
+ request_id: (streamRequestId ??
2381
+ 'unknown') as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2382
+ })
2383
+ }
2384
+
2385
+ // Check if the cache actually broke based on response tokens
2386
+ if (feature('PROMPT_CACHE_BREAK_DETECTION')) {
2387
+ void checkResponseForCacheBreak(
2388
+ options.querySource,
2389
+ usage.cache_read_input_tokens,
2390
+ usage.cache_creation_input_tokens,
2391
+ messages,
2392
+ options.agentId,
2393
+ streamRequestId,
2394
+ )
2395
+ }
2396
+
2397
+ // Process fallback percentage header and quota status if available
2398
+ // streamResponse is set when the stream is created in the withRetry callback above
2399
+ // TypeScript's control flow analysis can't track that streamResponse is set in the callback
2400
+ // eslint-disable-next-line eslint-plugin-n/no-unsupported-features/node-builtins
2401
+ const resp = streamResponse as unknown as Response | undefined
2402
+ if (resp) {
2403
+ extractQuotaStatusFromHeaders(resp.headers)
2404
+ // Store headers for gateway detection
2405
+ responseHeaders = resp.headers
2406
+ }
2407
+ } catch (streamingError) {
2408
+ // Clear the idle timeout watchdog on error path too
2409
+ clearStreamIdleTimers()
2410
+
2411
+ // Instrumentation: if the watchdog had already fired and the for-await
2412
+ // threw (rather than exiting cleanly), record that the loop DID exit and
2413
+ // how long after the watchdog. Distinguishes true hangs from error exits.
2414
+ if (streamIdleAborted && streamWatchdogFiredAt !== null) {
2415
+ const exitDelayMs = Math.round(
2416
+ performance.now() - streamWatchdogFiredAt,
2417
+ )
2418
+ logForDiagnosticsNoPII(
2419
+ 'info',
2420
+ 'cli_stream_loop_exited_after_watchdog_error',
2421
+ )
2422
+ logEvent('tengu_stream_loop_exited_after_watchdog', {
2423
+ request_id: (streamRequestId ??
2424
+ 'unknown') as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2425
+ exit_delay_ms: exitDelayMs,
2426
+ exit_path:
2427
+ 'error' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2428
+ error_name:
2429
+ streamingError instanceof Error
2430
+ ? (streamingError.name as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS)
2431
+ : ('unknown' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS),
2432
+ model:
2433
+ options.model as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2434
+ })
2435
+ }
2436
+
2437
+ if (streamingError instanceof APIUserAbortError) {
2438
+ // Check if the abort signal was triggered by the user (ESC key)
2439
+ // If the signal is aborted, it's a user-initiated abort
2440
+ // If not, it's likely a timeout from the SDK
2441
+ if (signal.aborted) {
2442
+ // This is a real user abort (ESC key was pressed)
2443
+ logForDebugging(
2444
+ `Streaming aborted by user: ${errorMessage(streamingError)}`,
2445
+ )
2446
+ if (isAdvisorInProgress) {
2447
+ logEvent('tengu_advisor_tool_interrupted', {
2448
+ model:
2449
+ options.model as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2450
+ advisor_model: (advisorModel ??
2451
+ 'unknown') as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2452
+ })
2453
+ }
2454
+ throw streamingError
2455
+ } else {
2456
+ // The SDK threw APIUserAbortError but our signal wasn't aborted
2457
+ // This means it's a timeout from the SDK's internal timeout
2458
+ logForDebugging(
2459
+ `Streaming timeout (SDK abort): ${streamingError.message}`,
2460
+ { level: 'error' },
2461
+ )
2462
+ // Throw a more specific error for timeout
2463
+ throw new APIConnectionTimeoutError({ message: 'Request timed out' })
2464
+ }
2465
+ }
2466
+
2467
+ // When the flag is enabled, skip the non-streaming fallback and let the
2468
+ // error propagate to withRetry. The mid-stream fallback causes double tool
2469
+ // execution when streaming tool execution is active: the partial stream
2470
+ // starts a tool, then the non-streaming retry produces the same tool_use
2471
+ // and runs it again. See inc-4258.
2472
+ const disableFallback =
2473
+ isEnvTruthy(process.env.CLAUDE_CODE_DISABLE_NONSTREAMING_FALLBACK) ||
2474
+ getFeatureValue_CACHED_MAY_BE_STALE(
2475
+ 'tengu_disable_streaming_to_non_streaming_fallback',
2476
+ false,
2477
+ )
2478
+
2479
+ if (disableFallback) {
2480
+ logForDebugging(
2481
+ `Error streaming (non-streaming fallback disabled): ${errorMessage(streamingError)}`,
2482
+ { level: 'error' },
2483
+ )
2484
+ logEvent('tengu_streaming_fallback_to_non_streaming', {
2485
+ model:
2486
+ options.model as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2487
+ error:
2488
+ streamingError instanceof Error
2489
+ ? (streamingError.name as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS)
2490
+ : (String(
2491
+ streamingError,
2492
+ ) as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS),
2493
+ attemptNumber,
2494
+ maxOutputTokens,
2495
+ thinkingType:
2496
+ thinkingConfig.type as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2497
+ fallback_disabled: true,
2498
+ request_id: (streamRequestId ??
2499
+ 'unknown') as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2500
+ fallback_cause: (streamIdleAborted
2501
+ ? 'watchdog'
2502
+ : 'other') as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2503
+ })
2504
+ throw streamingError
2505
+ }
2506
+
2507
+ logForDebugging(
2508
+ `Error streaming, falling back to non-streaming mode: ${errorMessage(streamingError)}`,
2509
+ { level: 'error' },
2510
+ )
2511
+ didFallBackToNonStreaming = true
2512
+ if (options.onStreamingFallback) {
2513
+ options.onStreamingFallback()
2514
+ }
2515
+
2516
+ logEvent('tengu_streaming_fallback_to_non_streaming', {
2517
+ model:
2518
+ options.model as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2519
+ error:
2520
+ streamingError instanceof Error
2521
+ ? (streamingError.name as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS)
2522
+ : (String(
2523
+ streamingError,
2524
+ ) as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS),
2525
+ attemptNumber,
2526
+ maxOutputTokens,
2527
+ thinkingType:
2528
+ thinkingConfig.type as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2529
+ fallback_disabled: false,
2530
+ request_id: (streamRequestId ??
2531
+ 'unknown') as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2532
+ fallback_cause: (streamIdleAborted
2533
+ ? 'watchdog'
2534
+ : 'other') as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2535
+ })
2536
+
2537
+ // Fall back to non-streaming mode with retries.
2538
+ // If the streaming failure was itself a 529, count it toward the
2539
+ // consecutive-529 budget so total 529s-before-model-fallback is the
2540
+ // same whether the overload was hit in streaming or non-streaming mode.
2541
+ // This is a speculative fix for https://github.com/anthropics/claude-code/issues/1513
2542
+ // Instrumentation: proves executeNonStreamingRequest was entered (vs. the
2543
+ // fallback event firing but the call itself hanging at dispatch).
2544
+ logForDiagnosticsNoPII('info', 'cli_nonstreaming_fallback_started')
2545
+ logEvent('tengu_nonstreaming_fallback_started', {
2546
+ request_id: (streamRequestId ??
2547
+ 'unknown') as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2548
+ model:
2549
+ options.model as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2550
+ fallback_cause: (streamIdleAborted
2551
+ ? 'watchdog'
2552
+ : 'other') as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2553
+ })
2554
+ const result = yield* executeNonStreamingRequest(
2555
+ { model: options.model, source: options.querySource },
2556
+ {
2557
+ model: options.model,
2558
+ fallbackModel: options.fallbackModel,
2559
+ thinkingConfig,
2560
+ ...(isFastModeEnabled() && { fastMode: isFastMode }),
2561
+ signal,
2562
+ initialConsecutive529Errors: is529Error(streamingError) ? 1 : 0,
2563
+ querySource: options.querySource,
2564
+ },
2565
+ paramsFromContext,
2566
+ (attempt, _startTime, tokens) => {
2567
+ attemptNumber = attempt
2568
+ maxOutputTokens = tokens
2569
+ },
2570
+ params => captureAPIRequest(params, options.querySource),
2571
+ streamRequestId,
2572
+ )
2573
+
2574
+ const m: AssistantMessage = {
2575
+ message: {
2576
+ ...result,
2577
+ content: normalizeContentFromAPI(
2578
+ result.content,
2579
+ tools,
2580
+ options.agentId,
2581
+ ),
2582
+ },
2583
+ requestId: streamRequestId ?? undefined,
2584
+ type: 'assistant',
2585
+ uuid: randomUUID(),
2586
+ timestamp: new Date().toISOString(),
2587
+ ...(process.env.USER_TYPE === 'ant' &&
2588
+ research !== undefined && {
2589
+ research,
2590
+ }),
2591
+ ...(advisorModel && {
2592
+ advisorModel,
2593
+ }),
2594
+ }
2595
+ newMessages.push(m)
2596
+ fallbackMessage = m
2597
+ yield m
2598
+ } finally {
2599
+ clearStreamIdleTimers()
2600
+ }
2601
+ } catch (errorFromRetry) {
2602
+ // FallbackTriggeredError must propagate to query.ts, which performs the
2603
+ // actual model switch. Swallowing it here would turn the fallback into a
2604
+ // no-op — the user would just see "Model fallback triggered: X -> Y" as
2605
+ // an error message with no actual retry on the fallback model.
2606
+ if (errorFromRetry instanceof FallbackTriggeredError) {
2607
+ throw errorFromRetry
2608
+ }
2609
+
2610
+ // Check if this is a 404 error during stream creation that should trigger
2611
+ // non-streaming fallback. This handles gateways that return 404 for streaming
2612
+ // endpoints but work fine with non-streaming. Before v2.1.8, BetaMessageStream
2613
+ // threw 404s during iteration (caught by inner catch with fallback), but now
2614
+ // with raw streams, 404s are thrown during creation (caught here).
2615
+ const is404StreamCreationError =
2616
+ !didFallBackToNonStreaming &&
2617
+ errorFromRetry instanceof CannotRetryError &&
2618
+ errorFromRetry.originalError instanceof APIError &&
2619
+ errorFromRetry.originalError.status === 404
2620
+
2621
+ if (is404StreamCreationError) {
2622
+ // 404 is thrown at .withResponse() before streamRequestId is assigned,
2623
+ // and CannotRetryError means every retry failed — so grab the failed
2624
+ // request's ID from the error header instead.
2625
+ const failedRequestId =
2626
+ (errorFromRetry.originalError as APIError).requestID ?? 'unknown'
2627
+ logForDebugging(
2628
+ 'Streaming endpoint returned 404, falling back to non-streaming mode',
2629
+ { level: 'warn' },
2630
+ )
2631
+ didFallBackToNonStreaming = true
2632
+ if (options.onStreamingFallback) {
2633
+ options.onStreamingFallback()
2634
+ }
2635
+
2636
+ logEvent('tengu_streaming_fallback_to_non_streaming', {
2637
+ model:
2638
+ options.model as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2639
+ error:
2640
+ '404_stream_creation' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2641
+ attemptNumber,
2642
+ maxOutputTokens,
2643
+ thinkingType:
2644
+ thinkingConfig.type as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2645
+ request_id:
2646
+ failedRequestId as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2647
+ fallback_cause:
2648
+ '404_stream_creation' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2649
+ })
2650
+
2651
+ try {
2652
+ // Fall back to non-streaming mode
2653
+ const result = yield* executeNonStreamingRequest(
2654
+ { model: options.model, source: options.querySource },
2655
+ {
2656
+ model: options.model,
2657
+ fallbackModel: options.fallbackModel,
2658
+ thinkingConfig,
2659
+ ...(isFastModeEnabled() && { fastMode: isFastMode }),
2660
+ signal,
2661
+ },
2662
+ paramsFromContext,
2663
+ (attempt, _startTime, tokens) => {
2664
+ attemptNumber = attempt
2665
+ maxOutputTokens = tokens
2666
+ },
2667
+ params => captureAPIRequest(params, options.querySource),
2668
+ failedRequestId,
2669
+ )
2670
+
2671
+ const m: AssistantMessage = {
2672
+ message: {
2673
+ ...result,
2674
+ content: normalizeContentFromAPI(
2675
+ result.content,
2676
+ tools,
2677
+ options.agentId,
2678
+ ),
2679
+ },
2680
+ requestId: streamRequestId ?? undefined,
2681
+ type: 'assistant',
2682
+ uuid: randomUUID(),
2683
+ timestamp: new Date().toISOString(),
2684
+ ...(process.env.USER_TYPE === 'ant' &&
2685
+ research !== undefined && { research }),
2686
+ ...(advisorModel && { advisorModel }),
2687
+ }
2688
+ newMessages.push(m)
2689
+ fallbackMessage = m
2690
+ yield m
2691
+
2692
+ // Continue to success logging below
2693
+ } catch (fallbackError) {
2694
+ // Propagate model-fallback signal to query.ts (see comment above).
2695
+ if (fallbackError instanceof FallbackTriggeredError) {
2696
+ throw fallbackError
2697
+ }
2698
+
2699
+ // Fallback also failed, handle as normal error
2700
+ logForDebugging(
2701
+ `Non-streaming fallback also failed: ${errorMessage(fallbackError)}`,
2702
+ { level: 'error' },
2703
+ )
2704
+
2705
+ let error = fallbackError
2706
+ let errorModel = options.model
2707
+ if (fallbackError instanceof CannotRetryError) {
2708
+ error = fallbackError.originalError
2709
+ errorModel = fallbackError.retryContext.model
2710
+ }
2711
+
2712
+ if (error instanceof APIError) {
2713
+ extractQuotaStatusFromError(error)
2714
+ }
2715
+
2716
+ const requestId =
2717
+ streamRequestId ||
2718
+ (error instanceof APIError ? error.requestID : undefined) ||
2719
+ (error instanceof APIError
2720
+ ? (error.error as { request_id?: string })?.request_id
2721
+ : undefined)
2722
+
2723
+ logAPIError({
2724
+ error,
2725
+ model: errorModel,
2726
+ messageCount: messagesForAPI.length,
2727
+ messageTokens: tokenCountFromLastAPIResponse(messagesForAPI),
2728
+ durationMs: Date.now() - start,
2729
+ durationMsIncludingRetries: Date.now() - startIncludingRetries,
2730
+ attempt: attemptNumber,
2731
+ requestId,
2732
+ clientRequestId,
2733
+ didFallBackToNonStreaming,
2734
+ queryTracking: options.queryTracking,
2735
+ querySource: options.querySource,
2736
+ llmSpan,
2737
+ fastMode: isFastModeRequest,
2738
+ previousRequestId,
2739
+ })
2740
+
2741
+ if (error instanceof APIUserAbortError) {
2742
+ releaseStreamResources()
2743
+ return
2744
+ }
2745
+
2746
+ yield getAssistantMessageFromError(error, errorModel, {
2747
+ messages,
2748
+ messagesForAPI,
2749
+ })
2750
+ releaseStreamResources()
2751
+ return
2752
+ }
2753
+ } else {
2754
+ // Original error handling for non-404 errors
2755
+ logForDebugging(`Error in API request: ${errorMessage(errorFromRetry)}`, {
2756
+ level: 'error',
2757
+ })
2758
+
2759
+ let error = errorFromRetry
2760
+ let errorModel = options.model
2761
+ if (errorFromRetry instanceof CannotRetryError) {
2762
+ error = errorFromRetry.originalError
2763
+ errorModel = errorFromRetry.retryContext.model
2764
+ }
2765
+
2766
+ // Extract quota status from error headers if it's a rate limit error
2767
+ if (error instanceof APIError) {
2768
+ extractQuotaStatusFromError(error)
2769
+ }
2770
+
2771
+ // Extract requestId from stream, error header, or error body
2772
+ const requestId =
2773
+ streamRequestId ||
2774
+ (error instanceof APIError ? error.requestID : undefined) ||
2775
+ (error instanceof APIError
2776
+ ? (error.error as { request_id?: string })?.request_id
2777
+ : undefined)
2778
+
2779
+ logAPIError({
2780
+ error,
2781
+ model: errorModel,
2782
+ messageCount: messagesForAPI.length,
2783
+ messageTokens: tokenCountFromLastAPIResponse(messagesForAPI),
2784
+ durationMs: Date.now() - start,
2785
+ durationMsIncludingRetries: Date.now() - startIncludingRetries,
2786
+ attempt: attemptNumber,
2787
+ requestId,
2788
+ clientRequestId,
2789
+ didFallBackToNonStreaming,
2790
+ queryTracking: options.queryTracking,
2791
+ querySource: options.querySource,
2792
+ llmSpan,
2793
+ fastMode: isFastModeRequest,
2794
+ previousRequestId,
2795
+ })
2796
+
2797
+ // Don't yield an assistant error message for user aborts
2798
+ // The interruption message is handled in query.ts
2799
+ if (error instanceof APIUserAbortError) {
2800
+ releaseStreamResources()
2801
+ return
2802
+ }
2803
+
2804
+ yield getAssistantMessageFromError(error, errorModel, {
2805
+ messages,
2806
+ messagesForAPI,
2807
+ })
2808
+ releaseStreamResources()
2809
+ return
2810
+ }
2811
+ } finally {
2812
+ stopSessionActivity('api_call')
2813
+ // Must be in the finally block: if the generator is terminated early
2814
+ // via .return() (e.g. consumer breaks out of for-await-of, or query.ts
2815
+ // encounters an abort), code after the try/finally never executes.
2816
+ // Without this, the Response object's native TLS/socket buffers leak
2817
+ // until the generator itself is GC'd (see GH #32920).
2818
+ releaseStreamResources()
2819
+
2820
+ // Non-streaming fallback cost: the streaming path tracks cost in the
2821
+ // message_delta handler before any yield. Fallback pushes to newMessages
2822
+ // then yields, so tracking must be here to survive .return() at the yield.
2823
+ if (fallbackMessage) {
2824
+ const fallbackUsage = fallbackMessage.message.usage
2825
+ usage = updateUsage(EMPTY_USAGE, fallbackUsage)
2826
+ stopReason = fallbackMessage.message.stop_reason
2827
+ const fallbackCost = calculateUSDCost(resolvedModel, fallbackUsage)
2828
+ costUSD += addToTotalSessionCost(
2829
+ fallbackCost,
2830
+ fallbackUsage,
2831
+ options.model,
2832
+ )
2833
+ }
2834
+ }
2835
+
2836
+ // Mark all registered tools as sent to API so they become eligible for deletion
2837
+ if (feature('CACHED_MICROCOMPACT') && cachedMCEnabled) {
2838
+ markToolsSentToAPIState()
2839
+ }
2840
+
2841
+ // Track the last requestId for the main conversation chain so shutdown
2842
+ // can send a cache eviction hint to inference. Exclude backgrounded
2843
+ // sessions (Ctrl+B) which share the repl_main_thread querySource but
2844
+ // run inside an agent context — they are independent conversation chains
2845
+ // whose cache should not be evicted when the foreground session clears.
2846
+ if (
2847
+ streamRequestId &&
2848
+ !getAgentContext() &&
2849
+ (options.querySource.startsWith('repl_main_thread') ||
2850
+ options.querySource === 'sdk')
2851
+ ) {
2852
+ setLastMainRequestId(streamRequestId)
2853
+ }
2854
+
2855
+ // Precompute scalars so the fire-and-forget .then() closure doesn't pin the
2856
+ // full messagesForAPI array (the entire conversation up to the context window
2857
+ // limit) until getToolPermissionContext() resolves.
2858
+ const logMessageCount = messagesForAPI.length
2859
+ const logMessageTokens = tokenCountFromLastAPIResponse(messagesForAPI)
2860
+ void options.getToolPermissionContext().then(permissionContext => {
2861
+ logAPISuccessAndDuration({
2862
+ model:
2863
+ newMessages[0]?.message.model ?? partialMessage?.model ?? options.model,
2864
+ preNormalizedModel: options.model,
2865
+ usage,
2866
+ start,
2867
+ startIncludingRetries,
2868
+ attempt: attemptNumber,
2869
+ messageCount: logMessageCount,
2870
+ messageTokens: logMessageTokens,
2871
+ requestId: streamRequestId ?? null,
2872
+ stopReason,
2873
+ ttftMs,
2874
+ didFallBackToNonStreaming,
2875
+ querySource: options.querySource,
2876
+ headers: responseHeaders,
2877
+ costUSD,
2878
+ queryTracking: options.queryTracking,
2879
+ permissionMode: permissionContext.mode,
2880
+ // Pass newMessages for beta tracing - extraction happens in logging.ts
2881
+ // only when beta tracing is enabled
2882
+ newMessages,
2883
+ llmSpan,
2884
+ globalCacheStrategy,
2885
+ requestSetupMs: start - startIncludingRetries,
2886
+ attemptStartTimes,
2887
+ fastMode: isFastModeRequest,
2888
+ previousRequestId,
2889
+ betas: lastRequestBetas,
2890
+ })
2891
+ })
2892
+
2893
+ // Defensive: also release on normal completion (no-op if finally already ran).
2894
+ releaseStreamResources()
2895
+ }
2896
+
2897
/**
 * Best-effort teardown of a raw message stream.
 *
 * Aborts the stream through its own controller unless that controller's
 * signal already reports `aborted`. A missing stream is a no-op, and any
 * exception raised while inspecting or aborting the controller is ignored
 * (the stream may already be closed).
 *
 * @param stream - The stream to abort, or `undefined` when none was created.
 * @internal Exported for testing
 */
export function cleanupStream(
  stream: Stream<BetaRawMessageStreamEvent> | undefined,
): void {
  if (stream) {
    try {
      const { controller } = stream
      // Only trigger the abort once; an already-aborted signal needs no work.
      if (controller.signal.aborted) {
        return
      }
      controller.abort()
    } catch {
      // Deliberately swallowed - the stream may already be closed.
    }
  }
}
2916
+
2917
/**
 * Merges the usage reported by a streaming event into the running usage and
 * returns the result as a new object (the `usage` argument is not mutated).
 *
 * Note: Anthropic's streaming API reports cumulative usage totals rather than
 * incremental deltas, so each event carries the complete usage so far.
 *
 * Input-side counters (input_tokens, cache_creation_input_tokens,
 * cache_read_input_tokens) are typically fixed at message_start. Later
 * message_delta events may carry explicit 0 values for them, which must not
 * clobber the message_start values - so those counters are replaced only by
 * a non-null value greater than zero. The remaining fields use the incoming
 * value when it is present (not null/undefined) and the previous one
 * otherwise; `service_tier` and `inference_geo` are always carried over from
 * `usage`.
 *
 * @param usage - Usage accumulated so far.
 * @param partUsage - Usage attached to the current event, if any.
 * @returns A fresh usage object; a shallow copy of `usage` when `partUsage`
 *   is missing.
 */
export function updateUsage(
  usage: Readonly<NonNullableUsage>,
  partUsage: BetaMessageDeltaUsage | undefined,
): NonNullableUsage {
  if (!partUsage) {
    return { ...usage }
  }

  // Takes the incoming counter only when it is a real, positive number;
  // null / undefined / 0 all fall back to the previously recorded value.
  const positiveOr = (
    incoming: number | null | undefined,
    previous: number,
  ): number => (incoming != null && incoming > 0 ? incoming : previous)

  const incomingServerTools = partUsage.server_tool_use
  // SDK type BetaMessageDeltaUsage is missing cache_creation, but it's real!
  const incomingCacheCreation = (partUsage as BetaUsage).cache_creation

  return {
    input_tokens: positiveOr(partUsage.input_tokens, usage.input_tokens),
    cache_creation_input_tokens: positiveOr(
      partUsage.cache_creation_input_tokens,
      usage.cache_creation_input_tokens,
    ),
    cache_read_input_tokens: positiveOr(
      partUsage.cache_read_input_tokens,
      usage.cache_read_input_tokens,
    ),
    output_tokens: partUsage.output_tokens ?? usage.output_tokens,
    server_tool_use: {
      web_search_requests:
        incomingServerTools?.web_search_requests ??
        usage.server_tool_use.web_search_requests,
      web_fetch_requests:
        incomingServerTools?.web_fetch_requests ??
        usage.server_tool_use.web_fetch_requests,
    },
    service_tier: usage.service_tier,
    cache_creation: {
      ephemeral_1h_input_tokens:
        incomingCacheCreation?.ephemeral_1h_input_tokens ??
        usage.cache_creation.ephemeral_1h_input_tokens,
      ephemeral_5m_input_tokens:
        incomingCacheCreation?.ephemeral_5m_input_tokens ??
        usage.cache_creation.ephemeral_5m_input_tokens,
    },
    // cache_deleted_input_tokens is returned by the API when cache editing
    // deletes KV cache content, but it is not in the SDK types. It is kept
    // off NonNullableUsage and every reference to it stays inside this
    // feature-gated branch so the string is eliminated from external builds
    // by dead code elimination. It follows the same "> 0" rule as the other
    // token counters so message_delta cannot overwrite the real value with 0.
    ...(feature('CACHED_MICROCOMPACT')
      ? {
          cache_deleted_input_tokens: positiveOr(
            (partUsage as unknown as { cache_deleted_input_tokens?: number })
              .cache_deleted_input_tokens,
            (usage as unknown as { cache_deleted_input_tokens?: number })
              .cache_deleted_input_tokens ?? 0,
          ),
        }
      : {}),
    inference_geo: usage.inference_geo,
    iterations: partUsage.iterations ?? usage.iterations,
    speed: (partUsage as BetaUsage).speed ?? usage.speed,
  }
}
2991
+
2992
/**
 * Adds the usage of a single message onto a running total, so that usage can
 * be tracked cumulatively across several assistant turns.
 *
 * Token and request counters are summed. Descriptive fields (service_tier,
 * inference_geo, iterations, speed) are taken from `messageUsage`, i.e. the
 * most recent message wins.
 *
 * @param totalUsage - Running total so far; it is not mutated.
 * @param messageUsage - Usage of the message being added; it is not mutated.
 * @returns A new usage object holding the combined values.
 */
export function accumulateUsage(
  totalUsage: Readonly<NonNullableUsage>,
  messageUsage: Readonly<NonNullableUsage>,
): NonNullableUsage {
  // Shape of the extra field that is deliberately kept off NonNullableUsage.
  type WithCacheDeleted = { cache_deleted_input_tokens?: number }

  const { server_tool_use: totalTools, cache_creation: totalCache } = totalUsage
  const { server_tool_use: latestTools, cache_creation: latestCache } =
    messageUsage

  return {
    input_tokens: totalUsage.input_tokens + messageUsage.input_tokens,
    cache_creation_input_tokens:
      totalUsage.cache_creation_input_tokens +
      messageUsage.cache_creation_input_tokens,
    cache_read_input_tokens:
      totalUsage.cache_read_input_tokens + messageUsage.cache_read_input_tokens,
    output_tokens: totalUsage.output_tokens + messageUsage.output_tokens,
    server_tool_use: {
      web_search_requests:
        totalTools.web_search_requests + latestTools.web_search_requests,
      web_fetch_requests:
        totalTools.web_fetch_requests + latestTools.web_fetch_requests,
    },
    // Most recent service tier wins.
    service_tier: messageUsage.service_tier,
    cache_creation: {
      ephemeral_1h_input_tokens:
        totalCache.ephemeral_1h_input_tokens +
        latestCache.ephemeral_1h_input_tokens,
      ephemeral_5m_input_tokens:
        totalCache.ephemeral_5m_input_tokens +
        latestCache.ephemeral_5m_input_tokens,
    },
    // cache_deleted_input_tokens is not declared on NonNullableUsage so that
    // the string stays out of external builds; every runtime access is kept
    // inside this feature-gated branch. A missing value counts as 0.
    ...(feature('CACHED_MICROCOMPACT')
      ? {
          cache_deleted_input_tokens:
            ((totalUsage as unknown as WithCacheDeleted)
              .cache_deleted_input_tokens ?? 0) +
            ((messageUsage as unknown as WithCacheDeleted)
              .cache_deleted_input_tokens ?? 0),
        }
      : {}),
    // Most recent values win for the remaining descriptive fields.
    inference_geo: messageUsage.inference_geo,
    iterations: messageUsage.iterations,
    speed: messageUsage.speed,
  }
}
3042
+
3043
/**
 * Type guard for content blocks that are tool results.
 *
 * A value qualifies when it is a non-null object whose `type` property equals
 * 'tool_result' and which has a `tool_use_id` property. Only the presence of
 * `tool_use_id` is checked, not its runtime type.
 */
function isToolResultBlock(
  block: unknown,
): block is { type: 'tool_result'; tool_use_id: string } {
  if (block === null || typeof block !== 'object') {
    return false
  }
  if (!('type' in block)) {
    return false
  }
  if ((block as { type: string }).type !== 'tool_result') {
    return false
  }
  return 'tool_use_id' in block
}
3054
+
3055
/**
 * A `cache_edits` content block: a list of delete operations, each naming the
 * cache_reference it removes.
 */
type CachedMCEditsBlock = {
  type: 'cache_edits'
  edits: Array<{ type: 'delete'; cache_reference: string }>
}

/**
 * A cache_edits block together with the index of the message it was placed
 * in, so the same block can be re-inserted at that position on later requests.
 */
type CachedMCPinnedEdits = {
  /** Index into the request's message list; expected to point at a user message. */
  userMessageIndex: number
  /** The block to re-insert at that message. */
  block: CachedMCEditsBlock
}
3064
+
3065
/**
 * Converts internal user/assistant messages into API message params, flagging
 * exactly one message as the cache marker position, and optionally layers the
 * cached-microcompact bookkeeping (cache_edits blocks and cache_reference
 * tags) on top.
 *
 * Exported for testing cache_reference placement constraints.
 *
 * @param messages - Conversation messages to convert, in order.
 * @param enablePromptCaching - Forwarded to the per-message converters; also
 *   gates the cache_reference tagging step.
 * @param querySource - Forwarded unchanged to the per-message converters.
 * @param useCachedMC - When false, the converted messages are returned as-is
 *   and none of the cache_edits / cache_reference steps run.
 * @param newCacheEdits - New cache_edits block to attach to the last user
 *   message and to pin via pinCacheEdits.
 * @param pinnedEdits - Previously pinned blocks to re-insert at their
 *   recorded message indices.
 * @param skipCacheWrite - When true, the marker moves from the last message to
 *   the second-to-last one.
 * @returns The converted message params.
 */
export function addCacheBreakpoints(
  messages: (UserMessage | AssistantMessage)[],
  enablePromptCaching: boolean,
  querySource?: QuerySource,
  useCachedMC = false,
  newCacheEdits?: CachedMCEditsBlock | null,
  pinnedEdits?: CachedMCPinnedEdits[],
  skipCacheWrite = false,
): MessageParam[] {
  logEvent('tengu_api_cache_breakpoints', {
    totalMessageCount: messages.length,
    cachingEnabled: enablePromptCaching,
    skipCacheWrite,
  })

  // Exactly one message-level cache_control marker per request. Mycro's
  // turn-to-turn eviction (page_manager/index.rs: Index::insert) frees
  // local-attention KV pages at any cached prefix position NOT in
  // cache_store_int_token_boundaries. A second marker would protect the
  // second-to-last position and keep its locals alive one extra turn even
  // though nothing ever resumes from there; with a single marker they are
  // freed immediately. Fire-and-forget forks (skipCacheWrite) place the
  // marker on the second-to-last message instead: that is the last point of
  // the shared prefix, so the write is a no-op merge on mycro (the entry
  // already exists) and the fork does not leave its own tail in the KVCC.
  // Dense pages are refcounted and survive via the new hash either way.
  const markerOffset = skipCacheWrite ? 2 : 1
  const markerIndex = messages.length - markerOffset
  const result = messages.map((msg, index) => {
    const addCache = index === markerIndex
    return msg.type === 'user'
      ? userMessageToMessageParam(
          msg,
          addCache,
          enablePromptCaching,
          querySource,
        )
      : assistantMessageToMessageParam(
          msg,
          addCache,
          enablePromptCaching,
          querySource,
        )
  })

  if (!useCachedMC) {
    return result
  }

  // Every cache_reference already scheduled for deletion in this request, so
  // that no reference is deleted twice across blocks.
  const alreadyDeleted = new Set<string>()

  // Returns a copy of `block` without the deletions that an earlier block
  // already covers, and records the ones it keeps.
  const dropSeenDeletes = (block: CachedMCEditsBlock): CachedMCEditsBlock => {
    const keptEdits: CachedMCEditsBlock['edits'] = []
    for (const edit of block.edits) {
      if (alreadyDeleted.has(edit.cache_reference)) {
        continue
      }
      alreadyDeleted.add(edit.cache_reference)
      keptEdits.push(edit)
    }
    return { ...block, edits: keptEdits }
  }

  // Put previously pinned cache_edits back at the positions they were pinned to.
  for (const { userMessageIndex, block } of pinnedEdits ?? []) {
    const target = result[userMessageIndex]
    if (!target || target.role !== 'user') {
      continue
    }
    if (!Array.isArray(target.content)) {
      // Plain-string content becomes a one-element block list so the
      // cache_edits block can sit next to the text.
      target.content = [{ type: 'text', text: target.content as string }]
    }
    const remaining = dropSeenDeletes(block)
    if (remaining.edits.length > 0) {
      insertBlockAfterToolResults(target.content, remaining)
    }
  }

  // Attach the new cache_edits to the last user message and pin them.
  if (newCacheEdits && result.length > 0) {
    const freshEdits = dropSeenDeletes(newCacheEdits)
    if (freshEdits.edits.length > 0) {
      let lastUserIndex = result.length - 1
      while (lastUserIndex >= 0 && result[lastUserIndex]?.role !== 'user') {
        lastUserIndex--
      }
      // Undefined when the walk ran off the front (no user message at all).
      const lastUser = result[lastUserIndex]
      if (lastUser) {
        if (!Array.isArray(lastUser.content)) {
          lastUser.content = [
            { type: 'text', text: lastUser.content as string },
          ]
        }
        insertBlockAfterToolResults(lastUser.content, freshEdits)
        // Pin so this block is re-sent at the same position in future calls.
        // The block handed to pinCacheEdits is the caller's original one, not
        // the deduplicated copy.
        pinCacheEdits(lastUserIndex, newCacheEdits)

        logForDebugging(
          `Added cache_edits block with ${freshEdits.edits.length} deletion(s) to message[${lastUserIndex}]: ${freshEdits.edits.map(e => e.cache_reference).join(', ')}`,
        )
      }
    }
  }

  // Tag tool_result blocks inside the cached prefix with a cache_reference.
  // This has to run AFTER the cache_edits insertion above, because that step
  // modifies the content arrays.
  if (enablePromptCaching) {
    // Index of the last message holding a block with a cache_control key.
    let lastMarkedIndex = -1
    result.forEach((msg, index) => {
      if (
        Array.isArray(msg.content) &&
        msg.content.some(
          block => block && typeof block === 'object' && 'cache_control' in block,
        )
      ) {
        lastMarkedIndex = index
      }
    })

    // The API requires cache_reference to appear "before or on" the last
    // cache_control. Only messages strictly before it are tagged, to avoid
    // edge cases where cache_edits splicing shifts block indices.
    if (lastMarkedIndex >= 0) {
      for (const msg of result.slice(0, lastMarkedIndex)) {
        if (msg.role !== 'user' || !Array.isArray(msg.content)) {
          continue
        }
        if (!msg.content.some(block => block && isToolResultBlock(block))) {
          continue
        }
        // Build a new array with new tool_result objects instead of mutating
        // in place, so blocks reused by secondary queries that run on models
        // without cache_editing support are not contaminated.
        msg.content = msg.content.map(block =>
          block && isToolResultBlock(block)
            ? Object.assign({}, block, { cache_reference: block.tool_use_id })
            : block,
        )
      }
    }
  }

  return result
}
3215
+
3216
/**
 * Turns a system prompt into the list of text blocks sent to the API.
 *
 * The prompt is split by splitSysPromptPrefix; each resulting section becomes
 * one text block. A section receives a cache_control entry only when prompt
 * caching is enabled and the section has a non-null cacheScope.
 *
 * @param systemPrompt - The system prompt to split.
 * @param enablePromptCaching - Whether cache_control may be attached at all.
 * @param options - skipGlobalCacheForSystemPrompt is forwarded to the
 *   splitter; querySource is forwarded to getCacheControl.
 * @returns One text block per section, in the splitter's order.
 */
export function buildSystemPromptBlocks(
  systemPrompt: SystemPrompt,
  enablePromptCaching: boolean,
  options?: {
    skipGlobalCacheForSystemPrompt?: boolean
    querySource?: QuerySource
  },
): TextBlockParam[] {
  // IMPORTANT: Do not add any more blocks for caching or you will get a 400
  const sections = splitSysPromptPrefix(systemPrompt, {
    skipGlobalCacheForSystemPrompt: options?.skipGlobalCacheForSystemPrompt,
  })

  return sections.map((section): TextBlockParam => {
    const plainBlock = { type: 'text' as const, text: section.text }
    if (!enablePromptCaching || section.cacheScope === null) {
      return plainBlock
    }
    return {
      ...plainBlock,
      cache_control: getCacheControl({
        scope: section.cacheScope,
        querySource: options?.querySource,
      }),
    }
  })
}
3241
+
3242
type HaikuOptions = Omit<Options, 'model' | 'getToolPermissionContext'>

/**
 * Sends a single user prompt to the small/fast model (getSmallFastModel) as a
 * non-streaming request with thinking disabled and no tools.
 *
 * The request runs inside withVCR. The message list handed to withVCR holds
 * the system prompt (as a user message of text blocks) followed by the user
 * prompt, while the request itself carries only the user prompt as a message
 * and passes the system prompt separately.
 *
 * @param systemPrompt - System prompt; defaults to an empty one.
 * @param userPrompt - Text of the single user message.
 * @param outputFormat - Optional JSON output format forwarded to the request.
 * @param signal - Abort signal forwarded to the request.
 * @param options - Request options; prompt caching is off unless
 *   `enablePromptCaching` is set, and an empty tool-permission context is used.
 * @returns The assistant message produced by the model.
 */
export async function queryHaiku({
  systemPrompt = asSystemPrompt([]),
  userPrompt,
  outputFormat,
  signal,
  options,
}: {
  systemPrompt: SystemPrompt
  userPrompt: string
  outputFormat?: BetaJSONOutputFormat
  signal: AbortSignal
  options: HaikuOptions
}): Promise<AssistantMessage> {
  // Messages passed as withVCR's first argument: system prompt, then user prompt.
  const vcrMessages = [
    createUserMessage({
      content: systemPrompt.map(text => ({ type: 'text', text })),
    }),
    createUserMessage({
      content: userPrompt,
    }),
  ]

  const runRequest = async () => {
    const response = await queryModelWithoutStreaming({
      messages: [
        createUserMessage({
          content: userPrompt,
        }),
      ],
      systemPrompt,
      thinkingConfig: { type: 'disabled' },
      tools: [],
      signal,
      options: {
        ...options,
        model: getSmallFastModel(),
        enablePromptCaching: options.enablePromptCaching ?? false,
        outputFormat,
        getToolPermissionContext: async () => getEmptyToolPermissionContext(),
      },
    })
    return [response]
  }

  const recorded = await withVCR(vcrMessages, runRequest)
  // Haiku is never streamed here, so the first entry is the assistant message.
  return recorded[0]! as AssistantMessage
}
3295
+
3296
type QueryWithModelOptions = Omit<Options, 'getToolPermissionContext'>

/**
 * Query a specific model through the Claude Code infrastructure.
 *
 * Unlike a direct API call, this goes through the full query pipeline,
 * including proper authentication, betas, and headers. The request is
 * non-streaming, with thinking disabled and no tools, and runs inside withVCR.
 * The message list handed to withVCR holds the system prompt (as a user
 * message of text blocks) followed by the user prompt, while the request
 * itself carries only the user prompt as a message.
 *
 * @param systemPrompt - System prompt; defaults to an empty one.
 * @param userPrompt - Text of the single user message.
 * @param outputFormat - Optional JSON output format forwarded to the request.
 * @param signal - Abort signal forwarded to the request.
 * @param options - Request options, including the model to use; prompt caching
 *   is off unless `enablePromptCaching` is set, and an empty tool-permission
 *   context is used.
 * @returns The assistant message produced by the model.
 */
export async function queryWithModel({
  systemPrompt = asSystemPrompt([]),
  userPrompt,
  outputFormat,
  signal,
  options,
}: {
  systemPrompt: SystemPrompt
  userPrompt: string
  outputFormat?: BetaJSONOutputFormat
  signal: AbortSignal
  options: QueryWithModelOptions
}): Promise<AssistantMessage> {
  // Messages passed as withVCR's first argument: system prompt, then user prompt.
  const vcrMessages = [
    createUserMessage({
      content: systemPrompt.map(text => ({ type: 'text', text })),
    }),
    createUserMessage({
      content: userPrompt,
    }),
  ]

  const runRequest = async () => {
    const response = await queryModelWithoutStreaming({
      messages: [
        createUserMessage({
          content: userPrompt,
        }),
      ],
      systemPrompt,
      thinkingConfig: { type: 'disabled' },
      tools: [],
      signal,
      options: {
        ...options,
        enablePromptCaching: options.enablePromptCaching ?? false,
        outputFormat,
        getToolPermissionContext: async () => getEmptyToolPermissionContext(),
      },
    })
    return [response]
  }

  const recorded = await withVCR(vcrMessages, runRequest)
  return recorded[0]! as AssistantMessage
}
3352
+
3353
/**
 * Upper bound on max_tokens for non-streaming requests.
 *
 * Non-streaming requests are limited to 10 minutes per the docs:
 * https://platform.claude.com/docs/en/api/errors#long-requests
 * The SDK derives its own 21333-token cap from that limit
 * (10min × 128k tokens/hour). Because a client-level timeout is set, the SDK
 * cap is bypassed and a higher cap can be used here.
 */
export const MAX_NON_STREAMING_TOKENS = 64_000
3358
+
3359
/**
 * Caps max_tokens for the non-streaming fallback and, when thinking is
 * enabled, lowers the thinking budget so that the API constraint
 * `max_tokens > thinking.budget_tokens` still holds after capping.
 *
 * @param params - The parameters that will be sent to the API; not mutated.
 * @param maxTokensCap - The maximum allowed tokens (MAX_NON_STREAMING_TOKENS).
 * @returns A copy of `params` with max_tokens capped and, if needed, the
 *   thinking budget reduced to at most one less than the capped max_tokens.
 */
export function adjustParamsForNonStreaming<
  T extends {
    max_tokens: number
    thinking?: BetaMessageStreamParams['thinking']
  },
>(params: T, maxTokensCap: number): T {
  const cappedMaxTokens = Math.min(params.max_tokens, maxTokensCap)
  const thinking: BetaMessageStreamParams['thinking'] = params.thinking

  // Nothing to rebalance unless thinking is enabled with a non-zero budget.
  if (thinking?.type !== 'enabled' || !thinking.budget_tokens) {
    return { ...params, max_tokens: cappedMaxTokens }
  }

  return {
    ...params,
    thinking: {
      ...thinking,
      // The budget must be at least 1 less than max_tokens.
      budget_tokens: Math.min(thinking.budget_tokens, cappedMaxTokens - 1),
    },
    max_tokens: cappedMaxTokens,
  }
}
3396
+
3397
/**
 * Reports whether the capped default for max output tokens is switched on,
 * as read from the 'tengu_otk_slot_v1' feature value.
 */
function isMaxTokensCapEnabled(): boolean {
  // 3P default: false (not validated on Bedrock/Vertex)
  const valueWhenUnset = false
  return getFeatureValue_CACHED_MAY_BE_STALE('tengu_otk_slot_v1', valueWhenUnset)
}
3401
+
3402
/**
 * Resolves the max output tokens to request for a model.
 *
 * The starting point is the model's own default, optionally lowered by the
 * slot-reservation cap; the CLAUDE_CODE_MAX_OUTPUT_TOKENS environment
 * variable is then validated against the model's upper limit by
 * validateBoundedIntEnvVar, whose effective value is returned.
 *
 * @param model - Model identifier passed to getModelMaxOutputTokens.
 * @returns The effective max output tokens.
 */
export function getMaxOutputTokensForModel(model: string): number {
  const modelLimits = getModelMaxOutputTokens(model)

  // Slot-reservation cap: drop the default to 8k for all models. BQ p99 output
  // = 4,911 tokens; 32k/64k defaults over-reserve 8-16× slot capacity.
  // Requests hitting the cap get one clean retry at 64k (query.ts
  // max_output_tokens_escalate). Math.min leaves models with lower native
  // defaults (e.g. claude-3-opus at 4k) at their native value. The cap is
  // applied before the env-var override so CLAUDE_CODE_MAX_OUTPUT_TOKENS
  // still wins.
  let defaultTokens: number
  if (isMaxTokensCapEnabled()) {
    defaultTokens = Math.min(modelLimits.default, CAPPED_DEFAULT_MAX_TOKENS)
  } else {
    defaultTokens = modelLimits.default
  }

  const { effective } = validateBoundedIntEnvVar(
    'CLAUDE_CODE_MAX_OUTPUT_TOKENS',
    process.env.CLAUDE_CODE_MAX_OUTPUT_TOKENS,
    defaultTokens,
    modelLimits.upperLimit,
  )
  return effective
}