ummaya 0.2.4 → 0.2.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (482)
  1. package/README.md +15 -2
  2. package/bin/ummaya +10 -1
  3. package/bun.lock +180 -244
  4. package/npm-shrinkwrap.json +760 -1760
  5. package/package.json +39 -22
  6. package/prompts/manifest.yaml +1 -1
  7. package/prompts/system_v1.md +1 -0
  8. package/pyproject.toml +27 -2
  9. package/specs/2803-document-production-hardening/contracts/document-tools.schema.json +1043 -0
  10. package/src/ummaya/_canonical/__init__.py +2 -0
  11. package/src/ummaya/_canonical/baselines.yaml +113 -0
  12. package/src/ummaya/engine/engine.py +29 -132
  13. package/src/ummaya/evidence/__init__.py +21 -2
  14. package/src/ummaya/evidence/dataset_contract.py +193 -0
  15. package/src/ummaya/evidence/document_authoring_cases.py +33 -0
  16. package/src/ummaya/evidence/document_harness.py +313 -0
  17. package/src/ummaya/evidence/document_viewer_ux.py +391 -0
  18. package/src/ummaya/evidence/gates.py +70 -0
  19. package/src/ummaya/evidence/json_types.py +20 -0
  20. package/src/ummaya/evidence/models.py +88 -1
  21. package/src/ummaya/evidence/output_payload.py +89 -0
  22. package/src/ummaya/evidence/payload_documents.py +233 -0
  23. package/src/ummaya/evidence/route_contracts.py +224 -0
  24. package/src/ummaya/evidence/route_helpers.py +150 -0
  25. package/src/ummaya/evidence/runner.py +81 -212
  26. package/src/ummaya/evidence/source_provenance.py +246 -0
  27. package/src/ummaya/evidence/source_provenance_redaction.py +176 -0
  28. package/src/ummaya/evidence/tool_layer.py +39 -0
  29. package/src/ummaya/evidence/tool_layer_models.py +151 -0
  30. package/src/ummaya/ipc/adapter_manifest_emitter.py +26 -10
  31. package/src/ummaya/ipc/document_intent_normalization.py +185 -0
  32. package/src/ummaya/ipc/frame_schema.py +5 -5
  33. package/src/ummaya/ipc/route_diagnostics.py +73 -0
  34. package/src/ummaya/ipc/stdio.py +1109 -477
  35. package/src/ummaya/llm/client.py +102 -3
  36. package/src/ummaya/llm/config.py +8 -3
  37. package/src/ummaya/primitives/__init__.py +6 -2
  38. package/src/ummaya/primitives/delegation.py +1 -1
  39. package/src/ummaya/primitives/document.py +28 -0
  40. package/src/ummaya/settings.py +0 -3
  41. package/src/ummaya/tools/discovery_bridge.py +17 -1
  42. package/src/ummaya/tools/documents/__init__.py +297 -0
  43. package/src/ummaya/tools/documents/adapter_registry.py +487 -0
  44. package/src/ummaya/tools/documents/archive_container_probe.py +167 -0
  45. package/src/ummaya/tools/documents/artifact_store.py +454 -0
  46. package/src/ummaya/tools/documents/authoring.py +283 -0
  47. package/src/ummaya/tools/documents/baselines.py +132 -0
  48. package/src/ummaya/tools/documents/capability.py +331 -0
  49. package/src/ummaya/tools/documents/contracts.py +112 -0
  50. package/src/ummaya/tools/documents/conversion.py +521 -0
  51. package/src/ummaya/tools/documents/diff.py +275 -0
  52. package/src/ummaya/tools/documents/engines.py +163 -0
  53. package/src/ummaya/tools/documents/evaluation.py +291 -0
  54. package/src/ummaya/tools/documents/explicit_values.py +108 -0
  55. package/src/ummaya/tools/documents/fixtures.py +174 -0
  56. package/src/ummaya/tools/documents/format_completion_audit.py +471 -0
  57. package/src/ummaya/tools/documents/formats/__init__.py +2 -0
  58. package/src/ummaya/tools/documents/formats/archive.py +528 -0
  59. package/src/ummaya/tools/documents/formats/base.py +41 -0
  60. package/src/ummaya/tools/documents/formats/code_file.py +211 -0
  61. package/src/ummaya/tools/documents/formats/data_file.py +272 -0
  62. package/src/ummaya/tools/documents/formats/hwp.py +284 -0
  63. package/src/ummaya/tools/documents/formats/hwpx.py +1837 -0
  64. package/src/ummaya/tools/documents/formats/odf.py +435 -0
  65. package/src/ummaya/tools/documents/formats/ooxml.py +1030 -0
  66. package/src/ummaya/tools/documents/formats/passive.py +766 -0
  67. package/src/ummaya/tools/documents/formats/pdf.py +702 -0
  68. package/src/ummaya/tools/documents/formats/text_web.py +268 -0
  69. package/src/ummaya/tools/documents/hwp_conversion_probe.py +178 -0
  70. package/src/ummaya/tools/documents/hwp_direct_candidate.py +141 -0
  71. package/src/ummaya/tools/documents/inspection.py +289 -0
  72. package/src/ummaya/tools/documents/intake.py +1079 -0
  73. package/src/ummaya/tools/documents/legacy_office_promotion_probe.py +366 -0
  74. package/src/ummaya/tools/documents/models.py +1598 -0
  75. package/src/ummaya/tools/documents/odf_promotion_probe.py +167 -0
  76. package/src/ummaya/tools/documents/orchestrator.py +96 -0
  77. package/src/ummaya/tools/documents/passive_capability_probe.py +251 -0
  78. package/src/ummaya/tools/documents/patch.py +170 -0
  79. package/src/ummaya/tools/documents/pdfa_conformance.py +284 -0
  80. package/src/ummaya/tools/documents/pdfa_promotion_probe.py +198 -0
  81. package/src/ummaya/tools/documents/permissions.py +110 -0
  82. package/src/ummaya/tools/documents/planner.py +616 -0
  83. package/src/ummaya/tools/documents/registry.py +2733 -0
  84. package/src/ummaya/tools/documents/render.py +978 -0
  85. package/src/ummaya/tools/documents/render_comparison.py +113 -0
  86. package/src/ummaya/tools/documents/render_comparison_models.py +74 -0
  87. package/src/ummaya/tools/documents/render_comparison_regions.py +73 -0
  88. package/src/ummaya/tools/documents/render_comparison_style.py +161 -0
  89. package/src/ummaya/tools/documents/reread.py +157 -0
  90. package/src/ummaya/tools/documents/runtime_authoring.py +244 -0
  91. package/src/ummaya/tools/documents/runtime_authoring_bundle.py +76 -0
  92. package/src/ummaya/tools/documents/scorecard.py +184 -0
  93. package/src/ummaya/tools/documents/socratic_planner.py +193 -0
  94. package/src/ummaya/tools/documents/style.py +48 -0
  95. package/src/ummaya/tools/documents/tool_defs.py +523 -0
  96. package/src/ummaya/tools/documents/validate.py +347 -0
  97. package/src/ummaya/tools/executor.py +29 -0
  98. package/src/ummaya/tools/live_proxy.py +0 -3
  99. package/src/ummaya/tools/models.py +5 -1
  100. package/src/ummaya/tools/register_all.py +8 -0
  101. package/src/ummaya/tools/registry.py +10 -1
  102. package/src/ummaya/tools/routing/__init__.py +59 -0
  103. package/src/ummaya/tools/routing/builder.py +105 -0
  104. package/src/ummaya/tools/routing/cards.py +29 -0
  105. package/src/ummaya/tools/routing/decision_service.py +534 -0
  106. package/src/ummaya/tools/routing/decision_types.py +74 -0
  107. package/src/ummaya/tools/routing/feasibility.py +122 -0
  108. package/src/ummaya/tools/routing/intent.py +17 -0
  109. package/src/ummaya/tools/routing/intent_extractor.py +207 -0
  110. package/src/ummaya/tools/routing/intent_patterns.py +160 -0
  111. package/src/ummaya/tools/routing/intent_public_data.py +150 -0
  112. package/src/ummaya/tools/routing/intent_types.py +48 -0
  113. package/src/ummaya/tools/routing/lint.py +78 -0
  114. package/src/ummaya/tools/routing/metadata.py +174 -0
  115. package/src/ummaya/tools/routing/projection.py +340 -0
  116. package/src/ummaya/tools/routing/retrieval_policy.py +629 -0
  117. package/src/ummaya/tools/routing/schema.py +81 -0
  118. package/src/ummaya/tools/routing/types.py +96 -0
  119. package/src/ummaya/tools/routing_index.py +2 -2
  120. package/src/ummaya/tools/search.py +34 -746
  121. package/tests/fixtures/documents/public_forms/baselines.yaml +113 -0
  122. package/tui/bun.lock +126 -305
  123. package/tui/package.json +35 -22
  124. package/tui/src/.cc-byte-identical-whitelist.yaml +266 -0
  125. package/tui/src/QueryEngine.ts +12 -8
  126. package/tui/src/bridge/inboundAttachments.ts +3 -3
  127. package/tui/src/cli/handlers/auth.ts +3 -12
  128. package/tui/src/cli/handlers/mcp.tsx +0 -1
  129. package/tui/src/cli/print.ts +8 -9
  130. package/tui/src/commands/insights.ts +1 -1
  131. package/tui/src/commands/install-github-app/types.ts +8 -30
  132. package/tui/src/commands/plugin/types.ts +6 -28
  133. package/tui/src/commands/plugin/unifiedTypes.ts +4 -26
  134. package/tui/src/commands/rename/generateSessionName.ts +1 -1
  135. package/tui/src/components/Feedback.tsx +1 -1
  136. package/tui/src/components/LogoV2/EmergencyTip.tsx +11 -2
  137. package/tui/src/components/LogoV2/WelcomeV2.tsx +1 -3
  138. package/tui/src/components/ScrollKeybindingHandler.tsx +6 -6
  139. package/tui/src/components/Spinner/types.ts +6 -28
  140. package/tui/src/components/agents/generateAgent.ts +1 -1
  141. package/tui/src/components/agents/new-agent-creation/types.ts +4 -26
  142. package/tui/src/components/config/EnvSecretIsolatedEditor.tsx +1 -1
  143. package/tui/src/components/mcp/types.ts +16 -38
  144. package/tui/src/components/messages/AssistantToolUseMessage.tsx +3 -2
  145. package/tui/src/components/messages/UserCrossSessionMessage.ts +16 -4
  146. package/tui/src/components/messages/UserForkBoilerplateMessage.ts +16 -4
  147. package/tui/src/components/messages/UserGitHubWebhookMessage.ts +16 -4
  148. package/tui/src/components/messages/UserToolResultMessage/utils.tsx +3 -2
  149. package/tui/src/components/permissions/MonitorPermissionRequest/MonitorPermissionRequest.ts +9 -4
  150. package/tui/src/components/permissions/ReviewArtifactPermissionRequest/ReviewArtifactPermissionRequest.ts +9 -4
  151. package/tui/src/components/primitive/DocumentSocraticReviewBlock.tsx +129 -0
  152. package/tui/src/components/primitive/DocumentToolResultCard.tsx +224 -0
  153. package/tui/src/components/primitive/documentSocraticReview.ts +215 -0
  154. package/tui/src/components/primitive/index.tsx +43 -1
  155. package/tui/src/components/primitive/types.ts +137 -0
  156. package/tui/src/components/ui/option.ts +4 -26
  157. package/tui/src/constants/common.ts +0 -2
  158. package/tui/src/constants/prompts.ts +4 -3
  159. package/tui/src/constants/querySource.ts +4 -26
  160. package/tui/src/entrypoints/sdk/controlTypes.ts +26 -48
  161. package/tui/src/entrypoints/sdk/coreTypes.generated.ts +3 -25
  162. package/tui/src/entrypoints/sdk/runtimeTypes.ts +38 -60
  163. package/tui/src/entrypoints/sdk/sdkUtilityTypes.ts +4 -26
  164. package/tui/src/entrypoints/sdk/settingsTypes.generated.ts +3 -25
  165. package/tui/src/entrypoints/sdk/toolTypes.ts +3 -25
  166. package/tui/src/hooks/toolPermission/handlers/interactiveHandler.ts +10 -0
  167. package/tui/src/hooks/useApiKeyVerification.ts +1 -1
  168. package/tui/src/hooks/useVirtualScroll.ts +1 -1
  169. package/tui/src/ink/ink.tsx +33 -14
  170. package/tui/src/ink/reconciler.ts +2 -3
  171. package/tui/src/ink/render-to-screen.ts +30 -10
  172. package/tui/src/ipc/bridge.ts +62 -15
  173. package/tui/src/ipc/bridgeSingleton.ts +5 -1
  174. package/tui/src/ipc/codec.ts +3 -3
  175. package/tui/src/ipc/frames.generated.ts +12 -12
  176. package/tui/src/ipc/llmClient.ts +151 -27
  177. package/tui/src/ipc/schema/frame.schema.json +1 -1
  178. package/tui/src/keybindings/defaultBindings.ts +4 -0
  179. package/tui/src/main.tsx +32 -15
  180. package/tui/src/native-ts/file-index/index.ts +33 -3
  181. package/tui/src/observability/surface.ts +2 -2
  182. package/tui/src/probes/toolRegistryProbe.tsx +3 -1
  183. package/tui/src/projectOnboardingState.ts +7 -6
  184. package/tui/src/query/chatMessageTypes.ts +18 -0
  185. package/tui/src/query/chatMessagesBuilder.ts +1 -1
  186. package/tui/src/query/deps.ts +1 -1
  187. package/tui/src/query/messageGuards.ts +106 -0
  188. package/tui/src/query/publicDataTerminalRepair.ts +384 -0
  189. package/tui/src/query/run.ts +1075 -0
  190. package/tui/src/query/supportBoundary.ts +168 -0
  191. package/tui/src/query/toolResultErrors.ts +103 -0
  192. package/tui/src/query/toolRunner.ts +687 -0
  193. package/tui/src/query/unavailableToolRepair.ts +118 -0
  194. package/tui/src/query.ts +9 -2186
  195. package/tui/src/screens/REPL.tsx +40 -29
  196. package/tui/src/services/api/adapterManifest.ts +4 -0
  197. package/tui/src/services/api/backendChat/events.ts +117 -0
  198. package/tui/src/services/api/backendChat/finalMessage.ts +40 -0
  199. package/tui/src/services/api/backendChat/frame.ts +9 -0
  200. package/tui/src/services/api/backendChat/streaming.ts +430 -0
  201. package/tui/src/services/api/backendChat/types.ts +62 -0
  202. package/tui/src/services/api/backendChat.ts +1 -0
  203. package/tui/src/services/api/client.ts +65 -2
  204. package/tui/src/services/api/errorUtils.ts +5 -5
  205. package/tui/src/services/api/errors.ts +1 -1
  206. package/tui/src/services/api/logging.ts +1 -1
  207. package/tui/src/services/api/ummaya/evidence.ts +194 -0
  208. package/tui/src/services/api/ummaya/messages.ts +255 -0
  209. package/tui/src/services/api/ummaya/nonStreaming.ts +66 -0
  210. package/tui/src/services/api/ummaya/provider.ts +200 -0
  211. package/tui/src/services/api/ummaya/reasoning.ts +24 -0
  212. package/tui/src/services/api/ummaya/request.ts +200 -0
  213. package/tui/src/services/api/ummaya/selectionContext.ts +240 -0
  214. package/tui/src/services/api/ummaya/streaming.ts +365 -0
  215. package/tui/src/services/api/ummaya/streamingPayload.ts +129 -0
  216. package/tui/src/services/api/ummaya/streamingReader.ts +40 -0
  217. package/tui/src/services/api/ummaya/toolSelection.ts +217 -0
  218. package/tui/src/services/api/ummaya/types.ts +110 -0
  219. package/tui/src/services/api/ummaya/usage.ts +30 -0
  220. package/tui/src/services/api/ummaya.ts +26 -418
  221. package/tui/src/services/api/withRetry.ts +1 -1
  222. package/tui/src/services/awaySummary.ts +2 -2
  223. package/tui/src/services/claudeAiLimits.ts +1 -1
  224. package/tui/src/services/compact/autoCompact.ts +1 -1
  225. package/tui/src/services/compact/compact.ts +1 -1
  226. package/tui/src/services/lsp/types.ts +8 -30
  227. package/tui/src/services/tips/types.ts +6 -28
  228. package/tui/src/services/tokenEstimation.ts +1 -1
  229. package/tui/src/services/toolRegistry/bootGuard.ts +5 -5
  230. package/tui/src/services/toolUseSummary/toolUseSummaryGenerator.ts +1 -1
  231. package/tui/src/services/tools/toolExecution.ts +94 -1
  232. package/tui/src/store/pendingPermissionSlot.ts +1 -1
  233. package/tui/src/store/session-store.ts +10 -36
  234. package/tui/src/stubs/any-stub.ts +15 -10
  235. package/tui/src/stubs/color-diff-napi.ts +37 -23
  236. package/tui/src/stubs/globals.d.ts +3 -3
  237. package/tui/src/stubs/macro-preload.ts +23 -12
  238. package/tui/src/tools/AdapterTool/AdapterTool.ts +1207 -714
  239. package/tui/src/tools/AdapterTool/routeDiagnostics.ts +75 -0
  240. package/tui/src/tools/AgentTool/AgentTool.tsx +84 -1371
  241. package/tui/src/tools/AgentTool/agentToolHandoff.ts +114 -0
  242. package/tui/src/tools/AgentTool/agentToolPartialResult.ts +16 -0
  243. package/tui/src/tools/AgentTool/agentToolProgress.ts +32 -0
  244. package/tui/src/tools/AgentTool/agentToolResolver.ts +161 -0
  245. package/tui/src/tools/AgentTool/agentToolResult.ts +163 -0
  246. package/tui/src/tools/AgentTool/agentToolUtils.ts +14 -686
  247. package/tui/src/tools/AgentTool/asyncAgentLifecycle.ts +208 -0
  248. package/tui/src/tools/AgentTool/asyncLifecycle.ts +153 -0
  249. package/tui/src/tools/AgentTool/backgroundedCompletion.ts +126 -0
  250. package/tui/src/tools/AgentTool/backgroundedLifecycle.ts +174 -0
  251. package/tui/src/tools/AgentTool/foregroundBackground.ts +83 -0
  252. package/tui/src/tools/AgentTool/foregroundDrain.tsx +133 -0
  253. package/tui/src/tools/AgentTool/foregroundFinalize.ts +98 -0
  254. package/tui/src/tools/AgentTool/foregroundLifecycle.tsx +237 -0
  255. package/tui/src/tools/AgentTool/foregroundProgress.tsx +169 -0
  256. package/tui/src/tools/AgentTool/foregroundTask.ts +89 -0
  257. package/tui/src/tools/AgentTool/forkSubagent.ts +1 -12
  258. package/tui/src/tools/AgentTool/forkSubagentGate.ts +34 -0
  259. package/tui/src/tools/AgentTool/launchRouting.ts +203 -0
  260. package/tui/src/tools/AgentTool/lifecycle.ts +244 -0
  261. package/tui/src/tools/AgentTool/mcpRouting.ts +73 -0
  262. package/tui/src/tools/AgentTool/orchestrationSupport.ts +70 -0
  263. package/tui/src/tools/AgentTool/permissions.ts +39 -0
  264. package/tui/src/tools/AgentTool/promptSetup.ts +181 -0
  265. package/tui/src/tools/AgentTool/remoteRouting.ts +62 -0
  266. package/tui/src/tools/AgentTool/resultMapping.ts +116 -0
  267. package/tui/src/tools/AgentTool/resumeAgent.ts +39 -107
  268. package/tui/src/tools/AgentTool/resumeAgentHelpers.ts +140 -0
  269. package/tui/src/tools/AgentTool/runAgent.ts +1 -1
  270. package/tui/src/tools/AgentTool/runtimeConfig.ts +57 -0
  271. package/tui/src/tools/AgentTool/schemas.ts +196 -0
  272. package/tui/src/tools/AgentTool/sourceVerificationPropagation.ts +263 -0
  273. package/tui/src/tools/AgentTool/worktreeLifecycle.ts +105 -0
  274. package/tui/src/tools/AskUserQuestionTool/AskUserQuestionTool.tsx +174 -202
  275. package/tui/src/tools/BashTool/BashTool.tsx +71 -1072
  276. package/tui/src/tools/BashTool/bashCommandHelpers.ts +12 -12
  277. package/tui/src/tools/BashTool/bashPermissions/astPreflight.ts +173 -0
  278. package/tui/src/tools/BashTool/bashPermissions/classifierChecks.ts +199 -0
  279. package/tui/src/tools/BashTool/bashPermissions/compoundGuards.ts +53 -0
  280. package/tui/src/tools/BashTool/bashPermissions/constants.ts +99 -0
  281. package/tui/src/tools/BashTool/bashPermissions/index.ts +38 -0
  282. package/tui/src/tools/BashTool/bashPermissions/legacyMisparsing.ts +62 -0
  283. package/tui/src/tools/BashTool/bashPermissions/main.ts +135 -0
  284. package/tui/src/tools/BashTool/bashPermissions/normalizedCommands.ts +33 -0
  285. package/tui/src/tools/BashTool/bashPermissions/operatorFlow.ts +98 -0
  286. package/tui/src/tools/BashTool/bashPermissions/permissionChecks.ts +200 -0
  287. package/tui/src/tools/BashTool/bashPermissions/prefixSuggestions.ts +88 -0
  288. package/tui/src/tools/BashTool/bashPermissions/promptClassifierRules.ts +125 -0
  289. package/tui/src/tools/BashTool/bashPermissions/ruleDelegates.ts +19 -0
  290. package/tui/src/tools/BashTool/bashPermissions/ruleMatching.ts +145 -0
  291. package/tui/src/tools/BashTool/bashPermissions/sandboxAutoAllow.ts +75 -0
  292. package/tui/src/tools/BashTool/bashPermissions/subcommandFlow.ts +205 -0
  293. package/tui/src/tools/BashTool/bashPermissions/subcommandGuards.ts +73 -0
  294. package/tui/src/tools/BashTool/bashPermissions/subcommandResultHelpers.ts +116 -0
  295. package/tui/src/tools/BashTool/bashPermissions/types.ts +26 -0
  296. package/tui/src/tools/BashTool/bashPermissions/wrapperStripping.ts +139 -0
  297. package/tui/src/tools/BashTool/bashPermissions.ts +26 -2621
  298. package/tui/src/tools/BashTool/call.ts +202 -0
  299. package/tui/src/tools/BashTool/callLoader.ts +35 -0
  300. package/tui/src/tools/BashTool/commandClassification.ts +151 -0
  301. package/tui/src/tools/BashTool/commandClassificationLoader.ts +40 -0
  302. package/tui/src/tools/BashTool/cwdReset.ts +33 -0
  303. package/tui/src/tools/BashTool/lineTruncation.ts +11 -0
  304. package/tui/src/tools/BashTool/modeValidation.ts +13 -1
  305. package/tui/src/tools/BashTool/outputPersistence.ts +42 -0
  306. package/tui/src/tools/BashTool/permissionClassification.ts +66 -0
  307. package/tui/src/tools/BashTool/permissionLoader.ts +44 -0
  308. package/tui/src/tools/BashTool/resultLoader.ts +29 -0
  309. package/tui/src/tools/BashTool/resultMapping.ts +83 -0
  310. package/tui/src/tools/BashTool/sandboxPolicy.ts +79 -0
  311. package/tui/src/tools/BashTool/schemas.ts +65 -0
  312. package/tui/src/tools/BashTool/sedEditExecution.ts +59 -0
  313. package/tui/src/tools/BashTool/shellExecution.tsx +245 -0
  314. package/tui/src/tools/BashTool/shellOutputUtils.ts +85 -0
  315. package/tui/src/tools/BashTool/shellPermissionGauntlet.ts +97 -0
  316. package/tui/src/tools/BashTool/uiLoader.ts +37 -0
  317. package/tui/src/tools/BriefTool/upload.ts +1 -1
  318. package/tui/src/tools/CalculatorTool/parser.ts +2 -2
  319. package/tui/src/tools/DocumentPrimitive/DocumentPrimitive.ts +262 -0
  320. package/tui/src/tools/DocumentPrimitive/dispatchNormalization.ts +270 -0
  321. package/tui/src/tools/DocumentPrimitive/documentDestinationPath.ts +18 -0
  322. package/tui/src/tools/DocumentPrimitive/documentMutationGuard.ts +22 -0
  323. package/tui/src/tools/DocumentPrimitive/documentPatchNormalization.ts +248 -0
  324. package/tui/src/tools/DocumentPrimitive/documentSourceVerification.ts +245 -0
  325. package/tui/src/tools/DocumentPrimitive/documentSourceVerificationFields.ts +103 -0
  326. package/tui/src/tools/DocumentPrimitive/modelVisibleOutput.ts +40 -0
  327. package/tui/src/tools/DocumentPrimitive/prompt.ts +35 -0
  328. package/tui/src/tools/FileEditTool/FileEditTool.ts +9 -507
  329. package/tui/src/tools/FileEditTool/call.ts +228 -0
  330. package/tui/src/tools/FileEditTool/validateInput.ts +196 -0
  331. package/tui/src/tools/FileReadTool/imageProcessor.ts +13 -0
  332. package/tui/src/tools/FileWriteTool/FileWriteTool.ts +7 -300
  333. package/tui/src/tools/FileWriteTool/call.ts +223 -0
  334. package/tui/src/tools/FileWriteTool/validateInput.ts +80 -0
  335. package/tui/src/tools/ListMcpResourcesTool/ListMcpResourcesTool.ts +19 -3
  336. package/tui/src/tools/LookupPrimitive/LookupPrimitive.ts +25 -32
  337. package/tui/src/tools/LookupPrimitive/prompt.ts +0 -2
  338. package/tui/src/tools/MCPTool/trustPolicy.ts +118 -0
  339. package/tui/src/tools/McpAuthTool/McpAuthTool.ts +21 -3
  340. package/tui/src/tools/NotebookEditTool/NotebookEditTool.ts +7 -326
  341. package/tui/src/tools/NotebookEditTool/call.ts +254 -0
  342. package/tui/src/tools/NotebookEditTool/notebookModel.ts +51 -0
  343. package/tui/src/tools/NotebookEditTool/validateInput.ts +142 -0
  344. package/tui/src/tools/PowerShellTool/PowerShellTool.tsx +46 -937
  345. package/tui/src/tools/PowerShellTool/acceptEditsCommandValidation.ts +162 -0
  346. package/tui/src/tools/PowerShellTool/call.ts +179 -0
  347. package/tui/src/tools/PowerShellTool/callLoader.ts +37 -0
  348. package/tui/src/tools/PowerShellTool/commandClassification.ts +86 -0
  349. package/tui/src/tools/PowerShellTool/modeValidation.ts +25 -332
  350. package/tui/src/tools/PowerShellTool/outputPersistence.ts +42 -0
  351. package/tui/src/tools/PowerShellTool/permissionClassification.ts +28 -0
  352. package/tui/src/tools/PowerShellTool/resultLoader.ts +31 -0
  353. package/tui/src/tools/PowerShellTool/resultMapping.ts +75 -0
  354. package/tui/src/tools/PowerShellTool/schemas.ts +40 -0
  355. package/tui/src/tools/PowerShellTool/shellExecution.tsx +258 -0
  356. package/tui/src/tools/PowerShellTool/symlinkModeValidation.ts +44 -0
  357. package/tui/src/tools/PowerShellTool/uiLoader.ts +37 -0
  358. package/tui/src/tools/PowerShellTool/validation.ts +39 -0
  359. package/tui/src/tools/ReadMcpResourceTool/ReadMcpResourceTool.ts +19 -3
  360. package/tui/src/tools/ResolveLocationPrimitive/ResolveLocationPrimitive.ts +1 -11
  361. package/tui/src/tools/ResolveLocationPrimitive/prompt.ts +2 -6
  362. package/tui/src/tools/SkillTool/SkillTool.ts +2 -2
  363. package/tui/src/tools/SubmitPrimitive/SubmitPrimitive.ts +27 -10
  364. package/tui/src/tools/TaskCreateTool/TaskCreateTool.ts +16 -2
  365. package/tui/src/tools/TaskGetTool/TaskGetTool.ts +23 -3
  366. package/tui/src/tools/TaskListTool/TaskListTool.ts +22 -4
  367. package/tui/src/tools/TaskOutputTool/TaskOutputTool.tsx +46 -547
  368. package/tui/src/tools/TaskOutputTool/lookup.ts +216 -0
  369. package/tui/src/tools/TaskOutputTool/render.tsx +257 -0
  370. package/tui/src/tools/TaskOutputTool/schemas.ts +55 -0
  371. package/tui/src/tools/TaskOutputTool/serialization.ts +36 -0
  372. package/tui/src/tools/TaskStopTool/TaskStopTool.ts +10 -0
  373. package/tui/src/tools/TaskUpdateTool/TaskUpdateTool.ts +14 -364
  374. package/tui/src/tools/TaskUpdateTool/completion.ts +62 -0
  375. package/tui/src/tools/TaskUpdateTool/schemas.ts +62 -0
  376. package/tui/src/tools/TaskUpdateTool/serialization.ts +46 -0
  377. package/tui/src/tools/TaskUpdateTool/statusUpdate.ts +247 -0
  378. package/tui/src/tools/TodoWriteTool/TodoWriteTool.ts +21 -2
  379. package/tui/src/tools/ToolSearchTool/ToolSearchTool.ts +21 -302
  380. package/tui/src/tools/ToolSearchTool/ccSupportTools.ts +223 -0
  381. package/tui/src/tools/ToolSearchTool/descriptionCache.ts +50 -0
  382. package/tui/src/tools/ToolSearchTool/keywordSearch.ts +216 -0
  383. package/tui/src/tools/ToolSearchTool/prompt.ts +10 -4
  384. package/tui/src/tools/ToolSearchTool/resultMapping.ts +30 -0
  385. package/tui/src/tools/ToolSearchTool/schemas.ts +30 -0
  386. package/tui/src/tools/ToolSearchTool/searchPool.ts +47 -0
  387. package/tui/src/tools/ToolSearchTool/supportIntentHints.ts +140 -0
  388. package/tui/src/tools/TranslateTool/TranslateTool.ts +1 -1
  389. package/tui/src/tools/VerifyPrimitive/VerifyPrimitive.ts +2 -1
  390. package/tui/src/tools/WebFetchTool/WebFetchTool.ts +43 -138
  391. package/tui/src/tools/WebFetchTool/call.ts +227 -0
  392. package/tui/src/tools/WebFetchTool/resolvedAddressSafety.ts +78 -0
  393. package/tui/src/tools/WebFetchTool/sourceVerification.ts +204 -0
  394. package/tui/src/tools/WebFetchTool/types.ts +23 -0
  395. package/tui/src/tools/WebFetchTool/urlSafety.ts +181 -0
  396. package/tui/src/tools/WebFetchTool/utils.ts +1 -1
  397. package/tui/src/tools/WebSearchTool/UI.tsx +0 -1
  398. package/tui/src/tools/WebSearchTool/WebSearchTool.ts +9 -313
  399. package/tui/src/tools/WebSearchTool/call.ts +33 -0
  400. package/tui/src/tools/WebSearchTool/responseMapping.ts +190 -0
  401. package/tui/src/tools/WebSearchTool/resultBlock.ts +47 -0
  402. package/tui/src/tools/WebSearchTool/schemas.ts +47 -0
  403. package/tui/src/tools/WebSearchTool/toolSchema.ts +12 -0
  404. package/tui/src/tools/WorkspaceToolAdapter/WorkspaceToolAdapter.ts +79 -0
  405. package/tui/src/tools/WorkspaceToolAdapter/allowedRootPolicy.ts +85 -0
  406. package/tui/src/tools/WorkspaceToolAdapter/documentFormatGuards.ts +73 -0
  407. package/tui/src/tools/WorkspaceToolAdapter/inputNormalization.ts +105 -0
  408. package/tui/src/tools/WorkspaceToolAdapter/mcpExposurePolicy.ts +64 -0
  409. package/tui/src/tools/WorkspaceToolAdapter/toolDefFactory.ts +215 -0
  410. package/tui/src/tools/WorkspaceToolAdapter/toolNames.ts +6 -0
  411. package/tui/src/tools/WorkspaceToolAdapter/workspacePolicy.ts +15 -0
  412. package/tui/src/tools/_shared/dispatchPrimitive.ts +6 -6
  413. package/tui/src/tools/_shared/documentChangeToPatch.ts +125 -0
  414. package/tui/src/tools/_shared/documentDispatchArguments.ts +87 -0
  415. package/tui/src/tools/_shared/documentPrimitiveTimeout.ts +13 -0
  416. package/tui/src/tools/_shared/documentToolResultRender.ts +98 -0
  417. package/tui/src/tools/_shared/pendingCallRegistry.ts +1 -6
  418. package/tui/src/tools/_shared/rootPrimitiveInput.ts +1 -0
  419. package/tui/src/tools/_shared/toolChoiceRepair/documentCompletionPatterns.ts +58 -0
  420. package/tui/src/tools/_shared/toolChoiceRepair/documentCompletionPrompt.ts +271 -0
  421. package/tui/src/tools/_shared/toolChoiceRepair/documentRepair.ts +452 -0
  422. package/tui/src/tools/_shared/toolChoiceRepair/messageAccess.ts +80 -0
  423. package/tui/src/tools/_shared/toolChoiceRepair/publicDataRepair.ts +92 -0
  424. package/tui/src/tools/_shared/toolChoiceRepair/supportRepair.ts +135 -0
  425. package/tui/src/tools/_shared/toolChoiceRepair.ts +55 -860
  426. package/tui/src/tools/shared/mockDisclaimer.ts +1 -1
  427. package/tui/src/tools.ts +39 -190
  428. package/tui/src/types/fileSuggestion.ts +4 -26
  429. package/tui/src/types/generated/events_mono/claude_code/v1/claude_code_internal_event.ts +186 -148
  430. package/tui/src/types/generated/events_mono/common/v1/auth.ts +25 -11
  431. package/tui/src/types/generated/events_mono/growthbook/v1/growthbook_experiment_event.ts +47 -30
  432. package/tui/src/types/generated/google/protobuf/timestamp.ts +21 -7
  433. package/tui/src/types/message.ts +80 -102
  434. package/tui/src/types/messageQueueTypes.ts +6 -28
  435. package/tui/src/types/notebook.ts +16 -38
  436. package/tui/src/types/statusLine.ts +4 -26
  437. package/tui/src/types/tools.ts +24 -46
  438. package/tui/src/types/utils.ts +6 -28
  439. package/tui/src/upstreamproxy/relay.ts +7 -3
  440. package/tui/src/upstreamproxy/upstreamproxy.ts +1 -1
  441. package/tui/src/utils/assistantMessageFactories.ts +9 -3
  442. package/tui/src/utils/auth.ts +129 -139
  443. package/tui/src/utils/bash/ast.ts +23 -23
  444. package/tui/src/utils/bash/bashParser.ts +5 -5
  445. package/tui/src/utils/billing.ts +1 -1
  446. package/tui/src/utils/claudeDesktop.ts +4 -4
  447. package/tui/src/utils/collapseReadSearch.ts +3 -3
  448. package/tui/src/utils/cronTasks.ts +1 -1
  449. package/tui/src/utils/execFileNoThrow.ts +1 -1
  450. package/tui/src/utils/filePersistence/types.ts +16 -38
  451. package/tui/src/utils/forkedAgent.ts +1 -1
  452. package/tui/src/utils/gracefulShutdown.ts +4 -4
  453. package/tui/src/utils/heapDumpService.ts +12 -8
  454. package/tui/src/utils/hooks/apiQueryHookHelper.ts +1 -1
  455. package/tui/src/utils/hooks/execPromptHook.ts +1 -1
  456. package/tui/src/utils/hooks/skillImprovement.ts +1 -1
  457. package/tui/src/utils/mcp/dateTimeParser.ts +1 -1
  458. package/tui/src/utils/messages.ts +18 -0
  459. package/tui/src/utils/migrateSessions.ts +3 -3
  460. package/tui/src/utils/model/model.ts +6 -6
  461. package/tui/src/utils/permissions/yoloClassifier.ts +1 -1
  462. package/tui/src/utils/plugins/headlessPluginInstall.ts +1 -1
  463. package/tui/src/utils/plugins/mcpPluginIntegration.ts +1 -1
  464. package/tui/src/utils/plugins/mcpbHandler.ts +1 -1
  465. package/tui/src/utils/plugins/pluginLoader.ts +8 -8
  466. package/tui/src/utils/protectedNamespace.ts +5 -3
  467. package/tui/src/utils/rawJsonToolCall.ts +242 -0
  468. package/tui/src/utils/ripgrep.ts +16 -7
  469. package/tui/src/utils/sessionTitle.ts +1 -1
  470. package/tui/src/utils/settings/permissionValidation.ts +14 -2
  471. package/tui/src/utils/shell/prefix.ts +1 -1
  472. package/tui/src/utils/sideQuery.ts +1 -1
  473. package/tui/src/utils/systemThemeWatcher.ts +13 -3
  474. package/tui/src/utils/teleport.tsx +1 -1
  475. package/uv.lock +426 -45
  476. package/tui/src/services/api/claude.ts +0 -3540
  477. package/tui/src/tools/_shared/directPublicDataGuard.ts +0 -362
  478. package/tui/src/tools/_shared/kmaAnalysisGuard.ts +0 -197
  479. package/tui/src/tools/_shared/kmaAviationGuard.ts +0 -70
  480. package/tui/src/tools/_shared/nmcAedGuard.ts +0 -234
  481. package/tui/src/tools/_shared/protectedCheckGuard.ts +0 -207
  482. package/tui/src/tools/_shared/textToolCallGuard.ts +0 -91
@@ -1,3540 +0,0 @@
1
- // SPDX-License-Identifier: Apache-2.0
2
- // Spec 2521 — byte-copy(2521) baseline restored from
3
- // .references/claude-code-sourcemap/restored-src/src/services/api/claude.ts
4
- // (CC 2.1.88, SHA-256 6d3fd16e608120d502e70ec461ffb66bcbca12fa86862859606c9118f977a999).
5
- // Three labeled swap commits layer atop the byte-copy:
6
- // • swap/llm-provider(2521) — @anthropic-ai/sdk imports → sdk-compat.ts
7
- // • swap/anti-anthropic-1p(2521) — claude.ai 1P call-graph deadened via
8
- // UMMAYA-stubbed support modules (services/claudeAiLimits.ts + utils/auth.ts
9
- // are inert no-ops since Epic #1633). The 1P functions in this file
10
- // (getOauthAccountInfo, currentLimits, extractQuotaStatusFromHeaders,
11
- // getCLISyspromptPrefix's claude.ai branches, account_uuid telemetry)
12
- // remain in the byte-copy text but resolve to no-op returns at runtime.
13
- // No UMMAYA callers reach this file (verified post-Spec-2293), so the
14
- // 1P graph is doubly dead — by callgraph (no callers) and by support-
15
- // module inertness. Spec 2521 byte-copy philosophy (FR-002): keep the
16
- // reference text intact; deactivate via supporting infrastructure
17
- // instead of deleting in-file.
18
- // • swap/identifier-rename(2521) — citizen-visible Claude/Anthropic brand
19
- // tokens → UMMAYA/EXAONE/FriendliAI (T013).
20
- // This file has zero callers in tui/src after Spec 2293; it is retained as
21
- // the authoritative CC streaming-handler reference for future audit replays
22
- // (specs/2521-llm-swap-cc-rebuild/scripts/replay_rebuild.sh).
23
-
24
- import type {
25
- BetaContentBlock,
26
- BetaContentBlockParam,
27
- BetaImageBlockParam,
28
- BetaJSONOutputFormat,
29
- BetaMessage,
30
- BetaMessageDeltaUsage,
31
- BetaMessageStreamParams,
32
- BetaOutputConfig,
33
- BetaRawMessageStreamEvent,
34
- BetaRequestDocumentBlock,
35
- BetaStopReason,
36
- BetaToolChoiceAuto,
37
- BetaToolChoiceTool,
38
- BetaToolResultBlockParam,
39
- BetaToolUnion,
40
- BetaUsage,
41
- BetaMessageParam as MessageParam,
42
- } from '../../sdk-compat.js'
43
- import type { TextBlockParam } from '../../sdk-compat.js'
44
- import type { Stream } from '../../sdk-compat.js'
45
- import { randomUUID } from 'crypto'
46
- import {
47
- getAPIProvider,
48
- isFirstPartyAnthropicBaseUrl,
49
- } from 'src/utils/model/providers.js'
50
- import {
51
- getAttributionHeader,
52
- getCLISyspromptPrefix,
53
- } from '../../constants/system.js'
54
- import {
55
- getEmptyToolPermissionContext,
56
- type QueryChainTracking,
57
- type Tool,
58
- type ToolPermissionContext,
59
- type Tools,
60
- toolMatchesName,
61
- } from '../../Tool.js'
62
- import type { AgentDefinition } from '../../tools/AgentTool/loadAgentsDir.js'
63
- import {
64
- type ConnectorTextBlock,
65
- type ConnectorTextDelta,
66
- isConnectorTextBlock,
67
- } from '../../types/connectorText.js'
68
- import type {
69
- AssistantMessage,
70
- Message,
71
- StreamEvent,
72
- SystemAPIErrorMessage,
73
- UserMessage,
74
- } from '../../types/message.js'
75
- import {
76
- type CacheScope,
77
- logAPIPrefix,
78
- splitSysPromptPrefix,
79
- toolToAPISchema,
80
- } from '../../utils/api.js'
81
- import { getOauthAccountInfo } from '../../utils/auth.js'
82
- import {
83
- getBedrockExtraBodyParamsBetas,
84
- getMergedBetas,
85
- getModelBetas,
86
- } from '../../utils/betas.js'
87
- import { getOrCreateUserID } from '../../utils/config.js'
88
- import {
89
- CAPPED_DEFAULT_MAX_TOKENS,
90
- getModelMaxOutputTokens,
91
- getSonnet1mExpTreatmentEnabled,
92
- } from '../../utils/context.js'
93
- import { resolveAppliedEffort } from '../../utils/effort.js'
94
- import { isEnvTruthy } from '../../utils/envUtils.js'
95
- import { errorMessage } from '../../utils/errors.js'
96
- import { computeFingerprintFromMessages } from '../../utils/fingerprint.js'
97
- import { captureAPIRequest, logError } from '../../utils/log.js'
98
- import { normalizeMessagesForAPI } from '../../utils/messageApiNormalize.js'
99
- import * as messageUtils from '../../utils/messages.js'
100
- import { createAssistantAPIErrorMessage } from '../../utils/assistantMessageFactories.js'
101
- import { createUserMessage } from '../../utils/userMessageFactories.js'
102
- import {
103
- getDefaultOpusModel,
104
- getDefaultSonnetModel,
105
- getSmallFastModel,
106
- isNonCustomOpusModel,
107
- } from '../../utils/model/model.js'
108
- import {
109
- asSystemPrompt,
110
- type SystemPrompt,
111
- } from '../../utils/systemPromptType.js'
112
- import { tokenCountFromLastAPIResponse } from '../../utils/tokens.js'
113
- import { getDynamicConfig_BLOCKS_ON_INIT } from '../analytics/growthbook.js'
114
- import {
115
- currentLimits,
116
- extractQuotaStatusFromError,
117
- extractQuotaStatusFromHeaders,
118
- } from '../claudeAiLimits.js'
119
- import { getAPIContextManagement } from '../compact/apiMicrocompact.js'
120
-
121
- /* eslint-disable @typescript-eslint/no-require-imports */
122
- const autoModeStateModule = feature('TRANSCRIPT_CLASSIFIER')
123
- ? (require('../../utils/permissions/autoModeState.js') as typeof import('../../utils/permissions/autoModeState.js'))
124
- : null
125
-
126
- import { feature } from 'bun:bundle'
127
- // SWAP/llm-provider(2521): @anthropic-ai/sdk + /error → UMMAYA sdk-compat
128
- // aliases (ClientOptions, APIError, APIConnectionTimeoutError, APIUserAbortError
129
- // all re-exported by sdk-compat.ts as structural stubs).
130
- import type { ClientOptions } from '../../sdk-compat.js'
131
- import type { ReasoningMode } from '../../utils/kExaoneReasoning.js'
132
- import {
133
- APIConnectionTimeoutError,
134
- APIError,
135
- APIUserAbortError,
136
- } from '../../sdk-compat.js'
137
- import {
138
- getAfkModeHeaderLatched,
139
- getCacheEditingHeaderLatched,
140
- getFastModeHeaderLatched,
141
- getLastApiCompletionTimestamp,
142
- getPromptCache1hAllowlist,
143
- getPromptCache1hEligible,
144
- getSessionId,
145
- getThinkingClearLatched,
146
- setAfkModeHeaderLatched,
147
- setCacheEditingHeaderLatched,
148
- setFastModeHeaderLatched,
149
- setLastMainRequestId,
150
- setPromptCache1hAllowlist,
151
- setPromptCache1hEligible,
152
- setThinkingClearLatched,
153
- } from 'src/bootstrap/state.js'
154
- import {
155
- AFK_MODE_BETA_HEADER,
156
- CONTEXT_1M_BETA_HEADER,
157
- CONTEXT_MANAGEMENT_BETA_HEADER,
158
- EFFORT_BETA_HEADER,
159
- FAST_MODE_BETA_HEADER,
160
- PROMPT_CACHING_SCOPE_BETA_HEADER,
161
- REDACT_THINKING_BETA_HEADER,
162
- STRUCTURED_OUTPUTS_BETA_HEADER,
163
- TASK_BUDGETS_BETA_HEADER,
164
- } from 'src/constants/betas.js'
165
- import type { QuerySource } from 'src/constants/querySource.js'
166
- import type { Notification } from 'src/context/notifications.js'
167
- import { addToTotalSessionCost } from 'src/cost-tracker.js'
168
- import { getFeatureValue_CACHED_MAY_BE_STALE } from 'src/services/analytics/growthbook.js'
169
- import type { AgentId } from 'src/types/ids.js'
170
- import {
171
- ADVISOR_TOOL_INSTRUCTIONS,
172
- getExperimentAdvisorModels,
173
- isAdvisorEnabled,
174
- isValidAdvisorModel,
175
- modelSupportsAdvisor,
176
- } from 'src/utils/advisor.js'
177
- import { getAgentContext } from 'src/utils/agentContext.js'
178
- import { isClaudeAISubscriber } from 'src/utils/auth.js'
179
- import {
180
- getToolSearchBetaHeader,
181
- modelSupportsStructuredOutputs,
182
- shouldIncludeFirstPartyOnlyBetas,
183
- shouldUseGlobalCacheScope,
184
- } from 'src/utils/betas.js'
185
- import { CLAUDE_IN_CHROME_MCP_SERVER_NAME } from 'src/utils/claudeInChrome/common.js'
186
- import { CHROME_TOOL_SEARCH_INSTRUCTIONS } from 'src/utils/claudeInChrome/prompt.js'
187
- import { getMaxThinkingTokensForModel } from 'src/utils/context.js'
188
- import { logForDebugging } from 'src/utils/debug.js'
189
- import { logForDiagnosticsNoPII } from 'src/utils/diagLogs.js'
190
- import { type EffortValue, modelSupportsEffort } from 'src/utils/effort.js'
191
- import {
192
- isFastModeAvailable,
193
- isFastModeCooldown,
194
- isFastModeEnabled,
195
- isFastModeSupportedByModel,
196
- } from 'src/utils/fastMode.js'
197
- import { returnValue } from 'src/utils/generators.js'
198
- import { headlessProfilerCheckpoint } from 'src/utils/headlessProfiler.js'
199
- import { isMcpInstructionsDeltaEnabled } from 'src/utils/mcpInstructionsDelta.js'
200
- import { calculateUSDCost } from 'src/utils/modelCost.js'
201
- import { endQueryProfile, queryCheckpoint } from 'src/utils/queryProfiler.js'
202
- import {
203
- modelSupportsAdaptiveThinking,
204
- modelSupportsThinking,
205
- type ThinkingConfig,
206
- } from 'src/utils/thinking.js'
207
- import {
208
- extractDiscoveredToolNames,
209
- isDeferredToolsDeltaEnabled,
210
- isToolSearchEnabled,
211
- } from 'src/utils/toolSearch.js'
212
- import { API_MAX_MEDIA_PER_REQUEST } from '../../constants/apiLimits.js'
213
- import { ADVISOR_BETA_HEADER } from '../../constants/betas.js'
214
- import {
215
- formatDeferredToolLine,
216
- isDeferredTool,
217
- TOOL_SEARCH_TOOL_NAME,
218
- } from '../../tools/ToolSearchTool/prompt.js'
219
- import {
220
- getAdapterToolByName,
221
- selectTopKAdapterToolNamesForQuery,
222
- } from '../../tools/AdapterTool/AdapterTool.js'
223
- import { isNonSyntheticUserText } from '../../tools/_shared/citizenUserText.js'
224
- import { shouldSuppressUmmayaToolCallsForAnswerSynthesis } from '../../tools/_shared/toolChoiceRepair.js'
225
- import { count } from '../../utils/array.js'
226
- import { insertBlockAfterToolResults } from '../../utils/contentArray.js'
227
- import { validateBoundedIntEnvVar } from '../../utils/envValidation.js'
228
- import { safeParseJSON } from '../../utils/json.js'
229
- import { getInferenceProfileBackingModel } from '../../utils/model/bedrock.js'
230
- import {
231
- normalizeModelStringForAPI,
232
- parseUserSpecifiedModel,
233
- } from '../../utils/model/model.js'
234
- import {
235
- startSessionActivity,
236
- stopSessionActivity,
237
- } from '../../utils/sessionActivity.js'
238
- import { jsonStringify } from '../../utils/slowOperations.js'
239
- import {
240
- isBetaTracingEnabled,
241
- type LLMRequestNewContext,
242
- startLLMRequestSpan,
243
- } from '../../utils/telemetry/sessionTracing.js'
244
- /* eslint-enable @typescript-eslint/no-require-imports */
245
- import {
246
- type AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
247
- logEvent,
248
- } from '../analytics/index.js'
249
- import {
250
- consumePendingCacheEdits,
251
- getPinnedCacheEdits,
252
- markToolsSentToAPIState,
253
- pinCacheEdits,
254
- } from '../compact/microCompact.js'
255
- import { getInitializationStatus } from '../lsp/manager.js'
256
- import { isToolFromMcpServer } from '../mcp/utils.js'
257
- import { withStreamingVCR, withVCR } from '../vcr.js'
258
- import { CLIENT_REQUEST_ID_HEADER, getAnthropicClient } from './client.js'
259
- import {
260
- API_ERROR_MESSAGE_PREFIX,
261
- CUSTOM_OFF_SWITCH_MESSAGE,
262
- getAssistantMessageFromError,
263
- getErrorMessageIfRefusal,
264
- } from './errors.js'
265
- import {
266
- EMPTY_USAGE,
267
- type GlobalCacheStrategy,
268
- logAPIError,
269
- logAPIQuery,
270
- logAPISuccessAndDuration,
271
- type NonNullableUsage,
272
- } from './logging.js'
273
- import {
274
- CACHE_TTL_1HOUR_MS,
275
- checkResponseForCacheBreak,
276
- recordPromptState,
277
- } from './promptCacheBreakDetection.js'
278
- import {
279
- CannotRetryError,
280
- FallbackTriggeredError,
281
- is529Error,
282
- type RetryContext,
283
- withRetry,
284
- } from './withRetry.js'
285
-
286
// Bind the message helpers used in this module to local names so call sites
// do not need the `messageUtils.` prefix.
const ensureToolResultPairing = messageUtils.ensureToolResultPairing
const normalizeContentFromAPI = messageUtils.normalizeContentFromAPI
const stripAdvisorBlocks = messageUtils.stripAdvisorBlocks
const stripCallerFieldFromAssistantMessage =
  messageUtils.stripCallerFieldFromAssistantMessage
const stripToolReferenceBlocksFromUserMessage =
  messageUtils.stripToolReferenceBlocksFromUserMessage
293
-
294
// Recursive model of any value that can appear in a JSON document:
// containers (objects, arrays) plus the four scalar kinds.
type JsonArray = Array<JsonValue>
type JsonObject = { [key: string]: JsonValue }
type JsonValue = JsonObject | JsonArray | string | number | boolean | null
298
-
299
/**
 * Assemble the extra body parameters for the API request.
 *
 * Sources, applied in order:
 *  1. The CLAUDE_CODE_EXTRA_BODY environment variable, when it parses to a
 *     JSON object (anything else is logged and ignored).
 *  2. The anti-distillation opt-in, when the feature gate and runtime checks
 *     below all pass.
 *  3. `betaHeaders`, merged into `anthropic_beta` (primarily for Bedrock
 *     requests).
 *
 * The returned object never aliases `betaHeaders`: the beta list is always
 * a fresh array, so later mutation of the caller's array cannot leak into
 * the request body (and vice versa).
 *
 * @param betaHeaders - An array of beta headers to include in the request.
 * @returns A JSON object representing the extra body parameters.
 */
export function getExtraBodyParams(betaHeaders?: string[]): JsonObject {
  let result: JsonObject = {}

  // Parse user's extra body parameters first
  const extraBodyStr = process.env.CLAUDE_CODE_EXTRA_BODY
  if (extraBodyStr) {
    try {
      // Parse as JSON, which can be null, boolean, number, string, array or object
      const parsed = safeParseJSON(extraBodyStr)
      // We expect an object with key-value pairs to spread into API parameters
      if (parsed && typeof parsed === 'object' && !Array.isArray(parsed)) {
        // Shallow clone — safeParseJSON is LRU-cached and returns the same
        // object reference for the same string. Mutating `result` below
        // would poison the cache, causing stale values to persist.
        result = { ...(parsed as JsonObject) }
      } else {
        logForDebugging(
          `CLAUDE_CODE_EXTRA_BODY env var must be a JSON object, but was given ${extraBodyStr}`,
          { level: 'error' },
        )
      }
    } catch (error) {
      logForDebugging(
        `Error parsing CLAUDE_CODE_EXTRA_BODY: ${errorMessage(error)}`,
        { level: 'error' },
      )
    }
  }

  // Anti-distillation: send fake_tools opt-in for 1P CLI only.
  // The feature() call stays the direct ternary condition, as in the
  // original gate.
  if (
    feature('ANTI_DISTILLATION_CC')
      ? process.env.CLAUDE_CODE_ENTRYPOINT === 'cli' &&
        shouldIncludeFirstPartyOnlyBetas() &&
        getFeatureValue_CACHED_MAY_BE_STALE(
          'tengu_anti_distill_fake_tool_injection',
          false,
        )
      : false
  ) {
    result.anti_distillation = ['fake_tools']
  }

  // Handle beta headers if provided
  if (betaHeaders && betaHeaders.length > 0) {
    if (result.anthropic_beta && Array.isArray(result.anthropic_beta)) {
      // Add to existing array, avoiding duplicates
      const existingHeaders = result.anthropic_beta as string[]
      const newHeaders = betaHeaders.filter(
        header => !existingHeaders.includes(header),
      )
      result.anthropic_beta = [...existingHeaders, ...newHeaders]
    } else {
      // Copy rather than store the caller's array by reference.
      result.anthropic_beta = [...betaHeaders]
    }
  }

  return result
}
367
-
368
/**
 * Decide whether prompt caching is enabled for `model`.
 *
 * DISABLE_PROMPT_CACHING turns caching off for every model. The
 * DISABLE_PROMPT_CACHING_HAIKU / _SONNET / _OPUS variables turn it off only
 * when `model` equals the small/fast model, the default Sonnet model, or the
 * default Opus model respectively.
 */
export function getPromptCachingEnabled(model: string): boolean {
  // Global disable takes precedence
  if (isEnvTruthy(process.env.DISABLE_PROMPT_CACHING)) {
    return false
  }

  // Each entry pairs an opt-out flag with a lazy lookup of the model it
  // applies to; the lookup only runs when its flag is truthy.
  const perModelOptOuts: Array<[string | undefined, () => string]> = [
    [process.env.DISABLE_PROMPT_CACHING_HAIKU, () => getSmallFastModel()],
    [process.env.DISABLE_PROMPT_CACHING_SONNET, () => getDefaultSonnetModel()],
    [process.env.DISABLE_PROMPT_CACHING_OPUS, () => getDefaultOpusModel()],
  ]

  const optedOut = perModelOptOuts.some(
    ([flag, resolveModel]) => isEnvTruthy(flag) && model === resolveModel(),
  )
  return !optedOut
}
392
-
393
/**
 * Build the `cache_control` descriptor for a content block.
 *
 * Always `ephemeral`; adds `ttl: '1h'` when should1hCacheTTL approves the
 * query source, and `scope` only when the requested scope is 'global'.
 */
export function getCacheControl({
  scope,
  querySource,
}: {
  scope?: CacheScope
  querySource?: QuerySource
} = {}): {
  type: 'ephemeral'
  ttl?: '1h'
  scope?: CacheScope
} {
  const control: {
    type: 'ephemeral'
    ttl?: '1h'
    scope?: CacheScope
  } = { type: 'ephemeral' }

  if (should1hCacheTTL(querySource)) {
    control.ttl = '1h'
  }
  if (scope === 'global') {
    control.scope = scope
  }
  return control
}
410
-
411
/**
 * Decide whether prompt caching should use the 1h TTL.
 *
 * Returns true when either:
 *  - the provider is Bedrock and ENABLE_PROMPT_CACHING_1H_BEDROCK is truthy, or
 *  - the user is eligible (ant, or subscriber not using overage) AND the
 *    query source matches an entry in the GrowthBook allowlist.
 *
 * GrowthBook config shape: { allowlist: string[] }
 * A trailing '*' in a pattern means prefix match, e.g.
 *  - { allowlist: ["repl_main_thread*", "sdk"] } — main thread + SDK only
 *  - { allowlist: ["repl_main_thread*", "sdk", "agent:*"] } — also subagents
 *  - { allowlist: ["*"] } — all sources
 *
 * Both eligibility and the allowlist are latched in bootstrap state the
 * first time they are computed, so they stay stable for the session.
 */
function should1hCacheTTL(querySource?: QuerySource): boolean {
  // 3P Bedrock users get 1h TTL when opted in via env var — they manage their
  // own billing, and no GrowthBook gating applies to them.
  const bedrockOptIn =
    getAPIProvider() === 'bedrock' &&
    isEnvTruthy(process.env.ENABLE_PROMPT_CACHING_1H_BEDROCK)
  if (bedrockOptIn) {
    return true
  }

  // Latched eligibility: prevents mid-session overage flips from changing
  // the cache_control TTL, which would bust the server-side prompt cache.
  let eligible = getPromptCache1hEligible()
  if (eligible === null) {
    eligible =
      process.env.USER_TYPE === 'ant' ||
      (isClaudeAISubscriber() && !currentLimits.isUsingOverage)
    setPromptCache1hEligible(eligible)
  }
  if (!eligible) {
    return false
  }

  // Latched allowlist: prevents mixed TTLs when GrowthBook's disk cache
  // updates mid-request.
  let patterns = getPromptCache1hAllowlist()
  if (patterns === null) {
    const { allowlist } = getFeatureValue_CACHED_MAY_BE_STALE<{
      allowlist?: string[]
    }>('tengu_prompt_cache_1h_config', {})
    patterns = allowlist ?? []
    setPromptCache1hAllowlist(patterns)
  }

  if (querySource === undefined) {
    return false
  }
  for (const pattern of patterns) {
    const matched = pattern.endsWith('*')
      ? querySource.startsWith(pattern.slice(0, -1))
      : querySource === pattern
    if (matched) {
      return true
    }
  }
  return false
}
470
-
471
/**
 * Apply the effort setting to an outgoing API request.
 *
 * Does nothing when the model does not support effort or `outputConfig`
 * already has an `effort` key. Otherwise:
 *  - undefined effort: only the effort beta header is added;
 *  - string effort: set on `outputConfig.effort` and the beta header is added;
 *  - any other value (numeric override): written to
 *    `extraBodyParams.anthropic_internal.effort_override`, but only when
 *    USER_TYPE is 'ant'.
 *
 * `outputConfig`, `extraBodyParams` and `betas` are mutated in place.
 */
function configureEffortParams(
  effortValue: EffortValue | undefined,
  outputConfig: BetaOutputConfig,
  extraBodyParams: Record<string, unknown>,
  betas: string[],
  model: string,
): void {
  const canConfigure = modelSupportsEffort(model) && !('effort' in outputConfig)
  if (!canConfigure) {
    return
  }

  if (effortValue === undefined || typeof effortValue === 'string') {
    if (effortValue !== undefined) {
      // Send string effort level as is
      outputConfig.effort = effortValue
    }
    betas.push(EFFORT_BETA_HEADER)
    return
  }

  // Numeric effort override - ant-only (uses anthropic_internal)
  if (process.env.USER_TYPE !== 'ant') {
    return
  }
  const priorInternal =
    (extraBodyParams.anthropic_internal as Record<string, unknown>) || {}
  extraBodyParams.anthropic_internal = {
    ...priorInternal,
    effort_override: effortValue,
  }
}
502
-
503
// Wire shape of output_config.task_budget — the API-side token budget the
// model is told about. The SDK's BetaOutputConfig type does not include
// task_budget yet, so the shape is declared locally and intersected in below.
// Beta: task-budgets-2026-03-13.
type TaskBudgetParam = {
  type: 'tokens'
  total: number
  remaining?: number
}

/**
 * Attach a task budget to `outputConfig` and make sure the task-budgets beta
 * header is present in `betas`.
 *
 * No-op when no budget is given, when `outputConfig` already has a
 * `task_budget` key, or when first-party-only betas are not to be included.
 * `outputConfig` and `betas` are mutated in place.
 */
export function configureTaskBudgetParams(
  taskBudget: Options['taskBudget'],
  outputConfig: BetaOutputConfig & { task_budget?: TaskBudgetParam },
  betas: string[],
): void {
  if (!taskBudget) {
    return
  }
  if ('task_budget' in outputConfig) {
    return
  }
  if (!shouldIncludeFirstPartyOnlyBetas()) {
    return
  }

  const budget: TaskBudgetParam = { type: 'tokens', total: taskBudget.total }
  if (taskBudget.remaining !== undefined) {
    budget.remaining = taskBudget.remaining
  }
  outputConfig.task_budget = budget

  if (betas.indexOf(TASK_BUDGETS_BETA_HEADER) === -1) {
    betas.push(TASK_BUDGETS_BETA_HEADER)
  }
}
537
-
538
/**
 * Build the `metadata` payload sent with API requests.
 *
 * The result has a single `user_id` field: a JSON string holding any extra
 * fields from CLAUDE_CODE_EXTRA_METADATA (when it parses to a JSON object)
 * followed by `device_id`, `account_uuid` and `session_id`. The three fixed
 * keys are written last, so they win over same-named extra fields.
 */
export function getAPIMetadata() {
  // https://docs.google.com/document/d/1dURO9ycXXQCBS0V4Vhl4poDBRgkelFc5t2BNPoEgH5Q/edit?tab=t.0#heading=h.5g7nec5b09w5
  const extraStr = process.env.CLAUDE_CODE_EXTRA_METADATA
  let extraFields: JsonObject = {}

  if (extraStr) {
    const candidate = safeParseJSON(extraStr, false)
    const isPlainObject =
      Boolean(candidate) &&
      typeof candidate === 'object' &&
      !Array.isArray(candidate)
    if (isPlainObject) {
      extraFields = candidate as JsonObject
    } else {
      logForDebugging(
        `CLAUDE_CODE_EXTRA_METADATA env var must be a JSON object, but was given ${extraStr}`,
        { level: 'error' },
      )
    }
  }

  const identity = {
    ...extraFields,
    device_id: getOrCreateUserID(),
    // Empty string when no OAuth account info is available
    account_uuid: getOauthAccountInfo()?.accountUuid ?? '',
    session_id: getSessionId(),
  }
  return { user_id: jsonStringify(identity) }
}
564
-
565
/**
 * Check an API key by sending a one-token request with the small/fast model.
 *
 * @param apiKey - The key to verify.
 * @param isNonInteractiveSession - When true (print mode), verification is
 *   skipped and the key is reported as valid.
 * @returns true when the request succeeds (or is skipped); false when the
 *   failure message contains the "invalid x-api-key" authentication payload.
 * @throws Any other failure, unwrapped from CannotRetryError when applicable.
 */
export async function verifyApiKey(
  apiKey: string,
  isNonInteractiveSession: boolean,
): Promise<boolean> {
  // Skip API verification if running in print mode (isNonInteractiveSession)
  if (isNonInteractiveSession) {
    return true
  }

  const invalidKeyPayload =
    '{"type":"error","error":{"type":"authentication_error","message":"invalid x-api-key"}}'

  try {
    // WARNING: if you change this to use a non-Haiku model, this request will fail in 1P unless it uses getCLISyspromptPrefix.
    const model = getSmallFastModel()
    const betas = getModelBetas(model)

    const createClient = () =>
      getAnthropicClient({
        apiKey,
        maxRetries: 3,
        model,
        source: 'verify_api_key',
      })

    return await returnValue(
      withRetry(
        createClient,
        async anthropic => {
          const messages: MessageParam[] = [{ role: 'user', content: 'test' }]
          // biome-ignore lint/plugin: API key verification is intentionally a minimal direct call
          await anthropic.beta.messages.create({
            model,
            max_tokens: 1,
            messages,
            temperature: 1,
            ...(betas.length > 0 && { betas }),
            metadata: getAPIMetadata(),
            ...getExtraBodyParams(),
          })
          return true
        },
        // Use fewer retries for API key verification
        { maxRetries: 2, model, thinkingConfig: { type: 'disabled' } },
      ),
    )
  } catch (errorFromRetry) {
    // Report the underlying failure rather than the retry wrapper.
    const error =
      errorFromRetry instanceof CannotRetryError
        ? errorFromRetry.originalError
        : errorFromRetry
    logError(error)

    // Check for authentication error
    const isInvalidKey =
      error instanceof Error && error.message.includes(invalidKeyPayload)
    if (isInvalidKey) {
      return false
    }
    throw error
  }
}
622
-
623
/**
 * Convert an internal user message into an API `MessageParam`.
 *
 * Without `addCache`, the content is passed through; array content is
 * shallow-cloned so later in-place edits (e.g. a splice that inserts
 * cache-edit blocks) cannot contaminate the original message.
 *
 * With `addCache`, string content becomes a single text block and array
 * content is copied block by block; when `enablePromptCaching` is true, the
 * last block also receives a `cache_control` marker.
 */
export function userMessageToMessageParam(
  message: UserMessage,
  addCache = false,
  enablePromptCaching: boolean,
  querySource?: QuerySource,
): MessageParam {
  const { content } = message.message

  if (!addCache) {
    return {
      role: 'user',
      content: Array.isArray(content) ? [...content] : content,
    }
  }

  if (typeof content === 'string') {
    return {
      role: 'user',
      content: [
        {
          type: 'text',
          text: content,
          ...(enablePromptCaching && {
            cache_control: getCacheControl({ querySource }),
          }),
        },
      ],
    }
  }

  const lastIndex = content.length - 1
  return {
    role: 'user',
    content: content.map((block, index) =>
      index === lastIndex && enablePromptCaching
        ? { ...block, cache_control: getCacheControl({ querySource }) }
        : { ...block },
    ),
  }
}
667
-
668
/**
 * Convert an internal assistant message into an API `MessageParam`.
 *
 * Without `addCache`, the content is passed through unchanged (same
 * reference). With `addCache`, string content becomes a single text block
 * and array content is copied block by block. When `enablePromptCaching` is
 * true, the last block receives a `cache_control` marker unless it is a
 * `thinking` / `redacted_thinking` block or (with the CONNECTOR_TEXT feature)
 * a connector-text block.
 */
export function assistantMessageToMessageParam(
  message: AssistantMessage,
  addCache = false,
  enablePromptCaching: boolean,
  querySource?: QuerySource,
): MessageParam {
  const { content } = message.message

  if (!addCache) {
    return { role: 'assistant', content }
  }

  if (typeof content === 'string') {
    return {
      role: 'assistant',
      content: [
        {
          type: 'text',
          text: content,
          ...(enablePromptCaching && {
            cache_control: getCacheControl({ querySource }),
          }),
        },
      ],
    }
  }

  const lastIndex = content.length - 1
  return {
    role: 'assistant',
    content: content.map((block, index) => {
      // Only the final block may carry the cache marker, and only when it is
      // of a kind that accepts one.
      const cacheable =
        index === lastIndex &&
        block.type !== 'thinking' &&
        block.type !== 'redacted_thinking' &&
        (feature('CONNECTOR_TEXT') ? !isConnectorTextBlock(block) : true)
      return cacheable && enablePromptCaching
        ? { ...block, cache_control: getCacheControl({ querySource }) }
        : { ...block }
    }),
  }
}
710
-
711
/**
 * Per-request options accepted by the query entry points in this module.
 * Members are grouped by concern; the grouping carries no meaning.
 */
export type Options = {
  // --- Model selection ---
  model: string
  fallbackModel?: string
  advisorModel?: string
  fastMode?: boolean

  // --- Tools and permissions ---
  getToolPermissionContext: () => Promise<ToolPermissionContext>
  toolChoice?: BetaToolChoiceTool | BetaToolChoiceAuto | undefined
  extraToolSchemas?: BetaToolUnion[]
  mcpTools: Tools
  hasPendingMcpServers?: boolean

  // --- Agents ---
  agents: AgentDefinition[]
  allowedAgentTypes?: string[]
  agentId?: AgentId // Only set for subagents

  // --- Session / query context ---
  isNonInteractiveSession: boolean
  querySource: QuerySource
  queryTracking?: QueryChainTracking
  hasAppendSystemPrompt: boolean

  // --- Output tuning ---
  maxOutputTokensOverride?: number
  temperatureOverride?: number
  effortValue?: EffortValue
  reasoningMode?: ReasoningMode
  outputFormat?: BetaJSONOutputFormat
  // API-side task budget (output_config.task_budget). Distinct from the
  // tokenBudget.ts +500k auto-continue feature — this one is sent to the API
  // so the model can pace itself. `remaining` is computed by the caller
  // (query.ts decrements across the agentic loop).
  taskBudget?: { total: number; remaining?: number }

  // --- Prompt caching ---
  enablePromptCaching?: boolean
  skipCacheWrite?: boolean

  // --- Transport and callbacks ---
  fetchOverride?: ClientOptions['fetch']
  onStreamingFallback?: () => void
  addNotification?: (notif: Notification) => void
}
744
-
745
/**
 * Run a model query and resolve with the last assistant message it produced.
 *
 * The underlying generator is always drained to completion — not stopped at
 * the first assistant message — so that logAPISuccessAndDuration, which runs
 * after all yields, still gets called.
 *
 * @throws APIUserAbortError when no assistant message was produced and the
 *   signal is aborted; a generic Error when none was produced otherwise.
 */
export async function queryModelWithoutStreaming({
  messages,
  systemPrompt,
  thinkingConfig,
  tools,
  signal,
  options,
}: {
  messages: Message[]
  systemPrompt: SystemPrompt
  thinkingConfig: ThinkingConfig
  tools: Tools
  signal: AbortSignal
  options: Options
}): Promise<AssistantMessage> {
  const events = withStreamingVCR(messages, async function* () {
    yield* queryModel(
      messages,
      systemPrompt,
      thinkingConfig,
      tools,
      signal,
      options,
    )
  })

  let lastAssistant: AssistantMessage | undefined
  for await (const event of events) {
    if (event.type === 'assistant') {
      lastAssistant = event
    }
  }

  if (lastAssistant) {
    return lastAssistant
  }
  // Surface aborts as APIUserAbortError so callers can handle them
  // separately from a genuinely missing response.
  if (signal.aborted) {
    throw new APIUserAbortError()
  }
  throw new Error('No assistant message found')
}
787
-
788
/**
 * Run a model query and re-yield everything it emits (stream events,
 * assistant messages and system API error messages), wrapped in
 * withStreamingVCR.
 */
export async function* queryModelWithStreaming({
  messages,
  systemPrompt,
  thinkingConfig,
  tools,
  signal,
  options,
}: {
  messages: Message[]
  systemPrompt: SystemPrompt
  thinkingConfig: ThinkingConfig
  tools: Tools
  signal: AbortSignal
  options: Options
}): AsyncGenerator<
  StreamEvent | AssistantMessage | SystemAPIErrorMessage,
  void
> {
  // The live query, handed to the VCR wrapper as a generator factory.
  async function* runQuery() {
    yield* queryModel(
      messages,
      systemPrompt,
      thinkingConfig,
      tools,
      signal,
      options,
    )
  }

  return yield* withStreamingVCR(messages, runQuery)
}
817
-
818
/**
 * Report whether an LSP tool should be deferred (sent with
 * defer_loading: true) because LSP initialization has not completed.
 *
 * Tools without a truthy `isLsp` property are never deferred.
 */
function shouldDeferLspTool(tool: Tool): boolean {
  const flaggedAsLsp = 'isLsp' in tool && Boolean(tool.isLsp)
  if (!flaggedAsLsp) {
    return false
  }

  // Defer when pending or not started
  const { status } = getInitializationStatus()
  return status === 'pending' || status === 'not-started'
}
830
-
831
/**
 * Find the text of the most recent user message that
 * isNonSyntheticUserText accepts.
 *
 * Messages are scanned from newest to oldest. String content is tested as
 * is; array content is reduced to the concatenation of its text blocks.
 * Returns '' when no user message qualifies.
 */
function latestUserTextForToolRetrieval(messages: Message[]): string {
  let index = messages.length
  while (index-- > 0) {
    const candidate = messages[index] as {
      type?: string
      message?: { content?: unknown }
    }
    if (candidate?.type !== 'user') {
      continue
    }

    const content = candidate.message?.content
    let text: string
    if (typeof content === 'string') {
      text = content
    } else if (Array.isArray(content)) {
      text = content
        .filter(
          (block): block is { type: string; text: string } =>
            block?.type === 'text' && typeof block.text === 'string',
        )
        .map(block => block.text)
        .join('')
    } else {
      // Neither a string nor a block array: nothing to extract.
      continue
    }

    if (isNonSyntheticUserText(text)) {
      return text
    }
  }
  return ''
}
856
-
857
/**
 * Per-attempt timeout for non-streaming fallback requests, in milliseconds.
 *
 * Reads API_TIMEOUT_MS when set so slow backends and the streaming path
 * share the same ceiling. The override is used only when it parses (base 10)
 * to a finite, strictly positive number; empty, non-numeric, zero and
 * negative values fall through to the defaults, since a non-positive
 * timeout is never meaningful.
 *
 * Remote sessions (CLAUDE_CODE_REMOTE truthy) default to 120s to stay under
 * CCR's container idle-kill (~5min) so a hung fallback to a wedged backend
 * surfaces a clean APIConnectionTimeoutError instead of stalling past
 * SIGKILL.
 *
 * Otherwise defaults to 300s — long enough for slow backends without
 * approaching the API's 10-minute non-streaming boundary.
 *
 * @returns The timeout to apply to each non-streaming attempt.
 */
function getNonstreamingFallbackTimeoutMs(): number {
  const override = Number.parseInt(process.env.API_TIMEOUT_MS ?? '', 10)
  // NaN and 0 were already rejected by truthiness; negatives must be too.
  if (Number.isFinite(override) && override > 0) {
    return override
  }
  return isEnvTruthy(process.env.CLAUDE_CODE_REMOTE) ? 120_000 : 300_000
}
874
-
875
/**
 * Drives a single non-streaming Messages request through withRetry.
 *
 * Every `system` message the retry generator produces is re-yielded to the
 * caller; the generator's final value is returned as the BetaMessage.
 *
 * Each attempt builds its params via `paramsFromContext`, reports them through
 * `captureRequest` and `onAttempt`, adjusts them with
 * `adjustParamsForNonStreaming`, and issues the request with the fallback
 * timeout and the caller's abort signal. Failures other than
 * APIUserAbortError are logged before being re-thrown.
 */
export async function* executeNonStreamingRequest(
  clientOptions: {
    model: string
    fetchOverride?: Options['fetchOverride']
    source: string
  },
  retryOptions: {
    model: string
    fallbackModel?: string
    thinkingConfig: ThinkingConfig
    fastMode?: boolean
    signal: AbortSignal
    initialConsecutive529Errors?: number
    querySource?: QuerySource
  },
  paramsFromContext: (context: RetryContext) => BetaMessageStreamParams,
  onAttempt: (attempt: number, start: number, maxOutputTokens: number) => void,
  captureRequest: (params: BetaMessageStreamParams) => void,
  /**
   * Request ID of the failed streaming attempt this fallback is recovering
   * from. Emitted in tengu_nonstreaming_fallback_error for funnel correlation.
   */
  originatingRequestId?: string | null,
): AsyncGenerator<SystemAPIErrorMessage, BetaMessage> {
  const timeoutMs = getNonstreamingFallbackTimeoutMs()

  const attempts = withRetry(
    () =>
      getAnthropicClient({
        maxRetries: 0,
        model: clientOptions.model,
        fetchOverride: clientOptions.fetchOverride,
        source: clientOptions.source,
      }),
    async (anthropic, attempt, context) => {
      const startedAt = Date.now()
      const attemptParams = paramsFromContext(context)
      captureRequest(attemptParams)
      onAttempt(attempt, startedAt, attemptParams.max_tokens)

      const nonStreamingParams = adjustParamsForNonStreaming(
        attemptParams,
        MAX_NON_STREAMING_TOKENS,
      )

      try {
        // biome-ignore lint/plugin: non-streaming API call
        return await anthropic.beta.messages.create(
          {
            ...nonStreamingParams,
            model: normalizeModelStringForAPI(nonStreamingParams.model),
          },
          {
            signal: retryOptions.signal,
            timeout: timeoutMs,
          },
        )
      } catch (failure) {
        // A user-initiated abort is not a failure: propagate it untouched
        // and skip all logging.
        if (failure instanceof APIUserAbortError) throw failure

        // Instrumentation for errored (including timed-out) fallback
        // requests. Separates "fallback hit the bounded timeout" (this event)
        // from "fallback hung past container kill" (no event).
        logForDiagnosticsNoPII('error', 'cli_nonstreaming_fallback_error')
        const errorName =
          failure instanceof Error
            ? (failure.name as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS)
            : ('unknown' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS)
        logEvent('tengu_nonstreaming_fallback_error', {
          model:
            clientOptions.model as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
          error: errorName,
          attempt,
          timeout_ms: timeoutMs,
          request_id: (originatingRequestId ??
            'unknown') as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
        })
        throw failure
      }
    },
    {
      model: retryOptions.model,
      fallbackModel: retryOptions.fallbackModel,
      thinkingConfig: retryOptions.thinkingConfig,
      // fastMode is only forwarded while the fast-mode feature is enabled.
      ...(isFastModeEnabled() ? { fastMode: retryOptions.fastMode } : {}),
      signal: retryOptions.signal,
      initialConsecutive529Errors: retryOptions.initialConsecutive529Errors,
      querySource: retryOptions.querySource,
    },
  )

  // Pump the retry generator: surface system messages as they arrive and
  // hand back its return value once it completes.
  for (;;) {
    const step = await attempts.next()
    if (step.done) return step.value as BetaMessage
    if (step.value.type === 'system') {
      yield step.value
    }
  }
}
980
-
981
/**
 * Finds the request ID carried by the latest assistant message that has one.
 *
 * The ID links consecutive API requests in analytics (cache-hit-rate analysis
 * and incremental token tracking). Reading it from the message array instead
 * of global state keeps each query chain (main thread, subagent, teammate) on
 * its own request chain, and rollback/undo updates the value naturally.
 *
 * @param messages Conversation messages, oldest first.
 * @returns The most recent assistant `requestId`, or `undefined` when no
 *   assistant message carries one.
 */
function getPreviousRequestIdFromMessages(
  messages: Message[],
): string | undefined {
  // Walk backwards so the newest matching message wins.
  let idx = messages.length
  while (idx-- > 0) {
    const candidate = messages[idx]!
    if (candidate.type !== 'assistant') continue
    if (candidate.requestId) {
      return candidate.requestId
    }
  }
  return undefined
}
1001
-
1002
/** Type guard: true for `image` and `document` content blocks. */
function isMedia(
  block: BetaContentBlockParam,
): block is BetaImageBlockParam | BetaRequestDocumentBlock {
  switch (block.type) {
    case 'image':
    case 'document':
      return true
    default:
      return false
  }
}
1007
-
1008
/** Type guard: true only for `tool_result` content blocks. */
function isToolResult(
  block: BetaContentBlockParam,
): block is BetaToolResultBlockParam {
  const kind = block.type
  return kind === 'tool_result'
}
1013
-
1014
/**
 * Ensures messages contain at most `limit` media items (images + documents),
 * counting both top-level blocks and blocks nested in a tool_result's
 * array content.
 *
 * Excess items are removed strictly in document order: messages first to
 * last, blocks first to last within a message, nested items first to last
 * within a tool_result. The oldest media is therefore dropped and the most
 * recent preserved.
 *
 * The input is never mutated. When nothing has to be removed the original
 * array is returned; otherwise untouched messages and untouched blocks are
 * reused by reference and only modified ones are shallow-copied.
 *
 * @param messages Conversation messages, oldest first.
 * @param limit Maximum number of media items to keep.
 * @returns The messages with the oldest excess media items removed.
 */
export function stripExcessMediaItems(
  messages: (UserMessage | AssistantMessage)[],
  limit: number,
): (UserMessage | AssistantMessage)[] {
  // Pass 1: count every media item, top-level or nested in a tool_result.
  let mediaCount = 0
  for (const msg of messages) {
    const content = msg.message.content
    if (!Array.isArray(content)) continue
    for (const block of content) {
      if (isMedia(block)) mediaCount++
      if (isToolResult(block) && Array.isArray(block.content)) {
        for (const nested of block.content) {
          if (isMedia(nested)) mediaCount++
        }
      }
    }
  }

  let toRemove = mediaCount - limit
  if (toRemove <= 0) return messages

  // Pass 2: drop media in a single ordered walk so the removal budget is
  // always spent on the oldest items first.
  return messages.map(msg => {
    if (toRemove <= 0) return msg
    const content = msg.message.content
    if (!Array.isArray(content)) return msg

    const before = toRemove
    const stripped: BetaContentBlockParam[] = []
    for (const block of content) {
      if (toRemove > 0 && isMedia(block)) {
        toRemove--
        continue
      }
      if (toRemove > 0 && isToolResult(block) && Array.isArray(block.content)) {
        const kept = block.content.filter(nested => {
          if (toRemove > 0 && isMedia(nested)) {
            toRemove--
            return false
          }
          return true
        })
        // Reuse the block when nothing was stripped from it.
        stripped.push(
          kept.length === block.content.length
            ? block
            : { ...block, content: kept },
        )
        continue
      }
      stripped.push(block)
    }

    // Reuse the message when this pass removed nothing from it.
    return before === toRemove
      ? msg
      : {
          ...msg,
          message: { ...msg.message, content: stripped },
        }
  }) as (UserMessage | AssistantMessage)[]
}
1078
-
1079
- async function* queryModel(
1080
- messages: Message[],
1081
- systemPrompt: SystemPrompt,
1082
- thinkingConfig: ThinkingConfig,
1083
- tools: Tools,
1084
- signal: AbortSignal,
1085
- options: Options,
1086
- ): AsyncGenerator<
1087
- StreamEvent | AssistantMessage | SystemAPIErrorMessage,
1088
- void
1089
- > {
1090
- // Check cheap conditions first — the off-switch await blocks on GrowthBook
1091
- // init (~10ms). For non-Opus models (haiku, sonnet) this skips the await
1092
- // entirely. Subscribers don't hit this path at all.
1093
- if (
1094
- !isClaudeAISubscriber() &&
1095
- isNonCustomOpusModel(options.model) &&
1096
- (
1097
- await getDynamicConfig_BLOCKS_ON_INIT<{ activated: boolean }>(
1098
- 'tengu-off-switch',
1099
- {
1100
- activated: false,
1101
- },
1102
- )
1103
- ).activated
1104
- ) {
1105
- logEvent('tengu_off_switch_query', {})
1106
- yield getAssistantMessageFromError(
1107
- new Error(CUSTOM_OFF_SWITCH_MESSAGE),
1108
- options.model,
1109
- )
1110
- return
1111
- }
1112
-
1113
- // Derive previous request ID from the last assistant message in this query chain.
1114
- // This is scoped per message array (main thread, subagent, teammate each have their own),
1115
- // so concurrent agents don't clobber each other's request chain tracking.
1116
- // Also naturally handles rollback/undo since removed messages won't be in the array.
1117
- const previousRequestId = getPreviousRequestIdFromMessages(messages)
1118
-
1119
- const resolvedModel =
1120
- getAPIProvider() === 'bedrock' &&
1121
- options.model.includes('application-inference-profile')
1122
- ? ((await getInferenceProfileBackingModel(options.model)) ??
1123
- options.model)
1124
- : options.model
1125
-
1126
- queryCheckpoint('query_tool_schema_build_start')
1127
- const isAgenticQuery =
1128
- options.querySource.startsWith('repl_main_thread') ||
1129
- options.querySource.startsWith('agent:') ||
1130
- options.querySource === 'sdk' ||
1131
- options.querySource === 'hook_agent' ||
1132
- options.querySource === 'verification_agent'
1133
- const betas = getMergedBetas(options.model, { isAgenticQuery })
1134
-
1135
- // Always send the advisor beta header when advisor is enabled, so
1136
- // non-agentic queries (compact, side_question, extract_memories, etc.)
1137
- // can parse advisor server_tool_use blocks already in the conversation history.
1138
- if (isAdvisorEnabled()) {
1139
- betas.push(ADVISOR_BETA_HEADER)
1140
- }
1141
-
1142
- let advisorModel: string | undefined
1143
- if (isAgenticQuery && isAdvisorEnabled()) {
1144
- let advisorOption = options.advisorModel
1145
-
1146
- const advisorExperiment = getExperimentAdvisorModels()
1147
- if (advisorExperiment !== undefined) {
1148
- if (
1149
- normalizeModelStringForAPI(advisorExperiment.baseModel) ===
1150
- normalizeModelStringForAPI(options.model)
1151
- ) {
1152
- // Override the advisor model if the base model matches. We
1153
- // should only have experiment models if the user cannot
1154
- // configure it themselves.
1155
- advisorOption = advisorExperiment.advisorModel
1156
- }
1157
- }
1158
-
1159
- if (advisorOption) {
1160
- const normalizedAdvisorModel = normalizeModelStringForAPI(
1161
- parseUserSpecifiedModel(advisorOption),
1162
- )
1163
- if (!modelSupportsAdvisor(options.model)) {
1164
- logForDebugging(
1165
- `[AdvisorTool] Skipping advisor - base model ${options.model} does not support advisor`,
1166
- )
1167
- } else if (!isValidAdvisorModel(normalizedAdvisorModel)) {
1168
- logForDebugging(
1169
- `[AdvisorTool] Skipping advisor - ${normalizedAdvisorModel} is not a valid advisor model`,
1170
- )
1171
- } else {
1172
- advisorModel = normalizedAdvisorModel
1173
- logForDebugging(
1174
- `[AdvisorTool] Server-side tool enabled with ${advisorModel} as the advisor model`,
1175
- )
1176
- }
1177
- }
1178
- }
1179
-
1180
- // Check if tool search is enabled (checks mode, model support, and threshold for auto mode)
1181
- // This is async because it may need to calculate MCP tool description sizes for TstAuto mode
1182
- let useToolSearch = await isToolSearchEnabled(
1183
- options.model,
1184
- tools,
1185
- options.getToolPermissionContext,
1186
- options.agents,
1187
- 'query',
1188
- )
1189
-
1190
- const turnLocalAdapterToolNames = new Set(
1191
- selectTopKAdapterToolNamesForQuery(
1192
- latestUserTextForToolRetrieval(messages),
1193
- ),
1194
- )
1195
- if (options.toolChoice?.type === 'tool') {
1196
- turnLocalAdapterToolNames.add(options.toolChoice.name)
1197
- }
1198
- if (turnLocalAdapterToolNames.size > 0) {
1199
- logForDebugging(
1200
- `UMMAYA turn-local adapter schemas: ${[...turnLocalAdapterToolNames].join(', ')}`,
1201
- )
1202
- }
1203
- const requestTools =
1204
- turnLocalAdapterToolNames.size === 0
1205
- ? tools
1206
- : [
1207
- ...tools,
1208
- ...[...turnLocalAdapterToolNames]
1209
- .filter(toolName => !tools.some(tool => tool.name === toolName))
1210
- .map(toolName => getAdapterToolByName(toolName))
1211
- .filter((tool): tool is NonNullable<typeof tool> => Boolean(tool)),
1212
- ]
1213
-
1214
- // Precompute once — isDeferredTool does 2 GrowthBook lookups per call.
1215
- // Include turn-local synced adapters even if the long-lived TUI tool pool
1216
- // was assembled before the latest backend manifest frame arrived.
1217
- const deferredToolNames = new Set<string>()
1218
- if (useToolSearch) {
1219
- for (const t of requestTools) {
1220
- if (isDeferredTool(t)) deferredToolNames.add(t.name)
1221
- }
1222
- }
1223
-
1224
- // Even if tool search mode is enabled, skip if there are no deferred tools
1225
- // AND no MCP servers are still connecting. When servers are pending, keep
1226
- // ToolSearch available so the model can discover tools after they connect.
1227
- if (
1228
- useToolSearch &&
1229
- deferredToolNames.size === 0 &&
1230
- !options.hasPendingMcpServers
1231
- ) {
1232
- logForDebugging(
1233
- 'Tool search disabled: no deferred tools available to search',
1234
- )
1235
- useToolSearch = false
1236
- }
1237
- const suppressUmmayaToolCalls =
1238
- shouldSuppressUmmayaToolCallsForAnswerSynthesis({ messages, tools: requestTools })
1239
- if (suppressUmmayaToolCalls) {
1240
- logForDebugging('UMMAYA suppressing tool schemas for answer synthesis')
1241
- }
1242
-
1243
- // Filter out ToolSearchTool if tool search is not enabled for this model
1244
- // ToolSearchTool returns tool_reference blocks which unsupported models can't handle
1245
- let filteredTools: Tools
1246
-
1247
- if (suppressUmmayaToolCalls) {
1248
- filteredTools = []
1249
- } else if (useToolSearch) {
1250
- // Dynamic tool loading: Only include deferred tools that have been discovered
1251
- // via tool_reference blocks in the message history. This eliminates the need
1252
- // to predeclare all deferred tools upfront and removes limits on tool quantity.
1253
- const discoveredToolNames = extractDiscoveredToolNames(messages)
1254
-
1255
- filteredTools = requestTools.filter(tool => {
1256
- // 0.2.1 exposed the lightweight root primitives together with concrete
1257
- // adapter schemas. Keep that surface so K-EXAONE preserves CC-style
1258
- // prose→tool→prose loop painting, while still limiting concrete adapter
1259
- // schemas to the turn-local top-k set.
1260
- if (turnLocalAdapterToolNames.has(tool.name)) return true
1261
- // Always include non-deferred tools
1262
- if (!deferredToolNames.has(tool.name)) return true
1263
- // Always include ToolSearchTool (so it can discover more tools)
1264
- if (toolMatchesName(tool, TOOL_SEARCH_TOOL_NAME)) return true
1265
- // Only include deferred tools that have been discovered
1266
- return discoveredToolNames.has(tool.name)
1267
- })
1268
- } else {
1269
- filteredTools = requestTools.filter(t => {
1270
- if (toolMatchesName(t, TOOL_SEARCH_TOOL_NAME)) return false
1271
- // Keep non-deferred root primitives even when concrete top-k adapter
1272
- // schemas are available; this matches the released 0.2.1 loop surface.
1273
- if (isDeferredTool(t)) return turnLocalAdapterToolNames.has(t.name)
1274
- return true
1275
- })
1276
- }
1277
-
1278
- // Add tool search beta header if enabled - required for defer_loading to be accepted
1279
- // Header differs by provider: 1P/Foundry use advanced-tool-use, Vertex/Bedrock use tool-search-tool
1280
- // For Bedrock, this header must go in extraBodyParams, not the betas array
1281
- const toolSearchHeader = useToolSearch ? getToolSearchBetaHeader() : null
1282
- if (toolSearchHeader && getAPIProvider() !== 'bedrock') {
1283
- if (!betas.includes(toolSearchHeader)) {
1284
- betas.push(toolSearchHeader)
1285
- }
1286
- }
1287
-
1288
- // Determine if cached microcompact is enabled for this model.
1289
- // Computed once here (in async context) and captured by paramsFromContext.
1290
- // The beta header is also captured here to avoid a top-level import of the
1291
- // ant-only CACHE_EDITING_BETA_HEADER constant.
1292
- let cachedMCEnabled = false
1293
- let cacheEditingBetaHeader = ''
1294
- if (feature('CACHED_MICROCOMPACT')) {
1295
- const {
1296
- isCachedMicrocompactEnabled,
1297
- isModelSupportedForCacheEditing,
1298
- getCachedMCConfig,
1299
- } = await import('../compact/cachedMicrocompact.js')
1300
- const betas = await import('src/constants/betas.js')
1301
- cacheEditingBetaHeader = betas.CACHE_EDITING_BETA_HEADER
1302
- const featureEnabled = isCachedMicrocompactEnabled()
1303
- const modelSupported = isModelSupportedForCacheEditing(options.model)
1304
- cachedMCEnabled = featureEnabled && modelSupported
1305
- const config = getCachedMCConfig()
1306
- logForDebugging(
1307
- `Cached MC gate: enabled=${featureEnabled} modelSupported=${modelSupported} model=${options.model} supportedModels=${jsonStringify(config.supportedModels)}`,
1308
- )
1309
- }
1310
-
1311
- const useGlobalCacheFeature = shouldUseGlobalCacheScope()
1312
- const willDefer = (t: Tool) =>
1313
- useToolSearch && (deferredToolNames.has(t.name) || shouldDeferLspTool(t))
1314
- // MCP tools are per-user → dynamic tool section → can't globally cache.
1315
- // Only gate when an MCP tool will actually render (not defer_loading).
1316
- const needsToolBasedCacheMarker =
1317
- useGlobalCacheFeature &&
1318
- filteredTools.some(t => t.isMcp === true && !willDefer(t))
1319
-
1320
- // Ensure prompt_caching_scope beta header is present when global cache is enabled.
1321
- if (
1322
- useGlobalCacheFeature &&
1323
- !betas.includes(PROMPT_CACHING_SCOPE_BETA_HEADER)
1324
- ) {
1325
- betas.push(PROMPT_CACHING_SCOPE_BETA_HEADER)
1326
- }
1327
-
1328
- // Determine global cache strategy for logging
1329
- const globalCacheStrategy: GlobalCacheStrategy = useGlobalCacheFeature
1330
- ? needsToolBasedCacheMarker
1331
- ? 'none'
1332
- : 'system_prompt'
1333
- : 'none'
1334
-
1335
- // Build tool schemas, adding defer_loading for MCP tools when tool search is enabled
1336
- // Note: We pass the full `tools` list (not filteredTools) to toolToAPISchema so that
1337
- // ToolSearchTool's prompt can list ALL available MCP tools. The filtering only affects
1338
- // which tools are actually sent to the API, not what the model sees in tool descriptions.
1339
- const toolSchemas = await Promise.all(
1340
- filteredTools.map(tool =>
1341
- toolToAPISchema(tool, {
1342
- getToolPermissionContext: options.getToolPermissionContext,
1343
- tools,
1344
- agents: options.agents,
1345
- allowedAgentTypes: options.allowedAgentTypes,
1346
- model: options.model,
1347
- deferLoading: willDefer(tool),
1348
- }),
1349
- ),
1350
- )
1351
-
1352
- if (useToolSearch) {
1353
- const includedDeferredTools = count(filteredTools, t =>
1354
- deferredToolNames.has(t.name),
1355
- )
1356
- logForDebugging(
1357
- `Dynamic tool loading: ${includedDeferredTools}/${deferredToolNames.size} deferred tools included`,
1358
- )
1359
- }
1360
-
1361
- queryCheckpoint('query_tool_schema_build_end')
1362
-
1363
- // Normalize messages before building system prompt (needed for fingerprinting)
1364
- // Instrumentation: Track message count before normalization
1365
- logEvent('tengu_api_before_normalize', {
1366
- preNormalizedMessageCount: messages.length,
1367
- })
1368
-
1369
- queryCheckpoint('query_message_normalization_start')
1370
- let messagesForAPI = normalizeMessagesForAPI(messages, filteredTools)
1371
- queryCheckpoint('query_message_normalization_end')
1372
-
1373
- // Model-specific post-processing: strip tool-search-specific fields if the
1374
- // selected model doesn't support tool search.
1375
- //
1376
- // Why is this needed in addition to normalizeMessagesForAPI?
1377
- // - normalizeMessagesForAPI uses isToolSearchEnabledNoModelCheck() because it's
1378
- // called from ~20 places (analytics, feedback, sharing, etc.), many of which
1379
- // don't have model context. Adding model to its signature would be a large refactor.
1380
- // - This post-processing uses the model-aware isToolSearchEnabled() check
1381
- // - This handles mid-conversation model switching (e.g., Sonnet → Haiku) where
1382
- // stale tool-search fields from the previous model would cause 400 errors
1383
- //
1384
- // Note: For assistant messages, normalizeMessagesForAPI already normalized the
1385
- // tool inputs, so stripCallerFieldFromAssistantMessage only needs to remove the
1386
- // 'caller' field (not re-normalize inputs).
1387
- if (!useToolSearch) {
1388
- messagesForAPI = messagesForAPI.map(msg => {
1389
- switch (msg.type) {
1390
- case 'user':
1391
- // Strip tool_reference blocks from tool_result content
1392
- return stripToolReferenceBlocksFromUserMessage(msg)
1393
- case 'assistant':
1394
- // Strip 'caller' field from tool_use blocks
1395
- return stripCallerFieldFromAssistantMessage(msg)
1396
- default:
1397
- return msg
1398
- }
1399
- })
1400
- }
1401
-
1402
- // Repair tool_use/tool_result pairing mismatches that can occur when resuming
1403
- // remote/teleport sessions. Inserts synthetic error tool_results for orphaned
1404
- // tool_uses and strips orphaned tool_results referencing non-existent tool_uses.
1405
- messagesForAPI = ensureToolResultPairing(messagesForAPI)
1406
-
1407
- // Strip advisor blocks — the API rejects them without the beta header.
1408
- if (!betas.includes(ADVISOR_BETA_HEADER)) {
1409
- messagesForAPI = stripAdvisorBlocks(messagesForAPI)
1410
- }
1411
-
1412
- // Strip excess media items before making the API call.
1413
- // The API rejects requests with >100 media items but returns a confusing error.
1414
- // Rather than erroring (which is hard to recover from in Cowork/CCD), we
1415
- // silently drop the oldest media items to stay within the limit.
1416
- messagesForAPI = stripExcessMediaItems(
1417
- messagesForAPI,
1418
- API_MAX_MEDIA_PER_REQUEST,
1419
- )
1420
-
1421
- // Instrumentation: Track message count after normalization
1422
- logEvent('tengu_api_after_normalize', {
1423
- postNormalizedMessageCount: messagesForAPI.length,
1424
- })
1425
-
1426
- // Compute fingerprint from first user message for attribution.
1427
- // Must run BEFORE injecting synthetic messages (e.g. deferred tool names)
1428
- // so the fingerprint reflects the actual user input.
1429
- const fingerprint = computeFingerprintFromMessages(messagesForAPI)
1430
-
1431
- // When the delta attachment is enabled, deferred tools are announced
1432
- // via persisted deferred_tools_delta attachments instead of this
1433
- // ephemeral prepend (which busts cache whenever the pool changes).
1434
- if (useToolSearch && !isDeferredToolsDeltaEnabled()) {
1435
- const deferredToolList = tools
1436
- .filter(t => deferredToolNames.has(t.name))
1437
- .map(formatDeferredToolLine)
1438
- .sort()
1439
- .join('\n')
1440
- if (deferredToolList) {
1441
- messagesForAPI = [
1442
- createUserMessage({
1443
- content: `<available-deferred-tools>\n${deferredToolList}\n</available-deferred-tools>`,
1444
- isMeta: true,
1445
- }),
1446
- ...messagesForAPI,
1447
- ]
1448
- }
1449
- }
1450
-
1451
- // Chrome tool-search instructions: when the delta attachment is enabled,
1452
- // these are carried as a client-side block in mcp_instructions_delta
1453
- // (attachments.ts) instead of here. This per-request sys-prompt append
1454
- // busts the prompt cache when chrome connects late.
1455
- const hasChromeTools = filteredTools.some(t =>
1456
- isToolFromMcpServer(t.name, CLAUDE_IN_CHROME_MCP_SERVER_NAME),
1457
- )
1458
- const injectChromeHere =
1459
- useToolSearch && hasChromeTools && !isMcpInstructionsDeltaEnabled()
1460
-
1461
- // filter(Boolean) works by converting each element to a boolean - empty strings become false and are filtered out.
1462
- systemPrompt = asSystemPrompt(
1463
- [
1464
- getAttributionHeader(fingerprint),
1465
- getCLISyspromptPrefix({
1466
- isNonInteractive: options.isNonInteractiveSession,
1467
- hasAppendSystemPrompt: options.hasAppendSystemPrompt,
1468
- }),
1469
- ...systemPrompt,
1470
- ...(advisorModel ? [ADVISOR_TOOL_INSTRUCTIONS] : []),
1471
- ...(injectChromeHere ? [CHROME_TOOL_SEARCH_INSTRUCTIONS] : []),
1472
- ].filter(Boolean),
1473
- )
1474
-
1475
- // Prepend system prompt block for easy API identification
1476
- logAPIPrefix(systemPrompt)
1477
-
1478
- const enablePromptCaching =
1479
- options.enablePromptCaching ?? getPromptCachingEnabled(options.model)
1480
- const system = buildSystemPromptBlocks(systemPrompt, enablePromptCaching, {
1481
- skipGlobalCacheForSystemPrompt: needsToolBasedCacheMarker,
1482
- querySource: options.querySource,
1483
- })
1484
- const useBetas = betas.length > 0
1485
-
1486
- // Build minimal context for detailed tracing (when beta tracing is enabled)
1487
- // Note: The actual new_context message extraction is done in sessionTracing.ts using
1488
- // hash-based tracking per querySource (agent) from the messagesForAPI array
1489
- const extraToolSchemas = [...(options.extraToolSchemas ?? [])]
1490
- if (advisorModel) {
1491
- // Server tools must be in the tools array by API contract. Appended after
1492
- // toolSchemas (which carries the cache_control marker) so toggling /advisor
1493
- // only churns the small suffix, not the cached prefix.
1494
- extraToolSchemas.push({
1495
- type: 'advisor_20260301',
1496
- name: 'advisor',
1497
- model: advisorModel,
1498
- } as unknown as BetaToolUnion)
1499
- }
1500
- const allTools = [...toolSchemas, ...extraToolSchemas]
1501
-
1502
- const isFastMode =
1503
- isFastModeEnabled() &&
1504
- isFastModeAvailable() &&
1505
- !isFastModeCooldown() &&
1506
- isFastModeSupportedByModel(options.model) &&
1507
- !!options.fastMode
1508
-
1509
- // Sticky-on latches for dynamic beta headers. Each header, once first
1510
- // sent, keeps being sent for the rest of the session so mid-session
1511
- // toggles don't change the server-side cache key and bust ~50-70K tokens.
1512
- // Latches are cleared on /clear and /compact via clearBetaHeaderLatches().
1513
- // Per-call gates (isAgenticQuery, querySource===repl_main_thread) stay
1514
- // per-call so non-agentic queries keep their own stable header set.
1515
-
1516
- let afkHeaderLatched = getAfkModeHeaderLatched() === true
1517
- if (feature('TRANSCRIPT_CLASSIFIER')) {
1518
- if (
1519
- !afkHeaderLatched &&
1520
- isAgenticQuery &&
1521
- shouldIncludeFirstPartyOnlyBetas() &&
1522
- (autoModeStateModule?.isAutoModeActive() ?? false)
1523
- ) {
1524
- afkHeaderLatched = true
1525
- setAfkModeHeaderLatched(true)
1526
- }
1527
- }
1528
-
1529
- let fastModeHeaderLatched = getFastModeHeaderLatched() === true
1530
- if (!fastModeHeaderLatched && isFastMode) {
1531
- fastModeHeaderLatched = true
1532
- setFastModeHeaderLatched(true)
1533
- }
1534
-
1535
- let cacheEditingHeaderLatched = getCacheEditingHeaderLatched() === true
1536
- if (feature('CACHED_MICROCOMPACT')) {
1537
- if (
1538
- !cacheEditingHeaderLatched &&
1539
- cachedMCEnabled &&
1540
- getAPIProvider() === 'firstParty' &&
1541
- options.querySource === 'repl_main_thread'
1542
- ) {
1543
- cacheEditingHeaderLatched = true
1544
- setCacheEditingHeaderLatched(true)
1545
- }
1546
- }
1547
-
1548
- // Only latch from agentic queries so a classifier call doesn't flip the
1549
- // main thread's context_management mid-turn.
1550
- let thinkingClearLatched = getThinkingClearLatched() === true
1551
- if (!thinkingClearLatched && isAgenticQuery) {
1552
- const lastCompletion = getLastApiCompletionTimestamp()
1553
- if (
1554
- lastCompletion !== null &&
1555
- Date.now() - lastCompletion > CACHE_TTL_1HOUR_MS
1556
- ) {
1557
- thinkingClearLatched = true
1558
- setThinkingClearLatched(true)
1559
- }
1560
- }
1561
-
1562
- const effort = resolveAppliedEffort(options.model, options.effortValue)
1563
-
1564
- if (feature('PROMPT_CACHE_BREAK_DETECTION')) {
1565
- // Exclude defer_loading tools from the hash -- the API strips them from the
1566
- // prompt, so they never affect the actual cache key. Including them creates
1567
- // false-positive "tool schemas changed" breaks when tools are discovered or
1568
- // MCP servers reconnect.
1569
- const toolsForCacheDetection = allTools.filter(
1570
- t => !('defer_loading' in t && t.defer_loading),
1571
- )
1572
- // Capture everything that could affect the server-side cache key.
1573
- // Pass latched header values (not live state) so break detection
1574
- // reflects what we actually send, not what the user toggled.
1575
- recordPromptState({
1576
- system,
1577
- toolSchemas: toolsForCacheDetection,
1578
- querySource: options.querySource,
1579
- model: options.model,
1580
- agentId: options.agentId,
1581
- fastMode: fastModeHeaderLatched,
1582
- globalCacheStrategy,
1583
- betas,
1584
- autoModeActive: afkHeaderLatched,
1585
- isUsingOverage: currentLimits.isUsingOverage ?? false,
1586
- cachedMCEnabled: cacheEditingHeaderLatched,
1587
- effortValue: effort,
1588
- extraBodyParams: getExtraBodyParams(),
1589
- })
1590
- }
1591
-
1592
- const newContext: LLMRequestNewContext | undefined = isBetaTracingEnabled()
1593
- ? {
1594
- systemPrompt: systemPrompt.join('\n\n'),
1595
- querySource: options.querySource,
1596
- tools: jsonStringify(allTools),
1597
- }
1598
- : undefined
1599
-
1600
- // Capture the span so we can pass it to endLLMRequestSpan later
1601
- // This ensures responses are matched to the correct request when multiple requests run in parallel
1602
- const llmSpan = startLLMRequestSpan(
1603
- options.model,
1604
- newContext,
1605
- messagesForAPI,
1606
- isFastMode,
1607
- )
1608
-
1609
- const startIncludingRetries = Date.now()
1610
- let start = Date.now()
1611
- let attemptNumber = 0
1612
- const attemptStartTimes: number[] = []
1613
- let stream: Stream<BetaRawMessageStreamEvent> | undefined = undefined
1614
- let streamRequestId: string | null | undefined = undefined
1615
- let clientRequestId: string | undefined = undefined
1616
- // eslint-disable-next-line eslint-plugin-n/no-unsupported-features/node-builtins -- Response is available in Node 18+ and is used by the SDK
1617
- let streamResponse: Response | undefined = undefined
1618
-
1619
- // Release all stream resources to prevent native memory leaks.
1620
- // The Response object holds native TLS/socket buffers that live outside the
1621
- // V8 heap (observed on the Node.js/npm path; see GH #32920), so we must
1622
- // explicitly cancel and release it regardless of how the generator exits.
1623
- function releaseStreamResources(): void {
1624
- cleanupStream(stream)
1625
- stream = undefined
1626
- if (streamResponse) {
1627
- streamResponse.body?.cancel().catch(() => {})
1628
- streamResponse = undefined
1629
- }
1630
- }
1631
-
1632
- // Consume pending cache edits ONCE before paramsFromContext is defined.
1633
- // paramsFromContext is called multiple times (logging, retries), so consuming
1634
- // inside it would cause the first call to steal edits from subsequent calls.
1635
- const consumedCacheEdits = cachedMCEnabled ? consumePendingCacheEdits() : null
1636
- const consumedPinnedEdits = cachedMCEnabled ? getPinnedCacheEdits() : []
1637
-
1638
- // Capture the betas sent in the last API request, including the ones that
1639
- // were dynamically added, so we can log and send it to telemetry.
1640
- let lastRequestBetas: string[] | undefined
1641
-
1642
/**
 * Builds the request parameters for a single attempt.
 *
 * Starts from a copy of the shared `betas` list, appends whichever beta
 * headers this attempt needs, derives the output config, token limit,
 * thinking and temperature settings, stores the final beta list in
 * `lastRequestBetas`, and returns the assembled parameter object.
 *
 * @param retryContext Per-attempt state. This function reads its `model`,
 *   `maxTokensOverride` and `fastMode` fields.
 * @returns The parameter object for this attempt (model, messages, system,
 *   tools, betas, limits, thinking and any extra body params).
 */
const paramsFromContext = (retryContext: RetryContext) => {
  // Work on a copy so headers appended for this attempt never touch `betas`.
  const betasParams = [...betas]

  // Appends `header` unless it is already listed; reports whether it was added.
  const appendBetaOnce = (header: (typeof betasParams)[number]): boolean => {
    if (betasParams.includes(header)) {
      return false
    }
    betasParams.push(header)
    return true
  }

  // Sonnet 1M experiment: the header is appended dynamically. The membership
  // check comes first, so the experiment lookup only runs when the header is
  // still missing.
  if (!betasParams.includes(CONTEXT_1M_BETA_HEADER)) {
    if (getSonnet1mExpTreatmentEnabled(retryContext.model)) {
      betasParams.push(CONTEXT_1M_BETA_HEADER)
    }
  }

  // Bedrock gets the model-based betas plus the dynamically-added tool search
  // header; every other provider gets an empty list.
  const bedrockBetas =
    getAPIProvider() !== 'bedrock'
      ? []
      : [
          ...getBedrockExtraBodyParamsBetas(retryContext.model),
          ...(toolSearchHeader ? [toolSearchHeader] : []),
        ]
  const extraBodyParams = getExtraBodyParams(bedrockBetas)

  // Seed the output config from whatever the extra body params already carry.
  const inheritedOutputConfig =
    (extraBodyParams.output_config as BetaOutputConfig) ?? {}
  const outputConfig: BetaOutputConfig = { ...inheritedOutputConfig }

  configureEffortParams(
    effort,
    outputConfig,
    extraBodyParams,
    betasParams,
    options.model,
  )

  configureTaskBudgetParams(
    options.taskBudget,
    outputConfig as BetaOutputConfig & { task_budget?: TaskBudgetParam },
    betasParams,
  )

  // Fold outputFormat into output_config next to effort. Structured outputs
  // need their beta header per the SDK (see parse() in messages.mjs), so add
  // it when the model supports the feature.
  if (options.outputFormat && !('format' in outputConfig)) {
    outputConfig.format = options.outputFormat as BetaJSONOutputFormat
    if (modelSupportsStructuredOutputs(options.model)) {
      appendBetaOnce(STRUCTURED_OUTPUTS_BETA_HEADER)
    }
  }

  // The retry context wins: it course-corrects after exceeding the context
  // window limit.
  const maxTokens =
    retryContext?.maxTokensOverride ||
    options.maxOutputTokensOverride ||
    getMaxOutputTokensForModel(options.model)

  const thinkingRequested = !(
    thinkingConfig.type === 'disabled' ||
    isEnvTruthy(process.env.CLAUDE_CODE_DISABLE_THINKING)
  )
  let thinkingParam: BetaMessageStreamParams['thinking'] | undefined = undefined

  // IMPORTANT: Do not change the adaptive-vs-budget thinking selection below
  // without notifying the model launch DRI and research. This is a sensitive
  // setting that can greatly affect model quality and bashing.
  if (thinkingRequested && modelSupportsThinking(options.model)) {
    const useAdaptive =
      !isEnvTruthy(process.env.CLAUDE_CODE_DISABLE_ADAPTIVE_THINKING) &&
      modelSupportsAdaptiveThinking(options.model)
    if (useAdaptive) {
      // Adaptive-capable models always get adaptive thinking with no budget.
      thinkingParam = {
        type: 'adaptive',
      } satisfies BetaMessageStreamParams['thinking']
    } else {
      // Otherwise use the model's default budget unless one was specified
      // explicitly, and keep it strictly below the output token limit.
      const defaultBudget = getMaxThinkingTokensForModel(options.model)
      const explicitBudget =
        thinkingConfig.type === 'enabled'
          ? thinkingConfig.budgetTokens
          : undefined
      const chosenBudget =
        explicitBudget !== undefined ? explicitBudget : defaultBudget
      thinkingParam = {
        budget_tokens: Math.min(maxTokens - 1, chosenBudget),
        type: 'enabled',
      } satisfies BetaMessageStreamParams['thinking']
    }
  }

  // API context management strategies, if enabled.
  const contextManagement = getAPIContextManagement({
    hasThinking: thinkingRequested,
    isRedactThinkingActive: betasParams.includes(REDACT_THINKING_BETA_HEADER),
    clearAllThinking: thinkingClearLatched,
  })

  const enablePromptCaching =
    options.enablePromptCaching ?? getPromptCachingEnabled(retryContext.model)

  // Fast mode: the header is latched session-stable (cache-safe), while
  // `speed='fast'` stays dynamic so a cooldown suppresses the actual fast-mode
  // request without changing the cache key.
  const fastForThisAttempt =
    isFastModeEnabled() &&
    isFastModeAvailable() &&
    !isFastModeCooldown() &&
    isFastModeSupportedByModel(options.model) &&
    !!retryContext.fastMode
  const speed: BetaMessageStreamParams['speed'] = fastForThisAttempt
    ? 'fast'
    : undefined
  if (fastModeHeaderLatched) {
    appendBetaOnce(FAST_MODE_BETA_HEADER)
  }

  // AFK mode beta: latched once auto mode is first activated, but still gated
  // per call by isAgenticQuery so classifiers/compaction don't receive it.
  if (feature('TRANSCRIPT_CLASSIFIER')) {
    if (
      afkHeaderLatched &&
      shouldIncludeFirstPartyOnlyBetas() &&
      isAgenticQuery
    ) {
      appendBetaOnce(AFK_MODE_BETA_HEADER)
    }
  }

  // Cache editing beta: the header is latched session-stable; useCachedMC
  // (which controls cache_edits body behavior) stays live, so edits stop when
  // the feature disables while the header does not flip.
  const useCachedMC =
    cachedMCEnabled &&
    getAPIProvider() === 'firstParty' &&
    options.querySource === 'repl_main_thread'
  if (
    cacheEditingHeaderLatched &&
    getAPIProvider() === 'firstParty' &&
    options.querySource === 'repl_main_thread'
  ) {
    if (appendBetaOnce(cacheEditingBetaHeader)) {
      logForDebugging(
        'Cache editing beta header enabled for cached microcompact',
      )
    }
  }

  // Temperature is only sent when thinking is disabled — the API requires
  // temperature: 1 when thinking is enabled, which is already the default.
  const temperature = thinkingRequested
    ? undefined
    : (options.temperatureOverride ?? 1)

  lastRequestBetas = betasParams

  return {
    model: normalizeModelStringForAPI(options.model),
    messages: addCacheBreakpoints(
      messagesForAPI,
      enablePromptCaching,
      options.querySource,
      useCachedMC,
      consumedCacheEdits,
      consumedPinnedEdits,
      options.skipCacheWrite,
    ),
    system,
    tools: allTools,
    tool_choice: options.toolChoice,
    ...(useBetas && { betas: betasParams }),
    metadata: getAPIMetadata(),
    max_tokens: maxTokens,
    thinking: thinkingParam,
    ...(temperature !== undefined && { temperature }),
    ...(contextManagement &&
      useBetas &&
      betasParams.includes(CONTEXT_MANAGEMENT_BETA_HEADER) && {
        context_management: contextManagement,
      }),
    ...extraBodyParams,
    ...(Object.keys(outputConfig).length > 0 && {
      output_config: outputConfig,
    }),
    ...(speed !== undefined && { speed }),
    ...(options.reasoningMode !== undefined && {
      reasoning_mode: options.reasoningMode,
    }),
  }
}
1837
-
1838
- // Compute log scalars synchronously so the fire-and-forget .then() closure
1839
- // captures only primitives instead of paramsFromContext's full closure scope
1840
- // (messagesForAPI, system, allTools, betas — the entire request-building
1841
- // context), which would otherwise be pinned until the promise resolves.
1842
- {
1843
- const queryParams = paramsFromContext({
1844
- model: options.model,
1845
- thinkingConfig,
1846
- })
1847
- const logMessagesLength = queryParams.messages.length
1848
- const logBetas = useBetas ? (queryParams.betas ?? []) : []
1849
- const logThinkingType = queryParams.thinking?.type ?? 'disabled'
1850
- const logEffortValue = queryParams.output_config?.effort
1851
- void options.getToolPermissionContext().then(permissionContext => {
1852
- logAPIQuery({
1853
- model: options.model,
1854
- messagesLength: logMessagesLength,
1855
- temperature: options.temperatureOverride ?? 1,
1856
- betas: logBetas,
1857
- permissionMode: permissionContext.mode,
1858
- querySource: options.querySource,
1859
- queryTracking: options.queryTracking,
1860
- thinkingType: logThinkingType,
1861
- effortValue: logEffortValue,
1862
- fastMode: isFastMode,
1863
- previousRequestId,
1864
- })
1865
- })
1866
- }
1867
-
1868
- const newMessages: AssistantMessage[] = []
1869
- let ttftMs = 0
1870
- let partialMessage: BetaMessage | undefined = undefined
1871
- const contentBlocks: (BetaContentBlock | ConnectorTextBlock)[] = []
1872
- let usage: NonNullableUsage = EMPTY_USAGE
1873
- let costUSD = 0
1874
- let stopReason: BetaStopReason | null = null
1875
- let didFallBackToNonStreaming = false
1876
- let fallbackMessage: AssistantMessage | undefined
1877
- let maxOutputTokens = 0
1878
- let responseHeaders: globalThis.Headers | undefined = undefined
1879
- let research: unknown = undefined
1880
- let isFastModeRequest = isFastMode // Keep separate state as it may change if falling back
1881
- let isAdvisorInProgress = false
1882
-
1883
- try {
1884
- queryCheckpoint('query_client_creation_start')
1885
- const generator = withRetry(
1886
- () =>
1887
- getAnthropicClient({
1888
- maxRetries: 0, // Disabled auto-retry in favor of manual implementation
1889
- model: options.model,
1890
- fetchOverride: options.fetchOverride,
1891
- source: options.querySource,
1892
- }),
1893
- async (anthropic, attempt, context) => {
1894
- attemptNumber = attempt
1895
- isFastModeRequest = context.fastMode ?? false
1896
- start = Date.now()
1897
- attemptStartTimes.push(start)
1898
- // Client has been created by withRetry's getClient() call. This fires
1899
- // once per attempt; on retries the client is usually cached (withRetry
1900
- // only calls getClient() again after auth errors), so the delta from
1901
- // client_creation_start is meaningful on attempt 1.
1902
- queryCheckpoint('query_client_creation_end')
1903
-
1904
- const params = paramsFromContext(context)
1905
- captureAPIRequest(params, options.querySource) // Capture for bug reports
1906
-
1907
- maxOutputTokens = params.max_tokens
1908
-
1909
- // Fire immediately before the fetch is dispatched. .withResponse() below
1910
- // awaits until response headers arrive, so this MUST be before the await
1911
- // or the "Network TTFB" phase measurement is wrong.
1912
- queryCheckpoint('query_api_request_sent')
1913
- if (!options.agentId) {
1914
- headlessProfilerCheckpoint('api_request_sent')
1915
- }
1916
-
1917
- // Generate and track client request ID so timeouts (which return no
1918
- // server request ID) can still be correlated with server logs.
1919
- // First-party only — 3P providers don't log it (inc-4029 class).
1920
- clientRequestId =
1921
- getAPIProvider() === 'firstParty' && isFirstPartyAnthropicBaseUrl()
1922
- ? randomUUID()
1923
- : undefined
1924
-
1925
- // Use raw stream instead of BetaMessageStream to avoid O(n²) partial JSON parsing
1926
- // BetaMessageStream calls partialParse() on every input_json_delta, which we don't need
1927
- // since we handle tool input accumulation ourselves
1928
- // biome-ignore lint/plugin: main conversation loop handles attribution separately
1929
- const result = await anthropic.beta.messages
1930
- .create(
1931
- { ...params, stream: true },
1932
- {
1933
- signal,
1934
- ...(clientRequestId && {
1935
- headers: { [CLIENT_REQUEST_ID_HEADER]: clientRequestId },
1936
- }),
1937
- },
1938
- )
1939
- .withResponse()
1940
- queryCheckpoint('query_response_headers_received')
1941
- streamRequestId = result.request_id
1942
- streamResponse = result.response
1943
- return result.data
1944
- },
1945
- {
1946
- model: options.model,
1947
- fallbackModel: options.fallbackModel,
1948
- thinkingConfig,
1949
- ...(isFastModeEnabled() ? { fastMode: isFastMode } : false),
1950
- signal,
1951
- querySource: options.querySource,
1952
- },
1953
- )
1954
-
1955
- let e
1956
- do {
1957
- e = await generator.next()
1958
-
1959
- // yield API error messages (the stream has a 'controller' property, error messages don't)
1960
- if (!('controller' in e.value)) {
1961
- yield e.value
1962
- }
1963
- } while (!e.done)
1964
- stream = e.value as Stream<BetaRawMessageStreamEvent>
1965
-
1966
- // reset state
1967
- newMessages.length = 0
1968
- ttftMs = 0
1969
- partialMessage = undefined
1970
- contentBlocks.length = 0
1971
- usage = EMPTY_USAGE
1972
- stopReason = null
1973
- isAdvisorInProgress = false
1974
-
1975
- // Streaming idle timeout watchdog: abort the stream if no chunks arrive
1976
- // for STREAM_IDLE_TIMEOUT_MS. Unlike the stall detection below (which only
1977
- // fires when the *next* chunk arrives), this uses setTimeout to actively
1978
- // kill hung streams. Without this, a silently dropped connection can hang
1979
- // the session indefinitely since the SDK's request timeout only covers the
1980
- // initial fetch(), not the streaming body.
1981
- const streamWatchdogEnabled = isEnvTruthy(
1982
- process.env.CLAUDE_ENABLE_STREAM_WATCHDOG,
1983
- )
1984
- const STREAM_IDLE_TIMEOUT_MS =
1985
- parseInt(process.env.CLAUDE_STREAM_IDLE_TIMEOUT_MS || '', 10) || 90_000
1986
- const STREAM_IDLE_WARNING_MS = STREAM_IDLE_TIMEOUT_MS / 2
1987
- let streamIdleAborted = false
1988
- // performance.now() snapshot when watchdog fires, for measuring abort propagation delay
1989
- let streamWatchdogFiredAt: number | null = null
1990
- let streamIdleWarningTimer: ReturnType<typeof setTimeout> | null = null
1991
- let streamIdleTimer: ReturnType<typeof setTimeout> | null = null
1992
/**
 * Cancels the pending idle-warning and idle-timeout timers, if any, and
 * resets both handles to `null`.
 */
function clearStreamIdleTimers(): void {
  // Snapshot the handles, null the shared slots, then cancel what was pending.
  const pendingWarning = streamIdleWarningTimer
  const pendingTimeout = streamIdleTimer
  streamIdleWarningTimer = null
  streamIdleTimer = null
  if (pendingWarning !== null) {
    clearTimeout(pendingWarning)
  }
  if (pendingTimeout !== null) {
    clearTimeout(pendingTimeout)
  }
}
2002
/**
 * Re-arms the streaming idle watchdog.
 *
 * Any pending timers are always cancelled first. When the watchdog is
 * enabled, two fresh timers are scheduled: a warning after
 * STREAM_IDLE_WARNING_MS and an abort after STREAM_IDLE_TIMEOUT_MS.
 */
function resetStreamIdleTimer(): void {
  clearStreamIdleTimers()
  if (streamWatchdogEnabled) {
    // Warning stage: log only, the stream keeps running.
    const onIdleWarning = (idleMs: number): void => {
      logForDebugging(
        `Streaming idle warning: no chunks received for ${idleMs / 1000}s`,
        { level: 'warn' },
      )
      logForDiagnosticsNoPII('warn', 'cli_streaming_idle_warning')
    }

    // Timeout stage: flag the abort, record when the watchdog fired, emit
    // diagnostics, then release the stream resources.
    const onIdleTimeout = (): void => {
      streamIdleAborted = true
      streamWatchdogFiredAt = performance.now()
      logForDebugging(
        `Streaming idle timeout: no chunks received for ${STREAM_IDLE_TIMEOUT_MS / 1000}s, aborting stream`,
        { level: 'error' },
      )
      logForDiagnosticsNoPII('error', 'cli_streaming_idle_timeout')
      logEvent('tengu_streaming_idle_timeout', {
        model:
          options.model as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
        request_id: (streamRequestId ??
          'unknown') as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
        timeout_ms: STREAM_IDLE_TIMEOUT_MS,
      })
      releaseStreamResources()
    }

    // The warning delay is also forwarded as the callback argument so the
    // log message reports it.
    streamIdleWarningTimer = setTimeout(
      onIdleWarning,
      STREAM_IDLE_WARNING_MS,
      STREAM_IDLE_WARNING_MS,
    )
    streamIdleTimer = setTimeout(onIdleTimeout, STREAM_IDLE_TIMEOUT_MS)
  }
}
2036
- resetStreamIdleTimer()
2037
-
2038
- startSessionActivity('api_call')
2039
- try {
2040
- // stream in and accumulate state
2041
- let isFirstChunk = true
2042
- let lastEventTime: number | null = null // Set after first chunk to avoid measuring TTFB as a stall
2043
- const STALL_THRESHOLD_MS = 30_000 // 30 seconds
2044
- let totalStallTime = 0
2045
- let stallCount = 0
2046
-
2047
- for await (const part of stream) {
2048
- resetStreamIdleTimer()
2049
- const now = Date.now()
2050
-
2051
- // Detect and log streaming stalls (only after first event to avoid counting TTFB)
2052
- if (lastEventTime !== null) {
2053
- const timeSinceLastEvent = now - lastEventTime
2054
- if (timeSinceLastEvent > STALL_THRESHOLD_MS) {
2055
- stallCount++
2056
- totalStallTime += timeSinceLastEvent
2057
- logForDebugging(
2058
- `Streaming stall detected: ${(timeSinceLastEvent / 1000).toFixed(1)}s gap between events (stall #${stallCount})`,
2059
- { level: 'warn' },
2060
- )
2061
- logEvent('tengu_streaming_stall', {
2062
- stall_duration_ms: timeSinceLastEvent,
2063
- stall_count: stallCount,
2064
- total_stall_time_ms: totalStallTime,
2065
- event_type:
2066
- part.type as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2067
- model:
2068
- options.model as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2069
- request_id: (streamRequestId ??
2070
- 'unknown') as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2071
- })
2072
- }
2073
- }
2074
- lastEventTime = now
2075
-
2076
- if (isFirstChunk) {
2077
- logForDebugging('Stream started - received first chunk')
2078
- queryCheckpoint('query_first_chunk_received')
2079
- if (!options.agentId) {
2080
- headlessProfilerCheckpoint('first_chunk')
2081
- }
2082
- endQueryProfile()
2083
- isFirstChunk = false
2084
- }
2085
-
2086
- switch (part.type) {
2087
- case 'message_start': {
2088
- partialMessage = part.message
2089
- ttftMs = Date.now() - start
2090
- usage = updateUsage(usage, part.message?.usage)
2091
- // Capture research from message_start if available (internal only).
2092
- // Always overwrite with the latest value.
2093
- if (
2094
- process.env.USER_TYPE === 'ant' &&
2095
- 'research' in (part.message as unknown as Record<string, unknown>)
2096
- ) {
2097
- research = (part.message as unknown as Record<string, unknown>)
2098
- .research
2099
- }
2100
- break
2101
- }
2102
- case 'content_block_start':
2103
- switch (part.content_block.type) {
2104
- case 'tool_use':
2105
- contentBlocks[part.index] = {
2106
- ...part.content_block,
2107
- input: '',
2108
- }
2109
- break
2110
- case 'server_tool_use':
2111
- contentBlocks[part.index] = {
2112
- ...part.content_block,
2113
- input: '' as unknown as { [key: string]: unknown },
2114
- }
2115
- if ((part.content_block.name as string) === 'advisor') {
2116
- isAdvisorInProgress = true
2117
- logForDebugging(`[AdvisorTool] Advisor tool called`)
2118
- logEvent('tengu_advisor_tool_call', {
2119
- model:
2120
- options.model as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2121
- advisor_model: (advisorModel ??
2122
- 'unknown') as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2123
- })
2124
- }
2125
- break
2126
- case 'text':
2127
- contentBlocks[part.index] = {
2128
- ...part.content_block,
2129
- // awkwardly, the sdk sometimes returns text as part of a
2130
- // content_block_start message, then returns the same text
2131
- // again in a content_block_delta message. we ignore it here
2132
- // since there doesn't seem to be a way to detect when a
2133
- // content_block_delta message duplicates the text.
2134
- text: '',
2135
- }
2136
- break
2137
- case 'thinking':
2138
- contentBlocks[part.index] = {
2139
- ...part.content_block,
2140
- // also awkward
2141
- thinking: '',
2142
- // initialize signature to ensure field exists even if signature_delta never arrives
2143
- signature: '',
2144
- }
2145
- break
2146
- default:
2147
- // even more awkwardly, the sdk mutates the contents of text blocks
2148
- // as it works. we want the blocks to be immutable, so that we can
2149
- // accumulate state ourselves.
2150
- contentBlocks[part.index] = { ...part.content_block }
2151
- if (
2152
- (part.content_block.type as string) === 'advisor_tool_result'
2153
- ) {
2154
- isAdvisorInProgress = false
2155
- logForDebugging(`[AdvisorTool] Advisor tool result received`)
2156
- }
2157
- break
2158
- }
2159
- break
2160
- case 'content_block_delta': {
2161
- const contentBlock = contentBlocks[part.index]
2162
- const delta = part.delta as typeof part.delta | ConnectorTextDelta
2163
- if (!contentBlock) {
2164
- logEvent('tengu_streaming_error', {
2165
- error_type:
2166
- 'content_block_not_found_delta' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2167
- part_type:
2168
- part.type as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2169
- part_index: part.index,
2170
- })
2171
- throw new RangeError('Content block not found')
2172
- }
2173
- if (
2174
- feature('CONNECTOR_TEXT') &&
2175
- delta.type === 'connector_text_delta'
2176
- ) {
2177
- if (contentBlock.type !== 'connector_text') {
2178
- logEvent('tengu_streaming_error', {
2179
- error_type:
2180
- 'content_block_type_mismatch_connector_text' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2181
- expected_type:
2182
- 'connector_text' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2183
- actual_type:
2184
- contentBlock.type as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2185
- })
2186
- throw new Error('Content block is not a connector_text block')
2187
- }
2188
- contentBlock.connector_text += delta.connector_text
2189
- } else {
2190
- switch (delta.type) {
2191
- case 'citations_delta':
2192
- // TODO: handle citations
2193
- break
2194
- case 'input_json_delta':
2195
- if (
2196
- contentBlock.type !== 'tool_use' &&
2197
- contentBlock.type !== 'server_tool_use'
2198
- ) {
2199
- logEvent('tengu_streaming_error', {
2200
- error_type:
2201
- 'content_block_type_mismatch_input_json' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2202
- expected_type:
2203
- 'tool_use' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2204
- actual_type:
2205
- contentBlock.type as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2206
- })
2207
- throw new Error('Content block is not a input_json block')
2208
- }
2209
- if (typeof contentBlock.input !== 'string') {
2210
- logEvent('tengu_streaming_error', {
2211
- error_type:
2212
- 'content_block_input_not_string' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2213
- input_type:
2214
- typeof contentBlock.input as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2215
- })
2216
- throw new Error('Content block input is not a string')
2217
- }
2218
- contentBlock.input += delta.partial_json
2219
- break
2220
- case 'text_delta':
2221
- if (contentBlock.type !== 'text') {
2222
- logEvent('tengu_streaming_error', {
2223
- error_type:
2224
- 'content_block_type_mismatch_text' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2225
- expected_type:
2226
- 'text' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2227
- actual_type:
2228
- contentBlock.type as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2229
- })
2230
- throw new Error('Content block is not a text block')
2231
- }
2232
- contentBlock.text += delta.text
2233
- break
2234
- case 'signature_delta':
2235
- if (
2236
- feature('CONNECTOR_TEXT') &&
2237
- contentBlock.type === 'connector_text'
2238
- ) {
2239
- contentBlock.signature = delta.signature
2240
- break
2241
- }
2242
- if (contentBlock.type !== 'thinking') {
2243
- logEvent('tengu_streaming_error', {
2244
- error_type:
2245
- 'content_block_type_mismatch_thinking_signature' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2246
- expected_type:
2247
- 'thinking' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2248
- actual_type:
2249
- contentBlock.type as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2250
- })
2251
- throw new Error('Content block is not a thinking block')
2252
- }
2253
- contentBlock.signature = delta.signature
2254
- break
2255
- case 'thinking_delta':
2256
- if (contentBlock.type !== 'thinking') {
2257
- logEvent('tengu_streaming_error', {
2258
- error_type:
2259
- 'content_block_type_mismatch_thinking_delta' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2260
- expected_type:
2261
- 'thinking' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2262
- actual_type:
2263
- contentBlock.type as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2264
- })
2265
- throw new Error('Content block is not a thinking block')
2266
- }
2267
- contentBlock.thinking += delta.thinking
2268
- break
2269
- }
2270
- }
2271
- // Capture research from content_block_delta if available (internal only).
2272
- // Always overwrite with the latest value.
2273
- if (process.env.USER_TYPE === 'ant' && 'research' in part) {
2274
- research = (part as { research: unknown }).research
2275
- }
2276
- break
2277
- }
2278
- case 'content_block_stop': {
2279
- const contentBlock = contentBlocks[part.index]
2280
- if (!contentBlock) {
2281
- logEvent('tengu_streaming_error', {
2282
- error_type:
2283
- 'content_block_not_found_stop' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2284
- part_type:
2285
- part.type as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2286
- part_index: part.index,
2287
- })
2288
- throw new RangeError('Content block not found')
2289
- }
2290
- if (!partialMessage) {
2291
- logEvent('tengu_streaming_error', {
2292
- error_type:
2293
- 'partial_message_not_found' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2294
- part_type:
2295
- part.type as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2296
- })
2297
- throw new Error('Message not found')
2298
- }
2299
- const m: AssistantMessage = {
2300
- message: {
2301
- ...partialMessage,
2302
- content: normalizeContentFromAPI(
2303
- [contentBlock] as BetaContentBlock[],
2304
- tools,
2305
- options.agentId,
2306
- ),
2307
- },
2308
- requestId: streamRequestId ?? undefined,
2309
- type: 'assistant',
2310
- uuid: randomUUID(),
2311
- timestamp: new Date().toISOString(),
2312
- ...(process.env.USER_TYPE === 'ant' &&
2313
- research !== undefined && { research }),
2314
- ...(advisorModel && { advisorModel }),
2315
- }
2316
- newMessages.push(m)
2317
- yield m
2318
- break
2319
- }
2320
- case 'message_delta': {
2321
- usage = updateUsage(usage, part.usage)
2322
- // Capture research from message_delta if available (internal only).
2323
- // Always overwrite with the latest value. Also write back to
2324
- // already-yielded messages since message_delta arrives after
2325
- // content_block_stop.
2326
- if (
2327
- process.env.USER_TYPE === 'ant' &&
2328
- 'research' in (part as unknown as Record<string, unknown>)
2329
- ) {
2330
- research = (part as unknown as Record<string, unknown>).research
2331
- for (const msg of newMessages) {
2332
- msg.research = research
2333
- }
2334
- }
2335
-
2336
- // Write final usage and stop_reason back to the last yielded
2337
- // message. Messages are created at content_block_stop from
2338
- // partialMessage, which was set at message_start before any tokens
2339
- // were generated (output_tokens: 0, stop_reason: null).
2340
- // message_delta arrives after content_block_stop with the real
2341
- // values.
2342
- //
2343
- // IMPORTANT: Use direct property mutation, not object replacement.
2344
- // The transcript write queue holds a reference to message.message
2345
- // and serializes it lazily (100ms flush interval). Object
2346
- // replacement ({ ...lastMsg.message, usage }) would disconnect
2347
- // the queued reference; direct mutation ensures the transcript
2348
- // captures the final values.
2349
- stopReason = part.delta.stop_reason
2350
-
2351
- const lastMsg = newMessages.at(-1)
2352
- if (lastMsg) {
2353
- lastMsg.message.usage = usage
2354
- lastMsg.message.stop_reason = stopReason
2355
- }
2356
-
2357
- // Update cost
2358
- const costUSDForPart = calculateUSDCost(resolvedModel, usage)
2359
- costUSD += addToTotalSessionCost(
2360
- costUSDForPart,
2361
- usage,
2362
- options.model,
2363
- )
2364
-
2365
- const refusalMessage = getErrorMessageIfRefusal(
2366
- part.delta.stop_reason,
2367
- options.model,
2368
- )
2369
- if (refusalMessage) {
2370
- yield refusalMessage
2371
- }
2372
-
2373
- if (stopReason === 'max_tokens') {
2374
- logEvent('tengu_max_tokens_reached', {
2375
- max_tokens: maxOutputTokens,
2376
- })
2377
- yield createAssistantAPIErrorMessage({
2378
- content: `${API_ERROR_MESSAGE_PREFIX}: Ummaya's response exceeded the ${
2379
- maxOutputTokens
2380
- } output token maximum. To configure this behavior, set the CLAUDE_CODE_MAX_OUTPUT_TOKENS environment variable.`,
2381
- apiError: 'max_output_tokens',
2382
- error: 'max_output_tokens',
2383
- })
2384
- }
2385
-
2386
- if (stopReason === 'model_context_window_exceeded') {
2387
- logEvent('tengu_context_window_exceeded', {
2388
- max_tokens: maxOutputTokens,
2389
- output_tokens: usage.output_tokens,
2390
- })
2391
- // Reuse the max_output_tokens recovery path — from the model's
2392
- // perspective, both mean "response was cut off, continue from
2393
- // where you left off."
2394
- yield createAssistantAPIErrorMessage({
2395
- content: `${API_ERROR_MESSAGE_PREFIX}: The model has reached its context window limit.`,
2396
- apiError: 'max_output_tokens',
2397
- error: 'max_output_tokens',
2398
- })
2399
- }
2400
- break
2401
- }
2402
- case 'message_stop':
2403
- break
2404
- }
2405
-
2406
- yield {
2407
- type: 'stream_event',
2408
- event: part,
2409
- ...(part.type === 'message_start' ? { ttftMs } : undefined),
2410
- }
2411
- }
2412
- // Clear the idle timeout watchdog now that the stream loop has exited
2413
- clearStreamIdleTimers()
2414
-
2415
- // If the stream was aborted by our idle timeout watchdog, fall back to
2416
- // non-streaming retry rather than treating it as a completed stream.
2417
- if (streamIdleAborted) {
2418
- // Instrumentation: proves the for-await exited after the watchdog fired
2419
- // (vs. hung forever). exit_delay_ms measures abort propagation latency:
2420
- // 0-10ms = abort worked; >>1000ms = something else woke the loop.
2421
- const exitDelayMs =
2422
- streamWatchdogFiredAt !== null
2423
- ? Math.round(performance.now() - streamWatchdogFiredAt)
2424
- : -1
2425
- logForDiagnosticsNoPII(
2426
- 'info',
2427
- 'cli_stream_loop_exited_after_watchdog_clean',
2428
- )
2429
- logEvent('tengu_stream_loop_exited_after_watchdog', {
2430
- request_id: (streamRequestId ??
2431
- 'unknown') as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2432
- exit_delay_ms: exitDelayMs,
2433
- exit_path:
2434
- 'clean' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2435
- model:
2436
- options.model as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2437
- })
2438
- // Prevent double-emit: this throw lands in the catch block below,
2439
- // whose exit_path='error' probe guards on streamWatchdogFiredAt.
2440
- streamWatchdogFiredAt = null
2441
- throw new Error('Stream idle timeout - no chunks received')
2442
- }
2443
-
2444
- // Detect when the stream completed without producing any assistant messages.
2445
- // This covers two proxy failure modes:
2446
- // 1. No events at all (!partialMessage): proxy returned 200 with non-SSE body
2447
- // 2. Partial events (partialMessage set but no content blocks completed AND
2448
- // no stop_reason received): proxy returned message_start but stream ended
2449
- // before content_block_stop and before message_delta with stop_reason
2450
- // BetaMessageStream had the first check in _endRequest() but the raw Stream
2451
- // does not - without it the generator silently returns no assistant messages,
2452
- // causing "Execution error" in -p mode.
2453
- // Note: We must check stopReason to avoid false positives. For example, with
2454
- // structured output (--json-schema), the model calls a StructuredOutput tool
2455
- // on turn 1, then on turn 2 responds with end_turn and no content blocks.
2456
- // That's a legitimate empty response, not an incomplete stream.
2457
- if (!partialMessage || (newMessages.length === 0 && !stopReason)) {
2458
- logForDebugging(
2459
- !partialMessage
2460
- ? 'Stream completed without receiving message_start event - triggering non-streaming fallback'
2461
- : 'Stream completed with message_start but no content blocks completed - triggering non-streaming fallback',
2462
- { level: 'error' },
2463
- )
2464
- logEvent('tengu_stream_no_events', {
2465
- model:
2466
- options.model as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2467
- request_id: (streamRequestId ??
2468
- 'unknown') as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2469
- })
2470
- throw new Error('Stream ended without receiving any events')
2471
- }
2472
-
2473
- // Log summary if any stalls occurred during streaming
2474
- if (stallCount > 0) {
2475
- logForDebugging(
2476
- `Streaming completed with ${stallCount} stall(s), total stall time: ${(totalStallTime / 1000).toFixed(1)}s`,
2477
- { level: 'warn' },
2478
- )
2479
- logEvent('tengu_streaming_stall_summary', {
2480
- stall_count: stallCount,
2481
- total_stall_time_ms: totalStallTime,
2482
- model:
2483
- options.model as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2484
- request_id: (streamRequestId ??
2485
- 'unknown') as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2486
- })
2487
- }
2488
-
2489
- // Check if the cache actually broke based on response tokens
2490
- if (feature('PROMPT_CACHE_BREAK_DETECTION')) {
2491
- void checkResponseForCacheBreak(
2492
- options.querySource,
2493
- usage.cache_read_input_tokens,
2494
- usage.cache_creation_input_tokens,
2495
- messages,
2496
- options.agentId,
2497
- streamRequestId,
2498
- )
2499
- }
2500
-
2501
- // Process fallback percentage header and quota status if available
2502
- // streamResponse is set when the stream is created in the withRetry callback above
2503
- // TypeScript's control flow analysis can't track that streamResponse is set in the callback
2504
- // eslint-disable-next-line eslint-plugin-n/no-unsupported-features/node-builtins
2505
- const resp = streamResponse as unknown as Response | undefined
2506
- if (resp) {
2507
- extractQuotaStatusFromHeaders(resp.headers)
2508
- // Store headers for gateway detection
2509
- responseHeaders = resp.headers
2510
- }
2511
- } catch (streamingError) {
2512
- // Clear the idle timeout watchdog on error path too
2513
- clearStreamIdleTimers()
2514
-
2515
- // Instrumentation: if the watchdog had already fired and the for-await
2516
- // threw (rather than exiting cleanly), record that the loop DID exit and
2517
- // how long after the watchdog. Distinguishes true hangs from error exits.
2518
- if (streamIdleAborted && streamWatchdogFiredAt !== null) {
2519
- const exitDelayMs = Math.round(
2520
- performance.now() - streamWatchdogFiredAt,
2521
- )
2522
- logForDiagnosticsNoPII(
2523
- 'info',
2524
- 'cli_stream_loop_exited_after_watchdog_error',
2525
- )
2526
- logEvent('tengu_stream_loop_exited_after_watchdog', {
2527
- request_id: (streamRequestId ??
2528
- 'unknown') as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2529
- exit_delay_ms: exitDelayMs,
2530
- exit_path:
2531
- 'error' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2532
- error_name:
2533
- streamingError instanceof Error
2534
- ? (streamingError.name as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS)
2535
- : ('unknown' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS),
2536
- model:
2537
- options.model as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2538
- })
2539
- }
2540
-
2541
- if (streamingError instanceof APIUserAbortError) {
2542
- // Check if the abort signal was triggered by the user (ESC key)
2543
- // If the signal is aborted, it's a user-initiated abort
2544
- // If not, it's likely a timeout from the SDK
2545
- if (signal.aborted) {
2546
- // This is a real user abort (ESC key was pressed)
2547
- logForDebugging(
2548
- `Streaming aborted by user: ${errorMessage(streamingError)}`,
2549
- )
2550
- if (isAdvisorInProgress) {
2551
- logEvent('tengu_advisor_tool_interrupted', {
2552
- model:
2553
- options.model as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2554
- advisor_model: (advisorModel ??
2555
- 'unknown') as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2556
- })
2557
- }
2558
- throw streamingError
2559
- } else {
2560
- // The SDK threw APIUserAbortError but our signal wasn't aborted
2561
- // This means it's a timeout from the SDK's internal timeout
2562
- logForDebugging(
2563
- `Streaming timeout (SDK abort): ${streamingError.message}`,
2564
- { level: 'error' },
2565
- )
2566
- // Throw a more specific error for timeout
2567
- throw new APIConnectionTimeoutError({ message: 'Request timed out' })
2568
- }
2569
- }
2570
-
2571
- // When the flag is enabled, skip the non-streaming fallback and let the
2572
- // error propagate to withRetry. The mid-stream fallback causes double tool
2573
- // execution when streaming tool execution is active: the partial stream
2574
- // starts a tool, then the non-streaming retry produces the same tool_use
2575
- // and runs it again. See inc-4258.
2576
- const disableFallback =
2577
- isEnvTruthy(process.env.CLAUDE_CODE_DISABLE_NONSTREAMING_FALLBACK) ||
2578
- getFeatureValue_CACHED_MAY_BE_STALE(
2579
- 'tengu_disable_streaming_to_non_streaming_fallback',
2580
- false,
2581
- )
2582
-
2583
- if (disableFallback) {
2584
- logForDebugging(
2585
- `Error streaming (non-streaming fallback disabled): ${errorMessage(streamingError)}`,
2586
- { level: 'error' },
2587
- )
2588
- logEvent('tengu_streaming_fallback_to_non_streaming', {
2589
- model:
2590
- options.model as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2591
- error:
2592
- streamingError instanceof Error
2593
- ? (streamingError.name as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS)
2594
- : (String(
2595
- streamingError,
2596
- ) as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS),
2597
- attemptNumber,
2598
- maxOutputTokens,
2599
- thinkingType:
2600
- thinkingConfig.type as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2601
- fallback_disabled: true,
2602
- request_id: (streamRequestId ??
2603
- 'unknown') as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2604
- fallback_cause: (streamIdleAborted
2605
- ? 'watchdog'
2606
- : 'other') as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2607
- })
2608
- throw streamingError
2609
- }
2610
-
2611
- logForDebugging(
2612
- `Error streaming, falling back to non-streaming mode: ${errorMessage(streamingError)}`,
2613
- { level: 'error' },
2614
- )
2615
- didFallBackToNonStreaming = true
2616
- if (options.onStreamingFallback) {
2617
- options.onStreamingFallback()
2618
- }
2619
-
2620
- logEvent('tengu_streaming_fallback_to_non_streaming', {
2621
- model:
2622
- options.model as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2623
- error:
2624
- streamingError instanceof Error
2625
- ? (streamingError.name as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS)
2626
- : (String(
2627
- streamingError,
2628
- ) as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS),
2629
- attemptNumber,
2630
- maxOutputTokens,
2631
- thinkingType:
2632
- thinkingConfig.type as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2633
- fallback_disabled: false,
2634
- request_id: (streamRequestId ??
2635
- 'unknown') as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2636
- fallback_cause: (streamIdleAborted
2637
- ? 'watchdog'
2638
- : 'other') as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2639
- })
2640
-
2641
- // Fall back to non-streaming mode with retries.
2642
- // If the streaming failure was itself a 529, count it toward the
2643
- // consecutive-529 budget so total 529s-before-model-fallback is the
2644
- // same whether the overload was hit in streaming or non-streaming mode.
2645
- // This is a speculative fix for https://github.com/anthropics/claude-code/issues/1513
2646
- // Instrumentation: proves executeNonStreamingRequest was entered (vs. the
2647
- // fallback event firing but the call itself hanging at dispatch).
2648
- logForDiagnosticsNoPII('info', 'cli_nonstreaming_fallback_started')
2649
- logEvent('tengu_nonstreaming_fallback_started', {
2650
- request_id: (streamRequestId ??
2651
- 'unknown') as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2652
- model:
2653
- options.model as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2654
- fallback_cause: (streamIdleAborted
2655
- ? 'watchdog'
2656
- : 'other') as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2657
- })
2658
- const result = yield* executeNonStreamingRequest(
2659
- {
2660
- model: options.model,
2661
- fetchOverride: options.fetchOverride,
2662
- source: options.querySource,
2663
- },
2664
- {
2665
- model: options.model,
2666
- fallbackModel: options.fallbackModel,
2667
- thinkingConfig,
2668
- ...(isFastModeEnabled() && { fastMode: isFastMode }),
2669
- signal,
2670
- initialConsecutive529Errors: is529Error(streamingError) ? 1 : 0,
2671
- querySource: options.querySource,
2672
- },
2673
- paramsFromContext,
2674
- (attempt, _startTime, tokens) => {
2675
- attemptNumber = attempt
2676
- maxOutputTokens = tokens
2677
- },
2678
- params => captureAPIRequest(params, options.querySource),
2679
- streamRequestId,
2680
- )
2681
-
2682
- const m: AssistantMessage = {
2683
- message: {
2684
- ...result,
2685
- content: normalizeContentFromAPI(
2686
- result.content,
2687
- tools,
2688
- options.agentId,
2689
- ),
2690
- },
2691
- requestId: streamRequestId ?? undefined,
2692
- type: 'assistant',
2693
- uuid: randomUUID(),
2694
- timestamp: new Date().toISOString(),
2695
- ...(process.env.USER_TYPE === 'ant' &&
2696
- research !== undefined && {
2697
- research,
2698
- }),
2699
- ...(advisorModel && {
2700
- advisorModel,
2701
- }),
2702
- }
2703
- newMessages.push(m)
2704
- fallbackMessage = m
2705
- yield m
2706
- } finally {
2707
- clearStreamIdleTimers()
2708
- }
2709
- } catch (errorFromRetry) {
2710
- // FallbackTriggeredError must propagate to query.ts, which performs the
2711
- // actual model switch. Swallowing it here would turn the fallback into a
2712
- // no-op — the user would just see "Model fallback triggered: X -> Y" as
2713
- // an error message with no actual retry on the fallback model.
2714
- if (errorFromRetry instanceof FallbackTriggeredError) {
2715
- throw errorFromRetry
2716
- }
2717
-
2718
- // Check if this is a 404 error during stream creation that should trigger
2719
- // non-streaming fallback. This handles gateways that return 404 for streaming
2720
- // endpoints but work fine with non-streaming. Before v2.1.8, BetaMessageStream
2721
- // threw 404s during iteration (caught by inner catch with fallback), but now
2722
- // with raw streams, 404s are thrown during creation (caught here).
2723
- const is404StreamCreationError =
2724
- !didFallBackToNonStreaming &&
2725
- errorFromRetry instanceof CannotRetryError &&
2726
- errorFromRetry.originalError instanceof APIError &&
2727
- errorFromRetry.originalError.status === 404
2728
-
2729
- if (is404StreamCreationError) {
2730
- // 404 is thrown at .withResponse() before streamRequestId is assigned,
2731
- // and CannotRetryError means every retry failed — so grab the failed
2732
- // request's ID from the error header instead.
2733
- const failedRequestId =
2734
- (errorFromRetry.originalError as APIError).requestID ?? 'unknown'
2735
- logForDebugging(
2736
- 'Streaming endpoint returned 404, falling back to non-streaming mode',
2737
- { level: 'warn' },
2738
- )
2739
- didFallBackToNonStreaming = true
2740
- if (options.onStreamingFallback) {
2741
- options.onStreamingFallback()
2742
- }
2743
-
2744
- logEvent('tengu_streaming_fallback_to_non_streaming', {
2745
- model:
2746
- options.model as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2747
- error:
2748
- '404_stream_creation' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2749
- attemptNumber,
2750
- maxOutputTokens,
2751
- thinkingType:
2752
- thinkingConfig.type as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2753
- request_id:
2754
- failedRequestId as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2755
- fallback_cause:
2756
- '404_stream_creation' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2757
- })
2758
-
2759
- try {
2760
- // Fall back to non-streaming mode
2761
- const result = yield* executeNonStreamingRequest(
2762
- {
2763
- model: options.model,
2764
- fetchOverride: options.fetchOverride,
2765
- source: options.querySource,
2766
- },
2767
- {
2768
- model: options.model,
2769
- fallbackModel: options.fallbackModel,
2770
- thinkingConfig,
2771
- ...(isFastModeEnabled() && { fastMode: isFastMode }),
2772
- signal,
2773
- },
2774
- paramsFromContext,
2775
- (attempt, _startTime, tokens) => {
2776
- attemptNumber = attempt
2777
- maxOutputTokens = tokens
2778
- },
2779
- params => captureAPIRequest(params, options.querySource),
2780
- failedRequestId,
2781
- )
2782
-
2783
- const m: AssistantMessage = {
2784
- message: {
2785
- ...result,
2786
- content: normalizeContentFromAPI(
2787
- result.content,
2788
- tools,
2789
- options.agentId,
2790
- ),
2791
- },
2792
- requestId: streamRequestId ?? undefined,
2793
- type: 'assistant',
2794
- uuid: randomUUID(),
2795
- timestamp: new Date().toISOString(),
2796
- ...(process.env.USER_TYPE === 'ant' &&
2797
- research !== undefined && { research }),
2798
- ...(advisorModel && { advisorModel }),
2799
- }
2800
- newMessages.push(m)
2801
- fallbackMessage = m
2802
- yield m
2803
-
2804
- // Continue to success logging below
2805
- } catch (fallbackError) {
2806
- // Propagate model-fallback signal to query.ts (see comment above).
2807
- if (fallbackError instanceof FallbackTriggeredError) {
2808
- throw fallbackError
2809
- }
2810
-
2811
- // Fallback also failed, handle as normal error
2812
- logForDebugging(
2813
- `Non-streaming fallback also failed: ${errorMessage(fallbackError)}`,
2814
- { level: 'error' },
2815
- )
2816
-
2817
- let error = fallbackError
2818
- let errorModel = options.model
2819
- if (fallbackError instanceof CannotRetryError) {
2820
- error = fallbackError.originalError
2821
- errorModel = fallbackError.retryContext.model
2822
- }
2823
-
2824
- if (error instanceof APIError) {
2825
- extractQuotaStatusFromError(error)
2826
- }
2827
-
2828
- const requestId =
2829
- streamRequestId ||
2830
- (error instanceof APIError ? error.requestID : undefined) ||
2831
- (error instanceof APIError
2832
- ? (error.error as { request_id?: string })?.request_id
2833
- : undefined)
2834
-
2835
- logAPIError({
2836
- error,
2837
- model: errorModel,
2838
- messageCount: messagesForAPI.length,
2839
- messageTokens: tokenCountFromLastAPIResponse(messagesForAPI),
2840
- durationMs: Date.now() - start,
2841
- durationMsIncludingRetries: Date.now() - startIncludingRetries,
2842
- attempt: attemptNumber,
2843
- requestId,
2844
- clientRequestId,
2845
- didFallBackToNonStreaming,
2846
- queryTracking: options.queryTracking,
2847
- querySource: options.querySource,
2848
- llmSpan,
2849
- fastMode: isFastModeRequest,
2850
- previousRequestId,
2851
- })
2852
-
2853
- if (error instanceof APIUserAbortError) {
2854
- releaseStreamResources()
2855
- return
2856
- }
2857
-
2858
- yield getAssistantMessageFromError(error, errorModel, {
2859
- messages,
2860
- messagesForAPI,
2861
- })
2862
- releaseStreamResources()
2863
- return
2864
- }
2865
- } else {
2866
- // Original error handling for non-404 errors
2867
- logForDebugging(`Error in API request: ${errorMessage(errorFromRetry)}`, {
2868
- level: 'error',
2869
- })
2870
-
2871
- let error = errorFromRetry
2872
- let errorModel = options.model
2873
- if (errorFromRetry instanceof CannotRetryError) {
2874
- error = errorFromRetry.originalError
2875
- errorModel = errorFromRetry.retryContext.model
2876
- }
2877
-
2878
- // Extract quota status from error headers if it's a rate limit error
2879
- if (error instanceof APIError) {
2880
- extractQuotaStatusFromError(error)
2881
- }
2882
-
2883
- // Extract requestId from stream, error header, or error body
2884
- const requestId =
2885
- streamRequestId ||
2886
- (error instanceof APIError ? error.requestID : undefined) ||
2887
- (error instanceof APIError
2888
- ? (error.error as { request_id?: string })?.request_id
2889
- : undefined)
2890
-
2891
- logAPIError({
2892
- error,
2893
- model: errorModel,
2894
- messageCount: messagesForAPI.length,
2895
- messageTokens: tokenCountFromLastAPIResponse(messagesForAPI),
2896
- durationMs: Date.now() - start,
2897
- durationMsIncludingRetries: Date.now() - startIncludingRetries,
2898
- attempt: attemptNumber,
2899
- requestId,
2900
- clientRequestId,
2901
- didFallBackToNonStreaming,
2902
- queryTracking: options.queryTracking,
2903
- querySource: options.querySource,
2904
- llmSpan,
2905
- fastMode: isFastModeRequest,
2906
- previousRequestId,
2907
- })
2908
-
2909
- // Don't yield an assistant error message for user aborts
2910
- // The interruption message is handled in query.ts
2911
- if (error instanceof APIUserAbortError) {
2912
- releaseStreamResources()
2913
- return
2914
- }
2915
-
2916
- yield getAssistantMessageFromError(error, errorModel, {
2917
- messages,
2918
- messagesForAPI,
2919
- })
2920
- releaseStreamResources()
2921
- return
2922
- }
2923
- } finally {
2924
- stopSessionActivity('api_call')
2925
- // Must be in the finally block: if the generator is terminated early
2926
- // via .return() (e.g. consumer breaks out of for-await-of, or query.ts
2927
- // encounters an abort), code after the try/finally never executes.
2928
- // Without this, the Response object's native TLS/socket buffers leak
2929
- // until the generator itself is GC'd (see GH #32920).
2930
- releaseStreamResources()
2931
-
2932
- // Non-streaming fallback cost: the streaming path tracks cost in the
2933
- // message_delta handler before any yield. Fallback pushes to newMessages
2934
- // then yields, so tracking must be here to survive .return() at the yield.
2935
- if (fallbackMessage) {
2936
- const fallbackUsage = fallbackMessage.message.usage
2937
- usage = updateUsage(EMPTY_USAGE, fallbackUsage)
2938
- stopReason = fallbackMessage.message.stop_reason
2939
- const fallbackCost = calculateUSDCost(resolvedModel, fallbackUsage)
2940
- costUSD += addToTotalSessionCost(
2941
- fallbackCost,
2942
- fallbackUsage,
2943
- options.model,
2944
- )
2945
- }
2946
- }
2947
-
2948
- // Mark all registered tools as sent to API so they become eligible for deletion
2949
- if (feature('CACHED_MICROCOMPACT') && cachedMCEnabled) {
2950
- markToolsSentToAPIState()
2951
- }
2952
-
2953
- // Track the last requestId for the main conversation chain so shutdown
2954
- // can send a cache eviction hint to inference. Exclude backgrounded
2955
- // sessions (Ctrl+B) which share the repl_main_thread querySource but
2956
- // run inside an agent context — they are independent conversation chains
2957
- // whose cache should not be evicted when the foreground session clears.
2958
- if (
2959
- streamRequestId &&
2960
- !getAgentContext() &&
2961
- (options.querySource.startsWith('repl_main_thread') ||
2962
- options.querySource === 'sdk')
2963
- ) {
2964
- setLastMainRequestId(streamRequestId)
2965
- }
2966
-
2967
- // Precompute scalars so the fire-and-forget .then() closure doesn't pin the
2968
- // full messagesForAPI array (the entire conversation up to the context window
2969
- // limit) until getToolPermissionContext() resolves.
2970
- const logMessageCount = messagesForAPI.length
2971
- const logMessageTokens = tokenCountFromLastAPIResponse(messagesForAPI)
2972
- void options.getToolPermissionContext().then(permissionContext => {
2973
- logAPISuccessAndDuration({
2974
- model:
2975
- newMessages[0]?.message.model ?? partialMessage?.model ?? options.model,
2976
- preNormalizedModel: options.model,
2977
- usage,
2978
- start,
2979
- startIncludingRetries,
2980
- attempt: attemptNumber,
2981
- messageCount: logMessageCount,
2982
- messageTokens: logMessageTokens,
2983
- requestId: streamRequestId ?? null,
2984
- stopReason,
2985
- ttftMs,
2986
- didFallBackToNonStreaming,
2987
- querySource: options.querySource,
2988
- headers: responseHeaders,
2989
- costUSD,
2990
- queryTracking: options.queryTracking,
2991
- permissionMode: permissionContext.mode,
2992
- // Pass newMessages for beta tracing - extraction happens in logging.ts
2993
- // only when beta tracing is enabled
2994
- newMessages,
2995
- llmSpan,
2996
- globalCacheStrategy,
2997
- requestSetupMs: start - startIncludingRetries,
2998
- attemptStartTimes,
2999
- fastMode: isFastModeRequest,
3000
- previousRequestId,
3001
- betas: lastRequestBetas,
3002
- })
3003
- })
3004
-
3005
- // Defensive: also release on normal completion (no-op if finally already ran).
3006
- releaseStreamResources()
3007
- }
3008
-
3009
- /**
3010
- * Cleans up stream resources to prevent memory leaks.
3011
- * @internal Exported for testing
3012
- */
3013
- export function cleanupStream(
3014
- stream: Stream<BetaRawMessageStreamEvent> | undefined,
3015
- ): void {
3016
- if (!stream) {
3017
- return
3018
- }
3019
- try {
3020
- // Abort the stream via its controller if not already aborted
3021
- if (!stream.controller.signal.aborted) {
3022
- stream.controller.abort()
3023
- }
3024
- } catch {
3025
- // Ignore - stream may already be closed
3026
- }
3027
- }
3028
-
3029
- /**
3030
- * Updates usage statistics with new values from streaming API events.
3031
- * Note: the upstream streaming API provides cumulative usage totals, not incremental deltas.
3032
- * (UMMAYA: byte-copied from CC where this read "Anthropic's streaming API"; FriendliAI's
3033
- * OpenAI-compatible streaming surface follows the same cumulative semantics, so the
3034
- * algorithm is unchanged — only the brand token is renamed.)
3035
- * Each event contains the complete usage up to that point in the stream.
3036
- *
3037
- * Input-related tokens (input_tokens, cache_creation_input_tokens, cache_read_input_tokens)
3038
- * are typically set in message_start and remain constant. message_delta events may send
3039
- * explicit 0 values for these fields, which should not overwrite the values from message_start.
3040
- * We only update these fields if they have a non-null, non-zero value.
3041
- */
3042
- export function updateUsage(
3043
- usage: Readonly<NonNullableUsage>,
3044
- partUsage: BetaMessageDeltaUsage | undefined,
3045
- ): NonNullableUsage {
3046
- if (!partUsage) {
3047
- return { ...usage }
3048
- }
3049
- return {
3050
- input_tokens:
3051
- partUsage.input_tokens !== null && partUsage.input_tokens > 0
3052
- ? partUsage.input_tokens
3053
- : usage.input_tokens,
3054
- cache_creation_input_tokens:
3055
- partUsage.cache_creation_input_tokens !== null &&
3056
- partUsage.cache_creation_input_tokens > 0
3057
- ? partUsage.cache_creation_input_tokens
3058
- : usage.cache_creation_input_tokens,
3059
- cache_read_input_tokens:
3060
- partUsage.cache_read_input_tokens !== null &&
3061
- partUsage.cache_read_input_tokens > 0
3062
- ? partUsage.cache_read_input_tokens
3063
- : usage.cache_read_input_tokens,
3064
- output_tokens: partUsage.output_tokens ?? usage.output_tokens,
3065
- server_tool_use: {
3066
- web_search_requests:
3067
- partUsage.server_tool_use?.web_search_requests ??
3068
- usage.server_tool_use.web_search_requests,
3069
- web_fetch_requests:
3070
- partUsage.server_tool_use?.web_fetch_requests ??
3071
- usage.server_tool_use.web_fetch_requests,
3072
- },
3073
- service_tier: usage.service_tier,
3074
- cache_creation: {
3075
- // SDK type BetaMessageDeltaUsage is missing cache_creation, but it's real!
3076
- ephemeral_1h_input_tokens:
3077
- (partUsage as BetaUsage).cache_creation?.ephemeral_1h_input_tokens ??
3078
- usage.cache_creation.ephemeral_1h_input_tokens,
3079
- ephemeral_5m_input_tokens:
3080
- (partUsage as BetaUsage).cache_creation?.ephemeral_5m_input_tokens ??
3081
- usage.cache_creation.ephemeral_5m_input_tokens,
3082
- },
3083
- // cache_deleted_input_tokens: returned by the API when cache editing
3084
- // deletes KV cache content, but not in SDK types. Kept off NonNullableUsage
3085
- // so the string is eliminated from external builds by dead code elimination.
3086
- // Uses the same > 0 guard as other token fields to prevent message_delta
3087
- // from overwriting the real value with 0.
3088
- ...(feature('CACHED_MICROCOMPACT')
3089
- ? {
3090
- cache_deleted_input_tokens:
3091
- (partUsage as unknown as { cache_deleted_input_tokens?: number })
3092
- .cache_deleted_input_tokens != null &&
3093
- (partUsage as unknown as { cache_deleted_input_tokens: number })
3094
- .cache_deleted_input_tokens > 0
3095
- ? (partUsage as unknown as { cache_deleted_input_tokens: number })
3096
- .cache_deleted_input_tokens
3097
- : ((usage as unknown as { cache_deleted_input_tokens?: number })
3098
- .cache_deleted_input_tokens ?? 0),
3099
- }
3100
- : {}),
3101
- inference_geo: usage.inference_geo,
3102
- iterations: partUsage.iterations ?? usage.iterations,
3103
- speed: (partUsage as BetaUsage).speed ?? usage.speed,
3104
- }
3105
- }
3106
-
3107
- /**
3108
- * Accumulates usage from one message into a total usage object.
3109
- * Used to track cumulative usage across multiple assistant turns.
3110
- */
3111
- export function accumulateUsage(
3112
- totalUsage: Readonly<NonNullableUsage>,
3113
- messageUsage: Readonly<NonNullableUsage>,
3114
- ): NonNullableUsage {
3115
- return {
3116
- input_tokens: totalUsage.input_tokens + messageUsage.input_tokens,
3117
- cache_creation_input_tokens:
3118
- totalUsage.cache_creation_input_tokens +
3119
- messageUsage.cache_creation_input_tokens,
3120
- cache_read_input_tokens:
3121
- totalUsage.cache_read_input_tokens + messageUsage.cache_read_input_tokens,
3122
- output_tokens: totalUsage.output_tokens + messageUsage.output_tokens,
3123
- server_tool_use: {
3124
- web_search_requests:
3125
- totalUsage.server_tool_use.web_search_requests +
3126
- messageUsage.server_tool_use.web_search_requests,
3127
- web_fetch_requests:
3128
- totalUsage.server_tool_use.web_fetch_requests +
3129
- messageUsage.server_tool_use.web_fetch_requests,
3130
- },
3131
- service_tier: messageUsage.service_tier, // Use the most recent service tier
3132
- cache_creation: {
3133
- ephemeral_1h_input_tokens:
3134
- totalUsage.cache_creation.ephemeral_1h_input_tokens +
3135
- messageUsage.cache_creation.ephemeral_1h_input_tokens,
3136
- ephemeral_5m_input_tokens:
3137
- totalUsage.cache_creation.ephemeral_5m_input_tokens +
3138
- messageUsage.cache_creation.ephemeral_5m_input_tokens,
3139
- },
3140
- // See comment in updateUsage — field is not on NonNullableUsage to keep
3141
- // the string out of external builds.
3142
- ...(feature('CACHED_MICROCOMPACT')
3143
- ? {
3144
- cache_deleted_input_tokens:
3145
- ((totalUsage as unknown as { cache_deleted_input_tokens?: number })
3146
- .cache_deleted_input_tokens ?? 0) +
3147
- ((
3148
- messageUsage as unknown as { cache_deleted_input_tokens?: number }
3149
- ).cache_deleted_input_tokens ?? 0),
3150
- }
3151
- : {}),
3152
- inference_geo: messageUsage.inference_geo, // Use the most recent
3153
- iterations: messageUsage.iterations, // Use the most recent
3154
- speed: messageUsage.speed, // Use the most recent
3155
- }
3156
- }
3157
-
3158
- function isToolResultBlock(
3159
- block: unknown,
3160
- ): block is { type: 'tool_result'; tool_use_id: string } {
3161
- return (
3162
- block !== null &&
3163
- typeof block === 'object' &&
3164
- 'type' in block &&
3165
- (block as { type: string }).type === 'tool_result' &&
3166
- 'tool_use_id' in block
3167
- )
3168
- }
3169
-
3170
- type CachedMCEditsBlock = {
3171
- type: 'cache_edits'
3172
- edits: { type: 'delete'; cache_reference: string }[]
3173
- }
3174
-
3175
- type CachedMCPinnedEdits = {
3176
- userMessageIndex: number
3177
- block: CachedMCEditsBlock
3178
- }
3179
-
3180
- // Exported for testing cache_reference placement constraints
3181
- export function addCacheBreakpoints(
3182
- messages: (UserMessage | AssistantMessage)[],
3183
- enablePromptCaching: boolean,
3184
- querySource?: QuerySource,
3185
- useCachedMC = false,
3186
- newCacheEdits?: CachedMCEditsBlock | null,
3187
- pinnedEdits?: CachedMCPinnedEdits[],
3188
- skipCacheWrite = false,
3189
- ): MessageParam[] {
3190
- logEvent('tengu_api_cache_breakpoints', {
3191
- totalMessageCount: messages.length,
3192
- cachingEnabled: enablePromptCaching,
3193
- skipCacheWrite,
3194
- })
3195
-
3196
- // Exactly one message-level cache_control marker per request. Mycro's
3197
- // turn-to-turn eviction (page_manager/index.rs: Index::insert) frees
3198
- // local-attention KV pages at any cached prefix position NOT in
3199
- // cache_store_int_token_boundaries. With two markers the second-to-last
3200
- // position is protected and its locals survive an extra turn even though
3201
- // nothing will ever resume from there — with one marker they're freed
3202
- // immediately. For fire-and-forget forks (skipCacheWrite) we shift the
3203
- // marker to the second-to-last message: that's the last shared-prefix
3204
- // point, so the write is a no-op merge on mycro (entry already exists)
3205
- // and the fork doesn't leave its own tail in the KVCC. Dense pages are
3206
- // refcounted and survive via the new hash either way.
3207
- const markerIndex = skipCacheWrite ? messages.length - 2 : messages.length - 1
3208
- const result = messages.map((msg, index) => {
3209
- const addCache = index === markerIndex
3210
- if (msg.type === 'user') {
3211
- return userMessageToMessageParam(
3212
- msg,
3213
- addCache,
3214
- enablePromptCaching,
3215
- querySource,
3216
- )
3217
- }
3218
- return assistantMessageToMessageParam(
3219
- msg,
3220
- addCache,
3221
- enablePromptCaching,
3222
- querySource,
3223
- )
3224
- })
3225
-
3226
- if (!useCachedMC) {
3227
- return result
3228
- }
3229
-
3230
- // Track all cache_references being deleted to prevent duplicates across blocks.
3231
- const seenDeleteRefs = new Set<string>()
3232
-
3233
- // Helper to deduplicate a cache_edits block against already-seen deletions
3234
- const deduplicateEdits = (block: CachedMCEditsBlock): CachedMCEditsBlock => {
3235
- const uniqueEdits = block.edits.filter(edit => {
3236
- if (seenDeleteRefs.has(edit.cache_reference)) {
3237
- return false
3238
- }
3239
- seenDeleteRefs.add(edit.cache_reference)
3240
- return true
3241
- })
3242
- return { ...block, edits: uniqueEdits }
3243
- }
3244
-
3245
- // Re-insert all previously-pinned cache_edits at their original positions
3246
- for (const pinned of pinnedEdits ?? []) {
3247
- const msg = result[pinned.userMessageIndex]
3248
- if (msg && msg.role === 'user') {
3249
- if (!Array.isArray(msg.content)) {
3250
- msg.content = [{ type: 'text', text: msg.content as string }]
3251
- }
3252
- const dedupedBlock = deduplicateEdits(pinned.block)
3253
- if (dedupedBlock.edits.length > 0) {
3254
- insertBlockAfterToolResults(msg.content, dedupedBlock)
3255
- }
3256
- }
3257
- }
3258
-
3259
- // Insert new cache_edits into the last user message and pin them
3260
- if (newCacheEdits && result.length > 0) {
3261
- const dedupedNewEdits = deduplicateEdits(newCacheEdits)
3262
- if (dedupedNewEdits.edits.length > 0) {
3263
- for (let i = result.length - 1; i >= 0; i--) {
3264
- const msg = result[i]
3265
- if (msg && msg.role === 'user') {
3266
- if (!Array.isArray(msg.content)) {
3267
- msg.content = [{ type: 'text', text: msg.content as string }]
3268
- }
3269
- insertBlockAfterToolResults(msg.content, dedupedNewEdits)
3270
- // Pin so this block is re-sent at the same position in future calls
3271
- pinCacheEdits(i, newCacheEdits)
3272
-
3273
- logForDebugging(
3274
- `Added cache_edits block with ${dedupedNewEdits.edits.length} deletion(s) to message[${i}]: ${dedupedNewEdits.edits.map(e => e.cache_reference).join(', ')}`,
3275
- )
3276
- break
3277
- }
3278
- }
3279
- }
3280
- }
3281
-
3282
- // Add cache_reference to tool_result blocks that are within the cached prefix.
3283
- // Must be done AFTER cache_edits insertion since that modifies content arrays.
3284
- if (enablePromptCaching) {
3285
- // Find the last message containing a cache_control marker
3286
- let lastCCMsg = -1
3287
- for (let i = 0; i < result.length; i++) {
3288
- const msg = result[i]!
3289
- if (Array.isArray(msg.content)) {
3290
- for (const block of msg.content) {
3291
- if (block && typeof block === 'object' && 'cache_control' in block) {
3292
- lastCCMsg = i
3293
- }
3294
- }
3295
- }
3296
- }
3297
-
3298
- // Add cache_reference to tool_result blocks that are strictly before
3299
- // the last cache_control marker. The API requires cache_reference to
3300
- // appear "before or on" the last cache_control — we use strict "before"
3301
- // to avoid edge cases where cache_edits splicing shifts block indices.
3302
- //
3303
- // Create new objects instead of mutating in-place to avoid contaminating
3304
- // blocks reused by secondary queries that use models without cache_editing support.
3305
- if (lastCCMsg >= 0) {
3306
- for (let i = 0; i < lastCCMsg; i++) {
3307
- const msg = result[i]!
3308
- if (msg.role !== 'user' || !Array.isArray(msg.content)) {
3309
- continue
3310
- }
3311
- let cloned = false
3312
- for (let j = 0; j < msg.content.length; j++) {
3313
- const block = msg.content[j]
3314
- if (block && isToolResultBlock(block)) {
3315
- if (!cloned) {
3316
- msg.content = [...msg.content]
3317
- cloned = true
3318
- }
3319
- msg.content[j] = Object.assign({}, block, {
3320
- cache_reference: block.tool_use_id,
3321
- })
3322
- }
3323
- }
3324
- }
3325
- }
3326
- }
3327
-
3328
- return result
3329
- }
3330
-
3331
/**
 * Turns a system prompt into the list of text blocks sent with a request.
 *
 * The prompt is first split by `splitSysPromptPrefix`; each resulting piece
 * becomes one `text` block. A `cache_control` entry is attached to a block
 * only when prompt caching is enabled AND the piece carries a non-null
 * `cacheScope`.
 *
 * @param systemPrompt - The system prompt to split into blocks.
 * @param enablePromptCaching - When false, no block receives `cache_control`.
 * @param options - Optional `skipGlobalCacheForSystemPrompt` (forwarded to the
 *   splitter) and `querySource` (forwarded to `getCacheControl`).
 * @returns One text block per piece produced by the splitter, in order.
 */
export function buildSystemPromptBlocks(
  systemPrompt: SystemPrompt,
  enablePromptCaching: boolean,
  options?: {
    skipGlobalCacheForSystemPrompt?: boolean
    querySource?: QuerySource
  },
): TextBlockParam[] {
  // IMPORTANT: Do not add any more blocks for caching or you will get a 400
  const segments = splitSysPromptPrefix(systemPrompt, {
    skipGlobalCacheForSystemPrompt: options?.skipGlobalCacheForSystemPrompt,
  })

  return segments.map(segment => {
    // Only cacheable segments get a cache_control marker; everything else
    // contributes no extra keys to the block.
    const cacheFields =
      enablePromptCaching && segment.cacheScope !== null
        ? {
            cache_control: getCacheControl({
              scope: segment.cacheScope,
              querySource: options?.querySource,
            }),
          }
        : {}

    return {
      type: 'text' as const,
      text: segment.text,
      ...cacheFields,
    }
  })
}
3356
-
3357
// Caller-supplied options for queryHaiku: the model and the tool-permission
// callback are excluded because queryHaiku sets both itself.
type HaikuOptions = Omit<Options, 'model' | 'getToolPermissionContext'>

/**
 * Sends a single user prompt to the model returned by `getSmallFastModel()`
 * through `queryModelWithoutStreaming`, with thinking disabled and no tools.
 *
 * The call is wrapped in `withVCR`, keyed by the system prompt and the user
 * prompt rendered as user messages.
 *
 * @param systemPrompt - System prompt for the request (defaults to empty).
 * @param userPrompt - Text of the single user message.
 * @param outputFormat - Optional JSON output format forwarded to the query.
 * @param signal - Abort signal forwarded to the query.
 * @param options - Remaining query options; `enablePromptCaching` defaults to
 *   false when not provided.
 * @returns The assistant message produced by the query.
 */
export async function queryHaiku({
  systemPrompt = asSystemPrompt([]),
  userPrompt,
  outputFormat,
  signal,
  options,
}: {
  systemPrompt: SystemPrompt
  userPrompt: string
  outputFormat?: BetaJSONOutputFormat
  signal: AbortSignal
  options: HaikuOptions
}): Promise<AssistantMessage> {
  // Messages handed to withVCR alongside the actual query callback.
  const vcrMessages = [
    createUserMessage({
      content: systemPrompt.map(text => ({ type: 'text', text })),
    }),
    createUserMessage({
      content: userPrompt,
    }),
  ]

  const recorded = await withVCR(vcrMessages, async () => {
    const response = await queryModelWithoutStreaming({
      messages: [
        createUserMessage({
          content: userPrompt,
        }),
      ],
      systemPrompt,
      thinkingConfig: { type: 'disabled' },
      tools: [],
      signal,
      options: {
        ...options,
        model: getSmallFastModel(),
        enablePromptCaching: options.enablePromptCaching ?? false,
        outputFormat,
        async getToolPermissionContext() {
          return getEmptyToolPermissionContext()
        },
      },
    })
    return [response]
  })

  // We don't use streaming for Haiku so this is safe
  return recorded[0]! as AssistantMessage
}
3410
-
3411
// Caller-supplied options for queryWithModel: the tool-permission callback is
// excluded because queryWithModel always installs its own.
type QueryWithModelOptions = Omit<Options, 'getToolPermissionContext'>

/**
 * Query a specific model through the UMMAYA infrastructure.
 * (Originally "Claude Code infrastructure" in the CC byte-copy; UMMAYA renames
 * the citizen-visible doc string but preserves the function shape so future
 * audit replays diff cleanly against CC. swap/identifier-rename(2521).)
 * This goes through the full query pipeline including proper authentication,
 * betas, and headers - unlike direct API calls.
 *
 * The request is issued via `queryModelWithoutStreaming` with thinking
 * disabled and no tools, and the whole call is wrapped in `withVCR`.
 *
 * @param systemPrompt - System prompt for the request (defaults to empty).
 * @param userPrompt - Text of the single user message.
 * @param outputFormat - Optional JSON output format forwarded to the query.
 * @param signal - Abort signal forwarded to the query.
 * @param options - Query options (including the model);
 *   `enablePromptCaching` defaults to false when not provided.
 * @returns The assistant message produced by the query.
 */
export async function queryWithModel({
  systemPrompt = asSystemPrompt([]),
  userPrompt,
  outputFormat,
  signal,
  options,
}: {
  systemPrompt: SystemPrompt
  userPrompt: string
  outputFormat?: BetaJSONOutputFormat
  signal: AbortSignal
  options: QueryWithModelOptions
}): Promise<AssistantMessage> {
  // Messages handed to withVCR alongside the actual query callback.
  const vcrMessages = [
    createUserMessage({
      content: systemPrompt.map(text => ({ type: 'text', text })),
    }),
    createUserMessage({
      content: userPrompt,
    }),
  ]

  const recorded = await withVCR(vcrMessages, async () => {
    const response = await queryModelWithoutStreaming({
      messages: [
        createUserMessage({
          content: userPrompt,
        }),
      ],
      systemPrompt,
      thinkingConfig: { type: 'disabled' },
      tools: [],
      signal,
      options: {
        ...options,
        enablePromptCaching: options.enablePromptCaching ?? false,
        outputFormat,
        async getToolPermissionContext() {
          return getEmptyToolPermissionContext()
        },
      },
    })
    return [response]
  })

  return recorded[0]! as AssistantMessage
}
3470
-
3471
/**
 * Upper bound on `max_tokens` for non-streaming requests.
 *
 * Non-streaming requests are limited to 10 minutes per the docs:
 * https://platform.claude.com/docs/en/api/errors#long-requests
 * The SDK derives its own 21333-token cap from 10min × 128k tokens/hour; that
 * cap is bypassed here by setting a client-level timeout, which is why a
 * higher value can be used.
 */
export const MAX_NON_STREAMING_TOKENS = 64_000
3476
-
3477
/**
 * Caps `max_tokens` for the non-streaming fallback and, when thinking is
 * enabled with a non-zero budget, shrinks that budget so the API constraint
 * `max_tokens > thinking.budget_tokens` still holds after capping.
 *
 * The input object is not mutated; a shallow copy is returned.
 *
 * @param params - The parameters that will be sent to the API
 * @param maxTokensCap - The maximum allowed tokens (MAX_NON_STREAMING_TOKENS)
 * @returns A copy of `params` with `max_tokens` capped and, if needed, the
 *   thinking budget lowered to at most `max_tokens - 1`
 */
export function adjustParamsForNonStreaming<
  T extends {
    max_tokens: number
    thinking?: BetaMessageStreamParams['thinking']
  },
>(params: T, maxTokensCap: number): T {
  const maxTokens = Math.min(params.max_tokens, maxTokensCap)
  const thinking = params.thinking

  // Nothing to adjust unless thinking is enabled with a truthy budget.
  if (thinking?.type !== 'enabled' || !thinking.budget_tokens) {
    return {
      ...params,
      max_tokens: maxTokens,
    }
  }

  return {
    ...params,
    thinking: {
      ...thinking,
      // Must be at least 1 less than max_tokens
      budget_tokens: Math.min(thinking.budget_tokens, maxTokens - 1),
    },
    max_tokens: maxTokens,
  }
}
3514
-
3515
/**
 * Reports whether the capped default for max output tokens is switched on,
 * as read from the 'tengu_otk_slot_v1' feature value (which may be a stale
 * cached value, per the accessor's name).
 */
function isMaxTokensCapEnabled(): boolean {
  // 3P default: false (not validated on Bedrock/Vertex)
  const capEnabled = getFeatureValue_CACHED_MAY_BE_STALE(
    'tengu_otk_slot_v1',
    false,
  )
  return capEnabled
}
3519
-
3520
/**
 * Resolves the max output token count to use for `model`.
 *
 * Starts from the model's default, optionally lowers it to
 * CAPPED_DEFAULT_MAX_TOKENS when the cap feature is enabled, and then lets the
 * CLAUDE_CODE_MAX_OUTPUT_TOKENS environment variable override it, bounded by
 * the model's upper limit.
 *
 * @param model - Model identifier passed to `getModelMaxOutputTokens`.
 * @returns The effective value reported by `validateBoundedIntEnvVar`.
 */
export function getMaxOutputTokensForModel(model: string): number {
  const limits = getModelMaxOutputTokens(model)

  // Slot-reservation cap: the default drops to 8k for all models. BQ p99
  // output = 4,911 tokens, so 32k/64k defaults over-reserve 8-16× slot
  // capacity. Requests that hit the cap get one clean retry at 64k (query.ts
  // max_output_tokens_escalate). Math.min leaves models with lower native
  // defaults (e.g. claude-3-opus at 4k) at their native value. This is applied
  // before the env-var override so CLAUDE_CODE_MAX_OUTPUT_TOKENS still wins.
  let fallbackTokens: number
  if (isMaxTokensCapEnabled()) {
    fallbackTokens = Math.min(limits.default, CAPPED_DEFAULT_MAX_TOKENS)
  } else {
    fallbackTokens = limits.default
  }

  const validated = validateBoundedIntEnvVar(
    'CLAUDE_CODE_MAX_OUTPUT_TOKENS',
    process.env.CLAUDE_CODE_MAX_OUTPUT_TOKENS,
    fallbackTokens,
    limits.upperLimit,
  )
  return validated.effective
}