ummaya 0.2.3 → 0.2.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (534)
  1. package/README.md +17 -3
  2. package/bin/ummaya +10 -1
  3. package/npm-shrinkwrap.json +253 -2
  4. package/package.json +5 -1
  5. package/prompts/manifest.yaml +2 -2
  6. package/prompts/session_guidance_v1.md +3 -1
  7. package/prompts/system_v1.md +9 -7
  8. package/pyproject.toml +26 -7
  9. package/specs/2803-document-production-hardening/contracts/document-tools.schema.json +1043 -0
  10. package/src/ummaya/_canonical/__init__.py +2 -0
  11. package/src/ummaya/context/builder.py +17 -11
  12. package/src/ummaya/engine/engine.py +30 -113
  13. package/src/ummaya/engine/query.py +20 -0
  14. package/src/ummaya/evidence/__init__.py +44 -0
  15. package/src/ummaya/evidence/__main__.py +7 -0
  16. package/src/ummaya/evidence/dataset_contract.py +193 -0
  17. package/src/ummaya/evidence/document_authoring_cases.py +33 -0
  18. package/src/ummaya/evidence/document_harness.py +313 -0
  19. package/src/ummaya/evidence/document_viewer_ux.py +391 -0
  20. package/src/ummaya/evidence/gates.py +70 -0
  21. package/src/ummaya/evidence/json_types.py +20 -0
  22. package/src/ummaya/evidence/models.py +145 -0
  23. package/src/ummaya/evidence/output_payload.py +89 -0
  24. package/src/ummaya/evidence/payload_documents.py +233 -0
  25. package/src/ummaya/evidence/route_contracts.py +224 -0
  26. package/src/ummaya/evidence/route_helpers.py +150 -0
  27. package/src/ummaya/evidence/runner.py +177 -0
  28. package/src/ummaya/evidence/source_provenance.py +246 -0
  29. package/src/ummaya/evidence/source_provenance_redaction.py +176 -0
  30. package/src/ummaya/evidence/task_registry.py +264 -0
  31. package/src/ummaya/evidence/tool_layer.py +39 -0
  32. package/src/ummaya/evidence/tool_layer_models.py +151 -0
  33. package/src/ummaya/ipc/adapter_manifest_emitter.py +26 -10
  34. package/src/ummaya/ipc/document_intent_normalization.py +185 -0
  35. package/src/ummaya/ipc/frame_schema.py +52 -5
  36. package/src/ummaya/ipc/route_diagnostics.py +73 -0
  37. package/src/ummaya/ipc/stdio.py +2282 -417
  38. package/src/ummaya/llm/client.py +234 -59
  39. package/src/ummaya/llm/config.py +8 -3
  40. package/src/ummaya/llm/reasoning.py +84 -0
  41. package/src/ummaya/primitives/__init__.py +6 -2
  42. package/src/ummaya/primitives/delegation.py +1 -1
  43. package/src/ummaya/primitives/document.py +28 -0
  44. package/src/ummaya/settings.py +0 -3
  45. package/src/ummaya/tools/discovery_bridge.py +34 -2
  46. package/src/ummaya/tools/documents/__init__.py +297 -0
  47. package/src/ummaya/tools/documents/adapter_registry.py +487 -0
  48. package/src/ummaya/tools/documents/archive_container_probe.py +167 -0
  49. package/src/ummaya/tools/documents/artifact_store.py +454 -0
  50. package/src/ummaya/tools/documents/authoring.py +283 -0
  51. package/src/ummaya/tools/documents/baselines.py +114 -0
  52. package/src/ummaya/tools/documents/capability.py +331 -0
  53. package/src/ummaya/tools/documents/contracts.py +112 -0
  54. package/src/ummaya/tools/documents/conversion.py +521 -0
  55. package/src/ummaya/tools/documents/diff.py +275 -0
  56. package/src/ummaya/tools/documents/engines.py +163 -0
  57. package/src/ummaya/tools/documents/evaluation.py +291 -0
  58. package/src/ummaya/tools/documents/explicit_values.py +108 -0
  59. package/src/ummaya/tools/documents/fixtures.py +174 -0
  60. package/src/ummaya/tools/documents/format_completion_audit.py +471 -0
  61. package/src/ummaya/tools/documents/formats/__init__.py +2 -0
  62. package/src/ummaya/tools/documents/formats/archive.py +528 -0
  63. package/src/ummaya/tools/documents/formats/base.py +41 -0
  64. package/src/ummaya/tools/documents/formats/code_file.py +211 -0
  65. package/src/ummaya/tools/documents/formats/data_file.py +272 -0
  66. package/src/ummaya/tools/documents/formats/hwp.py +284 -0
  67. package/src/ummaya/tools/documents/formats/hwpx.py +1837 -0
  68. package/src/ummaya/tools/documents/formats/odf.py +435 -0
  69. package/src/ummaya/tools/documents/formats/ooxml.py +1030 -0
  70. package/src/ummaya/tools/documents/formats/passive.py +766 -0
  71. package/src/ummaya/tools/documents/formats/pdf.py +702 -0
  72. package/src/ummaya/tools/documents/formats/text_web.py +268 -0
  73. package/src/ummaya/tools/documents/hwp_conversion_probe.py +178 -0
  74. package/src/ummaya/tools/documents/hwp_direct_candidate.py +141 -0
  75. package/src/ummaya/tools/documents/inspection.py +289 -0
  76. package/src/ummaya/tools/documents/intake.py +1079 -0
  77. package/src/ummaya/tools/documents/legacy_office_promotion_probe.py +366 -0
  78. package/src/ummaya/tools/documents/models.py +1598 -0
  79. package/src/ummaya/tools/documents/odf_promotion_probe.py +167 -0
  80. package/src/ummaya/tools/documents/orchestrator.py +96 -0
  81. package/src/ummaya/tools/documents/passive_capability_probe.py +251 -0
  82. package/src/ummaya/tools/documents/patch.py +170 -0
  83. package/src/ummaya/tools/documents/pdfa_conformance.py +284 -0
  84. package/src/ummaya/tools/documents/pdfa_promotion_probe.py +198 -0
  85. package/src/ummaya/tools/documents/permissions.py +110 -0
  86. package/src/ummaya/tools/documents/planner.py +616 -0
  87. package/src/ummaya/tools/documents/registry.py +2733 -0
  88. package/src/ummaya/tools/documents/render.py +978 -0
  89. package/src/ummaya/tools/documents/render_comparison.py +113 -0
  90. package/src/ummaya/tools/documents/render_comparison_models.py +74 -0
  91. package/src/ummaya/tools/documents/render_comparison_regions.py +73 -0
  92. package/src/ummaya/tools/documents/render_comparison_style.py +161 -0
  93. package/src/ummaya/tools/documents/reread.py +157 -0
  94. package/src/ummaya/tools/documents/runtime_authoring.py +244 -0
  95. package/src/ummaya/tools/documents/runtime_authoring_bundle.py +76 -0
  96. package/src/ummaya/tools/documents/scorecard.py +184 -0
  97. package/src/ummaya/tools/documents/socratic_planner.py +193 -0
  98. package/src/ummaya/tools/documents/style.py +48 -0
  99. package/src/ummaya/tools/documents/tool_defs.py +523 -0
  100. package/src/ummaya/tools/documents/validate.py +347 -0
  101. package/src/ummaya/tools/executor.py +61 -12
  102. package/src/ummaya/tools/geocoding/kakao_client.py +1 -2
  103. package/src/ummaya/tools/kma/apihub_catalog.py +984 -1
  104. package/src/ummaya/tools/kma/apihub_structured_adapter.py +86 -6
  105. package/src/ummaya/tools/kma/apihub_url_adapter.py +593 -0
  106. package/src/ummaya/tools/kma/apihub_url_catalog.py +296 -0
  107. package/src/ummaya/tools/live_proxy.py +0 -3
  108. package/src/ummaya/tools/location_adapters.py +8 -6
  109. package/src/ummaya/tools/manifest_metadata.py +16 -3
  110. package/src/ummaya/tools/models.py +5 -1
  111. package/src/ummaya/tools/mvp_surface.py +2 -2
  112. package/src/ummaya/tools/nmc/emergency_search.py +8 -6
  113. package/src/ummaya/tools/register_all.py +17 -0
  114. package/src/ummaya/tools/registry.py +10 -1
  115. package/src/ummaya/tools/resolve_location.py +4 -4
  116. package/src/ummaya/tools/routing/__init__.py +59 -0
  117. package/src/ummaya/tools/routing/builder.py +105 -0
  118. package/src/ummaya/tools/routing/cards.py +29 -0
  119. package/src/ummaya/tools/routing/decision_service.py +534 -0
  120. package/src/ummaya/tools/routing/decision_types.py +74 -0
  121. package/src/ummaya/tools/routing/feasibility.py +122 -0
  122. package/src/ummaya/tools/routing/intent.py +17 -0
  123. package/src/ummaya/tools/routing/intent_extractor.py +207 -0
  124. package/src/ummaya/tools/routing/intent_patterns.py +160 -0
  125. package/src/ummaya/tools/routing/intent_public_data.py +150 -0
  126. package/src/ummaya/tools/routing/intent_types.py +48 -0
  127. package/src/ummaya/tools/routing/lint.py +78 -0
  128. package/src/ummaya/tools/routing/metadata.py +174 -0
  129. package/src/ummaya/tools/routing/projection.py +340 -0
  130. package/src/ummaya/tools/routing/retrieval_policy.py +629 -0
  131. package/src/ummaya/tools/routing/schema.py +81 -0
  132. package/src/ummaya/tools/routing/types.py +96 -0
  133. package/src/ummaya/tools/routing_index.py +2 -2
  134. package/src/ummaya/tools/search.py +40 -106
  135. package/src/ummaya/tools/verified_data_go_kr/_manifest.py +115 -25
  136. package/src/ummaya/tools/verified_data_go_kr/airkorea_air_quality.py +109 -4
  137. package/src/ummaya/tools/verified_data_go_kr/nmc_aed_site.py +108 -2
  138. package/src/ummaya/tools/verified_data_go_kr/pps_bid_public_info.py +174 -9
  139. package/src/ummaya/tools/verified_data_go_kr/tago_bus_arrival.py +66 -3
  140. package/src/ummaya/tools/verified_data_go_kr/tago_bus_location.py +12 -2
  141. package/src/ummaya/tools/verified_data_go_kr/tago_bus_route.py +8 -2
  142. package/src/ummaya/tools/verified_data_go_kr/tago_bus_route_station.py +114 -0
  143. package/src/ummaya/tools/verified_data_go_kr/tago_bus_station.py +14 -3
  144. package/src/ummaya/tools/verify_canonical_map.py +21 -0
  145. package/tests/fixtures/documents/public_forms/baselines.yaml +113 -0
  146. package/tui/package.json +1 -2
  147. package/tui/src/.cc-byte-identical-whitelist.yaml +266 -0
  148. package/tui/src/QueryEngine.ts +12 -4
  149. package/tui/src/bridge/inboundAttachments.ts +3 -3
  150. package/tui/src/cli/handlers/auth.ts +4 -13
  151. package/tui/src/cli/handlers/mcp.tsx +3 -3
  152. package/tui/src/cli/print.ts +69 -18
  153. package/tui/src/cli/update.ts +13 -13
  154. package/tui/src/commands/copy/index.ts +1 -1
  155. package/tui/src/commands/cost/cost.ts +2 -2
  156. package/tui/src/commands/init-verifiers.ts +5 -5
  157. package/tui/src/commands/init.ts +30 -30
  158. package/tui/src/commands/insights.ts +44 -44
  159. package/tui/src/commands/install-github-app/install-github-app.tsx +2 -2
  160. package/tui/src/commands/install-github-app/setupGitHubActions.ts +3 -3
  161. package/tui/src/commands/install-github-app/types.ts +8 -30
  162. package/tui/src/commands/install.tsx +5 -5
  163. package/tui/src/commands/mcp/addCommand.ts +5 -5
  164. package/tui/src/commands/mcp/xaaIdpCommand.ts +2 -2
  165. package/tui/src/commands/plugin/ManageMarketplaces.tsx +2 -2
  166. package/tui/src/commands/plugin/types.ts +6 -28
  167. package/tui/src/commands/plugin/unifiedTypes.ts +4 -26
  168. package/tui/src/commands/reasoning/index.ts +13 -0
  169. package/tui/src/commands/reasoning/reasoning.tsx +177 -0
  170. package/tui/src/commands/rename/generateSessionName.ts +1 -1
  171. package/tui/src/commands/thinkback/thinkback.tsx +3 -3
  172. package/tui/src/commands.ts +2 -0
  173. package/tui/src/components/Feedback.tsx +1 -1
  174. package/tui/src/components/LogoV2/EmergencyTip.tsx +11 -2
  175. package/tui/src/components/LogoV2/WelcomeV2.tsx +1 -3
  176. package/tui/src/components/Messages.tsx +2 -1
  177. package/tui/src/components/ScrollKeybindingHandler.tsx +6 -6
  178. package/tui/src/components/Spinner/types.ts +6 -28
  179. package/tui/src/components/Spinner.tsx +2 -2
  180. package/tui/src/components/agents/generateAgent.ts +1 -1
  181. package/tui/src/components/agents/new-agent-creation/types.ts +4 -26
  182. package/tui/src/components/config/EnvSecretIsolatedEditor.tsx +1 -1
  183. package/tui/src/components/design-system/LoadingState.tsx +2 -2
  184. package/tui/src/components/mcp/types.ts +16 -38
  185. package/tui/src/components/messages/AssistantToolUseMessage.tsx +3 -2
  186. package/tui/src/components/messages/UserCrossSessionMessage.ts +16 -4
  187. package/tui/src/components/messages/UserForkBoilerplateMessage.ts +16 -4
  188. package/tui/src/components/messages/UserGitHubWebhookMessage.ts +16 -4
  189. package/tui/src/components/messages/UserToolResultMessage/utils.tsx +3 -2
  190. package/tui/src/components/permissions/MonitorPermissionRequest/MonitorPermissionRequest.ts +9 -4
  191. package/tui/src/components/permissions/ReviewArtifactPermissionRequest/ReviewArtifactPermissionRequest.ts +9 -4
  192. package/tui/src/components/primitive/DocumentSocraticReviewBlock.tsx +129 -0
  193. package/tui/src/components/primitive/DocumentToolResultCard.tsx +224 -0
  194. package/tui/src/components/primitive/documentSocraticReview.ts +215 -0
  195. package/tui/src/components/primitive/index.tsx +43 -1
  196. package/tui/src/components/primitive/types.ts +137 -0
  197. package/tui/src/components/ui/option.ts +4 -26
  198. package/tui/src/constants/common.ts +0 -2
  199. package/tui/src/constants/prompts.ts +4 -3
  200. package/tui/src/constants/querySource.ts +4 -26
  201. package/tui/src/entrypoints/sdk/controlTypes.ts +26 -48
  202. package/tui/src/entrypoints/sdk/coreTypes.generated.ts +3 -25
  203. package/tui/src/entrypoints/sdk/runtimeTypes.ts +38 -60
  204. package/tui/src/entrypoints/sdk/sdkUtilityTypes.ts +4 -26
  205. package/tui/src/entrypoints/sdk/settingsTypes.generated.ts +3 -25
  206. package/tui/src/entrypoints/sdk/toolTypes.ts +3 -25
  207. package/tui/src/hooks/toolPermission/handlers/interactiveHandler.ts +10 -0
  208. package/tui/src/hooks/useApiKeyVerification.ts +1 -1
  209. package/tui/src/hooks/useVirtualScroll.ts +1 -1
  210. package/tui/src/ink/ink.tsx +33 -14
  211. package/tui/src/ink/reconciler.ts +2 -3
  212. package/tui/src/ink/render-to-screen.ts +30 -10
  213. package/tui/src/ipc/bridge.ts +62 -15
  214. package/tui/src/ipc/bridgeSingleton.ts +5 -1
  215. package/tui/src/ipc/codec.ts +29 -3
  216. package/tui/src/ipc/frames.generated.ts +407 -312
  217. package/tui/src/ipc/llmClient.ts +279 -76
  218. package/tui/src/ipc/llmTypes.ts +16 -1
  219. package/tui/src/ipc/schema/frame.schema.json +1 -3475
  220. package/tui/src/keybindings/defaultBindings.ts +4 -0
  221. package/tui/src/main.tsx +32 -11
  222. package/tui/src/native-ts/file-index/index.ts +33 -3
  223. package/tui/src/observability/surface.ts +2 -2
  224. package/tui/src/probes/toolRegistryProbe.tsx +3 -1
  225. package/tui/src/projectOnboardingState.ts +7 -6
  226. package/tui/src/query/chatMessageTypes.ts +18 -0
  227. package/tui/src/query/chatMessagesBuilder.ts +1 -1
  228. package/tui/src/query/deps.ts +1 -1
  229. package/tui/src/query/messageGuards.ts +106 -0
  230. package/tui/src/query/publicDataTerminalRepair.ts +384 -0
  231. package/tui/src/query/run.ts +1075 -0
  232. package/tui/src/query/supportBoundary.ts +168 -0
  233. package/tui/src/query/toolResultErrors.ts +103 -0
  234. package/tui/src/query/toolRunner.ts +687 -0
  235. package/tui/src/query/unavailableToolRepair.ts +118 -0
  236. package/tui/src/query.ts +9 -1721
  237. package/tui/src/screens/REPL.tsx +42 -31
  238. package/tui/src/services/api/adapterManifest.ts +4 -0
  239. package/tui/src/services/api/backendChat/events.ts +117 -0
  240. package/tui/src/services/api/backendChat/finalMessage.ts +40 -0
  241. package/tui/src/services/api/backendChat/frame.ts +9 -0
  242. package/tui/src/services/api/backendChat/streaming.ts +430 -0
  243. package/tui/src/services/api/backendChat/types.ts +62 -0
  244. package/tui/src/services/api/backendChat.ts +1 -0
  245. package/tui/src/services/api/client.ts +98 -14
  246. package/tui/src/services/api/errorUtils.ts +5 -5
  247. package/tui/src/services/api/errors.ts +1 -1
  248. package/tui/src/services/api/logging.ts +1 -1
  249. package/tui/src/services/api/ummaya/evidence.ts +194 -0
  250. package/tui/src/services/api/ummaya/messages.ts +255 -0
  251. package/tui/src/services/api/ummaya/nonStreaming.ts +66 -0
  252. package/tui/src/services/api/ummaya/provider.ts +200 -0
  253. package/tui/src/services/api/ummaya/reasoning.ts +24 -0
  254. package/tui/src/services/api/ummaya/request.ts +200 -0
  255. package/tui/src/services/api/ummaya/selectionContext.ts +240 -0
  256. package/tui/src/services/api/ummaya/streaming.ts +365 -0
  257. package/tui/src/services/api/ummaya/streamingPayload.ts +129 -0
  258. package/tui/src/services/api/ummaya/streamingReader.ts +40 -0
  259. package/tui/src/services/api/ummaya/toolSelection.ts +217 -0
  260. package/tui/src/services/api/ummaya/types.ts +110 -0
  261. package/tui/src/services/api/ummaya/usage.ts +30 -0
  262. package/tui/src/services/api/ummaya.ts +26 -364
  263. package/tui/src/services/api/withRetry.ts +1 -1
  264. package/tui/src/services/awaySummary.ts +2 -2
  265. package/tui/src/services/claudeAiLimits.ts +1 -1
  266. package/tui/src/services/compact/autoCompact.ts +1 -1
  267. package/tui/src/services/compact/compact.ts +1 -1
  268. package/tui/src/services/lsp/types.ts +8 -30
  269. package/tui/src/services/tips/types.ts +6 -28
  270. package/tui/src/services/tokenEstimation.ts +1 -1
  271. package/tui/src/services/toolRegistry/bootGuard.ts +5 -5
  272. package/tui/src/services/toolUseSummary/toolUseSummaryGenerator.ts +1 -1
  273. package/tui/src/services/tools/toolExecution.ts +94 -1
  274. package/tui/src/skills/bundled/stuck.ts +12 -12
  275. package/tui/src/state/AppStateStore.ts +7 -0
  276. package/tui/src/store/pendingPermissionSlot.ts +1 -1
  277. package/tui/src/store/session-store.ts +10 -36
  278. package/tui/src/stubs/any-stub.ts +15 -10
  279. package/tui/src/stubs/color-diff-napi.ts +37 -23
  280. package/tui/src/stubs/globals.d.ts +3 -3
  281. package/tui/src/stubs/macro-preload.ts +23 -12
  282. package/tui/src/tools/AdapterTool/AdapterTool.ts +1239 -163
  283. package/tui/src/tools/AdapterTool/routeDiagnostics.ts +75 -0
  284. package/tui/src/tools/AgentTool/AgentTool.tsx +84 -1371
  285. package/tui/src/tools/AgentTool/agentToolHandoff.ts +114 -0
  286. package/tui/src/tools/AgentTool/agentToolPartialResult.ts +16 -0
  287. package/tui/src/tools/AgentTool/agentToolProgress.ts +32 -0
  288. package/tui/src/tools/AgentTool/agentToolResolver.ts +161 -0
  289. package/tui/src/tools/AgentTool/agentToolResult.ts +163 -0
  290. package/tui/src/tools/AgentTool/agentToolUtils.ts +14 -686
  291. package/tui/src/tools/AgentTool/asyncAgentLifecycle.ts +208 -0
  292. package/tui/src/tools/AgentTool/asyncLifecycle.ts +153 -0
  293. package/tui/src/tools/AgentTool/backgroundedCompletion.ts +126 -0
  294. package/tui/src/tools/AgentTool/backgroundedLifecycle.ts +174 -0
  295. package/tui/src/tools/AgentTool/foregroundBackground.ts +83 -0
  296. package/tui/src/tools/AgentTool/foregroundDrain.tsx +133 -0
  297. package/tui/src/tools/AgentTool/foregroundFinalize.ts +98 -0
  298. package/tui/src/tools/AgentTool/foregroundLifecycle.tsx +237 -0
  299. package/tui/src/tools/AgentTool/foregroundProgress.tsx +169 -0
  300. package/tui/src/tools/AgentTool/foregroundTask.ts +89 -0
  301. package/tui/src/tools/AgentTool/forkSubagent.ts +1 -12
  302. package/tui/src/tools/AgentTool/forkSubagentGate.ts +34 -0
  303. package/tui/src/tools/AgentTool/launchRouting.ts +203 -0
  304. package/tui/src/tools/AgentTool/lifecycle.ts +244 -0
  305. package/tui/src/tools/AgentTool/mcpRouting.ts +73 -0
  306. package/tui/src/tools/AgentTool/orchestrationSupport.ts +70 -0
  307. package/tui/src/tools/AgentTool/permissions.ts +39 -0
  308. package/tui/src/tools/AgentTool/promptSetup.ts +181 -0
  309. package/tui/src/tools/AgentTool/remoteRouting.ts +62 -0
  310. package/tui/src/tools/AgentTool/resultMapping.ts +116 -0
  311. package/tui/src/tools/AgentTool/resumeAgent.ts +39 -107
  312. package/tui/src/tools/AgentTool/resumeAgentHelpers.ts +140 -0
  313. package/tui/src/tools/AgentTool/runAgent.ts +1 -1
  314. package/tui/src/tools/AgentTool/runtimeConfig.ts +57 -0
  315. package/tui/src/tools/AgentTool/schemas.ts +196 -0
  316. package/tui/src/tools/AgentTool/sourceVerificationPropagation.ts +263 -0
  317. package/tui/src/tools/AgentTool/worktreeLifecycle.ts +105 -0
  318. package/tui/src/tools/AskUserQuestionTool/AskUserQuestionTool.tsx +174 -202
  319. package/tui/src/tools/BashTool/BashTool.tsx +71 -1072
  320. package/tui/src/tools/BashTool/bashCommandHelpers.ts +12 -12
  321. package/tui/src/tools/BashTool/bashPermissions/astPreflight.ts +173 -0
  322. package/tui/src/tools/BashTool/bashPermissions/classifierChecks.ts +199 -0
  323. package/tui/src/tools/BashTool/bashPermissions/compoundGuards.ts +53 -0
  324. package/tui/src/tools/BashTool/bashPermissions/constants.ts +99 -0
  325. package/tui/src/tools/BashTool/bashPermissions/index.ts +38 -0
  326. package/tui/src/tools/BashTool/bashPermissions/legacyMisparsing.ts +62 -0
  327. package/tui/src/tools/BashTool/bashPermissions/main.ts +135 -0
  328. package/tui/src/tools/BashTool/bashPermissions/normalizedCommands.ts +33 -0
  329. package/tui/src/tools/BashTool/bashPermissions/operatorFlow.ts +98 -0
  330. package/tui/src/tools/BashTool/bashPermissions/permissionChecks.ts +200 -0
  331. package/tui/src/tools/BashTool/bashPermissions/prefixSuggestions.ts +88 -0
  332. package/tui/src/tools/BashTool/bashPermissions/promptClassifierRules.ts +125 -0
  333. package/tui/src/tools/BashTool/bashPermissions/ruleDelegates.ts +19 -0
  334. package/tui/src/tools/BashTool/bashPermissions/ruleMatching.ts +145 -0
  335. package/tui/src/tools/BashTool/bashPermissions/sandboxAutoAllow.ts +75 -0
  336. package/tui/src/tools/BashTool/bashPermissions/subcommandFlow.ts +205 -0
  337. package/tui/src/tools/BashTool/bashPermissions/subcommandGuards.ts +73 -0
  338. package/tui/src/tools/BashTool/bashPermissions/subcommandResultHelpers.ts +116 -0
  339. package/tui/src/tools/BashTool/bashPermissions/types.ts +26 -0
  340. package/tui/src/tools/BashTool/bashPermissions/wrapperStripping.ts +139 -0
  341. package/tui/src/tools/BashTool/bashPermissions.ts +26 -2621
  342. package/tui/src/tools/BashTool/call.ts +202 -0
  343. package/tui/src/tools/BashTool/callLoader.ts +35 -0
  344. package/tui/src/tools/BashTool/commandClassification.ts +151 -0
  345. package/tui/src/tools/BashTool/commandClassificationLoader.ts +40 -0
  346. package/tui/src/tools/BashTool/cwdReset.ts +33 -0
  347. package/tui/src/tools/BashTool/lineTruncation.ts +11 -0
  348. package/tui/src/tools/BashTool/modeValidation.ts +13 -1
  349. package/tui/src/tools/BashTool/outputPersistence.ts +42 -0
  350. package/tui/src/tools/BashTool/permissionClassification.ts +66 -0
  351. package/tui/src/tools/BashTool/permissionLoader.ts +44 -0
  352. package/tui/src/tools/BashTool/resultLoader.ts +29 -0
  353. package/tui/src/tools/BashTool/resultMapping.ts +83 -0
  354. package/tui/src/tools/BashTool/sandboxPolicy.ts +79 -0
  355. package/tui/src/tools/BashTool/schemas.ts +65 -0
  356. package/tui/src/tools/BashTool/sedEditExecution.ts +59 -0
  357. package/tui/src/tools/BashTool/shellExecution.tsx +245 -0
  358. package/tui/src/tools/BashTool/shellOutputUtils.ts +85 -0
  359. package/tui/src/tools/BashTool/shellPermissionGauntlet.ts +97 -0
  360. package/tui/src/tools/BashTool/uiLoader.ts +37 -0
  361. package/tui/src/tools/BriefTool/upload.ts +1 -1
  362. package/tui/src/tools/CalculatorTool/parser.ts +2 -2
  363. package/tui/src/tools/DocumentPrimitive/DocumentPrimitive.ts +262 -0
  364. package/tui/src/tools/DocumentPrimitive/dispatchNormalization.ts +270 -0
  365. package/tui/src/tools/DocumentPrimitive/documentDestinationPath.ts +18 -0
  366. package/tui/src/tools/DocumentPrimitive/documentMutationGuard.ts +22 -0
  367. package/tui/src/tools/DocumentPrimitive/documentPatchNormalization.ts +248 -0
  368. package/tui/src/tools/DocumentPrimitive/documentSourceVerification.ts +245 -0
  369. package/tui/src/tools/DocumentPrimitive/documentSourceVerificationFields.ts +103 -0
  370. package/tui/src/tools/DocumentPrimitive/modelVisibleOutput.ts +40 -0
  371. package/tui/src/tools/DocumentPrimitive/prompt.ts +35 -0
  372. package/tui/src/tools/FileEditTool/FileEditTool.ts +9 -507
  373. package/tui/src/tools/FileEditTool/call.ts +228 -0
  374. package/tui/src/tools/FileEditTool/validateInput.ts +196 -0
  375. package/tui/src/tools/FileReadTool/imageProcessor.ts +13 -0
  376. package/tui/src/tools/FileWriteTool/FileWriteTool.ts +7 -300
  377. package/tui/src/tools/FileWriteTool/call.ts +223 -0
  378. package/tui/src/tools/FileWriteTool/validateInput.ts +80 -0
  379. package/tui/src/tools/ListMcpResourcesTool/ListMcpResourcesTool.ts +19 -3
  380. package/tui/src/tools/LookupPrimitive/LookupPrimitive.ts +48 -29
  381. package/tui/src/tools/LookupPrimitive/prompt.ts +6 -7
  382. package/tui/src/tools/MCPTool/trustPolicy.ts +118 -0
  383. package/tui/src/tools/McpAuthTool/McpAuthTool.ts +21 -3
  384. package/tui/src/tools/NotebookEditTool/NotebookEditTool.ts +7 -326
  385. package/tui/src/tools/NotebookEditTool/call.ts +254 -0
  386. package/tui/src/tools/NotebookEditTool/notebookModel.ts +51 -0
  387. package/tui/src/tools/NotebookEditTool/validateInput.ts +142 -0
  388. package/tui/src/tools/PowerShellTool/PowerShellTool.tsx +46 -937
  389. package/tui/src/tools/PowerShellTool/acceptEditsCommandValidation.ts +162 -0
  390. package/tui/src/tools/PowerShellTool/call.ts +179 -0
  391. package/tui/src/tools/PowerShellTool/callLoader.ts +37 -0
  392. package/tui/src/tools/PowerShellTool/commandClassification.ts +86 -0
  393. package/tui/src/tools/PowerShellTool/modeValidation.ts +25 -332
  394. package/tui/src/tools/PowerShellTool/outputPersistence.ts +42 -0
  395. package/tui/src/tools/PowerShellTool/permissionClassification.ts +28 -0
  396. package/tui/src/tools/PowerShellTool/resultLoader.ts +31 -0
  397. package/tui/src/tools/PowerShellTool/resultMapping.ts +75 -0
  398. package/tui/src/tools/PowerShellTool/schemas.ts +40 -0
  399. package/tui/src/tools/PowerShellTool/shellExecution.tsx +258 -0
  400. package/tui/src/tools/PowerShellTool/symlinkModeValidation.ts +44 -0
  401. package/tui/src/tools/PowerShellTool/uiLoader.ts +37 -0
  402. package/tui/src/tools/PowerShellTool/validation.ts +39 -0
  403. package/tui/src/tools/ReadMcpResourceTool/ReadMcpResourceTool.ts +19 -3
  404. package/tui/src/tools/ResolveLocationPrimitive/ResolveLocationPrimitive.ts +30 -19
  405. package/tui/src/tools/ResolveLocationPrimitive/prompt.ts +2 -6
  406. package/tui/src/tools/SkillTool/SkillTool.ts +2 -2
  407. package/tui/src/tools/SubmitPrimitive/SubmitPrimitive.ts +51 -18
  408. package/tui/src/tools/TaskCreateTool/TaskCreateTool.ts +16 -2
  409. package/tui/src/tools/TaskGetTool/TaskGetTool.ts +23 -3
  410. package/tui/src/tools/TaskListTool/TaskListTool.ts +22 -4
  411. package/tui/src/tools/TaskOutputTool/TaskOutputTool.tsx +46 -547
  412. package/tui/src/tools/TaskOutputTool/lookup.ts +216 -0
  413. package/tui/src/tools/TaskOutputTool/render.tsx +257 -0
  414. package/tui/src/tools/TaskOutputTool/schemas.ts +55 -0
  415. package/tui/src/tools/TaskOutputTool/serialization.ts +36 -0
  416. package/tui/src/tools/TaskStopTool/TaskStopTool.ts +10 -0
  417. package/tui/src/tools/TaskUpdateTool/TaskUpdateTool.ts +14 -364
  418. package/tui/src/tools/TaskUpdateTool/completion.ts +62 -0
  419. package/tui/src/tools/TaskUpdateTool/schemas.ts +62 -0
  420. package/tui/src/tools/TaskUpdateTool/serialization.ts +46 -0
  421. package/tui/src/tools/TaskUpdateTool/statusUpdate.ts +247 -0
  422. package/tui/src/tools/TodoWriteTool/TodoWriteTool.ts +21 -2
  423. package/tui/src/tools/ToolSearchTool/ToolSearchTool.ts +21 -302
  424. package/tui/src/tools/ToolSearchTool/ccSupportTools.ts +223 -0
  425. package/tui/src/tools/ToolSearchTool/descriptionCache.ts +50 -0
  426. package/tui/src/tools/ToolSearchTool/keywordSearch.ts +216 -0
  427. package/tui/src/tools/ToolSearchTool/prompt.ts +10 -4
  428. package/tui/src/tools/ToolSearchTool/resultMapping.ts +30 -0
  429. package/tui/src/tools/ToolSearchTool/schemas.ts +30 -0
  430. package/tui/src/tools/ToolSearchTool/searchPool.ts +47 -0
  431. package/tui/src/tools/ToolSearchTool/supportIntentHints.ts +140 -0
  432. package/tui/src/tools/TranslateTool/TranslateTool.ts +1 -1
  433. package/tui/src/tools/VerifyPrimitive/VerifyPrimitive.ts +27 -10
  434. package/tui/src/tools/WebFetchTool/WebFetchTool.ts +43 -138
  435. package/tui/src/tools/WebFetchTool/call.ts +227 -0
  436. package/tui/src/tools/WebFetchTool/resolvedAddressSafety.ts +78 -0
  437. package/tui/src/tools/WebFetchTool/sourceVerification.ts +204 -0
  438. package/tui/src/tools/WebFetchTool/types.ts +23 -0
  439. package/tui/src/tools/WebFetchTool/urlSafety.ts +181 -0
  440. package/tui/src/tools/WebFetchTool/utils.ts +1 -1
  441. package/tui/src/tools/WebSearchTool/UI.tsx +0 -1
  442. package/tui/src/tools/WebSearchTool/WebSearchTool.ts +9 -313
  443. package/tui/src/tools/WebSearchTool/call.ts +33 -0
  444. package/tui/src/tools/WebSearchTool/responseMapping.ts +190 -0
  445. package/tui/src/tools/WebSearchTool/resultBlock.ts +47 -0
  446. package/tui/src/tools/WebSearchTool/schemas.ts +47 -0
  447. package/tui/src/tools/WebSearchTool/toolSchema.ts +12 -0
  448. package/tui/src/tools/WorkspaceToolAdapter/WorkspaceToolAdapter.ts +79 -0
  449. package/tui/src/tools/WorkspaceToolAdapter/allowedRootPolicy.ts +85 -0
  450. package/tui/src/tools/WorkspaceToolAdapter/documentFormatGuards.ts +73 -0
  451. package/tui/src/tools/WorkspaceToolAdapter/inputNormalization.ts +105 -0
  452. package/tui/src/tools/WorkspaceToolAdapter/mcpExposurePolicy.ts +64 -0
  453. package/tui/src/tools/WorkspaceToolAdapter/toolDefFactory.ts +215 -0
  454. package/tui/src/tools/WorkspaceToolAdapter/toolNames.ts +6 -0
  455. package/tui/src/tools/WorkspaceToolAdapter/workspacePolicy.ts +15 -0
  456. package/tui/src/tools/_shared/citizenUserText.ts +49 -0
  457. package/tui/src/tools/_shared/dispatchPrimitive.ts +6 -6
  458. package/tui/src/tools/_shared/documentChangeToPatch.ts +125 -0
  459. package/tui/src/tools/_shared/documentDispatchArguments.ts +87 -0
  460. package/tui/src/tools/_shared/documentPrimitiveTimeout.ts +13 -0
  461. package/tui/src/tools/_shared/documentToolResultRender.ts +98 -0
  462. package/tui/src/tools/_shared/locationInputRepair.ts +112 -0
  463. package/tui/src/tools/_shared/pendingCallRegistry.ts +1 -6
  464. package/tui/src/tools/_shared/rootPrimitiveInput.ts +68 -0
  465. package/tui/src/tools/_shared/toolChoiceRepair/documentCompletionPatterns.ts +58 -0
  466. package/tui/src/tools/_shared/toolChoiceRepair/documentCompletionPrompt.ts +271 -0
  467. package/tui/src/tools/_shared/toolChoiceRepair/documentRepair.ts +452 -0
  468. package/tui/src/tools/_shared/toolChoiceRepair/messageAccess.ts +80 -0
  469. package/tui/src/tools/_shared/toolChoiceRepair/publicDataRepair.ts +92 -0
  470. package/tui/src/tools/_shared/toolChoiceRepair/supportRepair.ts +135 -0
  471. package/tui/src/tools/_shared/toolChoiceRepair.ts +61 -0
  472. package/tui/src/tools/shared/mockDisclaimer.ts +1 -1
  473. package/tui/src/tools.ts +39 -190
  474. package/tui/src/types/fileSuggestion.ts +4 -26
  475. package/tui/src/types/generated/events_mono/claude_code/v1/claude_code_internal_event.ts +186 -148
  476. package/tui/src/types/generated/events_mono/common/v1/auth.ts +25 -11
  477. package/tui/src/types/generated/events_mono/growthbook/v1/growthbook_experiment_event.ts +47 -30
  478. package/tui/src/types/generated/google/protobuf/timestamp.ts +21 -7
  479. package/tui/src/types/message.ts +80 -102
  480. package/tui/src/types/messageQueueTypes.ts +6 -28
  481. package/tui/src/types/notebook.ts +16 -38
  482. package/tui/src/types/statusLine.ts +4 -26
  483. package/tui/src/types/tools.ts +24 -46
  484. package/tui/src/types/utils.ts +6 -28
  485. package/tui/src/upstreamproxy/relay.ts +7 -3
  486. package/tui/src/upstreamproxy/upstreamproxy.ts +1 -1
  487. package/tui/src/utils/assistantMessageFactories.ts +9 -3
  488. package/tui/src/utils/attachments.ts +1 -1
  489. package/tui/src/utils/auth.ts +129 -139
  490. package/tui/src/utils/bash/ast.ts +23 -23
  491. package/tui/src/utils/bash/bashParser.ts +5 -5
  492. package/tui/src/utils/billing.ts +1 -1
  493. package/tui/src/utils/collapseReadSearch.ts +3 -3
  494. package/tui/src/utils/cronTasks.ts +1 -1
  495. package/tui/src/utils/execFileNoThrow.ts +1 -1
  496. package/tui/src/utils/filePersistence/types.ts +16 -38
  497. package/tui/src/utils/forkedAgent.ts +1 -1
  498. package/tui/src/utils/gracefulShutdown.ts +4 -4
  499. package/tui/src/utils/heapDumpService.ts +12 -8
  500. package/tui/src/utils/hooks/apiQueryHookHelper.ts +1 -1
  501. package/tui/src/utils/hooks/execPromptHook.ts +1 -1
  502. package/tui/src/utils/hooks/skillImprovement.ts +1 -1
  503. package/tui/src/utils/kExaoneReasoning.ts +138 -0
  504. package/tui/src/utils/mcp/dateTimeParser.ts +1 -1
  505. package/tui/src/utils/messages.ts +19 -0
  506. package/tui/src/utils/migrateSessions.ts +3 -3
  507. package/tui/src/utils/model/model.ts +6 -6
  508. package/tui/src/utils/multiToolLayout.ts +13 -0
  509. package/tui/src/utils/permissions/yoloClassifier.ts +1 -1
  510. package/tui/src/utils/plugins/headlessPluginInstall.ts +1 -1
  511. package/tui/src/utils/plugins/mcpPluginIntegration.ts +1 -1
  512. package/tui/src/utils/plugins/mcpbHandler.ts +1 -1
  513. package/tui/src/utils/plugins/pluginLoader.ts +8 -8
  514. package/tui/src/utils/processUserInput/processSlashCommand.tsx +2 -2
  515. package/tui/src/utils/processUserInput/processUserInput.ts +26 -0
  516. package/tui/src/utils/protectedNamespace.ts +5 -3
  517. package/tui/src/utils/rawJsonToolCall.ts +242 -0
  518. package/tui/src/utils/ripgrep.ts +16 -7
  519. package/tui/src/utils/sessionTitle.ts +1 -1
  520. package/tui/src/utils/settings/applySettingsChange.ts +4 -0
  521. package/tui/src/utils/settings/permissionValidation.ts +14 -2
  522. package/tui/src/utils/settings/types.ts +9 -3
  523. package/tui/src/utils/shell/prefix.ts +1 -1
  524. package/tui/src/utils/sideQuery.ts +1 -1
  525. package/tui/src/utils/stats.ts +1 -1
  526. package/tui/src/utils/systemThemeWatcher.ts +13 -3
  527. package/tui/src/utils/teleport.tsx +1 -1
  528. package/uv.lock +394 -22
  529. package/assets/copilot-gate-logo.svg +0 -58
  530. package/assets/govon-logo.svg +0 -40
  531. package/src/ummaya/eval/__init__.py +0 -5
  532. package/src/ummaya/eval/retrieval.py +0 -713
  533. package/tui/src/services/api/claude.ts +0 -3510
  534. package/tui/src/utils/messageStream.ts +0 -186
@@ -1,3510 +0,0 @@
1
- // SPDX-License-Identifier: Apache-2.0
2
- // Spec 2521 — byte-copy(2521) baseline restored from
3
- // .references/claude-code-sourcemap/restored-src/src/services/api/claude.ts
4
- // (CC 2.1.88, SHA-256 6d3fd16e608120d502e70ec461ffb66bcbca12fa86862859606c9118f977a999).
5
- // Three labeled swap commits layer atop the byte-copy:
6
- // • swap/llm-provider(2521) — @anthropic-ai/sdk imports → sdk-compat.ts
7
- // • swap/anti-anthropic-1p(2521) — claude.ai 1P call-graph deadened via
8
- // UMMAYA-stubbed support modules (services/claudeAiLimits.ts + utils/auth.ts
9
- // are inert no-ops since Epic #1633). The 1P functions in this file
10
- // (getOauthAccountInfo, currentLimits, extractQuotaStatusFromHeaders,
11
- // getCLISyspromptPrefix's claude.ai branches, account_uuid telemetry)
12
- // remain in the byte-copy text but resolve to no-op returns at runtime.
13
- // No UMMAYA callers reach this file (verified post-Spec-2293), so the
14
- // 1P graph is doubly dead — by callgraph (no callers) and by support-
15
- // module inertness. Spec 2521 byte-copy philosophy (FR-002): keep the
16
- // reference text intact; deactivate via supporting infrastructure
17
- // instead of deleting in-file.
18
- // • swap/identifier-rename(2521) — citizen-visible Claude/Anthropic brand
19
- // tokens → UMMAYA/EXAONE/FriendliAI (T013).
20
- // This file has zero callers in tui/src after Spec 2293; it is retained as
21
- // the authoritative CC streaming-handler reference for future audit replays
22
- // (specs/2521-llm-swap-cc-rebuild/scripts/replay_rebuild.sh).
23
-
24
- import type {
25
- BetaContentBlock,
26
- BetaContentBlockParam,
27
- BetaImageBlockParam,
28
- BetaJSONOutputFormat,
29
- BetaMessage,
30
- BetaMessageDeltaUsage,
31
- BetaMessageStreamParams,
32
- BetaOutputConfig,
33
- BetaRawMessageStreamEvent,
34
- BetaRequestDocumentBlock,
35
- BetaStopReason,
36
- BetaToolChoiceAuto,
37
- BetaToolChoiceTool,
38
- BetaToolResultBlockParam,
39
- BetaToolUnion,
40
- BetaUsage,
41
- BetaMessageParam as MessageParam,
42
- } from '../../sdk-compat.js'
43
- import type { TextBlockParam } from '../../sdk-compat.js'
44
- import type { Stream } from '../../sdk-compat.js'
45
- import { randomUUID } from 'crypto'
46
- import {
47
- getAPIProvider,
48
- isFirstPartyAnthropicBaseUrl,
49
- } from 'src/utils/model/providers.js'
50
- import {
51
- getAttributionHeader,
52
- getCLISyspromptPrefix,
53
- } from '../../constants/system.js'
54
- import {
55
- getEmptyToolPermissionContext,
56
- type QueryChainTracking,
57
- type Tool,
58
- type ToolPermissionContext,
59
- type Tools,
60
- toolMatchesName,
61
- } from '../../Tool.js'
62
- import type { AgentDefinition } from '../../tools/AgentTool/loadAgentsDir.js'
63
- import {
64
- type ConnectorTextBlock,
65
- type ConnectorTextDelta,
66
- isConnectorTextBlock,
67
- } from '../../types/connectorText.js'
68
- import type {
69
- AssistantMessage,
70
- Message,
71
- StreamEvent,
72
- SystemAPIErrorMessage,
73
- UserMessage,
74
- } from '../../types/message.js'
75
- import {
76
- type CacheScope,
77
- logAPIPrefix,
78
- splitSysPromptPrefix,
79
- toolToAPISchema,
80
- } from '../../utils/api.js'
81
- import { getOauthAccountInfo } from '../../utils/auth.js'
82
- import {
83
- getBedrockExtraBodyParamsBetas,
84
- getMergedBetas,
85
- getModelBetas,
86
- } from '../../utils/betas.js'
87
- import { getOrCreateUserID } from '../../utils/config.js'
88
- import {
89
- CAPPED_DEFAULT_MAX_TOKENS,
90
- getModelMaxOutputTokens,
91
- getSonnet1mExpTreatmentEnabled,
92
- } from '../../utils/context.js'
93
- import { resolveAppliedEffort } from '../../utils/effort.js'
94
- import { isEnvTruthy } from '../../utils/envUtils.js'
95
- import { errorMessage } from '../../utils/errors.js'
96
- import { computeFingerprintFromMessages } from '../../utils/fingerprint.js'
97
- import { captureAPIRequest, logError } from '../../utils/log.js'
98
- import { normalizeMessagesForAPI } from '../../utils/messageApiNormalize.js'
99
- import * as messageUtils from '../../utils/messages.js'
100
- import { createAssistantAPIErrorMessage } from '../../utils/assistantMessageFactories.js'
101
- import { createUserMessage } from '../../utils/userMessageFactories.js'
102
- import {
103
- getDefaultOpusModel,
104
- getDefaultSonnetModel,
105
- getSmallFastModel,
106
- isNonCustomOpusModel,
107
- } from '../../utils/model/model.js'
108
- import {
109
- asSystemPrompt,
110
- type SystemPrompt,
111
- } from '../../utils/systemPromptType.js'
112
- import { tokenCountFromLastAPIResponse } from '../../utils/tokens.js'
113
- import { getDynamicConfig_BLOCKS_ON_INIT } from '../analytics/growthbook.js'
114
- import {
115
- currentLimits,
116
- extractQuotaStatusFromError,
117
- extractQuotaStatusFromHeaders,
118
- } from '../claudeAiLimits.js'
119
- import { getAPIContextManagement } from '../compact/apiMicrocompact.js'
120
-
121
- /* eslint-disable @typescript-eslint/no-require-imports */
122
- const autoModeStateModule = feature('TRANSCRIPT_CLASSIFIER')
123
- ? (require('../../utils/permissions/autoModeState.js') as typeof import('../../utils/permissions/autoModeState.js'))
124
- : null
125
-
126
- import { feature } from 'bun:bundle'
127
- // SWAP/llm-provider(2521): @anthropic-ai/sdk + /error → UMMAYA sdk-compat
128
- // aliases (ClientOptions, APIError, APIConnectionTimeoutError, APIUserAbortError
129
- // all re-exported by sdk-compat.ts as structural stubs).
130
- import type { ClientOptions } from '../../sdk-compat.js'
131
- import {
132
- APIConnectionTimeoutError,
133
- APIError,
134
- APIUserAbortError,
135
- } from '../../sdk-compat.js'
136
- import {
137
- getAfkModeHeaderLatched,
138
- getCacheEditingHeaderLatched,
139
- getFastModeHeaderLatched,
140
- getLastApiCompletionTimestamp,
141
- getPromptCache1hAllowlist,
142
- getPromptCache1hEligible,
143
- getSessionId,
144
- getThinkingClearLatched,
145
- setAfkModeHeaderLatched,
146
- setCacheEditingHeaderLatched,
147
- setFastModeHeaderLatched,
148
- setLastMainRequestId,
149
- setPromptCache1hAllowlist,
150
- setPromptCache1hEligible,
151
- setThinkingClearLatched,
152
- } from 'src/bootstrap/state.js'
153
- import {
154
- AFK_MODE_BETA_HEADER,
155
- CONTEXT_1M_BETA_HEADER,
156
- CONTEXT_MANAGEMENT_BETA_HEADER,
157
- EFFORT_BETA_HEADER,
158
- FAST_MODE_BETA_HEADER,
159
- PROMPT_CACHING_SCOPE_BETA_HEADER,
160
- REDACT_THINKING_BETA_HEADER,
161
- STRUCTURED_OUTPUTS_BETA_HEADER,
162
- TASK_BUDGETS_BETA_HEADER,
163
- } from 'src/constants/betas.js'
164
- import type { QuerySource } from 'src/constants/querySource.js'
165
- import type { Notification } from 'src/context/notifications.js'
166
- import { addToTotalSessionCost } from 'src/cost-tracker.js'
167
- import { getFeatureValue_CACHED_MAY_BE_STALE } from 'src/services/analytics/growthbook.js'
168
- import type { AgentId } from 'src/types/ids.js'
169
- import {
170
- ADVISOR_TOOL_INSTRUCTIONS,
171
- getExperimentAdvisorModels,
172
- isAdvisorEnabled,
173
- isValidAdvisorModel,
174
- modelSupportsAdvisor,
175
- } from 'src/utils/advisor.js'
176
- import { getAgentContext } from 'src/utils/agentContext.js'
177
- import { isClaudeAISubscriber } from 'src/utils/auth.js'
178
- import {
179
- getToolSearchBetaHeader,
180
- modelSupportsStructuredOutputs,
181
- shouldIncludeFirstPartyOnlyBetas,
182
- shouldUseGlobalCacheScope,
183
- } from 'src/utils/betas.js'
184
- import { CLAUDE_IN_CHROME_MCP_SERVER_NAME } from 'src/utils/claudeInChrome/common.js'
185
- import { CHROME_TOOL_SEARCH_INSTRUCTIONS } from 'src/utils/claudeInChrome/prompt.js'
186
- import { getMaxThinkingTokensForModel } from 'src/utils/context.js'
187
- import { logForDebugging } from 'src/utils/debug.js'
188
- import { logForDiagnosticsNoPII } from 'src/utils/diagLogs.js'
189
- import { type EffortValue, modelSupportsEffort } from 'src/utils/effort.js'
190
- import {
191
- isFastModeAvailable,
192
- isFastModeCooldown,
193
- isFastModeEnabled,
194
- isFastModeSupportedByModel,
195
- } from 'src/utils/fastMode.js'
196
- import { returnValue } from 'src/utils/generators.js'
197
- import { headlessProfilerCheckpoint } from 'src/utils/headlessProfiler.js'
198
- import { isMcpInstructionsDeltaEnabled } from 'src/utils/mcpInstructionsDelta.js'
199
- import { calculateUSDCost } from 'src/utils/modelCost.js'
200
- import { endQueryProfile, queryCheckpoint } from 'src/utils/queryProfiler.js'
201
- import {
202
- modelSupportsAdaptiveThinking,
203
- modelSupportsThinking,
204
- type ThinkingConfig,
205
- } from 'src/utils/thinking.js'
206
- import {
207
- extractDiscoveredToolNames,
208
- isDeferredToolsDeltaEnabled,
209
- isToolSearchEnabled,
210
- } from 'src/utils/toolSearch.js'
211
- import { API_MAX_MEDIA_PER_REQUEST } from '../../constants/apiLimits.js'
212
- import { ADVISOR_BETA_HEADER } from '../../constants/betas.js'
213
- import {
214
- formatDeferredToolLine,
215
- isDeferredTool,
216
- TOOL_SEARCH_TOOL_NAME,
217
- } from '../../tools/ToolSearchTool/prompt.js'
218
- import {
219
- selectTopKAdapterToolNamesForQuery,
220
- } from '../../tools/AdapterTool/AdapterTool.js'
221
- import { count } from '../../utils/array.js'
222
- import { insertBlockAfterToolResults } from '../../utils/contentArray.js'
223
- import { validateBoundedIntEnvVar } from '../../utils/envValidation.js'
224
- import { safeParseJSON } from '../../utils/json.js'
225
- import { getInferenceProfileBackingModel } from '../../utils/model/bedrock.js'
226
- import {
227
- normalizeModelStringForAPI,
228
- parseUserSpecifiedModel,
229
- } from '../../utils/model/model.js'
230
- import {
231
- startSessionActivity,
232
- stopSessionActivity,
233
- } from '../../utils/sessionActivity.js'
234
- import { jsonStringify } from '../../utils/slowOperations.js'
235
- import {
236
- isBetaTracingEnabled,
237
- type LLMRequestNewContext,
238
- startLLMRequestSpan,
239
- } from '../../utils/telemetry/sessionTracing.js'
240
- /* eslint-enable @typescript-eslint/no-require-imports */
241
- import {
242
- type AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
243
- logEvent,
244
- } from '../analytics/index.js'
245
- import {
246
- consumePendingCacheEdits,
247
- getPinnedCacheEdits,
248
- markToolsSentToAPIState,
249
- pinCacheEdits,
250
- } from '../compact/microCompact.js'
251
- import { getInitializationStatus } from '../lsp/manager.js'
252
- import { isToolFromMcpServer } from '../mcp/utils.js'
253
- import { withStreamingVCR, withVCR } from '../vcr.js'
254
- import { CLIENT_REQUEST_ID_HEADER, getAnthropicClient } from './client.js'
255
- import {
256
- API_ERROR_MESSAGE_PREFIX,
257
- CUSTOM_OFF_SWITCH_MESSAGE,
258
- getAssistantMessageFromError,
259
- getErrorMessageIfRefusal,
260
- } from './errors.js'
261
- import {
262
- EMPTY_USAGE,
263
- type GlobalCacheStrategy,
264
- logAPIError,
265
- logAPIQuery,
266
- logAPISuccessAndDuration,
267
- type NonNullableUsage,
268
- } from './logging.js'
269
- import {
270
- CACHE_TTL_1HOUR_MS,
271
- checkResponseForCacheBreak,
272
- recordPromptState,
273
- } from './promptCacheBreakDetection.js'
274
- import {
275
- CannotRetryError,
276
- FallbackTriggeredError,
277
- is529Error,
278
- type RetryContext,
279
- withRetry,
280
- } from './withRetry.js'
281
-
282
- const {
283
- ensureToolResultPairing,
284
- normalizeContentFromAPI,
285
- stripAdvisorBlocks,
286
- stripCallerFieldFromAssistantMessage,
287
- stripToolReferenceBlocksFromUserMessage,
288
- } = messageUtils
289
-
290
// Recursive model of a JSON value, used for request-body fragments built in this file.
type JsonValue = string | number | boolean | null | JsonObject | JsonArray
type JsonObject = { [key: string]: JsonValue }
type JsonArray = JsonValue[]

/**
 * Build the extra body parameters to spread into an API request.
 *
 * Sources, applied in order:
 *  1. `CLAUDE_CODE_EXTRA_BODY` — when set, it must parse to a JSON object;
 *     anything else is logged at error level and ignored.
 *  2. The feature-gated `anti_distillation` opt-in.
 *  3. `betaHeaders`, merged into `anthropic_beta` without duplicating
 *     entries that are already present.
 *
 * The returned object never shares array identity with `betaHeaders`, so
 * callers may mutate either side independently.
 *
 * @param betaHeaders - Beta headers to include in the request body.
 * @returns A JSON object holding the extra body parameters.
 */
export function getExtraBodyParams(betaHeaders?: string[]): JsonObject {
  let result: JsonObject = {}

  const extraBodyStr = process.env.CLAUDE_CODE_EXTRA_BODY
  if (extraBodyStr) {
    try {
      // The parsed value may be any JSON type; only plain objects are usable here.
      const parsed = safeParseJSON(extraBodyStr)
      if (parsed && typeof parsed === 'object' && !Array.isArray(parsed)) {
        // Shallow clone: the original author notes safeParseJSON is LRU-cached
        // and hands back the same reference for the same string, so writing
        // to it directly would poison the cache.
        result = { ...(parsed as JsonObject) }
      } else {
        logForDebugging(
          `CLAUDE_CODE_EXTRA_BODY env var must be a JSON object, but was given ${extraBodyStr}`,
          { level: 'error' },
        )
      }
    } catch (error) {
      logForDebugging(
        `Error parsing CLAUDE_CODE_EXTRA_BODY: ${errorMessage(error)}`,
        { level: 'error' },
      )
    }
  }

  // Anti-distillation: send fake_tools opt-in for 1P CLI only.
  // NOTE(review): the `feature(...) ? … : false` shape is kept verbatim —
  // presumably it lets the bundler drop the branch; confirm before simplifying.
  if (
    feature('ANTI_DISTILLATION_CC')
      ? process.env.CLAUDE_CODE_ENTRYPOINT === 'cli' &&
        shouldIncludeFirstPartyOnlyBetas() &&
        getFeatureValue_CACHED_MAY_BE_STALE(
          'tengu_anti_distill_fake_tool_injection',
          false,
        )
      : false
  ) {
    result.anti_distillation = ['fake_tools']
  }

  if (betaHeaders && betaHeaders.length > 0) {
    const existing = result.anthropic_beta
    if (Array.isArray(existing)) {
      // Append only the headers that are not already listed.
      const existingHeaders = existing as string[]
      const newHeaders = betaHeaders.filter(
        header => !existingHeaders.includes(header),
      )
      result.anthropic_beta = [...existingHeaders, ...newHeaders]
    } else {
      // Copy rather than alias so the caller's array is never shared with
      // (or mutated through) the returned body.
      result.anthropic_beta = [...betaHeaders]
    }
  }

  return result
}
363
-
364
/**
 * Report whether prompt caching is enabled for `model`.
 *
 * `DISABLE_PROMPT_CACHING` turns caching off for every model. Otherwise each
 * per-family switch (`..._HAIKU`, `..._SONNET`, `..._OPUS`) disables caching
 * only when `model` equals the model returned by the matching resolver.
 */
export function getPromptCachingEnabled(model: string): boolean {
  // The global kill switch wins over everything else.
  if (isEnvTruthy(process.env.DISABLE_PROMPT_CACHING)) {
    return false
  }

  // Checked in order; a resolver only runs when its switch is set.
  const perModelSwitches: Array<[string | undefined, () => string]> = [
    [process.env.DISABLE_PROMPT_CACHING_HAIKU, getSmallFastModel],
    [process.env.DISABLE_PROMPT_CACHING_SONNET, getDefaultSonnetModel],
    [process.env.DISABLE_PROMPT_CACHING_OPUS, getDefaultOpusModel],
  ]
  const disabledForModel = perModelSwitches.some(
    ([flag, resolveModel]) => isEnvTruthy(flag) && model === resolveModel(),
  )
  return !disabledForModel
}
388
-
389
/**
 * Build the `cache_control` marker attached to cacheable content blocks.
 *
 * Always `ephemeral`; `ttl: '1h'` is added when `should1hCacheTTL` approves
 * the query source, and `scope` is included only when it is `'global'`.
 */
export function getCacheControl({
  scope,
  querySource,
}: {
  scope?: CacheScope
  querySource?: QuerySource
} = {}): {
  type: 'ephemeral'
  ttl?: '1h'
  scope?: CacheScope
} {
  const control: { type: 'ephemeral'; ttl?: '1h'; scope?: CacheScope } = {
    type: 'ephemeral',
  }
  if (should1hCacheTTL(querySource)) {
    control.ttl = '1h'
  }
  if (scope === 'global') {
    control.scope = scope
  }
  return control
}
406
-
407
/**
 * Decide whether prompt caching should use the 1h TTL.
 *
 * Returns true immediately for Bedrock when
 * `ENABLE_PROMPT_CACHING_1H_BEDROCK` is set. Otherwise both must hold:
 *  1. The user is eligible (`USER_TYPE === 'ant'`, or a subscriber who is
 *     not using overage).
 *  2. `querySource` matches an entry of the allowlist read from the
 *     `tengu_prompt_cache_1h_config` feature value (`{ allowlist: string[] }`).
 *
 * Allowlist entries ending in '*' are prefix matches; all others must match
 * exactly. Examples:
 *  - { allowlist: ["repl_main_thread*", "sdk"] } — main thread + SDK only
 *  - { allowlist: ["repl_main_thread*", "sdk", "agent:*"] } — also subagents
 *  - { allowlist: ["*"] } — all sources
 *
 * Eligibility and the allowlist are both latched into bootstrap state the
 * first time they are computed, so the answer stays stable for the session.
 */
function should1hCacheTTL(querySource?: QuerySource): boolean {
  // 3P Bedrock users opt in via env var and manage their own billing, so no
  // GrowthBook gating applies to them.
  const bedrockOptIn =
    getAPIProvider() === 'bedrock' &&
    isEnvTruthy(process.env.ENABLE_PROMPT_CACHING_1H_BEDROCK)
  if (bedrockOptIn) {
    return true
  }

  // Latch eligibility once: a mid-session overage flip would otherwise change
  // the cache_control TTL and bust the server-side prompt cache.
  let eligible = getPromptCache1hEligible()
  if (eligible === null) {
    eligible =
      process.env.USER_TYPE === 'ant' ||
      (isClaudeAISubscriber() && !currentLimits.isUsingOverage)
    setPromptCache1hEligible(eligible)
  }
  if (!eligible) {
    return false
  }

  // Latch the allowlist too, so a GrowthBook disk-cache refresh cannot produce
  // mixed TTLs within one session.
  let patterns = getPromptCache1hAllowlist()
  if (patterns === null) {
    const { allowlist } = getFeatureValue_CACHED_MAY_BE_STALE<{
      allowlist?: string[]
    }>('tengu_prompt_cache_1h_config', {})
    patterns = allowlist ?? []
    setPromptCache1hAllowlist(patterns)
  }

  if (querySource === undefined) {
    return false
  }
  for (const pattern of patterns) {
    const matched = pattern.endsWith('*')
      ? querySource.startsWith(pattern.slice(0, -1))
      : querySource === pattern
    if (matched) {
      return true
    }
  }
  return false
}
466
-
467
/**
 * Apply the effort setting to an outgoing request, mutating its arguments.
 *
 * Does nothing when the model lacks effort support or `outputConfig` already
 * carries an `effort` key. Otherwise:
 *  - `undefined`: only the effort beta header is pushed onto `betas`.
 *  - string: stored as `outputConfig.effort`, and the beta header is pushed.
 *  - any other value: written to `extraBodyParams.anthropic_internal` as
 *    `effort_override`, but only when `USER_TYPE === 'ant'`.
 */
function configureEffortParams(
  effortValue: EffortValue | undefined,
  outputConfig: BetaOutputConfig,
  extraBodyParams: Record<string, unknown>,
  betas: string[],
  model: string,
): void {
  const applicable = modelSupportsEffort(model) && !('effort' in outputConfig)
  if (!applicable) {
    return
  }

  if (effortValue === undefined || typeof effortValue === 'string') {
    if (effortValue !== undefined) {
      // String effort levels are sent as-is.
      outputConfig.effort = effortValue
    }
    betas.push(EFFORT_BETA_HEADER)
    return
  }

  // Non-string override: ant-only, carried via anthropic_internal.
  if (process.env.USER_TYPE !== 'ant') {
    return
  }
  const priorInternal =
    (extraBodyParams.anthropic_internal as Record<string, unknown>) || {}
  extraBodyParams.anthropic_internal = {
    ...priorInternal,
    effort_override: effortValue,
  }
}
498
-
499
// Wire shape of output_config.task_budget — API-side token budget awareness
// for the model. The original author notes that the Stainless SDK types do
// not yet include task_budget on BetaOutputConfig, hence this local
// definition and the widened parameter type below; the API validates on
// receipt (api/api/schemas/messages/request/output_config.py:12-39 in the
// monorepo). Beta: task-budgets-2026-03-13 (EAP, claude-strudel-eap only as
// of Mar 2026).
type TaskBudgetParam = {
  type: 'tokens'
  total: number
  remaining?: number
}

/**
 * Copy the caller's task budget into `outputConfig.task_budget` and make sure
 * the task-budgets beta header is present in `betas`.
 *
 * No-op when there is no budget, when `outputConfig` already has a
 * `task_budget` key, or when first-party-only betas are not to be included.
 */
export function configureTaskBudgetParams(
  taskBudget: Options['taskBudget'],
  outputConfig: BetaOutputConfig & { task_budget?: TaskBudgetParam },
  betas: string[],
): void {
  if (!taskBudget) {
    return
  }
  if ('task_budget' in outputConfig) {
    return
  }
  if (!shouldIncludeFirstPartyOnlyBetas()) {
    return
  }

  const budget: TaskBudgetParam = { type: 'tokens', total: taskBudget.total }
  if (taskBudget.remaining !== undefined) {
    budget.remaining = taskBudget.remaining
  }
  outputConfig.task_budget = budget

  const headerPresent = betas.includes(TASK_BUDGETS_BETA_HEADER)
  if (!headerPresent) {
    betas.push(TASK_BUDGETS_BETA_HEADER)
  }
}
533
-
534
/**
 * Build the `metadata` object sent with API requests.
 *
 * `user_id` is a JSON string holding the device id, the OAuth account UUID
 * (empty string when unavailable) and the session id. Keys from
 * `CLAUDE_CODE_EXTRA_METADATA` are merged in first, so the three built-in
 * keys override any of the same name. A value that does not parse to a JSON
 * object is logged at error level and ignored.
 */
export function getAPIMetadata() {
  // https://docs.google.com/document/d/1dURO9ycXXQCBS0V4Vhl4poDBRgkelFc5t2BNPoEgH5Q/edit?tab=t.0#heading=h.5g7nec5b09w5
  const extraStr = process.env.CLAUDE_CODE_EXTRA_METADATA
  let extra: JsonObject = {}
  if (extraStr) {
    const parsed = safeParseJSON(extraStr, false)
    if (!parsed || typeof parsed !== 'object' || Array.isArray(parsed)) {
      logForDebugging(
        `CLAUDE_CODE_EXTRA_METADATA env var must be a JSON object, but was given ${extraStr}`,
        { level: 'error' },
      )
    } else {
      extra = parsed as JsonObject
    }
  }

  const identity = {
    ...extra,
    device_id: getOrCreateUserID(),
    // Only include OAuth account UUID when actively using OAuth authentication
    account_uuid: getOauthAccountInfo()?.accountUuid ?? '',
    session_id: getSessionId(),
  }
  return { user_id: jsonStringify(identity) }
}
560
-
561
/**
 * Check an API key by sending a one-token request to the small/fast model.
 *
 * @param apiKey - Key to verify.
 * @param isNonInteractiveSession - When true (print mode) verification is
 *   skipped and the key is reported as valid.
 * @returns `true` when the request succeeds or verification is skipped;
 *   `false` when the failure message carries the "invalid x-api-key"
 *   authentication error payload.
 * @throws Any other failure, unwrapped from `CannotRetryError` when present.
 */
export async function verifyApiKey(
  apiKey: string,
  isNonInteractiveSession: boolean,
): Promise<boolean> {
  if (isNonInteractiveSession) {
    return true
  }

  // Exact payload text that identifies a rejected key.
  const invalidKeyPayload =
    '{"type":"error","error":{"type":"authentication_error","message":"invalid x-api-key"}}'

  try {
    // WARNING: if you change this to use a non-Haiku model, this request will fail in 1P unless it uses getCLISyspromptPrefix.
    const model = getSmallFastModel()
    const betas = getModelBetas(model)
    const attempt = withRetry(
      () =>
        getAnthropicClient({
          apiKey,
          maxRetries: 3,
          model,
          source: 'verify_api_key',
        }),
      async anthropic => {
        const messages: MessageParam[] = [{ role: 'user', content: 'test' }]
        // biome-ignore lint/plugin: API key verification is intentionally a minimal direct call
        await anthropic.beta.messages.create({
          model,
          max_tokens: 1,
          messages,
          temperature: 1,
          ...(betas.length > 0 && { betas }),
          metadata: getAPIMetadata(),
          ...getExtraBodyParams(),
        })
        return true
      },
      // Use fewer retries for API key verification
      { maxRetries: 2, model, thinkingConfig: { type: 'disabled' } },
    )
    return await returnValue(attempt)
  } catch (thrown) {
    // Surface the underlying failure rather than the retry wrapper.
    const error =
      thrown instanceof CannotRetryError ? thrown.originalError : thrown
    logError(error)

    const keyRejected =
      error instanceof Error && error.message.includes(invalidKeyPayload)
    if (keyRejected) {
      return false
    }
    throw error
  }
}
618
-
619
/**
 * Convert a user message into an API `MessageParam`.
 *
 * With `addCache`, string content is wrapped in a single text block and array
 * content is mapped to fresh block objects; when `enablePromptCaching` is
 * true the last block also receives a `cache_control` marker. Without
 * `addCache` the content is passed through, with array content shallow-copied.
 */
export function userMessageToMessageParam(
  message: UserMessage,
  addCache = false,
  enablePromptCaching: boolean,
  querySource?: QuerySource,
): MessageParam {
  const { content } = message.message

  if (!addCache) {
    // Clone array content so in-place edits made downstream (the original
    // author cites insertCacheEditsBlock's splice) cannot contaminate the
    // source message and pile up duplicate cache_edits across repeated calls.
    return {
      role: 'user',
      content: Array.isArray(content) ? [...content] : content,
    }
  }

  if (typeof content === 'string') {
    return {
      role: 'user',
      content: [
        {
          type: 'text',
          text: content,
          ...(enablePromptCaching && {
            cache_control: getCacheControl({ querySource }),
          }),
        },
      ],
    }
  }

  // Only the final block carries the cache marker.
  const lastIndex = content.length - 1
  return {
    role: 'user',
    content: content.map((block, index) => ({
      ...block,
      ...(index === lastIndex && enablePromptCaching
        ? { cache_control: getCacheControl({ querySource }) }
        : {}),
    })),
  }
}
663
-
664
/**
 * Convert an assistant message into an API `MessageParam`.
 *
 * With `addCache`, string content is wrapped in a single text block and array
 * content is mapped to fresh block objects. When `enablePromptCaching` is
 * true the last block receives a `cache_control` marker, unless that block is
 * `thinking`, `redacted_thinking`, or (with the CONNECTOR_TEXT feature on) a
 * connector-text block. Without `addCache` the content is passed through
 * unchanged and uncopied.
 */
export function assistantMessageToMessageParam(
  message: AssistantMessage,
  addCache = false,
  enablePromptCaching: boolean,
  querySource?: QuerySource,
): MessageParam {
  const { content } = message.message

  if (!addCache) {
    return { role: 'assistant', content }
  }

  if (typeof content === 'string') {
    return {
      role: 'assistant',
      content: [
        {
          type: 'text',
          text: content,
          ...(enablePromptCaching && {
            cache_control: getCacheControl({ querySource }),
          }),
        },
      ],
    }
  }

  const lastIndex = content.length - 1
  return {
    role: 'assistant',
    content: content.map((block, index) => {
      // The marker goes on the final block only, and never on a thinking or
      // connector-text block.
      const isMarkableTail =
        index === lastIndex &&
        block.type !== 'thinking' &&
        block.type !== 'redacted_thinking' &&
        (feature('CONNECTOR_TEXT') ? !isConnectorTextBlock(block) : true)
      return {
        ...block,
        ...(isMarkableTail && enablePromptCaching
          ? { cache_control: getCacheControl({ querySource }) }
          : {}),
      }
    }),
  }
}
706
-
707
/**
 * Per-request options accepted by the query entry points in this file
 * (`queryModelWithStreaming`, `queryModelWithoutStreaming`).
 *
 * The section headings below are editorial groupings only. Most consumers of
 * these fields lie outside this part of the file, so field meanings are not
 * documented beyond what the original comments state.
 */
export type Options = {
  // --- Model and sampling ---
  model: string
  fallbackModel?: string
  advisorModel?: string
  fastMode?: boolean
  effortValue?: EffortValue
  temperatureOverride?: number
  maxOutputTokensOverride?: number
  outputFormat?: BetaJSONOutputFormat
  // API-side task budget (output_config.task_budget). Distinct from the
  // tokenBudget.ts +500k auto-continue feature — this one is sent to the API
  // so the model can pace itself. `remaining` is computed by the caller
  // (query.ts decrements across the agentic loop).
  taskBudget?: { total: number; remaining?: number }

  // --- Tools and permissions ---
  getToolPermissionContext: () => Promise<ToolPermissionContext>
  toolChoice?: BetaToolChoiceTool | BetaToolChoiceAuto | undefined
  extraToolSchemas?: BetaToolUnion[]
  mcpTools: Tools
  hasPendingMcpServers?: boolean

  // --- Agents ---
  agents: AgentDefinition[]
  allowedAgentTypes?: string[]
  agentId?: AgentId // Only set for subagents

  // --- Session and query context ---
  isNonInteractiveSession: boolean
  querySource: QuerySource
  queryTracking?: QueryChainTracking
  hasAppendSystemPrompt: boolean

  // --- Prompt caching ---
  enablePromptCaching?: boolean
  skipCacheWrite?: boolean

  // --- Transport and callbacks ---
  fetchOverride?: ClientOptions['fetch']
  onStreamingFallback?: () => void
  addNotification?: (notif: Notification) => void
}
739
-
740
/**
 * Run a model query to completion and return the last assistant message.
 *
 * @throws APIUserAbortError when no assistant message was produced and
 *   `signal` is aborted.
 * @throws Error ('No assistant message found') when none was produced for any
 *   other reason.
 */
export async function queryModelWithoutStreaming({
  messages,
  systemPrompt,
  thinkingConfig,
  tools,
  signal,
  options,
}: {
  messages: Message[]
  systemPrompt: SystemPrompt
  thinkingConfig: ThinkingConfig
  tools: Tools
  signal: AbortSignal
  options: Options
}): Promise<AssistantMessage> {
  const stream = withStreamingVCR(messages, async function* () {
    yield* queryModel(
      messages,
      systemPrompt,
      thinkingConfig,
      tools,
      signal,
      options,
    )
  })

  // Drain the whole generator instead of stopping at the first assistant
  // message: per the original author, logAPISuccessAndDuration only runs
  // after all yields.
  let lastAssistant: AssistantMessage | undefined
  for await (const event of stream) {
    if (event.type === 'assistant') {
      lastAssistant = event
    }
  }

  if (lastAssistant) {
    return lastAssistant
  }
  // Report an abort as such so callers can handle it gracefully.
  if (signal.aborted) {
    throw new APIUserAbortError()
  }
  throw new Error('No assistant message found')
}
782
-
783
/**
 * Streaming query entry point: re-yields everything `queryModel` produces,
 * routed through `withStreamingVCR`.
 */
export async function* queryModelWithStreaming({
  messages,
  systemPrompt,
  thinkingConfig,
  tools,
  signal,
  options,
}: {
  messages: Message[]
  systemPrompt: SystemPrompt
  thinkingConfig: ThinkingConfig
  tools: Tools
  signal: AbortSignal
  options: Options
}): AsyncGenerator<
  StreamEvent | AssistantMessage | SystemAPIErrorMessage,
  void
> {
  const wrapped = withStreamingVCR(messages, async function* () {
    yield* queryModel(
      messages,
      systemPrompt,
      thinkingConfig,
      tools,
      signal,
      options,
    )
  })
  return yield* wrapped
}
812
-
813
/**
 * Report whether an LSP tool should be deferred (sent with
 * `defer_loading: true`) because LSP initialization has not completed.
 *
 * Only tools with a truthy `isLsp` property qualify, and only while the
 * initialization status is 'pending' or 'not-started'.
 */
function shouldDeferLspTool(tool: Tool): boolean {
  const isLspTool = 'isLsp' in tool && Boolean(tool.isLsp)
  if (!isLspTool) {
    return false
  }
  const { status } = getInitializationStatus()
  return status === 'pending' || status === 'not-started'
}
825
-
826
/**
 * Return the text of the most recent user message that has any
 * non-whitespace text, or '' when there is none.
 *
 * String content is returned as-is. For array content, the `text` of every
 * `text` block is concatenated with no separator.
 */
function latestUserTextForToolRetrieval(messages: Message[]): string {
  let index = messages.length
  while (index-- > 0) {
    const candidate = messages[index] as {
      type?: string
      message?: { content?: unknown }
    }
    if (candidate?.type !== 'user') continue

    const content = candidate.message?.content
    if (typeof content === 'string') {
      if (content.trim().length > 0) return content
      continue
    }
    if (!Array.isArray(content)) continue

    let joined = ''
    for (const block of content) {
      if (block?.type === 'text' && typeof block.text === 'string') {
        joined += block.text
      }
    }
    if (joined.trim().length > 0) return joined
  }
  return ''
}
851
-
852
/**
 * Per-attempt timeout for non-streaming fallback requests, in milliseconds.
 *
 * Uses `API_TIMEOUT_MS` when it parses (base 10) to a finite, positive
 * integer, so slow backends and the streaming path share the same ceiling.
 * Unset, non-numeric, zero or negative values are ignored — a negative
 * timeout would otherwise be handed straight to the client.
 *
 * Without a usable override, remote sessions (`CLAUDE_CODE_REMOTE` truthy)
 * get 120s; the original author's rationale is to stay under CCR's container
 * idle-kill (~5min) so a hung fallback surfaces a clean
 * APIConnectionTimeoutError instead of stalling past SIGKILL. Everything else
 * gets 300s — long enough for slow backends without approaching the API's
 * 10-minute non-streaming boundary.
 *
 * @returns The timeout in milliseconds; always a positive number.
 */
function getNonstreamingFallbackTimeoutMs(): number {
  const override = Number.parseInt(process.env.API_TIMEOUT_MS ?? '', 10)
  // NaN (unset or garbage), 0 and negatives all fall through to the defaults.
  if (Number.isFinite(override) && override > 0) {
    return override
  }
  return isEnvTruthy(process.env.CLAUDE_CODE_REMOTE) ? 120_000 : 300_000
}
869
-
870
- /**
871
- * Helper generator for non-streaming API requests.
872
- * Encapsulates the common pattern of creating a withRetry generator,
873
- * iterating to yield system messages, and returning the final BetaMessage.
874
- */
875
- export async function* executeNonStreamingRequest(
876
- clientOptions: {
877
- model: string
878
- fetchOverride?: Options['fetchOverride']
879
- source: string
880
- },
881
- retryOptions: {
882
- model: string
883
- fallbackModel?: string
884
- thinkingConfig: ThinkingConfig
885
- fastMode?: boolean
886
- signal: AbortSignal
887
- initialConsecutive529Errors?: number
888
- querySource?: QuerySource
889
- },
890
- paramsFromContext: (context: RetryContext) => BetaMessageStreamParams,
891
- onAttempt: (attempt: number, start: number, maxOutputTokens: number) => void,
892
- captureRequest: (params: BetaMessageStreamParams) => void,
893
- /**
894
- * Request ID of the failed streaming attempt this fallback is recovering
895
- * from. Emitted in tengu_nonstreaming_fallback_error for funnel correlation.
896
- */
897
- originatingRequestId?: string | null,
898
- ): AsyncGenerator<SystemAPIErrorMessage, BetaMessage> {
899
- const fallbackTimeoutMs = getNonstreamingFallbackTimeoutMs()
900
- const generator = withRetry(
901
- () =>
902
- getAnthropicClient({
903
- maxRetries: 0,
904
- model: clientOptions.model,
905
- fetchOverride: clientOptions.fetchOverride,
906
- source: clientOptions.source,
907
- }),
908
- async (anthropic, attempt, context) => {
909
- const start = Date.now()
910
- const retryParams = paramsFromContext(context)
911
- captureRequest(retryParams)
912
- onAttempt(attempt, start, retryParams.max_tokens)
913
-
914
- const adjustedParams = adjustParamsForNonStreaming(
915
- retryParams,
916
- MAX_NON_STREAMING_TOKENS,
917
- )
918
-
919
- try {
920
- // biome-ignore lint/plugin: non-streaming API call
921
- return await anthropic.beta.messages.create(
922
- {
923
- ...adjustedParams,
924
- model: normalizeModelStringForAPI(adjustedParams.model),
925
- },
926
- {
927
- signal: retryOptions.signal,
928
- timeout: fallbackTimeoutMs,
929
- },
930
- )
931
- } catch (err) {
932
- // User aborts are not errors — re-throw immediately without logging
933
- if (err instanceof APIUserAbortError) throw err
934
-
935
- // Instrumentation: record when the non-streaming request errors (including
936
- // timeouts). Lets us distinguish "fallback hung past container kill"
937
- // (no event) from "fallback hit the bounded timeout" (this event).
938
- logForDiagnosticsNoPII('error', 'cli_nonstreaming_fallback_error')
939
- logEvent('tengu_nonstreaming_fallback_error', {
940
- model:
941
- clientOptions.model as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
942
- error:
943
- err instanceof Error
944
- ? (err.name as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS)
945
- : ('unknown' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS),
946
- attempt,
947
- timeout_ms: fallbackTimeoutMs,
948
- request_id: (originatingRequestId ??
949
- 'unknown') as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
950
- })
951
- throw err
952
- }
953
- },
954
- {
955
- model: retryOptions.model,
956
- fallbackModel: retryOptions.fallbackModel,
957
- thinkingConfig: retryOptions.thinkingConfig,
958
- ...(isFastModeEnabled() && { fastMode: retryOptions.fastMode }),
959
- signal: retryOptions.signal,
960
- initialConsecutive529Errors: retryOptions.initialConsecutive529Errors,
961
- querySource: retryOptions.querySource,
962
- },
963
- )
964
-
965
- let e
966
- do {
967
- e = await generator.next()
968
- if (!e.done && e.value.type === 'system') {
969
- yield e.value
970
- }
971
- } while (!e.done)
972
-
973
- return e.value as BetaMessage
974
- }
975
-
976
- /**
977
- * Extracts the request ID from the most recent assistant message in the
978
- * conversation. Used to link consecutive API requests in analytics so we can
979
- * join them for cache-hit-rate analysis and incremental token tracking.
980
- *
981
- * Deriving this from the message array (rather than global state) ensures each
982
- * query chain (main thread, subagent, teammate) tracks its own request chain
983
- * independently, and rollback/undo naturally updates the value.
984
- */
985
- function getPreviousRequestIdFromMessages(
986
- messages: Message[],
987
- ): string | undefined {
988
- for (let i = messages.length - 1; i >= 0; i--) {
989
- const msg = messages[i]!
990
- if (msg.type === 'assistant' && msg.requestId) {
991
- return msg.requestId
992
- }
993
- }
994
- return undefined
995
- }
996
-
997
- function isMedia(
998
- block: BetaContentBlockParam,
999
- ): block is BetaImageBlockParam | BetaRequestDocumentBlock {
1000
- return block.type === 'image' || block.type === 'document'
1001
- }
1002
-
1003
- function isToolResult(
1004
- block: BetaContentBlockParam,
1005
- ): block is BetaToolResultBlockParam {
1006
- return block.type === 'tool_result'
1007
- }
1008
-
1009
- /**
1010
- * Ensures messages contain at most `limit` media items (images + documents).
1011
- * Strips oldest media first to preserve the most recent.
1012
- */
1013
- export function stripExcessMediaItems(
1014
- messages: (UserMessage | AssistantMessage)[],
1015
- limit: number,
1016
- ): (UserMessage | AssistantMessage)[] {
1017
- let toRemove = 0
1018
- for (const msg of messages) {
1019
- if (!Array.isArray(msg.message.content)) continue
1020
- for (const block of msg.message.content) {
1021
- if (isMedia(block)) toRemove++
1022
- if (isToolResult(block) && Array.isArray(block.content)) {
1023
- for (const nested of block.content) {
1024
- if (isMedia(nested)) toRemove++
1025
- }
1026
- }
1027
- }
1028
- }
1029
- toRemove -= limit
1030
- if (toRemove <= 0) return messages
1031
-
1032
- return messages.map(msg => {
1033
- if (toRemove <= 0) return msg
1034
- const content = msg.message.content
1035
- if (!Array.isArray(content)) return msg
1036
-
1037
- const before = toRemove
1038
- const stripped = content
1039
- .map(block => {
1040
- if (
1041
- toRemove <= 0 ||
1042
- !isToolResult(block) ||
1043
- !Array.isArray(block.content)
1044
- )
1045
- return block
1046
- const filtered = block.content.filter(n => {
1047
- if (toRemove > 0 && isMedia(n)) {
1048
- toRemove--
1049
- return false
1050
- }
1051
- return true
1052
- })
1053
- return filtered.length === block.content.length
1054
- ? block
1055
- : { ...block, content: filtered }
1056
- })
1057
- .filter(block => {
1058
- if (toRemove > 0 && isMedia(block)) {
1059
- toRemove--
1060
- return false
1061
- }
1062
- return true
1063
- })
1064
-
1065
- return before === toRemove
1066
- ? msg
1067
- : {
1068
- ...msg,
1069
- message: { ...msg.message, content: stripped },
1070
- }
1071
- }) as (UserMessage | AssistantMessage)[]
1072
- }
1073
-
1074
- async function* queryModel(
1075
- messages: Message[],
1076
- systemPrompt: SystemPrompt,
1077
- thinkingConfig: ThinkingConfig,
1078
- tools: Tools,
1079
- signal: AbortSignal,
1080
- options: Options,
1081
- ): AsyncGenerator<
1082
- StreamEvent | AssistantMessage | SystemAPIErrorMessage,
1083
- void
1084
- > {
1085
- // Check cheap conditions first — the off-switch await blocks on GrowthBook
1086
- // init (~10ms). For non-Opus models (haiku, sonnet) this skips the await
1087
- // entirely. Subscribers don't hit this path at all.
1088
- if (
1089
- !isClaudeAISubscriber() &&
1090
- isNonCustomOpusModel(options.model) &&
1091
- (
1092
- await getDynamicConfig_BLOCKS_ON_INIT<{ activated: boolean }>(
1093
- 'tengu-off-switch',
1094
- {
1095
- activated: false,
1096
- },
1097
- )
1098
- ).activated
1099
- ) {
1100
- logEvent('tengu_off_switch_query', {})
1101
- yield getAssistantMessageFromError(
1102
- new Error(CUSTOM_OFF_SWITCH_MESSAGE),
1103
- options.model,
1104
- )
1105
- return
1106
- }
1107
-
1108
- // Derive previous request ID from the last assistant message in this query chain.
1109
- // This is scoped per message array (main thread, subagent, teammate each have their own),
1110
- // so concurrent agents don't clobber each other's request chain tracking.
1111
- // Also naturally handles rollback/undo since removed messages won't be in the array.
1112
- const previousRequestId = getPreviousRequestIdFromMessages(messages)
1113
-
1114
- const resolvedModel =
1115
- getAPIProvider() === 'bedrock' &&
1116
- options.model.includes('application-inference-profile')
1117
- ? ((await getInferenceProfileBackingModel(options.model)) ??
1118
- options.model)
1119
- : options.model
1120
-
1121
- queryCheckpoint('query_tool_schema_build_start')
1122
- const isAgenticQuery =
1123
- options.querySource.startsWith('repl_main_thread') ||
1124
- options.querySource.startsWith('agent:') ||
1125
- options.querySource === 'sdk' ||
1126
- options.querySource === 'hook_agent' ||
1127
- options.querySource === 'verification_agent'
1128
- const betas = getMergedBetas(options.model, { isAgenticQuery })
1129
-
1130
- // Always send the advisor beta header when advisor is enabled, so
1131
- // non-agentic queries (compact, side_question, extract_memories, etc.)
1132
- // can parse advisor server_tool_use blocks already in the conversation history.
1133
- if (isAdvisorEnabled()) {
1134
- betas.push(ADVISOR_BETA_HEADER)
1135
- }
1136
-
1137
- let advisorModel: string | undefined
1138
- if (isAgenticQuery && isAdvisorEnabled()) {
1139
- let advisorOption = options.advisorModel
1140
-
1141
- const advisorExperiment = getExperimentAdvisorModels()
1142
- if (advisorExperiment !== undefined) {
1143
- if (
1144
- normalizeModelStringForAPI(advisorExperiment.baseModel) ===
1145
- normalizeModelStringForAPI(options.model)
1146
- ) {
1147
- // Override the advisor model if the base model matches. We
1148
- // should only have experiment models if the user cannot
1149
- // configure it themselves.
1150
- advisorOption = advisorExperiment.advisorModel
1151
- }
1152
- }
1153
-
1154
- if (advisorOption) {
1155
- const normalizedAdvisorModel = normalizeModelStringForAPI(
1156
- parseUserSpecifiedModel(advisorOption),
1157
- )
1158
- if (!modelSupportsAdvisor(options.model)) {
1159
- logForDebugging(
1160
- `[AdvisorTool] Skipping advisor - base model ${options.model} does not support advisor`,
1161
- )
1162
- } else if (!isValidAdvisorModel(normalizedAdvisorModel)) {
1163
- logForDebugging(
1164
- `[AdvisorTool] Skipping advisor - ${normalizedAdvisorModel} is not a valid advisor model`,
1165
- )
1166
- } else {
1167
- advisorModel = normalizedAdvisorModel
1168
- logForDebugging(
1169
- `[AdvisorTool] Server-side tool enabled with ${advisorModel} as the advisor model`,
1170
- )
1171
- }
1172
- }
1173
- }
1174
-
1175
- // Check if tool search is enabled (checks mode, model support, and threshold for auto mode)
1176
- // This is async because it may need to calculate MCP tool description sizes for TstAuto mode
1177
- let useToolSearch = await isToolSearchEnabled(
1178
- options.model,
1179
- tools,
1180
- options.getToolPermissionContext,
1181
- options.agents,
1182
- 'query',
1183
- )
1184
-
1185
- // Precompute once — isDeferredTool does 2 GrowthBook lookups per call
1186
- const deferredToolNames = new Set<string>()
1187
- if (useToolSearch) {
1188
- for (const t of tools) {
1189
- if (isDeferredTool(t)) deferredToolNames.add(t.name)
1190
- }
1191
- }
1192
-
1193
- // Even if tool search mode is enabled, skip if there are no deferred tools
1194
- // AND no MCP servers are still connecting. When servers are pending, keep
1195
- // ToolSearch available so the model can discover tools after they connect.
1196
- if (
1197
- useToolSearch &&
1198
- deferredToolNames.size === 0 &&
1199
- !options.hasPendingMcpServers
1200
- ) {
1201
- logForDebugging(
1202
- 'Tool search disabled: no deferred tools available to search',
1203
- )
1204
- useToolSearch = false
1205
- }
1206
-
1207
- const turnLocalAdapterToolNames = new Set(
1208
- selectTopKAdapterToolNamesForQuery(
1209
- latestUserTextForToolRetrieval(messages),
1210
- ),
1211
- )
1212
- if (turnLocalAdapterToolNames.size > 0) {
1213
- logForDebugging(
1214
- `UMMAYA turn-local adapter schemas: ${[...turnLocalAdapterToolNames].join(', ')}`,
1215
- )
1216
- }
1217
-
1218
- // Filter out ToolSearchTool if tool search is not enabled for this model
1219
- // ToolSearchTool returns tool_reference blocks which unsupported models can't handle
1220
- let filteredTools: Tools
1221
-
1222
- if (useToolSearch) {
1223
- // Dynamic tool loading: Only include deferred tools that have been discovered
1224
- // via tool_reference blocks in the message history. This eliminates the need
1225
- // to predeclare all deferred tools upfront and removes limits on tool quantity.
1226
- const discoveredToolNames = extractDiscoveredToolNames(messages)
1227
-
1228
- filteredTools = tools.filter(tool => {
1229
- // 0.2.1 exposed the lightweight root primitives together with concrete
1230
- // adapter schemas. Keep that surface so K-EXAONE preserves CC-style
1231
- // prose→tool→prose loop painting, while still limiting concrete adapter
1232
- // schemas to the turn-local top-k set.
1233
- if (turnLocalAdapterToolNames.has(tool.name)) return true
1234
- // Always include non-deferred tools
1235
- if (!deferredToolNames.has(tool.name)) return true
1236
- // Always include ToolSearchTool (so it can discover more tools)
1237
- if (toolMatchesName(tool, TOOL_SEARCH_TOOL_NAME)) return true
1238
- // Only include deferred tools that have been discovered
1239
- return discoveredToolNames.has(tool.name)
1240
- })
1241
- } else {
1242
- filteredTools = tools.filter(t => {
1243
- if (toolMatchesName(t, TOOL_SEARCH_TOOL_NAME)) return false
1244
- // Keep non-deferred root primitives even when concrete top-k adapter
1245
- // schemas are available; this matches the released 0.2.1 loop surface.
1246
- if (isDeferredTool(t)) return turnLocalAdapterToolNames.has(t.name)
1247
- return true
1248
- })
1249
- }
1250
-
1251
- // Add tool search beta header if enabled - required for defer_loading to be accepted
1252
- // Header differs by provider: 1P/Foundry use advanced-tool-use, Vertex/Bedrock use tool-search-tool
1253
- // For Bedrock, this header must go in extraBodyParams, not the betas array
1254
- const toolSearchHeader = useToolSearch ? getToolSearchBetaHeader() : null
1255
- if (toolSearchHeader && getAPIProvider() !== 'bedrock') {
1256
- if (!betas.includes(toolSearchHeader)) {
1257
- betas.push(toolSearchHeader)
1258
- }
1259
- }
1260
-
1261
- // Determine if cached microcompact is enabled for this model.
1262
- // Computed once here (in async context) and captured by paramsFromContext.
1263
- // The beta header is also captured here to avoid a top-level import of the
1264
- // ant-only CACHE_EDITING_BETA_HEADER constant.
1265
- let cachedMCEnabled = false
1266
- let cacheEditingBetaHeader = ''
1267
- if (feature('CACHED_MICROCOMPACT')) {
1268
- const {
1269
- isCachedMicrocompactEnabled,
1270
- isModelSupportedForCacheEditing,
1271
- getCachedMCConfig,
1272
- } = await import('../compact/cachedMicrocompact.js')
1273
- const betas = await import('src/constants/betas.js')
1274
- cacheEditingBetaHeader = betas.CACHE_EDITING_BETA_HEADER
1275
- const featureEnabled = isCachedMicrocompactEnabled()
1276
- const modelSupported = isModelSupportedForCacheEditing(options.model)
1277
- cachedMCEnabled = featureEnabled && modelSupported
1278
- const config = getCachedMCConfig()
1279
- logForDebugging(
1280
- `Cached MC gate: enabled=${featureEnabled} modelSupported=${modelSupported} model=${options.model} supportedModels=${jsonStringify(config.supportedModels)}`,
1281
- )
1282
- }
1283
-
1284
- const useGlobalCacheFeature = shouldUseGlobalCacheScope()
1285
- const willDefer = (t: Tool) =>
1286
- useToolSearch && (deferredToolNames.has(t.name) || shouldDeferLspTool(t))
1287
- // MCP tools are per-user → dynamic tool section → can't globally cache.
1288
- // Only gate when an MCP tool will actually render (not defer_loading).
1289
- const needsToolBasedCacheMarker =
1290
- useGlobalCacheFeature &&
1291
- filteredTools.some(t => t.isMcp === true && !willDefer(t))
1292
-
1293
- // Ensure prompt_caching_scope beta header is present when global cache is enabled.
1294
- if (
1295
- useGlobalCacheFeature &&
1296
- !betas.includes(PROMPT_CACHING_SCOPE_BETA_HEADER)
1297
- ) {
1298
- betas.push(PROMPT_CACHING_SCOPE_BETA_HEADER)
1299
- }
1300
-
1301
- // Determine global cache strategy for logging
1302
- const globalCacheStrategy: GlobalCacheStrategy = useGlobalCacheFeature
1303
- ? needsToolBasedCacheMarker
1304
- ? 'none'
1305
- : 'system_prompt'
1306
- : 'none'
1307
-
1308
- // Build tool schemas, adding defer_loading for MCP tools when tool search is enabled
1309
- // Note: We pass the full `tools` list (not filteredTools) to toolToAPISchema so that
1310
- // ToolSearchTool's prompt can list ALL available MCP tools. The filtering only affects
1311
- // which tools are actually sent to the API, not what the model sees in tool descriptions.
1312
- const toolSchemas = await Promise.all(
1313
- filteredTools.map(tool =>
1314
- toolToAPISchema(tool, {
1315
- getToolPermissionContext: options.getToolPermissionContext,
1316
- tools,
1317
- agents: options.agents,
1318
- allowedAgentTypes: options.allowedAgentTypes,
1319
- model: options.model,
1320
- deferLoading: willDefer(tool),
1321
- }),
1322
- ),
1323
- )
1324
-
1325
- if (useToolSearch) {
1326
- const includedDeferredTools = count(filteredTools, t =>
1327
- deferredToolNames.has(t.name),
1328
- )
1329
- logForDebugging(
1330
- `Dynamic tool loading: ${includedDeferredTools}/${deferredToolNames.size} deferred tools included`,
1331
- )
1332
- }
1333
-
1334
- queryCheckpoint('query_tool_schema_build_end')
1335
-
1336
- // Normalize messages before building system prompt (needed for fingerprinting)
1337
- // Instrumentation: Track message count before normalization
1338
- logEvent('tengu_api_before_normalize', {
1339
- preNormalizedMessageCount: messages.length,
1340
- })
1341
-
1342
- queryCheckpoint('query_message_normalization_start')
1343
- let messagesForAPI = normalizeMessagesForAPI(messages, filteredTools)
1344
- queryCheckpoint('query_message_normalization_end')
1345
-
1346
- // Model-specific post-processing: strip tool-search-specific fields if the
1347
- // selected model doesn't support tool search.
1348
- //
1349
- // Why is this needed in addition to normalizeMessagesForAPI?
1350
- // - normalizeMessagesForAPI uses isToolSearchEnabledNoModelCheck() because it's
1351
- // called from ~20 places (analytics, feedback, sharing, etc.), many of which
1352
- // don't have model context. Adding model to its signature would be a large refactor.
1353
- // - This post-processing uses the model-aware isToolSearchEnabled() check
1354
- // - This handles mid-conversation model switching (e.g., Sonnet → Haiku) where
1355
- // stale tool-search fields from the previous model would cause 400 errors
1356
- //
1357
- // Note: For assistant messages, normalizeMessagesForAPI already normalized the
1358
- // tool inputs, so stripCallerFieldFromAssistantMessage only needs to remove the
1359
- // 'caller' field (not re-normalize inputs).
1360
- if (!useToolSearch) {
1361
- messagesForAPI = messagesForAPI.map(msg => {
1362
- switch (msg.type) {
1363
- case 'user':
1364
- // Strip tool_reference blocks from tool_result content
1365
- return stripToolReferenceBlocksFromUserMessage(msg)
1366
- case 'assistant':
1367
- // Strip 'caller' field from tool_use blocks
1368
- return stripCallerFieldFromAssistantMessage(msg)
1369
- default:
1370
- return msg
1371
- }
1372
- })
1373
- }
1374
-
1375
- // Repair tool_use/tool_result pairing mismatches that can occur when resuming
1376
- // remote/teleport sessions. Inserts synthetic error tool_results for orphaned
1377
- // tool_uses and strips orphaned tool_results referencing non-existent tool_uses.
1378
- messagesForAPI = ensureToolResultPairing(messagesForAPI)
1379
-
1380
- // Strip advisor blocks — the API rejects them without the beta header.
1381
- if (!betas.includes(ADVISOR_BETA_HEADER)) {
1382
- messagesForAPI = stripAdvisorBlocks(messagesForAPI)
1383
- }
1384
-
1385
- // Strip excess media items before making the API call.
1386
- // The API rejects requests with >100 media items but returns a confusing error.
1387
- // Rather than erroring (which is hard to recover from in Cowork/CCD), we
1388
- // silently drop the oldest media items to stay within the limit.
1389
- messagesForAPI = stripExcessMediaItems(
1390
- messagesForAPI,
1391
- API_MAX_MEDIA_PER_REQUEST,
1392
- )
1393
-
1394
- // Instrumentation: Track message count after normalization
1395
- logEvent('tengu_api_after_normalize', {
1396
- postNormalizedMessageCount: messagesForAPI.length,
1397
- })
1398
-
1399
- // Compute fingerprint from first user message for attribution.
1400
- // Must run BEFORE injecting synthetic messages (e.g. deferred tool names)
1401
- // so the fingerprint reflects the actual user input.
1402
- const fingerprint = computeFingerprintFromMessages(messagesForAPI)
1403
-
1404
- // When the delta attachment is enabled, deferred tools are announced
1405
- // via persisted deferred_tools_delta attachments instead of this
1406
- // ephemeral prepend (which busts cache whenever the pool changes).
1407
- if (useToolSearch && !isDeferredToolsDeltaEnabled()) {
1408
- const deferredToolList = tools
1409
- .filter(t => deferredToolNames.has(t.name))
1410
- .map(formatDeferredToolLine)
1411
- .sort()
1412
- .join('\n')
1413
- if (deferredToolList) {
1414
- messagesForAPI = [
1415
- createUserMessage({
1416
- content: `<available-deferred-tools>\n${deferredToolList}\n</available-deferred-tools>`,
1417
- isMeta: true,
1418
- }),
1419
- ...messagesForAPI,
1420
- ]
1421
- }
1422
- }
1423
-
1424
- // Chrome tool-search instructions: when the delta attachment is enabled,
1425
- // these are carried as a client-side block in mcp_instructions_delta
1426
- // (attachments.ts) instead of here. This per-request sys-prompt append
1427
- // busts the prompt cache when chrome connects late.
1428
- const hasChromeTools = filteredTools.some(t =>
1429
- isToolFromMcpServer(t.name, CLAUDE_IN_CHROME_MCP_SERVER_NAME),
1430
- )
1431
- const injectChromeHere =
1432
- useToolSearch && hasChromeTools && !isMcpInstructionsDeltaEnabled()
1433
-
1434
- // filter(Boolean) works by converting each element to a boolean - empty strings become false and are filtered out.
1435
- systemPrompt = asSystemPrompt(
1436
- [
1437
- getAttributionHeader(fingerprint),
1438
- getCLISyspromptPrefix({
1439
- isNonInteractive: options.isNonInteractiveSession,
1440
- hasAppendSystemPrompt: options.hasAppendSystemPrompt,
1441
- }),
1442
- ...systemPrompt,
1443
- ...(advisorModel ? [ADVISOR_TOOL_INSTRUCTIONS] : []),
1444
- ...(injectChromeHere ? [CHROME_TOOL_SEARCH_INSTRUCTIONS] : []),
1445
- ].filter(Boolean),
1446
- )
1447
-
1448
- // Prepend system prompt block for easy API identification
1449
- logAPIPrefix(systemPrompt)
1450
-
1451
- const enablePromptCaching =
1452
- options.enablePromptCaching ?? getPromptCachingEnabled(options.model)
1453
- const system = buildSystemPromptBlocks(systemPrompt, enablePromptCaching, {
1454
- skipGlobalCacheForSystemPrompt: needsToolBasedCacheMarker,
1455
- querySource: options.querySource,
1456
- })
1457
- const useBetas = betas.length > 0
1458
-
1459
- // Build minimal context for detailed tracing (when beta tracing is enabled)
1460
- // Note: The actual new_context message extraction is done in sessionTracing.ts using
1461
- // hash-based tracking per querySource (agent) from the messagesForAPI array
1462
- const extraToolSchemas = [...(options.extraToolSchemas ?? [])]
1463
- if (advisorModel) {
1464
- // Server tools must be in the tools array by API contract. Appended after
1465
- // toolSchemas (which carries the cache_control marker) so toggling /advisor
1466
- // only churns the small suffix, not the cached prefix.
1467
- extraToolSchemas.push({
1468
- type: 'advisor_20260301',
1469
- name: 'advisor',
1470
- model: advisorModel,
1471
- } as unknown as BetaToolUnion)
1472
- }
1473
- const allTools = [...toolSchemas, ...extraToolSchemas]
1474
-
1475
- const isFastMode =
1476
- isFastModeEnabled() &&
1477
- isFastModeAvailable() &&
1478
- !isFastModeCooldown() &&
1479
- isFastModeSupportedByModel(options.model) &&
1480
- !!options.fastMode
1481
-
1482
- // Sticky-on latches for dynamic beta headers. Each header, once first
1483
- // sent, keeps being sent for the rest of the session so mid-session
1484
- // toggles don't change the server-side cache key and bust ~50-70K tokens.
1485
- // Latches are cleared on /clear and /compact via clearBetaHeaderLatches().
1486
- // Per-call gates (isAgenticQuery, querySource===repl_main_thread) stay
1487
- // per-call so non-agentic queries keep their own stable header set.
1488
-
1489
- let afkHeaderLatched = getAfkModeHeaderLatched() === true
1490
- if (feature('TRANSCRIPT_CLASSIFIER')) {
1491
- if (
1492
- !afkHeaderLatched &&
1493
- isAgenticQuery &&
1494
- shouldIncludeFirstPartyOnlyBetas() &&
1495
- (autoModeStateModule?.isAutoModeActive() ?? false)
1496
- ) {
1497
- afkHeaderLatched = true
1498
- setAfkModeHeaderLatched(true)
1499
- }
1500
- }
1501
-
1502
- let fastModeHeaderLatched = getFastModeHeaderLatched() === true
1503
- if (!fastModeHeaderLatched && isFastMode) {
1504
- fastModeHeaderLatched = true
1505
- setFastModeHeaderLatched(true)
1506
- }
1507
-
1508
- let cacheEditingHeaderLatched = getCacheEditingHeaderLatched() === true
1509
- if (feature('CACHED_MICROCOMPACT')) {
1510
- if (
1511
- !cacheEditingHeaderLatched &&
1512
- cachedMCEnabled &&
1513
- getAPIProvider() === 'firstParty' &&
1514
- options.querySource === 'repl_main_thread'
1515
- ) {
1516
- cacheEditingHeaderLatched = true
1517
- setCacheEditingHeaderLatched(true)
1518
- }
1519
- }
1520
-
1521
- // Only latch from agentic queries so a classifier call doesn't flip the
1522
- // main thread's context_management mid-turn.
1523
- let thinkingClearLatched = getThinkingClearLatched() === true
1524
- if (!thinkingClearLatched && isAgenticQuery) {
1525
- const lastCompletion = getLastApiCompletionTimestamp()
1526
- if (
1527
- lastCompletion !== null &&
1528
- Date.now() - lastCompletion > CACHE_TTL_1HOUR_MS
1529
- ) {
1530
- thinkingClearLatched = true
1531
- setThinkingClearLatched(true)
1532
- }
1533
- }
1534
-
1535
- const effort = resolveAppliedEffort(options.model, options.effortValue)
1536
-
1537
- if (feature('PROMPT_CACHE_BREAK_DETECTION')) {
1538
- // Exclude defer_loading tools from the hash -- the API strips them from the
1539
- // prompt, so they never affect the actual cache key. Including them creates
1540
- // false-positive "tool schemas changed" breaks when tools are discovered or
1541
- // MCP servers reconnect.
1542
- const toolsForCacheDetection = allTools.filter(
1543
- t => !('defer_loading' in t && t.defer_loading),
1544
- )
1545
- // Capture everything that could affect the server-side cache key.
1546
- // Pass latched header values (not live state) so break detection
1547
- // reflects what we actually send, not what the user toggled.
1548
- recordPromptState({
1549
- system,
1550
- toolSchemas: toolsForCacheDetection,
1551
- querySource: options.querySource,
1552
- model: options.model,
1553
- agentId: options.agentId,
1554
- fastMode: fastModeHeaderLatched,
1555
- globalCacheStrategy,
1556
- betas,
1557
- autoModeActive: afkHeaderLatched,
1558
- isUsingOverage: currentLimits.isUsingOverage ?? false,
1559
- cachedMCEnabled: cacheEditingHeaderLatched,
1560
- effortValue: effort,
1561
- extraBodyParams: getExtraBodyParams(),
1562
- })
1563
- }
1564
-
1565
- const newContext: LLMRequestNewContext | undefined = isBetaTracingEnabled()
1566
- ? {
1567
- systemPrompt: systemPrompt.join('\n\n'),
1568
- querySource: options.querySource,
1569
- tools: jsonStringify(allTools),
1570
- }
1571
- : undefined
1572
-
1573
- // Capture the span so we can pass it to endLLMRequestSpan later
1574
- // This ensures responses are matched to the correct request when multiple requests run in parallel
1575
- const llmSpan = startLLMRequestSpan(
1576
- options.model,
1577
- newContext,
1578
- messagesForAPI,
1579
- isFastMode,
1580
- )
1581
-
1582
- const startIncludingRetries = Date.now()
1583
- let start = Date.now()
1584
- let attemptNumber = 0
1585
- const attemptStartTimes: number[] = []
1586
- let stream: Stream<BetaRawMessageStreamEvent> | undefined = undefined
1587
- let streamRequestId: string | null | undefined = undefined
1588
- let clientRequestId: string | undefined = undefined
1589
- // eslint-disable-next-line eslint-plugin-n/no-unsupported-features/node-builtins -- Response is available in Node 18+ and is used by the SDK
1590
- let streamResponse: Response | undefined = undefined
1591
-
1592
- // Release all stream resources to prevent native memory leaks.
1593
- // The Response object holds native TLS/socket buffers that live outside the
1594
- // V8 heap (observed on the Node.js/npm path; see GH #32920), so we must
1595
- // explicitly cancel and release it regardless of how the generator exits.
1596
- function releaseStreamResources(): void {
1597
- cleanupStream(stream)
1598
- stream = undefined
1599
- if (streamResponse) {
1600
- streamResponse.body?.cancel().catch(() => {})
1601
- streamResponse = undefined
1602
- }
1603
- }
1604
-
1605
- // Consume pending cache edits ONCE before paramsFromContext is defined.
1606
- // paramsFromContext is called multiple times (logging, retries), so consuming
1607
- // inside it would cause the first call to steal edits from subsequent calls.
1608
- const consumedCacheEdits = cachedMCEnabled ? consumePendingCacheEdits() : null
1609
- const consumedPinnedEdits = cachedMCEnabled ? getPinnedCacheEdits() : []
1610
-
1611
- // Capture the betas sent in the last API request, including the ones that
1612
- // were dynamically added, so we can log and send it to telemetry.
1613
- let lastRequestBetas: string[] | undefined
1614
-
1615
- const paramsFromContext = (retryContext: RetryContext) => {
1616
- const betasParams = [...betas]
1617
-
1618
- // Append 1M beta dynamically for the Sonnet 1M experiment.
1619
- if (
1620
- !betasParams.includes(CONTEXT_1M_BETA_HEADER) &&
1621
- getSonnet1mExpTreatmentEnabled(retryContext.model)
1622
- ) {
1623
- betasParams.push(CONTEXT_1M_BETA_HEADER)
1624
- }
1625
-
1626
- // For Bedrock, include both model-based betas and dynamically-added tool search header
1627
- const bedrockBetas =
1628
- getAPIProvider() === 'bedrock'
1629
- ? [
1630
- ...getBedrockExtraBodyParamsBetas(retryContext.model),
1631
- ...(toolSearchHeader ? [toolSearchHeader] : []),
1632
- ]
1633
- : []
1634
- const extraBodyParams = getExtraBodyParams(bedrockBetas)
1635
-
1636
- const outputConfig: BetaOutputConfig = {
1637
- ...((extraBodyParams.output_config as BetaOutputConfig) ?? {}),
1638
- }
1639
-
1640
- configureEffortParams(
1641
- effort,
1642
- outputConfig,
1643
- extraBodyParams,
1644
- betasParams,
1645
- options.model,
1646
- )
1647
-
1648
- configureTaskBudgetParams(
1649
- options.taskBudget,
1650
- outputConfig as BetaOutputConfig & { task_budget?: TaskBudgetParam },
1651
- betasParams,
1652
- )
1653
-
1654
- // Merge outputFormat into extraBodyParams.output_config alongside effort
1655
- // Requires structured-outputs beta header per SDK (see parse() in messages.mjs)
1656
- if (options.outputFormat && !('format' in outputConfig)) {
1657
- outputConfig.format = options.outputFormat as BetaJSONOutputFormat
1658
- // Add beta header if not already present and provider supports it
1659
- if (
1660
- modelSupportsStructuredOutputs(options.model) &&
1661
- !betasParams.includes(STRUCTURED_OUTPUTS_BETA_HEADER)
1662
- ) {
1663
- betasParams.push(STRUCTURED_OUTPUTS_BETA_HEADER)
1664
- }
1665
- }
1666
-
1667
- // Retry context gets preference because it tries to course correct if we exceed the context window limit
1668
- const maxOutputTokens =
1669
- retryContext?.maxTokensOverride ||
1670
- options.maxOutputTokensOverride ||
1671
- getMaxOutputTokensForModel(options.model)
1672
-
1673
- const hasThinking =
1674
- thinkingConfig.type !== 'disabled' &&
1675
- !isEnvTruthy(process.env.CLAUDE_CODE_DISABLE_THINKING)
1676
- let thinking: BetaMessageStreamParams['thinking'] | undefined = undefined
1677
-
1678
- // IMPORTANT: Do not change the adaptive-vs-budget thinking selection below
1679
- // without notifying the model launch DRI and research. This is a sensitive
1680
- // setting that can greatly affect model quality and bashing.
1681
- if (hasThinking && modelSupportsThinking(options.model)) {
1682
- if (
1683
- !isEnvTruthy(process.env.CLAUDE_CODE_DISABLE_ADAPTIVE_THINKING) &&
1684
- modelSupportsAdaptiveThinking(options.model)
1685
- ) {
1686
- // For models that support adaptive thinking, always use adaptive
1687
- // thinking without a budget.
1688
- thinking = {
1689
- type: 'adaptive',
1690
- } satisfies BetaMessageStreamParams['thinking']
1691
- } else {
1692
- // For models that do not support adaptive thinking, use the default
1693
- // thinking budget unless explicitly specified.
1694
- let thinkingBudget = getMaxThinkingTokensForModel(options.model)
1695
- if (
1696
- thinkingConfig.type === 'enabled' &&
1697
- thinkingConfig.budgetTokens !== undefined
1698
- ) {
1699
- thinkingBudget = thinkingConfig.budgetTokens
1700
- }
1701
- thinkingBudget = Math.min(maxOutputTokens - 1, thinkingBudget)
1702
- thinking = {
1703
- budget_tokens: thinkingBudget,
1704
- type: 'enabled',
1705
- } satisfies BetaMessageStreamParams['thinking']
1706
- }
1707
- }
1708
-
1709
- // Get API context management strategies if enabled
1710
- const contextManagement = getAPIContextManagement({
1711
- hasThinking,
1712
- isRedactThinkingActive: betasParams.includes(REDACT_THINKING_BETA_HEADER),
1713
- clearAllThinking: thinkingClearLatched,
1714
- })
1715
-
1716
- const enablePromptCaching =
1717
- options.enablePromptCaching ?? getPromptCachingEnabled(retryContext.model)
1718
-
1719
- // Fast mode: header is latched session-stable (cache-safe), but
1720
- // `speed='fast'` stays dynamic so cooldown still suppresses the actual
1721
- // fast-mode request without changing the cache key.
1722
- let speed: BetaMessageStreamParams['speed']
1723
- const isFastModeForRetry =
1724
- isFastModeEnabled() &&
1725
- isFastModeAvailable() &&
1726
- !isFastModeCooldown() &&
1727
- isFastModeSupportedByModel(options.model) &&
1728
- !!retryContext.fastMode
1729
- if (isFastModeForRetry) {
1730
- speed = 'fast'
1731
- }
1732
- if (fastModeHeaderLatched && !betasParams.includes(FAST_MODE_BETA_HEADER)) {
1733
- betasParams.push(FAST_MODE_BETA_HEADER)
1734
- }
1735
-
1736
- // AFK mode beta: latched once auto mode is first activated. Still gated
1737
- // by isAgenticQuery per-call so classifiers/compaction don't get it.
1738
- if (feature('TRANSCRIPT_CLASSIFIER')) {
1739
- if (
1740
- afkHeaderLatched &&
1741
- shouldIncludeFirstPartyOnlyBetas() &&
1742
- isAgenticQuery &&
1743
- !betasParams.includes(AFK_MODE_BETA_HEADER)
1744
- ) {
1745
- betasParams.push(AFK_MODE_BETA_HEADER)
1746
- }
1747
- }
1748
-
1749
- // Cache editing beta: header is latched session-stable; useCachedMC
1750
- // (controls cache_edits body behavior) stays live so edits stop when
1751
- // the feature disables but the header doesn't flip.
1752
- const useCachedMC =
1753
- cachedMCEnabled &&
1754
- getAPIProvider() === 'firstParty' &&
1755
- options.querySource === 'repl_main_thread'
1756
- if (
1757
- cacheEditingHeaderLatched &&
1758
- getAPIProvider() === 'firstParty' &&
1759
- options.querySource === 'repl_main_thread' &&
1760
- !betasParams.includes(cacheEditingBetaHeader)
1761
- ) {
1762
- betasParams.push(cacheEditingBetaHeader)
1763
- logForDebugging(
1764
- 'Cache editing beta header enabled for cached microcompact',
1765
- )
1766
- }
1767
-
1768
- // Only send temperature when thinking is disabled — the API requires
1769
- // temperature: 1 when thinking is enabled, which is already the default.
1770
- const temperature = !hasThinking
1771
- ? (options.temperatureOverride ?? 1)
1772
- : undefined
1773
-
1774
- lastRequestBetas = betasParams
1775
-
1776
- return {
1777
- model: normalizeModelStringForAPI(options.model),
1778
- messages: addCacheBreakpoints(
1779
- messagesForAPI,
1780
- enablePromptCaching,
1781
- options.querySource,
1782
- useCachedMC,
1783
- consumedCacheEdits,
1784
- consumedPinnedEdits,
1785
- options.skipCacheWrite,
1786
- ),
1787
- system,
1788
- tools: allTools,
1789
- tool_choice: options.toolChoice,
1790
- ...(useBetas && { betas: betasParams }),
1791
- metadata: getAPIMetadata(),
1792
- max_tokens: maxOutputTokens,
1793
- thinking,
1794
- ...(temperature !== undefined && { temperature }),
1795
- ...(contextManagement &&
1796
- useBetas &&
1797
- betasParams.includes(CONTEXT_MANAGEMENT_BETA_HEADER) && {
1798
- context_management: contextManagement,
1799
- }),
1800
- ...extraBodyParams,
1801
- ...(Object.keys(outputConfig).length > 0 && {
1802
- output_config: outputConfig,
1803
- }),
1804
- ...(speed !== undefined && { speed }),
1805
- }
1806
- }
1807
-
1808
- // Compute log scalars synchronously so the fire-and-forget .then() closure
1809
- // captures only primitives instead of paramsFromContext's full closure scope
1810
- // (messagesForAPI, system, allTools, betas — the entire request-building
1811
- // context), which would otherwise be pinned until the promise resolves.
1812
- {
1813
- const queryParams = paramsFromContext({
1814
- model: options.model,
1815
- thinkingConfig,
1816
- })
1817
- const logMessagesLength = queryParams.messages.length
1818
- const logBetas = useBetas ? (queryParams.betas ?? []) : []
1819
- const logThinkingType = queryParams.thinking?.type ?? 'disabled'
1820
- const logEffortValue = queryParams.output_config?.effort
1821
- void options.getToolPermissionContext().then(permissionContext => {
1822
- logAPIQuery({
1823
- model: options.model,
1824
- messagesLength: logMessagesLength,
1825
- temperature: options.temperatureOverride ?? 1,
1826
- betas: logBetas,
1827
- permissionMode: permissionContext.mode,
1828
- querySource: options.querySource,
1829
- queryTracking: options.queryTracking,
1830
- thinkingType: logThinkingType,
1831
- effortValue: logEffortValue,
1832
- fastMode: isFastMode,
1833
- previousRequestId,
1834
- })
1835
- })
1836
- }
1837
-
1838
- const newMessages: AssistantMessage[] = []
1839
- let ttftMs = 0
1840
- let partialMessage: BetaMessage | undefined = undefined
1841
- const contentBlocks: (BetaContentBlock | ConnectorTextBlock)[] = []
1842
- let usage: NonNullableUsage = EMPTY_USAGE
1843
- let costUSD = 0
1844
- let stopReason: BetaStopReason | null = null
1845
- let didFallBackToNonStreaming = false
1846
- let fallbackMessage: AssistantMessage | undefined
1847
- let maxOutputTokens = 0
1848
- let responseHeaders: globalThis.Headers | undefined = undefined
1849
- let research: unknown = undefined
1850
- let isFastModeRequest = isFastMode // Keep separate state as it may change if falling back
1851
- let isAdvisorInProgress = false
1852
-
1853
- try {
1854
- queryCheckpoint('query_client_creation_start')
1855
- const generator = withRetry(
1856
- () =>
1857
- getAnthropicClient({
1858
- maxRetries: 0, // Disabled auto-retry in favor of manual implementation
1859
- model: options.model,
1860
- fetchOverride: options.fetchOverride,
1861
- source: options.querySource,
1862
- }),
1863
- async (anthropic, attempt, context) => {
1864
- attemptNumber = attempt
1865
- isFastModeRequest = context.fastMode ?? false
1866
- start = Date.now()
1867
- attemptStartTimes.push(start)
1868
- // Client has been created by withRetry's getClient() call. This fires
1869
- // once per attempt; on retries the client is usually cached (withRetry
1870
- // only calls getClient() again after auth errors), so the delta from
1871
- // client_creation_start is meaningful on attempt 1.
1872
- queryCheckpoint('query_client_creation_end')
1873
-
1874
- const params = paramsFromContext(context)
1875
- captureAPIRequest(params, options.querySource) // Capture for bug reports
1876
-
1877
- maxOutputTokens = params.max_tokens
1878
-
1879
- // Fire immediately before the fetch is dispatched. .withResponse() below
1880
- // awaits until response headers arrive, so this MUST be before the await
1881
- // or the "Network TTFB" phase measurement is wrong.
1882
- queryCheckpoint('query_api_request_sent')
1883
- if (!options.agentId) {
1884
- headlessProfilerCheckpoint('api_request_sent')
1885
- }
1886
-
1887
- // Generate and track client request ID so timeouts (which return no
1888
- // server request ID) can still be correlated with server logs.
1889
- // First-party only — 3P providers don't log it (inc-4029 class).
1890
- clientRequestId =
1891
- getAPIProvider() === 'firstParty' && isFirstPartyAnthropicBaseUrl()
1892
- ? randomUUID()
1893
- : undefined
1894
-
1895
- // Use raw stream instead of BetaMessageStream to avoid O(n²) partial JSON parsing
1896
- // BetaMessageStream calls partialParse() on every input_json_delta, which we don't need
1897
- // since we handle tool input accumulation ourselves
1898
- // biome-ignore lint/plugin: main conversation loop handles attribution separately
1899
- const result = await anthropic.beta.messages
1900
- .create(
1901
- { ...params, stream: true },
1902
- {
1903
- signal,
1904
- ...(clientRequestId && {
1905
- headers: { [CLIENT_REQUEST_ID_HEADER]: clientRequestId },
1906
- }),
1907
- },
1908
- )
1909
- .withResponse()
1910
- queryCheckpoint('query_response_headers_received')
1911
- streamRequestId = result.request_id
1912
- streamResponse = result.response
1913
- return result.data
1914
- },
1915
- {
1916
- model: options.model,
1917
- fallbackModel: options.fallbackModel,
1918
- thinkingConfig,
1919
- ...(isFastModeEnabled() ? { fastMode: isFastMode } : false),
1920
- signal,
1921
- querySource: options.querySource,
1922
- },
1923
- )
1924
-
1925
- let e
1926
- do {
1927
- e = await generator.next()
1928
-
1929
- // yield API error messages (the stream has a 'controller' property, error messages don't)
1930
- if (!('controller' in e.value)) {
1931
- yield e.value
1932
- }
1933
- } while (!e.done)
1934
- stream = e.value as Stream<BetaRawMessageStreamEvent>
1935
-
1936
- // reset state
1937
- newMessages.length = 0
1938
- ttftMs = 0
1939
- partialMessage = undefined
1940
- contentBlocks.length = 0
1941
- usage = EMPTY_USAGE
1942
- stopReason = null
1943
- isAdvisorInProgress = false
1944
-
1945
- // Streaming idle timeout watchdog: abort the stream if no chunks arrive
1946
- // for STREAM_IDLE_TIMEOUT_MS. Unlike the stall detection below (which only
1947
- // fires when the *next* chunk arrives), this uses setTimeout to actively
1948
- // kill hung streams. Without this, a silently dropped connection can hang
1949
- // the session indefinitely since the SDK's request timeout only covers the
1950
- // initial fetch(), not the streaming body.
1951
- const streamWatchdogEnabled = isEnvTruthy(
1952
- process.env.CLAUDE_ENABLE_STREAM_WATCHDOG,
1953
- )
1954
- const STREAM_IDLE_TIMEOUT_MS =
1955
- parseInt(process.env.CLAUDE_STREAM_IDLE_TIMEOUT_MS || '', 10) || 90_000
1956
- const STREAM_IDLE_WARNING_MS = STREAM_IDLE_TIMEOUT_MS / 2
1957
- let streamIdleAborted = false
1958
- // performance.now() snapshot when watchdog fires, for measuring abort propagation delay
1959
- let streamWatchdogFiredAt: number | null = null
1960
- let streamIdleWarningTimer: ReturnType<typeof setTimeout> | null = null
1961
- let streamIdleTimer: ReturnType<typeof setTimeout> | null = null
1962
- function clearStreamIdleTimers(): void {
1963
- if (streamIdleWarningTimer !== null) {
1964
- clearTimeout(streamIdleWarningTimer)
1965
- streamIdleWarningTimer = null
1966
- }
1967
- if (streamIdleTimer !== null) {
1968
- clearTimeout(streamIdleTimer)
1969
- streamIdleTimer = null
1970
- }
1971
- }
1972
- function resetStreamIdleTimer(): void {
1973
- clearStreamIdleTimers()
1974
- if (!streamWatchdogEnabled) {
1975
- return
1976
- }
1977
- streamIdleWarningTimer = setTimeout(
1978
- warnMs => {
1979
- logForDebugging(
1980
- `Streaming idle warning: no chunks received for ${warnMs / 1000}s`,
1981
- { level: 'warn' },
1982
- )
1983
- logForDiagnosticsNoPII('warn', 'cli_streaming_idle_warning')
1984
- },
1985
- STREAM_IDLE_WARNING_MS,
1986
- STREAM_IDLE_WARNING_MS,
1987
- )
1988
- streamIdleTimer = setTimeout(() => {
1989
- streamIdleAborted = true
1990
- streamWatchdogFiredAt = performance.now()
1991
- logForDebugging(
1992
- `Streaming idle timeout: no chunks received for ${STREAM_IDLE_TIMEOUT_MS / 1000}s, aborting stream`,
1993
- { level: 'error' },
1994
- )
1995
- logForDiagnosticsNoPII('error', 'cli_streaming_idle_timeout')
1996
- logEvent('tengu_streaming_idle_timeout', {
1997
- model:
1998
- options.model as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
1999
- request_id: (streamRequestId ??
2000
- 'unknown') as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2001
- timeout_ms: STREAM_IDLE_TIMEOUT_MS,
2002
- })
2003
- releaseStreamResources()
2004
- }, STREAM_IDLE_TIMEOUT_MS)
2005
- }
2006
- resetStreamIdleTimer()
2007
-
2008
- startSessionActivity('api_call')
2009
- try {
2010
- // stream in and accumulate state
2011
- let isFirstChunk = true
2012
- let lastEventTime: number | null = null // Set after first chunk to avoid measuring TTFB as a stall
2013
- const STALL_THRESHOLD_MS = 30_000 // 30 seconds
2014
- let totalStallTime = 0
2015
- let stallCount = 0
2016
-
2017
- for await (const part of stream) {
2018
- resetStreamIdleTimer()
2019
- const now = Date.now()
2020
-
2021
- // Detect and log streaming stalls (only after first event to avoid counting TTFB)
2022
- if (lastEventTime !== null) {
2023
- const timeSinceLastEvent = now - lastEventTime
2024
- if (timeSinceLastEvent > STALL_THRESHOLD_MS) {
2025
- stallCount++
2026
- totalStallTime += timeSinceLastEvent
2027
- logForDebugging(
2028
- `Streaming stall detected: ${(timeSinceLastEvent / 1000).toFixed(1)}s gap between events (stall #${stallCount})`,
2029
- { level: 'warn' },
2030
- )
2031
- logEvent('tengu_streaming_stall', {
2032
- stall_duration_ms: timeSinceLastEvent,
2033
- stall_count: stallCount,
2034
- total_stall_time_ms: totalStallTime,
2035
- event_type:
2036
- part.type as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2037
- model:
2038
- options.model as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2039
- request_id: (streamRequestId ??
2040
- 'unknown') as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2041
- })
2042
- }
2043
- }
2044
- lastEventTime = now
2045
-
2046
- if (isFirstChunk) {
2047
- logForDebugging('Stream started - received first chunk')
2048
- queryCheckpoint('query_first_chunk_received')
2049
- if (!options.agentId) {
2050
- headlessProfilerCheckpoint('first_chunk')
2051
- }
2052
- endQueryProfile()
2053
- isFirstChunk = false
2054
- }
2055
-
2056
- switch (part.type) {
2057
- case 'message_start': {
2058
- partialMessage = part.message
2059
- ttftMs = Date.now() - start
2060
- usage = updateUsage(usage, part.message?.usage)
2061
- // Capture research from message_start if available (internal only).
2062
- // Always overwrite with the latest value.
2063
- if (
2064
- process.env.USER_TYPE === 'ant' &&
2065
- 'research' in (part.message as unknown as Record<string, unknown>)
2066
- ) {
2067
- research = (part.message as unknown as Record<string, unknown>)
2068
- .research
2069
- }
2070
- break
2071
- }
2072
- case 'content_block_start':
2073
- switch (part.content_block.type) {
2074
- case 'tool_use':
2075
- contentBlocks[part.index] = {
2076
- ...part.content_block,
2077
- input: '',
2078
- }
2079
- break
2080
- case 'server_tool_use':
2081
- contentBlocks[part.index] = {
2082
- ...part.content_block,
2083
- input: '' as unknown as { [key: string]: unknown },
2084
- }
2085
- if ((part.content_block.name as string) === 'advisor') {
2086
- isAdvisorInProgress = true
2087
- logForDebugging(`[AdvisorTool] Advisor tool called`)
2088
- logEvent('tengu_advisor_tool_call', {
2089
- model:
2090
- options.model as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2091
- advisor_model: (advisorModel ??
2092
- 'unknown') as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2093
- })
2094
- }
2095
- break
2096
- case 'text':
2097
- contentBlocks[part.index] = {
2098
- ...part.content_block,
2099
- // awkwardly, the sdk sometimes returns text as part of a
2100
- // content_block_start message, then returns the same text
2101
- // again in a content_block_delta message. we ignore it here
2102
- // since there doesn't seem to be a way to detect when a
2103
- // content_block_delta message duplicates the text.
2104
- text: '',
2105
- }
2106
- break
2107
- case 'thinking':
2108
- contentBlocks[part.index] = {
2109
- ...part.content_block,
2110
- // also awkward
2111
- thinking: '',
2112
- // initialize signature to ensure field exists even if signature_delta never arrives
2113
- signature: '',
2114
- }
2115
- break
2116
- default:
2117
- // even more awkwardly, the sdk mutates the contents of text blocks
2118
- // as it works. we want the blocks to be immutable, so that we can
2119
- // accumulate state ourselves.
2120
- contentBlocks[part.index] = { ...part.content_block }
2121
- if (
2122
- (part.content_block.type as string) === 'advisor_tool_result'
2123
- ) {
2124
- isAdvisorInProgress = false
2125
- logForDebugging(`[AdvisorTool] Advisor tool result received`)
2126
- }
2127
- break
2128
- }
2129
- break
2130
- case 'content_block_delta': {
2131
- const contentBlock = contentBlocks[part.index]
2132
- const delta = part.delta as typeof part.delta | ConnectorTextDelta
2133
- if (!contentBlock) {
2134
- logEvent('tengu_streaming_error', {
2135
- error_type:
2136
- 'content_block_not_found_delta' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2137
- part_type:
2138
- part.type as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2139
- part_index: part.index,
2140
- })
2141
- throw new RangeError('Content block not found')
2142
- }
2143
- if (
2144
- feature('CONNECTOR_TEXT') &&
2145
- delta.type === 'connector_text_delta'
2146
- ) {
2147
- if (contentBlock.type !== 'connector_text') {
2148
- logEvent('tengu_streaming_error', {
2149
- error_type:
2150
- 'content_block_type_mismatch_connector_text' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2151
- expected_type:
2152
- 'connector_text' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2153
- actual_type:
2154
- contentBlock.type as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2155
- })
2156
- throw new Error('Content block is not a connector_text block')
2157
- }
2158
- contentBlock.connector_text += delta.connector_text
2159
- } else {
2160
- switch (delta.type) {
2161
- case 'citations_delta':
2162
- // TODO: handle citations
2163
- break
2164
- case 'input_json_delta':
2165
- if (
2166
- contentBlock.type !== 'tool_use' &&
2167
- contentBlock.type !== 'server_tool_use'
2168
- ) {
2169
- logEvent('tengu_streaming_error', {
2170
- error_type:
2171
- 'content_block_type_mismatch_input_json' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2172
- expected_type:
2173
- 'tool_use' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2174
- actual_type:
2175
- contentBlock.type as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2176
- })
2177
- throw new Error('Content block is not a input_json block')
2178
- }
2179
- if (typeof contentBlock.input !== 'string') {
2180
- logEvent('tengu_streaming_error', {
2181
- error_type:
2182
- 'content_block_input_not_string' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2183
- input_type:
2184
- typeof contentBlock.input as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2185
- })
2186
- throw new Error('Content block input is not a string')
2187
- }
2188
- contentBlock.input += delta.partial_json
2189
- break
2190
- case 'text_delta':
2191
- if (contentBlock.type !== 'text') {
2192
- logEvent('tengu_streaming_error', {
2193
- error_type:
2194
- 'content_block_type_mismatch_text' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2195
- expected_type:
2196
- 'text' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2197
- actual_type:
2198
- contentBlock.type as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2199
- })
2200
- throw new Error('Content block is not a text block')
2201
- }
2202
- contentBlock.text += delta.text
2203
- break
2204
- case 'signature_delta':
2205
- if (
2206
- feature('CONNECTOR_TEXT') &&
2207
- contentBlock.type === 'connector_text'
2208
- ) {
2209
- contentBlock.signature = delta.signature
2210
- break
2211
- }
2212
- if (contentBlock.type !== 'thinking') {
2213
- logEvent('tengu_streaming_error', {
2214
- error_type:
2215
- 'content_block_type_mismatch_thinking_signature' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2216
- expected_type:
2217
- 'thinking' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2218
- actual_type:
2219
- contentBlock.type as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2220
- })
2221
- throw new Error('Content block is not a thinking block')
2222
- }
2223
- contentBlock.signature = delta.signature
2224
- break
2225
- case 'thinking_delta':
2226
- if (contentBlock.type !== 'thinking') {
2227
- logEvent('tengu_streaming_error', {
2228
- error_type:
2229
- 'content_block_type_mismatch_thinking_delta' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2230
- expected_type:
2231
- 'thinking' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2232
- actual_type:
2233
- contentBlock.type as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2234
- })
2235
- throw new Error('Content block is not a thinking block')
2236
- }
2237
- contentBlock.thinking += delta.thinking
2238
- break
2239
- }
2240
- }
2241
- // Capture research from content_block_delta if available (internal only).
2242
- // Always overwrite with the latest value.
2243
- if (process.env.USER_TYPE === 'ant' && 'research' in part) {
2244
- research = (part as { research: unknown }).research
2245
- }
2246
- break
2247
- }
2248
- case 'content_block_stop': {
2249
- const contentBlock = contentBlocks[part.index]
2250
- if (!contentBlock) {
2251
- logEvent('tengu_streaming_error', {
2252
- error_type:
2253
- 'content_block_not_found_stop' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2254
- part_type:
2255
- part.type as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2256
- part_index: part.index,
2257
- })
2258
- throw new RangeError('Content block not found')
2259
- }
2260
- if (!partialMessage) {
2261
- logEvent('tengu_streaming_error', {
2262
- error_type:
2263
- 'partial_message_not_found' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2264
- part_type:
2265
- part.type as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2266
- })
2267
- throw new Error('Message not found')
2268
- }
2269
- const m: AssistantMessage = {
2270
- message: {
2271
- ...partialMessage,
2272
- content: normalizeContentFromAPI(
2273
- [contentBlock] as BetaContentBlock[],
2274
- tools,
2275
- options.agentId,
2276
- ),
2277
- },
2278
- requestId: streamRequestId ?? undefined,
2279
- type: 'assistant',
2280
- uuid: randomUUID(),
2281
- timestamp: new Date().toISOString(),
2282
- ...(process.env.USER_TYPE === 'ant' &&
2283
- research !== undefined && { research }),
2284
- ...(advisorModel && { advisorModel }),
2285
- }
2286
- newMessages.push(m)
2287
- yield m
2288
- break
2289
- }
2290
- case 'message_delta': {
2291
- usage = updateUsage(usage, part.usage)
2292
- // Capture research from message_delta if available (internal only).
2293
- // Always overwrite with the latest value. Also write back to
2294
- // already-yielded messages since message_delta arrives after
2295
- // content_block_stop.
2296
- if (
2297
- process.env.USER_TYPE === 'ant' &&
2298
- 'research' in (part as unknown as Record<string, unknown>)
2299
- ) {
2300
- research = (part as unknown as Record<string, unknown>).research
2301
- for (const msg of newMessages) {
2302
- msg.research = research
2303
- }
2304
- }
2305
-
2306
- // Write final usage and stop_reason back to the last yielded
2307
- // message. Messages are created at content_block_stop from
2308
- // partialMessage, which was set at message_start before any tokens
2309
- // were generated (output_tokens: 0, stop_reason: null).
2310
- // message_delta arrives after content_block_stop with the real
2311
- // values.
2312
- //
2313
- // IMPORTANT: Use direct property mutation, not object replacement.
2314
- // The transcript write queue holds a reference to message.message
2315
- // and serializes it lazily (100ms flush interval). Object
2316
- // replacement ({ ...lastMsg.message, usage }) would disconnect
2317
- // the queued reference; direct mutation ensures the transcript
2318
- // captures the final values.
2319
- stopReason = part.delta.stop_reason
2320
-
2321
- const lastMsg = newMessages.at(-1)
2322
- if (lastMsg) {
2323
- lastMsg.message.usage = usage
2324
- lastMsg.message.stop_reason = stopReason
2325
- }
2326
-
2327
- // Update cost
2328
- const costUSDForPart = calculateUSDCost(resolvedModel, usage)
2329
- costUSD += addToTotalSessionCost(
2330
- costUSDForPart,
2331
- usage,
2332
- options.model,
2333
- )
2334
-
2335
- const refusalMessage = getErrorMessageIfRefusal(
2336
- part.delta.stop_reason,
2337
- options.model,
2338
- )
2339
- if (refusalMessage) {
2340
- yield refusalMessage
2341
- }
2342
-
2343
- if (stopReason === 'max_tokens') {
2344
- logEvent('tengu_max_tokens_reached', {
2345
- max_tokens: maxOutputTokens,
2346
- })
2347
- yield createAssistantAPIErrorMessage({
2348
- content: `${API_ERROR_MESSAGE_PREFIX}: Claude's response exceeded the ${
2349
- maxOutputTokens
2350
- } output token maximum. To configure this behavior, set the CLAUDE_CODE_MAX_OUTPUT_TOKENS environment variable.`,
2351
- apiError: 'max_output_tokens',
2352
- error: 'max_output_tokens',
2353
- })
2354
- }
2355
-
2356
- if (stopReason === 'model_context_window_exceeded') {
2357
- logEvent('tengu_context_window_exceeded', {
2358
- max_tokens: maxOutputTokens,
2359
- output_tokens: usage.output_tokens,
2360
- })
2361
- // Reuse the max_output_tokens recovery path — from the model's
2362
- // perspective, both mean "response was cut off, continue from
2363
- // where you left off."
2364
- yield createAssistantAPIErrorMessage({
2365
- content: `${API_ERROR_MESSAGE_PREFIX}: The model has reached its context window limit.`,
2366
- apiError: 'max_output_tokens',
2367
- error: 'max_output_tokens',
2368
- })
2369
- }
2370
- break
2371
- }
2372
- case 'message_stop':
2373
- break
2374
- }
2375
-
2376
- yield {
2377
- type: 'stream_event',
2378
- event: part,
2379
- ...(part.type === 'message_start' ? { ttftMs } : undefined),
2380
- }
2381
- }
2382
- // Clear the idle timeout watchdog now that the stream loop has exited
2383
- clearStreamIdleTimers()
2384
-
2385
- // If the stream was aborted by our idle timeout watchdog, fall back to
2386
- // non-streaming retry rather than treating it as a completed stream.
2387
- if (streamIdleAborted) {
2388
- // Instrumentation: proves the for-await exited after the watchdog fired
2389
- // (vs. hung forever). exit_delay_ms measures abort propagation latency:
2390
- // 0-10ms = abort worked; >>1000ms = something else woke the loop.
2391
- const exitDelayMs =
2392
- streamWatchdogFiredAt !== null
2393
- ? Math.round(performance.now() - streamWatchdogFiredAt)
2394
- : -1
2395
- logForDiagnosticsNoPII(
2396
- 'info',
2397
- 'cli_stream_loop_exited_after_watchdog_clean',
2398
- )
2399
- logEvent('tengu_stream_loop_exited_after_watchdog', {
2400
- request_id: (streamRequestId ??
2401
- 'unknown') as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2402
- exit_delay_ms: exitDelayMs,
2403
- exit_path:
2404
- 'clean' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2405
- model:
2406
- options.model as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2407
- })
2408
- // Prevent double-emit: this throw lands in the catch block below,
2409
- // whose exit_path='error' probe guards on streamWatchdogFiredAt.
2410
- streamWatchdogFiredAt = null
2411
- throw new Error('Stream idle timeout - no chunks received')
2412
- }
2413
-
2414
- // Detect when the stream completed without producing any assistant messages.
2415
- // This covers two proxy failure modes:
2416
- // 1. No events at all (!partialMessage): proxy returned 200 with non-SSE body
2417
- // 2. Partial events (partialMessage set but no content blocks completed AND
2418
- // no stop_reason received): proxy returned message_start but stream ended
2419
- // before content_block_stop and before message_delta with stop_reason
2420
- // BetaMessageStream had the first check in _endRequest() but the raw Stream
2421
- // does not - without it the generator silently returns no assistant messages,
2422
- // causing "Execution error" in -p mode.
2423
- // Note: We must check stopReason to avoid false positives. For example, with
2424
- // structured output (--json-schema), the model calls a StructuredOutput tool
2425
- // on turn 1, then on turn 2 responds with end_turn and no content blocks.
2426
- // That's a legitimate empty response, not an incomplete stream.
2427
- if (!partialMessage || (newMessages.length === 0 && !stopReason)) {
2428
- logForDebugging(
2429
- !partialMessage
2430
- ? 'Stream completed without receiving message_start event - triggering non-streaming fallback'
2431
- : 'Stream completed with message_start but no content blocks completed - triggering non-streaming fallback',
2432
- { level: 'error' },
2433
- )
2434
- logEvent('tengu_stream_no_events', {
2435
- model:
2436
- options.model as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2437
- request_id: (streamRequestId ??
2438
- 'unknown') as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2439
- })
2440
- throw new Error('Stream ended without receiving any events')
2441
- }
2442
-
2443
- // Log summary if any stalls occurred during streaming
2444
- if (stallCount > 0) {
2445
- logForDebugging(
2446
- `Streaming completed with ${stallCount} stall(s), total stall time: ${(totalStallTime / 1000).toFixed(1)}s`,
2447
- { level: 'warn' },
2448
- )
2449
- logEvent('tengu_streaming_stall_summary', {
2450
- stall_count: stallCount,
2451
- total_stall_time_ms: totalStallTime,
2452
- model:
2453
- options.model as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2454
- request_id: (streamRequestId ??
2455
- 'unknown') as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2456
- })
2457
- }
2458
-
2459
- // Check if the cache actually broke based on response tokens
2460
- if (feature('PROMPT_CACHE_BREAK_DETECTION')) {
2461
- void checkResponseForCacheBreak(
2462
- options.querySource,
2463
- usage.cache_read_input_tokens,
2464
- usage.cache_creation_input_tokens,
2465
- messages,
2466
- options.agentId,
2467
- streamRequestId,
2468
- )
2469
- }
2470
-
2471
- // Process fallback percentage header and quota status if available
2472
- // streamResponse is set when the stream is created in the withRetry callback above
2473
- // TypeScript's control flow analysis can't track that streamResponse is set in the callback
2474
- // eslint-disable-next-line eslint-plugin-n/no-unsupported-features/node-builtins
2475
- const resp = streamResponse as unknown as Response | undefined
2476
- if (resp) {
2477
- extractQuotaStatusFromHeaders(resp.headers)
2478
- // Store headers for gateway detection
2479
- responseHeaders = resp.headers
2480
- }
2481
- } catch (streamingError) {
2482
- // Clear the idle timeout watchdog on error path too
2483
- clearStreamIdleTimers()
2484
-
2485
- // Instrumentation: if the watchdog had already fired and the for-await
2486
- // threw (rather than exiting cleanly), record that the loop DID exit and
2487
- // how long after the watchdog. Distinguishes true hangs from error exits.
2488
- if (streamIdleAborted && streamWatchdogFiredAt !== null) {
2489
- const exitDelayMs = Math.round(
2490
- performance.now() - streamWatchdogFiredAt,
2491
- )
2492
- logForDiagnosticsNoPII(
2493
- 'info',
2494
- 'cli_stream_loop_exited_after_watchdog_error',
2495
- )
2496
- logEvent('tengu_stream_loop_exited_after_watchdog', {
2497
- request_id: (streamRequestId ??
2498
- 'unknown') as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2499
- exit_delay_ms: exitDelayMs,
2500
- exit_path:
2501
- 'error' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2502
- error_name:
2503
- streamingError instanceof Error
2504
- ? (streamingError.name as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS)
2505
- : ('unknown' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS),
2506
- model:
2507
- options.model as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2508
- })
2509
- }
2510
-
2511
- if (streamingError instanceof APIUserAbortError) {
2512
- // Check if the abort signal was triggered by the user (ESC key)
2513
- // If the signal is aborted, it's a user-initiated abort
2514
- // If not, it's likely a timeout from the SDK
2515
- if (signal.aborted) {
2516
- // This is a real user abort (ESC key was pressed)
2517
- logForDebugging(
2518
- `Streaming aborted by user: ${errorMessage(streamingError)}`,
2519
- )
2520
- if (isAdvisorInProgress) {
2521
- logEvent('tengu_advisor_tool_interrupted', {
2522
- model:
2523
- options.model as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2524
- advisor_model: (advisorModel ??
2525
- 'unknown') as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2526
- })
2527
- }
2528
- throw streamingError
2529
- } else {
2530
- // The SDK threw APIUserAbortError but our signal wasn't aborted
2531
- // This means it's a timeout from the SDK's internal timeout
2532
- logForDebugging(
2533
- `Streaming timeout (SDK abort): ${streamingError.message}`,
2534
- { level: 'error' },
2535
- )
2536
- // Throw a more specific error for timeout
2537
- throw new APIConnectionTimeoutError({ message: 'Request timed out' })
2538
- }
2539
- }
2540
-
2541
- // When the flag is enabled, skip the non-streaming fallback and let the
2542
- // error propagate to withRetry. The mid-stream fallback causes double tool
2543
- // execution when streaming tool execution is active: the partial stream
2544
- // starts a tool, then the non-streaming retry produces the same tool_use
2545
- // and runs it again. See inc-4258.
2546
- const disableFallback =
2547
- isEnvTruthy(process.env.CLAUDE_CODE_DISABLE_NONSTREAMING_FALLBACK) ||
2548
- getFeatureValue_CACHED_MAY_BE_STALE(
2549
- 'tengu_disable_streaming_to_non_streaming_fallback',
2550
- false,
2551
- )
2552
-
2553
- if (disableFallback) {
2554
- logForDebugging(
2555
- `Error streaming (non-streaming fallback disabled): ${errorMessage(streamingError)}`,
2556
- { level: 'error' },
2557
- )
2558
- logEvent('tengu_streaming_fallback_to_non_streaming', {
2559
- model:
2560
- options.model as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2561
- error:
2562
- streamingError instanceof Error
2563
- ? (streamingError.name as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS)
2564
- : (String(
2565
- streamingError,
2566
- ) as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS),
2567
- attemptNumber,
2568
- maxOutputTokens,
2569
- thinkingType:
2570
- thinkingConfig.type as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2571
- fallback_disabled: true,
2572
- request_id: (streamRequestId ??
2573
- 'unknown') as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2574
- fallback_cause: (streamIdleAborted
2575
- ? 'watchdog'
2576
- : 'other') as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2577
- })
2578
- throw streamingError
2579
- }
2580
-
2581
- logForDebugging(
2582
- `Error streaming, falling back to non-streaming mode: ${errorMessage(streamingError)}`,
2583
- { level: 'error' },
2584
- )
2585
- didFallBackToNonStreaming = true
2586
- if (options.onStreamingFallback) {
2587
- options.onStreamingFallback()
2588
- }
2589
-
2590
- logEvent('tengu_streaming_fallback_to_non_streaming', {
2591
- model:
2592
- options.model as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2593
- error:
2594
- streamingError instanceof Error
2595
- ? (streamingError.name as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS)
2596
- : (String(
2597
- streamingError,
2598
- ) as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS),
2599
- attemptNumber,
2600
- maxOutputTokens,
2601
- thinkingType:
2602
- thinkingConfig.type as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2603
- fallback_disabled: false,
2604
- request_id: (streamRequestId ??
2605
- 'unknown') as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2606
- fallback_cause: (streamIdleAborted
2607
- ? 'watchdog'
2608
- : 'other') as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2609
- })
2610
-
2611
- // Fall back to non-streaming mode with retries.
2612
- // If the streaming failure was itself a 529, count it toward the
2613
- // consecutive-529 budget so total 529s-before-model-fallback is the
2614
- // same whether the overload was hit in streaming or non-streaming mode.
2615
- // This is a speculative fix for https://github.com/anthropics/claude-code/issues/1513
2616
- // Instrumentation: proves executeNonStreamingRequest was entered (vs. the
2617
- // fallback event firing but the call itself hanging at dispatch).
2618
- logForDiagnosticsNoPII('info', 'cli_nonstreaming_fallback_started')
2619
- logEvent('tengu_nonstreaming_fallback_started', {
2620
- request_id: (streamRequestId ??
2621
- 'unknown') as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2622
- model:
2623
- options.model as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2624
- fallback_cause: (streamIdleAborted
2625
- ? 'watchdog'
2626
- : 'other') as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2627
- })
2628
- const result = yield* executeNonStreamingRequest(
2629
- {
2630
- model: options.model,
2631
- fetchOverride: options.fetchOverride,
2632
- source: options.querySource,
2633
- },
2634
- {
2635
- model: options.model,
2636
- fallbackModel: options.fallbackModel,
2637
- thinkingConfig,
2638
- ...(isFastModeEnabled() && { fastMode: isFastMode }),
2639
- signal,
2640
- initialConsecutive529Errors: is529Error(streamingError) ? 1 : 0,
2641
- querySource: options.querySource,
2642
- },
2643
- paramsFromContext,
2644
- (attempt, _startTime, tokens) => {
2645
- attemptNumber = attempt
2646
- maxOutputTokens = tokens
2647
- },
2648
- params => captureAPIRequest(params, options.querySource),
2649
- streamRequestId,
2650
- )
2651
-
2652
- const m: AssistantMessage = {
2653
- message: {
2654
- ...result,
2655
- content: normalizeContentFromAPI(
2656
- result.content,
2657
- tools,
2658
- options.agentId,
2659
- ),
2660
- },
2661
- requestId: streamRequestId ?? undefined,
2662
- type: 'assistant',
2663
- uuid: randomUUID(),
2664
- timestamp: new Date().toISOString(),
2665
- ...(process.env.USER_TYPE === 'ant' &&
2666
- research !== undefined && {
2667
- research,
2668
- }),
2669
- ...(advisorModel && {
2670
- advisorModel,
2671
- }),
2672
- }
2673
- newMessages.push(m)
2674
- fallbackMessage = m
2675
- yield m
2676
- } finally {
2677
- clearStreamIdleTimers()
2678
- }
2679
- } catch (errorFromRetry) {
2680
- // FallbackTriggeredError must propagate to query.ts, which performs the
2681
- // actual model switch. Swallowing it here would turn the fallback into a
2682
- // no-op — the user would just see "Model fallback triggered: X -> Y" as
2683
- // an error message with no actual retry on the fallback model.
2684
- if (errorFromRetry instanceof FallbackTriggeredError) {
2685
- throw errorFromRetry
2686
- }
2687
-
2688
- // Check if this is a 404 error during stream creation that should trigger
2689
- // non-streaming fallback. This handles gateways that return 404 for streaming
2690
- // endpoints but work fine with non-streaming. Before v2.1.8, BetaMessageStream
2691
- // threw 404s during iteration (caught by inner catch with fallback), but now
2692
- // with raw streams, 404s are thrown during creation (caught here).
2693
- const is404StreamCreationError =
2694
- !didFallBackToNonStreaming &&
2695
- errorFromRetry instanceof CannotRetryError &&
2696
- errorFromRetry.originalError instanceof APIError &&
2697
- errorFromRetry.originalError.status === 404
2698
-
2699
- if (is404StreamCreationError) {
2700
- // 404 is thrown at .withResponse() before streamRequestId is assigned,
2701
- // and CannotRetryError means every retry failed — so grab the failed
2702
- // request's ID from the error header instead.
2703
- const failedRequestId =
2704
- (errorFromRetry.originalError as APIError).requestID ?? 'unknown'
2705
- logForDebugging(
2706
- 'Streaming endpoint returned 404, falling back to non-streaming mode',
2707
- { level: 'warn' },
2708
- )
2709
- didFallBackToNonStreaming = true
2710
- if (options.onStreamingFallback) {
2711
- options.onStreamingFallback()
2712
- }
2713
-
2714
- logEvent('tengu_streaming_fallback_to_non_streaming', {
2715
- model:
2716
- options.model as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2717
- error:
2718
- '404_stream_creation' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2719
- attemptNumber,
2720
- maxOutputTokens,
2721
- thinkingType:
2722
- thinkingConfig.type as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2723
- request_id:
2724
- failedRequestId as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2725
- fallback_cause:
2726
- '404_stream_creation' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
2727
- })
2728
-
2729
- try {
2730
- // Fall back to non-streaming mode
2731
- const result = yield* executeNonStreamingRequest(
2732
- {
2733
- model: options.model,
2734
- fetchOverride: options.fetchOverride,
2735
- source: options.querySource,
2736
- },
2737
- {
2738
- model: options.model,
2739
- fallbackModel: options.fallbackModel,
2740
- thinkingConfig,
2741
- ...(isFastModeEnabled() && { fastMode: isFastMode }),
2742
- signal,
2743
- },
2744
- paramsFromContext,
2745
- (attempt, _startTime, tokens) => {
2746
- attemptNumber = attempt
2747
- maxOutputTokens = tokens
2748
- },
2749
- params => captureAPIRequest(params, options.querySource),
2750
- failedRequestId,
2751
- )
2752
-
2753
- const m: AssistantMessage = {
2754
- message: {
2755
- ...result,
2756
- content: normalizeContentFromAPI(
2757
- result.content,
2758
- tools,
2759
- options.agentId,
2760
- ),
2761
- },
2762
- requestId: streamRequestId ?? undefined,
2763
- type: 'assistant',
2764
- uuid: randomUUID(),
2765
- timestamp: new Date().toISOString(),
2766
- ...(process.env.USER_TYPE === 'ant' &&
2767
- research !== undefined && { research }),
2768
- ...(advisorModel && { advisorModel }),
2769
- }
2770
- newMessages.push(m)
2771
- fallbackMessage = m
2772
- yield m
2773
-
2774
- // Continue to success logging below
2775
- } catch (fallbackError) {
2776
- // Propagate model-fallback signal to query.ts (see comment above).
2777
- if (fallbackError instanceof FallbackTriggeredError) {
2778
- throw fallbackError
2779
- }
2780
-
2781
- // Fallback also failed, handle as normal error
2782
- logForDebugging(
2783
- `Non-streaming fallback also failed: ${errorMessage(fallbackError)}`,
2784
- { level: 'error' },
2785
- )
2786
-
2787
- let error = fallbackError
2788
- let errorModel = options.model
2789
- if (fallbackError instanceof CannotRetryError) {
2790
- error = fallbackError.originalError
2791
- errorModel = fallbackError.retryContext.model
2792
- }
2793
-
2794
- if (error instanceof APIError) {
2795
- extractQuotaStatusFromError(error)
2796
- }
2797
-
2798
- const requestId =
2799
- streamRequestId ||
2800
- (error instanceof APIError ? error.requestID : undefined) ||
2801
- (error instanceof APIError
2802
- ? (error.error as { request_id?: string })?.request_id
2803
- : undefined)
2804
-
2805
- logAPIError({
2806
- error,
2807
- model: errorModel,
2808
- messageCount: messagesForAPI.length,
2809
- messageTokens: tokenCountFromLastAPIResponse(messagesForAPI),
2810
- durationMs: Date.now() - start,
2811
- durationMsIncludingRetries: Date.now() - startIncludingRetries,
2812
- attempt: attemptNumber,
2813
- requestId,
2814
- clientRequestId,
2815
- didFallBackToNonStreaming,
2816
- queryTracking: options.queryTracking,
2817
- querySource: options.querySource,
2818
- llmSpan,
2819
- fastMode: isFastModeRequest,
2820
- previousRequestId,
2821
- })
2822
-
2823
- if (error instanceof APIUserAbortError) {
2824
- releaseStreamResources()
2825
- return
2826
- }
2827
-
2828
- yield getAssistantMessageFromError(error, errorModel, {
2829
- messages,
2830
- messagesForAPI,
2831
- })
2832
- releaseStreamResources()
2833
- return
2834
- }
2835
- } else {
2836
- // Original error handling for non-404 errors
2837
- logForDebugging(`Error in API request: ${errorMessage(errorFromRetry)}`, {
2838
- level: 'error',
2839
- })
2840
-
2841
- let error = errorFromRetry
2842
- let errorModel = options.model
2843
- if (errorFromRetry instanceof CannotRetryError) {
2844
- error = errorFromRetry.originalError
2845
- errorModel = errorFromRetry.retryContext.model
2846
- }
2847
-
2848
- // Extract quota status from error headers if it's a rate limit error
2849
- if (error instanceof APIError) {
2850
- extractQuotaStatusFromError(error)
2851
- }
2852
-
2853
- // Extract requestId from stream, error header, or error body
2854
- const requestId =
2855
- streamRequestId ||
2856
- (error instanceof APIError ? error.requestID : undefined) ||
2857
- (error instanceof APIError
2858
- ? (error.error as { request_id?: string })?.request_id
2859
- : undefined)
2860
-
2861
- logAPIError({
2862
- error,
2863
- model: errorModel,
2864
- messageCount: messagesForAPI.length,
2865
- messageTokens: tokenCountFromLastAPIResponse(messagesForAPI),
2866
- durationMs: Date.now() - start,
2867
- durationMsIncludingRetries: Date.now() - startIncludingRetries,
2868
- attempt: attemptNumber,
2869
- requestId,
2870
- clientRequestId,
2871
- didFallBackToNonStreaming,
2872
- queryTracking: options.queryTracking,
2873
- querySource: options.querySource,
2874
- llmSpan,
2875
- fastMode: isFastModeRequest,
2876
- previousRequestId,
2877
- })
2878
-
2879
- // Don't yield an assistant error message for user aborts
2880
- // The interruption message is handled in query.ts
2881
- if (error instanceof APIUserAbortError) {
2882
- releaseStreamResources()
2883
- return
2884
- }
2885
-
2886
- yield getAssistantMessageFromError(error, errorModel, {
2887
- messages,
2888
- messagesForAPI,
2889
- })
2890
- releaseStreamResources()
2891
- return
2892
- }
2893
- } finally {
2894
- stopSessionActivity('api_call')
2895
- // Must be in the finally block: if the generator is terminated early
2896
- // via .return() (e.g. consumer breaks out of for-await-of, or query.ts
2897
- // encounters an abort), code after the try/finally never executes.
2898
- // Without this, the Response object's native TLS/socket buffers leak
2899
- // until the generator itself is GC'd (see GH #32920).
2900
- releaseStreamResources()
2901
-
2902
- // Non-streaming fallback cost: the streaming path tracks cost in the
2903
- // message_delta handler before any yield. Fallback pushes to newMessages
2904
- // then yields, so tracking must be here to survive .return() at the yield.
2905
- if (fallbackMessage) {
2906
- const fallbackUsage = fallbackMessage.message.usage
2907
- usage = updateUsage(EMPTY_USAGE, fallbackUsage)
2908
- stopReason = fallbackMessage.message.stop_reason
2909
- const fallbackCost = calculateUSDCost(resolvedModel, fallbackUsage)
2910
- costUSD += addToTotalSessionCost(
2911
- fallbackCost,
2912
- fallbackUsage,
2913
- options.model,
2914
- )
2915
- }
2916
- }
2917
-
2918
- // Mark all registered tools as sent to API so they become eligible for deletion
2919
- if (feature('CACHED_MICROCOMPACT') && cachedMCEnabled) {
2920
- markToolsSentToAPIState()
2921
- }
2922
-
2923
- // Track the last requestId for the main conversation chain so shutdown
2924
- // can send a cache eviction hint to inference. Exclude backgrounded
2925
- // sessions (Ctrl+B) which share the repl_main_thread querySource but
2926
- // run inside an agent context — they are independent conversation chains
2927
- // whose cache should not be evicted when the foreground session clears.
2928
- if (
2929
- streamRequestId &&
2930
- !getAgentContext() &&
2931
- (options.querySource.startsWith('repl_main_thread') ||
2932
- options.querySource === 'sdk')
2933
- ) {
2934
- setLastMainRequestId(streamRequestId)
2935
- }
2936
-
2937
- // Precompute scalars so the fire-and-forget .then() closure doesn't pin the
2938
- // full messagesForAPI array (the entire conversation up to the context window
2939
- // limit) until getToolPermissionContext() resolves.
2940
- const logMessageCount = messagesForAPI.length
2941
- const logMessageTokens = tokenCountFromLastAPIResponse(messagesForAPI)
2942
- void options.getToolPermissionContext().then(permissionContext => {
2943
- logAPISuccessAndDuration({
2944
- model:
2945
- newMessages[0]?.message.model ?? partialMessage?.model ?? options.model,
2946
- preNormalizedModel: options.model,
2947
- usage,
2948
- start,
2949
- startIncludingRetries,
2950
- attempt: attemptNumber,
2951
- messageCount: logMessageCount,
2952
- messageTokens: logMessageTokens,
2953
- requestId: streamRequestId ?? null,
2954
- stopReason,
2955
- ttftMs,
2956
- didFallBackToNonStreaming,
2957
- querySource: options.querySource,
2958
- headers: responseHeaders,
2959
- costUSD,
2960
- queryTracking: options.queryTracking,
2961
- permissionMode: permissionContext.mode,
2962
- // Pass newMessages for beta tracing - extraction happens in logging.ts
2963
- // only when beta tracing is enabled
2964
- newMessages,
2965
- llmSpan,
2966
- globalCacheStrategy,
2967
- requestSetupMs: start - startIncludingRetries,
2968
- attemptStartTimes,
2969
- fastMode: isFastModeRequest,
2970
- previousRequestId,
2971
- betas: lastRequestBetas,
2972
- })
2973
- })
2974
-
2975
- // Defensive: also release on normal completion (no-op if finally already ran).
2976
- releaseStreamResources()
2977
- }
2978
-
2979
- /**
2980
- * Cleans up stream resources to prevent memory leaks.
2981
- * @internal Exported for testing
2982
- */
2983
- export function cleanupStream(
2984
- stream: Stream<BetaRawMessageStreamEvent> | undefined,
2985
- ): void {
2986
- if (!stream) {
2987
- return
2988
- }
2989
- try {
2990
- // Abort the stream via its controller if not already aborted
2991
- if (!stream.controller.signal.aborted) {
2992
- stream.controller.abort()
2993
- }
2994
- } catch {
2995
- // Ignore - stream may already be closed
2996
- }
2997
- }
2998
-
2999
- /**
3000
- * Updates usage statistics with new values from streaming API events.
3001
- * Note: the upstream streaming API provides cumulative usage totals, not incremental deltas.
3002
- * (UMMAYA: byte-copied from CC where this read "Anthropic's streaming API"; FriendliAI's
3003
- * OpenAI-compatible streaming surface follows the same cumulative semantics, so the
3004
- * algorithm is unchanged — only the brand token is renamed.)
3005
- * Each event contains the complete usage up to that point in the stream.
3006
- *
3007
- * Input-related tokens (input_tokens, cache_creation_input_tokens, cache_read_input_tokens)
3008
- * are typically set in message_start and remain constant. message_delta events may send
3009
- * explicit 0 values for these fields, which should not overwrite the values from message_start.
3010
- * We only update these fields if they have a non-null, non-zero value.
3011
- */
3012
- export function updateUsage(
3013
- usage: Readonly<NonNullableUsage>,
3014
- partUsage: BetaMessageDeltaUsage | undefined,
3015
- ): NonNullableUsage {
3016
- if (!partUsage) {
3017
- return { ...usage }
3018
- }
3019
- return {
3020
- input_tokens:
3021
- partUsage.input_tokens !== null && partUsage.input_tokens > 0
3022
- ? partUsage.input_tokens
3023
- : usage.input_tokens,
3024
- cache_creation_input_tokens:
3025
- partUsage.cache_creation_input_tokens !== null &&
3026
- partUsage.cache_creation_input_tokens > 0
3027
- ? partUsage.cache_creation_input_tokens
3028
- : usage.cache_creation_input_tokens,
3029
- cache_read_input_tokens:
3030
- partUsage.cache_read_input_tokens !== null &&
3031
- partUsage.cache_read_input_tokens > 0
3032
- ? partUsage.cache_read_input_tokens
3033
- : usage.cache_read_input_tokens,
3034
- output_tokens: partUsage.output_tokens ?? usage.output_tokens,
3035
- server_tool_use: {
3036
- web_search_requests:
3037
- partUsage.server_tool_use?.web_search_requests ??
3038
- usage.server_tool_use.web_search_requests,
3039
- web_fetch_requests:
3040
- partUsage.server_tool_use?.web_fetch_requests ??
3041
- usage.server_tool_use.web_fetch_requests,
3042
- },
3043
- service_tier: usage.service_tier,
3044
- cache_creation: {
3045
- // SDK type BetaMessageDeltaUsage is missing cache_creation, but it's real!
3046
- ephemeral_1h_input_tokens:
3047
- (partUsage as BetaUsage).cache_creation?.ephemeral_1h_input_tokens ??
3048
- usage.cache_creation.ephemeral_1h_input_tokens,
3049
- ephemeral_5m_input_tokens:
3050
- (partUsage as BetaUsage).cache_creation?.ephemeral_5m_input_tokens ??
3051
- usage.cache_creation.ephemeral_5m_input_tokens,
3052
- },
3053
- // cache_deleted_input_tokens: returned by the API when cache editing
3054
- // deletes KV cache content, but not in SDK types. Kept off NonNullableUsage
3055
- // so the string is eliminated from external builds by dead code elimination.
3056
- // Uses the same > 0 guard as other token fields to prevent message_delta
3057
- // from overwriting the real value with 0.
3058
- ...(feature('CACHED_MICROCOMPACT')
3059
- ? {
3060
- cache_deleted_input_tokens:
3061
- (partUsage as unknown as { cache_deleted_input_tokens?: number })
3062
- .cache_deleted_input_tokens != null &&
3063
- (partUsage as unknown as { cache_deleted_input_tokens: number })
3064
- .cache_deleted_input_tokens > 0
3065
- ? (partUsage as unknown as { cache_deleted_input_tokens: number })
3066
- .cache_deleted_input_tokens
3067
- : ((usage as unknown as { cache_deleted_input_tokens?: number })
3068
- .cache_deleted_input_tokens ?? 0),
3069
- }
3070
- : {}),
3071
- inference_geo: usage.inference_geo,
3072
- iterations: partUsage.iterations ?? usage.iterations,
3073
- speed: (partUsage as BetaUsage).speed ?? usage.speed,
3074
- }
3075
- }
3076
-
3077
- /**
3078
- * Accumulates usage from one message into a total usage object.
3079
- * Used to track cumulative usage across multiple assistant turns.
3080
- */
3081
- export function accumulateUsage(
3082
- totalUsage: Readonly<NonNullableUsage>,
3083
- messageUsage: Readonly<NonNullableUsage>,
3084
- ): NonNullableUsage {
3085
- return {
3086
- input_tokens: totalUsage.input_tokens + messageUsage.input_tokens,
3087
- cache_creation_input_tokens:
3088
- totalUsage.cache_creation_input_tokens +
3089
- messageUsage.cache_creation_input_tokens,
3090
- cache_read_input_tokens:
3091
- totalUsage.cache_read_input_tokens + messageUsage.cache_read_input_tokens,
3092
- output_tokens: totalUsage.output_tokens + messageUsage.output_tokens,
3093
- server_tool_use: {
3094
- web_search_requests:
3095
- totalUsage.server_tool_use.web_search_requests +
3096
- messageUsage.server_tool_use.web_search_requests,
3097
- web_fetch_requests:
3098
- totalUsage.server_tool_use.web_fetch_requests +
3099
- messageUsage.server_tool_use.web_fetch_requests,
3100
- },
3101
- service_tier: messageUsage.service_tier, // Use the most recent service tier
3102
- cache_creation: {
3103
- ephemeral_1h_input_tokens:
3104
- totalUsage.cache_creation.ephemeral_1h_input_tokens +
3105
- messageUsage.cache_creation.ephemeral_1h_input_tokens,
3106
- ephemeral_5m_input_tokens:
3107
- totalUsage.cache_creation.ephemeral_5m_input_tokens +
3108
- messageUsage.cache_creation.ephemeral_5m_input_tokens,
3109
- },
3110
- // See comment in updateUsage — field is not on NonNullableUsage to keep
3111
- // the string out of external builds.
3112
- ...(feature('CACHED_MICROCOMPACT')
3113
- ? {
3114
- cache_deleted_input_tokens:
3115
- ((totalUsage as unknown as { cache_deleted_input_tokens?: number })
3116
- .cache_deleted_input_tokens ?? 0) +
3117
- ((
3118
- messageUsage as unknown as { cache_deleted_input_tokens?: number }
3119
- ).cache_deleted_input_tokens ?? 0),
3120
- }
3121
- : {}),
3122
- inference_geo: messageUsage.inference_geo, // Use the most recent
3123
- iterations: messageUsage.iterations, // Use the most recent
3124
- speed: messageUsage.speed, // Use the most recent
3125
- }
3126
- }
3127
-
3128
- function isToolResultBlock(
3129
- block: unknown,
3130
- ): block is { type: 'tool_result'; tool_use_id: string } {
3131
- return (
3132
- block !== null &&
3133
- typeof block === 'object' &&
3134
- 'type' in block &&
3135
- (block as { type: string }).type === 'tool_result' &&
3136
- 'tool_use_id' in block
3137
- )
3138
- }
3139
-
3140
- type CachedMCEditsBlock = {
3141
- type: 'cache_edits'
3142
- edits: { type: 'delete'; cache_reference: string }[]
3143
- }
3144
-
3145
- type CachedMCPinnedEdits = {
3146
- userMessageIndex: number
3147
- block: CachedMCEditsBlock
3148
- }
3149
-
3150
- // Exported for testing cache_reference placement constraints
3151
- export function addCacheBreakpoints(
3152
- messages: (UserMessage | AssistantMessage)[],
3153
- enablePromptCaching: boolean,
3154
- querySource?: QuerySource,
3155
- useCachedMC = false,
3156
- newCacheEdits?: CachedMCEditsBlock | null,
3157
- pinnedEdits?: CachedMCPinnedEdits[],
3158
- skipCacheWrite = false,
3159
- ): MessageParam[] {
3160
- logEvent('tengu_api_cache_breakpoints', {
3161
- totalMessageCount: messages.length,
3162
- cachingEnabled: enablePromptCaching,
3163
- skipCacheWrite,
3164
- })
3165
-
3166
- // Exactly one message-level cache_control marker per request. Mycro's
3167
- // turn-to-turn eviction (page_manager/index.rs: Index::insert) frees
3168
- // local-attention KV pages at any cached prefix position NOT in
3169
- // cache_store_int_token_boundaries. With two markers the second-to-last
3170
- // position is protected and its locals survive an extra turn even though
3171
- // nothing will ever resume from there — with one marker they're freed
3172
- // immediately. For fire-and-forget forks (skipCacheWrite) we shift the
3173
- // marker to the second-to-last message: that's the last shared-prefix
3174
- // point, so the write is a no-op merge on mycro (entry already exists)
3175
- // and the fork doesn't leave its own tail in the KVCC. Dense pages are
3176
- // refcounted and survive via the new hash either way.
3177
- const markerIndex = skipCacheWrite ? messages.length - 2 : messages.length - 1
3178
- const result = messages.map((msg, index) => {
3179
- const addCache = index === markerIndex
3180
- if (msg.type === 'user') {
3181
- return userMessageToMessageParam(
3182
- msg,
3183
- addCache,
3184
- enablePromptCaching,
3185
- querySource,
3186
- )
3187
- }
3188
- return assistantMessageToMessageParam(
3189
- msg,
3190
- addCache,
3191
- enablePromptCaching,
3192
- querySource,
3193
- )
3194
- })
3195
-
3196
- if (!useCachedMC) {
3197
- return result
3198
- }
3199
-
3200
- // Track all cache_references being deleted to prevent duplicates across blocks.
3201
- const seenDeleteRefs = new Set<string>()
3202
-
3203
- // Helper to deduplicate a cache_edits block against already-seen deletions
3204
- const deduplicateEdits = (block: CachedMCEditsBlock): CachedMCEditsBlock => {
3205
- const uniqueEdits = block.edits.filter(edit => {
3206
- if (seenDeleteRefs.has(edit.cache_reference)) {
3207
- return false
3208
- }
3209
- seenDeleteRefs.add(edit.cache_reference)
3210
- return true
3211
- })
3212
- return { ...block, edits: uniqueEdits }
3213
- }
3214
-
3215
- // Re-insert all previously-pinned cache_edits at their original positions
3216
- for (const pinned of pinnedEdits ?? []) {
3217
- const msg = result[pinned.userMessageIndex]
3218
- if (msg && msg.role === 'user') {
3219
- if (!Array.isArray(msg.content)) {
3220
- msg.content = [{ type: 'text', text: msg.content as string }]
3221
- }
3222
- const dedupedBlock = deduplicateEdits(pinned.block)
3223
- if (dedupedBlock.edits.length > 0) {
3224
- insertBlockAfterToolResults(msg.content, dedupedBlock)
3225
- }
3226
- }
3227
- }
3228
-
3229
- // Insert new cache_edits into the last user message and pin them
3230
- if (newCacheEdits && result.length > 0) {
3231
- const dedupedNewEdits = deduplicateEdits(newCacheEdits)
3232
- if (dedupedNewEdits.edits.length > 0) {
3233
- for (let i = result.length - 1; i >= 0; i--) {
3234
- const msg = result[i]
3235
- if (msg && msg.role === 'user') {
3236
- if (!Array.isArray(msg.content)) {
3237
- msg.content = [{ type: 'text', text: msg.content as string }]
3238
- }
3239
- insertBlockAfterToolResults(msg.content, dedupedNewEdits)
3240
- // Pin so this block is re-sent at the same position in future calls
3241
- pinCacheEdits(i, newCacheEdits)
3242
-
3243
- logForDebugging(
3244
- `Added cache_edits block with ${dedupedNewEdits.edits.length} deletion(s) to message[${i}]: ${dedupedNewEdits.edits.map(e => e.cache_reference).join(', ')}`,
3245
- )
3246
- break
3247
- }
3248
- }
3249
- }
3250
- }
3251
-
3252
- // Add cache_reference to tool_result blocks that are within the cached prefix.
3253
- // Must be done AFTER cache_edits insertion since that modifies content arrays.
3254
- if (enablePromptCaching) {
3255
- // Find the last message containing a cache_control marker
3256
- let lastCCMsg = -1
3257
- for (let i = 0; i < result.length; i++) {
3258
- const msg = result[i]!
3259
- if (Array.isArray(msg.content)) {
3260
- for (const block of msg.content) {
3261
- if (block && typeof block === 'object' && 'cache_control' in block) {
3262
- lastCCMsg = i
3263
- }
3264
- }
3265
- }
3266
- }
3267
-
3268
- // Add cache_reference to tool_result blocks that are strictly before
3269
- // the last cache_control marker. The API requires cache_reference to
3270
- // appear "before or on" the last cache_control — we use strict "before"
3271
- // to avoid edge cases where cache_edits splicing shifts block indices.
3272
- //
3273
- // Create new objects instead of mutating in-place to avoid contaminating
3274
- // blocks reused by secondary queries that use models without cache_editing support.
3275
- if (lastCCMsg >= 0) {
3276
- for (let i = 0; i < lastCCMsg; i++) {
3277
- const msg = result[i]!
3278
- if (msg.role !== 'user' || !Array.isArray(msg.content)) {
3279
- continue
3280
- }
3281
- let cloned = false
3282
- for (let j = 0; j < msg.content.length; j++) {
3283
- const block = msg.content[j]
3284
- if (block && isToolResultBlock(block)) {
3285
- if (!cloned) {
3286
- msg.content = [...msg.content]
3287
- cloned = true
3288
- }
3289
- msg.content[j] = Object.assign({}, block, {
3290
- cache_reference: block.tool_use_id,
3291
- })
3292
- }
3293
- }
3294
- }
3295
- }
3296
- }
3297
-
3298
- return result
3299
- }
3300
-
3301
- export function buildSystemPromptBlocks(
3302
- systemPrompt: SystemPrompt,
3303
- enablePromptCaching: boolean,
3304
- options?: {
3305
- skipGlobalCacheForSystemPrompt?: boolean
3306
- querySource?: QuerySource
3307
- },
3308
- ): TextBlockParam[] {
3309
- // IMPORTANT: Do not add any more blocks for caching or you will get a 400
3310
- return splitSysPromptPrefix(systemPrompt, {
3311
- skipGlobalCacheForSystemPrompt: options?.skipGlobalCacheForSystemPrompt,
3312
- }).map(block => {
3313
- return {
3314
- type: 'text' as const,
3315
- text: block.text,
3316
- ...(enablePromptCaching &&
3317
- block.cacheScope !== null && {
3318
- cache_control: getCacheControl({
3319
- scope: block.cacheScope,
3320
- querySource: options?.querySource,
3321
- }),
3322
- }),
3323
- }
3324
- })
3325
- }
3326
-
3327
- type HaikuOptions = Omit<Options, 'model' | 'getToolPermissionContext'>
3328
-
3329
- export async function queryHaiku({
3330
- systemPrompt = asSystemPrompt([]),
3331
- userPrompt,
3332
- outputFormat,
3333
- signal,
3334
- options,
3335
- }: {
3336
- systemPrompt: SystemPrompt
3337
- userPrompt: string
3338
- outputFormat?: BetaJSONOutputFormat
3339
- signal: AbortSignal
3340
- options: HaikuOptions
3341
- }): Promise<AssistantMessage> {
3342
- const result = await withVCR(
3343
- [
3344
- createUserMessage({
3345
- content: systemPrompt.map(text => ({ type: 'text', text })),
3346
- }),
3347
- createUserMessage({
3348
- content: userPrompt,
3349
- }),
3350
- ],
3351
- async () => {
3352
- const messages = [
3353
- createUserMessage({
3354
- content: userPrompt,
3355
- }),
3356
- ]
3357
-
3358
- const result = await queryModelWithoutStreaming({
3359
- messages,
3360
- systemPrompt,
3361
- thinkingConfig: { type: 'disabled' },
3362
- tools: [],
3363
- signal,
3364
- options: {
3365
- ...options,
3366
- model: getSmallFastModel(),
3367
- enablePromptCaching: options.enablePromptCaching ?? false,
3368
- outputFormat,
3369
- async getToolPermissionContext() {
3370
- return getEmptyToolPermissionContext()
3371
- },
3372
- },
3373
- })
3374
- return [result]
3375
- },
3376
- )
3377
- // We don't use streaming for Haiku so this is safe
3378
- return result[0]! as AssistantMessage
3379
- }
3380
-
3381
- type QueryWithModelOptions = Omit<Options, 'getToolPermissionContext'>
3382
-
3383
- /**
3384
- * Query a specific model through the UMMAYA infrastructure.
3385
- * (Originally "Claude Code infrastructure" in the CC byte-copy; UMMAYA renames
3386
- * the citizen-visible doc string but preserves the function shape so future
3387
- * audit replays diff cleanly against CC. swap/identifier-rename(2521).)
3388
- * This goes through the full query pipeline including proper authentication,
3389
- * betas, and headers - unlike direct API calls.
3390
- */
3391
- export async function queryWithModel({
3392
- systemPrompt = asSystemPrompt([]),
3393
- userPrompt,
3394
- outputFormat,
3395
- signal,
3396
- options,
3397
- }: {
3398
- systemPrompt: SystemPrompt
3399
- userPrompt: string
3400
- outputFormat?: BetaJSONOutputFormat
3401
- signal: AbortSignal
3402
- options: QueryWithModelOptions
3403
- }): Promise<AssistantMessage> {
3404
- const result = await withVCR(
3405
- [
3406
- createUserMessage({
3407
- content: systemPrompt.map(text => ({ type: 'text', text })),
3408
- }),
3409
- createUserMessage({
3410
- content: userPrompt,
3411
- }),
3412
- ],
3413
- async () => {
3414
- const messages = [
3415
- createUserMessage({
3416
- content: userPrompt,
3417
- }),
3418
- ]
3419
-
3420
- const result = await queryModelWithoutStreaming({
3421
- messages,
3422
- systemPrompt,
3423
- thinkingConfig: { type: 'disabled' },
3424
- tools: [],
3425
- signal,
3426
- options: {
3427
- ...options,
3428
- enablePromptCaching: options.enablePromptCaching ?? false,
3429
- outputFormat,
3430
- async getToolPermissionContext() {
3431
- return getEmptyToolPermissionContext()
3432
- },
3433
- },
3434
- })
3435
- return [result]
3436
- },
3437
- )
3438
- return result[0]! as AssistantMessage
3439
- }
3440
-
3441
- // Non-streaming requests have a 10min max per the docs:
3442
- // https://platform.claude.com/docs/en/api/errors#long-requests
3443
- // The SDK's 21333-token cap is derived from 10min × 128k tokens/hour, but we
3444
- // bypass it by setting a client-level timeout, so we can cap higher.
3445
- export const MAX_NON_STREAMING_TOKENS = 64_000
3446
-
3447
- /**
3448
- * Adjusts thinking budget when max_tokens is capped for non-streaming fallback.
3449
- * Ensures the API constraint: max_tokens > thinking.budget_tokens
3450
- *
3451
- * @param params - The parameters that will be sent to the API
3452
- * @param maxTokensCap - The maximum allowed tokens (MAX_NON_STREAMING_TOKENS)
3453
- * @returns Adjusted parameters with thinking budget capped if needed
3454
- */
3455
- export function adjustParamsForNonStreaming<
3456
- T extends {
3457
- max_tokens: number
3458
- thinking?: BetaMessageStreamParams['thinking']
3459
- },
3460
- >(params: T, maxTokensCap: number): T {
3461
- const cappedMaxTokens = Math.min(params.max_tokens, maxTokensCap)
3462
-
3463
- // Adjust thinking budget if it would exceed capped max_tokens
3464
- // to maintain the constraint: max_tokens > thinking.budget_tokens
3465
- const adjustedParams = { ...params }
3466
- if (
3467
- adjustedParams.thinking?.type === 'enabled' &&
3468
- adjustedParams.thinking.budget_tokens
3469
- ) {
3470
- adjustedParams.thinking = {
3471
- ...adjustedParams.thinking,
3472
- budget_tokens: Math.min(
3473
- adjustedParams.thinking.budget_tokens,
3474
- cappedMaxTokens - 1, // Must be at least 1 less than max_tokens
3475
- ),
3476
- }
3477
- }
3478
-
3479
- return {
3480
- ...adjustedParams,
3481
- max_tokens: cappedMaxTokens,
3482
- }
3483
- }
3484
-
3485
- function isMaxTokensCapEnabled(): boolean {
3486
- // 3P default: false (not validated on Bedrock/Vertex)
3487
- return getFeatureValue_CACHED_MAY_BE_STALE('tengu_otk_slot_v1', false)
3488
- }
3489
-
3490
- export function getMaxOutputTokensForModel(model: string): number {
3491
- const maxOutputTokens = getModelMaxOutputTokens(model)
3492
-
3493
- // Slot-reservation cap: drop default to 8k for all models. BQ p99 output
3494
- // = 4,911 tokens; 32k/64k defaults over-reserve 8-16× slot capacity.
3495
- // Requests hitting the cap get one clean retry at 64k (query.ts
3496
- // max_output_tokens_escalate). Math.min keeps models with lower native
3497
- // defaults (e.g. claude-3-opus at 4k) at their native value. Applied
3498
- // before the env-var override so CLAUDE_CODE_MAX_OUTPUT_TOKENS still wins.
3499
- const defaultTokens = isMaxTokensCapEnabled()
3500
- ? Math.min(maxOutputTokens.default, CAPPED_DEFAULT_MAX_TOKENS)
3501
- : maxOutputTokens.default
3502
-
3503
- const result = validateBoundedIntEnvVar(
3504
- 'CLAUDE_CODE_MAX_OUTPUT_TOKENS',
3505
- process.env.CLAUDE_CODE_MAX_OUTPUT_TOKENS,
3506
- defaultTokens,
3507
- maxOutputTokens.upperLimit,
3508
- )
3509
- return result.effective
3510
- }