multi-forge 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (311) hide show
  1. forge/__init__.py +3 -0
  2. forge/_extensions/agents/.gitkeep +0 -0
  3. forge/_extensions/commands/.gitkeep +0 -0
  4. forge/_extensions/skills/analyze/SKILL.md +87 -0
  5. forge/_extensions/skills/challenge/SKILL.md +91 -0
  6. forge/_extensions/skills/consensus/SKILL.md +120 -0
  7. forge/_extensions/skills/consensus/resources/code_consensus_evaluation.md +94 -0
  8. forge/_extensions/skills/consensus/resources/consensus_evaluation.md +70 -0
  9. forge/_extensions/skills/consensus/resources/synthesis.md +101 -0
  10. forge/_extensions/skills/debate/SKILL.md +116 -0
  11. forge/_extensions/skills/debate/resources/code_debate_evaluation.md +101 -0
  12. forge/_extensions/skills/debate/resources/debate_evaluation.md +90 -0
  13. forge/_extensions/skills/panel/SKILL.md +141 -0
  14. forge/_extensions/skills/panel/resources/synthesis.md +103 -0
  15. forge/_extensions/skills/qa/SKILL.md +704 -0
  16. forge/_extensions/skills/qa/resources/checklist/0-enable.md +78 -0
  17. forge/_extensions/skills/qa/resources/checklist/1-preflight.md +24 -0
  18. forge/_extensions/skills/qa/resources/checklist/10-resume.md +143 -0
  19. forge/_extensions/skills/qa/resources/checklist/11-config.md +150 -0
  20. forge/_extensions/skills/qa/resources/checklist/12-search.md +58 -0
  21. forge/_extensions/skills/qa/resources/checklist/13-guard.md +237 -0
  22. forge/_extensions/skills/qa/resources/checklist/14-workflow.md +305 -0
  23. forge/_extensions/skills/qa/resources/checklist/15-skills.md +155 -0
  24. forge/_extensions/skills/qa/resources/checklist/16-handoff.md +224 -0
  25. forge/_extensions/skills/qa/resources/checklist/17-info.md +50 -0
  26. forge/_extensions/skills/qa/resources/checklist/18-disable.md +84 -0
  27. forge/_extensions/skills/qa/resources/checklist/19-uninstall.md +146 -0
  28. forge/_extensions/skills/qa/resources/checklist/2-extensions.md +188 -0
  29. forge/_extensions/skills/qa/resources/checklist/20-cleanup.md +36 -0
  30. forge/_extensions/skills/qa/resources/checklist/3-auth.md +234 -0
  31. forge/_extensions/skills/qa/resources/checklist/4-proxy.md +481 -0
  32. forge/_extensions/skills/qa/resources/checklist/5-session.md +541 -0
  33. forge/_extensions/skills/qa/resources/checklist/6-hooks.md +275 -0
  34. forge/_extensions/skills/qa/resources/checklist/7-costs.md +309 -0
  35. forge/_extensions/skills/qa/resources/checklist/8-status-line.md +174 -0
  36. forge/_extensions/skills/qa/resources/checklist/9-direct-commands.md +146 -0
  37. forge/_extensions/skills/qa/resources/checklist.md +103 -0
  38. forge/_extensions/skills/qa/resources/report-template.md +62 -0
  39. forge/_extensions/skills/qa/scripts/start-container.sh +529 -0
  40. forge/_extensions/skills/qa/scripts/walkthrough-state.py +1137 -0
  41. forge/_extensions/skills/review/SKILL.md +125 -0
  42. forge/_extensions/skills/review/references/claude-4.6.md +474 -0
  43. forge/_extensions/skills/review/references/claude-4.7.md +710 -0
  44. forge/_extensions/skills/review/references/gemini-3.1.md +546 -0
  45. forge/_extensions/skills/review/references/gpt-5.5.md +490 -0
  46. forge/_extensions/skills/review/references/skills-writing-guide.md +1588 -0
  47. forge/_extensions/skills/review/resources/code-anthropic.md +160 -0
  48. forge/_extensions/skills/review/resources/code-gemini.md +184 -0
  49. forge/_extensions/skills/review/resources/code-openai.md +203 -0
  50. forge/_extensions/skills/review/resources/code.md +160 -0
  51. forge/_extensions/skills/review-docs/SKILL.md +121 -0
  52. forge/_extensions/skills/review-docs/resources/docs-anthropic.md +170 -0
  53. forge/_extensions/skills/review-docs/resources/docs-gemini.md +204 -0
  54. forge/_extensions/skills/review-docs/resources/docs-openai.md +231 -0
  55. forge/_extensions/skills/review-docs/resources/docs.md +170 -0
  56. forge/_extensions/skills/smoke-test/SKILL.md +27 -0
  57. forge/_extensions/skills/smoke-test/scripts/smoke-test.sh +118 -0
  58. forge/_extensions/skills/understand/SKILL.md +148 -0
  59. forge/_extensions/skills/understand/resources/code-anthropic.md +163 -0
  60. forge/_extensions/skills/understand/resources/code-gemini.md +194 -0
  61. forge/_extensions/skills/understand/resources/code-openai.md +181 -0
  62. forge/_extensions/skills/understand/resources/code.md +163 -0
  63. forge/_extensions/skills/understand/resources/docs-anthropic.md +177 -0
  64. forge/_extensions/skills/understand/resources/docs-gemini.md +202 -0
  65. forge/_extensions/skills/understand/resources/docs-openai.md +191 -0
  66. forge/_extensions/skills/understand/resources/docs.md +177 -0
  67. forge/_extensions/skills/walkthrough/SKILL.md +599 -0
  68. forge/_extensions/skills/walkthrough/resources/checklist.md +765 -0
  69. forge/_extensions/skills/walkthrough/scripts/run-in-repo.sh +118 -0
  70. forge/_extensions/skills/walkthrough/scripts/setup-test-repo.sh +198 -0
  71. forge/_extensions/skills/walkthrough/scripts/walkthrough-state.py +1137 -0
  72. forge/backend/__init__.py +174 -0
  73. forge/backend/adapters/__init__.py +38 -0
  74. forge/backend/adapters/litellm.py +158 -0
  75. forge/backend/creation.py +89 -0
  76. forge/backend/registry.py +178 -0
  77. forge/cli/__init__.py +16 -0
  78. forge/cli/auth.py +483 -0
  79. forge/cli/backend.py +298 -0
  80. forge/cli/claude.py +411 -0
  81. forge/cli/config_cmd.py +303 -0
  82. forge/cli/extensions.py +1001 -0
  83. forge/cli/gc.py +165 -0
  84. forge/cli/guard.py +1018 -0
  85. forge/cli/guards.py +106 -0
  86. forge/cli/handoff.py +110 -0
  87. forge/cli/hooks/__init__.py +36 -0
  88. forge/cli/hooks/_group.py +20 -0
  89. forge/cli/hooks/_helpers.py +149 -0
  90. forge/cli/hooks/commands.py +1677 -0
  91. forge/cli/hooks/direct_commands.py +1304 -0
  92. forge/cli/hooks/install.py +232 -0
  93. forge/cli/hooks/policy.py +151 -0
  94. forge/cli/hooks/read_hygiene.py +74 -0
  95. forge/cli/hooks/verification.py +370 -0
  96. forge/cli/logs.py +406 -0
  97. forge/cli/main.py +292 -0
  98. forge/cli/proxy.py +1821 -0
  99. forge/cli/proxy_costs.py +313 -0
  100. forge/cli/search.py +416 -0
  101. forge/cli/session.py +892 -0
  102. forge/cli/session_addendum.py +81 -0
  103. forge/cli/session_fork.py +750 -0
  104. forge/cli/session_handoff.py +141 -0
  105. forge/cli/session_lifecycle.py +2053 -0
  106. forge/cli/session_manage.py +1336 -0
  107. forge/cli/session_memory.py +201 -0
  108. forge/cli/status_line.py +1398 -0
  109. forge/cli/workflow.py +1964 -0
  110. forge/config/__init__.py +110 -0
  111. forge/config/dataclass_utils.py +88 -0
  112. forge/config/defaults/__init__.py +0 -0
  113. forge/config/defaults/backends/__init__.py +0 -0
  114. forge/config/defaults/backends/litellm.yaml +196 -0
  115. forge/config/defaults/templates/__init__.py +0 -0
  116. forge/config/defaults/templates/litellm-anthropic-local.yaml +33 -0
  117. forge/config/defaults/templates/litellm-anthropic.yaml +24 -0
  118. forge/config/defaults/templates/litellm-gemini-flash-local.yaml +37 -0
  119. forge/config/defaults/templates/litellm-gemini-local.yaml +32 -0
  120. forge/config/defaults/templates/litellm-gemini-test.yaml +34 -0
  121. forge/config/defaults/templates/litellm-gemini.yaml +21 -0
  122. forge/config/defaults/templates/litellm-openai-codex-local.yaml +36 -0
  123. forge/config/defaults/templates/litellm-openai-local.yaml +38 -0
  124. forge/config/defaults/templates/litellm-openai.yaml +28 -0
  125. forge/config/defaults/templates/openrouter-anthropic.yaml +23 -0
  126. forge/config/defaults/templates/openrouter-deepseek.yaml +26 -0
  127. forge/config/defaults/templates/openrouter-gemini-flash.yaml +26 -0
  128. forge/config/defaults/templates/openrouter-gemini.yaml +23 -0
  129. forge/config/defaults/templates/openrouter-glm.yaml +23 -0
  130. forge/config/defaults/templates/openrouter-kimi.yaml +30 -0
  131. forge/config/defaults/templates/openrouter-minimax.yaml +26 -0
  132. forge/config/defaults/templates/openrouter-openai-codex.yaml +23 -0
  133. forge/config/defaults/templates/openrouter-openai.yaml +28 -0
  134. forge/config/defaults/templates/openrouter-qwen.yaml +25 -0
  135. forge/config/loader.py +675 -0
  136. forge/config/schema.py +448 -0
  137. forge/core/__init__.py +5 -0
  138. forge/core/auth/__init__.py +67 -0
  139. forge/core/auth/capabilities.py +219 -0
  140. forge/core/auth/credentials_file.py +244 -0
  141. forge/core/auth/protocols.py +18 -0
  142. forge/core/auth/secrets.py +243 -0
  143. forge/core/auth/template_secrets.py +112 -0
  144. forge/core/data/__init__.py +5 -0
  145. forge/core/data/model_catalog.yaml +1522 -0
  146. forge/core/data/pricing.yaml +140 -0
  147. forge/core/data/system_prompt_addendums/__init__.py +0 -0
  148. forge/core/data/system_prompt_addendums/gemini.md +330 -0
  149. forge/core/data/system_prompt_addendums/openai.md +328 -0
  150. forge/core/llm/__init__.py +231 -0
  151. forge/core/llm/clients/__init__.py +14 -0
  152. forge/core/llm/clients/base.py +115 -0
  153. forge/core/llm/clients/litellm.py +619 -0
  154. forge/core/llm/clients/openai_compat.py +244 -0
  155. forge/core/llm/clients/openrouter.py +234 -0
  156. forge/core/llm/credentials.py +439 -0
  157. forge/core/llm/detection.py +86 -0
  158. forge/core/llm/errors.py +44 -0
  159. forge/core/llm/protocols.py +80 -0
  160. forge/core/llm/types.py +176 -0
  161. forge/core/logging.py +146 -0
  162. forge/core/models/__init__.py +91 -0
  163. forge/core/models/catalog.py +467 -0
  164. forge/core/models/pricing.py +165 -0
  165. forge/core/models/types.py +167 -0
  166. forge/core/naming.py +212 -0
  167. forge/core/ops/__init__.py +73 -0
  168. forge/core/ops/context.py +141 -0
  169. forge/core/ops/gc.py +802 -0
  170. forge/core/ops/proxy.py +146 -0
  171. forge/core/ops/resolution.py +135 -0
  172. forge/core/ops/session.py +344 -0
  173. forge/core/ops/session_context.py +548 -0
  174. forge/core/paths.py +38 -0
  175. forge/core/process.py +54 -0
  176. forge/core/reactive/__init__.py +38 -0
  177. forge/core/reactive/cost_tracking.py +300 -0
  178. forge/core/reactive/env.py +180 -0
  179. forge/core/reactive/proxy.py +78 -0
  180. forge/core/reactive/routing.py +622 -0
  181. forge/core/reactive/session_runner.py +185 -0
  182. forge/core/reactive/structured_output.py +62 -0
  183. forge/core/reactive/tagger.py +94 -0
  184. forge/core/reactive/throttle.py +132 -0
  185. forge/core/state/__init__.py +59 -0
  186. forge/core/state/exceptions.py +59 -0
  187. forge/core/state/io.py +140 -0
  188. forge/core/state/lock.py +99 -0
  189. forge/core/state/timestamps.py +60 -0
  190. forge/core/transcript.py +78 -0
  191. forge/core/typing_helpers.py +24 -0
  192. forge/core/workqueue/__init__.py +67 -0
  193. forge/core/workqueue/queue.py +552 -0
  194. forge/core/workqueue/types.py +63 -0
  195. forge/guard/__init__.py +26 -0
  196. forge/guard/deterministic/__init__.py +26 -0
  197. forge/guard/deterministic/base.py +158 -0
  198. forge/guard/deterministic/coding_standards.py +256 -0
  199. forge/guard/deterministic/registry.py +148 -0
  200. forge/guard/deterministic/tdd.py +171 -0
  201. forge/guard/engine.py +216 -0
  202. forge/guard/protocols.py +91 -0
  203. forge/guard/queries.py +96 -0
  204. forge/guard/semantic/__init__.py +34 -0
  205. forge/guard/semantic/promotion.py +18 -0
  206. forge/guard/semantic/supervisor.py +813 -0
  207. forge/guard/semantic/verdict.py +183 -0
  208. forge/guard/store.py +124 -0
  209. forge/guard/team/__init__.py +6 -0
  210. forge/guard/team/config.py +24 -0
  211. forge/guard/team/handlers.py +209 -0
  212. forge/guard/team/prompts.py +41 -0
  213. forge/guard/types.py +125 -0
  214. forge/guard/workflow/__init__.py +17 -0
  215. forge/guard/workflow/branches.py +67 -0
  216. forge/guard/workflow/config.py +63 -0
  217. forge/guard/workflow/divergence.py +113 -0
  218. forge/guard/workflow/policy.py +87 -0
  219. forge/guard/workflow/stages.py +205 -0
  220. forge/install/__init__.py +55 -0
  221. forge/install/cli.py +281 -0
  222. forge/install/exceptions.py +163 -0
  223. forge/install/hooks.py +109 -0
  224. forge/install/installer.py +1037 -0
  225. forge/install/models.py +321 -0
  226. forge/install/preset.py +272 -0
  227. forge/install/settings_merge.py +831 -0
  228. forge/install/tracking.py +238 -0
  229. forge/install/version.py +141 -0
  230. forge/proxy/__init__.py +0 -0
  231. forge/proxy/base_client.py +181 -0
  232. forge/proxy/client_adapter.py +476 -0
  233. forge/proxy/client_factory.py +531 -0
  234. forge/proxy/converters.py +1206 -0
  235. forge/proxy/cost_logger.py +132 -0
  236. forge/proxy/cost_tracker.py +242 -0
  237. forge/proxy/data_models.py +338 -0
  238. forge/proxy/error_hints.py +92 -0
  239. forge/proxy/metrics.py +222 -0
  240. forge/proxy/model_spec.py +158 -0
  241. forge/proxy/proxies.py +333 -0
  242. forge/proxy/proxy_identity.py +134 -0
  243. forge/proxy/proxy_orchestrator.py +1018 -0
  244. forge/proxy/proxy_startup.py +54 -0
  245. forge/proxy/server.py +1561 -0
  246. forge/proxy/utils.py +537 -0
  247. forge/review/__init__.py +6 -0
  248. forge/review/adversarial.py +111 -0
  249. forge/review/consensus.py +236 -0
  250. forge/review/engine.py +356 -0
  251. forge/review/models.py +437 -0
  252. forge/review/resources/__init__.py +5 -0
  253. forge/review/resources/codereview-performance.md +85 -0
  254. forge/review/resources/codereview-quick.md +75 -0
  255. forge/review/resources/codereview-security.md +92 -0
  256. forge/review/resources/codereview.md +85 -0
  257. forge/review/resources/docreview-quick.md +75 -0
  258. forge/review/resources/docreview.md +86 -0
  259. forge/review/resources/thinkdeep.md +89 -0
  260. forge/review/routing.py +368 -0
  261. forge/review/synthesis.py +73 -0
  262. forge/runtime_config.py +438 -0
  263. forge/search/__init__.py +55 -0
  264. forge/search/bm25_store.py +264 -0
  265. forge/search/content_store.py +197 -0
  266. forge/search/engine.py +352 -0
  267. forge/search/exceptions.py +51 -0
  268. forge/search/extractor.py +234 -0
  269. forge/search/index_state.py +295 -0
  270. forge/search/store.py +215 -0
  271. forge/search/tokenizer.py +24 -0
  272. forge/session/__init__.py +130 -0
  273. forge/session/active.py +339 -0
  274. forge/session/artifacts.py +202 -0
  275. forge/session/claude/__init__.py +50 -0
  276. forge/session/claude/cleanup.py +105 -0
  277. forge/session/claude/invoke.py +236 -0
  278. forge/session/claude/paths.py +200 -0
  279. forge/session/cleanup.py +216 -0
  280. forge/session/config.py +34 -0
  281. forge/session/direct_model.py +107 -0
  282. forge/session/effective.py +169 -0
  283. forge/session/exceptions.py +255 -0
  284. forge/session/handoff.py +881 -0
  285. forge/session/handoff_agent.py +544 -0
  286. forge/session/hooks/__init__.py +35 -0
  287. forge/session/hooks/models.py +73 -0
  288. forge/session/hooks/session_start.py +507 -0
  289. forge/session/identity.py +84 -0
  290. forge/session/index.py +553 -0
  291. forge/session/manager.py +1506 -0
  292. forge/session/models.py +572 -0
  293. forge/session/overrides.py +344 -0
  294. forge/session/plan_resolution.py +286 -0
  295. forge/session/prev_sessions.py +128 -0
  296. forge/session/store.py +431 -0
  297. forge/session/validation.py +47 -0
  298. forge/session/worktree/__init__.py +65 -0
  299. forge/session/worktree/cleanup.py +262 -0
  300. forge/session/worktree/config_copy.py +203 -0
  301. forge/session/worktree/create.py +332 -0
  302. forge/sidecar/__init__.py +29 -0
  303. forge/sidecar/container.py +161 -0
  304. forge/sidecar/docker.py +86 -0
  305. forge/sidecar/secrets.py +19 -0
  306. multi_forge-0.2.0.dist-info/METADATA +242 -0
  307. multi_forge-0.2.0.dist-info/RECORD +311 -0
  308. multi_forge-0.2.0.dist-info/WHEEL +4 -0
  309. multi_forge-0.2.0.dist-info/entry_points.txt +2 -0
  310. multi_forge-0.2.0.dist-info/licenses/LICENSE +203 -0
  311. multi_forge-0.2.0.dist-info/licenses/NOTICE +14 -0
@@ -0,0 +1,1206 @@
1
+ """Format conversion utilities between Anthropic and OpenAI APIs.
2
+
3
+ This module handles the conversion between two API formats:
4
+ 1. Anthropic Claude API format (client-facing)
5
+ 2. OpenAI format (backend - used by LiteLLM)
6
+
7
+ Conversion Flow:
8
+ - Request: Anthropic → OpenAI
9
+ - Response: OpenAI → Anthropic
10
+
11
+ Key Components:
12
+ - Tool description enhancement with usage examples
13
+ - Streaming and non-streaming response handling
14
+ - Comprehensive tool event logging for diagnostics
15
+ """
16
+
17
+ import asyncio
18
+ import json
19
+ import logging
20
+ import traceback
21
+ import uuid
22
+ from typing import Any, AsyncGenerator, Callable, Dict, List, Literal, Optional, Union
23
+
24
+ from forge.proxy.data_models import (
25
+ ContentBlock,
26
+ ContentBlockText,
27
+ ContentBlockToolUse,
28
+ MessagesRequest,
29
+ MessagesResponse,
30
+ Usage,
31
+ )
32
+ from forge.proxy.utils import (
33
+ log_tool_event,
34
+ smart_format_str,
35
+ )
36
+
37
logger = logging.getLogger(__name__)

# Callback signature: on_complete(usage, failed, error_type).
# Called when the SSE stream finishes. Positional arguments, in order:
#   usage      -- Dict[str, int]
#   failed     -- bool
#   error_type -- Optional[str]
_OnCompleteCallback = Callable[[Dict[str, int], bool, Optional[str]], None]
41
+
42
+
43
# Tool parameters that non-Claude models compulsively fill with empty values.
# Stripped before forwarding to Claude Code to prevent validation errors.
#
# Shape: {tool name: {parameter name: tuple of values treated as "empty"}}.
# Only tools listed here are sanitized (and have their streamed arguments
# buffered, see _should_buffer_streaming_tool_args).
_STRIP_EMPTY_PARAMS: dict[str, dict[str, tuple[Any, ...]]] = {
    "Read": {
        "pages": (None, "", 0),
        # Claude Code treats these as optional sectioning controls. GPT models
        # often send 0 as a placeholder for "unset", which is better omitted.
        "offset": (None, "", 0),
        "limit": (None, "", 0),
    },
}
54
+
55
+
56
+ def _is_pdf_path(value: Any) -> bool:
57
+ # Claude Code's Read tool accepts filesystem paths, so extension detection is sufficient here.
58
+ return isinstance(value, str) and value.lower().endswith(".pdf")
59
+
60
+
61
def _should_buffer_streaming_tool_args(tool_name: str | None) -> bool:
    """Return True when *tool_name* has an entry in ``_STRIP_EMPTY_PARAMS``."""
    has_strip_rules = tool_name in _STRIP_EMPTY_PARAMS
    return has_strip_rules
63
+
64
+
65
def sanitize_tool_input_with_report(tool_name: str, tool_input: dict[str, Any]) -> tuple[dict[str, Any], list[str]]:
    """Remove placeholder optional parameters and list the names that were removed.

    GPT-5.5 fills optional schema fields even when not needed (for example,
    pages="" or pages="1" on Read for non-PDF files). Claude Code rejects
    these, causing an unrecoverable retry loop.

    Args:
        tool_name: Name of the tool the arguments belong to.
        tool_input: Parsed tool arguments. Never mutated.

    Returns:
        A ``(cleaned_input, stripped_names)`` tuple. When the tool has no
        strip rules, the original ``tool_input`` object is returned unchanged
        together with an empty list; otherwise ``cleaned_input`` is a shallow copy.
    """
    rules = _STRIP_EMPTY_PARAMS.get(tool_name)
    if not rules:
        return tool_input, []

    result = dict(tool_input)
    removed: list[str] = []

    # "pages" is dropped for non-PDF targets regardless of its value.
    if tool_name == "Read" and "pages" in result and not _is_pdf_path(result.get("file_path")):
        del result["pages"]
        removed.append("pages")

    # Drop any remaining parameter whose value is one of its "empty" placeholders.
    for name, placeholders in rules.items():
        if name not in result:
            continue
        if result[name] in placeholders:
            del result[name]
            removed.append(name)

    return result, removed
92
+
93
+
94
def sanitize_tool_input(tool_name: str, tool_input: dict[str, Any]) -> dict[str, Any]:
    """Return *tool_input* without the optional parameters non-Claude models add compulsively.

    Thin wrapper around ``sanitize_tool_input_with_report`` that discards the
    list of stripped parameter names.
    """
    return sanitize_tool_input_with_report(tool_name, tool_input)[0]
98
+
99
+
100
def _sanitize_tool_arguments_json(tool_name: str | None, args_json: str) -> str:
    """Return sanitized tool-call JSON arguments; malformed JSON is passed through as-is.

    Thin wrapper around ``_sanitize_tool_arguments_json_with_report`` that
    discards the list of stripped parameter names.
    """
    return _sanitize_tool_arguments_json_with_report(tool_name, args_json)[0]
104
+
105
+
106
+ def _sanitize_tool_arguments_json_with_report(tool_name: str | None, args_json: str) -> tuple[str, list[str]]:
107
+ """Sanitize complete tool-call JSON arguments and report stripped parameter names."""
108
+ if not tool_name or tool_name not in _STRIP_EMPTY_PARAMS or not args_json:
109
+ return args_json, []
110
+ try:
111
+ parsed = json.loads(args_json)
112
+ except json.JSONDecodeError:
113
+ return args_json, []
114
+ if not isinstance(parsed, dict):
115
+ return args_json, []
116
+ cleaned, stripped_params = sanitize_tool_input_with_report(tool_name, parsed)
117
+ return json.dumps(cleaned, separators=(",", ":")), stripped_params
118
+
119
+
120
+ def _schedule_tool_args_sanitized_event(
121
+ request_id: str,
122
+ tool_name: str | None,
123
+ stripped_params: list[str],
124
+ *,
125
+ tool_id: str | None,
126
+ streaming: bool,
127
+ block_index: int | None = None,
128
+ ) -> None:
129
+ """Emit debug-only telemetry when proxy sanitization changes model-generated tool args."""
130
+ if not tool_name or not stripped_params:
131
+ return
132
+
133
+ details: dict[str, Any] = {
134
+ "event": "tool_args_sanitized",
135
+ "streaming": streaming,
136
+ "stripped_params": stripped_params,
137
+ }
138
+ if tool_id is not None:
139
+ details["tool_id"] = tool_id
140
+ if block_index is not None:
141
+ details["block_index"] = block_index
142
+
143
+ asyncio.create_task(
144
+ log_tool_event(
145
+ request_id=request_id,
146
+ tool_name=tool_name,
147
+ status="success",
148
+ stage="client_response",
149
+ details=details,
150
+ )
151
+ )
152
+
153
+
154
# Usage examples appended to the descriptions of tools that have shown high
# failure rates. Keyed by exact tool name.
_TOOL_USAGE_EXAMPLES: Dict[str, str] = {
    "Batch": (
        "\n\nEXAMPLE USAGE (Always include the invocations array):\n"
        "{\n"
        ' "description": "Run multiple tools in parallel",\n'
        ' "invocations": [ // REQUIRED: Array of tool invocations to execute\n'
        " {\n"
        ' "tool_name": "Read", // Name of the tool to invoke\n'
        ' "input": { // Parameters for the tool\n'
        ' "file_path": "/path/to/file.txt"\n'
        " }\n"
        " },\n"
        " {\n"
        ' "tool_name": "Grep",\n'
        ' "input": {\n'
        ' "pattern": "search term",\n'
        ' "include": "*.py"\n'
        " }\n"
        " }\n"
        " ]\n"
        "}"
    ),
    "Edit": (
        "\n\nEXAMPLE USAGE:\n"
        "{\n"
        ' "file_path": "/path/to/file.py", // REQUIRED: Absolute path to the file\n'
        ' "old_string": "def old_function(x, y):\\n return x + y", // REQUIRED: Exact text to replace\n'
        ' "new_string": "def old_function(x, y):\\n # Add comment\\n return x + y", // REQUIRED: New text\n'
        ' "expected_replacements": 1 // Optional: Number of replacements to perform\n'
        "}"
    ),
    "Read": (
        "\n\nEXAMPLE USAGE:\n"
        "{\n"
        ' "file_path": "/path/to/file.txt" // REQUIRED: Absolute path to the file\n'
        "}"
    ),
    "Write": (
        "\n\nEXAMPLE USAGE:\n"
        "{\n"
        ' "file_path": "/path/to/file.txt", // REQUIRED: Absolute path to the file\n'
        ' "content": "Contents to write to the file" // REQUIRED: Content to write\n'
        "}"
    ),
    "Glob": (
        "\n\nEXAMPLE USAGE:\n"
        "{\n"
        ' "pattern": "**/*.py" // REQUIRED: The glob pattern to match files against\n'
        "}"
    ),
    "Grep": (
        "\n\nEXAMPLE USAGE:\n"
        "{\n"
        ' "pattern": "function", // REQUIRED: The regex pattern to search for\n'
        ' "include": "*.py" // Optional: File pattern to include in search\n'
        "}"
    ),
    "MultiEdit": (
        "\n\n⚠︎ CRITICAL: This is a TOOL CALL, not Python code! DO NOT use print(), default_api, or any Python syntax!\n"
        "✔ CORRECT JSON FORMAT:\n"
        "{\n"
        ' "file_path": "/absolute/path/to/file.py",\n'
        ' "edits": [\n'
        " {\n"
        ' "old_string": "exact text to find",\n'
        ' "new_string": "replacement text",\n'
        ' "replace_all": false\n'
        " }\n"
        " ]\n"
        "}\n\n"
        "✘ NEVER DO THIS:\n"
        "- print(default_api.MultiEdit(...))\n"
        "- default_api.MultieditEdits(...)\n"
        "- MultiEdit(file_path=..., edits=[...])\n"
        "- Any Python function call syntax\n\n"
        "Remember: You are calling a TOOL via JSON, not writing Python code!"
    ),
    # Add more tool examples as needed based on failure patterns in logs
}


def enhance_tool_description(tool_name: str, original_description: str, schema: Dict) -> str:
    """
    Append a concrete usage example to the description of known-problematic tools.

    Tools that have shown high failure rates in client execution reports get a
    JSON example appended to their description to help the model generate
    proper tool calls. Descriptions of all other tools are returned unchanged.

    Args:
        tool_name: The name of the tool
        original_description: The original tool description
        schema: The cleaned schema for this tool (currently not consulted)

    Returns:
        The original description, followed by a usage example when one is
        registered for ``tool_name``
    """
    usage_example = _TOOL_USAGE_EXAMPLES.get(tool_name)
    if usage_example is None:
        return original_description

    logger.debug(f"Enhanced {tool_name} tool description with usage example")
    return original_description + usage_example
280
+
281
+
282
def _should_ignore_tool(tool_name: str) -> bool:
    """Tell whether *tool_name* matches one of the configured ignore glob patterns.

    Patterns are read from ``config.proxy.tool_prefixes_to_ignore``. If the
    configuration cannot be loaded, the failure is logged at debug level and
    the tool is not ignored.
    """
    try:
        from fnmatch import fnmatch

        from forge.config import config

        ignore_globs = config.proxy.tool_prefixes_to_ignore
    except Exception as load_error:
        logger.debug("Cannot load tool ignore config: %s", load_error)
        return False

    return any(fnmatch(tool_name, glob) for glob in ignore_globs)
297
+
298
+
299
+ def _model_supports_cache_control(model_name: str) -> bool:
300
+ """Check if model requires explicit cache_control in requests.
301
+
302
+ Anthropic/Bedrock: requires cache_control on content blocks to enable caching.
303
+ OpenAI/Deepseek: automatic caching (≥1024 tokens), no field needed.
304
+ Gemini: separate Context Caching API (not supported here).
305
+
306
+ For non-Anthropic models, cache_control is silently stripped to avoid 400 errors.
307
+ """
308
+ if not model_name:
309
+ return False
310
+ name = model_name.lower()
311
+ return "anthropic/" in name or "claude" in name or "bedrock/anthropic" in name
312
+
313
+
314
+ def convert_anthropic_to_openai(request: MessagesRequest, provider: str = "gemini") -> Dict[str, Any]:
315
+ """Convert Anthropic API request to intermediate OpenAI format.
316
+
317
+ Transforms Anthropic's message-based format into an OpenAI format that's
318
+ easier to process before final conversion to provider-specific format. Handles system messages,
319
+ content blocks, tool calls/results, and various parameter conversions.
320
+
321
+ Args:
322
+ request: The validated Anthropic API request with messages and parameters
323
+ provider: Target provider ("gemini", "openai", "litellm") - affects schema normalization
324
+
325
+ Returns:
326
+ Dict[str, Any]: Request in OpenAI-compatible format with mapped parameters
327
+ """
328
+ openai_messages = []
329
+
330
+ # system_cache_control is preserved and forwarded for Anthropic models only
331
+ system_text = None
332
+ system_cache_control = None
333
+
334
+ if request.system:
335
+ if isinstance(request.system, str):
336
+ system_text = request.system
337
+ else:
338
+ text_parts = []
339
+ for block in request.system:
340
+ if block.type == "text":
341
+ text_parts.append(block.text)
342
+ if block.cache_control and _model_supports_cache_control(request.model):
343
+ system_cache_control = {"type": block.cache_control.type}
344
+ system_text = "\n".join(text_parts) if text_parts else None
345
+
346
+ if system_text:
347
+ if provider in ("openai", "litellm", "openrouter"):
348
+ # Auto-inject cache_control if configured and no explicit cache_control
349
+ if not system_cache_control and _model_supports_cache_control(request.model):
350
+ try:
351
+ from forge.config import config as forge_config
352
+
353
+ provider_cfg = forge_config.proxy.get_provider(forge_config.proxy.preferred_provider)
354
+ if provider_cfg.prompt_caching == "auto_inject":
355
+ estimated_tokens = len(system_text) // 4
356
+ if estimated_tokens >= provider_cfg.auto_cache_min_tokens:
357
+ system_cache_control = {"type": "ephemeral"}
358
+ logger.debug(
359
+ f"Auto-injected cache_control for system prompt "
360
+ f"(~{estimated_tokens} tokens >= {provider_cfg.auto_cache_min_tokens})"
361
+ )
362
+ except RuntimeError:
363
+ logger.debug("Config not loaded, skipping cache_control auto-injection")
364
+
365
+ # Use content block array when cache_control present (Anthropic requirement)
366
+ if system_cache_control:
367
+ system_content = [
368
+ {
369
+ "type": "text",
370
+ "text": system_text,
371
+ "cache_control": system_cache_control,
372
+ }
373
+ ]
374
+ openai_messages.append({"role": "system", "content": system_content})
375
+ else:
376
+ openai_messages.append({"role": "system", "content": system_text})
377
+
378
+ logger.debug(
379
+ f"System prompt added as message for {provider}"
380
+ + (" with cache_control" if system_cache_control else "")
381
+ )
382
+ else:
383
+ # For Gemini: store separately
384
+ logger.debug("System prompt extracted for Vertex SDK.")
385
+ else:
386
+ system_text = None # Ensure it's None if empty
387
+
388
+ for msg in request.messages:
389
+ is_tool_response_message = False
390
+ content_list = []
391
+ tool_calls_list: list[Dict[str, Any]] = []
392
+
393
+ if isinstance(msg.content, str):
394
+ content_list.append({"type": "text", "text": msg.content})
395
+ elif isinstance(msg.content, list):
396
+ for block in msg.content: # type: ignore[assignment] # Pydantic ContentBlock union
397
+ if block.type in ("thinking", "redacted_thinking"):
398
+ # Anthropic thinking blocks appear in --resume history;
399
+ # non-Anthropic providers don't support them — strip for conversion.
400
+ logger.debug("Stripping %s block (unsupported by target provider)", block.type)
401
+ continue
402
+ if block.type == "text":
403
+ text_block: Dict[str, Any] = {"type": "text", "text": block.text}
404
+ if block.cache_control and _model_supports_cache_control(request.model):
405
+ text_block["cache_control"] = {"type": block.cache_control.type}
406
+ content_list.append(text_block)
407
+ elif block.type == "image" and msg.role == "user": # Images only supported for user role
408
+ content_list.append(
409
+ {
410
+ "type": "image_url",
411
+ "image_url": {"url": f"data:{block.source.media_type};base64,{block.source.data}"},
412
+ }
413
+ )
414
+ logger.debug("Image block added to intermediate format.")
415
+ elif block.type == "tool_use" and msg.role == "assistant":
416
+ cleaned_input = (
417
+ sanitize_tool_input(block.name, block.input) if isinstance(block.input, dict) else block.input
418
+ )
419
+ tool_calls_list.append(
420
+ {
421
+ "id": block.id,
422
+ "type": "function",
423
+ "function": {
424
+ "name": block.name,
425
+ "arguments": json.dumps(cleaned_input),
426
+ },
427
+ }
428
+ )
429
+ logger.debug(f"Assistant tool_use '{block.name}' converted to intermediate tool_calls.")
430
+ elif block.type == "tool_result" and msg.role == "user":
431
+ if content_list:
432
+ openai_messages.append({"role": "user", "content": content_list})
433
+ content_list = []
434
+
435
+ tool_content = block.content
436
+ # Ensure content is a string (JSON if possible) for OpenAI format
437
+ if not isinstance(tool_content, str):
438
+ try:
439
+ tool_content = json.dumps(tool_content)
440
+ except Exception:
441
+ tool_content = str(tool_content) # Fallback to string representation
442
+
443
+ openai_messages.append(
444
+ {
445
+ "role": "tool",
446
+ "tool_call_id": block.tool_use_id,
447
+ "content": tool_content,
448
+ }
449
+ )
450
+ logger.debug(f"User tool_result for '{block.tool_use_id}' converted to intermediate tool message.")
451
+ is_tool_response_message = True
452
+ # Don't break - process all tool_result blocks in this message
453
+
454
+ # Flush any remaining content after tool_result blocks
455
+ if is_tool_response_message and content_list:
456
+ openai_messages.append({"role": "user", "content": content_list})
457
+ content_list = []
458
+
459
+ if not is_tool_response_message:
460
+ openai_message: Dict[str, Any] = {"role": msg.role}
461
+ # Simplify content if only text AND no extra metadata (like cache_control)
462
+ first_item = content_list[0] if len(content_list) == 1 else None
463
+ if (
464
+ isinstance(first_item, dict)
465
+ and first_item.get("type") == "text"
466
+ and set(first_item.keys()) == {"type", "text"}
467
+ ):
468
+ openai_message["content"] = first_item.get("text", "")
469
+ elif content_list: # Keep as list for multimodal or when metadata present
470
+ openai_message["content"] = content_list
471
+ else:
472
+ openai_message["content"] = None # Or empty string ""? Let's use None for clarity
473
+
474
+ if tool_calls_list:
475
+ openai_message["tool_calls"] = tool_calls_list
476
+
477
+ if openai_message.get("content") is not None or openai_message.get("tool_calls"):
478
+ openai_messages.append(openai_message)
479
+ elif msg.role == "assistant" and not openai_message.get("content") and not openai_message.get("tool_calls"):
480
+ # Handle case where assistant message might be empty (e.g., after tool call)
481
+ # OpenAI format expects content: null or content: ""
482
+ openai_message["content"] = ""
483
+ openai_messages.append(openai_message)
484
+
485
+ # --- Assemble OpenAI Request Dictionary ---
486
+ # Note: request.model already contains the *mapped* Gemini ID from the validator
487
+ openai_request = {
488
+ "model": request.model,
489
+ "messages": openai_messages,
490
+ "max_tokens": request.max_tokens,
491
+ "stream": request.stream or False,
492
+ }
493
+ if request.temperature is not None:
494
+ openai_request["temperature"] = request.temperature
495
+ if request.top_p is not None:
496
+ openai_request["top_p"] = request.top_p
497
+ if request.top_k is not None:
498
+ openai_request["top_k"] = request.top_k
499
+ if request.stop_sequences:
500
+ openai_request["stop"] = request.stop_sequences
501
+ if request.metadata:
502
+ openai_request["metadata"] = request.metadata
503
+
504
+ if system_text:
505
+ openai_request["system_prompt"] = system_text
506
+
507
+ if request.tools:
508
+ openai_tools = []
509
+ ignored_tool_names = []
510
+ for tool in request.tools:
511
+ if _should_ignore_tool(tool.name):
512
+ ignored_tool_names.append(tool.name)
513
+ continue
514
+
515
+ input_schema = tool.input_schema.model_dump(exclude_unset=True)
516
+ logger.debug(f"Cleaning schema for intermediate tool format: {tool.name}")
517
+ logger.debug(f"Original schema for tool '{tool.name}': {smart_format_str(input_schema)}")
518
+
519
+ tool_schema_details = {
520
+ "tool_name": tool.name,
521
+ "original_schema": input_schema,
522
+ }
523
+
524
+ # Pass through original schema (no normalization needed for OpenAI/LiteLLM)
525
+ cleaned_schema = input_schema
526
+ logger.debug(f"[{provider.upper()}] Using original schema for tool '{tool.name}'")
527
+ asyncio.create_task(
528
+ log_tool_event(
529
+ request_id="schema_" + str(uuid.uuid4())[:8],
530
+ tool_name=tool.name,
531
+ status="attempt",
532
+ stage="openai_request",
533
+ details=tool_schema_details,
534
+ )
535
+ )
536
+
537
+ # Default to an empty object schema when unspecified.
538
+ if "type" not in cleaned_schema:
539
+ cleaned_schema["type"] = "object"
540
+ logger.debug(f"Added missing 'type': 'object' to schema root for tool '{tool.name}'")
541
+ if cleaned_schema.get("type") == "object" and "properties" not in cleaned_schema:
542
+ cleaned_schema["properties"] = {}
543
+ logger.debug(f"Added missing empty 'properties' object for tool '{tool.name}'")
544
+
545
+ enhanced_description = enhance_tool_description(tool.name, tool.description or "", cleaned_schema)
546
+
547
+ openai_tools.append(
548
+ {
549
+ "type": "function",
550
+ "function": {
551
+ "name": tool.name,
552
+ "description": enhanced_description,
553
+ "parameters": cleaned_schema,
554
+ },
555
+ }
556
+ )
557
+
558
+ if openai_tools:
559
+ openai_request["tools"] = openai_tools
560
+ logger.debug(f"Converted {len(openai_tools)} tools to intermediate OpenAI format.")
561
+
562
+ if len(ignored_tool_names) > 0:
563
+ logger.info(f"Skipping {len(ignored_tool_names)} tool(s) due to TOOL_PREFIXES_TO_IGNORE")
564
+ ignored_names = ", ".join(ignored_tool_names)
565
+ logger.debug(f"Skipped tool(s): {ignored_names}")
566
+
567
+ # Note: Vertex has a different `tool_config`, this mapping might be approximate
568
+ if request.tool_choice:
569
+ choice_type = request.tool_choice.get("type")
570
+ if choice_type == "any" or choice_type == "auto":
571
+ openai_request["tool_choice"] = "auto"
572
+ elif choice_type == "tool" and "name" in request.tool_choice:
573
+ openai_request["tool_choice"] = {
574
+ "type": "function",
575
+ "function": {"name": request.tool_choice["name"]},
576
+ }
577
+ else: # Includes 'none' or other types
578
+ openai_request["tool_choice"] = "none"
579
+ logger.debug(f"Converted tool_choice '{choice_type}' to intermediate format '{openai_request['tool_choice']}'.")
580
+
581
+ logger.debug(f"Intermediate OpenAI Request Prepared: {smart_format_str(openai_request)}")
582
+ return openai_request
583
+
584
+
585
def convert_openai_to_anthropic(
    response_chunk: Union[Dict, Any], original_model_name: Optional[str] = None
) -> Optional[MessagesResponse]:
    """Convert a completed OpenAI-format response to an Anthropic ``MessagesResponse``.

    Transforms a non-streaming response from the intermediate OpenAI format
    back into the Anthropic Messages response shape. Only the first entry of
    ``choices`` is converted: its text content becomes a text block, each
    ``function`` tool call becomes a ``tool_use`` block, and ``finish_reason``
    is mapped to an Anthropic ``stop_reason`` (unknown values map to
    ``"end_turn"``).

    Args:
        response_chunk: Response in OpenAI format. May be a dict, an object
            exposing ``model_dump()``, or a plain object readable via ``vars()``.
        original_model_name: Model name to report back to the client. When
            falsy, ``"claude-3.7-sonnet"`` is reported instead.

    Returns:
        The converted ``MessagesResponse``. This function does not return
        ``None``: if anything raises during conversion, the error is logged and
        a ``MessagesResponse`` holding a single text block that describes the
        error (with zero usage) is returned instead.

    Note:
        Tool events are scheduled with ``asyncio.create_task``, which needs a
        running event loop; without one the resulting error is handled like any
        other conversion failure. Presumably this is only called from async
        request handlers -- confirm against callers.
    """
    # response_chunk is not guaranteed to be a dict (see the normalisation
    # below), so the request ID must not be read with .get() unconditionally.
    if isinstance(response_chunk, dict):
        request_id = response_chunk.get("request_id", "unknown")  # Get request ID if passed through
    else:
        request_id = getattr(response_chunk, "request_id", "unknown")
    model_name = original_model_name if original_model_name else "claude-3.7-sonnet"

    logger.info(f"[{request_id}] Converting adapted OpenAI response to Anthropic MessagesResponse format.")
    try:
        # Ensure input is a dictionary
        resp_dict = {}
        if isinstance(response_chunk, dict):
            resp_dict = response_chunk
        elif hasattr(response_chunk, "model_dump"):
            resp_dict = response_chunk.model_dump()
        else:
            try:
                resp_dict = vars(response_chunk)  # Fallback for simple objects
            except TypeError as e:
                logger.error(f"[{request_id}] Cannot convert response_chunk of type {type(response_chunk)} to dict.")
                raise ValueError(
                    "Input response_chunk is not convertible to dict.",
                ) from e

        resp_id = resp_dict.get("id") or f"msg_{uuid.uuid4().hex[:24]}"
        choices = resp_dict.get("choices", [])
        usage_data = resp_dict.get("usage", {}) or {}

        anthropic_content: List[ContentBlock] = []
        stop_reason_map = {
            "stop": "end_turn",
            "length": "max_tokens",
            "tool_calls": "tool_use",
            "content_filter": "content_filtered",
        }
        openai_finish_reason = "stop"  # Default

        if choices:
            choice = choices[0]  # Assume only one choice
            openai_finish_reason = choice.get("finish_reason", "stop")
            message = choice.get("message", {}) or {}

            text_content = message.get("content")
            tool_calls = message.get("tool_calls")

            if text_content and isinstance(text_content, str):
                anthropic_content.append(ContentBlockText(type="text", text=text_content))
                logger.debug(f"[{request_id}] Added text content block.")

            if tool_calls and isinstance(tool_calls, list):
                for tc in tool_calls:
                    if not (isinstance(tc, dict) and tc.get("type") == "function"):
                        logger.warning(
                            f"[{request_id}] Skipping conversion of non-function tool_call in response: {tc}"
                        )
                        continue

                    # Use `or` rather than .get() defaults so that keys that are
                    # present but None/empty also fall back to a usable value.
                    func = tc.get("function") or {}
                    # A tool call without arguments may carry "" or None; treat
                    # that as an empty JSON object instead of a parse failure.
                    args_str = func.get("arguments") or "{}"
                    tool_id = tc.get("id") or f"toolu_{uuid.uuid4().hex[:12]}"
                    tool_name = func.get("name") or "unknown_tool"

                    try:
                        args_input = json.loads(args_str)
                        stripped_params: list[str] = []
                        if isinstance(args_input, dict):
                            args_input, stripped_params = sanitize_tool_input_with_report(tool_name, args_input)
                        _schedule_tool_args_sanitized_event(
                            request_id,
                            tool_name,
                            stripped_params,
                            tool_id=tool_id,
                            streaming=False,
                        )
                    except json.JSONDecodeError:
                        logger.warning(
                            f"[{request_id}] Non-streaming: Failed to parse tool arguments JSON: {args_str}. Sending raw string."
                        )
                        args_input = {"raw_arguments": args_str}
                    except Exception as e:
                        logger.error(
                            f"[{request_id}] Non-streaming: Error parsing tool arguments: {e}. Args: {args_str}"
                        )
                        args_input = {
                            "error_parsing_arguments": str(e),
                            "raw_arguments": args_str,
                        }

                    anthropic_content.append(
                        ContentBlockToolUse(
                            type="tool_use",
                            id=tool_id,
                            name=tool_name,
                            input=args_input,
                        )
                    )
                    logger.debug(f"[{request_id}] Added tool_use content block: id={tool_id}, name={tool_name}")

                    asyncio.create_task(
                        log_tool_event(
                            request_id=request_id,
                            tool_name=tool_name,
                            status="success",
                            stage="client_response",
                            details={"tool_id": tool_id, "streaming": False},
                        )
                    )

        # Ensure there's always at least one content block (even if empty text)
        # Anthropic requires content to be a non-empty list.
        if not anthropic_content:
            logger.warning(f"[{request_id}] No content generated, adding empty text block.")
            anthropic_content.append(ContentBlockText(type="text", text=""))

        anthropic_stop_reason = stop_reason_map.get(openai_finish_reason, "end_turn")
        logger.debug(
            f"[{request_id}] Mapped finish_reason '{openai_finish_reason}' to stop_reason '{anthropic_stop_reason}'."
        )

        return MessagesResponse(
            id=resp_id,
            model=model_name,
            type="message",
            role="assistant",
            content=anthropic_content,
            stop_reason=anthropic_stop_reason,  # type: ignore[arg-type]  # values from controlled stop_reason_map
            stop_sequence=None,  # not returned in OpenAI format
            usage=Usage(
                # `or 0` also covers usage keys that are present but None.
                input_tokens=usage_data.get("prompt_tokens") or 0,
                output_tokens=usage_data.get("completion_tokens") or 0,
            ),
        )
    except Exception as e:
        logger.error(
            f"[{request_id}] Failed to convert adapted OpenAI response to Anthropic format: {e}",
            exc_info=True,
        )

        # Conversion failures are reported to the client as a normal assistant
        # message carrying the error text rather than being raised.
        return MessagesResponse(
            id=f"error_{uuid.uuid4().hex[:24]}",
            model=model_name,
            type="message",
            role="assistant",
            content=[ContentBlockText(type="text", text=f"Error processing model response: {str(e)}")],
            stop_reason="end_turn",  # Or maybe a custom error reason?
            usage=Usage(input_tokens=0, output_tokens=0),
        )
744
+
745
+
746
async def convert_openai_to_anthropic_sse(
    response_generator: AsyncGenerator[Dict[str, Any], None],
    request: MessagesRequest,
    request_id: str,
    on_complete: Optional["_OnCompleteCallback"] = None,
):
    """Convert OpenAI streaming format to Anthropic Server-Sent Events (SSE) format.

    Transforms a stream of OpenAI-format chunks into the Anthropic streaming format
    using Server-Sent Events. Emits, in order:
    - message_start, followed by one ping
    - content_block_start / content_block_delta / content_block_stop for text
      and tool_use blocks
    - message_delta events (an early one carrying input_tokens when first seen,
      and a final one carrying the stop reason and, when non-zero, usage)
    - message_stop

    Processing stops at the first chunk that carries a ``finish_reason``.
    If a chunk contains an ``"error"`` key, a single ``error`` event is emitted
    and the stream ends immediately (no message_stop). If an exception is raised
    while converting, an ``error`` event followed by ``message_stop`` is emitted.

    Args:
        response_generator: Async generator yielding OpenAI-format response chunks
        request: The original MessagesRequest from the client
        request_id: Unique identifier for logging and tracking this request
        on_complete: Optional callback invoked exactly once when the generator
            finishes (from the ``finally`` block) as
            ``on_complete(final_usage, stream_failed, stream_error_type)``.
            Exceptions raised by the callback are logged and swallowed.

    Yields:
        SSE-formatted text chunks following the Anthropic streaming protocol
    """
    message_id = f"msg_{uuid.uuid4().hex[:24]}"
    response_model_name = request.original_model_name or request.model  # fallback to mapped ID if original is missing
    logger.info(
        f"[{request_id}] Starting Anthropic SSE stream conversion (message {message_id}, model: {response_model_name})"
    )

    # --- Stream Initialization ---
    start_event_data = {
        "type": "message_start",
        "message": {
            "id": message_id,
            "type": "message",
            "role": "assistant",
            "model": response_model_name,
            "content": [],  # Content starts empty
            "stop_reason": None,
            "stop_sequence": None,
            "usage": {"input_tokens": 0, "output_tokens": 0},
        },
    }
    yield f"event: message_start\ndata: {json.dumps(start_event_data)}\n\n"
    logger.debug(f"[{request_id}] Sent message_start")

    yield f"event: ping\ndata: {json.dumps({'type': 'ping'})}\n\n"
    logger.debug(f"[{request_id}] Sent initial ping")

    # --- Stream Processing ---
    content_block_index = -1
    current_block_type: Optional[Literal["text", "tool_use"]] = None
    text_started = False
    # {openai_tc_index: {id: str, name: str, args: str, block_idx: int, buffer_args: bool}}
    tool_calls_buffer: Dict[int, Dict[str, Any]] = {}
    final_usage: Dict[str, int] = {"input_tokens": 0, "output_tokens": 0, "cached_tokens": 0}
    _stream_failed = False
    _stream_error_type: Optional[str] = None
    final_stop_reason: Optional[str] = None

    def _build_sanitized_args_delta(tool_info: Dict[str, Any]) -> dict[str, Any] | None:
        """Build one input_json_delta event from a tool's fully buffered, sanitized args.

        Returns None when the tool block never started, no arguments were
        buffered, or sanitization produced nothing to send.
        """
        block_idx = tool_info.get("block_idx")
        args_json = tool_info.get("args", "")
        if block_idx is None or not args_json:
            return None
        partial_json, stripped_params = _sanitize_tool_arguments_json_with_report(tool_info.get("name"), args_json)
        if not partial_json:
            return None
        _schedule_tool_args_sanitized_event(
            request_id,
            tool_info.get("name"),
            stripped_params,
            tool_id=tool_info.get("id"),
            streaming=True,
            block_index=block_idx,
        )
        return {
            "type": "content_block_delta",
            "index": block_idx,
            "delta": {
                "type": "input_json_delta",
                "partial_json": partial_json,
            },
        }

    def _build_tool_block_close_events() -> list[tuple[str, dict[str, Any]]]:
        """Return the (event_name, payload) pairs that close every started tool block.

        Blocks are closed in ascending block index order. Tools whose arguments
        were buffered get their single sanitized delta emitted just before the
        matching content_block_stop.
        """
        events: list[tuple[str, dict[str, Any]]] = []
        started_tools = [
            tool_info for tool_info in tool_calls_buffer.values() if tool_info.get("block_idx") is not None
        ]
        for tool_info in sorted(started_tools, key=lambda item: item["block_idx"]):
            if tool_info.get("buffer_args"):
                args_delta_event = _build_sanitized_args_delta(tool_info)
                if args_delta_event is not None:
                    events.append(("content_block_delta", args_delta_event))
            events.append(
                (
                    "content_block_stop",
                    {
                        "type": "content_block_stop",
                        "index": tool_info["block_idx"],
                    },
                )
            )
        return events

    stop_reason_map = {
        "stop": "end_turn",
        "length": "max_tokens",
        "tool_calls": "tool_use",
        "content_filter": "content_filtered",
    }

    try:
        async for chunk in response_generator:
            logger.debug(f"[{request_id}] Processing adapted OpenAI Chunk: {chunk}")

            if not isinstance(chunk, dict):
                logger.warning(f"[{request_id}] Skipping invalid chunk format: {type(chunk)}")
                continue

            # Handle error chunks from stream generator.
            # stream_generator() catches ToolCallError/ProxyStreamError and yields
            # error dicts instead of raising — so no exception reaches the except
            # block below. We must set the failure flag here for metrics.
            if "error" in chunk:
                error_data = chunk["error"]
                _stream_failed = True
                _stream_error_type = error_data.get("type", "stream_error")
                error_event = {
                    "type": "error",
                    "error": {
                        "type": error_data.get("type", "api_error"),
                        "message": error_data.get("message", "Unknown streaming error"),
                    },
                }
                yield f"event: error\ndata: {json.dumps(error_event)}\n\n"
                return  # End stream after error

            # --- Check for usage-only chunk (LiteLLM sends usage in chunk with empty choices) ---
            chunk_usage = chunk.get("usage")
            if chunk_usage and isinstance(chunk_usage, dict):
                # `or 0` guards against counts that are present but None, which
                # would otherwise raise TypeError in the comparisons below.
                prompt_tokens = chunk_usage.get("prompt_tokens") or 0
                completion_tokens = chunk_usage.get("completion_tokens") or 0

                if prompt_tokens > 0 and final_usage["input_tokens"] == 0:
                    # First time seeing input_tokens - send immediately
                    final_usage["input_tokens"] = prompt_tokens
                    usage_update_event = {
                        "type": "message_delta",
                        "delta": {},
                        "usage": {"input_tokens": prompt_tokens},
                    }
                    yield f"event: message_delta\ndata: {json.dumps(usage_update_event)}\n\n"
                    logger.debug(f"[{request_id}] Sent immediate message_delta with input_tokens={prompt_tokens}")

                if completion_tokens > 0:
                    final_usage["output_tokens"] = completion_tokens
                    logger.debug(f"[{request_id}] Updated output_tokens={completion_tokens}")

                # Accumulate cached_tokens (propagated from client_adapter since Step 2)
                cached_tokens = chunk_usage.get("cached_tokens") or 0
                if cached_tokens > 0:
                    final_usage["cached_tokens"] = cached_tokens

                logger.debug(f"[{request_id}] Updated usage from chunk: {final_usage}")

            # A chunk without a non-empty list of choices has nothing more to
            # convert. That is expected for usage-only chunks; anything else is
            # logged as malformed.
            choices = chunk.get("choices", [])
            if not choices or not isinstance(choices, list):
                if chunk_usage:
                    logger.debug(f"[{request_id}] Processed usage-only chunk (empty choices)")
                else:
                    logger.warning(f"[{request_id}] Skipping chunk with missing or invalid 'choices': {chunk}")
                continue

            choice = choices[0]

            if not isinstance(choice, dict):
                logger.warning(
                    f"[{request_id}] Skipping chunk with invalid choice format (type={type(choice)}): {choice}"
                )
                continue

            delta = choice.get("delta", {}) or {}
            finish_reason = choice.get("finish_reason")

            # --- Process Delta Content ---
            text_delta = delta.get("content")
            tool_calls_delta = delta.get("tool_calls")

            if text_delta and isinstance(text_delta, str):
                # If currently in a tool_use block, stop it first
                if current_block_type == "tool_use":
                    if tool_calls_buffer:
                        for event_name, event_data in _build_tool_block_close_events():
                            yield f"event: {event_name}\ndata: {json.dumps(event_data)}\n\n"
                        tool_calls_buffer.clear()
                        logger.debug(f"[{request_id}] Stopped tool block(s) due to incoming text.")
                    else:
                        logger.warning(
                            f"[{request_id}] current_block_type is 'tool_use' but tool_calls_buffer is empty"
                        )
                    current_block_type = None

                if not text_started:
                    content_block_index += 1
                    current_block_type = "text"
                    text_started = True
                    start_event = {
                        "type": "content_block_start",
                        "index": content_block_index,
                        "content_block": {
                            "type": "text",
                            "text": "",
                        },
                    }
                    yield f"event: content_block_start\ndata: {json.dumps(start_event)}\n\n"
                    logger.debug(f"[{request_id}] Started text block {content_block_index}")

                delta_event = {
                    "type": "content_block_delta",
                    "index": content_block_index,
                    "delta": {"type": "text_delta", "text": text_delta},
                }
                yield f"event: content_block_delta\ndata: {json.dumps(delta_event)}\n\n"
                logger.debug(f"[{request_id}] Sent text delta: '{text_delta[:50]}...'")

            if tool_calls_delta and isinstance(tool_calls_delta, list):
                logger.debug(f"[{request_id}] Received tool_calls_delta: {tool_calls_delta}")
                if current_block_type == "text" and text_started:
                    yield f"event: content_block_stop\ndata: {json.dumps({'type': 'content_block_stop', 'index': content_block_index})}\n\n"
                    logger.debug(f"[{request_id}] Stopped text block {content_block_index} due to incoming tool call.")
                    current_block_type = None
                    text_started = False

                for tc_delta in tool_calls_delta:
                    if not isinstance(tc_delta, dict):
                        continue  # Skip invalid format

                    # OpenAI tool index (usually 0 for the first tool, 1 for second, etc.)
                    # We rely on this index to aggregate arguments for the *same* tool call.
                    tc_openai_index = tc_delta.get("index", 0)
                    tc_id = tc_delta.get("id")
                    func_delta = tc_delta.get("function", {}) or {}
                    func_name = func_delta.get("name")
                    args_delta = func_delta.get("arguments")

                    # --- Start a new tool_use block if necessary ---
                    if tc_openai_index not in tool_calls_buffer:
                        if tc_id and func_name:
                            content_block_index += 1
                            current_block_type = "tool_use"
                            tool_calls_buffer[tc_openai_index] = {
                                "id": tc_id,
                                "name": func_name,
                                "args": "",
                                "block_idx": content_block_index,
                                "buffer_args": _should_buffer_streaming_tool_args(func_name),
                            }
                            start_event = {
                                "type": "content_block_start",
                                "index": content_block_index,
                                "content_block": {
                                    "type": "tool_use",
                                    "id": tc_id,
                                    "name": func_name,
                                    "input": {},
                                },
                            }
                            yield f"event: content_block_start\ndata: {json.dumps(start_event)}\n\n"
                            logger.debug(
                                f"[{request_id}] Started tool_use block {content_block_index} (id: {tc_id}, name: {func_name})"
                            )

                            # Log successful tool event for client in streaming
                            asyncio.create_task(
                                log_tool_event(
                                    request_id=request_id,
                                    tool_name=func_name,
                                    status="success",
                                    stage="client_response",
                                    details={
                                        "tool_id": tc_id,
                                        "streaming": True,
                                        "block_index": content_block_index,
                                    },
                                )
                            )
                        # ID can arrive before name in some providers; buffer until name arrives
                        elif tc_id and not func_name:
                            tool_calls_buffer[tc_openai_index] = {
                                "id": tc_id,
                                "name": None,
                                "args": "",
                                "block_idx": None,
                                "buffer_args": False,
                            }
                            logger.debug(
                                f"[{request_id}] Received tool ID {tc_id} first for index {tc_openai_index}, waiting for name."
                            )
                        else:
                            logger.warning(
                                f"[{request_id}] Cannot start tool block for index {tc_openai_index} without ID and/or Name. Delta: {tc_delta}"
                            )
                            continue  # Cannot start block yet

                    # --- If name arrives later for an existing ID ---
                    elif func_name and tool_calls_buffer[tc_openai_index]["name"] is None:
                        tool_info = tool_calls_buffer[tc_openai_index]
                        # Only compare IDs when the provider actually resends one.
                        # Follow-up deltas commonly omit the ID; requiring equality
                        # there would reject them and the block would never start.
                        if not tc_id or tool_info["id"] == tc_id:
                            content_block_index += 1
                            current_block_type = "tool_use"
                            tool_info["name"] = func_name
                            tool_info["block_idx"] = content_block_index
                            tool_info["buffer_args"] = _should_buffer_streaming_tool_args(func_name)
                            start_event = {
                                "type": "content_block_start",
                                "index": content_block_index,
                                "content_block": {
                                    "type": "tool_use",
                                    "id": tool_info["id"],
                                    "name": func_name,
                                    "input": {},
                                },
                            }
                            yield f"event: content_block_start\ndata: {json.dumps(start_event)}\n\n"
                            logger.debug(
                                f"[{request_id}] Started tool_use block {content_block_index} for index {tc_openai_index} after receiving name ({func_name})"
                            )
                        else:
                            logger.warning(
                                f"[{request_id}] Received name '{func_name}' for index {tc_openai_index}, but ID mismatch (expected {tool_info['id']}, got {tc_id}). Skipping."
                            )

                    # --- Append argument fragments if block has started ---
                    if (
                        tc_openai_index in tool_calls_buffer
                        and args_delta
                        and tool_calls_buffer[tc_openai_index]["block_idx"] is not None
                    ):
                        tool_info = tool_calls_buffer[tc_openai_index]
                        tool_info["args"] += args_delta
                        if tool_info.get("buffer_args"):
                            # Buffered tools emit one sanitized delta when the block closes.
                            logger.debug(
                                f"[{request_id}] Buffered tool args delta for block {tool_info['block_idx']}: '{args_delta[:50]}...'"
                            )
                        else:
                            delta_event = {
                                "type": "content_block_delta",
                                "index": tool_info["block_idx"],
                                "delta": {
                                    "type": "input_json_delta",
                                    "partial_json": args_delta,
                                },
                            }
                            yield f"event: content_block_delta\ndata: {json.dumps(delta_event)}\n\n"
                            logger.debug(
                                f"[{request_id}] Sent tool args delta for block {tool_info['block_idx']}: '{args_delta[:50]}...'"
                            )

            # --- Process Finish Reason ---
            if finish_reason:
                final_stop_reason = stop_reason_map.get(finish_reason, "end_turn")
                logger.info(
                    f"[{request_id}] Received final finish_reason: '{finish_reason}' -> Mapped to stop_reason: '{final_stop_reason}'"
                )
                break

        # --- End of Stream ---
        if current_block_type == "text" and text_started:
            yield f"event: content_block_stop\ndata: {json.dumps({'type': 'content_block_stop', 'index': content_block_index})}\n\n"
            logger.debug(f"[{request_id}] Stopped final text block {content_block_index}")
        elif current_block_type == "tool_use":
            if tool_calls_buffer:
                for event_name, event_data in _build_tool_block_close_events():
                    logger.debug(f"[{request_id}] Yielding {event_name} for tool_use: {json.dumps(event_data)}")
                    yield f"event: {event_name}\ndata: {json.dumps(event_data)}\n\n"
                logger.debug(f"[{request_id}] Stopped final tool_use block(s)")
            else:
                logger.warning(
                    f"[{request_id}] Current block type is tool_use, but buffer is empty. Cannot stop block."
                )

        if final_stop_reason is None:
            logger.warning(
                f"[{request_id}] Stream finished without receiving a finish_reason. Defaulting to 'end_turn'."
            )
            final_stop_reason = "end_turn"

        final_delta_event = {
            "type": "message_delta",
            "delta": {
                "stop_reason": final_stop_reason,
                "stop_sequence": None,  # not returned in OpenAI stream format
            },
        }

        # Only include usage if we have valid data (not zeros)
        # Sending zeros overwrites any previously displayed usage in Claude Code UI
        input_tokens = final_usage.get("input_tokens", 0)
        output_tokens = final_usage.get("output_tokens", 0)
        if input_tokens > 0 or output_tokens > 0:
            final_delta_event["usage"] = {
                "input_tokens": input_tokens,
                "output_tokens": output_tokens,
            }

        logger.debug(f"[{request_id}] Yielding final message_delta: {json.dumps(final_delta_event)}")
        yield f"event: message_delta\ndata: {json.dumps(final_delta_event)}\n\n"
        logger.debug(
            f"[{request_id}] Sent final message_delta (stop_reason: {final_stop_reason}, "
            f"usage: {final_delta_event.get('usage', 'not included')})"
        )

        stop_event_data = {"type": "message_stop"}
        logger.debug(f"[{request_id}] Yielding message_stop: {json.dumps(stop_event_data)}")
        yield f"event: message_stop\ndata: {json.dumps(stop_event_data)}\n\n"
        logger.debug(f"[{request_id}] Sent message_stop")

    except Exception as e:
        _stream_failed = True
        _stream_error_type = "internal_error"
        logger.error(
            f"[{request_id}] Error during Anthropic SSE stream conversion: {e}, "
            f"Full traceback:\n{traceback.format_exc()}"
        )
        try:
            # The client only receives a generic message; details stay in the log.
            error_payload = {
                "type": "error",
                "error": {
                    "type": "internal_server_error",
                    "message": "Stream processing error",
                },
            }
            yield f"event: error\ndata: {json.dumps(error_payload)}\n\n"
            # Always send message_stop after an error
            yield f"event: message_stop\ndata: {json.dumps({'type': 'message_stop'})}\n\n"
            logger.debug(f"[{request_id}] Sent error event and message_stop after exception.")
        except Exception as e2:
            logger.error(f"[{request_id}] Failed to send error event to client: {e2}")
    finally:
        logger.info(f"[{request_id}] Anthropic SSE stream conversion finished.")
        if on_complete is not None:
            # Best-effort completion hook: a failing callback must not break the stream.
            try:
                on_complete(final_usage, _stream_failed, _stream_error_type)
            except Exception:
                logger.debug(f"[{request_id}] on_complete callback failed", exc_info=True)