multi-forge 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (311) hide show
  1. forge/__init__.py +3 -0
  2. forge/_extensions/agents/.gitkeep +0 -0
  3. forge/_extensions/commands/.gitkeep +0 -0
  4. forge/_extensions/skills/analyze/SKILL.md +87 -0
  5. forge/_extensions/skills/challenge/SKILL.md +91 -0
  6. forge/_extensions/skills/consensus/SKILL.md +120 -0
  7. forge/_extensions/skills/consensus/resources/code_consensus_evaluation.md +94 -0
  8. forge/_extensions/skills/consensus/resources/consensus_evaluation.md +70 -0
  9. forge/_extensions/skills/consensus/resources/synthesis.md +101 -0
  10. forge/_extensions/skills/debate/SKILL.md +116 -0
  11. forge/_extensions/skills/debate/resources/code_debate_evaluation.md +101 -0
  12. forge/_extensions/skills/debate/resources/debate_evaluation.md +90 -0
  13. forge/_extensions/skills/panel/SKILL.md +141 -0
  14. forge/_extensions/skills/panel/resources/synthesis.md +103 -0
  15. forge/_extensions/skills/qa/SKILL.md +704 -0
  16. forge/_extensions/skills/qa/resources/checklist/0-enable.md +78 -0
  17. forge/_extensions/skills/qa/resources/checklist/1-preflight.md +24 -0
  18. forge/_extensions/skills/qa/resources/checklist/10-resume.md +143 -0
  19. forge/_extensions/skills/qa/resources/checklist/11-config.md +150 -0
  20. forge/_extensions/skills/qa/resources/checklist/12-search.md +58 -0
  21. forge/_extensions/skills/qa/resources/checklist/13-guard.md +237 -0
  22. forge/_extensions/skills/qa/resources/checklist/14-workflow.md +305 -0
  23. forge/_extensions/skills/qa/resources/checklist/15-skills.md +155 -0
  24. forge/_extensions/skills/qa/resources/checklist/16-handoff.md +224 -0
  25. forge/_extensions/skills/qa/resources/checklist/17-info.md +50 -0
  26. forge/_extensions/skills/qa/resources/checklist/18-disable.md +84 -0
  27. forge/_extensions/skills/qa/resources/checklist/19-uninstall.md +146 -0
  28. forge/_extensions/skills/qa/resources/checklist/2-extensions.md +188 -0
  29. forge/_extensions/skills/qa/resources/checklist/20-cleanup.md +36 -0
  30. forge/_extensions/skills/qa/resources/checklist/3-auth.md +234 -0
  31. forge/_extensions/skills/qa/resources/checklist/4-proxy.md +481 -0
  32. forge/_extensions/skills/qa/resources/checklist/5-session.md +541 -0
  33. forge/_extensions/skills/qa/resources/checklist/6-hooks.md +275 -0
  34. forge/_extensions/skills/qa/resources/checklist/7-costs.md +309 -0
  35. forge/_extensions/skills/qa/resources/checklist/8-status-line.md +174 -0
  36. forge/_extensions/skills/qa/resources/checklist/9-direct-commands.md +146 -0
  37. forge/_extensions/skills/qa/resources/checklist.md +103 -0
  38. forge/_extensions/skills/qa/resources/report-template.md +62 -0
  39. forge/_extensions/skills/qa/scripts/start-container.sh +529 -0
  40. forge/_extensions/skills/qa/scripts/walkthrough-state.py +1137 -0
  41. forge/_extensions/skills/review/SKILL.md +125 -0
  42. forge/_extensions/skills/review/references/claude-4.6.md +474 -0
  43. forge/_extensions/skills/review/references/claude-4.7.md +710 -0
  44. forge/_extensions/skills/review/references/gemini-3.1.md +546 -0
  45. forge/_extensions/skills/review/references/gpt-5.5.md +490 -0
  46. forge/_extensions/skills/review/references/skills-writing-guide.md +1588 -0
  47. forge/_extensions/skills/review/resources/code-anthropic.md +160 -0
  48. forge/_extensions/skills/review/resources/code-gemini.md +184 -0
  49. forge/_extensions/skills/review/resources/code-openai.md +203 -0
  50. forge/_extensions/skills/review/resources/code.md +160 -0
  51. forge/_extensions/skills/review-docs/SKILL.md +121 -0
  52. forge/_extensions/skills/review-docs/resources/docs-anthropic.md +170 -0
  53. forge/_extensions/skills/review-docs/resources/docs-gemini.md +204 -0
  54. forge/_extensions/skills/review-docs/resources/docs-openai.md +231 -0
  55. forge/_extensions/skills/review-docs/resources/docs.md +170 -0
  56. forge/_extensions/skills/smoke-test/SKILL.md +27 -0
  57. forge/_extensions/skills/smoke-test/scripts/smoke-test.sh +118 -0
  58. forge/_extensions/skills/understand/SKILL.md +148 -0
  59. forge/_extensions/skills/understand/resources/code-anthropic.md +163 -0
  60. forge/_extensions/skills/understand/resources/code-gemini.md +194 -0
  61. forge/_extensions/skills/understand/resources/code-openai.md +181 -0
  62. forge/_extensions/skills/understand/resources/code.md +163 -0
  63. forge/_extensions/skills/understand/resources/docs-anthropic.md +177 -0
  64. forge/_extensions/skills/understand/resources/docs-gemini.md +202 -0
  65. forge/_extensions/skills/understand/resources/docs-openai.md +191 -0
  66. forge/_extensions/skills/understand/resources/docs.md +177 -0
  67. forge/_extensions/skills/walkthrough/SKILL.md +599 -0
  68. forge/_extensions/skills/walkthrough/resources/checklist.md +765 -0
  69. forge/_extensions/skills/walkthrough/scripts/run-in-repo.sh +118 -0
  70. forge/_extensions/skills/walkthrough/scripts/setup-test-repo.sh +198 -0
  71. forge/_extensions/skills/walkthrough/scripts/walkthrough-state.py +1137 -0
  72. forge/backend/__init__.py +174 -0
  73. forge/backend/adapters/__init__.py +38 -0
  74. forge/backend/adapters/litellm.py +158 -0
  75. forge/backend/creation.py +89 -0
  76. forge/backend/registry.py +178 -0
  77. forge/cli/__init__.py +16 -0
  78. forge/cli/auth.py +483 -0
  79. forge/cli/backend.py +298 -0
  80. forge/cli/claude.py +411 -0
  81. forge/cli/config_cmd.py +303 -0
  82. forge/cli/extensions.py +1001 -0
  83. forge/cli/gc.py +165 -0
  84. forge/cli/guard.py +1018 -0
  85. forge/cli/guards.py +106 -0
  86. forge/cli/handoff.py +110 -0
  87. forge/cli/hooks/__init__.py +36 -0
  88. forge/cli/hooks/_group.py +20 -0
  89. forge/cli/hooks/_helpers.py +149 -0
  90. forge/cli/hooks/commands.py +1677 -0
  91. forge/cli/hooks/direct_commands.py +1304 -0
  92. forge/cli/hooks/install.py +232 -0
  93. forge/cli/hooks/policy.py +151 -0
  94. forge/cli/hooks/read_hygiene.py +74 -0
  95. forge/cli/hooks/verification.py +370 -0
  96. forge/cli/logs.py +406 -0
  97. forge/cli/main.py +292 -0
  98. forge/cli/proxy.py +1821 -0
  99. forge/cli/proxy_costs.py +313 -0
  100. forge/cli/search.py +416 -0
  101. forge/cli/session.py +892 -0
  102. forge/cli/session_addendum.py +81 -0
  103. forge/cli/session_fork.py +750 -0
  104. forge/cli/session_handoff.py +141 -0
  105. forge/cli/session_lifecycle.py +2053 -0
  106. forge/cli/session_manage.py +1336 -0
  107. forge/cli/session_memory.py +201 -0
  108. forge/cli/status_line.py +1398 -0
  109. forge/cli/workflow.py +1964 -0
  110. forge/config/__init__.py +110 -0
  111. forge/config/dataclass_utils.py +88 -0
  112. forge/config/defaults/__init__.py +0 -0
  113. forge/config/defaults/backends/__init__.py +0 -0
  114. forge/config/defaults/backends/litellm.yaml +196 -0
  115. forge/config/defaults/templates/__init__.py +0 -0
  116. forge/config/defaults/templates/litellm-anthropic-local.yaml +33 -0
  117. forge/config/defaults/templates/litellm-anthropic.yaml +24 -0
  118. forge/config/defaults/templates/litellm-gemini-flash-local.yaml +37 -0
  119. forge/config/defaults/templates/litellm-gemini-local.yaml +32 -0
  120. forge/config/defaults/templates/litellm-gemini-test.yaml +34 -0
  121. forge/config/defaults/templates/litellm-gemini.yaml +21 -0
  122. forge/config/defaults/templates/litellm-openai-codex-local.yaml +36 -0
  123. forge/config/defaults/templates/litellm-openai-local.yaml +38 -0
  124. forge/config/defaults/templates/litellm-openai.yaml +28 -0
  125. forge/config/defaults/templates/openrouter-anthropic.yaml +23 -0
  126. forge/config/defaults/templates/openrouter-deepseek.yaml +26 -0
  127. forge/config/defaults/templates/openrouter-gemini-flash.yaml +26 -0
  128. forge/config/defaults/templates/openrouter-gemini.yaml +23 -0
  129. forge/config/defaults/templates/openrouter-glm.yaml +23 -0
  130. forge/config/defaults/templates/openrouter-kimi.yaml +30 -0
  131. forge/config/defaults/templates/openrouter-minimax.yaml +26 -0
  132. forge/config/defaults/templates/openrouter-openai-codex.yaml +23 -0
  133. forge/config/defaults/templates/openrouter-openai.yaml +28 -0
  134. forge/config/defaults/templates/openrouter-qwen.yaml +25 -0
  135. forge/config/loader.py +675 -0
  136. forge/config/schema.py +448 -0
  137. forge/core/__init__.py +5 -0
  138. forge/core/auth/__init__.py +67 -0
  139. forge/core/auth/capabilities.py +219 -0
  140. forge/core/auth/credentials_file.py +244 -0
  141. forge/core/auth/protocols.py +18 -0
  142. forge/core/auth/secrets.py +243 -0
  143. forge/core/auth/template_secrets.py +112 -0
  144. forge/core/data/__init__.py +5 -0
  145. forge/core/data/model_catalog.yaml +1522 -0
  146. forge/core/data/pricing.yaml +140 -0
  147. forge/core/data/system_prompt_addendums/__init__.py +0 -0
  148. forge/core/data/system_prompt_addendums/gemini.md +330 -0
  149. forge/core/data/system_prompt_addendums/openai.md +328 -0
  150. forge/core/llm/__init__.py +231 -0
  151. forge/core/llm/clients/__init__.py +14 -0
  152. forge/core/llm/clients/base.py +115 -0
  153. forge/core/llm/clients/litellm.py +619 -0
  154. forge/core/llm/clients/openai_compat.py +244 -0
  155. forge/core/llm/clients/openrouter.py +234 -0
  156. forge/core/llm/credentials.py +439 -0
  157. forge/core/llm/detection.py +86 -0
  158. forge/core/llm/errors.py +44 -0
  159. forge/core/llm/protocols.py +80 -0
  160. forge/core/llm/types.py +176 -0
  161. forge/core/logging.py +146 -0
  162. forge/core/models/__init__.py +91 -0
  163. forge/core/models/catalog.py +467 -0
  164. forge/core/models/pricing.py +165 -0
  165. forge/core/models/types.py +167 -0
  166. forge/core/naming.py +212 -0
  167. forge/core/ops/__init__.py +73 -0
  168. forge/core/ops/context.py +141 -0
  169. forge/core/ops/gc.py +802 -0
  170. forge/core/ops/proxy.py +146 -0
  171. forge/core/ops/resolution.py +135 -0
  172. forge/core/ops/session.py +344 -0
  173. forge/core/ops/session_context.py +548 -0
  174. forge/core/paths.py +38 -0
  175. forge/core/process.py +54 -0
  176. forge/core/reactive/__init__.py +38 -0
  177. forge/core/reactive/cost_tracking.py +300 -0
  178. forge/core/reactive/env.py +180 -0
  179. forge/core/reactive/proxy.py +78 -0
  180. forge/core/reactive/routing.py +622 -0
  181. forge/core/reactive/session_runner.py +185 -0
  182. forge/core/reactive/structured_output.py +62 -0
  183. forge/core/reactive/tagger.py +94 -0
  184. forge/core/reactive/throttle.py +132 -0
  185. forge/core/state/__init__.py +59 -0
  186. forge/core/state/exceptions.py +59 -0
  187. forge/core/state/io.py +140 -0
  188. forge/core/state/lock.py +99 -0
  189. forge/core/state/timestamps.py +60 -0
  190. forge/core/transcript.py +78 -0
  191. forge/core/typing_helpers.py +24 -0
  192. forge/core/workqueue/__init__.py +67 -0
  193. forge/core/workqueue/queue.py +552 -0
  194. forge/core/workqueue/types.py +63 -0
  195. forge/guard/__init__.py +26 -0
  196. forge/guard/deterministic/__init__.py +26 -0
  197. forge/guard/deterministic/base.py +158 -0
  198. forge/guard/deterministic/coding_standards.py +256 -0
  199. forge/guard/deterministic/registry.py +148 -0
  200. forge/guard/deterministic/tdd.py +171 -0
  201. forge/guard/engine.py +216 -0
  202. forge/guard/protocols.py +91 -0
  203. forge/guard/queries.py +96 -0
  204. forge/guard/semantic/__init__.py +34 -0
  205. forge/guard/semantic/promotion.py +18 -0
  206. forge/guard/semantic/supervisor.py +813 -0
  207. forge/guard/semantic/verdict.py +183 -0
  208. forge/guard/store.py +124 -0
  209. forge/guard/team/__init__.py +6 -0
  210. forge/guard/team/config.py +24 -0
  211. forge/guard/team/handlers.py +209 -0
  212. forge/guard/team/prompts.py +41 -0
  213. forge/guard/types.py +125 -0
  214. forge/guard/workflow/__init__.py +17 -0
  215. forge/guard/workflow/branches.py +67 -0
  216. forge/guard/workflow/config.py +63 -0
  217. forge/guard/workflow/divergence.py +113 -0
  218. forge/guard/workflow/policy.py +87 -0
  219. forge/guard/workflow/stages.py +205 -0
  220. forge/install/__init__.py +55 -0
  221. forge/install/cli.py +281 -0
  222. forge/install/exceptions.py +163 -0
  223. forge/install/hooks.py +109 -0
  224. forge/install/installer.py +1037 -0
  225. forge/install/models.py +321 -0
  226. forge/install/preset.py +272 -0
  227. forge/install/settings_merge.py +831 -0
  228. forge/install/tracking.py +238 -0
  229. forge/install/version.py +141 -0
  230. forge/proxy/__init__.py +0 -0
  231. forge/proxy/base_client.py +181 -0
  232. forge/proxy/client_adapter.py +476 -0
  233. forge/proxy/client_factory.py +531 -0
  234. forge/proxy/converters.py +1206 -0
  235. forge/proxy/cost_logger.py +132 -0
  236. forge/proxy/cost_tracker.py +242 -0
  237. forge/proxy/data_models.py +338 -0
  238. forge/proxy/error_hints.py +92 -0
  239. forge/proxy/metrics.py +222 -0
  240. forge/proxy/model_spec.py +158 -0
  241. forge/proxy/proxies.py +333 -0
  242. forge/proxy/proxy_identity.py +134 -0
  243. forge/proxy/proxy_orchestrator.py +1018 -0
  244. forge/proxy/proxy_startup.py +54 -0
  245. forge/proxy/server.py +1561 -0
  246. forge/proxy/utils.py +537 -0
  247. forge/review/__init__.py +6 -0
  248. forge/review/adversarial.py +111 -0
  249. forge/review/consensus.py +236 -0
  250. forge/review/engine.py +356 -0
  251. forge/review/models.py +437 -0
  252. forge/review/resources/__init__.py +5 -0
  253. forge/review/resources/codereview-performance.md +85 -0
  254. forge/review/resources/codereview-quick.md +75 -0
  255. forge/review/resources/codereview-security.md +92 -0
  256. forge/review/resources/codereview.md +85 -0
  257. forge/review/resources/docreview-quick.md +75 -0
  258. forge/review/resources/docreview.md +86 -0
  259. forge/review/resources/thinkdeep.md +89 -0
  260. forge/review/routing.py +368 -0
  261. forge/review/synthesis.py +73 -0
  262. forge/runtime_config.py +438 -0
  263. forge/search/__init__.py +55 -0
  264. forge/search/bm25_store.py +264 -0
  265. forge/search/content_store.py +197 -0
  266. forge/search/engine.py +352 -0
  267. forge/search/exceptions.py +51 -0
  268. forge/search/extractor.py +234 -0
  269. forge/search/index_state.py +295 -0
  270. forge/search/store.py +215 -0
  271. forge/search/tokenizer.py +24 -0
  272. forge/session/__init__.py +130 -0
  273. forge/session/active.py +339 -0
  274. forge/session/artifacts.py +202 -0
  275. forge/session/claude/__init__.py +50 -0
  276. forge/session/claude/cleanup.py +105 -0
  277. forge/session/claude/invoke.py +236 -0
  278. forge/session/claude/paths.py +200 -0
  279. forge/session/cleanup.py +216 -0
  280. forge/session/config.py +34 -0
  281. forge/session/direct_model.py +107 -0
  282. forge/session/effective.py +169 -0
  283. forge/session/exceptions.py +255 -0
  284. forge/session/handoff.py +881 -0
  285. forge/session/handoff_agent.py +544 -0
  286. forge/session/hooks/__init__.py +35 -0
  287. forge/session/hooks/models.py +73 -0
  288. forge/session/hooks/session_start.py +507 -0
  289. forge/session/identity.py +84 -0
  290. forge/session/index.py +553 -0
  291. forge/session/manager.py +1506 -0
  292. forge/session/models.py +572 -0
  293. forge/session/overrides.py +344 -0
  294. forge/session/plan_resolution.py +286 -0
  295. forge/session/prev_sessions.py +128 -0
  296. forge/session/store.py +431 -0
  297. forge/session/validation.py +47 -0
  298. forge/session/worktree/__init__.py +65 -0
  299. forge/session/worktree/cleanup.py +262 -0
  300. forge/session/worktree/config_copy.py +203 -0
  301. forge/session/worktree/create.py +332 -0
  302. forge/sidecar/__init__.py +29 -0
  303. forge/sidecar/container.py +161 -0
  304. forge/sidecar/docker.py +86 -0
  305. forge/sidecar/secrets.py +19 -0
  306. multi_forge-0.2.0.dist-info/METADATA +242 -0
  307. multi_forge-0.2.0.dist-info/RECORD +311 -0
  308. multi_forge-0.2.0.dist-info/WHEEL +4 -0
  309. multi_forge-0.2.0.dist-info/entry_points.txt +2 -0
  310. multi_forge-0.2.0.dist-info/licenses/LICENSE +203 -0
  311. multi_forge-0.2.0.dist-info/licenses/NOTICE +14 -0
@@ -0,0 +1,619 @@
1
+ """LiteLLM client implementation.
2
+
3
+ Uses OpenAI SDK to communicate with LiteLLM endpoints (remote or local).
4
+ Supports both non-streaming and streaming completions with tool support.
5
+
6
+ GPT-5 Models:
7
+ GPT-5 family models use the Responses API which supports tools, verbosity
8
+ control, and reasoning_effort together. Chat Completions API is only used
9
+ for non-GPT-5 models (it does NOT support reasoning_effort with function
10
+ tools for GPT-5).
11
+ """
12
+
13
+ import json
14
+ import logging
15
+ import ssl
16
+ import time
17
+ from typing import Any, AsyncGenerator
18
+
19
+ import httpx
20
+ from openai import AsyncOpenAI
21
+ from tenacity import (
22
+ retry,
23
+ retry_if_exception,
24
+ stop_after_attempt,
25
+ wait_exponential,
26
+ )
27
+
28
+ from forge.core.models.catalog import get_model_spec, model_exists
29
+ from forge.runtime_config import get_runtime_config
30
+
31
+ from ..credentials import CredentialManager
32
+ from ..detection import ProviderType
33
+ from ..errors import AuthenticationError, ProviderError
34
+ from ..types import (
35
+ CompletionResponse,
36
+ Message,
37
+ ModelHyperparameters,
38
+ StreamEvent,
39
+ ToolCall,
40
+ ToolCallDelta,
41
+ )
42
+ from .base import estimate_message_tokens, merge_hyperparams
43
+ from .openai_compat import (
44
+ ToolCallAccumulator,
45
+ build_chat_completion_kwargs,
46
+ extract_cached_tokens,
47
+ is_retryable_error,
48
+ message_to_openai,
49
+ openai_response_to_completion,
50
+ )
51
+
52
+ logger = logging.getLogger(__name__)
53
+
54
+
55
+ class LiteLLMClient:
56
+ """LiteLLM client using OpenAI SDK.
57
+
58
+ Supports both remote LiteLLM and local LiteLLM instances.
59
+ Uses Chat Completions API for standard models, and Responses API for GPT-5
60
+ family models (which supports tools, verbosity, and reasoning_effort together).
61
+ """
62
+
63
+ def __init__(
64
+ self,
65
+ model: str,
66
+ provider: ProviderType,
67
+ credentials: CredentialManager | None = None,
68
+ default_hyperparams: ModelHyperparameters | None = None,
69
+ ) -> None:
70
+ """Initialize LiteLLM client.
71
+
72
+ Args:
73
+ model: Model identifier (e.g., "openai/gpt-5.5").
74
+ provider: Provider type (litellm_remote or litellm_local).
75
+ credentials: Credential manager (uses default if not provided).
76
+ default_hyperparams: Default hyperparameters for all calls.
77
+ """
78
+ self._model = model
79
+ self._provider = provider
80
+ self._credentials = credentials or CredentialManager.default()
81
+ self._default_hyperparams = default_hyperparams
82
+ self._client: AsyncOpenAI | None = None
83
+
84
+ @property
85
+ def model(self) -> str:
86
+ """The model this client is configured for."""
87
+ return self._model
88
+
89
async def _get_client(self) -> AsyncOpenAI:
    """Return the cached OpenAI SDK client, building it on first use.

    Credentials for ``self._provider`` are fetched from the credential
    manager. When they carry an ``ssl_cert`` entry, a dedicated httpx
    client verifying against that CA file is handed to the SDK.
    """
    cached = self._client
    if cached is not None:
        return cached

    creds = await self._credentials.get_credentials(self._provider)

    # Custom SSL certificate (e.g., remote proxy root CA) needs its own
    # httpx client; otherwise the SDK gets None.
    ca_file = creds.get("ssl_cert")
    transport = (
        httpx.AsyncClient(verify=ssl.create_default_context(cafile=ca_file))
        if ca_file
        else None
    )

    cli_version = get_runtime_config().user_agent_claude_code_version or "unknown"
    user_agent = f"claude-cli/{cli_version} (external, cli)"
    self._client = AsyncOpenAI(
        api_key=creds["api_key"],
        base_url=creds["base_url"],
        http_client=transport,
        default_headers={"User-Agent": user_agent},
    )
    return self._client
111
+
112
+ _is_retryable_error = staticmethod(is_retryable_error)
113
+
114
+ def _is_gpt5_model(self) -> bool:
115
+ """Check if the current model belongs to the GPT-5 family.
116
+
117
+ Used by the Chat Completions safety net (`_build_request_kwargs`) to
118
+ strip `reasoning_effort` when tools are present -- a Chat Completions
119
+ API limitation that affects all GPT-5 models regardless of whether
120
+ we route them to Responses API. Distinct from `_should_use_responses_api`
121
+ which determines routing.
122
+ """
123
+ model_name = self._model.split("/")[-1].lower()
124
+ return model_name.startswith("gpt-5")
125
+
126
def _should_use_responses_api(
    self,
    tools: list[dict[str, Any]] | None,
    hyperparams: ModelHyperparameters,
) -> bool:
    """Decide whether requests for this model go through the Responses API.

    The answer comes from the `use_responses_api` flag in the model
    catalog, looked up by the lower-cased segment after the last "/".
    Models missing from the catalog yield False. `tools` and
    `hyperparams` are accepted but not consulted.
    """
    _, _, tail = self._model.rpartition("/")
    catalog_key = tail.lower()
    return get_model_spec(catalog_key).use_responses_api if model_exists(catalog_key) else False
141
+
142
+ @staticmethod
143
+ def _convert_messages_for_responses(messages: list[Message]) -> list[dict[str, Any]]:
144
+ """Convert canonical Messages to Responses API structured input format.
145
+
146
+ Handles tool call history by converting:
147
+ - assistant messages with tool_calls -> assistant message + function_call items
148
+ - tool role messages -> function_call_output items
149
+ - system/user/assistant text -> standard role messages
150
+ - multimodal content -> Responses API format (input_text, input_image)
151
+ """
152
+ input_items: list[dict[str, Any]] = []
153
+
154
+ for msg in messages:
155
+ content: Any = msg.content
156
+
157
+ # Convert multimodal content to Responses API format
158
+ if isinstance(content, list):
159
+ converted_parts: list[dict[str, Any]] = []
160
+ for item in content:
161
+ if not isinstance(item, dict):
162
+ continue
163
+ if item.get("type") == "text":
164
+ converted_parts.append({"type": "input_text", "text": item.get("text", "")})
165
+ elif item.get("type") == "image_url":
166
+ image_data = item.get("image_url", {})
167
+ url = image_data.get("url", "") if isinstance(image_data, dict) else str(image_data)
168
+ converted_parts.append({"type": "input_image", "image_url": url})
169
+ content = converted_parts if converted_parts else ""
170
+
171
+ if msg.role in ("system", "user"):
172
+ input_items.append({"role": msg.role, "content": content or ""})
173
+
174
+ elif msg.role == "assistant":
175
+ if content:
176
+ input_items.append({"role": "assistant", "content": content})
177
+ # Convert tool_calls to Responses API function_call items
178
+ if msg.tool_calls:
179
+ for tc in msg.tool_calls:
180
+ input_items.append(
181
+ {
182
+ "type": "function_call",
183
+ "call_id": tc.id,
184
+ "name": tc.name,
185
+ "arguments": json.dumps(tc.arguments),
186
+ }
187
+ )
188
+ elif not content:
189
+ input_items.append({"role": "assistant", "content": ""})
190
+
191
+ elif msg.role == "tool":
192
+ # Convert tool result to Responses API function_call_output
193
+ if isinstance(content, (dict, list)):
194
+ output_str = json.dumps(content)
195
+ else:
196
+ output_str = str(content) if content else ""
197
+ input_items.append(
198
+ {
199
+ "type": "function_call_output",
200
+ "call_id": msg.tool_call_id or "",
201
+ "output": output_str,
202
+ }
203
+ )
204
+
205
+ return input_items
206
+
207
+ @staticmethod
208
+ def _convert_tools_for_responses(tools: list[dict[str, Any]]) -> list[dict[str, Any]]:
209
+ """Convert Chat Completions tool format to Responses API format.
210
+
211
+ Chat Completions: {type: "function", function: {name, description, parameters}}
212
+ Responses API: {type: "function", name, description, parameters}
213
+ """
214
+ responses_tools = []
215
+ for tool in tools:
216
+ if tool.get("type") == "function" and "function" in tool:
217
+ func = tool["function"]
218
+ resp_tool: dict[str, Any] = {
219
+ "type": "function",
220
+ "name": func.get("name"),
221
+ "parameters": func.get("parameters", {}),
222
+ }
223
+ if func.get("description"):
224
+ resp_tool["description"] = func["description"]
225
+ if func.get("strict") is not None:
226
+ resp_tool["strict"] = func["strict"]
227
+ responses_tools.append(resp_tool)
228
+ else:
229
+ responses_tools.append(tool)
230
+ return responses_tools
231
+
232
@staticmethod
def _parse_responses_output(response: Any, model: str) -> CompletionResponse:
    """Parse a Responses API result into a canonical CompletionResponse.

    Collects ``output_text`` parts from ``message`` items and builds a
    ToolCall for each ``function_call`` item. ``response.status`` decides
    the finish reason: "incomplete" -> "length", "failed"/"cancelled" ->
    "error", otherwise "tool_calls" when any were found, else "stop".

    Args:
        response: Object returned by ``client.responses.create``; read
            via ``getattr`` so missing attributes fall back to defaults.
        model: Model identifier recorded in the ``raw`` payload.

    Returns:
        CompletionResponse with joined text, tool calls (or None), usage
        (or None) and a small ``raw`` dict.
    """
    text_parts: list[str] = []
    tool_calls: list[ToolCall] = []

    # `or []` / `or ""` cover attributes that exist but are None.
    for item in getattr(response, "output", None) or []:
        item_type = getattr(item, "type", None)
        if item_type == "message":
            for part in getattr(item, "content", None) or []:
                if getattr(part, "type", None) == "output_text":
                    text_parts.append(getattr(part, "text", "") or "")
        elif item_type == "function_call":
            args_raw = getattr(item, "arguments", "{}")
            if isinstance(args_raw, dict):
                arguments = args_raw
            else:
                try:
                    arguments = json.loads(args_raw) if args_raw else {}
                except (json.JSONDecodeError, TypeError):
                    arguments = {}
                # Valid JSON that is not an object ("null", "[]", "3")
                # gets the same fallback as unparseable input.
                if not isinstance(arguments, dict):
                    arguments = {}
            tool_calls.append(
                ToolCall(
                    id=getattr(item, "call_id", ""),
                    name=getattr(item, "name", ""),
                    arguments=arguments,
                )
            )

    text = "".join(text_parts)

    usage = None
    resp_usage = getattr(response, "usage", None)
    if resp_usage:
        input_tokens = getattr(resp_usage, "input_tokens", 0) or 0
        output_tokens = getattr(resp_usage, "output_tokens", 0) or 0
        # Reported under Chat Completions style key names.
        usage = {
            "prompt_tokens": input_tokens,
            "completion_tokens": output_tokens,
            "total_tokens": input_tokens + output_tokens,
        }
        cached = extract_cached_tokens(resp_usage)
        if cached:
            usage["cached_tokens"] = cached

    status = getattr(response, "status", "completed")
    if status == "incomplete":
        finish_reason = "length"
    elif status in ("failed", "cancelled"):
        finish_reason = "error"
    elif tool_calls:
        finish_reason = "tool_calls"
    else:
        finish_reason = "stop"

    return CompletionResponse(
        text=text,
        tool_calls=tool_calls if tool_calls else None,
        usage=usage,
        raw={
            # Synthesize an id from the clock when the response has none.
            "id": getattr(response, "id", f"responses-{int(time.time())}"),
            "object": "responses",
            "model": model,
            "finish_reason": finish_reason,
        },
    )
302
+
303
def _message_to_openai(self, msg: Message) -> dict[str, Any]:
    """Translate one canonical Message via the shared ``message_to_openai`` helper."""
    return message_to_openai(msg)
306
+
307
def _openai_to_completion(self, response: Any) -> CompletionResponse:
    """Turn an OpenAI SDK response into a CompletionResponse for this client's provider."""
    return openai_response_to_completion(response, self._provider)
310
+
311
def _build_request_kwargs(
    self,
    messages: list[Message],
    tools: list[dict[str, Any]] | None,
    hyperparams: ModelHyperparameters,
) -> dict[str, Any]:
    """Assemble the keyword arguments for a Chat Completions request.

    Delegates to ``build_chat_completion_kwargs`` and then, for GPT-5
    family models called with tools, removes ``reasoning_effort`` and
    logs a warning.
    """
    request = build_chat_completion_kwargs(self._model, messages, tools, hyperparams)

    # GPT-5 Chat Completions API doesn't support reasoning_effort with
    # function tools. Checked on the finished kwargs so a value merged in
    # from extras is caught too.
    must_strip = tools and "reasoning_effort" in request and self._is_gpt5_model()
    if not must_strip:
        return request

    dropped = request.pop("reasoning_effort")
    logger.warning(
        f"Stripped reasoning_effort={dropped} - "
        f"not supported with function tools on Chat Completions API for {self._model}"
    )
    return request
330
+
331
async def _complete_with_responses_api(
    self,
    client: AsyncOpenAI,
    messages: list[Message],
    hyperparams: ModelHyperparameters,
    tools: list[dict[str, Any]] | None = None,
) -> CompletionResponse:
    """Run one completion through the Responses API.

    Builds the request from the canonical messages and hyperparameters,
    sends it with ``client.responses.create`` and parses the result.
    Deliberately carries no retry decorator so that callers which already
    retry do not end up with nested retries (3x3=9).

    Args:
        client: OpenAI SDK client to send the request with.
        messages: Conversation history in canonical form.
        hyperparams: Merged hyperparameters for this call.
        tools: Optional tool definitions in Chat Completions format.

    Returns:
        The parsed CompletionResponse.
    """
    request_params: dict[str, Any] = {
        "model": self._model,
        "input": self._convert_messages_for_responses(messages),
        # Responses API requires max_output_tokens >= 16
        "max_output_tokens": max(hyperparams.max_tokens, 16),
    }

    verbosity = hyperparams.verbosity
    if verbosity is not None:
        request_params["text"] = {"verbosity": verbosity}

    effort = hyperparams.reasoning_effort
    if effort is not None:
        request_params["reasoning"] = {"effort": effort}

    temperature = hyperparams.temperature
    if temperature is not None:
        request_params["temperature"] = temperature

    if tools:
        request_params["tools"] = self._convert_tools_for_responses(tools)

    # Forward extra_headers (e.g., User-Agent from incoming Claude Code request)
    openai_extras = hyperparams.extra.get("openai", {})
    forwarded_headers = openai_extras.get("extra_headers")
    if forwarded_headers:
        request_params["extra_headers"] = forwarded_headers

    tools_log = f", tools={len(tools)}" if tools else ""
    logger.info(
        f"GPT-5 Responses API call: model={self._model}, "
        f"verbosity={hyperparams.verbosity}, reasoning={hyperparams.reasoning_effort}{tools_log}"
    )

    raw_response = await client.responses.create(**request_params)
    return self._parse_responses_output(raw_response, self._model)
381
+
382
@retry(
    retry=retry_if_exception(lambda e: isinstance(e, Exception) and LiteLLMClient._is_retryable_error(e)),
    stop=stop_after_attempt(3),
    wait=wait_exponential(multiplier=1, min=2, max=10),
    reraise=True,
)
async def _make_completion_request(
    self,
    client: AsyncOpenAI,
    messages: list[Message],
    tools: list[dict[str, Any]] | None,
    merged_params: ModelHyperparameters,
) -> CompletionResponse:
    """Send one completion request, retrying retryable failures.

    The retry wrapper lives on this method rather than on ``complete()``
    so that tenacity inspects the raw OpenAI exceptions (RateLimitError,
    APIStatusError) before they get wrapped into
    ProviderError/AuthenticationError. Up to three attempts are made with
    exponential backoff, and the last exception is re-raised.
    """
    if not self._should_use_responses_api(tools, merged_params):
        request_kwargs = self._build_request_kwargs(messages, tools, merged_params)
        raw_response = await client.chat.completions.create(**request_kwargs)
        return self._openai_to_completion(raw_response)

    return await self._complete_with_responses_api(client, messages, merged_params, tools=tools)
407
+
408
async def complete(
    self,
    messages: list[Message],
    *,
    tools: list[dict[str, Any]] | None = None,
    hyperparams: ModelHyperparameters | None = None,
) -> CompletionResponse:
    """Non-streaming completion.

    Routes to the Responses API or the Chat Completions API depending on
    the model (see ``_make_completion_request``).

    Args:
        messages: List of messages in the conversation.
        tools: Optional list of tool definitions.
        hyperparams: Optional hyperparameters to override defaults.

    Returns:
        CompletionResponse with text, optional tool_calls, and usage.

    Raises:
        ProviderError: If the API call fails.
        AuthenticationError: If authentication fails. Cached credentials
            are invalidated and the HTTP client is discarded first.
    """
    merged_params = merge_hyperparams(self._default_hyperparams, hyperparams)
    client = await self._get_client()

    try:
        return await self._make_completion_request(client, messages, tools, merged_params)
    except (ProviderError, AuthenticationError):
        # Already wrapped, re-raise as-is
        raise
    except Exception as e:
        error_str = str(e).lower()
        # An HTTP 401 is an authentication failure even when the error
        # text mentions neither keyword (e.g. "Incorrect API key provided").
        is_auth_failure = (
            getattr(e, "status_code", None) == 401
            or "authentication" in error_str
            or "unauthorized" in error_str
        )
        if is_auth_failure:
            # Drop stale credentials and the client built from them so
            # the next request re-resolves both.
            await self._credentials.invalidate(self._provider)
            await self._close_client()
            raise AuthenticationError(self._provider, str(e)) from e
        raise ProviderError(self._provider, e) from e
446
+
447
+ async def _close_client(self) -> None:
448
+ """Close and discard the cached HTTP client.
449
+
450
+ Forces credential re-resolution on next request. Especially
451
+ important when a custom httpx.AsyncClient with SSL context was
452
+ created (remote LiteLLM with root CA).
453
+ """
454
+ client = self._client
455
+ self._client = None
456
+ if client is not None:
457
+ try:
458
+ await client.close()
459
+ except Exception:
460
+ pass
461
+
462
async def stream(
    self,
    messages: list[Message],
    *,
    tools: list[dict[str, Any]] | None = None,
    hyperparams: ModelHyperparameters | None = None,
) -> AsyncGenerator[StreamEvent, None]:
    """Stream a completion as canonical StreamEvent objects.

    When the Responses API is selected (GPT-5 models), the request is
    made non-streaming and the finished response is replayed as
    synthetic events. Otherwise the Chat Completions streaming API is
    consumed chunk by chunk.

    Failures are reported in-band as an ``error`` event rather than
    raised. For tool calls, accumulate ToolCallDelta events until
    response_end.

    Args:
        messages: List of messages in the conversation.
        tools: Optional list of tool definitions.
        hyperparams: Optional hyperparameters to override defaults.

    Yields:
        StreamEvent objects (text_delta, tool_call_delta, response_end, usage, error).
    """
    effective_params = merge_hyperparams(self._default_hyperparams, hyperparams)
    api_client = await self._get_client()

    # Responses API path: one blocking call, then replay it as stream events.
    if self._should_use_responses_api(tools, effective_params):
        try:
            logger.info(
                f"GPT-5 Responses API (streaming fallback): model={self._model}, "
                f"verbosity={effective_params.verbosity}"
            )
            completion = await self._complete_with_responses_api(
                api_client, messages, effective_params, tools=tools
            )

            if completion.text:
                yield StreamEvent(type="text_delta", text=completion.text)

            # One delta per finished tool call, so accumulating callers still work.
            for position, call in enumerate(completion.tool_calls or ()):
                yield StreamEvent(
                    type="tool_call_delta",
                    tool_call_delta=ToolCallDelta(
                        index=position,
                        id=call.id,
                        name=call.name,
                        arguments_json=json.dumps(call.arguments),
                    ),
                )

            if completion.usage:
                yield StreamEvent(type="usage", usage=completion.usage)

            yield StreamEvent(
                type="response_end",
                tool_calls=completion.tool_calls,
                usage=completion.usage,
            )
        except Exception as exc:
            lowered = str(exc).lower()
            if "authentication" in lowered or "unauthorized" in lowered:
                await self._credentials.invalidate(self._provider)
                await self._close_client()
            yield StreamEvent(type="error", error=str(exc))
        # Success or failure, the fallback path never continues into streaming.
        return

    # Standard Chat Completions API streaming path.
    pending_calls = ToolCallAccumulator()
    usage_totals: dict[str, int] | None = None

    try:
        request_kwargs = self._build_request_kwargs(messages, tools, effective_params)
        request_kwargs["stream"] = True
        request_kwargs["stream_options"] = {"include_usage": True}

        chunk_stream = await api_client.chat.completions.create(**request_kwargs)

        async for chunk in chunk_stream:
            # Usage is checked before choices, since a chunk may carry
            # usage while having no choices.
            chunk_usage = chunk.usage
            if chunk_usage:
                usage_totals = {
                    "prompt_tokens": chunk_usage.prompt_tokens,
                    "completion_tokens": chunk_usage.completion_tokens,
                    "total_tokens": chunk_usage.total_tokens,
                }
                cached_count = extract_cached_tokens(chunk_usage)
                if cached_count:
                    usage_totals["cached_tokens"] = cached_count

            if not chunk.choices:
                continue

            delta = chunk.choices[0].delta

            if delta.content:
                yield StreamEvent(type="text_delta", text=delta.content)

            call_fragments = delta.tool_calls
            if not call_fragments:
                continue

            for fragment in call_fragments:
                slot = fragment.index
                # A lone fragment without an index gets the accumulator's default slot.
                if slot is None and len(call_fragments) == 1:
                    slot = pending_calls.default_index()

                fn = fragment.function
                if fn:
                    fn_name = fn.name
                    fn_args = fn.arguments or ""
                else:
                    fn_name = None
                    fn_args = ""

                tool_delta = ToolCallDelta(
                    index=slot,
                    id=fragment.id,
                    name=fn_name,
                    arguments_json=fn_args,
                )
                pending_calls.add_delta(tool_delta)
                yield StreamEvent(type="tool_call_delta", tool_call_delta=tool_delta)

        if usage_totals:
            yield StreamEvent(type="usage", usage=usage_totals)

        completed_calls = None
        if pending_calls.has_pending():
            completed_calls = pending_calls.finalize()

        yield StreamEvent(
            type="response_end",
            tool_calls=completed_calls,
            usage=usage_totals,
        )

    except Exception as exc:
        lowered = str(exc).lower()
        if "authentication" in lowered or "unauthorized" in lowered:
            await self._credentials.invalidate(self._provider)
            await self._close_client()
        yield StreamEvent(type="error", error=str(exc))
594
+
595
async def count_tokens(
    self,
    messages: list[Message],
    tools: list[dict[str, Any]] | None = None,
) -> int:
    """Return an approximate token count for a request.

    This is an estimate, not a tokenizer call. Messages are converted to
    the OpenAI wire format and passed to estimate_message_tokens(). Tool
    definitions, when given, add one token per four characters of their
    JSON serialization.

    Args:
        messages: List of messages to count.
        tools: Optional list of tool definitions to include in count.

    Returns:
        Estimated token count.
    """
    converted = [self._message_to_openai(msg) for msg in messages]
    estimate = estimate_message_tokens(converted)

    if not tools:
        return estimate

    # Integer division: tool schemas are costed at 4 characters per token.
    return estimate + len(json.dumps(tools)) // 4