bareagent-cli 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (121) hide show
  1. bareagent/__init__.py +10 -0
  2. bareagent/concurrency/__init__.py +6 -0
  3. bareagent/concurrency/background.py +97 -0
  4. bareagent/concurrency/notification.py +61 -0
  5. bareagent/concurrency/scheduler.py +136 -0
  6. bareagent/config.toml +299 -0
  7. bareagent/core/__init__.py +1 -0
  8. bareagent/core/config_paths.py +49 -0
  9. bareagent/core/context.py +127 -0
  10. bareagent/core/fileutil.py +103 -0
  11. bareagent/core/goal.py +214 -0
  12. bareagent/core/handlers/__init__.py +1 -0
  13. bareagent/core/handlers/bash.py +79 -0
  14. bareagent/core/handlers/file_edit.py +47 -0
  15. bareagent/core/handlers/file_read.py +270 -0
  16. bareagent/core/handlers/file_write.py +34 -0
  17. bareagent/core/handlers/glob_search.py +30 -0
  18. bareagent/core/handlers/goal.py +60 -0
  19. bareagent/core/handlers/grep_search.py +52 -0
  20. bareagent/core/handlers/memory.py +71 -0
  21. bareagent/core/handlers/plan.py +106 -0
  22. bareagent/core/handlers/search_utils.py +77 -0
  23. bareagent/core/handlers/skill.py +87 -0
  24. bareagent/core/handlers/subagent_send.py +70 -0
  25. bareagent/core/handlers/web_fetch.py +126 -0
  26. bareagent/core/handlers/web_search.py +165 -0
  27. bareagent/core/handlers/workflow.py +190 -0
  28. bareagent/core/loop.py +535 -0
  29. bareagent/core/retry.py +131 -0
  30. bareagent/core/sandbox.py +27 -0
  31. bareagent/core/schema.py +21 -0
  32. bareagent/core/tools.py +779 -0
  33. bareagent/core/workflow.py +517 -0
  34. bareagent/core/workflow_registry.py +219 -0
  35. bareagent/debug/__init__.py +0 -0
  36. bareagent/debug/interaction_log.py +263 -0
  37. bareagent/debug/viewer.html +1750 -0
  38. bareagent/debug/web_viewer.py +157 -0
  39. bareagent/hooks/__init__.py +32 -0
  40. bareagent/hooks/config.py +118 -0
  41. bareagent/hooks/engine.py +197 -0
  42. bareagent/hooks/errors.py +14 -0
  43. bareagent/hooks/events.py +22 -0
  44. bareagent/lsp/__init__.py +63 -0
  45. bareagent/lsp/config.py +134 -0
  46. bareagent/lsp/coord.py +118 -0
  47. bareagent/lsp/diagnostics.py +240 -0
  48. bareagent/lsp/errors.py +24 -0
  49. bareagent/lsp/manager.py +866 -0
  50. bareagent/lsp/tools.py +629 -0
  51. bareagent/lsp/workspace_edit.py +305 -0
  52. bareagent/main.py +4205 -0
  53. bareagent/mcp/__init__.py +69 -0
  54. bareagent/mcp/_sse.py +69 -0
  55. bareagent/mcp/client.py +341 -0
  56. bareagent/mcp/config.py +169 -0
  57. bareagent/mcp/errors.py +32 -0
  58. bareagent/mcp/manager.py +318 -0
  59. bareagent/mcp/protocol.py +187 -0
  60. bareagent/mcp/registry.py +557 -0
  61. bareagent/mcp/transport/__init__.py +15 -0
  62. bareagent/mcp/transport/base.py +149 -0
  63. bareagent/mcp/transport/http_legacy.py +192 -0
  64. bareagent/mcp/transport/http_streamable.py +217 -0
  65. bareagent/mcp/transport/stdio.py +202 -0
  66. bareagent/memory/__init__.py +1 -0
  67. bareagent/memory/compact.py +203 -0
  68. bareagent/memory/conversation_io.py +226 -0
  69. bareagent/memory/embedding.py +194 -0
  70. bareagent/memory/persistent.py +515 -0
  71. bareagent/memory/token_counter.py +67 -0
  72. bareagent/memory/token_tracker.py +262 -0
  73. bareagent/memory/transcript.py +100 -0
  74. bareagent/permission/__init__.py +1 -0
  75. bareagent/permission/guard.py +329 -0
  76. bareagent/permission/rules.py +19 -0
  77. bareagent/planning/__init__.py +19 -0
  78. bareagent/planning/agent_types.py +169 -0
  79. bareagent/planning/skill_gen.py +141 -0
  80. bareagent/planning/skill_store.py +173 -0
  81. bareagent/planning/skills.py +146 -0
  82. bareagent/planning/subagent.py +355 -0
  83. bareagent/planning/subagent_registry.py +77 -0
  84. bareagent/planning/tasks.py +348 -0
  85. bareagent/planning/todo.py +153 -0
  86. bareagent/planning/worktree.py +122 -0
  87. bareagent/provider/__init__.py +1 -0
  88. bareagent/provider/anthropic.py +348 -0
  89. bareagent/provider/base.py +136 -0
  90. bareagent/provider/factory.py +130 -0
  91. bareagent/provider/openai.py +881 -0
  92. bareagent/provider/presets.py +72 -0
  93. bareagent/provider/setup.py +356 -0
  94. bareagent/skills/.gitkeep +1 -0
  95. bareagent/skills/code-review/SKILL.md +68 -0
  96. bareagent/skills/git/SKILL.md +68 -0
  97. bareagent/skills/test/SKILL.md +70 -0
  98. bareagent/team/__init__.py +17 -0
  99. bareagent/team/autonomous.py +193 -0
  100. bareagent/team/mailbox.py +239 -0
  101. bareagent/team/manager.py +155 -0
  102. bareagent/team/protocols.py +129 -0
  103. bareagent/tracing/__init__.py +12 -0
  104. bareagent/tracing/_api.py +92 -0
  105. bareagent/tracing/_proxy.py +60 -0
  106. bareagent/tracing/composite.py +115 -0
  107. bareagent/tracing/json_file.py +115 -0
  108. bareagent/tracing/langfuse.py +139 -0
  109. bareagent/tracing/otel.py +107 -0
  110. bareagent/tracing/setup.py +85 -0
  111. bareagent/ui/__init__.py +24 -0
  112. bareagent/ui/console.py +167 -0
  113. bareagent/ui/prompt.py +78 -0
  114. bareagent/ui/protocol.py +24 -0
  115. bareagent/ui/stream.py +66 -0
  116. bareagent/ui/theme.py +240 -0
  117. bareagent_cli-0.1.0.dist-info/METADATA +331 -0
  118. bareagent_cli-0.1.0.dist-info/RECORD +121 -0
  119. bareagent_cli-0.1.0.dist-info/WHEEL +4 -0
  120. bareagent_cli-0.1.0.dist-info/entry_points.txt +2 -0
  121. bareagent_cli-0.1.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,881 @@
1
+ from __future__ import annotations
2
+
3
+ import itertools
4
+ import json
5
+ from typing import Any
6
+
7
+ import openai
8
+
9
+ from bareagent.provider.base import BaseLLMProvider, LLMResponse, StreamEvent, ToolCall
10
+
11
# Request keys the provider fills in itself for chat.completions calls; caller
# kwargs using these names are filtered out when the request is assembled.
_PROTECTED_CHAT_KEYS = frozenset({"model", "messages", "tools"})
# Same filter for Responses-API requests.
_PROTECTED_RESPONSES_KEYS = frozenset({"model", "input", "tools", "instructions"})
# Hostnames that count as the official OpenAI endpoint when a base_url is set.
_OPENAI_OFFICIAL_HOSTS = frozenset({"api.openai.com"})
14
+
15
+
16
+ def _stringify_block(value: Any) -> str:
17
+ """Render an arbitrary content block as compact JSON for the tool role.
18
+
19
+ Mirrors ``BaseLLMProvider._stringify_content`` for single items so the lift
20
+ helper stays a free function (and therefore reusable from both the
21
+ chat_completions and Responses-API code paths).
22
+ """
23
+ if isinstance(value, dict) and value.get("type") == "text":
24
+ return str(value.get("text", ""))
25
+ return json.dumps(value, ensure_ascii=False, default=str)
26
+
27
+
28
+ def _lift_image_blocks(
29
+ tool_result_content: Any,
30
+ ) -> tuple[str, list[dict[str, Any]]]:
31
+ """Split multimodal MCP tool_result content into (text, image_blocks).
32
+
33
+ OpenAI's ``tool`` role (chat_completions) and the Responses API's
34
+ ``function_call_output`` item both refuse image attachments — image
35
+ content must be lifted into a follow-up ``user`` message. Both code paths
36
+ share this helper so the lift rules (placeholder text when there is no
37
+ text part, image_url shape, non-base64 source degradation) stay aligned.
38
+
39
+ Returns ``(text_for_tool_role, image_blocks)``. ``text_for_tool_role`` is
40
+ always a string: empty content yields a placeholder when images exist, or
41
+ a stringified fallback otherwise. ``image_blocks`` is a list of
42
+ chat-completion-shaped ``{type:"image_url", image_url:{url:"data:..."}}``
43
+ blocks; the Responses-API caller translates them to ``input_image`` parts.
44
+ """
45
+ if not isinstance(tool_result_content, list):
46
+ return _stringify_block(tool_result_content), []
47
+
48
+ text_parts: list[str] = []
49
+ image_blocks: list[dict[str, Any]] = []
50
+ for item in tool_result_content:
51
+ if not isinstance(item, dict):
52
+ text_parts.append(_stringify_block(item))
53
+ continue
54
+ item_type = item.get("type")
55
+ if item_type == "text":
56
+ text = item.get("text", "")
57
+ if isinstance(text, str):
58
+ text_parts.append(text)
59
+ continue
60
+ if item_type == "image":
61
+ source = item.get("source")
62
+ if not isinstance(source, dict) or source.get("type") != "base64":
63
+ text_parts.append(_stringify_block(item))
64
+ continue
65
+ data = source.get("data", "")
66
+ if not isinstance(data, str) or not data:
67
+ text_parts.append(_stringify_block(item))
68
+ continue
69
+ mime = source.get("media_type", "image/png")
70
+ image_blocks.append(
71
+ {
72
+ "type": "image_url",
73
+ "image_url": {"url": f"data:{mime};base64,{data}"},
74
+ }
75
+ )
76
+ continue
77
+ text_parts.append(_stringify_block(item))
78
+
79
+ text = "\n".join(part for part in text_parts if part)
80
+ if not text and image_blocks:
81
+ text = "[Tool returned image(s); see next message]"
82
+ return text, image_blocks
83
+
84
+
85
+ class OpenAIProvider(BaseLLMProvider):
86
def __init__(
    self,
    api_key: str,
    model: str,
    base_url: str | None = None,
    wire_api: str | None = None,
) -> None:
    """Create the SDK client and record model, endpoint and wire protocol.

    ``wire_api`` is trimmed and lower-cased; ``None`` or an empty string
    selects ``"chat_completions"``.
    """
    # SDK-level retries are switched off (max_retries=0) so retrying happens
    # in a single place and attempts are not multiplied by two retry layers.
    sdk_client = openai.OpenAI(api_key=api_key, base_url=base_url, max_retries=0)
    selected_wire_api = wire_api or "chat_completions"

    self.client = sdk_client
    self.model = model
    self.base_url = base_url
    self.wire_api = selected_wire_api.strip().lower()
99
+
100
def create(
    self,
    messages: list[dict[str, Any]],
    tools: list[dict[str, Any]],
    **kwargs: Any,
) -> LLMResponse:
    """Run one non-streaming completion and return the parsed response.

    When ``self.wire_api`` is ``"responses"`` the call is delegated to the
    Responses-API path; every other value goes through chat.completions.
    Extra ``kwargs`` are forwarded to the request builder.
    """
    if self.wire_api != "responses":
        request = self._build_chat_request_params(messages, tools, **kwargs)
        raw = self.client.chat.completions.create(**request)
        return self._parse_response(raw)
    return self._create_via_responses(messages, tools, **kwargs)
112
+
113
def create_stream(
    self,
    messages: list[dict[str, Any]],
    tools: list[dict[str, Any]],
    **kwargs: Any,
):
    """Stream a completion through the configured wire protocol.

    This is a generator: it re-yields every event of the selected streaming
    implementation and returns that implementation's return value as its own.
    """
    if self.wire_api == "responses":
        delegate = self._create_stream_via_responses
    else:
        delegate = self._create_stream_via_chat
    outcome = yield from delegate(messages, tools, **kwargs)
    return outcome
123
+
124
def _create_via_responses(
    self,
    messages: list[dict[str, Any]],
    tools: list[dict[str, Any]],
    **kwargs: Any,
) -> LLMResponse:
    """Issue a non-streaming Responses-API call and parse its result."""
    request = self._build_responses_request_params(messages, tools, **kwargs)
    return self._parse_responses_api_response(self.client.responses.create(**request))
133
+
134
+ def _is_openai_official_api(self) -> bool:
135
+ if not self.base_url:
136
+ return True
137
+ from urllib.parse import urlparse
138
+
139
+ host = urlparse(self.base_url).hostname or ""
140
+ return host in _OPENAI_OFFICIAL_HOSTS
141
+
142
def _create_stream_via_chat(
    self,
    messages: list[dict[str, Any]],
    tools: list[dict[str, Any]],
    **kwargs: Any,
):
    """Stream a chat.completions call, yielding events as chunks arrive.

    Yields ``StreamEvent(type="text")`` for every non-empty content delta and
    ``StreamEvent(type="tool_call")`` for tool calls once a chunk finishes
    with ``"tool_calls"`` and again, for anything still pending, when the
    stream ends. The generator's return value is an ``LLMResponse`` built from
    the accumulated text, the finalized tool calls and the last usage figures
    seen; ``input_tokens`` is prompt tokens minus cached tokens, floored at 0.
    """
    request = self._build_chat_request_params(messages, tools, **kwargs)
    request["stream"] = True
    # Usage reporting is only switched on automatically for official hosts,
    # and never replaces a caller-supplied ``stream_options``.
    if "stream_options" not in request and self._is_openai_official_api():
        request["stream_options"] = {"include_usage": True}

    collected_text: list[str] = []
    partial_calls: dict[int, dict[str, str]] = {}
    announced_ids: set[str] = set()
    prompt_total = 0
    completion_total = 0
    cached_total = 0
    last_finish = ""

    def _announce(calls):
        # One tool_call event per call handed back by _iter_new_tool_calls
        # (presumably it skips ids already recorded in ``announced_ids``).
        for call in self._iter_new_tool_calls(calls, announced_ids):
            yield StreamEvent(
                type="tool_call",
                tool_call_id=call.id,
                name=call.name,
                input=call.input,
            )

    stream = self.client.chat.completions.create(**request)
    for chunk in stream:
        usage = getattr(chunk, "usage", None)
        if usage is not None:
            reported_prompt = getattr(usage, "prompt_tokens", None)
            if reported_prompt is not None:
                prompt_total = reported_prompt
            reported_completion = getattr(usage, "completion_tokens", None)
            if reported_completion is not None:
                completion_total = reported_completion
            reported_cached = self._extract_cached_tokens(usage)
            if reported_cached:
                cached_total = reported_cached

        choices = getattr(chunk, "choices", None) or []
        if not choices:
            continue

        choice = choices[0]
        if choice.finish_reason:
            last_finish = choice.finish_reason

        # Chunks may arrive with a null ``delta`` (for instance a trailing
        # usage/finish chunk or a keep-alive from a relay), so only touch its
        # members when it is actually present.
        delta = getattr(choice, "delta", None)
        if delta is not None:
            if delta.content:
                collected_text.append(delta.content)
                yield StreamEvent(type="text", text=delta.content)

            for fragment in delta.tool_calls or []:
                slot = partial_calls.setdefault(
                    fragment.index,
                    {"id": "", "name": "", "arguments": ""},
                )
                if fragment.id:
                    slot["id"] = fragment.id
                function = fragment.function
                if function is not None:
                    if function.name:
                        slot["name"] = function.name
                    if function.arguments:
                        # Arguments arrive as string pieces; concatenate them.
                        slot["arguments"] += function.arguments

        if choice.finish_reason == "tool_calls":
            yield from _announce(self._finalize_tool_calls(partial_calls))

    # Anything not announced during the stream is announced now.
    final_calls = self._finalize_tool_calls(partial_calls)
    yield from _announce(final_calls)
    return LLMResponse(
        text="".join(collected_text),
        tool_calls=final_calls,
        stop_reason="tool_calls" if final_calls else (last_finish or "stop"),
        input_tokens=max(prompt_total - cached_total, 0),
        output_tokens=completion_total,
        cache_read_input_tokens=cached_total,
    )
236
+
237
+ def _create_stream_via_responses(
238
+ self,
239
+ messages: list[dict[str, Any]],
240
+ tools: list[dict[str, Any]],
241
+ **kwargs: Any,
242
+ ):
243
+ params = self._build_responses_request_params(messages, tools, **kwargs)
244
+ params["stream"] = True
245
+
246
+ final_payload: Any = None
247
+ yielded_tool_calls: set[str] = set()
248
+ streamed_text_parts: list[str] = []
249
+ streamed_tool_calls: list[ToolCall] = []
250
+
251
+ stream = self.client.responses.create(**params)
252
+ for event in stream:
253
+ event_type = getattr(event, "type", "")
254
+ if event_type == "response.output_text.delta":
255
+ delta = getattr(event, "delta", "")
256
+ if delta:
257
+ streamed_text_parts.append(delta)
258
+ yield StreamEvent(type="text", text=delta)
259
+ continue
260
+
261
+ if event_type == "response.output_item.done":
262
+ item = getattr(event, "item", None)
263
+ if getattr(item, "type", "") != "function_call":
264
+ continue
265
+
266
+ tool_call = ToolCall(
267
+ id=getattr(item, "call_id", "") or getattr(item, "id", ""),
268
+ name=getattr(item, "name", ""),
269
+ input=self._parse_tool_input(getattr(item, "arguments", "{}")),
270
+ )
271
+ for emitted_tool_call in self._iter_new_tool_calls(
272
+ [tool_call],
273
+ yielded_tool_calls,
274
+ ):
275
+ streamed_tool_calls.append(
276
+ ToolCall(
277
+ id=emitted_tool_call.id,
278
+ name=emitted_tool_call.name,
279
+ input=dict(emitted_tool_call.input),
280
+ )
281
+ )
282
+ yield StreamEvent(
283
+ type="tool_call",
284
+ tool_call_id=emitted_tool_call.id,
285
+ name=emitted_tool_call.name,
286
+ input=emitted_tool_call.input,
287
+ )
288
+ continue
289
+
290
+ if event_type == "response.completed":
291
+ final_payload = getattr(event, "response", None)
292
+ continue
293
+
294
+ if event_type == "response.incomplete":
295
+ final_payload = getattr(event, "response", None)
296
+ continue
297
+
298
+ if event_type == "response.failed":
299
+ response = getattr(event, "response", None)
300
+ raise RuntimeError(self._extract_responses_error(response) or "Response failed.")
301
+
302
+ if event_type == "error":
303
+ raise RuntimeError(getattr(event, "message", "Responses stream error."))
304
+
305
+ if final_payload is None:
306
+ raise RuntimeError("Responses stream ended without a completed response.")
307
+ return self._merge_streamed_responses_result(
308
+ self._parse_responses_api_response(final_payload),
309
+ streamed_text_parts=streamed_text_parts,
310
+ streamed_tool_calls=streamed_tool_calls,
311
+ )
312
+
313
def _build_chat_request_params(
    self,
    messages: list[dict[str, Any]],
    tools: list[dict[str, Any]],
    **kwargs: Any,
) -> dict[str, Any]:
    """Assemble the keyword arguments for a chat.completions request.

    ``model`` and the converted ``messages`` are always present; ``tools`` is
    added only when the converted tool list is non-empty. Remaining ``kwargs``
    are passed through, except keys listed in ``_PROTECTED_CHAT_KEYS``.
    """
    request: dict[str, Any] = {
        "model": self.model,
        "messages": self._convert_messages(messages),
    }
    tool_specs = self._convert_tools(tools)
    if tool_specs:
        request["tools"] = tool_specs
    for key, value in kwargs.items():
        if key not in _PROTECTED_CHAT_KEYS:
            request[key] = value
    return request
328
+
329
+ def _build_responses_request_params(
330
+ self,
331
+ messages: list[dict[str, Any]],
332
+ tools: list[dict[str, Any]],
333
+ **kwargs: Any,
334
+ ) -> dict[str, Any]:
335
+ instructions, input_items = self._convert_messages_for_responses(messages)
336
+ params: dict[str, Any] = {
337
+ "model": self.model,
338
+ "input": input_items,
339
+ }
340
+ if instructions:
341
+ params["instructions"] = instructions
342
+ converted_tools = self._convert_tools_for_responses(tools)
343
+ if converted_tools:
344
+ params["tools"] = converted_tools
345
+
346
+ response_kwargs = dict(kwargs)
347
+ if "max_tokens" in response_kwargs and "max_output_tokens" not in response_kwargs:
348
+ response_kwargs["max_output_tokens"] = response_kwargs.pop("max_tokens")
349
+ params.update(
350
+ {k: v for k, v in response_kwargs.items() if k not in _PROTECTED_RESPONSES_KEYS}
351
+ )
352
+ return params
353
+
354
def _convert_messages(self, messages: list[dict[str, Any]]) -> list[dict[str, Any]]:
    """Translate internal messages into chat.completions messages.

    ``system``/``user`` messages may expand into several output messages,
    ``assistant`` messages map one-to-one, and any other role is passed
    through with its content stringified.
    """
    chat_messages: list[dict[str, Any]] = []
    for entry in messages:
        role = entry["role"]
        content = entry.get("content", "")
        if role in {"system", "user"}:
            chat_messages.extend(self._convert_non_assistant_message(role, content))
        elif role == "assistant":
            chat_messages.append(self._convert_assistant_message(content))
        else:
            chat_messages.append(
                {"role": role, "content": self._stringify_content(content)}
            )
    return chat_messages
368
+
369
+ def _convert_messages_for_responses(
370
+ self,
371
+ messages: list[dict[str, Any]],
372
+ ) -> tuple[str | None, list[dict[str, Any]]]:
373
+ instruction_parts: list[str] = []
374
+ converted: list[dict[str, Any]] = []
375
+ for message in messages:
376
+ role = message["role"]
377
+ content = message.get("content", "")
378
+ if role in {"system", "developer"}:
379
+ instruction_text = self._stringify_content(content)
380
+ if instruction_text:
381
+ instruction_parts.append(instruction_text)
382
+ continue
383
+ if role in {"user", "assistant"}:
384
+ converted.extend(self._convert_response_message(role, content))
385
+ continue
386
+
387
+ converted.append(
388
+ self._make_response_text_message(role, self._stringify_content(content))
389
+ )
390
+ instructions = "\n\n".join(part for part in instruction_parts if part) or None
391
+ return instructions, converted
392
+
393
+ def _convert_response_message(self, role: str, content: Any) -> list[dict[str, Any]]:
394
+ if isinstance(content, str):
395
+ return [self._make_response_text_message(role, content)]
396
+ if not isinstance(content, list):
397
+ return [self._make_response_text_message(role, self._stringify_content(content))]
398
+
399
+ converted: list[dict[str, Any]] = []
400
+ text_parts: list[str] = []
401
+ # Image blocks lifted out of multimodal tool_result content. They must
402
+ # be attached to a user message *after* the function_call_output (the
403
+ # Responses API does not accept ``image_url`` inside the output value
404
+ # itself), mirroring the chat_completions lift logic.
405
+ deferred_image_messages: list[dict[str, Any]] = []
406
+ for block in content:
407
+ block_type = block.get("type")
408
+ if block_type == "tool_result":
409
+ tool_use_id = block.get("tool_use_id", "")
410
+ raw_content = block.get("content", "")
411
+ output_text, image_blocks = _lift_image_blocks(raw_content)
412
+ converted.append(
413
+ {
414
+ "type": "function_call_output",
415
+ "call_id": tool_use_id,
416
+ "output": output_text,
417
+ }
418
+ )
419
+ if image_blocks:
420
+ deferred_image_messages.append(
421
+ self._build_responses_image_user_message(image_blocks)
422
+ )
423
+ continue
424
+ if block_type == "tool_use":
425
+ converted.append(
426
+ {
427
+ "type": "function_call",
428
+ "call_id": block.get("id", ""),
429
+ "name": block.get("name", ""),
430
+ "arguments": json.dumps(block.get("input", {}), ensure_ascii=False),
431
+ }
432
+ )
433
+ continue
434
+ if block_type == "text":
435
+ text_parts.append(str(block.get("text", "")))
436
+ continue
437
+ text_parts.append(self._stringify_content(block))
438
+
439
+ text = "\n".join(part for part in text_parts if part)
440
+ if text:
441
+ converted.insert(0, self._make_response_text_message(role, text))
442
+ # Image-bearing follow-ups always go after the function_call_output
443
+ # they came from. Multiple tool_results in one message keep their
444
+ # relative ordering — first lifted, first appended.
445
+ converted.extend(deferred_image_messages)
446
+ return converted
447
+
448
+ def _build_responses_image_user_message(
449
+ self, image_blocks: list[dict[str, Any]]
450
+ ) -> dict[str, Any]:
451
+ """Build a Responses-API ``message`` carrying lifted image blocks.
452
+
453
+ The Responses API expects ``input_image`` parts (not ``image_url``),
454
+ so we translate from the chat_completions shape stored in
455
+ ``image_blocks`` into the Responses-API native shape here.
456
+ """
457
+ count = len(image_blocks)
458
+ parts: list[dict[str, Any]] = [
459
+ {
460
+ "type": "input_text",
461
+ "text": f"[Tool returned {count} image(s)]",
462
+ }
463
+ ]
464
+ for block in image_blocks:
465
+ url = block.get("image_url", {}).get("url", "")
466
+ parts.append({"type": "input_image", "image_url": url})
467
+ return {
468
+ "type": "message",
469
+ "role": "user",
470
+ "content": parts,
471
+ }
472
+
473
+ def _make_response_text_message(self, role: str, text: str) -> dict[str, Any]:
474
+ content_type = "output_text" if role == "assistant" else "input_text"
475
+ return {
476
+ "type": "message",
477
+ "role": role,
478
+ "content": [{"type": content_type, "text": text}],
479
+ }
480
+
481
+ def _convert_non_assistant_message(
482
+ self,
483
+ role: str,
484
+ content: Any,
485
+ ) -> list[dict[str, Any]]:
486
+ if role != "user":
487
+ return [{"role": role, "content": self._stringify_content(content)}]
488
+ if isinstance(content, str):
489
+ return [{"role": "user", "content": content}]
490
+ if not isinstance(content, list):
491
+ return [{"role": "user", "content": self._stringify_content(content)}]
492
+
493
+ converted: list[dict[str, Any]] = []
494
+ pending_text: list[str] = []
495
+
496
+ def _flush_text() -> None:
497
+ if pending_text:
498
+ text = "\n".join(p for p in pending_text if p)
499
+ if text:
500
+ converted.append({"role": "user", "content": text})
501
+ pending_text.clear()
502
+
503
+ for block in content:
504
+ if block.get("type") == "tool_result":
505
+ _flush_text()
506
+ converted.extend(self._convert_tool_result_for_openai(block))
507
+ continue
508
+ if block.get("type") == "text":
509
+ pending_text.append(str(block.get("text", "")))
510
+ continue
511
+ pending_text.append(self._stringify_content(block))
512
+
513
+ _flush_text()
514
+ return converted
515
+
516
def _convert_tool_result_for_openai(
    self,
    block: dict[str, Any],
) -> list[dict[str, Any]]:
    """Expand one tool_result block into chat.completions messages.

    * Non-list content produces a single ``tool`` message with the content
      stringified.
    * List content is split by ``_lift_image_blocks``: the text goes into the
      ``tool`` message and, when images were found, a follow-up ``user``
      message carries a short caption followed by the image blocks.
    """
    call_id = block.get("tool_use_id", "")
    payload = block.get("content", "")

    if isinstance(payload, list):
        tool_text, image_blocks = _lift_image_blocks(payload)
        result: list[dict[str, Any]] = [
            {"role": "tool", "tool_call_id": call_id, "content": tool_text}
        ]
        if image_blocks:
            caption = {
                "type": "text",
                "text": f"[Tool returned {len(image_blocks)} image(s)]",
            }
            result.append({"role": "user", "content": [caption, *image_blocks]})
        return result

    return [
        {
            "role": "tool",
            "tool_call_id": call_id,
            "content": self._stringify_content(payload),
        }
    ]
563
+
564
+ def _convert_assistant_message(self, content: Any) -> dict[str, Any]:
565
+ if isinstance(content, str):
566
+ return {"role": "assistant", "content": content}
567
+ if not isinstance(content, list):
568
+ return {"role": "assistant", "content": self._stringify_content(content)}
569
+
570
+ text_parts: list[str] = []
571
+ tool_calls: list[dict[str, Any]] = []
572
+ for block in content:
573
+ block_type = block.get("type")
574
+ if block_type == "text":
575
+ text_parts.append(str(block.get("text", "")))
576
+ continue
577
+ if block_type == "tool_use":
578
+ tool_calls.append(
579
+ {
580
+ "id": block.get("id", ""),
581
+ "type": "function",
582
+ "function": {
583
+ "name": block.get("name", ""),
584
+ "arguments": json.dumps(
585
+ block.get("input", {}),
586
+ ensure_ascii=False,
587
+ ),
588
+ },
589
+ }
590
+ )
591
+
592
+ assistant_message: dict[str, Any] = {
593
+ "role": "assistant",
594
+ "content": "\n".join(part for part in text_parts if part) or None,
595
+ }
596
+ if tool_calls:
597
+ assistant_message["tool_calls"] = tool_calls
598
+ return assistant_message
599
+
600
+ def _convert_tools(self, tools: list[dict[str, Any]]) -> list[dict[str, Any]]:
601
+ return [
602
+ {
603
+ "type": "function",
604
+ "function": {
605
+ "name": tool["name"],
606
+ "description": tool.get("description", ""),
607
+ "parameters": tool.get(
608
+ "parameters",
609
+ {"type": "object", "properties": {}},
610
+ ),
611
+ },
612
+ }
613
+ for tool in tools
614
+ ]
615
+
616
+ def _convert_tools_for_responses(
617
+ self,
618
+ tools: list[dict[str, Any]],
619
+ ) -> list[dict[str, Any]]:
620
+ return [
621
+ {
622
+ "type": "function",
623
+ "name": tool["name"],
624
+ "description": tool.get("description", ""),
625
+ "parameters": tool.get(
626
+ "parameters",
627
+ {"type": "object", "properties": {}},
628
+ ),
629
+ "strict": False,
630
+ }
631
+ for tool in tools
632
+ ]
633
+
634
+ def _parse_response(self, response: Any) -> LLMResponse:
635
+ if not response.choices:
636
+ raise RuntimeError("OpenAI returned empty choices (content may have been filtered).")
637
+ choice = response.choices[0]
638
+ message = choice.message
639
+ tool_calls: list[ToolCall] = []
640
+ for tool_call in message.tool_calls or []:
641
+ tool_calls.append(
642
+ ToolCall(
643
+ id=tool_call.id,
644
+ name=tool_call.function.name,
645
+ input=self._parse_tool_input(tool_call.function.arguments or "{}"),
646
+ )
647
+ )
648
+
649
+ usage = getattr(response, "usage", None)
650
+ prompt_tokens = getattr(usage, "prompt_tokens", 0) or 0
651
+ cached_tokens = self._extract_cached_tokens(usage)
652
+ return LLMResponse(
653
+ text=message.content or "",
654
+ tool_calls=tool_calls,
655
+ stop_reason=choice.finish_reason or "",
656
+ input_tokens=max(prompt_tokens - cached_tokens, 0),
657
+ output_tokens=getattr(usage, "completion_tokens", 0) or 0,
658
+ cache_read_input_tokens=cached_tokens,
659
+ )
660
+
661
+ @staticmethod
662
+ def _extract_cached_tokens(usage: Any) -> int:
663
+ """Read auto-cache hit tokens from an OpenAI/DeepSeek ``usage`` object.
664
+
665
+ Normalizes both provider shapes into a single cached-token count (a
666
+ subset of ``prompt_tokens``, so callers compute full-price input as
667
+ ``prompt_tokens - cached``):
668
+ - OpenAI: ``usage.prompt_tokens_details.cached_tokens``
669
+ - DeepSeek: ``usage.prompt_cache_hit_tokens``
670
+ Defensive against missing fields / dict-vs-attr shapes — absent fields
671
+ degrade to 0 (no behavior change from the pre-caching baseline).
672
+ """
673
+ if usage is None:
674
+ return 0
675
+ details = getattr(usage, "prompt_tokens_details", None)
676
+ if details is None and isinstance(usage, dict):
677
+ details = usage.get("prompt_tokens_details")
678
+ if details is not None:
679
+ cached = getattr(details, "cached_tokens", None)
680
+ if cached is None and isinstance(details, dict):
681
+ cached = details.get("cached_tokens")
682
+ if cached:
683
+ return int(cached)
684
+ hit = getattr(usage, "prompt_cache_hit_tokens", None)
685
+ if hit is None and isinstance(usage, dict):
686
+ hit = usage.get("prompt_cache_hit_tokens")
687
+ if hit:
688
+ return int(hit)
689
+ return 0
690
+
691
def _parse_responses_api_response(self, response: Any) -> LLMResponse:
    """Convert a Responses-API result into an ``LLMResponse``.

    The payload is first coerced to a dict. ``message`` output items
    contribute their ``output_text`` parts, ``function_call`` items become
    tool calls, and all other item types are ignored. ``content_blocks``
    mirrors both in output order.

    ``stop_reason`` is ``"tool_calls"`` when any tool call was found,
    otherwise the payload ``status`` (``"completed"`` when absent or null).
    ``input_tokens`` is the reported input token count minus cached tokens,
    floored at zero.
    """
    payload = self._coerce_responses_payload(response)
    # ``output`` / ``content`` may be present but null; treat that as empty.
    output_items = payload.get("output") or []

    text_parts: list[str] = []
    content_blocks: list[dict[str, Any]] = []
    tool_calls: list[ToolCall] = []

    for item in output_items:
        item_type = item.get("type")
        if item_type == "message":
            for part in item.get("content") or []:
                if part.get("type") != "output_text":
                    continue
                text = str(part.get("text", ""))
                text_parts.append(text)
                content_blocks.append({"type": "text", "text": text})
            continue
        if item_type != "function_call":
            continue

        # Same fallback as the streaming path (call_id, else id). A nested
        # ``get`` default would turn a present-but-null call_id into the
        # string "None" and break id matching against streamed tool calls.
        call_id = str(item.get("call_id") or item.get("id") or "")
        name = str(item.get("name", ""))
        parsed_input = self._parse_tool_input(item.get("arguments", "{}"))
        tool_calls.append(ToolCall(id=call_id, name=name, input=parsed_input))
        content_blocks.append(
            {
                "type": "tool_use",
                "id": call_id,
                "name": name,
                "input": parsed_input,
            }
        )

    usage = payload.get("usage", {}) or {}
    input_tokens = int(usage.get("input_tokens", 0) or 0)
    output_tokens = int(usage.get("output_tokens", 0) or 0)
    # Cache hits are reported under input_tokens_details and are subtracted
    # from input_tokens to obtain the non-cached input count.
    cached_tokens = 0
    details = usage.get("input_tokens_details")
    if isinstance(details, dict):
        cached_tokens = int(details.get("cached_tokens", 0) or 0)
    stop_reason = "tool_calls" if tool_calls else str(payload.get("status") or "completed")

    return LLMResponse(
        text="".join(text_parts),
        tool_calls=tool_calls,
        stop_reason=stop_reason,
        input_tokens=max(input_tokens - cached_tokens, 0),
        output_tokens=output_tokens,
        cache_read_input_tokens=cached_tokens,
        content_blocks=content_blocks,
    )
745
+
746
def _merge_streamed_responses_result(
    self,
    response: LLMResponse,
    *,
    streamed_text_parts: list[str],
    streamed_tool_calls: list[ToolCall],
) -> LLMResponse:
    """Fill gaps in the parsed final response with what was streamed.

    When nothing was streamed the parsed ``response`` is returned unchanged.
    Otherwise a new ``LLMResponse`` is built in which:

    * text is the parsed text, or the concatenated streamed text when the
      parsed text is empty;
    * tool calls are the parsed ones, or copies of the streamed ones when the
      parsed list is empty;
    * content blocks gain a leading text block if none exists and a
      ``tool_use`` block for every tool call whose id was not already there;
    * ``stop_reason`` is ``"tool_calls"`` whenever there are tool calls.

    Token counts and ``thinking`` are carried over from ``response``.
    """
    if not (streamed_text_parts or streamed_tool_calls):
        return response

    text = response.text or "".join(streamed_text_parts)

    calls = list(response.tool_calls)
    if not calls:
        calls = [
            ToolCall(id=call.id, name=call.name, input=dict(call.input))
            for call in streamed_tool_calls
        ]

    # Shallow-copy each block so the parsed response's own dicts are not
    # shared with the merged result.
    blocks = [dict(block) for block in response.content_blocks]
    if text and all(block.get("type") != "text" for block in blocks):
        blocks.insert(0, {"type": "text", "text": text})

    known_ids = {
        str(block.get("id", ""))
        for block in blocks
        if block.get("type") == "tool_use"
    }
    blocks.extend(
        {
            "type": "tool_use",
            "id": call.id,
            "name": call.name,
            "input": dict(call.input),
        }
        for call in calls
        if call.id not in known_ids
    )

    return LLMResponse(
        text=text,
        tool_calls=calls,
        stop_reason="tool_calls" if calls else response.stop_reason,
        input_tokens=response.input_tokens,
        output_tokens=response.output_tokens,
        cache_creation_input_tokens=response.cache_creation_input_tokens,
        cache_read_input_tokens=response.cache_read_input_tokens,
        thinking=response.thinking,
        content_blocks=blocks,
    )
805
+
806
+ def _coerce_responses_payload(self, response: Any) -> dict[str, Any]:
807
+ if isinstance(response, str):
808
+ return self._parse_responses_sse(response)
809
+ if isinstance(response, dict):
810
+ return response
811
+ if hasattr(response, "to_dict"):
812
+ payload = response.to_dict()
813
+ if isinstance(payload, dict):
814
+ return payload
815
+ raise TypeError(f"Unsupported Responses API payload: {type(response).__name__}")
816
+
817
+ def _parse_responses_sse(self, payload: str) -> dict[str, Any]:
818
+ last_response: dict[str, Any] | None = None
819
+ for line in payload.splitlines():
820
+ if not line.startswith("data: "):
821
+ continue
822
+ raw_json = line[6:].strip()
823
+ if not raw_json:
824
+ continue
825
+ event = json.loads(raw_json)
826
+ if event.get("type") == "response.completed":
827
+ return dict(event.get("response", {}))
828
+ if isinstance(event.get("response"), dict):
829
+ last_response = dict(event["response"])
830
+ if last_response is not None:
831
+ return last_response
832
+ raise ValueError("Could not parse Responses API payload.")
833
+
834
+ def _extract_responses_error(self, response: Any) -> str:
835
+ error = getattr(response, "error", None)
836
+ if error is None:
837
+ return ""
838
+ message = getattr(error, "message", None)
839
+ if message:
840
+ return str(message)
841
+ return self._stringify_content(error)
842
+
843
+ def _finalize_tool_calls(
844
+ self,
845
+ pending_tool_calls: dict[int, dict[str, str]],
846
+ ) -> list[ToolCall]:
847
+ tool_calls: list[ToolCall] = []
848
+ for index in sorted(pending_tool_calls):
849
+ tool_call = pending_tool_calls[index]
850
+ tool_calls.append(
851
+ ToolCall(
852
+ id=tool_call["id"],
853
+ name=tool_call["name"],
854
+ input=self._parse_tool_input(tool_call["arguments"] or "{}"),
855
+ )
856
+ )
857
+ return tool_calls
858
+
859
+ _fallback_counter = itertools.count(1)
860
+
861
+ def _iter_new_tool_calls(
862
+ self,
863
+ tool_calls: list[ToolCall],
864
+ emitted_tool_call_ids: set[str],
865
+ ):
866
+ for tool_call in tool_calls:
867
+ if not tool_call.id:
868
+ tool_call.id = f"_fallback_{next(OpenAIProvider._fallback_counter)}"
869
+ if tool_call.id in emitted_tool_call_ids:
870
+ continue
871
+ emitted_tool_call_ids.add(tool_call.id)
872
+ yield tool_call
873
+
874
+ def _parse_tool_input(self, arguments: str) -> dict[str, Any]:
875
+ try:
876
+ parsed_input = json.loads(arguments)
877
+ except json.JSONDecodeError:
878
+ parsed_input = {"raw_arguments": arguments}
879
+ if not isinstance(parsed_input, dict):
880
+ parsed_input = {"value": parsed_input}
881
+ return parsed_input