agenthub-python 0.2.0__tar.gz → 0.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (24)
  1. agenthub_python-0.3.0/PKG-INFO +10 -0
  2. {agenthub_python-0.2.0 → agenthub_python-0.3.0}/agenthub/auto_client.py +22 -11
  3. {agenthub_python-0.2.0 → agenthub_python-0.3.0}/agenthub/base_client.py +67 -10
  4. {agenthub_python-0.2.0/agenthub/claude4_5 → agenthub_python-0.3.0/agenthub/claude4_6}/__init__.py +2 -2
  5. {agenthub_python-0.2.0/agenthub/claude4_5 → agenthub_python-0.3.0/agenthub/claude4_6}/client.py +161 -93
  6. {agenthub_python-0.2.0 → agenthub_python-0.3.0}/agenthub/gemini3/client.py +96 -45
  7. {agenthub_python-0.2.0/agenthub/gpt5_2 → agenthub_python-0.3.0/agenthub/glm5}/__init__.py +2 -2
  8. {agenthub_python-0.2.0/agenthub/glm4_7 → agenthub_python-0.3.0/agenthub/glm5}/client.py +114 -62
  9. {agenthub_python-0.2.0/agenthub/glm4_7 → agenthub_python-0.3.0/agenthub/gpt5_4}/__init__.py +2 -2
  10. {agenthub_python-0.2.0/agenthub/gpt5_2 → agenthub_python-0.3.0/agenthub/gpt5_4}/client.py +47 -13
  11. {agenthub_python-0.2.0 → agenthub_python-0.3.0}/agenthub/integration/playground.py +182 -307
  12. agenthub_python-0.3.0/agenthub/integration/tracer.py +528 -0
  13. agenthub_python-0.3.0/agenthub/kimi_k2_5/__init__.py +18 -0
  14. agenthub_python-0.3.0/agenthub/kimi_k2_5/client.py +388 -0
  15. {agenthub_python-0.2.0 → agenthub_python-0.3.0}/agenthub/qwen3/client.py +106 -60
  16. {agenthub_python-0.2.0 → agenthub_python-0.3.0}/agenthub/types.py +5 -3
  17. agenthub_python-0.3.0/agenthub/utils.py +35 -0
  18. {agenthub_python-0.2.0 → agenthub_python-0.3.0}/pyproject.toml +3 -3
  19. agenthub_python-0.2.0/PKG-INFO +0 -9
  20. agenthub_python-0.2.0/agenthub/integration/tracer.py +0 -750
  21. {agenthub_python-0.2.0 → agenthub_python-0.3.0}/agenthub/__init__.py +0 -0
  22. {agenthub_python-0.2.0 → agenthub_python-0.3.0}/agenthub/gemini3/__init__.py +0 -0
  23. {agenthub_python-0.2.0 → agenthub_python-0.3.0}/agenthub/integration/__init__.py +0 -0
  24. {agenthub_python-0.2.0 → agenthub_python-0.3.0}/agenthub/qwen3/__init__.py +0 -0
@@ -0,0 +1,10 @@
1
+ Metadata-Version: 2.3
2
+ Name: agenthub-python
3
+ Version: 0.3.0
4
+ Summary: AgentHub is the LLM API Hub for the Agent era, built for high-precision autonomous agents.
5
+ Requires-Dist: google-genai>=1.5.0
6
+ Requires-Dist: anthropic[bedrock]>=0.40.0
7
+ Requires-Dist: flask>=3.0.0
8
+ Requires-Dist: openai>=1.0.0
9
+ Requires-Dist: httpx>=0.27.0
10
+ Requires-Python: >=3.11
@@ -46,22 +46,26 @@ class AutoLLMClient(LLMClient):
46
46
  ) -> LLMClient:
47
47
  """Create the appropriate client for the given model."""
48
48
  client_type = client_type or os.getenv("CLIENT_TYPE", model.lower())
49
- if "gemini-3" in client_type: # e.g., gemini-3-flash-preview
49
+ if "gemini-3-" in client_type or "gemini-3.1-" in client_type: # e.g., gemini-3-flash-preview
50
50
  from .gemini3 import Gemini3Client
51
51
 
52
52
  return Gemini3Client(model=model, api_key=api_key, base_url=base_url)
53
- elif "claude" in client_type and "4-5" in client_type: # e.g., claude-sonnet-4-5
54
- from .claude4_5 import Claude4_5Client
53
+ elif "claude" in client_type and "4-6" in client_type: # e.g., claude-sonnet-4-6
54
+ from .claude4_6 import Claude4_6Client
55
55
 
56
- return Claude4_5Client(model=model, api_key=api_key, base_url=base_url)
57
- elif "gpt-5.1" in client_type or "gpt-5.2" in client_type: # e.g., gpt-5.2
58
- from .gpt5_2 import GPT5_2Client
56
+ return Claude4_6Client(model=model, api_key=api_key, base_url=base_url)
57
+ elif "gpt-5.4" in client_type: # e.g., gpt-5.4
58
+ from .gpt5_4 import GPT5_4Client
59
59
 
60
- return GPT5_2Client(model=model, api_key=api_key, base_url=base_url)
61
- elif "glm-4.7" in client_type: # e.g., glm-4.7
62
- from .glm4_7 import GLM4_7Client
60
+ return GPT5_4Client(model=model, api_key=api_key, base_url=base_url)
61
+ elif "glm-5" in client_type:
62
+ from .glm5 import GLM5Client
63
63
 
64
- return GLM4_7Client(model=model, api_key=api_key, base_url=base_url)
64
+ return GLM5Client(model=model, api_key=api_key, base_url=base_url)
65
+ elif "kimi-k2.5" in client_type:
66
+ from .kimi_k2_5 import KimiK2_5Client
67
+
68
+ return KimiK2_5Client(model=model, api_key=api_key, base_url=base_url)
65
69
  elif "qwen3" in client_type:
66
70
  from .qwen3 import Qwen3Client
67
71
 
@@ -69,7 +73,7 @@ class AutoLLMClient(LLMClient):
69
73
  else:
70
74
  raise ValueError(
71
75
  f"{client_type} is not supported. "
72
- "Supported client types: gemini-3, claude-4-5, gpt-5.2, glm-4.7, qwen3."
76
+ "Supported client types: gemini-3, claude-4-6, gpt-5.4, glm-5, kimi-k2.5, qwen3."
73
77
  )
74
78
 
75
79
  def transform_uni_config_to_model_config(self, config: UniConfig) -> Any:
@@ -84,6 +88,13 @@ class AutoLLMClient(LLMClient):
84
88
  """Delegate to underlying client's transform_model_output_to_uni_event."""
85
89
  return self._client.transform_model_output_to_uni_event(model_output)
86
90
 
91
+ async def _streaming_response_internal(
92
+ self,
93
+ messages: list[UniMessage],
94
+ config: UniConfig,
95
+ ) -> AsyncIterator[UniEvent]:
96
+ raise NotImplementedError("Please use streaming_response instead.")
97
+
87
98
  async def streaming_response(
88
99
  self,
89
100
  messages: list[UniMessage],
@@ -89,16 +89,25 @@ class LLMClient(ABC):
89
89
  # Merge content_items from all events
90
90
  for item in event["content_items"]:
91
91
  if item["type"] == "text":
92
- if content_items and content_items[-1]["type"] == "text":
92
+ if (
93
+ content_items
94
+ and content_items[-1]["type"] == "text"
95
+ and content_items[-1].get("signature") is None # no signature yet
96
+ and item.get("phase") is None # no new phase
97
+ ):
93
98
  content_items[-1]["text"] += item["text"]
94
- if "signature" in item: # signature may appear at the last item
99
+ if "signature" in item: # finish the current item if signature is not None
95
100
  content_items[-1]["signature"] = item["signature"]
96
- elif item["text"]: # omit empty text items
101
+ elif item["text"] or item.get("phase") is not None: # text or new phase starts an item
97
102
  content_items.append(item.copy())
98
103
  elif item["type"] == "thinking":
99
- if content_items and content_items[-1]["type"] == "thinking":
104
+ if (
105
+ content_items
106
+ and content_items[-1]["type"] == "thinking"
107
+ and content_items[-1].get("signature") is None # no signature yet
108
+ ):
100
109
  content_items[-1]["thinking"] += item["thinking"]
101
- if "signature" in item: # signature may appear at the last item
110
+ if "signature" in item: # finish the current item if signature is not None
102
111
  content_items[-1]["signature"] = item["signature"]
103
112
  elif item["thinking"] or item.get("signature"): # omit empty thinking items
104
113
  content_items.append(item.copy())
@@ -119,6 +128,26 @@ class LLMClient(ABC):
119
128
  }
120
129
 
121
130
  @abstractmethod
131
+ async def _streaming_response_internal(
132
+ self,
133
+ messages: list[UniMessage],
134
+ config: UniConfig,
135
+ ) -> AsyncIterator[UniEvent]:
136
+ """
137
+ Internal method to handle streaming response.
138
+
139
+ This method should be implemented by each model client to handle
140
+ the actual streaming request and yield model-specific events.
141
+
142
+ Args:
143
+ messages: List of universal message dictionaries
144
+ config: Universal configuration dict
145
+
146
+ Yields:
147
+ Model-specific events from the streaming response
148
+ """
149
+ pass
150
+
122
151
  async def streaming_response(
123
152
  self,
124
153
  messages: list[UniMessage],
@@ -138,7 +167,12 @@ class LLMClient(ABC):
138
167
  Yields:
139
168
  Universal events from the streaming response
140
169
  """
141
- pass
170
+ last_event: UniEvent | None = None
171
+ async for event in self._streaming_response_internal(messages, config):
172
+ last_event = event
173
+ yield event
174
+
175
+ self._validate_last_event(last_event)
142
176
 
143
177
  async def streaming_response_stateful(
144
178
  self,
@@ -159,18 +193,19 @@ class LLMClient(ABC):
159
193
  Yields:
160
194
  Universal events from the streaming response
161
195
  """
162
- # Add user message to history
163
- self._history.append(message)
196
+ # Build a temporary messages list for inference without mutating history yet
197
+ temp_messages = self._history + [message]
164
198
 
165
199
  # Collect all events for history
166
200
  events = []
167
- async for event in self.streaming_response(messages=self._history, config=config):
201
+ async for event in self.streaming_response(messages=temp_messages, config=config):
168
202
  events.append(event)
169
203
  yield event
170
204
 
171
- # Convert events to message and add to history
205
+ # Only update history after successful inference
172
206
  if events:
173
207
  assistant_message = self.concat_uni_events_to_uni_message(events)
208
+ self._history.append(message)
174
209
  self._history.append(assistant_message)
175
210
 
176
211
  # Save history to file if trace_id is specified
@@ -180,6 +215,28 @@ class LLMClient(ABC):
180
215
  tracer = Tracer()
181
216
  tracer.save_history(self._model, self._history, config["trace_id"], config)
182
217
 
218
+ @staticmethod
219
+ def _validate_last_event(last_event: UniEvent | None) -> None:
220
+ """Validate that the last event has usage_metadata and finish_reason.
221
+
222
+ This validation guards against servers that silently terminate streaming
223
+ output partway through without sending a proper final event.
224
+
225
+ Args:
226
+ last_event: The last event yielded by streaming_response
227
+
228
+ Raises:
229
+ ValueError: If last_event is None or missing usage_metadata/finish_reason
230
+ """
231
+ if last_event is None:
232
+ raise ValueError("Streaming response yielded no events")
233
+
234
+ if last_event["usage_metadata"] is None:
235
+ raise ValueError(f"Last event must carry usage_metadata, got: {last_event}")
236
+
237
+ if last_event["finish_reason"] is None:
238
+ raise ValueError(f"Last event must carry finish_reason, got: {last_event}")
239
+
183
240
  def clear_history(self) -> None:
184
241
  """Clear the message history."""
185
242
  self._history.clear()
@@ -12,7 +12,7 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
- from .client import Claude4_5Client
15
+ from .client import Claude4_6Client
16
16
 
17
17
 
18
- __all__ = ["Claude4_5Client"]
18
+ __all__ = ["Claude4_6Client"]
@@ -12,12 +12,16 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
+ import base64
15
16
  import json
17
+ import mimetypes
16
18
  import os
19
+ import re
17
20
  from typing import Any, AsyncIterator
18
21
 
19
- from anthropic import AsyncAnthropic
20
- from anthropic.types import MessageParam, MessageStreamEvent
22
+ import httpx
23
+ from anthropic import AsyncAnthropic, AsyncAnthropicBedrock
24
+ from anthropic.types.beta import BetaMessageParam, BetaRawMessageStreamEvent
21
25
 
22
26
  from ..base_client import LLMClient
23
27
  from ..types import (
@@ -34,25 +38,78 @@ from ..types import (
34
38
  )
35
39
 
36
40
 
37
- class Claude4_5Client(LLMClient):
38
- """Claude 4.5-specific LLM client implementation."""
41
+ REDACTED_THINKING = "_REDACTED_THINKING"
42
+
43
+
44
+ class Claude4_6Client(LLMClient):
45
+ """Claude 4.6-specific LLM client implementation."""
39
46
 
40
47
  def __init__(self, model: str, api_key: str | None = None, base_url: str | None = None):
41
- """Initialize Claude 4.5 client with model and API key."""
48
+ """Initialize Claude 4.6 client with model and API key."""
42
49
  self._model = model
43
50
  api_key = api_key or os.getenv("ANTHROPIC_API_KEY")
44
51
  base_url = base_url or os.getenv("ANTHROPIC_BASE_URL")
45
- self._client = AsyncAnthropic(api_key=api_key, base_url=base_url)
52
+ if base_url and base_url.startswith("bedrock://"): # example: bedrock://us-east-1
53
+ region = base_url.replace("bedrock://", "")
54
+ access_key, secret_key = api_key.split(",")
55
+ self._client = AsyncAnthropicBedrock(
56
+ aws_secret_key=secret_key, aws_access_key=access_key, aws_region=region
57
+ )
58
+ self._use_bedrock = True
59
+ else:
60
+ self._client = AsyncAnthropic(api_key=api_key, base_url=base_url)
61
+ self._use_bedrock = False
62
+
46
63
  self._history: list[UniMessage] = []
47
64
 
48
- def _convert_thinking_level_to_budget(self, thinking_level: ThinkingLevel) -> dict[str, Any]:
49
- """Convert ThinkingLevel enum to Claude's budget_tokens."""
65
+ async def _convert_image_url_to_source(self, url: str) -> dict[str, Any]:
66
+ """Convert image URL to image source.
67
+
68
+ Bedrock does not support image url sources, so we need to fetch the image bytes and encode them.
69
+
70
+ Args:
71
+ url: Image URL to convert
72
+
73
+ Returns:
74
+ Image source
75
+ """
76
+ if url.startswith("data:"):
77
+ match = re.match(r"data:([^;]+);base64,(.+)", url)
78
+ if match:
79
+ media_type = match.group(1)
80
+ base64_data = match.group(2)
81
+ source = {
82
+ "type": "image",
83
+ "source": {"type": "base64", "media_type": media_type, "data": base64_data},
84
+ }
85
+ else:
86
+ raise ValueError(f"Invalid base64 image: {url}")
87
+ elif self._use_bedrock:
88
+ async with httpx.AsyncClient() as client:
89
+ response = await client.get(url)
90
+ response.raise_for_status()
91
+ image_bytes = response.content
92
+ mime_type = mimetypes.guess_type(url)[0] or "image/jpeg"
93
+ source = {
94
+ "type": "image",
95
+ "source": {
96
+ "type": "base64",
97
+ "media_type": mime_type,
98
+ "data": base64.b64encode(image_bytes).decode("utf-8"),
99
+ },
100
+ }
101
+ else:
102
+ source = {"type": "image", "source": {"type": "url", "url": url}}
103
+
104
+ return source
50
105
 
106
+ def _convert_thinking_level_to_thinking_config(self, thinking_level: ThinkingLevel) -> dict[str, Any]:
107
+ """Convert ThinkingLevel enum to Claude's adaptive thinking config."""
51
108
  mapping = {
52
- ThinkingLevel.NONE: {"type": "disabled"},
53
- ThinkingLevel.LOW: {"type": "enabled", "budget_tokens": 1024},
54
- ThinkingLevel.MEDIUM: {"type": "enabled", "budget_tokens": 4096},
55
- ThinkingLevel.HIGH: {"type": "enabled", "budget_tokens": 16384},
109
+ ThinkingLevel.NONE: {}, # omit thinking config
110
+ ThinkingLevel.LOW: {"thinking": {"type": "adaptive"}, "output_config": {"effort": "low"}},
111
+ ThinkingLevel.MEDIUM: {"thinking": {"type": "adaptive"}, "output_config": {"effort": "medium"}},
112
+ ThinkingLevel.HIGH: {"thinking": {"type": "adaptive"}, "output_config": {"effort": "high"}},
56
113
  }
57
114
  return mapping.get(thinking_level)
58
115
 
@@ -80,7 +137,7 @@ class Claude4_5Client(LLMClient):
80
137
  Returns:
81
138
  Claude configuration dictionary
82
139
  """
83
- claude_config = {"model": self._model}
140
+ claude_config = {"model": self._model, "stream": True}
84
141
 
85
142
  if config.get("system_prompt") is not None:
86
143
  claude_config["system"] = config["system_prompt"]
@@ -96,7 +153,7 @@ class Claude4_5Client(LLMClient):
96
153
  # NOTE: Claude always provides thinking summary
97
154
  if config.get("thinking_level") is not None:
98
155
  claude_config["temperature"] = 1.0 # `temperature` may only be set to 1 when thinking is enabled
99
- claude_config["thinking"] = self._convert_thinking_level_to_budget(config["thinking_level"])
156
+ claude_config.update(self._convert_thinking_level_to_thinking_config(config["thinking_level"]))
100
157
 
101
158
  # Convert tools to Claude's tool schema
102
159
  if config.get("tools") is not None:
@@ -116,17 +173,17 @@ class Claude4_5Client(LLMClient):
116
173
 
117
174
  return claude_config
118
175
 
119
- def transform_uni_message_to_model_input(self, messages: list[UniMessage]) -> list[MessageParam]:
176
+ async def transform_uni_message_to_model_input(self, messages: list[UniMessage]) -> list[BetaMessageParam]:
120
177
  """
121
- Transform universal message format to Claude's MessageParam format.
178
+ Transform universal message format to Claude's BetaMessageParam format.
122
179
 
123
180
  Args:
124
181
  messages: List of universal message dictionaries
125
182
 
126
183
  Returns:
127
- List of Claude MessageParam objects
184
+ List of Claude BetaMessageParam objects
128
185
  """
129
- claude_messages: list[MessageParam] = []
186
+ claude_messages: list[BetaMessageParam] = []
130
187
 
131
188
  for msg in messages:
132
189
  content_blocks = []
@@ -134,12 +191,14 @@ class Claude4_5Client(LLMClient):
134
191
  if item["type"] == "text":
135
192
  content_blocks.append({"type": "text", "text": item["text"]})
136
193
  elif item["type"] == "image_url":
137
- # TODO: support base64 encoded images
138
- content_blocks.append({"type": "image", "source": {"type": "url", "url": item["image_url"]}})
194
+ content_blocks.append(await self._convert_image_url_to_source(item["image_url"]))
139
195
  elif item["type"] == "thinking":
140
- content_blocks.append(
141
- {"type": "thinking", "thinking": item["thinking"], "signature": item["signature"]}
142
- )
196
+ if item["thinking"] == REDACTED_THINKING:
197
+ content_blocks.append({"type": "redacted_thinking", "data": item["signature"]})
198
+ else:
199
+ content_blocks.append(
200
+ {"type": "thinking", "thinking": item["thinking"], "signature": item["signature"]}
201
+ )
143
202
  elif item["type"] == "tool_call":
144
203
  content_blocks.append(
145
204
  {
@@ -153,8 +212,13 @@ class Claude4_5Client(LLMClient):
153
212
  if "tool_call_id" not in item:
154
213
  raise ValueError("tool_call_id is required for tool result.")
155
214
 
215
+ tool_result = [{"type": "text", "text": item["text"]}]
216
+ if "images" in item:
217
+ for image_url in item["images"]:
218
+ tool_result.append(await self._convert_image_url_to_source(image_url))
219
+
156
220
  content_blocks.append(
157
- {"type": "tool_result", "content": item["result"], "tool_use_id": item["tool_call_id"]}
221
+ {"type": "tool_result", "content": tool_result, "tool_use_id": item["tool_call_id"]}
158
222
  )
159
223
  else:
160
224
  raise ValueError(f"Unknown item: {item}")
@@ -163,7 +227,7 @@ class Claude4_5Client(LLMClient):
163
227
 
164
228
  return claude_messages
165
229
 
166
- def transform_model_output_to_uni_event(self, model_output: MessageStreamEvent) -> UniEvent:
230
+ def transform_model_output_to_uni_event(self, model_output: BetaRawMessageStreamEvent) -> UniEvent:
167
231
  """
168
232
  Transform Claude model output to universal event format.
169
233
 
@@ -188,6 +252,8 @@ class Claude4_5Client(LLMClient):
188
252
  content_items.append(
189
253
  {"type": "partial_tool_call", "name": block.name, "arguments": "", "tool_call_id": block.id}
190
254
  )
255
+ elif block.type == "redacted_thinking":
256
+ content_items.append({"type": "thinking", "thinking": REDACTED_THINKING, "signature": block.data})
191
257
 
192
258
  elif claude_event_type == "content_block_delta":
193
259
  event_type = "delta"
@@ -210,11 +276,12 @@ class Claude4_5Client(LLMClient):
210
276
  event_type = "start"
211
277
  message = model_output.message
212
278
  if getattr(message, "usage", None):
279
+ cache_creation_tokens = message.usage.cache_creation_input_tokens or 0
213
280
  usage_metadata = {
214
- "prompt_tokens": message.usage.input_tokens,
281
+ "cached_tokens": message.usage.cache_read_input_tokens,
282
+ "prompt_tokens": message.usage.input_tokens + cache_creation_tokens,
215
283
  "thoughts_tokens": None,
216
284
  "response_tokens": None,
217
- "cached_tokens": message.usage.cache_read_input_tokens,
218
285
  }
219
286
 
220
287
  elif claude_event_type == "message_delta":
@@ -225,16 +292,17 @@ class Claude4_5Client(LLMClient):
225
292
  "end_turn": "stop",
226
293
  "max_tokens": "length",
227
294
  "stop_sequence": "stop",
228
- "tool_use": "stop",
295
+ "tool_use": "tool_call",
229
296
  }
230
297
  finish_reason = stop_reason_mapping.get(delta.stop_reason, "unknown")
231
298
 
232
299
  if getattr(model_output, "usage", None):
300
+ # In message_delta, we only update response_tokens
233
301
  usage_metadata = {
302
+ "cached_tokens": None,
234
303
  "prompt_tokens": None,
235
304
  "thoughts_tokens": None,
236
305
  "response_tokens": model_output.usage.output_tokens,
237
- "cached_tokens": None,
238
306
  }
239
307
 
240
308
  elif claude_event_type == "message_stop":
@@ -254,7 +322,7 @@ class Claude4_5Client(LLMClient):
254
322
  "finish_reason": finish_reason,
255
323
  }
256
324
 
257
- async def streaming_response(
325
+ async def _streaming_response_internal(
258
326
  self,
259
327
  messages: list[UniMessage],
260
328
  config: UniConfig,
@@ -264,7 +332,7 @@ class Claude4_5Client(LLMClient):
264
332
  claude_config = self.transform_uni_config_to_model_config(config)
265
333
 
266
334
  # Use unified message conversion
267
- claude_messages = self.transform_uni_message_to_model_input(messages)
335
+ claude_messages = await self.transform_uni_message_to_model_input(messages)
268
336
 
269
337
  # Add cache_control to last user message's last item if enabled
270
338
  prompt_caching = config.get("prompt_caching", PromptCaching.ENABLE)
@@ -282,66 +350,66 @@ class Claude4_5Client(LLMClient):
282
350
  # Stream generate
283
351
  partial_tool_call = {}
284
352
  partial_usage = {}
285
- async with self._client.messages.stream(**claude_config, messages=claude_messages) as stream:
286
- async for event in stream:
287
- event = self.transform_model_output_to_uni_event(event)
288
- if event["event_type"] == "start":
289
- for item in event["content_items"]:
290
- if item["type"] == "partial_tool_call":
291
- # initialize partial_tool_call
292
- partial_tool_call = {
293
- "name": item["name"],
294
- "arguments": "",
295
- "tool_call_id": item["tool_call_id"],
296
- }
297
- yield event
298
-
299
- if event["usage_metadata"] is not None:
300
- # initialize partial_usage
301
- partial_usage = {
302
- "prompt_tokens": event["usage_metadata"]["prompt_tokens"],
303
- "cached_tokens": event["usage_metadata"]["cached_tokens"],
304
- }
305
-
306
- elif event["event_type"] == "delta":
307
- for item in event["content_items"]:
308
- if item["type"] == "partial_tool_call":
309
- # update partial_tool_call
310
- partial_tool_call["arguments"] += item["arguments"]
311
-
312
- yield event
313
-
314
- elif event["event_type"] == "stop":
315
- if "name" in partial_tool_call and "arguments" in partial_tool_call:
316
- # finish partial_tool_call
317
- yield {
318
- "role": "assistant",
319
- "event_type": "delta",
320
- "content_items": [
321
- {
322
- "type": "tool_call",
323
- "name": partial_tool_call["name"],
324
- "arguments": json.loads(partial_tool_call["arguments"]),
325
- "tool_call_id": partial_tool_call["tool_call_id"],
326
- }
327
- ],
328
- "usage_metadata": None,
329
- "finish_reason": None,
330
- }
331
- partial_tool_call = {}
332
-
333
- if "prompt_tokens" in partial_usage and event["usage_metadata"] is not None:
334
- # finish partial_usage
335
- yield {
336
- "role": "assistant",
337
- "event_type": "stop",
338
- "content_items": [],
339
- "usage_metadata": {
340
- "prompt_tokens": partial_usage["prompt_tokens"],
341
- "thoughts_tokens": None,
342
- "response_tokens": event["usage_metadata"]["response_tokens"],
343
- "cached_tokens": partial_usage["cached_tokens"],
344
- },
345
- "finish_reason": event["finish_reason"],
353
+ stream = await self._client.beta.messages.create(**claude_config, messages=claude_messages)
354
+ async for event in stream:
355
+ event = self.transform_model_output_to_uni_event(event)
356
+ if event["event_type"] == "start":
357
+ for item in event["content_items"]:
358
+ if item["type"] == "partial_tool_call":
359
+ # initialize partial_tool_call
360
+ partial_tool_call = {
361
+ "name": item["name"],
362
+ "arguments": "",
363
+ "tool_call_id": item["tool_call_id"],
346
364
  }
347
- partial_usage = {}
365
+ yield event
366
+
367
+ if event["usage_metadata"] is not None:
368
+ # initialize partial_usage
369
+ partial_usage = {
370
+ "prompt_tokens": event["usage_metadata"]["prompt_tokens"],
371
+ "cached_tokens": event["usage_metadata"]["cached_tokens"],
372
+ }
373
+
374
+ elif event["event_type"] == "delta":
375
+ for item in event["content_items"]:
376
+ if item["type"] == "partial_tool_call":
377
+ # update partial_tool_call
378
+ partial_tool_call["arguments"] += item["arguments"]
379
+
380
+ yield event
381
+
382
+ elif event["event_type"] == "stop":
383
+ if "name" in partial_tool_call and "arguments" in partial_tool_call:
384
+ # finish partial_tool_call
385
+ yield {
386
+ "role": "assistant",
387
+ "event_type": "delta",
388
+ "content_items": [
389
+ {
390
+ "type": "tool_call",
391
+ "name": partial_tool_call["name"],
392
+ "arguments": json.loads(partial_tool_call["arguments"]),
393
+ "tool_call_id": partial_tool_call["tool_call_id"],
394
+ }
395
+ ],
396
+ "usage_metadata": None,
397
+ "finish_reason": None,
398
+ }
399
+ partial_tool_call = {}
400
+
401
+ if "prompt_tokens" in partial_usage and event["usage_metadata"] is not None:
402
+ # finish partial_usage
403
+ yield {
404
+ "role": "assistant",
405
+ "event_type": "stop",
406
+ "content_items": [],
407
+ "usage_metadata": {
408
+ "prompt_tokens": partial_usage["prompt_tokens"],
409
+ "thoughts_tokens": None,
410
+ "response_tokens": event["usage_metadata"]["response_tokens"],
411
+ "cached_tokens": partial_usage["cached_tokens"],
412
+ },
413
+ "finish_reason": event["finish_reason"],
414
+ }
415
+ partial_usage = {}