agenthub-python 0.1.0__tar.gz → 0.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (25) hide show
  1. agenthub_python-0.3.0/PKG-INFO +10 -0
  2. {agenthub_python-0.1.0 → agenthub_python-0.3.0}/agenthub/__init__.py +2 -3
  3. {agenthub_python-0.1.0 → agenthub_python-0.3.0}/agenthub/auto_client.py +43 -11
  4. {agenthub_python-0.1.0 → agenthub_python-0.3.0}/agenthub/base_client.py +74 -13
  5. {agenthub_python-0.1.0/agenthub/claude4_5 → agenthub_python-0.3.0/agenthub/claude4_6}/__init__.py +2 -2
  6. agenthub_python-0.3.0/agenthub/claude4_6/client.py +415 -0
  7. {agenthub_python-0.1.0 → agenthub_python-0.3.0}/agenthub/gemini3/client.py +130 -52
  8. agenthub_python-0.3.0/agenthub/glm5/__init__.py +18 -0
  9. agenthub_python-0.3.0/agenthub/glm5/client.py +354 -0
  10. agenthub_python-0.3.0/agenthub/gpt5_4/__init__.py +18 -0
  11. agenthub_python-0.3.0/agenthub/gpt5_4/client.py +354 -0
  12. agenthub_python-0.3.0/agenthub/integration/__init__.py +14 -0
  13. agenthub_python-0.3.0/agenthub/integration/playground.py +646 -0
  14. agenthub_python-0.3.0/agenthub/integration/tracer.py +528 -0
  15. agenthub_python-0.3.0/agenthub/kimi_k2_5/__init__.py +18 -0
  16. agenthub_python-0.3.0/agenthub/kimi_k2_5/client.py +388 -0
  17. agenthub_python-0.3.0/agenthub/qwen3/__init__.py +18 -0
  18. agenthub_python-0.3.0/agenthub/qwen3/client.py +376 -0
  19. {agenthub_python-0.1.0 → agenthub_python-0.3.0}/agenthub/types.py +23 -21
  20. agenthub_python-0.3.0/agenthub/utils.py +35 -0
  21. {agenthub_python-0.1.0 → agenthub_python-0.3.0}/pyproject.toml +4 -4
  22. agenthub_python-0.1.0/PKG-INFO +0 -9
  23. agenthub_python-0.1.0/agenthub/claude4_5/client.py +0 -315
  24. agenthub_python-0.1.0/agenthub/tracer.py +0 -722
  25. {agenthub_python-0.1.0 → agenthub_python-0.3.0}/agenthub/gemini3/__init__.py +0 -0
@@ -0,0 +1,10 @@
1
+ Metadata-Version: 2.3
2
+ Name: agenthub-python
3
+ Version: 0.3.0
4
+ Summary: AgentHub is the LLM API Hub for the Agent era, built for high-precision autonomous agents.
5
+ Requires-Dist: google-genai>=1.5.0
6
+ Requires-Dist: anthropic[bedrock]>=0.40.0
7
+ Requires-Dist: flask>=3.0.0
8
+ Requires-Dist: openai>=1.0.0
9
+ Requires-Dist: httpx>=0.27.0
10
+ Requires-Python: >=3.11
@@ -13,8 +13,7 @@
13
13
  # limitations under the License.
14
14
 
15
15
  from .auto_client import AutoLLMClient
16
- from .tracer import Tracer
17
- from .types import ThinkingLevel
16
+ from .types import PromptCaching, ThinkingLevel
18
17
 
19
18
 
20
- __all__ = ["AutoLLMClient", "ThinkingLevel", "Tracer"]
19
+ __all__ = ["AutoLLMClient", "PromptCaching", "ThinkingLevel"]
@@ -12,6 +12,7 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
+ import os
15
16
  from typing import Any, AsyncIterator
16
17
 
17
18
  from .base_client import LLMClient
@@ -26,30 +27,54 @@ class AutoLLMClient(LLMClient):
26
27
  conversation history for that specific model.
27
28
  """
28
29
 
29
- def __init__(self, model: str, api_key: str | None = None):
30
+ def __init__(
31
+ self, model: str, api_key: str | None = None, base_url: str | None = None, client_type: str | None = None
32
+ ):
30
33
  """
31
34
  Initialize AutoLLMClient with a specific model.
32
35
 
33
36
  Args:
34
37
  model: Model identifier (determines which client to use)
35
38
  api_key: Optional API key
39
+ base_url: Optional base URL for API requests
40
+ client_type: Optional client type override
36
41
  """
37
- self._client = self._create_client_for_model(model, api_key)
42
+ self._client = self._create_client_for_model(model, api_key, base_url, client_type)
38
43
 
39
- def _create_client_for_model(self, model: str, api_key: str | None = None) -> LLMClient:
44
+ def _create_client_for_model(
45
+ self, model: str, api_key: str | None = None, base_url: str | None = None, client_type: str | None = None
46
+ ) -> LLMClient:
40
47
  """Create the appropriate client for the given model."""
41
- if "gemini-3" in model.lower(): # e.g., gemini-3-flash-preview
48
+ client_type = client_type or os.getenv("CLIENT_TYPE", model.lower())
49
+ if "gemini-3-" in client_type or "gemini-3.1-" in client_type: # e.g., gemini-3-flash-preview
42
50
  from .gemini3 import Gemini3Client
43
51
 
44
- return Gemini3Client(model=model, api_key=api_key)
45
- elif "claude" in model.lower() and "4-5" in model.lower(): # e.g., claude-sonnet-4-5
46
- from .claude4_5 import Claude4_5Client
52
+ return Gemini3Client(model=model, api_key=api_key, base_url=base_url)
53
+ elif "claude" in client_type and "4-6" in client_type: # e.g., claude-sonnet-4-6
54
+ from .claude4_6 import Claude4_6Client
47
55
 
48
- return Claude4_5Client(model=model, api_key=api_key)
49
- elif "gpt-5.2" in model.lower(): # e.g., gpt-5.2
50
- raise NotImplementedError("GPT models not yet implemented.")
56
+ return Claude4_6Client(model=model, api_key=api_key, base_url=base_url)
57
+ elif "gpt-5.4" in client_type: # e.g., gpt-5.4
58
+ from .gpt5_4 import GPT5_4Client
59
+
60
+ return GPT5_4Client(model=model, api_key=api_key, base_url=base_url)
61
+ elif "glm-5" in client_type:
62
+ from .glm5 import GLM5Client
63
+
64
+ return GLM5Client(model=model, api_key=api_key, base_url=base_url)
65
+ elif "kimi-k2.5" in client_type:
66
+ from .kimi_k2_5 import KimiK2_5Client
67
+
68
+ return KimiK2_5Client(model=model, api_key=api_key, base_url=base_url)
69
+ elif "qwen3" in client_type:
70
+ from .qwen3 import Qwen3Client
71
+
72
+ return Qwen3Client(model=model, api_key=api_key, base_url=base_url)
51
73
  else:
52
- raise ValueError(f"{model} is not supported.")
74
+ raise ValueError(
75
+ f"{client_type} is not supported. "
76
+ "Supported client types: gemini-3, claude-4-6, gpt-5.4, glm-5, kimi-k2.5, qwen3."
77
+ )
53
78
 
54
79
  def transform_uni_config_to_model_config(self, config: UniConfig) -> Any:
55
80
  """Delegate to underlying client's transform_uni_config_to_model_config."""
@@ -63,6 +88,13 @@ class AutoLLMClient(LLMClient):
63
88
  """Delegate to underlying client's transform_model_output_to_uni_event."""
64
89
  return self._client.transform_model_output_to_uni_event(model_output)
65
90
 
91
+ async def _streaming_response_internal(
92
+ self,
93
+ messages: list[UniMessage],
94
+ config: UniConfig,
95
+ ) -> AsyncIterator[UniEvent]:
96
+ raise NotImplementedError("Please use streaming_response instead.")
97
+
66
98
  async def streaming_response(
67
99
  self,
68
100
  messages: list[UniMessage],
@@ -26,6 +26,7 @@ class LLMClient(ABC):
26
26
  the required abstract methods for complete SDK abstraction.
27
27
  """
28
28
 
29
+ _model: str
29
30
  _history: list[UniMessage] = []
30
31
 
31
32
  @abstractmethod
@@ -88,19 +89,31 @@ class LLMClient(ABC):
88
89
  # Merge content_items from all events
89
90
  for item in event["content_items"]:
90
91
  if item["type"] == "text":
91
- if content_items and content_items[-1]["type"] == "text":
92
+ if (
93
+ content_items
94
+ and content_items[-1]["type"] == "text"
95
+ and content_items[-1].get("signature") is None # no signature yet
96
+ and item.get("phase") is None # no new phase
97
+ ):
92
98
  content_items[-1]["text"] += item["text"]
93
- if "signature" in item: # signature may appear at the last item
99
+ if "signature" in item: # finish the current item if signature is not None
94
100
  content_items[-1]["signature"] = item["signature"]
95
- elif item["text"]: # omit empty text items
101
+ elif item["text"] or item.get("phase") is not None: # text or new phase starts an item
96
102
  content_items.append(item.copy())
97
103
  elif item["type"] == "thinking":
98
- if content_items and content_items[-1]["type"] == "thinking":
104
+ if (
105
+ content_items
106
+ and content_items[-1]["type"] == "thinking"
107
+ and content_items[-1].get("signature") is None # no signature yet
108
+ ):
99
109
  content_items[-1]["thinking"] += item["thinking"]
100
- if "signature" in item: # signature may appear at the last item
110
+ if "signature" in item: # finish the current item if signature is not None
101
111
  content_items[-1]["signature"] = item["signature"]
102
- elif item["thinking"]: # omit empty thinking items
112
+ elif item["thinking"] or item.get("signature"): # omit empty thinking items
103
113
  content_items.append(item.copy())
114
+ elif item["type"] == "partial_tool_call":
115
+ # Skip partial_tool_call items - they should already be converted to tool_call
116
+ pass
104
117
  else:
105
118
  content_items.append(item.copy())
106
119
 
@@ -115,6 +128,26 @@ class LLMClient(ABC):
115
128
  }
116
129
 
117
130
  @abstractmethod
131
+ async def _streaming_response_internal(
132
+ self,
133
+ messages: list[UniMessage],
134
+ config: UniConfig,
135
+ ) -> AsyncIterator[UniEvent]:
136
+ """
137
+ Internal method to handle streaming response.
138
+
139
+ This method should be implemented by each model client to handle
140
+ the actual streaming request and yield model-specific events.
141
+
142
+ Args:
143
+ messages: List of universal message dictionaries
144
+ config: Universal configuration dict
145
+
146
+ Yields:
147
+ Model-specific events from the streaming response
148
+ """
149
+ pass
150
+
118
151
  async def streaming_response(
119
152
  self,
120
153
  messages: list[UniMessage],
@@ -134,7 +167,12 @@ class LLMClient(ABC):
134
167
  Yields:
135
168
  Universal events from the streaming response
136
169
  """
137
- pass
170
+ last_event: UniEvent | None = None
171
+ async for event in self._streaming_response_internal(messages, config):
172
+ last_event = event
173
+ yield event
174
+
175
+ self._validate_last_event(last_event)
138
176
 
139
177
  async def streaming_response_stateful(
140
178
  self,
@@ -155,26 +193,49 @@ class LLMClient(ABC):
155
193
  Yields:
156
194
  Universal events from the streaming response
157
195
  """
158
- # Add user message to history
159
- self._history.append(message)
196
+ # Build a temporary messages list for inference without mutating history yet
197
+ temp_messages = self._history + [message]
160
198
 
161
199
  # Collect all events for history
162
200
  events = []
163
- async for event in self.streaming_response(messages=self._history, config=config):
201
+ async for event in self.streaming_response(messages=temp_messages, config=config):
164
202
  events.append(event)
165
203
  yield event
166
204
 
167
- # Convert events to message and add to history
205
+ # Only update history after successful inference
168
206
  if events:
169
207
  assistant_message = self.concat_uni_events_to_uni_message(events)
208
+ self._history.append(message)
170
209
  self._history.append(assistant_message)
171
210
 
172
211
  # Save history to file if trace_id is specified
173
212
  if config.get("trace_id"):
174
- from .tracer import Tracer
213
+ from .integration.tracer import Tracer
175
214
 
176
215
  tracer = Tracer()
177
- tracer.save_history(self._history, config["trace_id"], config)
216
+ tracer.save_history(self._model, self._history, config["trace_id"], config)
217
+
218
@staticmethod
def _validate_last_event(last_event: UniEvent | None) -> None:
    """Check that a stream's final event carries usage_metadata and finish_reason.

    Guards against servers that cut a streaming response short without ever
    emitting a proper terminal event.

    Args:
        last_event: The final event yielded by streaming_response, or None
            when the stream produced nothing at all.

    Raises:
        ValueError: If no event was seen, or the final event lacks
            usage_metadata or finish_reason.
    """
    if last_event is None:
        raise ValueError("Streaming response yielded no events")

    # Both fields must be populated on the terminal event; report the first
    # one that is missing.
    for required_field in ("usage_metadata", "finish_reason"):
        if last_event[required_field] is None:
            raise ValueError(f"Last event must carry {required_field}, got: {last_event}")
178
239
 
179
240
  def clear_history(self) -> None:
180
241
  """Clear the message history."""
@@ -12,7 +12,7 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
- from .client import Claude4_5Client
15
+ from .client import Claude4_6Client
16
16
 
17
17
 
18
- __all__ = ["Claude4_5Client"]
18
+ __all__ = ["Claude4_6Client"]
@@ -0,0 +1,415 @@
1
+ # Copyright 2025 Prism Shadow. and/or its affiliates
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import base64
16
+ import json
17
+ import mimetypes
18
+ import os
19
+ import re
20
+ from typing import Any, AsyncIterator
21
+
22
+ import httpx
23
+ from anthropic import AsyncAnthropic, AsyncAnthropicBedrock
24
+ from anthropic.types.beta import BetaMessageParam, BetaRawMessageStreamEvent
25
+
26
+ from ..base_client import LLMClient
27
+ from ..types import (
28
+ EventType,
29
+ FinishReason,
30
+ PartialContentItem,
31
+ PromptCaching,
32
+ ThinkingLevel,
33
+ ToolChoice,
34
+ UniConfig,
35
+ UniEvent,
36
+ UniMessage,
37
+ UsageMetadata,
38
+ )
39
+
40
+
41
+ REDACTED_THINKING = "_REDACTED_THINKING"
42
+
43
+
44
class Claude4_6Client(LLMClient):
    """Claude 4.6-specific LLM client implementation.

    Translates between the universal message/config/event format and the
    Anthropic SDK (direct API or AWS Bedrock), including streaming tool-call
    assembly and prompt-cache control.
    """

    def __init__(self, model: str, api_key: str | None = None, base_url: str | None = None):
        """Initialize Claude 4.6 client with model and API key.

        Args:
            model: Claude model identifier (e.g., claude-sonnet-4-6).
            api_key: API key; defaults to the ANTHROPIC_API_KEY env var. For
                Bedrock this is an "access_key,secret_key" pair joined by a comma.
            base_url: Base URL; defaults to the ANTHROPIC_BASE_URL env var.
                A value of the form "bedrock://<region>" routes through AWS Bedrock.

        Raises:
            ValueError: If Bedrock is selected but the credential pair is
                missing or not in "access_key,secret_key" form.
        """
        self._model = model
        api_key = api_key or os.getenv("ANTHROPIC_API_KEY")
        base_url = base_url or os.getenv("ANTHROPIC_BASE_URL")
        if base_url and base_url.startswith("bedrock://"):  # example: bedrock://us-east-1
            region = base_url.removeprefix("bedrock://")
            # Fail fast with a clear message instead of the opaque
            # AttributeError/unpack error that `api_key.split(",")` raises
            # when the key is absent or malformed.
            if not api_key or "," not in api_key:
                raise ValueError("Bedrock requires api_key in 'access_key,secret_key' format.")
            access_key, secret_key = api_key.split(",", 1)
            self._client = AsyncAnthropicBedrock(
                aws_secret_key=secret_key, aws_access_key=access_key, aws_region=region
            )
            self._use_bedrock = True
        else:
            self._client = AsyncAnthropic(api_key=api_key, base_url=base_url)
            self._use_bedrock = False

        self._history: list[UniMessage] = []

    async def _convert_image_url_to_source(self, url: str) -> dict[str, Any]:
        """Convert image URL to image source.

        Bedrock does not support image url sources, so we need to fetch the
        image bytes and encode them.

        Args:
            url: Image URL to convert (a data: URI or an http(s) URL)

        Returns:
            Claude image source block

        Raises:
            ValueError: If a data: URI is not in base64 form.
        """
        if url.startswith("data:"):
            match = re.match(r"data:([^;]+);base64,(.+)", url)
            if match:
                media_type = match.group(1)
                base64_data = match.group(2)
                source = {
                    "type": "image",
                    "source": {"type": "base64", "media_type": media_type, "data": base64_data},
                }
            else:
                raise ValueError(f"Invalid base64 image: {url}")
        elif self._use_bedrock:
            # Bedrock rejects URL sources: download and inline as base64.
            async with httpx.AsyncClient() as client:
                response = await client.get(url)
                response.raise_for_status()
                image_bytes = response.content
            mime_type = mimetypes.guess_type(url)[0] or "image/jpeg"
            source = {
                "type": "image",
                "source": {
                    "type": "base64",
                    "media_type": mime_type,
                    "data": base64.b64encode(image_bytes).decode("utf-8"),
                },
            }
        else:
            source = {"type": "image", "source": {"type": "url", "url": url}}

        return source

    def _convert_thinking_level_to_thinking_config(self, thinking_level: ThinkingLevel) -> dict[str, Any]:
        """Convert ThinkingLevel enum to Claude's adaptive thinking config.

        Returns an empty dict for ThinkingLevel.NONE, meaning the thinking
        config is omitted entirely.

        Raises:
            ValueError: If the level is not a known ThinkingLevel.
        """
        mapping = {
            ThinkingLevel.NONE: {},  # omit thinking config
            ThinkingLevel.LOW: {"thinking": {"type": "adaptive"}, "output_config": {"effort": "low"}},
            ThinkingLevel.MEDIUM: {"thinking": {"type": "adaptive"}, "output_config": {"effort": "medium"}},
            ThinkingLevel.HIGH: {"thinking": {"type": "adaptive"}, "output_config": {"effort": "high"}},
        }
        try:
            return mapping[thinking_level]
        except KeyError:
            # `mapping.get(...)` previously returned None for unknown levels,
            # which later crashed in `dict.update(None)` with an unhelpful
            # TypeError; raise a descriptive error instead.
            raise ValueError(f"Unknown thinking level: {thinking_level}") from None

    def _convert_tool_choice(self, tool_choice: ToolChoice) -> dict[str, str]:
        """Convert ToolChoice to Claude's tool_choice format.

        Args:
            tool_choice: "auto" | "none" | "required", or a single-element
                list naming the tool the model must call.

        Raises:
            ValueError: On an empty or multi-element list, or an unknown value.
        """
        if isinstance(tool_choice, list):
            # Previously an empty list fell through to an IndexError.
            if len(tool_choice) != 1:
                raise ValueError("Claude supports only one tool choice.")

            return {"type": "any", "name": tool_choice[0]}
        elif tool_choice == "none":
            return {"type": "none"}
        elif tool_choice == "auto":
            return {"type": "auto"}
        elif tool_choice == "required":
            return {"type": "any"}

        # Previously unknown values implicitly returned None, which produced a
        # confusing API error downstream.
        raise ValueError(f"Unknown tool choice: {tool_choice}")

    def transform_uni_config_to_model_config(self, config: UniConfig) -> dict[str, Any]:
        """
        Transform universal configuration to Claude-specific configuration.

        Args:
            config: Universal configuration dict

        Returns:
            Claude configuration dictionary
        """
        claude_config = {"model": self._model, "stream": True}

        if config.get("system_prompt") is not None:
            claude_config["system"] = config["system_prompt"]

        if config.get("max_tokens") is not None:
            claude_config["max_tokens"] = config["max_tokens"]
        else:
            claude_config["max_tokens"] = 32768  # Claude requires max_tokens to be specified

        if config.get("temperature") is not None:
            claude_config["temperature"] = config["temperature"]

        # NOTE: Claude always provides thinking summary
        if config.get("thinking_level") is not None:
            thinking_config = self._convert_thinking_level_to_thinking_config(config["thinking_level"])
            if thinking_config:
                # `temperature` may only be set to 1 when thinking is enabled.
                # ThinkingLevel.NONE yields an empty config, so the user's own
                # temperature must be left untouched in that case (previously
                # it was clobbered to 1.0 even with thinking disabled).
                claude_config["temperature"] = 1.0
                claude_config.update(thinking_config)

        # Convert tools to Claude's tool schema
        if config.get("tools") is not None:
            claude_tools = []
            for tool in config["tools"]:
                claude_tool = {}
                for key, value in tool.items():
                    # Universal schema calls the JSON schema "parameters";
                    # Claude calls it "input_schema".
                    claude_tool[key.replace("parameters", "input_schema")] = value

                claude_tools.append(claude_tool)

            claude_config["tools"] = claude_tools

        # Convert tool_choice
        if config.get("tool_choice") is not None:
            claude_config["tool_choice"] = self._convert_tool_choice(config["tool_choice"])

        return claude_config

    async def transform_uni_message_to_model_input(self, messages: list[UniMessage]) -> list[BetaMessageParam]:
        """
        Transform universal message format to Claude's BetaMessageParam format.

        Args:
            messages: List of universal message dictionaries

        Returns:
            List of Claude BetaMessageParam objects

        Raises:
            ValueError: If an item type is unknown, or a tool_result lacks
                its tool_call_id.
        """
        claude_messages: list[BetaMessageParam] = []

        for msg in messages:
            content_blocks = []
            for item in msg["content_items"]:
                if item["type"] == "text":
                    content_blocks.append({"type": "text", "text": item["text"]})
                elif item["type"] == "image_url":
                    content_blocks.append(await self._convert_image_url_to_source(item["image_url"]))
                elif item["type"] == "thinking":
                    # Redacted thinking round-trips with its data stored in the
                    # signature field (see transform_model_output_to_uni_event).
                    if item["thinking"] == REDACTED_THINKING:
                        content_blocks.append({"type": "redacted_thinking", "data": item["signature"]})
                    else:
                        content_blocks.append(
                            {"type": "thinking", "thinking": item["thinking"], "signature": item["signature"]}
                        )
                elif item["type"] == "tool_call":
                    content_blocks.append(
                        {
                            "type": "tool_use",
                            "id": item["tool_call_id"],
                            "name": item["name"],
                            "input": item["arguments"],
                        }
                    )
                elif item["type"] == "tool_result":
                    if "tool_call_id" not in item:
                        raise ValueError("tool_call_id is required for tool result.")

                    tool_result = [{"type": "text", "text": item["text"]}]
                    if "images" in item:
                        for image_url in item["images"]:
                            tool_result.append(await self._convert_image_url_to_source(image_url))

                    content_blocks.append(
                        {"type": "tool_result", "content": tool_result, "tool_use_id": item["tool_call_id"]}
                    )
                else:
                    raise ValueError(f"Unknown item: {item}")

            claude_messages.append({"role": msg["role"], "content": content_blocks})

        return claude_messages

    def transform_model_output_to_uni_event(self, model_output: BetaRawMessageStreamEvent) -> UniEvent:
        """
        Transform Claude model output to universal event format.

        NOTE: Claude always has only one content item per event.

        Args:
            model_output: Claude streaming event

        Returns:
            Universal event dictionary

        Raises:
            ValueError: If the event type is not recognized.
        """
        event_type: EventType | None = None
        content_items: list[PartialContentItem] = []
        usage_metadata: UsageMetadata | None = None
        finish_reason: FinishReason | None = None

        claude_event_type = model_output.type
        if claude_event_type == "content_block_start":
            event_type = "start"
            block = model_output.content_block
            if block.type == "tool_use":
                # Arguments arrive later via input_json_delta; start empty.
                content_items.append(
                    {"type": "partial_tool_call", "name": block.name, "arguments": "", "tool_call_id": block.id}
                )
            elif block.type == "redacted_thinking":
                # Preserve the opaque data blob in the signature field so it
                # can be sent back verbatim on the next turn.
                content_items.append({"type": "thinking", "thinking": REDACTED_THINKING, "signature": block.data})

        elif claude_event_type == "content_block_delta":
            event_type = "delta"
            delta = model_output.delta
            if delta.type == "thinking_delta":
                content_items.append({"type": "thinking", "thinking": delta.thinking})
            elif delta.type == "text_delta":
                content_items.append({"type": "text", "text": delta.text})
            elif delta.type == "input_json_delta":
                content_items.append(
                    {"type": "partial_tool_call", "name": "", "arguments": delta.partial_json, "tool_call_id": ""}
                )
            elif delta.type == "signature_delta":
                content_items.append({"type": "thinking", "thinking": "", "signature": delta.signature})

        elif claude_event_type == "content_block_stop":
            event_type = "stop"

        elif claude_event_type == "message_start":
            event_type = "start"
            message = model_output.message
            if getattr(message, "usage", None):
                cache_creation_tokens = message.usage.cache_creation_input_tokens or 0
                # Cache-creation tokens are billed as prompt tokens.
                usage_metadata = {
                    "cached_tokens": message.usage.cache_read_input_tokens,
                    "prompt_tokens": message.usage.input_tokens + cache_creation_tokens,
                    "thoughts_tokens": None,
                    "response_tokens": None,
                }

        elif claude_event_type == "message_delta":
            event_type = "stop"
            delta = model_output.delta
            if getattr(delta, "stop_reason", None):
                stop_reason_mapping = {
                    "end_turn": "stop",
                    "max_tokens": "length",
                    "stop_sequence": "stop",
                    "tool_use": "tool_call",
                }
                finish_reason = stop_reason_mapping.get(delta.stop_reason, "unknown")

            if getattr(model_output, "usage", None):
                # In message_delta, we only update response_tokens
                usage_metadata = {
                    "cached_tokens": None,
                    "prompt_tokens": None,
                    "thoughts_tokens": None,
                    "response_tokens": model_output.usage.output_tokens,
                }

        elif claude_event_type == "message_stop":
            event_type = "stop"

        elif claude_event_type in ["text", "thinking", "signature", "input_json"]:
            # SDK convenience accumulation events; the raw deltas above already
            # carry this content.
            event_type = "unused"

        else:
            raise ValueError(f"Unknown output: {model_output}")

        return {
            "role": "assistant",
            "event_type": event_type,
            "content_items": content_items,
            "usage_metadata": usage_metadata,
            "finish_reason": finish_reason,
        }

    async def _streaming_response_internal(
        self,
        messages: list[UniMessage],
        config: UniConfig,
    ) -> AsyncIterator[UniEvent]:
        """Stream generate using Claude SDK with unified conversion methods.

        Accumulates input_json_delta fragments into complete tool calls and
        merges the split usage metadata (prompt side from message_start,
        response side from message_delta) into a single terminal event.

        Args:
            messages: List of universal message dictionaries
            config: Universal configuration dict

        Yields:
            Universal events from the streaming response
        """
        # Use unified config conversion
        claude_config = self.transform_uni_config_to_model_config(config)

        # Use unified message conversion
        claude_messages = await self.transform_uni_message_to_model_input(messages)

        # Add cache_control to last user message's last item if enabled
        prompt_caching = config.get("prompt_caching", PromptCaching.ENABLE)
        if prompt_caching != PromptCaching.DISABLE and claude_messages:
            try:
                last_user_message = next(filter(lambda x: x["role"] == "user", claude_messages[::-1]))
                last_content_item = last_user_message["content"][-1]
                last_content_item["cache_control"] = {
                    "type": "ephemeral",
                    "ttl": "1h" if prompt_caching == PromptCaching.ENHANCE else "5m",
                }
            except StopIteration:
                # No user message in history; nothing to cache against.
                pass

        # Stream generate
        partial_tool_call = {}
        partial_usage = {}
        stream = await self._client.beta.messages.create(**claude_config, messages=claude_messages)
        async for event in stream:
            event = self.transform_model_output_to_uni_event(event)
            if event["event_type"] == "start":
                for item in event["content_items"]:
                    if item["type"] == "partial_tool_call":
                        # initialize partial_tool_call
                        partial_tool_call = {
                            "name": item["name"],
                            "arguments": "",
                            "tool_call_id": item["tool_call_id"],
                        }
                yield event

                if event["usage_metadata"] is not None:
                    # initialize partial_usage (prompt-side counts only)
                    partial_usage = {
                        "prompt_tokens": event["usage_metadata"]["prompt_tokens"],
                        "cached_tokens": event["usage_metadata"]["cached_tokens"],
                    }

            elif event["event_type"] == "delta":
                for item in event["content_items"]:
                    if item["type"] == "partial_tool_call":
                        # update partial_tool_call
                        partial_tool_call["arguments"] += item["arguments"]

                yield event

            elif event["event_type"] == "stop":
                if "name" in partial_tool_call and "arguments" in partial_tool_call:
                    # finish partial_tool_call. Zero-argument tool calls emit no
                    # input_json_delta, so arguments may still be "" here —
                    # treat that as an empty JSON object rather than letting
                    # json.loads("") raise.
                    yield {
                        "role": "assistant",
                        "event_type": "delta",
                        "content_items": [
                            {
                                "type": "tool_call",
                                "name": partial_tool_call["name"],
                                "arguments": json.loads(partial_tool_call["arguments"] or "{}"),
                                "tool_call_id": partial_tool_call["tool_call_id"],
                            }
                        ],
                        "usage_metadata": None,
                        "finish_reason": None,
                    }
                    partial_tool_call = {}

                if "prompt_tokens" in partial_usage and event["usage_metadata"] is not None:
                    # finish partial_usage: merge prompt-side counts captured at
                    # message_start with the response-side count from message_delta.
                    yield {
                        "role": "assistant",
                        "event_type": "stop",
                        "content_items": [],
                        "usage_metadata": {
                            "prompt_tokens": partial_usage["prompt_tokens"],
                            "thoughts_tokens": None,
                            "response_tokens": event["usage_metadata"]["response_tokens"],
                            "cached_tokens": partial_usage["cached_tokens"],
                        },
                        "finish_reason": event["finish_reason"],
                    }
                    partial_usage = {}