agenthub-python 0.2.0__tar.gz → 0.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agenthub_python-0.3.0/PKG-INFO +10 -0
- {agenthub_python-0.2.0 → agenthub_python-0.3.0}/agenthub/auto_client.py +22 -11
- {agenthub_python-0.2.0 → agenthub_python-0.3.0}/agenthub/base_client.py +67 -10
- {agenthub_python-0.2.0/agenthub/claude4_5 → agenthub_python-0.3.0/agenthub/claude4_6}/__init__.py +2 -2
- {agenthub_python-0.2.0/agenthub/claude4_5 → agenthub_python-0.3.0/agenthub/claude4_6}/client.py +161 -93
- {agenthub_python-0.2.0 → agenthub_python-0.3.0}/agenthub/gemini3/client.py +96 -45
- {agenthub_python-0.2.0/agenthub/gpt5_2 → agenthub_python-0.3.0/agenthub/glm5}/__init__.py +2 -2
- {agenthub_python-0.2.0/agenthub/glm4_7 → agenthub_python-0.3.0/agenthub/glm5}/client.py +114 -62
- {agenthub_python-0.2.0/agenthub/glm4_7 → agenthub_python-0.3.0/agenthub/gpt5_4}/__init__.py +2 -2
- {agenthub_python-0.2.0/agenthub/gpt5_2 → agenthub_python-0.3.0/agenthub/gpt5_4}/client.py +47 -13
- {agenthub_python-0.2.0 → agenthub_python-0.3.0}/agenthub/integration/playground.py +182 -307
- agenthub_python-0.3.0/agenthub/integration/tracer.py +528 -0
- agenthub_python-0.3.0/agenthub/kimi_k2_5/__init__.py +18 -0
- agenthub_python-0.3.0/agenthub/kimi_k2_5/client.py +388 -0
- {agenthub_python-0.2.0 → agenthub_python-0.3.0}/agenthub/qwen3/client.py +106 -60
- {agenthub_python-0.2.0 → agenthub_python-0.3.0}/agenthub/types.py +5 -3
- agenthub_python-0.3.0/agenthub/utils.py +35 -0
- {agenthub_python-0.2.0 → agenthub_python-0.3.0}/pyproject.toml +3 -3
- agenthub_python-0.2.0/PKG-INFO +0 -9
- agenthub_python-0.2.0/agenthub/integration/tracer.py +0 -750
- {agenthub_python-0.2.0 → agenthub_python-0.3.0}/agenthub/__init__.py +0 -0
- {agenthub_python-0.2.0 → agenthub_python-0.3.0}/agenthub/gemini3/__init__.py +0 -0
- {agenthub_python-0.2.0 → agenthub_python-0.3.0}/agenthub/integration/__init__.py +0 -0
- {agenthub_python-0.2.0 → agenthub_python-0.3.0}/agenthub/qwen3/__init__.py +0 -0
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
Metadata-Version: 2.3
|
|
2
|
+
Name: agenthub-python
|
|
3
|
+
Version: 0.3.0
|
|
4
|
+
Summary: AgentHub is the LLM API Hub for the Agent era, built for high-precision autonomous agents.
|
|
5
|
+
Requires-Dist: google-genai>=1.5.0
|
|
6
|
+
Requires-Dist: anthropic[bedrock]>=0.40.0
|
|
7
|
+
Requires-Dist: flask>=3.0.0
|
|
8
|
+
Requires-Dist: openai>=1.0.0
|
|
9
|
+
Requires-Dist: httpx>=0.27.0
|
|
10
|
+
Requires-Python: >=3.11
|
|
@@ -46,22 +46,26 @@ class AutoLLMClient(LLMClient):
|
|
|
46
46
|
) -> LLMClient:
|
|
47
47
|
"""Create the appropriate client for the given model."""
|
|
48
48
|
client_type = client_type or os.getenv("CLIENT_TYPE", model.lower())
|
|
49
|
-
if "gemini-3" in client_type: # e.g., gemini-3-flash-preview
|
|
49
|
+
if "gemini-3-" in client_type or "gemini-3.1-" in client_type: # e.g., gemini-3-flash-preview
|
|
50
50
|
from .gemini3 import Gemini3Client
|
|
51
51
|
|
|
52
52
|
return Gemini3Client(model=model, api_key=api_key, base_url=base_url)
|
|
53
|
-
elif "claude" in client_type and "4-
|
|
54
|
-
from .
|
|
53
|
+
elif "claude" in client_type and "4-6" in client_type: # e.g., claude-sonnet-4-6
|
|
54
|
+
from .claude4_6 import Claude4_6Client
|
|
55
55
|
|
|
56
|
-
return
|
|
57
|
-
elif "gpt-5.
|
|
58
|
-
from .
|
|
56
|
+
return Claude4_6Client(model=model, api_key=api_key, base_url=base_url)
|
|
57
|
+
elif "gpt-5.4" in client_type: # e.g., gpt-5.4
|
|
58
|
+
from .gpt5_4 import GPT5_4Client
|
|
59
59
|
|
|
60
|
-
return
|
|
61
|
-
elif "glm-
|
|
62
|
-
from .
|
|
60
|
+
return GPT5_4Client(model=model, api_key=api_key, base_url=base_url)
|
|
61
|
+
elif "glm-5" in client_type:
|
|
62
|
+
from .glm5 import GLM5Client
|
|
63
63
|
|
|
64
|
-
return
|
|
64
|
+
return GLM5Client(model=model, api_key=api_key, base_url=base_url)
|
|
65
|
+
elif "kimi-k2.5" in client_type:
|
|
66
|
+
from .kimi_k2_5 import KimiK2_5Client
|
|
67
|
+
|
|
68
|
+
return KimiK2_5Client(model=model, api_key=api_key, base_url=base_url)
|
|
65
69
|
elif "qwen3" in client_type:
|
|
66
70
|
from .qwen3 import Qwen3Client
|
|
67
71
|
|
|
@@ -69,7 +73,7 @@ class AutoLLMClient(LLMClient):
|
|
|
69
73
|
else:
|
|
70
74
|
raise ValueError(
|
|
71
75
|
f"{client_type} is not supported. "
|
|
72
|
-
"Supported client types: gemini-3, claude-4-
|
|
76
|
+
"Supported client types: gemini-3, claude-4-6, gpt-5.4, glm-5, kimi-k2.5, qwen3."
|
|
73
77
|
)
|
|
74
78
|
|
|
75
79
|
def transform_uni_config_to_model_config(self, config: UniConfig) -> Any:
|
|
@@ -84,6 +88,13 @@ class AutoLLMClient(LLMClient):
|
|
|
84
88
|
"""Delegate to underlying client's transform_model_output_to_uni_event."""
|
|
85
89
|
return self._client.transform_model_output_to_uni_event(model_output)
|
|
86
90
|
|
|
91
|
+
async def _streaming_response_internal(
|
|
92
|
+
self,
|
|
93
|
+
messages: list[UniMessage],
|
|
94
|
+
config: UniConfig,
|
|
95
|
+
) -> AsyncIterator[UniEvent]:
|
|
96
|
+
raise NotImplementedError("Please use streaming_response instead.")
|
|
97
|
+
|
|
87
98
|
async def streaming_response(
|
|
88
99
|
self,
|
|
89
100
|
messages: list[UniMessage],
|
|
@@ -89,16 +89,25 @@ class LLMClient(ABC):
|
|
|
89
89
|
# Merge content_items from all events
|
|
90
90
|
for item in event["content_items"]:
|
|
91
91
|
if item["type"] == "text":
|
|
92
|
-
if
|
|
92
|
+
if (
|
|
93
|
+
content_items
|
|
94
|
+
and content_items[-1]["type"] == "text"
|
|
95
|
+
and content_items[-1].get("signature") is None # no signature yet
|
|
96
|
+
and item.get("phase") is None # no new phase
|
|
97
|
+
):
|
|
93
98
|
content_items[-1]["text"] += item["text"]
|
|
94
|
-
if "signature" in item: #
|
|
99
|
+
if "signature" in item: # finish the current item if signature is not None
|
|
95
100
|
content_items[-1]["signature"] = item["signature"]
|
|
96
|
-
elif item["text"]: #
|
|
101
|
+
elif item["text"] or item.get("phase") is not None: # text or new phase starts an item
|
|
97
102
|
content_items.append(item.copy())
|
|
98
103
|
elif item["type"] == "thinking":
|
|
99
|
-
if
|
|
104
|
+
if (
|
|
105
|
+
content_items
|
|
106
|
+
and content_items[-1]["type"] == "thinking"
|
|
107
|
+
and content_items[-1].get("signature") is None # no signature yet
|
|
108
|
+
):
|
|
100
109
|
content_items[-1]["thinking"] += item["thinking"]
|
|
101
|
-
if "signature" in item: #
|
|
110
|
+
if "signature" in item: # finish the current item if signature is not None
|
|
102
111
|
content_items[-1]["signature"] = item["signature"]
|
|
103
112
|
elif item["thinking"] or item.get("signature"): # omit empty thinking items
|
|
104
113
|
content_items.append(item.copy())
|
|
@@ -119,6 +128,26 @@ class LLMClient(ABC):
|
|
|
119
128
|
}
|
|
120
129
|
|
|
121
130
|
@abstractmethod
|
|
131
|
+
async def _streaming_response_internal(
|
|
132
|
+
self,
|
|
133
|
+
messages: list[UniMessage],
|
|
134
|
+
config: UniConfig,
|
|
135
|
+
) -> AsyncIterator[UniEvent]:
|
|
136
|
+
"""
|
|
137
|
+
Internal method to handle streaming response.
|
|
138
|
+
|
|
139
|
+
This method should be implemented by each model client to handle
|
|
140
|
+
the actual streaming request and yield model-specific events.
|
|
141
|
+
|
|
142
|
+
Args:
|
|
143
|
+
messages: List of universal message dictionaries
|
|
144
|
+
config: Universal configuration dict
|
|
145
|
+
|
|
146
|
+
Yields:
|
|
147
|
+
Model-specific events from the streaming response
|
|
148
|
+
"""
|
|
149
|
+
pass
|
|
150
|
+
|
|
122
151
|
async def streaming_response(
|
|
123
152
|
self,
|
|
124
153
|
messages: list[UniMessage],
|
|
@@ -138,7 +167,12 @@ class LLMClient(ABC):
|
|
|
138
167
|
Yields:
|
|
139
168
|
Universal events from the streaming response
|
|
140
169
|
"""
|
|
141
|
-
|
|
170
|
+
last_event: UniEvent | None = None
|
|
171
|
+
async for event in self._streaming_response_internal(messages, config):
|
|
172
|
+
last_event = event
|
|
173
|
+
yield event
|
|
174
|
+
|
|
175
|
+
self._validate_last_event(last_event)
|
|
142
176
|
|
|
143
177
|
async def streaming_response_stateful(
|
|
144
178
|
self,
|
|
@@ -159,18 +193,19 @@ class LLMClient(ABC):
|
|
|
159
193
|
Yields:
|
|
160
194
|
Universal events from the streaming response
|
|
161
195
|
"""
|
|
162
|
-
#
|
|
163
|
-
self._history
|
|
196
|
+
# Build a temporary messages list for inference without mutating history yet
|
|
197
|
+
temp_messages = self._history + [message]
|
|
164
198
|
|
|
165
199
|
# Collect all events for history
|
|
166
200
|
events = []
|
|
167
|
-
async for event in self.streaming_response(messages=
|
|
201
|
+
async for event in self.streaming_response(messages=temp_messages, config=config):
|
|
168
202
|
events.append(event)
|
|
169
203
|
yield event
|
|
170
204
|
|
|
171
|
-
#
|
|
205
|
+
# Only update history after successful inference
|
|
172
206
|
if events:
|
|
173
207
|
assistant_message = self.concat_uni_events_to_uni_message(events)
|
|
208
|
+
self._history.append(message)
|
|
174
209
|
self._history.append(assistant_message)
|
|
175
210
|
|
|
176
211
|
# Save history to file if trace_id is specified
|
|
@@ -180,6 +215,28 @@ class LLMClient(ABC):
|
|
|
180
215
|
tracer = Tracer()
|
|
181
216
|
tracer.save_history(self._model, self._history, config["trace_id"], config)
|
|
182
217
|
|
|
218
|
+
@staticmethod
|
|
219
|
+
def _validate_last_event(last_event: UniEvent | None) -> None:
|
|
220
|
+
"""Validate that the last event has usage_metadata and finish_reason.
|
|
221
|
+
|
|
222
|
+
This validation guards against servers that silently terminate streaming
|
|
223
|
+
output partway through without sending a proper final event.
|
|
224
|
+
|
|
225
|
+
Args:
|
|
226
|
+
last_event: The last event yielded by streaming_response
|
|
227
|
+
|
|
228
|
+
Raises:
|
|
229
|
+
ValueError: If last_event is None or missing usage_metadata/finish_reason
|
|
230
|
+
"""
|
|
231
|
+
if last_event is None:
|
|
232
|
+
raise ValueError("Streaming response yielded no events")
|
|
233
|
+
|
|
234
|
+
if last_event["usage_metadata"] is None:
|
|
235
|
+
raise ValueError(f"Last event must carry usage_metadata, got: {last_event}")
|
|
236
|
+
|
|
237
|
+
if last_event["finish_reason"] is None:
|
|
238
|
+
raise ValueError(f"Last event must carry finish_reason, got: {last_event}")
|
|
239
|
+
|
|
183
240
|
def clear_history(self) -> None:
|
|
184
241
|
"""Clear the message history."""
|
|
185
242
|
self._history.clear()
|
{agenthub_python-0.2.0/agenthub/claude4_5 → agenthub_python-0.3.0/agenthub/claude4_6}/__init__.py
RENAMED
|
@@ -12,7 +12,7 @@
|
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
|
-
from .client import
|
|
15
|
+
from .client import Claude4_6Client
|
|
16
16
|
|
|
17
17
|
|
|
18
|
-
__all__ = ["
|
|
18
|
+
__all__ = ["Claude4_6Client"]
|
{agenthub_python-0.2.0/agenthub/claude4_5 → agenthub_python-0.3.0/agenthub/claude4_6}/client.py
RENAMED
|
@@ -12,12 +12,16 @@
|
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
|
+
import base64
|
|
15
16
|
import json
|
|
17
|
+
import mimetypes
|
|
16
18
|
import os
|
|
19
|
+
import re
|
|
17
20
|
from typing import Any, AsyncIterator
|
|
18
21
|
|
|
19
|
-
|
|
20
|
-
from anthropic
|
|
22
|
+
import httpx
|
|
23
|
+
from anthropic import AsyncAnthropic, AsyncAnthropicBedrock
|
|
24
|
+
from anthropic.types.beta import BetaMessageParam, BetaRawMessageStreamEvent
|
|
21
25
|
|
|
22
26
|
from ..base_client import LLMClient
|
|
23
27
|
from ..types import (
|
|
@@ -34,25 +38,78 @@ from ..types import (
|
|
|
34
38
|
)
|
|
35
39
|
|
|
36
40
|
|
|
37
|
-
|
|
38
|
-
|
|
41
|
+
REDACTED_THINKING = "_REDACTED_THINKING"
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
class Claude4_6Client(LLMClient):
|
|
45
|
+
"""Claude 4.6-specific LLM client implementation."""
|
|
39
46
|
|
|
40
47
|
def __init__(self, model: str, api_key: str | None = None, base_url: str | None = None):
|
|
41
|
-
"""Initialize Claude 4.
|
|
48
|
+
"""Initialize Claude 4.6 client with model and API key."""
|
|
42
49
|
self._model = model
|
|
43
50
|
api_key = api_key or os.getenv("ANTHROPIC_API_KEY")
|
|
44
51
|
base_url = base_url or os.getenv("ANTHROPIC_BASE_URL")
|
|
45
|
-
|
|
52
|
+
if base_url and base_url.startswith("bedrock://"): # example: bedrock://us-east-1
|
|
53
|
+
region = base_url.replace("bedrock://", "")
|
|
54
|
+
access_key, secret_key = api_key.split(",")
|
|
55
|
+
self._client = AsyncAnthropicBedrock(
|
|
56
|
+
aws_secret_key=secret_key, aws_access_key=access_key, aws_region=region
|
|
57
|
+
)
|
|
58
|
+
self._use_bedrock = True
|
|
59
|
+
else:
|
|
60
|
+
self._client = AsyncAnthropic(api_key=api_key, base_url=base_url)
|
|
61
|
+
self._use_bedrock = False
|
|
62
|
+
|
|
46
63
|
self._history: list[UniMessage] = []
|
|
47
64
|
|
|
48
|
-
def
|
|
49
|
-
"""Convert
|
|
65
|
+
async def _convert_image_url_to_source(self, url: str) -> dict[str, Any]:
|
|
66
|
+
"""Convert image URL to image source.
|
|
67
|
+
|
|
68
|
+
Bedrock does not support image url sources, so we need to fetch the image bytes and encode them.
|
|
69
|
+
|
|
70
|
+
Args:
|
|
71
|
+
url: Image URL to convert
|
|
72
|
+
|
|
73
|
+
Returns:
|
|
74
|
+
Image source
|
|
75
|
+
"""
|
|
76
|
+
if url.startswith("data:"):
|
|
77
|
+
match = re.match(r"data:([^;]+);base64,(.+)", url)
|
|
78
|
+
if match:
|
|
79
|
+
media_type = match.group(1)
|
|
80
|
+
base64_data = match.group(2)
|
|
81
|
+
source = {
|
|
82
|
+
"type": "image",
|
|
83
|
+
"source": {"type": "base64", "media_type": media_type, "data": base64_data},
|
|
84
|
+
}
|
|
85
|
+
else:
|
|
86
|
+
raise ValueError(f"Invalid base64 image: {url}")
|
|
87
|
+
elif self._use_bedrock:
|
|
88
|
+
async with httpx.AsyncClient() as client:
|
|
89
|
+
response = await client.get(url)
|
|
90
|
+
response.raise_for_status()
|
|
91
|
+
image_bytes = response.content
|
|
92
|
+
mime_type = mimetypes.guess_type(url)[0] or "image/jpeg"
|
|
93
|
+
source = {
|
|
94
|
+
"type": "image",
|
|
95
|
+
"source": {
|
|
96
|
+
"type": "base64",
|
|
97
|
+
"media_type": mime_type,
|
|
98
|
+
"data": base64.b64encode(image_bytes).decode("utf-8"),
|
|
99
|
+
},
|
|
100
|
+
}
|
|
101
|
+
else:
|
|
102
|
+
source = {"type": "image", "source": {"type": "url", "url": url}}
|
|
103
|
+
|
|
104
|
+
return source
|
|
50
105
|
|
|
106
|
+
def _convert_thinking_level_to_thinking_config(self, thinking_level: ThinkingLevel) -> dict[str, Any]:
|
|
107
|
+
"""Convert ThinkingLevel enum to Claude's adaptive thinking config."""
|
|
51
108
|
mapping = {
|
|
52
|
-
ThinkingLevel.NONE: {
|
|
53
|
-
ThinkingLevel.LOW: {"type": "
|
|
54
|
-
ThinkingLevel.MEDIUM: {"type": "
|
|
55
|
-
ThinkingLevel.HIGH: {"type": "
|
|
109
|
+
ThinkingLevel.NONE: {}, # omit thinking config
|
|
110
|
+
ThinkingLevel.LOW: {"thinking": {"type": "adaptive"}, "output_config": {"effort": "low"}},
|
|
111
|
+
ThinkingLevel.MEDIUM: {"thinking": {"type": "adaptive"}, "output_config": {"effort": "medium"}},
|
|
112
|
+
ThinkingLevel.HIGH: {"thinking": {"type": "adaptive"}, "output_config": {"effort": "high"}},
|
|
56
113
|
}
|
|
57
114
|
return mapping.get(thinking_level)
|
|
58
115
|
|
|
@@ -80,7 +137,7 @@ class Claude4_5Client(LLMClient):
|
|
|
80
137
|
Returns:
|
|
81
138
|
Claude configuration dictionary
|
|
82
139
|
"""
|
|
83
|
-
claude_config = {"model": self._model}
|
|
140
|
+
claude_config = {"model": self._model, "stream": True}
|
|
84
141
|
|
|
85
142
|
if config.get("system_prompt") is not None:
|
|
86
143
|
claude_config["system"] = config["system_prompt"]
|
|
@@ -96,7 +153,7 @@ class Claude4_5Client(LLMClient):
|
|
|
96
153
|
# NOTE: Claude always provides thinking summary
|
|
97
154
|
if config.get("thinking_level") is not None:
|
|
98
155
|
claude_config["temperature"] = 1.0 # `temperature` may only be set to 1 when thinking is enabled
|
|
99
|
-
claude_config
|
|
156
|
+
claude_config.update(self._convert_thinking_level_to_thinking_config(config["thinking_level"]))
|
|
100
157
|
|
|
101
158
|
# Convert tools to Claude's tool schema
|
|
102
159
|
if config.get("tools") is not None:
|
|
@@ -116,17 +173,17 @@ class Claude4_5Client(LLMClient):
|
|
|
116
173
|
|
|
117
174
|
return claude_config
|
|
118
175
|
|
|
119
|
-
def transform_uni_message_to_model_input(self, messages: list[UniMessage]) -> list[
|
|
176
|
+
async def transform_uni_message_to_model_input(self, messages: list[UniMessage]) -> list[BetaMessageParam]:
|
|
120
177
|
"""
|
|
121
|
-
Transform universal message format to Claude's
|
|
178
|
+
Transform universal message format to Claude's BetaMessageParam format.
|
|
122
179
|
|
|
123
180
|
Args:
|
|
124
181
|
messages: List of universal message dictionaries
|
|
125
182
|
|
|
126
183
|
Returns:
|
|
127
|
-
List of Claude
|
|
184
|
+
List of Claude BetaMessageParam objects
|
|
128
185
|
"""
|
|
129
|
-
claude_messages: list[
|
|
186
|
+
claude_messages: list[BetaMessageParam] = []
|
|
130
187
|
|
|
131
188
|
for msg in messages:
|
|
132
189
|
content_blocks = []
|
|
@@ -134,12 +191,14 @@ class Claude4_5Client(LLMClient):
|
|
|
134
191
|
if item["type"] == "text":
|
|
135
192
|
content_blocks.append({"type": "text", "text": item["text"]})
|
|
136
193
|
elif item["type"] == "image_url":
|
|
137
|
-
|
|
138
|
-
content_blocks.append({"type": "image", "source": {"type": "url", "url": item["image_url"]}})
|
|
194
|
+
content_blocks.append(await self._convert_image_url_to_source(item["image_url"]))
|
|
139
195
|
elif item["type"] == "thinking":
|
|
140
|
-
|
|
141
|
-
{"type": "
|
|
142
|
-
|
|
196
|
+
if item["thinking"] == REDACTED_THINKING:
|
|
197
|
+
content_blocks.append({"type": "redacted_thinking", "data": item["signature"]})
|
|
198
|
+
else:
|
|
199
|
+
content_blocks.append(
|
|
200
|
+
{"type": "thinking", "thinking": item["thinking"], "signature": item["signature"]}
|
|
201
|
+
)
|
|
143
202
|
elif item["type"] == "tool_call":
|
|
144
203
|
content_blocks.append(
|
|
145
204
|
{
|
|
@@ -153,8 +212,13 @@ class Claude4_5Client(LLMClient):
|
|
|
153
212
|
if "tool_call_id" not in item:
|
|
154
213
|
raise ValueError("tool_call_id is required for tool result.")
|
|
155
214
|
|
|
215
|
+
tool_result = [{"type": "text", "text": item["text"]}]
|
|
216
|
+
if "images" in item:
|
|
217
|
+
for image_url in item["images"]:
|
|
218
|
+
tool_result.append(await self._convert_image_url_to_source(image_url))
|
|
219
|
+
|
|
156
220
|
content_blocks.append(
|
|
157
|
-
{"type": "tool_result", "content":
|
|
221
|
+
{"type": "tool_result", "content": tool_result, "tool_use_id": item["tool_call_id"]}
|
|
158
222
|
)
|
|
159
223
|
else:
|
|
160
224
|
raise ValueError(f"Unknown item: {item}")
|
|
@@ -163,7 +227,7 @@ class Claude4_5Client(LLMClient):
|
|
|
163
227
|
|
|
164
228
|
return claude_messages
|
|
165
229
|
|
|
166
|
-
def transform_model_output_to_uni_event(self, model_output:
|
|
230
|
+
def transform_model_output_to_uni_event(self, model_output: BetaRawMessageStreamEvent) -> UniEvent:
|
|
167
231
|
"""
|
|
168
232
|
Transform Claude model output to universal event format.
|
|
169
233
|
|
|
@@ -188,6 +252,8 @@ class Claude4_5Client(LLMClient):
|
|
|
188
252
|
content_items.append(
|
|
189
253
|
{"type": "partial_tool_call", "name": block.name, "arguments": "", "tool_call_id": block.id}
|
|
190
254
|
)
|
|
255
|
+
elif block.type == "redacted_thinking":
|
|
256
|
+
content_items.append({"type": "thinking", "thinking": REDACTED_THINKING, "signature": block.data})
|
|
191
257
|
|
|
192
258
|
elif claude_event_type == "content_block_delta":
|
|
193
259
|
event_type = "delta"
|
|
@@ -210,11 +276,12 @@ class Claude4_5Client(LLMClient):
|
|
|
210
276
|
event_type = "start"
|
|
211
277
|
message = model_output.message
|
|
212
278
|
if getattr(message, "usage", None):
|
|
279
|
+
cache_creation_tokens = message.usage.cache_creation_input_tokens or 0
|
|
213
280
|
usage_metadata = {
|
|
214
|
-
"
|
|
281
|
+
"cached_tokens": message.usage.cache_read_input_tokens,
|
|
282
|
+
"prompt_tokens": message.usage.input_tokens + cache_creation_tokens,
|
|
215
283
|
"thoughts_tokens": None,
|
|
216
284
|
"response_tokens": None,
|
|
217
|
-
"cached_tokens": message.usage.cache_read_input_tokens,
|
|
218
285
|
}
|
|
219
286
|
|
|
220
287
|
elif claude_event_type == "message_delta":
|
|
@@ -225,16 +292,17 @@ class Claude4_5Client(LLMClient):
|
|
|
225
292
|
"end_turn": "stop",
|
|
226
293
|
"max_tokens": "length",
|
|
227
294
|
"stop_sequence": "stop",
|
|
228
|
-
"tool_use": "
|
|
295
|
+
"tool_use": "tool_call",
|
|
229
296
|
}
|
|
230
297
|
finish_reason = stop_reason_mapping.get(delta.stop_reason, "unknown")
|
|
231
298
|
|
|
232
299
|
if getattr(model_output, "usage", None):
|
|
300
|
+
# In message_delta, we only update response_tokens
|
|
233
301
|
usage_metadata = {
|
|
302
|
+
"cached_tokens": None,
|
|
234
303
|
"prompt_tokens": None,
|
|
235
304
|
"thoughts_tokens": None,
|
|
236
305
|
"response_tokens": model_output.usage.output_tokens,
|
|
237
|
-
"cached_tokens": None,
|
|
238
306
|
}
|
|
239
307
|
|
|
240
308
|
elif claude_event_type == "message_stop":
|
|
@@ -254,7 +322,7 @@ class Claude4_5Client(LLMClient):
|
|
|
254
322
|
"finish_reason": finish_reason,
|
|
255
323
|
}
|
|
256
324
|
|
|
257
|
-
async def
|
|
325
|
+
async def _streaming_response_internal(
|
|
258
326
|
self,
|
|
259
327
|
messages: list[UniMessage],
|
|
260
328
|
config: UniConfig,
|
|
@@ -264,7 +332,7 @@ class Claude4_5Client(LLMClient):
|
|
|
264
332
|
claude_config = self.transform_uni_config_to_model_config(config)
|
|
265
333
|
|
|
266
334
|
# Use unified message conversion
|
|
267
|
-
claude_messages = self.transform_uni_message_to_model_input(messages)
|
|
335
|
+
claude_messages = await self.transform_uni_message_to_model_input(messages)
|
|
268
336
|
|
|
269
337
|
# Add cache_control to last user message's last item if enabled
|
|
270
338
|
prompt_caching = config.get("prompt_caching", PromptCaching.ENABLE)
|
|
@@ -282,66 +350,66 @@ class Claude4_5Client(LLMClient):
|
|
|
282
350
|
# Stream generate
|
|
283
351
|
partial_tool_call = {}
|
|
284
352
|
partial_usage = {}
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
}
|
|
297
|
-
yield event
|
|
298
|
-
|
|
299
|
-
if event["usage_metadata"] is not None:
|
|
300
|
-
# initialize partial_usage
|
|
301
|
-
partial_usage = {
|
|
302
|
-
"prompt_tokens": event["usage_metadata"]["prompt_tokens"],
|
|
303
|
-
"cached_tokens": event["usage_metadata"]["cached_tokens"],
|
|
304
|
-
}
|
|
305
|
-
|
|
306
|
-
elif event["event_type"] == "delta":
|
|
307
|
-
for item in event["content_items"]:
|
|
308
|
-
if item["type"] == "partial_tool_call":
|
|
309
|
-
# update partial_tool_call
|
|
310
|
-
partial_tool_call["arguments"] += item["arguments"]
|
|
311
|
-
|
|
312
|
-
yield event
|
|
313
|
-
|
|
314
|
-
elif event["event_type"] == "stop":
|
|
315
|
-
if "name" in partial_tool_call and "arguments" in partial_tool_call:
|
|
316
|
-
# finish partial_tool_call
|
|
317
|
-
yield {
|
|
318
|
-
"role": "assistant",
|
|
319
|
-
"event_type": "delta",
|
|
320
|
-
"content_items": [
|
|
321
|
-
{
|
|
322
|
-
"type": "tool_call",
|
|
323
|
-
"name": partial_tool_call["name"],
|
|
324
|
-
"arguments": json.loads(partial_tool_call["arguments"]),
|
|
325
|
-
"tool_call_id": partial_tool_call["tool_call_id"],
|
|
326
|
-
}
|
|
327
|
-
],
|
|
328
|
-
"usage_metadata": None,
|
|
329
|
-
"finish_reason": None,
|
|
330
|
-
}
|
|
331
|
-
partial_tool_call = {}
|
|
332
|
-
|
|
333
|
-
if "prompt_tokens" in partial_usage and event["usage_metadata"] is not None:
|
|
334
|
-
# finish partial_usage
|
|
335
|
-
yield {
|
|
336
|
-
"role": "assistant",
|
|
337
|
-
"event_type": "stop",
|
|
338
|
-
"content_items": [],
|
|
339
|
-
"usage_metadata": {
|
|
340
|
-
"prompt_tokens": partial_usage["prompt_tokens"],
|
|
341
|
-
"thoughts_tokens": None,
|
|
342
|
-
"response_tokens": event["usage_metadata"]["response_tokens"],
|
|
343
|
-
"cached_tokens": partial_usage["cached_tokens"],
|
|
344
|
-
},
|
|
345
|
-
"finish_reason": event["finish_reason"],
|
|
353
|
+
stream = await self._client.beta.messages.create(**claude_config, messages=claude_messages)
|
|
354
|
+
async for event in stream:
|
|
355
|
+
event = self.transform_model_output_to_uni_event(event)
|
|
356
|
+
if event["event_type"] == "start":
|
|
357
|
+
for item in event["content_items"]:
|
|
358
|
+
if item["type"] == "partial_tool_call":
|
|
359
|
+
# initialize partial_tool_call
|
|
360
|
+
partial_tool_call = {
|
|
361
|
+
"name": item["name"],
|
|
362
|
+
"arguments": "",
|
|
363
|
+
"tool_call_id": item["tool_call_id"],
|
|
346
364
|
}
|
|
347
|
-
|
|
365
|
+
yield event
|
|
366
|
+
|
|
367
|
+
if event["usage_metadata"] is not None:
|
|
368
|
+
# initialize partial_usage
|
|
369
|
+
partial_usage = {
|
|
370
|
+
"prompt_tokens": event["usage_metadata"]["prompt_tokens"],
|
|
371
|
+
"cached_tokens": event["usage_metadata"]["cached_tokens"],
|
|
372
|
+
}
|
|
373
|
+
|
|
374
|
+
elif event["event_type"] == "delta":
|
|
375
|
+
for item in event["content_items"]:
|
|
376
|
+
if item["type"] == "partial_tool_call":
|
|
377
|
+
# update partial_tool_call
|
|
378
|
+
partial_tool_call["arguments"] += item["arguments"]
|
|
379
|
+
|
|
380
|
+
yield event
|
|
381
|
+
|
|
382
|
+
elif event["event_type"] == "stop":
|
|
383
|
+
if "name" in partial_tool_call and "arguments" in partial_tool_call:
|
|
384
|
+
# finish partial_tool_call
|
|
385
|
+
yield {
|
|
386
|
+
"role": "assistant",
|
|
387
|
+
"event_type": "delta",
|
|
388
|
+
"content_items": [
|
|
389
|
+
{
|
|
390
|
+
"type": "tool_call",
|
|
391
|
+
"name": partial_tool_call["name"],
|
|
392
|
+
"arguments": json.loads(partial_tool_call["arguments"]),
|
|
393
|
+
"tool_call_id": partial_tool_call["tool_call_id"],
|
|
394
|
+
}
|
|
395
|
+
],
|
|
396
|
+
"usage_metadata": None,
|
|
397
|
+
"finish_reason": None,
|
|
398
|
+
}
|
|
399
|
+
partial_tool_call = {}
|
|
400
|
+
|
|
401
|
+
if "prompt_tokens" in partial_usage and event["usage_metadata"] is not None:
|
|
402
|
+
# finish partial_usage
|
|
403
|
+
yield {
|
|
404
|
+
"role": "assistant",
|
|
405
|
+
"event_type": "stop",
|
|
406
|
+
"content_items": [],
|
|
407
|
+
"usage_metadata": {
|
|
408
|
+
"prompt_tokens": partial_usage["prompt_tokens"],
|
|
409
|
+
"thoughts_tokens": None,
|
|
410
|
+
"response_tokens": event["usage_metadata"]["response_tokens"],
|
|
411
|
+
"cached_tokens": partial_usage["cached_tokens"],
|
|
412
|
+
},
|
|
413
|
+
"finish_reason": event["finish_reason"],
|
|
414
|
+
}
|
|
415
|
+
partial_usage = {}
|