agenthub-python 0.1.0__tar.gz → 0.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agenthub_python-0.3.0/PKG-INFO +10 -0
- {agenthub_python-0.1.0 → agenthub_python-0.3.0}/agenthub/__init__.py +2 -3
- {agenthub_python-0.1.0 → agenthub_python-0.3.0}/agenthub/auto_client.py +43 -11
- {agenthub_python-0.1.0 → agenthub_python-0.3.0}/agenthub/base_client.py +74 -13
- {agenthub_python-0.1.0/agenthub/claude4_5 → agenthub_python-0.3.0/agenthub/claude4_6}/__init__.py +2 -2
- agenthub_python-0.3.0/agenthub/claude4_6/client.py +415 -0
- {agenthub_python-0.1.0 → agenthub_python-0.3.0}/agenthub/gemini3/client.py +130 -52
- agenthub_python-0.3.0/agenthub/glm5/__init__.py +18 -0
- agenthub_python-0.3.0/agenthub/glm5/client.py +354 -0
- agenthub_python-0.3.0/agenthub/gpt5_4/__init__.py +18 -0
- agenthub_python-0.3.0/agenthub/gpt5_4/client.py +354 -0
- agenthub_python-0.3.0/agenthub/integration/__init__.py +14 -0
- agenthub_python-0.3.0/agenthub/integration/playground.py +646 -0
- agenthub_python-0.3.0/agenthub/integration/tracer.py +528 -0
- agenthub_python-0.3.0/agenthub/kimi_k2_5/__init__.py +18 -0
- agenthub_python-0.3.0/agenthub/kimi_k2_5/client.py +388 -0
- agenthub_python-0.3.0/agenthub/qwen3/__init__.py +18 -0
- agenthub_python-0.3.0/agenthub/qwen3/client.py +376 -0
- {agenthub_python-0.1.0 → agenthub_python-0.3.0}/agenthub/types.py +23 -21
- agenthub_python-0.3.0/agenthub/utils.py +35 -0
- {agenthub_python-0.1.0 → agenthub_python-0.3.0}/pyproject.toml +4 -4
- agenthub_python-0.1.0/PKG-INFO +0 -9
- agenthub_python-0.1.0/agenthub/claude4_5/client.py +0 -315
- agenthub_python-0.1.0/agenthub/tracer.py +0 -722
- {agenthub_python-0.1.0 → agenthub_python-0.3.0}/agenthub/gemini3/__init__.py +0 -0
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
Metadata-Version: 2.3
|
|
2
|
+
Name: agenthub-python
|
|
3
|
+
Version: 0.3.0
|
|
4
|
+
Summary: AgentHub is the LLM API Hub for the Agent era, built for high-precision autonomous agents.
|
|
5
|
+
Requires-Dist: google-genai>=1.5.0
|
|
6
|
+
Requires-Dist: anthropic[bedrock]>=0.40.0
|
|
7
|
+
Requires-Dist: flask>=3.0.0
|
|
8
|
+
Requires-Dist: openai>=1.0.0
|
|
9
|
+
Requires-Dist: httpx>=0.27.0
|
|
10
|
+
Requires-Python: >=3.11
|
|
@@ -13,8 +13,7 @@
|
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
15
|
from .auto_client import AutoLLMClient
|
|
16
|
-
from .
|
|
17
|
-
from .types import ThinkingLevel
|
|
16
|
+
from .types import PromptCaching, ThinkingLevel
|
|
18
17
|
|
|
19
18
|
|
|
20
|
-
__all__ = ["AutoLLMClient", "
|
|
19
|
+
__all__ = ["AutoLLMClient", "PromptCaching", "ThinkingLevel"]
|
|
@@ -12,6 +12,7 @@
|
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
|
+
import os
|
|
15
16
|
from typing import Any, AsyncIterator
|
|
16
17
|
|
|
17
18
|
from .base_client import LLMClient
|
|
@@ -26,30 +27,54 @@ class AutoLLMClient(LLMClient):
|
|
|
26
27
|
conversation history for that specific model.
|
|
27
28
|
"""
|
|
28
29
|
|
|
29
|
-
def __init__(
|
|
30
|
+
def __init__(
|
|
31
|
+
self, model: str, api_key: str | None = None, base_url: str | None = None, client_type: str | None = None
|
|
32
|
+
):
|
|
30
33
|
"""
|
|
31
34
|
Initialize AutoLLMClient with a specific model.
|
|
32
35
|
|
|
33
36
|
Args:
|
|
34
37
|
model: Model identifier (determines which client to use)
|
|
35
38
|
api_key: Optional API key
|
|
39
|
+
base_url: Optional base URL for API requests
|
|
40
|
+
client_type: Optional client type override
|
|
36
41
|
"""
|
|
37
|
-
self._client = self._create_client_for_model(model, api_key)
|
|
42
|
+
self._client = self._create_client_for_model(model, api_key, base_url, client_type)
|
|
38
43
|
|
|
39
|
-
def _create_client_for_model(
|
|
44
|
+
def _create_client_for_model(
|
|
45
|
+
self, model: str, api_key: str | None = None, base_url: str | None = None, client_type: str | None = None
|
|
46
|
+
) -> LLMClient:
|
|
40
47
|
"""Create the appropriate client for the given model."""
|
|
41
|
-
|
|
48
|
+
client_type = client_type or os.getenv("CLIENT_TYPE", model.lower())
|
|
49
|
+
if "gemini-3-" in client_type or "gemini-3.1-" in client_type: # e.g., gemini-3-flash-preview
|
|
42
50
|
from .gemini3 import Gemini3Client
|
|
43
51
|
|
|
44
|
-
return Gemini3Client(model=model, api_key=api_key)
|
|
45
|
-
elif "claude" in
|
|
46
|
-
from .
|
|
52
|
+
return Gemini3Client(model=model, api_key=api_key, base_url=base_url)
|
|
53
|
+
elif "claude" in client_type and "4-6" in client_type: # e.g., claude-sonnet-4-6
|
|
54
|
+
from .claude4_6 import Claude4_6Client
|
|
47
55
|
|
|
48
|
-
return
|
|
49
|
-
elif "gpt-5.
|
|
50
|
-
|
|
56
|
+
return Claude4_6Client(model=model, api_key=api_key, base_url=base_url)
|
|
57
|
+
elif "gpt-5.4" in client_type: # e.g., gpt-5.4
|
|
58
|
+
from .gpt5_4 import GPT5_4Client
|
|
59
|
+
|
|
60
|
+
return GPT5_4Client(model=model, api_key=api_key, base_url=base_url)
|
|
61
|
+
elif "glm-5" in client_type:
|
|
62
|
+
from .glm5 import GLM5Client
|
|
63
|
+
|
|
64
|
+
return GLM5Client(model=model, api_key=api_key, base_url=base_url)
|
|
65
|
+
elif "kimi-k2.5" in client_type:
|
|
66
|
+
from .kimi_k2_5 import KimiK2_5Client
|
|
67
|
+
|
|
68
|
+
return KimiK2_5Client(model=model, api_key=api_key, base_url=base_url)
|
|
69
|
+
elif "qwen3" in client_type:
|
|
70
|
+
from .qwen3 import Qwen3Client
|
|
71
|
+
|
|
72
|
+
return Qwen3Client(model=model, api_key=api_key, base_url=base_url)
|
|
51
73
|
else:
|
|
52
|
-
raise ValueError(
|
|
74
|
+
raise ValueError(
|
|
75
|
+
f"{client_type} is not supported. "
|
|
76
|
+
"Supported client types: gemini-3, claude-4-6, gpt-5.4, glm-5, kimi-k2.5, qwen3."
|
|
77
|
+
)
|
|
53
78
|
|
|
54
79
|
def transform_uni_config_to_model_config(self, config: UniConfig) -> Any:
|
|
55
80
|
"""Delegate to underlying client's transform_uni_config_to_model_config."""
|
|
@@ -63,6 +88,13 @@ class AutoLLMClient(LLMClient):
|
|
|
63
88
|
"""Delegate to underlying client's transform_model_output_to_uni_event."""
|
|
64
89
|
return self._client.transform_model_output_to_uni_event(model_output)
|
|
65
90
|
|
|
91
|
+
async def _streaming_response_internal(
|
|
92
|
+
self,
|
|
93
|
+
messages: list[UniMessage],
|
|
94
|
+
config: UniConfig,
|
|
95
|
+
) -> AsyncIterator[UniEvent]:
|
|
96
|
+
raise NotImplementedError("Please use streaming_response instead.")
|
|
97
|
+
|
|
66
98
|
async def streaming_response(
|
|
67
99
|
self,
|
|
68
100
|
messages: list[UniMessage],
|
|
@@ -26,6 +26,7 @@ class LLMClient(ABC):
|
|
|
26
26
|
the required abstract methods for complete SDK abstraction.
|
|
27
27
|
"""
|
|
28
28
|
|
|
29
|
+
_model: str
|
|
29
30
|
_history: list[UniMessage] = []
|
|
30
31
|
|
|
31
32
|
@abstractmethod
|
|
@@ -88,19 +89,31 @@ class LLMClient(ABC):
|
|
|
88
89
|
# Merge content_items from all events
|
|
89
90
|
for item in event["content_items"]:
|
|
90
91
|
if item["type"] == "text":
|
|
91
|
-
if
|
|
92
|
+
if (
|
|
93
|
+
content_items
|
|
94
|
+
and content_items[-1]["type"] == "text"
|
|
95
|
+
and content_items[-1].get("signature") is None # no signature yet
|
|
96
|
+
and item.get("phase") is None # no new phase
|
|
97
|
+
):
|
|
92
98
|
content_items[-1]["text"] += item["text"]
|
|
93
|
-
if "signature" in item: #
|
|
99
|
+
if "signature" in item: # finish the current item if signature is not None
|
|
94
100
|
content_items[-1]["signature"] = item["signature"]
|
|
95
|
-
elif item["text"]: #
|
|
101
|
+
elif item["text"] or item.get("phase") is not None: # text or new phase starts an item
|
|
96
102
|
content_items.append(item.copy())
|
|
97
103
|
elif item["type"] == "thinking":
|
|
98
|
-
if
|
|
104
|
+
if (
|
|
105
|
+
content_items
|
|
106
|
+
and content_items[-1]["type"] == "thinking"
|
|
107
|
+
and content_items[-1].get("signature") is None # no signature yet
|
|
108
|
+
):
|
|
99
109
|
content_items[-1]["thinking"] += item["thinking"]
|
|
100
|
-
if "signature" in item: #
|
|
110
|
+
if "signature" in item: # finish the current item if signature is not None
|
|
101
111
|
content_items[-1]["signature"] = item["signature"]
|
|
102
|
-
elif item["thinking"]: # omit empty thinking items
|
|
112
|
+
elif item["thinking"] or item.get("signature"): # omit empty thinking items
|
|
103
113
|
content_items.append(item.copy())
|
|
114
|
+
elif item["type"] == "partial_tool_call":
|
|
115
|
+
# Skip partial_tool_call items - they should already be converted to tool_call
|
|
116
|
+
pass
|
|
104
117
|
else:
|
|
105
118
|
content_items.append(item.copy())
|
|
106
119
|
|
|
@@ -115,6 +128,26 @@ class LLMClient(ABC):
|
|
|
115
128
|
}
|
|
116
129
|
|
|
117
130
|
@abstractmethod
|
|
131
|
+
async def _streaming_response_internal(
|
|
132
|
+
self,
|
|
133
|
+
messages: list[UniMessage],
|
|
134
|
+
config: UniConfig,
|
|
135
|
+
) -> AsyncIterator[UniEvent]:
|
|
136
|
+
"""
|
|
137
|
+
Internal method to handle streaming response.
|
|
138
|
+
|
|
139
|
+
This method should be implemented by each model client to handle
|
|
140
|
+
the actual streaming request and yield model-specific events.
|
|
141
|
+
|
|
142
|
+
Args:
|
|
143
|
+
messages: List of universal message dictionaries
|
|
144
|
+
config: Universal configuration dict
|
|
145
|
+
|
|
146
|
+
Yields:
|
|
147
|
+
Model-specific events from the streaming response
|
|
148
|
+
"""
|
|
149
|
+
pass
|
|
150
|
+
|
|
118
151
|
async def streaming_response(
|
|
119
152
|
self,
|
|
120
153
|
messages: list[UniMessage],
|
|
@@ -134,7 +167,12 @@ class LLMClient(ABC):
|
|
|
134
167
|
Yields:
|
|
135
168
|
Universal events from the streaming response
|
|
136
169
|
"""
|
|
137
|
-
|
|
170
|
+
last_event: UniEvent | None = None
|
|
171
|
+
async for event in self._streaming_response_internal(messages, config):
|
|
172
|
+
last_event = event
|
|
173
|
+
yield event
|
|
174
|
+
|
|
175
|
+
self._validate_last_event(last_event)
|
|
138
176
|
|
|
139
177
|
async def streaming_response_stateful(
|
|
140
178
|
self,
|
|
@@ -155,26 +193,49 @@ class LLMClient(ABC):
|
|
|
155
193
|
Yields:
|
|
156
194
|
Universal events from the streaming response
|
|
157
195
|
"""
|
|
158
|
-
#
|
|
159
|
-
self._history
|
|
196
|
+
# Build a temporary messages list for inference without mutating history yet
|
|
197
|
+
temp_messages = self._history + [message]
|
|
160
198
|
|
|
161
199
|
# Collect all events for history
|
|
162
200
|
events = []
|
|
163
|
-
async for event in self.streaming_response(messages=
|
|
201
|
+
async for event in self.streaming_response(messages=temp_messages, config=config):
|
|
164
202
|
events.append(event)
|
|
165
203
|
yield event
|
|
166
204
|
|
|
167
|
-
#
|
|
205
|
+
# Only update history after successful inference
|
|
168
206
|
if events:
|
|
169
207
|
assistant_message = self.concat_uni_events_to_uni_message(events)
|
|
208
|
+
self._history.append(message)
|
|
170
209
|
self._history.append(assistant_message)
|
|
171
210
|
|
|
172
211
|
# Save history to file if trace_id is specified
|
|
173
212
|
if config.get("trace_id"):
|
|
174
|
-
from .tracer import Tracer
|
|
213
|
+
from .integration.tracer import Tracer
|
|
175
214
|
|
|
176
215
|
tracer = Tracer()
|
|
177
|
-
tracer.save_history(self._history, config["trace_id"], config)
|
|
216
|
+
tracer.save_history(self._model, self._history, config["trace_id"], config)
|
|
217
|
+
|
|
218
|
+
@staticmethod
|
|
219
|
+
def _validate_last_event(last_event: UniEvent | None) -> None:
|
|
220
|
+
"""Validate that the last event has usage_metadata and finish_reason.
|
|
221
|
+
|
|
222
|
+
This validation guards against servers that silently terminate streaming
|
|
223
|
+
output partway through without sending a proper final event.
|
|
224
|
+
|
|
225
|
+
Args:
|
|
226
|
+
last_event: The last event yielded by streaming_response
|
|
227
|
+
|
|
228
|
+
Raises:
|
|
229
|
+
ValueError: If last_event is None or missing usage_metadata/finish_reason
|
|
230
|
+
"""
|
|
231
|
+
if last_event is None:
|
|
232
|
+
raise ValueError("Streaming response yielded no events")
|
|
233
|
+
|
|
234
|
+
if last_event["usage_metadata"] is None:
|
|
235
|
+
raise ValueError(f"Last event must carry usage_metadata, got: {last_event}")
|
|
236
|
+
|
|
237
|
+
if last_event["finish_reason"] is None:
|
|
238
|
+
raise ValueError(f"Last event must carry finish_reason, got: {last_event}")
|
|
178
239
|
|
|
179
240
|
def clear_history(self) -> None:
|
|
180
241
|
"""Clear the message history."""
|
{agenthub_python-0.1.0/agenthub/claude4_5 → agenthub_python-0.3.0/agenthub/claude4_6}/__init__.py
RENAMED
|
@@ -12,7 +12,7 @@
|
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
|
-
from .client import
|
|
15
|
+
from .client import Claude4_6Client
|
|
16
16
|
|
|
17
17
|
|
|
18
|
-
__all__ = ["
|
|
18
|
+
__all__ = ["Claude4_6Client"]
|
|
@@ -0,0 +1,415 @@
|
|
|
1
|
+
# Copyright 2025 Prism Shadow. and/or its affiliates
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
import base64
|
|
16
|
+
import json
|
|
17
|
+
import mimetypes
|
|
18
|
+
import os
|
|
19
|
+
import re
|
|
20
|
+
from typing import Any, AsyncIterator
|
|
21
|
+
|
|
22
|
+
import httpx
|
|
23
|
+
from anthropic import AsyncAnthropic, AsyncAnthropicBedrock
|
|
24
|
+
from anthropic.types.beta import BetaMessageParam, BetaRawMessageStreamEvent
|
|
25
|
+
|
|
26
|
+
from ..base_client import LLMClient
|
|
27
|
+
from ..types import (
|
|
28
|
+
EventType,
|
|
29
|
+
FinishReason,
|
|
30
|
+
PartialContentItem,
|
|
31
|
+
PromptCaching,
|
|
32
|
+
ThinkingLevel,
|
|
33
|
+
ToolChoice,
|
|
34
|
+
UniConfig,
|
|
35
|
+
UniEvent,
|
|
36
|
+
UniMessage,
|
|
37
|
+
UsageMetadata,
|
|
38
|
+
)
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
REDACTED_THINKING = "_REDACTED_THINKING"
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
class Claude4_6Client(LLMClient):
|
|
45
|
+
"""Claude 4.6-specific LLM client implementation."""
|
|
46
|
+
|
|
47
|
+
def __init__(self, model: str, api_key: str | None = None, base_url: str | None = None):
    """Initialize Claude 4.6 client with model and API key.

    Args:
        model: Model identifier sent with every request
        api_key: API key; falls back to the ANTHROPIC_API_KEY environment variable.
            For Bedrock it must be "<aws_access_key>,<aws_secret_key>"
        base_url: Base URL; falls back to the ANTHROPIC_BASE_URL environment variable.
            A value of the form "bedrock://<region>" selects the Bedrock client

    Raises:
        ValueError: If Bedrock is selected but api_key is not an
            "<aws_access_key>,<aws_secret_key>" pair
    """
    self._model = model
    api_key = api_key or os.getenv("ANTHROPIC_API_KEY")
    base_url = base_url or os.getenv("ANTHROPIC_BASE_URL")
    if base_url and base_url.startswith("bedrock://"):  # example: bedrock://us-east-1
        region = base_url.replace("bedrock://", "")
        # Fail with a clear message instead of AttributeError (no key at all)
        # or an opaque unpacking error (wrong number of comma-separated parts).
        credentials = api_key.split(",") if api_key else []
        if len(credentials) != 2:
            raise ValueError("Bedrock requires api_key in the form '<aws_access_key>,<aws_secret_key>'.")

        access_key, secret_key = credentials
        self._client = AsyncAnthropicBedrock(
            aws_secret_key=secret_key, aws_access_key=access_key, aws_region=region
        )
        self._use_bedrock = True
    else:
        self._client = AsyncAnthropic(api_key=api_key, base_url=base_url)
        self._use_bedrock = False

    # Per-instance conversation history
    self._history: list[UniMessage] = []
|
|
64
|
+
|
|
65
|
+
async def _convert_image_url_to_source(self, url: str) -> dict[str, Any]:
|
|
66
|
+
"""Convert image URL to image source.
|
|
67
|
+
|
|
68
|
+
Bedrock does not support image url sources, so we need to fetch the image bytes and encode them.
|
|
69
|
+
|
|
70
|
+
Args:
|
|
71
|
+
url: Image URL to convert
|
|
72
|
+
|
|
73
|
+
Returns:
|
|
74
|
+
Image source
|
|
75
|
+
"""
|
|
76
|
+
if url.startswith("data:"):
|
|
77
|
+
match = re.match(r"data:([^;]+);base64,(.+)", url)
|
|
78
|
+
if match:
|
|
79
|
+
media_type = match.group(1)
|
|
80
|
+
base64_data = match.group(2)
|
|
81
|
+
source = {
|
|
82
|
+
"type": "image",
|
|
83
|
+
"source": {"type": "base64", "media_type": media_type, "data": base64_data},
|
|
84
|
+
}
|
|
85
|
+
else:
|
|
86
|
+
raise ValueError(f"Invalid base64 image: {url}")
|
|
87
|
+
elif self._use_bedrock:
|
|
88
|
+
async with httpx.AsyncClient() as client:
|
|
89
|
+
response = await client.get(url)
|
|
90
|
+
response.raise_for_status()
|
|
91
|
+
image_bytes = response.content
|
|
92
|
+
mime_type = mimetypes.guess_type(url)[0] or "image/jpeg"
|
|
93
|
+
source = {
|
|
94
|
+
"type": "image",
|
|
95
|
+
"source": {
|
|
96
|
+
"type": "base64",
|
|
97
|
+
"media_type": mime_type,
|
|
98
|
+
"data": base64.b64encode(image_bytes).decode("utf-8"),
|
|
99
|
+
},
|
|
100
|
+
}
|
|
101
|
+
else:
|
|
102
|
+
source = {"type": "image", "source": {"type": "url", "url": url}}
|
|
103
|
+
|
|
104
|
+
return source
|
|
105
|
+
|
|
106
|
+
def _convert_thinking_level_to_thinking_config(self, thinking_level: ThinkingLevel) -> dict[str, Any]:
    """Convert ThinkingLevel enum to Claude's adaptive thinking config.

    Args:
        thinking_level: Requested thinking level

    Returns:
        Request parameters to merge into the Claude config; empty for
        ThinkingLevel.NONE so that no thinking config is sent

    Raises:
        ValueError: If the level has no Claude equivalent
    """
    mapping = {
        ThinkingLevel.NONE: {},  # omit thinking config
        ThinkingLevel.LOW: {"thinking": {"type": "adaptive"}, "output_config": {"effort": "low"}},
        ThinkingLevel.MEDIUM: {"thinking": {"type": "adaptive"}, "output_config": {"effort": "medium"}},
        ThinkingLevel.HIGH: {"thinking": {"type": "adaptive"}, "output_config": {"effort": "high"}},
    }
    # Never hand None back: the caller merges the result with dict.update(),
    # which would fail with an unrelated-looking TypeError.
    try:
        return mapping[thinking_level]
    except KeyError:
        raise ValueError(f"Unsupported thinking level: {thinking_level!r}") from None
|
|
115
|
+
|
|
116
|
+
def _convert_tool_choice(self, tool_choice: ToolChoice) -> dict[str, str]:
|
|
117
|
+
"""Convert ToolChoice to Claude's tool_choice format."""
|
|
118
|
+
if isinstance(tool_choice, list):
|
|
119
|
+
if len(tool_choice) > 1:
|
|
120
|
+
raise ValueError("Claude supports only one tool choice.")
|
|
121
|
+
|
|
122
|
+
return {"type": "any", "name": tool_choice[0]}
|
|
123
|
+
elif tool_choice == "none":
|
|
124
|
+
return {"type": "none"}
|
|
125
|
+
elif tool_choice == "auto":
|
|
126
|
+
return {"type": "auto"}
|
|
127
|
+
elif tool_choice == "required":
|
|
128
|
+
return {"type": "any"}
|
|
129
|
+
|
|
130
|
+
def transform_uni_config_to_model_config(self, config: UniConfig) -> dict[str, Any]:
|
|
131
|
+
"""
|
|
132
|
+
Transform universal configuration to Claude-specific configuration.
|
|
133
|
+
|
|
134
|
+
Args:
|
|
135
|
+
config: Universal configuration dict
|
|
136
|
+
|
|
137
|
+
Returns:
|
|
138
|
+
Claude configuration dictionary
|
|
139
|
+
"""
|
|
140
|
+
claude_config = {"model": self._model, "stream": True}
|
|
141
|
+
|
|
142
|
+
if config.get("system_prompt") is not None:
|
|
143
|
+
claude_config["system"] = config["system_prompt"]
|
|
144
|
+
|
|
145
|
+
if config.get("max_tokens") is not None:
|
|
146
|
+
claude_config["max_tokens"] = config["max_tokens"]
|
|
147
|
+
else:
|
|
148
|
+
claude_config["max_tokens"] = 32768 # Claude requires max_tokens to be specified
|
|
149
|
+
|
|
150
|
+
if config.get("temperature") is not None:
|
|
151
|
+
claude_config["temperature"] = config["temperature"]
|
|
152
|
+
|
|
153
|
+
# NOTE: Claude always provides thinking summary
|
|
154
|
+
if config.get("thinking_level") is not None:
|
|
155
|
+
claude_config["temperature"] = 1.0 # `temperature` may only be set to 1 when thinking is enabled
|
|
156
|
+
claude_config.update(self._convert_thinking_level_to_thinking_config(config["thinking_level"]))
|
|
157
|
+
|
|
158
|
+
# Convert tools to Claude's tool schema
|
|
159
|
+
if config.get("tools") is not None:
|
|
160
|
+
claude_tools = []
|
|
161
|
+
for tool in config["tools"]:
|
|
162
|
+
claude_tool = {}
|
|
163
|
+
for key, value in tool.items():
|
|
164
|
+
claude_tool[key.replace("parameters", "input_schema")] = value
|
|
165
|
+
|
|
166
|
+
claude_tools.append(claude_tool)
|
|
167
|
+
|
|
168
|
+
claude_config["tools"] = claude_tools
|
|
169
|
+
|
|
170
|
+
# Convert tool_choice
|
|
171
|
+
if config.get("tool_choice") is not None:
|
|
172
|
+
claude_config["tool_choice"] = self._convert_tool_choice(config["tool_choice"])
|
|
173
|
+
|
|
174
|
+
return claude_config
|
|
175
|
+
|
|
176
|
+
async def transform_uni_message_to_model_input(self, messages: list[UniMessage]) -> list[BetaMessageParam]:
|
|
177
|
+
"""
|
|
178
|
+
Transform universal message format to Claude's BetaMessageParam format.
|
|
179
|
+
|
|
180
|
+
Args:
|
|
181
|
+
messages: List of universal message dictionaries
|
|
182
|
+
|
|
183
|
+
Returns:
|
|
184
|
+
List of Claude BetaMessageParam objects
|
|
185
|
+
"""
|
|
186
|
+
claude_messages: list[BetaMessageParam] = []
|
|
187
|
+
|
|
188
|
+
for msg in messages:
|
|
189
|
+
content_blocks = []
|
|
190
|
+
for item in msg["content_items"]:
|
|
191
|
+
if item["type"] == "text":
|
|
192
|
+
content_blocks.append({"type": "text", "text": item["text"]})
|
|
193
|
+
elif item["type"] == "image_url":
|
|
194
|
+
content_blocks.append(await self._convert_image_url_to_source(item["image_url"]))
|
|
195
|
+
elif item["type"] == "thinking":
|
|
196
|
+
if item["thinking"] == REDACTED_THINKING:
|
|
197
|
+
content_blocks.append({"type": "redacted_thinking", "data": item["signature"]})
|
|
198
|
+
else:
|
|
199
|
+
content_blocks.append(
|
|
200
|
+
{"type": "thinking", "thinking": item["thinking"], "signature": item["signature"]}
|
|
201
|
+
)
|
|
202
|
+
elif item["type"] == "tool_call":
|
|
203
|
+
content_blocks.append(
|
|
204
|
+
{
|
|
205
|
+
"type": "tool_use",
|
|
206
|
+
"id": item["tool_call_id"],
|
|
207
|
+
"name": item["name"],
|
|
208
|
+
"input": item["arguments"],
|
|
209
|
+
}
|
|
210
|
+
)
|
|
211
|
+
elif item["type"] == "tool_result":
|
|
212
|
+
if "tool_call_id" not in item:
|
|
213
|
+
raise ValueError("tool_call_id is required for tool result.")
|
|
214
|
+
|
|
215
|
+
tool_result = [{"type": "text", "text": item["text"]}]
|
|
216
|
+
if "images" in item:
|
|
217
|
+
for image_url in item["images"]:
|
|
218
|
+
tool_result.append(await self._convert_image_url_to_source(image_url))
|
|
219
|
+
|
|
220
|
+
content_blocks.append(
|
|
221
|
+
{"type": "tool_result", "content": tool_result, "tool_use_id": item["tool_call_id"]}
|
|
222
|
+
)
|
|
223
|
+
else:
|
|
224
|
+
raise ValueError(f"Unknown item: {item}")
|
|
225
|
+
|
|
226
|
+
claude_messages.append({"role": msg["role"], "content": content_blocks})
|
|
227
|
+
|
|
228
|
+
return claude_messages
|
|
229
|
+
|
|
230
|
+
def transform_model_output_to_uni_event(self, model_output: BetaRawMessageStreamEvent) -> UniEvent:
|
|
231
|
+
"""
|
|
232
|
+
Transform Claude model output to universal event format.
|
|
233
|
+
|
|
234
|
+
NOTE: Claude always has only one content item per event.
|
|
235
|
+
|
|
236
|
+
Args:
|
|
237
|
+
model_output: Claude streaming event
|
|
238
|
+
|
|
239
|
+
Returns:
|
|
240
|
+
Universal event dictionary
|
|
241
|
+
"""
|
|
242
|
+
event_type: EventType | None = None
|
|
243
|
+
content_items: list[PartialContentItem] = []
|
|
244
|
+
usage_metadata: UsageMetadata | None = None
|
|
245
|
+
finish_reason: FinishReason | None = None
|
|
246
|
+
|
|
247
|
+
claude_event_type = model_output.type
|
|
248
|
+
if claude_event_type == "content_block_start":
|
|
249
|
+
event_type = "start"
|
|
250
|
+
block = model_output.content_block
|
|
251
|
+
if block.type == "tool_use":
|
|
252
|
+
content_items.append(
|
|
253
|
+
{"type": "partial_tool_call", "name": block.name, "arguments": "", "tool_call_id": block.id}
|
|
254
|
+
)
|
|
255
|
+
elif block.type == "redacted_thinking":
|
|
256
|
+
content_items.append({"type": "thinking", "thinking": REDACTED_THINKING, "signature": block.data})
|
|
257
|
+
|
|
258
|
+
elif claude_event_type == "content_block_delta":
|
|
259
|
+
event_type = "delta"
|
|
260
|
+
delta = model_output.delta
|
|
261
|
+
if delta.type == "thinking_delta":
|
|
262
|
+
content_items.append({"type": "thinking", "thinking": delta.thinking})
|
|
263
|
+
elif delta.type == "text_delta":
|
|
264
|
+
content_items.append({"type": "text", "text": delta.text})
|
|
265
|
+
elif delta.type == "input_json_delta":
|
|
266
|
+
content_items.append(
|
|
267
|
+
{"type": "partial_tool_call", "name": "", "arguments": delta.partial_json, "tool_call_id": ""}
|
|
268
|
+
)
|
|
269
|
+
elif delta.type == "signature_delta":
|
|
270
|
+
content_items.append({"type": "thinking", "thinking": "", "signature": delta.signature})
|
|
271
|
+
|
|
272
|
+
elif claude_event_type == "content_block_stop":
|
|
273
|
+
event_type = "stop"
|
|
274
|
+
|
|
275
|
+
elif claude_event_type == "message_start":
|
|
276
|
+
event_type = "start"
|
|
277
|
+
message = model_output.message
|
|
278
|
+
if getattr(message, "usage", None):
|
|
279
|
+
cache_creation_tokens = message.usage.cache_creation_input_tokens or 0
|
|
280
|
+
usage_metadata = {
|
|
281
|
+
"cached_tokens": message.usage.cache_read_input_tokens,
|
|
282
|
+
"prompt_tokens": message.usage.input_tokens + cache_creation_tokens,
|
|
283
|
+
"thoughts_tokens": None,
|
|
284
|
+
"response_tokens": None,
|
|
285
|
+
}
|
|
286
|
+
|
|
287
|
+
elif claude_event_type == "message_delta":
|
|
288
|
+
event_type = "stop"
|
|
289
|
+
delta = model_output.delta
|
|
290
|
+
if getattr(delta, "stop_reason", None):
|
|
291
|
+
stop_reason_mapping = {
|
|
292
|
+
"end_turn": "stop",
|
|
293
|
+
"max_tokens": "length",
|
|
294
|
+
"stop_sequence": "stop",
|
|
295
|
+
"tool_use": "tool_call",
|
|
296
|
+
}
|
|
297
|
+
finish_reason = stop_reason_mapping.get(delta.stop_reason, "unknown")
|
|
298
|
+
|
|
299
|
+
if getattr(model_output, "usage", None):
|
|
300
|
+
# In message_delta, we only update response_tokens
|
|
301
|
+
usage_metadata = {
|
|
302
|
+
"cached_tokens": None,
|
|
303
|
+
"prompt_tokens": None,
|
|
304
|
+
"thoughts_tokens": None,
|
|
305
|
+
"response_tokens": model_output.usage.output_tokens,
|
|
306
|
+
}
|
|
307
|
+
|
|
308
|
+
elif claude_event_type == "message_stop":
|
|
309
|
+
event_type = "stop"
|
|
310
|
+
|
|
311
|
+
elif claude_event_type in ["text", "thinking", "signature", "input_json"]:
|
|
312
|
+
event_type = "unused"
|
|
313
|
+
|
|
314
|
+
else:
|
|
315
|
+
raise ValueError(f"Unknown output: {model_output}")
|
|
316
|
+
|
|
317
|
+
return {
|
|
318
|
+
"role": "assistant",
|
|
319
|
+
"event_type": event_type,
|
|
320
|
+
"content_items": content_items,
|
|
321
|
+
"usage_metadata": usage_metadata,
|
|
322
|
+
"finish_reason": finish_reason,
|
|
323
|
+
}
|
|
324
|
+
|
|
325
|
+
async def _streaming_response_internal(
    self,
    messages: list[UniMessage],
    config: UniConfig,
) -> AsyncIterator[UniEvent]:
    """Stream generate using Claude SDK with unified conversion methods.

    Start and delta events are forwarded as they arrive. Tool-call argument
    fragments are additionally buffered and emitted as one complete
    ``tool_call`` item when a stop event is seen. The prompt/cache token
    counts from the start of the message are held back and merged with the
    response token count into a single final ``stop`` event that also carries
    the finish reason.

    Args:
        messages: List of universal message dictionaries
        config: Universal configuration dict

    Yields:
        Universal events from the streaming response
    """
    # Use unified config conversion
    claude_config = self.transform_uni_config_to_model_config(config)

    # Use unified message conversion
    claude_messages = await self.transform_uni_message_to_model_input(messages)

    # Add cache_control to last user message's last item if enabled
    prompt_caching = config.get("prompt_caching", PromptCaching.ENABLE)
    if prompt_caching != PromptCaching.DISABLE:
        last_user_message = next((msg for msg in reversed(claude_messages) if msg["role"] == "user"), None)
        # A user message without content blocks has nothing to attach the
        # marker to; skip it rather than fail with IndexError.
        if last_user_message is not None and last_user_message["content"]:
            last_user_message["content"][-1]["cache_control"] = {
                "type": "ephemeral",
                "ttl": "1h" if prompt_caching == PromptCaching.ENHANCE else "5m",
            }

    # Stream generate
    partial_tool_call: dict[str, Any] = {}
    partial_usage: dict[str, Any] = {}
    stream = await self._client.beta.messages.create(**claude_config, messages=claude_messages)
    async for raw_event in stream:
        event = self.transform_model_output_to_uni_event(raw_event)
        if event["event_type"] == "start":
            for item in event["content_items"]:
                if item["type"] == "partial_tool_call":
                    # initialize partial_tool_call
                    partial_tool_call = {
                        "name": item["name"],
                        "arguments": "",
                        "tool_call_id": item["tool_call_id"],
                    }
                    yield event

            if event["usage_metadata"] is not None:
                # initialize partial_usage
                partial_usage = {
                    "prompt_tokens": event["usage_metadata"]["prompt_tokens"],
                    "cached_tokens": event["usage_metadata"]["cached_tokens"],
                }

        elif event["event_type"] == "delta":
            for item in event["content_items"]:
                if item["type"] == "partial_tool_call":
                    # update partial_tool_call
                    partial_tool_call["arguments"] += item["arguments"]

            yield event

        elif event["event_type"] == "stop":
            if "name" in partial_tool_call and "arguments" in partial_tool_call:
                # finish partial_tool_call; no argument JSON may have been
                # streamed at all (tool called without input), and
                # json.loads("") would raise, so treat that as no arguments.
                raw_arguments = partial_tool_call["arguments"]
                yield {
                    "role": "assistant",
                    "event_type": "delta",
                    "content_items": [
                        {
                            "type": "tool_call",
                            "name": partial_tool_call["name"],
                            "arguments": json.loads(raw_arguments) if raw_arguments.strip() else {},
                            "tool_call_id": partial_tool_call["tool_call_id"],
                        }
                    ],
                    "usage_metadata": None,
                    "finish_reason": None,
                }
                partial_tool_call = {}

            if "prompt_tokens" in partial_usage and event["usage_metadata"] is not None:
                # finish partial_usage
                yield {
                    "role": "assistant",
                    "event_type": "stop",
                    "content_items": [],
                    "usage_metadata": {
                        "prompt_tokens": partial_usage["prompt_tokens"],
                        "thoughts_tokens": None,
                        "response_tokens": event["usage_metadata"]["response_tokens"],
                        "cached_tokens": partial_usage["cached_tokens"],
                    },
                    "finish_reason": event["finish_reason"],
                }
                partial_usage = {}
|