agenthub-python 0.2.0__tar.gz → 0.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (24)
  1. agenthub_python-0.3.0/PKG-INFO +10 -0
  2. {agenthub_python-0.2.0 → agenthub_python-0.3.0}/agenthub/auto_client.py +22 -11
  3. {agenthub_python-0.2.0 → agenthub_python-0.3.0}/agenthub/base_client.py +67 -10
  4. {agenthub_python-0.2.0/agenthub/claude4_5 → agenthub_python-0.3.0/agenthub/claude4_6}/__init__.py +2 -2
  5. {agenthub_python-0.2.0/agenthub/claude4_5 → agenthub_python-0.3.0/agenthub/claude4_6}/client.py +161 -93
  6. {agenthub_python-0.2.0 → agenthub_python-0.3.0}/agenthub/gemini3/client.py +96 -45
  7. {agenthub_python-0.2.0/agenthub/gpt5_2 → agenthub_python-0.3.0/agenthub/glm5}/__init__.py +2 -2
  8. {agenthub_python-0.2.0/agenthub/glm4_7 → agenthub_python-0.3.0/agenthub/glm5}/client.py +114 -62
  9. {agenthub_python-0.2.0/agenthub/glm4_7 → agenthub_python-0.3.0/agenthub/gpt5_4}/__init__.py +2 -2
  10. {agenthub_python-0.2.0/agenthub/gpt5_2 → agenthub_python-0.3.0/agenthub/gpt5_4}/client.py +47 -13
  11. {agenthub_python-0.2.0 → agenthub_python-0.3.0}/agenthub/integration/playground.py +182 -307
  12. agenthub_python-0.3.0/agenthub/integration/tracer.py +528 -0
  13. agenthub_python-0.3.0/agenthub/kimi_k2_5/__init__.py +18 -0
  14. agenthub_python-0.3.0/agenthub/kimi_k2_5/client.py +388 -0
  15. {agenthub_python-0.2.0 → agenthub_python-0.3.0}/agenthub/qwen3/client.py +106 -60
  16. {agenthub_python-0.2.0 → agenthub_python-0.3.0}/agenthub/types.py +5 -3
  17. agenthub_python-0.3.0/agenthub/utils.py +35 -0
  18. {agenthub_python-0.2.0 → agenthub_python-0.3.0}/pyproject.toml +3 -3
  19. agenthub_python-0.2.0/PKG-INFO +0 -9
  20. agenthub_python-0.2.0/agenthub/integration/tracer.py +0 -750
  21. {agenthub_python-0.2.0 → agenthub_python-0.3.0}/agenthub/__init__.py +0 -0
  22. {agenthub_python-0.2.0 → agenthub_python-0.3.0}/agenthub/gemini3/__init__.py +0 -0
  23. {agenthub_python-0.2.0 → agenthub_python-0.3.0}/agenthub/integration/__init__.py +0 -0
  24. {agenthub_python-0.2.0 → agenthub_python-0.3.0}/agenthub/qwen3/__init__.py +0 -0
@@ -0,0 +1,10 @@
1
+ Metadata-Version: 2.3
2
+ Name: agenthub-python
3
+ Version: 0.3.0
4
+ Summary: AgentHub is the LLM API Hub for the Agent era, built for high-precision autonomous agents.
5
+ Requires-Dist: google-genai>=1.5.0
6
+ Requires-Dist: anthropic[bedrock]>=0.40.0
7
+ Requires-Dist: flask>=3.0.0
8
+ Requires-Dist: openai>=1.0.0
9
+ Requires-Dist: httpx>=0.27.0
10
+ Requires-Python: >=3.11
@@ -46,22 +46,26 @@ class AutoLLMClient(LLMClient):
46
46
  ) -> LLMClient:
47
47
  """Create the appropriate client for the given model."""
48
48
  client_type = client_type or os.getenv("CLIENT_TYPE", model.lower())
49
- if "gemini-3" in client_type: # e.g., gemini-3-flash-preview
49
+ if "gemini-3-" in client_type or "gemini-3.1-" in client_type: # e.g., gemini-3-flash-preview
50
50
  from .gemini3 import Gemini3Client
51
51
 
52
52
  return Gemini3Client(model=model, api_key=api_key, base_url=base_url)
53
- elif "claude" in client_type and "4-5" in client_type: # e.g., claude-sonnet-4-5
54
- from .claude4_5 import Claude4_5Client
53
+ elif "claude" in client_type and "4-6" in client_type: # e.g., claude-sonnet-4-6
54
+ from .claude4_6 import Claude4_6Client
55
55
 
56
- return Claude4_5Client(model=model, api_key=api_key, base_url=base_url)
57
- elif "gpt-5.1" in client_type or "gpt-5.2" in client_type: # e.g., gpt-5.2
58
- from .gpt5_2 import GPT5_2Client
56
+ return Claude4_6Client(model=model, api_key=api_key, base_url=base_url)
57
+ elif "gpt-5.4" in client_type: # e.g., gpt-5.4
58
+ from .gpt5_4 import GPT5_4Client
59
59
 
60
- return GPT5_2Client(model=model, api_key=api_key, base_url=base_url)
61
- elif "glm-4.7" in client_type: # e.g., glm-4.7
62
- from .glm4_7 import GLM4_7Client
60
+ return GPT5_4Client(model=model, api_key=api_key, base_url=base_url)
61
+ elif "glm-5" in client_type:
62
+ from .glm5 import GLM5Client
63
63
 
64
- return GLM4_7Client(model=model, api_key=api_key, base_url=base_url)
64
+ return GLM5Client(model=model, api_key=api_key, base_url=base_url)
65
+ elif "kimi-k2.5" in client_type:
66
+ from .kimi_k2_5 import KimiK2_5Client
67
+
68
+ return KimiK2_5Client(model=model, api_key=api_key, base_url=base_url)
65
69
  elif "qwen3" in client_type:
66
70
  from .qwen3 import Qwen3Client
67
71
 
@@ -69,7 +73,7 @@ class AutoLLMClient(LLMClient):
69
73
  else:
70
74
  raise ValueError(
71
75
  f"{client_type} is not supported. "
72
- "Supported client types: gemini-3, claude-4-5, gpt-5.2, glm-4.7, qwen3."
76
+ "Supported client types: gemini-3, claude-4-6, gpt-5.4, glm-5, kimi-k2.5, qwen3."
73
77
  )
74
78
 
75
79
  def transform_uni_config_to_model_config(self, config: UniConfig) -> Any:
@@ -84,6 +88,13 @@ class AutoLLMClient(LLMClient):
84
88
  """Delegate to underlying client's transform_model_output_to_uni_event."""
85
89
  return self._client.transform_model_output_to_uni_event(model_output)
86
90
 
91
+ async def _streaming_response_internal(
92
+ self,
93
+ messages: list[UniMessage],
94
+ config: UniConfig,
95
+ ) -> AsyncIterator[UniEvent]:
96
+ raise NotImplementedError("Please use streaming_response instead.")
97
+
87
98
  async def streaming_response(
88
99
  self,
89
100
  messages: list[UniMessage],
@@ -89,16 +89,25 @@ class LLMClient(ABC):
89
89
  # Merge content_items from all events
90
90
  for item in event["content_items"]:
91
91
  if item["type"] == "text":
92
- if content_items and content_items[-1]["type"] == "text":
92
+ if (
93
+ content_items
94
+ and content_items[-1]["type"] == "text"
95
+ and content_items[-1].get("signature") is None # no signature yet
96
+ and item.get("phase") is None # no new phase
97
+ ):
93
98
  content_items[-1]["text"] += item["text"]
94
- if "signature" in item: # signature may appear at the last item
99
+ if "signature" in item: # finish the current item if signature is not None
95
100
  content_items[-1]["signature"] = item["signature"]
96
- elif item["text"]: # omit empty text items
101
+ elif item["text"] or item.get("phase") is not None: # text or new phase starts an item
97
102
  content_items.append(item.copy())
98
103
  elif item["type"] == "thinking":
99
- if content_items and content_items[-1]["type"] == "thinking":
104
+ if (
105
+ content_items
106
+ and content_items[-1]["type"] == "thinking"
107
+ and content_items[-1].get("signature") is None # no signature yet
108
+ ):
100
109
  content_items[-1]["thinking"] += item["thinking"]
101
- if "signature" in item: # signature may appear at the last item
110
+ if "signature" in item: # finish the current item if signature is not None
102
111
  content_items[-1]["signature"] = item["signature"]
103
112
  elif item["thinking"] or item.get("signature"): # omit empty thinking items
104
113
  content_items.append(item.copy())
@@ -119,6 +128,26 @@ class LLMClient(ABC):
119
128
  }
120
129
 
121
130
  @abstractmethod
131
+ async def _streaming_response_internal(
132
+ self,
133
+ messages: list[UniMessage],
134
+ config: UniConfig,
135
+ ) -> AsyncIterator[UniEvent]:
136
+ """
137
+ Internal method to handle streaming response.
138
+
139
+ This method should be implemented by each model client to handle
140
+ the actual streaming request and yield model-specific events.
141
+
142
+ Args:
143
+ messages: List of universal message dictionaries
144
+ config: Universal configuration dict
145
+
146
+ Yields:
147
+ Model-specific events from the streaming response
148
+ """
149
+ pass
150
+
122
151
  async def streaming_response(
123
152
  self,
124
153
  messages: list[UniMessage],
@@ -138,7 +167,12 @@ class LLMClient(ABC):
138
167
  Yields:
139
168
  Universal events from the streaming response
140
169
  """
141
- pass
170
+ last_event: UniEvent | None = None
171
+ async for event in self._streaming_response_internal(messages, config):
172
+ last_event = event
173
+ yield event
174
+
175
+ self._validate_last_event(last_event)
142
176
 
143
177
  async def streaming_response_stateful(
144
178
  self,
@@ -159,18 +193,19 @@ class LLMClient(ABC):
159
193
  Yields:
160
194
  Universal events from the streaming response
161
195
  """
162
- # Add user message to history
163
- self._history.append(message)
196
+ # Build a temporary messages list for inference without mutating history yet
197
+ temp_messages = self._history + [message]
164
198
 
165
199
  # Collect all events for history
166
200
  events = []
167
- async for event in self.streaming_response(messages=self._history, config=config):
201
+ async for event in self.streaming_response(messages=temp_messages, config=config):
168
202
  events.append(event)
169
203
  yield event
170
204
 
171
- # Convert events to message and add to history
205
+ # Only update history after successful inference
172
206
  if events:
173
207
  assistant_message = self.concat_uni_events_to_uni_message(events)
208
+ self._history.append(message)
174
209
  self._history.append(assistant_message)
175
210
 
176
211
  # Save history to file if trace_id is specified
@@ -180,6 +215,28 @@ class LLMClient(ABC):
180
215
  tracer = Tracer()
181
216
  tracer.save_history(self._model, self._history, config["trace_id"], config)
182
217
 
218
+ @staticmethod
219
+ def _validate_last_event(last_event: UniEvent | None) -> None:
220
+ """Validate that the last event has usage_metadata and finish_reason.
221
+
222
+ This validation guards against servers that silently terminate streaming
223
+ output partway through without sending a proper final event.
224
+
225
+ Args:
226
+ last_event: The last event yielded by streaming_response
227
+
228
+ Raises:
229
+ ValueError: If last_event is None or missing usage_metadata/finish_reason
230
+ """
231
+ if last_event is None:
232
+ raise ValueError("Streaming response yielded no events")
233
+
234
+ if last_event["usage_metadata"] is None:
235
+ raise ValueError(f"Last event must carry usage_metadata, got: {last_event}")
236
+
237
+ if last_event["finish_reason"] is None:
238
+ raise ValueError(f"Last event must carry finish_reason, got: {last_event}")
239
+
183
240
  def clear_history(self) -> None:
184
241
  """Clear the message history."""
185
242
  self._history.clear()
@@ -12,7 +12,7 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
- from .client import Claude4_5Client
15
+ from .client import Claude4_6Client
16
16
 
17
17
 
18
- __all__ = ["Claude4_5Client"]
18
+ __all__ = ["Claude4_6Client"]
@@ -12,12 +12,16 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
+ import base64
15
16
  import json
17
+ import mimetypes
16
18
  import os
19
+ import re
17
20
  from typing import Any, AsyncIterator
18
21
 
19
- from anthropic import AsyncAnthropic
20
- from anthropic.types import MessageParam, MessageStreamEvent
22
+ import httpx
23
+ from anthropic import AsyncAnthropic, AsyncAnthropicBedrock
24
+ from anthropic.types.beta import BetaMessageParam, BetaRawMessageStreamEvent
21
25
 
22
26
  from ..base_client import LLMClient
23
27
  from ..types import (
@@ -34,25 +38,78 @@ from ..types import (
34
38
  )
35
39
 
36
40
 
37
- class Claude4_5Client(LLMClient):
38
- """Claude 4.5-specific LLM client implementation."""
41
+ REDACTED_THINKING = "_REDACTED_THINKING"
42
+
43
+
44
+ class Claude4_6Client(LLMClient):
45
+ """Claude 4.6-specific LLM client implementation."""
39
46
 
40
47
  def __init__(self, model: str, api_key: str | None = None, base_url: str | None = None):
41
- """Initialize Claude 4.5 client with model and API key."""
48
+ """Initialize Claude 4.6 client with model and API key."""
42
49
  self._model = model
43
50
  api_key = api_key or os.getenv("ANTHROPIC_API_KEY")
44
51
  base_url = base_url or os.getenv("ANTHROPIC_BASE_URL")
45
- self._client = AsyncAnthropic(api_key=api_key, base_url=base_url)
52
+ if base_url and base_url.startswith("bedrock://"): # example: bedrock://us-east-1
53
+ region = base_url.replace("bedrock://", "")
54
+ access_key, secret_key = api_key.split(",")
55
+ self._client = AsyncAnthropicBedrock(
56
+ aws_secret_key=secret_key, aws_access_key=access_key, aws_region=region
57
+ )
58
+ self._use_bedrock = True
59
+ else:
60
+ self._client = AsyncAnthropic(api_key=api_key, base_url=base_url)
61
+ self._use_bedrock = False
62
+
46
63
  self._history: list[UniMessage] = []
47
64
 
48
- def _convert_thinking_level_to_budget(self, thinking_level: ThinkingLevel) -> dict[str, Any]:
49
- """Convert ThinkingLevel enum to Claude's budget_tokens."""
65
+ async def _convert_image_url_to_source(self, url: str) -> dict[str, Any]:
66
+ """Convert image URL to image source.
67
+
68
+ Bedrock does not support image url sources, so we need to fetch the image bytes and encode them.
69
+
70
+ Args:
71
+ url: Image URL to convert
72
+
73
+ Returns:
74
+ Image source
75
+ """
76
+ if url.startswith("data:"):
77
+ match = re.match(r"data:([^;]+);base64,(.+)", url)
78
+ if match:
79
+ media_type = match.group(1)
80
+ base64_data = match.group(2)
81
+ source = {
82
+ "type": "image",
83
+ "source": {"type": "base64", "media_type": media_type, "data": base64_data},
84
+ }
85
+ else:
86
+ raise ValueError(f"Invalid base64 image: {url}")
87
+ elif self._use_bedrock:
88
+ async with httpx.AsyncClient() as client:
89
+ response = await client.get(url)
90
+ response.raise_for_status()
91
+ image_bytes = response.content
92
+ mime_type = mimetypes.guess_type(url)[0] or "image/jpeg"
93
+ source = {
94
+ "type": "image",
95
+ "source": {
96
+ "type": "base64",
97
+ "media_type": mime_type,
98
+ "data": base64.b64encode(image_bytes).decode("utf-8"),
99
+ },
100
+ }
101
+ else:
102
+ source = {"type": "image", "source": {"type": "url", "url": url}}
103
+
104
+ return source
50
105
 
106
+ def _convert_thinking_level_to_thinking_config(self, thinking_level: ThinkingLevel) -> dict[str, Any]:
107
+ """Convert ThinkingLevel enum to Claude's adaptive thinking config."""
51
108
  mapping = {
52
- ThinkingLevel.NONE: {"type": "disabled"},
53
- ThinkingLevel.LOW: {"type": "enabled", "budget_tokens": 1024},
54
- ThinkingLevel.MEDIUM: {"type": "enabled", "budget_tokens": 4096},
55
- ThinkingLevel.HIGH: {"type": "enabled", "budget_tokens": 16384},
109
+ ThinkingLevel.NONE: {}, # omit thinking config
110
+ ThinkingLevel.LOW: {"thinking": {"type": "adaptive"}, "output_config": {"effort": "low"}},
111
+ ThinkingLevel.MEDIUM: {"thinking": {"type": "adaptive"}, "output_config": {"effort": "medium"}},
112
+ ThinkingLevel.HIGH: {"thinking": {"type": "adaptive"}, "output_config": {"effort": "high"}},
56
113
  }
57
114
  return mapping.get(thinking_level)
58
115
 
@@ -80,7 +137,7 @@ class Claude4_5Client(LLMClient):
80
137
  Returns:
81
138
  Claude configuration dictionary
82
139
  """
83
- claude_config = {"model": self._model}
140
+ claude_config = {"model": self._model, "stream": True}
84
141
 
85
142
  if config.get("system_prompt") is not None:
86
143
  claude_config["system"] = config["system_prompt"]
@@ -96,7 +153,7 @@ class Claude4_5Client(LLMClient):
96
153
  # NOTE: Claude always provides thinking summary
97
154
  if config.get("thinking_level") is not None:
98
155
  claude_config["temperature"] = 1.0 # `temperature` may only be set to 1 when thinking is enabled
99
- claude_config["thinking"] = self._convert_thinking_level_to_budget(config["thinking_level"])
156
+ claude_config.update(self._convert_thinking_level_to_thinking_config(config["thinking_level"]))
100
157
 
101
158
  # Convert tools to Claude's tool schema
102
159
  if config.get("tools") is not None:
@@ -116,17 +173,17 @@ class Claude4_5Client(LLMClient):
116
173
 
117
174
  return claude_config
118
175
 
119
- def transform_uni_message_to_model_input(self, messages: list[UniMessage]) -> list[MessageParam]:
176
+ async def transform_uni_message_to_model_input(self, messages: list[UniMessage]) -> list[BetaMessageParam]:
120
177
  """
121
- Transform universal message format to Claude's MessageParam format.
178
+ Transform universal message format to Claude's BetaMessageParam format.
122
179
 
123
180
  Args:
124
181
  messages: List of universal message dictionaries
125
182
 
126
183
  Returns:
127
- List of Claude MessageParam objects
184
+ List of Claude BetaMessageParam objects
128
185
  """
129
- claude_messages: list[MessageParam] = []
186
+ claude_messages: list[BetaMessageParam] = []
130
187
 
131
188
  for msg in messages:
132
189
  content_blocks = []
@@ -134,12 +191,14 @@ class Claude4_5Client(LLMClient):
134
191
  if item["type"] == "text":
135
192
  content_blocks.append({"type": "text", "text": item["text"]})
136
193
  elif item["type"] == "image_url":
137
- # TODO: support base64 encoded images
138
- content_blocks.append({"type": "image", "source": {"type": "url", "url": item["image_url"]}})
194
+ content_blocks.append(await self._convert_image_url_to_source(item["image_url"]))
139
195
  elif item["type"] == "thinking":
140
- content_blocks.append(
141
- {"type": "thinking", "thinking": item["thinking"], "signature": item["signature"]}
142
- )
196
+ if item["thinking"] == REDACTED_THINKING:
197
+ content_blocks.append({"type": "redacted_thinking", "data": item["signature"]})
198
+ else:
199
+ content_blocks.append(
200
+ {"type": "thinking", "thinking": item["thinking"], "signature": item["signature"]}
201
+ )
143
202
  elif item["type"] == "tool_call":
144
203
  content_blocks.append(
145
204
  {
@@ -153,8 +212,13 @@ class Claude4_5Client(LLMClient):
153
212
  if "tool_call_id" not in item:
154
213
  raise ValueError("tool_call_id is required for tool result.")
155
214
 
215
+ tool_result = [{"type": "text", "text": item["text"]}]
216
+ if "images" in item:
217
+ for image_url in item["images"]:
218
+ tool_result.append(await self._convert_image_url_to_source(image_url))
219
+
156
220
  content_blocks.append(
157
- {"type": "tool_result", "content": item["result"], "tool_use_id": item["tool_call_id"]}
221
+ {"type": "tool_result", "content": tool_result, "tool_use_id": item["tool_call_id"]}
158
222
  )
159
223
  else:
160
224
  raise ValueError(f"Unknown item: {item}")
@@ -163,7 +227,7 @@ class Claude4_5Client(LLMClient):
163
227
 
164
228
  return claude_messages
165
229
 
166
- def transform_model_output_to_uni_event(self, model_output: MessageStreamEvent) -> UniEvent:
230
+ def transform_model_output_to_uni_event(self, model_output: BetaRawMessageStreamEvent) -> UniEvent:
167
231
  """
168
232
  Transform Claude model output to universal event format.
169
233
 
@@ -188,6 +252,8 @@ class Claude4_5Client(LLMClient):
188
252
  content_items.append(
189
253
  {"type": "partial_tool_call", "name": block.name, "arguments": "", "tool_call_id": block.id}
190
254
  )
255
+ elif block.type == "redacted_thinking":
256
+ content_items.append({"type": "thinking", "thinking": REDACTED_THINKING, "signature": block.data})
191
257
 
192
258
  elif claude_event_type == "content_block_delta":
193
259
  event_type = "delta"
@@ -210,11 +276,12 @@ class Claude4_5Client(LLMClient):
210
276
  event_type = "start"
211
277
  message = model_output.message
212
278
  if getattr(message, "usage", None):
279
+ cache_creation_tokens = message.usage.cache_creation_input_tokens or 0
213
280
  usage_metadata = {
214
- "prompt_tokens": message.usage.input_tokens,
281
+ "cached_tokens": message.usage.cache_read_input_tokens,
282
+ "prompt_tokens": message.usage.input_tokens + cache_creation_tokens,
215
283
  "thoughts_tokens": None,
216
284
  "response_tokens": None,
217
- "cached_tokens": message.usage.cache_read_input_tokens,
218
285
  }
219
286
 
220
287
  elif claude_event_type == "message_delta":
@@ -225,16 +292,17 @@ class Claude4_5Client(LLMClient):
225
292
  "end_turn": "stop",
226
293
  "max_tokens": "length",
227
294
  "stop_sequence": "stop",
228
- "tool_use": "stop",
295
+ "tool_use": "tool_call",
229
296
  }
230
297
  finish_reason = stop_reason_mapping.get(delta.stop_reason, "unknown")
231
298
 
232
299
  if getattr(model_output, "usage", None):
300
+ # In message_delta, we only update response_tokens
233
301
  usage_metadata = {
302
+ "cached_tokens": None,
234
303
  "prompt_tokens": None,
235
304
  "thoughts_tokens": None,
236
305
  "response_tokens": model_output.usage.output_tokens,
237
- "cached_tokens": None,
238
306
  }
239
307
 
240
308
  elif claude_event_type == "message_stop":
@@ -254,7 +322,7 @@ class Claude4_5Client(LLMClient):
254
322
  "finish_reason": finish_reason,
255
323
  }
256
324
 
257
- async def streaming_response(
325
+ async def _streaming_response_internal(
258
326
  self,
259
327
  messages: list[UniMessage],
260
328
  config: UniConfig,
@@ -264,7 +332,7 @@ class Claude4_5Client(LLMClient):
264
332
  claude_config = self.transform_uni_config_to_model_config(config)
265
333
 
266
334
  # Use unified message conversion
267
- claude_messages = self.transform_uni_message_to_model_input(messages)
335
+ claude_messages = await self.transform_uni_message_to_model_input(messages)
268
336
 
269
337
  # Add cache_control to last user message's last item if enabled
270
338
  prompt_caching = config.get("prompt_caching", PromptCaching.ENABLE)
@@ -282,66 +350,66 @@ class Claude4_5Client(LLMClient):
282
350
  # Stream generate
283
351
  partial_tool_call = {}
284
352
  partial_usage = {}
285
- async with self._client.messages.stream(**claude_config, messages=claude_messages) as stream:
286
- async for event in stream:
287
- event = self.transform_model_output_to_uni_event(event)
288
- if event["event_type"] == "start":
289
- for item in event["content_items"]:
290
- if item["type"] == "partial_tool_call":
291
- # initialize partial_tool_call
292
- partial_tool_call = {
293
- "name": item["name"],
294
- "arguments": "",
295
- "tool_call_id": item["tool_call_id"],
296
- }
297
- yield event
298
-
299
- if event["usage_metadata"] is not None:
300
- # initialize partial_usage
301
- partial_usage = {
302
- "prompt_tokens": event["usage_metadata"]["prompt_tokens"],
303
- "cached_tokens": event["usage_metadata"]["cached_tokens"],
304
- }
305
-
306
- elif event["event_type"] == "delta":
307
- for item in event["content_items"]:
308
- if item["type"] == "partial_tool_call":
309
- # update partial_tool_call
310
- partial_tool_call["arguments"] += item["arguments"]
311
-
312
- yield event
313
-
314
- elif event["event_type"] == "stop":
315
- if "name" in partial_tool_call and "arguments" in partial_tool_call:
316
- # finish partial_tool_call
317
- yield {
318
- "role": "assistant",
319
- "event_type": "delta",
320
- "content_items": [
321
- {
322
- "type": "tool_call",
323
- "name": partial_tool_call["name"],
324
- "arguments": json.loads(partial_tool_call["arguments"]),
325
- "tool_call_id": partial_tool_call["tool_call_id"],
326
- }
327
- ],
328
- "usage_metadata": None,
329
- "finish_reason": None,
330
- }
331
- partial_tool_call = {}
332
-
333
- if "prompt_tokens" in partial_usage and event["usage_metadata"] is not None:
334
- # finish partial_usage
335
- yield {
336
- "role": "assistant",
337
- "event_type": "stop",
338
- "content_items": [],
339
- "usage_metadata": {
340
- "prompt_tokens": partial_usage["prompt_tokens"],
341
- "thoughts_tokens": None,
342
- "response_tokens": event["usage_metadata"]["response_tokens"],
343
- "cached_tokens": partial_usage["cached_tokens"],
344
- },
345
- "finish_reason": event["finish_reason"],
353
+ stream = await self._client.beta.messages.create(**claude_config, messages=claude_messages)
354
+ async for event in stream:
355
+ event = self.transform_model_output_to_uni_event(event)
356
+ if event["event_type"] == "start":
357
+ for item in event["content_items"]:
358
+ if item["type"] == "partial_tool_call":
359
+ # initialize partial_tool_call
360
+ partial_tool_call = {
361
+ "name": item["name"],
362
+ "arguments": "",
363
+ "tool_call_id": item["tool_call_id"],
346
364
  }
347
- partial_usage = {}
365
+ yield event
366
+
367
+ if event["usage_metadata"] is not None:
368
+ # initialize partial_usage
369
+ partial_usage = {
370
+ "prompt_tokens": event["usage_metadata"]["prompt_tokens"],
371
+ "cached_tokens": event["usage_metadata"]["cached_tokens"],
372
+ }
373
+
374
+ elif event["event_type"] == "delta":
375
+ for item in event["content_items"]:
376
+ if item["type"] == "partial_tool_call":
377
+ # update partial_tool_call
378
+ partial_tool_call["arguments"] += item["arguments"]
379
+
380
+ yield event
381
+
382
+ elif event["event_type"] == "stop":
383
+ if "name" in partial_tool_call and "arguments" in partial_tool_call:
384
+ # finish partial_tool_call
385
+ yield {
386
+ "role": "assistant",
387
+ "event_type": "delta",
388
+ "content_items": [
389
+ {
390
+ "type": "tool_call",
391
+ "name": partial_tool_call["name"],
392
+ "arguments": json.loads(partial_tool_call["arguments"]),
393
+ "tool_call_id": partial_tool_call["tool_call_id"],
394
+ }
395
+ ],
396
+ "usage_metadata": None,
397
+ "finish_reason": None,
398
+ }
399
+ partial_tool_call = {}
400
+
401
+ if "prompt_tokens" in partial_usage and event["usage_metadata"] is not None:
402
+ # finish partial_usage
403
+ yield {
404
+ "role": "assistant",
405
+ "event_type": "stop",
406
+ "content_items": [],
407
+ "usage_metadata": {
408
+ "prompt_tokens": partial_usage["prompt_tokens"],
409
+ "thoughts_tokens": None,
410
+ "response_tokens": event["usage_metadata"]["response_tokens"],
411
+ "cached_tokens": partial_usage["cached_tokens"],
412
+ },
413
+ "finish_reason": event["finish_reason"],
414
+ }
415
+ partial_usage = {}