henchman-ai 0.1.15__py3-none-any.whl → 0.1.16__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
henchman/providers/anthropic.py CHANGED
@@ -4,12 +4,13 @@ This provider uses the Anthropic SDK to communicate with Claude models.
 Unlike OpenAI-compatible APIs, Anthropic has its own message format.
 """
 
+import asyncio
 import json
 import os
 from collections.abc import AsyncIterator
 from typing import Any
 
-from anthropic import AsyncAnthropic
+from anthropic import AsyncAnthropic, RateLimitError
 
 from henchman.providers.base import (
     FinishReason,
@@ -54,6 +55,7 @@ class AnthropicProvider(ModelProvider):
         model: str = "claude-sonnet-4-20250514",
         max_tokens: int = 8192,
         tokens_per_minute: int = 30000,
+        max_retries: int = 3,
     ) -> None:
         """Initialize the Anthropic provider.
 
@@ -62,10 +64,12 @@ class AnthropicProvider(ModelProvider):
             model: Default model to use.
             max_tokens: Maximum tokens in response.
             tokens_per_minute: Maximum tokens per minute (rate limit).
+            max_retries: Maximum number of retries for rate limits.
         """
         self.api_key = api_key or os.getenv("ANTHROPIC_API_KEY", "")
         self.default_model = model
         self.max_tokens = max_tokens
+        self.max_retries = max_retries
         self._client = AsyncAnthropic(api_key=self.api_key or "placeholder")
         self._rate_limiter = AsyncRateLimiter(tokens_per_minute)
 
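For orientation, a minimal usage sketch of the new parameter, assuming only the constructor signature shown in this hunk (the values are the signature's defaults; the api_key string is a placeholder):

    from henchman.providers.anthropic import AnthropicProvider

    provider = AnthropicProvider(
        api_key="sk-ant-...",              # falls back to ANTHROPIC_API_KEY if omitted
        model="claude-sonnet-4-20250514",
        max_tokens=8192,
        tokens_per_minute=30000,
        max_retries=3,                     # new in 0.1.16: retry budget for 429s
    )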
@@ -194,10 +198,6 @@ class AnthropicProvider(ModelProvider):
             if not (message.content or '').strip():
                 raise ValueError(f"Message with role '{message.role}' cannot have empty content")
 
-        # Rate limiting: wait for capacity based on input tokens
-        input_tokens = TokenCounter.count_messages(messages, model=self.default_model)
-        await self._rate_limiter.wait_for_capacity(input_tokens)
-
         system_prompt, formatted_messages = self._format_messages(messages)
 
         params: dict[str, Any] = {
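This hunk only moves the rate-limit wait; as the next hunk shows, the same calls reappear inside the new retry loop. The limiter interface the provider relies on is: AsyncRateLimiter(tokens_per_minute) at construction, wait_for_capacity(input_tokens) before a request, and add_usage(...) after it. The real implementation lives in henchman/utils/ratelimit.py and is not part of this diff; the sketch below is purely illustrative, assuming a simple sliding-window design:

    import asyncio
    import time

    class SlidingWindowLimiter:
        """Illustrative stand-in for AsyncRateLimiter, not the shipped code."""

        def __init__(self, tokens_per_minute: int) -> None:
            self.tokens_per_minute = tokens_per_minute
            self._events: list[tuple[float, int]] = []  # (timestamp, tokens)
            self._lock = asyncio.Lock()

        def _used_last_minute(self, now: float) -> int:
            # Drop events older than 60 s, then sum the remaining token counts.
            self._events = [(t, n) for t, n in self._events if now - t < 60.0]
            return sum(n for _, n in self._events)

        async def wait_for_capacity(self, tokens: int) -> None:
            # Block until `tokens` fits inside the rolling one-minute budget.
            while True:
                async with self._lock:
                    if self._used_last_minute(time.monotonic()) + tokens <= self.tokens_per_minute:
                        return
                await asyncio.sleep(0.5)

        async def add_usage(self, tokens: int) -> None:
            # Record tokens actually consumed by a completed request.
            async with self._lock:
                self._events.append((time.monotonic(), tokens))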
@@ -213,71 +213,99 @@ class AnthropicProvider(ModelProvider):
         if tools:
             params["tools"] = [self._format_tool(t) for t in tools]
 
-        total_output_tokens = 0
-        async with self._client.messages.stream(**params) as stream:
-            pending_tool_calls: dict[str, dict[str, Any]] = {}
-            current_tool_id: str | None = None
-
-            async for event in stream:
-                content: str | None = None
-                thinking: str | None = None
-                tool_calls: list[ToolCall] | None = None
-                finish_reason: FinishReason | None = None
-
-                if event.type == "content_block_start":
-                    block = event.content_block
-                    if block.type == "tool_use":
-                        current_tool_id = block.id
-                        pending_tool_calls[block.id] = {
-                            "id": block.id,
-                            "name": block.name,
-                            "arguments": "",
-                        }
-
-                elif event.type == "content_block_delta":
-                    delta = event.delta
-                    if delta.type == "text_delta":
-                        content = delta.text
-                        total_output_tokens += TokenCounter.count_text(content, model=self.default_model)
-                    elif delta.type == "thinking_delta":
-                        thinking = delta.thinking
-                        total_output_tokens += TokenCounter.count_text(thinking, model=self.default_model)
-                    elif delta.type == "input_json_delta" and current_tool_id:
-                        pending_tool_calls[current_tool_id]["arguments"] += delta.partial_json
-                        # Note: we don't count JSON tokens precisely here as they come in,
-                        # but we could count the delta text.
-                        total_output_tokens += TokenCounter.count_text(delta.partial_json, model=self.default_model)
-
-                elif event.type == "content_block_stop":
-                    current_tool_id = None
-
-                elif event.type == "message_delta":
-                    finish_reason = self._parse_finish_reason(event.delta.stop_reason)
-
-                # Emit completed tool calls
-                if finish_reason == FinishReason.TOOL_CALLS and pending_tool_calls:
-                    tool_calls = []
-                    for tc_data in pending_tool_calls.values():
-                        try:
-                            arguments = json.loads(tc_data["arguments"]) if tc_data["arguments"] else {}
-                        except json.JSONDecodeError:
-                            arguments = {}
-                        tool_calls.append(
-                            ToolCall(
-                                id=tc_data["id"],
-                                name=tc_data["name"],
-                                arguments=arguments,
-                            )
+        input_tokens = TokenCounter.count_messages(messages, model=self.default_model)
+        retries = 0
+        while True:
+            try:
+                # Rate limiting: wait for capacity based on input tokens
+                await self._rate_limiter.wait_for_capacity(input_tokens)
+
+                total_output_tokens = 0
+                async with self._client.messages.stream(**params) as stream:
+                    pending_tool_calls: dict[str, dict[str, Any]] = {}
+                    current_tool_id: str | None = None
+
+                    async for event in stream:
+                        content: str | None = None
+                        thinking: str | None = None
+                        tool_calls: list[ToolCall] | None = None
+                        finish_reason: FinishReason | None = None
+
+                        if event.type == "content_block_start":
+                            block = event.content_block
+                            if block.type == "tool_use":
+                                current_tool_id = block.id
+                                pending_tool_calls[block.id] = {
+                                    "id": block.id,
+                                    "name": block.name,
+                                    "arguments": "",
+                                }
+
+                        elif event.type == "content_block_delta":
+                            delta = event.delta
+                            if delta.type == "text_delta":
+                                content = delta.text
+                                total_output_tokens += TokenCounter.count_text(content, model=self.default_model)
+                            elif delta.type == "thinking_delta":
+                                thinking = delta.thinking
+                                total_output_tokens += TokenCounter.count_text(thinking, model=self.default_model)
+                            elif delta.type == "input_json_delta" and current_tool_id:
+                                pending_tool_calls[current_tool_id]["arguments"] += delta.partial_json
+                                # Note: we don't count JSON tokens precisely here as they come in,
+                                # but we could count the delta text.
+                                total_output_tokens += TokenCounter.count_text(delta.partial_json, model=self.default_model)
+
+                        elif event.type == "content_block_stop":
+                            current_tool_id = None
+
+                        elif event.type == "message_delta":
+                            finish_reason = self._parse_finish_reason(event.delta.stop_reason)
+
+                        # Emit completed tool calls
+                        if finish_reason == FinishReason.TOOL_CALLS and pending_tool_calls:
+                            tool_calls = []
+                            for tc_data in pending_tool_calls.values():
+                                try:
+                                    arguments = json.loads(tc_data["arguments"]) if tc_data["arguments"] else {}
+                                except json.JSONDecodeError:
+                                    arguments = {}
+                                tool_calls.append(
+                                    ToolCall(
+                                        id=tc_data["id"],
+                                        name=tc_data["name"],
+                                        arguments=arguments,
+                                    )
+                                )
+
+                        # Only yield if we have meaningful content
+                        if content is not None or thinking is not None or tool_calls or finish_reason:
+                            yield StreamChunk(
+                                content=content,
+                                tool_calls=tool_calls,
+                                finish_reason=finish_reason,
+                                thinking=thinking,
                             )
 
-                # Only yield if we have meaningful content
-                if content is not None or thinking is not None or tool_calls or finish_reason:
-                    yield StreamChunk(
-                        content=content,
-                        tool_calls=tool_calls,
-                        finish_reason=finish_reason,
-                        thinking=thinking,
-                    )
-
-        # Record final usage
-        await self._rate_limiter.add_usage(input_tokens + total_output_tokens)
+                # Record final usage
+                await self._rate_limiter.add_usage(input_tokens + total_output_tokens)
+                break  # Success, exit retry loop
+
+            except RateLimitError as e:
+                retries += 1
+                if retries > self.max_retries:
+                    raise
+
+                # Hit the server-side rate limit: wait, then retry.
+                # No Retry-After header parsing here; use exponential backoff.
+                wait_time = 5.0 * (2 ** (retries - 1))  # 5 s, 10 s, 20 s, ...
+
+                # Log to console if possible
+                from rich.console import Console
+                Console().print(f"[yellow]Rate limit reached (429). Retrying in {wait_time:.1f}s... (Attempt {retries}/{self.max_retries})[/yellow]")
+
+                await asyncio.sleep(wait_time)
+                # After sleeping, we loop back and try again. wait_for_capacity
+                # sits inside the retry loop, so the local token budget is
+                # re-checked before the next attempt; Anthropic has already told
+                # us we are over the limit, and this keeps the retry from
+                # hitting the same 429 immediately.
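Stripped of the streaming details, the new control flow is a bounded retry loop with exponential backoff: wait for local capacity, attempt the request, and on a 429 sleep 5 s, then 10 s, then 20 s, until max_retries is exhausted. A self-contained sketch of the same pattern, with a hypothetical do_request standing in for the Anthropic stream call:

    import asyncio

    class RateLimitError(Exception):
        """Stand-in for anthropic.RateLimitError."""

    async def call_with_backoff(do_request, max_retries: int = 3):
        retries = 0
        while True:
            try:
                return await do_request()  # the rate-limited operation
            except RateLimitError:
                retries += 1
                if retries > max_retries:
                    raise  # retry budget exhausted: surface the 429
                wait_time = 5.0 * (2 ** (retries - 1))  # 5 s, 10 s, 20 s, ...
                await asyncio.sleep(wait_time)

Because wait_for_capacity sits inside the try block in the shipped code, every retry also re-checks the local token budget before the next API call.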
henchman/version.py CHANGED
@@ -1,6 +1,6 @@
 """Version information for Henchman-AI."""
 
-VERSION_TUPLE = (0, 1, 15)
+VERSION_TUPLE = (0, 1, 16)
 VERSION = ".".join(str(v) for v in VERSION_TUPLE)
 
 __all__ = ["VERSION", "VERSION_TUPLE"]
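The version string is derived from the tuple by the expression in the file; for this release:

    >>> ".".join(str(v) for v in (0, 1, 16))
    '0.1.16'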
henchman_ai-0.1.15.dist-info/METADATA → henchman_ai-0.1.16.dist-info/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: henchman-ai
-Version: 0.1.15
+Version: 0.1.16
 Summary: A model-agnostic AI agent CLI - your AI henchman for the terminal
 Project-URL: Homepage, https://github.com/MGPowerlytics/henchman-ai
 Project-URL: Repository, https://github.com/MGPowerlytics/henchman-ai
henchman_ai-0.1.15.dist-info/RECORD → henchman_ai-0.1.16.dist-info/RECORD
@@ -1,6 +1,6 @@
 henchman/__init__.py,sha256=P_jCbtgAVbk2hn6uMum2UYkE7ptT361mWRkUZz0xKvk,148
 henchman/__main__.py,sha256=3oRWZvoWON5ErlJFYOOSU5p1PERRyK6MkT2LGEnbb2o,131
-henchman/version.py,sha256=0FZF9u0mohI-486usBu856Rq06_dnv5Ls-VzX8E_Sz0,161
+henchman/version.py,sha256=72yF3FAUrFWwBLOTlo9ueDmjrA3nvCMAIV3CJ9qqRlw,161
 henchman/cli/__init__.py,sha256=Gv86a_heuBLqUd-y46JZUyzUaDl5H-9RtcWGr3rMwBw,673
 henchman/cli/app.py,sha256=2hmIZghPi2C__c_0xIqrab5JpIBArNsGxAqZY1cn8ZI,11739
 henchman/cli/console.py,sha256=S4Jvq0UTmu9KtOkLNsIsvG_8X9eg1Guc6NAh8T_JeNI,8017
@@ -39,7 +39,7 @@ henchman/mcp/config.py,sha256=qzAJITMpQlfVfZXiUN0SLDKEratXR-9BKih7JJA_-RA,1390
 henchman/mcp/manager.py,sha256=DBh85SmdRbU96DLIIwRmT6QYBKRMNMr5vt1_UhHxrrA,3348
 henchman/mcp/tool.py,sha256=jeL-FtgC2JSbhfhR8RF4vO9PxLQet-KFZuDCN67cYG8,2654
 henchman/providers/__init__.py,sha256=Vh8yPhJSCtDgvEvYA2YHRQvuGea6eCm_CCG1rxpLYZE,795
-henchman/providers/anthropic.py,sha256=gCnMSKtthvyQisOvodEXUnic5QnrKUUJNIE2Aq55Gxs,10354
+henchman/providers/anthropic.py,sha256=XQJT_DGMM7VYsY6fsW44OYAikyelEIffdwuzfhmhG0o,12223
 henchman/providers/base.py,sha256=23YM21uHbSXN7vT92CUlN6FgIoztSOGMg7yFUwh2c6A,2814
 henchman/providers/deepseek.py,sha256=O__Gxy0xHCDhksHJgTa5f-u-5RhbT8ufh7dA6ly2yZ4,1349
 henchman/providers/ollama.py,sha256=g4vGTSlv8UEW82yrVRLCqjJqdDW_sG-kyvyRiE6ZbYg,1911
@@ -80,8 +80,8 @@ henchman/utils/ratelimit.py,sha256=P8HJYf68fSYNFK1bjhjdennL-1Vo7GwYzivQKlZh-Z4,2
 henchman/utils/retry.py,sha256=sobZk9LLGxglSJw_jeNaBYCrvH14YNFrBVyp_OwLWcw,4993
 henchman/utils/tokens.py,sha256=w5HjySzg5t9RYL-ivhhHLnT2gV0a83j4rwKDZGgAF6c,5696
 henchman/utils/validation.py,sha256=moj4LQXVXt2J-3_pWVH_0-EabyRYApOU2Oh5JSTIua8,4146
-henchman_ai-0.1.15.dist-info/METADATA,sha256=0jATgYUQMY1VLxHNPX4v5MXAHZekVtWBbR2UGYYLCHc,9186
-henchman_ai-0.1.15.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
-henchman_ai-0.1.15.dist-info/entry_points.txt,sha256=dtPyd6BzK3A8lmrj1KXTFlHBplIWcWMdryjtR0jw5iU,51
-henchman_ai-0.1.15.dist-info/licenses/LICENSE,sha256=TMoSCCG1I1vCMK-Bjtvxe80E8kIdSdrtuQXYHc_ahqg,1064
-henchman_ai-0.1.15.dist-info/RECORD,,
+henchman_ai-0.1.16.dist-info/METADATA,sha256=QkiPPnTpBk2DJ2oU_NHis7J8-EW1ixy9zNGJwHSs01M,9186
+henchman_ai-0.1.16.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
+henchman_ai-0.1.16.dist-info/entry_points.txt,sha256=dtPyd6BzK3A8lmrj1KXTFlHBplIWcWMdryjtR0jw5iU,51
+henchman_ai-0.1.16.dist-info/licenses/LICENSE,sha256=TMoSCCG1I1vCMK-Bjtvxe80E8kIdSdrtuQXYHc_ahqg,1064
+henchman_ai-0.1.16.dist-info/RECORD,,