henchman-ai 0.1.15__py3-none-any.whl → 0.1.16__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
henchman/providers/anthropic.py CHANGED
@@ -4,12 +4,13 @@ This provider uses the Anthropic SDK to communicate with Claude models.
 Unlike OpenAI-compatible APIs, Anthropic has its own message format.
 """
 
+import asyncio
 import json
 import os
 from collections.abc import AsyncIterator
 from typing import Any
 
-from anthropic import AsyncAnthropic
+from anthropic import AsyncAnthropic, RateLimitError
 
 from henchman.providers.base import (
     FinishReason,
@@ -54,6 +55,7 @@ class AnthropicProvider(ModelProvider):
         model: str = "claude-sonnet-4-20250514",
         max_tokens: int = 8192,
         tokens_per_minute: int = 30000,
+        max_retries: int = 3,
     ) -> None:
         """Initialize the Anthropic provider.
 
@@ -62,10 +64,12 @@ class AnthropicProvider(ModelProvider):
             model: Default model to use.
             max_tokens: Maximum tokens in response.
             tokens_per_minute: Maximum tokens per minute (rate limit).
+            max_retries: Maximum number of retries for rate limits.
         """
         self.api_key = api_key or os.getenv("ANTHROPIC_API_KEY", "")
         self.default_model = model
         self.max_tokens = max_tokens
+        self.max_retries = max_retries
         self._client = AsyncAnthropic(api_key=self.api_key or "placeholder")
         self._rate_limiter = AsyncRateLimiter(tokens_per_minute)
 
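For orientation, a minimal usage sketch of the new parameter, assuming only the constructor signature shown in this hunk (the values are the signature's defaults; the api_key string is a placeholder):

    from henchman.providers.anthropic import AnthropicProvider

    provider = AnthropicProvider(
        api_key="sk-ant-...",              # falls back to ANTHROPIC_API_KEY if omitted
        model="claude-sonnet-4-20250514",
        max_tokens=8192,
        tokens_per_minute=30000,
        max_retries=3,                     # new in 0.1.16: retry budget for 429s
    )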
@@ -194,10 +198,6 @@ class AnthropicProvider(ModelProvider):
             if not (message.content or '').strip():
                 raise ValueError(f"Message with role '{message.role}' cannot have empty content")
 
-        # Rate limiting: wait for capacity based on input tokens
-        input_tokens = TokenCounter.count_messages(messages, model=self.default_model)
-        await self._rate_limiter.wait_for_capacity(input_tokens)
-
         system_prompt, formatted_messages = self._format_messages(messages)
 
         params: dict[str, Any] = {
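This hunk only moves the rate-limit wait; as the next hunk shows, the same calls reappear inside the new retry loop. The limiter interface the provider relies on is: AsyncRateLimiter(tokens_per_minute) at construction, wait_for_capacity(input_tokens) before a request, and add_usage(...) after it. The real implementation lives in henchman/utils/ratelimit.py and is not part of this diff; the sketch below is purely illustrative, assuming a simple sliding-window design:

    import asyncio
    import time

    class SlidingWindowLimiter:
        """Illustrative stand-in for AsyncRateLimiter, not the shipped code."""

        def __init__(self, tokens_per_minute: int) -> None:
            self.tokens_per_minute = tokens_per_minute
            self._events: list[tuple[float, int]] = []  # (timestamp, tokens)
            self._lock = asyncio.Lock()

        def _used_last_minute(self, now: float) -> int:
            # Drop events older than 60 s, then sum the remaining token counts.
            self._events = [(t, n) for t, n in self._events if now - t < 60.0]
            return sum(n for _, n in self._events)

        async def wait_for_capacity(self, tokens: int) -> None:
            # Block until `tokens` fits inside the rolling one-minute budget.
            while True:
                async with self._lock:
                    if self._used_last_minute(time.monotonic()) + tokens <= self.tokens_per_minute:
                        return
                await asyncio.sleep(0.5)

        async def add_usage(self, tokens: int) -> None:
            # Record tokens actually consumed by a completed request.
            async with self._lock:
                self._events.append((time.monotonic(), tokens))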
@@ -213,71 +213,99 @@ class AnthropicProvider(ModelProvider):
         if tools:
             params["tools"] = [self._format_tool(t) for t in tools]
 
-        total_output_tokens = 0
-        async with self._client.messages.stream(**params) as stream:
-            pending_tool_calls: dict[str, dict[str, Any]] = {}
-            current_tool_id: str | None = None
-
-            async for event in stream:
-                content: str | None = None
-                thinking: str | None = None
-                tool_calls: list[ToolCall] | None = None
-                finish_reason: FinishReason | None = None
-
-                if event.type == "content_block_start":
-                    block = event.content_block
-                    if block.type == "tool_use":
-                        current_tool_id = block.id
-                        pending_tool_calls[block.id] = {
-                            "id": block.id,
-                            "name": block.name,
-                            "arguments": "",
-                        }
-
-                elif event.type == "content_block_delta":
-                    delta = event.delta
-                    if delta.type == "text_delta":
-                        content = delta.text
-                        total_output_tokens += TokenCounter.count_text(content, model=self.default_model)
-                    elif delta.type == "thinking_delta":
-                        thinking = delta.thinking
-                        total_output_tokens += TokenCounter.count_text(thinking, model=self.default_model)
-                    elif delta.type == "input_json_delta" and current_tool_id:
-                        pending_tool_calls[current_tool_id]["arguments"] += delta.partial_json
-                        # Note: we don't count JSON tokens precisely here as they come in,
-                        # but we could count the delta text.
-                        total_output_tokens += TokenCounter.count_text(delta.partial_json, model=self.default_model)
-
-                elif event.type == "content_block_stop":
-                    current_tool_id = None
-
-                elif event.type == "message_delta":
-                    finish_reason = self._parse_finish_reason(event.delta.stop_reason)
-
-                # Emit completed tool calls
-                if finish_reason == FinishReason.TOOL_CALLS and pending_tool_calls:
-                    tool_calls = []
-                    for tc_data in pending_tool_calls.values():
-                        try:
-                            arguments = json.loads(tc_data["arguments"]) if tc_data["arguments"] else {}
-                        except json.JSONDecodeError:
-                            arguments = {}
-                        tool_calls.append(
-                            ToolCall(
-                                id=tc_data["id"],
-                                name=tc_data["name"],
-                                arguments=arguments,
-                            )
+        input_tokens = TokenCounter.count_messages(messages, model=self.default_model)
+        retries = 0
+        while True:
+            try:
+                # Rate limiting: wait for capacity based on input tokens
+                await self._rate_limiter.wait_for_capacity(input_tokens)
+
+                total_output_tokens = 0
+                async with self._client.messages.stream(**params) as stream:
+                    pending_tool_calls: dict[str, dict[str, Any]] = {}
+                    current_tool_id: str | None = None
+
+                    async for event in stream:
+                        content: str | None = None
+                        thinking: str | None = None
+                        tool_calls: list[ToolCall] | None = None
+                        finish_reason: FinishReason | None = None
+
+                        if event.type == "content_block_start":
+                            block = event.content_block
+                            if block.type == "tool_use":
+                                current_tool_id = block.id
+                                pending_tool_calls[block.id] = {
+                                    "id": block.id,
+                                    "name": block.name,
+                                    "arguments": "",
+                                }
+
+                        elif event.type == "content_block_delta":
+                            delta = event.delta
+                            if delta.type == "text_delta":
+                                content = delta.text
+                                total_output_tokens += TokenCounter.count_text(content, model=self.default_model)
+                            elif delta.type == "thinking_delta":
+                                thinking = delta.thinking
+                                total_output_tokens += TokenCounter.count_text(thinking, model=self.default_model)
+                            elif delta.type == "input_json_delta" and current_tool_id:
+                                pending_tool_calls[current_tool_id]["arguments"] += delta.partial_json
+                                # Note: we don't count JSON tokens precisely here as they come in,
+                                # but we could count the delta text.
+                                total_output_tokens += TokenCounter.count_text(delta.partial_json, model=self.default_model)
+
+                        elif event.type == "content_block_stop":
+                            current_tool_id = None
+
+                        elif event.type == "message_delta":
+                            finish_reason = self._parse_finish_reason(event.delta.stop_reason)
+
+                        # Emit completed tool calls
+                        if finish_reason == FinishReason.TOOL_CALLS and pending_tool_calls:
+                            tool_calls = []
+                            for tc_data in pending_tool_calls.values():
+                                try:
+                                    arguments = json.loads(tc_data["arguments"]) if tc_data["arguments"] else {}
+                                except json.JSONDecodeError:
+                                    arguments = {}
+                                tool_calls.append(
+                                    ToolCall(
+                                        id=tc_data["id"],
+                                        name=tc_data["name"],
+                                        arguments=arguments,
+                                    )
+                                )
+
+                        # Only yield if we have meaningful content
+                        if content is not None or thinking is not None or tool_calls or finish_reason:
+                            yield StreamChunk(
+                                content=content,
+                                tool_calls=tool_calls,
+                                finish_reason=finish_reason,
+                                thinking=thinking,
                             )
 
-                # Only yield if we have meaningful content
-                if content is not None or thinking is not None or tool_calls or finish_reason:
-                    yield StreamChunk(
-                        content=content,
-                        tool_calls=tool_calls,
-                        finish_reason=finish_reason,
-                        thinking=thinking,
-                    )
-
-        # Record final usage
-        await self._rate_limiter.add_usage(input_tokens + total_output_tokens)
+                # Record final usage
+                await self._rate_limiter.add_usage(input_tokens + total_output_tokens)
+                break  # Success, exit retry loop
+
+            except RateLimitError as e:
+                retries += 1
+                if retries > self.max_retries:
+                    raise
+
+                # Hit the server-side rate limit: wait, then retry.
+                # No Retry-After header parsing here; use exponential backoff.
+                wait_time = 5.0 * (2 ** (retries - 1))  # 5 s, 10 s, 20 s, ...
+
+                # Log to console if possible
+                from rich.console import Console
+                Console().print(f"[yellow]Rate limit reached (429). Retrying in {wait_time:.1f}s... (Attempt {retries}/{self.max_retries})[/yellow]")
+
+                await asyncio.sleep(wait_time)
+                # After sleeping, we loop back and try again. wait_for_capacity
+                # sits inside the retry loop, so the local token budget is
+                # re-checked before the next attempt; Anthropic has already told
+                # us we are over the limit, and this keeps the retry from
+                # hitting the same 429 immediately.
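Stripped of the streaming details, the new control flow is a bounded retry loop with exponential backoff: wait for local capacity, attempt the request, and on a 429 sleep 5 s, then 10 s, then 20 s, until max_retries is exhausted. A self-contained sketch of the same pattern, with a hypothetical do_request standing in for the Anthropic stream call:

    import asyncio

    class RateLimitError(Exception):
        """Stand-in for anthropic.RateLimitError."""

    async def call_with_backoff(do_request, max_retries: int = 3):
        retries = 0
        while True:
            try:
                return await do_request()  # the rate-limited operation
            except RateLimitError:
                retries += 1
                if retries > max_retries:
                    raise  # retry budget exhausted: surface the 429
                wait_time = 5.0 * (2 ** (retries - 1))  # 5 s, 10 s, 20 s, ...
                await asyncio.sleep(wait_time)

Because wait_for_capacity sits inside the try block in the shipped code, every retry also re-checks the local token budget before the next API call.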
henchman/version.py CHANGED
@@ -1,6 +1,6 @@
 """Version information for Henchman-AI."""
 
-VERSION_TUPLE = (0, 1, 15)
+VERSION_TUPLE = (0, 1, 16)
 VERSION = ".".join(str(v) for v in VERSION_TUPLE)
 
 __all__ = ["VERSION", "VERSION_TUPLE"]
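The version string is derived from the tuple by the expression in the file; for this release:

    >>> ".".join(str(v) for v in (0, 1, 16))
    '0.1.16'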
henchman_ai-0.1.15.dist-info/METADATA → henchman_ai-0.1.16.dist-info/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: henchman-ai
-Version: 0.1.15
+Version: 0.1.16
 Summary: A model-agnostic AI agent CLI - your AI henchman for the terminal
 Project-URL: Homepage, https://github.com/MGPowerlytics/henchman-ai
 Project-URL: Repository, https://github.com/MGPowerlytics/henchman-ai
henchman_ai-0.1.15.dist-info/RECORD → henchman_ai-0.1.16.dist-info/RECORD
@@ -1,6 +1,6 @@
 henchman/__init__.py,sha256=P_jCbtgAVbk2hn6uMum2UYkE7ptT361mWRkUZz0xKvk,148
 henchman/__main__.py,sha256=3oRWZvoWON5ErlJFYOOSU5p1PERRyK6MkT2LGEnbb2o,131
-henchman/version.py,sha256=0FZF9u0mohI-486usBu856Rq06_dnv5Ls-VzX8E_Sz0,161
+henchman/version.py,sha256=72yF3FAUrFWwBLOTlo9ueDmjrA3nvCMAIV3CJ9qqRlw,161
 henchman/cli/__init__.py,sha256=Gv86a_heuBLqUd-y46JZUyzUaDl5H-9RtcWGr3rMwBw,673
 henchman/cli/app.py,sha256=2hmIZghPi2C__c_0xIqrab5JpIBArNsGxAqZY1cn8ZI,11739
 henchman/cli/console.py,sha256=S4Jvq0UTmu9KtOkLNsIsvG_8X9eg1Guc6NAh8T_JeNI,8017
@@ -39,7 +39,7 @@ henchman/mcp/config.py,sha256=qzAJITMpQlfVfZXiUN0SLDKEratXR-9BKih7JJA_-RA,1390
 henchman/mcp/manager.py,sha256=DBh85SmdRbU96DLIIwRmT6QYBKRMNMr5vt1_UhHxrrA,3348
 henchman/mcp/tool.py,sha256=jeL-FtgC2JSbhfhR8RF4vO9PxLQet-KFZuDCN67cYG8,2654
 henchman/providers/__init__.py,sha256=Vh8yPhJSCtDgvEvYA2YHRQvuGea6eCm_CCG1rxpLYZE,795
-henchman/providers/anthropic.py,sha256=gCnMSKtthvyQisOvodEXUnic5QnrKUUJNIE2Aq55Gxs,10354
+henchman/providers/anthropic.py,sha256=XQJT_DGMM7VYsY6fsW44OYAikyelEIffdwuzfhmhG0o,12223
 henchman/providers/base.py,sha256=23YM21uHbSXN7vT92CUlN6FgIoztSOGMg7yFUwh2c6A,2814
 henchman/providers/deepseek.py,sha256=O__Gxy0xHCDhksHJgTa5f-u-5RhbT8ufh7dA6ly2yZ4,1349
 henchman/providers/ollama.py,sha256=g4vGTSlv8UEW82yrVRLCqjJqdDW_sG-kyvyRiE6ZbYg,1911
@@ -80,8 +80,8 @@ henchman/utils/ratelimit.py,sha256=P8HJYf68fSYNFK1bjhjdennL-1Vo7GwYzivQKlZh-Z4,2
 henchman/utils/retry.py,sha256=sobZk9LLGxglSJw_jeNaBYCrvH14YNFrBVyp_OwLWcw,4993
 henchman/utils/tokens.py,sha256=w5HjySzg5t9RYL-ivhhHLnT2gV0a83j4rwKDZGgAF6c,5696
 henchman/utils/validation.py,sha256=moj4LQXVXt2J-3_pWVH_0-EabyRYApOU2Oh5JSTIua8,4146
-henchman_ai-0.1.15.dist-info/METADATA,sha256=0jATgYUQMY1VLxHNPX4v5MXAHZekVtWBbR2UGYYLCHc,9186
-henchman_ai-0.1.15.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
-henchman_ai-0.1.15.dist-info/entry_points.txt,sha256=dtPyd6BzK3A8lmrj1KXTFlHBplIWcWMdryjtR0jw5iU,51
-henchman_ai-0.1.15.dist-info/licenses/LICENSE,sha256=TMoSCCG1I1vCMK-Bjtvxe80E8kIdSdrtuQXYHc_ahqg,1064
-henchman_ai-0.1.15.dist-info/RECORD,,
+henchman_ai-0.1.16.dist-info/METADATA,sha256=QkiPPnTpBk2DJ2oU_NHis7J8-EW1ixy9zNGJwHSs01M,9186
+henchman_ai-0.1.16.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
+henchman_ai-0.1.16.dist-info/entry_points.txt,sha256=dtPyd6BzK3A8lmrj1KXTFlHBplIWcWMdryjtR0jw5iU,51
+henchman_ai-0.1.16.dist-info/licenses/LICENSE,sha256=TMoSCCG1I1vCMK-Bjtvxe80E8kIdSdrtuQXYHc_ahqg,1064
+henchman_ai-0.1.16.dist-info/RECORD,,