henchman-ai 0.1.14__py3-none-any.whl → 0.1.16__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- henchman/cli/app.py +8 -4
- henchman/cli/commands/builtins.py +3 -1
- henchman/cli/commands/model.py +285 -0
- henchman/providers/anthropic.py +106 -58
- henchman/utils/ratelimit.py +71 -0
- henchman/utils/tokens.py +1 -0
- henchman/version.py +1 -1
- {henchman_ai-0.1.14.dist-info → henchman_ai-0.1.16.dist-info}/METADATA +1 -1
- {henchman_ai-0.1.14.dist-info → henchman_ai-0.1.16.dist-info}/RECORD +12 -10
- {henchman_ai-0.1.14.dist-info → henchman_ai-0.1.16.dist-info}/WHEEL +0 -0
- {henchman_ai-0.1.14.dist-info → henchman_ai-0.1.16.dist-info}/entry_points.txt +0 -0
- {henchman_ai-0.1.14.dist-info → henchman_ai-0.1.16.dist-info}/licenses/LICENSE +0 -0
henchman/cli/app.py
CHANGED

@@ -43,13 +43,17 @@ def _get_provider() -> ModelProvider:
         registry = get_default_registry()

         provider_name = settings.providers.default or "deepseek"
-        provider_settings = getattr(settings.providers, provider_name,
+        provider_settings = getattr(settings.providers, provider_name, {})

-        if provider_settings:
+        if isinstance(provider_settings, dict):
+            # Ensure api_key is handled correctly (backward compatibility or env var)
+            kwargs = provider_settings.copy()
+            if not kwargs.get("api_key"):
+                kwargs["api_key"] = os.environ.get("ANTHROPIC_API_KEY") if provider_name == "anthropic" else os.environ.get("HENCHMAN_API_KEY")
+
         return registry.create(
             provider_name,
-
-            model=getattr(provider_settings, "model", None),
+            **kwargs
         )
     except Exception:  # pragma: no cover
         pass
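
The effect of this change is that per-provider settings are now treated as a plain dict and forwarded wholesale to the registry, with an environment-variable fallback for the API key. A minimal standalone sketch of that resolution order, under the assumption that the settings object behaves like a namespace (the SimpleNamespace stub below is illustrative, not the real Settings class):

    import os
    from types import SimpleNamespace

    def resolve_provider_kwargs(providers: SimpleNamespace) -> tuple[str, dict]:
        """Sketch of the resolution order introduced in _get_provider.

        Mirrors the diff: fall back to "deepseek", copy the per-provider dict,
        and fill in api_key from the environment when the config omits it.
        """
        provider_name = providers.default or "deepseek"
        provider_settings = getattr(providers, provider_name, {})

        kwargs: dict = {}
        if isinstance(provider_settings, dict):
            kwargs = provider_settings.copy()
            if not kwargs.get("api_key"):
                env = "ANTHROPIC_API_KEY" if provider_name == "anthropic" else "HENCHMAN_API_KEY"
                kwargs["api_key"] = os.environ.get(env)
        return provider_name, kwargs

    # Example: config names anthropic but leaves the key to the environment.
    providers = SimpleNamespace(default="anthropic", anthropic={"model": "claude-3-5-sonnet-20241022"})
    name, kwargs = resolve_provider_kwargs(providers)
    print(name, kwargs)  # api_key comes from ANTHROPIC_API_KEY if set, else None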
henchman/cli/commands/builtins.py
CHANGED

@@ -8,6 +8,7 @@ from __future__ import annotations
 from henchman.cli.commands import Command, CommandContext
 from henchman.cli.commands.chat import ChatCommand
 from henchman.cli.commands.mcp import McpCommand
+from henchman.cli.commands.model import ModelCommand
 from henchman.cli.commands.plan import PlanCommand
 from henchman.cli.commands.rag import RagCommand
 from henchman.cli.commands.skill import SkillCommand

@@ -57,10 +58,10 @@ class HelpCommand(Command):
         ctx.console.print(" /skill - Manage and execute learned skills")
         ctx.console.print(" /chat - Manage chat sessions (save, list, resume)")
         ctx.console.print(" /mcp - Manage MCP server connections")
+        ctx.console.print(" /model - Show or change model/provider")
         ctx.console.print(" /quit - Exit the CLI")
         ctx.console.print(" /clear - Clear the screen")
         ctx.console.print(" /tools - List available tools")
-        ctx.console.print(" /model - Show or change the model")
         ctx.console.print("")

@@ -212,6 +213,7 @@ def get_builtin_commands() -> list[Command]:
         ToolsCommand(),
         ChatCommand(),
         McpCommand(),
+        ModelCommand(),
         PlanCommand(),
         RagCommand(),
         SkillCommand(),
henchman/cli/commands/model.py
ADDED

@@ -0,0 +1,285 @@
+"""Model and provider management commands."""
+
+from __future__ import annotations
+
+import os
+from typing import TYPE_CHECKING
+
+from henchman.cli.commands import Command, CommandContext
+from henchman.config import load_settings
+from henchman.providers import get_default_registry
+
+if TYPE_CHECKING:
+    from henchman.providers.base import ModelProvider
+
+
+class ModelCommand(Command):
+    """Show or change the model and provider."""
+
+    @property
+    def name(self) -> str:
+        """Command name.
+
+        Returns:
+            Command name string.
+        """
+        return "model"
+
+    @property
+    def description(self) -> str:
+        """Command description.
+
+        Returns:
+            Description string.
+        """
+        return "Show or change the model and provider"
+
+    @property
+    def usage(self) -> str:
+        """Command usage.
+
+        Returns:
+            Usage string.
+        """
+        return "/model [list|set <provider> [<model>]]"
+
+    async def execute(self, ctx: CommandContext) -> None:
+        """Execute the model command.
+
+        Args:
+            ctx: Command context.
+        """
+        args = ctx.args
+        if not args:
+            await self._show_current(ctx)
+        elif args[0] == "list":
+            await self._list_providers(ctx)
+        elif args[0] == "set" and len(args) >= 2:
+            await self._set_provider(ctx, args[1], args[2] if len(args) > 2 else None)
+        else:
+            ctx.console.print(f"[yellow]Usage: {self.usage}[/]")
+
+    async def _show_current(self, ctx: CommandContext) -> None:
+        """Show current provider and model.
+
+        Args:
+            ctx: Command context.
+        """
+        if not ctx.agent:
+            ctx.console.print("[yellow]No active agent. Cannot show current model.[/]")
+            return
+
+        provider = ctx.agent.provider
+        settings = load_settings()
+        registry = get_default_registry()
+
+        ctx.console.print("\n[bold blue]Current Configuration[/]\n")
+        ctx.console.print(f" Provider: [cyan]{provider.name}[/]")
+
+        # Show model if available
+        if hasattr(provider, "default_model"):
+            ctx.console.print(f" Model: [cyan]{provider.default_model}[/]")
+
+        # Show available providers
+        available = registry.list_providers()
+        ctx.console.print(f"\n Available providers: [dim]{', '.join(available)}[/]")
+        ctx.console.print(f"\n Use [cyan]/model list[/] to see all providers")
+        ctx.console.print(f" Use [cyan]/model set <provider> [model][/] to switch")
+        ctx.console.print("")
+
+    async def _list_providers(self, ctx: CommandContext) -> None:
+        """List all available providers and models.
+
+        Args:
+            ctx: Command context.
+        """
+        registry = get_default_registry()
+        providers = registry.list_providers()
+
+        ctx.console.print("\n[bold blue]Available Providers[/]\n")
+
+        for provider_name in sorted(providers):
+            try:
+                provider_class = registry.get(provider_name)
+
+                # Get example configuration
+                example_config = self._get_example_config(provider_name)
+
+                ctx.console.print(f" [cyan]{provider_name}[/]")
+                if hasattr(provider_class, "__doc__") and provider_class.__doc__:
+                    doc_lines = provider_class.__doc__.strip().split('\n')
+                    first_line = doc_lines[0].strip()
+                    ctx.console.print(f" [dim]{first_line}[/]")
+
+                if example_config:
+                    ctx.console.print(f" [yellow]Config:[/] {example_config}")
+
+                # Show environment variables needed
+                env_vars = self._get_env_vars(provider_name)
+                if env_vars:
+                    ctx.console.print(f" [yellow]Env vars:[/] {env_vars}")
+
+                ctx.console.print("")
+            except Exception as e:
+                ctx.console.print(f" [red]{provider_name}[/] - Error: {e}")
+
+    async def _set_provider(
+        self,
+        ctx: CommandContext,
+        provider_name: str,
+        model_name: str | None = None
+    ) -> None:
+        """Switch to a different provider.
+
+        Args:
+            ctx: Command context.
+            provider_name: Name of the provider to switch to.
+            model_name: Optional model name to use.
+
+        Raises:
+            ValueError: If provider cannot be created.
+        """
+        if not ctx.repl:
+            ctx.console.print("[yellow]Cannot switch providers without REPL context.[/]")
+            return
+
+        try:
+            # Get registry and create new provider
+            registry = get_default_registry()
+
+            if provider_name not in registry.list_providers():
+                ctx.console.print(f"[red]Provider '{provider_name}' not found.[/]")
+                ctx.console.print(f"Available providers: {', '.join(registry.list_providers())}")
+                return
+
+            # Try to get API key from environment or settings
+            api_key = self._get_api_key_for_provider(provider_name)
+
+            # Create provider instance
+            provider_kwargs = {"api_key": api_key or ""}
+            if model_name:
+                provider_kwargs["model"] = model_name
+
+            new_provider = registry.create(provider_name, **provider_kwargs)
+
+            # Test the provider with a simple call
+            ctx.console.print(f"[dim]Testing {provider_name} connection...[/]")
+            try:
+                # Simple test to verify provider works
+                if hasattr(new_provider, "default_model"):
+                    ctx.console.print(f"[green]✓ Connected to {provider_name}[/]")
+                    if model_name:
+                        ctx.console.print(f"[green]✓ Using model: {model_name}[/]")
+                    else:
+                        ctx.console.print(f"[green]✓ Using default model: {new_provider.default_model}[/]")
+                else:
+                    ctx.console.print(f"[green]✓ Connected to {provider_name}[/]")
+            except Exception as e:
+                ctx.console.print(f"[yellow]⚠ Connection test failed: {e}[/]")
+                ctx.console.print("[yellow]Provider created but may not work correctly.[/]")
+
+            # Update the agent with new provider
+            old_provider = ctx.agent.provider
+            ctx.agent.provider = new_provider
+
+            # Update REPL's provider reference
+            ctx.repl.provider = new_provider
+
+            ctx.console.print(f"\n[bold green]✓ Switched from {old_provider.name} to {new_provider.name}[/]")
+
+            # Show any configuration needed
+            if not api_key:
+                env_var = self._get_env_var_name(provider_name)
+                ctx.console.print(f"\n[yellow]⚠ No API key found for {provider_name}[/]")
+                ctx.console.print(f" Set environment variable: [cyan]{env_var}=your-api-key[/]")
+                ctx.console.print(f" Or configure in [cyan]~/.henchman/settings.yaml[/]:")
+                ctx.console.print(f"   providers:")
+                ctx.console.print(f"     {provider_name}:")
+                ctx.console.print(f"       api_key: your-api-key")
+
+        except Exception as e:
+            ctx.console.print(f"[red]Failed to switch provider: {e}[/]")
+            ctx.console.print("[dim]Check that the provider is properly configured.[/]")
+
+    def _get_example_config(self, provider_name: str) -> str:
+        """Get example configuration for a provider.
+
+        Args:
+            provider_name: Name of the provider.
+
+        Returns:
+            Example configuration string.
+        """
+        examples = {
+            "deepseek": "deepseek-chat (default), deepseek-coder",
+            "openai": "gpt-4-turbo, gpt-3.5-turbo",
+            "anthropic": "claude-3-opus, claude-3-sonnet",
+            "ollama": "llama2, mistral, codellama",
+        }
+        return examples.get(provider_name, "Check provider documentation")
+
+    def _get_env_vars(self, provider_name: str) -> str:
+        """Get environment variables needed for a provider.
+
+        Args:
+            provider_name: Name of the provider.
+
+        Returns:
+            Environment variable names.
+        """
+        env_vars = {
+            "deepseek": "DEEPSEEK_API_KEY",
+            "openai": "OPENAI_API_KEY",
+            "anthropic": "ANTHROPIC_API_KEY",
+            "ollama": "OLLAMA_HOST (optional, defaults to http://localhost:11434)",
+        }
+        return env_vars.get(provider_name, "Check provider documentation")
+
+    def _get_env_var_name(self, provider_name: str) -> str:
+        """Get the environment variable name for a provider's API key.
+
+        Args:
+            provider_name: Name of the provider.
+
+        Returns:
+            Environment variable name.
+        """
+        mapping = {
+            "deepseek": "DEEPSEEK_API_KEY",
+            "openai": "OPENAI_API_KEY",
+            "anthropic": "ANTHROPIC_API_KEY",
+            "ollama": "OLLAMA_API_KEY",  # Ollama doesn't usually need API key
+        }
+        return mapping.get(provider_name, f"{provider_name.upper()}_API_KEY")
+
+    def _get_api_key_for_provider(self, provider_name: str) -> str | None:
+        """Get API key for a provider from environment or settings.
+
+        Args:
+            provider_name: Name of the provider.
+
+        Returns:
+            API key if found, None otherwise.
+        """
+        # Try environment variables first
+        env_var = self._get_env_var_name(provider_name)
+        api_key = os.environ.get(env_var)
+
+        if api_key:
+            return api_key
+
+        # Try generic HENCHMAN_API_KEY
+        api_key = os.environ.get("HENCHMAN_API_KEY")
+        if api_key:
+            return api_key
+
+        # Try settings
+        try:
+            settings = load_settings()
+            provider_settings = getattr(settings.providers, provider_name, None)
+            if provider_settings and hasattr(provider_settings, "api_key"):
+                return provider_settings.api_key
+        except Exception:
+            pass
+
+        return None
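
Based on the usage string above, the new command supports three forms; an illustrative session (provider and model names here are examples, not defaults):

    /model                                            # show current provider and model
    /model list                                       # list providers with config and env-var hints
    /model set anthropic claude-3-5-sonnet-20241022   # switch provider, optionally pinning a model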
henchman/providers/anthropic.py
CHANGED

@@ -4,12 +4,13 @@ This provider uses the Anthropic SDK to communicate with Claude models.
 Unlike OpenAI-compatible APIs, Anthropic has its own message format.
 """

+import asyncio
 import json
 import os
 from collections.abc import AsyncIterator
 from typing import Any

-from anthropic import AsyncAnthropic
+from anthropic import AsyncAnthropic, RateLimitError

 from henchman.providers.base import (
     FinishReason,

@@ -19,11 +20,14 @@ from henchman.providers.base import (
     ToolCall,
     ToolDeclaration,
 )
+from henchman.utils.ratelimit import AsyncRateLimiter
+from henchman.utils.tokens import TokenCounter

 __all__ = ["AnthropicProvider"]

 # Available Claude models
 ANTHROPIC_MODELS = [
+    "claude-opus-4-6",
     "claude-sonnet-4-20250514",
     "claude-3-7-sonnet-20250219",
     "claude-3-5-sonnet-20241022",

@@ -50,6 +54,8 @@ class AnthropicProvider(ModelProvider):
         api_key: str | None = None,
         model: str = "claude-sonnet-4-20250514",
         max_tokens: int = 8192,
+        tokens_per_minute: int = 30000,
+        max_retries: int = 3,
     ) -> None:
         """Initialize the Anthropic provider.

@@ -57,11 +63,15 @@ class AnthropicProvider(ModelProvider):
             api_key: API key for authentication. Defaults to ANTHROPIC_API_KEY env var.
             model: Default model to use.
             max_tokens: Maximum tokens in response.
+            tokens_per_minute: Maximum tokens per minute (rate limit).
+            max_retries: Maximum number of retries for rate limits.
         """
         self.api_key = api_key or os.getenv("ANTHROPIC_API_KEY", "")
         self.default_model = model
         self.max_tokens = max_tokens
+        self.max_retries = max_retries
         self._client = AsyncAnthropic(api_key=self.api_key or "placeholder")
+        self._rate_limiter = AsyncRateLimiter(tokens_per_minute)

     @property
     def name(self) -> str:

@@ -187,6 +197,7 @@ class AnthropicProvider(ModelProvider):
         # All other messages must have non-empty content
         if not (message.content or '').strip():
             raise ValueError(f"Message with role '{message.role}' cannot have empty content")
+
         system_prompt, formatted_messages = self._format_messages(messages)

         params: dict[str, Any] = {

@@ -202,62 +213,99 @@ class AnthropicProvider(ModelProvider):
         if tools:
             params["tools"] = [self._format_tool(t) for t in tools]

-        [previous streaming implementation: 49 removed lines not rendered in this diff view]
+        input_tokens = TokenCounter.count_messages(messages, model=self.default_model)
+        retries = 0
+        while True:
+            try:
+                # Rate limiting: wait for capacity based on input tokens
+                await self._rate_limiter.wait_for_capacity(input_tokens)
+
+                total_output_tokens = 0
+                async with self._client.messages.stream(**params) as stream:
+                    pending_tool_calls: dict[str, dict[str, Any]] = {}
+                    current_tool_id: str | None = None
+
+                    async for event in stream:
+                        content: str | None = None
+                        thinking: str | None = None
+                        tool_calls: list[ToolCall] | None = None
+                        finish_reason: FinishReason | None = None
+
+                        if event.type == "content_block_start":
+                            block = event.content_block
+                            if block.type == "tool_use":
+                                current_tool_id = block.id
+                                pending_tool_calls[block.id] = {
+                                    "id": block.id,
+                                    "name": block.name,
+                                    "arguments": "",
+                                }
+
+                        elif event.type == "content_block_delta":
+                            delta = event.delta
+                            if delta.type == "text_delta":
+                                content = delta.text
+                                total_output_tokens += TokenCounter.count_text(content, model=self.default_model)
+                            elif delta.type == "thinking_delta":
+                                thinking = delta.thinking
+                                total_output_tokens += TokenCounter.count_text(thinking, model=self.default_model)
+                            elif delta.type == "input_json_delta" and current_tool_id:
+                                pending_tool_calls[current_tool_id]["arguments"] += delta.partial_json
+                                # Note: we don't count JSON tokens precisely here as they come in,
+                                # but we could count the delta text.
+                                total_output_tokens += TokenCounter.count_text(delta.partial_json, model=self.default_model)
+
+                        elif event.type == "content_block_stop":
+                            current_tool_id = None
+
+                        elif event.type == "message_delta":
+                            finish_reason = self._parse_finish_reason(event.delta.stop_reason)
+
+                        # Emit completed tool calls
+                        if finish_reason == FinishReason.TOOL_CALLS and pending_tool_calls:
+                            tool_calls = []
+                            for tc_data in pending_tool_calls.values():
+                                try:
+                                    arguments = json.loads(tc_data["arguments"]) if tc_data["arguments"] else {}
+                                except json.JSONDecodeError:
+                                    arguments = {}
+                                tool_calls.append(
+                                    ToolCall(
+                                        id=tc_data["id"],
+                                        name=tc_data["name"],
+                                        arguments=arguments,
+                                    )
+                                )
+
+                        # Only yield if we have meaningful content
+                        if content is not None or thinking is not None or tool_calls or finish_reason:
+                            yield StreamChunk(
+                                content=content,
+                                tool_calls=tool_calls,
+                                finish_reason=finish_reason,
+                                thinking=thinking,
                             )

-        [trailing lines of the previous implementation: 8 removed lines not rendered in this diff view]
+                # Record final usage
+                await self._rate_limiter.add_usage(input_tokens + total_output_tokens)
+                break  # Success, exit retry loop
+
+            except RateLimitError as e:
+                retries += 1
+                if retries > self.max_retries:
+                    raise
+
+                # Hit rate limit, wait and retry
+                # Extract wait time from headers if available, otherwise use exponential backoff
+                wait_time = 5.0 * (2 ** (retries - 1))  # Default backoff
+
+                # Log to console if possible
+                from rich.console import Console
+                Console().print(f"[yellow]Rate limit reached (429). Retrying in {wait_time:.1f}s... (Attempt {retries}/{self.max_retries})[/yellow]")
+
+                await asyncio.sleep(wait_time)
+                # After sleeping, we loop back and try again.
+                # The rate limiter's wait_for_capacity will be called again if we move it inside the loop,
+                # but we already called it once. However, Anthropic says we are OVER the limit,
+                # so we should probably record that usage or just wait.
+                # Let's move the wait_for_capacity INSIDE the retry loop.
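
The retry branch uses a fixed exponential backoff; the comment mentions header extraction as a possibility, but as shipped the code only computes wait_time = 5.0 * (2 ** (retries - 1)). A quick restatement of the resulting schedule for the default max_retries=3:

    # Backoff schedule implied by wait_time = 5.0 * (2 ** (retries - 1)).
    max_retries = 3  # default from __init__
    for retries in range(1, max_retries + 1):
        wait_time = 5.0 * (2 ** (retries - 1))
        print(f"attempt {retries}: wait {wait_time:.1f}s")
    # attempt 1: wait 5.0s
    # attempt 2: wait 10.0s
    # attempt 3: wait 20.0s
    # A fourth RateLimitError is re-raised to the caller.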
henchman/utils/ratelimit.py
ADDED

@@ -0,0 +1,71 @@
+"""Rate limiting utilities for API providers."""
+
+import asyncio
+import time
+from collections import deque
+
+
+class AsyncRateLimiter:
+    """An asynchronous rate limiter using a sliding window.
+
+    Tracks token usage and provides a way to wait until enough capacity
+    is available.
+    """
+
+    def __init__(self, tokens_per_minute: int) -> None:
+        """Initialize the rate limiter.
+
+        Args:
+            tokens_per_minute: Maximum tokens allowed per 60-second window.
+        """
+        self.tokens_per_minute = tokens_per_minute
+        # Each entry is (timestamp, token_count)
+        self.usage: deque[tuple[float, int]] = deque()
+        self._lock = asyncio.Lock()
+
+    async def _clean_old_usage(self) -> None:
+        """Remove usage entries older than 60 seconds."""
+        now = time.time()
+        while self.usage and self.usage[0][0] < now - 60:
+            self.usage.popleft()
+
+    def _get_current_usage(self) -> int:
+        """Calculate the total tokens used in the last 60 seconds."""
+        return sum(tokens for _, tokens in self.usage)
+
+    async def wait_for_capacity(self, tokens: int) -> None:
+        """Wait until the specified number of tokens can be used.
+
+        Args:
+            tokens: The number of tokens to be used.
+        """
+        if tokens > self.tokens_per_minute:
+            # If a single request is larger than the limit, we can't really
+            # satisfy it, but we'll just wait for the window to be completely clear.
+            tokens = self.tokens_per_minute
+
+        async with self._lock:
+            while True:
+                await self._clean_old_usage()
+                current_usage = self._get_current_usage()
+
+                if current_usage + tokens <= self.tokens_per_minute:
+                    break
+
+                # Wait until the oldest entry expires
+                if self.usage:
+                    sleep_time = self.usage[0][0] + 60.1 - time.time()
+                    if sleep_time > 0:
+                        await asyncio.sleep(sleep_time)
+                else:
+                    # Should not happen if current_usage > 0, but safety first
+                    await asyncio.sleep(1)
+
+    async def add_usage(self, tokens: int) -> None:
+        """Record token usage.
+
+        Args:
+            tokens: The number of tokens used.
+        """
+        async with self._lock:
+            self.usage.append((time.time(), tokens))
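
A minimal usage sketch of the new limiter, assuming only the public API shown above (the 100-token figures are arbitrary placeholders):

    import asyncio

    from henchman.utils.ratelimit import AsyncRateLimiter

    async def main() -> None:
        limiter = AsyncRateLimiter(tokens_per_minute=30000)

        # Block until the sliding 60s window has room for this request...
        await limiter.wait_for_capacity(100)
        # ... make the API call here ...

        # ...then record what was actually consumed so later callers see it.
        await limiter.add_usage(100)

    asyncio.run(main())

One design note: wait_for_capacity holds the internal lock while it sleeps, so concurrent callers (including add_usage) are effectively queued behind a waiter until capacity frees up.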
henchman/utils/tokens.py
CHANGED

@@ -27,6 +27,7 @@ MODEL_LIMITS: dict[str, int] = {
     "gpt-4": 8192,
     "gpt-3.5-turbo": 16385,
     # Anthropic models (these use different tokenization but we estimate)
+    "claude-opus-4-6": 200000,
     "claude-sonnet-4-20250514": 200000,
     "claude-3-7-sonnet-20250219": 200000,
     "claude-3-5-sonnet-20241022": 200000,
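
With this entry in place, context-window lookups resolve for the newly listed model id; a quick check, assuming MODEL_LIMITS is importable as declared above:

    from henchman.utils.tokens import MODEL_LIMITS

    # The new entry registers a 200k context window for the added model id.
    assert MODEL_LIMITS["claude-opus-4-6"] == 200000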
henchman/version.py
CHANGED

[version bump hunk not rendered in this diff view]

{henchman_ai-0.1.14.dist-info → henchman_ai-0.1.16.dist-info}/METADATA
CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: henchman-ai
-Version: 0.1.14
+Version: 0.1.16
 Summary: A model-agnostic AI agent CLI - your AI henchman for the terminal
 Project-URL: Homepage, https://github.com/MGPowerlytics/henchman-ai
 Project-URL: Repository, https://github.com/MGPowerlytics/henchman-ai
{henchman_ai-0.1.14.dist-info → henchman_ai-0.1.16.dist-info}/RECORD
CHANGED

@@ -1,8 +1,8 @@
 henchman/__init__.py,sha256=P_jCbtgAVbk2hn6uMum2UYkE7ptT361mWRkUZz0xKvk,148
 henchman/__main__.py,sha256=3oRWZvoWON5ErlJFYOOSU5p1PERRyK6MkT2LGEnbb2o,131
-henchman/version.py,sha256=
+henchman/version.py,sha256=72yF3FAUrFWwBLOTlo9ueDmjrA3nvCMAIV3CJ9qqRlw,161
 henchman/cli/__init__.py,sha256=Gv86a_heuBLqUd-y46JZUyzUaDl5H-9RtcWGr3rMwBw,673
-henchman/cli/app.py,sha256=
+henchman/cli/app.py,sha256=2hmIZghPi2C__c_0xIqrab5JpIBArNsGxAqZY1cn8ZI,11739
 henchman/cli/console.py,sha256=S4Jvq0UTmu9KtOkLNsIsvG_8X9eg1Guc6NAh8T_JeNI,8017
 henchman/cli/input.py,sha256=1-dz9mg3IvCtKMJbXzstxNbJYpwU1pGr-a0wJ0gqUik,7129
 henchman/cli/json_output.py,sha256=9kP9S5q0xBgP4HQGTT4P6DDT76F9VVTdEY_KiEpoZnI,2669

@@ -11,10 +11,11 @@ henchman/cli/repl.py,sha256=YLV1oVfGASYWLux4G5_I88RCaGk7y97um3B6uRxAacg,27844
 henchman/cli/repl.py.backup,sha256=3iagruUgsvtcfpDv1mTAYg4I14X4CaNSEeMQjj91src,15638
 henchman/cli/repl.py.backup2,sha256=-zgSUrnobd_sHq3jG-8NbwPTVlPc3FaqSkv32gAFdPo,11328
 henchman/cli/commands/__init__.py,sha256=8s6NBCPlc4jKTCdvnKJCmdLwRCQ4QLCARjQbr7ICipw,3828
-henchman/cli/commands/builtins.py,sha256
+henchman/cli/commands/builtins.py,sha256=LQUnLivHD4ydLyeLfGzg8FxFgL0yDQc4t9aO35z6avM,5521
 henchman/cli/commands/chat.py,sha256=ePPRh68ZHHS_l1Uj7fUtjBQrVKOx6WvZQsuIzXdxgjY,6204
 henchman/cli/commands/extensions.py,sha256=r7PfvbBjwBr5WhF8G49p29z7FKx6geRJiR-R67pj6i0,1758
 henchman/cli/commands/mcp.py,sha256=bbW1J9-fIpvDBIba3L1MAkNqCjFBTZnZLNIgf6LjJEA,3554
+henchman/cli/commands/model.py,sha256=_K6XaSRgf4R8BHC2VSdd04K1hIsQexheNc-QrkqL70E,10435
 henchman/cli/commands/plan.py,sha256=5ZXePoMVIKBxugSnDB6N2TEDpl2xZszQDz9wTQffzpY,2486
 henchman/cli/commands/rag.py,sha256=sXY7MCZ4UMVzNX2ALVM8wt7q82PZovwVHOSMDfot8jQ,7308
 henchman/cli/commands/skill.py,sha256=azXb6-KXjtZKwHiBV-Ppk6CdJQKZhetr46hNgZ_r45Q,8096

@@ -38,7 +39,7 @@ henchman/mcp/config.py,sha256=qzAJITMpQlfVfZXiUN0SLDKEratXR-9BKih7JJA_-RA,1390
 henchman/mcp/manager.py,sha256=DBh85SmdRbU96DLIIwRmT6QYBKRMNMr5vt1_UhHxrrA,3348
 henchman/mcp/tool.py,sha256=jeL-FtgC2JSbhfhR8RF4vO9PxLQet-KFZuDCN67cYG8,2654
 henchman/providers/__init__.py,sha256=Vh8yPhJSCtDgvEvYA2YHRQvuGea6eCm_CCG1rxpLYZE,795
-henchman/providers/anthropic.py,sha256=
+henchman/providers/anthropic.py,sha256=XQJT_DGMM7VYsY6fsW44OYAikyelEIffdwuzfhmhG0o,12223
 henchman/providers/base.py,sha256=23YM21uHbSXN7vT92CUlN6FgIoztSOGMg7yFUwh2c6A,2814
 henchman/providers/deepseek.py,sha256=O__Gxy0xHCDhksHJgTa5f-u-5RhbT8ufh7dA6ly2yZ4,1349
 henchman/providers/ollama.py,sha256=g4vGTSlv8UEW82yrVRLCqjJqdDW_sG-kyvyRiE6ZbYg,1911

@@ -75,11 +76,12 @@ henchman/tools/builtins/web_fetch.py,sha256=uwgZm0ye3yDuS2U2DPV4D-8bjviYDTKN-cNi
 henchman/tools/builtins/web_search.py,sha256=dapmhN5Yf_WYJT5bnwkkhyDe1n2aDmT-mU9ZY1BC6Sw,4265
 henchman/utils/__init__.py,sha256=ayu2XRNx3Fw0z8vbIne63A3gBjxu779QE8sUQsjNnm4,240
 henchman/utils/compaction.py,sha256=ARS0jUDI2adsoCTfJjygRom31N16QtWbRzNXDKzX6cA,22871
+henchman/utils/ratelimit.py,sha256=P8HJYf68fSYNFK1bjhjdennL-1Vo7GwYzivQKlZh-Z4,2422
 henchman/utils/retry.py,sha256=sobZk9LLGxglSJw_jeNaBYCrvH14YNFrBVyp_OwLWcw,4993
-henchman/utils/tokens.py,sha256=
+henchman/utils/tokens.py,sha256=w5HjySzg5t9RYL-ivhhHLnT2gV0a83j4rwKDZGgAF6c,5696
 henchman/utils/validation.py,sha256=moj4LQXVXt2J-3_pWVH_0-EabyRYApOU2Oh5JSTIua8,4146
-henchman_ai-0.1.
-henchman_ai-0.1.
-henchman_ai-0.1.
-henchman_ai-0.1.
-henchman_ai-0.1.
+henchman_ai-0.1.16.dist-info/METADATA,sha256=QkiPPnTpBk2DJ2oU_NHis7J8-EW1ixy9zNGJwHSs01M,9186
+henchman_ai-0.1.16.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
+henchman_ai-0.1.16.dist-info/entry_points.txt,sha256=dtPyd6BzK3A8lmrj1KXTFlHBplIWcWMdryjtR0jw5iU,51
+henchman_ai-0.1.16.dist-info/licenses/LICENSE,sha256=TMoSCCG1I1vCMK-Bjtvxe80E8kIdSdrtuQXYHc_ahqg,1064
+henchman_ai-0.1.16.dist-info/RECORD,,
{henchman_ai-0.1.14.dist-info → henchman_ai-0.1.16.dist-info}/WHEEL: file without changes
{henchman_ai-0.1.14.dist-info → henchman_ai-0.1.16.dist-info}/entry_points.txt: file without changes
{henchman_ai-0.1.14.dist-info → henchman_ai-0.1.16.dist-info}/licenses/LICENSE: file without changes