hanzo 0.3.20__py3-none-any.whl → 0.3.21__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release. This version of hanzo might be problematic.
- hanzo/cli.py +1 -1
- hanzo/dev.py +5 -12
- hanzo/fallback_handler.py +14 -9
- hanzo/rate_limiter.py +332 -0
- hanzo/streaming.py +271 -0
- {hanzo-0.3.20.dist-info → hanzo-0.3.21.dist-info}/METADATA +1 -1
- {hanzo-0.3.20.dist-info → hanzo-0.3.21.dist-info}/RECORD +9 -7
- {hanzo-0.3.20.dist-info → hanzo-0.3.21.dist-info}/WHEEL +0 -0
- {hanzo-0.3.20.dist-info → hanzo-0.3.21.dist-info}/entry_points.txt +0 -0
hanzo/cli.py
CHANGED
hanzo/dev.py
CHANGED
@@ -929,21 +929,14 @@ Examples:
             # Try smart fallback if no specific model configured
             if not hasattr(self.orchestrator, 'orchestrator_model') or \
                self.orchestrator.orchestrator_model == "auto":
-
-
+                # Use streaming if available
+                from .streaming import stream_with_fallback
+                response = await stream_with_fallback(enhanced_message, console)
+
                 if response:
                     # Save AI response to memory
                     self.memory_manager.add_message("assistant", response)
-
-                    from rich.panel import Panel
-                    console.print()
-                    console.print(Panel(
-                        response,
-                        title="[bold cyan]AI Response[/bold cyan]",
-                        title_align="left",
-                        border_style="dim cyan",
-                        padding=(1, 2)
-                    ))
+                    # Response already displayed by streaming handler
                     return
                 else:
                     console.print("[red]No AI options available. Please configure API keys or install tools.[/red]")
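The removed block printed the finished response once, in a rich Panel; after this change, rendering is owned by hanzo/streaming.py, which repaints a Panel inside rich.live.Live as chunks arrive (see the full listing further down). A self-contained sketch of that display pattern, illustrative only and not part of the wheel:

import asyncio
from rich.console import Console
from rich.live import Live
from rich.panel import Panel

async def demo():
    # Illustrative only: the Live-updated Panel pattern that replaces the
    # one-shot console.print(Panel(...)) removed above.
    console = Console()
    text = ""
    with Live(Panel(""), console=console, refresh_per_second=10) as live:
        for chunk in ["Hello", ", ", "world", "!"]:
            text += chunk
            live.update(Panel(text, title="AI Response"))
            await asyncio.sleep(0.2)

asyncio.run(demo())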
hanzo/fallback_handler.py
CHANGED
@@ -158,6 +158,8 @@ async def smart_chat(message: str, console=None) -> Optional[str]:
     Smart chat that automatically tries available AI options.
     Returns the AI response or None if all options fail.
     """
+    from .rate_limiter import smart_limiter
+
     handler = FallbackHandler()

     if console:

@@ -171,17 +173,20 @@ async def smart_chat(message: str, console=None) -> Optional[str]:

     option_type, model = best_option

-    # Try the primary option
+    # Try the primary option with rate limiting
     try:
         if option_type == "openai_api":
-
-
-
-
-
-
-
-
+            async def call_openai():
+                from openai import AsyncOpenAI
+                client = AsyncOpenAI()
+                response = await client.chat.completions.create(
+                    model="gpt-4",
+                    messages=[{"role": "user", "content": message}],
+                    max_tokens=500
+                )
+                return response.choices[0].message.content
+
+            return await smart_limiter.execute_with_limit("openai", call_openai)

         elif option_type == "anthropic_api":
             from anthropic import AsyncAnthropic
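The hunk stops at new line 192, just as the anthropic_api branch begins, and the diff viewer did not capture the text of the eight removed lines (the previous inline OpenAI call). Presumably the same rate-limited wrapper pattern extends to the Anthropic branch; a hypothetical sketch mirroring the call_openai wrapper above, where the model name and response handling are assumptions rather than the packaged code:

import asyncio
from hanzo.rate_limiter import smart_limiter

async def ask_claude(message: str) -> str:
    # Hypothetical Anthropic counterpart to the call_openai wrapper above.
    async def call_anthropic():
        from anthropic import AsyncAnthropic
        client = AsyncAnthropic()
        response = await client.messages.create(
            model="claude-3-5-sonnet-20241022",
            messages=[{"role": "user", "content": message}],
            max_tokens=500,
        )
        # Anthropic returns a list of content blocks; take the first text block.
        return response.content[0].text

    # "anthropic" selects the 50 requests/minute preset in hanzo/rate_limiter.py.
    return await smart_limiter.execute_with_limit("anthropic", call_anthropic)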
hanzo/rate_limiter.py
ADDED
@@ -0,0 +1,332 @@
+"""
+Rate limiting and error recovery for Hanzo Dev.
+Prevents API overuse and handles failures gracefully.
+"""
+
+import time
+import asyncio
+from typing import Dict, Optional, Any, Callable
+from dataclasses import dataclass, field
+from datetime import datetime, timedelta
+from collections import deque
+import random
+
+
+@dataclass
+class RateLimitConfig:
+    """Configuration for rate limiting."""
+    requests_per_minute: int = 20
+    requests_per_hour: int = 100
+    burst_size: int = 5
+    cooldown_seconds: int = 60
+    max_retries: int = 3
+    backoff_base: float = 2.0
+    jitter: bool = True
+
+
+@dataclass
+class RateLimitState:
+    """Current state of rate limiter."""
+    minute_requests: deque = field(default_factory=lambda: deque(maxlen=60))
+    hour_requests: deque = field(default_factory=lambda: deque(maxlen=3600))
+    last_request: Optional[datetime] = None
+    consecutive_errors: int = 0
+    total_requests: int = 0
+    total_errors: int = 0
+    is_throttled: bool = False
+    throttle_until: Optional[datetime] = None
+
+
+class RateLimiter:
+    """Rate limiter with error recovery."""
+
+    def __init__(self, config: RateLimitConfig = None):
+        """Initialize rate limiter."""
+        self.config = config or RateLimitConfig()
+        self.states: Dict[str, RateLimitState] = {}
+
+    def get_state(self, key: str = "default") -> RateLimitState:
+        """Get or create state for a key."""
+        if key not in self.states:
+            self.states[key] = RateLimitState()
+        return self.states[key]
+
+    async def check_rate_limit(self, key: str = "default") -> tuple[bool, float]:
+        """
+        Check if request is allowed.
+        Returns (allowed, wait_seconds).
+        """
+        state = self.get_state(key)
+        now = datetime.now()
+
+        # Check if throttled
+        if state.is_throttled and state.throttle_until:
+            if now < state.throttle_until:
+                wait_seconds = (state.throttle_until - now).total_seconds()
+                return False, wait_seconds
+            else:
+                # Throttle period ended
+                state.is_throttled = False
+                state.throttle_until = None
+
+        # Clean old requests
+        minute_ago = now - timedelta(minutes=1)
+        hour_ago = now - timedelta(hours=1)
+
+        # Remove old requests from queues
+        while state.minute_requests and state.minute_requests[0] < minute_ago:
+            state.minute_requests.popleft()
+
+        while state.hour_requests and state.hour_requests[0] < hour_ago:
+            state.hour_requests.popleft()
+
+        # Check minute limit
+        if len(state.minute_requests) >= self.config.requests_per_minute:
+            # Calculate wait time
+            oldest = state.minute_requests[0]
+            wait_seconds = (oldest + timedelta(minutes=1) - now).total_seconds()
+            return False, max(0, wait_seconds)
+
+        # Check hour limit
+        if len(state.hour_requests) >= self.config.requests_per_hour:
+            # Calculate wait time
+            oldest = state.hour_requests[0]
+            wait_seconds = (oldest + timedelta(hours=1) - now).total_seconds()
+            return False, max(0, wait_seconds)
+
+        # Check burst limit
+        if state.last_request:
+            time_since_last = (now - state.last_request).total_seconds()
+            if time_since_last < 1.0 / self.config.burst_size:
+                wait_seconds = (1.0 / self.config.burst_size) - time_since_last
+                return False, wait_seconds
+
+        return True, 0
+
+    async def acquire(self, key: str = "default") -> bool:
+        """
+        Acquire a rate limit slot.
+        Waits if necessary.
+        """
+        while True:
+            allowed, wait_seconds = await self.check_rate_limit(key)
+
+            if allowed:
+                # Record request
+                state = self.get_state(key)
+                now = datetime.now()
+                state.minute_requests.append(now)
+                state.hour_requests.append(now)
+                state.last_request = now
+                state.total_requests += 1
+                return True
+
+            # Wait before retrying
+            if wait_seconds > 0:
+                await asyncio.sleep(min(wait_seconds, 5))  # Check every 5 seconds max
+
+    def record_error(self, key: str = "default", error: Exception = None):
+        """Record an error for the key."""
+        state = self.get_state(key)
+        state.consecutive_errors += 1
+        state.total_errors += 1
+
+        # Implement exponential backoff on errors
+        if state.consecutive_errors >= 3:
+            # Throttle for increasing periods
+            backoff_minutes = min(
+                self.config.backoff_base ** (state.consecutive_errors - 2),
+                60  # Max 1 hour
+            )
+            state.is_throttled = True
+            state.throttle_until = datetime.now() + timedelta(minutes=backoff_minutes)
+
+    def record_success(self, key: str = "default"):
+        """Record a successful request."""
+        state = self.get_state(key)
+        state.consecutive_errors = 0
+
+    def get_status(self, key: str = "default") -> Dict[str, Any]:
+        """Get current status for monitoring."""
+        state = self.get_state(key)
+        now = datetime.now()
+
+        return {
+            "requests_last_minute": len(state.minute_requests),
+            "requests_last_hour": len(state.hour_requests),
+            "total_requests": state.total_requests,
+            "total_errors": state.total_errors,
+            "consecutive_errors": state.consecutive_errors,
+            "is_throttled": state.is_throttled,
+            "throttle_remaining": (
+                (state.throttle_until - now).total_seconds()
+                if state.throttle_until and now < state.throttle_until
+                else 0
+            ),
+            "minute_limit": self.config.requests_per_minute,
+            "hour_limit": self.config.requests_per_hour,
+        }
+
+
+class ErrorRecovery:
+    """Error recovery with retries and fallback."""
+
+    def __init__(self, rate_limiter: RateLimiter = None):
+        """Initialize error recovery."""
+        self.rate_limiter = rate_limiter or RateLimiter()
+        self.fallback_handlers: Dict[type, Callable] = {}
+
+    def register_fallback(self, error_type: type, handler: Callable):
+        """Register a fallback handler for an error type."""
+        self.fallback_handlers[error_type] = handler
+
+    async def with_retry(
+        self,
+        func: Callable,
+        *args,
+        key: str = "default",
+        max_retries: Optional[int] = None,
+        **kwargs
+    ) -> Any:
+        """
+        Execute function with retry logic.
+        """
+        max_retries = max_retries or self.rate_limiter.config.max_retries
+        last_error = None
+
+        for attempt in range(max_retries):
+            try:
+                # Check rate limit
+                await self.rate_limiter.acquire(key)
+
+                # Execute function
+                result = await func(*args, **kwargs)
+
+                # Record success
+                self.rate_limiter.record_success(key)
+
+                return result
+
+            except Exception as e:
+                last_error = e
+                self.rate_limiter.record_error(key, e)
+
+                # Check for fallback handler
+                for error_type, handler in self.fallback_handlers.items():
+                    if isinstance(e, error_type):
+                        try:
+                            return await handler(*args, **kwargs)
+                        except:
+                            pass  # Fallback failed, continue with retry
+
+                # Calculate backoff
+                if attempt < max_retries - 1:
+                    backoff = self.rate_limiter.config.backoff_base ** attempt
+
+                    # Add jitter if configured
+                    if self.rate_limiter.config.jitter:
+                        backoff *= (0.5 + random.random())
+
+                    await asyncio.sleep(min(backoff, 60))  # Max 60 seconds
+
+        # All retries failed
+        raise last_error or Exception("All retry attempts failed")
+
+    async def with_circuit_breaker(
+        self,
+        func: Callable,
+        *args,
+        key: str = "default",
+        threshold: int = 5,
+        timeout: int = 60,
+        **kwargs
+    ) -> Any:
+        """
+        Execute function with circuit breaker pattern.
+        """
+        state = self.rate_limiter.get_state(key)
+
+        # Check if circuit is open
+        if state.is_throttled:
+            raise Exception(f"Circuit breaker open for {key}")
+
+        try:
+            result = await self.with_retry(func, *args, key=key, **kwargs)
+            return result
+
+        except Exception as e:
+            # Check if we should open the circuit
+            if state.consecutive_errors >= threshold:
+                state.is_throttled = True
+                state.throttle_until = datetime.now() + timedelta(seconds=timeout)
+                raise Exception(f"Circuit breaker triggered for {key}: {e}")
+            raise
+
+
+class SmartRateLimiter:
+    """Smart rate limiter that adapts to API responses."""
+
+    def __init__(self):
+        """Initialize smart rate limiter."""
+        self.limiters: Dict[str, RateLimiter] = {}
+        self.recovery = ErrorRecovery()
+
+        # Default configs for known APIs
+        self.configs = {
+            "openai": RateLimitConfig(
+                requests_per_minute=60,
+                requests_per_hour=1000,
+                burst_size=10
+            ),
+            "anthropic": RateLimitConfig(
+                requests_per_minute=50,
+                requests_per_hour=1000,
+                burst_size=5
+            ),
+            "local": RateLimitConfig(
+                requests_per_minute=100,
+                requests_per_hour=10000,
+                burst_size=20
+            ),
+            "free": RateLimitConfig(
+                requests_per_minute=10,
+                requests_per_hour=100,
+                burst_size=2
+            ),
+        }
+
+    def get_limiter(self, api_type: str) -> RateLimiter:
+        """Get or create limiter for API type."""
+        if api_type not in self.limiters:
+            config = self.configs.get(api_type, RateLimitConfig())
+            self.limiters[api_type] = RateLimiter(config)
+        return self.limiters[api_type]
+
+    async def execute_with_limit(
+        self,
+        api_type: str,
+        func: Callable,
+        *args,
+        **kwargs
+    ) -> Any:
+        """Execute function with appropriate rate limiting."""
+        limiter = self.get_limiter(api_type)
+        recovery = ErrorRecovery(limiter)
+
+        return await recovery.with_retry(
+            func,
+            *args,
+            key=api_type,
+            **kwargs
+        )
+
+    def get_all_status(self) -> Dict[str, Dict[str, Any]]:
+        """Get status of all limiters."""
+        return {
+            api_type: limiter.get_status()
+            for api_type, limiter in self.limiters.items()
+        }
+
+
+# Global instance for easy use
+smart_limiter = SmartRateLimiter()
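A short usage sketch for this module (hypothetical caller code, not part of the wheel; it relies only on the names defined in the listing above, plus a dummy coroutine standing in for a real API call):

import asyncio
from hanzo.rate_limiter import RateLimitConfig, RateLimiter, smart_limiter

async def fake_api_call(prompt: str) -> str:
    # Stand-in for a real API call.
    return f"echo: {prompt}"

async def main():
    # Per-API limiting with retry and backoff: "openai" selects the
    # 60/minute, 1000/hour, burst-10 preset defined in SmartRateLimiter.
    result = await smart_limiter.execute_with_limit("openai", fake_api_call, "hello")
    print(result)

    # A standalone limiter with a custom budget. acquire() blocks until a
    # slot is free; record_error() drives the throttle: after 3 consecutive
    # errors the key is throttled for backoff_base ** (errors - 2) minutes
    # (2, 4, 8, ... capped at 60).
    limiter = RateLimiter(RateLimitConfig(requests_per_minute=5, burst_size=2))
    await limiter.acquire("my-api")
    limiter.record_success("my-api")
    print(limiter.get_status("my-api"))

asyncio.run(main())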
hanzo/streaming.py
ADDED
@@ -0,0 +1,271 @@
+"""
+Streaming response handler for Hanzo Dev.
+Provides real-time feedback as AI generates responses.
+"""
+
+import asyncio
+from typing import AsyncGenerator, Optional, Callable
+from rich.console import Console
+from rich.live import Live
+from rich.panel import Panel
+from rich.markdown import Markdown
+import time
+
+
+class StreamingHandler:
+    """Handles streaming responses from AI models."""
+
+    def __init__(self, console: Console = None):
+        """Initialize streaming handler."""
+        self.console = console or Console()
+        self.current_response = ""
+        self.is_streaming = False
+
+    async def stream_openai(self, client, messages: list, model: str = "gpt-4") -> str:
+        """Stream response from OpenAI API."""
+        try:
+            stream = await client.chat.completions.create(
+                model=model,
+                messages=messages,
+                stream=True,
+                max_tokens=1000
+            )
+
+            self.current_response = ""
+            self.is_streaming = True
+
+            with Live(
+                Panel("", title="[bold cyan]AI Response[/bold cyan]",
+                      title_align="left", border_style="dim cyan"),
+                console=self.console,
+                refresh_per_second=10
+            ) as live:
+                async for chunk in stream:
+                    if chunk.choices[0].delta.content:
+                        self.current_response += chunk.choices[0].delta.content
+                        live.update(
+                            Panel(
+                                Markdown(self.current_response),
+                                title="[bold cyan]AI Response[/bold cyan]",
+                                title_align="left",
+                                border_style="dim cyan",
+                                padding=(1, 2)
+                            )
+                        )
+
+            self.is_streaming = False
+            return self.current_response
+
+        except Exception as e:
+            self.console.print(f"[red]Streaming error: {e}[/red]")
+            self.is_streaming = False
+            return None
+
+    async def stream_anthropic(self, client, messages: list, model: str = "claude-3-5-sonnet-20241022") -> str:
+        """Stream response from Anthropic API."""
+        try:
+            self.current_response = ""
+            self.is_streaming = True
+
+            with Live(
+                Panel("", title="[bold cyan]AI Response[/bold cyan]",
+                      title_align="left", border_style="dim cyan"),
+                console=self.console,
+                refresh_per_second=10
+            ) as live:
+                async with client.messages.stream(
+                    model=model,
+                    messages=messages,
+                    max_tokens=1000
+                ) as stream:
+                    async for text in stream.text_stream:
+                        self.current_response += text
+                        live.update(
+                            Panel(
+                                Markdown(self.current_response),
+                                title="[bold cyan]AI Response[/bold cyan]",
+                                title_align="left",
+                                border_style="dim cyan",
+                                padding=(1, 2)
+                            )
+                        )
+
+            self.is_streaming = False
+            return self.current_response
+
+        except Exception as e:
+            self.console.print(f"[red]Streaming error: {e}[/red]")
+            self.is_streaming = False
+            return None
+
+    async def stream_ollama(self, message: str, model: str = "llama3.2") -> str:
+        """Stream response from Ollama local model."""
+        import httpx
+
+        try:
+            self.current_response = ""
+            self.is_streaming = True
+
+            with Live(
+                Panel("", title="[bold cyan]AI Response (Local)[/bold cyan]",
+                      title_align="left", border_style="dim cyan"),
+                console=self.console,
+                refresh_per_second=10
+            ) as live:
+                async with httpx.AsyncClient() as client:
+                    async with client.stream(
+                        "POST",
+                        "http://localhost:11434/api/generate",
+                        json={"model": model, "prompt": message, "stream": True},
+                        timeout=60.0
+                    ) as response:
+                        async for line in response.aiter_lines():
+                            if line:
+                                import json
+                                data = json.loads(line)
+                                if "response" in data:
+                                    self.current_response += data["response"]
+                                    live.update(
+                                        Panel(
+                                            Markdown(self.current_response),
+                                            title="[bold cyan]AI Response (Local)[/bold cyan]",
+                                            title_align="left",
+                                            border_style="dim cyan",
+                                            padding=(1, 2)
+                                        )
+                                    )
+                                if data.get("done", False):
+                                    break
+
+            self.is_streaming = False
+            return self.current_response
+
+        except Exception as e:
+            self.console.print(f"[red]Ollama streaming error: {e}[/red]")
+            self.is_streaming = False
+            return None
+
+    async def simulate_streaming(self, text: str, delay: float = 0.02) -> str:
+        """Simulate streaming for non-streaming APIs."""
+        self.current_response = ""
+        self.is_streaming = True
+
+        words = text.split()
+
+        with Live(
+            Panel("", title="[bold cyan]AI Response[/bold cyan]",
+                  title_align="left", border_style="dim cyan"),
+            console=self.console,
+            refresh_per_second=20
+        ) as live:
+            for i, word in enumerate(words):
+                self.current_response += word
+                if i < len(words) - 1:
+                    self.current_response += " "
+
+                live.update(
+                    Panel(
+                        Markdown(self.current_response),
+                        title="[bold cyan]AI Response[/bold cyan]",
+                        title_align="left",
+                        border_style="dim cyan",
+                        padding=(1, 2)
+                    )
+                )
+                await asyncio.sleep(delay)
+
+        self.is_streaming = False
+        return self.current_response
+
+    def stop_streaming(self):
+        """Stop current streaming operation."""
+        self.is_streaming = False
+        if self.current_response:
+            self.console.print(f"\n[yellow]Streaming interrupted[/yellow]")
+
+
+class TypewriterEffect:
+    """Provides typewriter effect for text output."""
+
+    def __init__(self, console: Console = None):
+        self.console = console or Console()
+
+    async def type_text(self, text: str, speed: float = 0.03):
+        """Type text with typewriter effect."""
+        for char in text:
+            self.console.print(char, end="")
+            await asyncio.sleep(speed)
+        self.console.print()  # New line at end
+
+    async def type_code(self, code: str, language: str = "python", speed: float = 0.01):
+        """Type code with syntax highlighting."""
+        from rich.syntax import Syntax
+
+        # Build up code progressively
+        current_code = ""
+        lines = code.split('\n')
+
+        with Live(console=self.console, refresh_per_second=30) as live:
+            for line in lines:
+                for char in line:
+                    current_code += char
+                    syntax = Syntax(current_code, language, theme="monokai", line_numbers=True)
+                    live.update(syntax)
+                    await asyncio.sleep(speed)
+                current_code += '\n'
+                syntax = Syntax(current_code, language, theme="monokai", line_numbers=True)
+                live.update(syntax)
+
+
+async def stream_with_fallback(message: str, console: Console = None) -> Optional[str]:
+    """
+    Stream response with automatic fallback to available options.
+    """
+    import os
+    handler = StreamingHandler(console)
+
+    # Try OpenAI streaming
+    if os.getenv("OPENAI_API_KEY"):
+        try:
+            from openai import AsyncOpenAI
+            client = AsyncOpenAI()
+            return await handler.stream_openai(
+                client,
+                [{"role": "user", "content": message}]
+            )
+        except Exception as e:
+            if console:
+                console.print(f"[yellow]OpenAI streaming failed: {e}[/yellow]")
+
+    # Try Anthropic streaming
+    if os.getenv("ANTHROPIC_API_KEY"):
+        try:
+            from anthropic import AsyncAnthropic
+            client = AsyncAnthropic()
+            return await handler.stream_anthropic(
+                client,
+                [{"role": "user", "content": message}]
+            )
+        except Exception as e:
+            if console:
+                console.print(f"[yellow]Anthropic streaming failed: {e}[/yellow]")
+
+    # Try Ollama streaming
+    try:
+        return await handler.stream_ollama(message)
+    except:
+        pass
+
+    # Fallback to non-streaming with simulated effect
+    if console:
+        console.print("[yellow]Falling back to non-streaming mode[/yellow]")
+
+    # Get response from fallback handler
+    from .fallback_handler import smart_chat
+    response = await smart_chat(message, console)
+
+    if response:
+        # Simulate streaming
+        return await handler.simulate_streaming(response)
+
+    return None
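Likewise, a hypothetical caller for this module (not part of the wheel; assumes an OPENAI_API_KEY or ANTHROPIC_API_KEY in the environment, or an Ollama server on localhost:11434, otherwise the call drops through to the smart_chat fallback):

import asyncio
from rich.console import Console
from hanzo.streaming import StreamingHandler, stream_with_fallback

async def main():
    console = Console()
    # Tries OpenAI, then Anthropic, then Ollama, then falls back to
    # smart_chat() with simulated word-by-word streaming; returns the
    # accumulated text, or None if every option failed.
    response = await stream_with_fallback("Explain rate limiting briefly", console)

    if response is None:
        # The handler can also replay any string with the simulated effect.
        handler = StreamingHandler(console)
        await handler.simulate_streaming("No AI backend was available.")

asyncio.run(main())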
{hanzo-0.3.20.dist-info → hanzo-0.3.21.dist-info}/METADATA
CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: hanzo
-Version: 0.3.20
+Version: 0.3.21
 Summary: Hanzo AI - Complete AI Infrastructure Platform with CLI, Router, MCP, and Agent Runtime
 Project-URL: Homepage, https://hanzo.ai
 Project-URL: Repository, https://github.com/hanzoai/python-sdk
{hanzo-0.3.20.dist-info → hanzo-0.3.21.dist-info}/RECORD
CHANGED

@@ -1,12 +1,14 @@
 hanzo/__init__.py,sha256=f6N_RcJZ0F9ADrROlvPi1OrgwjF8cWQm34cml8hb1zk,169
 hanzo/__main__.py,sha256=F3Vz0Ty3bdAj_8oxyETMIqxlmNRnJOAFB1XPxbyfouI,105
-hanzo/cli.py,sha256=
-hanzo/dev.py,sha256=
-hanzo/fallback_handler.py,sha256=
+hanzo/cli.py,sha256=uxlJ30ZBwr_q8-iTipatwCXUA8cY3FZFMnVySRxehes,18586
+hanzo/dev.py,sha256=OcRGLjH9m_HSFhVRm7i8L4TtgB-9XseXLA0RexBHUH0,102736
+hanzo/fallback_handler.py,sha256=UJOzfGbf_5rg168PyzC9BQlB6yJ-lEW7B8MkHkVSiK8,10148
 hanzo/mcp_server.py,sha256=XVygFNn-9CVdu8c95sP7fQjIRtA8K7nsGpgQNe44BRg,460
 hanzo/memory_manager.py,sha256=BjnHN0Fu6Lqg3aKGTVPKfQxDBjtengjwG3ac8zoirrQ,15101
 hanzo/orchestrator_config.py,sha256=JV7DS8aVZwBJ9XzgkQronFwV_A50QyXG3MH_pKwmCB8,11006
+hanzo/rate_limiter.py,sha256=wDC_dwx1pg87YnfNwaELnW2zYRAYjmrXb0_LhsJbz5c,11442
 hanzo/repl.py,sha256=sW1quuqGkJ_AqgjN2vLNdtWgKDlXIkXiO9Bo1QQI0G4,1089
+hanzo/streaming.py,sha256=ZcFGD0k-RjURoamqCeq1Ripxs6I-ousaeQUFIs52Sic,10188
 hanzo/commands/__init__.py,sha256=7rh94TPNhdq4gJBJS0Ayf0fGNChQYCQCJcJPmYYehiQ,182
 hanzo/commands/agent.py,sha256=DXCfuxHfmC90IoIOL6BJyp7h2yNUo-VIxrfl4OMh8CU,3480
 hanzo/commands/auth.py,sha256=JrM-EV4XDHzNDJeGJMjAr69T0Rxez53HEzlNo0jQ8nE,11187

@@ -26,7 +28,7 @@ hanzo/utils/__init__.py,sha256=5RRwKI852vp8smr4xCRgeKfn7dLEnHbdXGfVYTZ5jDQ,69
 hanzo/utils/config.py,sha256=FD_LoBpcoF5dgJ7WL4o6LDp2pdOy8kS-dJ6iRO2GcGM,4728
 hanzo/utils/net_check.py,sha256=YFbJ65SzfDYHkHLZe3n51VhId1VI3zhyx8p6BM-l6jE,3017
 hanzo/utils/output.py,sha256=W0j3psF07vJiX4s02gbN4zYWfbKNsb8TSIoagBSf5vA,2704
-hanzo-0.3.20.dist-info/METADATA,sha256=
-hanzo-0.3.20.dist-info/WHEEL,sha256=
-hanzo-0.3.20.dist-info/entry_points.txt,sha256=
-hanzo-0.3.20.dist-info/RECORD,,
+hanzo-0.3.21.dist-info/METADATA,sha256=OTh520I_RktXqn_gjGoIdbOD0sZS7LHeeeZwDMNMHSM,4279
+hanzo-0.3.21.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+hanzo-0.3.21.dist-info/entry_points.txt,sha256=pQLPMdqOXU_2BfTcMDhkqTCDNk_H6ApvYuSaWcuQOOw,171
+hanzo-0.3.21.dist-info/RECORD,,
{hanzo-0.3.20.dist-info → hanzo-0.3.21.dist-info}/WHEEL
File without changes

{hanzo-0.3.20.dist-info → hanzo-0.3.21.dist-info}/entry_points.txt
File without changes
|