hanzo 0.3.20__tar.gz → 0.3.22__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of hanzo might be problematic.

Files changed (34)
  1. {hanzo-0.3.20 → hanzo-0.3.22}/PKG-INFO +1 -1
  2. {hanzo-0.3.20 → hanzo-0.3.22}/pyproject.toml +1 -1
  3. {hanzo-0.3.20 → hanzo-0.3.22}/src/hanzo/cli.py +1 -1
  4. {hanzo-0.3.20 → hanzo-0.3.22}/src/hanzo/dev.py +79 -42
  5. {hanzo-0.3.20 → hanzo-0.3.22}/src/hanzo/fallback_handler.py +14 -9
  6. hanzo-0.3.22/src/hanzo/rate_limiter.py +332 -0
  7. hanzo-0.3.22/src/hanzo/streaming.py +271 -0
  8. {hanzo-0.3.20 → hanzo-0.3.22}/.gitignore +0 -0
  9. {hanzo-0.3.20 → hanzo-0.3.22}/README.md +0 -0
  10. {hanzo-0.3.20 → hanzo-0.3.22}/src/hanzo/__init__.py +0 -0
  11. {hanzo-0.3.20 → hanzo-0.3.22}/src/hanzo/__main__.py +0 -0
  12. {hanzo-0.3.20 → hanzo-0.3.22}/src/hanzo/commands/__init__.py +0 -0
  13. {hanzo-0.3.20 → hanzo-0.3.22}/src/hanzo/commands/agent.py +0 -0
  14. {hanzo-0.3.20 → hanzo-0.3.22}/src/hanzo/commands/auth.py +0 -0
  15. {hanzo-0.3.20 → hanzo-0.3.22}/src/hanzo/commands/chat.py +0 -0
  16. {hanzo-0.3.20 → hanzo-0.3.22}/src/hanzo/commands/cluster.py +0 -0
  17. {hanzo-0.3.20 → hanzo-0.3.22}/src/hanzo/commands/config.py +0 -0
  18. {hanzo-0.3.20 → hanzo-0.3.22}/src/hanzo/commands/mcp.py +0 -0
  19. {hanzo-0.3.20 → hanzo-0.3.22}/src/hanzo/commands/miner.py +0 -0
  20. {hanzo-0.3.20 → hanzo-0.3.22}/src/hanzo/commands/network.py +0 -0
  21. {hanzo-0.3.20 → hanzo-0.3.22}/src/hanzo/commands/repl.py +0 -0
  22. {hanzo-0.3.20 → hanzo-0.3.22}/src/hanzo/commands/tools.py +0 -0
  23. {hanzo-0.3.20 → hanzo-0.3.22}/src/hanzo/interactive/__init__.py +0 -0
  24. {hanzo-0.3.20 → hanzo-0.3.22}/src/hanzo/interactive/dashboard.py +0 -0
  25. {hanzo-0.3.20 → hanzo-0.3.22}/src/hanzo/interactive/repl.py +0 -0
  26. {hanzo-0.3.20 → hanzo-0.3.22}/src/hanzo/mcp_server.py +0 -0
  27. {hanzo-0.3.20 → hanzo-0.3.22}/src/hanzo/memory_manager.py +0 -0
  28. {hanzo-0.3.20 → hanzo-0.3.22}/src/hanzo/orchestrator_config.py +0 -0
  29. {hanzo-0.3.20 → hanzo-0.3.22}/src/hanzo/repl.py +0 -0
  30. {hanzo-0.3.20 → hanzo-0.3.22}/src/hanzo/router/__init__.py +0 -0
  31. {hanzo-0.3.20 → hanzo-0.3.22}/src/hanzo/utils/__init__.py +0 -0
  32. {hanzo-0.3.20 → hanzo-0.3.22}/src/hanzo/utils/config.py +0 -0
  33. {hanzo-0.3.20 → hanzo-0.3.22}/src/hanzo/utils/net_check.py +0 -0
  34. {hanzo-0.3.20 → hanzo-0.3.22}/src/hanzo/utils/output.py +0 -0

{hanzo-0.3.20 → hanzo-0.3.22}/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: hanzo
- Version: 0.3.20
+ Version: 0.3.22
  Summary: Hanzo AI - Complete AI Infrastructure Platform with CLI, Router, MCP, and Agent Runtime
  Project-URL: Homepage, https://hanzo.ai
  Project-URL: Repository, https://github.com/hanzoai/python-sdk

{hanzo-0.3.20 → hanzo-0.3.22}/pyproject.toml
@@ -1,6 +1,6 @@
  [project]
  name = "hanzo"
- version = "0.3.20"
+ version = "0.3.22"
  description = "Hanzo AI - Complete AI Infrastructure Platform with CLI, Router, MCP, and Agent Runtime"
  authors = [
      {name = "Hanzo AI", email = "dev@hanzo.ai"},

{hanzo-0.3.20 → hanzo-0.3.22}/src/hanzo/cli.py
@@ -26,7 +26,7 @@ from .utils.output import console
  from .interactive.repl import HanzoREPL

  # Version
- __version__ = "0.3.20"
+ __version__ = "0.3.22"


  @click.group(invoke_without_command=True)

{hanzo-0.3.20 → hanzo-0.3.22}/src/hanzo/dev.py
@@ -697,34 +697,35 @@ class HanzoDevREPL:
              padding=(0, 1)
          ))
          console.print()
+
+         # Check for available API keys and show status
+         from .fallback_handler import FallbackHandler
+         handler = FallbackHandler()
+         if not handler.fallback_order:
+             console.print("[yellow]⚠️ No API keys detected[/yellow]")
+             console.print("[dim]Set OPENAI_API_KEY or ANTHROPIC_API_KEY to enable AI[/dim]")
+             console.print()
+         else:
+             primary = handler.fallback_order[0][1]
+             console.print(f"[green]✅ Using {primary} for AI responses[/green]")
+             console.print()

          while True:
              try:
-                 # Draw input box border (top)
-                 console.print("[dim white]╭" + "─" * 78 + "╮[/dim white]")
-
-                 # Get input with styled prompt inside the box
-                 console.print("[dim white]│[/dim white] ", end="")
-
+                 # Simple prompt without box borders to avoid rendering issues
                  try:
-                     # Get input - using simple input() wrapped in executor for async
-                     # The visual box is drawn by console.print statements
+                     # Add spacing to prevent UI cutoff at bottom
                      user_input = await asyncio.get_event_loop().run_in_executor(
                          None,
                          input,
-                         '› ' # Using › instead of > for a more modern look
+                         '› ' # Clean prompt
                      )
-
-                     # Draw input box border (bottom)
-                     console.print("[dim white]╰" + "─" * 78 + "╯[/dim white]")
+                     console.print() # Add spacing after input

                  except EOFError:
                      console.print() # New line before exit
-                     console.print("[dim white]╰" + "─" * 78 + "╯[/dim white]")
                      break
                  except KeyboardInterrupt:
-                     console.print() # Complete the box
-                     console.print("[dim white]╰" + "─" * 78 + "╯[/dim white]")
                      console.print("\n[dim yellow]Use /exit to quit[/dim]")
                      continue

@@ -929,21 +930,14 @@ Examples:
          # Try smart fallback if no specific model configured
          if not hasattr(self.orchestrator, 'orchestrator_model') or \
             self.orchestrator.orchestrator_model == "auto":
-             from .fallback_handler import smart_chat
-             response = await smart_chat(enhanced_message, console)
+             # Use streaming if available
+             from .streaming import stream_with_fallback
+             response = await stream_with_fallback(enhanced_message, console)
+
              if response:
                  # Save AI response to memory
                  self.memory_manager.add_message("assistant", response)
-
-                 from rich.panel import Panel
-                 console.print()
-                 console.print(Panel(
-                     response,
-                     title="[bold cyan]AI Response[/bold cyan]",
-                     title_align="left",
-                     border_style="dim cyan",
-                     padding=(1, 2)
-                 ))
+                 # Response already displayed by streaming handler
                  return
              else:
                  console.print("[red]No AI options available. Please configure API keys or install tools.[/red]")
@@ -1051,8 +1045,25 @@ Examples:
              await self._use_local_model(message)
              return

-         # Try OpenAI first
-         if os.getenv("OPENAI_API_KEY"):
+         # Use the fallback handler to intelligently try available options
+         from .fallback_handler import smart_chat
+         response = await smart_chat(message, console=console)
+
+         if response:
+             from rich.panel import Panel
+             console.print()
+             console.print(Panel(
+                 response,
+                 title="[bold cyan]AI Response[/bold cyan]",
+                 title_align="left",
+                 border_style="dim cyan",
+                 padding=(1, 2)
+             ))
+             return
+
+         # Try OpenAI first explicitly (in case fallback handler missed it)
+         openai_key = os.environ.get("OPENAI_API_KEY") or os.getenv("OPENAI_API_KEY")
+         if openai_key:
              try:
                  from openai import AsyncOpenAI

@@ -1585,13 +1596,10 @@ async def run_dev_orchestrator(**kwargs):
              console_obj.print("[red]Failed to initialize network[/red]")
              return
      else:
-         # Fallback to multi-Claude mode
-         console_obj.print(f"[cyan]Mode: Multi-Claude Orchestration (legacy)[/cyan]")
-         console_obj.print(
-             f"Instances: {instances} (1 primary + {instances-1} critic{'s' if instances > 2 else ''})"
-         )
+         # Fallback to API mode
+         console_obj.print(f"[cyan]Mode: AI Chat[/cyan]")
+         console_obj.print(f"Model: {orchestrator_model}")
          console_obj.print(f"MCP Tools: {'Enabled' if mcp_tools else 'Disabled'}")
-         console_obj.print(f"Networking: {'Enabled' if network_mode else 'Disabled'}")
          console_obj.print(f"Guardrails: {'Enabled' if guardrails else 'Disabled'}\n")

      orchestrator = MultiClaudeOrchestrator(
@@ -2223,6 +2231,21 @@ class MultiClaudeOrchestrator(HanzoDevOrchestrator):

      async def initialize(self):
          """Initialize all Claude instances with MCP networking."""
+         # Check if Claude is available first
+         claude_available = False
+         try:
+             import shutil
+             if self.claude_code_path and Path(self.claude_code_path).exists():
+                 claude_available = True
+             elif shutil.which("claude"):
+                 claude_available = True
+         except:
+             pass
+
+         if not claude_available:
+             # Skip Claude instance initialization - will use API fallback silently
+             return
+
          self.console.print("[cyan]Initializing Claude instances...[/cyan]")

          for i in range(self.num_instances):
@@ -2244,7 +2267,8 @@ class MultiClaudeOrchestrator(HanzoDevOrchestrator):
              if success:
                  self.console.print(f"[green]✓ Instance {i} started[/green]")
              else:
-                 self.console.print(f"[red]✗ Failed to start instance {i}[/red]")
+                 # Don't show error, just skip silently
+                 pass

      async def _create_instance_config(self, index: int, role: str) -> Dict:
          """Create configuration for a Claude instance."""
@@ -2384,7 +2408,12 @@ class MultiClaudeOrchestrator(HanzoDevOrchestrator):

          # Check if instances are initialized
          if not self.claude_instances:
-             # No instances started, use direct API
+             # No instances started, use fallback handler for smart routing
+             from .fallback_handler import smart_chat
+             response = await smart_chat(task, console=self.console)
+             if response:
+                 return {"output": response, "success": True}
+             # If smart_chat fails, try direct API as last resort
              return await self._call_api_model(task)

          # Step 1: Primary execution
@@ -2537,11 +2566,12 @@ class MultiClaudeOrchestrator(HanzoDevOrchestrator):
          """Call API-based model and return structured response."""
          import os

-         # Try OpenAI
-         if os.getenv("OPENAI_API_KEY"):
+         # Try OpenAI first (check environment variable properly)
+         openai_key = os.environ.get("OPENAI_API_KEY") or os.getenv("OPENAI_API_KEY")
+         if openai_key:
              try:
                  from openai import AsyncOpenAI
-                 client = AsyncOpenAI()
+                 client = AsyncOpenAI(api_key=openai_key)
                  response = await client.chat.completions.create(
                      model="gpt-4",
                      messages=[{"role": "user", "content": prompt}],
@@ -2553,10 +2583,11 @@ class MultiClaudeOrchestrator(HanzoDevOrchestrator):
                  logger.error(f"OpenAI API error: {e}")

          # Try Anthropic
-         if os.getenv("ANTHROPIC_API_KEY"):
+         anthropic_key = os.environ.get("ANTHROPIC_API_KEY") or os.getenv("ANTHROPIC_API_KEY")
+         if anthropic_key:
              try:
                  from anthropic import AsyncAnthropic
-                 client = AsyncAnthropic()
+                 client = AsyncAnthropic(api_key=anthropic_key)
                  response = await client.messages.create(
                      model="claude-3-5-sonnet-20241022",
                      messages=[{"role": "user", "content": prompt}],
@@ -2567,6 +2598,12 @@ class MultiClaudeOrchestrator(HanzoDevOrchestrator):
              except Exception as e:
                  logger.error(f"Anthropic API error: {e}")

+         # Try fallback handler as last resort
+         from .fallback_handler import smart_chat
+         response = await smart_chat(prompt, console=None) # No console to avoid duplicate messages
+         if response:
+             return {"output": response, "success": True}
+
          return {"output": "No API keys configured. Set OPENAI_API_KEY or ANTHROPIC_API_KEY", "success": False}

      async def _validate_improvement(self, original: Dict, improved: Dict) -> bool:

{hanzo-0.3.20 → hanzo-0.3.22}/src/hanzo/fallback_handler.py
@@ -158,6 +158,8 @@ async def smart_chat(message: str, console=None) -> Optional[str]:
      Smart chat that automatically tries available AI options.
      Returns the AI response or None if all options fail.
      """
+     from .rate_limiter import smart_limiter
+
      handler = FallbackHandler()

      if console:
@@ -171,17 +173,20 @@

      option_type, model = best_option

-     # Try the primary option
+     # Try the primary option with rate limiting
      try:
          if option_type == "openai_api":
-             from openai import AsyncOpenAI
-             client = AsyncOpenAI()
-             response = await client.chat.completions.create(
-                 model="gpt-4",
-                 messages=[{"role": "user", "content": message}],
-                 max_tokens=500
-             )
-             return response.choices[0].message.content
+             async def call_openai():
+                 from openai import AsyncOpenAI
+                 client = AsyncOpenAI()
+                 response = await client.chat.completions.create(
+                     model="gpt-4",
+                     messages=[{"role": "user", "content": message}],
+                     max_tokens=500
+                 )
+                 return response.choices[0].message.content
+
+             return await smart_limiter.execute_with_limit("openai", call_openai)

          elif option_type == "anthropic_api":
              from anthropic import AsyncAnthropic

hanzo-0.3.22/src/hanzo/rate_limiter.py (new file)
@@ -0,0 +1,332 @@
+ """
+ Rate limiting and error recovery for Hanzo Dev.
+ Prevents API overuse and handles failures gracefully.
+ """
+
+ import time
+ import asyncio
+ from typing import Dict, Optional, Any, Callable
+ from dataclasses import dataclass, field
+ from datetime import datetime, timedelta
+ from collections import deque
+ import random
+
+
+ @dataclass
+ class RateLimitConfig:
+     """Configuration for rate limiting."""
+     requests_per_minute: int = 20
+     requests_per_hour: int = 100
+     burst_size: int = 5
+     cooldown_seconds: int = 60
+     max_retries: int = 3
+     backoff_base: float = 2.0
+     jitter: bool = True
+
+
+ @dataclass
+ class RateLimitState:
+     """Current state of rate limiter."""
+     minute_requests: deque = field(default_factory=lambda: deque(maxlen=60))
+     hour_requests: deque = field(default_factory=lambda: deque(maxlen=3600))
+     last_request: Optional[datetime] = None
+     consecutive_errors: int = 0
+     total_requests: int = 0
+     total_errors: int = 0
+     is_throttled: bool = False
+     throttle_until: Optional[datetime] = None
+
+
+ class RateLimiter:
+     """Rate limiter with error recovery."""
+
+     def __init__(self, config: RateLimitConfig = None):
+         """Initialize rate limiter."""
+         self.config = config or RateLimitConfig()
+         self.states: Dict[str, RateLimitState] = {}
+
+     def get_state(self, key: str = "default") -> RateLimitState:
+         """Get or create state for a key."""
+         if key not in self.states:
+             self.states[key] = RateLimitState()
+         return self.states[key]
+
+     async def check_rate_limit(self, key: str = "default") -> tuple[bool, float]:
+         """
+         Check if request is allowed.
+         Returns (allowed, wait_seconds).
+         """
+         state = self.get_state(key)
+         now = datetime.now()
+
+         # Check if throttled
+         if state.is_throttled and state.throttle_until:
+             if now < state.throttle_until:
+                 wait_seconds = (state.throttle_until - now).total_seconds()
+                 return False, wait_seconds
+             else:
+                 # Throttle period ended
+                 state.is_throttled = False
+                 state.throttle_until = None
+
+         # Clean old requests
+         minute_ago = now - timedelta(minutes=1)
+         hour_ago = now - timedelta(hours=1)
+
+         # Remove old requests from queues
+         while state.minute_requests and state.minute_requests[0] < minute_ago:
+             state.minute_requests.popleft()
+
+         while state.hour_requests and state.hour_requests[0] < hour_ago:
+             state.hour_requests.popleft()
+
+         # Check minute limit
+         if len(state.minute_requests) >= self.config.requests_per_minute:
+             # Calculate wait time
+             oldest = state.minute_requests[0]
+             wait_seconds = (oldest + timedelta(minutes=1) - now).total_seconds()
+             return False, max(0, wait_seconds)
+
+         # Check hour limit
+         if len(state.hour_requests) >= self.config.requests_per_hour:
+             # Calculate wait time
+             oldest = state.hour_requests[0]
+             wait_seconds = (oldest + timedelta(hours=1) - now).total_seconds()
+             return False, max(0, wait_seconds)
+
+         # Check burst limit
+         if state.last_request:
+             time_since_last = (now - state.last_request).total_seconds()
+             if time_since_last < 1.0 / self.config.burst_size:
+                 wait_seconds = (1.0 / self.config.burst_size) - time_since_last
+                 return False, wait_seconds
+
+         return True, 0
+
+     async def acquire(self, key: str = "default") -> bool:
+         """
+         Acquire a rate limit slot.
+         Waits if necessary.
+         """
+         while True:
+             allowed, wait_seconds = await self.check_rate_limit(key)
+
+             if allowed:
+                 # Record request
+                 state = self.get_state(key)
+                 now = datetime.now()
+                 state.minute_requests.append(now)
+                 state.hour_requests.append(now)
+                 state.last_request = now
+                 state.total_requests += 1
+                 return True
+
+             # Wait before retrying
+             if wait_seconds > 0:
+                 await asyncio.sleep(min(wait_seconds, 5)) # Check every 5 seconds max
+
+     def record_error(self, key: str = "default", error: Exception = None):
+         """Record an error for the key."""
+         state = self.get_state(key)
+         state.consecutive_errors += 1
+         state.total_errors += 1
+
+         # Implement exponential backoff on errors
+         if state.consecutive_errors >= 3:
+             # Throttle for increasing periods
+             backoff_minutes = min(
+                 self.config.backoff_base ** (state.consecutive_errors - 2),
+                 60 # Max 1 hour
+             )
+             state.is_throttled = True
+             state.throttle_until = datetime.now() + timedelta(minutes=backoff_minutes)
+
+     def record_success(self, key: str = "default"):
+         """Record a successful request."""
+         state = self.get_state(key)
+         state.consecutive_errors = 0
+
+     def get_status(self, key: str = "default") -> Dict[str, Any]:
+         """Get current status for monitoring."""
+         state = self.get_state(key)
+         now = datetime.now()
+
+         return {
+             "requests_last_minute": len(state.minute_requests),
+             "requests_last_hour": len(state.hour_requests),
+             "total_requests": state.total_requests,
+             "total_errors": state.total_errors,
+             "consecutive_errors": state.consecutive_errors,
+             "is_throttled": state.is_throttled,
+             "throttle_remaining": (
+                 (state.throttle_until - now).total_seconds()
+                 if state.throttle_until and now < state.throttle_until
+                 else 0
+             ),
+             "minute_limit": self.config.requests_per_minute,
+             "hour_limit": self.config.requests_per_hour,
+         }
+
+
+ class ErrorRecovery:
+     """Error recovery with retries and fallback."""
+
+     def __init__(self, rate_limiter: RateLimiter = None):
+         """Initialize error recovery."""
+         self.rate_limiter = rate_limiter or RateLimiter()
+         self.fallback_handlers: Dict[type, Callable] = {}
+
+     def register_fallback(self, error_type: type, handler: Callable):
+         """Register a fallback handler for an error type."""
+         self.fallback_handlers[error_type] = handler
+
+     async def with_retry(
+         self,
+         func: Callable,
+         *args,
+         key: str = "default",
+         max_retries: Optional[int] = None,
+         **kwargs
+     ) -> Any:
+         """
+         Execute function with retry logic.
+         """
+         max_retries = max_retries or self.rate_limiter.config.max_retries
+         last_error = None
+
+         for attempt in range(max_retries):
+             try:
+                 # Check rate limit
+                 await self.rate_limiter.acquire(key)
+
+                 # Execute function
+                 result = await func(*args, **kwargs)
+
+                 # Record success
+                 self.rate_limiter.record_success(key)
+
+                 return result
+
+             except Exception as e:
+                 last_error = e
+                 self.rate_limiter.record_error(key, e)
+
+                 # Check for fallback handler
+                 for error_type, handler in self.fallback_handlers.items():
+                     if isinstance(e, error_type):
+                         try:
+                             return await handler(*args, **kwargs)
+                         except:
+                             pass # Fallback failed, continue with retry
+
+                 # Calculate backoff
+                 if attempt < max_retries - 1:
+                     backoff = self.rate_limiter.config.backoff_base ** attempt
+
+                     # Add jitter if configured
+                     if self.rate_limiter.config.jitter:
+                         backoff *= (0.5 + random.random())
+
+                     await asyncio.sleep(min(backoff, 60)) # Max 60 seconds
+
+         # All retries failed
+         raise last_error or Exception("All retry attempts failed")
+
+     async def with_circuit_breaker(
+         self,
+         func: Callable,
+         *args,
+         key: str = "default",
+         threshold: int = 5,
+         timeout: int = 60,
+         **kwargs
+     ) -> Any:
+         """
+         Execute function with circuit breaker pattern.
+         """
+         state = self.rate_limiter.get_state(key)
+
+         # Check if circuit is open
+         if state.is_throttled:
+             raise Exception(f"Circuit breaker open for {key}")
+
+         try:
+             result = await self.with_retry(func, *args, key=key, **kwargs)
+             return result
+
+         except Exception as e:
+             # Check if we should open the circuit
+             if state.consecutive_errors >= threshold:
+                 state.is_throttled = True
+                 state.throttle_until = datetime.now() + timedelta(seconds=timeout)
+                 raise Exception(f"Circuit breaker triggered for {key}: {e}")
+             raise
+
+
+ class SmartRateLimiter:
+     """Smart rate limiter that adapts to API responses."""
+
+     def __init__(self):
+         """Initialize smart rate limiter."""
+         self.limiters: Dict[str, RateLimiter] = {}
+         self.recovery = ErrorRecovery()
+
+         # Default configs for known APIs
+         self.configs = {
+             "openai": RateLimitConfig(
+                 requests_per_minute=60,
+                 requests_per_hour=1000,
+                 burst_size=10
+             ),
+             "anthropic": RateLimitConfig(
+                 requests_per_minute=50,
+                 requests_per_hour=1000,
+                 burst_size=5
+             ),
+             "local": RateLimitConfig(
+                 requests_per_minute=100,
+                 requests_per_hour=10000,
+                 burst_size=20
+             ),
+             "free": RateLimitConfig(
+                 requests_per_minute=10,
+                 requests_per_hour=100,
+                 burst_size=2
+             ),
+         }
+
+     def get_limiter(self, api_type: str) -> RateLimiter:
+         """Get or create limiter for API type."""
+         if api_type not in self.limiters:
+             config = self.configs.get(api_type, RateLimitConfig())
+             self.limiters[api_type] = RateLimiter(config)
+         return self.limiters[api_type]
+
+     async def execute_with_limit(
+         self,
+         api_type: str,
+         func: Callable,
+         *args,
+         **kwargs
+     ) -> Any:
+         """Execute function with appropriate rate limiting."""
+         limiter = self.get_limiter(api_type)
+         recovery = ErrorRecovery(limiter)
+
+         return await recovery.with_retry(
+             func,
+             *args,
+             key=api_type,
+             **kwargs
+         )
+
+     def get_all_status(self) -> Dict[str, Dict[str, Any]]:
+         """Get status of all limiters."""
+         return {
+             api_type: limiter.get_status()
+             for api_type, limiter in self.limiters.items()
+         }
+
+
+ # Global instance for easy use
+ smart_limiter = SmartRateLimiter()
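
For orientation, a minimal usage sketch of the new rate limiter, based on the execute_with_limit signature and the module-level smart_limiter instance shown above; the ask_openai helper and the printed output are illustrative, not part of the package:

import asyncio
from hanzo.rate_limiter import smart_limiter

async def ask_openai() -> str:
    # Hypothetical API call; any zero-argument async callable can be wrapped.
    return "hello from the model"

async def main():
    # Applies the per-API budget for "openai" (60 requests/minute in the default
    # config) plus retry with exponential backoff via ErrorRecovery.with_retry.
    reply = await smart_limiter.execute_with_limit("openai", ask_openai)
    print(reply)
    print(smart_limiter.get_all_status())  # per-API usage and throttle state

asyncio.run(main())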

hanzo-0.3.22/src/hanzo/streaming.py (new file)
@@ -0,0 +1,271 @@
+ """
+ Streaming response handler for Hanzo Dev.
+ Provides real-time feedback as AI generates responses.
+ """
+
+ import asyncio
+ from typing import AsyncGenerator, Optional, Callable
+ from rich.console import Console
+ from rich.live import Live
+ from rich.panel import Panel
+ from rich.markdown import Markdown
+ import time
+
+
+ class StreamingHandler:
+     """Handles streaming responses from AI models."""
+
+     def __init__(self, console: Console = None):
+         """Initialize streaming handler."""
+         self.console = console or Console()
+         self.current_response = ""
+         self.is_streaming = False
+
+     async def stream_openai(self, client, messages: list, model: str = "gpt-4") -> str:
+         """Stream response from OpenAI API."""
+         try:
+             stream = await client.chat.completions.create(
+                 model=model,
+                 messages=messages,
+                 stream=True,
+                 max_tokens=1000
+             )
+
+             self.current_response = ""
+             self.is_streaming = True
+
+             with Live(
+                 Panel("", title="[bold cyan]AI Response[/bold cyan]",
+                       title_align="left", border_style="dim cyan"),
+                 console=self.console,
+                 refresh_per_second=10
+             ) as live:
+                 async for chunk in stream:
+                     if chunk.choices[0].delta.content:
+                         self.current_response += chunk.choices[0].delta.content
+                         live.update(
+                             Panel(
+                                 Markdown(self.current_response),
+                                 title="[bold cyan]AI Response[/bold cyan]",
+                                 title_align="left",
+                                 border_style="dim cyan",
+                                 padding=(1, 2)
+                             )
+                         )
+
+             self.is_streaming = False
+             return self.current_response
+
+         except Exception as e:
+             self.console.print(f"[red]Streaming error: {e}[/red]")
+             self.is_streaming = False
+             return None
+
+     async def stream_anthropic(self, client, messages: list, model: str = "claude-3-5-sonnet-20241022") -> str:
+         """Stream response from Anthropic API."""
+         try:
+             self.current_response = ""
+             self.is_streaming = True
+
+             with Live(
+                 Panel("", title="[bold cyan]AI Response[/bold cyan]",
+                       title_align="left", border_style="dim cyan"),
+                 console=self.console,
+                 refresh_per_second=10
+             ) as live:
+                 async with client.messages.stream(
+                     model=model,
+                     messages=messages,
+                     max_tokens=1000
+                 ) as stream:
+                     async for text in stream.text_stream:
+                         self.current_response += text
+                         live.update(
+                             Panel(
+                                 Markdown(self.current_response),
+                                 title="[bold cyan]AI Response[/bold cyan]",
+                                 title_align="left",
+                                 border_style="dim cyan",
+                                 padding=(1, 2)
+                             )
+                         )
+
+             self.is_streaming = False
+             return self.current_response
+
+         except Exception as e:
+             self.console.print(f"[red]Streaming error: {e}[/red]")
+             self.is_streaming = False
+             return None
+
+     async def stream_ollama(self, message: str, model: str = "llama3.2") -> str:
+         """Stream response from Ollama local model."""
+         import httpx
+
+         try:
+             self.current_response = ""
+             self.is_streaming = True
+
+             with Live(
+                 Panel("", title="[bold cyan]AI Response (Local)[/bold cyan]",
+                       title_align="left", border_style="dim cyan"),
+                 console=self.console,
+                 refresh_per_second=10
+             ) as live:
+                 async with httpx.AsyncClient() as client:
+                     async with client.stream(
+                         "POST",
+                         "http://localhost:11434/api/generate",
+                         json={"model": model, "prompt": message, "stream": True},
+                         timeout=60.0
+                     ) as response:
+                         async for line in response.aiter_lines():
+                             if line:
+                                 import json
+                                 data = json.loads(line)
+                                 if "response" in data:
+                                     self.current_response += data["response"]
+                                     live.update(
+                                         Panel(
+                                             Markdown(self.current_response),
+                                             title="[bold cyan]AI Response (Local)[/bold cyan]",
+                                             title_align="left",
+                                             border_style="dim cyan",
+                                             padding=(1, 2)
+                                         )
+                                     )
+                                 if data.get("done", False):
+                                     break
+
+             self.is_streaming = False
+             return self.current_response
+
+         except Exception as e:
+             self.console.print(f"[red]Ollama streaming error: {e}[/red]")
+             self.is_streaming = False
+             return None
+
+     async def simulate_streaming(self, text: str, delay: float = 0.02) -> str:
+         """Simulate streaming for non-streaming APIs."""
+         self.current_response = ""
+         self.is_streaming = True
+
+         words = text.split()
+
+         with Live(
+             Panel("", title="[bold cyan]AI Response[/bold cyan]",
+                   title_align="left", border_style="dim cyan"),
+             console=self.console,
+             refresh_per_second=20
+         ) as live:
+             for i, word in enumerate(words):
+                 self.current_response += word
+                 if i < len(words) - 1:
+                     self.current_response += " "
+
+                 live.update(
+                     Panel(
+                         Markdown(self.current_response),
+                         title="[bold cyan]AI Response[/bold cyan]",
+                         title_align="left",
+                         border_style="dim cyan",
+                         padding=(1, 2)
+                     )
+                 )
+                 await asyncio.sleep(delay)
+
+         self.is_streaming = False
+         return self.current_response
+
+     def stop_streaming(self):
+         """Stop current streaming operation."""
+         self.is_streaming = False
+         if self.current_response:
+             self.console.print(f"\n[yellow]Streaming interrupted[/yellow]")
+
+
+ class TypewriterEffect:
+     """Provides typewriter effect for text output."""
+
+     def __init__(self, console: Console = None):
+         self.console = console or Console()
+
+     async def type_text(self, text: str, speed: float = 0.03):
+         """Type text with typewriter effect."""
+         for char in text:
+             self.console.print(char, end="")
+             await asyncio.sleep(speed)
+         self.console.print() # New line at end
+
+     async def type_code(self, code: str, language: str = "python", speed: float = 0.01):
+         """Type code with syntax highlighting."""
+         from rich.syntax import Syntax
+
+         # Build up code progressively
+         current_code = ""
+         lines = code.split('\n')
+
+         with Live(console=self.console, refresh_per_second=30) as live:
+             for line in lines:
+                 for char in line:
+                     current_code += char
+                     syntax = Syntax(current_code, language, theme="monokai", line_numbers=True)
+                     live.update(syntax)
+                     await asyncio.sleep(speed)
+                 current_code += '\n'
+                 syntax = Syntax(current_code, language, theme="monokai", line_numbers=True)
+                 live.update(syntax)
+
+
+ async def stream_with_fallback(message: str, console: Console = None) -> Optional[str]:
+     """
+     Stream response with automatic fallback to available options.
+     """
+     import os
+     handler = StreamingHandler(console)
+
+     # Try OpenAI streaming
+     if os.getenv("OPENAI_API_KEY"):
+         try:
+             from openai import AsyncOpenAI
+             client = AsyncOpenAI()
+             return await handler.stream_openai(
+                 client,
+                 [{"role": "user", "content": message}]
+             )
+         except Exception as e:
+             if console:
+                 console.print(f"[yellow]OpenAI streaming failed: {e}[/yellow]")
+
+     # Try Anthropic streaming
+     if os.getenv("ANTHROPIC_API_KEY"):
+         try:
+             from anthropic import AsyncAnthropic
+             client = AsyncAnthropic()
+             return await handler.stream_anthropic(
+                 client,
+                 [{"role": "user", "content": message}]
+             )
+         except Exception as e:
+             if console:
+                 console.print(f"[yellow]Anthropic streaming failed: {e}[/yellow]")
+
+     # Try Ollama streaming
+     try:
+         return await handler.stream_ollama(message)
+     except:
+         pass
+
+     # Fallback to non-streaming with simulated effect
+     if console:
+         console.print("[yellow]Falling back to non-streaming mode[/yellow]")
+
+     # Get response from fallback handler
+     from .fallback_handler import smart_chat
+     response = await smart_chat(message, console)
+
+     if response:
+         # Simulate streaming
+         return await handler.simulate_streaming(response)
+
+     return None
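
And a rough sketch of how the new streaming entry point is meant to be driven, assuming the environment-variable detection shown above; the prompt text is illustrative:

import asyncio
from rich.console import Console
from hanzo.streaming import stream_with_fallback

async def main():
    console = Console()
    # Tries OpenAI, then Anthropic, then a local Ollama server, and finally
    # falls back to smart_chat with a simulated streaming effect.
    reply = await stream_with_fallback("Summarize this repository", console)
    if reply is None:
        console.print("[red]No AI backend was available[/red]")

asyncio.run(main())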