entroplain 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,178 @@
1
+ # Entroplain Usage Guide for Agents
2
+
3
+ ## Quick Setup
4
+
5
+ ### For OpenClaw/Claude Code (Proxy Method)
6
+
7
+ Run the entropy proxy and point your agent to it:
8
+
9
+ ```bash
10
+ # Start the proxy (monitors entropy, enables early exit)
11
+ python -m entroplain.proxy --port 8765 --log-entropy
12
+
13
+ # Set environment to use proxy
14
+ export OPENAI_BASE_URL=http://localhost:8765/v1
15
+ # or for NVIDIA:
16
+ export NVIDIA_BASE_URL=http://localhost:8765/v1
17
+ ```
18
+
19
+ Now OpenClaw/Claude Code will automatically have entropy monitoring!
20
+
21
+ ### How the Proxy Works
22
+
23
+ ```
24
+ Agent -> Proxy (localhost:8765) -> Real API
25
+ |
26
+ v
27
+ Entropy Monitor
28
+ |
29
+ v
30
+ Early Exit Check
31
+ ```
32
+
33
+ The proxy:
34
+ 1. Intercepts all chat completion requests
35
+ 2. Enables logprobs automatically
36
+ 3. Calculates entropy for each token
37
+ 4. Terminates stream when reasoning converges
38
+ 5. Passes everything through unchanged to the agent
39
+
40
+ ---
41
+
42
+ ## Direct Usage (Python)
43
+
44
+ ```python
45
+ from entroplain import EntropyMonitor, NVIDIAProvider
46
+
47
+ monitor = EntropyMonitor()
48
+ provider = NVIDIAProvider()
49
+
50
+ for token in provider.stream_with_entropy(
51
+ model="meta/llama-3.1-70b-instruct",
52
+ messages=[{"role": "user", "content": "Solve: x^2 = 16"}]
53
+ ):
54
+ monitor.track(token.token, token.entropy)
55
+ print(token.token, end="")
56
+
57
+ if monitor.should_exit():
58
+ print("\n[Early exit - reasoning converged]")
59
+ break
60
+
61
+ print(f"\nStats: {monitor.get_stats()}")
62
+ ```
63
+
64
+ ---
65
+
66
+ ## Supported Providers
67
+
68
+ | Provider | Works? | How |
69
+ |----------|--------|-----|
70
+ | OpenAI | YES | `logprobs: true` |
71
+ | NVIDIA NIM | YES | OpenAI-compatible |
72
+ | Anthropic Claude 4 | YES | `logprobs: true` |
73
+ | Google Gemini | YES | `response_logprobs=True` |
74
+ | Ollama (local) | YES | Built-in logit access |
75
+ | llama.cpp | YES | Built-in logit access |
76
+
77
+ ---
78
+
79
+ ## Configuration
80
+
81
+ ### Exit Conditions
82
+
83
+ ```python
84
+ monitor = EntropyMonitor(
85
+ entropy_threshold=0.15, # Exit when entropy drops below this
86
+ min_valleys=2, # Require N reasoning milestones
87
+ min_tokens=50, # Don't exit before this many tokens
88
+ velocity_threshold=0.05, # Exit when change rate stabilizes
89
+ exit_condition="combined" # or: "valleys_plateau", "entropy_drop", "velocity_zero"
90
+ )
91
+ ```
92
+
93
+ ### Environment Variables
94
+
95
+ ```bash
96
+ # API keys (used by providers)
97
+ export OPENAI_API_KEY=sk-...
98
+ export ANTHROPIC_API_KEY=sk-ant-...
99
+ export NVIDIA_API_KEY=nvapi-...
100
+ export GOOGLE_API_KEY=...
101
+
102
+ # For proxy
103
+ export ENTROPPLAIN_PORT=8765
104
+ export ENTROPPLAIN_LOG_ENTROPY=true
105
+ ```
106
+
107
+ ---
108
+
109
+ ## CLI
110
+
111
+ ```bash
112
+ # Analyze a prompt
113
+ entroplain analyze "What is 2+2?" --model gpt-4o
114
+
115
+ # Stream with early exit
116
+ entroplain stream "Explain quantum computing" --exit-on-converge
117
+
118
+ # Run proxy
119
+ entroplain proxy --port 8765 --log-entropy
120
+ ```
121
+
122
+ ---
123
+
124
+ ## Agent Integration Examples
125
+
126
+ ### OpenClaw with Proxy
127
+
128
+ ```yaml
129
+ # In config.yaml
130
+ llm:
131
+ provider: openai-compatible
132
+ base_url: http://localhost:8765/v1 # Point to proxy
133
+ primary_model: meta/llama-3.1-70b-instruct
134
+ ```
135
+
136
+ ### Claude Code with Proxy
137
+
138
+ Set environment before running:
139
+ ```bash
140
+ export ANTHROPIC_BASE_URL=http://localhost:8765/v1
141
+ claude
142
+ ```
143
+
144
+ ### Custom Agent
145
+
146
+ ```python
147
+ from entroplain.hooks import EntropyHook
148
+
149
+ hook = EntropyHook(config={"entropy_threshold": 0.15})
150
+
151
+ for token in your_agent.generate_stream():
152
+ result = hook.on_token(token.text, token.entropy)
153
+
154
+ if result["should_exit"]:
155
+ print(f"Early exit at token {result['index']}")
156
+ break
157
+ ```
158
+
159
+ ---
160
+
161
+ ## Troubleshooting
162
+
163
+ ### "No logprobs returned"
164
+ Some models don't support logprobs. Try a different model or check provider docs.
165
+
166
+ ### "Entropy is always 0"
167
+ Make sure `logprobs: true` and `top_logprobs: 5` are set in your API request.
168
+
169
+ ### "Proxy won't start"
170
+ Install dependencies: `pip install entroplain[all] fastapi uvicorn httpx`
171
+
172
+ ---
173
+
174
+ ## Learn More
175
+
176
+ - GitHub: https://github.com/entroplain/entroplain
177
+ - PyPI: https://pypi.org/project/entroplain/
178
+ - npm: https://www.npmjs.com/package/entroplain
@@ -1,30 +1,30 @@
1
- """
2
- Entroplain — Entropy-based early exit for efficient agent reasoning.
3
- """
4
-
5
- __version__ = "0.1.0"
6
- __author__ = "Entroplain Contributors"
7
-
8
- from .monitor import EntropyMonitor, calculate_entropy
9
- from .providers import (
10
- OpenAIProvider,
11
- AnthropicProvider,
12
- GeminiProvider,
13
- NVIDIAProvider,
14
- OllamaProvider,
15
- LlamaCppProvider,
16
- )
17
- from .hooks import track_entropy, early_exit
18
-
19
- __all__ = [
20
- "EntropyMonitor",
21
- "calculate_entropy",
22
- "OpenAIProvider",
23
- "AnthropicProvider",
24
- "GeminiProvider",
25
- "NVIDIAProvider",
26
- "OllamaProvider",
27
- "LlamaCppProvider",
28
- "track_entropy",
29
- "early_exit",
30
- ]
1
+ """
2
+ Entroplain — Entropy-based early exit for efficient agent reasoning.
3
+ """
4
+
5
+ __version__ = "0.2.0"
6
+ __author__ = "Entroplain Contributors"
7
+
8
+ from .monitor import EntropyMonitor, calculate_entropy
9
+ from .providers import (
10
+ OpenAIProvider,
11
+ AnthropicProvider,
12
+ GeminiProvider,
13
+ NVIDIAProvider,
14
+ OllamaProvider,
15
+ LlamaCppProvider,
16
+ )
17
+ from .hooks import track_entropy, early_exit
18
+
19
+ __all__ = [
20
+ "EntropyMonitor",
21
+ "calculate_entropy",
22
+ "OpenAIProvider",
23
+ "AnthropicProvider",
24
+ "GeminiProvider",
25
+ "NVIDIAProvider",
26
+ "OllamaProvider",
27
+ "LlamaCppProvider",
28
+ "track_entropy",
29
+ "early_exit",
30
+ ]
@@ -0,0 +1,231 @@
1
+ """
2
+ Cost tracking and savings calculator.
3
+
4
+ Estimates cost savings from early exit based on token usage.
5
+ """
6
+
7
+ import math
8
+ from dataclasses import dataclass
9
+ from typing import Optional, Dict, Any
10
+ from enum import Enum
11
+
12
+
13
class PricingTier(Enum):
    """Known model pricing tiers.

    Each member's value is a 3-tuple:
    (canonical model name, input USD per 1M tokens, output USD per 1M tokens).
    """

    # --- OpenAI ---
    GPT4O = ("gpt-4o", 2.50, 10.00)
    GPT4O_MINI = ("gpt-4o-mini", 0.15, 0.60)
    GPT4_TURBO = ("gpt-4-turbo", 10.00, 30.00)

    # --- Anthropic ---
    CLAUDE_4_OPUS = ("claude-4-opus", 15.00, 75.00)
    CLAUDE_4_SONNET = ("claude-4-sonnet", 3.00, 15.00)

    # --- NVIDIA-hosted Meta models (same rate for input and output) ---
    LLAMA_70B = ("meta/llama-3.1-70b-instruct", 0.70, 0.70)
    LLAMA_405B = ("meta/llama-3.1-405b-instruct", 2.70, 2.70)

    # Fallback used when the model name is not recognized.
    DEFAULT = ("default", 1.00, 1.00)
30
+
31
+
32
@dataclass
class CostEstimate:
    """Cost breakdown for a single completion, with and without early exit."""

    model: str                # model name the estimate was computed for
    input_tokens: int         # prompt tokens, billed at the input rate
    output_tokens: int        # tokens actually generated
    output_tokens_full: int   # projected output length had there been no early exit
    cost_actual_usd: float    # USD cost of the actual request
    cost_full_usd: float      # USD cost of the hypothetical full-length request
    cost_saved_usd: float     # cost_full_usd minus cost_actual_usd
    savings_percent: float    # saved cost as a percentage of the full cost
43
+
44
+
45
class CostTracker:
    """
    Track token usage and calculate cost savings.

    Usage:
        tracker = CostTracker(model="gpt-4o")
        tracker.track_input(100)        # 100 input tokens
        tracker.track_output(50)        # 50 output tokens
        tracker.set_full_estimate(150)  # Would have been 150 output tokens

        estimate = tracker.get_estimate()
        print(f"Saved ${estimate.cost_saved_usd:.4f}")
    """

    # Model name to pricing tier mapping (lookup is done on the lowercased name).
    MODEL_ALIASES = {
        # OpenAI
        "gpt-4o": PricingTier.GPT4O,
        "gpt-4o-mini": PricingTier.GPT4O_MINI,
        "gpt-4-turbo": PricingTier.GPT4_TURBO,
        "gpt-4-turbo-preview": PricingTier.GPT4_TURBO,

        # Anthropic
        "claude-4-opus": PricingTier.CLAUDE_4_OPUS,
        "claude-opus-4": PricingTier.CLAUDE_4_OPUS,
        "claude-4-sonnet": PricingTier.CLAUDE_4_SONNET,
        "claude-sonnet-4": PricingTier.CLAUDE_4_SONNET,

        # NVIDIA / Meta
        "meta/llama-3.1-70b-instruct": PricingTier.LLAMA_70B,
        "llama-3.1-70b": PricingTier.LLAMA_70B,
        "meta/llama-3.1-405b-instruct": PricingTier.LLAMA_405B,
        "llama-3.1-405b": PricingTier.LLAMA_405B,
    }

    def __init__(
        self,
        model: str = "default",
        custom_pricing: Optional[tuple] = None
    ):
        """
        Initialize cost tracker.

        Args:
            model: Model name (e.g., "gpt-4o", "claude-4-sonnet")
            custom_pricing: Optional (input_price, output_price) per 1M tokens;
                takes precedence over the built-in tier table when provided.
        """
        self.model = model
        self.input_tokens = 0
        self.output_tokens = 0
        self.estimated_full_output: Optional[int] = None
        self._custom_pricing = custom_pricing

        # Resolve the per-1M-token prices for this model.
        if custom_pricing:
            self._input_price, self._output_price = custom_pricing
        else:
            tier = self.MODEL_ALIASES.get(model.lower(), PricingTier.DEFAULT)
            # tier.value is (canonical name, input $/1M, output $/1M).
            _, self._input_price, self._output_price = tier.value

    def track_input(self, tokens: int) -> None:
        """Track input (prompt) tokens."""
        self.input_tokens += tokens

    def track_output(self, tokens: int) -> None:
        """Track output tokens generated."""
        self.output_tokens += tokens

    def set_full_estimate(self, tokens: int) -> None:
        """Set estimate of what output would have been without early exit."""
        self.estimated_full_output = tokens

    def estimate_full_output(self, multiplier: float = 2.0) -> int:
        """
        Auto-estimate full output if not set.

        Uses a simple multiplier on the observed output tokens when no
        explicit estimate was provided. The default multiplier assumes
        early exit saves ~50%.
        """
        # BUGFIX: compare against None rather than truthiness, so an explicit
        # estimate of 0 (set via set_full_estimate) is honored instead of
        # silently falling back to the multiplier heuristic.
        if self.estimated_full_output is not None:
            return self.estimated_full_output
        return int(self.output_tokens * multiplier)

    def calculate_cost(self, input_tokens: int, output_tokens: int) -> float:
        """Calculate USD cost for the given token counts at this model's rates."""
        input_cost = (input_tokens / 1_000_000) * self._input_price
        output_cost = (output_tokens / 1_000_000) * self._output_price
        return input_cost + output_cost

    def get_estimate(self) -> CostEstimate:
        """Get cost estimate with savings calculation."""
        full_output = self.estimate_full_output()

        cost_actual = self.calculate_cost(self.input_tokens, self.output_tokens)
        cost_full = self.calculate_cost(self.input_tokens, full_output)
        cost_saved = cost_full - cost_actual

        # Guard against division by zero when nothing has been billed yet.
        if cost_full > 0:
            savings_pct = (cost_saved / cost_full) * 100
        else:
            savings_pct = 0.0

        return CostEstimate(
            model=self.model,
            input_tokens=self.input_tokens,
            output_tokens=self.output_tokens,
            output_tokens_full=full_output,
            cost_actual_usd=cost_actual,
            cost_full_usd=cost_full,
            cost_saved_usd=cost_saved,
            savings_percent=savings_pct
        )

    def reset(self) -> None:
        """Reset tracking for a new request (pricing configuration is kept)."""
        self.input_tokens = 0
        self.output_tokens = 0
        self.estimated_full_output = None

    def get_stats(self) -> Dict[str, Any]:
        """Get current stats as a plain dict (convenient for logging/JSON)."""
        estimate = self.get_estimate()
        return {
            "model": estimate.model,
            "input_tokens": estimate.input_tokens,
            "output_tokens": estimate.output_tokens,
            "output_tokens_full": estimate.output_tokens_full,
            "tokens_saved": estimate.output_tokens_full - estimate.output_tokens,
            "cost_actual_usd": estimate.cost_actual_usd,
            "cost_full_usd": estimate.cost_full_usd,
            "cost_saved_usd": estimate.cost_saved_usd,
            "savings_percent": estimate.savings_percent,
        }
178
+
179
+
180
+ # Convenience function for quick estimates
181
def estimate_savings(
    model: str,
    tokens_generated: int,
    tokens_if_full: int,
    input_tokens: int = 0
) -> CostEstimate:
    """
    One-shot convenience wrapper around CostTracker for a quick savings estimate.

    Args:
        model: Model name
        tokens_generated: Actual tokens generated (with early exit)
        tokens_if_full: Tokens that would have been generated without early exit
        input_tokens: Input prompt tokens

    Returns:
        CostEstimate with savings details
    """
    # Feed a throwaway tracker and immediately read the estimate back out.
    tracker = CostTracker(model)
    tracker.track_output(tokens_generated)
    tracker.track_input(input_tokens)
    tracker.set_full_estimate(tokens_if_full)
    return tracker.get_estimate()
204
+
205
+
206
def format_cost_report(estimate: CostEstimate) -> str:
    """Format a human-readable cost report.

    Args:
        estimate: A completed estimate, e.g. from CostTracker.get_estimate().

    Returns:
        A multi-line string summarizing token usage and cost savings.
    """
    # Hoist the repeated saved-token expression; also drop the pointless
    # empty f-strings (plain "" produces the identical blank lines).
    tokens_saved = estimate.output_tokens_full - estimate.output_tokens
    lines = [
        f"📊 Cost Report for {estimate.model}",
        "",
        f" Input tokens: {estimate.input_tokens:,}",
        f" Output tokens: {estimate.output_tokens:,} (actual)",
        f" {estimate.output_tokens_full:,} (if no early exit)",
        f" Tokens saved: {tokens_saved:,}",
        "",
        f" Cost actual: ${estimate.cost_actual_usd:.6f}",
        f" Cost if full: ${estimate.cost_full_usd:.6f}",
        f" 💰 Cost saved: ${estimate.cost_saved_usd:.6f} ({estimate.savings_percent:.1f}%)",
    ]
    return "\n".join(lines)
221
+
222
+
223
+ if __name__ == "__main__":
224
+ # Demo
225
+ estimate = estimate_savings(
226
+ model="gpt-4o",
227
+ tokens_generated=82,
228
+ tokens_if_full=150,
229
+ input_tokens=50
230
+ )
231
+ print(format_cost_report(estimate))