entroplain 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,178 @@
1
+ # Entroplain Usage Guide for Agents
2
+
3
+ ## Quick Setup
4
+
5
+ ### For OpenClaw/Claude Code (Proxy Method)
6
+
7
+ Run the entropy proxy and point your agent to it:
8
+
9
+ ```bash
10
+ # Start the proxy (monitors entropy, enables early exit)
11
+ python -m entroplain.proxy --port 8765 --log-entropy
12
+
13
+ # Set environment to use proxy
14
+ export OPENAI_BASE_URL=http://localhost:8765/v1
15
+ # or for NVIDIA:
16
+ export NVIDIA_BASE_URL=http://localhost:8765/v1
17
+ ```
18
+
19
+ Now OpenClaw/Claude Code will automatically have entropy monitoring!
20
+
21
+ ### How the Proxy Works
22
+
23
+ ```
24
+ Agent -> Proxy (localhost:8765) -> Real API
25
+ |
26
+ v
27
+ Entropy Monitor
28
+ |
29
+ v
30
+ Early Exit Check
31
+ ```
32
+
33
+ The proxy:
34
+ 1. Intercepts all chat completion requests
35
+ 2. Enables logprobs automatically
36
+ 3. Calculates entropy for each token
37
+ 4. Terminates stream when reasoning converges
38
+ 5. Passes everything through unchanged to the agent
39
+
40
+ ---
41
+
42
+ ## Direct Usage (Python)
43
+
44
+ ```python
45
+ from entroplain import EntropyMonitor, NVIDIAProvider
46
+
47
+ monitor = EntropyMonitor()
48
+ provider = NVIDIAProvider()
49
+
50
+ for token in provider.stream_with_entropy(
51
+ model="meta/llama-3.1-70b-instruct",
52
+ messages=[{"role": "user", "content": "Solve: x^2 = 16"}]
53
+ ):
54
+ monitor.track(token.token, token.entropy)
55
+ print(token.token, end="")
56
+
57
+ if monitor.should_exit():
58
+ print("\n[Early exit - reasoning converged]")
59
+ break
60
+
61
+ print(f"\nStats: {monitor.get_stats()}")
62
+ ```
63
+
64
+ ---
65
+
66
+ ## Supported Providers
67
+
68
+ | Provider | Works? | How |
69
+ |----------|--------|-----|
70
+ | OpenAI | YES | `logprobs: true` |
71
+ | NVIDIA NIM | YES | OpenAI-compatible |
72
+ | Anthropic Claude 4 | YES | `logprobs: true` |
73
+ | Google Gemini | YES | `response_logprobs=True` |
74
+ | Ollama (local) | YES | Built-in logit access |
75
+ | llama.cpp | YES | Built-in logit access |
76
+
77
+ ---
78
+
79
+ ## Configuration
80
+
81
+ ### Exit Conditions
82
+
83
+ ```python
84
+ monitor = EntropyMonitor(
85
+ entropy_threshold=0.15, # Exit when entropy drops below this
86
+ min_valleys=2, # Require N reasoning milestones
87
+ min_tokens=50, # Don't exit before this many tokens
88
+ velocity_threshold=0.05, # Exit when change rate stabilizes
89
+ exit_condition="combined" # or: "valleys_plateau", "entropy_drop", "velocity_zero"
90
+ )
91
+ ```
92
+
93
+ ### Environment Variables
94
+
95
+ ```bash
96
+ # API keys (used by providers)
97
+ export OPENAI_API_KEY=sk-...
98
+ export ANTHROPIC_API_KEY=sk-ant-...
99
+ export NVIDIA_API_KEY=nvapi-...
100
+ export GOOGLE_API_KEY=...
101
+
102
+ # For proxy
103
+ export ENTROPPLAIN_PORT=8765
104
+ export ENTROPPLAIN_LOG_ENTROPY=true
105
+ ```
106
+
107
+ ---
108
+
109
+ ## CLI
110
+
111
+ ```bash
112
+ # Analyze a prompt
113
+ entroplain analyze "What is 2+2?" --model gpt-4o
114
+
115
+ # Stream with early exit
116
+ entroplain stream "Explain quantum computing" --exit-on-converge
117
+
118
+ # Run proxy
119
+ entroplain proxy --port 8765 --log-entropy
120
+ ```
121
+
122
+ ---
123
+
124
+ ## Agent Integration Examples
125
+
126
+ ### OpenClaw with Proxy
127
+
128
+ ```yaml
129
+ # In config.yaml
130
+ llm:
131
+ provider: openai-compatible
132
+ base_url: http://localhost:8765/v1 # Point to proxy
133
+ primary_model: meta/llama-3.1-70b-instruct
134
+ ```
135
+
136
+ ### Claude Code with Proxy
137
+
138
+ Set environment before running:
139
+ ```bash
140
+ export ANTHROPIC_BASE_URL=http://localhost:8765/v1
141
+ claude
142
+ ```
143
+
144
+ ### Custom Agent
145
+
146
+ ```python
147
+ from entroplain.hooks import EntropyHook
148
+
149
+ hook = EntropyHook(config={"entropy_threshold": 0.15})
150
+
151
+ for token in your_agent.generate_stream():
152
+ result = hook.on_token(token.text, token.entropy)
153
+
154
+ if result["should_exit"]:
155
+ print(f"Early exit at token {result['index']}")
156
+ break
157
+ ```
158
+
159
+ ---
160
+
161
+ ## Troubleshooting
162
+
163
+ ### "No logprobs returned"
164
+ Some models don't support logprobs. Try a different model or check provider docs.
165
+
166
+ ### "Entropy is always 0"
167
+ Make sure `logprobs: true` and `top_logprobs: 5` are set in your API request.
168
+
169
+ ### "Proxy won't start"
170
+ Install dependencies: `pip install entroplain[all] fastapi uvicorn httpx`
171
+
172
+ ---
173
+
174
+ ## Learn More
175
+
176
+ - GitHub: https://github.com/entroplain/entroplain
177
+ - PyPI: https://pypi.org/project/entroplain/
178
+ - npm: https://www.npmjs.com/package/entroplain
@@ -1,30 +1,30 @@
1
- """
2
- Entroplain — Entropy-based early exit for efficient agent reasoning.
3
- """
4
-
5
- __version__ = "0.1.0"
6
- __author__ = "Entroplain Contributors"
7
-
8
- from .monitor import EntropyMonitor, calculate_entropy
9
- from .providers import (
10
- OpenAIProvider,
11
- AnthropicProvider,
12
- GeminiProvider,
13
- NVIDIAProvider,
14
- OllamaProvider,
15
- LlamaCppProvider,
16
- )
17
- from .hooks import track_entropy, early_exit
18
-
19
- __all__ = [
20
- "EntropyMonitor",
21
- "calculate_entropy",
22
- "OpenAIProvider",
23
- "AnthropicProvider",
24
- "GeminiProvider",
25
- "NVIDIAProvider",
26
- "OllamaProvider",
27
- "LlamaCppProvider",
28
- "track_entropy",
29
- "early_exit",
30
- ]
1
+ """
2
+ Entroplain — Entropy-based early exit for efficient agent reasoning.
3
+ """
4
+
5
+ __version__ = "0.2.0"
6
+ __author__ = "Entroplain Contributors"
7
+
8
+ from .monitor import EntropyMonitor, calculate_entropy
9
+ from .providers import (
10
+ OpenAIProvider,
11
+ AnthropicProvider,
12
+ GeminiProvider,
13
+ NVIDIAProvider,
14
+ OllamaProvider,
15
+ LlamaCppProvider,
16
+ )
17
+ from .hooks import track_entropy, early_exit
18
+
19
+ __all__ = [
20
+ "EntropyMonitor",
21
+ "calculate_entropy",
22
+ "OpenAIProvider",
23
+ "AnthropicProvider",
24
+ "GeminiProvider",
25
+ "NVIDIAProvider",
26
+ "OllamaProvider",
27
+ "LlamaCppProvider",
28
+ "track_entropy",
29
+ "early_exit",
30
+ ]
@@ -0,0 +1,231 @@
1
+ """
2
+ Cost tracking and savings calculator.
3
+
4
+ Estimates cost savings from early exit based on token usage.
5
+ """
6
+
7
+ import math
8
+ from dataclasses import dataclass
9
+ from typing import Optional, Dict, Any
10
+ from enum import Enum
11
+
12
+
13
class PricingTier(Enum):
    """Known model pricing tiers.

    Each member's value is a 3-tuple:
    (canonical model name, input USD per 1M tokens, output USD per 1M tokens).
    """

    # --- OpenAI ---
    GPT4O = ("gpt-4o", 2.50, 10.00)
    GPT4O_MINI = ("gpt-4o-mini", 0.15, 0.60)
    GPT4_TURBO = ("gpt-4-turbo", 10.00, 30.00)

    # --- Anthropic ---
    CLAUDE_4_OPUS = ("claude-4-opus", 15.00, 75.00)
    CLAUDE_4_SONNET = ("claude-4-sonnet", 3.00, 15.00)

    # --- NVIDIA-hosted Meta models (same rate for input and output) ---
    LLAMA_70B = ("meta/llama-3.1-70b-instruct", 0.70, 0.70)
    LLAMA_405B = ("meta/llama-3.1-405b-instruct", 2.70, 2.70)

    # Fallback used when the model name is not recognized.
    DEFAULT = ("default", 1.00, 1.00)
30
+
31
+
32
@dataclass
class CostEstimate:
    """Cost breakdown for a single completion, with and without early exit."""

    model: str                # model name the estimate was computed for
    input_tokens: int         # prompt tokens, billed at the input rate
    output_tokens: int        # tokens actually generated
    output_tokens_full: int   # projected output length had there been no early exit
    cost_actual_usd: float    # USD cost of the actual request
    cost_full_usd: float      # USD cost of the hypothetical full-length request
    cost_saved_usd: float     # cost_full_usd minus cost_actual_usd
    savings_percent: float    # saved cost as a percentage of the full cost
43
+
44
+
45
class CostTracker:
    """
    Track token usage and calculate cost savings.

    Usage:
        tracker = CostTracker(model="gpt-4o")
        tracker.track_input(100)        # 100 input tokens
        tracker.track_output(50)        # 50 output tokens
        tracker.set_full_estimate(150)  # Would have been 150 output tokens

        estimate = tracker.get_estimate()
        print(f"Saved ${estimate.cost_saved_usd:.4f}")
    """

    # Model name to pricing tier mapping (lookup is done on the lowercased name).
    MODEL_ALIASES = {
        # OpenAI
        "gpt-4o": PricingTier.GPT4O,
        "gpt-4o-mini": PricingTier.GPT4O_MINI,
        "gpt-4-turbo": PricingTier.GPT4_TURBO,
        "gpt-4-turbo-preview": PricingTier.GPT4_TURBO,

        # Anthropic
        "claude-4-opus": PricingTier.CLAUDE_4_OPUS,
        "claude-opus-4": PricingTier.CLAUDE_4_OPUS,
        "claude-4-sonnet": PricingTier.CLAUDE_4_SONNET,
        "claude-sonnet-4": PricingTier.CLAUDE_4_SONNET,

        # NVIDIA / Meta
        "meta/llama-3.1-70b-instruct": PricingTier.LLAMA_70B,
        "llama-3.1-70b": PricingTier.LLAMA_70B,
        "meta/llama-3.1-405b-instruct": PricingTier.LLAMA_405B,
        "llama-3.1-405b": PricingTier.LLAMA_405B,
    }

    def __init__(
        self,
        model: str = "default",
        custom_pricing: Optional[tuple] = None
    ):
        """
        Initialize cost tracker.

        Args:
            model: Model name (e.g., "gpt-4o", "claude-4-sonnet")
            custom_pricing: Optional (input_price, output_price) per 1M tokens;
                takes precedence over the built-in tier table when provided.
        """
        self.model = model
        self.input_tokens = 0
        self.output_tokens = 0
        self.estimated_full_output: Optional[int] = None
        self._custom_pricing = custom_pricing

        # Resolve the per-1M-token prices for this model.
        if custom_pricing:
            self._input_price, self._output_price = custom_pricing
        else:
            tier = self.MODEL_ALIASES.get(model.lower(), PricingTier.DEFAULT)
            # tier.value is (canonical name, input $/1M, output $/1M).
            _, self._input_price, self._output_price = tier.value

    def track_input(self, tokens: int) -> None:
        """Track input (prompt) tokens."""
        self.input_tokens += tokens

    def track_output(self, tokens: int) -> None:
        """Track output tokens generated."""
        self.output_tokens += tokens

    def set_full_estimate(self, tokens: int) -> None:
        """Set estimate of what output would have been without early exit."""
        self.estimated_full_output = tokens

    def estimate_full_output(self, multiplier: float = 2.0) -> int:
        """
        Auto-estimate full output if not set.

        Uses a simple multiplier on the observed output tokens when no
        explicit estimate was provided. The default multiplier assumes
        early exit saves ~50%.
        """
        # BUGFIX: compare against None rather than truthiness, so an explicit
        # estimate of 0 (set via set_full_estimate) is honored instead of
        # silently falling back to the multiplier heuristic.
        if self.estimated_full_output is not None:
            return self.estimated_full_output
        return int(self.output_tokens * multiplier)

    def calculate_cost(self, input_tokens: int, output_tokens: int) -> float:
        """Calculate USD cost for the given token counts at this model's rates."""
        input_cost = (input_tokens / 1_000_000) * self._input_price
        output_cost = (output_tokens / 1_000_000) * self._output_price
        return input_cost + output_cost

    def get_estimate(self) -> CostEstimate:
        """Get cost estimate with savings calculation."""
        full_output = self.estimate_full_output()

        cost_actual = self.calculate_cost(self.input_tokens, self.output_tokens)
        cost_full = self.calculate_cost(self.input_tokens, full_output)
        cost_saved = cost_full - cost_actual

        # Guard against division by zero when nothing has been billed yet.
        if cost_full > 0:
            savings_pct = (cost_saved / cost_full) * 100
        else:
            savings_pct = 0.0

        return CostEstimate(
            model=self.model,
            input_tokens=self.input_tokens,
            output_tokens=self.output_tokens,
            output_tokens_full=full_output,
            cost_actual_usd=cost_actual,
            cost_full_usd=cost_full,
            cost_saved_usd=cost_saved,
            savings_percent=savings_pct
        )

    def reset(self) -> None:
        """Reset tracking for a new request (pricing configuration is kept)."""
        self.input_tokens = 0
        self.output_tokens = 0
        self.estimated_full_output = None

    def get_stats(self) -> Dict[str, Any]:
        """Get current stats as a plain dict (convenient for logging/JSON)."""
        estimate = self.get_estimate()
        return {
            "model": estimate.model,
            "input_tokens": estimate.input_tokens,
            "output_tokens": estimate.output_tokens,
            "output_tokens_full": estimate.output_tokens_full,
            "tokens_saved": estimate.output_tokens_full - estimate.output_tokens,
            "cost_actual_usd": estimate.cost_actual_usd,
            "cost_full_usd": estimate.cost_full_usd,
            "cost_saved_usd": estimate.cost_saved_usd,
            "savings_percent": estimate.savings_percent,
        }
178
+
179
+
180
+ # Convenience function for quick estimates
181
def estimate_savings(
    model: str,
    tokens_generated: int,
    tokens_if_full: int,
    input_tokens: int = 0
) -> CostEstimate:
    """
    One-shot convenience wrapper around CostTracker for a quick savings estimate.

    Args:
        model: Model name
        tokens_generated: Actual tokens generated (with early exit)
        tokens_if_full: Tokens that would have been generated without early exit
        input_tokens: Input prompt tokens

    Returns:
        CostEstimate with savings details
    """
    # Feed a throwaway tracker and immediately read the estimate back out.
    tracker = CostTracker(model)
    tracker.track_output(tokens_generated)
    tracker.track_input(input_tokens)
    tracker.set_full_estimate(tokens_if_full)
    return tracker.get_estimate()
204
+
205
+
206
def format_cost_report(estimate: CostEstimate) -> str:
    """Format a human-readable cost report.

    Args:
        estimate: A completed estimate, e.g. from CostTracker.get_estimate().

    Returns:
        A multi-line string summarizing token usage and cost savings.
    """
    # Hoist the repeated saved-token expression; also drop the pointless
    # empty f-strings (plain "" produces the identical blank lines).
    tokens_saved = estimate.output_tokens_full - estimate.output_tokens
    lines = [
        f"📊 Cost Report for {estimate.model}",
        "",
        f" Input tokens: {estimate.input_tokens:,}",
        f" Output tokens: {estimate.output_tokens:,} (actual)",
        f" {estimate.output_tokens_full:,} (if no early exit)",
        f" Tokens saved: {tokens_saved:,}",
        "",
        f" Cost actual: ${estimate.cost_actual_usd:.6f}",
        f" Cost if full: ${estimate.cost_full_usd:.6f}",
        f" 💰 Cost saved: ${estimate.cost_saved_usd:.6f} ({estimate.savings_percent:.1f}%)",
    ]
    return "\n".join(lines)
221
+
222
+
223
+ if __name__ == "__main__":
224
+ # Demo
225
+ estimate = estimate_savings(
226
+ model="gpt-4o",
227
+ tokens_generated=82,
228
+ tokens_if_full=150,
229
+ input_tokens=50
230
+ )
231
+ print(format_cost_report(estimate))