entroplain 0.1.1 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/26.0.1 +0 -0
- package/CONTRIBUTING.md +103 -103
- package/DEPLOY.md +41 -0
- package/README.md +478 -389
- package/dist/entroplain-0.2.2-py3-none-any.whl +0 -0
- package/dist/entroplain-0.2.2.tar.gz +0 -0
- package/dist/entroplain-0.2.3-py3-none-any.whl +0 -0
- package/dist/entroplain-0.2.3.tar.gz +0 -0
- package/docs/AGENT_USAGE.md +178 -178
- package/docs/USAGE.md +302 -302
- package/entroplain/__init__.py +32 -33
- package/entroplain/cost_tracker.py +231 -0
- package/entroplain/dashboard.py +480 -0
- package/entroplain/monitor.py +390 -272
- package/entroplain/providers.py +626 -626
- package/entroplain/proxy.py +561 -278
- package/entroplain/shared_state.py +72 -0
- package/entroplain-proxy +0 -0
- package/package.json +47 -44
- package/paper.md +299 -0
- package/pip +0 -0
- package/pyproject.toml +96 -89
- package/scripts/setup.bat +89 -0
- package/scripts/setup.sh +98 -0
- package/test_nvidia.py +56 -0
- package/test_proxy.py +16 -0
- package/vercel.json +6 -0
- package/website/README.md +14 -0
- package/website/app/globals.css +88 -0
- package/website/app/layout.tsx +34 -0
- package/website/app/page.tsx +537 -0
- package/website/package-lock.json +520 -0
- package/website/package.json +25 -0
- package/website/tsconfig.json +40 -0
- package/website/vercel.json +3 -0
- package/dist/entroplain-0.1.1-py3-none-any.whl +0 -0
- package/dist/entroplain-0.1.1.tar.gz +0 -0
|
@@ -0,0 +1,231 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Cost tracking and savings calculator.
|
|
3
|
+
|
|
4
|
+
Estimates cost savings from early exit based on token usage.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import math
|
|
8
|
+
from dataclasses import dataclass
|
|
9
|
+
from typing import Optional, Dict, Any
|
|
10
|
+
from enum import Enum
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class PricingTier(Enum):
    """Per-model price table.

    Every member's value is a 3-tuple
    ``(model_name, input_price, output_price)``; both prices are quoted
    in USD per 1M tokens.
    """

    # --- OpenAI ---
    GPT4O = ("gpt-4o", 2.5, 10.0)
    GPT4O_MINI = ("gpt-4o-mini", 0.15, 0.6)
    GPT4_TURBO = ("gpt-4-turbo", 10.0, 30.0)

    # --- Anthropic ---
    CLAUDE_4_OPUS = ("claude-4-opus", 15.0, 75.0)
    CLAUDE_4_SONNET = ("claude-4-sonnet", 3.0, 15.0)

    # --- NVIDIA ---
    LLAMA_70B = ("meta/llama-3.1-70b-instruct", 0.7, 0.7)
    LLAMA_405B = ("meta/llama-3.1-405b-instruct", 2.7, 2.7)

    # --- Fallback entry for model names that have no entry of their own ---
    DEFAULT = ("default", 1.0, 1.0)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
@dataclass
class CostEstimate:
    """Result record describing the estimated cost of one completion.

    Holds the token counts that were priced together with the resulting
    dollar figures, both for the actual run and for the hypothetical run
    without early exit.
    """

    # Name of the model the estimate was computed for.
    model: str
    # Number of input tokens that were priced.
    input_tokens: int
    # Number of output tokens actually generated.
    output_tokens: int
    # Output token count assumed if there had been no early exit.
    output_tokens_full: int
    # Cost in USD for input_tokens + output_tokens.
    cost_actual_usd: float
    # Cost in USD for input_tokens + output_tokens_full.
    cost_full_usd: float
    # Difference in USD between the full cost and the actual cost.
    cost_saved_usd: float
    # Saved cost expressed as a percentage of the full cost.
    savings_percent: float
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
class CostTracker:
    """
    Track token usage and calculate cost savings.

    Prices are in USD per 1M tokens. They are taken from ``custom_pricing``
    when given, otherwise from the ``PricingTier`` that ``MODEL_ALIASES``
    maps the lower-cased model name to (``PricingTier.DEFAULT`` when the
    name is not listed).

    Usage:
        tracker = CostTracker(model="gpt-4o")
        tracker.track_input(100)  # 100 input tokens
        tracker.track_output(50)  # 50 output tokens
        tracker.set_full_estimate(150)  # Would have been 150 output tokens

        estimate = tracker.get_estimate()
        print(f"Saved ${estimate.cost_saved_usd:.4f}")
    """

    # Model name to pricing tier mapping
    MODEL_ALIASES = {
        # OpenAI
        "gpt-4o": PricingTier.GPT4O,
        "gpt-4o-mini": PricingTier.GPT4O_MINI,
        "gpt-4-turbo": PricingTier.GPT4_TURBO,
        "gpt-4-turbo-preview": PricingTier.GPT4_TURBO,

        # Anthropic
        "claude-4-opus": PricingTier.CLAUDE_4_OPUS,
        "claude-opus-4": PricingTier.CLAUDE_4_OPUS,
        "claude-4-sonnet": PricingTier.CLAUDE_4_SONNET,
        "claude-sonnet-4": PricingTier.CLAUDE_4_SONNET,

        # NVIDIA / Meta
        "meta/llama-3.1-70b-instruct": PricingTier.LLAMA_70B,
        "llama-3.1-70b": PricingTier.LLAMA_70B,
        "meta/llama-3.1-405b-instruct": PricingTier.LLAMA_405B,
        "llama-3.1-405b": PricingTier.LLAMA_405B,
    }

    def __init__(
        self,
        model: str = "default",
        custom_pricing: Optional[tuple] = None
    ):
        """
        Initialize cost tracker.

        Args:
            model: Model name (e.g., "gpt-4o", "claude-4-sonnet")
            custom_pricing: Optional (input_price, output_price) per 1M tokens
        """
        self.model = model
        self.input_tokens = 0
        self.output_tokens = 0
        # None means "no explicit estimate"; see estimate_full_output().
        self.estimated_full_output: Optional[int] = None
        self._custom_pricing = custom_pricing

        # Get pricing for model
        if custom_pricing:
            self._input_price, self._output_price = custom_pricing
        else:
            # Lookup is case-insensitive; unlisted names use DEFAULT pricing.
            tier = self.MODEL_ALIASES.get(model.lower(), PricingTier.DEFAULT)
            _, self._input_price, self._output_price = tier.value

    def track_input(self, tokens: int) -> None:
        """Add ``tokens`` to the running input token count."""
        self.input_tokens += tokens

    def track_output(self, tokens: int) -> None:
        """Add ``tokens`` to the running count of generated output tokens."""
        self.output_tokens += tokens

    def set_full_estimate(self, tokens: int) -> None:
        """Set estimate of what output would have been without early exit."""
        self.estimated_full_output = tokens

    def estimate_full_output(self, multiplier: float = 2.0) -> int:
        """
        Return the output token count assumed for a run without early exit.

        An estimate stored via ``set_full_estimate`` is returned as-is,
        including an explicit ``0``. Only when no estimate has been set is
        the value derived as ``int(output_tokens * multiplier)``; the
        default multiplier of 2.0 assumes early exit saves ~50%.

        Args:
            multiplier: Factor applied to the observed output tokens when
                no explicit estimate is stored.
        """
        # Compare against None rather than relying on truthiness, so that
        # an explicit estimate of 0 is not mistaken for "not set".
        if self.estimated_full_output is not None:
            return self.estimated_full_output
        return int(self.output_tokens * multiplier)

    def calculate_cost(self, input_tokens: int, output_tokens: int) -> float:
        """Return the USD cost of the given token counts at this tracker's prices."""
        input_cost = (input_tokens / 1_000_000) * self._input_price
        output_cost = (output_tokens / 1_000_000) * self._output_price
        return input_cost + output_cost

    def get_estimate(self) -> CostEstimate:
        """
        Build a ``CostEstimate`` from the tokens tracked so far.

        The saved cost is ``cost_full - cost_actual``, so it is negative
        when the full-output estimate is smaller than the tracked output.
        The savings percentage is 0.0 when the full cost is not positive.
        """
        full_output = self.estimate_full_output()

        cost_actual = self.calculate_cost(self.input_tokens, self.output_tokens)
        cost_full = self.calculate_cost(self.input_tokens, full_output)
        cost_saved = cost_full - cost_actual

        # Guard the division: a zero full cost has no meaningful percentage.
        savings_pct = (cost_saved / cost_full) * 100 if cost_full > 0 else 0.0

        return CostEstimate(
            model=self.model,
            input_tokens=self.input_tokens,
            output_tokens=self.output_tokens,
            output_tokens_full=full_output,
            cost_actual_usd=cost_actual,
            cost_full_usd=cost_full,
            cost_saved_usd=cost_saved,
            savings_percent=savings_pct
        )

    def reset(self) -> None:
        """Reset tracking for new request (model and prices are kept)."""
        self.input_tokens = 0
        self.output_tokens = 0
        self.estimated_full_output = None

    def get_stats(self) -> Dict[str, Any]:
        """Return the current estimate as a plain dict, plus ``tokens_saved``."""
        estimate = self.get_estimate()
        return {
            "model": estimate.model,
            "input_tokens": estimate.input_tokens,
            "output_tokens": estimate.output_tokens,
            "output_tokens_full": estimate.output_tokens_full,
            "tokens_saved": estimate.output_tokens_full - estimate.output_tokens,
            "cost_actual_usd": estimate.cost_actual_usd,
            "cost_full_usd": estimate.cost_full_usd,
            "cost_saved_usd": estimate.cost_saved_usd,
            "savings_percent": estimate.savings_percent,
        }
|
|
178
|
+
|
|
179
|
+
|
|
180
|
+
# Convenience function for quick estimates
|
|
181
|
+
def estimate_savings(
    model: str,
    tokens_generated: int,
    tokens_if_full: int,
    input_tokens: int = 0
) -> CostEstimate:
    """
    One-shot savings calculation without managing a tracker by hand.

    Creates a throwaway CostTracker for ``model``, feeds it the given
    counts and returns the resulting estimate.

    Args:
        model: Model name
        tokens_generated: Actual tokens generated (with early exit)
        tokens_if_full: Tokens that would have been generated without early exit
        input_tokens: Input prompt tokens

    Returns:
        CostEstimate with savings details
    """
    calc = CostTracker(model)
    calc.track_input(input_tokens)
    calc.track_output(tokens_generated)
    calc.set_full_estimate(tokens_if_full)
    result = calc.get_estimate()
    return result
|
|
204
|
+
|
|
205
|
+
|
|
206
|
+
def format_cost_report(estimate: CostEstimate) -> str:
    """Render ``estimate`` as a multi-line, human-readable text report.

    The report lists the token counts (actual, full, and their
    difference) followed by the actual, full and saved cost in USD.
    Lines are joined with newlines; there is no trailing newline.
    """
    tokens_saved = estimate.output_tokens_full - estimate.output_tokens

    token_section = [
        f" Input tokens: {estimate.input_tokens:,}",
        f" Output tokens: {estimate.output_tokens:,} (actual)",
        f" {estimate.output_tokens_full:,} (if no early exit)",
        f" Tokens saved: {tokens_saved:,}",
    ]
    cost_section = [
        f" Cost actual: ${estimate.cost_actual_usd:.6f}",
        f" Cost if full: ${estimate.cost_full_usd:.6f}",
        f" 💰 Cost saved: ${estimate.cost_saved_usd:.6f} ({estimate.savings_percent:.1f}%)",
    ]

    # Header, then the two sections, each preceded by an empty spacer line.
    report = [f"📊 Cost Report for {estimate.model}", ""]
    report.extend(token_section)
    report.append("")
    report.extend(cost_section)
    return "\n".join(report)
|
|
221
|
+
|
|
222
|
+
|
|
223
|
+
if __name__ == "__main__":
    # Demo: price an 82-token early-exit completion against a
    # 150-token full completion for a 50-token prompt, then print it.
    estimate = estimate_savings(
        model="gpt-4o",
        tokens_generated=82,
        tokens_if_full=150,
        input_tokens=50,
    )
    print(format_cost_report(estimate))
|