entroplain 0.2.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,231 +1,231 @@
1
- """
2
- Cost tracking and savings calculator.
3
-
4
- Estimates cost savings from early exit based on token usage.
5
- """
6
-
7
- import math
8
- from dataclasses import dataclass
9
- from typing import Optional, Dict, Any
10
- from enum import Enum
11
-
12
-
13
- class PricingTier(Enum):
14
- """Pricing tiers for different models."""
15
- # OpenAI
16
- GPT4O = ("gpt-4o", 2.50, 10.00) # input, output per 1M tokens
17
- GPT4O_MINI = ("gpt-4o-mini", 0.15, 0.60)
18
- GPT4_TURBO = ("gpt-4-turbo", 10.00, 30.00)
19
-
20
- # Anthropic
21
- CLAUDE_4_OPUS = ("claude-4-opus", 15.00, 75.00)
22
- CLAUDE_4_SONNET = ("claude-4-sonnet", 3.00, 15.00)
23
-
24
- # NVIDIA
25
- LLAMA_70B = ("meta/llama-3.1-70b-instruct", 0.70, 0.70)
26
- LLAMA_405B = ("meta/llama-3.1-405b-instruct", 2.70, 2.70)
27
-
28
- # Default (unknown model)
29
- DEFAULT = ("default", 1.00, 1.00)
30
-
31
-
32
- @dataclass
33
- class CostEstimate:
34
- """Estimated cost for a completion."""
35
- model: str
36
- input_tokens: int
37
- output_tokens: int
38
- output_tokens_full: int # If no early exit
39
- cost_actual_usd: float
40
- cost_full_usd: float
41
- cost_saved_usd: float
42
- savings_percent: float
43
-
44
-
45
- class CostTracker:
46
- """
47
- Track token usage and calculate cost savings.
48
-
49
- Usage:
50
- tracker = CostTracker(model="gpt-4o")
51
- tracker.track_input(100) # 100 input tokens
52
- tracker.track_output(50) # 50 output tokens
53
- tracker.set_full_estimate(150) # Would have been 150 output tokens
54
-
55
- estimate = tracker.get_estimate()
56
- print(f"Saved ${estimate.cost_saved_usd:.4f}")
57
- """
58
-
59
- # Model name to pricing tier mapping
60
- MODEL_ALIASES = {
61
- # OpenAI
62
- "gpt-4o": PricingTier.GPT4O,
63
- "gpt-4o-mini": PricingTier.GPT4O_MINI,
64
- "gpt-4-turbo": PricingTier.GPT4_TURBO,
65
- "gpt-4-turbo-preview": PricingTier.GPT4_TURBO,
66
-
67
- # Anthropic
68
- "claude-4-opus": PricingTier.CLAUDE_4_OPUS,
69
- "claude-opus-4": PricingTier.CLAUDE_4_OPUS,
70
- "claude-4-sonnet": PricingTier.CLAUDE_4_SONNET,
71
- "claude-sonnet-4": PricingTier.CLAUDE_4_SONNET,
72
-
73
- # NVIDIA / Meta
74
- "meta/llama-3.1-70b-instruct": PricingTier.LLAMA_70B,
75
- "llama-3.1-70b": PricingTier.LLAMA_70B,
76
- "meta/llama-3.1-405b-instruct": PricingTier.LLAMA_405B,
77
- "llama-3.1-405b": PricingTier.LLAMA_405B,
78
- }
79
-
80
- def __init__(
81
- self,
82
- model: str = "default",
83
- custom_pricing: Optional[tuple] = None
84
- ):
85
- """
86
- Initialize cost tracker.
87
-
88
- Args:
89
- model: Model name (e.g., "gpt-4o", "claude-4-sonnet")
90
- custom_pricing: Optional (input_price, output_price) per 1M tokens
91
- """
92
- self.model = model
93
- self.input_tokens = 0
94
- self.output_tokens = 0
95
- self.estimated_full_output = None
96
- self._custom_pricing = custom_pricing
97
-
98
- # Get pricing for model
99
- if custom_pricing:
100
- self._input_price, self._output_price = custom_pricing
101
- else:
102
- tier = self.MODEL_ALIASES.get(model.lower(), PricingTier.DEFAULT)
103
- self._input_price, self._output_price = tier.value[1], tier.value[2]
104
-
105
- def track_input(self, tokens: int):
106
- """Track input tokens."""
107
- self.input_tokens += tokens
108
-
109
- def track_output(self, tokens: int):
110
- """Track output tokens generated."""
111
- self.output_tokens += tokens
112
-
113
- def set_full_estimate(self, tokens: int):
114
- """Set estimate of what output would have been without early exit."""
115
- self.estimated_full_output = tokens
116
-
117
- def estimate_full_output(self, multiplier: float = 2.0) -> int:
118
- """
119
- Auto-estimate full output if not set.
120
-
121
- Uses a simple multiplier based on observed tokens.
122
- Default assumes early exit saves ~50%.
123
- """
124
- if self.estimated_full_output:
125
- return self.estimated_full_output
126
- return int(self.output_tokens * multiplier)
127
-
128
- def calculate_cost(self, input_tokens: int, output_tokens: int) -> float:
129
- """Calculate cost for given token counts."""
130
- input_cost = (input_tokens / 1_000_000) * self._input_price
131
- output_cost = (output_tokens / 1_000_000) * self._output_price
132
- return input_cost + output_cost
133
-
134
- def get_estimate(self) -> CostEstimate:
135
- """Get cost estimate with savings calculation."""
136
- full_output = self.estimate_full_output()
137
-
138
- cost_actual = self.calculate_cost(self.input_tokens, self.output_tokens)
139
- cost_full = self.calculate_cost(self.input_tokens, full_output)
140
- cost_saved = cost_full - cost_actual
141
-
142
- if cost_full > 0:
143
- savings_pct = (cost_saved / cost_full) * 100
144
- else:
145
- savings_pct = 0.0
146
-
147
- return CostEstimate(
148
- model=self.model,
149
- input_tokens=self.input_tokens,
150
- output_tokens=self.output_tokens,
151
- output_tokens_full=full_output,
152
- cost_actual_usd=cost_actual,
153
- cost_full_usd=cost_full,
154
- cost_saved_usd=cost_saved,
155
- savings_percent=savings_pct
156
- )
157
-
158
- def reset(self):
159
- """Reset tracking for new request."""
160
- self.input_tokens = 0
161
- self.output_tokens = 0
162
- self.estimated_full_output = None
163
-
164
- def get_stats(self) -> Dict[str, Any]:
165
- """Get current stats as dict."""
166
- estimate = self.get_estimate()
167
- return {
168
- "model": estimate.model,
169
- "input_tokens": estimate.input_tokens,
170
- "output_tokens": estimate.output_tokens,
171
- "output_tokens_full": estimate.output_tokens_full,
172
- "tokens_saved": estimate.output_tokens_full - estimate.output_tokens,
173
- "cost_actual_usd": estimate.cost_actual_usd,
174
- "cost_full_usd": estimate.cost_full_usd,
175
- "cost_saved_usd": estimate.cost_saved_usd,
176
- "savings_percent": estimate.savings_percent,
177
- }
178
-
179
-
180
- # Convenience function for quick estimates
181
- def estimate_savings(
182
- model: str,
183
- tokens_generated: int,
184
- tokens_if_full: int,
185
- input_tokens: int = 0
186
- ) -> CostEstimate:
187
- """
188
- Quick estimate of cost savings.
189
-
190
- Args:
191
- model: Model name
192
- tokens_generated: Actual tokens generated (with early exit)
193
- tokens_if_full: Tokens that would have been generated without early exit
194
- input_tokens: Input prompt tokens
195
-
196
- Returns:
197
- CostEstimate with savings details
198
- """
199
- tracker = CostTracker(model)
200
- tracker.track_input(input_tokens)
201
- tracker.track_output(tokens_generated)
202
- tracker.set_full_estimate(tokens_if_full)
203
- return tracker.get_estimate()
204
-
205
-
206
- def format_cost_report(estimate: CostEstimate) -> str:
207
- """Format a human-readable cost report."""
208
- lines = [
209
- f"📊 Cost Report for {estimate.model}",
210
- f"",
211
- f" Input tokens: {estimate.input_tokens:,}",
212
- f" Output tokens: {estimate.output_tokens:,} (actual)",
213
- f" {estimate.output_tokens_full:,} (if no early exit)",
214
- f" Tokens saved: {estimate.output_tokens_full - estimate.output_tokens:,}",
215
- f"",
216
- f" Cost actual: ${estimate.cost_actual_usd:.6f}",
217
- f" Cost if full: ${estimate.cost_full_usd:.6f}",
218
- f" 💰 Cost saved: ${estimate.cost_saved_usd:.6f} ({estimate.savings_percent:.1f}%)",
219
- ]
220
- return "\n".join(lines)
221
-
222
-
223
- if __name__ == "__main__":
224
- # Demo
225
- estimate = estimate_savings(
226
- model="gpt-4o",
227
- tokens_generated=82,
228
- tokens_if_full=150,
229
- input_tokens=50
230
- )
231
- print(format_cost_report(estimate))
1
+ """
2
+ Cost tracking and savings calculator.
3
+
4
+ Estimates cost savings from early exit based on token usage.
5
+ """
6
+
7
+ import math
8
+ from dataclasses import dataclass
9
+ from typing import Optional, Dict, Any
10
+ from enum import Enum
11
+
12
+
13
class PricingTier(Enum):
    """Price table keyed by model family.

    Every member's value is a 3-tuple ``(model_id, input_price, output_price)``
    where both prices are quoted per 1M tokens.
    """

    # --- OpenAI ---
    GPT4O = ("gpt-4o", 2.5, 10.0)
    GPT4O_MINI = ("gpt-4o-mini", 0.15, 0.6)
    GPT4_TURBO = ("gpt-4-turbo", 10.0, 30.0)

    # --- Anthropic ---
    CLAUDE_4_OPUS = ("claude-4-opus", 15.0, 75.0)
    CLAUDE_4_SONNET = ("claude-4-sonnet", 3.0, 15.0)

    # --- NVIDIA ---
    LLAMA_70B = ("meta/llama-3.1-70b-instruct", 0.7, 0.7)
    LLAMA_405B = ("meta/llama-3.1-405b-instruct", 2.7, 2.7)

    # --- Fallback used when a model name is not recognised ---
    DEFAULT = ("default", 1.0, 1.0)
30
+
31
+
32
@dataclass
class CostEstimate:
    """Cost breakdown for one completion, with and without early exit."""

    # Model name the figures were computed for
    model: str

    # Token counts
    input_tokens: int
    output_tokens: int
    output_tokens_full: int  # output tokens had generation not exited early

    # Dollar amounts
    cost_actual_usd: float
    cost_full_usd: float
    cost_saved_usd: float

    # Saved amount expressed as a percentage
    savings_percent: float
43
+
44
+
45
class CostTracker:
    """
    Accumulate token usage for a request and price the early-exit savings.

    Per-1M-token prices are fixed at construction time: they come from
    ``custom_pricing`` when it is given, otherwise from a lookup in
    ``MODEL_ALIASES`` (model names without an entry use
    ``PricingTier.DEFAULT``).

    Usage:
        tracker = CostTracker(model="gpt-4o")
        tracker.track_input(100)        # 100 input tokens
        tracker.track_output(50)        # 50 output tokens
        tracker.set_full_estimate(150)  # Would have been 150 output tokens

        estimate = tracker.get_estimate()
        print(f"Saved ${estimate.cost_saved_usd:.4f}")
    """

    # Lower-case model name -> pricing tier; several spellings share a tier.
    MODEL_ALIASES = {
        # OpenAI
        "gpt-4o": PricingTier.GPT4O,
        "gpt-4o-mini": PricingTier.GPT4O_MINI,
        "gpt-4-turbo": PricingTier.GPT4_TURBO,
        "gpt-4-turbo-preview": PricingTier.GPT4_TURBO,

        # Anthropic
        "claude-4-opus": PricingTier.CLAUDE_4_OPUS,
        "claude-opus-4": PricingTier.CLAUDE_4_OPUS,
        "claude-4-sonnet": PricingTier.CLAUDE_4_SONNET,
        "claude-sonnet-4": PricingTier.CLAUDE_4_SONNET,

        # NVIDIA / Meta
        "meta/llama-3.1-70b-instruct": PricingTier.LLAMA_70B,
        "llama-3.1-70b": PricingTier.LLAMA_70B,
        "meta/llama-3.1-405b-instruct": PricingTier.LLAMA_405B,
        "llama-3.1-405b": PricingTier.LLAMA_405B,
    }

    def __init__(
        self,
        model: str = "default",
        custom_pricing: Optional[tuple] = None
    ):
        """
        Initialize cost tracker.

        Args:
            model: Model name (e.g., "gpt-4o", "claude-4-sonnet"). It is
                lower-cased before being looked up in MODEL_ALIASES.
            custom_pricing: Optional (input_price, output_price) per 1M
                tokens. When provided it replaces the table lookup.

        Raises:
            ValueError: If a non-empty custom_pricing does not unpack into
                exactly two values.
        """
        self.model = model
        self.input_tokens = 0
        self.output_tokens = 0
        # None means "no explicit estimate"; see estimate_full_output().
        self.estimated_full_output = None
        self._custom_pricing = custom_pricing

        # Get pricing for model. A missing (None) or empty custom_pricing
        # falls through to the table lookup.
        if custom_pricing:
            self._input_price, self._output_price = custom_pricing
        else:
            tier = self.MODEL_ALIASES.get(model.lower(), PricingTier.DEFAULT)
            # Tier values are (model_id, input_price, output_price).
            self._input_price, self._output_price = tier.value[1], tier.value[2]

    def track_input(self, tokens: int) -> None:
        """Add ``tokens`` to the running input-token count."""
        self.input_tokens += tokens

    def track_output(self, tokens: int) -> None:
        """Add ``tokens`` to the running count of generated output tokens."""
        self.output_tokens += tokens

    def set_full_estimate(self, tokens: int) -> None:
        """Set estimate of what output would have been without early exit."""
        self.estimated_full_output = tokens

    def estimate_full_output(self, multiplier: float = 2.0) -> int:
        """
        Return the output-token count assumed for a run without early exit.

        If an estimate was stored via set_full_estimate() it is returned
        unchanged; otherwise the observed output tokens are scaled by
        ``multiplier`` and truncated to an int. The default multiplier of
        2.0 assumes early exit saves ~50%.

        Args:
            multiplier: Scale factor applied to the observed output tokens
                when no explicit estimate has been set.

        Returns:
            The explicit estimate, or ``int(output_tokens * multiplier)``.
        """
        # Compare against None rather than truthiness so that an explicit
        # estimate of 0 is honoured instead of being mistaken for "unset".
        if self.estimated_full_output is not None:
            return self.estimated_full_output
        return int(self.output_tokens * multiplier)

    def calculate_cost(self, input_tokens: int, output_tokens: int) -> float:
        """
        Calculate cost for given token counts.

        Args:
            input_tokens: Number of input tokens to price.
            output_tokens: Number of output tokens to price.

        Returns:
            Sum of the input and output cost, using this tracker's
            per-1M-token prices.
        """
        input_cost = (input_tokens / 1_000_000) * self._input_price
        output_cost = (output_tokens / 1_000_000) * self._output_price
        return input_cost + output_cost

    def get_estimate(self) -> CostEstimate:
        """
        Get cost estimate with savings calculation.

        Returns:
            A CostEstimate comparing the cost of the tracked tokens with the
            cost of the same input plus the full (no early exit) output.
            ``savings_percent`` is 0.0 when the full cost is not positive.
        """
        full_output = self.estimate_full_output()

        cost_actual = self.calculate_cost(self.input_tokens, self.output_tokens)
        cost_full = self.calculate_cost(self.input_tokens, full_output)
        cost_saved = cost_full - cost_actual

        # Guard the division: with no tokens (or zero prices) cost_full is 0.
        if cost_full > 0:
            savings_pct = (cost_saved / cost_full) * 100
        else:
            savings_pct = 0.0

        return CostEstimate(
            model=self.model,
            input_tokens=self.input_tokens,
            output_tokens=self.output_tokens,
            output_tokens_full=full_output,
            cost_actual_usd=cost_actual,
            cost_full_usd=cost_full,
            cost_saved_usd=cost_saved,
            savings_percent=savings_pct
        )

    def reset(self) -> None:
        """Reset token counts and the full-output estimate for a new request.

        The model name and the resolved prices are left as they are.
        """
        self.input_tokens = 0
        self.output_tokens = 0
        self.estimated_full_output = None

    def get_stats(self) -> Dict[str, Any]:
        """
        Get current stats as dict.

        Returns:
            The fields of get_estimate() as a plain dict, plus
            ``tokens_saved`` (full output tokens minus actual output tokens).
        """
        estimate = self.get_estimate()
        return {
            "model": estimate.model,
            "input_tokens": estimate.input_tokens,
            "output_tokens": estimate.output_tokens,
            "output_tokens_full": estimate.output_tokens_full,
            "tokens_saved": estimate.output_tokens_full - estimate.output_tokens,
            "cost_actual_usd": estimate.cost_actual_usd,
            "cost_full_usd": estimate.cost_full_usd,
            "cost_saved_usd": estimate.cost_saved_usd,
            "savings_percent": estimate.savings_percent,
        }
178
+
179
+
180
+ # Convenience function for quick estimates
181
def estimate_savings(
    model: str,
    tokens_generated: int,
    tokens_if_full: int,
    input_tokens: int = 0
) -> CostEstimate:
    """
    One-shot savings calculation for a single completion.

    Creates a throwaway CostTracker for ``model``, records the supplied
    token counts on it and returns the tracker's estimate.

    Args:
        model: Model name
        tokens_generated: Output tokens actually produced (with early exit)
        tokens_if_full: Output tokens expected had there been no early exit
        input_tokens: Prompt tokens; defaults to 0

    Returns:
        CostEstimate with savings details
    """
    meter = CostTracker(model)
    # The three recordings touch separate counters, so their order is free.
    meter.track_output(tokens_generated)
    meter.track_input(input_tokens)
    meter.set_full_estimate(tokens_if_full)
    return meter.get_estimate()
204
+
205
+
206
def format_cost_report(estimate: CostEstimate) -> str:
    """
    Format a human-readable cost report.

    Args:
        estimate: The estimate to render. Its token counts are printed with
            thousands separators, dollar amounts with six decimals and the
            savings percentage with one decimal.

    Returns:
        A multi-line string (lines joined with "\\n", no trailing newline).
    """
    tokens_saved = estimate.output_tokens_full - estimate.output_tokens
    # Blank separator lines are plain literals: an f-prefix without any
    # placeholder does nothing and is flagged by linters.
    lines = [
        f"📊 Cost Report for {estimate.model}",
        "",
        f" Input tokens: {estimate.input_tokens:,}",
        f" Output tokens: {estimate.output_tokens:,} (actual)",
        f" {estimate.output_tokens_full:,} (if no early exit)",
        f" Tokens saved: {tokens_saved:,}",
        "",
        f" Cost actual: ${estimate.cost_actual_usd:.6f}",
        f" Cost if full: ${estimate.cost_full_usd:.6f}",
        f" 💰 Cost saved: ${estimate.cost_saved_usd:.6f} ({estimate.savings_percent:.1f}%)",
    ]
    return "\n".join(lines)
221
+
222
+
223
if __name__ == "__main__":
    # Demo: gpt-4o with a 50-token prompt, 82 tokens generated where a full
    # run was estimated at 150 tokens.
    estimate = estimate_savings(
        model="gpt-4o", input_tokens=50, tokens_generated=82, tokens_if_full=150
    )
    print(format_cost_report(estimate))