netra-zen 1.0.9-py3-none-any.whl → 1.0.11-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agent_interface/__init__.py +25 -25
- agent_interface/base_agent.py +350 -350
- {netra_zen-1.0.9.dist-info → netra_zen-1.0.11.dist-info}/METADATA +36 -15
- netra_zen-1.0.11.dist-info/RECORD +30 -0
- {netra_zen-1.0.9.dist-info → netra_zen-1.0.11.dist-info}/licenses/LICENSE.md +1 -1
- scripts/__init__.py +1 -1
- scripts/__main__.py +5 -5
- scripts/agent_cli.py +7179 -6948
- scripts/agent_logs.py +327 -327
- scripts/bump_version.py +137 -137
- scripts/demo_log_collection.py +146 -144
- scripts/embed_release_credentials.py +75 -75
- scripts/test_apex_telemetry_debug.py +221 -0
- scripts/verify_log_transmission.py +140 -140
- token_budget/budget_manager.py +199 -199
- token_budget/models.py +73 -73
- token_budget/visualization.py +21 -21
- token_transparency/__init__.py +19 -19
- token_transparency/claude_pricing_engine.py +326 -326
- zen/__init__.py +7 -7
- zen/__main__.py +11 -11
- zen/telemetry/__init__.py +14 -11
- zen/telemetry/apex_telemetry.py +259 -0
- zen/telemetry/embedded_credentials.py +59 -59
- zen/telemetry/manager.py +249 -249
- zen_orchestrator.py +3058 -3008
- netra_zen-1.0.9.dist-info/RECORD +0 -28
- {netra_zen-1.0.9.dist-info → netra_zen-1.0.11.dist-info}/WHEEL +0 -0
- {netra_zen-1.0.9.dist-info → netra_zen-1.0.11.dist-info}/entry_points.txt +0 -0
- {netra_zen-1.0.9.dist-info → netra_zen-1.0.11.dist-info}/top_level.txt +0 -0
token_transparency/claude_pricing_engine.py
@@ -1,327 +1,327 @@
#!/usr/bin/env python3
"""
Claude Code Pricing Compliance Engine

Provides accurate token counting and cost calculation based on official Claude pricing.
Designed to be the SSOT for all Claude Code pricing calculations within zen.

Key Features:
- Model detection from API responses
- Accurate cache pricing based on duration
- Tool cost calculation
- Compliance with Claude pricing documentation
- Extensible for future Claude Code agent support
"""

from dataclasses import dataclass
from typing import Dict, Optional, Tuple, Any
import re
import json
import logging

logger = logging.getLogger(__name__)

@dataclass
class ClaudePricingConfig:
    """Current Claude pricing rates as of 2024-2025"""

    # Model pricing per million tokens (input, output)
    MODEL_PRICING = {
        "claude-opus-4": {"input": 15.0, "output": 75.0},
        "claude-opus-4.1": {"input": 15.0, "output": 75.0},
        "claude-sonnet-4": {"input": 3.0, "output": 15.0},
        "claude-sonnet-3.7": {"input": 3.0, "output": 15.0},
        "claude-3-5-sonnet": {"input": 3.0, "output": 15.0},
        "claude-haiku-3.5": {"input": 0.8, "output": 4.0},
    }

    # Cache pricing multipliers
    CACHE_READ_MULTIPLIER = 0.1  # 10% of base input price
    CACHE_5MIN_WRITE_MULTIPLIER = 1.25  # 25% premium
    CACHE_1HOUR_WRITE_MULTIPLIER = 2.0  # 100% premium

    # Tool pricing (per 1000 calls)
    TOOL_PRICING = {
        "web_search": 10.0,  # $10 per 1000 searches
        "web_fetch": 0.0,  # No additional charge
        "default": 0.0  # Most tools have no additional charge
    }

@dataclass
class TokenUsageData:
    """Token usage data with detailed breakdown"""
    input_tokens: int = 0
    output_tokens: int = 0
    cache_read_tokens: int = 0
    cache_creation_tokens: int = 0
    cache_type: str = "5min"  # "5min" or "1hour"
    total_tokens: int = 0
    tool_calls: int = 0
    model: str = "claude-3-5-sonnet"

    def __post_init__(self):
        """Calculate total if not provided"""
        if self.total_tokens == 0:
            self.total_tokens = (self.input_tokens + self.output_tokens +
                                 self.cache_read_tokens + self.cache_creation_tokens)

@dataclass
class CostBreakdown:
    """Detailed cost breakdown for transparency"""
    input_cost: float = 0.0
    output_cost: float = 0.0
    cache_read_cost: float = 0.0
    cache_creation_cost: float = 0.0
    tool_cost: float = 0.0
    total_cost: float = 0.0
    model_used: str = ""
    cache_type: str = ""

    def __post_init__(self):
        """Calculate total cost"""
        self.total_cost = (self.input_cost + self.output_cost +
                           self.cache_read_cost + self.cache_creation_cost + self.tool_cost)

class ClaudePricingEngine:
    """
    Claude Code pricing compliance engine for accurate cost calculation.

    Ensures compliance with official Claude pricing documentation and provides
    detailed transparency for token usage costs.
    """

    def __init__(self):
        self.pricing_config = ClaudePricingConfig()

    def detect_model_from_response(self, response_data: Dict[str, Any]) -> str:
        """
        Detect Claude model from API response or usage data.

        Args:
            response_data: API response or usage data containing model information

        Returns:
            Model name string, defaults to claude-3-5-sonnet if not detected
        """
        # Try multiple locations where model might be specified
        model_locations = [
            response_data.get('model'),
            response_data.get('model_name'),
            response_data.get('usage', {}).get('model'),
            response_data.get('message', {}).get('model'),
            response_data.get('metadata', {}).get('model')
        ]

        for model in model_locations:
            if model and isinstance(model, str):
                # Normalize model name
                normalized = self._normalize_model_name(model)
                if normalized in self.pricing_config.MODEL_PRICING:
                    return normalized

        # Default fallback
        logger.debug("Model not detected in response, defaulting to claude-3-5-sonnet")
        return "claude-3-5-sonnet"

    def _normalize_model_name(self, model_name: str) -> str:
        """Normalize model name to match pricing config keys"""
        model_name = model_name.lower().strip()

        # Handle various model name formats
        if "opus" in model_name:
            if "4.1" in model_name:
                return "claude-opus-4.1"
            elif "4" in model_name:
                return "claude-opus-4"
        elif "sonnet" in model_name:
            if "4" in model_name:
                return "claude-sonnet-4"
            elif "3.7" in model_name:
                return "claude-sonnet-3.7"
            elif "3.5" in model_name or "3-5" in model_name:
                return "claude-3-5-sonnet"
        elif "haiku" in model_name:
            if "3.5" in model_name:
                return "claude-haiku-3.5"

        return model_name

    def detect_cache_type(self, response_data: Dict[str, Any]) -> str:
        """
        Detect cache type (5min vs 1hour) from response data.

        Args:
            response_data: API response data

        Returns:
            "5min" or "1hour", defaults to "5min"
        """
        # Look for cache type indicators in response
        cache_indicators = [
            response_data.get('cache_type'),
            response_data.get('usage', {}).get('cache_type'),
            response_data.get('metadata', {}).get('cache_type')
        ]

        for indicator in cache_indicators:
            if indicator:
                if "1hour" in str(indicator).lower() or "60min" in str(indicator).lower():
                    return "1hour"
                elif "5min" in str(indicator).lower():
                    return "5min"

        # Default to 5min cache
        return "5min"

    def calculate_cost(self, usage_data: TokenUsageData,
                       authoritative_cost: Optional[float] = None,
                       tool_tokens: Optional[Dict[str, int]] = None) -> CostBreakdown:
        """
        Calculate detailed cost breakdown with Claude pricing compliance.

        Args:
            usage_data: Token usage information
            authoritative_cost: SDK-provided cost (preferred when available)
            tool_tokens: Dictionary of tool names to token counts for tool cost calculation

        Returns:
            Detailed cost breakdown for transparency
        """
        # Use authoritative cost if provided (most accurate)
        if authoritative_cost is not None:
            breakdown = CostBreakdown(
                model_used=usage_data.model,
                cache_type=usage_data.cache_type
            )
            breakdown.total_cost = authoritative_cost
            return breakdown

        # Get model pricing
        model_pricing = self.pricing_config.MODEL_PRICING.get(
            usage_data.model,
            self.pricing_config.MODEL_PRICING["claude-3-5-sonnet"]
        )

        # Calculate base costs
        input_cost = (usage_data.input_tokens / 1_000_000) * model_pricing["input"]
        output_cost = (usage_data.output_tokens / 1_000_000) * model_pricing["output"]

        # Calculate cache costs with correct multipliers
        cache_read_cost = (usage_data.cache_read_tokens / 1_000_000) * \
                          (model_pricing["input"] * self.pricing_config.CACHE_READ_MULTIPLIER)

        # Cache creation cost depends on cache type
        cache_multiplier = (self.pricing_config.CACHE_1HOUR_WRITE_MULTIPLIER
                            if usage_data.cache_type == "1hour"
                            else self.pricing_config.CACHE_5MIN_WRITE_MULTIPLIER)

        cache_creation_cost = (usage_data.cache_creation_tokens / 1_000_000) * \
                              (model_pricing["input"] * cache_multiplier)

        # Calculate tool costs based on token usage
        tool_cost = 0.0
        if tool_tokens:
            for tool_name, tokens in tool_tokens.items():
                # Tool tokens are charged at the same rate as input tokens for the model
                tool_cost += (tokens / 1_000_000) * model_pricing["input"]

        return CostBreakdown(
            input_cost=input_cost,
            output_cost=output_cost,
            cache_read_cost=cache_read_cost,
            cache_creation_cost=cache_creation_cost,
            tool_cost=tool_cost,
            model_used=usage_data.model,
            cache_type=usage_data.cache_type
        )

    def parse_claude_response(self, response_line: str) -> Optional[TokenUsageData]:
        """
        Parse token usage from Claude Code response line with model detection.

        Args:
            response_line: Single line from Claude Code output

        Returns:
            TokenUsageData if parsing successful, None otherwise
        """
        line = response_line.strip()
        if not line.startswith('{'):
            return None

        try:
            json_data = json.loads(line)

            # Detect model and cache type
            model = self.detect_model_from_response(json_data)
            cache_type = self.detect_cache_type(json_data)

            # Extract usage data
            usage_data = None
            if 'usage' in json_data:
                usage_data = json_data['usage']
            elif 'message' in json_data and isinstance(json_data['message'], dict):
                usage_data = json_data['message'].get('usage')

            if usage_data and isinstance(usage_data, dict):
                return TokenUsageData(
                    input_tokens=int(usage_data.get('input_tokens', 0)),
                    output_tokens=int(usage_data.get('output_tokens', 0)),
                    cache_read_tokens=int(usage_data.get('cache_read_input_tokens', 0)),
                    cache_creation_tokens=int(usage_data.get('cache_creation_input_tokens', 0)),
                    total_tokens=int(usage_data.get('total_tokens', 0)),
                    model=model,
                    cache_type=cache_type
                )

        except (json.JSONDecodeError, ValueError, KeyError) as e:
            logger.debug(f"Failed to parse Claude response: {e}")

        return None

    def get_transparency_report(self, usage_data: TokenUsageData,
                                cost_breakdown: CostBreakdown,
                                tool_tokens: Optional[Dict[str, int]] = None) -> Dict[str, Any]:
        """
        Generate transparency report for token usage and costs.

        Args:
            usage_data: Token usage information
            cost_breakdown: Detailed cost breakdown
            tool_tokens: Tool-specific token usage

        Returns:
            Comprehensive transparency report
        """
        return {
            "model_used": usage_data.model,
            "cache_type": usage_data.cache_type,
            "token_breakdown": {
                "input_tokens": usage_data.input_tokens,
                "output_tokens": usage_data.output_tokens,
                "cache_read_tokens": usage_data.cache_read_tokens,
                "cache_creation_tokens": usage_data.cache_creation_tokens,
                "total_tokens": usage_data.total_tokens,
                "tool_tokens": tool_tokens or {}
            },
            "cost_breakdown": {
                "input_cost_usd": round(cost_breakdown.input_cost, 6),
                "output_cost_usd": round(cost_breakdown.output_cost, 6),
                "cache_read_cost_usd": round(cost_breakdown.cache_read_cost, 6),
                "cache_creation_cost_usd": round(cost_breakdown.cache_creation_cost, 6),
                "tool_cost_usd": round(cost_breakdown.tool_cost, 6),
                "total_cost_usd": round(cost_breakdown.total_cost, 6)
            },
            "pricing_rates": {
                "model_rates": self.pricing_config.MODEL_PRICING[usage_data.model],
                "cache_read_multiplier": self.pricing_config.CACHE_READ_MULTIPLIER,
                "cache_write_multiplier": (self.pricing_config.CACHE_1HOUR_WRITE_MULTIPLIER
                                           if usage_data.cache_type == "1hour"
                                           else self.pricing_config.CACHE_5MIN_WRITE_MULTIPLIER)
            },
            "compliance_info": {
                "pricing_source": "https://docs.claude.com/en/docs/about-claude/pricing",
                "last_updated": "2024-2025",
                "model_detected": usage_data.model != "claude-3-5-sonnet"
            }
        }
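A minimal usage sketch of the pricing engine diffed above. The import path assumes the token_transparency package layout listed in this diff, and the JSON line is an invented example of a single Claude Code stream message, not captured output.

# Sketch only: drives parse -> calculate -> report with made-up numbers.
from token_transparency.claude_pricing_engine import ClaudePricingEngine

engine = ClaudePricingEngine()

# Invented example of one JSON line from Claude Code output.
line = (
    '{"model": "claude-sonnet-4", "usage": {"input_tokens": 1200, '
    '"output_tokens": 300, "cache_read_input_tokens": 4000, '
    '"cache_creation_input_tokens": 0}}'
)

usage = engine.parse_claude_response(line)
if usage:
    # No SDK-provided cost here, so the MODEL_PRICING rates above apply.
    breakdown = engine.calculate_cost(usage)
    report = engine.get_transparency_report(usage, breakdown)
    print(report["cost_breakdown"]["total_cost_usd"])

With the rates in ClaudePricingConfig (claude-sonnet-4 at $3/M input and $15/M output, cache reads at 10% of the input rate), this works out to 0.0036 + 0.0045 + 0.0012 = 0.0093 USD.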