dtSpark-1.0.4-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dtSpark/__init__.py +0 -0
- dtSpark/_description.txt +1 -0
- dtSpark/_full_name.txt +1 -0
- dtSpark/_licence.txt +21 -0
- dtSpark/_metadata.yaml +6 -0
- dtSpark/_name.txt +1 -0
- dtSpark/_version.txt +1 -0
- dtSpark/aws/__init__.py +7 -0
- dtSpark/aws/authentication.py +296 -0
- dtSpark/aws/bedrock.py +578 -0
- dtSpark/aws/costs.py +318 -0
- dtSpark/aws/pricing.py +580 -0
- dtSpark/cli_interface.py +2645 -0
- dtSpark/conversation_manager.py +3050 -0
- dtSpark/core/__init__.py +12 -0
- dtSpark/core/application.py +3355 -0
- dtSpark/core/context_compaction.py +735 -0
- dtSpark/daemon/__init__.py +104 -0
- dtSpark/daemon/__main__.py +10 -0
- dtSpark/daemon/action_monitor.py +213 -0
- dtSpark/daemon/daemon_app.py +730 -0
- dtSpark/daemon/daemon_manager.py +289 -0
- dtSpark/daemon/execution_coordinator.py +194 -0
- dtSpark/daemon/pid_file.py +169 -0
- dtSpark/database/__init__.py +482 -0
- dtSpark/database/autonomous_actions.py +1191 -0
- dtSpark/database/backends.py +329 -0
- dtSpark/database/connection.py +122 -0
- dtSpark/database/conversations.py +520 -0
- dtSpark/database/credential_prompt.py +218 -0
- dtSpark/database/files.py +205 -0
- dtSpark/database/mcp_ops.py +355 -0
- dtSpark/database/messages.py +161 -0
- dtSpark/database/schema.py +673 -0
- dtSpark/database/tool_permissions.py +186 -0
- dtSpark/database/usage.py +167 -0
- dtSpark/files/__init__.py +4 -0
- dtSpark/files/manager.py +322 -0
- dtSpark/launch.py +39 -0
- dtSpark/limits/__init__.py +10 -0
- dtSpark/limits/costs.py +296 -0
- dtSpark/limits/tokens.py +342 -0
- dtSpark/llm/__init__.py +17 -0
- dtSpark/llm/anthropic_direct.py +446 -0
- dtSpark/llm/base.py +146 -0
- dtSpark/llm/context_limits.py +438 -0
- dtSpark/llm/manager.py +177 -0
- dtSpark/llm/ollama.py +578 -0
- dtSpark/mcp_integration/__init__.py +5 -0
- dtSpark/mcp_integration/manager.py +653 -0
- dtSpark/mcp_integration/tool_selector.py +225 -0
- dtSpark/resources/config.yaml.template +631 -0
- dtSpark/safety/__init__.py +22 -0
- dtSpark/safety/llm_service.py +111 -0
- dtSpark/safety/patterns.py +229 -0
- dtSpark/safety/prompt_inspector.py +442 -0
- dtSpark/safety/violation_logger.py +346 -0
- dtSpark/scheduler/__init__.py +20 -0
- dtSpark/scheduler/creation_tools.py +599 -0
- dtSpark/scheduler/execution_queue.py +159 -0
- dtSpark/scheduler/executor.py +1152 -0
- dtSpark/scheduler/manager.py +395 -0
- dtSpark/tools/__init__.py +4 -0
- dtSpark/tools/builtin.py +833 -0
- dtSpark/web/__init__.py +20 -0
- dtSpark/web/auth.py +152 -0
- dtSpark/web/dependencies.py +37 -0
- dtSpark/web/endpoints/__init__.py +17 -0
- dtSpark/web/endpoints/autonomous_actions.py +1125 -0
- dtSpark/web/endpoints/chat.py +621 -0
- dtSpark/web/endpoints/conversations.py +353 -0
- dtSpark/web/endpoints/main_menu.py +547 -0
- dtSpark/web/endpoints/streaming.py +421 -0
- dtSpark/web/server.py +578 -0
- dtSpark/web/session.py +167 -0
- dtSpark/web/ssl_utils.py +195 -0
- dtSpark/web/static/css/dark-theme.css +427 -0
- dtSpark/web/static/js/actions.js +1101 -0
- dtSpark/web/static/js/chat.js +614 -0
- dtSpark/web/static/js/main.js +496 -0
- dtSpark/web/static/js/sse-client.js +242 -0
- dtSpark/web/templates/actions.html +408 -0
- dtSpark/web/templates/base.html +93 -0
- dtSpark/web/templates/chat.html +814 -0
- dtSpark/web/templates/conversations.html +350 -0
- dtSpark/web/templates/goodbye.html +81 -0
- dtSpark/web/templates/login.html +90 -0
- dtSpark/web/templates/main_menu.html +983 -0
- dtSpark/web/templates/new_conversation.html +191 -0
- dtSpark/web/web_interface.py +137 -0
- dtspark-1.0.4.dist-info/METADATA +187 -0
- dtspark-1.0.4.dist-info/RECORD +96 -0
- dtspark-1.0.4.dist-info/WHEEL +5 -0
- dtspark-1.0.4.dist-info/entry_points.txt +3 -0
- dtspark-1.0.4.dist-info/licenses/LICENSE +21 -0
- dtspark-1.0.4.dist-info/top_level.txt +1 -0
dtSpark/aws/pricing.py
ADDED
@@ -0,0 +1,580 @@
"""
AWS Bedrock pricing module.

This module provides functionality for:
- Fetching Bedrock pricing from AWS Price List API
- Caching pricing data locally
- Calculating costs based on model, region, and token usage
"""

import json
import logging
import requests
from pathlib import Path
from datetime import datetime, timedelta
from typing import Dict, Optional, Tuple
from botocore.exceptions import ClientError


class BedrockPricing:
    """Manages AWS Bedrock pricing data and cost calculations."""

    def __init__(self, pricing_client, data_path: Path):
        """
        Initialise the pricing manager.

        Args:
            pricing_client: Boto3 Pricing API client
            data_path: Path to store cached pricing data
        """
        self.pricing_client = pricing_client
        self.data_path = Path(data_path)
        self.pricing_file = self.data_path / "bedrock_pricing.json"
        self.pricing_data = {}
        self.last_updated = None

        # Ensure data directory exists
        self.data_path.mkdir(parents=True, exist_ok=True)

    def load_pricing_data(self, force_refresh: bool = False) -> bool:
        """
        Load pricing data from cache or fetch from AWS.

        Args:
            force_refresh: If True, fetch fresh data from AWS

        Returns:
            True if pricing data loaded successfully
        """
        # Try to load from cache first
        if not force_refresh and self.pricing_file.exists():
            try:
                with open(self.pricing_file, 'r') as f:
                    cached_data = json.load(f)

                # Convert string keys back to tuples
                cached_pricing = cached_data.get('pricing', {})
                self.pricing_data = {}
                for key_str, prices in cached_pricing.items():
                    if '|' in key_str:
                        model_id, region = key_str.split('|', 1)
                        self.pricing_data[(model_id, region)] = prices

                self.last_updated = datetime.fromisoformat(cached_data.get('last_updated'))

                # Check if cache is still valid (less than 7 days old)
                if datetime.now() - self.last_updated < timedelta(days=7):
                    logging.info(f"Loaded pricing data from cache (updated {self.last_updated})")
                    return True
                else:
                    logging.info("Cached pricing data is stale, fetching fresh data")
            except Exception as e:
                logging.warning(f"Failed to load cached pricing data: {e}")

        # Fetch fresh pricing data from AWS
        return self._fetch_pricing_from_aws()

    def _fetch_pricing_from_aws(self) -> bool:
        """
        Fetch Bedrock pricing from AWS Bulk Price List API.

        Returns:
            True if successful
        """
        # Try Bulk API first (more reliable, no permissions needed)
        if self._fetch_from_bulk_api():
            return True

        # Fall back to Pricing API
        if self._fetch_from_pricing_api():
            return True

        # Fall back to hardcoded pricing
        logging.warning("All pricing fetch methods failed, using fallback pricing")
        self._use_fallback_pricing()
        return True

    def _fetch_from_bulk_api(self) -> bool:
        """
        Fetch pricing from AWS Bulk Price List API (no credentials needed).

        Returns:
            True if successful
        """
        try:
            logging.info("Fetching Bedrock pricing from AWS Bulk Price List...")

            # Fetch from the public Bulk API URL
            url = "https://pricing.us-east-1.amazonaws.com/offers/v1.0/aws/AmazonBedrockService/current/index.json"

            response = requests.get(url, timeout=30)
            response.raise_for_status()

            pricing_json = response.json()
            logging.info("Successfully downloaded Bedrock pricing from Bulk API")

            # Parse the bulk pricing format
            self.pricing_data = self._parse_bulk_pricing(pricing_json)

            if not self.pricing_data:
                logging.warning("No usable pricing data found in Bulk API response")
                return False

            # Cache the data
            self.last_updated = datetime.now()

            # Convert tuple keys to strings for JSON serialization
            pricing_for_cache = {}
            for (model_id, region), prices in self.pricing_data.items():
                key_str = f"{model_id}|{region}"
                pricing_for_cache[key_str] = prices

            cache_data = {
                'pricing': pricing_for_cache,
                'last_updated': self.last_updated.isoformat()
            }

            with open(self.pricing_file, 'w') as f:
                json.dump(cache_data, f, indent=2)

            logging.info(f"Successfully cached pricing for {len(self.pricing_data)} model/region combinations")
            return True

        except requests.RequestException as e:
            logging.warning(f"Failed to fetch from Bulk API: {e}")
            return False
        except Exception as e:
            logging.error(f"Unexpected error fetching from Bulk API: {e}")
            return False

    def _fetch_from_pricing_api(self) -> bool:
        """
        Fetch Bedrock pricing from AWS Pricing API (requires credentials).

        Returns:
            True if successful
        """
        try:
            logging.info("Fetching Bedrock pricing from AWS Pricing API...")

            # The Pricing API is only available in us-east-1 and ap-south-1
            # Query for Amazon Bedrock Foundation Models pricing
            # Try multiple service codes as AWS uses different names
            service_codes = ['AmazonBedrockFoundationModels', 'AmazonBedrockService', 'AmazonBedrock']

            all_price_lists = []
            successful_service_code = None

            for service_code in service_codes:
                try:
                    response = self.pricing_client.get_products(
                        ServiceCode=service_code,
                        FormatVersion='aws_v1',
                        MaxResults=100
                    )

                    price_list = response.get('PriceList', [])
                    if price_list:
                        all_price_lists.extend(price_list)
                        successful_service_code = service_code
                        logging.info(f"Found {len(price_list)} products with service code: {service_code}")

                        # Continue fetching if there are more results
                        while 'NextToken' in response:
                            response = self.pricing_client.get_products(
                                ServiceCode=service_code,
                                FormatVersion='aws_v1',
                                MaxResults=100,
                                NextToken=response['NextToken']
                            )
                            additional_list = response.get('PriceList', [])
                            all_price_lists.extend(additional_list)
                            logging.info(f"Fetched additional {len(additional_list)} products")

                except Exception as e:
                    logging.debug(f"Service code {service_code} failed: {e}")
                    continue

            if not all_price_lists:
                logging.warning("No pricing data found from Pricing API")
                return False

            # Parse all collected pricing data
            self.pricing_data = {}
            for price_item in all_price_lists:
                parsed_item = self._parse_price_item(price_item)
                if parsed_item:
                    for key, value in parsed_item.items():
                        if key not in self.pricing_data:
                            self.pricing_data[key] = value

            # Cache the data
            self.last_updated = datetime.now()

            # Convert tuple keys to strings for JSON serialization
            pricing_for_cache = {}
            for (model_id, region), prices in self.pricing_data.items():
                key_str = f"{model_id}|{region}"
                pricing_for_cache[key_str] = prices

            cache_data = {
                'pricing': pricing_for_cache,
                'last_updated': self.last_updated.isoformat()
            }

            with open(self.pricing_file, 'w') as f:
                json.dump(cache_data, f, indent=2)

            logging.info(f"Successfully fetched and cached pricing for {len(self.pricing_data)} model/region combinations")
            return True

        except ClientError as e:
            error_code = e.response.get('Error', {}).get('Code', '')
            if error_code in ['AccessDeniedException', 'UnauthorizedException']:
                logging.warning("No permissions for Pricing API, using fallback pricing")
                self._use_fallback_pricing()
                return True
            else:
                logging.error(f"Error fetching pricing data: {e}")
                self._use_fallback_pricing()
                return True
        except Exception as e:
            logging.error(f"Unexpected error fetching pricing: {e}")
            self._use_fallback_pricing()
            return True

    def _parse_bulk_pricing(self, pricing_json: Dict) -> Dict:
        """
        Parse AWS Bulk Price List format into usable pricing data.

        Args:
            pricing_json: Complete pricing JSON from Bulk API

        Returns:
            Dictionary mapping (model_id, region) to pricing info
        """
        parsed_data = {}

        try:
            products = pricing_json.get('products', {})
            terms = pricing_json.get('terms', {}).get('OnDemand', {})

            # First pass: build a map of SKU to product attributes
            sku_to_product = {}
            for sku, product in products.items():
                attributes = product.get('attributes', {})

                # Get model name and region
                model_name = attributes.get('model', '')
                region_code = attributes.get('regionCode', '')
                inference_type = attributes.get('inferenceType', '')

                # Filter for token-based pricing (ignore video generation, etc.)
                if not model_name or not region_code:
                    continue

                # Only process input/output tokens (not cache, batch, etc.)
                if 'token' not in inference_type.lower():
                    continue

                # Skip special types for now (cache, batch, long context)
                if any(x in inference_type.lower() for x in ['cache', 'batch', 'long context']):
                    continue

                sku_to_product[sku] = {
                    'model': model_name,
                    'region': region_code,
                    'inference_type': inference_type
                }

            # Second pass: extract pricing from terms
            for sku, term_data in terms.items():
                if sku not in sku_to_product:
                    continue

                product_info = sku_to_product[sku]
                model_name = product_info['model']
                region = product_info['region']
                inference_type = product_info['inference_type']

                # Map model name to Bedrock model ID
                # The Bulk API uses friendly names like "Claude Sonnet 4"
                # We need to map these to the actual model IDs
                model_id = self._map_model_name_to_id(model_name, region)
                if not model_id:
                    continue  # Skip unknown models

                # Extract price dimensions
                for term_key, term_value in term_data.items():
                    price_dimensions = term_value.get('priceDimensions', {})

                    for dim_key, dim_value in price_dimensions.items():
                        price_per_unit = float(dim_value.get('pricePerUnit', {}).get('USD', 0))

                        # Pricing is typically per token, but could be per 1000 tokens
                        unit = dim_value.get('unit', '').lower()
                        if 'token' in unit:
                            # Already per token, convert to per 1000 tokens
                            price_per_unit = price_per_unit * 1000

                        # Create key for this model/region
                        key = (model_id, region)

                        if key not in parsed_data:
                            parsed_data[key] = {'input': 0, 'output': 0}

                        # Classify as input or output based on inference_type
                        if 'input' in inference_type.lower():
                            parsed_data[key]['input'] = price_per_unit
                        elif 'output' in inference_type.lower():
                            parsed_data[key]['output'] = price_per_unit

            logging.info(f"Parsed {len(parsed_data)} model/region pricing combinations from Bulk API")

        except Exception as e:
            logging.error(f"Error parsing bulk pricing: {e}")
            import traceback
            traceback.print_exc()

        return parsed_data

    def _map_model_name_to_id(self, model_name: str, region: str) -> Optional[str]:
        """
        Map AWS Bulk API model names to Bedrock model IDs.

        Args:
            model_name: Friendly model name from Bulk API (e.g., "Claude Sonnet 4")
            region: AWS region

        Returns:
            Bedrock model ID or None if unknown
        """
        # Normalize the model name
        model_lower = model_name.lower()

        # Claude models
        if 'claude' in model_lower:
            if 'sonnet 4.5' in model_lower or 'sonnet-4.5' in model_lower:
                return 'anthropic.claude-sonnet-4.5-v1:0'
            elif 'sonnet 4' in model_lower or 'sonnet-4' in model_lower:
                return 'anthropic.claude-sonnet-4-0-v1:0'
            elif '3.5 sonnet v2' in model_lower or '3-5-sonnet-v2' in model_lower:
                return 'anthropic.claude-3-5-sonnet-20241022-v2:0'
            elif '3.5 sonnet' in model_lower or '3-5-sonnet' in model_lower:
                return 'anthropic.claude-3-5-sonnet-20240620-v1:0'
            elif '3 opus' in model_lower or '3-opus' in model_lower:
                return 'anthropic.claude-3-opus-20240229-v1:0'
            elif '3 sonnet' in model_lower or '3-sonnet' in model_lower:
                return 'anthropic.claude-3-sonnet-20240229-v1:0'
            elif '3 haiku' in model_lower or '3-haiku' in model_lower:
                return 'anthropic.claude-3-haiku-20240307-v1:0'

        # Amazon Titan models
        elif 'titan' in model_lower:
            if 'text express' in model_lower:
                return 'amazon.titan-text-express-v1'
            elif 'text lite' in model_lower:
                return 'amazon.titan-text-lite-v1'

        # Meta Llama models
        elif 'llama' in model_lower:
            if 'llama 3' in model_lower:
                if '70b' in model_lower:
                    return 'meta.llama3-70b-instruct-v1:0'
                elif '8b' in model_lower:
                    return 'meta.llama3-8b-instruct-v1:0'

        # Cohere models
        elif 'cohere' in model_lower or 'command' in model_lower:
            if 'command r+' in model_lower:
                return 'cohere.command-r-plus-v1:0'
            elif 'command r' in model_lower:
                return 'cohere.command-r-v1:0'

        # Unknown model
        logging.debug(f"Unknown model name: {model_name}")
        return None

    def _parse_price_item(self, price_item: str) -> Dict:
        """
        Parse a single price item from AWS Pricing API.

        Args:
            price_item: JSON string from PriceList

        Returns:
            Dictionary mapping (model_id, region) to pricing info
        """
        parsed_data = {}

        try:
            # Parse the JSON string
            product = json.loads(price_item)

            # Extract product attributes
            attributes = product.get('product', {}).get('attributes', {})
            region_code = attributes.get('regionCode')
            usage_type = attributes.get('usageType', '')

            # Extract pricing terms
            on_demand = product.get('terms', {}).get('OnDemand', {})
            if not on_demand:
                return parsed_data

            # Get first price dimension
            for term_key, term_value in on_demand.items():
                price_dimensions = term_value.get('priceDimensions', {})
                for dim_key, dim_value in price_dimensions.items():
                    unit = dim_value.get('unit', '')
                    price_per_unit = float(dim_value.get('pricePerUnit', {}).get('USD', 0))

                    # Determine if this is input or output pricing
                    description = dim_value.get('description', '').lower()

                    # Try to extract model name from usage type
                    model_name = self._extract_model_from_usage_type(usage_type)

                    if model_name and region_code:
                        key = (model_name, region_code)

                        if key not in parsed_data:
                            parsed_data[key] = {'input': 0, 'output': 0}

                        # Classify as input or output based on description
                        if 'input' in description or 'request' in description:
                            parsed_data[key]['input'] = price_per_unit
                        elif 'output' in description or 'response' in description:
                            parsed_data[key]['output'] = price_per_unit

        except Exception as e:
            logging.debug(f"Error parsing price item: {e}")

        return parsed_data

    def _extract_model_from_usage_type(self, usage_type: str) -> Optional[str]:
        """
        Extract model identifier from usage type string.

        Args:
            usage_type: AWS usage type string

        Returns:
            Model identifier or None
        """
        # Usage types typically look like: "APS2-ModelInference-Claude-3-5-Sonnet-v2"
        if 'Claude' in usage_type:
            if 'Claude-3-5-Sonnet-v2' in usage_type:
                return 'claude-3-5-sonnet-20241022'
            elif 'Claude-3-5-Sonnet' in usage_type:
                return 'claude-3-5-sonnet-20240620'
            elif 'Claude-3-Opus' in usage_type:
                return 'claude-3-opus-20240229'
            elif 'Claude-3-Sonnet' in usage_type:
                return 'claude-3-sonnet-20240229'
            elif 'Claude-3-Haiku' in usage_type:
                return 'claude-3-haiku-20240307'

        return None

    def _use_fallback_pricing(self):
        """
        Use fallback pricing data when API is unavailable.
        Prices as of January 2025 in USD per 1000 tokens.
        """
        # Fallback pricing for common models (prices per 1000 tokens)
        fallback = {
            # Claude 3.5 Sonnet v2
            ('anthropic.claude-3-5-sonnet-20241022-v2:0', 'us-east-1'): {'input': 0.003, 'output': 0.015},
            ('anthropic.claude-3-5-sonnet-20241022-v2:0', 'us-west-2'): {'input': 0.003, 'output': 0.015},
            ('anthropic.claude-3-5-sonnet-20241022-v2:0', 'ap-southeast-2'): {'input': 0.003, 'output': 0.015},

            # Claude 3.5 Sonnet v1
            ('anthropic.claude-3-5-sonnet-20240620-v1:0', 'us-east-1'): {'input': 0.003, 'output': 0.015},
            ('anthropic.claude-3-5-sonnet-20240620-v1:0', 'us-west-2'): {'input': 0.003, 'output': 0.015},
            ('anthropic.claude-3-5-sonnet-20240620-v1:0', 'ap-southeast-2'): {'input': 0.003, 'output': 0.015},

            # Claude 3 Opus
            ('anthropic.claude-3-opus-20240229-v1:0', 'us-east-1'): {'input': 0.015, 'output': 0.075},
            ('anthropic.claude-3-opus-20240229-v1:0', 'us-west-2'): {'input': 0.015, 'output': 0.075},

            # Claude 3 Sonnet
            ('anthropic.claude-3-sonnet-20240229-v1:0', 'us-east-1'): {'input': 0.003, 'output': 0.015},
            ('anthropic.claude-3-sonnet-20240229-v1:0', 'us-west-2'): {'input': 0.003, 'output': 0.015},

            # Claude 3 Haiku
            ('anthropic.claude-3-haiku-20240307-v1:0', 'us-east-1'): {'input': 0.00025, 'output': 0.00125},
            ('anthropic.claude-3-haiku-20240307-v1:0', 'us-west-2'): {'input': 0.00025, 'output': 0.00125},
        }

        self.pricing_data = fallback
        self.last_updated = datetime.now()
        logging.info(f"Using fallback pricing data for {len(fallback)} model/region combinations")

    def get_model_pricing(self, model_id: str, region: str) -> Optional[Dict[str, float]]:
        """
        Get pricing for a specific model and region.

        Args:
            model_id: Bedrock model ID
            region: AWS region code

        Returns:
            Dictionary with 'input' and 'output' prices per 1000 tokens, or None
        """
        # Try exact match first
        pricing = self.pricing_data.get((model_id, region))

        # If not found, try to find similar model (e.g., without version suffix)
        if not pricing:
            for (cached_model, cached_region), cached_pricing in self.pricing_data.items():
                if cached_region == region and model_id.startswith(cached_model.split('-v')[0]):
                    pricing = cached_pricing
                    break

        return pricing

    def calculate_cost(self, model_id: str, region: str, input_tokens: int, output_tokens: int) -> Tuple[float, str]:
        """
        Calculate cost for a model invocation.

        Args:
            model_id: Bedrock model ID
            region: AWS region code
            input_tokens: Number of input tokens
            output_tokens: Number of output tokens

        Returns:
            Tuple of (cost in USD, pricing source description)
        """
        pricing = self.get_model_pricing(model_id, region)

        if not pricing:
            logging.warning(f"No pricing data for {model_id} in {region}, using default estimate")
            # Use a conservative default (similar to Claude 3.5 Sonnet)
            pricing = {'input': 0.003, 'output': 0.015}
            source = "estimated (no pricing data)"
        else:
            source = "from AWS pricing data" if self.last_updated else "estimated"

        # Calculate cost (pricing is per 1000 tokens)
        input_cost = (input_tokens / 1000.0) * pricing['input']
        output_cost = (output_tokens / 1000.0) * pricing['output']
        total_cost = input_cost + output_cost

        return total_cost, source

    def estimate_max_cost(self, model_id: str, region: str, input_tokens: int, max_output_tokens: int) -> float:
        """
        Estimate maximum possible cost for a request.

        Args:
            model_id: Bedrock model ID
            region: AWS region code
            input_tokens: Number of input tokens
            max_output_tokens: Maximum output tokens configured

        Returns:
            Maximum cost in USD
        """
        cost, _ = self.calculate_cost(model_id, region, input_tokens, max_output_tokens)
        return cost
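
The added file ends here. For orientation, a minimal, hypothetical usage sketch of the class above follows; it is not part of the package. It assumes a boto3 Pricing API client and an illustrative cache directory (`~/.dtspark/pricing`), whereas dtSpark wires these dependencies up through its own configuration.

import boto3
from pathlib import Path
from dtSpark.aws.pricing import BedrockPricing

# The AWS Pricing API endpoint lives in us-east-1; the cache path is illustrative only.
pricing_client = boto3.client('pricing', region_name='us-east-1')
pricing = BedrockPricing(pricing_client, Path.home() / '.dtspark' / 'pricing')

# Cache-first load; on a miss or stale cache it tries the Bulk Price List,
# then the Pricing API, then the hardcoded January 2025 table.
pricing.load_pricing_data()

cost, source = pricing.calculate_cost(
    'anthropic.claude-3-5-sonnet-20241022-v2:0', 'us-east-1',
    input_tokens=1200, output_tokens=450,
)
print(f"Estimated cost: ${cost:.6f} ({source})")  # roughly $0.0104 at the fallback rates

At the fallback rates this works out to 1.2 * $0.003 + 0.45 * $0.015 = $0.01035, consistent with the per-1000-token pricing used throughout the class.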