llm-cost-guard 0.1.0 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36)
  1. llm_cost_guard/__init__.py +39 -0
  2. llm_cost_guard/backends/__init__.py +52 -0
  3. llm_cost_guard/backends/base.py +121 -0
  4. llm_cost_guard/backends/memory.py +265 -0
  5. llm_cost_guard/backends/sqlite.py +425 -0
  6. llm_cost_guard/budget.py +306 -0
  7. llm_cost_guard/cli.py +464 -0
  8. llm_cost_guard/clients/__init__.py +11 -0
  9. llm_cost_guard/clients/anthropic.py +231 -0
  10. llm_cost_guard/clients/openai.py +262 -0
  11. llm_cost_guard/exceptions.py +71 -0
  12. llm_cost_guard/integrations/__init__.py +12 -0
  13. llm_cost_guard/integrations/cache.py +189 -0
  14. llm_cost_guard/integrations/langchain.py +257 -0
  15. llm_cost_guard/models.py +123 -0
  16. llm_cost_guard/pricing/__init__.py +7 -0
  17. llm_cost_guard/pricing/anthropic.yaml +88 -0
  18. llm_cost_guard/pricing/bedrock.yaml +215 -0
  19. llm_cost_guard/pricing/loader.py +221 -0
  20. llm_cost_guard/pricing/openai.yaml +148 -0
  21. llm_cost_guard/pricing/vertex.yaml +133 -0
  22. llm_cost_guard/providers/__init__.py +69 -0
  23. llm_cost_guard/providers/anthropic.py +115 -0
  24. llm_cost_guard/providers/base.py +72 -0
  25. llm_cost_guard/providers/bedrock.py +135 -0
  26. llm_cost_guard/providers/openai.py +110 -0
  27. llm_cost_guard/rate_limit.py +233 -0
  28. llm_cost_guard/span.py +143 -0
  29. llm_cost_guard/tokenizers/__init__.py +7 -0
  30. llm_cost_guard/tokenizers/base.py +207 -0
  31. llm_cost_guard/tracker.py +718 -0
  32. llm_cost_guard-0.1.0.dist-info/METADATA +357 -0
  33. llm_cost_guard-0.1.0.dist-info/RECORD +36 -0
  34. llm_cost_guard-0.1.0.dist-info/WHEEL +4 -0
  35. llm_cost_guard-0.1.0.dist-info/entry_points.txt +2 -0
  36. llm_cost_guard-0.1.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,257 @@
+ """
+ LangChain integration for LLM Cost Guard.
+ """
+
+ import functools
+ import logging
+ from typing import Any, Callable, Dict, List, Optional, TypeVar, Union
+
+ logger = logging.getLogger(__name__)
+
+ F = TypeVar("F", bound=Callable[..., Any])
+
+
+ try:
+     from langchain_core.callbacks.base import BaseCallbackHandler
+     from langchain_core.outputs import LLMResult
+
+     LANGCHAIN_AVAILABLE = True
+ except ImportError:
+     LANGCHAIN_AVAILABLE = False
+     BaseCallbackHandler = object  # type: ignore
+     LLMResult = None  # type: ignore
+
+
+ class CostTrackingCallback(BaseCallbackHandler):
+     """
+     LangChain callback handler for cost tracking.
+
+     Usage:
+         from llm_cost_guard import CostTracker
+         from llm_cost_guard.integrations.langchain import CostTrackingCallback
+
+         tracker = CostTracker()
+         callback = CostTrackingCallback(tracker)
+
+         llm = ChatOpenAI(model="gpt-4o", callbacks=[callback])
+         result = llm.invoke("Hello!")
+     """
+
+     def __init__(
+         self,
+         tracker: Any,  # CostTracker
+         tags: Optional[Dict[str, str]] = None,
+     ):
+         """
+         Initialize the callback handler.
+
+         Args:
+             tracker: CostTracker instance
+             tags: Default tags to apply to all tracked calls
+         """
+         if not LANGCHAIN_AVAILABLE:
+             raise ImportError(
+                 "LangChain is required for this integration. "
+                 "Install with: pip install llm-cost-guard[langchain]"
+             )
+
+         super().__init__()
+         self._tracker = tracker
+         self._default_tags = tags or {}
+
+         # Track in-flight calls
+         self._run_info: Dict[str, Dict[str, Any]] = {}
+
+     def on_llm_start(
+         self,
+         serialized: Dict[str, Any],
+         prompts: List[str],
+         *,
+         run_id: Any,
+         parent_run_id: Optional[Any] = None,
+         tags: Optional[List[str]] = None,
+         metadata: Optional[Dict[str, Any]] = None,
+         **kwargs: Any,
+     ) -> None:
+         """Record the start of an LLM call."""
+         import time
+
+         self._run_info[str(run_id)] = {
+             "start_time": time.time(),
+             "model": serialized.get("kwargs", {}).get("model_name", "unknown"),
+             "prompts": prompts,
+             "tags": tags or [],
+             "metadata": metadata or {},
+         }
+
+     def on_chat_model_start(
+         self,
+         serialized: Dict[str, Any],
+         messages: List[List[Any]],
+         *,
+         run_id: Any,
+         parent_run_id: Optional[Any] = None,
+         tags: Optional[List[str]] = None,
+         metadata: Optional[Dict[str, Any]] = None,
+         **kwargs: Any,
+     ) -> None:
+         """Record the start of a chat model call."""
+         import time
+
+         model = serialized.get("kwargs", {}).get("model_name")
+         if not model:
+             model = serialized.get("kwargs", {}).get("model", "unknown")
+
+         self._run_info[str(run_id)] = {
+             "start_time": time.time(),
+             "model": model,
+             "messages": messages,
+             "tags": tags or [],
+             "metadata": metadata or {},
+         }
+
+     def on_llm_end(
+         self,
+         response: "LLMResult",
+         *,
+         run_id: Any,
+         parent_run_id: Optional[Any] = None,
+         **kwargs: Any,
+     ) -> None:
+         """Record the end of an LLM call."""
+         import time
+
+         run_id_str = str(run_id)
+         if run_id_str not in self._run_info:
+             return
+
+         run_info = self._run_info.pop(run_id_str)
+         latency_ms = int((time.time() - run_info["start_time"]) * 1000)
+
+         # Extract usage from response
+         input_tokens = 0
+         output_tokens = 0
+
+         if response.llm_output:
+             token_usage = response.llm_output.get("token_usage", {})
+             input_tokens = token_usage.get("prompt_tokens", 0)
+             output_tokens = token_usage.get("completion_tokens", 0)
+
+             # Also check for model-specific usage
+             if "usage" in response.llm_output:
+                 usage = response.llm_output["usage"]
+                 input_tokens = usage.get("prompt_tokens", usage.get("input_tokens", input_tokens))
+                 output_tokens = usage.get(
+                     "completion_tokens", usage.get("output_tokens", output_tokens)
+                 )
+
+         # Detect provider from model
+         from llm_cost_guard.providers import detect_provider
+
+         model = run_info["model"]
+         provider = detect_provider(model)
+
+         # Build tags
+         tags = dict(self._default_tags)
+         for tag in run_info.get("tags", []):
+             if ":" in tag:
+                 key, value = tag.split(":", 1)
+                 tags[key] = value
+             else:
+                 tags[tag] = "true"
+
+         # Record the call
+         try:
+             self._tracker.record(
+                 provider=provider,
+                 model=model,
+                 input_tokens=input_tokens,
+                 output_tokens=output_tokens,
+                 tags=tags,
+                 success=True,
+                 latency_ms=latency_ms,
+                 metadata=run_info.get("metadata", {}),
+             )
+         except Exception as e:
+             logger.warning(f"Failed to record LangChain call: {e}")
+
+     def on_llm_error(
+         self,
+         error: BaseException,
+         *,
+         run_id: Any,
+         parent_run_id: Optional[Any] = None,
+         **kwargs: Any,
+     ) -> None:
+         """Record an LLM call error."""
+         import time
+
+         run_id_str = str(run_id)
+         if run_id_str not in self._run_info:
+             return
+
+         run_info = self._run_info.pop(run_id_str)
+         latency_ms = int((time.time() - run_info["start_time"]) * 1000)
+
+         # Detect provider from model
+         from llm_cost_guard.providers import detect_provider
+
+         model = run_info["model"]
+         provider = detect_provider(model)
+
+         # Build tags
+         tags = dict(self._default_tags)
+         for tag in run_info.get("tags", []):
+             if ":" in tag:
+                 key, value = tag.split(":", 1)
+                 tags[key] = value
+             else:
+                 tags[tag] = "true"
+
+         # Record the failed call
+         try:
+             self._tracker.record(
+                 provider=provider,
+                 model=model,
+                 input_tokens=0,  # We don't know tokens for failed calls
+                 output_tokens=0,
+                 tags=tags,
+                 success=False,
+                 error_type=type(error).__name__,
+                 latency_ms=latency_ms,
+                 metadata=run_info.get("metadata", {}),
+             )
+         except Exception as e:
+             logger.warning(f"Failed to record LangChain error: {e}")
+
+
+ def track_chain(
+     tracker: Any,  # CostTracker
+     tags: Optional[Dict[str, str]] = None,
+ ) -> Callable[[F], F]:
+     """
+     Decorator to track costs for an entire LangChain chain.
+
+     Usage:
+         @track_chain(tracker, tags={"chain": "rag_pipeline"})
+         def my_rag_chain(query):
+             # Chain implementation
+             return result
+
+     Args:
+         tracker: CostTracker instance
+         tags: Tags to apply to the tracked span
+
+     Returns:
+         Decorated function
+     """
+
+     def decorator(func: F) -> F:
+         @functools.wraps(func)
+         def wrapper(*args: Any, **kwargs: Any) -> Any:
+             with tracker.span(func.__name__, tags=tags):
+                 return func(*args, **kwargs)
+
+         return wrapper  # type: ignore
+
+     return decorator
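
For context, a minimal usage sketch of this integration, combining the callback and the `track_chain` decorator exactly as the docstrings above describe. The `langchain_openai` import, the no-argument `CostTracker()` construction, and the example tags are assumptions for illustration; they are not shown in this diff.

```python
# Sketch only: assumes langchain-openai is installed and that CostTracker()
# can be constructed with defaults, which this excerpt does not show.
from langchain_openai import ChatOpenAI

from llm_cost_guard import CostTracker
from llm_cost_guard.integrations.langchain import CostTrackingCallback, track_chain

tracker = CostTracker()
callback = CostTrackingCallback(tracker, tags={"team": "search"})

# Every call made by this model is recorded via the callback.
llm = ChatOpenAI(model="gpt-4o", callbacks=[callback])

@track_chain(tracker, tags={"chain": "rag_pipeline"})
def answer(query: str) -> str:
    # The decorator wraps the whole function in tracker.span(...),
    # so calls inside it are grouped under one span.
    return llm.invoke(query).content

print(answer("Hello!"))
```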
@@ -0,0 +1,123 @@
+ """
+ Data models for LLM Cost Guard.
+ """
+
+ from dataclasses import dataclass, field
+ from datetime import datetime
+ from enum import Enum
+ from typing import Any, Dict, List, Literal, Optional
+
+
+ class ModelType(str, Enum):
+     """Types of LLM models."""
+
+     CHAT = "chat"
+     EMBEDDING = "embedding"
+     IMAGE = "image"
+     AUDIO = "audio"
+     COMPLETION = "completion"
+
+
+ @dataclass
+ class CostRecord:
+     """Single LLM call record."""
+
+     timestamp: datetime
+     provider: str
+     model: str
+     model_type: ModelType = ModelType.CHAT
+     input_tokens: int = 0
+     output_tokens: int = 0
+     input_cost: float = 0.0
+     output_cost: float = 0.0
+     total_cost: float = 0.0
+     latency_ms: int = 0
+     tags: Dict[str, str] = field(default_factory=dict)
+     metadata: Dict[str, Any] = field(default_factory=dict)
+     success: bool = True
+     error_type: Optional[str] = None
+     cached: bool = False
+     cache_savings: float = 0.0
+     span_id: Optional[str] = None
+
+     def __post_init__(self) -> None:
+         """Calculate total cost if not provided."""
+         if self.total_cost == 0.0 and (self.input_cost > 0 or self.output_cost > 0):
+             self.total_cost = self.input_cost + self.output_cost
+
+
+ @dataclass
+ class CostReport:
+     """Aggregated cost report."""
+
+     start_date: Optional[datetime] = None
+     end_date: Optional[datetime] = None
+     total_cost: float = 0.0
+     total_input_tokens: int = 0
+     total_output_tokens: int = 0
+     total_calls: int = 0
+     successful_calls: int = 0
+     failed_calls: int = 0
+     cache_hits: int = 0
+     cache_savings: float = 0.0
+     effective_cost: float = 0.0  # total_cost - cache_savings
+     records: List[CostRecord] = field(default_factory=list)
+     grouped_data: Dict[str, Any] = field(default_factory=dict)
+
+     def __post_init__(self) -> None:
+         """Calculate effective cost."""
+         if self.effective_cost == 0.0:
+             self.effective_cost = self.total_cost - self.cache_savings
+
+
+ @dataclass
+ class HealthStatus:
+     """Health check status for the tracker."""
+
+     healthy: bool = True
+     backend_connected: bool = True
+     pricing_fresh: bool = True
+     last_record_time: Optional[datetime] = None
+     pending_records: int = 0
+     errors: List[str] = field(default_factory=list)
+     pricing_version: Optional[str] = None
+     pricing_last_updated: Optional[datetime] = None
+
+
+ @dataclass
+ class ModelPricing:
+     """Pricing information for a model."""
+
+     input_cost_per_1k: float
+     output_cost_per_1k: float
+     cached_input_cost_per_1k: Optional[float] = None
+     context_window: int = 128000
+     model_type: ModelType = ModelType.CHAT
+
+     # For image models
+     image_cost_per_image: Optional[float] = None
+
+     # For audio models
+     audio_cost_per_minute: Optional[float] = None
+
+     # For embedding models
+     embedding_dimensions: Optional[int] = None
+
+
+ @dataclass
+ class UsageData:
+     """Token usage data from an LLM call."""
+
+     input_tokens: int = 0
+     output_tokens: int = 0
+     cached_tokens: int = 0
+     total_tokens: int = 0
+
+     # For non-text models
+     image_count: int = 0
+     audio_duration_seconds: float = 0.0
+
+     def __post_init__(self) -> None:
+         """Calculate total tokens if not provided."""
+         if self.total_tokens == 0:
+             self.total_tokens = self.input_tokens + self.output_tokens
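
A quick illustration of the `__post_init__` behavior in `CostRecord` and `UsageData` above; the values are made up for the example.

```python
from datetime import datetime, timezone

from llm_cost_guard.models import CostRecord, UsageData

# total_cost is derived from input_cost + output_cost when left at 0.0.
record = CostRecord(
    timestamp=datetime.now(timezone.utc),
    provider="anthropic",
    model="claude-3-5-sonnet-20241022",
    input_tokens=12_000,
    output_tokens=800,
    input_cost=0.036,
    output_cost=0.012,
)
print(round(record.total_cost, 6))  # 0.048

# total_tokens is likewise filled in when not provided.
usage = UsageData(input_tokens=12_000, output_tokens=800)
print(usage.total_tokens)  # 12800
```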
@@ -0,0 +1,7 @@
+ """
+ Pricing module for LLM Cost Guard.
+ """
+
+ from llm_cost_guard.pricing.loader import PricingLoader, get_pricing
+
+ __all__ = ["PricingLoader", "get_pricing"]
@@ -0,0 +1,88 @@
+ version: "2026-01-15"
+ models:
+   # Claude 3.5 Sonnet
+   claude-3-5-sonnet-20241022:
+     input_cost_per_1k: 0.003
+     output_cost_per_1k: 0.015
+     cached_input_cost_per_1k: 0.0003
+     context_window: 200000
+     model_type: chat
+
+   claude-3-5-sonnet-latest:
+     input_cost_per_1k: 0.003
+     output_cost_per_1k: 0.015
+     cached_input_cost_per_1k: 0.0003
+     context_window: 200000
+     model_type: chat
+
+   claude-3-5-sonnet-20240620:
+     input_cost_per_1k: 0.003
+     output_cost_per_1k: 0.015
+     cached_input_cost_per_1k: 0.0003
+     context_window: 200000
+     model_type: chat
+
+   # Claude 3.5 Haiku
+   claude-3-5-haiku-20241022:
+     input_cost_per_1k: 0.0008
+     output_cost_per_1k: 0.004
+     cached_input_cost_per_1k: 0.00008
+     context_window: 200000
+     model_type: chat
+
+   claude-3-5-haiku-latest:
+     input_cost_per_1k: 0.0008
+     output_cost_per_1k: 0.004
+     cached_input_cost_per_1k: 0.00008
+     context_window: 200000
+     model_type: chat
+
+   # Claude 3 Opus
+   claude-3-opus-20240229:
+     input_cost_per_1k: 0.015
+     output_cost_per_1k: 0.075
+     cached_input_cost_per_1k: 0.0015
+     context_window: 200000
+     model_type: chat
+
+   claude-3-opus-latest:
+     input_cost_per_1k: 0.015
+     output_cost_per_1k: 0.075
+     cached_input_cost_per_1k: 0.0015
+     context_window: 200000
+     model_type: chat
+
+   # Claude 3 Sonnet
+   claude-3-sonnet-20240229:
+     input_cost_per_1k: 0.003
+     output_cost_per_1k: 0.015
+     context_window: 200000
+     model_type: chat
+
+   # Claude 3 Haiku
+   claude-3-haiku-20240307:
+     input_cost_per_1k: 0.00025
+     output_cost_per_1k: 0.00125
+     cached_input_cost_per_1k: 0.00003
+     context_window: 200000
+     model_type: chat
+
+   # Claude 2 (legacy)
+   claude-2.1:
+     input_cost_per_1k: 0.008
+     output_cost_per_1k: 0.024
+     context_window: 200000
+     model_type: chat
+
+   claude-2.0:
+     input_cost_per_1k: 0.008
+     output_cost_per_1k: 0.024
+     context_window: 100000
+     model_type: chat
+
+   # Claude Instant (legacy)
+   claude-instant-1.2:
+     input_cost_per_1k: 0.0008
+     output_cost_per_1k: 0.0024
+     context_window: 100000
+     model_type: chat
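
To make the per-1k units above concrete, here is a small sketch of how these rates translate into the cost of a single call. The helper function is illustrative only, not an API from this package.

```python
def call_cost(input_tokens: int, output_tokens: int,
              input_cost_per_1k: float, output_cost_per_1k: float) -> float:
    """USD cost of one call, given per-1k-token rates like those in the table."""
    return (input_tokens / 1000) * input_cost_per_1k + (output_tokens / 1000) * output_cost_per_1k

# claude-3-5-sonnet-20241022 with 12,000 input and 800 output tokens:
# 12 * 0.003 + 0.8 * 0.015 = 0.036 + 0.012 ≈ $0.048
print(round(call_cost(12_000, 800, 0.003, 0.015), 6))  # 0.048
```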
@@ -0,0 +1,215 @@
+ version: "2026-01-15"
+ # AWS Bedrock pricing (us-east-1 region)
+ models:
+   # Anthropic Claude on Bedrock
+   anthropic.claude-3-5-sonnet-20241022-v2:0:
+     input_cost_per_1k: 0.003
+     output_cost_per_1k: 0.015
+     context_window: 200000
+     model_type: chat
+
+   anthropic.claude-3-5-sonnet-20240620-v1:0:
+     input_cost_per_1k: 0.003
+     output_cost_per_1k: 0.015
+     context_window: 200000
+     model_type: chat
+
+   anthropic.claude-3-5-haiku-20241022-v1:0:
+     input_cost_per_1k: 0.0008
+     output_cost_per_1k: 0.004
+     context_window: 200000
+     model_type: chat
+
+   anthropic.claude-3-opus-20240229-v1:0:
+     input_cost_per_1k: 0.015
+     output_cost_per_1k: 0.075
+     context_window: 200000
+     model_type: chat
+
+   anthropic.claude-3-sonnet-20240229-v1:0:
+     input_cost_per_1k: 0.003
+     output_cost_per_1k: 0.015
+     context_window: 200000
+     model_type: chat
+
+   anthropic.claude-3-haiku-20240307-v1:0:
+     input_cost_per_1k: 0.00025
+     output_cost_per_1k: 0.00125
+     context_window: 200000
+     model_type: chat
+
+   anthropic.claude-v2:1:
+     input_cost_per_1k: 0.008
+     output_cost_per_1k: 0.024
+     context_window: 200000
+     model_type: chat
+
+   anthropic.claude-v2:
+     input_cost_per_1k: 0.008
+     output_cost_per_1k: 0.024
+     context_window: 100000
+     model_type: chat
+
+   anthropic.claude-instant-v1:
+     input_cost_per_1k: 0.0008
+     output_cost_per_1k: 0.0024
+     context_window: 100000
+     model_type: chat
+
+   # Amazon Titan
+   amazon.titan-text-premier-v1:0:
+     input_cost_per_1k: 0.0005
+     output_cost_per_1k: 0.0015
+     context_window: 32000
+     model_type: chat
+
+   amazon.titan-text-express-v1:
+     input_cost_per_1k: 0.0002
+     output_cost_per_1k: 0.0006
+     context_window: 8000
+     model_type: chat
+
+   amazon.titan-text-lite-v1:
+     input_cost_per_1k: 0.00015
+     output_cost_per_1k: 0.0002
+     context_window: 4000
+     model_type: chat
+
+   amazon.titan-embed-text-v1:
+     input_cost_per_1k: 0.0001
+     output_cost_per_1k: 0.0
+     context_window: 8000
+     model_type: embedding
+     embedding_dimensions: 1536
+
+   amazon.titan-embed-text-v2:0:
+     input_cost_per_1k: 0.00002
+     output_cost_per_1k: 0.0
+     context_window: 8000
+     model_type: embedding
+     embedding_dimensions: 1024
+
+   # Meta Llama
+   meta.llama3-2-90b-instruct-v1:0:
+     input_cost_per_1k: 0.002
+     output_cost_per_1k: 0.002
+     context_window: 128000
+     model_type: chat
+
+   meta.llama3-2-11b-instruct-v1:0:
+     input_cost_per_1k: 0.00016
+     output_cost_per_1k: 0.00016
+     context_window: 128000
+     model_type: chat
+
+   meta.llama3-2-3b-instruct-v1:0:
+     input_cost_per_1k: 0.00015
+     output_cost_per_1k: 0.00015
+     context_window: 128000
+     model_type: chat
+
+   meta.llama3-2-1b-instruct-v1:0:
+     input_cost_per_1k: 0.0001
+     output_cost_per_1k: 0.0001
+     context_window: 128000
+     model_type: chat
+
+   meta.llama3-1-405b-instruct-v1:0:
+     input_cost_per_1k: 0.00195
+     output_cost_per_1k: 0.00256
+     context_window: 128000
+     model_type: chat
+
+   meta.llama3-1-70b-instruct-v1:0:
+     input_cost_per_1k: 0.00072
+     output_cost_per_1k: 0.00072
+     context_window: 128000
+     model_type: chat
+
+   meta.llama3-1-8b-instruct-v1:0:
+     input_cost_per_1k: 0.00022
+     output_cost_per_1k: 0.00022
+     context_window: 128000
+     model_type: chat
+
+   meta.llama3-70b-instruct-v1:0:
+     input_cost_per_1k: 0.00265
+     output_cost_per_1k: 0.0035
+     context_window: 8000
+     model_type: chat
+
+   meta.llama3-8b-instruct-v1:0:
+     input_cost_per_1k: 0.0003
+     output_cost_per_1k: 0.0006
+     context_window: 8000
+     model_type: chat
+
+   # Mistral
+   mistral.mistral-large-2407-v1:0:
+     input_cost_per_1k: 0.002
+     output_cost_per_1k: 0.006
+     context_window: 128000
+     model_type: chat
+
+   mistral.mistral-large-2402-v1:0:
+     input_cost_per_1k: 0.004
+     output_cost_per_1k: 0.012
+     context_window: 32000
+     model_type: chat
+
+   mistral.mistral-small-2402-v1:0:
+     input_cost_per_1k: 0.001
+     output_cost_per_1k: 0.003
+     context_window: 32000
+     model_type: chat
+
+   mistral.mixtral-8x7b-instruct-v0:1:
+     input_cost_per_1k: 0.00045
+     output_cost_per_1k: 0.0007
+     context_window: 32000
+     model_type: chat
+
+   mistral.mistral-7b-instruct-v0:2:
+     input_cost_per_1k: 0.00015
+     output_cost_per_1k: 0.0002
+     context_window: 32000
+     model_type: chat
+
+   # Cohere
+   cohere.command-r-plus-v1:0:
+     input_cost_per_1k: 0.003
+     output_cost_per_1k: 0.015
+     context_window: 128000
+     model_type: chat
+
+   cohere.command-r-v1:0:
+     input_cost_per_1k: 0.0005
+     output_cost_per_1k: 0.0015
+     context_window: 128000
+     model_type: chat
+
+   cohere.command-text-v14:
+     input_cost_per_1k: 0.0015
+     output_cost_per_1k: 0.002
+     context_window: 4096
+     model_type: chat
+
+   cohere.command-light-text-v14:
+     input_cost_per_1k: 0.0003
+     output_cost_per_1k: 0.0006
+     context_window: 4096
+     model_type: chat
+
+   cohere.embed-english-v3:
+     input_cost_per_1k: 0.0001
+     output_cost_per_1k: 0.0
+     context_window: 512
+     model_type: embedding
+     embedding_dimensions: 1024
+
+   cohere.embed-multilingual-v3:
+     input_cost_per_1k: 0.0001
+     output_cost_per_1k: 0.0
+     context_window: 512
+     model_type: embedding
+     embedding_dimensions: 1024
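
For reference, one of the embedding entries above expressed as the `ModelPricing` dataclass from `llm_cost_guard/models.py`. The manual construction is for illustration only; the package's own loader in `llm_cost_guard/pricing/loader.py` is not part of this excerpt.

```python
from llm_cost_guard.models import ModelPricing, ModelType

# amazon.titan-embed-text-v2:0 from the table above
titan_embed_v2 = ModelPricing(
    input_cost_per_1k=0.00002,
    output_cost_per_1k=0.0,
    context_window=8000,
    model_type=ModelType.EMBEDDING,
    embedding_dimensions=1024,
)

# Embedding 50,000 tokens: 50 * 0.00002 ≈ $0.001
print(round((50_000 / 1000) * titan_embed_v2.input_cost_per_1k, 6))  # 0.001
```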