mirascope 1.19.0__py3-none-any.whl → 1.20.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90) hide show
  1. mirascope/__init__.py +4 -0
  2. mirascope/beta/openai/realtime/realtime.py +7 -8
  3. mirascope/beta/openai/realtime/tool.py +2 -2
  4. mirascope/core/__init__.py +10 -1
  5. mirascope/core/anthropic/_utils/__init__.py +0 -2
  6. mirascope/core/anthropic/_utils/_convert_message_params.py +1 -7
  7. mirascope/core/anthropic/_utils/_message_param_converter.py +48 -31
  8. mirascope/core/anthropic/call_response.py +7 -9
  9. mirascope/core/anthropic/call_response_chunk.py +10 -0
  10. mirascope/core/anthropic/stream.py +6 -8
  11. mirascope/core/azure/_utils/__init__.py +0 -2
  12. mirascope/core/azure/call_response.py +7 -10
  13. mirascope/core/azure/call_response_chunk.py +6 -1
  14. mirascope/core/azure/stream.py +6 -8
  15. mirascope/core/base/__init__.py +10 -1
  16. mirascope/core/base/_utils/__init__.py +2 -0
  17. mirascope/core/base/_utils/_get_image_dimensions.py +39 -0
  18. mirascope/core/base/call_response.py +36 -6
  19. mirascope/core/base/call_response_chunk.py +15 -1
  20. mirascope/core/base/stream.py +25 -3
  21. mirascope/core/base/types.py +276 -2
  22. mirascope/core/bedrock/_utils/__init__.py +0 -2
  23. mirascope/core/bedrock/call_response.py +7 -10
  24. mirascope/core/bedrock/call_response_chunk.py +6 -0
  25. mirascope/core/bedrock/stream.py +6 -10
  26. mirascope/core/cohere/_utils/__init__.py +0 -2
  27. mirascope/core/cohere/call_response.py +7 -10
  28. mirascope/core/cohere/call_response_chunk.py +6 -0
  29. mirascope/core/cohere/stream.py +5 -8
  30. mirascope/core/costs/__init__.py +5 -0
  31. mirascope/core/{anthropic/_utils/_calculate_cost.py → costs/_anthropic_calculate_cost.py} +45 -14
  32. mirascope/core/{azure/_utils/_calculate_cost.py → costs/_azure_calculate_cost.py} +3 -3
  33. mirascope/core/{bedrock/_utils/_calculate_cost.py → costs/_bedrock_calculate_cost.py} +3 -3
  34. mirascope/core/{cohere/_utils/_calculate_cost.py → costs/_cohere_calculate_cost.py} +12 -8
  35. mirascope/core/{gemini/_utils/_calculate_cost.py → costs/_gemini_calculate_cost.py} +7 -7
  36. mirascope/core/costs/_google_calculate_cost.py +427 -0
  37. mirascope/core/costs/_groq_calculate_cost.py +156 -0
  38. mirascope/core/costs/_litellm_calculate_cost.py +11 -0
  39. mirascope/core/costs/_mistral_calculate_cost.py +64 -0
  40. mirascope/core/costs/_openai_calculate_cost.py +416 -0
  41. mirascope/core/{vertex/_utils/_calculate_cost.py → costs/_vertex_calculate_cost.py} +8 -7
  42. mirascope/core/{xai/_utils/_calculate_cost.py → costs/_xai_calculate_cost.py} +9 -9
  43. mirascope/core/costs/calculate_cost.py +86 -0
  44. mirascope/core/gemini/_utils/__init__.py +0 -2
  45. mirascope/core/gemini/call_response.py +7 -10
  46. mirascope/core/gemini/call_response_chunk.py +6 -1
  47. mirascope/core/gemini/stream.py +5 -8
  48. mirascope/core/google/_utils/__init__.py +0 -2
  49. mirascope/core/google/_utils/_setup_call.py +21 -2
  50. mirascope/core/google/call_response.py +9 -10
  51. mirascope/core/google/call_response_chunk.py +6 -1
  52. mirascope/core/google/stream.py +5 -8
  53. mirascope/core/groq/_utils/__init__.py +0 -2
  54. mirascope/core/groq/call_response.py +22 -10
  55. mirascope/core/groq/call_response_chunk.py +6 -0
  56. mirascope/core/groq/stream.py +5 -8
  57. mirascope/core/litellm/call_response.py +3 -4
  58. mirascope/core/litellm/stream.py +30 -22
  59. mirascope/core/mistral/_utils/__init__.py +0 -2
  60. mirascope/core/mistral/call_response.py +7 -10
  61. mirascope/core/mistral/call_response_chunk.py +6 -0
  62. mirascope/core/mistral/stream.py +5 -8
  63. mirascope/core/openai/_utils/__init__.py +0 -2
  64. mirascope/core/openai/_utils/_convert_message_params.py +4 -4
  65. mirascope/core/openai/call_response.py +30 -10
  66. mirascope/core/openai/call_response_chunk.py +6 -0
  67. mirascope/core/openai/stream.py +5 -8
  68. mirascope/core/vertex/_utils/__init__.py +0 -2
  69. mirascope/core/vertex/call_response.py +5 -10
  70. mirascope/core/vertex/call_response_chunk.py +6 -0
  71. mirascope/core/vertex/stream.py +5 -8
  72. mirascope/core/xai/_utils/__init__.py +1 -2
  73. mirascope/core/xai/call_response.py +0 -11
  74. mirascope/llm/__init__.py +10 -2
  75. mirascope/llm/_protocols.py +8 -28
  76. mirascope/llm/call_response.py +6 -6
  77. mirascope/llm/call_response_chunk.py +12 -3
  78. mirascope/llm/llm_call.py +21 -23
  79. mirascope/llm/llm_override.py +56 -27
  80. mirascope/llm/stream.py +7 -7
  81. mirascope/llm/tool.py +1 -1
  82. mirascope/retries/fallback.py +1 -1
  83. {mirascope-1.19.0.dist-info → mirascope-1.20.1.dist-info}/METADATA +1 -1
  84. {mirascope-1.19.0.dist-info → mirascope-1.20.1.dist-info}/RECORD +86 -82
  85. mirascope/core/google/_utils/_calculate_cost.py +0 -215
  86. mirascope/core/groq/_utils/_calculate_cost.py +0 -69
  87. mirascope/core/mistral/_utils/_calculate_cost.py +0 -48
  88. mirascope/core/openai/_utils/_calculate_cost.py +0 -246
  89. {mirascope-1.19.0.dist-info → mirascope-1.20.1.dist-info}/WHEEL +0 -0
  90. {mirascope-1.19.0.dist-info → mirascope-1.20.1.dist-info}/licenses/LICENSE +0 -0
@@ -14,6 +14,7 @@ from typing import (
14
14
  overload,
15
15
  )
16
16
 
17
+ from ..costs import calculate_cost
17
18
  from ._utils import (
18
19
  HandleStream,
19
20
  HandleStreamAsync,
@@ -35,6 +36,7 @@ from .messages import Messages
35
36
  from .metadata import Metadata
36
37
  from .prompt import prompt_template
37
38
  from .tool import BaseTool
39
+ from .types import CostMetadata, Provider
38
40
 
39
41
  _BaseCallResponseT = TypeVar("_BaseCallResponseT", bound=BaseCallResponse)
40
42
  _BaseCallResponseChunkT = TypeVar(
@@ -211,10 +213,30 @@ class BaseStream(
211
213
  self.finish_reasons = chunk.finish_reasons
212
214
 
213
215
  @property
214
- @abstractmethod
216
+ def provider(self) -> Provider:
217
+ return cast(Provider, self._provider)
218
+
219
+ @property
220
+ def cost_metadata(self) -> CostMetadata:
221
+ """Returns metadata needed for cost calculation."""
222
+ return CostMetadata(
223
+ input_tokens=self.input_tokens,
224
+ output_tokens=self.output_tokens,
225
+ cached_tokens=self.cached_tokens,
226
+ )
227
+
228
+ @property
215
229
  def cost(self) -> float | None:
216
- """Returns the cost of the stream."""
217
- ...
230
+ """Calculate the cost of this streaming API call."""
231
+
232
+ if self.input_tokens is None or self.output_tokens is None:
233
+ return None
234
+
235
+ return calculate_cost(
236
+ provider=self.provider,
237
+ model=self.model,
238
+ metadata=self.cost_metadata,
239
+ )
218
240
 
219
241
  @abstractmethod
220
242
  def _construct_message_param(
@@ -1,8 +1,8 @@
1
1
  from __future__ import annotations
2
2
 
3
- from typing import TYPE_CHECKING, Literal, TypeAlias
3
+ from typing import TYPE_CHECKING, Annotated, Literal, TypeAlias
4
4
 
5
- from pydantic import BaseModel
5
+ from pydantic import BaseModel, ConfigDict, Field
6
6
 
7
7
  if TYPE_CHECKING:
8
8
  from PIL import Image
@@ -68,3 +68,277 @@ JsonableType: TypeAlias = (
68
68
  | dict[str, "JsonableType"]
69
69
  | BaseModel
70
70
  )
71
+
72
+
73
+ class VideoMetadata(BaseModel):
74
+ """Metadata for a video for cost calculation"""
75
+
76
+ duration_seconds: Annotated[
77
+ float,
78
+ Field(description="Duration of the video in seconds"),
79
+ ]
80
+
81
+ with_audio: Annotated[
82
+ bool | None,
83
+ Field(
84
+ default=False,
85
+ description="Whether the video includes audio that should be processed",
86
+ ),
87
+ ] = False
88
+
89
+ tokens: Annotated[
90
+ int | None,
91
+ Field(default=None, description="Precalculated token count for this video"),
92
+ ] = None
93
+
94
+
95
+ class AudioMetadata(BaseModel):
96
+ """Metadata for an audio file for cost calculation"""
97
+
98
+ duration_seconds: Annotated[
99
+ float,
100
+ Field(description="Duration of the audio in seconds"),
101
+ ]
102
+
103
+ with_timestamps: Annotated[
104
+ bool | None,
105
+ Field(default=False, description="Whether timestamps should be included"),
106
+ ] = False
107
+
108
+ tokens: Annotated[
109
+ int | None,
110
+ Field(default=None, description="Precalculated token count for this audio"),
111
+ ] = None
112
+
113
+
114
+ class ImageMetadata(BaseModel):
115
+ """Metadata for an image for cost calculation"""
116
+
117
+ width: Annotated[
118
+ int,
119
+ Field(description="Width of the image in pixels"),
120
+ ]
121
+
122
+ height: Annotated[
123
+ int,
124
+ Field(description="Height of the image in pixels"),
125
+ ]
126
+
127
+ tokens: Annotated[
128
+ int | None,
129
+ Field(default=None, description="Precalculated token count for this image"),
130
+ ] = None
131
+
132
+ detail: Annotated[
133
+ str | None,
134
+ Field(default=None, description="Detail level of the image"),
135
+ ] = None
136
+
137
+
138
+ class GoogleMetadata(BaseModel):
139
+ """Google API specific metadata for cost calculation"""
140
+
141
+ use_vertex_ai: Annotated[
142
+ bool | None,
143
+ Field(
144
+ default=False,
145
+ description="Whether to use Vertex AI pricing (vs. direct Gemini API)",
146
+ ),
147
+ ] = False
148
+
149
+ grounding_requests: Annotated[
150
+ int | None,
151
+ Field(default=None, description="Number of Google Search grounding requests"),
152
+ ] = None
153
+
154
+
155
+ class PDFImageMetadata(BaseModel):
156
+ """Metadata for an image extracted from a PDF page"""
157
+
158
+ width: Annotated[
159
+ int,
160
+ Field(description="Width of the image in pixels"),
161
+ ]
162
+
163
+ height: Annotated[
164
+ int,
165
+ Field(description="Height of the image in pixels"),
166
+ ]
167
+
168
+ tokens: Annotated[
169
+ int | None,
170
+ Field(default=None, description="Precalculated token count for this image"),
171
+ ] = None
172
+
173
+
174
+ class PDFMetadata(BaseModel):
175
+ """Metadata specific to PDF documents for cost calculation"""
176
+
177
+ page_count: Annotated[
178
+ int | None,
179
+ Field(default=None, description="Number of pages in the PDF"),
180
+ ] = None
181
+
182
+ text_tokens: Annotated[
183
+ int | None,
184
+ Field(
185
+ default=None, description="Number of tokens from text content in the PDF"
186
+ ),
187
+ ] = None
188
+
189
+ images: Annotated[
190
+ list[PDFImageMetadata] | None,
191
+ Field(
192
+ default=None,
193
+ description="List of images extracted from PDF with width and height information",
194
+ ),
195
+ ] = None
196
+
197
+ cached: Annotated[
198
+ bool | None,
199
+ Field(
200
+ default=None,
201
+ description="Whether this PDF was cached for reduced token costs",
202
+ ),
203
+ ] = None
204
+
205
+
206
+ class CostMetadata(BaseModel):
207
+ """Metadata required for accurate LLM API cost calculation across all providers."""
208
+
209
+ model_config = ConfigDict(arbitrary_types_allowed=True)
210
+
211
+ # Common fields
212
+ input_tokens: Annotated[
213
+ int | float | None,
214
+ Field(default=None, description="Input tokens"),
215
+ ] = None
216
+ output_tokens: Annotated[
217
+ int | float | None,
218
+ Field(default=None, description="Output tokens"),
219
+ ] = None
220
+ cached_tokens: Annotated[
221
+ int | float | None,
222
+ Field(default=None, description="Cached tokens"),
223
+ ] = None
224
+ streaming_mode: Annotated[
225
+ bool | None,
226
+ Field(default=None, description="Whether streaming API was used"),
227
+ ] = None
228
+ cached_response: Annotated[
229
+ bool | None,
230
+ Field(default=None, description="Whether response was served from cache"),
231
+ ] = None
232
+ context_length: Annotated[
233
+ int | None,
234
+ Field(default=None, description="Total context window length in tokens"),
235
+ ] = None
236
+ realtime_mode: Annotated[
237
+ bool | None,
238
+ Field(default=None, description="Whether realtime processing was used"),
239
+ ] = None
240
+ region: Annotated[
241
+ str | None,
242
+ Field(
243
+ default=None,
244
+ description="Cloud region for request (affects pricing in some providers)",
245
+ ),
246
+ ] = None
247
+ tier: Annotated[
248
+ str | None,
249
+ Field(default=None, description="Service tier (e.g. standard, enterprise)"),
250
+ ] = None
251
+ batch_mode: Annotated[
252
+ bool | None,
253
+ Field(
254
+ default=False,
255
+ description="Whether batch mode is used (discount usually applies)",
256
+ ),
257
+ ] = None
258
+
259
+ # Media-related fields
260
+ images: Annotated[
261
+ list[ImageMetadata] | None,
262
+ Field(default=None, description="List of images with their metadata"),
263
+ ] = None
264
+ videos: Annotated[
265
+ list[VideoMetadata] | None,
266
+ Field(default=None, description="List of videos with their metadata"),
267
+ ] = None
268
+ audio: Annotated[
269
+ list[AudioMetadata] | None,
270
+ Field(default=None, description="List of audio clips with their metadata"),
271
+ ] = None
272
+ audio_output: Annotated[
273
+ list[AudioMetadata] | None,
274
+ Field(
275
+ default=None, description="List of audio output clips with their metadata"
276
+ ),
277
+ ] = None
278
+ # PDF-related fields
279
+ pdf: Annotated[
280
+ PDFMetadata | None,
281
+ Field(default=None, description="Metadata for PDF documents"),
282
+ ] = None
283
+
284
+ # Context caching related fields
285
+ context_cache_tokens: Annotated[
286
+ int | None,
287
+ Field(default=None, description="Number of cached context tokens"),
288
+ ] = None
289
+ context_cache_hours: Annotated[
290
+ float | None,
291
+ Field(default=None, description="Number of hours to keep context in cache"),
292
+ ] = None
293
+
294
+ # Provider-specific fields
295
+ google: Annotated[
296
+ GoogleMetadata | None,
297
+ Field(
298
+ default=None,
299
+ description="Google/Gemini-specific metadata for cost calculation",
300
+ ),
301
+ ] = None
302
+ realtime_tokens: Annotated[
303
+ int | None,
304
+ Field(
305
+ default=None,
306
+ description="[OpenAI] Number of realtime tokens in the request",
307
+ ),
308
+ ] = None
309
+
310
+ # Anthropic-specific fields
311
+ cache_write: Annotated[
312
+ bool | None,
313
+ Field(default=None, description="[Anthropic] Whether cache write occurred"),
314
+ ] = None
315
+ tool_use_tokens: Annotated[
316
+ int | None,
317
+ Field(default=None, description="[Anthropic] Tokens used for tool calls"),
318
+ ] = None
319
+
320
+ # If the provider happens to provide the cost, we should just use that.
321
+ cost: Annotated[
322
+ float | None,
323
+ Field(default=None, description="Cost provided by the API response"),
324
+ ] = None
325
+
326
+
327
+ Provider: TypeAlias = Literal[
328
+ "anthropic",
329
+ "azure",
330
+ "bedrock",
331
+ "cohere",
332
+ "gemini",
333
+ "google",
334
+ "groq",
335
+ "litellm",
336
+ "mistral",
337
+ "openai",
338
+ "vertex",
339
+ "xai",
340
+ ]
341
+ LocalProvider: TypeAlias = Literal[
342
+ "ollama",
343
+ "vllm",
344
+ ]
@@ -1,13 +1,11 @@
1
1
  """Bedrock utilities for decorator factories."""
2
2
 
3
- from ._calculate_cost import calculate_cost
4
3
  from ._convert_message_params import convert_message_params
5
4
  from ._get_json_output import get_json_output
6
5
  from ._handle_stream import handle_stream, handle_stream_async
7
6
  from ._setup_call import setup_call
8
7
 
9
8
  __all__ = [
10
- "calculate_cost",
11
9
  "convert_message_params",
12
10
  "get_json_output",
13
11
  "handle_stream",
@@ -19,7 +19,7 @@ from ..base import (
19
19
  BaseCallResponse,
20
20
  transform_tool_outputs,
21
21
  )
22
- from ..base.types import FinishReason
22
+ from ..base.types import CostMetadata, FinishReason
23
23
  from ._call_kwargs import BedrockCallKwargs
24
24
  from ._types import (
25
25
  AssistantMessageTypeDef,
@@ -33,7 +33,6 @@ from ._types import (
33
33
  ToolUseBlockContentTypeDef,
34
34
  UserMessageTypeDef,
35
35
  )
36
- from ._utils import calculate_cost
37
36
  from ._utils._convert_finish_reason_to_common_finish_reasons import (
38
37
  _convert_finish_reasons_to_common_finish_reasons,
39
38
  )
@@ -140,14 +139,6 @@ class BedrockCallResponse(
140
139
  """Returns the number of output tokens."""
141
140
  return self.usage["outputTokens"] if self.usage else None
142
141
 
143
- @computed_field
144
- @property
145
- def cost(self) -> float | None:
146
- """Returns the cost of the call."""
147
- return calculate_cost(
148
- self.input_tokens, self.cached_tokens, self.output_tokens, self.model
149
- )
150
-
151
142
  @computed_field
152
143
  @cached_property
153
144
  def message_param(self) -> SerializeAsAny[AssistantMessageTypeDef]:
@@ -245,3 +236,9 @@ class BedrockCallResponse(
245
236
  if not self.user_message_param:
246
237
  return None
247
238
  return BedrockMessageParamConverter.from_provider([self.user_message_param])[0] # pyright: ignore [reportArgumentType]
239
+
240
+ @computed_field
241
+ @property
242
+ def cost_metadata(self) -> CostMetadata:
243
+ """Get metadata required for cost calculation."""
244
+ return super().cost_metadata
@@ -9,6 +9,7 @@ from pydantic import SkipValidation
9
9
  from types_aiobotocore_bedrock_runtime.literals import StopReasonType as FinishReason
10
10
 
11
11
  from ..base import BaseCallResponseChunk, types
12
+ from ..base.types import CostMetadata
12
13
  from ._types import AsyncStreamOutputChunk, StreamOutputChunk, TokenUsageTypeDef
13
14
  from ._utils._convert_finish_reason_to_common_finish_reasons import (
14
15
  _convert_finish_reasons_to_common_finish_reasons,
@@ -98,6 +99,11 @@ class BedrockCallResponseChunk(
98
99
  return self.usage["outputTokens"]
99
100
  return None
100
101
 
102
+ @property
103
+ def cost_metadata(self) -> CostMetadata:
104
+ """Returns the cost metadata."""
105
+ return super().cost_metadata
106
+
101
107
  @property
102
108
  def common_finish_reasons(self) -> list[types.FinishReason] | None:
103
109
  return _convert_finish_reasons_to_common_finish_reasons(
@@ -20,6 +20,7 @@ from types_aiobotocore_bedrock_runtime.type_defs import (
20
20
  )
21
21
 
22
22
  from ..base.stream import BaseStream
23
+ from ..base.types import CostMetadata
23
24
  from ._types import (
24
25
  AssistantMessageTypeDef,
25
26
  InternalBedrockMessageParam,
@@ -28,9 +29,6 @@ from ._types import (
28
29
  ToolUseBlockMessageTypeDef,
29
30
  UserMessageTypeDef,
30
31
  )
31
- from ._utils import (
32
- calculate_cost,
33
- )
34
32
  from .call_params import BedrockCallParams
35
33
  from .call_response import BedrockCallResponse
36
34
  from .call_response_chunk import BedrockCallResponseChunk
@@ -91,13 +89,6 @@ class BedrockStream(
91
89
  ResponseMetadataTypeDef | AsyncResponseMetadataTypeDef
92
90
  ) = _DEFAULT_RESPONSE_METADATA
93
91
 
94
- @property
95
- def cost(self) -> float | None:
96
- """Returns the cost of the call."""
97
- return calculate_cost(
98
- self.input_tokens, self.cached_tokens, self.output_tokens, self.model
99
- )
100
-
101
92
  def _construct_message_param(
102
93
  self,
103
94
  tool_calls: list[ToolUseBlockContentTypeDef] | None = None,
@@ -156,3 +147,8 @@ class BedrockStream(
156
147
  start_time=self.start_time,
157
148
  end_time=self.end_time,
158
149
  )
150
+
151
+ @property
152
+ def cost_metadata(self) -> CostMetadata:
153
+ """Get metadata required for cost calculation."""
154
+ return super().cost_metadata
@@ -1,13 +1,11 @@
1
1
  """Cohere utilities for decorator factories."""
2
2
 
3
- from ._calculate_cost import calculate_cost
4
3
  from ._convert_message_params import convert_message_params
5
4
  from ._get_json_output import get_json_output
6
5
  from ._handle_stream import handle_stream, handle_stream_async
7
6
  from ._setup_call import setup_call
8
7
 
9
8
  __all__ = [
10
- "calculate_cost",
11
9
  "convert_message_params",
12
10
  "get_json_output",
13
11
  "handle_stream",
@@ -16,8 +16,7 @@ from pydantic import SkipValidation, computed_field
16
16
 
17
17
  from .. import BaseMessageParam
18
18
  from ..base import BaseCallResponse, transform_tool_outputs
19
- from ..base.types import FinishReason
20
- from ._utils import calculate_cost
19
+ from ..base.types import CostMetadata, FinishReason
21
20
  from ._utils._convert_finish_reason_to_common_finish_reasons import (
22
21
  _convert_finish_reasons_to_common_finish_reasons,
23
22
  )
@@ -119,14 +118,6 @@ class CohereCallResponse(
119
118
  return self.usage.output_tokens
120
119
  return None
121
120
 
122
- @computed_field
123
- @property
124
- def cost(self) -> float | None:
125
- """Returns the cost of the response."""
126
- return calculate_cost(
127
- self.input_tokens, self.cached_tokens, self.output_tokens, self.model
128
- )
129
-
130
121
  @computed_field
131
122
  @cached_property
132
123
  def message_param(self) -> ChatMessage:
@@ -202,3 +193,9 @@ class CohereCallResponse(
202
193
  if not self.user_message_param:
203
194
  return None
204
195
  return CohereMessageParamConverter.from_provider([self.user_message_param])[0]
196
+
197
+ @computed_field
198
+ @property
199
+ def cost_metadata(self) -> CostMetadata:
200
+ """Get metadata required for cost calculation."""
201
+ return super().cost_metadata
@@ -13,6 +13,7 @@ from cohere.types import (
13
13
  from pydantic import SkipValidation
14
14
 
15
15
  from ..base import BaseCallResponseChunk, types
16
+ from ..base.types import CostMetadata
16
17
  from ._types import (
17
18
  StreamEndStreamedChatResponse,
18
19
  StreamStartStreamedChatResponse,
@@ -112,6 +113,11 @@ class CohereCallResponseChunk(
112
113
  return self.usage.output_tokens
113
114
  return None
114
115
 
116
+ @property
117
+ def cost_metadata(self) -> CostMetadata:
118
+ """Returns the cost metadata."""
119
+ return super().cost_metadata
120
+
115
121
  @property
116
122
  def common_finish_reasons(self) -> list[types.FinishReason] | None:
117
123
  return _convert_finish_reasons_to_common_finish_reasons(
@@ -14,7 +14,7 @@ from cohere.types import (
14
14
  )
15
15
 
16
16
  from ..base.stream import BaseStream
17
- from ._utils import calculate_cost
17
+ from ..base.types import CostMetadata
18
18
  from .call_params import CohereCallParams
19
19
  from .call_response import CohereCallResponse
20
20
  from .call_response_chunk import CohereCallResponseChunk
@@ -59,13 +59,6 @@ class CohereStream(
59
59
 
60
60
  _provider = "cohere"
61
61
 
62
- @property
63
- def cost(self) -> float | None:
64
- """Returns the cost of the call."""
65
- return calculate_cost(
66
- self.input_tokens, self.cached_tokens, self.output_tokens, self.model
67
- )
68
-
69
62
  def _construct_message_param(
70
63
  self, tool_calls: list[ToolCall] | None = None, content: str | None = None
71
64
  ) -> ChatMessage:
@@ -114,3 +107,7 @@ class CohereStream(
114
107
  start_time=self.start_time,
115
108
  end_time=self.end_time,
116
109
  )
110
+
111
+ @property
112
+ def cost_metadata(self) -> CostMetadata:
113
+ return super().cost_metadata
@@ -0,0 +1,5 @@
1
+ """Cost calculation module for LLM API calls."""
2
+
3
+ from .calculate_cost import calculate_cost
4
+
5
+ __all__ = ["calculate_cost"]
@@ -1,10 +1,10 @@
1
1
  """Calculate the cost of a completion using the Anthropic API."""
2
2
 
3
+ from ..base.types import CostMetadata
4
+
3
5
 
4
6
  def calculate_cost(
5
- input_tokens: int | float | None,
6
- cached_tokens: int | float | None,
7
- output_tokens: int | float | None,
7
+ metadata: CostMetadata,
8
8
  model: str = "claude-3-haiku-20240229",
9
9
  ) -> float | None:
10
10
  """Calculate the cost of a completion using the Anthropic API.
@@ -14,6 +14,8 @@ def calculate_cost(
14
14
  Model Input Cached Output
15
15
  claude-3-5-haiku $0.80 / 1M tokens $0.08 / 1M tokens $4.00 / 1M tokens
16
16
  claude-3-5-haiku-20241022 $0.80 / 1M tokens $0.08 / 1M tokens $4.00 / 1M tokens
17
+ claude-3-7-sonnet $3.00 / 1M tokens $0.30 / 1M tokens $15.00 / 1M tokens
18
+ claude-3-7-sonnet-20250219 $3.00 / 1M tokens $0.30 / 1M tokens $15.00 / 1M tokens
17
19
  claude-3-5-sonnet $3.00 / 1M tokens $0.30 / 1M tokens $15.00 / 1M tokens
18
20
  claude-3-5-sonnet-20241022 $3.00 / 1M tokens $0.30 / 1M tokens $15.00 / 1M tokens
19
21
  claude-3-5-sonnet-20240620 $3.00 / 1M tokens $0.30 / 1M tokens $15.00 / 1M tokens
@@ -40,7 +42,7 @@ def calculate_cost(
40
42
  """
41
43
  pricing = {
42
44
  # Anthropic models
43
- "claude-3-5-haiku": {
45
+ "claude-3-5-haiku-latest": {
44
46
  "prompt": 0.000_000_8,
45
47
  "completion": 0.000_004,
46
48
  "cached": 0.000_000_08,
@@ -50,7 +52,17 @@ def calculate_cost(
50
52
  "completion": 0.000_004,
51
53
  "cached": 0.000_000_08,
52
54
  },
53
- "claude-3-5-sonnet": {
55
+ "claude-3-7-sonnet-latest": {
56
+ "prompt": 0.000_003,
57
+ "completion": 0.000_015,
58
+ "cached": 0.000_000_3,
59
+ },
60
+ "claude-3-7-sonnet-20250219": {
61
+ "prompt": 0.000_003,
62
+ "completion": 0.000_015,
63
+ "cached": 0.000_000_3,
64
+ },
65
+ "claude-3-5-sonnet-latest": {
54
66
  "prompt": 0.000_003,
55
67
  "completion": 0.000_015,
56
68
  "cached": 0.000_000_3,
@@ -65,7 +77,7 @@ def calculate_cost(
65
77
  "completion": 0.000_015,
66
78
  "cached": 0.000_000_3,
67
79
  },
68
- "claude-3-haiku": {
80
+ "claude-3-haiku-latest": {
69
81
  "prompt": 0.000_000_8,
70
82
  "completion": 0.000_004,
71
83
  "cached": 0.000_000_08,
@@ -75,7 +87,7 @@ def calculate_cost(
75
87
  "completion": 0.000_004,
76
88
  "cached": 0.000_000_08,
77
89
  },
78
- "claude-3-sonnet": {
90
+ "claude-3-sonnet-latest": {
79
91
  "prompt": 0.000_003,
80
92
  "completion": 0.000_015,
81
93
  "cached": 0.000_000_3,
@@ -85,7 +97,7 @@ def calculate_cost(
85
97
  "completion": 0.000_015,
86
98
  "cached": 0.000_000_3,
87
99
  },
88
- "claude-3-opus": {
100
+ "claude-3-opus-latest": {
89
101
  "prompt": 0.000_015,
90
102
  "completion": 0.000_075,
91
103
  "cached": 0.000_001_5,
@@ -111,6 +123,11 @@ def calculate_cost(
111
123
  "cached": 0,
112
124
  },
113
125
  # Bedrock models
126
+ "anthropic.claude-3-7-sonnet-20250219-v1:0": {
127
+ "prompt": 0.000_003,
128
+ "completion": 0.000_015,
129
+ "cached": 0.000_000_3,
130
+ },
114
131
  "anthropic.claude-3-5-sonnet-20241022-v2:0": {
115
132
  "prompt": 0.000_003,
116
133
  "completion": 0.000_015,
@@ -142,6 +159,11 @@ def calculate_cost(
142
159
  "cached": 0.000_001_5,
143
160
  },
144
161
  # Vertex AI models
162
+ "claude-3-7-sonnet@20250219": {
163
+ "prompt": 0.000_003,
164
+ "completion": 0.000_015,
165
+ "cached": 0.000_000_3,
166
+ },
145
167
  "claude-3-5-sonnet@20241022": {
146
168
  "prompt": 0.000_003,
147
169
  "completion": 0.000_015,
@@ -169,20 +191,29 @@ def calculate_cost(
169
191
  },
170
192
  }
171
193
 
172
- if input_tokens is None or output_tokens is None:
194
+ if metadata.input_tokens is None or metadata.output_tokens is None:
173
195
  return None
174
196
 
175
- if cached_tokens is None:
176
- cached_tokens = 0
197
+ if metadata.cached_tokens is None:
198
+ metadata.cached_tokens = 0
177
199
 
178
200
  try:
179
201
  model_pricing = pricing[model]
180
202
  except KeyError:
181
203
  return None
182
204
 
183
- prompt_cost = input_tokens * model_pricing["prompt"]
184
- cached_cost = cached_tokens * model_pricing["cached"]
185
- completion_cost = output_tokens * model_pricing["completion"]
205
+ # Calculate cost for text tokens
206
+ prompt_cost = metadata.input_tokens * model_pricing["prompt"]
207
+ cached_cost = metadata.cached_tokens * model_pricing["cached"]
208
+ completion_cost = metadata.output_tokens * model_pricing["completion"]
209
+
210
+ # Image tokens are in response tokens
211
+ # https://docs.anthropic.com/en/docs/build-with-claude/vision#calculate-image-costs
212
+
213
+ # PDF documents tokens are in response tokens
214
+ # https://docs.anthropic.com/en/docs/build-with-claude/pdf-support#estimate-your-costs
215
+
216
+ # Sum all costs
186
217
  total_cost = prompt_cost + cached_cost + completion_cost
187
218
 
188
219
  return total_cost