mirascope 1.19.0__py3-none-any.whl → 1.20.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90)
  1. mirascope/__init__.py +4 -0
  2. mirascope/beta/openai/realtime/realtime.py +7 -8
  3. mirascope/beta/openai/realtime/tool.py +2 -2
  4. mirascope/core/__init__.py +8 -1
  5. mirascope/core/anthropic/_utils/__init__.py +0 -2
  6. mirascope/core/anthropic/_utils/_convert_message_params.py +1 -7
  7. mirascope/core/anthropic/_utils/_message_param_converter.py +48 -31
  8. mirascope/core/anthropic/call_response.py +7 -9
  9. mirascope/core/anthropic/call_response_chunk.py +10 -0
  10. mirascope/core/anthropic/stream.py +6 -8
  11. mirascope/core/azure/_utils/__init__.py +0 -2
  12. mirascope/core/azure/call_response.py +7 -10
  13. mirascope/core/azure/call_response_chunk.py +6 -1
  14. mirascope/core/azure/stream.py +6 -8
  15. mirascope/core/base/__init__.py +2 -1
  16. mirascope/core/base/_utils/__init__.py +2 -0
  17. mirascope/core/base/_utils/_get_image_dimensions.py +39 -0
  18. mirascope/core/base/call_response.py +36 -6
  19. mirascope/core/base/call_response_chunk.py +15 -1
  20. mirascope/core/base/stream.py +25 -3
  21. mirascope/core/base/types.py +276 -2
  22. mirascope/core/bedrock/_utils/__init__.py +0 -2
  23. mirascope/core/bedrock/call_response.py +7 -10
  24. mirascope/core/bedrock/call_response_chunk.py +6 -0
  25. mirascope/core/bedrock/stream.py +6 -10
  26. mirascope/core/cohere/_utils/__init__.py +0 -2
  27. mirascope/core/cohere/call_response.py +7 -10
  28. mirascope/core/cohere/call_response_chunk.py +6 -0
  29. mirascope/core/cohere/stream.py +5 -8
  30. mirascope/core/costs/__init__.py +5 -0
  31. mirascope/core/{anthropic/_utils/_calculate_cost.py → costs/_anthropic_calculate_cost.py} +45 -14
  32. mirascope/core/{azure/_utils/_calculate_cost.py → costs/_azure_calculate_cost.py} +3 -3
  33. mirascope/core/{bedrock/_utils/_calculate_cost.py → costs/_bedrock_calculate_cost.py} +3 -3
  34. mirascope/core/{cohere/_utils/_calculate_cost.py → costs/_cohere_calculate_cost.py} +12 -8
  35. mirascope/core/{gemini/_utils/_calculate_cost.py → costs/_gemini_calculate_cost.py} +7 -7
  36. mirascope/core/costs/_google_calculate_cost.py +427 -0
  37. mirascope/core/costs/_groq_calculate_cost.py +156 -0
  38. mirascope/core/costs/_litellm_calculate_cost.py +11 -0
  39. mirascope/core/costs/_mistral_calculate_cost.py +64 -0
  40. mirascope/core/costs/_openai_calculate_cost.py +416 -0
  41. mirascope/core/{vertex/_utils/_calculate_cost.py → costs/_vertex_calculate_cost.py} +8 -7
  42. mirascope/core/{xai/_utils/_calculate_cost.py → costs/_xai_calculate_cost.py} +9 -9
  43. mirascope/core/costs/calculate_cost.py +86 -0
  44. mirascope/core/gemini/_utils/__init__.py +0 -2
  45. mirascope/core/gemini/call_response.py +7 -10
  46. mirascope/core/gemini/call_response_chunk.py +6 -1
  47. mirascope/core/gemini/stream.py +5 -8
  48. mirascope/core/google/_utils/__init__.py +0 -2
  49. mirascope/core/google/_utils/_setup_call.py +21 -2
  50. mirascope/core/google/call_response.py +9 -10
  51. mirascope/core/google/call_response_chunk.py +6 -1
  52. mirascope/core/google/stream.py +5 -8
  53. mirascope/core/groq/_utils/__init__.py +0 -2
  54. mirascope/core/groq/call_response.py +22 -10
  55. mirascope/core/groq/call_response_chunk.py +6 -0
  56. mirascope/core/groq/stream.py +5 -8
  57. mirascope/core/litellm/call_response.py +3 -4
  58. mirascope/core/litellm/stream.py +30 -22
  59. mirascope/core/mistral/_utils/__init__.py +0 -2
  60. mirascope/core/mistral/call_response.py +7 -10
  61. mirascope/core/mistral/call_response_chunk.py +6 -0
  62. mirascope/core/mistral/stream.py +5 -8
  63. mirascope/core/openai/_utils/__init__.py +0 -2
  64. mirascope/core/openai/_utils/_convert_message_params.py +4 -4
  65. mirascope/core/openai/call_response.py +30 -10
  66. mirascope/core/openai/call_response_chunk.py +6 -0
  67. mirascope/core/openai/stream.py +5 -8
  68. mirascope/core/vertex/_utils/__init__.py +0 -2
  69. mirascope/core/vertex/call_response.py +5 -10
  70. mirascope/core/vertex/call_response_chunk.py +6 -0
  71. mirascope/core/vertex/stream.py +5 -8
  72. mirascope/core/xai/_utils/__init__.py +1 -2
  73. mirascope/core/xai/call_response.py +0 -11
  74. mirascope/llm/__init__.py +9 -2
  75. mirascope/llm/_protocols.py +8 -28
  76. mirascope/llm/call_response.py +6 -6
  77. mirascope/llm/call_response_chunk.py +12 -3
  78. mirascope/llm/llm_call.py +21 -23
  79. mirascope/llm/llm_override.py +56 -27
  80. mirascope/llm/stream.py +7 -7
  81. mirascope/llm/tool.py +1 -1
  82. mirascope/retries/fallback.py +1 -1
  83. {mirascope-1.19.0.dist-info → mirascope-1.20.0.dist-info}/METADATA +1 -1
  84. {mirascope-1.19.0.dist-info → mirascope-1.20.0.dist-info}/RECORD +86 -82
  85. mirascope/core/google/_utils/_calculate_cost.py +0 -215
  86. mirascope/core/groq/_utils/_calculate_cost.py +0 -69
  87. mirascope/core/mistral/_utils/_calculate_cost.py +0 -48
  88. mirascope/core/openai/_utils/_calculate_cost.py +0 -246
  89. {mirascope-1.19.0.dist-info → mirascope-1.20.0.dist-info}/WHEEL +0 -0
  90. {mirascope-1.19.0.dist-info → mirascope-1.20.0.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,64 @@
+ """Calculate the cost of a completion using the Mistral API."""
+
+ from ..base.types import CostMetadata
+
+
+ def calculate_cost(
+     metadata: CostMetadata,
+     model: str = "open-mistral-7b",
+ ) -> float | None:
+     """Calculate the cost of a completion using the Mistral API.
+
+     https://mistral.ai/technology/#pricing
+
+     Model                      Input              Output
+     mistral-large-latest       $2/1M tokens       $6/1M tokens
+     pixtral-large-latest       $2/1M tokens       $6/1M tokens
+     mistral-small-latest       $0.1/1M tokens     $0.3/1M tokens
+     mistral-saba-latest        $0.2/1M tokens     $0.6/1M tokens
+     codestral-latest           $0.3/1M tokens     $0.9/1M tokens
+     ministral-8b-latest        $0.1/1M tokens     $0.1/1M tokens
+     ministral-3b-latest        $0.04/1M tokens    $0.04/1M tokens
+     mistral-embed              $0.1/1M tokens     -
+     mistral-moderation-latest  $0.1/1M tokens     -
+     open-mistral-nemo          $0.3/1M tokens     $0.3/1M tokens
+     open-mistral-7b            $0.25/1M tokens    $0.25/1M tokens
+     open-mixtral-8x7b          $0.7/1M tokens     $0.7/1M tokens
+     open-mixtral-8x22b         $2/1M tokens       $6/1M tokens
+     """
+     pricing = {
+         "mistral-large-latest": {"prompt": 0.000_002, "completion": 0.000_006},
+         "pixtral-large-latest": {"prompt": 0.000_002, "completion": 0.000_006},
+         "mistral-small-latest": {"prompt": 0.000_000_1, "completion": 0.000_000_3},
+         "mistral-saba-latest": {"prompt": 0.000_000_2, "completion": 0.000_000_6},
+         "codestral-latest": {"prompt": 0.000_000_3, "completion": 0.000_000_9},
+         "ministral-8b-latest": {"prompt": 0.000_000_1, "completion": 0.000_000_1},
+         "ministral-3b-latest": {"prompt": 0.000_000_04, "completion": 0.000_000_04},
+         "mistral-embed": {"prompt": 0.000_000_1, "completion": 0},
+         "mistral-moderation-latest": {"prompt": 0.000_000_1, "completion": 0},
+         "open-mistral-nemo": {"prompt": 0.000_000_3, "completion": 0.000_000_3},
+         "open-mistral-nemo-2407": {"prompt": 0.000_000_3, "completion": 0.000_000_3},
+         "open-mistral-7b": {"prompt": 0.000_000_25, "completion": 0.000_000_25},
+         "open-mixtral-8x7b": {"prompt": 0.000_000_7, "completion": 0.000_000_7},
+         "open-mixtral-8x22b": {"prompt": 0.000_002, "completion": 0.000_006},
+         "mistral-large-2407": {"prompt": 0.000_003, "completion": 0.000_009},
+         "mistral-medium-latest": {"prompt": 0.000_002_75, "completion": 0.000_008_1},
+         "pixtral-12b-2409": {"prompt": 0.000_002, "completion": 0.000_006},
+     }
+
+     if metadata.input_tokens is None or metadata.output_tokens is None:
+         return None
+
+     try:
+         model_pricing = pricing[model]
+     except KeyError:
+         return None
+
+     # Calculate cost for text tokens
+     prompt_cost = metadata.input_tokens * model_pricing["prompt"]
+     completion_cost = metadata.output_tokens * model_pricing["completion"]
+     total_cost = prompt_cost + completion_cost
+
+     # Image tokens are already included in the reported token counts
+
+     return total_cost
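For orientation, a minimal usage sketch of this new Mistral module. This is hypothetical: the private import path and the `CostMetadata` keyword arguments are inferred from the hunk above, not confirmed by the diff.

```python
# Hypothetical sketch exercising the new Mistral cost function; assumes
# CostMetadata accepts input_tokens/output_tokens as keyword arguments.
from mirascope.core.base.types import CostMetadata
from mirascope.core.costs._mistral_calculate_cost import calculate_cost

metadata = CostMetadata(input_tokens=1_000_000, output_tokens=500_000)

# mistral-large-latest: $2/1M input + $6/1M output
# 1_000_000 * 0.000_002 + 500_000 * 0.000_006 = 2.0 + 3.0
print(calculate_cost(metadata, model="mistral-large-latest"))  # 5.0 (USD)
```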
@@ -0,0 +1,416 @@
+ """Calculate the cost of a completion using the OpenAI API."""
+
+ from ..base.types import CostMetadata
+
+ # Constants for image token calculation
+ LOW_DETAIL_IMAGE_TOKENS = 85
+ HIGH_DETAIL_TILE_TOKENS = 170
+ HIGH_DETAIL_BASE_TOKENS = 85
+ TILE_SIZE = 512
+
+
+ def _calculate_image_tokens(metadata: CostMetadata) -> int | None:
+     """Calculate tokens used by images based on their size and detail level.
+
+     https://platform.openai.com/docs/guides/vision
+     Following OpenAI's pricing structure:
+     - Low detail: 85 tokens per image
+     - High detail: 85 tokens base + 170 tokens per 512px tile
+       (after scaling to fit within 2048x2048 and making the shortest side 768px)
+     """
+     if not metadata.images:
+         return 0
+
+     total_image_tokens = 0
+
+     for img in metadata.images:
+         if not img.width or not img.height:
+             continue
+
+         # If the image already has precalculated tokens, use those
+         if img.tokens is not None:
+             total_image_tokens += img.tokens
+             continue
+
+         if img.detail is not None and img.detail != "auto":
+             detail = img.detail
+         else:
+             # Default to high detail for "auto"; we can't determine the
+             # detail level from the image alone
+             detail = "high"
+         if detail == "low":
+             # Low detail is a fixed cost regardless of size
+             total_image_tokens += LOW_DETAIL_IMAGE_TOKENS
+         else:
+             # High detail calculation
+
+             # Scale to fit within a 2048x2048 square
+             width, height = img.width, img.height
+             if width > 2048 or height > 2048:
+                 aspect_ratio = width / height
+                 if width > height:
+                     width = 2048
+                     height = int(width / aspect_ratio)
+                 else:
+                     height = 2048
+                     width = int(height * aspect_ratio)
+
+             # Scale so the shortest side is 768px
+             if min(width, height) > 768:
+                 if width < height:
+                     scale_factor = 768 / width
+                     width = 768
+                     height = int(height * scale_factor)
+                 else:
+                     scale_factor = 768 / height
+                     height = 768
+                     width = int(width * scale_factor)
+
+             # Calculate the number of 512px tiles needed
+             tiles_x = (width + TILE_SIZE - 1) // TILE_SIZE
+             tiles_y = (height + TILE_SIZE - 1) // TILE_SIZE
+             num_tiles = tiles_x * tiles_y
+
+             # Calculate token cost
+             image_tokens = (
+                 HIGH_DETAIL_TILE_TOKENS * num_tiles
+             ) + HIGH_DETAIL_BASE_TOKENS
+             total_image_tokens += image_tokens
+
+     return total_image_tokens
+
+
+ def calculate_cost(
+     metadata: CostMetadata,
+     model: str = "gpt-3.5-turbo-16k",
+ ) -> float | None:
+     """Calculate the cost of a completion using the OpenAI API.
+
+     https://openai.com/pricing
+
+     Model                                     Input                 Cached                Output
+     gpt-4.5-preview                           $75.00 / 1M tokens    $37.50 / 1M tokens    $150.00 / 1M tokens
+     gpt-4.5-preview-2025-02-27                $75.00 / 1M tokens    $37.50 / 1M tokens    $150.00 / 1M tokens
+     gpt-4o                                    $2.50 / 1M tokens     $1.25 / 1M tokens     $10.00 / 1M tokens
+     gpt-4o-2024-11-20                         $2.50 / 1M tokens     $1.25 / 1M tokens     $10.00 / 1M tokens
+     gpt-4o-2024-08-06                         $2.50 / 1M tokens     $1.25 / 1M tokens     $10.00 / 1M tokens
+     gpt-4o-2024-05-13                         $5.00 / 1M tokens                           $15.00 / 1M tokens
+     gpt-4o-audio-preview                      $2.50 / 1M tokens     $1.25 / 1M tokens     $10.00 / 1M tokens
+     gpt-4o-audio-preview-2024-12-17           $2.50 / 1M tokens     $1.25 / 1M tokens     $10.00 / 1M tokens
+     gpt-4o-audio-preview-2024-10-01           $2.50 / 1M tokens     $1.25 / 1M tokens     $10.00 / 1M tokens
+     gpt-4o-realtime-preview                   $5.00 / 1M tokens     $2.50 / 1M tokens     $20.00 / 1M tokens
+     gpt-4o-realtime-preview-2024-12-17        $5.00 / 1M tokens     $2.50 / 1M tokens     $20.00 / 1M tokens
+     gpt-4o-realtime-preview-2024-10-01        $5.00 / 1M tokens     $2.50 / 1M tokens     $20.00 / 1M tokens
+     gpt-4o-mini                               $0.150 / 1M tokens    $0.075 / 1M tokens    $0.600 / 1M tokens
+     gpt-4o-mini-2024-07-18                    $0.150 / 1M tokens    $0.075 / 1M tokens    $0.600 / 1M tokens
+     gpt-4o-mini-audio-preview                 $0.150 / 1M tokens    $0.075 / 1M tokens    $0.600 / 1M tokens
+     gpt-4o-mini-audio-preview-2024-12-17      $0.150 / 1M tokens    $0.075 / 1M tokens    $0.600 / 1M tokens
+     gpt-4o-mini-realtime-preview              $0.60 / 1M tokens     $0.30 / 1M tokens     $2.40 / 1M tokens
+     gpt-4o-mini-realtime-preview-2024-12-17   $0.60 / 1M tokens     $0.30 / 1M tokens     $2.40 / 1M tokens
+     o1                                        $15.00 / 1M tokens    $7.50 / 1M tokens     $60.00 / 1M tokens
+     o1-2024-12-17                             $15.00 / 1M tokens    $7.50 / 1M tokens     $60.00 / 1M tokens
+     o1-preview                                $15.00 / 1M tokens    $7.50 / 1M tokens     $60.00 / 1M tokens
+     o1-preview-2024-09-12                     $15.00 / 1M tokens    $7.50 / 1M tokens     $60.00 / 1M tokens
+     o3-mini                                   $1.10 / 1M tokens     $0.55 / 1M tokens     $4.40 / 1M tokens
+     o3-mini-2025-01-31                        $1.10 / 1M tokens     $0.55 / 1M tokens     $4.40 / 1M tokens
+     o1-mini                                   $1.10 / 1M tokens     $0.55 / 1M tokens     $4.40 / 1M tokens
+     o1-mini-2024-09-12                        $1.10 / 1M tokens     $0.55 / 1M tokens     $4.40 / 1M tokens
+     chatgpt-4o-latest                         $5.00 / 1M tokens                           $15.00 / 1M tokens
+     gpt-4-turbo                               $10.00 / 1M tokens                          $30.00 / 1M tokens
+     gpt-4-turbo-2024-04-09                    $10.00 / 1M tokens                          $30.00 / 1M tokens
+     gpt-3.5-turbo-0125                        $0.50 / 1M tokens                           $1.50 / 1M tokens
+     gpt-3.5-turbo-1106                        $1.00 / 1M tokens                           $2.00 / 1M tokens
+     gpt-4-0125-preview                        $10.00 / 1M tokens                          $30.00 / 1M tokens
+     gpt-4-1106-preview                        $10.00 / 1M tokens                          $30.00 / 1M tokens
+     gpt-4-vision-preview                      $10.00 / 1M tokens                          $30.00 / 1M tokens
+     gpt-4                                     $30.00 / 1M tokens                          $60.00 / 1M tokens
+     gpt-4-32k                                 $60.00 / 1M tokens                          $120.00 / 1M tokens
+     text-embedding-3-small                    $0.02 / 1M tokens
+     text-embedding-3-large                    $0.13 / 1M tokens
+     text-embedding-ada-002                    $0.10 / 1M tokens
+     """
+     pricing = {
+         "gpt-4.5-preview": {
+             "prompt": 0.000_075,
+             "cached": 0.000_037_5,
+             "completion": 0.000_15,
+             "batch_prompt": 0.000_037_5,
+             "batch_completion": 0.000_075,
+         },
+         "gpt-4.5-preview-2025-02-27": {
+             "prompt": 0.000_075,
+             "cached": 0.000_037_5,
+             "completion": 0.000_15,
+         },
+         "gpt-4o": {
+             "prompt": 0.000_002_5,
+             "cached": 0.000_001_25,
+             "completion": 0.000_01,
+             "batch_prompt": 0.000_001_25,
+             "batch_completion": 0.000_005,
+         },
+         "gpt-4o-2024-11-20": {
+             "prompt": 0.000_002_5,
+             "cached": 0.000_001_25,
+             "completion": 0.000_01,
+         },
+         "gpt-4o-2024-08-06": {
+             "prompt": 0.000_002_5,
+             "cached": 0.000_001_25,
+             "completion": 0.000_01,
+         },
+         "gpt-4o-2024-05-13": {
+             "prompt": 0.000_005,
+             "cached": 0.000_002_5,
+             "completion": 0.000_015,
+         },
+         "gpt-4o-audio-preview": {
+             "prompt": 0.000_002_5,
+             "cached": 0.000_001_25,
+             "completion": 0.000_01,
+         },
+         "gpt-4o-audio-preview-2024-12-17": {
+             "prompt": 0.000_002_5,
+             "cached": 0.000_001_25,
+             "completion": 0.000_01,
+         },
+         "gpt-4o-audio-preview-2024-10-01": {
+             "prompt": 0.000_002_5,
+             "cached": 0.000_001_25,
+             "completion": 0.000_01,
+         },
+         "gpt-4o-realtime-preview": {
+             "prompt": 0.000_005,
+             "cached": 0.000_002_5,
+             "completion": 0.000_02,
+         },
+         "gpt-4o-realtime-preview-2024-12-17": {
+             "prompt": 0.000_005,
+             "cached": 0.000_002_5,
+             "completion": 0.000_02,
+         },
+         "gpt-4o-realtime-preview-2024-10-01": {
+             "prompt": 0.000_005,
+             "cached": 0.000_002_5,
+             "completion": 0.000_02,
+         },
+         "gpt-4o-mini": {
+             "prompt": 0.000_000_15,
+             "cached": 0.000_000_075,
+             "completion": 0.000_000_6,
+         },
+         "gpt-4o-mini-2024-07-18": {
+             "prompt": 0.000_000_15,
+             "cached": 0.000_000_075,
+             "completion": 0.000_000_6,
+         },
+         "gpt-4o-mini-audio-preview": {
+             "prompt": 0.000_000_15,
+             "cached": 0.000_000_075,
+             "completion": 0.000_000_6,
+         },
+         "gpt-4o-mini-audio-preview-2024-12-17": {
+             "prompt": 0.000_000_15,
+             "cached": 0.000_000_075,
+             "completion": 0.000_000_6,
+         },
+         "gpt-4o-mini-realtime-preview": {
+             "prompt": 0.000_000_6,
+             "cached": 0.000_000_3,
+             "completion": 0.000_002_4,
+         },
+         "gpt-4o-mini-realtime-preview-2024-12-17": {
+             "prompt": 0.000_000_6,
+             "cached": 0.000_000_3,
+             "completion": 0.000_002_4,
+         },
+         "o1": {
+             "prompt": 0.000_015,
+             "cached": 0.000_007_5,
+             "completion": 0.000_06,
+         },
+         "o1-2024-12-17": {
+             "prompt": 0.000_015,
+             "cached": 0.000_007_5,
+             "completion": 0.000_06,
+         },
+         "o1-preview": {
+             "prompt": 0.000_015,
+             "cached": 0.000_007_5,
+             "completion": 0.000_06,
+         },
+         "o1-preview-2024-09-12": {
+             "prompt": 0.000_015,
+             "cached": 0.000_007_5,
+             "completion": 0.000_06,
+         },
+         "o3-mini": {
+             "prompt": 0.000_001_1,
+             "cached": 0.000_000_55,
+             "completion": 0.000_004_4,
+         },
+         "o3-mini-2025-01-31": {
+             "prompt": 0.000_001_1,
+             "cached": 0.000_000_55,
+             "completion": 0.000_004_4,
+         },
+         "o1-mini": {
+             "prompt": 0.000_001_1,
+             "cached": 0.000_000_55,
+             "completion": 0.000_004_4,
+         },
+         "o1-mini-2024-09-12": {
+             "prompt": 0.000_001_1,
+             "cached": 0.000_000_55,
+             "completion": 0.000_004_4,
+         },
+         "chatgpt-4o-latest": {
+             "prompt": 0.000_005,
+             "cached": 0,
+             "completion": 0.000_015,
+         },
+         "gpt-4-turbo": {
+             "prompt": 0.000_01,
+             "cached": 0,
+             "completion": 0.000_03,
+         },
+         "gpt-4-turbo-2024-04-09": {
+             "prompt": 0.000_01,
+             "cached": 0,
+             "completion": 0.000_03,
+         },
+         "gpt-3.5-turbo-0125": {
+             "prompt": 0.000_000_5,
+             "cached": 0,
+             "completion": 0.000_001_5,
+         },
+         "gpt-3.5-turbo-1106": {
+             "prompt": 0.000_001,
+             "cached": 0,
+             "completion": 0.000_002,
+         },
+         "gpt-4-0125-preview": {
+             "prompt": 0.000_01,
+             "cached": 0,
+             "completion": 0.000_03,
+         },
+         "gpt-4-1106-preview": {
+             "prompt": 0.000_01,
+             "cached": 0,
+             "completion": 0.000_03,
+         },
+         "gpt-4-vision-preview": {
+             "prompt": 0.000_01,
+             "cached": 0,
+             "completion": 0.000_03,
+         },
+         "gpt-4": {
+             "prompt": 0.000_03,
+             "cached": 0,
+             "completion": 0.000_06,
+         },
+         "gpt-4-32k": {
+             "prompt": 0.000_06,
+             "cached": 0,
+             "completion": 0.000_12,
+         },
+         "gpt-3.5-turbo-4k": {
+             "prompt": 0.000_015,
+             "cached": 0,
+             "completion": 0.000_02,
+         },
+         "gpt-3.5-turbo-16k": {
+             "prompt": 0.000_003,
+             "cached": 0,
+             "completion": 0.000_004,
+         },
+         "gpt-4-8k": {
+             "prompt": 0.000_03,
+             "cached": 0,
+             "completion": 0.000_06,
+         },
+         "text-embedding-3-small": {
+             "prompt": 0.000_000_02,
+             "cached": 0,
+             "completion": 0,
+             "batch_prompt": 0.000_000_01,
+         },
+         "text-embedding-ada-002": {
+             "prompt": 0.000_000_1,
+             "cached": 0,
+             "completion": 0,
+             "batch_prompt": 0.000_000_05,
+         },
+         "text-embedding-3-large": {
+             "prompt": 0.000_000_13,
+             "cached": 0,
+             "completion": 0,
+             "batch_prompt": 0.000_000_065,
+         },
+     }
+
+     # Audio pricing for audio models (per-minute rates in dollars)
+
+     if metadata.cost is not None:
+         return metadata.cost
+
+     # Audio input/output costs:
+     # ChatCompletion.usage has a breakdown of audio input and output;
+     # the total cost already includes the audio input/output cost.
+
+     # Initialize cached tokens if not provided
+     if metadata.cached_tokens is None:
+         metadata.cached_tokens = 0
+
+     # Try to get model pricing
+     try:
+         model_pricing = pricing[model]
+     except KeyError:
+         return None
+
+     image_tokens = _calculate_image_tokens(metadata) or 0
+
+     input_tokens = (metadata.input_tokens or 0) + image_tokens
+
+     # Calculate costs for each component
+     prompt_cost = input_tokens * model_pricing["prompt"]
+     cached_cost = metadata.cached_tokens * model_pricing["cached"]
+     completion_cost = (metadata.output_tokens or 0) * model_pricing["completion"]
+
+     # Special handling for embedding models (only input tokens matter)
+     if "embedding" in model:
+         total_cost = prompt_cost
+     else:
+         total_cost = prompt_cost + cached_cost + completion_cost
+
+     # Apply batch discounts if applicable
+     if metadata.batch_mode:
+         # Based on the OpenAI pricing table, batch mode typically provides
+         # approximately a 50% discount for both input and output tokens
+         if "embedding" in model.lower():
+             # Embedding models have specific batch pricing
+             if model == "text-embedding-3-small":
+                 prompt_cost = (
+                     input_tokens * 0.000_000_01
+                 )  # $0.01 per 1M tokens in batch mode
+             elif model == "text-embedding-3-large":
+                 prompt_cost = (
+                     input_tokens * 0.000_000_065
+                 )  # $0.065 per 1M tokens in batch mode
+             elif model == "text-embedding-ada-002":
+                 prompt_cost = (
+                     input_tokens * 0.000_000_05
+                 )  # $0.05 per 1M tokens in batch mode
+         else:
+             # For LLM models, typically a 50% discount
+             prompt_cost *= 0.5
+             cached_cost *= 0.5
+             completion_cost *= 0.5
+
+         # Recalculate total cost with batch pricing
+         if "embedding" in model:
+             total_cost = prompt_cost
+         else:
+             total_cost = prompt_cost + cached_cost + completion_cost
+
+     return total_cost
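To make the high-detail image math above concrete, here is the arithmetic for a 1024x1024 image; this matches OpenAI's documented vision-pricing example (765 tokens):

```python
# Worked example of the high-detail path in _calculate_image_tokens above.
width = height = 1024        # already within 2048x2048, so no first-pass scaling

# cap the shortest side at 768px
scale = 768 / min(width, height)
width, height = int(width * scale), int(height * scale)   # 768 x 768

# ceil-divide each dimension into 512px tiles
tiles = ((width + 511) // 512) * ((height + 511) // 512)  # 2 * 2 = 4

tokens = 170 * tiles + 85    # HIGH_DETAIL_TILE_TOKENS * tiles + HIGH_DETAIL_BASE_TOKENS
print(tokens)                # 765
```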
@@ -1,12 +1,11 @@
  """Calculate the cost of a completion using the Vertex AI Gemini API, considering context window size."""
 
+ from ..base.types import CostMetadata
+
 
  def calculate_cost(
-     input_chars: int | float | None,
-     cached_chars: int | float | None,
-     output_chars: int | float | None,
+     metadata: CostMetadata,
      model: str = "gemini-1.5-pro",
-     context_length: int = 0,
  ) -> float | None:
      """Calculate the cost of a completion using the Vertex AI Gemini API.
 
@@ -19,6 +18,8 @@ def calculate_cost(
 
      Note: Prices are per 1k characters. Gemini 1.0 Pro only supports up to 32K context window.
      """
+
+     context_length = metadata.context_length or 0
      pricing = {
          "gemini-1.5-flash": {
              "prompt_short": 0.000_018_75,
@@ -40,7 +41,7 @@ def calculate_cost(
          },
      }
 
-     if input_chars is None or output_chars is None:
+     if metadata.input_tokens is None or metadata.output_tokens is None:
          return None
 
      try:
@@ -59,8 +60,8 @@ def calculate_cost(
          "completion_long" if use_long_context else "completion_short"
      ]
 
-     prompt_cost = (input_chars / 1000) * prompt_price
-     completion_cost = (output_chars / 1000) * completion_price
+     prompt_cost = (metadata.input_tokens / 1000) * prompt_price
+     completion_cost = (metadata.output_tokens / 1000) * completion_price
      total_cost = prompt_cost + completion_cost
 
      return total_cost
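Note the per-1k-character unit in the divisions above. For the one rate visible in this hunk (the character count below is a hypothetical example):

```python
# Per-1k-character arithmetic using the only rate visible in this hunk
# (gemini-1.5-flash prompt_short); the character count is illustrative.
prompt_short = 0.000_018_75                  # $ per 1k characters
input_chars = 40_000
print((input_chars / 1000) * prompt_short)   # 0.00075 USD
```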
@@ -1,10 +1,10 @@
  """Calculate the cost of a Grok API call."""
 
+ from ..base.types import CostMetadata
+
 
  def calculate_cost(
-     input_tokens: int | float | None,
-     cached_tokens: int | float | None,
-     output_tokens: int | float | None,
+     metadata: CostMetadata,
      model: str,
  ) -> float | None:
      """Calculate the cost of an xAI Grok API call.
@@ -81,11 +81,11 @@ def calculate_cost(
          },
      }
 
-     if input_tokens is None or output_tokens is None:
+     if metadata.input_tokens is None or metadata.output_tokens is None:
          return None
 
-     if cached_tokens is None:
-         cached_tokens = 0
+     if metadata.cached_tokens is None:
+         metadata.cached_tokens = 0
 
      try:
          model_pricing = pricing[model]
@@ -96,9 +96,9 @@ def calculate_cost(
      cached_price = model_pricing["cached"]
      completion_price = model_pricing["completion"]
 
-     prompt_cost = input_tokens * prompt_price
-     cached_cost = cached_tokens * cached_price
-     completion_cost = output_tokens * completion_price
+     prompt_cost = metadata.input_tokens * prompt_price
+     cached_cost = metadata.cached_tokens * cached_price
+     completion_cost = metadata.output_tokens * completion_price
      total_cost = prompt_cost + cached_cost + completion_cost
 
      return total_cost
@@ -0,0 +1,86 @@
+ """Cost calculation utilities for LLM API calls."""
+
+ from __future__ import annotations
+
+ from ..base.types import CostMetadata, Provider
+ from ._anthropic_calculate_cost import (
+     calculate_cost as anthropic_calculate_cost,
+ )
+ from ._azure_calculate_cost import calculate_cost as azure_calculate_cost
+ from ._bedrock_calculate_cost import calculate_cost as bedrock_calculate_cost
+ from ._cohere_calculate_cost import calculate_cost as cohere_calculate_cost
+ from ._gemini_calculate_cost import calculate_cost as gemini_calculate_cost
+ from ._google_calculate_cost import calculate_cost as google_calculate_cost
+ from ._groq_calculate_cost import calculate_cost as groq_calculate_cost
+ from ._litellm_calculate_cost import calculate_cost as litellm_calculate_cost
+ from ._mistral_calculate_cost import calculate_cost as mistral_calculate_cost
+ from ._openai_calculate_cost import calculate_cost as openai_calculate_cost
+ from ._vertex_calculate_cost import calculate_cost as vertex_calculate_cost
+ from ._xai_calculate_cost import calculate_cost as xai_calculate_cost
+
+
+ def calculate_cost(
+     provider: Provider,
+     model: str,
+     metadata: CostMetadata | None = None,
+ ) -> float | None:
+     """Calculate the cost for an LLM API call.
+
+     This function routes to the appropriate provider-specific cost calculation
+     function, preserving existing behavior while providing a unified interface.
+
+     Args:
+         provider: The LLM provider (e.g., "openai", "anthropic")
+         model: The model name (e.g., "gpt-4", "claude-3-opus")
+         metadata: Additional metadata required for cost calculation
+
+     Returns:
+         The calculated cost in USD or None if unable to calculate
+     """
+
+     # Initialize empty metadata if none provided
+     if metadata is None:
+         metadata = CostMetadata()
+
+     # Set default values
+     if metadata.cached_tokens is None:
+         metadata.cached_tokens = 0
+
+     # Route to provider-specific implementations
+     if provider == "openai":
+         return openai_calculate_cost(metadata, model)
+
+     elif provider == "anthropic":
+         return anthropic_calculate_cost(metadata, model)
+
+     elif provider == "azure":
+         return azure_calculate_cost(metadata, model)
+
+     elif provider == "bedrock":
+         return bedrock_calculate_cost(metadata, model)
+
+     elif provider == "cohere":
+         return cohere_calculate_cost(metadata, model)
+
+     elif provider == "gemini":
+         return gemini_calculate_cost(metadata, model)
+
+     elif provider == "google":
+         return google_calculate_cost(metadata, model)
+
+     elif provider == "groq":
+         return groq_calculate_cost(metadata, model)
+
+     elif provider == "mistral":
+         return mistral_calculate_cost(metadata, model)
+
+     elif provider == "vertex":
+         return vertex_calculate_cost(metadata, model)
+
+     elif provider == "xai":
+         return xai_calculate_cost(metadata, model)
+
+     elif provider == "litellm":
+         return litellm_calculate_cost(metadata, model)
+     else:
+         raise ValueError(f"Unsupported provider: {provider}")
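A minimal sketch of calling the new unified router. This is hypothetical usage: the public import path is assumed from the file listing above, and the model name is illustrative.

```python
# Hypothetical usage of the unified cost router added in this release.
from mirascope.core.base.types import CostMetadata
from mirascope.core.costs.calculate_cost import calculate_cost

metadata = CostMetadata(input_tokens=2_000, output_tokens=500)

# Routes to the OpenAI pricing table:
# 2_000 * 0.000_002_5 + 500 * 0.000_01 = 0.01
print(calculate_cost("openai", "gpt-4o", metadata))  # 0.01 (USD)
```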
@@ -1,13 +1,11 @@
  """Gemini utilities for decorator factories."""
 
- from ._calculate_cost import calculate_cost
  from ._convert_message_params import convert_message_params
  from ._get_json_output import get_json_output
  from ._handle_stream import handle_stream, handle_stream_async
  from ._setup_call import setup_call
 
  __all__ = [
-     "calculate_cost",
      "convert_message_params",
      "get_json_output",
      "handle_stream",