mirascope 1.19.0__py3-none-any.whl → 1.20.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90)
  1. mirascope/__init__.py +4 -0
  2. mirascope/beta/openai/realtime/realtime.py +7 -8
  3. mirascope/beta/openai/realtime/tool.py +2 -2
  4. mirascope/core/__init__.py +8 -1
  5. mirascope/core/anthropic/_utils/__init__.py +0 -2
  6. mirascope/core/anthropic/_utils/_convert_message_params.py +1 -7
  7. mirascope/core/anthropic/_utils/_message_param_converter.py +48 -31
  8. mirascope/core/anthropic/call_response.py +7 -9
  9. mirascope/core/anthropic/call_response_chunk.py +10 -0
  10. mirascope/core/anthropic/stream.py +6 -8
  11. mirascope/core/azure/_utils/__init__.py +0 -2
  12. mirascope/core/azure/call_response.py +7 -10
  13. mirascope/core/azure/call_response_chunk.py +6 -1
  14. mirascope/core/azure/stream.py +6 -8
  15. mirascope/core/base/__init__.py +2 -1
  16. mirascope/core/base/_utils/__init__.py +2 -0
  17. mirascope/core/base/_utils/_get_image_dimensions.py +39 -0
  18. mirascope/core/base/call_response.py +36 -6
  19. mirascope/core/base/call_response_chunk.py +15 -1
  20. mirascope/core/base/stream.py +25 -3
  21. mirascope/core/base/types.py +276 -2
  22. mirascope/core/bedrock/_utils/__init__.py +0 -2
  23. mirascope/core/bedrock/call_response.py +7 -10
  24. mirascope/core/bedrock/call_response_chunk.py +6 -0
  25. mirascope/core/bedrock/stream.py +6 -10
  26. mirascope/core/cohere/_utils/__init__.py +0 -2
  27. mirascope/core/cohere/call_response.py +7 -10
  28. mirascope/core/cohere/call_response_chunk.py +6 -0
  29. mirascope/core/cohere/stream.py +5 -8
  30. mirascope/core/costs/__init__.py +5 -0
  31. mirascope/core/{anthropic/_utils/_calculate_cost.py → costs/_anthropic_calculate_cost.py} +45 -14
  32. mirascope/core/{azure/_utils/_calculate_cost.py → costs/_azure_calculate_cost.py} +3 -3
  33. mirascope/core/{bedrock/_utils/_calculate_cost.py → costs/_bedrock_calculate_cost.py} +3 -3
  34. mirascope/core/{cohere/_utils/_calculate_cost.py → costs/_cohere_calculate_cost.py} +12 -8
  35. mirascope/core/{gemini/_utils/_calculate_cost.py → costs/_gemini_calculate_cost.py} +7 -7
  36. mirascope/core/costs/_google_calculate_cost.py +427 -0
  37. mirascope/core/costs/_groq_calculate_cost.py +156 -0
  38. mirascope/core/costs/_litellm_calculate_cost.py +11 -0
  39. mirascope/core/costs/_mistral_calculate_cost.py +64 -0
  40. mirascope/core/costs/_openai_calculate_cost.py +416 -0
  41. mirascope/core/{vertex/_utils/_calculate_cost.py → costs/_vertex_calculate_cost.py} +8 -7
  42. mirascope/core/{xai/_utils/_calculate_cost.py → costs/_xai_calculate_cost.py} +9 -9
  43. mirascope/core/costs/calculate_cost.py +86 -0
  44. mirascope/core/gemini/_utils/__init__.py +0 -2
  45. mirascope/core/gemini/call_response.py +7 -10
  46. mirascope/core/gemini/call_response_chunk.py +6 -1
  47. mirascope/core/gemini/stream.py +5 -8
  48. mirascope/core/google/_utils/__init__.py +0 -2
  49. mirascope/core/google/_utils/_setup_call.py +21 -2
  50. mirascope/core/google/call_response.py +9 -10
  51. mirascope/core/google/call_response_chunk.py +6 -1
  52. mirascope/core/google/stream.py +5 -8
  53. mirascope/core/groq/_utils/__init__.py +0 -2
  54. mirascope/core/groq/call_response.py +22 -10
  55. mirascope/core/groq/call_response_chunk.py +6 -0
  56. mirascope/core/groq/stream.py +5 -8
  57. mirascope/core/litellm/call_response.py +3 -4
  58. mirascope/core/litellm/stream.py +30 -22
  59. mirascope/core/mistral/_utils/__init__.py +0 -2
  60. mirascope/core/mistral/call_response.py +7 -10
  61. mirascope/core/mistral/call_response_chunk.py +6 -0
  62. mirascope/core/mistral/stream.py +5 -8
  63. mirascope/core/openai/_utils/__init__.py +0 -2
  64. mirascope/core/openai/_utils/_convert_message_params.py +4 -4
  65. mirascope/core/openai/call_response.py +30 -10
  66. mirascope/core/openai/call_response_chunk.py +6 -0
  67. mirascope/core/openai/stream.py +5 -8
  68. mirascope/core/vertex/_utils/__init__.py +0 -2
  69. mirascope/core/vertex/call_response.py +5 -10
  70. mirascope/core/vertex/call_response_chunk.py +6 -0
  71. mirascope/core/vertex/stream.py +5 -8
  72. mirascope/core/xai/_utils/__init__.py +1 -2
  73. mirascope/core/xai/call_response.py +0 -11
  74. mirascope/llm/__init__.py +9 -2
  75. mirascope/llm/_protocols.py +8 -28
  76. mirascope/llm/call_response.py +6 -6
  77. mirascope/llm/call_response_chunk.py +12 -3
  78. mirascope/llm/llm_call.py +21 -23
  79. mirascope/llm/llm_override.py +56 -27
  80. mirascope/llm/stream.py +7 -7
  81. mirascope/llm/tool.py +1 -1
  82. mirascope/retries/fallback.py +1 -1
  83. {mirascope-1.19.0.dist-info → mirascope-1.20.0.dist-info}/METADATA +1 -1
  84. {mirascope-1.19.0.dist-info → mirascope-1.20.0.dist-info}/RECORD +86 -82
  85. mirascope/core/google/_utils/_calculate_cost.py +0 -215
  86. mirascope/core/groq/_utils/_calculate_cost.py +0 -69
  87. mirascope/core/mistral/_utils/_calculate_cost.py +0 -48
  88. mirascope/core/openai/_utils/_calculate_cost.py +0 -246
  89. {mirascope-1.19.0.dist-info → mirascope-1.20.0.dist-info}/WHEEL +0 -0
  90. {mirascope-1.19.0.dist-info → mirascope-1.20.0.dist-info}/licenses/LICENSE +0 -0
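The thread running through this release is cost calculation: every provider's private _utils/_calculate_cost.py moves into a shared mirascope/core/costs package, and the per-provider functions trade three positional token arguments for a single CostMetadata object (defined in mirascope/core/base/types.py, which grows by 276 lines). The signature change, distilled from the diffs below:

# 1.19.0: each provider repeated the same token plumbing.
def calculate_cost(
    input_tokens: int | float | None,
    cached_tokens: int | float | None,
    output_tokens: int | float | None,
    model: str = "command-r-plus",
) -> float | None: ...

# 1.20.0: one metadata object carries tokens plus provider-specific extras
# (images, context-cache usage, grounding requests, and so on).
def calculate_cost(
    metadata: CostMetadata,
    model: str = "command-r-plus",
) -> float | None: ...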
mirascope/core/{cohere/_utils/_calculate_cost.py → costs/_cohere_calculate_cost.py}
@@ -1,10 +1,10 @@
 """Calculate the cost of a completion using the Cohere API."""
 
+from ..base.types import CostMetadata
+
 
 def calculate_cost(
-    input_tokens: int | float | None,
-    cached_tokens: int | float | None,
-    output_tokens: int | float | None,
+    metadata: CostMetadata,
     model: str = "command-r-plus",
 ) -> float | None:
     """Calculate the cost of a completion using the Cohere API.
@@ -12,8 +12,8 @@ def calculate_cost(
     https://cohere.com/pricing
 
     Model             Input               Cached    Output
-    command-r         $0.5 / 1M tokens              $1.5 / 1M tokens
-    command-r-plus    $3 / 1M tokens                $15 / 1M tokens
+    command-r         $0.15 / 1M tokens             $0.6 / 1M tokens
+    command-r-plus    $2.5 / 1M tokens              $10 / 1M tokens
     """
     pricing = {
         "command-r": {
@@ -24,8 +24,12 @@ def calculate_cost(
             "prompt": 0.000_003,
             "completion": 0.000_015,
         },
+        "command-r7b-12-2024": {
+            "prompt": 0.000_000_375,
+            "completion": 0.000_001_5,
+        },
     }
-    if input_tokens is None or output_tokens is None:
+    if metadata.input_tokens is None or metadata.output_tokens is None:
         return None
 
     try:
@@ -33,8 +37,8 @@ def calculate_cost(
     except KeyError:
         return None
 
-    prompt_cost = input_tokens * model_pricing["prompt"]
-    completion_cost = output_tokens * model_pricing["completion"]
+    prompt_cost = metadata.input_tokens * model_pricing["prompt"]
+    completion_cost = metadata.output_tokens * model_pricing["completion"]
     total_cost = prompt_cost + completion_cost
 
     return total_cost
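A minimal sketch of the new call shape against the Cohere function above; that CostMetadata accepts its token counts as keyword arguments is an assumption, since the class definition itself is not shown in this diff:

from mirascope.core.base.types import CostMetadata
from mirascope.core.costs._cohere_calculate_cost import calculate_cost

# Hypothetical usage: 1M prompt tokens and 1M completion tokens on
# command-r-plus should come to $2.5 + $10 = $12.50 under the new table.
metadata = CostMetadata(input_tokens=1_000_000, output_tokens=1_000_000)
cost = calculate_cost(metadata, model="command-r-plus")
assert cost is not None and abs(cost - 12.5) < 1e-6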
mirascope/core/{gemini/_utils/_calculate_cost.py → costs/_gemini_calculate_cost.py}
@@ -1,10 +1,10 @@
 """Calculate the cost of a Gemini API call."""
 
+from ..base.types import CostMetadata
+
 
 def calculate_cost(
-    input_tokens: int | float | None,
-    cached_tokens: int | float | None,
-    output_tokens: int | float | None,
+    metadata: CostMetadata,
     model: str,
 ) -> float | None:
     """Calculate the cost of a Gemini API call.
@@ -44,7 +44,7 @@ def calculate_cost(
         },
     }
 
-    if input_tokens is None or output_tokens is None:
+    if metadata.input_tokens is None or metadata.output_tokens is None:
         return None
 
     try:
@@ -53,15 +53,15 @@ def calculate_cost(
         return None
 
     # Determine if we're using long context pricing
-    use_long_context = input_tokens > 128_000
+    use_long_context = metadata.input_tokens > 128_000
 
     prompt_price = model_pricing["prompt_long" if use_long_context else "prompt_short"]
     completion_price = model_pricing[
         "completion_long" if use_long_context else "completion_short"
     ]
 
-    prompt_cost = input_tokens * prompt_price
-    completion_cost = output_tokens * completion_price
+    prompt_cost = metadata.input_tokens * prompt_price
+    completion_cost = metadata.output_tokens * completion_price
     total_cost = prompt_cost + completion_cost
 
     return total_cost
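The behavioral subtlety above is the long-context switch: the check is strictly greater-than, so 128,000 input tokens still bills at the short rate and 128,001 flips to the long rate. A hedged sketch of the boundary, assuming gemini-1.5-pro is priced in this module at the same rates listed in the Google pricing table below ($1.25/M prompt short, $2.50/M prompt long):

from mirascope.core.base.types import CostMetadata
from mirascope.core.costs._gemini_calculate_cost import calculate_cost

short = calculate_cost(CostMetadata(input_tokens=128_000, output_tokens=0), "gemini-1.5-pro")
longer = calculate_cost(CostMetadata(input_tokens=128_001, output_tokens=0), "gemini-1.5-pro")
# short ≈ 128_000 * 0.000_001_25 = $0.16; longer ≈ 128_001 * 0.000_002_5 ≈ $0.32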
mirascope/core/costs/_google_calculate_cost.py
@@ -0,0 +1,427 @@
+"""Calculate the cost of a Gemini API call."""
+
+from ..base.types import CostMetadata
+
+# Standard Gemini API pricing table
+GEMINI_API_PRICING: dict[str, dict[str, float]] = {
+    "gemini-2.0-pro": {
+        "prompt_short": 0.000_001_25,
+        "completion_short": 0.000_005,
+        "prompt_long": 0.000_002_5,
+        "completion_long": 0.000_01,
+        "cached": 0.000_000_625,
+    },
+    "gemini-2.0-pro-preview-1206": {
+        "prompt_short": 0.000_001_25,
+        "completion_short": 0.000_005,
+        "prompt_long": 0.000_002_5,
+        "completion_long": 0.000_01,
+        "cached": 0.000_000_625,
+    },
+    "gemini-2.0-flash": {
+        "prompt_short": 0.000_000_10,
+        "completion_short": 0.000_000_40,
+        "prompt_long": 0.000_000_10,
+        "completion_long": 0.000_000_40,
+        "cached": 0.000_000_037_5,
+    },
+    "gemini-2.0-flash-latest": {
+        "prompt_short": 0.000_000_10,
+        "completion_short": 0.000_000_40,
+        "prompt_long": 0.000_000_10,
+        "completion_long": 0.000_000_40,
+        "cached": 0.000_000_037_5,
+    },
+    "gemini-2.0-flash-001": {
+        "prompt_short": 0.000_000_10,
+        "completion_short": 0.000_000_40,
+        "prompt_long": 0.000_000_10,
+        "completion_long": 0.000_000_40,
+        "cached": 0.000_000_037_5,
+    },
+    "gemini-2.0-flash-lite": {
+        "prompt_short": 0.000_000_075,
+        "completion_short": 0.000_000_30,
+        "prompt_long": 0.000_000_075,
+        "completion_long": 0.000_000_30,
+        "cached": 0.000_000_037_5,
+    },
+    "gemini-2.0-flash-lite-preview-02-05": {
+        "prompt_short": 0.000_000_075,
+        "completion_short": 0.000_000_30,
+        "prompt_long": 0.000_000_075,
+        "completion_long": 0.000_000_30,
+        "cached": 0.000_000_037_5,
+    },
+    "gemini-1.5-pro": {
+        "prompt_short": 0.000_001_25,
+        "completion_short": 0.000_005,
+        "prompt_long": 0.000_002_5,
+        "completion_long": 0.000_01,
+        "cached": 0.000_000_625,
+    },
+    "gemini-1.5-pro-latest": {
+        "prompt_short": 0.000_001_25,
+        "completion_short": 0.000_005,
+        "prompt_long": 0.000_002_5,
+        "completion_long": 0.000_01,
+        "cached": 0.000_000_625,
+    },
+    "gemini-1.5-pro-001": {
+        "prompt_short": 0.000_001_25,
+        "completion_short": 0.000_005,
+        "prompt_long": 0.000_002_5,
+        "completion_long": 0.000_01,
+        "cached": 0.000_000_625,
+    },
+    "gemini-1.5-pro-002": {
+        "prompt_short": 0.000_001_25,
+        "completion_short": 0.000_005,
+        "prompt_long": 0.000_002_5,
+        "completion_long": 0.000_01,
+        "cached": 0.000_000_625,
+    },
+    "gemini-1.5-flash": {
+        "prompt_short": 0.000_000_075,
+        "completion_short": 0.000_000_30,
+        "prompt_long": 0.000_000_15,
+        "completion_long": 0.000_000_60,
+        "cached": 0.000_000_037_5,
+    },
+    "gemini-1.5-flash-latest": {
+        "prompt_short": 0.000_000_075,
+        "completion_short": 0.000_000_30,
+        "prompt_long": 0.000_000_15,
+        "completion_long": 0.000_000_60,
+        "cached": 0.000_000_037_5,
+    },
+    "gemini-1.5-flash-001": {
+        "prompt_short": 0.000_000_075,
+        "completion_short": 0.000_000_30,
+        "prompt_long": 0.000_000_15,
+        "completion_long": 0.000_000_60,
+        "cached": 0.000_000_037_5,
+    },
+    "gemini-1.5-flash-002": {
+        "prompt_short": 0.000_000_075,
+        "completion_short": 0.000_000_30,
+        "prompt_long": 0.000_000_15,
+        "completion_long": 0.000_000_60,
+        "cached": 0.000_000_037_5,
+    },
+    "gemini-1.5-flash-8b": {
+        "prompt_short": 0.000_000_037_5,
+        "completion_short": 0.000_000_15,
+        "prompt_long": 0.000_000_075,
+        "completion_long": 0.000_000_30,
+        "cached": 0.000_000_025,
+    },
+    "gemini-1.5-flash-8b-latest": {
+        "prompt_short": 0.000_000_037_5,
+        "completion_short": 0.000_000_15,
+        "prompt_long": 0.000_000_075,
+        "completion_long": 0.000_000_30,
+        "cached": 0.000_000_025,
+    },
+    "gemini-1.5-flash-8b-001": {
+        "prompt_short": 0.000_000_037_5,
+        "completion_short": 0.000_000_15,
+        "prompt_long": 0.000_000_075,
+        "completion_long": 0.000_000_30,
+        "cached": 0.000_000_025,
+    },
+    "gemini-1.5-flash-8b-002": {
+        "prompt_short": 0.000_000_037_5,
+        "completion_short": 0.000_000_15,
+        "prompt_long": 0.000_000_075,
+        "completion_long": 0.000_000_30,
+        "cached": 0.000_000_025,
+    },
+    "gemini-1.0-pro": {
+        "prompt_short": 0.000_000_5,
+        "completion_short": 0.000_001_5,
+        "prompt_long": 0.000_000_5,
+        "completion_long": 0.000_001_5,
+        "cached": 0.000_000,
+    },
+}
+
+# Vertex AI pricing table
+VERTEX_AI_PRICING: dict[str, dict[str, float]] = {
+    "gemini-2.0-flash": {
+        "text_input": 0.000_000_15,
+        "image_input": 0.000_000_15,
+        "video_input": 0.000_000_15,
+        "audio_input": 0.000_001_00,
+        "output": 0.000_000_60,
+        "cached": 0.000_000_037_5,
+        "cache_storage_per_hour": 0.000_001_00,
+    },
+    "gemini-2.0-flash-lite": {
+        "text_input": 0.000_000_075,
+        "image_input": 0.000_000_075,
+        "video_input": 0.000_000_075,
+        "audio_input": 0.000_000_075,
+        "output": 0.000_000_30,
+        "cached": 0.000_000_037_5,
+        "cache_storage_per_hour": 0.000_001_00,
+    },
+    # Vertex AI pricing for Gemini 1.5 models is based on modalities rather than tokens
+    "gemini-1.5-flash": {
+        "text_input": 0.000_000_075,  # per 1K chars (approx. 250 tokens)
+        "image_input": 0.000_02,  # per image
+        "video_input": 0.000_02,  # per second
+        "audio_input": 0.000_002,  # per second
+        "output": 0.000_000_30,  # per 1K chars
+        "cached_text": 0.000_000_046_875,  # per 1K chars
+        "cached_image": 0.000_005,  # per image
+        "cached_video": 0.000_005,  # per second
+        "cached_audio": 0.000_000_5,  # per second
+        "cache_storage_text": 0.000_25,  # per 1K chars per hour
+        "cache_storage_image": 0.000_263,  # per image per hour
+        "cache_storage_video": 0.000_263,  # per second per hour
+        "cache_storage_audio": 0.000_025,  # per second per hour
+    },
+    "gemini-1.5-pro": {
+        "text_input": 0.000_001_25,  # per 1K chars (approx. 250 tokens)
+        "image_input": 0.000_32875,  # per image
+        "video_input": 0.000_32875,  # per second
+        "audio_input": 0.000_03125,  # per second
+        "output": 0.000_005,  # per 1K chars
+        "cached_text": 0.000_000_078125,  # per 1K chars
+        "cached_image": 0.000_0821875,  # per image
+        "cached_video": 0.000_0821875,  # per second
+        "cached_audio": 0.000_0078125,  # per second
+        "cache_storage_text": 0.001125,  # per 1K chars per hour
+        "cache_storage_image": 0.0011835,  # per image per hour
+        "cache_storage_video": 0.0011835,  # per second per hour
+        "cache_storage_audio": 0.0001125,  # per second per hour
+    },
+}
+
+
+def _calculate_context_cache_cost(
+    metadata: CostMetadata,
+    model_pricing: dict[str, float],
+    model: str,
+    use_vertex_ai: bool = False,
+) -> float:
+    """Calculate cost for context caching."""
+    if metadata.context_cache_tokens is None or metadata.context_cache_hours is None:
+        return 0.0
+
+    if use_vertex_ai:
+        # Vertex AI pricing depends on the model family
+        if model.startswith("gemini-2.0"):
+            return (
+                metadata.context_cache_tokens
+                * model_pricing.get("cache_storage_per_hour", 0)
+                * metadata.context_cache_hours
+            )
+        elif model.startswith("gemini-1.5"):
+            # Convert cache tokens to characters (approx)
+            cache_chars = metadata.context_cache_tokens * 4
+            return (
+                (cache_chars / 1000)
+                * model_pricing["cache_storage_text"]
+                * metadata.context_cache_hours
+            )
+
+    # Standard Gemini API pricing - storage cost per token-hour
+    storage_rate_per_token = 0.000001  # $1.00 per million tokens per hour
+    if "flash-8b" in model:
+        storage_rate_per_token = 0.00000025  # $0.25 per million tokens for 8B models
+
+    return (
+        metadata.context_cache_tokens
+        * storage_rate_per_token
+        * metadata.context_cache_hours
+    )
+
+
+def _calculate_grounding_cost(metadata: CostMetadata, model: str) -> float:
+    """Calculate cost for grounding requests."""
+    if metadata.google is None or metadata.google.grounding_requests is None:
+        return 0.0
+
+    # First 1,500 requests per day are free for Gemini 2.0 Flash models in Vertex AI
+    if (
+        model == "gemini-2.0-flash"
+        and metadata.google.use_vertex_ai
+        and metadata.google.grounding_requests <= 1500
+    ):
+        return 0.0
+
+    # $35 per 1,000 requests for excess
+    if metadata.google.use_vertex_ai and model == "gemini-2.0-flash":
+        excess_requests = max(0, metadata.google.grounding_requests - 1500)
+    else:
+        excess_requests = metadata.google.grounding_requests
+
+    return (excess_requests / 1000) * 35.0
+
+
+def _calculate_vertex_2_0_cost(
+    metadata: CostMetadata, model_pricing: dict[str, float], model: str
+) -> float:
+    """Calculate cost for Vertex AI's Gemini 2.0 models."""
+    # Text tokens cost
+    prompt_cost = (metadata.input_tokens or 0) * model_pricing["text_input"]
+    completion_cost = (metadata.output_tokens or 0) * model_pricing["output"]
+    cached_cost = (metadata.cached_tokens or 0) * model_pricing.get("cached", 0)
+
+    # Context cache costs
+    context_cache_cost = _calculate_context_cache_cost(
+        metadata, model_pricing, model, use_vertex_ai=True
+    )
+
+    # Grounding costs
+    grounding_cost = _calculate_grounding_cost(metadata, model)
+
+    # Apply batch mode discount (50% for Vertex AI)
+    if metadata.batch_mode:
+        prompt_cost *= 0.5
+        completion_cost *= 0.5
+        context_cache_cost *= 0.5
+        # Note: We don't discount grounding costs
+
+    total_cost = (
+        prompt_cost
+        + completion_cost
+        + cached_cost
+        + context_cache_cost
+        + grounding_cost
+    )
+
+    return total_cost
+
+
+def _calculate_vertex_1_5_cost(
+    metadata: CostMetadata, model_pricing: dict[str, float], model: str
+) -> float:
+    """Calculate cost for Vertex AI's Gemini 1.5 models."""
+    # Text cost - convert tokens to characters (approx 4 chars per token)
+    text_chars = (metadata.input_tokens or 0) * 4  # Approximation
+    text_cost = (text_chars / 1000) * model_pricing["text_input"]
+
+    # Output cost
+    output_chars = (metadata.output_tokens or 0) * 4  # Approximation
+    output_cost = (output_chars / 1000) * model_pricing["output"]
+
+    # Context cache costs
+    context_cache_cost = _calculate_context_cache_cost(
+        metadata, model_pricing, model, use_vertex_ai=True
+    )
+
+    # Grounding costs
+    grounding_cost = _calculate_grounding_cost(metadata, model)
+
+    # Apply batch mode discount if applicable (50% off for Vertex AI)
+    if metadata.batch_mode:
+        text_cost *= 0.5
+        output_cost *= 0.5
+        context_cache_cost *= 0.5
+        # Note: We don't discount grounding costs
+
+    total_cost = text_cost + output_cost + context_cache_cost + grounding_cost
+
+    return total_cost
+
+
+def _calculate_standard_gemini_cost(
+    metadata: CostMetadata,
+    model_pricing: dict[str, float],
+    model: str,
+    use_long_context: bool,
+) -> float:
+    """Calculate cost for standard Gemini API."""
+    # Determine prices based on context length
+    prompt_price = model_pricing["prompt_long" if use_long_context else "prompt_short"]
+    cached_price = model_pricing["cached"]
+    completion_price = model_pricing[
+        "completion_long" if use_long_context else "completion_short"
+    ]
+
+    # Basic token costs
+    prompt_cost = (metadata.input_tokens or 0) * prompt_price
+    cached_cost = (metadata.cached_tokens or 0) * cached_price
+    completion_cost = (metadata.output_tokens or 0) * completion_price
+
+    # Media token costs is included in the prompt/completion cost
+
+    # Context cache costs
+    context_cache_cost = _calculate_context_cache_cost(
+        metadata, model_pricing, model, use_vertex_ai=False
+    )
+
+    # Grounding costs - only applies to certain models
+    grounding_cost = _calculate_grounding_cost(metadata, model)
+
+    total_cost = (
+        prompt_cost
+        + cached_cost
+        + completion_cost
+        + context_cache_cost
+        + grounding_cost
+    )
+
+    return total_cost
+
+
+def calculate_cost(
+    metadata: CostMetadata,
+    model: str,
+) -> float | None:
+    """Calculate the cost of a Google API call.
+
+    This function supports both direct Gemini API and Vertex AI pricing.
+    It handles different media types (text, image, video, audio) and special features
+    like context caching and grounding.
+
+    https://ai.google.dev/pricing
+    https://cloud.google.com/vertex-ai/generative-ai/pricing
+
+    Args:
+        metadata: Additional metadata required for cost calculation
+        model: Model name to use for pricing calculation
+
+    Returns:
+        Total cost in USD or None if invalid input
+    """
+    # Basic validation
+    if metadata.input_tokens is None or metadata.output_tokens is None:
+        return None
+
+    # Initialize default values
+    if metadata.cached_tokens is None:
+        metadata.cached_tokens = 0
+
+    # Check if we're using Vertex AI pricing
+    use_vertex_ai = metadata.google and metadata.google.use_vertex_ai
+
+    # Determine if we're using long context pricing
+    use_long_context = (
+        metadata.context_length is not None and metadata.context_length > 128_000
+    ) or (metadata.input_tokens > 128_000)
+
+    # Get the appropriate pricing table
+    try:
+        if use_vertex_ai and model in VERTEX_AI_PRICING:
+            model_pricing = VERTEX_AI_PRICING[model]
+        else:
+            model_pricing = GEMINI_API_PRICING[model]
+    except KeyError:
+        # Unknown model
+        return None
+
+    # Calculate cost based on API type
+    if use_vertex_ai:
+        if model.startswith("gemini-2.0"):
+            return _calculate_vertex_2_0_cost(metadata, model_pricing, model)
+        elif model.startswith("gemini-1.5"):  # pragma: no cover
+            return _calculate_vertex_1_5_cost(metadata, model_pricing, model)
+    else:
+        # Standard Gemini API pricing
+        return _calculate_standard_gemini_cost(
+            metadata, model_pricing, model, use_long_context
+        )
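The grounding rule above is the easiest piece to misread: only gemini-2.0-flash on Vertex AI gets the 1,500-requests-per-day free tier; every other configuration pays $35 per 1,000 requests from the first one. The excess arithmetic, as a standalone sketch (the request count is hypothetical):

# 2,000 grounding requests on Vertex AI gemini-2.0-flash:
# the first 1,500 are free, leaving 500 billable requests.
excess = max(0, 2_000 - 1_500)
grounding_cost = (excess / 1000) * 35.0  # (500 / 1000) * $35 = $17.50
assert grounding_cost == 17.5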
mirascope/core/costs/_groq_calculate_cost.py
@@ -0,0 +1,156 @@
+"""Calculate the cost of a completion using the Groq API."""
+
+from ..base.types import CostMetadata
+
+
+def calculate_cost(
+    metadata: CostMetadata,
+    model: str = "mixtral-8x7b-32768",
+) -> float | None:
+    """Calculate the cost of a completion using the Groq API.
+
+    https://wow.groq.com/
+
+    Model                                  Input               Output
+    llama-3.3-70b-versatile                $0.59 / 1M tokens   $0.79 / 1M tokens
+    llama-3.3-70b-specdec                  $0.59 / 1M tokens   $0.99 / 1M tokens
+    llama-3.1-8b-instant                   $0.05 / 1M tokens   $0.08 / 1M tokens
+    llama3-70b-8192                        $0.59 / 1M tokens   $0.79 / 1M tokens
+    llama3-8b-8192                         $0.05 / 1M tokens   $0.08 / 1M tokens
+    llama-guard-3-8b                       $0.20 / 1M tokens   $0.20 / 1M tokens
+    mixtral-8x7b-32768                     $0.24 / 1M tokens   $0.24 / 1M tokens
+    gemma-7b-it                            $0.07 / 1M tokens   $0.07 / 1M tokens
+    gemma2-9b-it                           $0.20 / 1M tokens   $0.20 / 1M tokens
+    mistral-saba-24b                       $0.79 / 1M tokens   $0.79 / 1M tokens
+    qwen-2.5-32b                           $0.79 / 1M tokens   $0.79 / 1M tokens
+    qwen-2.5-coder-32b                     $0.79 / 1M tokens   $0.79 / 1M tokens
+    deepseek-r1-distill-qwen-32b           $0.69 / 1M tokens   $0.69 / 1M tokens
+    deepseek-r1-distill-llama-70b          $0.75 / 1M tokens   $0.99 / 1M tokens
+    deepseek-r1-distill-llama-70b-specdec  $0.75 / 1M tokens   $0.99 / 1M tokens
+    llama-3.2-1b-preview                   $0.04 / 1M tokens   $0.04 / 1M tokens
+    llama-3.2-3b-preview                   $0.06 / 1M tokens   $0.06 / 1M tokens
+    llama-3.2-11b-vision-preview           $0.18 / 1M tokens   $0.18 / 1M tokens
+    llama-3.2-90b-vision-preview           $0.90 / 1M tokens   $0.90 / 1M tokens
+    """
+    pricing = {
+        "llama-3.3-70b-versatile": {
+            "prompt": 0.000_000_59,
+            "completion": 0.000_000_79,
+        },
+        "llama-3.3-70b-specdec": {
+            "prompt": 0.000_000_59,
+            "completion": 0.000_000_99,
+        },
+        "llama3-groq-70b-8192-tool-use-preview": {
+            "prompt": 0.000_000_89,
+            "completion": 0.000_000_89,
+        },
+        "llama3-groq-8b-8192-tool-use-preview": {
+            "prompt": 0.000_000_19,
+            "completion": 0.000_000_19,
+        },
+        "llama-3.1-8b-instant": {
+            "prompt": 0.000_000_05,
+            "completion": 0.000_000_08,
+        },
+        "llama-guard-3-8b": {
+            "prompt": 0.000_000_2,
+            "completion": 0.000_000_2,
+        },
+        "llama3-70b-8192": {
+            "prompt": 0.000_000_59,
+            "completion": 0.000_000_79,
+        },
+        "llama3-8b-8192": {
+            "prompt": 0.000_000_05,
+            "completion": 0.000_000_08,
+        },
+        "mixtral-8x7b-32768": {
+            "prompt": 0.000_000_24,
+            "completion": 0.000_000_24,
+        },
+        "gemma-7b-it": {
+            "prompt": 0.000_000_07,
+            "completion": 0.000_000_07,
+        },
+        "gemma2-9b-it": {
+            "prompt": 0.000_000_2,
+            "completion": 0.000_000_2,
+        },
+        "mistral-saba-24b": {
+            "prompt": 0.000_000_79,
+            "completion": 0.000_000_79,
+        },
+        "qwen-2.5-32b": {
+            "prompt": 0.000_000_79,
+            "completion": 0.000_000_79,
+        },
+        "qwen-2.5-coder-32b": {
+            "prompt": 0.000_000_79,
+            "completion": 0.000_000_79,
+        },
+        "deepseek-r1-distill-qwen-32b": {
+            "prompt": 0.000_000_69,
+            "completion": 0.000_000_69,
+        },
+        "deepseek-r1-distill-llama-70b": {
+            "prompt": 0.000_000_75,
+            "completion": 0.000_000_99,
+        },
+        "deepseek-r1-distill-llama-70b-specdec": {
+            "prompt": 0.000_000_75,
+            "completion": 0.000_000_99,
+        },
+        "llama-3.2-1b-preview": {
+            "prompt": 0.000_000_04,
+            "completion": 0.000_000_04,
+        },
+        "llama-3.2-3b-preview": {
+            "prompt": 0.000_000_06,
+            "completion": 0.000_000_06,
+        },
+        # Vision models
+        "llama-3.2-11b-vision-preview": {
+            "prompt": 0.000_000_18,
+            "completion": 0.000_000_18,
+        },
+        "llama-3.2-90b-vision-preview": {
+            "prompt": 0.000_000_90,
+            "completion": 0.000_000_90,
+        },
+    }
+
+    if metadata.input_tokens is None or metadata.output_tokens is None:
+        return None
+
+    try:
+        model_pricing = pricing[model]
+    except KeyError:
+        return None
+
+    # Calculate cost for text tokens
+    prompt_cost = metadata.input_tokens * model_pricing["prompt"]
+    completion_cost = metadata.output_tokens * model_pricing["completion"]
+    total_cost = prompt_cost + completion_cost
+
+    # Calculate cost for images if present
+    # Groq bills 6,400 tokens per image for vision models
+    # https://groq.com/pricing/
+    image_cost = 0.0
+    if metadata.images and "vision" in model:
+        # For Groq vision models, each image is billed at 6,400 tokens
+        tokens_per_image = 6400
+
+        # Count the number of images
+        image_count = len(metadata.images)
+
+        # Calculate total image tokens
+        total_image_tokens = image_count * tokens_per_image
+
+        # Images are charged at the prompt token rate
+        image_cost = total_image_tokens * model_pricing["prompt"]
+
+        # Add image cost to total
+        total_cost += image_cost
+
+    return total_cost
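Groq's image billing is flat per image rather than resolution-dependent, so the vision surcharge is easy to compute by hand. A sketch of what two images add on llama-3.2-11b-vision-preview, per the 6,400-token rule above (the image count is hypothetical):

image_tokens = 2 * 6_400                   # two images, 6,400 tokens each
image_cost = image_tokens * 0.000_000_18   # billed at the prompt rate
assert abs(image_cost - 0.002304) < 1e-9   # about $0.0023 extra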
mirascope/core/costs/_litellm_calculate_cost.py
@@ -0,0 +1,11 @@
+"""Calculate the cost of a Litellm call."""
+
+from ..base.types import CostMetadata
+
+
+def calculate_cost(
+    metadata: CostMetadata,
+    model: str,
+) -> float | None:
+    """Calculate the cost of a Litellm call."""
+    return metadata.cost