mirascope 1.19.0__py3-none-any.whl → 1.20.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mirascope/__init__.py +4 -0
- mirascope/beta/openai/realtime/realtime.py +7 -8
- mirascope/beta/openai/realtime/tool.py +2 -2
- mirascope/core/__init__.py +8 -1
- mirascope/core/anthropic/_utils/__init__.py +0 -2
- mirascope/core/anthropic/_utils/_convert_message_params.py +1 -7
- mirascope/core/anthropic/_utils/_message_param_converter.py +48 -31
- mirascope/core/anthropic/call_response.py +7 -9
- mirascope/core/anthropic/call_response_chunk.py +10 -0
- mirascope/core/anthropic/stream.py +6 -8
- mirascope/core/azure/_utils/__init__.py +0 -2
- mirascope/core/azure/call_response.py +7 -10
- mirascope/core/azure/call_response_chunk.py +6 -1
- mirascope/core/azure/stream.py +6 -8
- mirascope/core/base/__init__.py +2 -1
- mirascope/core/base/_utils/__init__.py +2 -0
- mirascope/core/base/_utils/_get_image_dimensions.py +39 -0
- mirascope/core/base/call_response.py +36 -6
- mirascope/core/base/call_response_chunk.py +15 -1
- mirascope/core/base/stream.py +25 -3
- mirascope/core/base/types.py +276 -2
- mirascope/core/bedrock/_utils/__init__.py +0 -2
- mirascope/core/bedrock/call_response.py +7 -10
- mirascope/core/bedrock/call_response_chunk.py +6 -0
- mirascope/core/bedrock/stream.py +6 -10
- mirascope/core/cohere/_utils/__init__.py +0 -2
- mirascope/core/cohere/call_response.py +7 -10
- mirascope/core/cohere/call_response_chunk.py +6 -0
- mirascope/core/cohere/stream.py +5 -8
- mirascope/core/costs/__init__.py +5 -0
- mirascope/core/{anthropic/_utils/_calculate_cost.py → costs/_anthropic_calculate_cost.py} +45 -14
- mirascope/core/{azure/_utils/_calculate_cost.py → costs/_azure_calculate_cost.py} +3 -3
- mirascope/core/{bedrock/_utils/_calculate_cost.py → costs/_bedrock_calculate_cost.py} +3 -3
- mirascope/core/{cohere/_utils/_calculate_cost.py → costs/_cohere_calculate_cost.py} +12 -8
- mirascope/core/{gemini/_utils/_calculate_cost.py → costs/_gemini_calculate_cost.py} +7 -7
- mirascope/core/costs/_google_calculate_cost.py +427 -0
- mirascope/core/costs/_groq_calculate_cost.py +156 -0
- mirascope/core/costs/_litellm_calculate_cost.py +11 -0
- mirascope/core/costs/_mistral_calculate_cost.py +64 -0
- mirascope/core/costs/_openai_calculate_cost.py +416 -0
- mirascope/core/{vertex/_utils/_calculate_cost.py → costs/_vertex_calculate_cost.py} +8 -7
- mirascope/core/{xai/_utils/_calculate_cost.py → costs/_xai_calculate_cost.py} +9 -9
- mirascope/core/costs/calculate_cost.py +86 -0
- mirascope/core/gemini/_utils/__init__.py +0 -2
- mirascope/core/gemini/call_response.py +7 -10
- mirascope/core/gemini/call_response_chunk.py +6 -1
- mirascope/core/gemini/stream.py +5 -8
- mirascope/core/google/_utils/__init__.py +0 -2
- mirascope/core/google/_utils/_setup_call.py +21 -2
- mirascope/core/google/call_response.py +9 -10
- mirascope/core/google/call_response_chunk.py +6 -1
- mirascope/core/google/stream.py +5 -8
- mirascope/core/groq/_utils/__init__.py +0 -2
- mirascope/core/groq/call_response.py +22 -10
- mirascope/core/groq/call_response_chunk.py +6 -0
- mirascope/core/groq/stream.py +5 -8
- mirascope/core/litellm/call_response.py +3 -4
- mirascope/core/litellm/stream.py +30 -22
- mirascope/core/mistral/_utils/__init__.py +0 -2
- mirascope/core/mistral/call_response.py +7 -10
- mirascope/core/mistral/call_response_chunk.py +6 -0
- mirascope/core/mistral/stream.py +5 -8
- mirascope/core/openai/_utils/__init__.py +0 -2
- mirascope/core/openai/_utils/_convert_message_params.py +4 -4
- mirascope/core/openai/call_response.py +30 -10
- mirascope/core/openai/call_response_chunk.py +6 -0
- mirascope/core/openai/stream.py +5 -8
- mirascope/core/vertex/_utils/__init__.py +0 -2
- mirascope/core/vertex/call_response.py +5 -10
- mirascope/core/vertex/call_response_chunk.py +6 -0
- mirascope/core/vertex/stream.py +5 -8
- mirascope/core/xai/_utils/__init__.py +1 -2
- mirascope/core/xai/call_response.py +0 -11
- mirascope/llm/__init__.py +9 -2
- mirascope/llm/_protocols.py +8 -28
- mirascope/llm/call_response.py +6 -6
- mirascope/llm/call_response_chunk.py +12 -3
- mirascope/llm/llm_call.py +21 -23
- mirascope/llm/llm_override.py +56 -27
- mirascope/llm/stream.py +7 -7
- mirascope/llm/tool.py +1 -1
- mirascope/retries/fallback.py +1 -1
- {mirascope-1.19.0.dist-info → mirascope-1.20.0.dist-info}/METADATA +1 -1
- {mirascope-1.19.0.dist-info → mirascope-1.20.0.dist-info}/RECORD +86 -82
- mirascope/core/google/_utils/_calculate_cost.py +0 -215
- mirascope/core/groq/_utils/_calculate_cost.py +0 -69
- mirascope/core/mistral/_utils/_calculate_cost.py +0 -48
- mirascope/core/openai/_utils/_calculate_cost.py +0 -246
- {mirascope-1.19.0.dist-info → mirascope-1.20.0.dist-info}/WHEEL +0 -0
- {mirascope-1.19.0.dist-info → mirascope-1.20.0.dist-info}/licenses/LICENSE +0 -0
mirascope/core/{cohere/_utils/_calculate_cost.py → costs/_cohere_calculate_cost.py}

@@ -1,10 +1,10 @@
 """Calculate the cost of a completion using the Cohere API."""

+from ..base.types import CostMetadata
+

 def calculate_cost(
-    input_tokens: int | float | None,
-    cached_tokens: int | float | None,
-    output_tokens: int | float | None,
+    metadata: CostMetadata,
     model: str = "command-r-plus",
 ) -> float | None:
     """Calculate the cost of a completion using the Cohere API.
@@ -12,8 +12,8 @@ def calculate_cost(
     https://cohere.com/pricing

     Model            Input               Cached              Output
-    command-r        $0.
-    command-r-plus   $
+    command-r        $0.15 / 1M tokens                       $0.6 / 1M tokens
+    command-r-plus   $2.5 / 1M tokens                        $10 / 1M tokens
     """
     pricing = {
         "command-r": {
@@ -24,8 +24,12 @@ def calculate_cost(
             "prompt": 0.000_003,
             "completion": 0.000_015,
         },
+        "command-r7b-12-2024": {
+            "prompt": 0.000_000_375,
+            "completion": 0.000_001_5,
+        },
     }
-    if input_tokens is None or output_tokens is None:
+    if metadata.input_tokens is None or metadata.output_tokens is None:
         return None

     try:
@@ -33,8 +37,8 @@ def calculate_cost(
     except KeyError:
         return None

-    prompt_cost = input_tokens * model_pricing["prompt"]
-    completion_cost = output_tokens * model_pricing["completion"]
+    prompt_cost = metadata.input_tokens * model_pricing["prompt"]
+    completion_cost = metadata.output_tokens * model_pricing["completion"]
     total_cost = prompt_cost + completion_cost

     return total_cost
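The hunks above swap the old positional token arguments for a single CostMetadata value imported from mirascope/core/base/types.py. As a rough illustration of the new call shape (not taken from the package itself), a caller of the relocated Cohere helper might look like the following, assuming CostMetadata accepts these fields as keyword arguments; the token counts are made up:

# Hedged usage sketch; token counts and construction style are illustrative only.
from mirascope.core.base.types import CostMetadata
from mirascope.core.costs._cohere_calculate_cost import calculate_cost

# 1.19.0 signature: calculate_cost(input_tokens, cached_tokens, output_tokens, model=...)
# 1.20.0 signature: calculate_cost(metadata, model=...)
metadata = CostMetadata(input_tokens=1_000, cached_tokens=0, output_tokens=250)
cost = calculate_cost(metadata, model="command-r-plus")
# command-r-plus: 1_000 * 0.000_003 + 250 * 0.000_015 = 0.00675 (USD)
print(cost)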
mirascope/core/{gemini/_utils/_calculate_cost.py → costs/_gemini_calculate_cost.py}

@@ -1,10 +1,10 @@
 """Calculate the cost of a Gemini API call."""

+from ..base.types import CostMetadata
+

 def calculate_cost(
-    input_tokens: int | float | None,
-    cached_tokens: int | float | None,
-    output_tokens: int | float | None,
+    metadata: CostMetadata,
     model: str,
 ) -> float | None:
     """Calculate the cost of a Gemini API call.
@@ -44,7 +44,7 @@ def calculate_cost(
         },
     }

-    if input_tokens is None or output_tokens is None:
+    if metadata.input_tokens is None or metadata.output_tokens is None:
         return None

     try:
@@ -53,15 +53,15 @@ def calculate_cost(
         return None

     # Determine if we're using long context pricing
-    use_long_context = input_tokens > 128_000
+    use_long_context = metadata.input_tokens > 128_000

     prompt_price = model_pricing["prompt_long" if use_long_context else "prompt_short"]
     completion_price = model_pricing[
         "completion_long" if use_long_context else "completion_short"
     ]

-    prompt_cost = input_tokens * prompt_price
-    completion_cost = output_tokens * completion_price
+    prompt_cost = metadata.input_tokens * prompt_price
+    completion_cost = metadata.output_tokens * completion_price
     total_cost = prompt_cost + completion_cost

     return total_cost
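The use_long_context branch above switches to the "_long" rates once metadata.input_tokens exceeds 128,000. A small worked example using the gemini-1.5-pro rates from the pricing table in the new _google_calculate_cost.py below; the token counts are made up:

# Illustrative arithmetic only; rates copied from the gemini-1.5-pro entry below.
input_tokens, output_tokens = 200_000, 1_000
use_long_context = input_tokens > 128_000        # True, so the "_long" rates apply
prompt_cost = input_tokens * 0.000_002_5         # 0.50 USD (vs. 0.25 at the short rate)
completion_cost = output_tokens * 0.000_01       # 0.01 USD (vs. 0.005 at the short rate)
total_cost = prompt_cost + completion_cost       # 0.51 USD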
mirascope/core/costs/_google_calculate_cost.py

@@ -0,0 +1,427 @@
+"""Calculate the cost of a Gemini API call."""
+
+from ..base.types import CostMetadata
+
+# Standard Gemini API pricing table
+GEMINI_API_PRICING: dict[str, dict[str, float]] = {
+    "gemini-2.0-pro": {
+        "prompt_short": 0.000_001_25,
+        "completion_short": 0.000_005,
+        "prompt_long": 0.000_002_5,
+        "completion_long": 0.000_01,
+        "cached": 0.000_000_625,
+    },
+    "gemini-2.0-pro-preview-1206": {
+        "prompt_short": 0.000_001_25,
+        "completion_short": 0.000_005,
+        "prompt_long": 0.000_002_5,
+        "completion_long": 0.000_01,
+        "cached": 0.000_000_625,
+    },
+    "gemini-2.0-flash": {
+        "prompt_short": 0.000_000_10,
+        "completion_short": 0.000_000_40,
+        "prompt_long": 0.000_000_10,
+        "completion_long": 0.000_000_40,
+        "cached": 0.000_000_037_5,
+    },
+    "gemini-2.0-flash-latest": {
+        "prompt_short": 0.000_000_10,
+        "completion_short": 0.000_000_40,
+        "prompt_long": 0.000_000_10,
+        "completion_long": 0.000_000_40,
+        "cached": 0.000_000_037_5,
+    },
+    "gemini-2.0-flash-001": {
+        "prompt_short": 0.000_000_10,
+        "completion_short": 0.000_000_40,
+        "prompt_long": 0.000_000_10,
+        "completion_long": 0.000_000_40,
+        "cached": 0.000_000_037_5,
+    },
+    "gemini-2.0-flash-lite": {
+        "prompt_short": 0.000_000_075,
+        "completion_short": 0.000_000_30,
+        "prompt_long": 0.000_000_075,
+        "completion_long": 0.000_000_30,
+        "cached": 0.000_000_037_5,
+    },
+    "gemini-2.0-flash-lite-preview-02-05": {
+        "prompt_short": 0.000_000_075,
+        "completion_short": 0.000_000_30,
+        "prompt_long": 0.000_000_075,
+        "completion_long": 0.000_000_30,
+        "cached": 0.000_000_037_5,
+    },
+    "gemini-1.5-pro": {
+        "prompt_short": 0.000_001_25,
+        "completion_short": 0.000_005,
+        "prompt_long": 0.000_002_5,
+        "completion_long": 0.000_01,
+        "cached": 0.000_000_625,
+    },
+    "gemini-1.5-pro-latest": {
+        "prompt_short": 0.000_001_25,
+        "completion_short": 0.000_005,
+        "prompt_long": 0.000_002_5,
+        "completion_long": 0.000_01,
+        "cached": 0.000_000_625,
+    },
+    "gemini-1.5-pro-001": {
+        "prompt_short": 0.000_001_25,
+        "completion_short": 0.000_005,
+        "prompt_long": 0.000_002_5,
+        "completion_long": 0.000_01,
+        "cached": 0.000_000_625,
+    },
+    "gemini-1.5-pro-002": {
+        "prompt_short": 0.000_001_25,
+        "completion_short": 0.000_005,
+        "prompt_long": 0.000_002_5,
+        "completion_long": 0.000_01,
+        "cached": 0.000_000_625,
+    },
+    "gemini-1.5-flash": {
+        "prompt_short": 0.000_000_075,
+        "completion_short": 0.000_000_30,
+        "prompt_long": 0.000_000_15,
+        "completion_long": 0.000_000_60,
+        "cached": 0.000_000_037_5,
+    },
+    "gemini-1.5-flash-latest": {
+        "prompt_short": 0.000_000_075,
+        "completion_short": 0.000_000_30,
+        "prompt_long": 0.000_000_15,
+        "completion_long": 0.000_000_60,
+        "cached": 0.000_000_037_5,
+    },
+    "gemini-1.5-flash-001": {
+        "prompt_short": 0.000_000_075,
+        "completion_short": 0.000_000_30,
+        "prompt_long": 0.000_000_15,
+        "completion_long": 0.000_000_60,
+        "cached": 0.000_000_037_5,
+    },
+    "gemini-1.5-flash-002": {
+        "prompt_short": 0.000_000_075,
+        "completion_short": 0.000_000_30,
+        "prompt_long": 0.000_000_15,
+        "completion_long": 0.000_000_60,
+        "cached": 0.000_000_037_5,
+    },
+    "gemini-1.5-flash-8b": {
+        "prompt_short": 0.000_000_037_5,
+        "completion_short": 0.000_000_15,
+        "prompt_long": 0.000_000_075,
+        "completion_long": 0.000_000_30,
+        "cached": 0.000_000_025,
+    },
+    "gemini-1.5-flash-8b-latest": {
+        "prompt_short": 0.000_000_037_5,
+        "completion_short": 0.000_000_15,
+        "prompt_long": 0.000_000_075,
+        "completion_long": 0.000_000_30,
+        "cached": 0.000_000_025,
+    },
+    "gemini-1.5-flash-8b-001": {
+        "prompt_short": 0.000_000_037_5,
+        "completion_short": 0.000_000_15,
+        "prompt_long": 0.000_000_075,
+        "completion_long": 0.000_000_30,
+        "cached": 0.000_000_025,
+    },
+    "gemini-1.5-flash-8b-002": {
+        "prompt_short": 0.000_000_037_5,
+        "completion_short": 0.000_000_15,
+        "prompt_long": 0.000_000_075,
+        "completion_long": 0.000_000_30,
+        "cached": 0.000_000_025,
+    },
+    "gemini-1.0-pro": {
+        "prompt_short": 0.000_000_5,
+        "completion_short": 0.000_001_5,
+        "prompt_long": 0.000_000_5,
+        "completion_long": 0.000_001_5,
+        "cached": 0.000_000,
+    },
+}
+
+# Vertex AI pricing table
+VERTEX_AI_PRICING: dict[str, dict[str, float]] = {
+    "gemini-2.0-flash": {
+        "text_input": 0.000_000_15,
+        "image_input": 0.000_000_15,
+        "video_input": 0.000_000_15,
+        "audio_input": 0.000_001_00,
+        "output": 0.000_000_60,
+        "cached": 0.000_000_037_5,
+        "cache_storage_per_hour": 0.000_001_00,
+    },
+    "gemini-2.0-flash-lite": {
+        "text_input": 0.000_000_075,
+        "image_input": 0.000_000_075,
+        "video_input": 0.000_000_075,
+        "audio_input": 0.000_000_075,
+        "output": 0.000_000_30,
+        "cached": 0.000_000_037_5,
+        "cache_storage_per_hour": 0.000_001_00,
+    },
+    # Vertex AI pricing for Gemini 1.5 models is based on modalities rather than tokens
+    "gemini-1.5-flash": {
+        "text_input": 0.000_000_075,  # per 1K chars (approx. 250 tokens)
+        "image_input": 0.000_02,  # per image
+        "video_input": 0.000_02,  # per second
+        "audio_input": 0.000_002,  # per second
+        "output": 0.000_000_30,  # per 1K chars
+        "cached_text": 0.000_000_046_875,  # per 1K chars
+        "cached_image": 0.000_005,  # per image
+        "cached_video": 0.000_005,  # per second
+        "cached_audio": 0.000_000_5,  # per second
+        "cache_storage_text": 0.000_25,  # per 1K chars per hour
+        "cache_storage_image": 0.000_263,  # per image per hour
+        "cache_storage_video": 0.000_263,  # per second per hour
+        "cache_storage_audio": 0.000_025,  # per second per hour
+    },
+    "gemini-1.5-pro": {
+        "text_input": 0.000_001_25,  # per 1K chars (approx. 250 tokens)
+        "image_input": 0.000_32875,  # per image
+        "video_input": 0.000_32875,  # per second
+        "audio_input": 0.000_03125,  # per second
+        "output": 0.000_005,  # per 1K chars
+        "cached_text": 0.000_000_078125,  # per 1K chars
+        "cached_image": 0.000_0821875,  # per image
+        "cached_video": 0.000_0821875,  # per second
+        "cached_audio": 0.000_0078125,  # per second
+        "cache_storage_text": 0.001125,  # per 1K chars per hour
+        "cache_storage_image": 0.0011835,  # per image per hour
+        "cache_storage_video": 0.0011835,  # per second per hour
+        "cache_storage_audio": 0.0001125,  # per second per hour
+    },
+}
+
+
+def _calculate_context_cache_cost(
+    metadata: CostMetadata,
+    model_pricing: dict[str, float],
+    model: str,
+    use_vertex_ai: bool = False,
+) -> float:
+    """Calculate cost for context caching."""
+    if metadata.context_cache_tokens is None or metadata.context_cache_hours is None:
+        return 0.0
+
+    if use_vertex_ai:
+        # Vertex AI pricing depends on the model family
+        if model.startswith("gemini-2.0"):
+            return (
+                metadata.context_cache_tokens
+                * model_pricing.get("cache_storage_per_hour", 0)
+                * metadata.context_cache_hours
+            )
+        elif model.startswith("gemini-1.5"):
+            # Convert cache tokens to characters (approx)
+            cache_chars = metadata.context_cache_tokens * 4
+            return (
+                (cache_chars / 1000)
+                * model_pricing["cache_storage_text"]
+                * metadata.context_cache_hours
+            )
+
+    # Standard Gemini API pricing - storage cost per token-hour
+    storage_rate_per_token = 0.000001  # $1.00 per million tokens per hour
+    if "flash-8b" in model:
+        storage_rate_per_token = 0.00000025  # $0.25 per million tokens for 8B models
+
+    return (
+        metadata.context_cache_tokens
+        * storage_rate_per_token
+        * metadata.context_cache_hours
+    )
+
+
+def _calculate_grounding_cost(metadata: CostMetadata, model: str) -> float:
+    """Calculate cost for grounding requests."""
+    if metadata.google is None or metadata.google.grounding_requests is None:
+        return 0.0
+
+    # First 1,500 requests per day are free for Gemini 2.0 Flash models in Vertex AI
+    if (
+        model == "gemini-2.0-flash"
+        and metadata.google.use_vertex_ai
+        and metadata.google.grounding_requests <= 1500
+    ):
+        return 0.0
+
+    # $35 per 1,000 requests for excess
+    if metadata.google.use_vertex_ai and model == "gemini-2.0-flash":
+        excess_requests = max(0, metadata.google.grounding_requests - 1500)
+    else:
+        excess_requests = metadata.google.grounding_requests
+
+    return (excess_requests / 1000) * 35.0
+
+
+def _calculate_vertex_2_0_cost(
+    metadata: CostMetadata, model_pricing: dict[str, float], model: str
+) -> float:
+    """Calculate cost for Vertex AI's Gemini 2.0 models."""
+    # Text tokens cost
+    prompt_cost = (metadata.input_tokens or 0) * model_pricing["text_input"]
+    completion_cost = (metadata.output_tokens or 0) * model_pricing["output"]
+    cached_cost = (metadata.cached_tokens or 0) * model_pricing.get("cached", 0)
+
+    # Context cache costs
+    context_cache_cost = _calculate_context_cache_cost(
+        metadata, model_pricing, model, use_vertex_ai=True
+    )
+
+    # Grounding costs
+    grounding_cost = _calculate_grounding_cost(metadata, model)
+
+    # Apply batch mode discount (50% for Vertex AI)
+    if metadata.batch_mode:
+        prompt_cost *= 0.5
+        completion_cost *= 0.5
+        context_cache_cost *= 0.5
+        # Note: We don't discount grounding costs
+
+    total_cost = (
+        prompt_cost
+        + completion_cost
+        + cached_cost
+        + context_cache_cost
+        + grounding_cost
+    )
+
+    return total_cost
+
+
+def _calculate_vertex_1_5_cost(
+    metadata: CostMetadata, model_pricing: dict[str, float], model: str
+) -> float:
+    """Calculate cost for Vertex AI's Gemini 1.5 models."""
+    # Text cost - convert tokens to characters (approx 4 chars per token)
+    text_chars = (metadata.input_tokens or 0) * 4  # Approximation
+    text_cost = (text_chars / 1000) * model_pricing["text_input"]
+
+    # Output cost
+    output_chars = (metadata.output_tokens or 0) * 4  # Approximation
+    output_cost = (output_chars / 1000) * model_pricing["output"]
+
+    # Context cache costs
+    context_cache_cost = _calculate_context_cache_cost(
+        metadata, model_pricing, model, use_vertex_ai=True
+    )
+
+    # Grounding costs
+    grounding_cost = _calculate_grounding_cost(metadata, model)
+
+    # Apply batch mode discount if applicable (50% off for Vertex AI)
+    if metadata.batch_mode:
+        text_cost *= 0.5
+        output_cost *= 0.5
+        context_cache_cost *= 0.5
+        # Note: We don't discount grounding costs
+
+    total_cost = text_cost + output_cost + context_cache_cost + grounding_cost
+
+    return total_cost
+
+
+def _calculate_standard_gemini_cost(
+    metadata: CostMetadata,
+    model_pricing: dict[str, float],
+    model: str,
+    use_long_context: bool,
+) -> float:
+    """Calculate cost for standard Gemini API."""
+    # Determine prices based on context length
+    prompt_price = model_pricing["prompt_long" if use_long_context else "prompt_short"]
+    cached_price = model_pricing["cached"]
+    completion_price = model_pricing[
+        "completion_long" if use_long_context else "completion_short"
+    ]
+
+    # Basic token costs
+    prompt_cost = (metadata.input_tokens or 0) * prompt_price
+    cached_cost = (metadata.cached_tokens or 0) * cached_price
+    completion_cost = (metadata.output_tokens or 0) * completion_price
+
+    # Media token costs is included in the prompt/completion cost
+
+    # Context cache costs
+    context_cache_cost = _calculate_context_cache_cost(
+        metadata, model_pricing, model, use_vertex_ai=False
+    )
+
+    # Grounding costs - only applies to certain models
+    grounding_cost = _calculate_grounding_cost(metadata, model)
+
+    total_cost = (
+        prompt_cost
+        + cached_cost
+        + completion_cost
+        + context_cache_cost
+        + grounding_cost
+    )
+
+    return total_cost
+
+
+def calculate_cost(
+    metadata: CostMetadata,
+    model: str,
+) -> float | None:
+    """Calculate the cost of a Google API call.
+
+    This function supports both direct Gemini API and Vertex AI pricing.
+    It handles different media types (text, image, video, audio) and special features
+    like context caching and grounding.
+
+    https://ai.google.dev/pricing
+    https://cloud.google.com/vertex-ai/generative-ai/pricing
+
+    Args:
+        metadata: Additional metadata required for cost calculation
+        model: Model name to use for pricing calculation
+
+    Returns:
+        Total cost in USD or None if invalid input
+    """
+    # Basic validation
+    if metadata.input_tokens is None or metadata.output_tokens is None:
+        return None
+
+    # Initialize default values
+    if metadata.cached_tokens is None:
+        metadata.cached_tokens = 0
+
+    # Check if we're using Vertex AI pricing
+    use_vertex_ai = metadata.google and metadata.google.use_vertex_ai
+
+    # Determine if we're using long context pricing
+    use_long_context = (
+        metadata.context_length is not None and metadata.context_length > 128_000
+    ) or (metadata.input_tokens > 128_000)
+
+    # Get the appropriate pricing table
+    try:
+        if use_vertex_ai and model in VERTEX_AI_PRICING:
+            model_pricing = VERTEX_AI_PRICING[model]
+        else:
+            model_pricing = GEMINI_API_PRICING[model]
+    except KeyError:
+        # Unknown model
+        return None
+
+    # Calculate cost based on API type
+    if use_vertex_ai:
+        if model.startswith("gemini-2.0"):
+            return _calculate_vertex_2_0_cost(metadata, model_pricing, model)
+        elif model.startswith("gemini-1.5"):  # pragma: no cover
+            return _calculate_vertex_1_5_cost(metadata, model_pricing, model)
+    else:
+        # Standard Gemini API pricing
+        return _calculate_standard_gemini_cost(
+            metadata, model_pricing, model, use_long_context
+        )
mirascope/core/costs/_groq_calculate_cost.py

@@ -0,0 +1,156 @@
+"""Calculate the cost of a completion using the Groq API."""
+
+from ..base.types import CostMetadata
+
+
+def calculate_cost(
+    metadata: CostMetadata,
+    model: str = "mixtral-8x7b-32768",
+) -> float | None:
+    """Calculate the cost of a completion using the Groq API.
+
+    https://wow.groq.com/
+
+    Model                                  Input              Output
+    llama-3.3-70b-versatile                $0.59 / 1M tokens  $0.79 / 1M tokens
+    llama-3.3-70b-specdec                  $0.59 / 1M tokens  $0.99 / 1M tokens
+    llama-3.1-8b-instant                   $0.05 / 1M tokens  $0.08 / 1M tokens
+    llama3-70b-8192                        $0.59 / 1M tokens  $0.79 / 1M tokens
+    llama3-8b-8192                         $0.05 / 1M tokens  $0.08 / 1M tokens
+    llama-guard-3-8b                       $0.20 / 1M tokens  $0.20 / 1M tokens
+    mixtral-8x7b-32768                     $0.24 / 1M tokens  $0.24 / 1M tokens
+    gemma-7b-it                            $0.07 / 1M tokens  $0.07 / 1M tokens
+    gemma2-9b-it                           $0.20 / 1M tokens  $0.20 / 1M tokens
+    mistral-saba-24b                       $0.79 / 1M tokens  $0.79 / 1M tokens
+    qwen-2.5-32b                           $0.79 / 1M tokens  $0.79 / 1M tokens
+    qwen-2.5-coder-32b                     $0.79 / 1M tokens  $0.79 / 1M tokens
+    deepseek-r1-distill-qwen-32b           $0.69 / 1M tokens  $0.69 / 1M tokens
+    deepseek-r1-distill-llama-70b          $0.75 / 1M tokens  $0.99 / 1M tokens
+    deepseek-r1-distill-llama-70b-specdec  $0.75 / 1M tokens  $0.99 / 1M tokens
+    llama-3.2-1b-preview                   $0.04 / 1M tokens  $0.04 / 1M tokens
+    llama-3.2-3b-preview                   $0.06 / 1M tokens  $0.06 / 1M tokens
+    llama-3.2-11b-vision-preview           $0.18 / 1M tokens  $0.18 / 1M tokens
+    llama-3.2-90b-vision-preview           $0.90 / 1M tokens  $0.90 / 1M tokens
+    """
+    pricing = {
+        "llama-3.3-70b-versatile": {
+            "prompt": 0.000_000_59,
+            "completion": 0.000_000_79,
+        },
+        "llama-3.3-70b-specdec": {
+            "prompt": 0.000_000_59,
+            "completion": 0.000_000_99,
+        },
+        "llama3-groq-70b-8192-tool-use-preview": {
+            "prompt": 0.000_000_89,
+            "completion": 0.000_000_89,
+        },
+        "llama3-groq-8b-8192-tool-use-preview": {
+            "prompt": 0.000_000_19,
+            "completion": 0.000_000_19,
+        },
+        "llama-3.1-8b-instant": {
+            "prompt": 0.000_000_05,
+            "completion": 0.000_000_08,
+        },
+        "llama-guard-3-8b": {
+            "prompt": 0.000_000_2,
+            "completion": 0.000_000_2,
+        },
+        "llama3-70b-8192": {
+            "prompt": 0.000_000_59,
+            "completion": 0.000_000_79,
+        },
+        "llama3-8b-8192": {
+            "prompt": 0.000_000_05,
+            "completion": 0.000_000_08,
+        },
+        "mixtral-8x7b-32768": {
+            "prompt": 0.000_000_24,
+            "completion": 0.000_000_24,
+        },
+        "gemma-7b-it": {
+            "prompt": 0.000_000_07,
+            "completion": 0.000_000_07,
+        },
+        "gemma2-9b-it": {
+            "prompt": 0.000_000_2,
+            "completion": 0.000_000_2,
+        },
+        "mistral-saba-24b": {
+            "prompt": 0.000_000_79,
+            "completion": 0.000_000_79,
+        },
+        "qwen-2.5-32b": {
+            "prompt": 0.000_000_79,
+            "completion": 0.000_000_79,
+        },
+        "qwen-2.5-coder-32b": {
+            "prompt": 0.000_000_79,
+            "completion": 0.000_000_79,
+        },
+        "deepseek-r1-distill-qwen-32b": {
+            "prompt": 0.000_000_69,
+            "completion": 0.000_000_69,
+        },
+        "deepseek-r1-distill-llama-70b": {
+            "prompt": 0.000_000_75,
+            "completion": 0.000_000_99,
+        },
+        "deepseek-r1-distill-llama-70b-specdec": {
+            "prompt": 0.000_000_75,
+            "completion": 0.000_000_99,
+        },
+        "llama-3.2-1b-preview": {
+            "prompt": 0.000_000_04,
+            "completion": 0.000_000_04,
+        },
+        "llama-3.2-3b-preview": {
+            "prompt": 0.000_000_06,
+            "completion": 0.000_000_06,
+        },
+        # Vision models
+        "llama-3.2-11b-vision-preview": {
+            "prompt": 0.000_000_18,
+            "completion": 0.000_000_18,
+        },
+        "llama-3.2-90b-vision-preview": {
+            "prompt": 0.000_000_90,
+            "completion": 0.000_000_90,
+        },
+    }
+
+    if metadata.input_tokens is None or metadata.output_tokens is None:
+        return None
+
+    try:
+        model_pricing = pricing[model]
+    except KeyError:
+        return None
+
+    # Calculate cost for text tokens
+    prompt_cost = metadata.input_tokens * model_pricing["prompt"]
+    completion_cost = metadata.output_tokens * model_pricing["completion"]
+    total_cost = prompt_cost + completion_cost
+
+    # Calculate cost for images if present
+    # Groq bills 6,400 tokens per image for vision models
+    # https://groq.com/pricing/
+    image_cost = 0.0
+    if metadata.images and "vision" in model:
+        # For Groq vision models, each image is billed at 6,400 tokens
+        tokens_per_image = 6400
+
+        # Count the number of images
+        image_count = len(metadata.images)
+
+        # Calculate total image tokens
+        total_image_tokens = image_count * tokens_per_image
+
+        # Images are charged at the prompt token rate
+        image_cost = total_image_tokens * model_pricing["prompt"]
+
+        # Add image cost to total
+        total_cost += image_cost
+
+    return total_cost
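The image branch at the end of the new Groq module bills each image as a flat 6,400 tokens at the model's prompt rate on vision models. A small worked example with the llama-3.2-90b-vision-preview rates above; the token and image counts are made up:

# Illustrative arithmetic only; rates copied from the pricing dict above.
rate = 0.000_000_90                                   # prompt and completion rate per token
input_tokens, output_tokens, images = 1_000, 500, 2
text_cost = (input_tokens + output_tokens) * rate     # 0.00135 USD
image_cost = images * 6_400 * rate                    # 0.01152 USD
total_cost = text_cost + image_cost                   # 0.01287 USD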