mirascope 1.19.0__py3-none-any.whl → 1.20.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mirascope/__init__.py +4 -0
- mirascope/beta/openai/realtime/realtime.py +7 -8
- mirascope/beta/openai/realtime/tool.py +2 -2
- mirascope/core/__init__.py +10 -1
- mirascope/core/anthropic/_utils/__init__.py +0 -2
- mirascope/core/anthropic/_utils/_convert_message_params.py +1 -7
- mirascope/core/anthropic/_utils/_message_param_converter.py +48 -31
- mirascope/core/anthropic/call_response.py +7 -9
- mirascope/core/anthropic/call_response_chunk.py +10 -0
- mirascope/core/anthropic/stream.py +6 -8
- mirascope/core/azure/_utils/__init__.py +0 -2
- mirascope/core/azure/call_response.py +7 -10
- mirascope/core/azure/call_response_chunk.py +6 -1
- mirascope/core/azure/stream.py +6 -8
- mirascope/core/base/__init__.py +10 -1
- mirascope/core/base/_utils/__init__.py +2 -0
- mirascope/core/base/_utils/_get_image_dimensions.py +39 -0
- mirascope/core/base/call_response.py +36 -6
- mirascope/core/base/call_response_chunk.py +15 -1
- mirascope/core/base/stream.py +25 -3
- mirascope/core/base/types.py +276 -2
- mirascope/core/bedrock/_utils/__init__.py +0 -2
- mirascope/core/bedrock/call_response.py +7 -10
- mirascope/core/bedrock/call_response_chunk.py +6 -0
- mirascope/core/bedrock/stream.py +6 -10
- mirascope/core/cohere/_utils/__init__.py +0 -2
- mirascope/core/cohere/call_response.py +7 -10
- mirascope/core/cohere/call_response_chunk.py +6 -0
- mirascope/core/cohere/stream.py +5 -8
- mirascope/core/costs/__init__.py +5 -0
- mirascope/core/{anthropic/_utils/_calculate_cost.py → costs/_anthropic_calculate_cost.py} +45 -14
- mirascope/core/{azure/_utils/_calculate_cost.py → costs/_azure_calculate_cost.py} +3 -3
- mirascope/core/{bedrock/_utils/_calculate_cost.py → costs/_bedrock_calculate_cost.py} +3 -3
- mirascope/core/{cohere/_utils/_calculate_cost.py → costs/_cohere_calculate_cost.py} +12 -8
- mirascope/core/{gemini/_utils/_calculate_cost.py → costs/_gemini_calculate_cost.py} +7 -7
- mirascope/core/costs/_google_calculate_cost.py +427 -0
- mirascope/core/costs/_groq_calculate_cost.py +156 -0
- mirascope/core/costs/_litellm_calculate_cost.py +11 -0
- mirascope/core/costs/_mistral_calculate_cost.py +64 -0
- mirascope/core/costs/_openai_calculate_cost.py +416 -0
- mirascope/core/{vertex/_utils/_calculate_cost.py → costs/_vertex_calculate_cost.py} +8 -7
- mirascope/core/{xai/_utils/_calculate_cost.py → costs/_xai_calculate_cost.py} +9 -9
- mirascope/core/costs/calculate_cost.py +86 -0
- mirascope/core/gemini/_utils/__init__.py +0 -2
- mirascope/core/gemini/call_response.py +7 -10
- mirascope/core/gemini/call_response_chunk.py +6 -1
- mirascope/core/gemini/stream.py +5 -8
- mirascope/core/google/_utils/__init__.py +0 -2
- mirascope/core/google/_utils/_setup_call.py +21 -2
- mirascope/core/google/call_response.py +9 -10
- mirascope/core/google/call_response_chunk.py +6 -1
- mirascope/core/google/stream.py +5 -8
- mirascope/core/groq/_utils/__init__.py +0 -2
- mirascope/core/groq/call_response.py +22 -10
- mirascope/core/groq/call_response_chunk.py +6 -0
- mirascope/core/groq/stream.py +5 -8
- mirascope/core/litellm/call_response.py +3 -4
- mirascope/core/litellm/stream.py +30 -22
- mirascope/core/mistral/_utils/__init__.py +0 -2
- mirascope/core/mistral/call_response.py +7 -10
- mirascope/core/mistral/call_response_chunk.py +6 -0
- mirascope/core/mistral/stream.py +5 -8
- mirascope/core/openai/_utils/__init__.py +0 -2
- mirascope/core/openai/_utils/_convert_message_params.py +4 -4
- mirascope/core/openai/call_response.py +30 -10
- mirascope/core/openai/call_response_chunk.py +6 -0
- mirascope/core/openai/stream.py +5 -8
- mirascope/core/vertex/_utils/__init__.py +0 -2
- mirascope/core/vertex/call_response.py +5 -10
- mirascope/core/vertex/call_response_chunk.py +6 -0
- mirascope/core/vertex/stream.py +5 -8
- mirascope/core/xai/_utils/__init__.py +1 -2
- mirascope/core/xai/call_response.py +0 -11
- mirascope/llm/__init__.py +10 -2
- mirascope/llm/_protocols.py +8 -28
- mirascope/llm/call_response.py +6 -6
- mirascope/llm/call_response_chunk.py +12 -3
- mirascope/llm/llm_call.py +21 -23
- mirascope/llm/llm_override.py +56 -27
- mirascope/llm/stream.py +7 -7
- mirascope/llm/tool.py +1 -1
- mirascope/retries/fallback.py +1 -1
- {mirascope-1.19.0.dist-info → mirascope-1.20.1.dist-info}/METADATA +1 -1
- {mirascope-1.19.0.dist-info → mirascope-1.20.1.dist-info}/RECORD +86 -82
- mirascope/core/google/_utils/_calculate_cost.py +0 -215
- mirascope/core/groq/_utils/_calculate_cost.py +0 -69
- mirascope/core/mistral/_utils/_calculate_cost.py +0 -48
- mirascope/core/openai/_utils/_calculate_cost.py +0 -246
- {mirascope-1.19.0.dist-info → mirascope-1.20.1.dist-info}/WHEEL +0 -0
- {mirascope-1.19.0.dist-info → mirascope-1.20.1.dist-info}/licenses/LICENSE +0 -0
"""Calculate the cost of a completion using the Groq API."""

from ..base.types import CostMetadata

# Groq bills each image at a flat 6,400 tokens on vision models.
# https://groq.com/pricing/
_TOKENS_PER_IMAGE = 6400


def calculate_cost(
    metadata: CostMetadata,
    model: str = "mixtral-8x7b-32768",
) -> float | None:
    """Calculate the cost of a completion using the Groq API.

    https://wow.groq.com/

    Rates are encoded below in dollars per token as
    ``(prompt rate, completion rate)`` pairs.

    Args:
        metadata: Token usage (and optional images) for the completion.
        model: The Groq model identifier used for the completion.

    Returns:
        The total cost in dollars, or ``None`` when the model is unknown
        or either token count is missing.
    """
    # (prompt rate, completion rate) in dollars per token.
    rates: dict[str, tuple[float, float]] = {
        "llama-3.3-70b-versatile": (0.000_000_59, 0.000_000_79),
        "llama-3.3-70b-specdec": (0.000_000_59, 0.000_000_99),
        "llama3-groq-70b-8192-tool-use-preview": (0.000_000_89, 0.000_000_89),
        "llama3-groq-8b-8192-tool-use-preview": (0.000_000_19, 0.000_000_19),
        "llama-3.1-8b-instant": (0.000_000_05, 0.000_000_08),
        "llama-guard-3-8b": (0.000_000_2, 0.000_000_2),
        "llama3-70b-8192": (0.000_000_59, 0.000_000_79),
        "llama3-8b-8192": (0.000_000_05, 0.000_000_08),
        "mixtral-8x7b-32768": (0.000_000_24, 0.000_000_24),
        "gemma-7b-it": (0.000_000_07, 0.000_000_07),
        "gemma2-9b-it": (0.000_000_2, 0.000_000_2),
        "mistral-saba-24b": (0.000_000_79, 0.000_000_79),
        "qwen-2.5-32b": (0.000_000_79, 0.000_000_79),
        "qwen-2.5-coder-32b": (0.000_000_79, 0.000_000_79),
        "deepseek-r1-distill-qwen-32b": (0.000_000_69, 0.000_000_69),
        "deepseek-r1-distill-llama-70b": (0.000_000_75, 0.000_000_99),
        "deepseek-r1-distill-llama-70b-specdec": (0.000_000_75, 0.000_000_99),
        "llama-3.2-1b-preview": (0.000_000_04, 0.000_000_04),
        "llama-3.2-3b-preview": (0.000_000_06, 0.000_000_06),
        # Vision models
        "llama-3.2-11b-vision-preview": (0.000_000_18, 0.000_000_18),
        "llama-3.2-90b-vision-preview": (0.000_000_90, 0.000_000_90),
    }

    if metadata.input_tokens is None or metadata.output_tokens is None:
        return None

    model_rates = rates.get(model)
    if model_rates is None:
        return None
    prompt_rate, completion_rate = model_rates

    # Cost of the text tokens.
    total = (
        metadata.input_tokens * prompt_rate
        + metadata.output_tokens * completion_rate
    )

    # Vision models charge a flat per-image token count at the prompt rate.
    if metadata.images and "vision" in model:
        total += len(metadata.images) * _TOKENS_PER_IMAGE * prompt_rate

    return total
@@ -0,0 +1,64 @@
|
|
|
1
|
+
"""Calculate the cost of a completion using the Mistral API."""
|
|
2
|
+
|
|
3
|
+
from ..base.types import CostMetadata
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def calculate_cost(
|
|
7
|
+
metadata: CostMetadata,
|
|
8
|
+
model: str = "open-mistral-7b",
|
|
9
|
+
) -> float | None:
|
|
10
|
+
"""Calculate the cost of a completion using the Mistral API.
|
|
11
|
+
|
|
12
|
+
https://mistral.ai/technology/#pricing
|
|
13
|
+
|
|
14
|
+
Model Input Cached Output
|
|
15
|
+
mistral-large-latest $2/1M tokens $6/1M tokens
|
|
16
|
+
pixtral-large-latest $2/1M tokens $6/1M tokens
|
|
17
|
+
mistral-small-latest $0.1/1M tokens $0.3/1M tokens
|
|
18
|
+
mistral-saba-latest $0.2/1M tokens $0.6/1M tokens
|
|
19
|
+
codestral-latest $0.3/1M tokens $0.9/1M tokens
|
|
20
|
+
ministral-8b-latest $0.1/1M tokens $0.1/1M tokens
|
|
21
|
+
ministral-3b-latest $0.04/1M tokens $0.04/1M tokens
|
|
22
|
+
mistral-embed $0.1/1M tokens -
|
|
23
|
+
mistral-moderation-latest $0.1/1M tokens -
|
|
24
|
+
open-mistral-nemo $0.3/1M tokens $0.3/1M tokens
|
|
25
|
+
open-mistral-7b $0.25/1M tokens $0.25/1M tokens
|
|
26
|
+
open-mixtral-8x7b $0.7/1M tokens $0.7/1M tokens
|
|
27
|
+
open-mixtral-8x22b $2/1M tokens $6/1M tokens
|
|
28
|
+
"""
|
|
29
|
+
pricing = {
|
|
30
|
+
"mistral-large-latest": {"prompt": 0.000_002, "completion": 0.000_006},
|
|
31
|
+
"pixtral-large-latest": {"prompt": 0.000_002, "completion": 0.000_006},
|
|
32
|
+
"mistral-small-latest": {"prompt": 0.000_000_1, "completion": 0.000_000_3},
|
|
33
|
+
"mistral-saba-latest": {"prompt": 0.000_000_2, "completion": 0.000_000_6},
|
|
34
|
+
"codestral-latest": {"prompt": 0.000_000_3, "completion": 0.000_000_9},
|
|
35
|
+
"ministral-8b-latest": {"prompt": 0.000_000_1, "completion": 0.000_000_1},
|
|
36
|
+
"ministral-3b-latest": {"prompt": 0.000_000_04, "completion": 0.000_000_04},
|
|
37
|
+
"mistral-embed": {"prompt": 0.000_000_1, "completion": 0},
|
|
38
|
+
"mistral-moderation-latest": {"prompt": 0.000_000_1, "completion": 0},
|
|
39
|
+
"open-mistral-nemo": {"prompt": 0.000_000_3, "completion": 0.000_000_3},
|
|
40
|
+
"open-mistral-nemo-2407": {"prompt": 0.000_000_3, "completion": 0.000_000_3},
|
|
41
|
+
"open-mistral-7b": {"prompt": 0.000_000_25, "completion": 0.000_000_25},
|
|
42
|
+
"open-mixtral-8x7b": {"prompt": 0.000_000_7, "completion": 0.000_000_7},
|
|
43
|
+
"open-mixtral-8x22b": {"prompt": 0.000_002, "completion": 0.000_006},
|
|
44
|
+
"mistral-large-2407": {"prompt": 0.000_003, "completion": 0.000_009},
|
|
45
|
+
"mistral-medium-latest": {"prompt": 0.000_002_75, "completion": 0.000_008_1},
|
|
46
|
+
"pixtral-12b-2409": {"prompt": 0.000_002, "completion": 0.000_006},
|
|
47
|
+
}
|
|
48
|
+
|
|
49
|
+
if metadata.input_tokens is None or metadata.output_tokens is None:
|
|
50
|
+
return None
|
|
51
|
+
|
|
52
|
+
try:
|
|
53
|
+
model_pricing = pricing[model]
|
|
54
|
+
except KeyError:
|
|
55
|
+
return None
|
|
56
|
+
|
|
57
|
+
# Calculate cost for text tokens
|
|
58
|
+
prompt_cost = metadata.input_tokens * model_pricing["prompt"]
|
|
59
|
+
completion_cost = metadata.output_tokens * model_pricing["completion"]
|
|
60
|
+
total_cost = prompt_cost + completion_cost
|
|
61
|
+
|
|
62
|
+
# Image tokens is included in the cost
|
|
63
|
+
|
|
64
|
+
return total_cost
|
|
@@ -0,0 +1,416 @@
|
|
|
1
|
+
"""Calculate the cost of a completion using the OpenAI API."""
|
|
2
|
+
|
|
3
|
+
from ..base.types import CostMetadata
|
|
4
|
+
|
|
5
|
+
# Constants for image token calculation
|
|
6
|
+
LOW_DETAIL_IMAGE_TOKENS = 85
|
|
7
|
+
HIGH_DETAIL_TILE_TOKENS = 170
|
|
8
|
+
HIGH_DETAIL_BASE_TOKENS = 85
|
|
9
|
+
TILE_SIZE = 512
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def _calculate_image_tokens(metadata: CostMetadata) -> int | None:
|
|
13
|
+
"""Calculate tokens used by images based on their size and detail level.
|
|
14
|
+
|
|
15
|
+
https://platform.openai.com/docs/guides/vision
|
|
16
|
+
Following OpenAI's pricing structure:
|
|
17
|
+
- Low detail: 85 tokens per image
|
|
18
|
+
- High detail: 85 tokens base + 170 tokens per 512px tile
|
|
19
|
+
(after scaling to fit within 2048x2048 and making shortest side 768px)
|
|
20
|
+
"""
|
|
21
|
+
if not metadata.images or not metadata.images:
|
|
22
|
+
return 0
|
|
23
|
+
|
|
24
|
+
total_image_tokens = 0
|
|
25
|
+
|
|
26
|
+
for img in metadata.images:
|
|
27
|
+
if not img.width or not img.height:
|
|
28
|
+
continue
|
|
29
|
+
|
|
30
|
+
# If image already has precalculated tokens, use those
|
|
31
|
+
if img.tokens is not None:
|
|
32
|
+
total_image_tokens += img.tokens
|
|
33
|
+
continue
|
|
34
|
+
|
|
35
|
+
if img.detail is not None and img.detail != "auto":
|
|
36
|
+
detail = img.detail
|
|
37
|
+
else:
|
|
38
|
+
# Default to high detail for auto
|
|
39
|
+
# We can't determine detail level from image alone
|
|
40
|
+
detail = "high"
|
|
41
|
+
if detail == "low":
|
|
42
|
+
# Low detail is a fixed cost regardless of size
|
|
43
|
+
total_image_tokens += LOW_DETAIL_IMAGE_TOKENS
|
|
44
|
+
else:
|
|
45
|
+
# High detail calculation
|
|
46
|
+
|
|
47
|
+
# Scale to fit within 2048x2048 square
|
|
48
|
+
width, height = img.width, img.height
|
|
49
|
+
if width > 2048 or height > 2048:
|
|
50
|
+
aspect_ratio = width / height
|
|
51
|
+
if width > height:
|
|
52
|
+
width = 2048
|
|
53
|
+
height = int(width / aspect_ratio)
|
|
54
|
+
else:
|
|
55
|
+
height = 2048
|
|
56
|
+
width = int(height * aspect_ratio)
|
|
57
|
+
|
|
58
|
+
# Scale so shortest side is 768px
|
|
59
|
+
if min(width, height) > 768:
|
|
60
|
+
if width < height:
|
|
61
|
+
scale_factor = 768 / width
|
|
62
|
+
width = 768
|
|
63
|
+
height = int(height * scale_factor)
|
|
64
|
+
else:
|
|
65
|
+
scale_factor = 768 / height
|
|
66
|
+
height = 768
|
|
67
|
+
width = int(width * scale_factor)
|
|
68
|
+
|
|
69
|
+
# Calculate number of 512px tiles needed
|
|
70
|
+
tiles_x = (width + TILE_SIZE - 1) // TILE_SIZE
|
|
71
|
+
tiles_y = (height + TILE_SIZE - 1) // TILE_SIZE
|
|
72
|
+
num_tiles = tiles_x * tiles_y
|
|
73
|
+
|
|
74
|
+
# Calculate token cost
|
|
75
|
+
image_tokens = (
|
|
76
|
+
HIGH_DETAIL_TILE_TOKENS * num_tiles
|
|
77
|
+
) + HIGH_DETAIL_BASE_TOKENS
|
|
78
|
+
total_image_tokens += image_tokens
|
|
79
|
+
|
|
80
|
+
return total_image_tokens
|
def calculate_cost(
    metadata: CostMetadata,
    model: str = "gpt-3.5-turbo-16k",
) -> float | None:
    """Calculate the cost of a completion using the OpenAI API.

    Rates are taken from https://openai.com/pricing and encoded in the table
    below in dollars per token ("prompt" / "cached" / "completion", with
    optional "batch_prompt" / "batch_completion" entries).

    Args:
        metadata: Token usage, optional images, optional precomputed cost,
            cached-token count, and batch-mode flag for the completion.
        model: The OpenAI model identifier used for the completion.

    Returns:
        The total cost in dollars, ``metadata.cost`` when it is provided,
        or ``None`` when the model is not in the pricing table.
    """
    pricing = {
        "gpt-4.5-preview": {"prompt": 0.000_075, "cached": 0.000_037_5, "completion": 0.000_15, "batch_prompt": 0.000_037_5, "batch_completion": 0.000_075},
        "gpt-4.5-preview-2025-02-27": {"prompt": 0.000_075, "cached": 0.000_037_5, "completion": 0.000_15},
        "gpt-4o": {"prompt": 0.000_002_5, "cached": 0.000_001_25, "completion": 0.000_01, "batch_prompt": 0.000_001_25, "batch_completion": 0.000_005},
        "gpt-4o-2024-11-20": {"prompt": 0.000_002_5, "cached": 0.000_001_25, "completion": 0.000_01},
        "gpt-4o-2024-08-06": {"prompt": 0.000_002_5, "cached": 0.000_001_25, "completion": 0.000_01},
        "gpt-4o-2024-05-13": {"prompt": 0.000_005, "cached": 0.000_002_5, "completion": 0.000_015},
        "gpt-4o-audio-preview": {"prompt": 0.000_002_5, "cached": 0.000_001_25, "completion": 0.000_01},
        "gpt-4o-audio-preview-2024-12-17": {"prompt": 0.000_002_5, "cached": 0.000_001_25, "completion": 0.000_01},
        "gpt-4o-audio-preview-2024-10-01": {"prompt": 0.000_002_5, "cached": 0.000_001_25, "completion": 0.000_01},
        "gpt-4o-realtime-preview": {"prompt": 0.000_005, "cached": 0.000_002_5, "completion": 0.000_02},
        "gpt-4o-realtime-preview-2024-12-17": {"prompt": 0.000_005, "cached": 0.000_002_5, "completion": 0.000_02},
        "gpt-4o-realtime-preview-2024-10-01": {"prompt": 0.000_005, "cached": 0.000_002_5, "completion": 0.000_02},
        "gpt-4o-mini": {"prompt": 0.000_000_15, "cached": 0.000_000_075, "completion": 0.000_000_6},
        "gpt-4o-mini-2024-07-18": {"prompt": 0.000_000_15, "cached": 0.000_000_075, "completion": 0.000_000_6},
        "gpt-4o-mini-audio-preview": {"prompt": 0.000_000_15, "cached": 0.000_000_075, "completion": 0.000_000_6},
        "gpt-4o-mini-audio-preview-2024-12-17": {"prompt": 0.000_000_15, "cached": 0.000_000_075, "completion": 0.000_000_6},
        "gpt-4o-mini-realtime-preview": {"prompt": 0.000_000_6, "cached": 0.000_000_3, "completion": 0.000_002_4},
        "gpt-4o-mini-realtime-preview-2024-12-17": {"prompt": 0.000_000_6, "cached": 0.000_000_3, "completion": 0.000_002_4},
        "o1": {"prompt": 0.000_015, "cached": 0.000_007_5, "completion": 0.000_06},
        "o1-2024-12-17": {"prompt": 0.000_015, "cached": 0.000_007_5, "completion": 0.000_06},
        "o1-preview": {"prompt": 0.000_015, "cached": 0.000_007_5, "completion": 0.000_06},
        "o1-preview-2024-09-12": {"prompt": 0.000_015, "cached": 0.000_007_5, "completion": 0.000_06},
        "o3-mini": {"prompt": 0.000_001_1, "cached": 0.000_000_55, "completion": 0.000_004_4},
        "o3-mini-2025-01-31": {"prompt": 0.000_001_1, "cached": 0.000_000_55, "completion": 0.000_004_4},
        "o1-mini": {"prompt": 0.000_001_1, "cached": 0.000_000_55, "completion": 0.000_004_4},
        "o1-mini-2024-09-12": {"prompt": 0.000_001_1, "cached": 0.000_000_55, "completion": 0.000_004_4},
        "chatgpt-4o-latest": {"prompt": 0.000_005, "cached": 0, "completion": 0.000_015},
        "gpt-4-turbo": {"prompt": 0.000_01, "cached": 0, "completion": 0.000_03},
        "gpt-4-turbo-2024-04-09": {"prompt": 0.000_01, "cached": 0, "completion": 0.000_03},
        "gpt-3.5-turbo-0125": {"prompt": 0.000_000_5, "cached": 0, "completion": 0.000_001_5},
        "gpt-3.5-turbo-1106": {"prompt": 0.000_001, "cached": 0, "completion": 0.000_002},
        "gpt-4-0125-preview": {"prompt": 0.000_01, "cached": 0, "completion": 0.000_03},
        "gpt-4-1106-preview": {"prompt": 0.000_01, "cached": 0, "completion": 0.000_03},
        "gpt-4-vision-preview": {"prompt": 0.000_01, "cached": 0, "completion": 0.000_03},
        "gpt-4": {"prompt": 0.000_03, "cached": 0, "completion": 0.000_06},
        "gpt-4-32k": {"prompt": 0.000_06, "cached": 0, "completion": 0.000_12},
        "gpt-3.5-turbo-4k": {"prompt": 0.000_015, "cached": 0, "completion": 0.000_02},
        "gpt-3.5-turbo-16k": {"prompt": 0.000_003, "cached": 0, "completion": 0.000_004},
        "gpt-4-8k": {"prompt": 0.000_03, "cached": 0, "completion": 0.000_06},
        "text-embedding-3-small": {"prompt": 0.000_000_02, "cached": 0, "completion": 0, "batch_prompt": 0.000_000_01},
        "text-embedding-ada-002": {"prompt": 0.000_000_1, "cached": 0, "completion": 0, "batch_prompt": 0.000_000_05},
        "text-embedding-3-large": {"prompt": 0.000_000_13, "cached": 0, "completion": 0, "batch_prompt": 0.000_000_065},
    }

    # An explicitly provided cost always wins.
    if metadata.cost is not None:
        return metadata.cost

    # Audio input/output costs: ChatCompletion.usage has a breakdown of audio
    # input and output, and the total token counts already include the audio
    # tokens, so no separate audio accounting is done here.

    # Treat a missing cached-token count as zero.
    # NOTE: this intentionally mutates the metadata object, matching the
    # established behavior of this function.
    if metadata.cached_tokens is None:
        metadata.cached_tokens = 0

    model_rates = pricing.get(model)
    if model_rates is None:
        return None

    # Image tokens are billed at the prompt rate alongside text input.
    image_tokens = _calculate_image_tokens(metadata) or 0
    input_tokens = (metadata.input_tokens or 0) + image_tokens

    # Per-component costs.
    prompt_cost = input_tokens * model_rates["prompt"]
    cached_cost = metadata.cached_tokens * model_rates["cached"]
    completion_cost = (metadata.output_tokens or 0) * model_rates["completion"]

    # Apply batch discounts if applicable: batch mode is roughly a 50%
    # discount for both input and output tokens.
    if metadata.batch_mode:
        if "embedding" in model.lower():
            # Embedding models have specific batch rates (dollars per token).
            batch_embedding_rates = {
                "text-embedding-3-small": 0.000_000_01,  # $0.01 / 1M tokens
                "text-embedding-3-large": 0.000_000_065,  # $0.065 / 1M tokens
                "text-embedding-ada-002": 0.000_000_05,  # $0.05 / 1M tokens
            }
            batch_rate = batch_embedding_rates.get(model)
            if batch_rate is not None:
                prompt_cost = input_tokens * batch_rate
        else:
            # LLM models: flat 50% discount on every component.
            prompt_cost *= 0.5
            cached_cost *= 0.5
            completion_cost *= 0.5

    # Embedding models only charge for input tokens.
    if "embedding" in model:
        return prompt_cost
    return prompt_cost + cached_cost + completion_cost