mirascope 1.19.0__py3-none-any.whl → 1.20.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90)
  1. mirascope/__init__.py +4 -0
  2. mirascope/beta/openai/realtime/realtime.py +7 -8
  3. mirascope/beta/openai/realtime/tool.py +2 -2
  4. mirascope/core/__init__.py +10 -1
  5. mirascope/core/anthropic/_utils/__init__.py +0 -2
  6. mirascope/core/anthropic/_utils/_convert_message_params.py +1 -7
  7. mirascope/core/anthropic/_utils/_message_param_converter.py +48 -31
  8. mirascope/core/anthropic/call_response.py +7 -9
  9. mirascope/core/anthropic/call_response_chunk.py +10 -0
  10. mirascope/core/anthropic/stream.py +6 -8
  11. mirascope/core/azure/_utils/__init__.py +0 -2
  12. mirascope/core/azure/call_response.py +7 -10
  13. mirascope/core/azure/call_response_chunk.py +6 -1
  14. mirascope/core/azure/stream.py +6 -8
  15. mirascope/core/base/__init__.py +10 -1
  16. mirascope/core/base/_utils/__init__.py +2 -0
  17. mirascope/core/base/_utils/_get_image_dimensions.py +39 -0
  18. mirascope/core/base/call_response.py +36 -6
  19. mirascope/core/base/call_response_chunk.py +15 -1
  20. mirascope/core/base/stream.py +25 -3
  21. mirascope/core/base/types.py +276 -2
  22. mirascope/core/bedrock/_utils/__init__.py +0 -2
  23. mirascope/core/bedrock/call_response.py +7 -10
  24. mirascope/core/bedrock/call_response_chunk.py +6 -0
  25. mirascope/core/bedrock/stream.py +6 -10
  26. mirascope/core/cohere/_utils/__init__.py +0 -2
  27. mirascope/core/cohere/call_response.py +7 -10
  28. mirascope/core/cohere/call_response_chunk.py +6 -0
  29. mirascope/core/cohere/stream.py +5 -8
  30. mirascope/core/costs/__init__.py +5 -0
  31. mirascope/core/{anthropic/_utils/_calculate_cost.py → costs/_anthropic_calculate_cost.py} +45 -14
  32. mirascope/core/{azure/_utils/_calculate_cost.py → costs/_azure_calculate_cost.py} +3 -3
  33. mirascope/core/{bedrock/_utils/_calculate_cost.py → costs/_bedrock_calculate_cost.py} +3 -3
  34. mirascope/core/{cohere/_utils/_calculate_cost.py → costs/_cohere_calculate_cost.py} +12 -8
  35. mirascope/core/{gemini/_utils/_calculate_cost.py → costs/_gemini_calculate_cost.py} +7 -7
  36. mirascope/core/costs/_google_calculate_cost.py +427 -0
  37. mirascope/core/costs/_groq_calculate_cost.py +156 -0
  38. mirascope/core/costs/_litellm_calculate_cost.py +11 -0
  39. mirascope/core/costs/_mistral_calculate_cost.py +64 -0
  40. mirascope/core/costs/_openai_calculate_cost.py +416 -0
  41. mirascope/core/{vertex/_utils/_calculate_cost.py → costs/_vertex_calculate_cost.py} +8 -7
  42. mirascope/core/{xai/_utils/_calculate_cost.py → costs/_xai_calculate_cost.py} +9 -9
  43. mirascope/core/costs/calculate_cost.py +86 -0
  44. mirascope/core/gemini/_utils/__init__.py +0 -2
  45. mirascope/core/gemini/call_response.py +7 -10
  46. mirascope/core/gemini/call_response_chunk.py +6 -1
  47. mirascope/core/gemini/stream.py +5 -8
  48. mirascope/core/google/_utils/__init__.py +0 -2
  49. mirascope/core/google/_utils/_setup_call.py +21 -2
  50. mirascope/core/google/call_response.py +9 -10
  51. mirascope/core/google/call_response_chunk.py +6 -1
  52. mirascope/core/google/stream.py +5 -8
  53. mirascope/core/groq/_utils/__init__.py +0 -2
  54. mirascope/core/groq/call_response.py +22 -10
  55. mirascope/core/groq/call_response_chunk.py +6 -0
  56. mirascope/core/groq/stream.py +5 -8
  57. mirascope/core/litellm/call_response.py +3 -4
  58. mirascope/core/litellm/stream.py +30 -22
  59. mirascope/core/mistral/_utils/__init__.py +0 -2
  60. mirascope/core/mistral/call_response.py +7 -10
  61. mirascope/core/mistral/call_response_chunk.py +6 -0
  62. mirascope/core/mistral/stream.py +5 -8
  63. mirascope/core/openai/_utils/__init__.py +0 -2
  64. mirascope/core/openai/_utils/_convert_message_params.py +4 -4
  65. mirascope/core/openai/call_response.py +30 -10
  66. mirascope/core/openai/call_response_chunk.py +6 -0
  67. mirascope/core/openai/stream.py +5 -8
  68. mirascope/core/vertex/_utils/__init__.py +0 -2
  69. mirascope/core/vertex/call_response.py +5 -10
  70. mirascope/core/vertex/call_response_chunk.py +6 -0
  71. mirascope/core/vertex/stream.py +5 -8
  72. mirascope/core/xai/_utils/__init__.py +1 -2
  73. mirascope/core/xai/call_response.py +0 -11
  74. mirascope/llm/__init__.py +10 -2
  75. mirascope/llm/_protocols.py +8 -28
  76. mirascope/llm/call_response.py +6 -6
  77. mirascope/llm/call_response_chunk.py +12 -3
  78. mirascope/llm/llm_call.py +21 -23
  79. mirascope/llm/llm_override.py +56 -27
  80. mirascope/llm/stream.py +7 -7
  81. mirascope/llm/tool.py +1 -1
  82. mirascope/retries/fallback.py +1 -1
  83. {mirascope-1.19.0.dist-info → mirascope-1.20.1.dist-info}/METADATA +1 -1
  84. {mirascope-1.19.0.dist-info → mirascope-1.20.1.dist-info}/RECORD +86 -82
  85. mirascope/core/google/_utils/_calculate_cost.py +0 -215
  86. mirascope/core/groq/_utils/_calculate_cost.py +0 -69
  87. mirascope/core/mistral/_utils/_calculate_cost.py +0 -48
  88. mirascope/core/openai/_utils/_calculate_cost.py +0 -246
  89. {mirascope-1.19.0.dist-info → mirascope-1.20.1.dist-info}/WHEEL +0 -0
  90. {mirascope-1.19.0.dist-info → mirascope-1.20.1.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,156 @@
1
"""Calculate the cost of a completion using the Groq API."""

from ..base.types import CostMetadata

# Groq bills every image attached to a vision-model request as a flat
# 6,400 prompt tokens (https://groq.com/pricing/).
_VISION_TOKENS_PER_IMAGE = 6400


def calculate_cost(
    metadata: CostMetadata,
    model: str = "mixtral-8x7b-32768",
) -> float | None:
    """Calculate the cost of a completion using the Groq API.

    https://wow.groq.com/

    Rates below are per-token prices derived from Groq's published
    per-1M-token pricing (prompt rate, completion rate).

    Args:
        metadata: Token counts (and any images) for the call.
        model: The Groq model name used to look up pricing.

    Returns:
        The total cost in dollars, or ``None`` when token counts are
        missing or the model has no known pricing.
    """
    # model -> (prompt $/token, completion $/token)
    rates: dict[str, tuple[float, float]] = {
        "llama-3.3-70b-versatile": (0.000_000_59, 0.000_000_79),
        "llama-3.3-70b-specdec": (0.000_000_59, 0.000_000_99),
        "llama3-groq-70b-8192-tool-use-preview": (0.000_000_89, 0.000_000_89),
        "llama3-groq-8b-8192-tool-use-preview": (0.000_000_19, 0.000_000_19),
        "llama-3.1-8b-instant": (0.000_000_05, 0.000_000_08),
        "llama-guard-3-8b": (0.000_000_2, 0.000_000_2),
        "llama3-70b-8192": (0.000_000_59, 0.000_000_79),
        "llama3-8b-8192": (0.000_000_05, 0.000_000_08),
        "mixtral-8x7b-32768": (0.000_000_24, 0.000_000_24),
        "gemma-7b-it": (0.000_000_07, 0.000_000_07),
        "gemma2-9b-it": (0.000_000_2, 0.000_000_2),
        "mistral-saba-24b": (0.000_000_79, 0.000_000_79),
        "qwen-2.5-32b": (0.000_000_79, 0.000_000_79),
        "qwen-2.5-coder-32b": (0.000_000_79, 0.000_000_79),
        "deepseek-r1-distill-qwen-32b": (0.000_000_69, 0.000_000_69),
        "deepseek-r1-distill-llama-70b": (0.000_000_75, 0.000_000_99),
        "deepseek-r1-distill-llama-70b-specdec": (0.000_000_75, 0.000_000_99),
        "llama-3.2-1b-preview": (0.000_000_04, 0.000_000_04),
        "llama-3.2-3b-preview": (0.000_000_06, 0.000_000_06),
        # Vision models
        "llama-3.2-11b-vision-preview": (0.000_000_18, 0.000_000_18),
        "llama-3.2-90b-vision-preview": (0.000_000_90, 0.000_000_90),
    }

    if metadata.input_tokens is None or metadata.output_tokens is None:
        return None

    rate = rates.get(model)
    if rate is None:
        return None
    prompt_rate, completion_rate = rate

    # Text token cost.
    total_cost = (
        metadata.input_tokens * prompt_rate
        + metadata.output_tokens * completion_rate
    )

    # Image cost: each image on a vision model is billed as a fixed number
    # of tokens, charged at the prompt rate.
    if metadata.images and "vision" in model:
        total_cost += len(metadata.images) * _VISION_TOKENS_PER_IMAGE * prompt_rate

    return total_cost
@@ -0,0 +1,11 @@
1
"""Calculate the cost of a Litellm call."""

from ..base.types import CostMetadata


def calculate_cost(
    metadata: CostMetadata,
    model: str,
) -> float | None:
    """Calculate the cost of a Litellm call.

    The cost is expected to be pre-computed upstream and attached to the
    metadata, so this is a pass-through.

    Args:
        metadata: Cost metadata carrying the pre-computed cost, if any.
        model: The model name (unused; pricing is not looked up here).

    Returns:
        The pre-computed cost in dollars, or ``None`` when unavailable.
    """
    return metadata.cost
@@ -0,0 +1,64 @@
1
"""Calculate the cost of a completion using the Mistral API."""

from ..base.types import CostMetadata


def calculate_cost(
    metadata: CostMetadata,
    model: str = "open-mistral-7b",
) -> float | None:
    """Calculate the cost of a completion using the Mistral API.

    https://mistral.ai/technology/#pricing

    Rates below are per-token prices derived from Mistral's published
    per-1M-token pricing (prompt rate, completion rate). Any image tokens
    are already included in the reported token counts.

    Args:
        metadata: Token counts for the call.
        model: The Mistral model name used to look up pricing.

    Returns:
        The total cost in dollars, or ``None`` when token counts are
        missing or the model has no known pricing.
    """
    # model -> (prompt $/token, completion $/token)
    rates: dict[str, tuple[float, float]] = {
        "mistral-large-latest": (0.000_002, 0.000_006),
        "pixtral-large-latest": (0.000_002, 0.000_006),
        "mistral-small-latest": (0.000_000_1, 0.000_000_3),
        "mistral-saba-latest": (0.000_000_2, 0.000_000_6),
        "codestral-latest": (0.000_000_3, 0.000_000_9),
        "ministral-8b-latest": (0.000_000_1, 0.000_000_1),
        "ministral-3b-latest": (0.000_000_04, 0.000_000_04),
        "mistral-embed": (0.000_000_1, 0),
        "mistral-moderation-latest": (0.000_000_1, 0),
        "open-mistral-nemo": (0.000_000_3, 0.000_000_3),
        "open-mistral-nemo-2407": (0.000_000_3, 0.000_000_3),
        "open-mistral-7b": (0.000_000_25, 0.000_000_25),
        "open-mixtral-8x7b": (0.000_000_7, 0.000_000_7),
        "open-mixtral-8x22b": (0.000_002, 0.000_006),
        "mistral-large-2407": (0.000_003, 0.000_009),
        "mistral-medium-latest": (0.000_002_75, 0.000_008_1),
        "pixtral-12b-2409": (0.000_002, 0.000_006),
    }

    if metadata.input_tokens is None or metadata.output_tokens is None:
        return None

    rate = rates.get(model)
    if rate is None:
        return None
    prompt_rate, completion_rate = rate

    # Image tokens are included in the reported counts, so text-token cost
    # is the total cost.
    return (
        metadata.input_tokens * prompt_rate
        + metadata.output_tokens * completion_rate
    )
@@ -0,0 +1,416 @@
1
"""Calculate the cost of a completion using the OpenAI API."""

from ..base.types import CostMetadata

# Constants for image token calculation
# (https://platform.openai.com/docs/guides/vision)
LOW_DETAIL_IMAGE_TOKENS = 85
HIGH_DETAIL_TILE_TOKENS = 170
HIGH_DETAIL_BASE_TOKENS = 85
TILE_SIZE = 512


def _calculate_image_tokens(metadata: CostMetadata) -> int | None:
    """Calculate tokens used by images based on their size and detail level.

    https://platform.openai.com/docs/guides/vision
    Following OpenAI's pricing structure:
    - Low detail: 85 tokens per image
    - High detail: 85 tokens base + 170 tokens per 512px tile
      (after scaling to fit within 2048x2048 and making the shortest side 768px)

    Args:
        metadata: Cost metadata whose ``images`` entries may carry width,
            height, a detail level, and/or a precalculated token count.

    Returns:
        The total image tokens (0 when there are no images).
    """
    if not metadata.images:
        return 0

    total_image_tokens = 0

    for img in metadata.images:
        # A precalculated token count always wins — no dimensions required.
        # (Checking dimensions first would wrongly skip such images.)
        if img.tokens is not None:
            total_image_tokens += img.tokens
            continue

        if img.detail is not None and img.detail != "auto":
            detail = img.detail
        else:
            # Default "auto" to high detail: we can't determine the detail
            # level the API actually chose from the image alone.
            detail = "high"

        if detail == "low":
            # Low detail is a fixed per-image cost regardless of size,
            # so it applies even when dimensions are unknown.
            total_image_tokens += LOW_DETAIL_IMAGE_TOKENS
            continue

        # High-detail tile math needs dimensions; skip images without them.
        if not img.width or not img.height:
            continue

        width, height = img.width, img.height

        # Step 1: scale down to fit within a 2048x2048 square.
        if width > 2048 or height > 2048:
            aspect_ratio = width / height
            if width > height:
                width = 2048
                height = int(width / aspect_ratio)
            else:
                height = 2048
                width = int(height * aspect_ratio)

        # Step 2: scale so the shortest side is 768px.
        if min(width, height) > 768:
            if width < height:
                scale_factor = 768 / width
                width = 768
                height = int(height * scale_factor)
            else:
                scale_factor = 768 / height
                height = 768
                width = int(width * scale_factor)

        # Step 3: 170 tokens per 512px tile (rounding up) plus an
        # 85-token base.
        tiles_x = (width + TILE_SIZE - 1) // TILE_SIZE
        tiles_y = (height + TILE_SIZE - 1) // TILE_SIZE
        num_tiles = tiles_x * tiles_y

        total_image_tokens += (
            HIGH_DETAIL_TILE_TOKENS * num_tiles
        ) + HIGH_DETAIL_BASE_TOKENS

    return total_image_tokens
+ def calculate_cost(
84
+ metadata: CostMetadata,
85
+ model: str = "gpt-3.5-turbo-16k",
86
+ ) -> float | None:
87
+ """Calculate the cost of a completion using the OpenAI API.
88
+
89
+ https://openai.com/pricing
90
+
91
+ Model Input Cached Output
92
+ gpt-4.5-preview $75.00 / 1M tokens $37.50 / 1M tokens $150.00 / 1M tokens
93
+ gpt-4.5-preview-2025-02-27 $75.00 / 1M tokens $37.50 / 1M tokens $150.00 / 1M tokens
94
+ gpt-4o $2.50 / 1M tokens $1.25 / 1M tokens $10.00 / 1M tokens
95
+ gpt-4o-2024-11-20 $2.50 / 1M tokens $1.25 / 1M tokens $10.00 / 1M tokens
96
+ gpt-4o-2024-08-06 $2.50 / 1M tokens $1.25 / 1M tokens $10.00 / 1M tokens
97
+ gpt-4o-2024-05-13 $5.00 / 1M tokens $15.00 / 1M tokens
98
+ gpt-4o-audio-preview $2.50 / 1M tokens $1.25 / 1M tokens $10.00 / 1M tokens
99
+ gpt-4o-audio-preview-2024-12-17 $2.50 / 1M tokens $1.25 / 1M tokens $10.00 / 1M tokens
100
+ gpt-4o-audio-preview-2024-10-01 $2.50 / 1M tokens $1.25 / 1M tokens $10.00 / 1M tokens
101
+ gpt-4o-realtime-preview $5.00 / 1M tokens $2.50 / 1M tokens $20.00 / 1M tokens
102
+ gpt-4o-realtime-preview-2024-12-17 $5.00 / 1M tokens $2.50 / 1M tokens $20.00 / 1M tokens
103
+ gpt-4o-realtime-preview-2024-10-01 $5.00 / 1M tokens $2.50 / 1M tokens $20.00 / 1M tokens
104
+ gpt-4o-mini $0.150 / 1M tokens $0.075 / 1M tokens $0.600 / 1M tokens
105
+ gpt-4o-mini-2024-07-18 $0.150 / 1M tokens $0.075 / 1M tokens $0.600 / 1M tokens
106
+ gpt-4o-mini-audio-preview $0.150 / 1M tokens $0.075 / 1M tokens $0.600 / 1M tokens
107
+ gpt-4o-mini-audio-preview-2024-12-17 $0.150 / 1M tokens $0.075 / 1M tokens $0.600 / 1M tokens
108
+ gpt-4o-mini-realtime-preview $0.60 / 1M tokens $0.30 / 1M tokens $2.40 / 1M tokens
109
+ gpt-4o-mini-realtime-preview-2024-12-17 $0.60 / 1M tokens $0.30 / 1M tokens $2.40 / 1M tokens
110
+ o1 $15.00 / 1M tokens $7.50 / 1M tokens $60.00 / 1M tokens
111
+ o1-2024-12-17 $15.00 / 1M tokens $7.50 / 1M tokens $60.00 / 1M tokens
112
+ o1-preview $15.00 / 1M tokens $7.50 / 1M tokens $60.00 / 1M tokens
113
+ o1-preview-2024-09-12 $15.00 / 1M tokens $7.50 / 1M tokens $60.00 / 1M tokens
114
+ o3-mini $1.10 / 1M tokens $0.55 / 1M tokens $4.40 / 1M tokens
115
+ o3-mini-2025-01-31 $1.10 / 1M tokens $0.55 / 1M tokens $4.40 / 1M tokens
116
+ o1-mini $1.10 / 1M tokens $0.55 / 1M tokens $4.40 / 1M tokens
117
+ o1-mini-2024-09-12 $1.10 / 1M tokens $0.55 / 1M tokens $4.40 / 1M tokens
118
+ chatgpt-4o-latest $5.00 / 1M tokens $15.00 / 1M tokens
119
+ gpt-4-turbo $10.00 / 1M tokens $30.00 / 1M tokens
120
+ gpt-4-turbo-2024-04-09 $10.00 / 1M tokens $30.00 / 1M tokens
121
+ gpt-3.5-turbo-0125 $0.50 / 1M tokens $1.50 / 1M tokens
122
+ gpt-3.5-turbo-1106 $1.00 / 1M tokens $2.00 / 1M tokens
123
+ gpt-4-0125-preview $10.00 / 1M tokens $30.00 / 1M tokens
124
+ gpt-4-1106-preview $10.00 / 1M tokens $30.00 / 1M tokens
125
+ gpt-4-vision-preview $10.00 / 1M tokens $30.00 / 1M tokens
126
+ gpt-4 $30.00 / 1M tokens $60.00 / 1M tokens
127
+ gpt-4-32k $60.00 / 1M tokens $120.00 / 1M tokens
128
+ text-embedding-3-small $0.02 / 1M tokens
129
+ text-embedding-3-large $0.13 / 1M tokens
130
+ text-embedding-ada-002 $0.10 / 1M tokens
131
+ """
132
+ pricing = {
133
+ "gpt-4.5-preview": {
134
+ "prompt": 0.000_075,
135
+ "cached": 0.000_037_5,
136
+ "completion": 0.000_15,
137
+ "batch_prompt": 0.000_037_5,
138
+ "batch_completion": 0.000_075,
139
+ },
140
+ "gpt-4.5-preview-2025-02-27": {
141
+ "prompt": 0.000_075,
142
+ "cached": 0.000_037_5,
143
+ "completion": 0.000_15,
144
+ },
145
+ "gpt-4o": {
146
+ "prompt": 0.000_002_5,
147
+ "cached": 0.000_001_25,
148
+ "completion": 0.000_01,
149
+ "batch_prompt": 0.000_001_25,
150
+ "batch_completion": 0.000_005,
151
+ },
152
+ "gpt-4o-2024-11-20": {
153
+ "prompt": 0.000_002_5,
154
+ "cached": 0.000_001_25,
155
+ "completion": 0.000_01,
156
+ },
157
+ "gpt-4o-2024-08-06": {
158
+ "prompt": 0.000_002_5,
159
+ "cached": 0.000_001_25,
160
+ "completion": 0.000_01,
161
+ },
162
+ "gpt-4o-2024-05-13": {
163
+ "prompt": 0.000_005,
164
+ "cached": 0.000_002_5,
165
+ "completion": 0.000_015,
166
+ },
167
+ "gpt-4o-audio-preview": {
168
+ "prompt": 0.000_002_5,
169
+ "cached": 0.000_001_25,
170
+ "completion": 0.000_01,
171
+ },
172
+ "gpt-4o-audio-preview-2024-12-17": {
173
+ "prompt": 0.000_002_5,
174
+ "cached": 0.000_001_25,
175
+ "completion": 0.000_01,
176
+ },
177
+ "gpt-4o-audio-preview-2024-10-01": {
178
+ "prompt": 0.000_002_5,
179
+ "cached": 0.000_001_25,
180
+ "completion": 0.000_01,
181
+ },
182
+ "gpt-4o-realtime-preview": {
183
+ "prompt": 0.000_005,
184
+ "cached": 0.000_002_5,
185
+ "completion": 0.000_02,
186
+ },
187
+ "gpt-4o-realtime-preview-2024-12-17": {
188
+ "prompt": 0.000_005,
189
+ "cached": 0.000_002_5,
190
+ "completion": 0.000_02,
191
+ },
192
+ "gpt-4o-realtime-preview-2024-10-01": {
193
+ "prompt": 0.000_005,
194
+ "cached": 0.000_002_5,
195
+ "completion": 0.000_02,
196
+ },
197
+ "gpt-4o-mini": {
198
+ "prompt": 0.000_000_15,
199
+ "cached": 0.000_000_075,
200
+ "completion": 0.000_000_6,
201
+ },
202
+ "gpt-4o-mini-2024-07-18": {
203
+ "prompt": 0.000_000_15,
204
+ "cached": 0.000_000_075,
205
+ "completion": 0.000_000_6,
206
+ },
207
+ "gpt-4o-mini-audio-preview": {
208
+ "prompt": 0.000_000_15,
209
+ "cached": 0.000_000_075,
210
+ "completion": 0.000_000_6,
211
+ },
212
+ "gpt-4o-mini-audio-preview-2024-12-17": {
213
+ "prompt": 0.000_000_15,
214
+ "cached": 0.000_000_075,
215
+ "completion": 0.000_000_6,
216
+ },
217
+ "gpt-4o-mini-realtime-preview": {
218
+ "prompt": 0.000_000_6,
219
+ "cached": 0.000_000_3,
220
+ "completion": 0.000_002_4,
221
+ },
222
+ "gpt-4o-mini-realtime-preview-2024-12-17": {
223
+ "prompt": 0.000_000_6,
224
+ "cached": 0.000_000_3,
225
+ "completion": 0.000_002_4,
226
+ },
227
+ "o1": {
228
+ "prompt": 0.000_015,
229
+ "cached": 0.000_007_5,
230
+ "completion": 0.000_06,
231
+ },
232
+ "o1-2024-12-17": {
233
+ "prompt": 0.000_015,
234
+ "cached": 0.000_007_5,
235
+ "completion": 0.000_06,
236
+ },
237
+ "o1-preview": {
238
+ "prompt": 0.000_015,
239
+ "cached": 0.000_007_5,
240
+ "completion": 0.000_06,
241
+ },
242
+ "o1-preview-2024-09-12": {
243
+ "prompt": 0.000_015,
244
+ "cached": 0.000_007_5,
245
+ "completion": 0.000_06,
246
+ },
247
+ "o3-mini": {
248
+ "prompt": 0.000_001_1,
249
+ "cached": 0.000_000_55,
250
+ "completion": 0.000_004_4,
251
+ },
252
+ "o3-mini-2025-01-31": {
253
+ "prompt": 0.000_001_1,
254
+ "cached": 0.000_000_55,
255
+ "completion": 0.000_004_4,
256
+ },
257
+ "o1-mini": {
258
+ "prompt": 0.000_001_1,
259
+ "cached": 0.000_000_55,
260
+ "completion": 0.000_004_4,
261
+ },
262
+ "o1-mini-2024-09-12": {
263
+ "prompt": 0.000_001_1,
264
+ "cached": 0.000_000_55,
265
+ "completion": 0.000_004_4,
266
+ },
267
+ "chatgpt-4o-latest": {
268
+ "prompt": 0.000_005,
269
+ "cached": 0,
270
+ "completion": 0.000_015,
271
+ },
272
+ "gpt-4-turbo": {
273
+ "prompt": 0.000_01,
274
+ "cached": 0,
275
+ "completion": 0.000_03,
276
+ },
277
+ "gpt-4-turbo-2024-04-09": {
278
+ "prompt": 0.000_01,
279
+ "cached": 0,
280
+ "completion": 0.000_03,
281
+ },
282
+ "gpt-3.5-turbo-0125": {
283
+ "prompt": 0.000_000_5,
284
+ "cached": 0,
285
+ "completion": 0.000_001_5,
286
+ },
287
+ "gpt-3.5-turbo-1106": {
288
+ "prompt": 0.000_001,
289
+ "cached": 0,
290
+ "completion": 0.000_002,
291
+ },
292
+ "gpt-4-0125-preview": {
293
+ "prompt": 0.000_01,
294
+ "cached": 0,
295
+ "completion": 0.000_03,
296
+ },
297
+ "gpt-4-1106-preview": {
298
+ "prompt": 0.000_01,
299
+ "cached": 0,
300
+ "completion": 0.000_03,
301
+ },
302
+ "gpt-4-vision-preview": {
303
+ "prompt": 0.000_01,
304
+ "cached": 0,
305
+ "completion": 0.000_03,
306
+ },
307
+ "gpt-4": {
308
+ "prompt": 0.000_03,
309
+ "cached": 0,
310
+ "completion": 0.000_06,
311
+ },
312
+ "gpt-4-32k": {
313
+ "prompt": 0.000_06,
314
+ "cached": 0,
315
+ "completion": 0.000_12,
316
+ },
317
+ "gpt-3.5-turbo-4k": {
318
+ "prompt": 0.000_015,
319
+ "cached": 0,
320
+ "completion": 0.000_02,
321
+ },
322
+ "gpt-3.5-turbo-16k": {
323
+ "prompt": 0.000_003,
324
+ "cached": 0,
325
+ "completion": 0.000_004,
326
+ },
327
+ "gpt-4-8k": {
328
+ "prompt": 0.000_03,
329
+ "cached": 0,
330
+ "completion": 0.000_06,
331
+ },
332
+ "text-embedding-3-small": {
333
+ "prompt": 0.000_000_02,
334
+ "cached": 0,
335
+ "completion": 0,
336
+ "batch_prompt": 0.000_000_01,
337
+ },
338
+ "text-embedding-ada-002": {
339
+ "prompt": 0.000_000_1,
340
+ "cached": 0,
341
+ "completion": 0,
342
+ "batch_prompt": 0.000_000_05,
343
+ },
344
+ "text-embedding-3-large": {
345
+ "prompt": 0.000_000_13,
346
+ "cached": 0,
347
+ "completion": 0,
348
+ "batch_prompt": 0.000_000_065,
349
+ },
350
+ }
351
+
352
+ # Audio pricing for audio models (per-minute rates in dollars)
353
+
354
+ if metadata.cost is not None:
355
+ return metadata.cost
356
+
357
+ # Audio input/output costs
358
+ # ChatCompletion.usage has brake down of audio input and output.
359
+ # The total cost already includes the audio input/output cost.
360
+
361
+ # Initialize cached tokens if not provided
362
+ if metadata.cached_tokens is None:
363
+ metadata.cached_tokens = 0
364
+
365
+ # Try to get model pricing
366
+ try:
367
+ model_pricing = pricing[model]
368
+ except KeyError:
369
+ return None
370
+
371
+ image_tokens = _calculate_image_tokens(metadata) or 0
372
+
373
+ input_tokens = (metadata.input_tokens or 0) + image_tokens
374
+
375
+ # Calculate costs for each component
376
+ prompt_cost = input_tokens * model_pricing["prompt"]
377
+ cached_cost = metadata.cached_tokens * model_pricing["cached"]
378
+ completion_cost = (metadata.output_tokens or 0) * model_pricing["completion"]
379
+
380
+ # Special handling for embedding models (only input tokens matter)
381
+ if "embedding" in model:
382
+ total_cost = prompt_cost
383
+ else:
384
+ total_cost = prompt_cost + cached_cost + completion_cost
385
+
386
+ # Apply batch discounts if applicable
387
+ if metadata.batch_mode:
388
+ # Based on the OpenAI pricing table, batch mode typically provides
389
+ # approximately 50% discount for both input and output tokens
390
+ if "embedding" in model.lower():
391
+ # Embedding models have specific batch pricing
392
+ if model == "text-embedding-3-small":
393
+ prompt_cost = (
394
+ input_tokens * 0.000_000_01
395
+ ) # $0.01 per 1M tokens in batch mode
396
+ elif model == "text-embedding-3-large":
397
+ prompt_cost = (
398
+ input_tokens * 0.000_000_065
399
+ ) # $0.065 per 1M tokens in batch mode
400
+ elif model == "text-embedding-ada-002":
401
+ prompt_cost = (
402
+ input_tokens * 0.000_000_05
403
+ ) # $0.05 per 1M tokens in batch mode
404
+ else:
405
+ # For LLM models, typically 50% discount
406
+ prompt_cost *= 0.5
407
+ cached_cost *= 0.5
408
+ completion_cost *= 0.5
409
+
410
+ # Recalculate total cost with batch pricing
411
+ if "embedding" in model:
412
+ total_cost = prompt_cost
413
+ else:
414
+ total_cost = prompt_cost + cached_cost + completion_cost
415
+
416
+ return total_cost