mirascope 1.18.3__py3-none-any.whl → 1.18.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (88)
  1. mirascope/__init__.py +20 -2
  2. mirascope/beta/openai/__init__.py +1 -1
  3. mirascope/beta/openai/realtime/__init__.py +1 -1
  4. mirascope/beta/openai/realtime/tool.py +1 -1
  5. mirascope/beta/rag/__init__.py +2 -2
  6. mirascope/beta/rag/base/__init__.py +2 -2
  7. mirascope/beta/rag/weaviate/__init__.py +1 -1
  8. mirascope/core/__init__.py +26 -8
  9. mirascope/core/anthropic/__init__.py +3 -3
  10. mirascope/core/anthropic/_utils/_calculate_cost.py +114 -47
  11. mirascope/core/anthropic/call_response.py +9 -1
  12. mirascope/core/anthropic/call_response_chunk.py +7 -0
  13. mirascope/core/anthropic/stream.py +3 -1
  14. mirascope/core/azure/__init__.py +2 -2
  15. mirascope/core/azure/_utils/_calculate_cost.py +4 -1
  16. mirascope/core/azure/call_response.py +9 -1
  17. mirascope/core/azure/call_response_chunk.py +5 -0
  18. mirascope/core/azure/stream.py +3 -1
  19. mirascope/core/base/__init__.py +11 -9
  20. mirascope/core/base/_utils/__init__.py +10 -10
  21. mirascope/core/base/_utils/_get_common_usage.py +8 -4
  22. mirascope/core/base/_utils/_get_create_fn_or_async_create_fn.py +2 -2
  23. mirascope/core/base/_utils/_protocols.py +9 -8
  24. mirascope/core/base/call_response.py +22 -20
  25. mirascope/core/base/call_response_chunk.py +12 -1
  26. mirascope/core/base/stream.py +24 -21
  27. mirascope/core/base/tool.py +7 -5
  28. mirascope/core/base/types.py +22 -5
  29. mirascope/core/bedrock/__init__.py +3 -3
  30. mirascope/core/bedrock/_utils/_calculate_cost.py +4 -1
  31. mirascope/core/bedrock/call_response.py +8 -1
  32. mirascope/core/bedrock/call_response_chunk.py +5 -0
  33. mirascope/core/bedrock/stream.py +3 -1
  34. mirascope/core/cohere/__init__.py +2 -2
  35. mirascope/core/cohere/_utils/_calculate_cost.py +4 -3
  36. mirascope/core/cohere/call_response.py +9 -1
  37. mirascope/core/cohere/call_response_chunk.py +5 -0
  38. mirascope/core/cohere/stream.py +3 -1
  39. mirascope/core/gemini/__init__.py +2 -2
  40. mirascope/core/gemini/_utils/_calculate_cost.py +4 -1
  41. mirascope/core/gemini/_utils/_convert_message_params.py +1 -1
  42. mirascope/core/gemini/call_response.py +9 -1
  43. mirascope/core/gemini/call_response_chunk.py +5 -0
  44. mirascope/core/gemini/stream.py +3 -1
  45. mirascope/core/google/__init__.py +2 -2
  46. mirascope/core/google/_utils/_calculate_cost.py +141 -14
  47. mirascope/core/google/_utils/_convert_message_params.py +23 -51
  48. mirascope/core/google/_utils/_message_param_converter.py +34 -33
  49. mirascope/core/google/_utils/_validate_media_type.py +34 -0
  50. mirascope/core/google/call_response.py +26 -4
  51. mirascope/core/google/call_response_chunk.py +17 -9
  52. mirascope/core/google/stream.py +20 -2
  53. mirascope/core/groq/__init__.py +2 -2
  54. mirascope/core/groq/_utils/_calculate_cost.py +12 -11
  55. mirascope/core/groq/call_response.py +9 -1
  56. mirascope/core/groq/call_response_chunk.py +5 -0
  57. mirascope/core/groq/stream.py +3 -1
  58. mirascope/core/litellm/__init__.py +1 -1
  59. mirascope/core/litellm/_utils/_setup_call.py +7 -3
  60. mirascope/core/mistral/__init__.py +2 -2
  61. mirascope/core/mistral/_utils/_calculate_cost.py +10 -9
  62. mirascope/core/mistral/call_response.py +9 -1
  63. mirascope/core/mistral/call_response_chunk.py +5 -0
  64. mirascope/core/mistral/stream.py +3 -1
  65. mirascope/core/openai/__init__.py +2 -2
  66. mirascope/core/openai/_utils/_calculate_cost.py +78 -37
  67. mirascope/core/openai/call_params.py +13 -0
  68. mirascope/core/openai/call_response.py +14 -1
  69. mirascope/core/openai/call_response_chunk.py +12 -0
  70. mirascope/core/openai/stream.py +6 -4
  71. mirascope/core/vertex/__init__.py +1 -1
  72. mirascope/core/vertex/_utils/_calculate_cost.py +1 -0
  73. mirascope/core/vertex/_utils/_convert_message_params.py +1 -1
  74. mirascope/core/vertex/call_response.py +9 -1
  75. mirascope/core/vertex/call_response_chunk.py +5 -0
  76. mirascope/core/vertex/stream.py +3 -1
  77. mirascope/integrations/_middleware_factory.py +6 -6
  78. mirascope/integrations/logfire/_utils.py +1 -1
  79. mirascope/llm/__init__.py +1 -1
  80. mirascope/llm/_protocols.py +5 -5
  81. mirascope/llm/call_response.py +16 -7
  82. mirascope/llm/stream.py +43 -31
  83. mirascope/retries/__init__.py +1 -1
  84. mirascope/tools/__init__.py +2 -2
  85. {mirascope-1.18.3.dist-info → mirascope-1.18.4.dist-info}/METADATA +1 -1
  86. {mirascope-1.18.3.dist-info → mirascope-1.18.4.dist-info}/RECORD +88 -87
  87. {mirascope-1.18.3.dist-info → mirascope-1.18.4.dist-info}/WHEEL +0 -0
  88. {mirascope-1.18.3.dist-info → mirascope-1.18.4.dist-info}/licenses/LICENSE +0 -0
mirascope/core/openai/_utils/_calculate_cost.py CHANGED
@@ -3,6 +3,7 @@
 
 def calculate_cost(
     input_tokens: int | float | None,
+    cached_tokens: int | float | None,
     output_tokens: int | float | None,
     model: str = "gpt-3.5-turbo-16k",
 ) -> float | None:
@@ -10,196 +11,236 @@ def calculate_cost(
 
     https://openai.com/pricing
 
-    Model                                    Input                Output
-    gpt-4o                                   $2.50 / 1M tokens    $10.00 / 1M tokens
-    gpt-4o-2024-11-20                        $2.50 / 1M tokens    $10.00 / 1M tokens
-    gpt-4o-2024-08-06                        $2.50 / 1M tokens    $10.00 / 1M tokens
-    gpt-4o-2024-05-13                        $5.00 / 1M tokens    $15.00 / 1M tokens
-    gpt-4o-audio-preview                     $2.50 / 1M tokens    $10.00 / 1M tokens
-    gpt-4o-audio-preview-2024-12-17          $2.50 / 1M tokens    $10.00 / 1M tokens
-    gpt-4o-audio-preview-2024-10-01          $2.50 / 1M tokens    $10.00 / 1M tokens
-    gpt-4o-realtime-preview                  $5.00 / 1M tokens    $20.00 / 1M tokens
-    gpt-4o-realtime-preview-2024-12-17       $5.00 / 1M tokens    $20.00 / 1M tokens
-    gpt-4o-realtime-preview-2024-10-01       $5.00 / 1M tokens    $20.00 / 1M tokens
-    gpt-4o-mini                              $0.15 / 1M tokens    $0.60 / 1M tokens
-    gpt-4o-mini-2024-07-18                   $0.15 / 1M tokens    $0.60 / 1M tokens
-    gpt-4o-mini-audio-preview                $0.15 / 1M tokens    $0.60 / 1M tokens
-    gpt-4o-mini-audio-preview-2024-12-17     $0.15 / 1M tokens    $0.60 / 1M tokens
-    gpt-4o-mini-realtime-preview             $0.60 / 1M tokens    $2.40 / 1M tokens
-    gpt-4o-mini-realtime-preview-2024-12-17  $0.60 / 1M tokens    $2.40 / 1M tokens
-    o1                                       $15.00 / 1M tokens   $60.00 / 1M tokens
-    o1-2024-12-17                            $15.00 / 1M tokens   $60.00 / 1M tokens
-    o1-preview-2024-09-12                    $15.00 / 1M tokens   $60.00 / 1M tokens
-    o3-mini                                  $1.10 / 1M tokens    $4.40 / 1M tokens
-    o3-mini-2025-01-31                       $1.10 / 1M tokens    $4.40 / 1M tokens
-    o1-mini                                  $1.10 / 1M tokens    $4.40 / 1M tokens
-    o1-mini-2024-09-12                       $1.10 / 1M tokens    $4.40 / 1M tokens
-    gpt-4-turbo                              $10.00 / 1M tokens   $30.00 / 1M tokens
-    gpt-4-turbo-2024-04-09                   $10.00 / 1M tokens   $30.00 / 1M tokens
-    gpt-3.5-turbo-0125                       $0.50 / 1M tokens    $1.50 / 1M tokens
-    gpt-3.5-turbo-1106                       $1.00 / 1M tokens    $2.00 / 1M tokens
-    gpt-4-1106-preview                       $10.00 / 1M tokens   $30.00 / 1M tokens
-    gpt-4                                    $30.00 / 1M tokens   $60.00 / 1M tokens
-    text-embedding-3-small                   $0.02 / 1M tokens
-    text-embedding-3-large                   $0.13 / 1M tokens
-    text-embedding-ada-0002                  $0.10 / 1M tokens
+    Model                                    Input                Cached               Output
+    gpt-4o                                   $2.50 / 1M tokens    $1.25 / 1M tokens    $10.00 / 1M tokens
+    gpt-4o-2024-11-20                        $2.50 / 1M tokens    $1.25 / 1M tokens    $10.00 / 1M tokens
+    gpt-4o-2024-08-06                        $2.50 / 1M tokens    $1.25 / 1M tokens    $10.00 / 1M tokens
+    gpt-4o-2024-05-13                        $5.00 / 1M tokens    $2.50 / 1M tokens    $15.00 / 1M tokens
+    gpt-4o-audio-preview                     $2.50 / 1M tokens    $1.25 / 1M tokens    $10.00 / 1M tokens
+    gpt-4o-audio-preview-2024-12-17          $2.50 / 1M tokens    $1.25 / 1M tokens    $10.00 / 1M tokens
+    gpt-4o-audio-preview-2024-10-01          $2.50 / 1M tokens    $1.25 / 1M tokens    $10.00 / 1M tokens
+    gpt-4o-realtime-preview                  $5.00 / 1M tokens    $2.50 / 1M tokens    $20.00 / 1M tokens
+    gpt-4o-realtime-preview-2024-12-17       $5.00 / 1M tokens    $2.50 / 1M tokens    $20.00 / 1M tokens
+    gpt-4o-realtime-preview-2024-10-01       $5.00 / 1M tokens    $2.50 / 1M tokens    $20.00 / 1M tokens
+    gpt-4o-mini                              $0.15 / 1M tokens    $0.08 / 1M tokens    $0.60 / 1M tokens
+    gpt-4o-mini-2024-07-18                   $0.15 / 1M tokens    $0.08 / 1M tokens    $0.60 / 1M tokens
+    gpt-4o-mini-audio-preview                $0.15 / 1M tokens    $0.08 / 1M tokens    $0.60 / 1M tokens
+    gpt-4o-mini-audio-preview-2024-12-17     $0.15 / 1M tokens    $0.08 / 1M tokens    $0.60 / 1M tokens
+    gpt-4o-mini-realtime-preview             $0.60 / 1M tokens    $0.30 / 1M tokens    $2.40 / 1M tokens
+    gpt-4o-mini-realtime-preview-2024-12-17  $0.60 / 1M tokens    $0.30 / 1M tokens    $2.40 / 1M tokens
+    o1                                       $15.00 / 1M tokens   $7.50 / 1M tokens    $60.00 / 1M tokens
+    o1-2024-12-17                            $15.00 / 1M tokens   $7.50 / 1M tokens    $60.00 / 1M tokens
+    o1-preview-2024-09-12                    $15.00 / 1M tokens   $7.50 / 1M tokens    $60.00 / 1M tokens
+    o3-mini                                  $1.10 / 1M tokens    $0.55 / 1M tokens    $4.40 / 1M tokens
+    o3-mini-2025-01-31                       $1.10 / 1M tokens    $0.55 / 1M tokens    $4.40 / 1M tokens
+    o1-mini                                  $1.10 / 1M tokens    $0.55 / 1M tokens    $4.40 / 1M tokens
+    o1-mini-2024-09-12                       $1.10 / 1M tokens    $0.55 / 1M tokens    $4.40 / 1M tokens
+    gpt-4-turbo                              $10.00 / 1M tokens                        $30.00 / 1M tokens
+    gpt-4-turbo-2024-04-09                   $10.00 / 1M tokens                        $30.00 / 1M tokens
+    gpt-3.5-turbo-0125                       $0.50 / 1M tokens                         $1.50 / 1M tokens
+    gpt-3.5-turbo-1106                       $1.00 / 1M tokens                         $2.00 / 1M tokens
+    gpt-4-1106-preview                       $10.00 / 1M tokens                        $30.00 / 1M tokens
+    gpt-4                                    $30.00 / 1M tokens                        $60.00 / 1M tokens
+    text-embedding-3-small                   $0.02 / 1M tokens
+    text-embedding-3-large                   $0.13 / 1M tokens
+    text-embedding-ada-0002                  $0.10 / 1M tokens
     """
     pricing = {
         "gpt-4o": {
             "prompt": 0.000_002_5,
+            "cached": 0.000_001_25,
             "completion": 0.000_01,
         },
         "gpt-4o-2024-11-20": {
             "prompt": 0.000_002_5,
+            "cached": 0.000_001_25,
             "completion": 0.000_01,
         },
         "gpt-4o-2024-08-06": {
             "prompt": 0.000_002_5,
+            "cached": 0.000_001_25,
             "completion": 0.000_01,
         },
         "gpt-4o-2024-05-13": {
             "prompt": 0.000_005,
+            "cached": 0.000_002_5,
             "completion": 0.000_015,
         },
         "gpt-4o-audio-preview": {
             "prompt": 0.000_002_5,
+            "cached": 0.000_001_25,
             "completion": 0.000_01,
         },
         "gpt-4o-audio-preview-2024-12-17": {
             "prompt": 0.000_002_5,
+            "cached": 0.000_001_25,
             "completion": 0.000_01,
         },
         "gpt-4o-audio-preview-2024-10-01": {
             "prompt": 0.000_002_5,
+            "cached": 0.000_001_25,
             "completion": 0.000_01,
         },
         "gpt-4o-realtime-preview": {
             "prompt": 0.000_005,
+            "cached": 0.000_002_5,
             "completion": 0.000_02,
         },
         "gpt-4o-realtime-preview-2024-12-17": {
             "prompt": 0.000_005,
+            "cached": 0.000_002_5,
             "completion": 0.000_02,
         },
         "gpt-4o-realtime-preview-2024-10-01": {
             "prompt": 0.000_005,
+            "cached": 0.000_002_5,
             "completion": 0.000_02,
         },
         "gpt-4o-mini": {
             "prompt": 0.000_000_15,
+            "cached": 0.000_000_08,
             "completion": 0.000_000_6,
         },
         "gpt-4o-mini-2024-07-18": {
             "prompt": 0.000_000_15,
+            "cached": 0.000_000_08,
             "completion": 0.000_000_6,
         },
         "gpt-4o-mini-audio-preview": {
             "prompt": 0.000_000_15,
+            "cached": 0.000_000_08,
             "completion": 0.000_000_6,
         },
         "gpt-4o-mini-audio-preview-2024-12-17": {
             "prompt": 0.000_000_15,
+            "cached": 0.000_000_08,
             "completion": 0.000_000_6,
         },
         "gpt-4o-mini-realtime-preview": {
             "prompt": 0.000_000_6,
+            "cached": 0.000_000_3,
             "completion": 0.000_002_4,
         },
         "gpt-4o-mini-realtime-preview-2024-12-17": {
             "prompt": 0.000_000_6,
+            "cached": 0.000_000_3,
             "completion": 0.000_002_4,
         },
         "o1": {
             "prompt": 0.000_015,
+            "cached": 0.000_007_5,
             "completion": 0.000_06,
         },
         "o1-2024-12-17": {
             "prompt": 0.000_015,
+            "cached": 0.000_007_5,
             "completion": 0.000_06,
         },
         "o1-preview-2024-09-12": {
             "prompt": 0.000_015,
+            "cached": 0.000_007_5,
             "completion": 0.000_06,
         },
         "o3-mini": {
             "prompt": 0.000_001_1,
+            "cached": 0.000_000_55,
             "completion": 0.000_004_4,
         },
         "o3-mini-2025-01-31": {
             "prompt": 0.000_001_1,
+            "cached": 0.000_000_55,
             "completion": 0.000_004_4,
         },
         "o1-mini": {
             "prompt": 0.000_001_1,
+            "cached": 0.000_000_55,
             "completion": 0.000_004_4,
         },
         "o1-mini-2024-09-12": {
             "prompt": 0.000_001_1,
+            "cached": 0.000_000_55,
             "completion": 0.000_004_4,
         },
         "gpt-4-turbo": {
             "prompt": 0.000_01,
+            "cached": 0,
             "completion": 0.000_03,
         },
         "gpt-4-turbo-2024-04-09": {
             "prompt": 0.000_01,
+            "cached": 0,
             "completion": 0.000_03,
         },
         "gpt-3.5-turbo-0125": {
             "prompt": 0.000_000_5,
+            "cached": 0,
             "completion": 0.000_001_5,
         },
         "gpt-3.5-turbo-1106": {
             "prompt": 0.000_001,
+            "cached": 0,
             "completion": 0.000_002,
         },
         "gpt-4-1106-preview": {
             "prompt": 0.000_01,
+            "cached": 0,
             "completion": 0.000_03,
         },
         "gpt-4": {
             "prompt": 0.000_003,
+            "cached": 0,
             "completion": 0.000_006,
         },
         "gpt-3.5-turbo-4k": {
             "prompt": 0.000_015,
+            "cached": 0,
             "completion": 0.000_02,
         },
         "gpt-3.5-turbo-16k": {
             "prompt": 0.000_003,
+            "cached": 0,
             "completion": 0.000_004,
         },
         "gpt-4-8k": {
             "prompt": 0.000_003,
+            "cached": 0,
             "completion": 0.000_006,
         },
         "gpt-4-32k": {
             "prompt": 0.000_006,
+            "cached": 0,
             "completion": 0.000_012,
         },
         "text-embedding-3-small": {
             "prompt": 0.000_000_02,
-            "completion": 0.000_000_02,
+            "cached": 0,
+            "completion": 0,
         },
         "text-embedding-ada-002": {
             "prompt": 0.000_000_1,
-            "completion": 0.000_000_1,
+            "cached": 0,
+            "completion": 0,
         },
         "text-embedding-3-large": {
             "prompt": 0.000_000_13,
-            "completion": 0.000_000_13,
+            "cached": 0,
+            "completion": 0,
         },
     }
     if input_tokens is None or output_tokens is None:
         return None
 
+    if cached_tokens is None:
+        cached_tokens = 0
+
     try:
         model_pricing = pricing[model]
     except KeyError:
         return None
 
     prompt_cost = input_tokens * model_pricing["prompt"]
+    cached_cost = cached_tokens * model_pricing["cached"]
     completion_cost = output_tokens * model_pricing["completion"]
-    total_cost = prompt_cost + completion_cost
+    total_cost = prompt_cost + cached_cost + completion_cost
 
     return total_cost
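
Note: with this change, `calculate_cost` prices cached prompt tokens separately at the model's cached rate (half the input rate for most chat models, 0 where no cached price is listed). A minimal sketch of the new arithmetic using the gpt-4o rates from the table above; the token counts are made up for illustration:

# Cost math mirroring the updated function, with gpt-4o per-token rates.
input_tokens, cached_tokens, output_tokens = 10_000, 4_000, 500

prompt_cost = input_tokens * 0.000_002_5      # $0.0250 ($2.50 / 1M)
cached_cost = cached_tokens * 0.000_001_25    # $0.0050 ($1.25 / 1M)
completion_cost = output_tokens * 0.000_01    # $0.0050 ($10.00 / 1M)
total_cost = prompt_cost + cached_cost + completion_cost
print(f"${total_cost:.4f}")  # $0.0350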
mirascope/core/openai/call_params.py CHANGED
@@ -18,6 +18,9 @@ if TYPE_CHECKING:
         ChatCompletionAudioParam,
         ChatCompletionModality,  # pyright: ignore [reportAttributeAccessIssue]
     )
+    from openai.types.chat.chat_completion_reasoning_effort import (  # pyright: ignore [reportMissingImports]
+        ChatCompletionReasoningEffort,  # pyright: ignore [reportAttributeAccessIssue]
+    )
 else:
     try:
         from openai.types.chat.chat_completion_audio_param import (  # pyright: ignore [reportMissingImports]
@@ -30,6 +33,14 @@ else:
 
         class ChatCompletionModality: ...
 
+    try:
+        from openai.types.chat.chat_completion_reasoning_effort import (  # pyright: ignore [reportMissingImports]
+            ChatCompletionReasoningEffort,
+        )
+    except ImportError:
+
+        class ChatCompletionReasoningEffort: ...
+
 
 class OpenAICallParams(BaseCallParams):
     """The parameters to use when calling the OpenAI API.
@@ -46,6 +57,7 @@ class OpenAICallParams(BaseCallParams):
     n: ...
     parallel_tool_calls: ...
     presence_penalty: ...
+    reasoning_effort: ...
     response_format: ...
     seed: ...
     stop: ...
@@ -67,6 +79,7 @@ class OpenAICallParams(BaseCallParams):
     n: NotRequired[int | None]
     parallel_tool_calls: NotRequired[bool]
     presence_penalty: NotRequired[float | None]
+    reasoning_effort: NotRequired[ChatCompletionReasoningEffort | None]
     response_format: NotRequired[ResponseFormat]
     seed: NotRequired[int | None]
     stop: NotRequired[str | list[str] | None]
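
The new `reasoning_effort` key is optional (`NotRequired`), so it is passed like any other OpenAI call parameter. A minimal sketch, assuming the standard `@openai.call` decorator; the model, prompt, and "high" value are illustrative:

from mirascope.core import openai

# reasoning_effort applies to reasoning models such as the o1/o3 family.
@openai.call("o3-mini", call_params={"reasoning_effort": "high"})
def solve(problem: str) -> str:
    return f"Solve step by step: {problem}"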
mirascope/core/openai/call_response.py CHANGED
@@ -118,6 +118,17 @@ class OpenAICallResponse(
         """Returns the number of input tokens."""
         return self.usage.prompt_tokens if self.usage else None
 
+    @computed_field
+    @property
+    def cached_tokens(self) -> int | None:
+        """Returns the number of cached tokens."""
+        return (
+            details.cached_tokens
+            if self.usage
+            and (details := getattr(self.usage, "prompt_tokens_details", None))
+            else None
+        )
+
     @computed_field
     @property
     def output_tokens(self) -> int | None:
@@ -128,7 +139,9 @@ class OpenAICallResponse(
     @property
     def cost(self) -> float | None:
         """Returns the cost of the call."""
-        return calculate_cost(self.input_tokens, self.output_tokens, self.model)
+        return calculate_cost(
+            self.input_tokens, self.cached_tokens, self.output_tokens, self.model
+        )
 
     @computed_field
     @cached_property
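
`cached_tokens` reads `usage.prompt_tokens_details.cached_tokens` when the API reports it, and `cost` now passes it through to `calculate_cost`. A minimal sketch of inspecting these fields on a response; the model and prompt are illustrative:

from mirascope.core import openai

@openai.call("gpt-4o-mini")
def recommend_book(genre: str) -> str:
    return f"Recommend a {genre} book"

response = recommend_book("fantasy")
# cached_tokens is None unless the provider reports prompt_tokens_details.
print(response.input_tokens, response.cached_tokens, response.output_tokens)
print(response.cost)  # now includes the cached-token component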
mirascope/core/openai/call_response_chunk.py CHANGED
@@ -79,6 +79,18 @@ class OpenAICallResponseChunk(BaseCallResponseChunk[ChatCompletionChunk, FinishReason]):
             return self.chunk.usage
         return None
 
+    @computed_field
+    @property
+    def cached_tokens(self) -> int | None:
+        """Returns the number of cached tokens."""
+        return (
+            details.cached_tokens
+            if hasattr(self.chunk, "usage")
+            and self.usage
+            and (details := getattr(self.usage, "prompt_tokens_details", None))
+            else None
+        )
+
     @property
     def input_tokens(self) -> int | None:
         """Returns the number of input tokens."""
mirascope/core/openai/stream.py CHANGED
@@ -87,9 +87,9 @@ class OpenAIStream(
     ) -> AsyncGenerator[tuple[OpenAICallResponseChunk, OpenAITool | None], None]:
         aiter = super().__aiter__()
 
-        async def generator() -> (
-            AsyncGenerator[tuple[OpenAICallResponseChunk, OpenAITool | None], None]
-        ):
+        async def generator() -> AsyncGenerator[
+            tuple[OpenAICallResponseChunk, OpenAITool | None], None
+        ]:
             async for chunk, tool in aiter:
                 if (
                     (choices := chunk.chunk.choices)
@@ -104,7 +104,9 @@ class OpenAIStream(
     @property
     def cost(self) -> float | None:
         """Returns the cost of the call."""
-        return calculate_cost(self.input_tokens, self.output_tokens, self.model)
+        return calculate_cost(
+            self.input_tokens, self.cached_tokens, self.output_tokens, self.model
+        )
 
     def _construct_message_param(
         self,
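
Streams get the same cost treatment, with `cached_tokens` taken from the final usage chunk once the stream is consumed. A minimal sketch, assuming the usual streaming decorator; the model and prompt are illustrative:

from mirascope.core import openai

@openai.call("gpt-4o-mini", stream=True)
def recommend_book(genre: str) -> str:
    return f"Recommend a {genre} book"

stream = recommend_book("fantasy")
for chunk, _ in stream:  # (chunk, tool) pairs
    print(chunk.content, end="", flush=True)
# cost is available only after the stream has been exhausted.
print(stream.cost)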
mirascope/core/vertex/__init__.py CHANGED
@@ -33,7 +33,6 @@ warnings.warn(
 )
 
 __all__ = [
-    "call",
     "VertexCallParams",
     "VertexCallResponse",
     "VertexCallResponseChunk",
@@ -41,5 +40,6 @@ __all__ = [
     "VertexMessageParam",
     "VertexStream",
     "VertexTool",
+    "call",
     "vertex_call",
 ]
mirascope/core/vertex/_utils/_calculate_cost.py CHANGED
@@ -3,6 +3,7 @@
 
 def calculate_cost(
     input_chars: int | float | None,
+    cached_chars: int | float | None,
     output_chars: int | float | None,
     model: str = "gemini-1.5-pro",
     context_length: int = 0,
mirascope/core/vertex/_utils/_convert_message_params.py CHANGED
@@ -106,7 +106,7 @@ def convert_message_params(
             elif part.type == "audio_url":
                 # Should download the audio to determine the media type
                 audio = _load_media(part.url)
-                audio_type = get_audio_type(audio)
+                audio_type = f"audio/{get_audio_type(audio)}"
                 if audio_type not in [
                     "audio/wav",
                     "audio/mp3",
mirascope/core/vertex/call_response.py CHANGED
@@ -112,6 +112,12 @@ class VertexCallResponse(
         """Returns the number of input tokens."""
         return self.usage.prompt_token_count
 
+    @computed_field
+    @property
+    def cached_tokens(self) -> int:
+        """Returns the number of cached tokens."""
+        return 0
+
     @computed_field
     @property
     def output_tokens(self) -> int:
@@ -122,7 +128,9 @@ class VertexCallResponse(
     @property
     def cost(self) -> float | None:
         """Returns the cost of the call."""
-        return calculate_cost(self.input_tokens, self.output_tokens, self.model)
+        return calculate_cost(
+            self.input_tokens, self.cached_tokens, self.output_tokens, self.model
+        )
 
     @computed_field
     @cached_property
mirascope/core/vertex/call_response_chunk.py CHANGED
@@ -76,6 +76,11 @@ class VertexCallResponseChunk(
         """Returns the number of input tokens."""
         return None
 
+    @property
+    def cached_tokens(self) -> None:
+        """Returns the number of cached tokens."""
+        return None
+
     @property
     def output_tokens(self) -> None:
         """Returns the number of output tokens."""
mirascope/core/vertex/stream.py CHANGED
@@ -64,7 +64,9 @@ class VertexStream(
     @property
     def cost(self) -> float | None:
         """Returns the cost of the call."""
-        return calculate_cost(self.input_tokens, self.output_tokens, self.model)
+        return calculate_cost(
+            self.input_tokens, self.cached_tokens, self.output_tokens, self.model
+        )
 
     def _construct_message_param(
         self,
mirascope/integrations/_middleware_factory.py CHANGED
@@ -175,9 +175,9 @@ def middleware_factory(
         def new_stream_aiter(
             self: Any,  # noqa: ANN401
         ) -> AsyncGenerator[tuple[Any, Any | None], Any]:  # noqa: ANN401
-            async def generator() -> (
-                AsyncGenerator[tuple[Any, Any | None], Any]
-            ):
+            async def generator() -> AsyncGenerator[
+                tuple[Any, Any | None], Any
+            ]:
                 try:
                     async for chunk, tool in original_aiter():
                         yield chunk, tool
@@ -226,9 +226,9 @@ def middleware_factory(
         def new_aiter(
             self: Any,  # noqa: ANN401
         ) -> AsyncGenerator[tuple[Any, Any | None], Any]:  # noqa: ANN401
-            async def generator() -> (
-                AsyncGenerator[tuple[Any, Any | None], Any]
-            ):
+            async def generator() -> AsyncGenerator[
+                tuple[Any, Any | None], Any
+            ]:
                 try:
                     async for chunk in original_aiter():
                         yield chunk
mirascope/integrations/logfire/_utils.py CHANGED
@@ -23,7 +23,7 @@ def custom_context_manager(
 ) -> Generator[logfire.LogfireSpan, Any, None]:
     metadata: Metadata = _utils.get_metadata(fn, None)
     tags = metadata.get("tags", [])
-    with logfire.with_settings(custom_scope_suffix="mirascope", tags=list(tags)).span(
+    with logfire.with_settings(custom_scope_suffix="mirascope", tags=list(tags)).span(  # pyright: ignore[reportGeneralTypeIssues]
         fn.__name__
     ) as logfire_span:
         yield logfire_span
mirascope/llm/__init__.py CHANGED
@@ -3,4 +3,4 @@ from .call_response import CallResponse
 from .llm_call import call
 from .llm_override import override
 
-__all__ = ["call", "override", "CallResponse", "Provider"]
+__all__ = ["CallResponse", "Provider", "call", "override"]
mirascope/llm/_protocols.py CHANGED
@@ -90,7 +90,7 @@ class _CallDecorator(
     ],
 ):
     @overload
-    def __call__(
+    def __call__(  # pyright: ignore[reportOverlappingOverload]
         self,
         provider: Provider,
         model: str,
@@ -110,7 +110,7 @@
     ]: ...
 
     @overload
-    def __call__(
+    def __call__(  # pyright: ignore[reportOverlappingOverload]
         self,
         provider: Provider,
         model: str,
@@ -140,7 +140,7 @@
     ) -> SyncLLMFunctionDecorator[_BaseDynamicConfigT, _BaseCallResponseT]: ...
 
     @overload
-    def __call__(
+    def __call__(  # pyright: ignore[reportOverlappingOverload]
         self,
         provider: Provider,
         model: str,
@@ -288,7 +288,7 @@
     ) -> NoReturn: ...
 
     @overload
-    def __call__(
+    def __call__(  # pyright: ignore[reportOverlappingOverload]
         self,
         provider: Provider,
         model: str,
@@ -382,7 +382,7 @@
     ) -> SyncLLMFunctionDecorator[_BaseDynamicConfigT, _ParsedOutputT]: ...
 
     @overload
-    def __call__(
+    def __call__(  # pyright: ignore[reportOverlappingOverload]
         self,
         provider: Provider,
         model: str,
mirascope/llm/call_response.py CHANGED
@@ -13,6 +13,7 @@ from mirascope.core.base import (
     BaseCallResponse,
     BaseMessageParam,
     BaseTool,
+    Usage,
     transform_tool_outputs,
 )
 from mirascope.core.base.message_param import ToolResultPart
@@ -22,14 +23,11 @@ from mirascope.llm.tool import Tool
 
 _ResponseT = TypeVar("_ResponseT")
 
-_ToolMessageParamT = TypeVar("_ToolMessageParamT")
-_BaseToolT = TypeVar("_BaseToolT", bound=BaseTool)
-
 
 class CallResponse(
     BaseCallResponse[
         _ResponseT,
-        _BaseToolT,
+        Tool,
         Any,
         BaseDynamicConfig[Any, Any, Any],
         BaseMessageParam,
@@ -44,11 +42,11 @@
     We rely on _response having `common_` methods or properties for normalization.
     """
 
-    _response: BaseCallResponse[_ResponseT, _BaseToolT, Any, Any, Any, Any, Any]
+    _response: BaseCallResponse[_ResponseT, Tool, Any, Any, Any, Any, Any]
 
     def __init__(
         self,
-        response: BaseCallResponse[_ResponseT, _BaseToolT, Any, Any, Any, Any, Any],
+        response: BaseCallResponse[_ResponseT, Tool, Any, Any, Any, Any, Any],
     ) -> None:
         super().__init__(
             **{
@@ -65,7 +63,13 @@
     def __getattribute__(self, name: str) -> Any:  # noqa: ANN401
         special_names = {
             "_response",
+            "finish_reasons",
+            "usage",
+            "message_param",
             "user_message_param",
+            "tools",
+            "tool",
+            "tool_message_params",
             "__dict__",
             "__class__",
             "model_fields",
@@ -97,6 +101,11 @@
     def finish_reasons(self) -> list[FinishReason] | None:  # pyright: ignore [reportIncompatibleMethodOverride]
         return self._response.common_finish_reasons
 
+    @property
+    def usage(self) -> Usage | None:
+        """Returns the usage of the chat completion."""
+        return self._response.common_usage
+
     @computed_field
     @cached_property
     def message_param(self) -> BaseMessageParam:
@@ -116,7 +125,7 @@
     @classmethod
     @transform_tool_outputs
     def tool_message_params(
-        cls, tools_and_outputs: list[tuple[BaseTool, str]]
+        cls, tools_and_outputs: list[tuple[Tool, str]]
     ) -> list[BaseMessageParam]:
         """Returns the tool message parameters for tool call results.
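
The expanded `special_names` set controls which attributes resolve on the provider-agnostic wrapper itself instead of being forwarded to the wrapped provider response. A simplified model of that delegation pattern follows; only the names in `special_names` and the `_response` attribute come from the diff, the rest is an illustrative reduction:

class DelegatingResponse:
    """Toy version of CallResponse's __getattribute__ delegation."""

    _SPECIAL = {
        "_response", "finish_reasons", "usage", "message_param",
        "user_message_param", "tools", "tool", "tool_message_params",
    }

    def __init__(self, response: object) -> None:
        object.__setattr__(self, "_response", response)

    def __getattribute__(self, name: str) -> object:
        # Special names (and dunders) resolve on the wrapper class itself.
        if name.startswith("__") or name in DelegatingResponse._SPECIAL:
            return object.__getattribute__(self, name)
        # Everything else is forwarded to the wrapped provider response.
        return getattr(object.__getattribute__(self, "_response"), name)

    @property
    def usage(self) -> object:
        # Mirrors the new property: normalize via the provider's common_usage.
        return getattr(object.__getattribute__(self, "_response"), "common_usage", None)

class FakeResponse:
    content = "hello"
    common_usage = {"input_tokens": 3, "output_tokens": 1}

r = DelegatingResponse(FakeResponse())
print(r.content)  # forwarded to the provider response: "hello"
print(r.usage)    # resolved on the wrapper via common_usage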