mirascope 2.0.0a3__py3-none-any.whl → 2.0.0a4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (93)
  1. mirascope/api/_generated/__init__.py +62 -6
  2. mirascope/api/_generated/client.py +8 -0
  3. mirascope/api/_generated/errors/__init__.py +11 -1
  4. mirascope/api/_generated/errors/conflict_error.py +15 -0
  5. mirascope/api/_generated/errors/forbidden_error.py +15 -0
  6. mirascope/api/_generated/errors/internal_server_error.py +15 -0
  7. mirascope/api/_generated/errors/not_found_error.py +15 -0
  8. mirascope/api/_generated/organizations/__init__.py +25 -0
  9. mirascope/api/_generated/organizations/client.py +380 -0
  10. mirascope/api/_generated/organizations/raw_client.py +876 -0
  11. mirascope/api/_generated/organizations/types/__init__.py +23 -0
  12. mirascope/api/_generated/organizations/types/organizations_create_response.py +24 -0
  13. mirascope/api/_generated/organizations/types/organizations_create_response_role.py +7 -0
  14. mirascope/api/_generated/organizations/types/organizations_get_response.py +24 -0
  15. mirascope/api/_generated/organizations/types/organizations_get_response_role.py +7 -0
  16. mirascope/api/_generated/organizations/types/organizations_list_response_item.py +24 -0
  17. mirascope/api/_generated/organizations/types/organizations_list_response_item_role.py +7 -0
  18. mirascope/api/_generated/organizations/types/organizations_update_response.py +24 -0
  19. mirascope/api/_generated/organizations/types/organizations_update_response_role.py +7 -0
  20. mirascope/api/_generated/projects/__init__.py +17 -0
  21. mirascope/api/_generated/projects/client.py +458 -0
  22. mirascope/api/_generated/projects/raw_client.py +1016 -0
  23. mirascope/api/_generated/projects/types/__init__.py +15 -0
  24. mirascope/api/_generated/projects/types/projects_create_response.py +30 -0
  25. mirascope/api/_generated/projects/types/projects_get_response.py +30 -0
  26. mirascope/api/_generated/projects/types/projects_list_response_item.py +30 -0
  27. mirascope/api/_generated/projects/types/projects_update_response.py +30 -0
  28. mirascope/api/_generated/reference.md +586 -0
  29. mirascope/api/_generated/types/__init__.py +20 -4
  30. mirascope/api/_generated/types/already_exists_error.py +24 -0
  31. mirascope/api/_generated/types/already_exists_error_tag.py +5 -0
  32. mirascope/api/_generated/types/database_error.py +24 -0
  33. mirascope/api/_generated/types/database_error_tag.py +5 -0
  34. mirascope/api/_generated/types/http_api_decode_error.py +1 -3
  35. mirascope/api/_generated/types/issue.py +1 -5
  36. mirascope/api/_generated/types/not_found_error_body.py +24 -0
  37. mirascope/api/_generated/types/not_found_error_tag.py +5 -0
  38. mirascope/api/_generated/types/permission_denied_error.py +24 -0
  39. mirascope/api/_generated/types/permission_denied_error_tag.py +7 -0
  40. mirascope/api/_generated/types/property_key.py +2 -2
  41. mirascope/api/_generated/types/{property_key_tag.py → property_key_key.py} +3 -5
  42. mirascope/api/_generated/types/{property_key_tag_tag.py → property_key_key_tag.py} +1 -1
  43. mirascope/llm/__init__.py +4 -0
  44. mirascope/llm/providers/__init__.py +6 -0
  45. mirascope/llm/providers/anthropic/__init__.py +6 -1
  46. mirascope/llm/providers/anthropic/_utils/__init__.py +15 -5
  47. mirascope/llm/providers/anthropic/_utils/beta_decode.py +271 -0
  48. mirascope/llm/providers/anthropic/_utils/beta_encode.py +216 -0
  49. mirascope/llm/providers/anthropic/_utils/decode.py +39 -7
  50. mirascope/llm/providers/anthropic/_utils/encode.py +156 -64
  51. mirascope/llm/providers/anthropic/beta_provider.py +322 -0
  52. mirascope/llm/providers/anthropic/model_id.py +10 -27
  53. mirascope/llm/providers/anthropic/model_info.py +87 -0
  54. mirascope/llm/providers/anthropic/provider.py +127 -145
  55. mirascope/llm/providers/base/_utils.py +15 -1
  56. mirascope/llm/providers/google/_utils/decode.py +55 -3
  57. mirascope/llm/providers/google/_utils/encode.py +14 -6
  58. mirascope/llm/providers/google/model_id.py +7 -13
  59. mirascope/llm/providers/google/model_info.py +62 -0
  60. mirascope/llm/providers/google/provider.py +8 -4
  61. mirascope/llm/providers/load_provider.py +8 -2
  62. mirascope/llm/providers/mlx/_utils.py +23 -1
  63. mirascope/llm/providers/mlx/encoding/transformers.py +17 -1
  64. mirascope/llm/providers/mlx/provider.py +4 -0
  65. mirascope/llm/providers/ollama/__init__.py +19 -0
  66. mirascope/llm/providers/ollama/provider.py +71 -0
  67. mirascope/llm/providers/openai/completions/__init__.py +6 -1
  68. mirascope/llm/providers/openai/completions/_utils/decode.py +57 -5
  69. mirascope/llm/providers/openai/completions/_utils/encode.py +9 -8
  70. mirascope/llm/providers/openai/completions/base_provider.py +513 -0
  71. mirascope/llm/providers/openai/completions/provider.py +13 -447
  72. mirascope/llm/providers/openai/model_info.py +57 -0
  73. mirascope/llm/providers/openai/provider.py +16 -4
  74. mirascope/llm/providers/openai/responses/_utils/decode.py +55 -4
  75. mirascope/llm/providers/openai/responses/_utils/encode.py +9 -9
  76. mirascope/llm/providers/openai/responses/provider.py +20 -21
  77. mirascope/llm/providers/provider_id.py +11 -1
  78. mirascope/llm/providers/provider_registry.py +3 -1
  79. mirascope/llm/providers/together/__init__.py +19 -0
  80. mirascope/llm/providers/together/provider.py +40 -0
  81. mirascope/llm/responses/__init__.py +3 -0
  82. mirascope/llm/responses/base_response.py +4 -0
  83. mirascope/llm/responses/base_stream_response.py +25 -1
  84. mirascope/llm/responses/finish_reason.py +1 -0
  85. mirascope/llm/responses/response.py +9 -0
  86. mirascope/llm/responses/root_response.py +5 -1
  87. mirascope/llm/responses/usage.py +95 -0
  88. {mirascope-2.0.0a3.dist-info → mirascope-2.0.0a4.dist-info}/METADATA +3 -3
  89. {mirascope-2.0.0a3.dist-info → mirascope-2.0.0a4.dist-info}/RECORD +91 -50
  90. mirascope/llm/providers/openai/shared/__init__.py +0 -7
  91. mirascope/llm/providers/openai/shared/_utils.py +0 -59
  92. {mirascope-2.0.0a3.dist-info → mirascope-2.0.0a4.dist-info}/WHEEL +0 -0
  93. {mirascope-2.0.0a3.dist-info → mirascope-2.0.0a4.dist-info}/licenses/LICENSE +0 -0
mirascope/llm/providers/anthropic/provider.py

@@ -6,7 +6,7 @@ from typing_extensions import Unpack
 from anthropic import Anthropic, AsyncAnthropic
 
 from ...context import Context, DepsT
-from ...formatting import Format, FormattableT
+from ...formatting import Format, FormattableT, resolve_format
 from ...messages import Message
 from ...responses import (
     AsyncContextResponse,
@@ -30,7 +30,24 @@ from ...tools import (
 )
 from ..base import BaseProvider, Params
 from . import _utils
+from .beta_provider import AnthropicBetaProvider
 from .model_id import AnthropicModelId, model_name
+from .model_info import MODELS_WITHOUT_STRICT_STRUCTURED_OUTPUTS
+
+
+def _should_use_beta(
+    model_id: AnthropicModelId,
+    format: type[FormattableT] | Format[FormattableT] | None,
+) -> bool:
+    """Determine whether to use the beta API based on format mode.
+
+    If the format resolves to strict mode, and the model plausibly has
+    strict structured output support, then we will use the beta provider.
+    """
+    resolved = resolve_format(format, default_mode=_utils.DEFAULT_FORMAT_MODE)
+    if resolved is None or resolved.mode != "strict":
+        return False
+    return model_name(model_id) not in MODELS_WITHOUT_STRICT_STRUCTURED_OUTPUTS
 
 
 class AnthropicProvider(BaseProvider[Anthropic]):
@@ -38,6 +55,7 @@ class AnthropicProvider(BaseProvider[Anthropic]):
 
     id = "anthropic"
     default_scope = "anthropic/"
+    _beta_provider: AnthropicBetaProvider
 
     def __init__(
         self, *, api_key: str | None = None, base_url: str | None = None
@@ -45,6 +63,7 @@ class AnthropicProvider(BaseProvider[Anthropic]):
         """Initialize the Anthropic client."""
         self.client = Anthropic(api_key=api_key, base_url=base_url)
         self.async_client = AsyncAnthropic(api_key=api_key, base_url=base_url)
+        self._beta_provider = AnthropicBetaProvider(api_key=api_key, base_url=base_url)
 
     def _call(
         self,
@@ -55,32 +74,27 @@ class AnthropicProvider(BaseProvider[Anthropic]):
         format: type[FormattableT] | Format[FormattableT] | None = None,
         **params: Unpack[Params],
     ) -> Response | Response[FormattableT]:
-        """Generate an `llm.Response` by synchronously calling the Anthropic Messages API.
-
-        Args:
-            model_id: Model identifier to use.
-            messages: Messages to send to the LLM.
-            tools: Optional tools that the model may invoke.
-            format: Optional response format specifier.
-            **params: Additional parameters to configure output (e.g. temperature). See `llm.Params`.
-
-        Returns:
-            An `llm.Response` object containing the LLM-generated content.
-        """
-        input_messages, format, kwargs = _utils.encode_request(
+        """Generate an `llm.Response` by synchronously calling the Anthropic Messages API."""
+        if _should_use_beta(model_id, format):
+            return self._beta_provider.call(
+                model_id=model_id,
+                messages=messages,
+                tools=tools,
+                format=format,
+                **params,
+            )
+
+        input_messages, resolved_format, kwargs = _utils.encode_request(
             model_id=model_id,
             messages=messages,
             tools=tools,
             format=format,
             params=params,
         )
-
         anthropic_response = self.client.messages.create(**kwargs)
-
-        assistant_message, finish_reason = _utils.decode_response(
+        assistant_message, finish_reason, usage = _utils.decode_response(
             anthropic_response, model_id
         )
-
         return Response(
             raw=anthropic_response,
             provider_id="anthropic",
@@ -91,7 +105,8 @@ class AnthropicProvider(BaseProvider[Anthropic]):
             input_messages=input_messages,
             assistant_message=assistant_message,
             finish_reason=finish_reason,
-            format=format,
+            usage=usage,
+            format=resolved_format,
         )
 
     def _context_call(
@@ -106,33 +121,28 @@ class AnthropicProvider(BaseProvider[Anthropic]):
         format: type[FormattableT] | Format[FormattableT] | None = None,
         **params: Unpack[Params],
     ) -> ContextResponse[DepsT, None] | ContextResponse[DepsT, FormattableT]:
-        """Generate an `llm.ContextResponse` by synchronously calling the Anthropic Messages API.
-
-        Args:
-            ctx: Context object with dependencies for tools.
-            model_id: Model identifier to use.
-            messages: Messages to send to the LLM.
-            tools: Optional tools that the model may invoke.
-            format: Optional response format specifier.
-            **params: Additional parameters to configure output (e.g. temperature). See `llm.Params`.
-
-        Returns:
-            An `llm.ContextResponse` object containing the LLM-generated content.
-        """
-        input_messages, format, kwargs = _utils.encode_request(
+        """Generate an `llm.ContextResponse` by synchronously calling the Anthropic Messages API."""
+        if _should_use_beta(model_id, format):
+            return self._beta_provider.context_call(
+                ctx=ctx,
+                model_id=model_id,
+                messages=messages,
+                tools=tools,
+                format=format,
+                **params,
+            )
+
+        input_messages, resolved_format, kwargs = _utils.encode_request(
             model_id=model_id,
            messages=messages,
             tools=tools,
             format=format,
             params=params,
         )
-
         anthropic_response = self.client.messages.create(**kwargs)
-
-        assistant_message, finish_reason = _utils.decode_response(
+        assistant_message, finish_reason, usage = _utils.decode_response(
            anthropic_response, model_id
        )
-
         return ContextResponse(
             raw=anthropic_response,
             provider_id="anthropic",
@@ -143,7 +153,8 @@ class AnthropicProvider(BaseProvider[Anthropic]):
             input_messages=input_messages,
             assistant_message=assistant_message,
             finish_reason=finish_reason,
-            format=format,
+            usage=usage,
+            format=resolved_format,
         )
 
     async def _call_async(
@@ -155,32 +166,27 @@ class AnthropicProvider(BaseProvider[Anthropic]):
         format: type[FormattableT] | Format[FormattableT] | None = None,
         **params: Unpack[Params],
     ) -> AsyncResponse | AsyncResponse[FormattableT]:
-        """Generate an `llm.AsyncResponse` by asynchronously calling the Anthropic Messages API.
-
-        Args:
-            model_id: Model identifier to use.
-            messages: Messages to send to the LLM.
-            tools: Optional tools that the model may invoke.
-            format: Optional response format specifier.
-            **params: Additional parameters to configure output (e.g. temperature). See `llm.Params`.
-
-        Returns:
-            An `llm.AsyncResponse` object containing the LLM-generated content.
-        """
-        input_messages, format, kwargs = _utils.encode_request(
+        """Generate an `llm.AsyncResponse` by asynchronously calling the Anthropic Messages API."""
+        if _should_use_beta(model_id, format):
+            return await self._beta_provider.call_async(
+                model_id=model_id,
+                messages=messages,
+                tools=tools,
+                format=format,
+                **params,
+            )
+
+        input_messages, resolved_format, kwargs = _utils.encode_request(
             model_id=model_id,
             messages=messages,
             tools=tools,
             format=format,
             params=params,
         )
-
         anthropic_response = await self.async_client.messages.create(**kwargs)
-
-        assistant_message, finish_reason = _utils.decode_response(
+        assistant_message, finish_reason, usage = _utils.decode_response(
             anthropic_response, model_id
         )
-
         return AsyncResponse(
             raw=anthropic_response,
             provider_id="anthropic",
@@ -191,7 +197,8 @@ class AnthropicProvider(BaseProvider[Anthropic]):
             input_messages=input_messages,
             assistant_message=assistant_message,
             finish_reason=finish_reason,
-            format=format,
+            usage=usage,
+            format=resolved_format,
         )
 
     async def _context_call_async(
@@ -206,33 +213,28 @@ class AnthropicProvider(BaseProvider[Anthropic]):
         format: type[FormattableT] | Format[FormattableT] | None = None,
         **params: Unpack[Params],
     ) -> AsyncContextResponse[DepsT, None] | AsyncContextResponse[DepsT, FormattableT]:
-        """Generate an `llm.AsyncContextResponse` by asynchronously calling the Anthropic Messages API.
-
-        Args:
-            ctx: Context object with dependencies for tools.
-            model_id: Model identifier to use.
-            messages: Messages to send to the LLM.
-            tools: Optional tools that the model may invoke.
-            format: Optional response format specifier.
-            **params: Additional parameters to configure output (e.g. temperature). See `llm.Params`.
-
-        Returns:
-            An `llm.AsyncContextResponse` object containing the LLM-generated content.
-        """
-        input_messages, format, kwargs = _utils.encode_request(
+        """Generate an `llm.AsyncContextResponse` by asynchronously calling the Anthropic Messages API."""
+        if _should_use_beta(model_id, format):
+            return await self._beta_provider.context_call_async(
+                ctx=ctx,
+                model_id=model_id,
+                messages=messages,
+                tools=tools,
+                format=format,
+                **params,
+            )
+
+        input_messages, resolved_format, kwargs = _utils.encode_request(
             model_id=model_id,
             messages=messages,
             tools=tools,
             format=format,
             params=params,
         )
-
         anthropic_response = await self.async_client.messages.create(**kwargs)
-
-        assistant_message, finish_reason = _utils.decode_response(
+        assistant_message, finish_reason, usage = _utils.decode_response(
             anthropic_response, model_id
         )
-
         return AsyncContextResponse(
             raw=anthropic_response,
             provider_id="anthropic",
@@ -243,7 +245,8 @@ class AnthropicProvider(BaseProvider[Anthropic]):
             input_messages=input_messages,
             assistant_message=assistant_message,
             finish_reason=finish_reason,
-            format=format,
+            usage=usage,
+            format=resolved_format,
         )
 
     def _stream(
@@ -255,30 +258,25 @@ class AnthropicProvider(BaseProvider[Anthropic]):
         format: type[FormattableT] | Format[FormattableT] | None = None,
         **params: Unpack[Params],
     ) -> StreamResponse | StreamResponse[FormattableT]:
-        """Generate an `llm.StreamResponse` by synchronously streaming from the Anthropic Messages API.
-
-        Args:
-            model_id: Model identifier to use.
-            messages: Messages to send to the LLM.
-            tools: Optional tools that the model may invoke.
-            format: Optional response format specifier.
-            **params: Additional parameters to configure output (e.g. temperature). See `llm.Params`.
-
-        Returns:
-            An `llm.StreamResponse` object for iterating over the LLM-generated content.
-        """
-        input_messages, format, kwargs = _utils.encode_request(
+        """Generate an `llm.StreamResponse` by synchronously streaming from the Anthropic Messages API."""
+        if _should_use_beta(model_id, format):
+            return self._beta_provider.stream(
+                model_id=model_id,
+                messages=messages,
+                tools=tools,
+                format=format,
+                **params,
+            )
+
+        input_messages, resolved_format, kwargs = _utils.encode_request(
             model_id=model_id,
             messages=messages,
             tools=tools,
             format=format,
             params=params,
         )
-
         anthropic_stream = self.client.messages.stream(**kwargs)
-
         chunk_iterator = _utils.decode_stream(anthropic_stream)
-
         return StreamResponse(
             provider_id="anthropic",
             model_id=model_id,
@@ -287,7 +285,7 @@ class AnthropicProvider(BaseProvider[Anthropic]):
             tools=tools,
             input_messages=input_messages,
             chunk_iterator=chunk_iterator,
-            format=format,
+            format=resolved_format,
         )
 
     def _context_stream(
@@ -302,31 +300,26 @@ class AnthropicProvider(BaseProvider[Anthropic]):
         format: type[FormattableT] | Format[FormattableT] | None = None,
         **params: Unpack[Params],
     ) -> ContextStreamResponse[DepsT] | ContextStreamResponse[DepsT, FormattableT]:
-        """Generate an `llm.ContextStreamResponse` by synchronously streaming from the Anthropic Messages API.
-
-        Args:
-            ctx: Context object with dependencies for tools.
-            model_id: Model identifier to use.
-            messages: Messages to send to the LLM.
-            tools: Optional tools that the model may invoke.
-            format: Optional response format specifier.
-            **params: Additional parameters to configure output (e.g. temperature). See `llm.Params`.
-
-        Returns:
-            An `llm.ContextStreamResponse` object for iterating over the LLM-generated content.
-        """
-        input_messages, format, kwargs = _utils.encode_request(
+        """Generate an `llm.ContextStreamResponse` by synchronously streaming from the Anthropic Messages API."""
+        if _should_use_beta(model_id, format):
+            return self._beta_provider.context_stream(
+                ctx=ctx,
+                model_id=model_id,
+                messages=messages,
+                tools=tools,
+                format=format,
+                **params,
+            )
+
+        input_messages, resolved_format, kwargs = _utils.encode_request(
             model_id=model_id,
             messages=messages,
             tools=tools,
             format=format,
             params=params,
         )
-
         anthropic_stream = self.client.messages.stream(**kwargs)
-
         chunk_iterator = _utils.decode_stream(anthropic_stream)
-
         return ContextStreamResponse(
             provider_id="anthropic",
             model_id=model_id,
@@ -335,7 +328,7 @@ class AnthropicProvider(BaseProvider[Anthropic]):
             tools=tools,
             input_messages=input_messages,
             chunk_iterator=chunk_iterator,
-            format=format,
+            format=resolved_format,
         )
 
     async def _stream_async(
@@ -347,30 +340,24 @@ class AnthropicProvider(BaseProvider[Anthropic]):
         format: type[FormattableT] | Format[FormattableT] | None = None,
         **params: Unpack[Params],
     ) -> AsyncStreamResponse | AsyncStreamResponse[FormattableT]:
-        """Generate an `llm.AsyncStreamResponse` by asynchronously streaming from the Anthropic Messages API.
-
-        Args:
-            model_id: Model identifier to use.
-            messages: Messages to send to the LLM.
-            tools: Optional tools that the model may invoke.
-            format: Optional response format specifier.
-            **params: Additional parameters to configure output (e.g. temperature). See `llm.Params`.
-
-        Returns:
-            An `llm.AsyncStreamResponse` object for asynchronously iterating over the LLM-generated content.
-        """
-        input_messages, format, kwargs = _utils.encode_request(
+        """Generate an `llm.AsyncStreamResponse` by asynchronously streaming from the Anthropic Messages API."""
+        if _should_use_beta(model_id, format):
+            return await self._beta_provider.stream_async(
+                model_id=model_id,
+                messages=messages,
+                tools=tools,
+                format=format,
+                **params,
+            )
+        input_messages, resolved_format, kwargs = _utils.encode_request(
             model_id=model_id,
            messages=messages,
             tools=tools,
             format=format,
             params=params,
         )
-
         anthropic_stream = self.async_client.messages.stream(**kwargs)
-
         chunk_iterator = _utils.decode_async_stream(anthropic_stream)
-
         return AsyncStreamResponse(
             provider_id="anthropic",
             model_id=model_id,
@@ -379,7 +366,7 @@ class AnthropicProvider(BaseProvider[Anthropic]):
             tools=tools,
             input_messages=input_messages,
             chunk_iterator=chunk_iterator,
-            format=format,
+            format=resolved_format,
         )
 
     async def _context_stream_async(
@@ -397,31 +384,26 @@ class AnthropicProvider(BaseProvider[Anthropic]):
         AsyncContextStreamResponse[DepsT]
         | AsyncContextStreamResponse[DepsT, FormattableT]
     ):
-        """Generate an `llm.AsyncContextStreamResponse` by asynchronously streaming from the Anthropic Messages API.
-
-        Args:
-            ctx: Context object with dependencies for tools.
-            model_id: Model identifier to use.
-            messages: Messages to send to the LLM.
-            tools: Optional tools that the model may invoke.
-            format: Optional response format specifier.
-            **params: Additional parameters to configure output (e.g. temperature). See `llm.Params`.
-
-        Returns:
-            An `llm.AsyncContextStreamResponse` object for asynchronously iterating over the LLM-generated content.
-        """
-        input_messages, format, kwargs = _utils.encode_request(
+        """Generate an `llm.AsyncContextStreamResponse` by asynchronously streaming from the Anthropic Messages API."""
+        if _should_use_beta(model_id, format):
+            return await self._beta_provider.context_stream_async(
+                ctx=ctx,
+                model_id=model_id,
+                messages=messages,
+                tools=tools,
+                format=format,
+                **params,
+            )
+
+        input_messages, resolved_format, kwargs = _utils.encode_request(
             model_id=model_id,
             messages=messages,
             tools=tools,
             format=format,
             params=params,
         )
-
        anthropic_stream = self.async_client.messages.stream(**kwargs)
-
         chunk_iterator = _utils.decode_async_stream(anthropic_stream)
-
         return AsyncContextStreamResponse(
             provider_id="anthropic",
             model_id=model_id,
@@ -430,5 +412,5 @@ class AnthropicProvider(BaseProvider[Anthropic]):
             tools=tools,
             input_messages=input_messages,
             chunk_iterator=chunk_iterator,
-            format=format,
+            format=resolved_format,
         )
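
The new `_should_use_beta` helper routes a request to the `AnthropicBetaProvider` only when the resolved format mode is "strict" and the model is not listed in `MODELS_WITHOUT_STRICT_STRUCTURED_OUTPUTS`; otherwise the existing Messages API path is used unchanged. Below is a minimal standalone sketch of that decision rule; the `ResolvedFormat` stand-in, the set contents, and the model names are illustrative, not the package's actual values.

# Illustrative sketch only; not part of the package diff above.
from dataclasses import dataclass


@dataclass
class ResolvedFormat:
    mode: str  # e.g. "strict", "tool", or "json"


# Hypothetical stand-in for the package's MODELS_WITHOUT_STRICT_STRUCTURED_OUTPUTS
MODELS_WITHOUT_STRICT_STRUCTURED_OUTPUTS = {"legacy-model"}


def should_use_beta(model_name: str, resolved: ResolvedFormat | None) -> bool:
    # Mirrors the diff: use beta only for strict mode on models not excluded above.
    if resolved is None or resolved.mode != "strict":
        return False
    return model_name not in MODELS_WITHOUT_STRICT_STRUCTURED_OUTPUTS


assert should_use_beta("new-model", ResolvedFormat(mode="strict")) is True
assert should_use_beta("legacy-model", ResolvedFormat(mode="strict")) is False
assert should_use_beta("new-model", ResolvedFormat(mode="tool")) is False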

mirascope/llm/providers/base/_utils.py

@@ -1,7 +1,7 @@
 import logging
 from collections.abc import Generator, Sequence
 from contextlib import contextmanager
-from typing import TYPE_CHECKING, TypeAlias, get_type_hints
+from typing import TYPE_CHECKING, TypeAlias, cast, get_type_hints
 
 from ...content import Text
 from ...messages import AssistantMessage, Message, SystemMessage, UserMessage
@@ -16,6 +16,20 @@ logger = logging.getLogger(__name__)
 SystemMessageContent: TypeAlias = str | None
 
 
+def ensure_additional_properties_false(obj: object) -> None:
+    """Recursively adds additionalProperties = False to a schema, required for strict mode."""
+    if isinstance(obj, dict):
+        obj = cast(dict[str, object], obj)
+        if obj.get("type") == "object" and "additionalProperties" not in obj:
+            obj["additionalProperties"] = False
+        for value in obj.values():
+            ensure_additional_properties_false(value)
+    elif isinstance(obj, list):
+        obj = cast(list[object], obj)
+        for item in obj:
+            ensure_additional_properties_false(item)
+
+
 def add_system_instructions(
     messages: Sequence[Message], additional_system_instructions: str
 ) -> Sequence[Message]:
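
The new `ensure_additional_properties_false` helper walks a JSON schema and sets `additionalProperties: False` on every object node that does not already define it, which strict structured outputs require. Here is a small usage sketch with a hypothetical nested schema; the schema itself is illustrative, not from the package.

# Illustrative sketch only; the schema below is hypothetical.
from typing import cast


def ensure_additional_properties_false(obj: object) -> None:
    """Recursively adds additionalProperties = False to a schema, required for strict mode."""
    if isinstance(obj, dict):
        obj = cast(dict[str, object], obj)
        if obj.get("type") == "object" and "additionalProperties" not in obj:
            obj["additionalProperties"] = False
        for value in obj.values():
            ensure_additional_properties_false(value)
    elif isinstance(obj, list):
        obj = cast(list[object], obj)
        for item in obj:
            ensure_additional_properties_false(item)


schema = {
    "type": "object",
    "properties": {
        "book": {"type": "object", "properties": {"title": {"type": "string"}}},
        "tags": {"type": "array", "items": {"type": "object", "properties": {}}},
    },
}
ensure_additional_properties_false(schema)
# Every object node now carries additionalProperties=False, including nested ones.
assert schema["additionalProperties"] is False
assert schema["properties"]["book"]["additionalProperties"] is False
assert schema["properties"]["tags"]["items"]["additionalProperties"] is False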

mirascope/llm/providers/google/_utils/decode.py

@@ -29,6 +29,8 @@ from ....responses import (
     FinishReasonChunk,
     RawMessageChunk,
     RawStreamEventChunk,
+    Usage,
+    UsageDeltaChunk,
 )
 from ..model_id import GoogleModelId, model_name
 from .encode import UNKNOWN_TOOL_ID
@@ -43,6 +45,30 @@ GOOGLE_FINISH_REASON_MAP = {
 }
 
 
+def _decode_usage(
+    usage: genai_types.GenerateContentResponseUsageMetadata | None,
+) -> Usage | None:
+    """Convert Google UsageMetadata to Mirascope Usage."""
+    if (
+        usage is None
+        or usage.prompt_token_count is None
+        or usage.candidates_token_count is None
+    ):  # pragma: no cover
+        return None
+
+    reasoning_tokens = usage.thoughts_token_count or 0
+    output_tokens = usage.candidates_token_count + reasoning_tokens
+
+    return Usage(
+        input_tokens=usage.prompt_token_count,
+        output_tokens=output_tokens,
+        cache_read_tokens=usage.cached_content_token_count or 0,
+        cache_write_tokens=0,
+        reasoning_tokens=usage.thoughts_token_count or 0,
+        raw=usage,
+    )
+
+
 def _decode_content_part(part: genai_types.Part) -> AssistantContentPart | None:
     """Returns an `AssistantContentPart` (or `None`) decoded from a google `Part`"""
     if part.thought and part.text:
@@ -100,8 +126,8 @@ def _decode_candidate_content(
 def decode_response(
     response: genai_types.GenerateContentResponse,
     model_id: GoogleModelId,
-) -> tuple[AssistantMessage, FinishReason | None]:
-    """Returns an `AssistantMessage` and `FinishReason` extracted from a `GenerateContentResponse`"""
+) -> tuple[AssistantMessage, FinishReason | None, Usage | None]:
+    """Returns an `AssistantMessage`, `FinishReason`, and `Usage` extracted from a `GenerateContentResponse`"""
     content: Sequence[AssistantContentPart] = []
     candidate_content: genai_types.Content | None = None
     finish_reason: FinishReason | None = None
@@ -122,7 +148,8 @@ def decode_response(
         raw_message=candidate_content.model_dump(),
     )
 
-    return assistant_message, finish_reason
+    usage = _decode_usage(response.usage_metadata)
+    return assistant_message, finish_reason, usage
 
 
 class _GoogleChunkProcessor:
@@ -132,6 +159,8 @@ class _GoogleChunkProcessor:
         self.current_content_type: Literal["text", "tool_call", "thought"] | None = None
         self.accumulated_parts: list[genai_types.Part] = []
         self.reconstructed_content = genai_types.Content(parts=[])
+        # Track previous cumulative usage to compute deltas
+        self.prev_usage = Usage()
 
     def process_chunk(
         self, chunk: genai_types.GenerateContentResponse
@@ -207,6 +236,29 @@ class _GoogleChunkProcessor:
         if finish_reason is not None:
             yield FinishReasonChunk(finish_reason=finish_reason)
 
+        # Emit usage delta if usage metadata is present
+        if chunk.usage_metadata:
+            usage_metadata = chunk.usage_metadata
+            current_input = usage_metadata.prompt_token_count or 0
+            current_output = usage_metadata.candidates_token_count or 0
+            current_cache_read = usage_metadata.cached_content_token_count or 0
+            current_reasoning = usage_metadata.thoughts_token_count or 0
+
+            yield UsageDeltaChunk(
+                input_tokens=current_input - self.prev_usage.input_tokens,
+                output_tokens=current_output - self.prev_usage.output_tokens,
+                cache_read_tokens=current_cache_read
+                - self.prev_usage.cache_read_tokens,
+                cache_write_tokens=0,
+                reasoning_tokens=current_reasoning - self.prev_usage.reasoning_tokens,
+            )
+
+            # Update previous usage
+            self.prev_usage.input_tokens = current_input
+            self.prev_usage.output_tokens = current_output
+            self.prev_usage.cache_read_tokens = current_cache_read
+            self.prev_usage.reasoning_tokens = current_reasoning
+
     def raw_message_chunk(self) -> RawMessageChunk:
         content = genai_types.Content(role="model", parts=self.accumulated_parts)
         return RawMessageChunk(raw_message=content.model_dump())
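
Google reports usage metadata as running totals on each streamed chunk, so `_GoogleChunkProcessor` keeps the previously seen totals in `prev_usage` and emits only the difference as a `UsageDeltaChunk`; for non-streaming responses, `_decode_usage` also folds `thoughts_token_count` into `output_tokens`. Below is a toy sketch of the cumulative-to-delta conversion; the `Cumulative` class and token numbers are illustrative stand-ins, not mirascope types.

# Illustrative sketch only; not part of the package diff above.
from dataclasses import dataclass


@dataclass
class Cumulative:
    input_tokens: int = 0
    output_tokens: int = 0


def to_deltas(chunks: list[Cumulative]) -> list[Cumulative]:
    prev = Cumulative()
    deltas: list[Cumulative] = []
    for chunk in chunks:
        # Each chunk reports running totals; emit only the increase since the
        # previous chunk, then remember the new totals.
        deltas.append(
            Cumulative(
                input_tokens=chunk.input_tokens - prev.input_tokens,
                output_tokens=chunk.output_tokens - prev.output_tokens,
            )
        )
        prev = chunk
    return deltas


# Cumulative usage across 3 streamed chunks -> per-chunk deltas.
stream = [Cumulative(12, 4), Cumulative(12, 9), Cumulative(12, 15)]
assert to_deltas(stream) == [Cumulative(12, 4), Cumulative(0, 5), Cumulative(0, 6)]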

mirascope/llm/providers/google/_utils/encode.py

@@ -21,6 +21,7 @@ from ....messages import AssistantMessage, Message, UserMessage
 from ....tools import FORMAT_TOOL_NAME, AnyToolSchema, BaseToolkit
 from ...base import Params, _utils as _base_utils
 from ..model_id import GoogleModelId, model_name
+from ..model_info import MODELS_WITHOUT_STRUCTURED_OUTPUT_AND_TOOLS_SUPPORT
 
 UNKNOWN_TOOL_ID = "google_unknown_tool_id"
 
@@ -187,6 +188,7 @@ def encode_request(
         genai_types.GenerateContentConfigDict()
     )
     encode_thoughts = False
+    google_model_name = model_name(model_id)
 
     with _base_utils.ensure_all_params_accessed(
         params=params, provider_id="google"
@@ -219,17 +221,23 @@ def encode_request(
     tools = tools.tools if isinstance(tools, BaseToolkit) else tools or []
     google_tools: list[genai_types.ToolDict] = []
 
-    format = resolve_format(
-        format,
-        # Google does not support strict outputs when tools are present
-        # (Gemini 2.5 will error, 2.0 and below will ignore tools)
-        default_mode="strict" if not tools else "tool",
+    allows_strict_mode_with_tools = (
+        google_model_name not in MODELS_WITHOUT_STRUCTURED_OUTPUT_AND_TOOLS_SUPPORT
     )
+    # Older google models do not allow strict mode when using tools; if so, we use tool
+    # mode when tools are present by default for compatibility. Otherwise, prefer strict mode.
+    default_mode = "tool" if tools and not allows_strict_mode_with_tools else "strict"
+    format = resolve_format(format, default_mode=default_mode)
     if format is not None:
-        if format.mode in ("strict", "json") and tools:
+        if (
+            format.mode in ("strict", "json")
+            and tools
+            and not allows_strict_mode_with_tools
+        ):
             raise FeatureNotSupportedError(
                 feature=f"formatting_mode:{format.mode} with tools",
                 provider_id="google",
+                model_id=model_id,
             )
 
         if format.mode == "strict":
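
The encoder now picks the default format mode per model: strict structured outputs are preferred, and tool mode is used only when tools are present on a model listed in `MODELS_WITHOUT_STRUCTURED_OUTPUT_AND_TOOLS_SUPPORT`. Below is a sketch of that selection rule; the model names and set contents are illustrative placeholders, not the package's model_info data.

# Illustrative sketch only; not part of the package diff above.
MODELS_WITHOUT_STRUCTURED_OUTPUT_AND_TOOLS_SUPPORT = {"older-model"}


def pick_default_mode(model_name: str, has_tools: bool) -> str:
    allows_strict_with_tools = (
        model_name not in MODELS_WITHOUT_STRUCTURED_OUTPUT_AND_TOOLS_SUPPORT
    )
    # Prefer strict structured outputs; fall back to tool mode only when tools
    # are present on a model that cannot combine them with strict outputs.
    return "tool" if has_tools and not allows_strict_with_tools else "strict"


assert pick_default_mode("newer-model", has_tools=True) == "strict"
assert pick_default_mode("older-model", has_tools=True) == "tool"
assert pick_default_mode("older-model", has_tools=False) == "strict"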