mirascope 2.0.0a3__py3-none-any.whl → 2.0.0a4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (93)
  1. mirascope/api/_generated/__init__.py +62 -6
  2. mirascope/api/_generated/client.py +8 -0
  3. mirascope/api/_generated/errors/__init__.py +11 -1
  4. mirascope/api/_generated/errors/conflict_error.py +15 -0
  5. mirascope/api/_generated/errors/forbidden_error.py +15 -0
  6. mirascope/api/_generated/errors/internal_server_error.py +15 -0
  7. mirascope/api/_generated/errors/not_found_error.py +15 -0
  8. mirascope/api/_generated/organizations/__init__.py +25 -0
  9. mirascope/api/_generated/organizations/client.py +380 -0
  10. mirascope/api/_generated/organizations/raw_client.py +876 -0
  11. mirascope/api/_generated/organizations/types/__init__.py +23 -0
  12. mirascope/api/_generated/organizations/types/organizations_create_response.py +24 -0
  13. mirascope/api/_generated/organizations/types/organizations_create_response_role.py +7 -0
  14. mirascope/api/_generated/organizations/types/organizations_get_response.py +24 -0
  15. mirascope/api/_generated/organizations/types/organizations_get_response_role.py +7 -0
  16. mirascope/api/_generated/organizations/types/organizations_list_response_item.py +24 -0
  17. mirascope/api/_generated/organizations/types/organizations_list_response_item_role.py +7 -0
  18. mirascope/api/_generated/organizations/types/organizations_update_response.py +24 -0
  19. mirascope/api/_generated/organizations/types/organizations_update_response_role.py +7 -0
  20. mirascope/api/_generated/projects/__init__.py +17 -0
  21. mirascope/api/_generated/projects/client.py +458 -0
  22. mirascope/api/_generated/projects/raw_client.py +1016 -0
  23. mirascope/api/_generated/projects/types/__init__.py +15 -0
  24. mirascope/api/_generated/projects/types/projects_create_response.py +30 -0
  25. mirascope/api/_generated/projects/types/projects_get_response.py +30 -0
  26. mirascope/api/_generated/projects/types/projects_list_response_item.py +30 -0
  27. mirascope/api/_generated/projects/types/projects_update_response.py +30 -0
  28. mirascope/api/_generated/reference.md +586 -0
  29. mirascope/api/_generated/types/__init__.py +20 -4
  30. mirascope/api/_generated/types/already_exists_error.py +24 -0
  31. mirascope/api/_generated/types/already_exists_error_tag.py +5 -0
  32. mirascope/api/_generated/types/database_error.py +24 -0
  33. mirascope/api/_generated/types/database_error_tag.py +5 -0
  34. mirascope/api/_generated/types/http_api_decode_error.py +1 -3
  35. mirascope/api/_generated/types/issue.py +1 -5
  36. mirascope/api/_generated/types/not_found_error_body.py +24 -0
  37. mirascope/api/_generated/types/not_found_error_tag.py +5 -0
  38. mirascope/api/_generated/types/permission_denied_error.py +24 -0
  39. mirascope/api/_generated/types/permission_denied_error_tag.py +7 -0
  40. mirascope/api/_generated/types/property_key.py +2 -2
  41. mirascope/api/_generated/types/{property_key_tag.py → property_key_key.py} +3 -5
  42. mirascope/api/_generated/types/{property_key_tag_tag.py → property_key_key_tag.py} +1 -1
  43. mirascope/llm/__init__.py +4 -0
  44. mirascope/llm/providers/__init__.py +6 -0
  45. mirascope/llm/providers/anthropic/__init__.py +6 -1
  46. mirascope/llm/providers/anthropic/_utils/__init__.py +15 -5
  47. mirascope/llm/providers/anthropic/_utils/beta_decode.py +271 -0
  48. mirascope/llm/providers/anthropic/_utils/beta_encode.py +216 -0
  49. mirascope/llm/providers/anthropic/_utils/decode.py +39 -7
  50. mirascope/llm/providers/anthropic/_utils/encode.py +156 -64
  51. mirascope/llm/providers/anthropic/beta_provider.py +322 -0
  52. mirascope/llm/providers/anthropic/model_id.py +10 -27
  53. mirascope/llm/providers/anthropic/model_info.py +87 -0
  54. mirascope/llm/providers/anthropic/provider.py +127 -145
  55. mirascope/llm/providers/base/_utils.py +15 -1
  56. mirascope/llm/providers/google/_utils/decode.py +55 -3
  57. mirascope/llm/providers/google/_utils/encode.py +14 -6
  58. mirascope/llm/providers/google/model_id.py +7 -13
  59. mirascope/llm/providers/google/model_info.py +62 -0
  60. mirascope/llm/providers/google/provider.py +8 -4
  61. mirascope/llm/providers/load_provider.py +8 -2
  62. mirascope/llm/providers/mlx/_utils.py +23 -1
  63. mirascope/llm/providers/mlx/encoding/transformers.py +17 -1
  64. mirascope/llm/providers/mlx/provider.py +4 -0
  65. mirascope/llm/providers/ollama/__init__.py +19 -0
  66. mirascope/llm/providers/ollama/provider.py +71 -0
  67. mirascope/llm/providers/openai/completions/__init__.py +6 -1
  68. mirascope/llm/providers/openai/completions/_utils/decode.py +57 -5
  69. mirascope/llm/providers/openai/completions/_utils/encode.py +9 -8
  70. mirascope/llm/providers/openai/completions/base_provider.py +513 -0
  71. mirascope/llm/providers/openai/completions/provider.py +13 -447
  72. mirascope/llm/providers/openai/model_info.py +57 -0
  73. mirascope/llm/providers/openai/provider.py +16 -4
  74. mirascope/llm/providers/openai/responses/_utils/decode.py +55 -4
  75. mirascope/llm/providers/openai/responses/_utils/encode.py +9 -9
  76. mirascope/llm/providers/openai/responses/provider.py +20 -21
  77. mirascope/llm/providers/provider_id.py +11 -1
  78. mirascope/llm/providers/provider_registry.py +3 -1
  79. mirascope/llm/providers/together/__init__.py +19 -0
  80. mirascope/llm/providers/together/provider.py +40 -0
  81. mirascope/llm/responses/__init__.py +3 -0
  82. mirascope/llm/responses/base_response.py +4 -0
  83. mirascope/llm/responses/base_stream_response.py +25 -1
  84. mirascope/llm/responses/finish_reason.py +1 -0
  85. mirascope/llm/responses/response.py +9 -0
  86. mirascope/llm/responses/root_response.py +5 -1
  87. mirascope/llm/responses/usage.py +95 -0
  88. {mirascope-2.0.0a3.dist-info → mirascope-2.0.0a4.dist-info}/METADATA +3 -3
  89. {mirascope-2.0.0a3.dist-info → mirascope-2.0.0a4.dist-info}/RECORD +91 -50
  90. mirascope/llm/providers/openai/shared/__init__.py +0 -7
  91. mirascope/llm/providers/openai/shared/_utils.py +0 -59
  92. {mirascope-2.0.0a3.dist-info → mirascope-2.0.0a4.dist-info}/WHEEL +0 -0
  93. {mirascope-2.0.0a3.dist-info → mirascope-2.0.0a4.dist-info}/licenses/LICENSE +0 -0
@@ -40,8 +40,11 @@ from .....messages import AssistantMessage, Message, UserMessage
 from .....tools import FORMAT_TOOL_NAME, AnyToolSchema, BaseToolkit
 from ....base import Params, _utils as _base_utils
 from ...model_id import OpenAIModelId, model_name
-from ...model_info import NON_REASONING_MODELS
-from ...shared import _utils as _shared_utils
+from ...model_info import (
+    MODELS_WITHOUT_JSON_OBJECT_SUPPORT,
+    MODELS_WITHOUT_JSON_SCHEMA_SUPPORT,
+    NON_REASONING_MODELS,
+)


 class ResponseCreateKwargs(TypedDict, total=False):
@@ -197,7 +200,7 @@ def _convert_tool_to_function_tool_param(tool: AnyToolSchema) -> FunctionToolParam
     """Convert a Mirascope ToolSchema to OpenAI Responses FunctionToolParam."""
     schema_dict = tool.parameters.model_dump(by_alias=True, exclude_none=True)
     schema_dict["type"] = "object"
-    _shared_utils.ensure_additional_properties_false(schema_dict)
+    _base_utils.ensure_additional_properties_false(schema_dict)

     return FunctionToolParam(
         type="function",
@@ -220,7 +223,7 @@ def _create_strict_response_format(
         ResponseFormatTextJSONSchemaConfigParam for strict structured outputs
     """
     schema = format.schema.copy()
-    _shared_utils.ensure_additional_properties_false(schema)
+    _base_utils.ensure_additional_properties_false(schema)

     response_format: ResponseFormatTextJSONSchemaConfigParam = {
         "type": "json_schema",
@@ -294,9 +297,7 @@ def encode_request(
     tools = tools.tools if isinstance(tools, BaseToolkit) else tools or []
     openai_tools = [_convert_tool_to_function_tool_param(tool) for tool in tools]

-    model_supports_strict = (
-        model_id not in _shared_utils.MODELS_WITHOUT_JSON_SCHEMA_SUPPORT
-    )
+    model_supports_strict = model_id not in MODELS_WITHOUT_JSON_SCHEMA_SUPPORT
     default_mode = "strict" if model_supports_strict else "tool"

     format = resolve_format(format, default_mode=default_mode)
@@ -323,8 +324,7 @@ def encode_request(
             name=FORMAT_TOOL_NAME,
         )
     elif (
-        format.mode == "json"
-        and model_id not in _shared_utils.MODELS_WITHOUT_JSON_OBJECT_SUPPORT
+        format.mode == "json" and model_id not in MODELS_WITHOUT_JSON_OBJECT_SUPPORT
     ):
         kwargs["text"] = {"format": ResponseFormatJSONObject(type="json_object")}

@@ -1,7 +1,6 @@
 """OpenAI Responses API client implementation."""

 from collections.abc import Sequence
-from typing import Literal
 from typing_extensions import Unpack

 from openai import AsyncOpenAI, OpenAI
@@ -45,14 +44,10 @@ class OpenAIResponsesProvider(BaseProvider[OpenAI]):
         *,
         api_key: str | None = None,
         base_url: str | None = None,
-        wrapped_by_openai_provider: bool = False,
     ) -> None:
         """Initialize the OpenAI Responses client."""
         self.client = OpenAI(api_key=api_key, base_url=base_url)
         self.async_client = AsyncOpenAI(api_key=api_key, base_url=base_url)
-        self.active_provider_id: Literal["openai", "openai:responses"] = (
-            "openai" if wrapped_by_openai_provider else "openai:responses"
-        )

     def _call(
         self,
@@ -85,14 +80,14 @@ class OpenAIResponsesProvider(BaseProvider[OpenAI]):

         openai_response = self.client.responses.create(**kwargs)

-        assistant_message, finish_reason = _utils.decode_response(
-            openai_response, model_id, self.active_provider_id
+        assistant_message, finish_reason, usage = _utils.decode_response(
+            openai_response, model_id, self.id
         )
         provider_model_name = model_name(model_id, "responses")

         return Response(
             raw=openai_response,
-            provider_id=self.active_provider_id,
+            provider_id=self.id,
             model_id=model_id,
             provider_model_name=provider_model_name,
             params=params,
@@ -100,6 +95,7 @@ class OpenAIResponsesProvider(BaseProvider[OpenAI]):
             input_messages=messages,
             assistant_message=assistant_message,
             finish_reason=finish_reason,
+            usage=usage,
             format=format,
         )

@@ -134,14 +130,14 @@ class OpenAIResponsesProvider(BaseProvider[OpenAI]):

         openai_response = await self.async_client.responses.create(**kwargs)

-        assistant_message, finish_reason = _utils.decode_response(
-            openai_response, model_id, self.active_provider_id
+        assistant_message, finish_reason, usage = _utils.decode_response(
+            openai_response, model_id, self.id
         )
         provider_model_name = model_name(model_id, "responses")

         return AsyncResponse(
             raw=openai_response,
-            provider_id=self.active_provider_id,
+            provider_id=self.id,
             model_id=model_id,
             provider_model_name=provider_model_name,
             params=params,
@@ -149,6 +145,7 @@ class OpenAIResponsesProvider(BaseProvider[OpenAI]):
             input_messages=messages,
             assistant_message=assistant_message,
             finish_reason=finish_reason,
+            usage=usage,
             format=format,
         )

@@ -192,7 +189,7 @@ class OpenAIResponsesProvider(BaseProvider[OpenAI]):
         provider_model_name = model_name(model_id, "responses")

         return StreamResponse(
-            provider_id=self.active_provider_id,
+            provider_id=self.id,
             model_id=model_id,
             provider_model_name=provider_model_name,
             params=params,
@@ -242,7 +239,7 @@ class OpenAIResponsesProvider(BaseProvider[OpenAI]):
         provider_model_name = model_name(model_id, "responses")

         return AsyncStreamResponse(
-            provider_id=self.active_provider_id,
+            provider_id=self.id,
             model_id=model_id,
             provider_model_name=provider_model_name,
             params=params,
@@ -287,14 +284,14 @@ class OpenAIResponsesProvider(BaseProvider[OpenAI]):

         openai_response = self.client.responses.create(**kwargs)

-        assistant_message, finish_reason = _utils.decode_response(
-            openai_response, model_id, self.active_provider_id
+        assistant_message, finish_reason, usage = _utils.decode_response(
+            openai_response, model_id, self.id
         )
         provider_model_name = model_name(model_id, "responses")

         return ContextResponse(
             raw=openai_response,
-            provider_id=self.active_provider_id,
+            provider_id=self.id,
             model_id=model_id,
             provider_model_name=provider_model_name,
             params=params,
@@ -302,6 +299,7 @@ class OpenAIResponsesProvider(BaseProvider[OpenAI]):
             input_messages=messages,
             assistant_message=assistant_message,
             finish_reason=finish_reason,
+            usage=usage,
             format=format,
         )

@@ -340,14 +338,14 @@ class OpenAIResponsesProvider(BaseProvider[OpenAI]):

         openai_response = await self.async_client.responses.create(**kwargs)

-        assistant_message, finish_reason = _utils.decode_response(
-            openai_response, model_id, self.active_provider_id
+        assistant_message, finish_reason, usage = _utils.decode_response(
+            openai_response, model_id, self.id
         )
         provider_model_name = model_name(model_id, "responses")

         return AsyncContextResponse(
             raw=openai_response,
-            provider_id=self.active_provider_id,
+            provider_id=self.id,
             model_id=model_id,
             provider_model_name=provider_model_name,
             params=params,
@@ -355,6 +353,7 @@ class OpenAIResponsesProvider(BaseProvider[OpenAI]):
             input_messages=messages,
             assistant_message=assistant_message,
             finish_reason=finish_reason,
+            usage=usage,
             format=format,
         )

@@ -402,7 +401,7 @@ class OpenAIResponsesProvider(BaseProvider[OpenAI]):
         provider_model_name = model_name(model_id, "responses")

         return ContextStreamResponse(
-            provider_id=self.active_provider_id,
+            provider_id=self.id,
             model_id=model_id,
             provider_model_name=provider_model_name,
             params=params,
@@ -459,7 +458,7 @@ class OpenAIResponsesProvider(BaseProvider[OpenAI]):
         provider_model_name = model_name(model_id, "responses")

         return AsyncContextStreamResponse(
-            provider_id=self.active_provider_id,
+            provider_id=self.id,
             model_id=model_id,
             provider_model_name=provider_model_name,
             params=params,
@@ -4,10 +4,20 @@ from typing import Literal, TypeAlias, get_args

 KnownProviderId: TypeAlias = Literal[
     "anthropic",  # Anthropic provider via AnthropicProvider
+    "anthropic-beta",  # Anthropic beta provider via AnthropicBetaProvider
     "google",  # Google provider via GoogleProvider
-    "openai",  # OpenAI provider via OpenAIProvider
     "mlx",  # Local inference powered by `mlx-lm`, via MLXProvider
+    "ollama",  # Ollama provider via OllamaProvider
+    "openai",  # OpenAI provider via OpenAIProvider (prefers Responses routing when available)
+    "together",  # Together AI provider via TogetherProvider
 ]
 KNOWN_PROVIDER_IDS = get_args(KnownProviderId)

 ProviderId = KnownProviderId | str
+
+OpenAICompletionsCompatibleProviderId: TypeAlias = Literal[
+    "ollama",  # Ollama (OpenAI-compatible)
+    "openai",  # OpenAI via OpenAIProvider (routes to completions)
+    "openai:completions",  # OpenAI Completions API directly
+    "together",  # Together AI (OpenAI-compatible)
+]
@@ -16,8 +16,10 @@ PROVIDER_REGISTRY: dict[str, Provider] = {}
 DEFAULT_AUTO_REGISTER_SCOPES: dict[str, ProviderId] = {
     "anthropic/": "anthropic",
     "google/": "google",
-    "openai/": "openai",
     "mlx-community/": "mlx",
+    "ollama/": "ollama",
+    "openai/": "openai",
+    "together/": "together",
 }


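With the new `DEFAULT_AUTO_REGISTER_SCOPES` entries, model IDs prefixed with `ollama/` or `together/` now map to the new providers alongside the existing scopes. A minimal sketch of what this enables, mirroring the `import llm` and `@llm.call` pattern from the TogetherProvider docstring further down; the model name is illustrative, and the assumption that a scoped prefix needs no explicit `register_provider` call follows from the setting's name rather than documented behavior:

```python
import llm  # as in the TogetherProvider docstring below


# "ollama/..." falls under the new auto-register scopes, so the OllamaProvider
# should be resolved for this model ID without an explicit llm.register_provider(...).
@llm.call("ollama/llama3.2")  # illustrative local model
def recommend_book(genre: str):
    return [llm.messages.user(f"Recommend a {genre} book.")]
```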
@@ -0,0 +1,19 @@
+"""Together AI provider implementation."""
+
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from .provider import TogetherProvider
+else:
+    try:
+        from .provider import TogetherProvider
+    except ImportError:  # pragma: no cover
+        from .._missing_import_stubs import (
+            create_provider_stub,
+        )
+
+        TogetherProvider = create_provider_stub("openai", "TogetherProvider")
+
+__all__ = [
+    "TogetherProvider",
+]
@@ -0,0 +1,40 @@
+"""Together AI provider implementation."""
+
+from typing import ClassVar
+
+from ..openai.completions.base_provider import BaseOpenAICompletionsProvider
+
+
+class TogetherProvider(BaseOpenAICompletionsProvider):
+    """Provider for Together AI's OpenAI-compatible API.
+
+    Inherits from BaseOpenAICompletionsProvider with Together-specific configuration:
+    - Uses Together AI's API endpoint
+    - Requires TOGETHER_API_KEY
+
+    Usage:
+        Register the provider with model ID prefixes you want to use:
+
+        ```python
+        import llm
+
+        # Register for meta-llama models
+        llm.register_provider("together", "meta-llama/")
+
+        # Now you can use meta-llama models directly
+        @llm.call("meta-llama/Llama-3.3-70B-Instruct-Turbo")
+        def my_prompt():
+            return [llm.messages.user("Hello!")]
+        ```
+    """
+
+    id: ClassVar[str] = "together"
+    default_scope: ClassVar[str | list[str]] = []
+    default_base_url: ClassVar[str | None] = "https://api.together.xyz/v1"
+    api_key_env_var: ClassVar[str] = "TOGETHER_API_KEY"
+    api_key_required: ClassVar[bool] = True
+    provider_name: ClassVar[str | None] = "Together"
+
+    def _model_name(self, model_id: str) -> str:
+        """Return the model ID as-is for Together API."""
+        return model_id
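`TogetherProvider` is essentially configuration on top of the new `BaseOpenAICompletionsProvider` (added in `openai/completions/base_provider.py` above). A hedged sketch of how another OpenAI-compatible endpoint could be wired up the same way; `FireworksProvider`, its base URL, and its env var are hypothetical and not part of this release:

```python
from typing import ClassVar

from mirascope.llm.providers.openai.completions.base_provider import (
    BaseOpenAICompletionsProvider,
)


class FireworksProvider(BaseOpenAICompletionsProvider):
    """Hypothetical OpenAI-compatible provider, mirroring TogetherProvider's shape."""

    id: ClassVar[str] = "fireworks"  # hypothetical provider id
    default_scope: ClassVar[str | list[str]] = []
    default_base_url: ClassVar[str | None] = "https://api.fireworks.ai/inference/v1"  # assumed endpoint
    api_key_env_var: ClassVar[str] = "FIREWORKS_API_KEY"  # hypothetical env var
    api_key_required: ClassVar[bool] = True
    provider_name: ClassVar[str | None] = "Fireworks"

    def _model_name(self, model_id: str) -> str:
        """Pass the model ID through unchanged, as TogetherProvider does."""
        return model_id
```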
@@ -27,6 +27,7 @@ from .streams import (
     ThoughtStream,
     ToolCallStream,
 )
+from .usage import Usage, UsageDeltaChunk

 __all__ = [
     "AsyncChunkIterator",
@@ -53,5 +54,7 @@ __all__ = [
     "TextStream",
     "ThoughtStream",
     "ToolCallStream",
+    "Usage",
+    "UsageDeltaChunk",
     "_utils",
 ]
@@ -9,6 +9,7 @@ from ..messages import AssistantMessage, Message
 from ..tools import FORMAT_TOOL_NAME, ToolkitT
 from .finish_reason import FinishReason
 from .root_response import RootResponse
+from .usage import Usage

 if TYPE_CHECKING:
     from ..providers import ModelId, Params, ProviderId
@@ -30,6 +31,7 @@ class BaseResponse(RootResponse[ToolkitT, FormattableT]):
         input_messages: Sequence[Message],
         assistant_message: AssistantMessage,
         finish_reason: FinishReason | None,
+        usage: Usage | None,
     ) -> None:
         """Initialize a Response.

@@ -45,6 +47,7 @@
             input_messages: The message history before the final assistant message.
             assistant_message: The final assistant message containing the response content.
             finish_reason: The reason why the LLM finished generating a response.
+            usage: Token usage statistics for the response.
         """
         self.raw = raw
         self.provider_id = provider_id
@@ -53,6 +56,7 @@
         self.params = params
         self.toolkit = toolkit
         self.finish_reason = finish_reason
+        self.usage = usage
         self.format = format

         # Process content in the assistant message, organizing it by type and
@@ -36,6 +36,7 @@ from .streams import (
     ThoughtStream,
     ToolCallStream,
 )
+from .usage import Usage, UsageDeltaChunk

 if TYPE_CHECKING:
     from ..providers import ModelId, Params, ProviderId
@@ -76,7 +77,11 @@ class RawMessageChunk:


 StreamResponseChunk: TypeAlias = (
-    AssistantContentChunk | FinishReasonChunk | RawStreamEventChunk | RawMessageChunk
+    AssistantContentChunk
+    | FinishReasonChunk
+    | RawStreamEventChunk
+    | RawMessageChunk
+    | UsageDeltaChunk
 )

 ChunkIterator: TypeAlias = Iterator[StreamResponseChunk]
@@ -165,6 +170,7 @@ class BaseStreamResponse(
         format: Format[FormattableT] | None = None,
         input_messages: Sequence[Message],
         chunk_iterator: ChunkIteratorT,
+        usage: Usage | None = None,
     ) -> None:
         """Initialize the BaseStreamResponse.

@@ -177,6 +183,7 @@
             toolkit: Toolkit containing all the tools used to generate the response.
             format: The `Format` for the expected structured output format (or None).
             input_messages: The input messages that were sent to the LLM
+            usage: Token usage statistics for the response.

         The BaseStreamResponse will process the tuples to build the chunks and raw lists
         as the stream is consumed.
@@ -187,6 +194,7 @@
         self.provider_model_name = provider_model_name
         self.params = params
         self.toolkit = toolkit
+        self.usage = usage
         self.format = format

         # Internal-only lists which we mutate (append) during chunk processing
@@ -475,6 +483,14 @@ class BaseSyncStreamResponse(BaseStreamResponse[ChunkIterator, ToolkitT, Formatt
                 self._assistant_message.raw_message = chunk.raw_message
             elif chunk.type == "finish_reason_chunk":
                 self.finish_reason = chunk.finish_reason
+            elif chunk.type == "usage_delta_chunk":
+                if self.usage is None:
+                    self.usage = Usage()
+                self.usage.input_tokens += chunk.input_tokens
+                self.usage.output_tokens += chunk.output_tokens
+                self.usage.cache_read_tokens += chunk.cache_read_tokens
+                self.usage.cache_write_tokens += chunk.cache_write_tokens
+                self.usage.reasoning_tokens += chunk.reasoning_tokens
             else:
                 yield self._handle_chunk(chunk)

@@ -648,6 +664,14 @@ class BaseAsyncStreamResponse(
                 self._assistant_message.raw_message = chunk.raw_message
             elif chunk.type == "finish_reason_chunk":
                 self.finish_reason = chunk.finish_reason
+            elif chunk.type == "usage_delta_chunk":
+                if self.usage is None:
+                    self.usage = Usage()
+                self.usage.input_tokens += chunk.input_tokens
+                self.usage.output_tokens += chunk.output_tokens
+                self.usage.cache_read_tokens += chunk.cache_read_tokens
+                self.usage.cache_write_tokens += chunk.cache_write_tokens
+                self.usage.reasoning_tokens += chunk.reasoning_tokens
             else:
                 yield self._handle_chunk(chunk)

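Both the sync and async stream paths fold `usage_delta_chunk`s into a single `Usage` object as the stream is consumed. A standalone sketch of that accumulation using the new dataclasses from `usage.py` (shown further down); the delta values are made up:

```python
from mirascope.llm.responses import Usage, UsageDeltaChunk

deltas = [
    UsageDeltaChunk(input_tokens=12, cache_read_tokens=8),
    UsageDeltaChunk(output_tokens=40, reasoning_tokens=16),
]

usage = Usage()
for chunk in deltas:
    # Mirrors the per-field accumulation in BaseStreamResponse above.
    usage.input_tokens += chunk.input_tokens
    usage.output_tokens += chunk.output_tokens
    usage.cache_read_tokens += chunk.cache_read_tokens
    usage.cache_write_tokens += chunk.cache_write_tokens
    usage.reasoning_tokens += chunk.reasoning_tokens

print(usage.total_tokens)  # 52 = input_tokens (12) + output_tokens (40)
```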
@@ -15,6 +15,7 @@ class FinishReason(str, Enum):

     MAX_TOKENS = "max_tokens"
     REFUSAL = "refusal"
+    CONTEXT_LENGTH_EXCEEDED = "context_length_exceeded"


 @dataclass(kw_only=True)
@@ -21,6 +21,7 @@ from ..tools import (
 from ..types import Jsonable
 from .base_response import BaseResponse
 from .finish_reason import FinishReason
+from .usage import Usage

 if TYPE_CHECKING:
     from ..providers import ModelId, Params, ProviderId
@@ -42,6 +43,7 @@ class Response(BaseResponse[Toolkit, FormattableT]):
         input_messages: Sequence[Message],
         assistant_message: AssistantMessage,
         finish_reason: FinishReason | None,
+        usage: Usage | None,
     ) -> None:
         """Initialize a `Response`."""
         toolkit = tools if isinstance(tools, Toolkit) else Toolkit(tools=tools)
@@ -56,6 +58,7 @@ class Response(BaseResponse[Toolkit, FormattableT]):
             input_messages=input_messages,
             assistant_message=assistant_message,
             finish_reason=finish_reason,
+            usage=usage,
         )

     def execute_tools(self) -> Sequence[ToolOutput[Jsonable]]:
@@ -113,6 +116,7 @@ class AsyncResponse(BaseResponse[AsyncToolkit, FormattableT]):
         input_messages: Sequence[Message],
         assistant_message: AssistantMessage,
         finish_reason: FinishReason | None,
+        usage: Usage | None,
     ) -> None:
         """Initialize an `AsyncResponse`."""
         toolkit = (
@@ -129,6 +133,7 @@ class AsyncResponse(BaseResponse[AsyncToolkit, FormattableT]):
             input_messages=input_messages,
             assistant_message=assistant_message,
             finish_reason=finish_reason,
+            usage=usage,
         )

     async def execute_tools(self) -> Sequence[ToolOutput[Jsonable]]:
@@ -195,6 +200,7 @@ class ContextResponse(
         input_messages: Sequence[Message],
         assistant_message: AssistantMessage,
         finish_reason: FinishReason | None,
+        usage: Usage | None,
     ) -> None:
         """Initialize a `ContextResponse`."""
         toolkit = (
@@ -211,6 +217,7 @@ class ContextResponse(
             input_messages=input_messages,
             assistant_message=assistant_message,
             finish_reason=finish_reason,
+            usage=usage,
         )

     def execute_tools(self, ctx: Context[DepsT]) -> Sequence[ToolOutput[Jsonable]]:
@@ -283,6 +290,7 @@ class AsyncContextResponse(
         input_messages: Sequence[Message],
         assistant_message: AssistantMessage,
         finish_reason: FinishReason | None,
+        usage: Usage | None,
     ) -> None:
         """Initialize an `AsyncContextResponse`."""
         toolkit = (
@@ -301,6 +309,7 @@ class AsyncContextResponse(
             input_messages=input_messages,
             assistant_message=assistant_message,
             finish_reason=finish_reason,
+            usage=usage,
         )

     async def execute_tools(
@@ -11,6 +11,7 @@ from ..messages import Message
 from ..tools import ToolkitT
 from . import _utils
 from .finish_reason import FinishReason
+from .usage import Usage

 if TYPE_CHECKING:
     from ..models import Model
@@ -55,12 +56,15 @@ class RootResponse(Generic[ToolkitT, FormattableT], ABC):
     """
     finish_reason: FinishReason | None
     """The reason why the LLM finished generating a response, if set.
-
+
     `finish_reason` is only set if the response did not finish generating normally,
     e.g. `FinishReason.MAX_TOKENS` if the model ran out of tokens before completing.
     When the response generates normally, `response.finish_reason` will be `None`.
     """

+    usage: Usage | None
+    """Token usage statistics for this response, if available."""
+
     format: Format[FormattableT] | None
     """The `Format` describing the structured response format, if available."""

@@ -0,0 +1,95 @@
+"""Provider-agnostic usage statistics for LLM API calls."""
+
+from __future__ import annotations
+
+from dataclasses import dataclass
+from typing import Any, Literal
+
+
+@dataclass(kw_only=True)
+class UsageDeltaChunk:
+    """A chunk containing incremental token usage information from a streaming response.
+
+    This represents a delta/increment in usage statistics as they arrive during streaming.
+    Multiple UsageDeltaChunks are accumulated to produce the final Usage object.
+    """
+
+    type: Literal["usage_delta_chunk"] = "usage_delta_chunk"
+
+    input_tokens: int = 0
+    """Delta in input tokens."""
+
+    output_tokens: int = 0
+    """Delta in output tokens."""
+
+    cache_read_tokens: int = 0
+    """Delta in cache read tokens."""
+
+    cache_write_tokens: int = 0
+    """Delta in cache write tokens."""
+
+    reasoning_tokens: int = 0
+    """Delta in reasoning/thinking tokens."""
+
+
+@dataclass(kw_only=True)
+class Usage:
+    """Token usage statistics from an LLM API call.
+
+    This abstraction captures common usage metrics across providers while preserving
+    access to the raw provider-specific usage data.
+    """
+
+    input_tokens: int = 0
+    """The number of input tokens used.
+
+    This includes ALL input tokens, including cache read and write tokens.
+
+    Will be 0 if not reported by the provider.
+    """
+
+    output_tokens: int = 0
+    """The number of output tokens used.
+
+    This includes ALL output tokens, including `reasoning_tokens` that may not be
+    in the user's visible output, or other "hidden" tokens.
+
+    Will be 0 if not reported by the provider.
+    """
+
+    cache_read_tokens: int = 0
+    """The number of tokens read from cache (prompt caching).
+
+    These are input tokens that were read from cache. Cache read tokens are generally
+    much less expensive than regular input tokens.
+
+    Will be 0 if not reported by the provider or if caching was not used.
+    """
+
+    cache_write_tokens: int = 0
+    """The number of tokens written to cache (cache creation).
+
+    These are input tokens that were written to cache, for future reuse and retrieval.
+    Cache write tokens are generally more expensive than uncached input tokens,
+    but may lead to cost savings down the line when they are re-read as cache_read_tokens.
+
+    Will be 0 if not reported by the provider or if caching was not used.
+    """
+
+    reasoning_tokens: int = 0
+    """The number of tokens used for reasoning/thinking.
+
+    Reasoning tokens are a subset of output_tokens that were generated as part of the model's
+    interior reasoning process. They are billed as output tokens, though they are generally
+    not shown to the user.
+
+    Will be 0 if not reported by the provider or if the model does not support reasoning.
+    """
+
+    raw: Any = None
+    """The raw usage object from the provider."""
+
+    @property
+    def total_tokens(self) -> int:
+        """The total number of tokens used (input + output)."""
+        return self.input_tokens + self.output_tokens
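Since `usage` is now stored on every response (see the `root_response.py` and `base_response.py` hunks above), callers can read token counts directly off the result. A minimal sketch, reusing the `import llm` / `@llm.call` pattern from the TogetherProvider docstring; the model ID is illustrative, and it assumes calling the decorated function returns a `Response`:

```python
import llm  # as in the TogetherProvider docstring above


@llm.call("anthropic/claude-sonnet-4-5")  # illustrative model ID
def summarize(text: str):
    return [llm.messages.user(f"Summarize: {text}")]


response = summarize("...")  # assumed to return a Response
if response.usage is not None:  # usage is Usage | None; None if the provider reported nothing
    print(response.usage.input_tokens, response.usage.output_tokens)
    print(response.usage.total_tokens)  # input_tokens + output_tokens
```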
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: mirascope
-Version: 2.0.0a3
+Version: 2.0.0a4
 Summary: LLM abstractions that aren't obstructions
 Project-URL: Homepage, https://mirascope.com
 Project-URL: Documentation, https://mirascope.com/docs/mirascope/v2
@@ -51,7 +51,7 @@ Requires-Dist: httpx>=0.27.0
 Requires-Dist: pydantic>=2.0.0
 Requires-Dist: typing-extensions>=4.10.0
 Provides-Extra: all
-Requires-Dist: anthropic<1.0,>=0.72.0; extra == 'all'
+Requires-Dist: anthropic<1.0,>=0.75.0; extra == 'all'
 Requires-Dist: google-genai<2,>=1.48.0; extra == 'all'
 Requires-Dist: libcst>=1.8.6; extra == 'all'
 Requires-Dist: mcp<2,>=1.0.0; extra == 'all'
@@ -70,7 +70,7 @@ Requires-Dist: pillow<11,>=10.4.0; extra == 'all'
 Requires-Dist: proto-plus>=1.24.0; extra == 'all'
 Requires-Dist: pydantic-settings>=2.12.0; extra == 'all'
 Provides-Extra: anthropic
-Requires-Dist: anthropic<1.0,>=0.72.0; extra == 'anthropic'
+Requires-Dist: anthropic<1.0,>=0.75.0; extra == 'anthropic'
 Provides-Extra: api
 Requires-Dist: pydantic-settings>=2.12.0; extra == 'api'
 Provides-Extra: google