mirascope 2.0.0a3__py3-none-any.whl → 2.0.0a4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mirascope/api/_generated/__init__.py +62 -6
- mirascope/api/_generated/client.py +8 -0
- mirascope/api/_generated/errors/__init__.py +11 -1
- mirascope/api/_generated/errors/conflict_error.py +15 -0
- mirascope/api/_generated/errors/forbidden_error.py +15 -0
- mirascope/api/_generated/errors/internal_server_error.py +15 -0
- mirascope/api/_generated/errors/not_found_error.py +15 -0
- mirascope/api/_generated/organizations/__init__.py +25 -0
- mirascope/api/_generated/organizations/client.py +380 -0
- mirascope/api/_generated/organizations/raw_client.py +876 -0
- mirascope/api/_generated/organizations/types/__init__.py +23 -0
- mirascope/api/_generated/organizations/types/organizations_create_response.py +24 -0
- mirascope/api/_generated/organizations/types/organizations_create_response_role.py +7 -0
- mirascope/api/_generated/organizations/types/organizations_get_response.py +24 -0
- mirascope/api/_generated/organizations/types/organizations_get_response_role.py +7 -0
- mirascope/api/_generated/organizations/types/organizations_list_response_item.py +24 -0
- mirascope/api/_generated/organizations/types/organizations_list_response_item_role.py +7 -0
- mirascope/api/_generated/organizations/types/organizations_update_response.py +24 -0
- mirascope/api/_generated/organizations/types/organizations_update_response_role.py +7 -0
- mirascope/api/_generated/projects/__init__.py +17 -0
- mirascope/api/_generated/projects/client.py +458 -0
- mirascope/api/_generated/projects/raw_client.py +1016 -0
- mirascope/api/_generated/projects/types/__init__.py +15 -0
- mirascope/api/_generated/projects/types/projects_create_response.py +30 -0
- mirascope/api/_generated/projects/types/projects_get_response.py +30 -0
- mirascope/api/_generated/projects/types/projects_list_response_item.py +30 -0
- mirascope/api/_generated/projects/types/projects_update_response.py +30 -0
- mirascope/api/_generated/reference.md +586 -0
- mirascope/api/_generated/types/__init__.py +20 -4
- mirascope/api/_generated/types/already_exists_error.py +24 -0
- mirascope/api/_generated/types/already_exists_error_tag.py +5 -0
- mirascope/api/_generated/types/database_error.py +24 -0
- mirascope/api/_generated/types/database_error_tag.py +5 -0
- mirascope/api/_generated/types/http_api_decode_error.py +1 -3
- mirascope/api/_generated/types/issue.py +1 -5
- mirascope/api/_generated/types/not_found_error_body.py +24 -0
- mirascope/api/_generated/types/not_found_error_tag.py +5 -0
- mirascope/api/_generated/types/permission_denied_error.py +24 -0
- mirascope/api/_generated/types/permission_denied_error_tag.py +7 -0
- mirascope/api/_generated/types/property_key.py +2 -2
- mirascope/api/_generated/types/{property_key_tag.py → property_key_key.py} +3 -5
- mirascope/api/_generated/types/{property_key_tag_tag.py → property_key_key_tag.py} +1 -1
- mirascope/llm/__init__.py +4 -0
- mirascope/llm/providers/__init__.py +6 -0
- mirascope/llm/providers/anthropic/__init__.py +6 -1
- mirascope/llm/providers/anthropic/_utils/__init__.py +15 -5
- mirascope/llm/providers/anthropic/_utils/beta_decode.py +271 -0
- mirascope/llm/providers/anthropic/_utils/beta_encode.py +216 -0
- mirascope/llm/providers/anthropic/_utils/decode.py +39 -7
- mirascope/llm/providers/anthropic/_utils/encode.py +156 -64
- mirascope/llm/providers/anthropic/beta_provider.py +322 -0
- mirascope/llm/providers/anthropic/model_id.py +10 -27
- mirascope/llm/providers/anthropic/model_info.py +87 -0
- mirascope/llm/providers/anthropic/provider.py +127 -145
- mirascope/llm/providers/base/_utils.py +15 -1
- mirascope/llm/providers/google/_utils/decode.py +55 -3
- mirascope/llm/providers/google/_utils/encode.py +14 -6
- mirascope/llm/providers/google/model_id.py +7 -13
- mirascope/llm/providers/google/model_info.py +62 -0
- mirascope/llm/providers/google/provider.py +8 -4
- mirascope/llm/providers/load_provider.py +8 -2
- mirascope/llm/providers/mlx/_utils.py +23 -1
- mirascope/llm/providers/mlx/encoding/transformers.py +17 -1
- mirascope/llm/providers/mlx/provider.py +4 -0
- mirascope/llm/providers/ollama/__init__.py +19 -0
- mirascope/llm/providers/ollama/provider.py +71 -0
- mirascope/llm/providers/openai/completions/__init__.py +6 -1
- mirascope/llm/providers/openai/completions/_utils/decode.py +57 -5
- mirascope/llm/providers/openai/completions/_utils/encode.py +9 -8
- mirascope/llm/providers/openai/completions/base_provider.py +513 -0
- mirascope/llm/providers/openai/completions/provider.py +13 -447
- mirascope/llm/providers/openai/model_info.py +57 -0
- mirascope/llm/providers/openai/provider.py +16 -4
- mirascope/llm/providers/openai/responses/_utils/decode.py +55 -4
- mirascope/llm/providers/openai/responses/_utils/encode.py +9 -9
- mirascope/llm/providers/openai/responses/provider.py +20 -21
- mirascope/llm/providers/provider_id.py +11 -1
- mirascope/llm/providers/provider_registry.py +3 -1
- mirascope/llm/providers/together/__init__.py +19 -0
- mirascope/llm/providers/together/provider.py +40 -0
- mirascope/llm/responses/__init__.py +3 -0
- mirascope/llm/responses/base_response.py +4 -0
- mirascope/llm/responses/base_stream_response.py +25 -1
- mirascope/llm/responses/finish_reason.py +1 -0
- mirascope/llm/responses/response.py +9 -0
- mirascope/llm/responses/root_response.py +5 -1
- mirascope/llm/responses/usage.py +95 -0
- {mirascope-2.0.0a3.dist-info → mirascope-2.0.0a4.dist-info}/METADATA +3 -3
- {mirascope-2.0.0a3.dist-info → mirascope-2.0.0a4.dist-info}/RECORD +91 -50
- mirascope/llm/providers/openai/shared/__init__.py +0 -7
- mirascope/llm/providers/openai/shared/_utils.py +0 -59
- {mirascope-2.0.0a3.dist-info → mirascope-2.0.0a4.dist-info}/WHEEL +0 -0
- {mirascope-2.0.0a3.dist-info → mirascope-2.0.0a4.dist-info}/licenses/LICENSE +0 -0
mirascope/llm/providers/openai/responses/_utils/encode.py

```diff
@@ -40,8 +40,11 @@ from .....messages import AssistantMessage, Message, UserMessage
 from .....tools import FORMAT_TOOL_NAME, AnyToolSchema, BaseToolkit
 from ....base import Params, _utils as _base_utils
 from ...model_id import OpenAIModelId, model_name
-from ...model_info import
-
+from ...model_info import (
+    MODELS_WITHOUT_JSON_OBJECT_SUPPORT,
+    MODELS_WITHOUT_JSON_SCHEMA_SUPPORT,
+    NON_REASONING_MODELS,
+)


 class ResponseCreateKwargs(TypedDict, total=False):
@@ -197,7 +200,7 @@ def _convert_tool_to_function_tool_param(tool: AnyToolSchema) -> FunctionToolPar
     """Convert a Mirascope ToolSchema to OpenAI Responses FunctionToolParam."""
     schema_dict = tool.parameters.model_dump(by_alias=True, exclude_none=True)
     schema_dict["type"] = "object"
-
+    _base_utils.ensure_additional_properties_false(schema_dict)

     return FunctionToolParam(
         type="function",
@@ -220,7 +223,7 @@ def _create_strict_response_format(
         ResponseFormatTextJSONSchemaConfigParam for strict structured outputs
     """
     schema = format.schema.copy()
-
+    _base_utils.ensure_additional_properties_false(schema)

     response_format: ResponseFormatTextJSONSchemaConfigParam = {
         "type": "json_schema",
@@ -294,9 +297,7 @@ def encode_request(
     tools = tools.tools if isinstance(tools, BaseToolkit) else tools or []
     openai_tools = [_convert_tool_to_function_tool_param(tool) for tool in tools]

-    model_supports_strict = (
-        model_id not in _shared_utils.MODELS_WITHOUT_JSON_SCHEMA_SUPPORT
-    )
+    model_supports_strict = model_id not in MODELS_WITHOUT_JSON_SCHEMA_SUPPORT
     default_mode = "strict" if model_supports_strict else "tool"

     format = resolve_format(format, default_mode=default_mode)
@@ -323,8 +324,7 @@ def encode_request(
             name=FORMAT_TOOL_NAME,
         )
     elif (
-        format.mode == "json"
-        and model_id not in _shared_utils.MODELS_WITHOUT_JSON_OBJECT_SUPPORT
+        format.mode == "json" and model_id not in MODELS_WITHOUT_JSON_OBJECT_SUPPORT
     ):
         kwargs["text"] = {"format": ResponseFormatJSONObject(type="json_object")}

```
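
The last two hunks above change how `encode_request` picks a structured-output mode: strict JSON-schema formatting is the default unless the model appears in `MODELS_WITHOUT_JSON_SCHEMA_SUPPORT` (in which case it falls back to tool-based formatting), and `json` mode is only used when the model is not in `MODELS_WITHOUT_JSON_OBJECT_SUPPORT`. The standalone sketch below restates that selection logic in isolation; the set contents are placeholders, not the actual entries in `model_info.py`.

```python
# Standalone sketch of the capability-based mode selection shown in the hunks
# above. The set contents here are hypothetical placeholders; the real lists
# live in mirascope/llm/providers/openai/model_info.py.
MODELS_WITHOUT_JSON_SCHEMA_SUPPORT: set[str] = {"openai/gpt-3.5-turbo"}  # placeholder entry


def pick_default_mode(model_id: str) -> str:
    """Mirror the default-mode choice made in encode_request above."""
    model_supports_strict = model_id not in MODELS_WITHOUT_JSON_SCHEMA_SUPPORT
    return "strict" if model_supports_strict else "tool"


print(pick_default_mode("openai/gpt-4o"))         # -> strict
print(pick_default_mode("openai/gpt-3.5-turbo"))  # -> tool
```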
mirascope/llm/providers/openai/responses/provider.py

```diff
@@ -1,7 +1,6 @@
 """OpenAI Responses API client implementation."""

 from collections.abc import Sequence
-from typing import Literal
 from typing_extensions import Unpack

 from openai import AsyncOpenAI, OpenAI
@@ -45,14 +44,10 @@ class OpenAIResponsesProvider(BaseProvider[OpenAI]):
         *,
         api_key: str | None = None,
         base_url: str | None = None,
-        wrapped_by_openai_provider: bool = False,
     ) -> None:
         """Initialize the OpenAI Responses client."""
         self.client = OpenAI(api_key=api_key, base_url=base_url)
         self.async_client = AsyncOpenAI(api_key=api_key, base_url=base_url)
-        self.active_provider_id: Literal["openai", "openai:responses"] = (
-            "openai" if wrapped_by_openai_provider else "openai:responses"
-        )

     def _call(
         self,
@@ -85,14 +80,14 @@ class OpenAIResponsesProvider(BaseProvider[OpenAI]):

         openai_response = self.client.responses.create(**kwargs)

-        assistant_message, finish_reason = _utils.decode_response(
-            openai_response, model_id, self.active_provider_id
+        assistant_message, finish_reason, usage = _utils.decode_response(
+            openai_response, model_id, self.id
         )
         provider_model_name = model_name(model_id, "responses")

         return Response(
             raw=openai_response,
-            provider_id=self.active_provider_id,
+            provider_id=self.id,
             model_id=model_id,
             provider_model_name=provider_model_name,
             params=params,
@@ -100,6 +95,7 @@ class OpenAIResponsesProvider(BaseProvider[OpenAI]):
             input_messages=messages,
             assistant_message=assistant_message,
             finish_reason=finish_reason,
+            usage=usage,
             format=format,
         )

@@ -134,14 +130,14 @@ class OpenAIResponsesProvider(BaseProvider[OpenAI]):

         openai_response = await self.async_client.responses.create(**kwargs)

-        assistant_message, finish_reason = _utils.decode_response(
-            openai_response, model_id, self.active_provider_id
+        assistant_message, finish_reason, usage = _utils.decode_response(
+            openai_response, model_id, self.id
         )
         provider_model_name = model_name(model_id, "responses")

         return AsyncResponse(
             raw=openai_response,
-            provider_id=self.active_provider_id,
+            provider_id=self.id,
             model_id=model_id,
             provider_model_name=provider_model_name,
             params=params,
@@ -149,6 +145,7 @@ class OpenAIResponsesProvider(BaseProvider[OpenAI]):
             input_messages=messages,
             assistant_message=assistant_message,
             finish_reason=finish_reason,
+            usage=usage,
             format=format,
         )

@@ -192,7 +189,7 @@ class OpenAIResponsesProvider(BaseProvider[OpenAI]):
         provider_model_name = model_name(model_id, "responses")

         return StreamResponse(
-            provider_id=self.active_provider_id,
+            provider_id=self.id,
             model_id=model_id,
             provider_model_name=provider_model_name,
             params=params,
@@ -242,7 +239,7 @@ class OpenAIResponsesProvider(BaseProvider[OpenAI]):
         provider_model_name = model_name(model_id, "responses")

         return AsyncStreamResponse(
-            provider_id=self.active_provider_id,
+            provider_id=self.id,
             model_id=model_id,
             provider_model_name=provider_model_name,
             params=params,
@@ -287,14 +284,14 @@ class OpenAIResponsesProvider(BaseProvider[OpenAI]):

         openai_response = self.client.responses.create(**kwargs)

-        assistant_message, finish_reason = _utils.decode_response(
-            openai_response, model_id, self.active_provider_id
+        assistant_message, finish_reason, usage = _utils.decode_response(
+            openai_response, model_id, self.id
         )
         provider_model_name = model_name(model_id, "responses")

         return ContextResponse(
             raw=openai_response,
-            provider_id=self.active_provider_id,
+            provider_id=self.id,
             model_id=model_id,
             provider_model_name=provider_model_name,
             params=params,
@@ -302,6 +299,7 @@ class OpenAIResponsesProvider(BaseProvider[OpenAI]):
             input_messages=messages,
             assistant_message=assistant_message,
             finish_reason=finish_reason,
+            usage=usage,
             format=format,
         )

@@ -340,14 +338,14 @@ class OpenAIResponsesProvider(BaseProvider[OpenAI]):

         openai_response = await self.async_client.responses.create(**kwargs)

-        assistant_message, finish_reason = _utils.decode_response(
-            openai_response, model_id, self.active_provider_id
+        assistant_message, finish_reason, usage = _utils.decode_response(
+            openai_response, model_id, self.id
         )
         provider_model_name = model_name(model_id, "responses")

         return AsyncContextResponse(
             raw=openai_response,
-            provider_id=self.active_provider_id,
+            provider_id=self.id,
             model_id=model_id,
             provider_model_name=provider_model_name,
             params=params,
@@ -355,6 +353,7 @@ class OpenAIResponsesProvider(BaseProvider[OpenAI]):
             input_messages=messages,
             assistant_message=assistant_message,
             finish_reason=finish_reason,
+            usage=usage,
             format=format,
         )

@@ -402,7 +401,7 @@ class OpenAIResponsesProvider(BaseProvider[OpenAI]):
         provider_model_name = model_name(model_id, "responses")

         return ContextStreamResponse(
-            provider_id=self.active_provider_id,
+            provider_id=self.id,
             model_id=model_id,
             provider_model_name=provider_model_name,
             params=params,
@@ -459,7 +458,7 @@ class OpenAIResponsesProvider(BaseProvider[OpenAI]):
         provider_model_name = model_name(model_id, "responses")

         return AsyncContextStreamResponse(
-            provider_id=self.active_provider_id,
+            provider_id=self.id,
             model_id=model_id,
             provider_model_name=provider_model_name,
             params=params,
```
mirascope/llm/providers/provider_id.py

```diff
@@ -4,10 +4,20 @@ from typing import Literal, TypeAlias, get_args

 KnownProviderId: TypeAlias = Literal[
     "anthropic",  # Anthropic provider via AnthropicProvider
+    "anthropic-beta",  # Anthropic beta provider via AnthropicBetaProvider
     "google",  # Google provider via GoogleProvider
-    "openai",  # OpenAI provider via OpenAIProvider
     "mlx",  # Local inference powered by `mlx-lm`, via MLXProvider
+    "ollama",  # Ollama provider via OllamaProvider
+    "openai",  # OpenAI provider via OpenAIProvider (prefers Responses routing when available)
+    "together",  # Together AI provider via TogetherProvider
 ]
 KNOWN_PROVIDER_IDS = get_args(KnownProviderId)

 ProviderId = KnownProviderId | str
+
+OpenAICompletionsCompatibleProviderId: TypeAlias = Literal[
+    "ollama",  # Ollama (OpenAI-compatible)
+    "openai",  # OpenAI via OpenAIProvider (routes to completions)
+    "openai:completions",  # OpenAI Completions API directly
+    "together",  # Together AI (OpenAI-compatible)
+]
```
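
Because `KNOWN_PROVIDER_IDS` is derived from the `Literal` above via `get_args`, the new provider ids should be discoverable at runtime. A quick check, assuming the module path matches the file listing above:

```python
# Assumes mirascope 2.0.0a4 is installed; the module path is taken from the
# file listing above and may differ if the ids are re-exported elsewhere.
from mirascope.llm.providers.provider_id import KNOWN_PROVIDER_IDS

for provider_id in ("anthropic-beta", "ollama", "together"):
    assert provider_id in KNOWN_PROVIDER_IDS, provider_id
```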
mirascope/llm/providers/provider_registry.py

```diff
@@ -16,8 +16,10 @@ PROVIDER_REGISTRY: dict[str, Provider] = {}
 DEFAULT_AUTO_REGISTER_SCOPES: dict[str, ProviderId] = {
     "anthropic/": "anthropic",
     "google/": "google",
-    "openai/": "openai",
     "mlx-community/": "mlx",
+    "ollama/": "ollama",
+    "openai/": "openai",
+    "together/": "together",
 }


```
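
The default auto-register scopes map model-ID prefixes to provider ids, so `ollama/...` and `together/...` model ids can resolve to the new providers without manual registration. A minimal sketch of prefix-based lookup over the mapping added above (illustrative only; the actual resolution presumably happens in `load_provider.py`, which also changed in this release):

```python
# Illustrative prefix lookup over the DEFAULT_AUTO_REGISTER_SCOPES mapping
# added above; this is not the library's actual resolution code.
DEFAULT_AUTO_REGISTER_SCOPES: dict[str, str] = {
    "anthropic/": "anthropic",
    "google/": "google",
    "mlx-community/": "mlx",
    "ollama/": "ollama",
    "openai/": "openai",
    "together/": "together",
}


def provider_for(model_id: str) -> str | None:
    """Return the provider id whose prefix scope matches the model id, if any."""
    for prefix, provider_id in DEFAULT_AUTO_REGISTER_SCOPES.items():
        if model_id.startswith(prefix):
            return provider_id
    return None


print(provider_for("ollama/llama3.2"))  # -> ollama
print(provider_for("together/meta-llama/Llama-3.3-70B-Instruct-Turbo"))  # -> together
```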
mirascope/llm/providers/together/__init__.py

```diff
@@ -0,0 +1,19 @@
+"""Together AI provider implementation."""
+
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from .provider import TogetherProvider
+else:
+    try:
+        from .provider import TogetherProvider
+    except ImportError:  # pragma: no cover
+        from .._missing_import_stubs import (
+            create_provider_stub,
+        )
+
+        TogetherProvider = create_provider_stub("openai", "TogetherProvider")
+
+__all__ = [
+    "TogetherProvider",
+]
```
mirascope/llm/providers/together/provider.py

````diff
@@ -0,0 +1,40 @@
+"""Together AI provider implementation."""
+
+from typing import ClassVar
+
+from ..openai.completions.base_provider import BaseOpenAICompletionsProvider
+
+
+class TogetherProvider(BaseOpenAICompletionsProvider):
+    """Provider for Together AI's OpenAI-compatible API.
+
+    Inherits from BaseOpenAICompletionsProvider with Together-specific configuration:
+    - Uses Together AI's API endpoint
+    - Requires TOGETHER_API_KEY
+
+    Usage:
+        Register the provider with model ID prefixes you want to use:
+
+        ```python
+        import llm
+
+        # Register for meta-llama models
+        llm.register_provider("together", "meta-llama/")
+
+        # Now you can use meta-llama models directly
+        @llm.call("meta-llama/Llama-3.3-70B-Instruct-Turbo")
+        def my_prompt():
+            return [llm.messages.user("Hello!")]
+        ```
+    """
+
+    id: ClassVar[str] = "together"
+    default_scope: ClassVar[str | list[str]] = []
+    default_base_url: ClassVar[str | None] = "https://api.together.xyz/v1"
+    api_key_env_var: ClassVar[str] = "TOGETHER_API_KEY"
+    api_key_required: ClassVar[bool] = True
+    provider_name: ClassVar[str | None] = "Together"
+
+    def _model_name(self, model_id: str) -> str:
+        """Return the model ID as-is for Together API."""
+        return model_id
````
mirascope/llm/responses/__init__.py

```diff
@@ -27,6 +27,7 @@ from .streams import (
     ThoughtStream,
     ToolCallStream,
 )
+from .usage import Usage, UsageDeltaChunk

 __all__ = [
     "AsyncChunkIterator",
@@ -53,5 +54,7 @@ __all__ = [
     "TextStream",
     "ThoughtStream",
     "ToolCallStream",
+    "Usage",
+    "UsageDeltaChunk",
     "_utils",
 ]
```
mirascope/llm/responses/base_response.py

```diff
@@ -9,6 +9,7 @@ from ..messages import AssistantMessage, Message
 from ..tools import FORMAT_TOOL_NAME, ToolkitT
 from .finish_reason import FinishReason
 from .root_response import RootResponse
+from .usage import Usage

 if TYPE_CHECKING:
     from ..providers import ModelId, Params, ProviderId
@@ -30,6 +31,7 @@ class BaseResponse(RootResponse[ToolkitT, FormattableT]):
         input_messages: Sequence[Message],
         assistant_message: AssistantMessage,
         finish_reason: FinishReason | None,
+        usage: Usage | None,
     ) -> None:
         """Initialize a Response.

@@ -45,6 +47,7 @@ class BaseResponse(RootResponse[ToolkitT, FormattableT]):
             input_messages: The message history before the final assistant message.
             assistant_message: The final assistant message containing the response content.
             finish_reason: The reason why the LLM finished generating a response.
+            usage: Token usage statistics for the response.
         """
         self.raw = raw
         self.provider_id = provider_id
@@ -53,6 +56,7 @@ class BaseResponse(RootResponse[ToolkitT, FormattableT]):
         self.params = params
         self.toolkit = toolkit
         self.finish_reason = finish_reason
+        self.usage = usage
         self.format = format

         # Process content in the assistant message, organizing it by type and
```
mirascope/llm/responses/base_stream_response.py

```diff
@@ -36,6 +36,7 @@ from .streams import (
     ThoughtStream,
     ToolCallStream,
 )
+from .usage import Usage, UsageDeltaChunk

 if TYPE_CHECKING:
     from ..providers import ModelId, Params, ProviderId
@@ -76,7 +77,11 @@ class RawMessageChunk:


 StreamResponseChunk: TypeAlias = (
-    AssistantContentChunk | FinishReasonChunk | RawStreamEventChunk | RawMessageChunk
+    AssistantContentChunk
+    | FinishReasonChunk
+    | RawStreamEventChunk
+    | RawMessageChunk
+    | UsageDeltaChunk
 )

 ChunkIterator: TypeAlias = Iterator[StreamResponseChunk]
@@ -165,6 +170,7 @@ class BaseStreamResponse(
         format: Format[FormattableT] | None = None,
         input_messages: Sequence[Message],
         chunk_iterator: ChunkIteratorT,
+        usage: Usage | None = None,
     ) -> None:
         """Initialize the BaseStreamResponse.

@@ -177,6 +183,7 @@ class BaseStreamResponse(
             toolkit: Toolkit containing all the tools used to generate the response.
             format: The `Format` for the expected structured output format (or None).
             input_messages: The input messages that were sent to the LLM
+            usage: Token usage statistics for the response.

         The BaseStreamResponse will process the tuples to build the chunks and raw lists
         as the stream is consumed.
@@ -187,6 +194,7 @@ class BaseStreamResponse(
         self.provider_model_name = provider_model_name
         self.params = params
         self.toolkit = toolkit
+        self.usage = usage
         self.format = format

         # Internal-only lists which we mutate (append) during chunk processing
@@ -475,6 +483,14 @@ class BaseSyncStreamResponse(BaseStreamResponse[ChunkIterator, ToolkitT, Formatt
                 self._assistant_message.raw_message = chunk.raw_message
             elif chunk.type == "finish_reason_chunk":
                 self.finish_reason = chunk.finish_reason
+            elif chunk.type == "usage_delta_chunk":
+                if self.usage is None:
+                    self.usage = Usage()
+                self.usage.input_tokens += chunk.input_tokens
+                self.usage.output_tokens += chunk.output_tokens
+                self.usage.cache_read_tokens += chunk.cache_read_tokens
+                self.usage.cache_write_tokens += chunk.cache_write_tokens
+                self.usage.reasoning_tokens += chunk.reasoning_tokens
             else:
                 yield self._handle_chunk(chunk)

@@ -648,6 +664,14 @@ class BaseAsyncStreamResponse(
                 self._assistant_message.raw_message = chunk.raw_message
             elif chunk.type == "finish_reason_chunk":
                 self.finish_reason = chunk.finish_reason
+            elif chunk.type == "usage_delta_chunk":
+                if self.usage is None:
+                    self.usage = Usage()
+                self.usage.input_tokens += chunk.input_tokens
+                self.usage.output_tokens += chunk.output_tokens
+                self.usage.cache_read_tokens += chunk.cache_read_tokens
+                self.usage.cache_write_tokens += chunk.cache_write_tokens
+                self.usage.reasoning_tokens += chunk.reasoning_tokens
             else:
                 yield self._handle_chunk(chunk)
```
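
The new `usage_delta_chunk` branches fold incremental usage into a single `Usage` object as the stream is consumed. The snippet below reproduces that accumulation with hand-made deltas; the values are made up, and it assumes mirascope 2.0.0a4 is installed with `Usage`/`UsageDeltaChunk` importable from `mirascope.llm.responses`, per the `__init__.py` hunk above.

```python
from mirascope.llm.responses import Usage, UsageDeltaChunk

# Hypothetical deltas such as a provider might emit over the course of a stream.
deltas = [
    UsageDeltaChunk(input_tokens=120, cache_read_tokens=80),
    UsageDeltaChunk(output_tokens=45, reasoning_tokens=12),
]

usage = Usage()
for chunk in deltas:
    # Same field-by-field accumulation performed in the stream classes above.
    usage.input_tokens += chunk.input_tokens
    usage.output_tokens += chunk.output_tokens
    usage.cache_read_tokens += chunk.cache_read_tokens
    usage.cache_write_tokens += chunk.cache_write_tokens
    usage.reasoning_tokens += chunk.reasoning_tokens

print(usage.total_tokens)  # 165 = 120 input + 45 output
```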
mirascope/llm/responses/response.py

```diff
@@ -21,6 +21,7 @@ from ..tools import (
 from ..types import Jsonable
 from .base_response import BaseResponse
 from .finish_reason import FinishReason
+from .usage import Usage

 if TYPE_CHECKING:
     from ..providers import ModelId, Params, ProviderId
@@ -42,6 +43,7 @@ class Response(BaseResponse[Toolkit, FormattableT]):
         input_messages: Sequence[Message],
         assistant_message: AssistantMessage,
         finish_reason: FinishReason | None,
+        usage: Usage | None,
     ) -> None:
         """Initialize a `Response`."""
         toolkit = tools if isinstance(tools, Toolkit) else Toolkit(tools=tools)
@@ -56,6 +58,7 @@ class Response(BaseResponse[Toolkit, FormattableT]):
             input_messages=input_messages,
             assistant_message=assistant_message,
             finish_reason=finish_reason,
+            usage=usage,
         )

     def execute_tools(self) -> Sequence[ToolOutput[Jsonable]]:
@@ -113,6 +116,7 @@ class AsyncResponse(BaseResponse[AsyncToolkit, FormattableT]):
         input_messages: Sequence[Message],
         assistant_message: AssistantMessage,
         finish_reason: FinishReason | None,
+        usage: Usage | None,
     ) -> None:
         """Initialize an `AsyncResponse`."""
         toolkit = (
@@ -129,6 +133,7 @@ class AsyncResponse(BaseResponse[AsyncToolkit, FormattableT]):
             input_messages=input_messages,
             assistant_message=assistant_message,
             finish_reason=finish_reason,
+            usage=usage,
         )

     async def execute_tools(self) -> Sequence[ToolOutput[Jsonable]]:
@@ -195,6 +200,7 @@ class ContextResponse(
         input_messages: Sequence[Message],
         assistant_message: AssistantMessage,
         finish_reason: FinishReason | None,
+        usage: Usage | None,
     ) -> None:
         """Initialize a `ContextResponse`."""
         toolkit = (
@@ -211,6 +217,7 @@ class ContextResponse(
             input_messages=input_messages,
             assistant_message=assistant_message,
             finish_reason=finish_reason,
+            usage=usage,
         )

     def execute_tools(self, ctx: Context[DepsT]) -> Sequence[ToolOutput[Jsonable]]:
@@ -283,6 +290,7 @@ class AsyncContextResponse(
         input_messages: Sequence[Message],
         assistant_message: AssistantMessage,
         finish_reason: FinishReason | None,
+        usage: Usage | None,
     ) -> None:
         """Initialize an `AsyncContextResponse`."""
         toolkit = (
@@ -301,6 +309,7 @@ class AsyncContextResponse(
             input_messages=input_messages,
             assistant_message=assistant_message,
             finish_reason=finish_reason,
+            usage=usage,
         )

     async def execute_tools(
```
mirascope/llm/responses/root_response.py

```diff
@@ -11,6 +11,7 @@ from ..messages import Message
 from ..tools import ToolkitT
 from . import _utils
 from .finish_reason import FinishReason
+from .usage import Usage

 if TYPE_CHECKING:
     from ..models import Model
@@ -55,12 +56,15 @@ class RootResponse(Generic[ToolkitT, FormattableT], ABC):
     """
     finish_reason: FinishReason | None
     """The reason why the LLM finished generating a response, if set.
-
+
     `finish_reason` is only set if the response did not finish generating normally,
     e.g. `FinishReason.MAX_TOKENS` if the model ran out of tokens before completing.
     When the response generates normally, `response.finish_reason` will be `None`.
     """

+    usage: Usage | None
+    """Token usage statistics for this response, if available."""
+
     format: Format[FormattableT] | None
     """The `Format` describing the structured response format, if available."""

```
mirascope/llm/responses/usage.py

```diff
@@ -0,0 +1,95 @@
+"""Provider-agnostic usage statistics for LLM API calls."""
+
+from __future__ import annotations
+
+from dataclasses import dataclass
+from typing import Any, Literal
+
+
+@dataclass(kw_only=True)
+class UsageDeltaChunk:
+    """A chunk containing incremental token usage information from a streaming response.
+
+    This represents a delta/increment in usage statistics as they arrive during streaming.
+    Multiple UsageDeltaChunks are accumulated to produce the final Usage object.
+    """
+
+    type: Literal["usage_delta_chunk"] = "usage_delta_chunk"
+
+    input_tokens: int = 0
+    """Delta in input tokens."""
+
+    output_tokens: int = 0
+    """Delta in output tokens."""
+
+    cache_read_tokens: int = 0
+    """Delta in cache read tokens."""
+
+    cache_write_tokens: int = 0
+    """Delta in cache write tokens."""
+
+    reasoning_tokens: int = 0
+    """Delta in reasoning/thinking tokens."""
+
+
+@dataclass(kw_only=True)
+class Usage:
+    """Token usage statistics from an LLM API call.
+
+    This abstraction captures common usage metrics across providers while preserving
+    access to the raw provider-specific usage data.
+    """
+
+    input_tokens: int = 0
+    """The number of input tokens used.
+
+    This includes ALL input tokens, including cache read and write tokens.
+
+    Will be 0 if not reported by the provider.
+    """
+
+    output_tokens: int = 0
+    """The number of output tokens used.
+
+    This includes ALL output tokens, including `reasoning_tokens` that may not be
+    in the user's visible output, or other "hidden" tokens.
+
+    Will be 0 if not reported by the provider.
+    """
+
+    cache_read_tokens: int = 0
+    """The number of tokens read from cache (prompt caching).
+
+    These are input tokens that were read from cache. Cache read tokens are generally
+    much less expensive than regular input tokens.
+
+    Will be 0 if not reported by the provider or if caching was not used.
+    """
+
+    cache_write_tokens: int = 0
+    """The number of tokens written to cache (cache creation).
+
+    These are input tokens that were written to cache, for future reuse and retrieval.
+    Cache write tokens are generally more expensive than uncached input tokens,
+    but may lead to cost savings down the line when they are re-read as cache_read_tokens.
+
+    Will be 0 if not reported by the provider or if caching was not used.
+    """
+
+    reasoning_tokens: int = 0
+    """The number of tokens used for reasoning/thinking.
+
+    Reasoning tokens are a subset of output_tokens that were generated as part of the model's
+    interior reasoning process. They are billed as output tokens, though they are generally
+    not shown to the user.
+
+    Will be 0 if not reported by the provider or if the model does not support reasoning.
+    """
+
+    raw: Any = None
+    """The raw usage object from the provider."""
+
+    @property
+    def total_tokens(self) -> int:
+        """The total number of tokens used (input + output)."""
+        return self.input_tokens + self.output_tokens
```
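
For non-streaming calls, `usage` is now threaded through `Response` and the other response classes, so callers can read it straight off the response object. A hedged sketch, loosely following the call style shown in the `TogetherProvider` docstring above; the import path and invocation details are assumptions, not confirmed by this diff:

```python
from mirascope import llm  # assumed import path for the `llm` namespace

@llm.call("openai/gpt-4o-mini")  # model id is a placeholder
def greet():
    return [llm.messages.user("Say hello in one word.")]

response = greet()  # assumes the decorated function returns a Response when called
if response.usage is not None:  # None when the provider reports no usage data
    print(response.usage.input_tokens, response.usage.output_tokens)
    print(response.usage.total_tokens)  # total = input + output
```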
{mirascope-2.0.0a3.dist-info → mirascope-2.0.0a4.dist-info}/METADATA

```diff
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: mirascope
-Version: 2.0.0a3
+Version: 2.0.0a4
 Summary: LLM abstractions that aren't obstructions
 Project-URL: Homepage, https://mirascope.com
 Project-URL: Documentation, https://mirascope.com/docs/mirascope/v2
@@ -51,7 +51,7 @@ Requires-Dist: httpx>=0.27.0
 Requires-Dist: pydantic>=2.0.0
 Requires-Dist: typing-extensions>=4.10.0
 Provides-Extra: all
-Requires-Dist: anthropic<1.0,>=0.
+Requires-Dist: anthropic<1.0,>=0.75.0; extra == 'all'
 Requires-Dist: google-genai<2,>=1.48.0; extra == 'all'
 Requires-Dist: libcst>=1.8.6; extra == 'all'
 Requires-Dist: mcp<2,>=1.0.0; extra == 'all'
@@ -70,7 +70,7 @@ Requires-Dist: pillow<11,>=10.4.0; extra == 'all'
 Requires-Dist: proto-plus>=1.24.0; extra == 'all'
 Requires-Dist: pydantic-settings>=2.12.0; extra == 'all'
 Provides-Extra: anthropic
-Requires-Dist: anthropic<1.0,>=0.
+Requires-Dist: anthropic<1.0,>=0.75.0; extra == 'anthropic'
 Provides-Extra: api
 Requires-Dist: pydantic-settings>=2.12.0; extra == 'api'
 Provides-Extra: google
```