mirascope 2.0.0a3__py3-none-any.whl → 2.0.0a5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mirascope/api/_generated/__init__.py +78 -6
- mirascope/api/_generated/api_keys/__init__.py +7 -0
- mirascope/api/_generated/api_keys/client.py +453 -0
- mirascope/api/_generated/api_keys/raw_client.py +853 -0
- mirascope/api/_generated/api_keys/types/__init__.py +9 -0
- mirascope/api/_generated/api_keys/types/api_keys_create_response.py +36 -0
- mirascope/api/_generated/api_keys/types/api_keys_get_response.py +35 -0
- mirascope/api/_generated/api_keys/types/api_keys_list_response_item.py +35 -0
- mirascope/api/_generated/client.py +14 -0
- mirascope/api/_generated/environments/__init__.py +17 -0
- mirascope/api/_generated/environments/client.py +532 -0
- mirascope/api/_generated/environments/raw_client.py +1088 -0
- mirascope/api/_generated/environments/types/__init__.py +15 -0
- mirascope/api/_generated/environments/types/environments_create_response.py +26 -0
- mirascope/api/_generated/environments/types/environments_get_response.py +26 -0
- mirascope/api/_generated/environments/types/environments_list_response_item.py +26 -0
- mirascope/api/_generated/environments/types/environments_update_response.py +26 -0
- mirascope/api/_generated/errors/__init__.py +11 -1
- mirascope/api/_generated/errors/conflict_error.py +15 -0
- mirascope/api/_generated/errors/forbidden_error.py +15 -0
- mirascope/api/_generated/errors/internal_server_error.py +15 -0
- mirascope/api/_generated/errors/not_found_error.py +15 -0
- mirascope/api/_generated/organizations/__init__.py +25 -0
- mirascope/api/_generated/organizations/client.py +404 -0
- mirascope/api/_generated/organizations/raw_client.py +902 -0
- mirascope/api/_generated/organizations/types/__init__.py +23 -0
- mirascope/api/_generated/organizations/types/organizations_create_response.py +25 -0
- mirascope/api/_generated/organizations/types/organizations_create_response_role.py +7 -0
- mirascope/api/_generated/organizations/types/organizations_get_response.py +25 -0
- mirascope/api/_generated/organizations/types/organizations_get_response_role.py +7 -0
- mirascope/api/_generated/organizations/types/organizations_list_response_item.py +25 -0
- mirascope/api/_generated/organizations/types/organizations_list_response_item_role.py +7 -0
- mirascope/api/_generated/organizations/types/organizations_update_response.py +25 -0
- mirascope/api/_generated/organizations/types/organizations_update_response_role.py +7 -0
- mirascope/api/_generated/projects/__init__.py +17 -0
- mirascope/api/_generated/projects/client.py +482 -0
- mirascope/api/_generated/projects/raw_client.py +1058 -0
- mirascope/api/_generated/projects/types/__init__.py +15 -0
- mirascope/api/_generated/projects/types/projects_create_response.py +31 -0
- mirascope/api/_generated/projects/types/projects_get_response.py +31 -0
- mirascope/api/_generated/projects/types/projects_list_response_item.py +31 -0
- mirascope/api/_generated/projects/types/projects_update_response.py +31 -0
- mirascope/api/_generated/reference.md +1311 -0
- mirascope/api/_generated/types/__init__.py +20 -4
- mirascope/api/_generated/types/already_exists_error.py +24 -0
- mirascope/api/_generated/types/already_exists_error_tag.py +5 -0
- mirascope/api/_generated/types/database_error.py +24 -0
- mirascope/api/_generated/types/database_error_tag.py +5 -0
- mirascope/api/_generated/types/http_api_decode_error.py +1 -3
- mirascope/api/_generated/types/issue.py +1 -5
- mirascope/api/_generated/types/not_found_error_body.py +24 -0
- mirascope/api/_generated/types/not_found_error_tag.py +5 -0
- mirascope/api/_generated/types/permission_denied_error.py +24 -0
- mirascope/api/_generated/types/permission_denied_error_tag.py +7 -0
- mirascope/api/_generated/types/property_key.py +2 -2
- mirascope/api/_generated/types/{property_key_tag.py → property_key_key.py} +3 -5
- mirascope/api/_generated/types/{property_key_tag_tag.py → property_key_key_tag.py} +1 -1
- mirascope/llm/__init__.py +6 -2
- mirascope/llm/exceptions.py +28 -0
- mirascope/llm/providers/__init__.py +12 -4
- mirascope/llm/providers/anthropic/__init__.py +6 -1
- mirascope/llm/providers/anthropic/_utils/__init__.py +17 -5
- mirascope/llm/providers/anthropic/_utils/beta_decode.py +271 -0
- mirascope/llm/providers/anthropic/_utils/beta_encode.py +216 -0
- mirascope/llm/providers/anthropic/_utils/decode.py +39 -7
- mirascope/llm/providers/anthropic/_utils/encode.py +156 -64
- mirascope/llm/providers/anthropic/_utils/errors.py +46 -0
- mirascope/llm/providers/anthropic/beta_provider.py +328 -0
- mirascope/llm/providers/anthropic/model_id.py +10 -27
- mirascope/llm/providers/anthropic/model_info.py +87 -0
- mirascope/llm/providers/anthropic/provider.py +132 -145
- mirascope/llm/providers/base/__init__.py +2 -1
- mirascope/llm/providers/base/_utils.py +15 -1
- mirascope/llm/providers/base/base_provider.py +173 -58
- mirascope/llm/providers/google/_utils/__init__.py +2 -0
- mirascope/llm/providers/google/_utils/decode.py +55 -3
- mirascope/llm/providers/google/_utils/encode.py +14 -6
- mirascope/llm/providers/google/_utils/errors.py +49 -0
- mirascope/llm/providers/google/model_id.py +7 -13
- mirascope/llm/providers/google/model_info.py +62 -0
- mirascope/llm/providers/google/provider.py +13 -8
- mirascope/llm/providers/mlx/_utils.py +31 -2
- mirascope/llm/providers/mlx/encoding/transformers.py +17 -1
- mirascope/llm/providers/mlx/provider.py +12 -0
- mirascope/llm/providers/ollama/__init__.py +19 -0
- mirascope/llm/providers/ollama/provider.py +71 -0
- mirascope/llm/providers/openai/__init__.py +10 -1
- mirascope/llm/providers/openai/_utils/__init__.py +5 -0
- mirascope/llm/providers/openai/_utils/errors.py +46 -0
- mirascope/llm/providers/openai/completions/__init__.py +6 -1
- mirascope/llm/providers/openai/completions/_utils/decode.py +57 -5
- mirascope/llm/providers/openai/completions/_utils/encode.py +9 -8
- mirascope/llm/providers/openai/completions/base_provider.py +513 -0
- mirascope/llm/providers/openai/completions/provider.py +13 -447
- mirascope/llm/providers/openai/model_info.py +57 -0
- mirascope/llm/providers/openai/provider.py +30 -5
- mirascope/llm/providers/openai/responses/_utils/decode.py +55 -4
- mirascope/llm/providers/openai/responses/_utils/encode.py +9 -9
- mirascope/llm/providers/openai/responses/provider.py +33 -28
- mirascope/llm/providers/provider_id.py +11 -1
- mirascope/llm/providers/provider_registry.py +59 -4
- mirascope/llm/providers/together/__init__.py +19 -0
- mirascope/llm/providers/together/provider.py +40 -0
- mirascope/llm/responses/__init__.py +3 -0
- mirascope/llm/responses/base_response.py +4 -0
- mirascope/llm/responses/base_stream_response.py +25 -1
- mirascope/llm/responses/finish_reason.py +1 -0
- mirascope/llm/responses/response.py +9 -0
- mirascope/llm/responses/root_response.py +5 -1
- mirascope/llm/responses/usage.py +95 -0
- mirascope/ops/_internal/closure.py +62 -11
- {mirascope-2.0.0a3.dist-info → mirascope-2.0.0a5.dist-info}/METADATA +3 -3
- {mirascope-2.0.0a3.dist-info → mirascope-2.0.0a5.dist-info}/RECORD +115 -56
- mirascope/llm/providers/load_provider.py +0 -48
- mirascope/llm/providers/openai/shared/__init__.py +0 -7
- mirascope/llm/providers/openai/shared/_utils.py +0 -59
- {mirascope-2.0.0a3.dist-info → mirascope-2.0.0a5.dist-info}/WHEEL +0 -0
- {mirascope-2.0.0a3.dist-info → mirascope-2.0.0a5.dist-info}/licenses/LICENSE +0 -0
@@ -29,6 +29,8 @@ from .....responses import (
     FinishReasonChunk,
     RawMessageChunk,
     RawStreamEventChunk,
+    Usage,
+    UsageDeltaChunk,
 )
 from ...model_id import OpenAIModelId, model_name
 
@@ -38,6 +40,33 @@ INCOMPLETE_DETAILS_TO_FINISH_REASON = {
 }
 
 
+def _decode_usage(
+    usage: openai_types.ResponseUsage | None,
+) -> Usage | None:
+    """Convert OpenAI ResponseUsage to Mirascope Usage."""
+    if usage is None:  # pragma: no cover
+        return None
+
+    return Usage(
+        input_tokens=usage.input_tokens,
+        output_tokens=usage.output_tokens,
+        cache_read_tokens=(
+            usage.input_tokens_details.cached_tokens
+            if usage.input_tokens_details
+            else None
+        )
+        or 0,
+        cache_write_tokens=0,
+        reasoning_tokens=(
+            usage.output_tokens_details.reasoning_tokens
+            if usage.output_tokens_details
+            else None
+        )
+        or 0,
+        raw=usage,
+    )
+
+
 def _serialize_output_item(
     item: openai_types.ResponseOutputItem,
 ) -> dict[str, Any]:
@@ -48,9 +77,9 @@ def _serialize_output_item(
 def decode_response(
     response: openai_types.Response,
     model_id: OpenAIModelId,
-    provider_id:
-) -> tuple[AssistantMessage, FinishReason | None]:
-    """Convert OpenAI Responses Response to mirascope AssistantMessage."""
+    provider_id: str,
+) -> tuple[AssistantMessage, FinishReason | None, Usage | None]:
+    """Convert OpenAI Responses Response to mirascope AssistantMessage and usage."""
     parts: list[AssistantContentPart] = []
     finish_reason: FinishReason | None = None
     refused = False
@@ -100,7 +129,8 @@ def decode_response(
         ],
     )
 
-
+    usage = _decode_usage(response.usage)
+    return assistant_message, finish_reason, usage
 
 
 class _OpenAIResponsesChunkProcessor:
@@ -176,6 +206,27 @@ class _OpenAIResponsesChunkProcessor:
         if self.refusal_encountered:
             yield FinishReasonChunk(finish_reason=FinishReason.REFUSAL)
 
+        # Emit usage delta if present
+        if event.response.usage:
+            usage = event.response.usage
+            yield UsageDeltaChunk(
+                input_tokens=usage.input_tokens,
+                output_tokens=usage.output_tokens,
+                cache_read_tokens=(
+                    usage.input_tokens_details.cached_tokens
+                    if usage.input_tokens_details
+                    else None
+                )
+                or 0,
+                cache_write_tokens=0,
+                reasoning_tokens=(
+                    usage.output_tokens_details.reasoning_tokens
+                    if usage.output_tokens_details
+                    else None
+                )
+                or 0,
+            )
+
 
 def decode_stream(
     openai_stream: Stream[ResponseStreamEvent],
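The new `mirascope/llm/responses/usage.py` module listed above (+95 lines) is not itself shown in this diff, so its layout can only be inferred from the hunks that use it. A minimal sketch of the shape the decode hunks above and the stream-response hunks further down assume (field names taken from that code; the dataclass layout and defaults are guesses):

```python
# Hypothetical sketch of the Usage types the decode/stream hunks rely on; the
# real mirascope/llm/responses/usage.py is not included in this diff.
from dataclasses import dataclass
from typing import Any, Literal


@dataclass
class Usage:
    input_tokens: int = 0
    output_tokens: int = 0
    cache_read_tokens: int = 0
    cache_write_tokens: int = 0
    reasoning_tokens: int = 0
    raw: Any = None  # provider-specific usage payload, e.g. an OpenAI ResponseUsage


@dataclass
class UsageDeltaChunk:
    input_tokens: int
    output_tokens: int
    cache_read_tokens: int
    cache_write_tokens: int
    reasoning_tokens: int
    type: Literal["usage_delta_chunk"] = "usage_delta_chunk"
```

This matches how the stream classes later start from `Usage()` with all counters at zero and fold each `UsageDeltaChunk` field into it.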
@@ -40,8 +40,11 @@ from .....messages import AssistantMessage, Message, UserMessage
 from .....tools import FORMAT_TOOL_NAME, AnyToolSchema, BaseToolkit
 from ....base import Params, _utils as _base_utils
 from ...model_id import OpenAIModelId, model_name
-from ...model_info import
-
+from ...model_info import (
+    MODELS_WITHOUT_JSON_OBJECT_SUPPORT,
+    MODELS_WITHOUT_JSON_SCHEMA_SUPPORT,
+    NON_REASONING_MODELS,
+)
 
 
 class ResponseCreateKwargs(TypedDict, total=False):
@@ -197,7 +200,7 @@ def _convert_tool_to_function_tool_param(tool: AnyToolSchema) -> FunctionToolParam
     """Convert a Mirascope ToolSchema to OpenAI Responses FunctionToolParam."""
     schema_dict = tool.parameters.model_dump(by_alias=True, exclude_none=True)
     schema_dict["type"] = "object"
-
+    _base_utils.ensure_additional_properties_false(schema_dict)
 
     return FunctionToolParam(
         type="function",
@@ -220,7 +223,7 @@ def _create_strict_response_format(
         ResponseFormatTextJSONSchemaConfigParam for strict structured outputs
     """
     schema = format.schema.copy()
-
+    _base_utils.ensure_additional_properties_false(schema)
 
     response_format: ResponseFormatTextJSONSchemaConfigParam = {
         "type": "json_schema",
@@ -294,9 +297,7 @@ def encode_request(
     tools = tools.tools if isinstance(tools, BaseToolkit) else tools or []
     openai_tools = [_convert_tool_to_function_tool_param(tool) for tool in tools]
 
-    model_supports_strict =
-        model_id not in _shared_utils.MODELS_WITHOUT_JSON_SCHEMA_SUPPORT
-    )
+    model_supports_strict = model_id not in MODELS_WITHOUT_JSON_SCHEMA_SUPPORT
     default_mode = "strict" if model_supports_strict else "tool"
 
     format = resolve_format(format, default_mode=default_mode)
@@ -323,8 +324,7 @@ def encode_request(
             name=FORMAT_TOOL_NAME,
         )
     elif (
-        format.mode == "json"
-        and model_id not in _shared_utils.MODELS_WITHOUT_JSON_OBJECT_SUPPORT
+        format.mode == "json" and model_id not in MODELS_WITHOUT_JSON_OBJECT_SUPPORT
     ):
         kwargs["text"] = {"format": ResponseFormatJSONObject(type="json_object")}
 
@@ -1,12 +1,12 @@
 """OpenAI Responses API client implementation."""
 
 from collections.abc import Sequence
-from typing import Literal
 from typing_extensions import Unpack
 
-from openai import AsyncOpenAI, OpenAI
+from openai import AsyncOpenAI, BadRequestError as OpenAIBadRequestError, OpenAI
 
 from ....context import Context, DepsT
+from ....exceptions import BadRequestError, NotFoundError
 from ....formatting import Format, FormattableT
 from ....messages import Message
 from ....responses import (
@@ -30,6 +30,7 @@ from ....tools import (
     Toolkit,
 )
 from ...base import BaseProvider, Params
+from .. import _utils as _shared_utils
 from ..model_id import OpenAIModelId, model_name
 from . import _utils
 
@@ -39,20 +40,26 @@ class OpenAIResponsesProvider(BaseProvider[OpenAI]):
 
     id = "openai:responses"
     default_scope = "openai/"
+    error_map = {
+        **_shared_utils.OPENAI_ERROR_MAP,
+        OpenAIBadRequestError: lambda e: NotFoundError
+        if hasattr(e, "code") and e.code == "model_not_found"  # pyright: ignore[reportAttributeAccessIssue,reportUnknownMemberType]
+        else BadRequestError,
+    }
 
     def __init__(
         self,
         *,
         api_key: str | None = None,
         base_url: str | None = None,
-        wrapped_by_openai_provider: bool = False,
     ) -> None:
         """Initialize the OpenAI Responses client."""
         self.client = OpenAI(api_key=api_key, base_url=base_url)
         self.async_client = AsyncOpenAI(api_key=api_key, base_url=base_url)
-
-
-
+
+    def get_error_status(self, e: Exception) -> int | None:
+        """Extract HTTP status code from OpenAI exception."""
+        return getattr(e, "status_code", None)
 
     def _call(
         self,
@@ -82,17 +89,16 @@ class OpenAIResponsesProvider(BaseProvider[OpenAI]):
             format=format,
             params=params,
         )
-
         openai_response = self.client.responses.create(**kwargs)
 
-        assistant_message, finish_reason = _utils.decode_response(
-            openai_response, model_id, self.
+        assistant_message, finish_reason, usage = _utils.decode_response(
+            openai_response, model_id, self.id
         )
         provider_model_name = model_name(model_id, "responses")
 
         return Response(
             raw=openai_response,
-            provider_id=self.
+            provider_id=self.id,
             model_id=model_id,
             provider_model_name=provider_model_name,
             params=params,
@@ -100,6 +106,7 @@ class OpenAIResponsesProvider(BaseProvider[OpenAI]):
             input_messages=messages,
             assistant_message=assistant_message,
             finish_reason=finish_reason,
+            usage=usage,
             format=format,
         )
 
@@ -131,17 +138,16 @@ class OpenAIResponsesProvider(BaseProvider[OpenAI]):
             format=format,
             params=params,
         )
-
         openai_response = await self.async_client.responses.create(**kwargs)
 
-        assistant_message, finish_reason = _utils.decode_response(
-            openai_response, model_id, self.
+        assistant_message, finish_reason, usage = _utils.decode_response(
+            openai_response, model_id, self.id
         )
         provider_model_name = model_name(model_id, "responses")
 
         return AsyncResponse(
             raw=openai_response,
-            provider_id=self.
+            provider_id=self.id,
             model_id=model_id,
             provider_model_name=provider_model_name,
             params=params,
@@ -149,6 +155,7 @@ class OpenAIResponsesProvider(BaseProvider[OpenAI]):
             input_messages=messages,
             assistant_message=assistant_message,
             finish_reason=finish_reason,
+            usage=usage,
             format=format,
         )
 
@@ -180,7 +187,6 @@ class OpenAIResponsesProvider(BaseProvider[OpenAI]):
             format=format,
             params=params,
         )
-
         openai_stream = self.client.responses.create(
             **kwargs,
             stream=True,
@@ -192,7 +198,7 @@ class OpenAIResponsesProvider(BaseProvider[OpenAI]):
         provider_model_name = model_name(model_id, "responses")
 
         return StreamResponse(
-            provider_id=self.
+            provider_id=self.id,
             model_id=model_id,
             provider_model_name=provider_model_name,
             params=params,
@@ -230,7 +236,6 @@ class OpenAIResponsesProvider(BaseProvider[OpenAI]):
             format=format,
             params=params,
         )
-
         openai_stream = await self.async_client.responses.create(
             **kwargs,
             stream=True,
@@ -242,7 +247,7 @@ class OpenAIResponsesProvider(BaseProvider[OpenAI]):
         provider_model_name = model_name(model_id, "responses")
 
         return AsyncStreamResponse(
-            provider_id=self.
+            provider_id=self.id,
             model_id=model_id,
             provider_model_name=provider_model_name,
             params=params,
@@ -284,17 +289,16 @@ class OpenAIResponsesProvider(BaseProvider[OpenAI]):
             format=format,
             params=params,
        )
-
        openai_response = self.client.responses.create(**kwargs)
 
-        assistant_message, finish_reason = _utils.decode_response(
-            openai_response, model_id, self.
+        assistant_message, finish_reason, usage = _utils.decode_response(
+            openai_response, model_id, self.id
         )
         provider_model_name = model_name(model_id, "responses")
 
         return ContextResponse(
             raw=openai_response,
-            provider_id=self.
+            provider_id=self.id,
             model_id=model_id,
             provider_model_name=provider_model_name,
             params=params,
@@ -302,6 +306,7 @@ class OpenAIResponsesProvider(BaseProvider[OpenAI]):
             input_messages=messages,
             assistant_message=assistant_message,
             finish_reason=finish_reason,
+            usage=usage,
             format=format,
         )
 
@@ -337,17 +342,16 @@ class OpenAIResponsesProvider(BaseProvider[OpenAI]):
             format=format,
             params=params,
         )
-
         openai_response = await self.async_client.responses.create(**kwargs)
 
-        assistant_message, finish_reason = _utils.decode_response(
-            openai_response, model_id, self.
+        assistant_message, finish_reason, usage = _utils.decode_response(
+            openai_response, model_id, self.id
         )
         provider_model_name = model_name(model_id, "responses")
 
         return AsyncContextResponse(
             raw=openai_response,
-            provider_id=self.
+            provider_id=self.id,
             model_id=model_id,
             provider_model_name=provider_model_name,
             params=params,
@@ -355,6 +359,7 @@ class OpenAIResponsesProvider(BaseProvider[OpenAI]):
             input_messages=messages,
             assistant_message=assistant_message,
             finish_reason=finish_reason,
+            usage=usage,
             format=format,
         )
 
@@ -402,7 +407,7 @@ class OpenAIResponsesProvider(BaseProvider[OpenAI]):
         provider_model_name = model_name(model_id, "responses")
 
         return ContextStreamResponse(
-            provider_id=self.
+            provider_id=self.id,
             model_id=model_id,
             provider_model_name=provider_model_name,
             params=params,
@@ -459,7 +464,7 @@ class OpenAIResponsesProvider(BaseProvider[OpenAI]):
         provider_model_name = model_name(model_id, "responses")
 
         return AsyncContextStreamResponse(
-            provider_id=self.
+            provider_id=self.id,
             model_id=model_id,
             provider_model_name=provider_model_name,
             params=params,
@@ -4,10 +4,20 @@ from typing import Literal, TypeAlias, get_args
 
 KnownProviderId: TypeAlias = Literal[
     "anthropic",  # Anthropic provider via AnthropicProvider
+    "anthropic-beta",  # Anthropic beta provider via AnthropicBetaProvider
     "google",  # Google provider via GoogleProvider
-    "openai",  # OpenAI provider via OpenAIProvider
     "mlx",  # Local inference powered by `mlx-lm`, via MLXProvider
+    "ollama",  # Ollama provider via OllamaProvider
+    "openai",  # OpenAI provider via OpenAIProvider (prefers Responses routing when available)
+    "together",  # Together AI provider via TogetherProvider
 ]
 KNOWN_PROVIDER_IDS = get_args(KnownProviderId)
 
 ProviderId = KnownProviderId | str
+
+OpenAICompletionsCompatibleProviderId: TypeAlias = Literal[
+    "ollama",  # Ollama (OpenAI-compatible)
+    "openai",  # OpenAI via OpenAIProvider (routes to completions)
+    "openai:completions",  # OpenAI Completions API directly
+    "together",  # Together AI (OpenAI-compatible)
+]
@@ -1,26 +1,81 @@
 """Provider registry for managing provider instances and scopes."""
 
+from functools import lru_cache
 from typing import overload
 
 from ..exceptions import NoRegisteredProviderError
+from .anthropic import AnthropicProvider
 from .base import Provider
-from .
+from .google import GoogleProvider
+from .mlx import MLXProvider
+from .ollama import OllamaProvider
+from .openai import OpenAIProvider
+from .openai.completions.provider import OpenAICompletionsProvider
+from .openai.responses.provider import OpenAIResponsesProvider
 from .provider_id import ProviderId
+from .together import TogetherProvider
 
 # Global registry mapping scopes to providers
 # Scopes are matched by prefix (longest match wins)
 PROVIDER_REGISTRY: dict[str, Provider] = {}
 
+
+def reset_provider_registry() -> None:
+    """Resets the provider registry, clearing all registered providers."""
+    PROVIDER_REGISTRY.clear()
+    provider_singleton.cache_clear()
+
+
 # Default auto-registration mapping for built-in providers
 # These providers will be automatically registered on first use
 DEFAULT_AUTO_REGISTER_SCOPES: dict[str, ProviderId] = {
     "anthropic/": "anthropic",
     "google/": "google",
-    "openai/": "openai",
     "mlx-community/": "mlx",
+    "ollama/": "ollama",
+    "openai/": "openai",
+    "together/": "together",
 }
 
 
+@lru_cache(maxsize=256)
+def provider_singleton(
+    provider_id: ProviderId, *, api_key: str | None = None, base_url: str | None = None
+) -> Provider:
+    """Create a cached provider instance for the specified provider id.
+
+    Args:
+        provider_id: The provider name ("openai", "anthropic", or "google").
+        api_key: API key for authentication. If None, uses provider-specific env var.
+        base_url: Base URL for the API. If None, uses provider-specific env var.
+
+    Returns:
+        A cached provider instance for the specified provider with the given parameters.
+
+    Raises:
+        ValueError: If the provider_id is not supported.
+    """
+    match provider_id:
+        case "anthropic":
+            return AnthropicProvider(api_key=api_key, base_url=base_url)
+        case "google":
+            return GoogleProvider(api_key=api_key, base_url=base_url)
+        case "mlx":  # pragma: no cover (MLX is only available on macOS)
+            return MLXProvider()
+        case "ollama":
+            return OllamaProvider(api_key=api_key, base_url=base_url)
+        case "openai":
+            return OpenAIProvider(api_key=api_key, base_url=base_url)
+        case "openai:completions":
+            return OpenAICompletionsProvider(api_key=api_key, base_url=base_url)
+        case "openai:responses":
+            return OpenAIResponsesProvider(api_key=api_key, base_url=base_url)
+        case "together":
+            return TogetherProvider(api_key=api_key, base_url=base_url)
+        case _:  # pragma: no cover
+            raise ValueError(f"Unknown provider: '{provider_id}'")
+
+
 @overload
 def register_provider(
     provider: Provider,
@@ -98,7 +153,7 @@
     """
 
     if isinstance(provider, str):
-        provider =
+        provider = provider_singleton(provider, api_key=api_key, base_url=base_url)
 
     if scope is None:
         scope = provider.default_scope
@@ -158,7 +213,7 @@ def get_provider_for_model(model_id: str) -> Provider:
     if matching_defaults:
         best_scope = max(matching_defaults, key=len)
         provider_id = DEFAULT_AUTO_REGISTER_SCOPES[best_scope]
-        provider =
+        provider = provider_singleton(provider_id)
         # Auto-register for future calls
         PROVIDER_REGISTRY[best_scope] = provider
         return provider
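As a rough illustration of the new string-id path (the module path is taken from the file list above; whether these names are also re-exported elsewhere is not shown in this diff), registering by id now routes through the `lru_cache`d `provider_singleton`, so identical registrations share one client instance:

```python
# Illustration only: assumes importing directly from the registry module path.
from mirascope.llm.providers.provider_registry import (
    provider_singleton,
    register_provider,
)

# Both calls resolve "together" through provider_singleton with the same
# (api_key=None, base_url=None) arguments, so they reuse one TogetherProvider.
register_provider("together", "meta-llama/")
register_provider("together", "Qwen/")  # hypothetical second scope

assert provider_singleton("together") is provider_singleton("together")
```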
@@ -0,0 +1,19 @@
+"""Together AI provider implementation."""
+
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from .provider import TogetherProvider
+else:
+    try:
+        from .provider import TogetherProvider
+    except ImportError:  # pragma: no cover
+        from .._missing_import_stubs import (
+            create_provider_stub,
+        )
+
+        TogetherProvider = create_provider_stub("openai", "TogetherProvider")
+
+__all__ = [
+    "TogetherProvider",
+]
@@ -0,0 +1,40 @@
+"""Together AI provider implementation."""
+
+from typing import ClassVar
+
+from ..openai.completions.base_provider import BaseOpenAICompletionsProvider
+
+
+class TogetherProvider(BaseOpenAICompletionsProvider):
+    """Provider for Together AI's OpenAI-compatible API.
+
+    Inherits from BaseOpenAICompletionsProvider with Together-specific configuration:
+    - Uses Together AI's API endpoint
+    - Requires TOGETHER_API_KEY
+
+    Usage:
+        Register the provider with model ID prefixes you want to use:
+
+        ```python
+        import llm
+
+        # Register for meta-llama models
+        llm.register_provider("together", "meta-llama/")
+
+        # Now you can use meta-llama models directly
+        @llm.call("meta-llama/Llama-3.3-70B-Instruct-Turbo")
+        def my_prompt():
+            return [llm.messages.user("Hello!")]
+        ```
+    """
+
+    id: ClassVar[str] = "together"
+    default_scope: ClassVar[str | list[str]] = []
+    default_base_url: ClassVar[str | None] = "https://api.together.xyz/v1"
+    api_key_env_var: ClassVar[str] = "TOGETHER_API_KEY"
+    api_key_required: ClassVar[bool] = True
+    provider_name: ClassVar[str | None] = "Together"
+
+    def _model_name(self, model_id: str) -> str:
+        """Return the model ID as-is for Together API."""
+        return model_id
@@ -27,6 +27,7 @@ from .streams import (
     ThoughtStream,
     ToolCallStream,
 )
+from .usage import Usage, UsageDeltaChunk
 
 __all__ = [
     "AsyncChunkIterator",
@@ -53,5 +54,7 @@ __all__ = [
     "TextStream",
     "ThoughtStream",
     "ToolCallStream",
+    "Usage",
+    "UsageDeltaChunk",
     "_utils",
 ]
@@ -9,6 +9,7 @@ from ..messages import AssistantMessage, Message
 from ..tools import FORMAT_TOOL_NAME, ToolkitT
 from .finish_reason import FinishReason
 from .root_response import RootResponse
+from .usage import Usage
 
 if TYPE_CHECKING:
     from ..providers import ModelId, Params, ProviderId
@@ -30,6 +31,7 @@ class BaseResponse(RootResponse[ToolkitT, FormattableT]):
         input_messages: Sequence[Message],
         assistant_message: AssistantMessage,
         finish_reason: FinishReason | None,
+        usage: Usage | None,
     ) -> None:
         """Initialize a Response.
 
@@ -45,6 +47,7 @@ class BaseResponse(RootResponse[ToolkitT, FormattableT]):
             input_messages: The message history before the final assistant message.
             assistant_message: The final assistant message containing the response content.
             finish_reason: The reason why the LLM finished generating a response.
+            usage: Token usage statistics for the response.
         """
         self.raw = raw
         self.provider_id = provider_id
@@ -53,6 +56,7 @@ class BaseResponse(RootResponse[ToolkitT, FormattableT]):
         self.params = params
         self.toolkit = toolkit
         self.finish_reason = finish_reason
+        self.usage = usage
         self.format = format
 
         # Process content in the assistant message, organizing it by type and
@@ -36,6 +36,7 @@ from .streams import (
     ThoughtStream,
     ToolCallStream,
 )
+from .usage import Usage, UsageDeltaChunk
 
 if TYPE_CHECKING:
     from ..providers import ModelId, Params, ProviderId
@@ -76,7 +77,11 @@ class RawMessageChunk:
 
 
 StreamResponseChunk: TypeAlias = (
-    AssistantContentChunk
+    AssistantContentChunk
+    | FinishReasonChunk
+    | RawStreamEventChunk
+    | RawMessageChunk
+    | UsageDeltaChunk
 )
 
 ChunkIterator: TypeAlias = Iterator[StreamResponseChunk]
@@ -165,6 +170,7 @@ class BaseStreamResponse(
         format: Format[FormattableT] | None = None,
         input_messages: Sequence[Message],
         chunk_iterator: ChunkIteratorT,
+        usage: Usage | None = None,
     ) -> None:
         """Initialize the BaseStreamResponse.
 
@@ -177,6 +183,7 @@
             toolkit: Toolkit containing all the tools used to generate the response.
             format: The `Format` for the expected structured output format (or None).
             input_messages: The input messages that were sent to the LLM
+            usage: Token usage statistics for the response.
 
         The BaseStreamResponse will process the tuples to build the chunks and raw lists
         as the stream is consumed.
@@ -187,6 +194,7 @@
         self.provider_model_name = provider_model_name
         self.params = params
         self.toolkit = toolkit
+        self.usage = usage
         self.format = format
 
         # Internal-only lists which we mutate (append) during chunk processing
@@ -475,6 +483,14 @@ class BaseSyncStreamResponse(BaseStreamResponse[ChunkIterator, ToolkitT, Formatt
             self._assistant_message.raw_message = chunk.raw_message
         elif chunk.type == "finish_reason_chunk":
             self.finish_reason = chunk.finish_reason
+        elif chunk.type == "usage_delta_chunk":
+            if self.usage is None:
+                self.usage = Usage()
+            self.usage.input_tokens += chunk.input_tokens
+            self.usage.output_tokens += chunk.output_tokens
+            self.usage.cache_read_tokens += chunk.cache_read_tokens
+            self.usage.cache_write_tokens += chunk.cache_write_tokens
+            self.usage.reasoning_tokens += chunk.reasoning_tokens
         else:
             yield self._handle_chunk(chunk)
 
@@ -648,6 +664,14 @@ class BaseAsyncStreamResponse(
             self._assistant_message.raw_message = chunk.raw_message
         elif chunk.type == "finish_reason_chunk":
             self.finish_reason = chunk.finish_reason
+        elif chunk.type == "usage_delta_chunk":
+            if self.usage is None:
+                self.usage = Usage()
+            self.usage.input_tokens += chunk.input_tokens
+            self.usage.output_tokens += chunk.output_tokens
+            self.usage.cache_read_tokens += chunk.cache_read_tokens
+            self.usage.cache_write_tokens += chunk.cache_write_tokens
+            self.usage.reasoning_tokens += chunk.reasoning_tokens
         else:
             yield self._handle_chunk(chunk)
 
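Read together, the accumulation rule in both stream classes is simple: the first `UsageDeltaChunk` lazily creates a zeroed `Usage`, and every later delta adds onto it. A small self-contained walk-through using the hypothetical `Usage`/`UsageDeltaChunk` sketch from earlier:

```python
# Worked example of the accumulation loop from BaseSyncStreamResponse /
# BaseAsyncStreamResponse, using the sketch types defined above.
deltas = [
    UsageDeltaChunk(input_tokens=12, output_tokens=0, cache_read_tokens=4,
                    cache_write_tokens=0, reasoning_tokens=0),
    UsageDeltaChunk(input_tokens=0, output_tokens=37, cache_read_tokens=0,
                    cache_write_tokens=0, reasoning_tokens=9),
]

usage = None
for chunk in deltas:
    if usage is None:
        usage = Usage()
    usage.input_tokens += chunk.input_tokens
    usage.output_tokens += chunk.output_tokens
    usage.cache_read_tokens += chunk.cache_read_tokens
    usage.cache_write_tokens += chunk.cache_write_tokens
    usage.reasoning_tokens += chunk.reasoning_tokens

# Once the stream is fully consumed, the totals are available on the response:
# input_tokens=12, output_tokens=37, cache_read_tokens=4, reasoning_tokens=9.
```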