mirascope 2.0.0a3__py3-none-any.whl → 2.0.0a5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (118)
  1. mirascope/api/_generated/__init__.py +78 -6
  2. mirascope/api/_generated/api_keys/__init__.py +7 -0
  3. mirascope/api/_generated/api_keys/client.py +453 -0
  4. mirascope/api/_generated/api_keys/raw_client.py +853 -0
  5. mirascope/api/_generated/api_keys/types/__init__.py +9 -0
  6. mirascope/api/_generated/api_keys/types/api_keys_create_response.py +36 -0
  7. mirascope/api/_generated/api_keys/types/api_keys_get_response.py +35 -0
  8. mirascope/api/_generated/api_keys/types/api_keys_list_response_item.py +35 -0
  9. mirascope/api/_generated/client.py +14 -0
  10. mirascope/api/_generated/environments/__init__.py +17 -0
  11. mirascope/api/_generated/environments/client.py +532 -0
  12. mirascope/api/_generated/environments/raw_client.py +1088 -0
  13. mirascope/api/_generated/environments/types/__init__.py +15 -0
  14. mirascope/api/_generated/environments/types/environments_create_response.py +26 -0
  15. mirascope/api/_generated/environments/types/environments_get_response.py +26 -0
  16. mirascope/api/_generated/environments/types/environments_list_response_item.py +26 -0
  17. mirascope/api/_generated/environments/types/environments_update_response.py +26 -0
  18. mirascope/api/_generated/errors/__init__.py +11 -1
  19. mirascope/api/_generated/errors/conflict_error.py +15 -0
  20. mirascope/api/_generated/errors/forbidden_error.py +15 -0
  21. mirascope/api/_generated/errors/internal_server_error.py +15 -0
  22. mirascope/api/_generated/errors/not_found_error.py +15 -0
  23. mirascope/api/_generated/organizations/__init__.py +25 -0
  24. mirascope/api/_generated/organizations/client.py +404 -0
  25. mirascope/api/_generated/organizations/raw_client.py +902 -0
  26. mirascope/api/_generated/organizations/types/__init__.py +23 -0
  27. mirascope/api/_generated/organizations/types/organizations_create_response.py +25 -0
  28. mirascope/api/_generated/organizations/types/organizations_create_response_role.py +7 -0
  29. mirascope/api/_generated/organizations/types/organizations_get_response.py +25 -0
  30. mirascope/api/_generated/organizations/types/organizations_get_response_role.py +7 -0
  31. mirascope/api/_generated/organizations/types/organizations_list_response_item.py +25 -0
  32. mirascope/api/_generated/organizations/types/organizations_list_response_item_role.py +7 -0
  33. mirascope/api/_generated/organizations/types/organizations_update_response.py +25 -0
  34. mirascope/api/_generated/organizations/types/organizations_update_response_role.py +7 -0
  35. mirascope/api/_generated/projects/__init__.py +17 -0
  36. mirascope/api/_generated/projects/client.py +482 -0
  37. mirascope/api/_generated/projects/raw_client.py +1058 -0
  38. mirascope/api/_generated/projects/types/__init__.py +15 -0
  39. mirascope/api/_generated/projects/types/projects_create_response.py +31 -0
  40. mirascope/api/_generated/projects/types/projects_get_response.py +31 -0
  41. mirascope/api/_generated/projects/types/projects_list_response_item.py +31 -0
  42. mirascope/api/_generated/projects/types/projects_update_response.py +31 -0
  43. mirascope/api/_generated/reference.md +1311 -0
  44. mirascope/api/_generated/types/__init__.py +20 -4
  45. mirascope/api/_generated/types/already_exists_error.py +24 -0
  46. mirascope/api/_generated/types/already_exists_error_tag.py +5 -0
  47. mirascope/api/_generated/types/database_error.py +24 -0
  48. mirascope/api/_generated/types/database_error_tag.py +5 -0
  49. mirascope/api/_generated/types/http_api_decode_error.py +1 -3
  50. mirascope/api/_generated/types/issue.py +1 -5
  51. mirascope/api/_generated/types/not_found_error_body.py +24 -0
  52. mirascope/api/_generated/types/not_found_error_tag.py +5 -0
  53. mirascope/api/_generated/types/permission_denied_error.py +24 -0
  54. mirascope/api/_generated/types/permission_denied_error_tag.py +7 -0
  55. mirascope/api/_generated/types/property_key.py +2 -2
  56. mirascope/api/_generated/types/{property_key_tag.py → property_key_key.py} +3 -5
  57. mirascope/api/_generated/types/{property_key_tag_tag.py → property_key_key_tag.py} +1 -1
  58. mirascope/llm/__init__.py +6 -2
  59. mirascope/llm/exceptions.py +28 -0
  60. mirascope/llm/providers/__init__.py +12 -4
  61. mirascope/llm/providers/anthropic/__init__.py +6 -1
  62. mirascope/llm/providers/anthropic/_utils/__init__.py +17 -5
  63. mirascope/llm/providers/anthropic/_utils/beta_decode.py +271 -0
  64. mirascope/llm/providers/anthropic/_utils/beta_encode.py +216 -0
  65. mirascope/llm/providers/anthropic/_utils/decode.py +39 -7
  66. mirascope/llm/providers/anthropic/_utils/encode.py +156 -64
  67. mirascope/llm/providers/anthropic/_utils/errors.py +46 -0
  68. mirascope/llm/providers/anthropic/beta_provider.py +328 -0
  69. mirascope/llm/providers/anthropic/model_id.py +10 -27
  70. mirascope/llm/providers/anthropic/model_info.py +87 -0
  71. mirascope/llm/providers/anthropic/provider.py +132 -145
  72. mirascope/llm/providers/base/__init__.py +2 -1
  73. mirascope/llm/providers/base/_utils.py +15 -1
  74. mirascope/llm/providers/base/base_provider.py +173 -58
  75. mirascope/llm/providers/google/_utils/__init__.py +2 -0
  76. mirascope/llm/providers/google/_utils/decode.py +55 -3
  77. mirascope/llm/providers/google/_utils/encode.py +14 -6
  78. mirascope/llm/providers/google/_utils/errors.py +49 -0
  79. mirascope/llm/providers/google/model_id.py +7 -13
  80. mirascope/llm/providers/google/model_info.py +62 -0
  81. mirascope/llm/providers/google/provider.py +13 -8
  82. mirascope/llm/providers/mlx/_utils.py +31 -2
  83. mirascope/llm/providers/mlx/encoding/transformers.py +17 -1
  84. mirascope/llm/providers/mlx/provider.py +12 -0
  85. mirascope/llm/providers/ollama/__init__.py +19 -0
  86. mirascope/llm/providers/ollama/provider.py +71 -0
  87. mirascope/llm/providers/openai/__init__.py +10 -1
  88. mirascope/llm/providers/openai/_utils/__init__.py +5 -0
  89. mirascope/llm/providers/openai/_utils/errors.py +46 -0
  90. mirascope/llm/providers/openai/completions/__init__.py +6 -1
  91. mirascope/llm/providers/openai/completions/_utils/decode.py +57 -5
  92. mirascope/llm/providers/openai/completions/_utils/encode.py +9 -8
  93. mirascope/llm/providers/openai/completions/base_provider.py +513 -0
  94. mirascope/llm/providers/openai/completions/provider.py +13 -447
  95. mirascope/llm/providers/openai/model_info.py +57 -0
  96. mirascope/llm/providers/openai/provider.py +30 -5
  97. mirascope/llm/providers/openai/responses/_utils/decode.py +55 -4
  98. mirascope/llm/providers/openai/responses/_utils/encode.py +9 -9
  99. mirascope/llm/providers/openai/responses/provider.py +33 -28
  100. mirascope/llm/providers/provider_id.py +11 -1
  101. mirascope/llm/providers/provider_registry.py +59 -4
  102. mirascope/llm/providers/together/__init__.py +19 -0
  103. mirascope/llm/providers/together/provider.py +40 -0
  104. mirascope/llm/responses/__init__.py +3 -0
  105. mirascope/llm/responses/base_response.py +4 -0
  106. mirascope/llm/responses/base_stream_response.py +25 -1
  107. mirascope/llm/responses/finish_reason.py +1 -0
  108. mirascope/llm/responses/response.py +9 -0
  109. mirascope/llm/responses/root_response.py +5 -1
  110. mirascope/llm/responses/usage.py +95 -0
  111. mirascope/ops/_internal/closure.py +62 -11
  112. {mirascope-2.0.0a3.dist-info → mirascope-2.0.0a5.dist-info}/METADATA +3 -3
  113. {mirascope-2.0.0a3.dist-info → mirascope-2.0.0a5.dist-info}/RECORD +115 -56
  114. mirascope/llm/providers/load_provider.py +0 -48
  115. mirascope/llm/providers/openai/shared/__init__.py +0 -7
  116. mirascope/llm/providers/openai/shared/_utils.py +0 -59
  117. {mirascope-2.0.0a3.dist-info → mirascope-2.0.0a5.dist-info}/WHEEL +0 -0
  118. {mirascope-2.0.0a3.dist-info → mirascope-2.0.0a5.dist-info}/licenses/LICENSE +0 -0
--- a/mirascope/llm/providers/openai/responses/_utils/decode.py
+++ b/mirascope/llm/providers/openai/responses/_utils/decode.py
@@ -29,6 +29,8 @@ from .....responses import (
     FinishReasonChunk,
     RawMessageChunk,
     RawStreamEventChunk,
+    Usage,
+    UsageDeltaChunk,
 )
 from ...model_id import OpenAIModelId, model_name
 
@@ -38,6 +40,33 @@ INCOMPLETE_DETAILS_TO_FINISH_REASON = {
 }
 
 
+def _decode_usage(
+    usage: openai_types.ResponseUsage | None,
+) -> Usage | None:
+    """Convert OpenAI ResponseUsage to Mirascope Usage."""
+    if usage is None:  # pragma: no cover
+        return None
+
+    return Usage(
+        input_tokens=usage.input_tokens,
+        output_tokens=usage.output_tokens,
+        cache_read_tokens=(
+            usage.input_tokens_details.cached_tokens
+            if usage.input_tokens_details
+            else None
+        )
+        or 0,
+        cache_write_tokens=0,
+        reasoning_tokens=(
+            usage.output_tokens_details.reasoning_tokens
+            if usage.output_tokens_details
+            else None
+        )
+        or 0,
+        raw=usage,
+    )
+
+
 def _serialize_output_item(
     item: openai_types.ResponseOutputItem,
 ) -> dict[str, Any]:
@@ -48,9 +77,9 @@ def _serialize_output_item(
 def decode_response(
     response: openai_types.Response,
     model_id: OpenAIModelId,
-    provider_id: Literal["openai", "openai:responses"],
-) -> tuple[AssistantMessage, FinishReason | None]:
-    """Convert OpenAI Responses Response to mirascope AssistantMessage."""
+    provider_id: str,
+) -> tuple[AssistantMessage, FinishReason | None, Usage | None]:
+    """Convert OpenAI Responses Response to mirascope AssistantMessage and usage."""
     parts: list[AssistantContentPart] = []
     finish_reason: FinishReason | None = None
     refused = False
@@ -100,7 +129,8 @@ def decode_response(
         ],
     )
 
-    return assistant_message, finish_reason
+    usage = _decode_usage(response.usage)
+    return assistant_message, finish_reason, usage
 
 
 class _OpenAIResponsesChunkProcessor:
@@ -176,6 +206,27 @@ class _OpenAIResponsesChunkProcessor:
         if self.refusal_encountered:
             yield FinishReasonChunk(finish_reason=FinishReason.REFUSAL)
 
+        # Emit usage delta if present
+        if event.response.usage:
+            usage = event.response.usage
+            yield UsageDeltaChunk(
+                input_tokens=usage.input_tokens,
+                output_tokens=usage.output_tokens,
+                cache_read_tokens=(
+                    usage.input_tokens_details.cached_tokens
+                    if usage.input_tokens_details
+                    else None
+                )
+                or 0,
+                cache_write_tokens=0,
+                reasoning_tokens=(
+                    usage.output_tokens_details.reasoning_tokens
+                    if usage.output_tokens_details
+                    else None
+                )
+                or 0,
+            )
+
 
 def decode_stream(
     openai_stream: Stream[ResponseStreamEvent],
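For orientation, `_decode_usage` and the `UsageDeltaChunk` emission above assume usage types with exactly the fields they set. Below is a minimal sketch of those shapes inferred from this hunk; the real definitions live in the new `mirascope/llm/responses/usage.py` (listed in the file summary, not shown here) and may differ.

```python
# Minimal sketch only: inferred from how the decoder above constructs these types.
# The actual classes are defined in mirascope/llm/responses/usage.py.
from dataclasses import dataclass
from typing import Any, Literal


@dataclass
class Usage:
    input_tokens: int = 0
    output_tokens: int = 0
    cache_read_tokens: int = 0
    cache_write_tokens: int = 0
    reasoning_tokens: int = 0
    raw: Any = None  # provider-specific usage object, e.g. an OpenAI ResponseUsage


@dataclass(kw_only=True)
class UsageDeltaChunk:
    input_tokens: int
    output_tokens: int
    cache_read_tokens: int
    cache_write_tokens: int
    reasoning_tokens: int
    type: Literal["usage_delta_chunk"] = "usage_delta_chunk"
```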
--- a/mirascope/llm/providers/openai/responses/_utils/encode.py
+++ b/mirascope/llm/providers/openai/responses/_utils/encode.py
@@ -40,8 +40,11 @@ from .....messages import AssistantMessage, Message, UserMessage
 from .....tools import FORMAT_TOOL_NAME, AnyToolSchema, BaseToolkit
 from ....base import Params, _utils as _base_utils
 from ...model_id import OpenAIModelId, model_name
-from ...model_info import NON_REASONING_MODELS
-from ...shared import _utils as _shared_utils
+from ...model_info import (
+    MODELS_WITHOUT_JSON_OBJECT_SUPPORT,
+    MODELS_WITHOUT_JSON_SCHEMA_SUPPORT,
+    NON_REASONING_MODELS,
+)
 
 
 class ResponseCreateKwargs(TypedDict, total=False):
@@ -197,7 +200,7 @@ def _convert_tool_to_function_tool_param(tool: AnyToolSchema) -> FunctionToolParam
     """Convert a Mirascope ToolSchema to OpenAI Responses FunctionToolParam."""
     schema_dict = tool.parameters.model_dump(by_alias=True, exclude_none=True)
     schema_dict["type"] = "object"
-    _shared_utils.ensure_additional_properties_false(schema_dict)
+    _base_utils.ensure_additional_properties_false(schema_dict)
 
     return FunctionToolParam(
         type="function",
@@ -220,7 +223,7 @@ def _create_strict_response_format(
         ResponseFormatTextJSONSchemaConfigParam for strict structured outputs
     """
     schema = format.schema.copy()
-    _shared_utils.ensure_additional_properties_false(schema)
+    _base_utils.ensure_additional_properties_false(schema)
 
     response_format: ResponseFormatTextJSONSchemaConfigParam = {
         "type": "json_schema",
@@ -294,9 +297,7 @@ def encode_request(
     tools = tools.tools if isinstance(tools, BaseToolkit) else tools or []
     openai_tools = [_convert_tool_to_function_tool_param(tool) for tool in tools]
 
-    model_supports_strict = (
-        model_id not in _shared_utils.MODELS_WITHOUT_JSON_SCHEMA_SUPPORT
-    )
+    model_supports_strict = model_id not in MODELS_WITHOUT_JSON_SCHEMA_SUPPORT
     default_mode = "strict" if model_supports_strict else "tool"
 
     format = resolve_format(format, default_mode=default_mode)
@@ -323,8 +324,7 @@ def encode_request(
             name=FORMAT_TOOL_NAME,
         )
     elif (
-        format.mode == "json"
-        and model_id not in _shared_utils.MODELS_WITHOUT_JSON_OBJECT_SUPPORT
+        format.mode == "json" and model_id not in MODELS_WITHOUT_JSON_OBJECT_SUPPORT
     ):
         kwargs["text"] = {"format": ResponseFormatJSONObject(type="json_object")}
 
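Both call sites above now go through `_base_utils.ensure_additional_properties_false`, replacing the helper from the deleted `openai/shared/_utils.py`. Its implementation is not included in this diff; the following is only a rough sketch of what such a helper conventionally does to a JSON schema dict, stated as an assumption rather than the package's actual code.

```python
# Assumption: a conventional implementation of this kind of helper. The real one
# lives in mirascope/llm/providers/base/_utils.py and may handle more cases.
def ensure_additional_properties_false(schema: dict) -> None:
    """Recursively set additionalProperties to False on object schemas, in place."""
    if schema.get("type") == "object":
        schema["additionalProperties"] = False
    for key in ("properties", "$defs", "definitions"):
        for subschema in schema.get(key, {}).values():
            if isinstance(subschema, dict):
                ensure_additional_properties_false(subschema)
    items = schema.get("items")
    if isinstance(items, dict):
        ensure_additional_properties_false(items)
```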
--- a/mirascope/llm/providers/openai/responses/provider.py
+++ b/mirascope/llm/providers/openai/responses/provider.py
@@ -1,12 +1,12 @@
 """OpenAI Responses API client implementation."""
 
 from collections.abc import Sequence
-from typing import Literal
 from typing_extensions import Unpack
 
-from openai import AsyncOpenAI, OpenAI
+from openai import AsyncOpenAI, BadRequestError as OpenAIBadRequestError, OpenAI
 
 from ....context import Context, DepsT
+from ....exceptions import BadRequestError, NotFoundError
 from ....formatting import Format, FormattableT
 from ....messages import Message
 from ....responses import (
@@ -30,6 +30,7 @@ from ....tools import (
     Toolkit,
 )
 from ...base import BaseProvider, Params
+from .. import _utils as _shared_utils
 from ..model_id import OpenAIModelId, model_name
 from . import _utils
 
@@ -39,20 +40,26 @@ class OpenAIResponsesProvider(BaseProvider[OpenAI]):
 
     id = "openai:responses"
     default_scope = "openai/"
+    error_map = {
+        **_shared_utils.OPENAI_ERROR_MAP,
+        OpenAIBadRequestError: lambda e: NotFoundError
+        if hasattr(e, "code") and e.code == "model_not_found"  # pyright: ignore[reportAttributeAccessIssue,reportUnknownMemberType]
+        else BadRequestError,
+    }
 
     def __init__(
         self,
         *,
         api_key: str | None = None,
         base_url: str | None = None,
-        wrapped_by_openai_provider: bool = False,
     ) -> None:
         """Initialize the OpenAI Responses client."""
         self.client = OpenAI(api_key=api_key, base_url=base_url)
         self.async_client = AsyncOpenAI(api_key=api_key, base_url=base_url)
-        self.active_provider_id: Literal["openai", "openai:responses"] = (
-            "openai" if wrapped_by_openai_provider else "openai:responses"
-        )
+
+    def get_error_status(self, e: Exception) -> int | None:
+        """Extract HTTP status code from OpenAI exception."""
+        return getattr(e, "status_code", None)
 
     def _call(
         self,
@@ -82,17 +89,16 @@ class OpenAIResponsesProvider(BaseProvider[OpenAI]):
             format=format,
             params=params,
         )
-
         openai_response = self.client.responses.create(**kwargs)
 
-        assistant_message, finish_reason = _utils.decode_response(
-            openai_response, model_id, self.active_provider_id
+        assistant_message, finish_reason, usage = _utils.decode_response(
+            openai_response, model_id, self.id
         )
         provider_model_name = model_name(model_id, "responses")
 
         return Response(
             raw=openai_response,
-            provider_id=self.active_provider_id,
+            provider_id=self.id,
             model_id=model_id,
             provider_model_name=provider_model_name,
             params=params,
@@ -100,6 +106,7 @@ class OpenAIResponsesProvider(BaseProvider[OpenAI]):
             input_messages=messages,
             assistant_message=assistant_message,
             finish_reason=finish_reason,
+            usage=usage,
             format=format,
         )
 
@@ -131,17 +138,16 @@ class OpenAIResponsesProvider(BaseProvider[OpenAI]):
             format=format,
             params=params,
         )
-
         openai_response = await self.async_client.responses.create(**kwargs)
 
-        assistant_message, finish_reason = _utils.decode_response(
-            openai_response, model_id, self.active_provider_id
+        assistant_message, finish_reason, usage = _utils.decode_response(
+            openai_response, model_id, self.id
        )
         provider_model_name = model_name(model_id, "responses")
 
         return AsyncResponse(
             raw=openai_response,
-            provider_id=self.active_provider_id,
+            provider_id=self.id,
             model_id=model_id,
             provider_model_name=provider_model_name,
             params=params,
@@ -149,6 +155,7 @@ class OpenAIResponsesProvider(BaseProvider[OpenAI]):
             input_messages=messages,
             assistant_message=assistant_message,
             finish_reason=finish_reason,
+            usage=usage,
             format=format,
         )
 
@@ -180,7 +187,6 @@ class OpenAIResponsesProvider(BaseProvider[OpenAI]):
             format=format,
             params=params,
         )
-
         openai_stream = self.client.responses.create(
             **kwargs,
             stream=True,
@@ -192,7 +198,7 @@ class OpenAIResponsesProvider(BaseProvider[OpenAI]):
         provider_model_name = model_name(model_id, "responses")
 
         return StreamResponse(
-            provider_id=self.active_provider_id,
+            provider_id=self.id,
             model_id=model_id,
             provider_model_name=provider_model_name,
             params=params,
@@ -230,7 +236,6 @@ class OpenAIResponsesProvider(BaseProvider[OpenAI]):
             format=format,
             params=params,
         )
-
         openai_stream = await self.async_client.responses.create(
             **kwargs,
             stream=True,
@@ -242,7 +247,7 @@ class OpenAIResponsesProvider(BaseProvider[OpenAI]):
         provider_model_name = model_name(model_id, "responses")
 
         return AsyncStreamResponse(
-            provider_id=self.active_provider_id,
+            provider_id=self.id,
             model_id=model_id,
             provider_model_name=provider_model_name,
             params=params,
@@ -284,17 +289,16 @@ class OpenAIResponsesProvider(BaseProvider[OpenAI]):
             format=format,
             params=params,
         )
-
         openai_response = self.client.responses.create(**kwargs)
 
-        assistant_message, finish_reason = _utils.decode_response(
-            openai_response, model_id, self.active_provider_id
+        assistant_message, finish_reason, usage = _utils.decode_response(
+            openai_response, model_id, self.id
         )
         provider_model_name = model_name(model_id, "responses")
 
         return ContextResponse(
             raw=openai_response,
-            provider_id=self.active_provider_id,
+            provider_id=self.id,
             model_id=model_id,
             provider_model_name=provider_model_name,
             params=params,
@@ -302,6 +306,7 @@ class OpenAIResponsesProvider(BaseProvider[OpenAI]):
             input_messages=messages,
             assistant_message=assistant_message,
             finish_reason=finish_reason,
+            usage=usage,
             format=format,
         )
 
@@ -337,17 +342,16 @@ class OpenAIResponsesProvider(BaseProvider[OpenAI]):
             format=format,
             params=params,
         )
-
         openai_response = await self.async_client.responses.create(**kwargs)
 
-        assistant_message, finish_reason = _utils.decode_response(
-            openai_response, model_id, self.active_provider_id
+        assistant_message, finish_reason, usage = _utils.decode_response(
+            openai_response, model_id, self.id
         )
         provider_model_name = model_name(model_id, "responses")
 
         return AsyncContextResponse(
             raw=openai_response,
-            provider_id=self.active_provider_id,
+            provider_id=self.id,
             model_id=model_id,
             provider_model_name=provider_model_name,
             params=params,
@@ -355,6 +359,7 @@ class OpenAIResponsesProvider(BaseProvider[OpenAI]):
             input_messages=messages,
             assistant_message=assistant_message,
             finish_reason=finish_reason,
+            usage=usage,
             format=format,
         )
 
@@ -402,7 +407,7 @@ class OpenAIResponsesProvider(BaseProvider[OpenAI]):
         provider_model_name = model_name(model_id, "responses")
 
         return ContextStreamResponse(
-            provider_id=self.active_provider_id,
+            provider_id=self.id,
             model_id=model_id,
             provider_model_name=provider_model_name,
             params=params,
@@ -459,7 +464,7 @@ class OpenAIResponsesProvider(BaseProvider[OpenAI]):
         provider_model_name = model_name(model_id, "responses")
 
         return AsyncContextStreamResponse(
-            provider_id=self.active_provider_id,
+            provider_id=self.id,
             model_id=model_id,
             provider_model_name=provider_model_name,
             params=params,
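The `error_map` values registered above are either exception classes or callables that inspect the caught provider exception and return a class: here a 400 whose `code` is `model_not_found` is surfaced as `NotFoundError`, everything else as `BadRequestError`. How the map is consumed is defined in `base/base_provider.py`, which this diff does not include; the sketch below is purely an assumption about that translation step.

```python
# Assumption: an illustration of how an error_map like the one above could be applied.
# The actual wrapping logic lives in mirascope/llm/providers/base/base_provider.py.
def _translate_error(error_map: dict, exc: Exception) -> Exception:
    for source_type, target in error_map.items():
        if isinstance(exc, source_type):
            # A mapped value is either an exception class or a callable that
            # inspects the original exception and returns the class to raise.
            target_cls = target if isinstance(target, type) else target(exc)
            return target_cls(str(exc))
    return exc
```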
--- a/mirascope/llm/providers/provider_id.py
+++ b/mirascope/llm/providers/provider_id.py
@@ -4,10 +4,20 @@ from typing import Literal, TypeAlias, get_args
 
 KnownProviderId: TypeAlias = Literal[
     "anthropic",  # Anthropic provider via AnthropicProvider
+    "anthropic-beta",  # Anthropic beta provider via AnthropicBetaProvider
     "google",  # Google provider via GoogleProvider
-    "openai",  # OpenAI provider via OpenAIProvider
     "mlx",  # Local inference powered by `mlx-lm`, via MLXProvider
+    "ollama",  # Ollama provider via OllamaProvider
+    "openai",  # OpenAI provider via OpenAIProvider (prefers Responses routing when available)
+    "together",  # Together AI provider via TogetherProvider
 ]
 KNOWN_PROVIDER_IDS = get_args(KnownProviderId)
 
 ProviderId = KnownProviderId | str
+
+OpenAICompletionsCompatibleProviderId: TypeAlias = Literal[
+    "ollama",  # Ollama (OpenAI-compatible)
+    "openai",  # OpenAI via OpenAIProvider (routes to completions)
+    "openai:completions",  # OpenAI Completions API directly
+    "together",  # Together AI (OpenAI-compatible)
+]
--- a/mirascope/llm/providers/provider_registry.py
+++ b/mirascope/llm/providers/provider_registry.py
@@ -1,26 +1,81 @@
 """Provider registry for managing provider instances and scopes."""
 
+from functools import lru_cache
 from typing import overload
 
 from ..exceptions import NoRegisteredProviderError
+from .anthropic import AnthropicProvider
 from .base import Provider
-from .load_provider import load_provider
+from .google import GoogleProvider
+from .mlx import MLXProvider
+from .ollama import OllamaProvider
+from .openai import OpenAIProvider
+from .openai.completions.provider import OpenAICompletionsProvider
+from .openai.responses.provider import OpenAIResponsesProvider
 from .provider_id import ProviderId
+from .together import TogetherProvider
 
 # Global registry mapping scopes to providers
 # Scopes are matched by prefix (longest match wins)
 PROVIDER_REGISTRY: dict[str, Provider] = {}
 
+
+def reset_provider_registry() -> None:
+    """Resets the provider registry, clearing all registered providers."""
+    PROVIDER_REGISTRY.clear()
+    provider_singleton.cache_clear()
+
+
 # Default auto-registration mapping for built-in providers
 # These providers will be automatically registered on first use
 DEFAULT_AUTO_REGISTER_SCOPES: dict[str, ProviderId] = {
     "anthropic/": "anthropic",
     "google/": "google",
-    "openai/": "openai",
     "mlx-community/": "mlx",
+    "ollama/": "ollama",
+    "openai/": "openai",
+    "together/": "together",
 }
 
 
+@lru_cache(maxsize=256)
+def provider_singleton(
+    provider_id: ProviderId, *, api_key: str | None = None, base_url: str | None = None
+) -> Provider:
+    """Create a cached provider instance for the specified provider id.
+
+    Args:
+        provider_id: The provider name ("openai", "anthropic", or "google").
+        api_key: API key for authentication. If None, uses provider-specific env var.
+        base_url: Base URL for the API. If None, uses provider-specific env var.
+
+    Returns:
+        A cached provider instance for the specified provider with the given parameters.
+
+    Raises:
+        ValueError: If the provider_id is not supported.
+    """
+    match provider_id:
+        case "anthropic":
+            return AnthropicProvider(api_key=api_key, base_url=base_url)
+        case "google":
+            return GoogleProvider(api_key=api_key, base_url=base_url)
+        case "mlx":  # pragma: no cover (MLX is only available on macOS)
+            return MLXProvider()
+        case "ollama":
+            return OllamaProvider(api_key=api_key, base_url=base_url)
+        case "openai":
+            return OpenAIProvider(api_key=api_key, base_url=base_url)
+        case "openai:completions":
+            return OpenAICompletionsProvider(api_key=api_key, base_url=base_url)
+        case "openai:responses":
+            return OpenAIResponsesProvider(api_key=api_key, base_url=base_url)
+        case "together":
+            return TogetherProvider(api_key=api_key, base_url=base_url)
+        case _:  # pragma: no cover
+            raise ValueError(f"Unknown provider: '{provider_id}'")
+
+
 @overload
 def register_provider(
     provider: Provider,
@@ -98,7 +153,7 @@ def register_provider(
     """
 
     if isinstance(provider, str):
-        provider = load_provider(provider, api_key=api_key, base_url=base_url)
+        provider = provider_singleton(provider, api_key=api_key, base_url=base_url)
 
     if scope is None:
         scope = provider.default_scope
@@ -158,7 +213,7 @@ def get_provider_for_model(model_id: str) -> Provider:
     if matching_defaults:
         best_scope = max(matching_defaults, key=len)
         provider_id = DEFAULT_AUTO_REGISTER_SCOPES[best_scope]
-        provider = load_provider(provider_id)
+        provider = provider_singleton(provider_id)
         # Auto-register for future calls
        PROVIDER_REGISTRY[best_scope] = provider
        return provider
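In practice the registry is driven through `register_provider` and scope prefixes. A small usage sketch follows; the call form mirrors the TogetherProvider docstring further down, and the `from mirascope import llm` import path is an assumption.

```python
# Usage sketch; the import path is an assumption, the call form mirrors the
# TogetherProvider docstring added in this release.
from mirascope import llm

# A string provider id is resolved via provider_singleton(), which builds and
# caches the instance; here Together serves models under the "meta-llama/" prefix.
llm.register_provider("together", "meta-llama/")

# Model ids matching a DEFAULT_AUTO_REGISTER_SCOPES prefix ("openai/", "ollama/",
# "together/", ...) are auto-registered on first use by get_provider_for_model().
```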
--- /dev/null
+++ b/mirascope/llm/providers/together/__init__.py
@@ -0,0 +1,19 @@
+"""Together AI provider implementation."""
+
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from .provider import TogetherProvider
+else:
+    try:
+        from .provider import TogetherProvider
+    except ImportError:  # pragma: no cover
+        from .._missing_import_stubs import (
+            create_provider_stub,
+        )
+
+        TogetherProvider = create_provider_stub("openai", "TogetherProvider")
+
+__all__ = [
+    "TogetherProvider",
+]
--- /dev/null
+++ b/mirascope/llm/providers/together/provider.py
@@ -0,0 +1,40 @@
+"""Together AI provider implementation."""
+
+from typing import ClassVar
+
+from ..openai.completions.base_provider import BaseOpenAICompletionsProvider
+
+
+class TogetherProvider(BaseOpenAICompletionsProvider):
+    """Provider for Together AI's OpenAI-compatible API.
+
+    Inherits from BaseOpenAICompletionsProvider with Together-specific configuration:
+    - Uses Together AI's API endpoint
+    - Requires TOGETHER_API_KEY
+
+    Usage:
+        Register the provider with model ID prefixes you want to use:
+
+        ```python
+        import llm
+
+        # Register for meta-llama models
+        llm.register_provider("together", "meta-llama/")
+
+        # Now you can use meta-llama models directly
+        @llm.call("meta-llama/Llama-3.3-70B-Instruct-Turbo")
+        def my_prompt():
+            return [llm.messages.user("Hello!")]
+        ```
+    """
+
+    id: ClassVar[str] = "together"
+    default_scope: ClassVar[str | list[str]] = []
+    default_base_url: ClassVar[str | None] = "https://api.together.xyz/v1"
+    api_key_env_var: ClassVar[str] = "TOGETHER_API_KEY"
+    api_key_required: ClassVar[bool] = True
+    provider_name: ClassVar[str | None] = "Together"
+
+    def _model_name(self, model_id: str) -> str:
+        """Return the model ID as-is for Together API."""
+        return model_id
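TogetherProvider is configuration-only: request handling comes from the new `BaseOpenAICompletionsProvider` in `openai/completions/base_provider.py`, which is not shown in this diff. Under that assumption, another OpenAI-compatible endpoint could be wired up the same way; the class below is a hypothetical example, not part of the package.

```python
# Hypothetical example (not part of the package): a custom OpenAI-compatible
# provider configured with the same class variables TogetherProvider uses above.
from typing import ClassVar

from mirascope.llm.providers.openai.completions.base_provider import (
    BaseOpenAICompletionsProvider,
)


class MyGatewayProvider(BaseOpenAICompletionsProvider):
    """Provider for a self-hosted OpenAI-compatible gateway (illustrative only)."""

    id: ClassVar[str] = "my-gateway"
    default_scope: ClassVar[str | list[str]] = []
    default_base_url: ClassVar[str | None] = "https://gateway.example.com/v1"
    api_key_env_var: ClassVar[str] = "MY_GATEWAY_API_KEY"
    api_key_required: ClassVar[bool] = True
    provider_name: ClassVar[str | None] = "MyGateway"

    def _model_name(self, model_id: str) -> str:
        # Pass the model id through unchanged, as TogetherProvider does.
        return model_id
```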
--- a/mirascope/llm/responses/__init__.py
+++ b/mirascope/llm/responses/__init__.py
@@ -27,6 +27,7 @@ from .streams import (
     ThoughtStream,
     ToolCallStream,
 )
+from .usage import Usage, UsageDeltaChunk
 
 __all__ = [
     "AsyncChunkIterator",
@@ -53,5 +54,7 @@ __all__ = [
     "TextStream",
     "ThoughtStream",
     "ToolCallStream",
+    "Usage",
+    "UsageDeltaChunk",
     "_utils",
 ]
--- a/mirascope/llm/responses/base_response.py
+++ b/mirascope/llm/responses/base_response.py
@@ -9,6 +9,7 @@ from ..messages import AssistantMessage, Message
 from ..tools import FORMAT_TOOL_NAME, ToolkitT
 from .finish_reason import FinishReason
 from .root_response import RootResponse
+from .usage import Usage
 
 if TYPE_CHECKING:
     from ..providers import ModelId, Params, ProviderId
@@ -30,6 +31,7 @@ class BaseResponse(RootResponse[ToolkitT, FormattableT]):
         input_messages: Sequence[Message],
         assistant_message: AssistantMessage,
         finish_reason: FinishReason | None,
+        usage: Usage | None,
     ) -> None:
         """Initialize a Response.
 
@@ -45,6 +47,7 @@ class BaseResponse(RootResponse[ToolkitT, FormattableT]):
             input_messages: The message history before the final assistant message.
             assistant_message: The final assistant message containing the response content.
             finish_reason: The reason why the LLM finished generating a response.
+            usage: Token usage statistics for the response.
         """
         self.raw = raw
         self.provider_id = provider_id
@@ -53,6 +56,7 @@ class BaseResponse(RootResponse[ToolkitT, FormattableT]):
         self.params = params
         self.toolkit = toolkit
         self.finish_reason = finish_reason
+        self.usage = usage
         self.format = format
 
         # Process content in the assistant message, organizing it by type and
--- a/mirascope/llm/responses/base_stream_response.py
+++ b/mirascope/llm/responses/base_stream_response.py
@@ -36,6 +36,7 @@ from .streams import (
     ThoughtStream,
     ToolCallStream,
 )
+from .usage import Usage, UsageDeltaChunk
 
 if TYPE_CHECKING:
     from ..providers import ModelId, Params, ProviderId
@@ -76,7 +77,11 @@ class RawMessageChunk:
 
 
 StreamResponseChunk: TypeAlias = (
-    AssistantContentChunk | FinishReasonChunk | RawStreamEventChunk | RawMessageChunk
+    AssistantContentChunk
+    | FinishReasonChunk
+    | RawStreamEventChunk
+    | RawMessageChunk
+    | UsageDeltaChunk
 )
 
 ChunkIterator: TypeAlias = Iterator[StreamResponseChunk]
@@ -165,6 +170,7 @@ class BaseStreamResponse(
         format: Format[FormattableT] | None = None,
         input_messages: Sequence[Message],
         chunk_iterator: ChunkIteratorT,
+        usage: Usage | None = None,
     ) -> None:
         """Initialize the BaseStreamResponse.
 
@@ -177,6 +183,7 @@
             toolkit: Toolkit containing all the tools used to generate the response.
             format: The `Format` for the expected structured output format (or None).
             input_messages: The input messages that were sent to the LLM
+            usage: Token usage statistics for the response.
 
         The BaseStreamResponse will process the tuples to build the chunks and raw lists
         as the stream is consumed.
@@ -187,6 +194,7 @@
         self.provider_model_name = provider_model_name
         self.params = params
         self.toolkit = toolkit
+        self.usage = usage
         self.format = format
 
         # Internal-only lists which we mutate (append) during chunk processing
@@ -475,6 +483,14 @@ class BaseSyncStreamResponse(BaseStreamResponse[ChunkIterator, ToolkitT, Formatt
                 self._assistant_message.raw_message = chunk.raw_message
             elif chunk.type == "finish_reason_chunk":
                 self.finish_reason = chunk.finish_reason
+            elif chunk.type == "usage_delta_chunk":
+                if self.usage is None:
+                    self.usage = Usage()
+                self.usage.input_tokens += chunk.input_tokens
+                self.usage.output_tokens += chunk.output_tokens
+                self.usage.cache_read_tokens += chunk.cache_read_tokens
+                self.usage.cache_write_tokens += chunk.cache_write_tokens
+                self.usage.reasoning_tokens += chunk.reasoning_tokens
             else:
                 yield self._handle_chunk(chunk)
 
@@ -648,6 +664,14 @@ class BaseAsyncStreamResponse(
                 self._assistant_message.raw_message = chunk.raw_message
             elif chunk.type == "finish_reason_chunk":
                 self.finish_reason = chunk.finish_reason
+            elif chunk.type == "usage_delta_chunk":
+                if self.usage is None:
+                    self.usage = Usage()
+                self.usage.input_tokens += chunk.input_tokens
+                self.usage.output_tokens += chunk.output_tokens
+                self.usage.cache_read_tokens += chunk.cache_read_tokens
+                self.usage.cache_write_tokens += chunk.cache_write_tokens
+                self.usage.reasoning_tokens += chunk.reasoning_tokens
             else:
                 yield self._handle_chunk(chunk)
 
--- a/mirascope/llm/responses/finish_reason.py
+++ b/mirascope/llm/responses/finish_reason.py
@@ -15,6 +15,7 @@ class FinishReason(str, Enum):
 
     MAX_TOKENS = "max_tokens"
     REFUSAL = "refusal"
+    CONTEXT_LENGTH_EXCEEDED = "context_length_exceeded"
 
 
 @dataclass(kw_only=True)