mirascope 2.0.0a2__py3-none-any.whl → 2.0.0a4__py3-none-any.whl

This diff compares publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the packages exactly as they appear in their public registries.
Files changed (252)
  1. mirascope/__init__.py +2 -2
  2. mirascope/api/__init__.py +6 -0
  3. mirascope/api/_generated/README.md +207 -0
  4. mirascope/api/_generated/__init__.py +141 -0
  5. mirascope/api/_generated/client.py +163 -0
  6. mirascope/api/_generated/core/__init__.py +52 -0
  7. mirascope/api/_generated/core/api_error.py +23 -0
  8. mirascope/api/_generated/core/client_wrapper.py +58 -0
  9. mirascope/api/_generated/core/datetime_utils.py +30 -0
  10. mirascope/api/_generated/core/file.py +70 -0
  11. mirascope/api/_generated/core/force_multipart.py +16 -0
  12. mirascope/api/_generated/core/http_client.py +619 -0
  13. mirascope/api/_generated/core/http_response.py +55 -0
  14. mirascope/api/_generated/core/jsonable_encoder.py +102 -0
  15. mirascope/api/_generated/core/pydantic_utilities.py +310 -0
  16. mirascope/api/_generated/core/query_encoder.py +60 -0
  17. mirascope/api/_generated/core/remove_none_from_dict.py +11 -0
  18. mirascope/api/_generated/core/request_options.py +35 -0
  19. mirascope/api/_generated/core/serialization.py +282 -0
  20. mirascope/api/_generated/docs/__init__.py +4 -0
  21. mirascope/api/_generated/docs/client.py +95 -0
  22. mirascope/api/_generated/docs/raw_client.py +132 -0
  23. mirascope/api/_generated/environment.py +9 -0
  24. mirascope/api/_generated/errors/__init__.py +17 -0
  25. mirascope/api/_generated/errors/bad_request_error.py +15 -0
  26. mirascope/api/_generated/errors/conflict_error.py +15 -0
  27. mirascope/api/_generated/errors/forbidden_error.py +15 -0
  28. mirascope/api/_generated/errors/internal_server_error.py +15 -0
  29. mirascope/api/_generated/errors/not_found_error.py +15 -0
  30. mirascope/api/_generated/health/__init__.py +7 -0
  31. mirascope/api/_generated/health/client.py +96 -0
  32. mirascope/api/_generated/health/raw_client.py +129 -0
  33. mirascope/api/_generated/health/types/__init__.py +8 -0
  34. mirascope/api/_generated/health/types/health_check_response.py +24 -0
  35. mirascope/api/_generated/health/types/health_check_response_status.py +5 -0
  36. mirascope/api/_generated/organizations/__init__.py +25 -0
  37. mirascope/api/_generated/organizations/client.py +380 -0
  38. mirascope/api/_generated/organizations/raw_client.py +876 -0
  39. mirascope/api/_generated/organizations/types/__init__.py +23 -0
  40. mirascope/api/_generated/organizations/types/organizations_create_response.py +24 -0
  41. mirascope/api/_generated/organizations/types/organizations_create_response_role.py +7 -0
  42. mirascope/api/_generated/organizations/types/organizations_get_response.py +24 -0
  43. mirascope/api/_generated/organizations/types/organizations_get_response_role.py +7 -0
  44. mirascope/api/_generated/organizations/types/organizations_list_response_item.py +24 -0
  45. mirascope/api/_generated/organizations/types/organizations_list_response_item_role.py +7 -0
  46. mirascope/api/_generated/organizations/types/organizations_update_response.py +24 -0
  47. mirascope/api/_generated/organizations/types/organizations_update_response_role.py +7 -0
  48. mirascope/api/_generated/projects/__init__.py +17 -0
  49. mirascope/api/_generated/projects/client.py +458 -0
  50. mirascope/api/_generated/projects/raw_client.py +1016 -0
  51. mirascope/api/_generated/projects/types/__init__.py +15 -0
  52. mirascope/api/_generated/projects/types/projects_create_response.py +30 -0
  53. mirascope/api/_generated/projects/types/projects_get_response.py +30 -0
  54. mirascope/api/_generated/projects/types/projects_list_response_item.py +30 -0
  55. mirascope/api/_generated/projects/types/projects_update_response.py +30 -0
  56. mirascope/api/_generated/reference.md +753 -0
  57. mirascope/api/_generated/traces/__init__.py +55 -0
  58. mirascope/api/_generated/traces/client.py +162 -0
  59. mirascope/api/_generated/traces/raw_client.py +168 -0
  60. mirascope/api/_generated/traces/types/__init__.py +95 -0
  61. mirascope/api/_generated/traces/types/traces_create_request_resource_spans_item.py +36 -0
  62. mirascope/api/_generated/traces/types/traces_create_request_resource_spans_item_resource.py +31 -0
  63. mirascope/api/_generated/traces/types/traces_create_request_resource_spans_item_resource_attributes_item.py +25 -0
  64. mirascope/api/_generated/traces/types/traces_create_request_resource_spans_item_resource_attributes_item_value.py +54 -0
  65. mirascope/api/_generated/traces/types/traces_create_request_resource_spans_item_resource_attributes_item_value_array_value.py +23 -0
  66. mirascope/api/_generated/traces/types/traces_create_request_resource_spans_item_resource_attributes_item_value_kvlist_value.py +28 -0
  67. mirascope/api/_generated/traces/types/traces_create_request_resource_spans_item_resource_attributes_item_value_kvlist_value_values_item.py +24 -0
  68. mirascope/api/_generated/traces/types/traces_create_request_resource_spans_item_scope_spans_item.py +35 -0
  69. mirascope/api/_generated/traces/types/traces_create_request_resource_spans_item_scope_spans_item_scope.py +35 -0
  70. mirascope/api/_generated/traces/types/traces_create_request_resource_spans_item_scope_spans_item_scope_attributes_item.py +27 -0
  71. mirascope/api/_generated/traces/types/traces_create_request_resource_spans_item_scope_spans_item_scope_attributes_item_value.py +54 -0
  72. mirascope/api/_generated/traces/types/traces_create_request_resource_spans_item_scope_spans_item_scope_attributes_item_value_array_value.py +23 -0
  73. mirascope/api/_generated/traces/types/traces_create_request_resource_spans_item_scope_spans_item_scope_attributes_item_value_kvlist_value.py +28 -0
  74. mirascope/api/_generated/traces/types/traces_create_request_resource_spans_item_scope_spans_item_scope_attributes_item_value_kvlist_value_values_item.py +24 -0
  75. mirascope/api/_generated/traces/types/traces_create_request_resource_spans_item_scope_spans_item_spans_item.py +60 -0
  76. mirascope/api/_generated/traces/types/traces_create_request_resource_spans_item_scope_spans_item_spans_item_attributes_item.py +29 -0
  77. mirascope/api/_generated/traces/types/traces_create_request_resource_spans_item_scope_spans_item_spans_item_attributes_item_value.py +54 -0
  78. mirascope/api/_generated/traces/types/traces_create_request_resource_spans_item_scope_spans_item_spans_item_attributes_item_value_array_value.py +23 -0
  79. mirascope/api/_generated/traces/types/traces_create_request_resource_spans_item_scope_spans_item_spans_item_attributes_item_value_kvlist_value.py +28 -0
  80. mirascope/api/_generated/traces/types/traces_create_request_resource_spans_item_scope_spans_item_spans_item_attributes_item_value_kvlist_value_values_item.py +24 -0
  81. mirascope/api/_generated/traces/types/traces_create_request_resource_spans_item_scope_spans_item_spans_item_status.py +24 -0
  82. mirascope/api/_generated/traces/types/traces_create_response.py +27 -0
  83. mirascope/api/_generated/traces/types/traces_create_response_partial_success.py +28 -0
  84. mirascope/api/_generated/types/__init__.py +37 -0
  85. mirascope/api/_generated/types/already_exists_error.py +24 -0
  86. mirascope/api/_generated/types/already_exists_error_tag.py +5 -0
  87. mirascope/api/_generated/types/database_error.py +24 -0
  88. mirascope/api/_generated/types/database_error_tag.py +5 -0
  89. mirascope/api/_generated/types/http_api_decode_error.py +29 -0
  90. mirascope/api/_generated/types/http_api_decode_error_tag.py +5 -0
  91. mirascope/api/_generated/types/issue.py +40 -0
  92. mirascope/api/_generated/types/issue_tag.py +17 -0
  93. mirascope/api/_generated/types/not_found_error_body.py +24 -0
  94. mirascope/api/_generated/types/not_found_error_tag.py +5 -0
  95. mirascope/api/_generated/types/permission_denied_error.py +24 -0
  96. mirascope/api/_generated/types/permission_denied_error_tag.py +7 -0
  97. mirascope/api/_generated/types/property_key.py +7 -0
  98. mirascope/api/_generated/types/property_key_key.py +27 -0
  99. mirascope/api/_generated/types/property_key_key_tag.py +5 -0
  100. mirascope/api/client.py +255 -0
  101. mirascope/api/settings.py +81 -0
  102. mirascope/llm/__init__.py +45 -11
  103. mirascope/llm/calls/calls.py +81 -57
  104. mirascope/llm/calls/decorator.py +121 -115
  105. mirascope/llm/content/__init__.py +3 -2
  106. mirascope/llm/context/_utils.py +19 -6
  107. mirascope/llm/exceptions.py +30 -16
  108. mirascope/llm/formatting/_utils.py +9 -5
  109. mirascope/llm/formatting/format.py +2 -2
  110. mirascope/llm/formatting/from_call_args.py +2 -2
  111. mirascope/llm/messages/message.py +13 -5
  112. mirascope/llm/models/__init__.py +2 -2
  113. mirascope/llm/models/models.py +189 -81
  114. mirascope/llm/prompts/__init__.py +13 -12
  115. mirascope/llm/prompts/_utils.py +27 -24
  116. mirascope/llm/prompts/decorator.py +133 -204
  117. mirascope/llm/prompts/prompts.py +424 -0
  118. mirascope/llm/prompts/protocols.py +25 -59
  119. mirascope/llm/providers/__init__.py +44 -0
  120. mirascope/llm/{clients → providers}/_missing_import_stubs.py +8 -6
  121. mirascope/llm/providers/anthropic/__init__.py +29 -0
  122. mirascope/llm/providers/anthropic/_utils/__init__.py +23 -0
  123. mirascope/llm/providers/anthropic/_utils/beta_decode.py +271 -0
  124. mirascope/llm/providers/anthropic/_utils/beta_encode.py +216 -0
  125. mirascope/llm/{clients → providers}/anthropic/_utils/decode.py +44 -11
  126. mirascope/llm/providers/anthropic/_utils/encode.py +356 -0
  127. mirascope/llm/providers/anthropic/beta_provider.py +322 -0
  128. mirascope/llm/providers/anthropic/model_id.py +23 -0
  129. mirascope/llm/providers/anthropic/model_info.py +87 -0
  130. mirascope/llm/providers/anthropic/provider.py +416 -0
  131. mirascope/llm/{clients → providers}/base/__init__.py +3 -3
  132. mirascope/llm/{clients → providers}/base/_utils.py +25 -8
  133. mirascope/llm/{clients/base/client.py → providers/base/base_provider.py} +255 -126
  134. mirascope/llm/providers/google/__init__.py +21 -0
  135. mirascope/llm/{clients → providers}/google/_utils/decode.py +61 -7
  136. mirascope/llm/{clients → providers}/google/_utils/encode.py +44 -30
  137. mirascope/llm/providers/google/model_id.py +22 -0
  138. mirascope/llm/providers/google/model_info.py +62 -0
  139. mirascope/llm/providers/google/provider.py +442 -0
  140. mirascope/llm/providers/load_provider.py +54 -0
  141. mirascope/llm/providers/mlx/__init__.py +24 -0
  142. mirascope/llm/providers/mlx/_utils.py +129 -0
  143. mirascope/llm/providers/mlx/encoding/__init__.py +8 -0
  144. mirascope/llm/providers/mlx/encoding/base.py +69 -0
  145. mirascope/llm/providers/mlx/encoding/transformers.py +147 -0
  146. mirascope/llm/providers/mlx/mlx.py +237 -0
  147. mirascope/llm/providers/mlx/model_id.py +17 -0
  148. mirascope/llm/providers/mlx/provider.py +415 -0
  149. mirascope/llm/providers/model_id.py +16 -0
  150. mirascope/llm/providers/ollama/__init__.py +19 -0
  151. mirascope/llm/providers/ollama/provider.py +71 -0
  152. mirascope/llm/providers/openai/__init__.py +6 -0
  153. mirascope/llm/providers/openai/completions/__init__.py +25 -0
  154. mirascope/llm/{clients → providers}/openai/completions/_utils/__init__.py +2 -0
  155. mirascope/llm/{clients → providers}/openai/completions/_utils/decode.py +60 -6
  156. mirascope/llm/{clients → providers}/openai/completions/_utils/encode.py +37 -26
  157. mirascope/llm/providers/openai/completions/base_provider.py +513 -0
  158. mirascope/llm/providers/openai/completions/provider.py +22 -0
  159. mirascope/llm/providers/openai/model_id.py +31 -0
  160. mirascope/llm/providers/openai/model_info.py +303 -0
  161. mirascope/llm/providers/openai/provider.py +398 -0
  162. mirascope/llm/providers/openai/responses/__init__.py +21 -0
  163. mirascope/llm/{clients → providers}/openai/responses/_utils/decode.py +59 -6
  164. mirascope/llm/{clients → providers}/openai/responses/_utils/encode.py +34 -23
  165. mirascope/llm/providers/openai/responses/provider.py +469 -0
  166. mirascope/llm/providers/provider_id.py +23 -0
  167. mirascope/llm/providers/provider_registry.py +169 -0
  168. mirascope/llm/providers/together/__init__.py +19 -0
  169. mirascope/llm/providers/together/provider.py +40 -0
  170. mirascope/llm/responses/__init__.py +3 -0
  171. mirascope/llm/responses/base_response.py +14 -5
  172. mirascope/llm/responses/base_stream_response.py +35 -6
  173. mirascope/llm/responses/finish_reason.py +1 -0
  174. mirascope/llm/responses/response.py +33 -13
  175. mirascope/llm/responses/root_response.py +12 -13
  176. mirascope/llm/responses/stream_response.py +35 -23
  177. mirascope/llm/responses/usage.py +95 -0
  178. mirascope/llm/tools/__init__.py +9 -2
  179. mirascope/llm/tools/_utils.py +12 -3
  180. mirascope/llm/tools/protocols.py +4 -4
  181. mirascope/llm/tools/tool_schema.py +44 -9
  182. mirascope/llm/tools/tools.py +10 -9
  183. mirascope/ops/__init__.py +156 -0
  184. mirascope/ops/_internal/__init__.py +5 -0
  185. mirascope/ops/_internal/closure.py +1118 -0
  186. mirascope/ops/_internal/configuration.py +126 -0
  187. mirascope/ops/_internal/context.py +76 -0
  188. mirascope/ops/_internal/exporters/__init__.py +26 -0
  189. mirascope/ops/_internal/exporters/exporters.py +342 -0
  190. mirascope/ops/_internal/exporters/processors.py +104 -0
  191. mirascope/ops/_internal/exporters/types.py +165 -0
  192. mirascope/ops/_internal/exporters/utils.py +29 -0
  193. mirascope/ops/_internal/instrumentation/__init__.py +8 -0
  194. mirascope/ops/_internal/instrumentation/llm/__init__.py +8 -0
  195. mirascope/ops/_internal/instrumentation/llm/encode.py +238 -0
  196. mirascope/ops/_internal/instrumentation/llm/gen_ai_types/__init__.py +38 -0
  197. mirascope/ops/_internal/instrumentation/llm/gen_ai_types/gen_ai_input_messages.py +31 -0
  198. mirascope/ops/_internal/instrumentation/llm/gen_ai_types/gen_ai_output_messages.py +38 -0
  199. mirascope/ops/_internal/instrumentation/llm/gen_ai_types/gen_ai_system_instructions.py +18 -0
  200. mirascope/ops/_internal/instrumentation/llm/gen_ai_types/shared.py +100 -0
  201. mirascope/ops/_internal/instrumentation/llm/llm.py +1288 -0
  202. mirascope/ops/_internal/propagation.py +198 -0
  203. mirascope/ops/_internal/protocols.py +51 -0
  204. mirascope/ops/_internal/session.py +139 -0
  205. mirascope/ops/_internal/spans.py +232 -0
  206. mirascope/ops/_internal/traced_calls.py +371 -0
  207. mirascope/ops/_internal/traced_functions.py +394 -0
  208. mirascope/ops/_internal/tracing.py +276 -0
  209. mirascope/ops/_internal/types.py +13 -0
  210. mirascope/ops/_internal/utils.py +75 -0
  211. mirascope/ops/_internal/versioned_calls.py +512 -0
  212. mirascope/ops/_internal/versioned_functions.py +346 -0
  213. mirascope/ops/_internal/versioning.py +303 -0
  214. mirascope/ops/exceptions.py +21 -0
  215. {mirascope-2.0.0a2.dist-info → mirascope-2.0.0a4.dist-info}/METADATA +78 -3
  216. mirascope-2.0.0a4.dist-info/RECORD +247 -0
  217. {mirascope-2.0.0a2.dist-info → mirascope-2.0.0a4.dist-info}/WHEEL +1 -1
  218. mirascope/graphs/__init__.py +0 -22
  219. mirascope/graphs/finite_state_machine.py +0 -625
  220. mirascope/llm/agents/__init__.py +0 -15
  221. mirascope/llm/agents/agent.py +0 -97
  222. mirascope/llm/agents/agent_template.py +0 -45
  223. mirascope/llm/agents/decorator.py +0 -176
  224. mirascope/llm/calls/base_call.py +0 -33
  225. mirascope/llm/clients/__init__.py +0 -34
  226. mirascope/llm/clients/anthropic/__init__.py +0 -25
  227. mirascope/llm/clients/anthropic/_utils/encode.py +0 -243
  228. mirascope/llm/clients/anthropic/clients.py +0 -819
  229. mirascope/llm/clients/anthropic/model_ids.py +0 -8
  230. mirascope/llm/clients/google/__init__.py +0 -20
  231. mirascope/llm/clients/google/clients.py +0 -853
  232. mirascope/llm/clients/google/model_ids.py +0 -15
  233. mirascope/llm/clients/openai/__init__.py +0 -25
  234. mirascope/llm/clients/openai/completions/__init__.py +0 -28
  235. mirascope/llm/clients/openai/completions/_utils/model_features.py +0 -81
  236. mirascope/llm/clients/openai/completions/clients.py +0 -833
  237. mirascope/llm/clients/openai/completions/model_ids.py +0 -8
  238. mirascope/llm/clients/openai/responses/__init__.py +0 -26
  239. mirascope/llm/clients/openai/responses/_utils/__init__.py +0 -13
  240. mirascope/llm/clients/openai/responses/_utils/model_features.py +0 -87
  241. mirascope/llm/clients/openai/responses/clients.py +0 -832
  242. mirascope/llm/clients/openai/responses/model_ids.py +0 -8
  243. mirascope/llm/clients/openai/shared/__init__.py +0 -7
  244. mirascope/llm/clients/openai/shared/_utils.py +0 -55
  245. mirascope/llm/clients/providers.py +0 -175
  246. mirascope-2.0.0a2.dist-info/RECORD +0 -102
  247. /mirascope/llm/{clients → providers}/base/kwargs.py +0 -0
  248. /mirascope/llm/{clients → providers}/base/params.py +0 -0
  249. /mirascope/llm/{clients/anthropic → providers/google}/_utils/__init__.py +0 -0
  250. /mirascope/llm/{clients → providers}/google/message.py +0 -0
  251. /mirascope/llm/{clients/google → providers/openai/responses}/_utils/__init__.py +0 -0
  252. {mirascope-2.0.0a2.dist-info → mirascope-2.0.0a4.dist-info}/licenses/LICENSE +0 -0
mirascope/llm/providers/mlx/encoding/base.py
@@ -0,0 +1,69 @@
+ from __future__ import annotations
+
+ import abc
+ from collections.abc import Iterable, Sequence
+ from typing import TypeAlias
+
+ from mlx_lm.generate import GenerationResponse
+
+ from ....formatting import Format, FormattableT
+ from ....messages import AssistantContent, Message
+ from ....responses import ChunkIterator
+ from ....tools import AnyToolSchema, BaseToolkit
+
+ TokenIds: TypeAlias = list[int]
+
+
+ class BaseEncoder(abc.ABC):
+     """Abstract base class for Mirascope <> MLX encoding and decoding."""
+
+     @abc.abstractmethod
+     def encode_request(
+         self,
+         messages: Sequence[Message],
+         tools: Sequence[AnyToolSchema] | BaseToolkit[AnyToolSchema] | None,
+         format: type[FormattableT] | Format[FormattableT] | None,
+     ) -> tuple[Sequence[Message], Format[FormattableT] | None, TokenIds]:
+         """Encode the request messages into a format suitable for the model.
+
+         Args:
+             messages: The sequence of messages to encode.
+             tools: Optional sequence of tool schemas or toolkit for the model.
+             format: Optional format specification for structured outputs.
+
+         Returns:
+             A tuple containing:
+             - The processed messages
+             - The format specification (if provided)
+             - The encoded prompt as token IDs
+         """
+         ...
+
+     @abc.abstractmethod
+     def decode_response(
+         self, stream: Iterable[GenerationResponse]
+     ) -> tuple[AssistantContent, GenerationResponse | None]:
+         """Decode a stream of MLX generation responses into assistant content.
+
+         Args:
+             stream: An iterable of MLX generation responses.
+
+         Returns:
+             A tuple containing:
+             - The decoded assistant content
+             - The final generation response (if available)
+         """
+         ...
+
+     @abc.abstractmethod
+     def decode_stream(self, stream: Iterable[GenerationResponse]) -> ChunkIterator:
+         """Decode a stream of MLX generation responses into an iterable of chunks.
+
+         Args:
+             stream: An iterable of MLX generation responses.
+
+         Returns:
+             A ChunkIterator yielding content chunks for streaming responses.
+         """
+         ...
mirascope/llm/providers/mlx/encoding/transformers.py
@@ -0,0 +1,147 @@
+ import io
+ from collections.abc import Iterable, Sequence
+ from dataclasses import dataclass
+ from typing import Literal, cast
+ from typing_extensions import TypedDict
+
+ from mlx_lm.generate import GenerationResponse
+ from transformers import PreTrainedTokenizer
+
+ from ....content import ContentPart, TextChunk, TextEndChunk, TextStartChunk
+ from ....formatting import Format, FormattableT
+ from ....messages import AssistantContent, Message
+ from ....responses import (
+     ChunkIterator,
+     FinishReasonChunk,
+     RawStreamEventChunk,
+     UsageDeltaChunk,
+ )
+ from ....tools import AnyToolSchema, BaseToolkit
+ from .. import _utils
+ from .base import BaseEncoder, TokenIds
+
+ HFRole = Literal["system", "user", "assistant"] | str
+
+
+ class TransformersMessage(TypedDict):
+     """Message in Transformers format."""
+
+     role: HFRole
+     content: str
+
+
+ def _encode_content(content: Sequence[ContentPart]) -> str:
+     """Encode content parts into a string.
+
+     Args:
+         content: The sequence of content parts to encode.
+
+     Returns:
+         The encoded content as a string.
+
+     Raises:
+         NotImplementedError: If content contains non-text parts.
+     """
+     if len(content) == 1 and content[0].type == "text":
+         return content[0].text
+
+     raise NotImplementedError("Only text content is supported in this example.")
+
+
+ def _encode_message(message: Message) -> TransformersMessage:
+     """Encode a Mirascope message into Transformers format.
+
+     Args:
+         message: The message to encode.
+
+     Returns:
+         The encoded message in Transformers format.
+
+     Raises:
+         ValueError: If the message role is not supported.
+     """
+     if message.role == "system":
+         return TransformersMessage(role="system", content=message.content.text)
+     elif message.role == "assistant" or message.role == "user":
+         return TransformersMessage(
+             role=message.role, content=_encode_content(message.content)
+         )
+     else:
+         raise ValueError(f"Unsupported message role: {message.role}")
+
+
+ @dataclass(frozen=True)
+ class TransformersEncoder(BaseEncoder):
+     """Encoder for Transformers models."""
+
+     tokenizer: PreTrainedTokenizer
+     """The tokenizer to use for encoding."""
+
+     def encode_request(
+         self,
+         messages: Sequence[Message],
+         tools: Sequence[AnyToolSchema] | BaseToolkit[AnyToolSchema] | None,
+         format: type[FormattableT] | Format[FormattableT] | None,
+     ) -> tuple[Sequence[Message], Format[FormattableT] | None, TokenIds]:
+         """Encode a request into a format suitable for the model."""
+         tool_schemas = tools.tools if isinstance(tools, BaseToolkit) else tools or []
+         if len(tool_schemas) > 0:
+             raise NotImplementedError("Tool usage is not supported.")
+         if format is not None:
+             raise NotImplementedError("Formatting is not supported.")
+
+         hf_messages: list[TransformersMessage] = [
+             _encode_message(msg) for msg in messages
+         ]
+         prompt_text = cast(
+             str,
+             self.tokenizer.apply_chat_template(  # pyright: ignore[reportUnknownMemberType]
+                 cast(list[dict[str, str]], hf_messages),
+                 tokenize=False,
+                 add_generation_prompt=True,
+             ),
+         )
+         return (
+             messages,
+             format,
+             self.tokenizer.encode(prompt_text, add_special_tokens=False),  # pyright: ignore[reportUnknownMemberType]
+         )
+
+     def decode_response(
+         self, stream: Iterable[GenerationResponse]
+     ) -> tuple[AssistantContent, GenerationResponse | None]:
+         """Decode a stream of generation responses into assistant content."""
+         with io.StringIO() as buffer:
+             last_response: GenerationResponse | None = None
+             for response in stream:
+                 buffer.write(response.text)
+                 last_response = response
+
+             return buffer.getvalue(), last_response
+
+     def decode_stream(self, stream: Iterable[GenerationResponse]) -> ChunkIterator:
+         """Decode a stream of generation responses into content chunks."""
+         yield TextStartChunk()
+
+         response: GenerationResponse | None = None
+         for response in stream:
+             yield RawStreamEventChunk(raw_stream_event=response)
+             yield TextChunk(delta=response.text)
+
+         assert response is not None
+         finish_reason = _utils.extract_finish_reason(response)
+         if finish_reason is not None:
+             yield FinishReasonChunk(finish_reason=finish_reason)
+         else:
+             yield TextEndChunk()
+
+         # Emit usage delta if available
+         usage = _utils.extract_usage(response)
+         if usage:
+             yield UsageDeltaChunk(
+                 input_tokens=usage.input_tokens,
+                 output_tokens=usage.output_tokens,
+                 cache_read_tokens=usage.cache_read_tokens,
+                 cache_write_tokens=usage.cache_write_tokens,
+                 reasoning_tokens=usage.reasoning_tokens,
+             )
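
TransformersEncoder.encode_request encodes in two steps: it renders the conversation with the model's chat template, then tokenizes the rendered text with add_special_tokens=False, since the template has already inserted any special tokens. This can be exercised directly against a Hugging Face tokenizer; a minimal sketch, assuming the referenced mlx-community repository ships its tokenizer files:

from transformers import AutoTokenizer

# Assumed model id (taken from the MLXModelId docstring below); any
# chat-tuned tokenizer with a chat template would work the same way.
tokenizer = AutoTokenizer.from_pretrained("mlx-community/Qwen3-8B-4bit-DWQ-053125")

messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "What is MLX?"},
]

# Step 1: render the chat template to prompt text.
prompt_text = tokenizer.apply_chat_template(
    messages, tokenize=False, add_generation_prompt=True
)
# Step 2: tokenize without special tokens (the template already added them).
token_ids = tokenizer.encode(prompt_text, add_special_tokens=False)
print(token_ids[:10])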
mirascope/llm/providers/mlx/mlx.py
@@ -0,0 +1,237 @@
+ import asyncio
+ import threading
+ from collections.abc import Iterable, Sequence
+ from dataclasses import dataclass, field
+ from typing_extensions import Unpack
+
+ import mlx.core as mx
+ import mlx.nn as nn
+ from mlx_lm import stream_generate  # type: ignore[reportPrivateImportUsage]
+ from mlx_lm.generate import GenerationResponse
+ from transformers import PreTrainedTokenizer
+
+ from ...formatting import Format, FormattableT
+ from ...messages import AssistantMessage, Message, assistant
+ from ...responses import AsyncChunkIterator, ChunkIterator, StreamResponseChunk
+ from ...tools import AnyToolSchema, BaseToolkit
+ from ..base import Params
+ from . import _utils
+ from .encoding import BaseEncoder, TokenIds
+ from .model_id import MLXModelId
+
+
+ def _consume_sync_stream_into_queue(
+     generation_stream: ChunkIterator,
+     loop: asyncio.AbstractEventLoop,
+     queue: asyncio.Queue[StreamResponseChunk | Exception | None],
+ ) -> None:
+     """Consume a synchronous stream and put chunks into an async queue.
+
+     Args:
+         generation_stream: The synchronous chunk iterator to consume.
+         loop: The event loop for scheduling queue operations.
+         queue: The async queue to put chunks into.
+     """
+     try:
+         for response in generation_stream:
+             asyncio.run_coroutine_threadsafe(queue.put(response), loop)
+     except Exception as e:
+         asyncio.run_coroutine_threadsafe(queue.put(e), loop)
+
+     asyncio.run_coroutine_threadsafe(queue.put(None), loop)
+
+
+ @dataclass(frozen=True)
+ class MLX:
+     """MLX model wrapper for synchronous and asynchronous generation.
+
+     Args:
+         model_id: The MLX model identifier.
+         model: The underlying MLX model.
+         tokenizer: The tokenizer for the model.
+         encoder: The encoder for prompts and responses.
+     """
+
+     model_id: MLXModelId
+     """The MLX model identifier."""
+
+     model: nn.Module
+     """The underlying MLX model."""
+
+     tokenizer: PreTrainedTokenizer
+     """The tokenizer for the model."""
+
+     encoder: BaseEncoder
+     """The encoder for prompts and responses."""
+
+     _lock: threading.Lock = field(default_factory=threading.Lock)
+     """The lock for thread-safety."""
+
+     def _stream_generate(
+         self,
+         prompt: TokenIds,
+         seed: int | None,
+         **kwargs: Unpack[_utils.StreamGenerateKwargs],
+     ) -> Iterable[GenerationResponse]:
+         """Generator that streams generation responses.
+
+         Using this generator instead of calling stream_generate directly ensures
+         thread-safety when using the model in a multi-threaded context.
+         """
+         with self._lock:
+             if seed is not None:
+                 mx.random.seed(seed)
+
+             return stream_generate(
+                 self.model,
+                 self.tokenizer,
+                 prompt,
+                 **kwargs,
+             )
+
+     async def _stream_generate_async(
+         self,
+         prompt: TokenIds,
+         seed: int | None,
+         **kwargs: Unpack[_utils.StreamGenerateKwargs],
+     ) -> AsyncChunkIterator:
+         """Async generator that streams generation responses.
+
+         Note that, while stream_generate returns an iterable of GenerationResponse,
+         here we return an `AsyncChunkIterator` in order to avoid having to implement
+         both synchronous and asynchronous versions of BaseEncoder.decode_stream.
+         This makes sense because, in this case, there is nothing to gain from
+         consuming the generation asynchronously.
+         """
+         loop = asyncio.get_running_loop()
+         generation_queue: asyncio.Queue[StreamResponseChunk | Exception | None] = (
+             asyncio.Queue()
+         )
+
+         sync_stream = self.encoder.decode_stream(
+             self._stream_generate(
+                 prompt,
+                 seed,
+                 **kwargs,
+             )
+         )
+
+         consume_task = asyncio.create_task(
+             asyncio.to_thread(
+                 _consume_sync_stream_into_queue, sync_stream, loop, generation_queue
+             ),
+         )
+         while item := await generation_queue.get():
+             if isinstance(item, Exception):
+                 raise item
+
+             yield item
+
+         await consume_task
+
+     def stream(
+         self,
+         messages: Sequence[Message],
+         tools: Sequence[AnyToolSchema] | BaseToolkit[AnyToolSchema] | None,
+         format: type[FormattableT] | Format[FormattableT] | None,
+         params: Params,
+     ) -> tuple[Sequence[Message], Format[FormattableT] | None, ChunkIterator]:
+         """Stream response chunks synchronously.
+
+         Args:
+             messages: The input messages.
+             tools: Optional tools for the model.
+             format: Optional response format.
+             params: Generation parameters.
+
+         Returns:
+             Tuple of messages, format, and chunk iterator.
+         """
+         messages, format, prompt = self.encoder.encode_request(messages, tools, format)
+         seed, kwargs = _utils.encode_params(params)
+
+         stream = self._stream_generate(prompt, seed, **kwargs)
+         return messages, format, self.encoder.decode_stream(stream)
+
+     async def stream_async(
+         self,
+         messages: Sequence[Message],
+         tools: Sequence[AnyToolSchema] | BaseToolkit[AnyToolSchema] | None,
+         format: type[FormattableT] | Format[FormattableT] | None,
+         params: Params,
+     ) -> tuple[Sequence[Message], Format[FormattableT] | None, AsyncChunkIterator]:
+         """Stream response chunks asynchronously.
+
+         Args:
+             messages: The input messages.
+             tools: Optional tools for the model.
+             format: Optional response format.
+             params: Generation parameters.
+
+         Returns:
+             Tuple of messages, format, and async chunk iterator.
+         """
+         messages, format, prompt = await asyncio.to_thread(
+             self.encoder.encode_request, messages, tools, format
+         )
+         seed, kwargs = _utils.encode_params(params)
+
+         chunk_iterator = self._stream_generate_async(prompt, seed, **kwargs)
+         return messages, format, chunk_iterator
+
+     def generate(
+         self,
+         messages: Sequence[Message],
+         tools: Sequence[AnyToolSchema] | BaseToolkit[AnyToolSchema] | None,
+         format: type[FormattableT] | Format[FormattableT] | None,
+         params: Params,
+     ) -> tuple[
+         Sequence[Message],
+         Format[FormattableT] | None,
+         AssistantMessage,
+         GenerationResponse | None,
+     ]:
+         """Generate a response synchronously.
+
+         Args:
+             messages: The input messages.
+             tools: Optional tools for the model.
+             format: Optional response format.
+             params: Generation parameters.
+
+         Returns:
+             Tuple of messages, format, assistant message, and last generation response.
+         """
+         messages, format, prompt = self.encoder.encode_request(messages, tools, format)
+         seed, kwargs = _utils.encode_params(params)
+
+         stream = self._stream_generate(prompt, seed, **kwargs)
+         assistant_content, last_response = self.encoder.decode_response(stream)
+         assistant_message = assistant(
+             content=assistant_content,
+             model_id=self.model_id,
+             provider_id="mlx",
+             raw_message=None,
+             name=None,
+         )
+         return messages, format, assistant_message, last_response
+
+     async def generate_async(
+         self,
+         messages: Sequence[Message],
+         tools: Sequence[AnyToolSchema] | BaseToolkit[AnyToolSchema] | None,
+         format: type[FormattableT] | Format[FormattableT] | None,
+         params: Params,
+     ) -> tuple[
+         Sequence[Message],
+         Format[FormattableT] | None,
+         AssistantMessage,
+         GenerationResponse | None,
+     ]:
+         """Generate a response asynchronously.
+
+         Args:
+             messages: The input messages.
+             tools: Optional tools for the model.
+             format: Optional response format.
+             params: Generation parameters.
+
+         Returns:
+             Tuple of messages, format, assistant message, and last generation response.
+         """
+         return await asyncio.to_thread(self.generate, messages, tools, format, params)
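
The sync-to-async bridge in _stream_generate_async is worth seeing in isolation: a worker thread drains the blocking generator into an asyncio.Queue via run_coroutine_threadsafe, exceptions are forwarded as queue items, and a None sentinel signals completion. A minimal self-contained sketch of the same pattern, with a toy blocking stream standing in for MLX generation:

import asyncio
import time
from collections.abc import Iterator


def _blocking_stream() -> Iterator[str]:
    # Stand-in for the blocking MLX generation stream.
    for chunk in ("Hello", ", ", "world"):
        time.sleep(0.1)
        yield chunk


def _consume(loop: asyncio.AbstractEventLoop, queue: asyncio.Queue) -> None:
    # Runs in a worker thread: forward each item to the event loop's queue,
    # forward any exception as an item, then signal completion with None.
    try:
        for item in _blocking_stream():
            asyncio.run_coroutine_threadsafe(queue.put(item), loop)
    except Exception as e:
        asyncio.run_coroutine_threadsafe(queue.put(e), loop)
    asyncio.run_coroutine_threadsafe(queue.put(None), loop)


async def main() -> None:
    loop = asyncio.get_running_loop()
    queue: asyncio.Queue[str | Exception | None] = asyncio.Queue()
    consume_task = asyncio.create_task(asyncio.to_thread(_consume, loop, queue))
    # Drain until the None sentinel; re-raise any forwarded exception.
    while (item := await queue.get()) is not None:
        if isinstance(item, Exception):
            raise item
        print(item, end="", flush=True)
    await consume_task


asyncio.run(main())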
mirascope/llm/providers/mlx/model_id.py
@@ -0,0 +1,17 @@
+ from typing import TypeAlias
+
+ # TODO: Add more explicit literals
+ # TODO: Ensure automatic model downloads are supported.
+ # TODO: Ensure instructions are clear for examples that run as copied
+ MLXModelId: TypeAlias = str
+ """The identifier of the MLX model to be loaded by the MLX client.
+
+ An MLX model identifier may be a local path to a model's files or a Hugging Face
+ repository such as:
+ - "mlx-community/Qwen3-8B-4bit-DWQ-053125"
+ - "mlx-community/gpt-oss-20b-MXFP4-Q8"
+
+ For more details, see:
+ - https://github.com/ml-explore/mlx-lm/?tab=readme-ov-file#supported-models
+ - https://huggingface.co/mlx-community
+ """