ommlds 0.0.0.dev465__py3-none-any.whl → 0.0.0.dev467__py3-none-any.whl
This diff compares the contents of two publicly released package versions as they appear in their respective public registries; it is provided for informational purposes only.
Potentially problematic release: this version of ommlds might be problematic.
- ommlds/.omlish-manifests.json +22 -6
- ommlds/backends/google/protocol/types.py +4 -1
- ommlds/cli/sessions/chat/backends/catalog.py +1 -1
- ommlds/cli/sessions/chat/chat/ai/services.py +5 -7
- ommlds/minichain/__init__.py +6 -0
- ommlds/minichain/backends/impls/anthropic/stream.py +17 -0
- ommlds/minichain/backends/impls/google/stream.py +105 -20
- ommlds/minichain/backends/impls/mlx/chat.py +95 -21
- ommlds/minichain/backends/impls/openai/chat.py +2 -2
- ommlds/minichain/backends/impls/openai/format.py +108 -104
- ommlds/minichain/backends/impls/openai/stream.py +14 -13
- ommlds/minichain/chat/stream/adapters.py +5 -50
- ommlds/minichain/chat/stream/joining.py +96 -0
- ommlds/minichain/chat/stream/types.py +20 -4
- {ommlds-0.0.0.dev465.dist-info → ommlds-0.0.0.dev467.dist-info}/METADATA +3 -3
- {ommlds-0.0.0.dev465.dist-info → ommlds-0.0.0.dev467.dist-info}/RECORD +20 -20
- ommlds/minichain/backends/impls/openai/format2.py +0 -210
- {ommlds-0.0.0.dev465.dist-info → ommlds-0.0.0.dev467.dist-info}/WHEEL +0 -0
- {ommlds-0.0.0.dev465.dist-info → ommlds-0.0.0.dev467.dist-info}/entry_points.txt +0 -0
- {ommlds-0.0.0.dev465.dist-info → ommlds-0.0.0.dev467.dist-info}/licenses/LICENSE +0 -0
- {ommlds-0.0.0.dev465.dist-info → ommlds-0.0.0.dev467.dist-info}/top_level.txt +0 -0
ommlds/.omlish-manifests.json
CHANGED
@@ -63,7 +63,7 @@
     "module": ".minichain.backends.impls.anthropic.stream",
     "attr": null,
     "file": "ommlds/minichain/backends/impls/anthropic/stream.py",
-    "line":
+    "line": 36,
     "value": {
       "!.minichain.registries.manifests.RegistryManifest": {
         "module": "ommlds.minichain.backends.impls.anthropic.stream",
@@ -137,7 +137,7 @@
     "module": ".minichain.backends.impls.google.stream",
     "attr": null,
     "file": "ommlds/minichain/backends/impls/google/stream.py",
-    "line":
+    "line": 41,
     "value": {
       "!.minichain.registries.manifests.RegistryManifest": {
         "module": "ommlds.minichain.backends.impls.google.stream",
@@ -229,11 +229,12 @@
     "module": ".minichain.backends.impls.mlx.chat",
     "attr": null,
     "file": "ommlds/minichain/backends/impls/mlx/chat.py",
-    "line":
+    "line": 39,
     "value": {
       "!.minichain.backends.strings.manifests.BackendStringsManifest": {
         "service_cls_names": [
-          "ChatChoicesService"
+          "ChatChoicesService",
+          "ChatChoicesStreamService"
         ],
         "backend_name": "mlx",
         "model_names": null
@@ -244,7 +245,7 @@
     "module": ".minichain.backends.impls.mlx.chat",
     "attr": null,
     "file": "ommlds/minichain/backends/impls/mlx/chat.py",
-    "line":
+    "line": 133,
     "value": {
       "!.minichain.registries.manifests.RegistryManifest": {
         "module": "ommlds.minichain.backends.impls.mlx.chat",
@@ -255,6 +256,21 @@
       }
     }
   },
+  {
+    "module": ".minichain.backends.impls.mlx.chat",
+    "attr": null,
+    "file": "ommlds/minichain/backends/impls/mlx/chat.py",
+    "line": 164,
+    "value": {
+      "!.minichain.registries.manifests.RegistryManifest": {
+        "module": "ommlds.minichain.backends.impls.mlx.chat",
+        "attr": "MlxChatChoicesStreamService",
+        "name": "mlx",
+        "aliases": null,
+        "type": "ChatChoicesStreamService"
+      }
+    }
+  },
   {
     "module": ".minichain.backends.impls.openai.chat",
     "attr": null,
@@ -356,7 +372,7 @@
     "module": ".minichain.backends.impls.openai.stream",
     "attr": null,
     "file": "ommlds/minichain/backends/impls/openai/stream.py",
-    "line":
+    "line": 38,
     "value": {
       "!.minichain.registries.manifests.RegistryManifest": {
         "module": "ommlds.minichain.backends.impls.openai.stream",
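Net effect of the manifest churn above: recorded source line numbers shift, and the mlx backend gains a second registry entry exposing MlxChatChoicesStreamService as a ChatChoicesStreamService. A minimal sketch of filtering such a manifest file for streaming-capable chat backends (illustrative only; ommlds ships its own loader, and the file's top-level shape is assumed here to be a flat JSON array of entries like the ones shown):

    import json

    REG_KEY = '!.minichain.registries.manifests.RegistryManifest'

    # Assumed shape: a flat array of {"module", "attr", "file", "line", "value"} entries.
    with open('ommlds/.omlish-manifests.json') as f:
        entries = json.load(f)

    streaming = [
        (reg['name'], reg['module'], reg['attr'])
        for e in entries
        if (reg := (e.get('value') or {}).get(REG_KEY)) is not None
        if reg.get('type') == 'ChatChoicesStreamService'
    ]
    # With this release, streaming now includes:
    #   ('mlx', 'ommlds.minichain.backends.impls.mlx.chat', 'MlxChatChoicesStreamService')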
ommlds/backends/google/protocol/types.py
CHANGED
@@ -148,11 +148,14 @@ class Part(lang.Final):
     video_metadata: VideoMetadata | None = None


+ContentRole: ta.TypeAlias = ta.Literal['user', 'model']
+
+
 @dc.dataclass(frozen=True, kw_only=True)
 @_set_class_marshal_options
 class Content(lang.Final):
     parts: ta.Sequence[Part] | None = None
-    role:
+    role: ContentRole | None = None


 ##
ommlds/cli/sessions/chat/backends/catalog.py
CHANGED
@@ -32,7 +32,7 @@ class _CatalogBackendProvider(BackendProvider[ServiceT], lang.Abstract):
     @contextlib.asynccontextmanager
     async def _provide_backend(self, cls: type[ServiceT]) -> ta.AsyncIterator[ServiceT]:
         service: ServiceT
-        async with lang.
+        async with lang.async_or_sync_maybe_managing(self._catalog.get_backend(
             cls,
             self._name,
             *(self._configs or []),
ommlds/cli/sessions/chat/chat/ai/services.py
CHANGED
@@ -63,19 +63,17 @@ class ChatChoicesStreamServiceStreamAiChatGenerator(StreamAiChatGenerator):
     ) -> mc.AiChat:
         opts = self._options() if self._options is not None else []

-        lst: list[str] = []
-
         async with self._service_provider.provide_backend() as service:
+            joiner = mc.AiChoiceDeltaJoiner()
+
             async with (await service.invoke(mc.ChatChoicesStreamRequest(chat, opts))).v as st_resp:
                 async for o in st_resp:
+                    joiner.add(o.choices)
+
                     choice = check.single(o.choices)

                     for delta in choice.deltas:
                         if delta_callback is not None:
                             await delta_callback(delta)

-
-                    if c is not None:
-                        lst.append(check.isinstance(c, str))
-
-        return [mc.AiMessage(''.join(lst))]
+        return check.single(joiner.build())
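The generator previously joined raw text chunks by hand and could only produce a single text message; it now defers to the new mc.AiChoiceDeltaJoiner from chat/stream/joining.py (+96 lines, not shown in this section). A text-only stand-in for what such a joiner does, assuming one accumulator per streamed choice index (the real class also merges tool-use deltas and returns richer message types, so this is a sketch, not the shipped implementation):

    from dataclasses import dataclass


    @dataclass
    class Delta:
        c: str  # one streamed content fragment (stand-in for ContentAiChoiceDelta)


    @dataclass
    class Choice:
        deltas: list[Delta]


    class TextDeltaJoiner:
        """Illustrative stand-in for mc.AiChoiceDeltaJoiner, text deltas only."""

        def __init__(self) -> None:
            self._acc: list[list[str]] = []

        def add(self, choices: list[Choice]) -> None:
            for i, ch in enumerate(choices):
                while len(self._acc) <= i:
                    self._acc.append([])
                self._acc[i].extend(d.c for d in ch.deltas)

        def build(self) -> list[str]:
            return [''.join(parts) for parts in self._acc]  # one message per choice


    j = TextDeltaJoiner()
    j.add([Choice([Delta('Hel')])])
    j.add([Choice([Delta('lo')])])
    assert j.build() == ['Hello']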
ommlds/minichain/__init__.py
CHANGED
@@ -92,6 +92,10 @@ with _lang.auto_proxy_init(
         ChatChoicesStreamServiceChatChoicesService,
     )

+    from .chat.stream.joining import (  # noqa
+        AiChoiceDeltaJoiner,
+    )
+
     from .chat.stream.services import (  # noqa
         ChatChoicesStreamRequest,
         ChatChoicesStreamResponse,
@@ -109,7 +113,9 @@ with _lang.auto_proxy_init(

         AiChoiceDelta,
         ContentAiChoiceDelta,
+        AnyToolUseAiChoiceDelta,
         ToolUseAiChoiceDelta,
+        PartialToolUseAiChoiceDelta,

         AiChoiceDeltas,
         AiChoicesDeltas,
ommlds/minichain/backends/impls/anthropic/stream.py
CHANGED
@@ -17,6 +17,7 @@ from ....chat.stream.services import static_check_is_chat_choices_stream_service
 from ....chat.stream.types import AiChoiceDeltas
 from ....chat.stream.types import AiChoicesDeltas
 from ....chat.stream.types import ContentAiChoiceDelta
+from ....chat.stream.types import PartialToolUseAiChoiceDelta
 from ....chat.tools.types import Tool
 from ....configs import Config
 from ....resources import UseResources
@@ -123,19 +124,35 @@ class AnthropicChatChoicesStreamService:
                         check.not_none(msg_start)
                         check.none(cbk_start)
                         cbk_start = ae
+
                         if isinstance(ae.content_block, AnthropicSseDecoderEvents.ContentBlockStart.Text):  # noqa
                             await sink.emit(AiChoicesDeltas([AiChoiceDeltas([ContentAiChoiceDelta(
                                 ae.content_block.text,
                             )])]))
+
+                        elif isinstance(ae.content_block, AnthropicSseDecoderEvents.ContentBlockStart.ToolUse):  # noqa
+                            await sink.emit(AiChoicesDeltas([AiChoiceDeltas([PartialToolUseAiChoiceDelta(  # noqa
+                                id=ae.content_block.id,
+                                name=ae.content_block.name,
+                                raw_args=ae.content_block.input,
+                            )])]))
+
                         else:
                             raise TypeError(ae.content_block)

                     case AnthropicSseDecoderEvents.ContentBlockDelta():
                         check.not_none(cbk_start)
+
                         if isinstance(ae.delta, AnthropicSseDecoderEvents.ContentBlockDelta.TextDelta):
                             await sink.emit(AiChoicesDeltas([AiChoiceDeltas([ContentAiChoiceDelta(
                                 ae.delta.text,
                             )])]))
+
+                        elif isinstance(ae.delta, AnthropicSseDecoderEvents.ContentBlockDelta.InputJsonDelta):  # noqa
+                            await sink.emit(AiChoicesDeltas([AiChoiceDeltas([PartialToolUseAiChoiceDelta(  # noqa
+                                raw_args=ae.delta.partial_json,
+                            )])]))
+
                         else:
                             raise TypeError(ae.delta)

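Anthropic streams a tool call as a content_block_start event carrying the tool's id and name, followed by input_json_delta events carrying fragments of the arguments JSON; the new branches above forward each piece as a PartialToolUseAiChoiceDelta. A hedged sketch of how a consumer can reassemble those fragments (the event payloads below are invented for illustration; field names mirror the deltas emitted above):

    import json

    fragments = [
        {'id': 'toolu_01', 'name': 'get_weather', 'raw_args': ''},  # from ContentBlockStart.ToolUse
        {'raw_args': '{"city": "Par'},                              # from ContentBlockDelta.InputJsonDelta
        {'raw_args': 'is"}'},                                       # from ContentBlockDelta.InputJsonDelta
    ]

    tool_id = next(f['id'] for f in fragments if 'id' in f)
    tool_name = next(f['name'] for f in fragments if 'name' in f)
    args = json.loads(''.join(f['raw_args'] for f in fragments))

    assert (tool_id, tool_name, args) == ('toolu_01', 'get_weather', {'city': 'Paris'})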
ommlds/minichain/backends/impls/google/stream.py
CHANGED
@@ -15,6 +15,8 @@ from ....chat.choices.types import ChatChoicesOutputs
 from ....chat.messages import AiMessage
 from ....chat.messages import Message
 from ....chat.messages import SystemMessage
+from ....chat.messages import ToolUseMessage
+from ....chat.messages import ToolUseResultMessage
 from ....chat.messages import UserMessage
 from ....chat.stream.services import ChatChoicesStreamRequest
 from ....chat.stream.services import ChatChoicesStreamResponse
@@ -22,12 +24,15 @@ from ....chat.stream.services import static_check_is_chat_choices_stream_service
 from ....chat.stream.types import AiChoiceDeltas
 from ....chat.stream.types import AiChoicesDeltas
 from ....chat.stream.types import ContentAiChoiceDelta
+from ....chat.stream.types import ToolUseAiChoiceDelta
+from ....chat.tools.types import Tool
 from ....models.configs import ModelName
 from ....resources import UseResources
 from ....standard import ApiKey
 from ....stream.services import StreamResponseSink
 from ....stream.services import new_stream_response
 from .names import MODEL_NAMES
+from .tools import build_tool_spec_schema


 ##
@@ -48,22 +53,70 @@ class GoogleChatChoicesStreamService:
         self._model_name = cc.pop(self.DEFAULT_MODEL_NAME)
         self._api_key = ApiKey.pop_secret(cc, env='GEMINI_API_KEY')

-    def
-
-
+    def _make_str_content(
+            self,
+            s: str | None,
+            *,
+            role: pt.ContentRole | None = None,
+    ) -> pt.Content | None:
+        if s is None:
+            return None
+
+        return pt.Content(
+            parts=[pt.Part(
+                text=check.not_none(s),
+            )],
+            role=role,
+        )

-
-
+    def _make_msg_content(self, m: Message) -> pt.Content:
+        if isinstance(m, (AiMessage, SystemMessage, UserMessage)):
+            return check.not_none(self._make_str_content(
+                check.isinstance(m.c, str),
+                role=self.ROLES_MAP[type(m)],
+            ))
+
+        elif isinstance(m, ToolUseResultMessage):
+            tr_resp_val: pt.Value
+            if m.tur.c is None:
+                tr_resp_val = pt.NullValue()  # type: ignore[unreachable]
+            elif isinstance(m.tur.c, str):
+                tr_resp_val = pt.StringValue(m.tur.c)
+            else:
+                raise TypeError(m.tur.c)
+            return pt.Content(
+                parts=[pt.Part(
+                    function_response=pt.FunctionResponse(
+                        id=m.tur.id,
+                        name=m.tur.name,
+                        response={
+                            'value': tr_resp_val,
+                        },
+                    ),
+                )],
+            )
+
+        elif isinstance(m, ToolUseMessage):
+            return pt.Content(
+                parts=[pt.Part(
+                    function_call=pt.FunctionCall(
+                        id=m.tu.id,
+                        name=m.tu.name,
+                        args=m.tu.args,
+                    ),
+                )],
+                role='model',
+            )

         else:
             raise TypeError(m)

     BASE_URL: ta.ClassVar[str] = 'https://generativelanguage.googleapis.com/v1beta/models'

-    ROLES_MAP: ta.ClassVar[ta.Mapping[type[Message],
-        SystemMessage:
+    ROLES_MAP: ta.ClassVar[ta.Mapping[type[Message], pt.ContentRole | None]] = {  # noqa
+        SystemMessage: None,
         UserMessage: 'user',
-        AiMessage: '
+        AiMessage: 'model',
     }

     READ_CHUNK_SIZE = 64 * 1024
@@ -74,16 +127,28 @@ class GoogleChatChoicesStreamService:
     ) -> ChatChoicesStreamResponse:
         key = check.not_none(self._api_key).reveal()

+        msgs = list(request.v)
+
+        system_inst: pt.Content | None = None
+        if msgs and isinstance(m0 := msgs[0], SystemMessage):
+            system_inst = self._make_msg_content(m0)
+            msgs.pop(0)
+
+        g_tools: list[pt.Tool] = []
+        with tv.TypedValues(*request.options).consume() as oc:
+            t: Tool
+            for t in oc.pop(Tool, []):
+                g_tools.append(pt.Tool(
+                    function_declarations=[build_tool_spec_schema(t.spec)],
+                ))
+
         g_req = pt.GenerateContentRequest(
             contents=[
-
-
-                    text=check.not_none(self._get_msg_content(m)),
-                )],
-                role=self.ROLES_MAP[type(m)],  # type: ignore[arg-type]
-            )
-                for m in request.v
+                self._make_msg_content(m)
+                for m in msgs
             ],
+            tools=g_tools or None,
+            system_instruction=system_inst,
         )

         req_dct = msh.marshal(g_req)
@@ -110,18 +175,38 @@ class GoogleChatChoicesStreamService:
                     if isinstance(bl, DelimitingBuffer.Incomplete):
                         # FIXME: handle
                         return []
+
                     l = bl.decode('utf-8')
                    if not l:
                         continue
+
                     if l.startswith('data: '):
                         gcr = msh.unmarshal(json.loads(l[6:]), pt.GenerateContentResponse)  # noqa
                         cnd = check.single(check.not_none(gcr.candidates))
+
                         for p in check.not_none(cnd.content).parts or []:
-
-
-
-
-
+                            if (txt := p.text) is not None:
+                                check.none(p.function_call)
+                                await sink.emit(AiChoicesDeltas([
+                                    AiChoiceDeltas([
+                                        ContentAiChoiceDelta(check.not_none(txt)),
+                                    ]),
+                                ]))
+
+                            elif (fc := p.function_call) is not None:
+                                check.none(p.text)
+                                await sink.emit(AiChoicesDeltas([
+                                    AiChoiceDeltas([
+                                        ToolUseAiChoiceDelta(
+                                            id=fc.id,
+                                            name=fc.name,
+                                            args=fc.args,
+                                        ),
+                                    ]),
+                                ]))
+
+                            else:
+                                raise ValueError(p)

                 if not b:
                     return []
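For reference, Gemini's streamGenerateContent SSE frames look roughly like the payload below (an invented example; real responses carry more fields). Each candidate part is routed to a ContentAiChoiceDelta if it has text, to a ToolUseAiChoiceDelta if it has a functionCall, and anything else is rejected:

    import json

    line = (
        'data: {"candidates": [{"content": {"role": "model", "parts": '
        '[{"functionCall": {"name": "get_weather", "args": {"city": "Paris"}}}]}}]}'
    )

    gcr = json.loads(line[len('data: '):])
    (cnd,) = gcr['candidates']
    for p in cnd['content']['parts']:
        if 'text' in p:
            print('content delta:', p['text'])
        elif 'functionCall' in p:
            fc = p['functionCall']
            print('tool-use delta:', fc.get('id'), fc['name'], fc['args'])
        else:
            raise ValueError(p)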
ommlds/minichain/backends/impls/mlx/chat.py
CHANGED
@@ -1,3 +1,4 @@
+import contextlib
 import typing as ta

 from omlish import check
@@ -5,6 +6,7 @@ from omlish import lang
 from omlish import typedvalues as tv

 from .....backends import mlx as mlxu
+from ....chat.choices.services import ChatChoicesOutputs
 from ....chat.choices.services import ChatChoicesRequest
 from ....chat.choices.services import ChatChoicesResponse
 from ....chat.choices.services import static_check_is_chat_choices_service
@@ -14,19 +16,28 @@ from ....chat.messages import AiMessage
 from ....chat.messages import Message
 from ....chat.messages import SystemMessage
 from ....chat.messages import UserMessage
+from ....chat.stream.services import ChatChoicesStreamRequest
+from ....chat.stream.services import ChatChoicesStreamResponse
+from ....chat.stream.services import static_check_is_chat_choices_stream_service
+from ....chat.stream.types import AiChoiceDeltas
+from ....chat.stream.types import AiChoicesDeltas
+from ....chat.stream.types import ContentAiChoiceDelta
 from ....configs import Config
 from ....llms.types import MaxTokens
 from ....models.configs import ModelPath
 from ....models.configs import ModelRepo
 from ....models.configs import ModelSpecifier
+from ....resources import UseResources
 from ....standard import DefaultOptions
+from ....stream.services import StreamResponseSink
+from ....stream.services import new_stream_response


 ##


 # @omlish-manifest $.minichain.backends.strings.manifests.BackendStringsManifest(
-#     ['ChatChoicesService'],
+#     ['ChatChoicesService', 'ChatChoicesStreamService'],
 #     'mlx',
 # )
@@ -34,12 +45,7 @@ from ....standard import DefaultOptions
 ##


-
-#     name='mlx',
-#     type='ChatChoicesService',
-# )
-@static_check_is_chat_choices_service
-class MlxChatChoicesService(lang.ExitStacked):
+class BaseMlxChatChoicesService(lang.ExitStacked):
     DEFAULT_MODEL: ta.ClassVar[ModelSpecifier] = (
         # 'mlx-community/DeepSeek-Coder-V2-Lite-Instruct-8bit'
         # 'mlx-community/Llama-3.3-70B-Instruct-4bit'
@@ -52,8 +58,8 @@ class MlxChatChoicesService(lang.ExitStacked):
         # 'mlx-community/Qwen2.5-0.5B-4bit'
         # 'mlx-community/Qwen2.5-32B-Instruct-8bit'
         # 'mlx-community/Qwen2.5-Coder-32B-Instruct-8bit'
-        # 'mlx-community/mamba-2.8b-hf-f16'
         # 'mlx-community/Qwen3-30B-A3B-6bit'
+        # 'mlx-community/mamba-2.8b-hf-f16'
     )

     def __init__(self, *configs: Config) -> None:
@@ -70,10 +76,7 @@ class MlxChatChoicesService(lang.ExitStacked):
     }

     def _get_msg_content(self, m: Message) -> str | None:
-        if isinstance(m, AiMessage):
-            return check.isinstance(m.c, str)
-
-        elif isinstance(m, (SystemMessage, UserMessage)):
+        if isinstance(m, (AiMessage, SystemMessage, UserMessage)):
             return check.isinstance(m.c, str)

         else:
@@ -96,10 +99,9 @@ class MlxChatChoicesService(lang.ExitStacked):
         max_tokens=MaxTokens,
     )

-
-
-
-        tokenizer = loaded_model.tokenization.tokenizer
+    @lang.cached_function(transient=True)
+    def _get_tokenizer(self) -> mlxu.tokenization.Tokenizer:
+        tokenizer = self._load_model().tokenization.tokenizer

         if not (
             hasattr(tokenizer, 'apply_chat_template') and
@@ -107,26 +109,44 @@ class MlxChatChoicesService(lang.ExitStacked):
         ):
             raise RuntimeError(tokenizer)

-
+        return tokenizer
+
+    def _build_prompt(self, messages: ta.Sequence[Message]) -> str:
+        return check.isinstance(self._get_tokenizer().apply_chat_template(
             [  # type: ignore[arg-type]
                 dict(
                     role=self.ROLES_MAP[type(m)],
                     content=self._get_msg_content(m),
                 )
-                for m in
+                for m in messages
             ],
             tokenize=False,
             add_generation_prompt=True,
-        )
+        ), str)

-
+    def _build_kwargs(self, oc: tv.TypedValuesConsumer) -> dict[str, ta.Any]:
+        kwargs: dict[str, ta.Any] = {}
+        kwargs.update(oc.pop_scalar_kwargs(**self._OPTION_KWARG_NAMES_MAP))
+        return kwargs
+
+
+# @omlish-manifest $.minichain.registries.manifests.RegistryManifest(
+#     name='mlx',
+#     type='ChatChoicesService',
+# )
+@static_check_is_chat_choices_service
+class MlxChatChoicesService(BaseMlxChatChoicesService):
+    async def invoke(self, request: ChatChoicesRequest) -> ChatChoicesResponse:
+        loaded_model = self._load_model()
+
+        prompt = self._build_prompt(request.v)

         with tv.consume(
             *self._default_options,
             *request.options,
             override=True,
         ) as oc:
-            kwargs.
+            kwargs = self._build_kwargs(oc)

         response = mlxu.generate(
             loaded_model.model,
@@ -139,3 +159,57 @@ class MlxChatChoicesService(lang.ExitStacked):
         return ChatChoicesResponse([
             AiChoice([AiMessage(response)])  # noqa
         ])
+
+
+# @omlish-manifest $.minichain.registries.manifests.RegistryManifest(
+#     name='mlx',
+#     type='ChatChoicesStreamService',
+# )
+@static_check_is_chat_choices_stream_service
+class MlxChatChoicesStreamService(BaseMlxChatChoicesService):
+    def __init__(self, *configs: Config) -> None:
+        super().__init__()
+
+        with tv.consume(*configs) as cc:
+            self._model = cc.pop(MlxChatChoicesService.DEFAULT_MODEL)
+            self._default_options: tv.TypedValues = DefaultOptions.pop(cc)
+
+    READ_CHUNK_SIZE = 64 * 1024
+
+    async def invoke(
+            self,
+            request: ChatChoicesStreamRequest,
+            *,
+            max_tokens: int = 4096,  # FIXME: ChatOption
+    ) -> ChatChoicesStreamResponse:
+        loaded_model = self._load_model()
+
+        prompt = self._build_prompt(request.v)
+
+        with tv.consume(
+            *self._default_options,
+            *request.options,
+            override=True,
+        ) as oc:
+            oc.pop(UseResources, None)
+            kwargs = self._build_kwargs(oc)
+
+        async with UseResources.or_new(request.options) as rs:
+            gen: ta.Iterator[mlxu.GenerationOutput] = rs.enter_context(contextlib.closing(mlxu.stream_generate(
+                loaded_model.model,
+                loaded_model.tokenization,
+                check.isinstance(prompt, str),
+                mlxu.GenerationParams(**kwargs),
+                # verbose=True,
+            )))
+
+            async def inner(sink: StreamResponseSink[AiChoicesDeltas]) -> ta.Sequence[ChatChoicesOutputs]:
+                for go in gen:
+                    if go.text:
+                        await sink.emit(AiChoicesDeltas([AiChoiceDeltas([
+                            ContentAiChoiceDelta(go.text),
+                        ])]))
+
+                return []
+
+            return await new_stream_response(rs, inner)
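Rough usage sketch for the new streaming service, mirroring the consumption pattern from cli/sessions/chat/chat/ai/services.py above. The bare constructor, the top-level re-export of UserMessage, and printing the raw delta are assumptions; only the request/response shapes are taken from this diff:

    import asyncio

    from ommlds import minichain as mc
    from ommlds.minichain.backends.impls.mlx.chat import MlxChatChoicesStreamService


    async def main() -> None:
        svc = MlxChatChoicesStreamService()  # configs (model spec, defaults) omitted

        req = mc.ChatChoicesStreamRequest([mc.UserMessage('hi')])
        async with (await svc.invoke(req)).v as st_resp:
            async for o in st_resp:
                for choice in o.choices:
                    for delta in choice.deltas:
                        print(delta)  # ContentAiChoiceDelta chunks as they arrive


    asyncio.run(main())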
ommlds/minichain/backends/impls/openai/chat.py
CHANGED
@@ -26,8 +26,8 @@ from ....chat.choices.services import static_check_is_chat_choices_service
 from ....models.configs import ModelName
 from ....standard import ApiKey
 from ....standard import DefaultOptions
-from .
-from .
+from .format import OpenaiChatRequestHandler
+from .format import build_mc_choices_response
 from .names import MODEL_NAMES

