ommlds 0.0.0.dev466__py3-none-any.whl → 0.0.0.dev467__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of ommlds has been flagged as possibly problematic.

@@ -229,11 +229,12 @@
   "module": ".minichain.backends.impls.mlx.chat",
   "attr": null,
   "file": "ommlds/minichain/backends/impls/mlx/chat.py",
-  "line": 28,
+  "line": 39,
   "value": {
    "!.minichain.backends.strings.manifests.BackendStringsManifest": {
     "service_cls_names": [
-     "ChatChoicesService"
+     "ChatChoicesService",
+     "ChatChoicesStreamService"
     ],
     "backend_name": "mlx",
     "model_names": null
@@ -244,7 +245,7 @@
   "module": ".minichain.backends.impls.mlx.chat",
   "attr": null,
   "file": "ommlds/minichain/backends/impls/mlx/chat.py",
-  "line": 37,
+  "line": 133,
   "value": {
    "!.minichain.registries.manifests.RegistryManifest": {
     "module": "ommlds.minichain.backends.impls.mlx.chat",
@@ -255,6 +256,21 @@
    }
   }
  },
+ {
+  "module": ".minichain.backends.impls.mlx.chat",
+  "attr": null,
+  "file": "ommlds/minichain/backends/impls/mlx/chat.py",
+  "line": 164,
+  "value": {
+   "!.minichain.registries.manifests.RegistryManifest": {
+    "module": "ommlds.minichain.backends.impls.mlx.chat",
+    "attr": "MlxChatChoicesStreamService",
+    "name": "mlx",
+    "aliases": null,
+    "type": "ChatChoicesStreamService"
+   }
+  }
+ },
 {
  "module": ".minichain.backends.impls.openai.chat",
  "attr": null,
@@ -356,7 +372,7 @@
  "module": ".minichain.backends.impls.openai.stream",
  "attr": null,
  "file": "ommlds/minichain/backends/impls/openai/stream.py",
- "line": 37,
+ "line": 38,
  "value": {
   "!.minichain.registries.manifests.RegistryManifest": {
    "module": "ommlds.minichain.backends.impls.openai.stream",
@@ -32,7 +32,7 @@ class _CatalogBackendProvider(BackendProvider[ServiceT], lang.Abstract):
     @contextlib.asynccontextmanager
     async def _provide_backend(self, cls: type[ServiceT]) -> ta.AsyncIterator[ServiceT]:
         service: ServiceT
-        async with lang.async_maybe_managing(self._catalog.get_backend(
+        async with lang.async_or_sync_maybe_managing(self._catalog.get_backend(
            cls,
            self._name,
            *(self._configs or []),
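
The only change here swaps `lang.async_maybe_managing` for `lang.async_or_sync_maybe_managing`, which suggests the backend returned by the catalog may now be an async context manager, a plain sync context manager, or neither. A minimal sketch of what such a helper could look like (hypothetical; the real omlish implementation may differ):

import contextlib
import typing as ta


@contextlib.asynccontextmanager
async def async_or_sync_maybe_managing(obj: ta.Any) -> ta.AsyncIterator[ta.Any]:
    if hasattr(obj, '__aenter__'):
        # Async context managers are entered with `async with`.
        async with obj:
            yield obj
    elif hasattr(obj, '__enter__'):
        # Plain (sync) context managers are entered synchronously.
        with obj:
            yield obj
    else:
        # Non-managers are passed through unmanaged.
        yield obj
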
@@ -1,3 +1,4 @@
+import contextlib
 import typing as ta

 from omlish import check
@@ -5,6 +6,7 @@ from omlish import lang
 from omlish import typedvalues as tv

 from .....backends import mlx as mlxu
+from ....chat.choices.services import ChatChoicesOutputs
 from ....chat.choices.services import ChatChoicesRequest
 from ....chat.choices.services import ChatChoicesResponse
 from ....chat.choices.services import static_check_is_chat_choices_service
@@ -14,19 +16,28 @@ from ....chat.messages import AiMessage
 from ....chat.messages import Message
 from ....chat.messages import SystemMessage
 from ....chat.messages import UserMessage
+from ....chat.stream.services import ChatChoicesStreamRequest
+from ....chat.stream.services import ChatChoicesStreamResponse
+from ....chat.stream.services import static_check_is_chat_choices_stream_service
+from ....chat.stream.types import AiChoiceDeltas
+from ....chat.stream.types import AiChoicesDeltas
+from ....chat.stream.types import ContentAiChoiceDelta
 from ....configs import Config
 from ....llms.types import MaxTokens
 from ....models.configs import ModelPath
 from ....models.configs import ModelRepo
 from ....models.configs import ModelSpecifier
+from ....resources import UseResources
 from ....standard import DefaultOptions
+from ....stream.services import StreamResponseSink
+from ....stream.services import new_stream_response


 ##


 # @omlish-manifest $.minichain.backends.strings.manifests.BackendStringsManifest(
-#     ['ChatChoicesService'],
+#     ['ChatChoicesService', 'ChatChoicesStreamService'],
 #     'mlx',
 # )

@@ -34,12 +45,7 @@ from ....standard import DefaultOptions
 ##


-# @omlish-manifest $.minichain.registries.manifests.RegistryManifest(
-#     name='mlx',
-#     type='ChatChoicesService',
-# )
-@static_check_is_chat_choices_service
-class MlxChatChoicesService(lang.ExitStacked):
+class BaseMlxChatChoicesService(lang.ExitStacked):
     DEFAULT_MODEL: ta.ClassVar[ModelSpecifier] = (
         # 'mlx-community/DeepSeek-Coder-V2-Lite-Instruct-8bit'
         # 'mlx-community/Llama-3.3-70B-Instruct-4bit'
@@ -52,8 +58,8 @@ class MlxChatChoicesService(lang.ExitStacked):
         # 'mlx-community/Qwen2.5-0.5B-4bit'
         # 'mlx-community/Qwen2.5-32B-Instruct-8bit'
         # 'mlx-community/Qwen2.5-Coder-32B-Instruct-8bit'
-        # 'mlx-community/mamba-2.8b-hf-f16'
         # 'mlx-community/Qwen3-30B-A3B-6bit'
+        # 'mlx-community/mamba-2.8b-hf-f16'
     )

     def __init__(self, *configs: Config) -> None:
@@ -70,10 +76,7 @@ class MlxChatChoicesService(lang.ExitStacked):
     }

     def _get_msg_content(self, m: Message) -> str | None:
-        if isinstance(m, AiMessage):
-            return check.isinstance(m.c, str)
-
-        elif isinstance(m, (SystemMessage, UserMessage)):
+        if isinstance(m, (AiMessage, SystemMessage, UserMessage)):
             return check.isinstance(m.c, str)

         else:
@@ -96,10 +99,9 @@ class MlxChatChoicesService(lang.ExitStacked):
         max_tokens=MaxTokens,
     )

-    async def invoke(self, request: ChatChoicesRequest) -> ChatChoicesResponse:
-        loaded_model = self._load_model()
-
-        tokenizer = loaded_model.tokenization.tokenizer
+    @lang.cached_function(transient=True)
+    def _get_tokenizer(self) -> mlxu.tokenization.Tokenizer:
+        tokenizer = self._load_model().tokenization.tokenizer

         if not (
             hasattr(tokenizer, 'apply_chat_template') and
@@ -107,26 +109,44 @@ class MlxChatChoicesService(lang.ExitStacked):
         ):
             raise RuntimeError(tokenizer)

-        prompt = tokenizer.apply_chat_template(
+        return tokenizer
+
+    def _build_prompt(self, messages: ta.Sequence[Message]) -> str:
+        return check.isinstance(self._get_tokenizer().apply_chat_template(
             [  # type: ignore[arg-type]
                 dict(
                     role=self.ROLES_MAP[type(m)],
                     content=self._get_msg_content(m),
                 )
-                for m in request.v
+                for m in messages
             ],
             tokenize=False,
             add_generation_prompt=True,
-        )
+        ), str)

-        kwargs = dict()
+    def _build_kwargs(self, oc: tv.TypedValuesConsumer) -> dict[str, ta.Any]:
+        kwargs: dict[str, ta.Any] = {}
+        kwargs.update(oc.pop_scalar_kwargs(**self._OPTION_KWARG_NAMES_MAP))
+        return kwargs
+
+
+# @omlish-manifest $.minichain.registries.manifests.RegistryManifest(
+#     name='mlx',
+#     type='ChatChoicesService',
+# )
+@static_check_is_chat_choices_service
+class MlxChatChoicesService(BaseMlxChatChoicesService):
+    async def invoke(self, request: ChatChoicesRequest) -> ChatChoicesResponse:
+        loaded_model = self._load_model()
+
+        prompt = self._build_prompt(request.v)

         with tv.consume(
                 *self._default_options,
                 *request.options,
                 override=True,
         ) as oc:
-            kwargs.update(oc.pop_scalar_kwargs(**self._OPTION_KWARG_NAMES_MAP))
+            kwargs = self._build_kwargs(oc)

         response = mlxu.generate(
             loaded_model.model,
@@ -139,3 +159,57 @@ class MlxChatChoicesService(lang.ExitStacked):
         return ChatChoicesResponse([
             AiChoice([AiMessage(response)])  # noqa
         ])
+
+
+# @omlish-manifest $.minichain.registries.manifests.RegistryManifest(
+#     name='mlx',
+#     type='ChatChoicesStreamService',
+# )
+@static_check_is_chat_choices_stream_service
+class MlxChatChoicesStreamService(BaseMlxChatChoicesService):
+    def __init__(self, *configs: Config) -> None:
+        super().__init__()
+
+        with tv.consume(*configs) as cc:
+            self._model = cc.pop(MlxChatChoicesService.DEFAULT_MODEL)
+            self._default_options: tv.TypedValues = DefaultOptions.pop(cc)
+
+    READ_CHUNK_SIZE = 64 * 1024
+
+    async def invoke(
+            self,
+            request: ChatChoicesStreamRequest,
+            *,
+            max_tokens: int = 4096,  # FIXME: ChatOption
+    ) -> ChatChoicesStreamResponse:
+        loaded_model = self._load_model()
+
+        prompt = self._build_prompt(request.v)
+
+        with tv.consume(
+                *self._default_options,
+                *request.options,
+                override=True,
+        ) as oc:
+            oc.pop(UseResources, None)
+            kwargs = self._build_kwargs(oc)
+
+        async with UseResources.or_new(request.options) as rs:
+            gen: ta.Iterator[mlxu.GenerationOutput] = rs.enter_context(contextlib.closing(mlxu.stream_generate(
+                loaded_model.model,
+                loaded_model.tokenization,
+                check.isinstance(prompt, str),
+                mlxu.GenerationParams(**kwargs),
+                # verbose=True,
+            )))
+
+            async def inner(sink: StreamResponseSink[AiChoicesDeltas]) -> ta.Sequence[ChatChoicesOutputs]:
+                for go in gen:
+                    if go.text:
+                        await sink.emit(AiChoicesDeltas([AiChoiceDeltas([
+                            ContentAiChoiceDelta(go.text),
+                        ])]))
+
+                return []
+
+            return await new_stream_response(rs, inner)
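
The new stream service reuses the base class's prompt and kwargs helpers, then drives MLX's `stream_generate` iterator from an `inner` coroutine that pushes content deltas into a sink; `new_stream_response` wires that coroutine to the response the caller consumes. A toy, self-contained sketch of this sink-and-inner-coroutine pattern (the queue-based `QueueSink` here is hypothetical, not the ommlds API):

import asyncio


class QueueSink:
    # Toy sink: emitted deltas are handed to the consumer through a queue.
    _DONE = object()

    def __init__(self) -> None:
        self._q: asyncio.Queue = asyncio.Queue()

    async def emit(self, delta: str) -> None:
        await self._q.put(delta)

    async def close(self) -> None:
        await self._q.put(self._DONE)

    async def __aiter__(self):
        while (item := await self._q.get()) is not self._DONE:
            yield item


async def main() -> None:
    sink = QueueSink()

    async def inner(sink: QueueSink) -> None:
        # Stand-in for iterating mlxu.stream_generate() output.
        for text in ('hel', 'lo ', 'world'):
            await sink.emit(text)
        await sink.close()

    producer = asyncio.ensure_future(inner(sink))
    async for delta in sink:
        print(delta, end='')
    await producer


asyncio.run(main())
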
@@ -26,8 +26,8 @@ from ....chat.choices.services import static_check_is_chat_choices_service
 from ....models.configs import ModelName
 from ....standard import ApiKey
 from ....standard import DefaultOptions
-from .format2 import OpenaiChatRequestHandler
-from .format2 import build_mc_choices_response
+from .format import OpenaiChatRequestHandler
+from .format import build_mc_choices_response
 from .names import MODEL_NAMES


@@ -2,18 +2,17 @@ import typing as ta

 from omlish import cached
 from omlish import check
-from omlish import lang
 from omlish import typedvalues as tv
 from omlish.formats import json

+from .....backends.openai import protocol as pt
 from ....chat.choices.services import ChatChoicesResponse
 from ....chat.choices.types import AiChoice
+from ....chat.choices.types import AiChoices
 from ....chat.choices.types import ChatChoicesOptions
-from ....chat.messages import AiChat
 from ....chat.messages import AiMessage
 from ....chat.messages import AnyAiMessage
 from ....chat.messages import Chat
-from ....chat.messages import Message
 from ....chat.messages import SystemMessage
 from ....chat.messages import ToolUseMessage
 from ....chat.messages import ToolUseResultMessage
@@ -28,7 +27,7 @@ from ....llms.types import MaxTokens
 from ....llms.types import Temperature
 from ....llms.types import TokenUsage
 from ....llms.types import TokenUsageOutput
-from ....tools.jsonschema import build_tool_spec_json_schema
+from ....tools.jsonschema import build_tool_spec_params_json_schema
 from ....tools.types import ToolSpec
 from ....tools.types import ToolUse
 from ....types import Option
@@ -37,61 +36,115 @@ from ....types import Option
 ##


-def build_request_messages(chat: Chat) -> ta.Sequence[ta.Mapping[str, ta.Any]]:
-    out: list[dict[str, ta.Any]] = []
+def build_oai_request_msgs(mc_chat: Chat) -> ta.Sequence[pt.ChatCompletionMessage]:
+    oai_msgs: list[pt.ChatCompletionMessage] = []

-    for m in chat:
-        if isinstance(m, SystemMessage):
-            out.append(dict(
-                role='system',
-                content=m.c,
+    for mc_msg in mc_chat:
+        if isinstance(mc_msg, SystemMessage):
+            oai_msgs.append(pt.SystemChatCompletionMessage(
+                content=check.isinstance(mc_msg.c, str),
             ))

-        elif isinstance(m, AiMessage):
-            out.append(dict(
-                role='assistant',
-                content=check.isinstance(m.c, (str, None)),
+        elif isinstance(mc_msg, AiMessage):
+            oai_msgs.append(pt.AssistantChatCompletionMessage(
+                content=check.isinstance(mc_msg.c, (str, None)),
             ))

-        elif isinstance(m, ToolUseMessage):
-            out.append(dict(
-                role='assistant',
-                tool_calls=[
-                    dict(
-                        id=m.tu.id,
-                        function=dict(
-                            arguments=check.not_none(m.tu.raw_args),
-                            name=m.tu.name,
-                        ),
-                        type='function',
+        elif isinstance(mc_msg, ToolUseMessage):
+            oai_msgs.append(pt.AssistantChatCompletionMessage(
+                tool_calls=[pt.AssistantChatCompletionMessage.ToolCall(
+                    id=check.not_none(mc_msg.tu.id),
+                    function=pt.AssistantChatCompletionMessage.ToolCall.Function(
+                        arguments=check.not_none(mc_msg.tu.raw_args),
+                        name=mc_msg.tu.name,
                     ),
-                ],
+                )],
             ))

-        elif isinstance(m, UserMessage):
-            out.append(dict(
-                role='user',
-                content=prepare_content_str(m.c),
+        elif isinstance(mc_msg, UserMessage):
+            oai_msgs.append(pt.UserChatCompletionMessage(
+                content=prepare_content_str(mc_msg.c),
             ))

-        elif isinstance(m, ToolUseResultMessage):
+        elif isinstance(mc_msg, ToolUseResultMessage):
             tc: str
-            if isinstance(m.tur.c, str):
-                tc = m.tur.c
-            elif isinstance(m.tur.c, JsonContent):
-                tc = json.dumps_compact(m.tur.c)
+            if isinstance(mc_msg.tur.c, str):
+                tc = mc_msg.tur.c
+            elif isinstance(mc_msg.tur.c, JsonContent):
+                tc = json.dumps_compact(mc_msg.tur.c)
             else:
-                raise TypeError(m.tur.c)
-            out.append(dict(
-                role='tool',
-                tool_call_id=m.tur.id,
+                raise TypeError(mc_msg.tur.c)
+            oai_msgs.append(pt.ToolChatCompletionMessage(
+                tool_call_id=check.not_none(mc_msg.tur.id),
                 content=tc,
             ))

         else:
-            raise TypeError(m)
+            raise TypeError(mc_msg)

-    return out
+    return oai_msgs
+
+
+#
+
+
+def build_mc_ai_choice(oai_choice: pt.ChatCompletionResponseChoice) -> AiChoice:
+    cur: list[AnyAiMessage] = []
+
+    oai_msg = oai_choice.message
+
+    if (oai_c := oai_msg.content) is not None:
+        cur.append(AiMessage(check.isinstance(oai_c, str)))
+
+    for oai_tc in oai_msg.tool_calls or []:
+        cur.append(ToolUseMessage(ToolUse(
+            id=oai_tc.id,
+            name=oai_tc.function.name,
+            args=json.loads(oai_tc.function.arguments or '{}'),
+            raw_args=oai_tc.function.arguments,
+        )))
+
+    return AiChoice(cur)
+
+
+def build_mc_ai_choices(oai_resp: pt.ChatCompletionResponse) -> AiChoices:
+    return [
+        build_mc_ai_choice(oai_choice)
+        for oai_choice in oai_resp.choices
+    ]
+
+
+def build_mc_choices_response(oai_resp: pt.ChatCompletionResponse) -> ChatChoicesResponse:
+    return ChatChoicesResponse(
+        build_mc_ai_choices(oai_resp),
+
+        tv.TypedValues(
+            *([TokenUsageOutput(TokenUsage(
+                input=tu.prompt_tokens,
+                output=tu.completion_tokens,
+                total=tu.total_tokens,
+            ))] if (tu := oai_resp.usage) is not None else []),
+        ),
+    )
+
+
+def build_mc_ai_choice_delta(delta: pt.ChatCompletionChunkChoiceDelta) -> AiChoiceDelta:
+    if delta.content is not None:
+        check.state(not delta.tool_calls)
+        return ContentAiChoiceDelta(delta.content)
+
+    elif delta.tool_calls is not None:
+        check.state(delta.content is None)
+        tc = check.single(delta.tool_calls)
+        tc_fn = check.not_none(tc.function)
+        return PartialToolUseAiChoiceDelta(
+            id=tc.id,
+            name=tc_fn.name,
+            raw_args=tc_fn.arguments,
+        )
+
+    else:
+        raise ValueError(delta)


 ##
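
The net effect of this hunk is that the OpenAI translation layer now round-trips through typed `pt.*` protocol objects instead of hand-built dicts, so role constants and field names live in one place and can be marshalled to the wire format centrally. A minimal sketch of the idea using hypothetical stand-in types (the real ones live under `ommlds/backends/openai/protocol`):

import dataclasses as dc
import typing as ta


# Hypothetical stand-ins for the pt.* protocol types.
@dc.dataclass(frozen=True)
class SystemChatCompletionMessage:
    content: str
    role: ta.ClassVar[str] = 'system'


@dc.dataclass(frozen=True)
class UserChatCompletionMessage:
    content: str
    role: ta.ClassVar[str] = 'user'


ChatCompletionMessage = SystemChatCompletionMessage | UserChatCompletionMessage


def to_wire(msg: ChatCompletionMessage) -> dict[str, ta.Any]:
    # The typed classes centralize the role constants and field names that
    # the old dict-building code repeated inline at every call site.
    return {'role': msg.role, **dc.asdict(msg)}


print(to_wire(UserChatCompletionMessage('hello')))
# -> {'role': 'user', 'content': 'hello'}
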
@@ -112,14 +165,6 @@ class OpenaiChatRequestHandler:
         self._model = model
         self._mandatory_kwargs = mandatory_kwargs

-    ROLES_MAP: ta.ClassVar[ta.Mapping[type[Message], str]] = {
-        SystemMessage: 'system',
-        UserMessage: 'user',
-        AiMessage: 'assistant',
-        ToolUseMessage: 'assistant',
-        ToolUseResultMessage: 'tool',
-    }
-
     DEFAULT_OPTIONS: ta.ClassVar[tv.TypedValues[Option]] = tv.TypedValues[Option](
         Temperature(0.),
         MaxTokens(1024),
@@ -162,72 +207,26 @@ class OpenaiChatRequestHandler:
         )

     @cached.function
-    def raw_request(self) -> ta.Mapping[str, ta.Any]:
+    def oai_request(self) -> pt.ChatCompletionRequest:
         po = self._process_options()

-        tools = [
-            dict(
-                type='function',
-                function=build_tool_spec_json_schema(ts),
+        tools: list[pt.ChatCompletionRequestTool] = [
+            pt.ChatCompletionRequestTool(
+                function=pt.ChatCompletionRequestTool.Function(
+                    name=check.not_none(ts.name),
+                    description=prepare_content_str(ts.desc),
+                    parameters=build_tool_spec_params_json_schema(ts),
+                ),
             )
             for ts in po.tools_by_name.values()
         ]

-        return dict(
+        return pt.ChatCompletionRequest(
             model=self._model,
-            messages=build_request_messages(self._chat),
+            messages=build_oai_request_msgs(self._chat),
             top_p=1,
-            **lang.opt_kw(tools=tools),
+            tools=tools or None,
             frequency_penalty=0.0,
             presence_penalty=0.0,
             **po.kwargs,
         )
-
-    def build_ai_chat(self, message: ta.Mapping[str, ta.Any]) -> AiChat:
-        out: list[AnyAiMessage] = []
-        if (c := message.get('content')) is not None:
-            out.append(AiMessage(c))
-        for tc in message.get('tool_calls', []):
-            out.append(ToolUseMessage(
-                ToolUse(
-                    id=tc['id'],
-                    name=tc['function']['name'],
-                    args=json.loads(tc['function']['arguments'] or '{}'),
-                    raw_args=tc['function']['arguments'],
-                ),
-            ))
-        return out
-
-    def build_response(self, raw_response: ta.Mapping[str, ta.Any]) -> ChatChoicesResponse:
-        return ChatChoicesResponse(
-            [
-                AiChoice(self.build_ai_chat(choice['message']))
-                for choice in raw_response['choices']
-            ],
-
-            tv.TypedValues(
-                *([TokenUsageOutput(TokenUsage(
-                    input=tu['prompt_tokens'],
-                    output=tu['completion_tokens'],
-                    total=tu['total_tokens'],
-                ))] if (tu := raw_response.get('usage')) is not None else []),
-            ),
-        )
-
-    def build_ai_choice_delta(self, delta: ta.Mapping[str, ta.Any]) -> AiChoiceDelta:
-        if (c := delta.get('content')) is not None:
-            check.state(not delta.get('tool_calls'))
-            return ContentAiChoiceDelta(c)
-
-        elif (tcs := delta.get('tool_calls')) is not None:  # noqa
-            check.state(delta.get('content') is None)
-            tc = check.single(tcs)
-            tc_fn = tc['function']
-            return PartialToolUseAiChoiceDelta(
-                id=tc.get('id'),
-                name=tc_fn.get('name'),
-                raw_args=tc_fn.get('arguments'),
-            )
-
-        else:
-            raise ValueError(delta)
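
For orientation, this is roughly the JSON body a marshalled `pt.ChatCompletionRequest` with one tool corresponds to in the public chat-completions API (a hand-written illustration as a Python literal; the model and tool names are made up):

request_body = {
    'model': 'gpt-4o-mini',
    'messages': [{'role': 'user', 'content': 'What is the weather in Paris?'}],
    'tools': [{
        'type': 'function',
        'function': {
            'name': 'get_weather',
            'description': 'Look up current weather for a city.',
            'parameters': {
                'type': 'object',
                'properties': {'city': {'type': 'string'}},
                'required': ['city'],
            },
        },
    }],
    'top_p': 1,
    'frequency_penalty': 0.0,
    'presence_penalty': 0.0,
}
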
@@ -11,7 +11,7 @@ from omlish.http import all as http
 from omlish.http import sse
 from omlish.io.buffers import DelimitingBuffer

-from .....backends.openai.protocol.chatcompletion.chunk import ChatCompletionChunk
+from .....backends.openai import protocol as pt
 from ....chat.choices.services import ChatChoicesOutputs
 from ....chat.stream.services import ChatChoicesStreamRequest
 from ....chat.stream.services import ChatChoicesStreamResponse
@@ -28,6 +28,7 @@ from ....stream.services import StreamResponseSink
 from ....stream.services import new_stream_response
 from .chat import OpenaiChatChoicesService
 from .format import OpenaiChatRequestHandler
+from .format import build_mc_ai_choice_delta
 from .names import MODEL_NAMES


@@ -62,13 +63,13 @@ class OpenaiChatChoicesStreamService:
             model=MODEL_NAMES.resolve(self._model_name.v),
             mandatory_kwargs=dict(
                 stream=True,
-                stream_options=dict(
+                stream_options=pt.ChatCompletionRequest.StreamOptions(
                     include_usage=True,
                 ),
             ),
         )

-        raw_request = rh.raw_request()
+        raw_request = msh.marshal(rh.oai_request())

         http_request = http.HttpRequest(
             'https://api.openai.com/v1/chat/completions',
@@ -105,20 +106,20 @@ class OpenaiChatChoicesStreamService:

                    check.state(sj['object'] == 'chat.completion.chunk')

-                   ccc = msh.unmarshal(sj, ChatCompletionChunk)  # noqa
-                   # print(ccc)
+                   ccc = msh.unmarshal(sj, pt.ChatCompletionChunk)

                    # FIXME: stop reason
-                   if not sj['choices']:
+                   if not ccc.choices:
                        continue

-                   if any(choice['delta'] for choice in sj['choices']):
-                       await sink.emit(AiChoicesDeltas([
-                           AiChoiceDeltas(
-                               [rh.build_ai_choice_delta(choice['delta'])] if choice['delta'] else [],
-                           )
-                           for choice in sj['choices']
-                       ]))
+                   if any(choice.finish_reason for choice in ccc.choices):
+                       check.state(all(choice.finish_reason for choice in ccc.choices))
+                       break
+
+                   await sink.emit(AiChoicesDeltas([
+                       AiChoiceDeltas([build_mc_ai_choice_delta(choice.delta)])
+                       for choice in ccc.choices
+                   ]))

                if not b:
                    return []
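
The loop above consumes server-sent events whose `data:` payloads are `chat.completion.chunk` objects; the stream ends with a chunk whose choices all carry a `finish_reason` (and, on the wire, a final `data: [DONE]` sentinel). A self-contained sketch of that termination logic over hand-written sample payloads (shapes follow the public OpenAI API, not captured ommlds output):

import json

sse_data_lines = [
    '{"object": "chat.completion.chunk", "choices": [{"index": 0, "delta": {"content": "Hi"}, "finish_reason": null}]}',
    '{"object": "chat.completion.chunk", "choices": [{"index": 0, "delta": {}, "finish_reason": "stop"}]}',
    '[DONE]',
]

for data in sse_data_lines:
    if data == '[DONE]':  # sentinel ending the SSE stream
        break
    chunk = json.loads(data)
    choices = chunk['choices']
    if not choices:  # usage-only chunks (stream_options.include_usage) carry no choices
        continue
    if any(c['finish_reason'] for c in choices):  # terminal chunk, as in the code above
        break
    for c in choices:
        print(c['delta'].get('content', ''), end='')
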
@@ -42,6 +42,9 @@ class AiChoiceDelta(lang.Sealed, lang.Abstract):
     pass


+#
+
+
 @dc.dataclass(frozen=True)
 class ContentAiChoiceDelta(AiChoiceDelta, lang.Final):
     c: Content
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: ommlds
-Version: 0.0.0.dev466
+Version: 0.0.0.dev467
 Summary: ommlds
 Author: wrmsr
 License-Expression: BSD-3-Clause
@@ -14,8 +14,8 @@ Classifier: Programming Language :: Python :: 3.13
 Requires-Python: >=3.13
 Description-Content-Type: text/markdown
 License-File: LICENSE
-Requires-Dist: omdev==0.0.0.dev466
-Requires-Dist: omlish==0.0.0.dev466
+Requires-Dist: omdev==0.0.0.dev467
+Requires-Dist: omlish==0.0.0.dev467
 Provides-Extra: all
 Requires-Dist: llama-cpp-python~=0.3; extra == "all"
 Requires-Dist: mlx~=0.29; extra == "all"
@@ -1,4 +1,4 @@
-ommlds/.omlish-manifests.json,sha256=ZrDlaAwG8hoshkjW-up0pk0dMvDI3g5dW1M92uaf5KI,17930
+ommlds/.omlish-manifests.json,sha256=MyJQsh5T1CMMXcGdxwqI9abQ8-j-ZlRGRluiikbeKRY,18414
 ommlds/__about__.py,sha256=uAJgr2I_m_oZPlV5P8XLFeYpBlEM-DdzeyF6O5OK_qs,1759
 ommlds/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 ommlds/huggingface.py,sha256=JfEyfKOxU3-SY_ojtXBJFNeD-NIuKjvMe3GL3e93wNA,1175
@@ -91,7 +91,7 @@ ommlds/cli/sessions/chat/driver.py,sha256=ddnCYTKqWiPxV8U4UbFwb7E3yi81ItjZ9j3AJd
 ommlds/cli/sessions/chat/inject.py,sha256=7Yg6wUs2Oej4UjNZCAWCJCEsDJZWvT4G8XvkvVUMC7U,1928
 ommlds/cli/sessions/chat/session.py,sha256=eqwelLE74JFC-fBpk_hdwMD2nP4pLv3ZPwUn99200B8,521
 ommlds/cli/sessions/chat/backends/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-ommlds/cli/sessions/chat/backends/catalog.py,sha256=GroogxBQf_zlrhEEwTSq18v13HWsuXOP276VcuphPic,1756
+ommlds/cli/sessions/chat/backends/catalog.py,sha256=gc03dqXEUUSi4WCWJ30HrkHPWlAkZHXkj1pOq7KVakU,1764
 ommlds/cli/sessions/chat/backends/inject.py,sha256=VbZ-Fb679kTItRpAhIYCqSM8vXUFeRDQWssUfrFgGi8,882
 ommlds/cli/sessions/chat/backends/injection.py,sha256=GCn5OvNIEowgB70kQVuU84z3i8lLA4vOVkTZlQG8s0o,327
 ommlds/cli/sessions/chat/backends/types.py,sha256=5eImYHXLKqbC5MDrN443eMGamP9snCmV1n7LtAsqgPk,696
@@ -190,15 +190,14 @@ ommlds/minichain/backends/impls/llamacpp/completion.py,sha256=oJ2I6wUoIPXYLm9Vc7
 ommlds/minichain/backends/impls/llamacpp/format.py,sha256=fcLMwk7r7FbNrYCH39G3fDRInKvlPIqcoxyLj95CooA,778
 ommlds/minichain/backends/impls/llamacpp/stream.py,sha256=uGog3xPNqCjGgyZjXEjhlxKbIbakWbapjANAEsmW-U4,3378
 ommlds/minichain/backends/impls/mlx/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-ommlds/minichain/backends/impls/mlx/chat.py,sha256=kNIDvkvNpoB80LfA1y7UpSzEEm2Z4K2w56HOuMwT9zE,4558
+ommlds/minichain/backends/impls/mlx/chat.py,sha256=sMlhgiFZrxAC-kKkLSJ6c-2uJn0IHZXH4EiPET_-CKI,7458
 ommlds/minichain/backends/impls/openai/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-ommlds/minichain/backends/impls/openai/chat.py,sha256=3hiKX2WqRo1cwF4AhcnzcCb2fsmwNLW7YPGYpask41A,2660
+ommlds/minichain/backends/impls/openai/chat.py,sha256=eMRjxPNrzrRjaw83LJuYzP9DGvwGyY2ObJSZub4Z9bY,2658
 ommlds/minichain/backends/impls/openai/completion.py,sha256=0XTC08mZzbW23Y2DNW2xfRR0eDX4nTyejF8CR1BdHZs,1756
 ommlds/minichain/backends/impls/openai/embedding.py,sha256=kkDJ3_0EqwQ_E0eXsSH1TuWXQmRqaijK8zG90fnlf3s,1582
-ommlds/minichain/backends/impls/openai/format.py,sha256=M1AYWDhz1QApazFeae4xTO9ng_59sx4uYs0FKt0GIKM,7275
-ommlds/minichain/backends/impls/openai/format2.py,sha256=OQ3N8VR4uL3PvHxjOQSdgg1bQ4_WiDz_sOV4WhVEXpQ,6611
+ommlds/minichain/backends/impls/openai/format.py,sha256=teGX8mNU3sXNWP4YWGD8d59M4X9_r75ImSzfTJgtNCM,7351
 ommlds/minichain/backends/impls/openai/names.py,sha256=b74t8FwSbGEveVtVz4SqM5tiRDyTKNlUKlseV6AX3Yo,1211
-ommlds/minichain/backends/impls/openai/stream.py,sha256=X45qIXgwAk7IVe4LL6gzL3uJivdaB-hUGutltHeswTc,5280
+ommlds/minichain/backends/impls/openai/stream.py,sha256=M7II7kZFsy33j8NQwdM1CCeKet3lw-XLOQdDzrzn-Yo,5297
 ommlds/minichain/backends/impls/sentencepiece/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 ommlds/minichain/backends/impls/sentencepiece/tokens.py,sha256=tUEBKyBgkTowssS_AdcAuPkyFzfyDfE935x4JG8PXM0,1602
 ommlds/minichain/backends/impls/tinygrad/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -231,7 +230,7 @@ ommlds/minichain/chat/stream/_marshal.py,sha256=r6NYBUviV7jIssaFprzv2rVEj8cFEuBl
 ommlds/minichain/chat/stream/adapters.py,sha256=3hKo3-MLtVIB-Nhdlxt17LP9vZESr-2fBZQ3Yr6l_Ps,1077
 ommlds/minichain/chat/stream/joining.py,sha256=oPxLT4qEYWCaxclnZvt54ztQP5md4V6u6Uwn4qd2e9M,2936
 ommlds/minichain/chat/stream/services.py,sha256=TxNEOm85QEFYtKb59q_uP6eSNh75v1fF-IpsJjhY4to,1252
-ommlds/minichain/chat/stream/types.py,sha256=t1udlFSMlSlEyQHRnBEQYI_f-FuE6twRBFGzR66blWQ,1585
+ommlds/minichain/chat/stream/types.py,sha256=kpHsWLNHk7hmaNPDSCqLH-ECSAiz83lRfr00LhSWb5U,1589
 ommlds/minichain/chat/tools/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 ommlds/minichain/chat/tools/execution.py,sha256=tCPsz1kCt5RcoRX7dwfaJRvObniJJv_D2hCwz1Slo_A,573
 ommlds/minichain/chat/tools/ids.py,sha256=DFBKrpeDTCnMcU-P38VbPWX0YBDaz_HzMgx3yXWjFWQ,759
@@ -368,9 +367,9 @@ ommlds/wiki/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU
 ommlds/wiki/utils/io.py,sha256=UKgDJGtmpnWvIqVd2mJc2QNPOqlToEY1GEveNp6_pMo,7088
 ommlds/wiki/utils/progress.py,sha256=EhvKcMFYtsarCQhIahlO6f0SboyAKP3UwUyrnVnP-Vk,3222
 ommlds/wiki/utils/xml.py,sha256=vVV8Ctn13aaRM9eYfs9Wd6rHn5WOCEUzQ44fIhOvJdg,3754
-ommlds-0.0.0.dev466.dist-info/licenses/LICENSE,sha256=B_hVtavaA8zCYDW99DYdcpDLKz1n3BBRjZrcbv8uG8c,1451
-ommlds-0.0.0.dev466.dist-info/METADATA,sha256=cTdtmfR8ON19GS5ay_ImJD5oZ5uXwylxukZIaX7NNUM,3224
-ommlds-0.0.0.dev466.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-ommlds-0.0.0.dev466.dist-info/entry_points.txt,sha256=Z5YWtX7ClfiCKdW-dd_CSVvM0h4yQpJPi-2G3q6gNFo,35
-ommlds-0.0.0.dev466.dist-info/top_level.txt,sha256=Rbnk5d5wi58vnAXx13WFZqdQ4VX8hBCS2hEL3WeXOhY,7
-ommlds-0.0.0.dev466.dist-info/RECORD,,
+ommlds-0.0.0.dev467.dist-info/licenses/LICENSE,sha256=B_hVtavaA8zCYDW99DYdcpDLKz1n3BBRjZrcbv8uG8c,1451
+ommlds-0.0.0.dev467.dist-info/METADATA,sha256=NvYqf0PtfEdrj2en5RFFpTHB_cGjm41groVHgy54WZQ,3224
+ommlds-0.0.0.dev467.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+ommlds-0.0.0.dev467.dist-info/entry_points.txt,sha256=Z5YWtX7ClfiCKdW-dd_CSVvM0h4yQpJPi-2G3q6gNFo,35
+ommlds-0.0.0.dev467.dist-info/top_level.txt,sha256=Rbnk5d5wi58vnAXx13WFZqdQ4VX8hBCS2hEL3WeXOhY,7
+ommlds-0.0.0.dev467.dist-info/RECORD,,
@@ -1,210 +0,0 @@
-import typing as ta
-
-from omlish import cached
-from omlish import check
-from omlish import typedvalues as tv
-from omlish.formats import json
-
-from .....backends.openai import protocol as pt
-from ....chat.choices.services import ChatChoicesResponse
-from ....chat.choices.types import AiChoice
-from ....chat.choices.types import AiChoices
-from ....chat.choices.types import ChatChoicesOptions
-from ....chat.messages import AiMessage
-from ....chat.messages import AnyAiMessage
-from ....chat.messages import Chat
-from ....chat.messages import SystemMessage
-from ....chat.messages import ToolUseMessage
-from ....chat.messages import ToolUseResultMessage
-from ....chat.messages import UserMessage
-from ....chat.tools.types import Tool
-from ....content.json import JsonContent
-from ....content.prepare import prepare_content_str
-from ....llms.types import MaxTokens
-from ....llms.types import Temperature
-from ....llms.types import TokenUsage
-from ....llms.types import TokenUsageOutput
-from ....tools.jsonschema import build_tool_spec_params_json_schema
-from ....tools.types import ToolSpec
-from ....tools.types import ToolUse
-from ....types import Option
-
-
-##
-
-
-def build_oai_request_msgs(mc_chat: Chat) -> ta.Sequence[pt.ChatCompletionMessage]:
-    oai_msgs: list[pt.ChatCompletionMessage] = []
-
-    for mc_msg in mc_chat:
-        if isinstance(mc_msg, SystemMessage):
-            oai_msgs.append(pt.SystemChatCompletionMessage(
-                content=check.isinstance(mc_msg.c, str),
-            ))
-
-        elif isinstance(mc_msg, AiMessage):
-            oai_msgs.append(pt.AssistantChatCompletionMessage(
-                content=check.isinstance(mc_msg.c, (str, None)),
-            ))
-
-        elif isinstance(mc_msg, ToolUseMessage):
-            oai_msgs.append(pt.AssistantChatCompletionMessage(
-                tool_calls=[pt.AssistantChatCompletionMessage.ToolCall(
-                    id=check.not_none(mc_msg.tu.id),
-                    function=pt.AssistantChatCompletionMessage.ToolCall.Function(
-                        arguments=check.not_none(mc_msg.tu.raw_args),
-                        name=mc_msg.tu.name,
-                    ),
-                )],
-            ))
-
-        elif isinstance(mc_msg, UserMessage):
-            oai_msgs.append(pt.UserChatCompletionMessage(
-                content=prepare_content_str(mc_msg.c),
-            ))
-
-        elif isinstance(mc_msg, ToolUseResultMessage):
-            tc: str
-            if isinstance(mc_msg.tur.c, str):
-                tc = mc_msg.tur.c
-            elif isinstance(mc_msg.tur.c, JsonContent):
-                tc = json.dumps_compact(mc_msg.tur.c)
-            else:
-                raise TypeError(mc_msg.tur.c)
-            oai_msgs.append(pt.ToolChatCompletionMessage(
-                tool_call_id=check.not_none(mc_msg.tur.id),
-                content=tc,
-            ))
-
-        else:
-            raise TypeError(mc_msg)
-
-    return oai_msgs
-
-
-#
-
-
-def build_mc_ai_choice(oai_choice: pt.ChatCompletionResponseChoice) -> AiChoice:
-    cur: list[AnyAiMessage] = []
-
-    oai_msg = oai_choice.message
-
-    if (oai_c := oai_msg.content) is not None:
-        cur.append(AiMessage(check.isinstance(oai_c, str)))
-
-    for oai_tc in oai_msg.tool_calls or []:
-        cur.append(ToolUseMessage(ToolUse(
-            id=oai_tc.id,
-            name=oai_tc.function.name,
-            args=json.loads(oai_tc.function.arguments or '{}'),
-            raw_args=oai_tc.function.arguments,
-        )))
-
-    return AiChoice(cur)
-
-
-def build_mc_ai_choices(oai_resp: pt.ChatCompletionResponse) -> AiChoices:
-    return [
-        build_mc_ai_choice(oai_choice)
-        for oai_choice in oai_resp.choices
-    ]
-
-
-def build_mc_choices_response(oai_resp: pt.ChatCompletionResponse) -> ChatChoicesResponse:
-    return ChatChoicesResponse(
-        build_mc_ai_choices(oai_resp),
-
-        tv.TypedValues(
-            *([TokenUsageOutput(TokenUsage(
-                input=tu.prompt_tokens,
-                output=tu.completion_tokens,
-                total=tu.total_tokens,
-            ))] if (tu := oai_resp.usage) is not None else []),
-        ),
-    )
-
-
-##
-
-
-class OpenaiChatRequestHandler:
-    def __init__(
-            self,
-            chat: Chat,
-            *options: ChatChoicesOptions,
-            model: str,
-            mandatory_kwargs: ta.Mapping[str, ta.Any] | None = None,
-    ) -> None:
-        super().__init__()
-
-        self._chat = chat
-        self._options = options
-        self._model = model
-        self._mandatory_kwargs = mandatory_kwargs
-
-    DEFAULT_OPTIONS: ta.ClassVar[tv.TypedValues[Option]] = tv.TypedValues[Option](
-        Temperature(0.),
-        MaxTokens(1024),
-    )
-
-    _OPTION_KWARG_NAMES_MAP: ta.ClassVar[ta.Mapping[str, type[ChatChoicesOptions]]] = dict(
-        temperature=Temperature,
-        max_tokens=MaxTokens,
-    )
-
-    class _ProcessedOptions(ta.NamedTuple):
-        kwargs: dict[str, ta.Any]
-        tools_by_name: dict[str, ToolSpec]
-
-    @cached.function
-    def _process_options(self) -> _ProcessedOptions:
-        kwargs: dict = dict(
-            temperature=0,
-            max_tokens=1024,
-        )
-
-        tools_by_name: dict[str, ToolSpec] = {}
-
-        with tv.TypedValues(*self._options).consume() as oc:
-            kwargs.update(oc.pop_scalar_kwargs(**self._OPTION_KWARG_NAMES_MAP))
-
-            for t in oc.pop(Tool, []):
-                if t.spec.name in tools_by_name:
-                    raise NameError(t.spec.name)
-                tools_by_name[check.non_empty_str(t.spec.name)] = t.spec
-
-        if (mk := self._mandatory_kwargs):
-            for k, v in mk.items():
-                check.not_in(k, kwargs)
-                kwargs[k] = v
-
-        return self._ProcessedOptions(
-            kwargs=kwargs,
-            tools_by_name=tools_by_name,
-        )
-
-    @cached.function
-    def oai_request(self) -> pt.ChatCompletionRequest:
-        po = self._process_options()
-
-        tools: list[pt.ChatCompletionRequestTool] = [
-            pt.ChatCompletionRequestTool(
-                function=pt.ChatCompletionRequestTool.Function(
-                    name=check.not_none(ts.name),
-                    description=prepare_content_str(ts.desc),
-                    parameters=build_tool_spec_params_json_schema(ts),
-                ),
-            )
-            for ts in po.tools_by_name.values()
-        ]
-
-        return pt.ChatCompletionRequest(
-            model=self._model,
-            messages=build_oai_request_msgs(self._chat),
-            top_p=1,
-            tools=tools or None,
-            frequency_penalty=0.0,
-            presence_penalty=0.0,
-            **po.kwargs,
-        )