ommlds 0.0.0.dev436__py3-none-any.whl → 0.0.0.dev480__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ommlds/.omlish-manifests.json +332 -35
- ommlds/__about__.py +15 -9
- ommlds/_hacks/__init__.py +4 -0
- ommlds/_hacks/funcs.py +110 -0
- ommlds/_hacks/names.py +158 -0
- ommlds/_hacks/params.py +73 -0
- ommlds/_hacks/patches.py +0 -3
- ommlds/backends/anthropic/protocol/_marshal.py +2 -2
- ommlds/backends/anthropic/protocol/sse/_marshal.py +1 -1
- ommlds/backends/anthropic/protocol/sse/assemble.py +23 -7
- ommlds/backends/anthropic/protocol/sse/events.py +13 -0
- ommlds/backends/anthropic/protocol/types.py +30 -9
- ommlds/backends/google/protocol/__init__.py +3 -0
- ommlds/backends/google/protocol/_marshal.py +16 -0
- ommlds/backends/google/protocol/types.py +626 -0
- ommlds/backends/groq/_marshal.py +23 -0
- ommlds/backends/groq/protocol.py +249 -0
- ommlds/backends/mlx/generation.py +1 -1
- ommlds/backends/mlx/loading.py +58 -1
- ommlds/backends/ollama/__init__.py +0 -0
- ommlds/backends/ollama/protocol.py +170 -0
- ommlds/backends/openai/protocol/__init__.py +9 -28
- ommlds/backends/openai/protocol/_common.py +18 -0
- ommlds/backends/openai/protocol/_marshal.py +27 -0
- ommlds/backends/openai/protocol/chatcompletion/chunk.py +58 -31
- ommlds/backends/openai/protocol/chatcompletion/contentpart.py +49 -44
- ommlds/backends/openai/protocol/chatcompletion/message.py +55 -43
- ommlds/backends/openai/protocol/chatcompletion/request.py +114 -66
- ommlds/backends/openai/protocol/chatcompletion/response.py +71 -45
- ommlds/backends/openai/protocol/chatcompletion/responseformat.py +27 -20
- ommlds/backends/openai/protocol/chatcompletion/tokenlogprob.py +16 -7
- ommlds/backends/openai/protocol/completionusage.py +24 -15
- ommlds/backends/tavily/__init__.py +0 -0
- ommlds/backends/tavily/protocol.py +301 -0
- ommlds/backends/tinygrad/models/llama3/__init__.py +22 -14
- ommlds/backends/transformers/__init__.py +0 -0
- ommlds/backends/transformers/filecache.py +109 -0
- ommlds/backends/transformers/streamers.py +73 -0
- ommlds/cli/asyncs.py +30 -0
- ommlds/cli/backends/catalog.py +93 -0
- ommlds/cli/backends/configs.py +9 -0
- ommlds/cli/backends/inject.py +31 -36
- ommlds/cli/backends/injection.py +16 -0
- ommlds/cli/backends/types.py +46 -0
- ommlds/cli/content/__init__.py +0 -0
- ommlds/cli/content/messages.py +34 -0
- ommlds/cli/content/strings.py +42 -0
- ommlds/cli/inject.py +15 -32
- ommlds/cli/inputs/__init__.py +0 -0
- ommlds/cli/inputs/asyncs.py +32 -0
- ommlds/cli/inputs/sync.py +75 -0
- ommlds/cli/main.py +270 -110
- ommlds/cli/rendering/__init__.py +0 -0
- ommlds/cli/rendering/configs.py +9 -0
- ommlds/cli/rendering/inject.py +31 -0
- ommlds/cli/rendering/markdown.py +52 -0
- ommlds/cli/rendering/raw.py +73 -0
- ommlds/cli/rendering/types.py +21 -0
- ommlds/cli/secrets.py +21 -0
- ommlds/cli/sessions/base.py +1 -1
- ommlds/cli/sessions/chat/chat/__init__.py +0 -0
- ommlds/cli/sessions/chat/chat/ai/__init__.py +0 -0
- ommlds/cli/sessions/chat/chat/ai/configs.py +11 -0
- ommlds/cli/sessions/chat/chat/ai/inject.py +74 -0
- ommlds/cli/sessions/chat/chat/ai/injection.py +14 -0
- ommlds/cli/sessions/chat/chat/ai/rendering.py +70 -0
- ommlds/cli/sessions/chat/chat/ai/services.py +79 -0
- ommlds/cli/sessions/chat/chat/ai/tools.py +44 -0
- ommlds/cli/sessions/chat/chat/ai/types.py +28 -0
- ommlds/cli/sessions/chat/chat/state/__init__.py +0 -0
- ommlds/cli/sessions/chat/chat/state/configs.py +11 -0
- ommlds/cli/sessions/chat/chat/state/inject.py +36 -0
- ommlds/cli/sessions/chat/chat/state/inmemory.py +33 -0
- ommlds/cli/sessions/chat/chat/state/storage.py +52 -0
- ommlds/cli/sessions/chat/chat/state/types.py +38 -0
- ommlds/cli/sessions/chat/chat/user/__init__.py +0 -0
- ommlds/cli/sessions/chat/chat/user/configs.py +17 -0
- ommlds/cli/sessions/chat/chat/user/inject.py +62 -0
- ommlds/cli/sessions/chat/chat/user/interactive.py +31 -0
- ommlds/cli/sessions/chat/chat/user/oneshot.py +25 -0
- ommlds/cli/sessions/chat/chat/user/types.py +15 -0
- ommlds/cli/sessions/chat/configs.py +27 -0
- ommlds/cli/sessions/chat/driver.py +43 -0
- ommlds/cli/sessions/chat/inject.py +33 -65
- ommlds/cli/sessions/chat/phases/__init__.py +0 -0
- ommlds/cli/sessions/chat/phases/inject.py +27 -0
- ommlds/cli/sessions/chat/phases/injection.py +14 -0
- ommlds/cli/sessions/chat/phases/manager.py +29 -0
- ommlds/cli/sessions/chat/phases/types.py +29 -0
- ommlds/cli/sessions/chat/session.py +27 -0
- ommlds/cli/sessions/chat/tools/__init__.py +0 -0
- ommlds/cli/sessions/chat/tools/configs.py +22 -0
- ommlds/cli/sessions/chat/tools/confirmation.py +46 -0
- ommlds/cli/sessions/chat/tools/execution.py +66 -0
- ommlds/cli/sessions/chat/tools/fs/__init__.py +0 -0
- ommlds/cli/sessions/chat/tools/fs/configs.py +12 -0
- ommlds/cli/sessions/chat/tools/fs/inject.py +35 -0
- ommlds/cli/sessions/chat/tools/inject.py +88 -0
- ommlds/cli/sessions/chat/tools/injection.py +44 -0
- ommlds/cli/sessions/chat/tools/rendering.py +58 -0
- ommlds/cli/sessions/chat/tools/todo/__init__.py +0 -0
- ommlds/cli/sessions/chat/tools/todo/configs.py +12 -0
- ommlds/cli/sessions/chat/tools/todo/inject.py +31 -0
- ommlds/cli/sessions/chat/tools/weather/__init__.py +0 -0
- ommlds/cli/sessions/chat/tools/weather/configs.py +12 -0
- ommlds/cli/sessions/chat/tools/weather/inject.py +22 -0
- ommlds/cli/{tools/weather.py → sessions/chat/tools/weather/tools.py} +1 -1
- ommlds/cli/sessions/completion/configs.py +21 -0
- ommlds/cli/sessions/completion/inject.py +42 -0
- ommlds/cli/sessions/completion/session.py +35 -0
- ommlds/cli/sessions/embedding/configs.py +21 -0
- ommlds/cli/sessions/embedding/inject.py +42 -0
- ommlds/cli/sessions/embedding/session.py +33 -0
- ommlds/cli/sessions/inject.py +28 -11
- ommlds/cli/state/__init__.py +0 -0
- ommlds/cli/state/inject.py +28 -0
- ommlds/cli/{state.py → state/storage.py} +41 -24
- ommlds/minichain/__init__.py +84 -24
- ommlds/minichain/_marshal.py +49 -9
- ommlds/minichain/_typedvalues.py +2 -4
- ommlds/minichain/backends/catalogs/base.py +20 -1
- ommlds/minichain/backends/catalogs/simple.py +2 -2
- ommlds/minichain/backends/catalogs/strings.py +10 -8
- ommlds/minichain/backends/impls/anthropic/chat.py +65 -27
- ommlds/minichain/backends/impls/anthropic/names.py +10 -8
- ommlds/minichain/backends/impls/anthropic/protocol.py +109 -0
- ommlds/minichain/backends/impls/anthropic/stream.py +111 -43
- ommlds/minichain/backends/impls/duckduckgo/search.py +1 -1
- ommlds/minichain/backends/impls/dummy/__init__.py +0 -0
- ommlds/minichain/backends/impls/dummy/chat.py +69 -0
- ommlds/minichain/backends/impls/google/chat.py +114 -22
- ommlds/minichain/backends/impls/google/search.py +7 -2
- ommlds/minichain/backends/impls/google/stream.py +219 -0
- ommlds/minichain/backends/impls/google/tools.py +149 -0
- ommlds/minichain/backends/impls/groq/__init__.py +0 -0
- ommlds/minichain/backends/impls/groq/chat.py +75 -0
- ommlds/minichain/backends/impls/groq/names.py +48 -0
- ommlds/minichain/backends/impls/groq/protocol.py +143 -0
- ommlds/minichain/backends/impls/groq/stream.py +125 -0
- ommlds/minichain/backends/impls/llamacpp/chat.py +33 -18
- ommlds/minichain/backends/impls/llamacpp/completion.py +1 -1
- ommlds/minichain/backends/impls/llamacpp/format.py +4 -2
- ommlds/minichain/backends/impls/llamacpp/stream.py +37 -20
- ommlds/minichain/backends/impls/mistral.py +20 -5
- ommlds/minichain/backends/impls/mlx/chat.py +96 -22
- ommlds/minichain/backends/impls/ollama/__init__.py +0 -0
- ommlds/minichain/backends/impls/ollama/chat.py +199 -0
- ommlds/minichain/backends/impls/openai/chat.py +18 -8
- ommlds/minichain/backends/impls/openai/completion.py +10 -3
- ommlds/minichain/backends/impls/openai/embedding.py +10 -3
- ommlds/minichain/backends/impls/openai/format.py +131 -106
- ommlds/minichain/backends/impls/openai/names.py +31 -5
- ommlds/minichain/backends/impls/openai/stream.py +43 -25
- ommlds/minichain/backends/impls/tavily.py +66 -0
- ommlds/minichain/backends/impls/tinygrad/chat.py +23 -16
- ommlds/minichain/backends/impls/transformers/sentence.py +1 -1
- ommlds/minichain/backends/impls/transformers/tokens.py +1 -1
- ommlds/minichain/backends/impls/transformers/transformers.py +155 -34
- ommlds/minichain/backends/strings/parsing.py +1 -1
- ommlds/minichain/backends/strings/resolving.py +4 -1
- ommlds/minichain/chat/_marshal.py +16 -9
- ommlds/minichain/chat/choices/adapters.py +4 -4
- ommlds/minichain/chat/choices/services.py +1 -1
- ommlds/minichain/chat/choices/stream/__init__.py +0 -0
- ommlds/minichain/chat/choices/stream/adapters.py +35 -0
- ommlds/minichain/chat/choices/stream/joining.py +31 -0
- ommlds/minichain/chat/choices/stream/services.py +45 -0
- ommlds/minichain/chat/choices/stream/types.py +43 -0
- ommlds/minichain/chat/choices/types.py +2 -2
- ommlds/minichain/chat/history.py +3 -3
- ommlds/minichain/chat/messages.py +55 -19
- ommlds/minichain/chat/services.py +3 -3
- ommlds/minichain/chat/stream/_marshal.py +16 -0
- ommlds/minichain/chat/stream/joining.py +85 -0
- ommlds/minichain/chat/stream/services.py +15 -21
- ommlds/minichain/chat/stream/types.py +32 -19
- ommlds/minichain/chat/tools/execution.py +8 -7
- ommlds/minichain/chat/tools/ids.py +9 -15
- ommlds/minichain/chat/tools/parsing.py +17 -26
- ommlds/minichain/chat/transforms/base.py +29 -38
- ommlds/minichain/chat/transforms/metadata.py +30 -4
- ommlds/minichain/chat/transforms/services.py +9 -11
- ommlds/minichain/content/_marshal.py +44 -20
- ommlds/minichain/content/json.py +13 -0
- ommlds/minichain/content/materialize.py +14 -21
- ommlds/minichain/content/prepare.py +4 -0
- ommlds/minichain/content/transforms/interleave.py +1 -1
- ommlds/minichain/content/transforms/squeeze.py +1 -1
- ommlds/minichain/content/transforms/stringify.py +1 -1
- ommlds/minichain/json.py +20 -0
- ommlds/minichain/lib/code/__init__.py +0 -0
- ommlds/minichain/lib/code/prompts.py +6 -0
- ommlds/minichain/lib/fs/binfiles.py +108 -0
- ommlds/minichain/lib/fs/context.py +126 -0
- ommlds/minichain/lib/fs/errors.py +101 -0
- ommlds/minichain/lib/fs/suggestions.py +36 -0
- ommlds/minichain/lib/fs/tools/__init__.py +0 -0
- ommlds/minichain/lib/fs/tools/edit.py +104 -0
- ommlds/minichain/lib/fs/tools/ls.py +38 -0
- ommlds/minichain/lib/fs/tools/read.py +115 -0
- ommlds/minichain/lib/fs/tools/recursivels/__init__.py +0 -0
- ommlds/minichain/lib/fs/tools/recursivels/execution.py +40 -0
- ommlds/minichain/lib/todo/__init__.py +0 -0
- ommlds/minichain/lib/todo/context.py +54 -0
- ommlds/minichain/lib/todo/tools/__init__.py +0 -0
- ommlds/minichain/lib/todo/tools/read.py +44 -0
- ommlds/minichain/lib/todo/tools/write.py +335 -0
- ommlds/minichain/lib/todo/types.py +60 -0
- ommlds/minichain/llms/_marshal.py +25 -17
- ommlds/minichain/llms/types.py +4 -0
- ommlds/minichain/registries/globals.py +18 -4
- ommlds/minichain/resources.py +66 -43
- ommlds/minichain/search.py +1 -1
- ommlds/minichain/services/_marshal.py +46 -39
- ommlds/minichain/services/facades.py +3 -3
- ommlds/minichain/services/services.py +1 -1
- ommlds/minichain/standard.py +8 -0
- ommlds/minichain/stream/services.py +152 -38
- ommlds/minichain/stream/wrap.py +22 -24
- ommlds/minichain/tools/_marshal.py +1 -1
- ommlds/minichain/tools/execution/catalog.py +2 -1
- ommlds/minichain/tools/execution/context.py +34 -14
- ommlds/minichain/tools/execution/errors.py +15 -0
- ommlds/minichain/tools/execution/executors.py +8 -3
- ommlds/minichain/tools/execution/reflect.py +40 -5
- ommlds/minichain/tools/fns.py +46 -9
- ommlds/minichain/tools/jsonschema.py +14 -5
- ommlds/minichain/tools/reflect.py +54 -18
- ommlds/minichain/tools/types.py +33 -1
- ommlds/minichain/utils.py +27 -0
- ommlds/minichain/vectors/_marshal.py +11 -10
- ommlds/nanochat/LICENSE +21 -0
- ommlds/nanochat/__init__.py +0 -0
- ommlds/nanochat/rustbpe/LICENSE +21 -0
- ommlds/nanochat/tokenizers.py +406 -0
- ommlds/server/server.py +3 -3
- ommlds/specs/__init__.py +0 -0
- ommlds/specs/mcp/__init__.py +0 -0
- ommlds/specs/mcp/_marshal.py +23 -0
- ommlds/specs/mcp/protocol.py +266 -0
- ommlds/tools/git.py +27 -10
- ommlds/tools/ocr.py +8 -9
- ommlds/wiki/analyze.py +2 -2
- ommlds/wiki/text/mfh.py +1 -5
- ommlds/wiki/text/wtp.py +1 -3
- ommlds/wiki/utils/xml.py +5 -5
- {ommlds-0.0.0.dev436.dist-info → ommlds-0.0.0.dev480.dist-info}/METADATA +24 -21
- ommlds-0.0.0.dev480.dist-info/RECORD +427 -0
- ommlds/cli/backends/standard.py +0 -20
- ommlds/cli/sessions/chat/base.py +0 -42
- ommlds/cli/sessions/chat/interactive.py +0 -73
- ommlds/cli/sessions/chat/printing.py +0 -96
- ommlds/cli/sessions/chat/prompt.py +0 -143
- ommlds/cli/sessions/chat/state.py +0 -109
- ommlds/cli/sessions/chat/tools.py +0 -91
- ommlds/cli/sessions/completion/completion.py +0 -44
- ommlds/cli/sessions/embedding/embedding.py +0 -42
- ommlds/cli/tools/config.py +0 -13
- ommlds/cli/tools/inject.py +0 -64
- ommlds/minichain/chat/stream/adapters.py +0 -69
- ommlds/minichain/lib/fs/ls/execution.py +0 -32
- ommlds-0.0.0.dev436.dist-info/RECORD +0 -303
- /ommlds/{cli/tools → backends/google}/__init__.py +0 -0
- /ommlds/{minichain/lib/fs/ls → backends/groq}/__init__.py +0 -0
- /ommlds/{huggingface.py → backends/huggingface.py} +0 -0
- /ommlds/minichain/lib/fs/{ls → tools/recursivels}/rendering.py +0 -0
- /ommlds/minichain/lib/fs/{ls → tools/recursivels}/running.py +0 -0
- {ommlds-0.0.0.dev436.dist-info → ommlds-0.0.0.dev480.dist-info}/WHEEL +0 -0
- {ommlds-0.0.0.dev436.dist-info → ommlds-0.0.0.dev480.dist-info}/entry_points.txt +0 -0
- {ommlds-0.0.0.dev436.dist-info → ommlds-0.0.0.dev480.dist-info}/licenses/LICENSE +0 -0
- {ommlds-0.0.0.dev436.dist-info → ommlds-0.0.0.dev480.dist-info}/top_level.txt +0 -0
ommlds/minichain/backends/impls/groq/stream.py

```diff
@@ -0,0 +1,125 @@
+import typing as ta
+
+from omlish import check
+from omlish import marshal as msh
+from omlish import typedvalues as tv
+from omlish.formats import json
+from omlish.http import all as http
+from omlish.http import sse
+from omlish.io.buffers import DelimitingBuffer
+
+from .....backends.groq import protocol as pt
+from ....chat.choices.services import ChatChoicesOutputs
+from ....chat.choices.stream.services import ChatChoicesStreamRequest
+from ....chat.choices.stream.services import ChatChoicesStreamResponse
+from ....chat.choices.stream.services import static_check_is_chat_choices_stream_service
+from ....chat.choices.stream.types import AiChoicesDeltas
+from ....chat.tools.types import Tool
+from ....configs import Config
+from ....resources import UseResources
+from ....standard import ApiKey
+from ....stream.services import StreamResponseSink
+from ....stream.services import new_stream_response
+from .chat import GroqChatChoicesService
+from .names import MODEL_NAMES
+from .protocol import build_gq_request_messages
+from .protocol import build_gq_request_tool
+from .protocol import build_mc_ai_choice_deltas
+
+
+##
+
+
+# @omlish-manifest $.minichain.registries.manifests.RegistryManifest(
+#     name='groq',
+#     type='ChatChoicesStreamService',
+# )
+@static_check_is_chat_choices_stream_service
+class GroqChatChoicesStreamService:
+    def __init__(
+            self,
+            *configs: Config,
+            http_client: http.AsyncHttpClient | None = None,
+    ) -> None:
+        super().__init__()
+
+        self._http_client = http_client
+
+        with tv.consume(*configs) as cc:
+            self._model_name = cc.pop(GroqChatChoicesService.DEFAULT_MODEL_NAME)
+            self._api_key = ApiKey.pop_secret(cc, env='GROQ_API_KEY')
+
+    READ_CHUNK_SIZE: ta.ClassVar[int] = -1
+
+    async def invoke(self, request: ChatChoicesStreamRequest) -> ChatChoicesStreamResponse:
+        tools: list[pt.ChatCompletionRequest.Tool] = []
+        with tv.TypedValues(*request.options).consume() as oc:
+            t: Tool
+            for t in oc.pop(Tool, []):
+                tools.append(build_gq_request_tool(t))
+
+        gq_request = pt.ChatCompletionRequest(
+            messages=build_gq_request_messages(request.v),
+            model=MODEL_NAMES.resolve(self._model_name.v),
+            tools=tools or None,
+            stream=True,
+        )
+
+        raw_request = msh.marshal(gq_request)
+
+        http_request = http.HttpRequest(
+            'https://api.groq.com/openai/v1/chat/completions',
+            headers={
+                http.consts.HEADER_CONTENT_TYPE: http.consts.CONTENT_TYPE_JSON,
+                http.consts.HEADER_AUTH: http.consts.format_bearer_auth_header(check.not_none(self._api_key).reveal()),
+            },
+            data=json.dumps(raw_request).encode('utf-8'),
+        )
+
+        async with UseResources.or_new(request.options) as rs:
+            http_client = await rs.enter_async_context(http.manage_async_client(self._http_client))
+            http_response = await rs.enter_async_context(await http_client.stream_request(http_request))
+
+            async def inner(sink: StreamResponseSink[AiChoicesDeltas]) -> ta.Sequence[ChatChoicesOutputs]:
+                db = DelimitingBuffer([b'\r', b'\n', b'\r\n'])
+                sd = sse.SseDecoder()
+                while True:
+                    b = await http_response.stream.read1(self.READ_CHUNK_SIZE)
+                    for l in db.feed(b):
+                        if isinstance(l, DelimitingBuffer.Incomplete):
+                            # FIXME: handle
+                            return []
+
+                        # FIXME: https://platform.openai.com/docs/guides/function-calling?api-mode=responses#streaming
+                        for so in sd.process_line(l):
+                            if isinstance(so, sse.SseEvent) and so.type == b'message':
+                                ss = so.data.decode('utf-8')
+                                if ss == '[DONE]':
+                                    return []
+
+                                sj = json.loads(ss)  # ChatCompletionChunk
+
+                                check.state(sj['object'] == 'chat.completion.chunk')
+
+                                ccc = msh.unmarshal(sj, pt.ChatCompletionChunk)
+
+                                # FIXME: stop reason
+                                if not ccc.choices:
+                                    continue
+
+                                if any(choice.finish_reason for choice in ccc.choices):
+                                    check.state(all(choice.finish_reason for choice in ccc.choices))
+                                    break
+
+                                await sink.emit(AiChoicesDeltas([
+                                    build_mc_ai_choice_deltas(choice.delta)
+                                    for choice in ccc.choices
+                                ]))
+
+                    if not b:
+                        return []
+
+            # raw_response = json.loads(check.not_none(http_response.data).decode('utf-8'))
+            # return rh.build_response(raw_response)
+
+            return await new_stream_response(rs, inner)
```
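For orientation, the wire format this new service decodes is Groq's OpenAI-compatible SSE stream: each `data:` line carries a `chat.completion.chunk` JSON object, and a literal `[DONE]` payload terminates the stream. A minimal standalone sketch of just that framing (illustrative only; the service itself uses omlish's `DelimitingBuffer`, `SseDecoder`, and marshalled `pt.ChatCompletionChunk` types):

```python
import json


def iter_sse_chunks(lines):
    """Yields parsed chat.completion.chunk payloads from decoded SSE lines."""
    for line in lines:
        if not line.startswith('data: '):
            continue  # skip blank keep-alives, comments, and event-name lines
        data = line[len('data: '):].strip()
        if data == '[DONE]':  # OpenAI-compatible end-of-stream sentinel
            return
        chunk = json.loads(data)
        assert chunk['object'] == 'chat.completion.chunk'
        yield chunk


# Accumulate the streamed assistant text from each choice delta:
body = [
    'data: {"object": "chat.completion.chunk", "choices": [{"delta": {"content": "Hel"}}]}',
    'data: {"object": "chat.completion.chunk", "choices": [{"delta": {"content": "lo"}}]}',
    'data: [DONE]',
]
assert ''.join(
    c['choices'][0]['delta'].get('content', '')
    for c in iter_sse_chunks(body)
) == 'Hello'
```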
ommlds/minichain/backends/impls/llamacpp/chat.py

```diff
@@ -15,7 +15,8 @@ from ....chat.choices.services import static_check_is_chat_choices_service
 from ....chat.choices.types import AiChoice
 from ....chat.choices.types import ChatChoicesOptions
 from ....chat.messages import AiMessage
-from ....chat.messages import
+from ....chat.messages import ToolUseMessage
+from ....chat.messages import ToolUseResultMessage
 from ....chat.tools.types import Tool
 from ....configs import Config
 from ....llms.types import MaxTokens
@@ -29,6 +30,15 @@ from .format import get_msg_content
 ##
 
 
+# @omlish-manifest $.minichain.backends.strings.manifests.BackendStringsManifest(
+#     ['ChatChoicesService'],
+#     'llamacpp',
+# )
+
+
+##
+
+
 # @omlish-manifest $.minichain.registries.manifests.RegistryManifest(
 #     name='llamacpp',
 #     type='ChatChoicesService',
@@ -54,7 +64,7 @@ class LlamacppChatChoicesService:
         temperatur=Temperature,
     )
 
-    def invoke(self, request: ChatChoicesRequest) -> ChatChoicesResponse:
+    async def invoke(self, request: ChatChoicesRequest) -> ChatChoicesResponse:
         kwargs: dict = dict(
             # temperature=0,
             max_tokens=1024,
@@ -100,29 +110,34 @@ class LlamacppChatChoicesService:
 
         ims: list = []
         for rm in request.v:
-            if isinstance(rm,
+            if isinstance(rm, ToolUseResultMessage):
                 ims.append(dict(
                     role='tool',
-                    **(dict(id=rm.id) if rm.id is not None else {}),
-                    name=rm.name,
-                    content=check.isinstance(rm.c, str),
+                    **(dict(id=rm.tur.id) if rm.tur.id is not None else {}),
+                    name=rm.tur.name,
+                    content=check.isinstance(rm.tur.c, str),
                 ))
+
             elif isinstance(rm, AiMessage):
-                tcs: list[dict] = []
-                for ter in rm.tool_exec_requests or []:
-                    tcs.append(dict(
-                        id=check.not_none(ter.id),
-                        type='function',
-                        function=dict(
-                            name=ter.name,
-                            arguments=check.isinstance(ter.raw_args, str),
-                        ),
-                    ))
                 ims.append(dict(
                     role=ROLES_MAP[type(rm)],
                     **(dict(content=mc) if (mc := get_msg_content(rm)) is not None else {}),
-                    **(dict(tool_calls=tcs) if tcs else {}),
                 ))
+
+            elif isinstance(rm, ToolUseMessage):
+                ims.append(dict(
+                    role=ROLES_MAP[type(rm)],
+                    content='',
+                    tool_calls=[dict(
+                        id=check.not_none(rm.tu.id),
+                        type='function',
+                        function=dict(
+                            name=rm.tu.name,
+                            arguments=check.isinstance(rm.tu.raw_args, str),
+                        ),
+                    )],
+                ))
+
             else:
                 ims.append(dict(
                     role=ROLES_MAP[type(rm)],
@@ -137,6 +152,6 @@ class LlamacppChatChoicesService:
         out: list[AiChoice] = []
         for c in ta.cast(ta.Any, output)['choices']:
             m = c['message']
-            out.append(AiChoice(AiMessage(m['content'])))
+            out.append(AiChoice([AiMessage(m['content'])]))
 
         return ChatChoicesResponse(out)
```
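The rewritten message-building loop above splits tool traffic into two llama.cpp-style dicts: a `ToolUseMessage` becomes an assistant message carrying `tool_calls`, and a `ToolUseResultMessage` becomes a `role='tool'` reply. Roughly the shapes it emits (field values here are hypothetical):

```python
# Assistant turn requesting a tool call (built from a ToolUseMessage):
tool_use_msg = {
    'role': 'assistant',
    'content': '',
    'tool_calls': [{
        'id': 'call_1',                        # rm.tu.id
        'type': 'function',
        'function': {
            'name': 'get_weather',             # rm.tu.name
            'arguments': '{"city": "Paris"}',  # rm.tu.raw_args (raw JSON string)
        },
    }],
}

# Tool reply turn (built from a ToolUseResultMessage):
tool_result_msg = {
    'role': 'tool',
    'id': 'call_1',               # rm.tur.id, included only when present
    'name': 'get_weather',        # rm.tur.name
    'content': '{"temp_c": 18}',  # rm.tur.c, checked to be a str
}
```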
ommlds/minichain/backends/impls/llamacpp/completion.py

```diff
@@ -51,7 +51,7 @@ class LlamacppCompletionService:
         temperatur=Temperature,
     )
 
-    def invoke(self, request: CompletionRequest) -> CompletionResponse:
+    async def invoke(self, request: CompletionRequest) -> CompletionResponse:
         kwargs: dict = dict(
             # temperature=0,
             max_tokens=1024,
```
ommlds/minichain/backends/impls/llamacpp/format.py

```diff
@@ -5,7 +5,8 @@ from omlish import check
 from ....chat.messages import AiMessage
 from ....chat.messages import Message
 from ....chat.messages import SystemMessage
-from ....chat.messages import
+from ....chat.messages import ToolUseMessage
+from ....chat.messages import ToolUseResultMessage
 from ....chat.messages import UserMessage
 
 
@@ -16,7 +17,8 @@ ROLES_MAP: ta.Mapping[type[Message], str] = {
     SystemMessage: 'system',
     UserMessage: 'user',
     AiMessage: 'assistant',
-
+    ToolUseMessage: 'assistant',
+    ToolUseResultMessage: 'tool',
 }
 
 
```
ommlds/minichain/backends/impls/llamacpp/stream.py

```diff
@@ -10,15 +10,16 @@ from omlish import typedvalues as tv
 
 from .....backends import llamacpp as lcu
 from ....chat.choices.services import ChatChoicesOutputs
-from ....chat.stream.services import ChatChoicesStreamRequest
-from ....chat.stream.services import ChatChoicesStreamResponse
-from ....chat.stream.services import static_check_is_chat_choices_stream_service
-from ....chat.stream.types import
-from ....chat.stream.types import
-from ....chat.stream.types import
+from ....chat.choices.stream.services import ChatChoicesStreamRequest
+from ....chat.choices.stream.services import ChatChoicesStreamResponse
+from ....chat.choices.stream.services import static_check_is_chat_choices_stream_service
+from ....chat.choices.stream.types import AiChoiceDeltas
+from ....chat.choices.stream.types import AiChoicesDeltas
+from ....chat.stream.types import ContentAiDelta
 from ....configs import Config
 from ....models.configs import ModelPath
 from ....resources import UseResources
+from ....stream.services import StreamResponseSink
 from ....stream.services import new_stream_response
 from .chat import LlamacppChatChoicesService
 from .format import ROLES_MAP
@@ -28,6 +29,15 @@ from .format import get_msg_content
 ##
 
 
+# @omlish-manifest $.minichain.backends.strings.manifests.BackendStringsManifest(
+#     ['ChatChoicesStreamService'],
+#     'llamacpp',
+# )
+
+
+##
+
+
 # @omlish-manifest $.minichain.registries.manifests.RegistryManifest(
 #     name='llamacpp',
 #     type='ChatChoicesStreamService',
@@ -49,10 +59,10 @@ class LlamacppChatChoicesStreamService(lang.ExitStacked):
             verbose=False,
         )))
 
-    def invoke(self, request: ChatChoicesStreamRequest) -> ChatChoicesStreamResponse:
+    async def invoke(self, request: ChatChoicesStreamRequest) -> ChatChoicesStreamResponse:
         lcu.install_logging_hook()
 
-        with UseResources.or_new(request.options) as rs:
+        async with UseResources.or_new(request.options) as rs:
             rs.enter_context(self._lock)
 
             model: ta.Any = self._load_model()  # FIXME: the types are awful lol
@@ -74,19 +84,26 @@ class LlamacppChatChoicesStreamService(lang.ExitStacked):
 
             rs.enter_context(lang.defer(close_output))
 
-            def
+            async def inner(sink: StreamResponseSink[AiChoicesDeltas]) -> ta.Sequence[ChatChoicesOutputs] | None:
+                last_role: ta.Any = None
+
                 for chunk in output:
                     check.state(chunk['object'] == 'chat.completion.chunk')
-
-
-
-
-
-
-
-
-
-
+
+                    choice = check.single(chunk['choices'])
+
+                    if not (delta := choice.get('delta', {})):
+                        continue
+
+                    # FIXME: check role is assistant
+                    if (role := delta.get('role')) != last_role:
+                        last_role = role
+
+                    # FIXME: stop reason
+
+                    if (content := delta.get('content', '')):
+                        await sink.emit(AiChoicesDeltas([AiChoiceDeltas([ContentAiDelta(content)])]))
+
                 return None
 
-            return new_stream_response(rs,
+            return await new_stream_response(rs, inner)
```
ommlds/minichain/backends/impls/mistral.py

```diff
@@ -21,13 +21,18 @@ from ...chat.messages import UserMessage
 ##
 
 
+# TODO: generalize lol
+class TooManyRequestsMistralError(Exception):
+    pass
+
+
 # @omlish-manifest $.minichain.registries.manifests.RegistryManifest(
 #     name='mistral',
 #     type='ChatChoicesService',
 # )
 @static_check_is_chat_choices_service
 class MistralChatChoicesService:
-    model: ta.ClassVar[str] = 'mistral-
+    model: ta.ClassVar[str] = 'mistral-medium-2508'
 
     ROLES_MAP: ta.ClassVar[ta.Mapping[type[Message], str]] = {
         SystemMessage: 'system',
@@ -35,10 +40,16 @@ class MistralChatChoicesService:
         AiMessage: 'assistant',
     }
 
-    def __init__(
+    def __init__(
+            self,
+            *,
+            api_key: str | None = None,
+            http_client: http.AsyncHttpClient | None = None,
+    ) -> None:
         super().__init__()
 
         self._api_key = api_key
+        self._http_client = http_client
 
     def _get_msg_content(self, m: Message) -> str | None:
         if isinstance(m, AiMessage):
@@ -50,7 +61,7 @@ class MistralChatChoicesService:
         else:
             raise TypeError(m)
 
-    def invoke(
+    async def invoke(
             self,
             request: ChatChoicesRequest,
     ) -> ChatChoicesResponse:
@@ -68,7 +79,7 @@ class MistralChatChoicesService:
             ],
         }
 
-        resp = http.
+        resp = await http.async_request(
             'https://api.mistral.ai/v1/chat/completions',
             method='POST',
             data=json.dumps_compact(req_dct).encode('utf-8'),
@@ -77,11 +88,15 @@ class MistralChatChoicesService:
                 'Accept': 'application/json',
                 'Authorization': f'Bearer {key}',
             },
+            client=self._http_client,
         )
 
+        if resp.status == 429:
+            raise TooManyRequestsMistralError
+
         resp_dct = json.loads(check.not_none(resp.data).decode('utf-8'))
 
         return ChatChoicesResponse([
-            AiChoice(AiMessage(c['message']['content']))
+            AiChoice([AiMessage(c['message']['content'])])
             for c in resp_dct['choices']
         ])
```
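Since `invoke` now surfaces rate limiting as a typed `TooManyRequestsMistralError` instead of failing later on a response parse, callers can back off and retry. A hypothetical wrapper (the helper name and delays are illustrative, not part of the package):

```python
import asyncio


async def invoke_with_backoff(service, request, *, attempts=4, base_delay=1.0):
    """Retries only on Mistral 429s, sleeping 1s, 2s, 4s, ... between attempts."""
    for attempt in range(attempts - 1):
        try:
            return await service.invoke(request)
        except TooManyRequestsMistralError:
            await asyncio.sleep(base_delay * (2 ** attempt))
    return await service.invoke(request)  # final attempt propagates the error
```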
ommlds/minichain/backends/impls/mlx/chat.py

```diff
@@ -1,3 +1,4 @@
+import contextlib
 import typing as ta
 
 from omlish import check
@@ -5,28 +6,38 @@ from omlish import lang
 from omlish import typedvalues as tv
 
 from .....backends import mlx as mlxu
+from ....chat.choices.services import ChatChoicesOutputs
 from ....chat.choices.services import ChatChoicesRequest
 from ....chat.choices.services import ChatChoicesResponse
 from ....chat.choices.services import static_check_is_chat_choices_service
+from ....chat.choices.stream.services import ChatChoicesStreamRequest
+from ....chat.choices.stream.services import ChatChoicesStreamResponse
+from ....chat.choices.stream.services import static_check_is_chat_choices_stream_service
+from ....chat.choices.stream.types import AiChoiceDeltas
+from ....chat.choices.stream.types import AiChoicesDeltas
 from ....chat.choices.types import AiChoice
 from ....chat.choices.types import ChatChoicesOptions
 from ....chat.messages import AiMessage
 from ....chat.messages import Message
 from ....chat.messages import SystemMessage
 from ....chat.messages import UserMessage
+from ....chat.stream.types import ContentAiDelta
 from ....configs import Config
 from ....llms.types import MaxTokens
 from ....models.configs import ModelPath
 from ....models.configs import ModelRepo
 from ....models.configs import ModelSpecifier
+from ....resources import UseResources
 from ....standard import DefaultOptions
+from ....stream.services import StreamResponseSink
+from ....stream.services import new_stream_response
 
 
 ##
 
 
 # @omlish-manifest $.minichain.backends.strings.manifests.BackendStringsManifest(
-#     ['ChatChoicesService'],
+#     ['ChatChoicesService', 'ChatChoicesStreamService'],
 #     'mlx',
 # )
 
@@ -34,12 +45,7 @@ from ....standard import DefaultOptions
 ##
 
 
-# @omlish-manifest $.minichain.registries.manifests.RegistryManifest(
-#     name='mlx',
-#     type='ChatChoicesService',
-# )
-@static_check_is_chat_choices_service
-class MlxChatChoicesService(lang.ExitStacked):
+class BaseMlxChatChoicesService(lang.ExitStacked):
     DEFAULT_MODEL: ta.ClassVar[ModelSpecifier] = (
         # 'mlx-community/DeepSeek-Coder-V2-Lite-Instruct-8bit'
         # 'mlx-community/Llama-3.3-70B-Instruct-4bit'
@@ -52,8 +58,8 @@ class MlxChatChoicesService(lang.ExitStacked):
         # 'mlx-community/Qwen2.5-0.5B-4bit'
         # 'mlx-community/Qwen2.5-32B-Instruct-8bit'
         # 'mlx-community/Qwen2.5-Coder-32B-Instruct-8bit'
-        # 'mlx-community/mamba-2.8b-hf-f16'
         # 'mlx-community/Qwen3-30B-A3B-6bit'
+        # 'mlx-community/mamba-2.8b-hf-f16'
     )
 
     def __init__(self, *configs: Config) -> None:
@@ -70,10 +76,7 @@ class MlxChatChoicesService(lang.ExitStacked):
         }
 
     def _get_msg_content(self, m: Message) -> str | None:
-        if isinstance(m, AiMessage):
-            return check.isinstance(m.c, str)
-
-        elif isinstance(m, (SystemMessage, UserMessage)):
+        if isinstance(m, (AiMessage, SystemMessage, UserMessage)):
            return check.isinstance(m.c, str)
 
         else:
@@ -96,10 +99,9 @@ class MlxChatChoicesService(lang.ExitStacked):
         max_tokens=MaxTokens,
     )
 
-
-
-
-        tokenizer = loaded_model.tokenization.tokenizer
+    @lang.cached_function(transient=True)
+    def _get_tokenizer(self) -> mlxu.tokenization.Tokenizer:
+        tokenizer = self._load_model().tokenization.tokenizer
 
         if not (
             hasattr(tokenizer, 'apply_chat_template') and
@@ -107,26 +109,44 @@ class MlxChatChoicesService(lang.ExitStacked):
         ):
             raise RuntimeError(tokenizer)
 
-
+        return tokenizer
+
+    def _build_prompt(self, messages: ta.Sequence[Message]) -> str:
+        return check.isinstance(self._get_tokenizer().apply_chat_template(
             [  # type: ignore[arg-type]
                 dict(
                     role=self.ROLES_MAP[type(m)],
                     content=self._get_msg_content(m),
                 )
-                for m in
+                for m in messages
             ],
             tokenize=False,
             add_generation_prompt=True,
-        )
+        ), str)
 
-
+    def _build_kwargs(self, oc: tv.TypedValuesConsumer) -> dict[str, ta.Any]:
+        kwargs: dict[str, ta.Any] = {}
+        kwargs.update(oc.pop_scalar_kwargs(**self._OPTION_KWARG_NAMES_MAP))
+        return kwargs
+
+
+# @omlish-manifest $.minichain.registries.manifests.RegistryManifest(
+#     name='mlx',
+#     type='ChatChoicesService',
+# )
+@static_check_is_chat_choices_service
+class MlxChatChoicesService(BaseMlxChatChoicesService):
+    async def invoke(self, request: ChatChoicesRequest) -> ChatChoicesResponse:
+        loaded_model = self._load_model()
+
+        prompt = self._build_prompt(request.v)
 
         with tv.consume(
                 *self._default_options,
                 *request.options,
                 override=True,
         ) as oc:
-            kwargs.
+            kwargs = self._build_kwargs(oc)
 
             response = mlxu.generate(
                 loaded_model.model,
@@ -137,5 +157,59 @@ class MlxChatChoicesService(lang.ExitStacked):
         )
 
         return ChatChoicesResponse([
-            AiChoice(AiMessage(response))  # noqa
+            AiChoice([AiMessage(response)])  # noqa
         ])
+
+
+# @omlish-manifest $.minichain.registries.manifests.RegistryManifest(
+#     name='mlx',
+#     type='ChatChoicesStreamService',
+# )
+@static_check_is_chat_choices_stream_service
+class MlxChatChoicesStreamService(BaseMlxChatChoicesService):
+    def __init__(self, *configs: Config) -> None:
+        super().__init__()
+
+        with tv.consume(*configs) as cc:
+            self._model = cc.pop(MlxChatChoicesService.DEFAULT_MODEL)
+            self._default_options: tv.TypedValues = DefaultOptions.pop(cc)
+
+    READ_CHUNK_SIZE = 64 * 1024
+
+    async def invoke(
+            self,
+            request: ChatChoicesStreamRequest,
+            *,
+            max_tokens: int = 4096,  # FIXME: ChatOption
+    ) -> ChatChoicesStreamResponse:
+        loaded_model = self._load_model()
+
+        prompt = self._build_prompt(request.v)
+
+        with tv.consume(
+                *self._default_options,
+                *request.options,
+                override=True,
+        ) as oc:
+            oc.pop(UseResources, None)
+            kwargs = self._build_kwargs(oc)
+
+        async with UseResources.or_new(request.options) as rs:
+            gen: ta.Iterator[mlxu.GenerationOutput] = rs.enter_context(contextlib.closing(mlxu.stream_generate(
+                loaded_model.model,
+                loaded_model.tokenization,
+                check.isinstance(prompt, str),
+                mlxu.GenerationParams(**kwargs),
+                # verbose=True,
+            )))
+
+            async def inner(sink: StreamResponseSink[AiChoicesDeltas]) -> ta.Sequence[ChatChoicesOutputs]:
+                for go in gen:
+                    if go.text:
+                        await sink.emit(AiChoicesDeltas([AiChoiceDeltas([
+                            ContentAiDelta(go.text),
+                        ])]))
+
+                return []
+
+            return await new_stream_response(rs, inner)
```
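Both new stream backends (mlx here, groq and llamacpp above) share the same inversion of control: the backend writes an `inner(sink)` coroutine that pushes `AiChoicesDeltas` into a sink as tokens arrive, and `new_stream_response(rs, inner)` adapts that into the `ChatChoicesStreamResponse` handed back to the caller. A toy illustration of just that contract (the `ListSink` stand-in is hypothetical; the real sink type is `StreamResponseSink`):

```python
import asyncio


class ListSink:
    """Stand-in sink that records every emitted item."""

    def __init__(self):
        self.items = []

    async def emit(self, item):
        self.items.append(item)


async def inner(sink):
    # A backend's producer coroutine: emit deltas as they are generated.
    for piece in ('Hel', 'lo', '!'):
        await sink.emit(piece)
    return []  # trailing outputs (usage stats, finish reasons, ...)


sink = ListSink()
outputs = asyncio.run(inner(sink))
assert sink.items == ['Hel', 'lo', '!'] and outputs == []
```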