ommlds 0.0.0.dev426__py3-none-any.whl → 0.0.0.dev485__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ommlds/.omlish-manifests.json +336 -39
- ommlds/__about__.py +16 -10
- ommlds/_hacks/__init__.py +4 -0
- ommlds/_hacks/funcs.py +110 -0
- ommlds/_hacks/names.py +158 -0
- ommlds/_hacks/params.py +73 -0
- ommlds/_hacks/patches.py +0 -3
- ommlds/backends/anthropic/protocol/__init__.py +13 -1
- ommlds/backends/anthropic/protocol/_dataclasses.py +1625 -0
- ommlds/backends/anthropic/protocol/_marshal.py +2 -2
- ommlds/backends/anthropic/protocol/sse/_marshal.py +1 -1
- ommlds/backends/anthropic/protocol/sse/assemble.py +23 -7
- ommlds/backends/anthropic/protocol/sse/events.py +13 -0
- ommlds/backends/anthropic/protocol/types.py +40 -8
- ommlds/backends/google/protocol/__init__.py +16 -0
- ommlds/backends/google/protocol/_dataclasses.py +5997 -0
- ommlds/backends/google/protocol/_marshal.py +16 -0
- ommlds/backends/google/protocol/types.py +626 -0
- ommlds/backends/groq/__init__.py +7 -0
- ommlds/backends/groq/_dataclasses.py +3901 -0
- ommlds/backends/groq/_marshal.py +23 -0
- ommlds/backends/groq/protocol.py +249 -0
- ommlds/backends/llamacpp/logging.py +4 -1
- ommlds/backends/mlx/caching.py +7 -3
- ommlds/backends/mlx/cli.py +10 -7
- ommlds/backends/mlx/generation.py +19 -17
- ommlds/backends/mlx/limits.py +10 -6
- ommlds/backends/mlx/loading.py +65 -5
- ommlds/backends/ollama/__init__.py +7 -0
- ommlds/backends/ollama/_dataclasses.py +3458 -0
- ommlds/backends/ollama/protocol.py +170 -0
- ommlds/backends/openai/protocol/__init__.py +24 -29
- ommlds/backends/openai/protocol/_common.py +18 -0
- ommlds/backends/openai/protocol/_dataclasses.py +7708 -0
- ommlds/backends/openai/protocol/_marshal.py +27 -0
- ommlds/backends/openai/protocol/chatcompletion/chunk.py +58 -31
- ommlds/backends/openai/protocol/chatcompletion/contentpart.py +49 -44
- ommlds/backends/openai/protocol/chatcompletion/message.py +55 -43
- ommlds/backends/openai/protocol/chatcompletion/request.py +114 -66
- ommlds/backends/openai/protocol/chatcompletion/response.py +71 -45
- ommlds/backends/openai/protocol/chatcompletion/responseformat.py +27 -20
- ommlds/backends/openai/protocol/chatcompletion/tokenlogprob.py +16 -7
- ommlds/backends/openai/protocol/completionusage.py +24 -15
- ommlds/backends/tavily/__init__.py +7 -0
- ommlds/backends/tavily/_dataclasses.py +1734 -0
- ommlds/backends/tavily/protocol.py +301 -0
- ommlds/backends/tinygrad/models/llama3/__init__.py +22 -14
- ommlds/backends/transformers/__init__.py +14 -0
- ommlds/backends/transformers/filecache.py +109 -0
- ommlds/backends/transformers/streamers.py +73 -0
- ommlds/cli/__init__.py +7 -0
- ommlds/cli/_dataclasses.py +2562 -0
- ommlds/cli/asyncs.py +30 -0
- ommlds/cli/backends/catalog.py +93 -0
- ommlds/cli/backends/configs.py +9 -0
- ommlds/cli/backends/inject.py +31 -36
- ommlds/cli/backends/injection.py +16 -0
- ommlds/cli/backends/types.py +46 -0
- ommlds/cli/content/messages.py +34 -0
- ommlds/cli/content/strings.py +42 -0
- ommlds/cli/inject.py +17 -32
- ommlds/cli/inputs/__init__.py +0 -0
- ommlds/cli/inputs/asyncs.py +32 -0
- ommlds/cli/inputs/sync.py +75 -0
- ommlds/cli/main.py +270 -110
- ommlds/cli/rendering/__init__.py +0 -0
- ommlds/cli/rendering/configs.py +9 -0
- ommlds/cli/rendering/inject.py +31 -0
- ommlds/cli/rendering/markdown.py +52 -0
- ommlds/cli/rendering/raw.py +73 -0
- ommlds/cli/rendering/types.py +21 -0
- ommlds/cli/secrets.py +21 -0
- ommlds/cli/sessions/base.py +1 -1
- ommlds/cli/sessions/chat/chat/__init__.py +0 -0
- ommlds/cli/sessions/chat/chat/ai/__init__.py +0 -0
- ommlds/cli/sessions/chat/chat/ai/configs.py +11 -0
- ommlds/cli/sessions/chat/chat/ai/inject.py +74 -0
- ommlds/cli/sessions/chat/chat/ai/injection.py +14 -0
- ommlds/cli/sessions/chat/chat/ai/rendering.py +70 -0
- ommlds/cli/sessions/chat/chat/ai/services.py +79 -0
- ommlds/cli/sessions/chat/chat/ai/tools.py +44 -0
- ommlds/cli/sessions/chat/chat/ai/types.py +28 -0
- ommlds/cli/sessions/chat/chat/state/__init__.py +0 -0
- ommlds/cli/sessions/chat/chat/state/configs.py +11 -0
- ommlds/cli/sessions/chat/chat/state/inject.py +36 -0
- ommlds/cli/sessions/chat/chat/state/inmemory.py +33 -0
- ommlds/cli/sessions/chat/chat/state/storage.py +52 -0
- ommlds/cli/sessions/chat/chat/state/types.py +38 -0
- ommlds/cli/sessions/chat/chat/user/__init__.py +0 -0
- ommlds/cli/sessions/chat/chat/user/configs.py +17 -0
- ommlds/cli/sessions/chat/chat/user/inject.py +62 -0
- ommlds/cli/sessions/chat/chat/user/interactive.py +31 -0
- ommlds/cli/sessions/chat/chat/user/oneshot.py +25 -0
- ommlds/cli/sessions/chat/chat/user/types.py +15 -0
- ommlds/cli/sessions/chat/configs.py +27 -0
- ommlds/cli/sessions/chat/driver.py +43 -0
- ommlds/cli/sessions/chat/inject.py +33 -65
- ommlds/cli/sessions/chat/phases/__init__.py +0 -0
- ommlds/cli/sessions/chat/phases/inject.py +27 -0
- ommlds/cli/sessions/chat/phases/injection.py +14 -0
- ommlds/cli/sessions/chat/phases/manager.py +29 -0
- ommlds/cli/sessions/chat/phases/types.py +29 -0
- ommlds/cli/sessions/chat/session.py +27 -0
- ommlds/cli/sessions/chat/tools/__init__.py +0 -0
- ommlds/cli/sessions/chat/tools/configs.py +22 -0
- ommlds/cli/sessions/chat/tools/confirmation.py +46 -0
- ommlds/cli/sessions/chat/tools/execution.py +66 -0
- ommlds/cli/sessions/chat/tools/fs/__init__.py +0 -0
- ommlds/cli/sessions/chat/tools/fs/configs.py +12 -0
- ommlds/cli/sessions/chat/tools/fs/inject.py +35 -0
- ommlds/cli/sessions/chat/tools/inject.py +88 -0
- ommlds/cli/sessions/chat/tools/injection.py +44 -0
- ommlds/cli/sessions/chat/tools/rendering.py +58 -0
- ommlds/cli/sessions/chat/tools/todo/__init__.py +0 -0
- ommlds/cli/sessions/chat/tools/todo/configs.py +12 -0
- ommlds/cli/sessions/chat/tools/todo/inject.py +31 -0
- ommlds/cli/sessions/chat/tools/weather/__init__.py +0 -0
- ommlds/cli/sessions/chat/tools/weather/configs.py +12 -0
- ommlds/cli/sessions/chat/tools/weather/inject.py +22 -0
- ommlds/cli/{tools/weather.py → sessions/chat/tools/weather/tools.py} +1 -1
- ommlds/cli/sessions/completion/configs.py +21 -0
- ommlds/cli/sessions/completion/inject.py +42 -0
- ommlds/cli/sessions/completion/session.py +35 -0
- ommlds/cli/sessions/embedding/configs.py +21 -0
- ommlds/cli/sessions/embedding/inject.py +42 -0
- ommlds/cli/sessions/embedding/session.py +33 -0
- ommlds/cli/sessions/inject.py +28 -11
- ommlds/cli/state/__init__.py +0 -0
- ommlds/cli/state/inject.py +28 -0
- ommlds/cli/{state.py → state/storage.py} +41 -24
- ommlds/minichain/__init__.py +84 -24
- ommlds/minichain/_dataclasses.py +15401 -0
- ommlds/minichain/_marshal.py +49 -9
- ommlds/minichain/_typedvalues.py +2 -4
- ommlds/minichain/backends/catalogs/base.py +20 -1
- ommlds/minichain/backends/catalogs/simple.py +2 -2
- ommlds/minichain/backends/catalogs/strings.py +10 -8
- ommlds/minichain/backends/impls/anthropic/chat.py +65 -27
- ommlds/minichain/backends/impls/anthropic/names.py +10 -8
- ommlds/minichain/backends/impls/anthropic/protocol.py +109 -0
- ommlds/minichain/backends/impls/anthropic/stream.py +111 -43
- ommlds/minichain/backends/impls/duckduckgo/search.py +6 -2
- ommlds/minichain/backends/impls/dummy/__init__.py +0 -0
- ommlds/minichain/backends/impls/dummy/chat.py +69 -0
- ommlds/minichain/backends/impls/google/chat.py +114 -22
- ommlds/minichain/backends/impls/google/search.py +7 -2
- ommlds/minichain/backends/impls/google/stream.py +219 -0
- ommlds/minichain/backends/impls/google/tools.py +149 -0
- ommlds/minichain/backends/impls/groq/__init__.py +0 -0
- ommlds/minichain/backends/impls/groq/chat.py +75 -0
- ommlds/minichain/backends/impls/groq/names.py +48 -0
- ommlds/minichain/backends/impls/groq/protocol.py +143 -0
- ommlds/minichain/backends/impls/groq/stream.py +125 -0
- ommlds/minichain/backends/impls/huggingface/repos.py +1 -5
- ommlds/minichain/backends/impls/llamacpp/chat.py +40 -22
- ommlds/minichain/backends/impls/llamacpp/completion.py +9 -5
- ommlds/minichain/backends/impls/llamacpp/format.py +4 -2
- ommlds/minichain/backends/impls/llamacpp/stream.py +43 -23
- ommlds/minichain/backends/impls/mistral.py +20 -5
- ommlds/minichain/backends/impls/mlx/chat.py +101 -24
- ommlds/minichain/backends/impls/ollama/__init__.py +0 -0
- ommlds/minichain/backends/impls/ollama/chat.py +199 -0
- ommlds/minichain/backends/impls/openai/chat.py +18 -8
- ommlds/minichain/backends/impls/openai/completion.py +10 -3
- ommlds/minichain/backends/impls/openai/embedding.py +10 -3
- ommlds/minichain/backends/impls/openai/format.py +131 -106
- ommlds/minichain/backends/impls/openai/names.py +31 -5
- ommlds/minichain/backends/impls/openai/stream.py +43 -25
- ommlds/minichain/backends/impls/sentencepiece/tokens.py +9 -6
- ommlds/minichain/backends/impls/tavily.py +66 -0
- ommlds/minichain/backends/impls/tinygrad/chat.py +30 -20
- ommlds/minichain/backends/impls/tokenizers/tokens.py +9 -6
- ommlds/minichain/backends/impls/transformers/sentence.py +6 -3
- ommlds/minichain/backends/impls/transformers/tokens.py +10 -7
- ommlds/minichain/backends/impls/transformers/transformers.py +160 -37
- ommlds/minichain/backends/strings/parsing.py +1 -1
- ommlds/minichain/backends/strings/resolving.py +4 -1
- ommlds/minichain/chat/_marshal.py +16 -9
- ommlds/minichain/chat/choices/adapters.py +4 -4
- ommlds/minichain/chat/choices/services.py +1 -1
- ommlds/minichain/chat/choices/stream/__init__.py +0 -0
- ommlds/minichain/chat/choices/stream/adapters.py +35 -0
- ommlds/minichain/chat/choices/stream/joining.py +31 -0
- ommlds/minichain/chat/choices/stream/services.py +45 -0
- ommlds/minichain/chat/choices/stream/types.py +43 -0
- ommlds/minichain/chat/choices/types.py +2 -2
- ommlds/minichain/chat/history.py +3 -3
- ommlds/minichain/chat/messages.py +55 -19
- ommlds/minichain/chat/services.py +3 -3
- ommlds/minichain/chat/stream/_marshal.py +16 -0
- ommlds/minichain/chat/stream/joining.py +85 -0
- ommlds/minichain/chat/stream/services.py +15 -21
- ommlds/minichain/chat/stream/types.py +32 -19
- ommlds/minichain/chat/tools/execution.py +8 -7
- ommlds/minichain/chat/tools/ids.py +9 -15
- ommlds/minichain/chat/tools/parsing.py +17 -26
- ommlds/minichain/chat/transforms/base.py +29 -38
- ommlds/minichain/chat/transforms/metadata.py +30 -4
- ommlds/minichain/chat/transforms/services.py +9 -11
- ommlds/minichain/content/_marshal.py +44 -20
- ommlds/minichain/content/json.py +13 -0
- ommlds/minichain/content/materialize.py +14 -21
- ommlds/minichain/content/prepare.py +4 -0
- ommlds/minichain/content/transforms/interleave.py +1 -1
- ommlds/minichain/content/transforms/squeeze.py +1 -1
- ommlds/minichain/content/transforms/stringify.py +1 -1
- ommlds/minichain/json.py +20 -0
- ommlds/minichain/lib/code/__init__.py +0 -0
- ommlds/minichain/lib/code/prompts.py +6 -0
- ommlds/minichain/lib/fs/binfiles.py +108 -0
- ommlds/minichain/lib/fs/context.py +126 -0
- ommlds/minichain/lib/fs/errors.py +101 -0
- ommlds/minichain/lib/fs/suggestions.py +36 -0
- ommlds/minichain/lib/fs/tools/__init__.py +0 -0
- ommlds/minichain/lib/fs/tools/edit.py +104 -0
- ommlds/minichain/lib/fs/tools/ls.py +38 -0
- ommlds/minichain/lib/fs/tools/read.py +115 -0
- ommlds/minichain/lib/fs/tools/recursivels/__init__.py +0 -0
- ommlds/minichain/lib/fs/tools/recursivels/execution.py +40 -0
- ommlds/minichain/lib/todo/__init__.py +0 -0
- ommlds/minichain/lib/todo/context.py +54 -0
- ommlds/minichain/lib/todo/tools/__init__.py +0 -0
- ommlds/minichain/lib/todo/tools/read.py +44 -0
- ommlds/minichain/lib/todo/tools/write.py +335 -0
- ommlds/minichain/lib/todo/types.py +60 -0
- ommlds/minichain/llms/_marshal.py +25 -17
- ommlds/minichain/llms/types.py +4 -0
- ommlds/minichain/registries/globals.py +18 -4
- ommlds/minichain/resources.py +68 -45
- ommlds/minichain/search.py +1 -1
- ommlds/minichain/services/_marshal.py +46 -39
- ommlds/minichain/services/facades.py +3 -3
- ommlds/minichain/services/services.py +1 -1
- ommlds/minichain/standard.py +8 -0
- ommlds/minichain/stream/services.py +152 -38
- ommlds/minichain/stream/wrap.py +22 -24
- ommlds/minichain/text/toolparsing/llamacpp/hermes2.py +3 -2
- ommlds/minichain/text/toolparsing/llamacpp/llama31.py +3 -2
- ommlds/minichain/text/toolparsing/llamacpp/utils.py +3 -2
- ommlds/minichain/tools/_marshal.py +1 -1
- ommlds/minichain/tools/execution/catalog.py +2 -1
- ommlds/minichain/tools/execution/context.py +34 -14
- ommlds/minichain/tools/execution/errors.py +15 -0
- ommlds/minichain/tools/execution/executors.py +8 -3
- ommlds/minichain/tools/execution/reflect.py +40 -5
- ommlds/minichain/tools/fns.py +46 -9
- ommlds/minichain/tools/jsonschema.py +14 -5
- ommlds/minichain/tools/reflect.py +54 -18
- ommlds/minichain/tools/types.py +33 -1
- ommlds/minichain/utils.py +27 -0
- ommlds/minichain/vectors/_marshal.py +11 -10
- ommlds/minichain/vectors/types.py +1 -1
- ommlds/nanochat/LICENSE +21 -0
- ommlds/nanochat/__init__.py +0 -0
- ommlds/nanochat/rustbpe/LICENSE +21 -0
- ommlds/nanochat/tokenizers.py +406 -0
- ommlds/server/cli.py +1 -2
- ommlds/server/server.py +5 -5
- ommlds/server/service.py +1 -1
- ommlds/specs/__init__.py +0 -0
- ommlds/specs/mcp/__init__.py +0 -0
- ommlds/specs/mcp/_marshal.py +23 -0
- ommlds/specs/mcp/clients.py +146 -0
- ommlds/specs/mcp/protocol.py +371 -0
- ommlds/tools/git.py +35 -12
- ommlds/tools/ocr.py +8 -9
- ommlds/wiki/analyze.py +6 -7
- ommlds/wiki/text/mfh.py +1 -5
- ommlds/wiki/text/wtp.py +1 -3
- ommlds/wiki/utils/xml.py +5 -5
- {ommlds-0.0.0.dev426.dist-info → ommlds-0.0.0.dev485.dist-info}/METADATA +24 -21
- ommlds-0.0.0.dev485.dist-info/RECORD +436 -0
- ommlds/cli/backends/standard.py +0 -20
- ommlds/cli/sessions/chat/base.py +0 -42
- ommlds/cli/sessions/chat/interactive.py +0 -73
- ommlds/cli/sessions/chat/printing.py +0 -96
- ommlds/cli/sessions/chat/prompt.py +0 -143
- ommlds/cli/sessions/chat/state.py +0 -109
- ommlds/cli/sessions/chat/tools.py +0 -91
- ommlds/cli/sessions/completion/completion.py +0 -44
- ommlds/cli/sessions/embedding/embedding.py +0 -42
- ommlds/cli/tools/config.py +0 -13
- ommlds/cli/tools/inject.py +0 -64
- ommlds/minichain/chat/stream/adapters.py +0 -69
- ommlds/minichain/lib/fs/ls/execution.py +0 -32
- ommlds-0.0.0.dev426.dist-info/RECORD +0 -303
- /ommlds/{cli/tools → backends/google}/__init__.py +0 -0
- /ommlds/{huggingface.py → backends/huggingface.py} +0 -0
- /ommlds/{minichain/lib/fs/ls → cli/content}/__init__.py +0 -0
- /ommlds/minichain/lib/fs/{ls → tools/recursivels}/rendering.py +0 -0
- /ommlds/minichain/lib/fs/{ls → tools/recursivels}/running.py +0 -0
- {ommlds-0.0.0.dev426.dist-info → ommlds-0.0.0.dev485.dist-info}/WHEEL +0 -0
- {ommlds-0.0.0.dev426.dist-info → ommlds-0.0.0.dev485.dist-info}/entry_points.txt +0 -0
- {ommlds-0.0.0.dev426.dist-info → ommlds-0.0.0.dev485.dist-info}/licenses/LICENSE +0 -0
- {ommlds-0.0.0.dev426.dist-info → ommlds-0.0.0.dev485.dist-info}/top_level.txt +0 -0
ommlds/minichain/backends/impls/tinygrad/chat.py

```diff
@@ -4,28 +4,33 @@ import typing as ta
 from omlish import check
 from omlish import lang
 
-from .....backends.tinygrad.models import llama3 as tgl3
 from ....chat.choices.services import ChatChoicesOptions
 from ....chat.choices.services import ChatChoicesRequest
 from ....chat.choices.services import ChatChoicesResponse
 from ....chat.choices.services import static_check_is_chat_choices_service
+from ....chat.choices.stream.services import ChatChoicesStreamRequest
+from ....chat.choices.stream.services import ChatChoicesStreamResponse
+from ....chat.choices.stream.services import static_check_is_chat_choices_stream_service
+from ....chat.choices.stream.types import AiChoiceDeltas
+from ....chat.choices.stream.types import AiChoicesDeltas
 from ....chat.choices.types import AiChoice
+from ....chat.choices.types import ChatChoicesOutputs
 from ....chat.messages import AiMessage
 from ....chat.messages import Chat
 from ....chat.messages import SystemMessage
 from ....chat.messages import UserMessage
-from ....chat.stream.
-from ....chat.stream.services import ChatChoicesStreamRequest
-from ....chat.stream.services import ChatChoicesStreamResponse
-from ....chat.stream.services import static_check_is_chat_choices_stream_service
-from ....chat.stream.types import AiChoiceDelta
-from ....chat.stream.types import AiMessageDelta
+from ....chat.stream.types import ContentAiDelta
 from ....chat.types import ChatOption
 from ....llms.types import LlmOption
 from ....resources import UseResources
+from ....stream.services import StreamResponseSink
 from ....stream.services import new_stream_response
 
 
+with lang.auto_proxy_import(globals()):
+    from .....backends.tinygrad.models import llama3 as tgl3
+
+
 ##
 
 
@@ -37,7 +42,7 @@ def _load_model(
         *,
         size: str | None = None,
         temperature: float | None = None,
-) -> tgl3.Llama3Llm:
+) -> 'tgl3.Llama3Llm':
     if size is None:
         size = DEFAULT_SIZE
     if temperature is None:
@@ -56,7 +61,7 @@ def _load_model(
 
 
 def _prepare_toks(
-        llm: tgl3.Llama3Llm,
+        llm: 'tgl3.Llama3Llm',
         chat: Chat,
         options: ta.Sequence[ChatChoicesOptions],
 ) -> list[int]:
@@ -98,7 +103,7 @@ class BaseTinygradLlama3ChatService(lang.ExitStacked, lang.Abstract):
         self._temperature = temperature
 
     @lang.cached_function(transient=True)
-    def _load_model(self) -> tgl3.Llama3Llm:
+    def _load_model(self) -> 'tgl3.Llama3Llm':
         check.not_none(self._exit_stack)
 
         return _load_model(
@@ -111,12 +116,12 @@ class BaseTinygradLlama3ChatService(lang.ExitStacked, lang.Abstract):
 
 
 # @omlish-manifest $.minichain.registries.manifests.RegistryManifest(
-#     name='
+#     name='tinygrad-llama3',
 #     type='ChatChoicesService',
 # )
 @static_check_is_chat_choices_service
 class TinygradLlama3ChatChoicesService(BaseTinygradLlama3ChatService):
-    def invoke(self, request: ChatChoicesRequest) -> ChatChoicesResponse:
+    async def invoke(self, request: ChatChoicesRequest) -> ChatChoicesResponse:
         llm = self._load_model()
         toks = _prepare_toks(llm, request.v, request.options)
 
@@ -124,20 +129,20 @@ class TinygradLlama3ChatChoicesService(BaseTinygradLlama3ChatService):
         for s in tgl3.run_llm(llm, toks):
             out.append(s)
 
-        return ChatChoicesResponse([AiChoice(AiMessage(''.join(out)))])
+        return ChatChoicesResponse([AiChoice([AiMessage(''.join(out))])])
 
 
 ##
 
 
 # @omlish-manifest $.minichain.registries.manifests.RegistryManifest(
-#     name='
+#     name='tinygrad-llama3',
 #     type='ChatChoicesStreamService',
 # )
 @static_check_is_chat_choices_stream_service
 class TinygradLlama3ChatChoicesStreamService(BaseTinygradLlama3ChatService):
-    def invoke(self, request: ChatChoicesStreamRequest) -> ChatChoicesStreamResponse:
-        with UseResources.or_new(request.options) as rs:
+    async def invoke(self, request: ChatChoicesStreamRequest) -> ChatChoicesStreamResponse:
+        async with UseResources.or_new(request.options) as rs:
             llm = self._load_model()
             toks = _prepare_toks(
                 llm,
@@ -145,12 +150,17 @@ class TinygradLlama3ChatChoicesStreamService(BaseTinygradLlama3ChatService):
                 request.options.get_any((ChatOption, LlmOption)),  # FIXME  # noqa
             )
 
-            def
+            async def inner(sink: StreamResponseSink[AiChoicesDeltas]) -> ta.Sequence[ChatChoicesOutputs]:
                 for s in tgl3.run_llm(llm, toks):
-
+                    await sink.emit(AiChoicesDeltas([
+                        AiChoiceDeltas([
+                            ContentAiDelta(s),
+                        ]),
+                    ]))
+
                 return []
 
-            return new_stream_response(rs,
+            return await new_stream_response(rs, inner)
 
 
 ##
@@ -161,5 +171,5 @@ class TinygradLlama3ChatChoicesStreamService(BaseTinygradLlama3ChatService):
 #         'ChatChoicesService',
 #         'ChatChoicesStreamService',
 #     ],
-#     '
+#     'tinygrad-llama3',
 # )
```
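The notable shape change above: the stream service's `invoke` is now async and, rather than returning joined text, hands `new_stream_response` an `inner` coroutine that emits deltas through a sink. A stdlib-only analogy of that producer/sink split — a plain `asyncio.Queue` standing in for `StreamResponseSink`, with the real resource plumbing (`rs`, `UseResources`) omitted:

```python
import asyncio
import typing as ta


async def toy_stream_response(
        inner: ta.Callable[[asyncio.Queue], ta.Awaitable[None]],
) -> ta.AsyncIterator[str]:
    # Run the producer concurrently and yield whatever it pushes, until the
    # end-of-stream sentinel. The real new_stream_response also threads
    # resource management through; that part is omitted here.
    q: asyncio.Queue = asyncio.Queue()

    async def run() -> None:
        await inner(q)
        await q.put(None)  # sentinel: producer finished

    task = asyncio.create_task(run())
    while (item := await q.get()) is not None:
        yield item
    await task


async def inner(sink: asyncio.Queue) -> None:
    # Stands in for the diff's `await sink.emit(AiChoicesDeltas([...]))` calls.
    for tok in ('hel', 'lo', '!'):
        await sink.put(tok)


async def main() -> None:
    async for delta in toy_stream_response(inner):
        print(delta, end='')


asyncio.run(main())
```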
ommlds/minichain/backends/impls/tokenizers/tokens.py

```diff
@@ -1,23 +1,26 @@
 import typing as ta
 
-import tokenizers as tos
-
 from omlish import check
+from omlish import lang
 
 from .... import tokens as tks
 
 
+with lang.auto_proxy_import(globals()):
+    import tokenizers as tos
+
+
 ##
 
 
-def build_vocab(tos_tokenizer: tos.Tokenizer) -> tks.Vocab:
+def build_vocab(tos_tokenizer: 'tos.Tokenizer') -> tks.Vocab:
     return tks.Vocab([
         (ta.cast(tks.Token, i), tks.TokenStr(s))
         for s, i in tos_tokenizer.get_vocab().items()
     ])
 
 
-def build_specials(tos_tokenizer: tos.Tokenizer) -> tks.SpecialTokens:
+def build_specials(tos_tokenizer: 'tos.Tokenizer') -> tks.SpecialTokens:
     # FIXME
     return tks.SpecialTokens([])
 
@@ -28,7 +31,7 @@ def build_specials(tos_tokenizer: tos.Tokenizer) -> tks.SpecialTokens:
 class TokenizersTokenizer(tks.BaseTokenizer):
     def __init__(
             self,
-            tos_tokenizer: tos.Tokenizer,
+            tos_tokenizer: 'tos.Tokenizer',
     ) -> None:
         self._tos_tokenizer = check.isinstance(tos_tokenizer, tos.Tokenizer)
 
@@ -38,7 +41,7 @@ class TokenizersTokenizer(tks.BaseTokenizer):
         )
 
     @property
-    def tos_tokenizer(self) -> tos.Tokenizer:
+    def tos_tokenizer(self) -> 'tos.Tokenizer':
        return self._tos_tokenizer
 
     #
```
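This file is the cleanest example of the lazy-import convention adopted throughout the diff: heavy third-party imports move under `lang.auto_proxy_import(globals())` and annotations referring to them become strings. A minimal sketch of the idea, assuming (from how the diff uses it, not from omlish documentation) that the context manager installs proxies that defer the real import until first attribute access; the plain `dict` return stands in for the project's `tks.Vocab`:

```python
from omlish import lang

with lang.auto_proxy_import(globals()):
    import tokenizers as tos  # proxied; nothing actually imported yet


def build_vocab(tos_tokenizer: 'tos.Tokenizer') -> dict[int, str]:
    # The quoted annotation keeps the module importable without the heavy
    # dependency; the get_vocab() call triggers the real import on first use.
    return {i: s for s, i in tos_tokenizer.get_vocab().items()}
```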
ommlds/minichain/backends/impls/transformers/sentence.py

```diff
@@ -1,7 +1,6 @@
 import typing as ta
 
-
-
+from omlish import lang
 from omlish import typedvalues as tv
 
 from ....configs import Config
@@ -13,6 +12,10 @@ from ....vectors.embeddings import static_check_is_embedding_service
 from ....vectors.types import Vector
 
 
+with lang.auto_proxy_import(globals()):
+    import sentence_transformers as stfm
+
+
 ##
 
 
@@ -33,7 +36,7 @@ class SentenceTransformersEmbeddingService:
         with tv.consume(*configs) as cc:
             self._model_path = cc.pop(ModelPath(self.DEFAULT_MODEL))
 
-    def invoke(self, request: EmbeddingRequest) -> EmbeddingResponse:
+    async def invoke(self, request: EmbeddingRequest) -> EmbeddingResponse:
         mdl = stfm.SentenceTransformer(
             self._model_path.v,
         )
```
ommlds/minichain/backends/impls/transformers/tokens.py

```diff
@@ -1,17 +1,20 @@
 import typing as ta
 
-import transformers as tfm
-
 from omlish import check
 from omlish import collections as col
+from omlish import lang
 
 from .... import tokens as tks
 
 
+with lang.auto_proxy_import(globals()):
+    import transformers as tfm
+
+
 ##
 
 
-def build_vocab(tfm_tokenizer: tfm.PreTrainedTokenizerBase) -> tks.Vocab:
+def build_vocab(tfm_tokenizer: 'tfm.PreTrainedTokenizerBase') -> tks.Vocab:
     return tks.Vocab([
         (ta.cast(tks.Token, i), tks.TokenStr(s))
         for s, i in tfm_tokenizer.get_vocab().items()
@@ -32,7 +35,7 @@ SPECIAL_TOKEN_ATTR_MAP: col.BiMap[type[tks.SpecialToken], str] = col.make_bi_map
 })
 
 
-def build_specials(tfm_tokenizer: tfm.PreTrainedTokenizerBase) -> tks.SpecialTokens:
+def build_specials(tfm_tokenizer: 'tfm.PreTrainedTokenizerBase') -> tks.SpecialTokens:
     return tks.SpecialTokens.from_dict({
         st: getattr(tfm_tokenizer, a)
         for st, a in SPECIAL_TOKEN_ATTR_MAP.items()
@@ -45,7 +48,7 @@ def build_specials(tfm_tokenizer: tfm.PreTrainedTokenizerBase) -> tks.SpecialTok
 class TransformersTokenizer(tks.BaseTokenizer):
     def __init__(
             self,
-            tfm_tokenizer: tfm.PreTrainedTokenizerBase,
+            tfm_tokenizer: 'tfm.PreTrainedTokenizerBase',
     ) -> None:
         self._tfm_tokenizer = check.isinstance(tfm_tokenizer, tfm.PreTrainedTokenizerBase)
 
@@ -55,7 +58,7 @@ class TransformersTokenizer(tks.BaseTokenizer):
         )
 
     @property
-    def tfm_tokenizer(self) -> tfm.PreTrainedTokenizerBase:
+    def tfm_tokenizer(self) -> 'tfm.PreTrainedTokenizerBase':
         return self._tfm_tokenizer
 
     #
@@ -72,7 +75,7 @@ class TransformersTokenizer(tks.BaseTokenizer):
             self,
             tokens: ta.Iterable[tks.Token],
     ) -> str:
-        return self._tfm_tokenizer.decode(tokens)
+        return self._tfm_tokenizer.decode(tokens)  # type: ignore[arg-type]
 
 
 ##
```
ommlds/minichain/backends/impls/transformers/transformers.py

```diff
@@ -4,30 +4,57 @@ TODO:
 - https://huggingface.co/blog/aifeifei798/transformers-streaming-output
 """
 import sys
+import threading
 import typing as ta
 
-import transformers as tfm
-
 from omlish import check
 from omlish import lang
 from omlish import typedvalues as tv
+from omlish.asyncs.asyncio.sync import AsyncioBufferRelay
 
 from ....chat.choices.services import ChatChoicesRequest
 from ....chat.choices.services import ChatChoicesResponse
 from ....chat.choices.services import static_check_is_chat_choices_service
+from ....chat.choices.stream.services import ChatChoicesStreamRequest
+from ....chat.choices.stream.services import ChatChoicesStreamResponse
+from ....chat.choices.stream.services import static_check_is_chat_choices_stream_service
+from ....chat.choices.stream.types import AiChoiceDeltas  # noqa
+from ....chat.choices.stream.types import AiChoicesDeltas  # noqa
+from ....chat.choices.types import AiChoice
+from ....chat.choices.types import ChatChoicesOutputs
 from ....chat.messages import AiMessage
 from ....chat.messages import Message
 from ....chat.messages import SystemMessage
-from ....chat.messages import
+from ....chat.messages import ToolUseMessage
+from ....chat.messages import ToolUseResultMessage
 from ....chat.messages import UserMessage
+from ....chat.stream.types import ContentAiDelta  # noqa
 from ....completion import CompletionRequest
 from ....completion import CompletionResponse
 from ....completion import static_check_is_completion_service
 from ....configs import Config
 from ....models.configs import ModelPath
+from ....resources import UseResources
+from ....stream.services import StreamResponseSink
+from ....stream.services import new_stream_response
 from ...impls.huggingface.configs import HuggingfaceHubToken
 
 
+with lang.auto_proxy_import(globals()):
+    import transformers as tfm
+
+    from .....backends import transformers as tfm_u
+
+
+##
+
+
+# @omlish-manifest $.minichain.backends.strings.manifests.BackendStringsManifest(
+#     ['ChatChoicesService', 'ChatChoicesStreamService'],
+#     'transformers',
+# )
+
+
 ##
 
 
@@ -59,7 +86,7 @@ class TransformersCompletionService(lang.ExitStacked):
             self._pipeline_kwargs = cc.pop(TransformersPipelineKwargs, [])
             self._huggingface_hub_token = HuggingfaceHubToken.pop_secret(cc, env='HUGGINGFACE_HUB_TOKEN')
 
-    def invoke(self, request: CompletionRequest) -> CompletionResponse:
+    async def invoke(self, request: CompletionRequest) -> CompletionResponse:
         pkw: dict[str, ta.Any] = dict(
             model=self._model_path.v,
             device='mps' if sys.platform == 'darwin' else 'cuda',
@@ -94,17 +121,19 @@ def build_chat_message(m: Message) -> ta.Mapping[str, ta.Any]:
         return dict(
             role='assistant',
             content=check.isinstance(m.c, str),
-
-
-
-
-
-
-
-
-
-
-
+        )
+
+    elif isinstance(m, ToolUseMessage):
+        return dict(
+            role='assistant',
+            tool_calls=[dict(
+                id=m.tu.id,
+                function=dict(
+                    arguments=m.tu.args,
+                    name=m.tu.name,
+                ),
+                type='function',
+            )],
         )
 
     elif isinstance(m, UserMessage):
@@ -113,24 +142,21 @@ def build_chat_message(m: Message) -> ta.Mapping[str, ta.Any]:
             content=check.isinstance(m.c, str),
         )
 
-    elif isinstance(m,
+    elif isinstance(m, ToolUseResultMessage):
         return dict(
             role='tool',
-            tool_call_id=m.id,
-            content=check.isinstance(m.c, str),
+            tool_call_id=m.tur.id,
+            content=check.isinstance(m.tur.c, str),
         )
 
     else:
         raise TypeError(m)
 
 
-
-
-
-
-# )
-@static_check_is_chat_choices_service
-class TransformersChatChoicesService(lang.ExitStacked):
+##
+
+
+class BaseTransformersChatChoicesService(lang.ExitStacked):
     DEFAULT_MODEL: ta.ClassVar[str] = (
         'meta-llama/Llama-3.2-1B-Instruct'
     )
@@ -144,7 +170,7 @@ class TransformersChatChoicesService(lang.ExitStacked):
             self._huggingface_hub_token = HuggingfaceHubToken.pop_secret(cc, env='HUGGINGFACE_HUB_TOKEN')
 
     @lang.cached_function(transient=True)
-    def _load_pipeline(self) -> tfm.Pipeline:
+    def _load_pipeline(self) -> 'tfm.Pipeline':
         # FIXME: unload
         check.not_none(self._exit_stack)
 
@@ -157,21 +183,118 @@ class TransformersChatChoicesService(lang.ExitStacked):
         for pkw_cfg in self._pipeline_kwargs:
             pkw.update(pkw_cfg.v)
 
-
-
-
-        )
+        with tfm_u.file_cache_patch_context(
+                local_first=True,
+                local_config_present_is_authoritative=True,
+        ):
+            return tfm.pipeline(
+                'text-generation',
+                **pkw,
+            )
 
-
+
+##
+
+
+# @omlish-manifest $.minichain.registries.manifests.RegistryManifest(
+#     name='transformers',
+#     aliases=['tfm'],
+#     type='ChatChoicesService',
+# )
+@static_check_is_chat_choices_service
+class TransformersChatChoicesService(BaseTransformersChatChoicesService):
+    async def invoke(self, request: ChatChoicesRequest) -> ChatChoicesResponse:
         check.empty(request.options)
 
         pipeline = self._load_pipeline()
 
-
-
-
-
-
+        inputs = [
+            build_chat_message(m)
+            for m in request.v
+        ]
+
+        outputs = pipeline(inputs)
+
+        gts = check.single(outputs)['generated_text']
+        ugt, agt = gts
+        check.state(ugt['role'] == 'user')
+        check.state(agt['role'] == 'assistant')
+
+        return ChatChoicesResponse([AiChoice([AiMessage(agt['content'])])])
+
+
+##
+
+
+# @omlish-manifest $.minichain.registries.manifests.RegistryManifest(
+#     name='transformers',
+#     type='ChatChoicesStreamService',
+# )
+@static_check_is_chat_choices_stream_service
+class TransformersChatChoicesStreamService(BaseTransformersChatChoicesService):
+    async def invoke(self, request: ChatChoicesStreamRequest) -> ChatChoicesStreamResponse:
+        check.empty(request.options)
+
+        pipeline = self._load_pipeline()  # noqa
+
+        inputs = [  # noqa
+            build_chat_message(m)
+            for m in request.v
+        ]
+
+        relay: AsyncioBufferRelay = AsyncioBufferRelay()
+
+        def streamer_callback(text: str, *, stream_end: bool) -> None:
+            if text or stream_end:
+                relay.push(text, *([None] if stream_end else []))
+
+        streamer = tfm_u.CancellableTextStreamer(
+            check.not_none(pipeline.tokenizer),  # type: ignore[arg-type]
+            streamer_callback,  # noqa
+            skip_prompt=True,
+            skip_special_tokens=True,
         )
 
-
+        async with UseResources.or_new(request.options) as rs:
+            thread = threading.Thread(
+                target=tfm_u.CancellableTextStreamer.ignoring_cancelled(pipeline),
+                args=(
+                    inputs,
+                ),
+                kwargs=dict(
+                    streamer=streamer,
+                ),
+            )
+
+            def stop_thread() -> None:
+                streamer.cancel()
+                # thread.join()
+
+            rs.enter_context(lang.defer(stop_thread))
+
+            thread.start()
+
+            async def inner(sink: StreamResponseSink[AiChoicesDeltas]) -> ta.Sequence[ChatChoicesOutputs] | None:
+                while True:
+                    await relay.wait()
+                    got = relay.swap()
+
+                    if not got:
+                        raise RuntimeError
+
+                    if got[-1] is None:
+                        out = ''.join(got[:-1])
+                        end = True
+                    else:
+                        out = ''.join(got)
+                        end = False
+
+                    if out:
+                        await sink.emit(AiChoicesDeltas([AiChoiceDeltas([ContentAiDelta(out)])]))
+
+                    if end:
+                        break
+
+                return []
+
+            return await new_stream_response(rs, inner)
```
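The stream implementation bridges a synchronous `transformers` pipeline running in a worker thread to the asyncio consumer through `AsyncioBufferRelay`: the streamer callback `push`es chunks from the thread, the `inner` coroutine `wait`s and `swap`s buffers on the loop, and `None` marks end-of-stream. A self-contained toy showing that handshake — the class below is my own reduction of the push/wait/swap protocol as used in the diff, not omlish's implementation:

```python
import asyncio
import threading


class ToyBufferRelay:
    # Pushes may come from any thread; wait()/swap() run on the event loop.
    def __init__(self) -> None:
        self._loop = asyncio.get_running_loop()
        self._buf: list = []
        self._evt = asyncio.Event()

    def push(self, *items) -> None:  # called from the producer thread
        def do() -> None:
            self._buf.extend(items)
            self._evt.set()
        self._loop.call_soon_threadsafe(do)

    async def wait(self) -> None:
        await self._evt.wait()

    def swap(self) -> list:
        out, self._buf = self._buf, []
        self._evt.clear()
        return out


async def main() -> None:
    relay = ToyBufferRelay()

    def produce() -> None:  # stands in for the pipeline + streamer thread
        for chunk in ('hel', 'lo', '!'):
            relay.push(chunk)
        relay.push(None)  # end-of-stream sentinel, as in the diff

    threading.Thread(target=produce).start()

    done = False
    while not done:
        await relay.wait()
        got = relay.swap()
        if got and got[-1] is None:
            got, done = got[:-1], True
        print(''.join(got), end='')


asyncio.run(main())
```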
ommlds/minichain/backends/strings/resolving.py

```diff
@@ -108,7 +108,10 @@ class ManifestBackendStringResolver(BackendStringResolver):
 
         mn: str | None = mdl.name
 
-        if
+        if args.parsed.backend == m.backend_name and mn is not None:
+            pass
+
+        elif mn == m.backend_name:
             if m.model_names is not None:
                 mn = m.model_names.resolved_default
             else:
```
ommlds/minichain/chat/_marshal.py

```diff
@@ -5,6 +5,8 @@ TODO:
 from omlish import lang
 from omlish import marshal as msh
 
+from .messages import AnyAiMessage
+from .messages import AnyUserMessage
 from .messages import Message
 
 
@@ -12,13 +14,18 @@ from .messages import Message
 
 
 @lang.static_init
-def
-
+def _install_standard_marshaling() -> None:
+    for cls in [
+        AnyAiMessage,
+        AnyUserMessage,
         Message,
-
-
-
-
-
-
-
+    ]:
+        cls_poly = msh.polymorphism_from_subclasses(
+            cls,
+            naming=msh.Naming.SNAKE,
+            strip_suffix='Message',
+        )
+        msh.install_standard_factories(
+            msh.PolymorphismMarshalerFactory(cls_poly),
+            msh.PolymorphismUnmarshalerFactory(cls_poly),
+        )
```
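The new `_install_standard_marshaling` registers polymorphic (un)marshalers over each message hierarchy. How `naming=msh.Naming.SNAKE` combined with `strip_suffix='Message'` derives wire tags is my inference from the arguments, not documented behavior; the plausible mapping, sketched in plain Python:

```python
import re


def wire_tag(cls_name: str, *, strip_suffix: str = 'Message') -> str:
    # Guessing at the library's rule: drop the suffix, then snake_case.
    if cls_name.endswith(strip_suffix):
        cls_name = cls_name[:-len(strip_suffix)]
    return re.sub(r'(?<!^)(?=[A-Z])', '_', cls_name).lower()


assert wire_tag('SystemMessage') == 'system'
assert wire_tag('ToolUseResultMessage') == 'tool_use_result'
```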
ommlds/minichain/chat/choices/adapters.py

```diff
@@ -2,7 +2,7 @@ from omlish import check
 from omlish import dataclasses as dc
 
 from ...services import Response
-from ..messages import
+from ..messages import AiChat
 from ..services import ChatRequest
 from ..services import static_check_is_chat_service
 from .services import ChatChoicesService
@@ -17,6 +17,6 @@ from .types import ChatChoicesOutputs
 class ChatChoicesServiceChatService:
     service: ChatChoicesService
 
-    def invoke(self, request: ChatRequest) -> Response[
-        resp = self.service.invoke(request)
-        return Response(check.single(resp.v).
+    async def invoke(self, request: ChatRequest) -> Response[AiChat, ChatChoicesOutputs]:
+        resp = await self.service.invoke(request)
+        return Response(check.single(resp.v).ms, resp.outputs)
```
ommlds/minichain/chat/choices/services.py

```diff
@@ -36,5 +36,5 @@ def static_check_is_chat_choices_service[T: ChatChoicesService](t: type[T]) -> t
 @static_check_is_chat_choices_service
 class AbstractChatChoicesService(lang.Abstract):
     @abc.abstractmethod
-    def invoke(self, request: ChatChoicesRequest) -> ChatChoicesResponse:
+    def invoke(self, request: ChatChoicesRequest) -> ta.Awaitable[ChatChoicesResponse]:
         raise NotImplementedError
```
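Declaring the abstract `invoke` as returning `ta.Awaitable[ChatChoicesResponse]`, rather than making the base method itself `async`, lets coroutine implementations type-check cleanly: an `async def`'s coroutine is a subtype of `Awaitable`, so the async overrides seen throughout this diff satisfy the sync-declared abstract signature. A minimal illustration with placeholder types:

```python
import abc
import asyncio
import typing as ta


class Service(abc.ABC):
    @abc.abstractmethod
    def invoke(self, request: str) -> ta.Awaitable[str]:
        raise NotImplementedError


class Impl(Service):
    async def invoke(self, request: str) -> str:  # a valid override
        return request.upper()


assert asyncio.run(Impl().invoke('hi')) == 'HI'
```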
File without changes

ommlds/minichain/chat/choices/stream/adapters.py (new file)

```diff
@@ -0,0 +1,35 @@
+from omlish import dataclasses as dc
+
+from ....services import Response
+from ..services import ChatChoicesRequest
+from ..services import static_check_is_chat_choices_service
+from ..types import AiChoice
+from ..types import AiChoices
+from .joining import AiChoicesDeltaJoiner
+from .services import ChatChoicesOutputs
+from .services import ChatChoicesStreamOutputs
+from .services import ChatChoicesStreamService
+
+
+##
+
+
+@static_check_is_chat_choices_service
+@dc.dataclass(frozen=True)
+class ChatChoicesStreamServiceChatChoicesService:
+    service: ChatChoicesStreamService
+
+    async def invoke(self, request: ChatChoicesRequest) -> Response[
+        AiChoices,
+        ChatChoicesOutputs | ChatChoicesStreamOutputs,
+    ]:
+        joiner = AiChoicesDeltaJoiner()
+
+        async with (resp := await self.service.invoke(request)).v as it:  # noqa
+            async for cs in it:
+                joiner.add(cs.choices)
+
+        # check.state(resp_v.is_done)
+
+        # FIXME: outputs lol
+        return Response([AiChoice(ms) for ms in joiner.build()])
```