ommlds 0.0.0.dev480__py3-none-any.whl → 0.0.0.dev503__py3-none-any.whl
This diff shows the changes between two publicly released versions of this package, as they appear in their respective public registries. It is provided for informational purposes only.
- ommlds/.omlish-manifests.json +100 -33
- ommlds/README.md +11 -0
- ommlds/__about__.py +9 -6
- ommlds/backends/anthropic/protocol/__init__.py +13 -1
- ommlds/backends/anthropic/protocol/_dataclasses.py +1625 -0
- ommlds/backends/anthropic/protocol/sse/events.py +2 -0
- ommlds/backends/cerebras/__init__.py +7 -0
- ommlds/backends/cerebras/_dataclasses.py +4254 -0
- ommlds/backends/cerebras/_marshal.py +24 -0
- ommlds/backends/cerebras/protocol.py +312 -0
- ommlds/backends/google/protocol/__init__.py +13 -0
- ommlds/backends/google/protocol/_dataclasses.py +5997 -0
- ommlds/backends/groq/__init__.py +7 -0
- ommlds/backends/groq/_dataclasses.py +3901 -0
- ommlds/backends/groq/clients.py +9 -0
- ommlds/backends/llamacpp/logging.py +4 -1
- ommlds/backends/mlx/caching.py +7 -3
- ommlds/backends/mlx/cli.py +10 -7
- ommlds/backends/mlx/generation.py +18 -16
- ommlds/backends/mlx/limits.py +10 -6
- ommlds/backends/mlx/loading.py +7 -4
- ommlds/backends/ollama/__init__.py +7 -0
- ommlds/backends/ollama/_dataclasses.py +3488 -0
- ommlds/backends/ollama/protocol.py +3 -0
- ommlds/backends/openai/protocol/__init__.py +15 -1
- ommlds/backends/openai/protocol/_dataclasses.py +7708 -0
- ommlds/backends/tavily/__init__.py +7 -0
- ommlds/backends/tavily/_dataclasses.py +1734 -0
- ommlds/backends/transformers/__init__.py +14 -0
- ommlds/cli/__init__.py +7 -0
- ommlds/cli/_dataclasses.py +3515 -0
- ommlds/cli/backends/catalog.py +0 -5
- ommlds/cli/backends/inject.py +70 -7
- ommlds/cli/backends/meta.py +82 -0
- ommlds/cli/content/messages.py +1 -1
- ommlds/cli/inject.py +11 -3
- ommlds/cli/main.py +137 -68
- ommlds/cli/rendering/types.py +6 -0
- ommlds/cli/secrets.py +2 -1
- ommlds/cli/sessions/base.py +1 -10
- ommlds/cli/sessions/chat/configs.py +9 -17
- ommlds/cli/sessions/chat/{chat → drivers}/ai/configs.py +3 -1
- ommlds/cli/sessions/chat/drivers/ai/events.py +57 -0
- ommlds/cli/sessions/chat/{chat → drivers}/ai/inject.py +10 -3
- ommlds/cli/sessions/chat/{chat → drivers}/ai/rendering.py +1 -1
- ommlds/cli/sessions/chat/{chat → drivers}/ai/services.py +1 -1
- ommlds/cli/sessions/chat/{chat → drivers}/ai/tools.py +4 -8
- ommlds/cli/sessions/chat/{chat → drivers}/ai/types.py +9 -0
- ommlds/cli/sessions/chat/drivers/configs.py +25 -0
- ommlds/cli/sessions/chat/drivers/events/inject.py +27 -0
- ommlds/cli/sessions/chat/drivers/events/injection.py +14 -0
- ommlds/cli/sessions/chat/drivers/events/manager.py +16 -0
- ommlds/cli/sessions/chat/drivers/events/types.py +38 -0
- ommlds/cli/sessions/chat/drivers/impl.py +50 -0
- ommlds/cli/sessions/chat/drivers/inject.py +70 -0
- ommlds/cli/sessions/chat/{chat → drivers}/state/configs.py +2 -0
- ommlds/cli/sessions/chat/drivers/state/ids.py +25 -0
- ommlds/cli/sessions/chat/drivers/state/inject.py +83 -0
- ommlds/cli/sessions/chat/{chat → drivers}/state/inmemory.py +0 -4
- ommlds/cli/sessions/chat/{chat → drivers}/state/storage.py +17 -10
- ommlds/cli/sessions/chat/{chat → drivers}/state/types.py +10 -5
- ommlds/cli/sessions/chat/{tools → drivers/tools}/configs.py +2 -2
- ommlds/cli/sessions/chat/drivers/tools/confirmation.py +44 -0
- ommlds/cli/sessions/chat/drivers/tools/errorhandling.py +39 -0
- ommlds/cli/sessions/chat/{tools → drivers/tools}/execution.py +3 -4
- ommlds/cli/sessions/chat/{tools → drivers/tools}/fs/inject.py +3 -3
- ommlds/cli/sessions/chat/{tools → drivers/tools}/inject.py +7 -12
- ommlds/cli/sessions/chat/{tools → drivers/tools}/injection.py +5 -5
- ommlds/cli/sessions/chat/{tools → drivers/tools}/rendering.py +3 -3
- ommlds/cli/sessions/chat/{tools → drivers/tools}/todo/inject.py +3 -3
- ommlds/cli/sessions/chat/{tools → drivers/tools}/weather/tools.py +1 -1
- ommlds/cli/sessions/chat/drivers/types.py +31 -0
- ommlds/cli/sessions/chat/{chat → drivers}/user/configs.py +0 -3
- ommlds/cli/sessions/chat/drivers/user/inject.py +41 -0
- ommlds/cli/sessions/chat/facades/__init__.py +0 -0
- ommlds/cli/sessions/chat/facades/commands/__init__.py +0 -0
- ommlds/cli/sessions/chat/facades/commands/base.py +83 -0
- ommlds/cli/sessions/chat/facades/commands/configs.py +9 -0
- ommlds/cli/sessions/chat/facades/commands/inject.py +41 -0
- ommlds/cli/sessions/chat/facades/commands/injection.py +15 -0
- ommlds/cli/sessions/chat/facades/commands/manager.py +59 -0
- ommlds/cli/sessions/chat/facades/commands/simple.py +34 -0
- ommlds/cli/sessions/chat/facades/commands/types.py +13 -0
- ommlds/cli/sessions/chat/facades/configs.py +11 -0
- ommlds/cli/sessions/chat/facades/facade.py +26 -0
- ommlds/cli/sessions/chat/facades/inject.py +35 -0
- ommlds/cli/sessions/chat/facades/ui.py +34 -0
- ommlds/cli/sessions/chat/inject.py +8 -31
- ommlds/cli/sessions/chat/interfaces/__init__.py +0 -0
- ommlds/cli/sessions/chat/interfaces/bare/__init__.py +0 -0
- ommlds/cli/sessions/chat/interfaces/bare/configs.py +15 -0
- ommlds/cli/sessions/chat/interfaces/bare/inject.py +69 -0
- ommlds/cli/sessions/chat/interfaces/bare/interactive.py +49 -0
- ommlds/cli/sessions/chat/interfaces/bare/oneshot.py +21 -0
- ommlds/cli/sessions/chat/{tools/confirmation.py → interfaces/bare/tools.py} +3 -22
- ommlds/cli/sessions/chat/interfaces/base.py +13 -0
- ommlds/cli/sessions/chat/interfaces/configs.py +11 -0
- ommlds/cli/sessions/chat/interfaces/inject.py +29 -0
- ommlds/cli/sessions/chat/interfaces/textual/__init__.py +0 -0
- ommlds/cli/sessions/chat/interfaces/textual/app.py +310 -0
- ommlds/cli/sessions/chat/interfaces/textual/configs.py +11 -0
- ommlds/cli/sessions/chat/interfaces/textual/facades.py +19 -0
- ommlds/cli/sessions/chat/interfaces/textual/inject.py +97 -0
- ommlds/cli/sessions/chat/interfaces/textual/interface.py +24 -0
- ommlds/cli/sessions/chat/interfaces/textual/styles/__init__.py +29 -0
- ommlds/cli/sessions/chat/interfaces/textual/styles/input.tcss +53 -0
- ommlds/cli/sessions/chat/interfaces/textual/styles/markdown.tcss +7 -0
- ommlds/cli/sessions/chat/interfaces/textual/styles/messages.tcss +157 -0
- ommlds/cli/sessions/chat/interfaces/textual/tools.py +38 -0
- ommlds/cli/sessions/chat/interfaces/textual/widgets/__init__.py +0 -0
- ommlds/cli/sessions/chat/interfaces/textual/widgets/input.py +36 -0
- ommlds/cli/sessions/chat/interfaces/textual/widgets/messages.py +197 -0
- ommlds/cli/sessions/chat/session.py +8 -13
- ommlds/cli/sessions/completion/configs.py +3 -4
- ommlds/cli/sessions/completion/inject.py +1 -2
- ommlds/cli/sessions/completion/session.py +4 -8
- ommlds/cli/sessions/configs.py +10 -0
- ommlds/cli/sessions/embedding/configs.py +3 -4
- ommlds/cli/sessions/embedding/inject.py +1 -2
- ommlds/cli/sessions/embedding/session.py +4 -8
- ommlds/cli/sessions/inject.py +15 -15
- ommlds/cli/state/storage.py +7 -1
- ommlds/minichain/__init__.py +161 -38
- ommlds/minichain/_dataclasses.py +20452 -0
- ommlds/minichain/_typedvalues.py +11 -4
- ommlds/minichain/backends/impls/anthropic/names.py +3 -3
- ommlds/minichain/backends/impls/anthropic/protocol.py +2 -2
- ommlds/minichain/backends/impls/anthropic/stream.py +1 -1
- ommlds/minichain/backends/impls/cerebras/__init__.py +0 -0
- ommlds/minichain/backends/impls/cerebras/chat.py +80 -0
- ommlds/minichain/backends/impls/cerebras/names.py +45 -0
- ommlds/minichain/backends/impls/cerebras/protocol.py +143 -0
- ommlds/minichain/backends/impls/cerebras/stream.py +125 -0
- ommlds/minichain/backends/impls/duckduckgo/search.py +5 -1
- ommlds/minichain/backends/impls/google/names.py +6 -0
- ommlds/minichain/backends/impls/google/stream.py +1 -1
- ommlds/minichain/backends/impls/google/tools.py +2 -2
- ommlds/minichain/backends/impls/groq/chat.py +2 -0
- ommlds/minichain/backends/impls/groq/protocol.py +2 -2
- ommlds/minichain/backends/impls/groq/stream.py +3 -1
- ommlds/minichain/backends/impls/huggingface/repos.py +1 -5
- ommlds/minichain/backends/impls/llamacpp/chat.py +6 -3
- ommlds/minichain/backends/impls/llamacpp/completion.py +7 -3
- ommlds/minichain/backends/impls/llamacpp/stream.py +6 -3
- ommlds/minichain/backends/impls/mlx/chat.py +6 -3
- ommlds/minichain/backends/impls/ollama/chat.py +51 -57
- ommlds/minichain/backends/impls/ollama/protocol.py +144 -0
- ommlds/minichain/backends/impls/openai/format.py +4 -3
- ommlds/minichain/backends/impls/openai/names.py +3 -1
- ommlds/minichain/backends/impls/openai/stream.py +33 -1
- ommlds/minichain/backends/impls/sentencepiece/tokens.py +9 -6
- ommlds/minichain/backends/impls/tinygrad/chat.py +7 -4
- ommlds/minichain/backends/impls/tokenizers/tokens.py +9 -6
- ommlds/minichain/backends/impls/transformers/sentence.py +5 -2
- ommlds/minichain/backends/impls/transformers/tokens.py +9 -6
- ommlds/minichain/backends/impls/transformers/transformers.py +10 -8
- ommlds/minichain/backends/strings/resolving.py +1 -1
- ommlds/minichain/chat/content.py +42 -0
- ommlds/minichain/chat/messages.py +43 -39
- ommlds/minichain/chat/stream/joining.py +36 -12
- ommlds/minichain/chat/stream/types.py +1 -1
- ommlds/minichain/chat/templating.py +3 -3
- ommlds/minichain/content/__init__.py +19 -3
- ommlds/minichain/content/_marshal.py +181 -55
- ommlds/minichain/content/code.py +26 -0
- ommlds/minichain/content/composite.py +28 -0
- ommlds/minichain/content/content.py +27 -0
- ommlds/minichain/content/dynamic.py +12 -0
- ommlds/minichain/content/emphasis.py +27 -0
- ommlds/minichain/content/images.py +2 -2
- ommlds/minichain/content/json.py +2 -2
- ommlds/minichain/content/link.py +13 -0
- ommlds/minichain/content/markdown.py +12 -0
- ommlds/minichain/content/metadata.py +10 -0
- ommlds/minichain/content/namespaces.py +8 -0
- ommlds/minichain/content/placeholders.py +10 -9
- ommlds/minichain/content/quote.py +26 -0
- ommlds/minichain/content/raw.py +49 -0
- ommlds/minichain/content/recursive.py +12 -0
- ommlds/minichain/content/section.py +26 -0
- ommlds/minichain/content/sequence.py +17 -3
- ommlds/minichain/content/standard.py +32 -0
- ommlds/minichain/content/tag.py +28 -0
- ommlds/minichain/content/templates.py +13 -0
- ommlds/minichain/content/text.py +2 -2
- ommlds/minichain/content/transform/__init__.py +0 -0
- ommlds/minichain/content/transform/json.py +55 -0
- ommlds/minichain/content/transform/markdown.py +8 -0
- ommlds/minichain/content/transform/materialize.py +51 -0
- ommlds/minichain/content/transform/metadata.py +16 -0
- ommlds/minichain/content/{prepare.py → transform/prepare.py} +10 -15
- ommlds/minichain/content/transform/recursive.py +97 -0
- ommlds/minichain/content/transform/standard.py +43 -0
- ommlds/minichain/content/{transforms → transform}/stringify.py +1 -7
- ommlds/minichain/content/transform/strings.py +33 -0
- ommlds/minichain/content/transform/templates.py +25 -0
- ommlds/minichain/content/visitors.py +231 -0
- ommlds/minichain/lib/fs/tools/read.py +1 -1
- ommlds/minichain/lib/fs/tools/recursivels/rendering.py +1 -1
- ommlds/minichain/lib/fs/tools/recursivels/running.py +1 -1
- ommlds/minichain/lib/todo/tools/write.py +2 -1
- ommlds/minichain/lib/todo/types.py +1 -1
- ommlds/minichain/metadata.py +56 -2
- ommlds/minichain/resources.py +22 -1
- ommlds/minichain/services/README.md +154 -0
- ommlds/minichain/services/__init__.py +6 -2
- ommlds/minichain/services/_marshal.py +46 -10
- ommlds/minichain/services/_origclasses.py +11 -0
- ommlds/minichain/services/_typedvalues.py +8 -3
- ommlds/minichain/services/requests.py +73 -3
- ommlds/minichain/services/responses.py +73 -3
- ommlds/minichain/services/services.py +9 -0
- ommlds/minichain/stream/services.py +24 -1
- ommlds/minichain/text/applypatch.py +2 -1
- ommlds/minichain/text/toolparsing/llamacpp/types.py +1 -1
- ommlds/minichain/tokens/specials.py +1 -1
- ommlds/minichain/tools/execution/catalog.py +1 -1
- ommlds/minichain/tools/execution/errorhandling.py +36 -0
- ommlds/minichain/tools/execution/errors.py +2 -2
- ommlds/minichain/tools/execution/executors.py +1 -1
- ommlds/minichain/tools/fns.py +1 -1
- ommlds/minichain/tools/jsonschema.py +2 -2
- ommlds/minichain/tools/reflect.py +6 -6
- ommlds/minichain/tools/types.py +12 -15
- ommlds/minichain/vectors/_marshal.py +1 -1
- ommlds/minichain/vectors/embeddings.py +1 -1
- ommlds/minichain/wrappers/__init__.py +7 -0
- ommlds/minichain/wrappers/firstinwins.py +144 -0
- ommlds/minichain/wrappers/instrument.py +146 -0
- ommlds/minichain/wrappers/retry.py +168 -0
- ommlds/minichain/wrappers/services.py +98 -0
- ommlds/minichain/wrappers/stream.py +57 -0
- ommlds/nanochat/rustbpe/README.md +9 -0
- ommlds/nanochat/tokenizers.py +40 -6
- ommlds/specs/mcp/clients.py +146 -0
- ommlds/specs/mcp/protocol.py +123 -18
- ommlds/tools/git.py +82 -65
- {ommlds-0.0.0.dev480.dist-info → ommlds-0.0.0.dev503.dist-info}/METADATA +13 -11
- ommlds-0.0.0.dev503.dist-info/RECORD +520 -0
- ommlds/cli/sessions/chat/chat/state/inject.py +0 -36
- ommlds/cli/sessions/chat/chat/user/inject.py +0 -62
- ommlds/cli/sessions/chat/chat/user/interactive.py +0 -31
- ommlds/cli/sessions/chat/chat/user/oneshot.py +0 -25
- ommlds/cli/sessions/chat/chat/user/types.py +0 -15
- ommlds/cli/sessions/chat/driver.py +0 -43
- ommlds/minichain/content/materialize.py +0 -196
- ommlds/minichain/content/simple.py +0 -47
- ommlds/minichain/content/transforms/base.py +0 -46
- ommlds/minichain/content/transforms/interleave.py +0 -70
- ommlds/minichain/content/transforms/squeeze.py +0 -72
- ommlds/minichain/content/transforms/strings.py +0 -24
- ommlds/minichain/content/types.py +0 -43
- ommlds/minichain/stream/wrap.py +0 -62
- ommlds-0.0.0.dev480.dist-info/RECORD +0 -427
- /ommlds/cli/sessions/chat/{chat → drivers}/__init__.py +0 -0
- /ommlds/cli/sessions/chat/{chat → drivers}/ai/__init__.py +0 -0
- /ommlds/cli/sessions/chat/{chat → drivers}/ai/injection.py +0 -0
- /ommlds/cli/sessions/chat/{chat/state → drivers/events}/__init__.py +0 -0
- /ommlds/cli/sessions/chat/{chat/user → drivers/phases}/__init__.py +0 -0
- /ommlds/cli/sessions/chat/{phases → drivers/phases}/inject.py +0 -0
- /ommlds/cli/sessions/chat/{phases → drivers/phases}/injection.py +0 -0
- /ommlds/cli/sessions/chat/{phases → drivers/phases}/manager.py +0 -0
- /ommlds/cli/sessions/chat/{phases → drivers/phases}/types.py +0 -0
- /ommlds/cli/sessions/chat/{phases → drivers/state}/__init__.py +0 -0
- /ommlds/cli/sessions/chat/{tools → drivers/tools}/__init__.py +0 -0
- /ommlds/cli/sessions/chat/{tools → drivers/tools}/fs/__init__.py +0 -0
- /ommlds/cli/sessions/chat/{tools → drivers/tools}/fs/configs.py +0 -0
- /ommlds/cli/sessions/chat/{tools → drivers/tools}/todo/__init__.py +0 -0
- /ommlds/cli/sessions/chat/{tools → drivers/tools}/todo/configs.py +0 -0
- /ommlds/cli/sessions/chat/{tools → drivers/tools}/weather/__init__.py +0 -0
- /ommlds/cli/sessions/chat/{tools → drivers/tools}/weather/configs.py +0 -0
- /ommlds/cli/sessions/chat/{tools → drivers/tools}/weather/inject.py +0 -0
- /ommlds/{minichain/content/transforms → cli/sessions/chat/drivers/user}/__init__.py +0 -0
- {ommlds-0.0.0.dev480.dist-info → ommlds-0.0.0.dev503.dist-info}/WHEEL +0 -0
- {ommlds-0.0.0.dev480.dist-info → ommlds-0.0.0.dev503.dist-info}/entry_points.txt +0 -0
- {ommlds-0.0.0.dev480.dist-info → ommlds-0.0.0.dev503.dist-info}/licenses/LICENSE +0 -0
- {ommlds-0.0.0.dev480.dist-info → ommlds-0.0.0.dev503.dist-info}/top_level.txt +0 -0
```diff
@@ -2,13 +2,10 @@ import contextlib
 import threading
 import typing as ta  # noqa

-import llama_cpp as lcc
-
 from omlish import check
 from omlish import lang
 from omlish import typedvalues as tv

-from .....backends import llamacpp as lcu
 from ....chat.choices.services import ChatChoicesOutputs
 from ....chat.choices.stream.services import ChatChoicesStreamRequest
 from ....chat.choices.stream.services import ChatChoicesStreamResponse
@@ -26,6 +23,12 @@ from .format import ROLES_MAP
 from .format import get_msg_content


+with lang.auto_proxy_import(globals()):
+    import llama_cpp as lcc
+
+    from .....backends import llamacpp as lcu
+
+
 ##

```
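This hunk, and several below for the mlx, tinygrad, sentencepiece, tokenizers, and sentence_transformers backends, applies one recurring refactor: heavy third-party imports move out of module top level and under omlish's `lang.auto_proxy_import(globals())` block, and annotations that mention the proxied modules become string literals so nothing is resolved at import time. A minimal sketch of the pattern, reusing only calls that appear in these hunks; the deferred-loading semantics are inferred from the name and the accompanying annotation quoting:

```python
from omlish import lang

with lang.auto_proxy_import(globals()):
    # not imported yet; a lazy proxy is installed under this name
    import llama_cpp as lcc


def make_llama(model_path: str) -> 'lcc.Llama':  # quoted so the proxy isn't touched at import time
    # first real use of the module triggers the actual import
    return lcc.Llama(model_path=model_path)
```

The quoting matters because an unquoted `-> lcc.Llama` annotation would force the import the moment the module is loaded, defeating the proxy.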
```diff
@@ -5,7 +5,6 @@ from omlish import check
 from omlish import lang
 from omlish import typedvalues as tv

-from .....backends import mlx as mlxu
 from ....chat.choices.services import ChatChoicesOutputs
 from ....chat.choices.services import ChatChoicesRequest
 from ....chat.choices.services import ChatChoicesResponse
@@ -33,6 +32,10 @@ from ....stream.services import StreamResponseSink
 from ....stream.services import new_stream_response


+with lang.auto_proxy_import(globals()):
+    from .....backends import mlx as mlxu
+
+
 ##


@@ -83,7 +86,7 @@ class BaseMlxChatChoicesService(lang.ExitStacked):
            raise TypeError(m)

    @lang.cached_function(transient=True)
-   def _load_model(self) -> mlxu.LoadedModel:
+   def _load_model(self) -> 'mlxu.LoadedModel':
        # FIXME: walk state, find all mx.arrays, dealloc/set to empty
        check.not_none(self._exit_stack)

@@ -100,7 +103,7 @@ class BaseMlxChatChoicesService(lang.ExitStacked):
        )

    @lang.cached_function(transient=True)
-   def _get_tokenizer(self) -> mlxu.tokenization.Tokenizer:
+   def _get_tokenizer(self) -> 'mlxu.tokenization.Tokenizer':
        tokenizer = self._load_model().tokenization.tokenizer

        if not (
```
```diff
@@ -1,3 +1,29 @@
+"""
+dolphin3:latest          d5ab9ae8e1f2    4.9 GB    11 months ago    (no tools)
+
+functiongemma:270m       7c19b650567a    300 MB    5 minutes ago
+gemma3:27b               a418f5838eaf    17 GB     7 weeks ago      (no tools)
+gemma3:4b                a2af6cc3eb7f    3.3 GB    7 weeks ago      (no tools)
+
+llama3.2:1b              baf6a787fdff    1.3 GB    13 months ago    (too stupid for tools)
+llama3.2:latest          a80c4f17acd5    2.0 GB    13 months ago
+
+devstral-small-2:24b     24277f07f62d    15 GB     15 hours ago
+ministral-3:14b          4760c35aeb9d    9.1 GB    11 hours ago
+mistral:latest           6577803aa9a0    4.4 GB    3 seconds ago
+
+nemotron-3-nano:30b      b725f1117407    24 GB     15 hours ago
+
+olmo-3.1:32b-instruct    a16b6a5be6cf    19 GB     11 hours ago     (no tools)
+olmo-3.1:32b-think       223d4ec84d91    19 GB     11 hours ago     (no tools)
+
+phi4-mini:latest         78fad5d182a7    2.5 GB    8 months ago     (no tools)
+
+qwen3-coder:30b          06c1097efce0    18 GB     11 hours ago
+qwen3-next:80b           b2ebb986e4e9    50 GB     11 hours ago
+qwen3:30b                ad815644918f    18 GB     11 hours ago
+qwen3:32b                030ee887880f    20 GB     11 hours ago
+"""
 import typing as ta

 from omlish import check
@@ -16,20 +42,17 @@ from ....chat.choices.services import static_check_is_chat_choices_service
 from ....chat.choices.stream.services import ChatChoicesStreamRequest
 from ....chat.choices.stream.services import ChatChoicesStreamResponse
 from ....chat.choices.stream.services import static_check_is_chat_choices_stream_service
-from ....chat.choices.stream.types import AiChoiceDeltas
 from ....chat.choices.stream.types import AiChoicesDeltas
-from ....chat.choices.types import AiChoice
-from ....chat.messages import AiMessage
-from ....chat.messages import AnyAiMessage
-from ....chat.messages import Message
-from ....chat.messages import SystemMessage
-from ....chat.messages import UserMessage
-from ....chat.stream.types import ContentAiDelta
+from ....chat.tools.types import Tool
 from ....models.configs import ModelName
 from ....resources import UseResources
 from ....standard import ApiUrl
 from ....stream.services import StreamResponseSink
 from ....stream.services import new_stream_response
+from .protocol import build_mc_ai_choice_deltas
+from .protocol import build_mc_choices_response
+from .protocol import build_ol_request_messages
+from .protocol import build_ol_request_tool


 ##
@@ -64,31 +87,6 @@ class BaseOllamaChatChoicesService(lang.Abstract):
        self._api_url = cc.pop(self.DEFAULT_API_URL)
        self._model_name = cc.pop(self.DEFAULT_MODEL_NAME)

-   #
-
-   ROLE_MAP: ta.ClassVar[ta.Mapping[type[Message], pt.Role]] = {  # noqa
-       SystemMessage: 'system',
-       UserMessage: 'user',
-       AiMessage: 'assistant',
-   }
-
-   @classmethod
-   def _get_message_content(cls, m: Message) -> str | None:
-       if isinstance(m, (AiMessage, UserMessage, SystemMessage)):
-           return check.isinstance(m.c, str)
-       else:
-           raise TypeError(m)
-
-   @classmethod
-   def _build_request_messages(cls, mc_msgs: ta.Iterable[Message]) -> ta.Sequence[pt.Message]:
-       messages: list[pt.Message] = []
-       for m in mc_msgs:
-           messages.append(pt.Message(
-               role=cls.ROLE_MAP[type(m)],
-               content=cls._get_message_content(m),
-           ))
-       return messages
-

 ##

@@ -103,12 +101,18 @@ class OllamaChatChoicesService(BaseOllamaChatChoicesService):
        self,
        request: ChatChoicesRequest,
    ) -> ChatChoicesResponse:
-       messages = self._build_request_messages(request.v)
+       messages = build_ol_request_messages(request.v)
+
+       tools: list[pt.Tool] = []
+       with tv.TypedValues(*request.options).consume() as oc:
+           t: Tool
+           for t in oc.pop(Tool, []):
+               tools.append(build_ol_request_tool(t))

        a_req = pt.ChatRequest(
            model=self._model_name.v,
            messages=messages,
-
+           tools=tools or None,
            stream=False,
        )
@@ -124,17 +128,7 @@ class OllamaChatChoicesService(BaseOllamaChatChoicesService):

        resp = msh.unmarshal(json_response, pt.ChatResponse)

-
-       if resp.message.role == 'assistant':
-           out.append(AiMessage(
-               check.not_none(resp.message.content),
-           ))
-       else:
-           raise TypeError(resp.message.role)
-
-       return ChatChoicesResponse([
-           AiChoice(out),
-       ])
+       return build_mc_choices_response(resp)


 ##
@@ -152,12 +146,18 @@ class OllamaChatChoicesStreamService(BaseOllamaChatChoicesService):
        self,
        request: ChatChoicesStreamRequest,
    ) -> ChatChoicesStreamResponse:
-       messages = self._build_request_messages(request.v)
+       messages = build_ol_request_messages(request.v)
+
+       tools: list[pt.Tool] = []
+       with tv.TypedValues(*request.options).consume() as oc:
+           t: Tool
+           for t in oc.pop(Tool, []):
+               tools.append(build_ol_request_tool(t))

        a_req = pt.ChatRequest(
            model=self._model_name.v,
            messages=messages,
-
+           tools=tools or None,
            stream=True,
        )
@@ -179,19 +179,13 @@
                for l in db.feed(b):
                    if isinstance(l, DelimitingBuffer.Incomplete):
                        # FIXME: handle
-
+                       raise TypeError(l)

                    lj = json.loads(l.decode('utf-8'))
                    lp: pt.ChatResponse = msh.unmarshal(lj, pt.ChatResponse)

-
-
-                   check.state(not lp.message.tool_calls)
-
-                   if (c := lp.message.content):
-                       await sink.emit(AiChoicesDeltas([AiChoiceDeltas([ContentAiDelta(
-                           c,
-                       )])]))
+                   if (ds := build_mc_ai_choice_deltas(lp)).deltas:
+                       await sink.emit(AiChoicesDeltas([ds]))

                if not b:
                    return []
```
```diff
@@ -0,0 +1,144 @@
+import itertools
+
+from omlish import check
+
+from .....backends.ollama import protocol as pt
+from ....chat.choices.services import ChatChoicesResponse
+from ....chat.choices.stream.types import AiChoiceDeltas
+from ....chat.choices.types import AiChoice
+from ....chat.messages import AiMessage
+from ....chat.messages import AnyAiMessage
+from ....chat.messages import Chat
+from ....chat.messages import SystemMessage
+from ....chat.messages import ToolUseMessage
+from ....chat.messages import ToolUseResultMessage
+from ....chat.messages import UserMessage
+from ....chat.stream.types import AiDelta
+from ....chat.stream.types import ContentAiDelta
+from ....chat.stream.types import ToolUseAiDelta
+from ....chat.tools.types import Tool
+from ....content.transform.prepare import prepare_content_str
+from ....tools.jsonschema import build_tool_spec_params_json_schema
+from ....tools.types import ToolUse
+
+
+##
+
+
+def build_ol_request_messages(chat: Chat) -> list[pt.Message]:
+    ol_msgs: list[pt.Message] = []
+
+    for _, g in itertools.groupby(chat, lambda mc_m: isinstance(mc_m, AnyAiMessage)):
+        mc_msgs = list(g)
+
+        if isinstance(mc_msgs[0], AnyAiMessage):
+            tups: list[tuple[AiMessage | None, list[ToolUseMessage]]] = []
+            for mc_msg in mc_msgs:
+                if isinstance(mc_msg, AiMessage):
+                    tups.append((mc_msg, []))
+
+                elif isinstance(mc_msg, ToolUseMessage):
+                    if not tups:
+                        tups.append((None, []))
+                    tups[-1][1].append(mc_msg)
+
+                else:
+                    raise TypeError(mc_msg)
+
+            for mc_ai_msg, mc_tu_msgs in tups:
+                ol_msgs.append(pt.Message(
+                    role='assistant',
+                    content=check.isinstance(mc_ai_msg.c, str) if mc_ai_msg is not None else None,
+                    tool_calls=[
+                        pt.Message.ToolCall(
+                            function=pt.Message.ToolCall.Function(
+                                name=mc_tu_msg.tu.name,
+                                arguments=mc_tu_msg.tu.args,
+                            ),
+                            id=check.not_none(mc_tu_msg.tu.id),
+                        )
+                        for mc_tu_msg in mc_tu_msgs
+                    ] if mc_tu_msgs else None,
+                ))
+
+        else:
+            for mc_msg in mc_msgs:
+                if isinstance(mc_msg, SystemMessage):
+                    ol_msgs.append(pt.Message(
+                        role='system',
+                        content=check.isinstance(mc_msg.c, str),
+                    ))
+
+                elif isinstance(mc_msg, UserMessage):
+                    ol_msgs.append(pt.Message(
+                        role='user',
+                        content=check.isinstance(mc_msg.c, str),
+                    ))
+
+                elif isinstance(mc_msg, ToolUseResultMessage):
+                    ol_msgs.append(pt.Message(
+                        role='tool',
+                        tool_name=mc_msg.tur.name,
+                        content=check.isinstance(mc_msg.tur.c, str),
+                    ))
+
+                else:
+                    raise TypeError(mc_msg)
+
+    return ol_msgs
+
+
+def build_ol_request_tool(t: Tool) -> pt.Tool:
+    return pt.Tool(
+        function=pt.Tool.Function(
+            name=check.not_none(t.spec.name),
+            description=prepare_content_str(t.spec.desc) if t.spec.desc is not None else None,
+            parameters=build_tool_spec_params_json_schema(t.spec),
+        ),
+    )
+
+
+def build_mc_choices_response(ol_resp: pt.ChatResponse) -> ChatChoicesResponse:
+    ol_msg = ol_resp.message
+
+    lst: list[AnyAiMessage] = []
+
+    if ol_msg.role in (None, 'assistant'):
+        if ol_msg.content is not None:
+            lst.append(AiMessage(
+                check.isinstance(ol_msg.content, str),
+            ))
+
+        for ol_tc in ol_msg.tool_calls or []:
+            lst.append(ToolUseMessage(ToolUse(
+                id=ol_tc.id,
+                name=ol_tc.function.name,
+                args=ol_tc.function.arguments,
+            )))
+
+    else:
+        raise ValueError(ol_msg)
+
+    return ChatChoicesResponse([AiChoice(lst)])
+
+
+def build_mc_ai_choice_deltas(ol_resp: pt.ChatResponse) -> AiChoiceDeltas:
+    ol_msg = ol_resp.message
+
+    if ol_msg.role in (None, 'assistant'):
+        lst: list[AiDelta] = []
+
+        if ol_msg.content is not None:
+            lst.append(ContentAiDelta(ol_msg.content))
+
+        for tc in ol_msg.tool_calls or []:
+            lst.append(ToolUseAiDelta(
+                id=tc.id,
+                name=check.not_none(tc.function.name),
+                args=tc.function.arguments,
+            ))
+
+        return AiChoiceDeltas(lst)
+
+    else:
+        raise ValueError(ol_msg)
```
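The `build_ol_request_messages` function in this new file hinges on one trick: `itertools.groupby` with an `isinstance` key splits the chat into alternating runs of AI-authored and non-AI messages, so each AI run can be folded into assistant messages that carry their accumulated tool calls. A stand-alone sketch of just that grouping step, with the message classes reduced to hypothetical stand-ins:

```python
import itertools


class UserMsg: pass
class AiMsg: pass
class ToolUseMsg(AiMsg): pass  # stand-in: tool-use messages count as AI-authored


chat = [UserMsg(), AiMsg(), ToolUseMsg(), ToolUseMsg(), UserMsg(), AiMsg()]

for is_ai, run in itertools.groupby(chat, lambda m: isinstance(m, AiMsg)):
    msgs = list(run)  # consume the group before the iterator advances
    print(is_ai, [type(m).__name__ for m in msgs])

# False ['UserMsg']
# True ['AiMsg', 'ToolUseMsg', 'ToolUseMsg']
# False ['UserMsg']
# True ['AiMsg']
```

Materializing each group with `list(run)` before moving on is essential, since `groupby` invalidates a group once the underlying iterator advances.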
```diff
@@ -22,7 +22,7 @@ from ....chat.stream.types import ContentAiDelta
 from ....chat.stream.types import PartialToolUseAiDelta
 from ....chat.tools.types import Tool
 from ....content.json import JsonContent
-from ....content.prepare import prepare_content_str
+from ....content.transform.prepare import prepare_content_str
 from ....llms.types import MaxCompletionTokens
 from ....llms.types import MaxTokens
 from ....llms.types import Temperature
@@ -145,7 +145,8 @@ def build_mc_ai_delta(delta: pt.ChatCompletionChunkChoiceDelta) -> AiDelta:
        )

    else:
-
+       # FIXME: no
+       return ContentAiDelta('')


 ##
@@ -216,7 +217,7 @@ class OpenaiChatRequestHandler:
            pt.ChatCompletionRequestTool(
                function=pt.ChatCompletionRequestTool.Function(
                    name=check.not_none(ts.name),
-                   description=prepare_content_str(ts.desc),
+                   description=prepare_content_str(ts.desc) if ts.desc is not None else None,
                    parameters=build_tool_spec_params_json_schema(ts),
                ),
            )
```
```diff
@@ -32,6 +32,8 @@ _GPT_MODEL_NAMES = [
    'gpt-5-nano',

    'gpt-5.1',
+
+    'gpt-5.2',
 ]


@@ -48,7 +50,7 @@ CHAT_MODEL_NAMES = ModelNameCollection(
        for n in _GPT_MODEL_NAMES
    },

-   'gpt': 'gpt-5.1',
+   'gpt': 'gpt-5.2',
    'gpt-mini': 'gpt-5-mini',

    #
```
```diff
@@ -4,6 +4,7 @@ https://platform.openai.com/docs/api-reference/responses-streaming
 import typing as ta

 from omlish import check
+from omlish import dataclasses as dc
 from omlish import marshal as msh
 from omlish import typedvalues as tv
 from omlish.formats import json
```
```diff
@@ -35,6 +36,12 @@ from .names import CHAT_MODEL_NAMES
 ##


+@dc.dataclass()
+class OpenaiChatChoicesStreamServiceError(Exception):
+    status: int
+    data: ta.Any | None = None
+
+
 # @omlish-manifest $.minichain.registries.manifests.RegistryManifest(
 #     name='openai',
 #     type='ChatChoicesStreamService',
```
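The new error type is a dataclass over `Exception`, so the HTTP status and the decoded body travel with the raised error as named fields instead of being flattened into a message string. A sketch of the same shape using stdlib `dataclasses` (assuming omlish's decorator mirrors stdlib behavior here; the names below are illustrative):

```python
import dataclasses as dc
import typing as ta


@dc.dataclass()
class StreamServiceError(Exception):
    status: int
    data: ta.Any | None = None


try:
    raise StreamServiceError(502, {'error': 'bad gateway'})
except StreamServiceError as e:
    print(e.status, e.data)  # 502 {'error': 'bad gateway'}
```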
```diff
@@ -90,18 +97,43 @@ class OpenaiChatChoicesStreamService:
        http_client = await rs.enter_async_context(http.manage_async_client(self._http_client))
        http_response = await rs.enter_async_context(await http_client.stream_request(http_request))

+       if http_response.status != 200:
+           data: ta.Any
+           try:
+               data = await http_response.stream.readall()
+           except Exception as e:  # noqa
+               data = e
+           try:
+               data_obj = json.loads(data.decode())
+           except Exception as e:  # noqa
+               pass
+           else:
+               data = data_obj
+           raise OpenaiChatChoicesStreamServiceError(http_response.status, data)
+
        async def inner(sink: StreamResponseSink[AiChoicesDeltas]) -> ta.Sequence[ChatChoicesOutputs]:
            db = DelimitingBuffer([b'\r', b'\n', b'\r\n'])
            sd = sse.SseDecoder()
+
+           # bs = []
+           # ls = []
+           # sos = []
+
            while True:
                b = await http_response.stream.read1(self.READ_CHUNK_SIZE)
+               # bs.append(b)
+
                for l in db.feed(b):
+                   # ls.append(l)
+
                    if isinstance(l, DelimitingBuffer.Incomplete):
                        # FIXME: handle
-
+                       raise TypeError(l)

                    # FIXME: https://platform.openai.com/docs/guides/function-calling?api-mode=responses#streaming
                    for so in sd.process_line(l):
+                       # sos.append(so)
+
                        if isinstance(so, sse.SseEvent) and so.type == b'message':
                            ss = so.data.decode('utf-8')
                            if ss == '[DONE]':
```
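The non-200 branch added above reads the error body defensively: capture the raw bytes (or the read exception itself), then try to upgrade them to parsed JSON, keeping whatever survives. The same ladder in isolation, as a hypothetical stdlib-only helper:

```python
import json


def best_effort_error_payload(raw: bytes) -> object:
    """Prefer parsed JSON; fall back to the raw bytes on any decode failure."""
    try:
        return json.loads(raw.decode())
    except Exception:  # malformed or non-JSON bodies are kept as-is
        return raw


print(best_effort_error_payload(b'{"error": {"message": "rate limited"}}'))  # parsed dict
print(best_effort_error_payload(b'<html>502</html>'))                        # raw bytes
```

This way callers always get the most structured view of the failure that was actually recoverable from the wire.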
```diff
@@ -1,23 +1,26 @@
 import typing as ta

-import sentencepiece as spm
-
 from omlish import check
+from omlish import lang

 from .... import tokens as tks


+with lang.auto_proxy_import(globals()):
+    import sentencepiece as spm
+
+
 ##


-def build_vocab(spm_tokenizer: spm.SentencePieceProcessor) -> tks.Vocab:
+def build_vocab(spm_tokenizer: 'spm.SentencePieceProcessor') -> tks.Vocab:
     return tks.Vocab([
         (ta.cast(tks.Token, i), tks.TokenStr(spm_tokenizer.id_to_piece(i)))  # noqa
         for i in range(spm_tokenizer.get_piece_size())  # noqa
     ])


-def build_specials(spm_tokenizer: spm.SentencePieceProcessor) -> tks.SpecialTokens:
+def build_specials(spm_tokenizer: 'spm.SentencePieceProcessor') -> tks.SpecialTokens:
     # FIXME
     return tks.SpecialTokens([])

@@ -28,7 +31,7 @@ def build_specials(spm_tokenizer: spm.SentencePieceProcessor) -> tks.SpecialTokens:
 class SentencepieceTokenizer(tks.BaseTokenizer):
     def __init__(
         self,
-        spm_tokenizer: spm.SentencePieceProcessor,
+        spm_tokenizer: 'spm.SentencePieceProcessor',
     ) -> None:
         self._spm_tokenizer = check.isinstance(spm_tokenizer, spm.SentencePieceProcessor)

@@ -38,7 +41,7 @@ class SentencepieceTokenizer(tks.BaseTokenizer):
     )

     @property
-    def spm_tokenizer(self) -> spm.SentencePieceProcessor:
+    def spm_tokenizer(self) -> 'spm.SentencePieceProcessor':
         return self._spm_tokenizer

     #
```
```diff
@@ -4,7 +4,6 @@ import typing as ta
 from omlish import check
 from omlish import lang

-from .....backends.tinygrad.models import llama3 as tgl3
 from ....chat.choices.services import ChatChoicesOptions
 from ....chat.choices.services import ChatChoicesRequest
 from ....chat.choices.services import ChatChoicesResponse
@@ -28,6 +27,10 @@ from ....stream.services import StreamResponseSink
 from ....stream.services import new_stream_response


+with lang.auto_proxy_import(globals()):
+    from .....backends.tinygrad.models import llama3 as tgl3
+
+
 ##


@@ -39,7 +42,7 @@ def _load_model(
     *,
     size: str | None = None,
     temperature: float | None = None,
-) -> tgl3.Llama3Llm:
+) -> 'tgl3.Llama3Llm':
     if size is None:
         size = DEFAULT_SIZE
     if temperature is None:
@@ -58,7 +61,7 @@ def _load_model(


 def _prepare_toks(
-    llm: tgl3.Llama3Llm,
+    llm: 'tgl3.Llama3Llm',
     chat: Chat,
     options: ta.Sequence[ChatChoicesOptions],
 ) -> list[int]:
@@ -100,7 +103,7 @@ class BaseTinygradLlama3ChatService(lang.ExitStacked, lang.Abstract):
         self._temperature = temperature

     @lang.cached_function(transient=True)
-    def _load_model(self) -> tgl3.Llama3Llm:
+    def _load_model(self) -> 'tgl3.Llama3Llm':
         check.not_none(self._exit_stack)

         return _load_model(
```
```diff
@@ -1,23 +1,26 @@
 import typing as ta

-import tokenizers as tos
-
 from omlish import check
+from omlish import lang

 from .... import tokens as tks


+with lang.auto_proxy_import(globals()):
+    import tokenizers as tos
+
+
 ##


-def build_vocab(tos_tokenizer: tos.Tokenizer) -> tks.Vocab:
+def build_vocab(tos_tokenizer: 'tos.Tokenizer') -> tks.Vocab:
     return tks.Vocab([
         (ta.cast(tks.Token, i), tks.TokenStr(s))
         for s, i in tos_tokenizer.get_vocab().items()
     ])


-def build_specials(tos_tokenizer: tos.Tokenizer) -> tks.SpecialTokens:
+def build_specials(tos_tokenizer: 'tos.Tokenizer') -> tks.SpecialTokens:
     # FIXME
     return tks.SpecialTokens([])

@@ -28,7 +31,7 @@ def build_specials(tos_tokenizer: tos.Tokenizer) -> tks.SpecialTokens:
 class TokenizersTokenizer(tks.BaseTokenizer):
     def __init__(
         self,
-        tos_tokenizer: tos.Tokenizer,
+        tos_tokenizer: 'tos.Tokenizer',
     ) -> None:
         self._tos_tokenizer = check.isinstance(tos_tokenizer, tos.Tokenizer)

@@ -38,7 +41,7 @@ class TokenizersTokenizer(tks.BaseTokenizer):
     )

     @property
-    def tos_tokenizer(self) -> tos.Tokenizer:
+    def tos_tokenizer(self) -> 'tos.Tokenizer':
         return self._tos_tokenizer

     #
```
```diff
@@ -1,7 +1,6 @@
 import typing as ta

-
-
+from omlish import lang
 from omlish import typedvalues as tv

 from ....configs import Config
@@ -13,6 +12,10 @@ from ....vectors.embeddings import static_check_is_embedding_service
 from ....vectors.types import Vector


+with lang.auto_proxy_import(globals()):
+    import sentence_transformers as stfm
+
+
 ##

```