ommlds 0.0.0.dev456__py3-none-any.whl → 0.0.0.dev485__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ommlds/.omlish-manifests.json +314 -33
- ommlds/__about__.py +15 -9
- ommlds/_hacks/__init__.py +4 -0
- ommlds/_hacks/funcs.py +110 -0
- ommlds/_hacks/names.py +158 -0
- ommlds/_hacks/params.py +73 -0
- ommlds/_hacks/patches.py +0 -3
- ommlds/backends/anthropic/protocol/__init__.py +13 -1
- ommlds/backends/anthropic/protocol/_dataclasses.py +1625 -0
- ommlds/backends/anthropic/protocol/sse/assemble.py +22 -6
- ommlds/backends/anthropic/protocol/sse/events.py +13 -0
- ommlds/backends/google/protocol/__init__.py +13 -0
- ommlds/backends/google/protocol/_dataclasses.py +5997 -0
- ommlds/backends/google/protocol/types.py +5 -1
- ommlds/backends/groq/__init__.py +7 -0
- ommlds/backends/groq/_dataclasses.py +3901 -0
- ommlds/backends/groq/_marshal.py +23 -0
- ommlds/backends/groq/protocol.py +249 -0
- ommlds/backends/llamacpp/logging.py +4 -1
- ommlds/backends/mlx/caching.py +7 -3
- ommlds/backends/mlx/cli.py +10 -7
- ommlds/backends/mlx/generation.py +18 -16
- ommlds/backends/mlx/limits.py +10 -6
- ommlds/backends/mlx/loading.py +65 -5
- ommlds/backends/ollama/__init__.py +7 -0
- ommlds/backends/ollama/_dataclasses.py +3458 -0
- ommlds/backends/ollama/protocol.py +170 -0
- ommlds/backends/openai/protocol/__init__.py +15 -1
- ommlds/backends/openai/protocol/_dataclasses.py +7708 -0
- ommlds/backends/tavily/__init__.py +7 -0
- ommlds/backends/tavily/_dataclasses.py +1734 -0
- ommlds/backends/tavily/protocol.py +301 -0
- ommlds/backends/tinygrad/models/llama3/__init__.py +22 -14
- ommlds/backends/transformers/__init__.py +14 -0
- ommlds/backends/transformers/filecache.py +109 -0
- ommlds/backends/transformers/streamers.py +73 -0
- ommlds/cli/__init__.py +7 -0
- ommlds/cli/_dataclasses.py +2562 -0
- ommlds/cli/asyncs.py +30 -0
- ommlds/cli/backends/catalog.py +93 -0
- ommlds/cli/backends/configs.py +9 -0
- ommlds/cli/backends/inject.py +31 -36
- ommlds/cli/backends/injection.py +16 -0
- ommlds/cli/backends/types.py +46 -0
- ommlds/cli/content/messages.py +34 -0
- ommlds/cli/content/strings.py +42 -0
- ommlds/cli/inject.py +15 -32
- ommlds/cli/inputs/__init__.py +0 -0
- ommlds/cli/inputs/asyncs.py +32 -0
- ommlds/cli/inputs/sync.py +75 -0
- ommlds/cli/main.py +267 -128
- ommlds/cli/rendering/__init__.py +0 -0
- ommlds/cli/rendering/configs.py +9 -0
- ommlds/cli/rendering/inject.py +31 -0
- ommlds/cli/rendering/markdown.py +52 -0
- ommlds/cli/rendering/raw.py +73 -0
- ommlds/cli/rendering/types.py +21 -0
- ommlds/cli/secrets.py +21 -0
- ommlds/cli/sessions/base.py +1 -1
- ommlds/cli/sessions/chat/chat/__init__.py +0 -0
- ommlds/cli/sessions/chat/chat/ai/__init__.py +0 -0
- ommlds/cli/sessions/chat/chat/ai/configs.py +11 -0
- ommlds/cli/sessions/chat/chat/ai/inject.py +74 -0
- ommlds/cli/sessions/chat/chat/ai/injection.py +14 -0
- ommlds/cli/sessions/chat/chat/ai/rendering.py +70 -0
- ommlds/cli/sessions/chat/chat/ai/services.py +79 -0
- ommlds/cli/sessions/chat/chat/ai/tools.py +44 -0
- ommlds/cli/sessions/chat/chat/ai/types.py +28 -0
- ommlds/cli/sessions/chat/chat/state/__init__.py +0 -0
- ommlds/cli/sessions/chat/chat/state/configs.py +11 -0
- ommlds/cli/sessions/chat/chat/state/inject.py +36 -0
- ommlds/cli/sessions/chat/chat/state/inmemory.py +33 -0
- ommlds/cli/sessions/chat/chat/state/storage.py +52 -0
- ommlds/cli/sessions/chat/chat/state/types.py +38 -0
- ommlds/cli/sessions/chat/chat/user/__init__.py +0 -0
- ommlds/cli/sessions/chat/chat/user/configs.py +17 -0
- ommlds/cli/sessions/chat/chat/user/inject.py +62 -0
- ommlds/cli/sessions/chat/chat/user/interactive.py +31 -0
- ommlds/cli/sessions/chat/chat/user/oneshot.py +25 -0
- ommlds/cli/sessions/chat/chat/user/types.py +15 -0
- ommlds/cli/sessions/chat/configs.py +27 -0
- ommlds/cli/sessions/chat/driver.py +43 -0
- ommlds/cli/sessions/chat/inject.py +33 -65
- ommlds/cli/sessions/chat/phases/__init__.py +0 -0
- ommlds/cli/sessions/chat/phases/inject.py +27 -0
- ommlds/cli/sessions/chat/phases/injection.py +14 -0
- ommlds/cli/sessions/chat/phases/manager.py +29 -0
- ommlds/cli/sessions/chat/phases/types.py +29 -0
- ommlds/cli/sessions/chat/session.py +27 -0
- ommlds/cli/sessions/chat/tools/__init__.py +0 -0
- ommlds/cli/sessions/chat/tools/configs.py +22 -0
- ommlds/cli/sessions/chat/tools/confirmation.py +46 -0
- ommlds/cli/sessions/chat/tools/execution.py +66 -0
- ommlds/cli/sessions/chat/tools/fs/__init__.py +0 -0
- ommlds/cli/sessions/chat/tools/fs/configs.py +12 -0
- ommlds/cli/sessions/chat/tools/fs/inject.py +35 -0
- ommlds/cli/sessions/chat/tools/inject.py +88 -0
- ommlds/cli/sessions/chat/tools/injection.py +44 -0
- ommlds/cli/sessions/chat/tools/rendering.py +58 -0
- ommlds/cli/sessions/chat/tools/todo/__init__.py +0 -0
- ommlds/cli/sessions/chat/tools/todo/configs.py +12 -0
- ommlds/cli/sessions/chat/tools/todo/inject.py +31 -0
- ommlds/cli/sessions/chat/tools/weather/__init__.py +0 -0
- ommlds/cli/sessions/chat/tools/weather/configs.py +12 -0
- ommlds/cli/sessions/chat/tools/weather/inject.py +22 -0
- ommlds/cli/{tools/weather.py → sessions/chat/tools/weather/tools.py} +1 -1
- ommlds/cli/sessions/completion/configs.py +21 -0
- ommlds/cli/sessions/completion/inject.py +42 -0
- ommlds/cli/sessions/completion/session.py +35 -0
- ommlds/cli/sessions/embedding/configs.py +21 -0
- ommlds/cli/sessions/embedding/inject.py +42 -0
- ommlds/cli/sessions/embedding/session.py +33 -0
- ommlds/cli/sessions/inject.py +28 -11
- ommlds/cli/state/__init__.py +0 -0
- ommlds/cli/state/inject.py +28 -0
- ommlds/cli/{state.py → state/storage.py} +41 -24
- ommlds/minichain/__init__.py +46 -17
- ommlds/minichain/_dataclasses.py +15401 -0
- ommlds/minichain/backends/catalogs/base.py +20 -1
- ommlds/minichain/backends/catalogs/simple.py +2 -2
- ommlds/minichain/backends/catalogs/strings.py +10 -8
- ommlds/minichain/backends/impls/anthropic/chat.py +31 -65
- ommlds/minichain/backends/impls/anthropic/names.py +3 -4
- ommlds/minichain/backends/impls/anthropic/protocol.py +109 -0
- ommlds/minichain/backends/impls/anthropic/stream.py +53 -31
- ommlds/minichain/backends/impls/duckduckgo/search.py +5 -1
- ommlds/minichain/backends/impls/dummy/__init__.py +0 -0
- ommlds/minichain/backends/impls/dummy/chat.py +69 -0
- ommlds/minichain/backends/impls/google/chat.py +9 -2
- ommlds/minichain/backends/impls/google/search.py +6 -1
- ommlds/minichain/backends/impls/google/stream.py +122 -32
- ommlds/minichain/backends/impls/groq/__init__.py +0 -0
- ommlds/minichain/backends/impls/groq/chat.py +75 -0
- ommlds/minichain/backends/impls/groq/names.py +48 -0
- ommlds/minichain/backends/impls/groq/protocol.py +143 -0
- ommlds/minichain/backends/impls/groq/stream.py +125 -0
- ommlds/minichain/backends/impls/huggingface/repos.py +1 -5
- ommlds/minichain/backends/impls/llamacpp/chat.py +15 -3
- ommlds/minichain/backends/impls/llamacpp/completion.py +7 -3
- ommlds/minichain/backends/impls/llamacpp/stream.py +38 -19
- ommlds/minichain/backends/impls/mistral.py +9 -2
- ommlds/minichain/backends/impls/mlx/chat.py +100 -23
- ommlds/minichain/backends/impls/ollama/__init__.py +0 -0
- ommlds/minichain/backends/impls/ollama/chat.py +199 -0
- ommlds/minichain/backends/impls/openai/chat.py +14 -7
- ommlds/minichain/backends/impls/openai/completion.py +9 -2
- ommlds/minichain/backends/impls/openai/embedding.py +9 -2
- ommlds/minichain/backends/impls/openai/format.py +115 -109
- ommlds/minichain/backends/impls/openai/names.py +31 -5
- ommlds/minichain/backends/impls/openai/stream.py +33 -27
- ommlds/minichain/backends/impls/sentencepiece/tokens.py +9 -6
- ommlds/minichain/backends/impls/tavily.py +66 -0
- ommlds/minichain/backends/impls/tinygrad/chat.py +17 -14
- ommlds/minichain/backends/impls/tokenizers/tokens.py +9 -6
- ommlds/minichain/backends/impls/transformers/sentence.py +5 -2
- ommlds/minichain/backends/impls/transformers/tokens.py +10 -7
- ommlds/minichain/backends/impls/transformers/transformers.py +139 -20
- ommlds/minichain/backends/strings/parsing.py +1 -1
- ommlds/minichain/backends/strings/resolving.py +4 -1
- ommlds/minichain/chat/choices/stream/__init__.py +0 -0
- ommlds/minichain/chat/choices/stream/adapters.py +35 -0
- ommlds/minichain/chat/choices/stream/joining.py +31 -0
- ommlds/minichain/chat/choices/stream/services.py +45 -0
- ommlds/minichain/chat/choices/stream/types.py +43 -0
- ommlds/minichain/chat/stream/_marshal.py +4 -4
- ommlds/minichain/chat/stream/joining.py +85 -0
- ommlds/minichain/chat/stream/services.py +15 -15
- ommlds/minichain/chat/stream/types.py +24 -18
- ommlds/minichain/llms/types.py +4 -0
- ommlds/minichain/registries/globals.py +18 -4
- ommlds/minichain/resources.py +28 -3
- ommlds/minichain/search.py +1 -1
- ommlds/minichain/standard.py +8 -0
- ommlds/minichain/stream/services.py +19 -16
- ommlds/minichain/tools/reflect.py +5 -1
- ommlds/nanochat/LICENSE +21 -0
- ommlds/nanochat/__init__.py +0 -0
- ommlds/nanochat/rustbpe/LICENSE +21 -0
- ommlds/nanochat/tokenizers.py +406 -0
- ommlds/specs/__init__.py +0 -0
- ommlds/specs/mcp/__init__.py +0 -0
- ommlds/specs/mcp/_marshal.py +23 -0
- ommlds/specs/mcp/clients.py +146 -0
- ommlds/specs/mcp/protocol.py +371 -0
- ommlds/tools/git.py +13 -6
- ommlds/tools/ocr.py +1 -8
- ommlds/wiki/analyze.py +2 -2
- ommlds/wiki/text/mfh.py +1 -5
- ommlds/wiki/text/wtp.py +1 -3
- ommlds/wiki/utils/xml.py +5 -5
- {ommlds-0.0.0.dev456.dist-info → ommlds-0.0.0.dev485.dist-info}/METADATA +22 -19
- {ommlds-0.0.0.dev456.dist-info → ommlds-0.0.0.dev485.dist-info}/RECORD +198 -95
- ommlds/cli/backends/standard.py +0 -20
- ommlds/cli/sessions/chat/base.py +0 -42
- ommlds/cli/sessions/chat/code.py +0 -129
- ommlds/cli/sessions/chat/interactive.py +0 -71
- ommlds/cli/sessions/chat/printing.py +0 -97
- ommlds/cli/sessions/chat/prompt.py +0 -151
- ommlds/cli/sessions/chat/state.py +0 -110
- ommlds/cli/sessions/chat/tools.py +0 -100
- ommlds/cli/sessions/completion/completion.py +0 -44
- ommlds/cli/sessions/embedding/embedding.py +0 -42
- ommlds/cli/tools/config.py +0 -14
- ommlds/cli/tools/inject.py +0 -75
- ommlds/minichain/backends/impls/openai/format2.py +0 -210
- ommlds/minichain/chat/stream/adapters.py +0 -80
- /ommlds/{huggingface.py → backends/huggingface.py} +0 -0
- /ommlds/cli/{tools → content}/__init__.py +0 -0
- {ommlds-0.0.0.dev456.dist-info → ommlds-0.0.0.dev485.dist-info}/WHEEL +0 -0
- {ommlds-0.0.0.dev456.dist-info → ommlds-0.0.0.dev485.dist-info}/entry_points.txt +0 -0
- {ommlds-0.0.0.dev456.dist-info → ommlds-0.0.0.dev485.dist-info}/licenses/LICENSE +0 -0
- {ommlds-0.0.0.dev456.dist-info → ommlds-0.0.0.dev485.dist-info}/top_level.txt +0 -0
|
@@ -4,23 +4,22 @@ import typing as ta
|
|
|
4
4
|
from omlish import check
|
|
5
5
|
from omlish import lang
|
|
6
6
|
|
|
7
|
-
from .....backends.tinygrad.models import llama3 as tgl3
|
|
8
7
|
from ....chat.choices.services import ChatChoicesOptions
|
|
9
8
|
from ....chat.choices.services import ChatChoicesRequest
|
|
10
9
|
from ....chat.choices.services import ChatChoicesResponse
|
|
11
10
|
from ....chat.choices.services import static_check_is_chat_choices_service
|
|
11
|
+
from ....chat.choices.stream.services import ChatChoicesStreamRequest
|
|
12
|
+
from ....chat.choices.stream.services import ChatChoicesStreamResponse
|
|
13
|
+
from ....chat.choices.stream.services import static_check_is_chat_choices_stream_service
|
|
14
|
+
from ....chat.choices.stream.types import AiChoiceDeltas
|
|
15
|
+
from ....chat.choices.stream.types import AiChoicesDeltas
|
|
12
16
|
from ....chat.choices.types import AiChoice
|
|
13
17
|
from ....chat.choices.types import ChatChoicesOutputs
|
|
14
18
|
from ....chat.messages import AiMessage
|
|
15
19
|
from ....chat.messages import Chat
|
|
16
20
|
from ....chat.messages import SystemMessage
|
|
17
21
|
from ....chat.messages import UserMessage
|
|
18
|
-
from ....chat.stream.services import ChatChoicesStreamRequest
|
|
19
|
-
from ....chat.stream.services import ChatChoicesStreamResponse
|
|
20
|
-
from ....chat.stream.services import static_check_is_chat_choices_stream_service
|
|
21
|
-
from ....chat.stream.types import AiChoiceDeltas
|
|
22
|
-
from ....chat.stream.types import AiChoicesDeltas
|
|
23
|
-
from ....chat.stream.types import ContentAiChoiceDelta
|
|
22
|
+
from ....chat.stream.types import ContentAiDelta
|
|
24
23
|
from ....chat.types import ChatOption
|
|
25
24
|
from ....llms.types import LlmOption
|
|
26
25
|
from ....resources import UseResources
|
|
@@ -28,6 +27,10 @@ from ....stream.services import StreamResponseSink
|
|
|
28
27
|
from ....stream.services import new_stream_response
|
|
29
28
|
|
|
30
29
|
|
|
30
|
+
with lang.auto_proxy_import(globals()):
|
|
31
|
+
from .....backends.tinygrad.models import llama3 as tgl3
|
|
32
|
+
|
|
33
|
+
|
|
31
34
|
##
|
|
32
35
|
|
|
33
36
|
|
|
@@ -39,7 +42,7 @@ def _load_model(
|
|
|
39
42
|
*,
|
|
40
43
|
size: str | None = None,
|
|
41
44
|
temperature: float | None = None,
|
|
42
|
-
) -> tgl3.Llama3Llm:
|
|
45
|
+
) -> 'tgl3.Llama3Llm':
|
|
43
46
|
if size is None:
|
|
44
47
|
size = DEFAULT_SIZE
|
|
45
48
|
if temperature is None:
|
|
@@ -58,7 +61,7 @@ def _load_model(
|
|
|
58
61
|
|
|
59
62
|
|
|
60
63
|
def _prepare_toks(
|
|
61
|
-
llm: tgl3.Llama3Llm,
|
|
64
|
+
llm: 'tgl3.Llama3Llm',
|
|
62
65
|
chat: Chat,
|
|
63
66
|
options: ta.Sequence[ChatChoicesOptions],
|
|
64
67
|
) -> list[int]:
|
|
@@ -100,7 +103,7 @@ class BaseTinygradLlama3ChatService(lang.ExitStacked, lang.Abstract):
|
|
|
100
103
|
self._temperature = temperature
|
|
101
104
|
|
|
102
105
|
@lang.cached_function(transient=True)
|
|
103
|
-
def _load_model(self) -> tgl3.Llama3Llm:
|
|
106
|
+
def _load_model(self) -> 'tgl3.Llama3Llm':
|
|
104
107
|
check.not_none(self._exit_stack)
|
|
105
108
|
|
|
106
109
|
return _load_model(
|
|
@@ -113,7 +116,7 @@ class BaseTinygradLlama3ChatService(lang.ExitStacked, lang.Abstract):
|
|
|
113
116
|
|
|
114
117
|
|
|
115
118
|
# @omlish-manifest $.minichain.registries.manifests.RegistryManifest(
|
|
116
|
-
# name='
|
|
119
|
+
# name='tinygrad-llama3',
|
|
117
120
|
# type='ChatChoicesService',
|
|
118
121
|
# )
|
|
119
122
|
@static_check_is_chat_choices_service
|
|
@@ -133,7 +136,7 @@ class TinygradLlama3ChatChoicesService(BaseTinygradLlama3ChatService):
|
|
|
133
136
|
|
|
134
137
|
|
|
135
138
|
# @omlish-manifest $.minichain.registries.manifests.RegistryManifest(
|
|
136
|
-
# name='
|
|
139
|
+
# name='tinygrad-llama3',
|
|
137
140
|
# type='ChatChoicesStreamService',
|
|
138
141
|
# )
|
|
139
142
|
@static_check_is_chat_choices_stream_service
|
|
@@ -151,7 +154,7 @@ class TinygradLlama3ChatChoicesStreamService(BaseTinygradLlama3ChatService):
|
|
|
151
154
|
for s in tgl3.run_llm(llm, toks):
|
|
152
155
|
await sink.emit(AiChoicesDeltas([
|
|
153
156
|
AiChoiceDeltas([
|
|
154
|
-
|
|
157
|
+
ContentAiDelta(s),
|
|
155
158
|
]),
|
|
156
159
|
]))
|
|
157
160
|
|
|
@@ -168,5 +171,5 @@ class TinygradLlama3ChatChoicesStreamService(BaseTinygradLlama3ChatService):
|
|
|
168
171
|
# 'ChatChoicesService',
|
|
169
172
|
# 'ChatChoicesStreamService',
|
|
170
173
|
# ],
|
|
171
|
-
# '
|
|
174
|
+
# 'tinygrad-llama3',
|
|
172
175
|
# )
|
|
@@ -1,23 +1,26 @@
|
|
|
1
1
|
import typing as ta
|
|
2
2
|
|
|
3
|
-
import tokenizers as tos
|
|
4
|
-
|
|
5
3
|
from omlish import check
|
|
4
|
+
from omlish import lang
|
|
6
5
|
|
|
7
6
|
from .... import tokens as tks
|
|
8
7
|
|
|
9
8
|
|
|
9
|
+
with lang.auto_proxy_import(globals()):
|
|
10
|
+
import tokenizers as tos
|
|
11
|
+
|
|
12
|
+
|
|
10
13
|
##
|
|
11
14
|
|
|
12
15
|
|
|
13
|
-
def build_vocab(tos_tokenizer: tos.Tokenizer) -> tks.Vocab:
|
|
16
|
+
def build_vocab(tos_tokenizer: 'tos.Tokenizer') -> tks.Vocab:
|
|
14
17
|
return tks.Vocab([
|
|
15
18
|
(ta.cast(tks.Token, i), tks.TokenStr(s))
|
|
16
19
|
for s, i in tos_tokenizer.get_vocab().items()
|
|
17
20
|
])
|
|
18
21
|
|
|
19
22
|
|
|
20
|
-
def build_specials(tos_tokenizer: tos.Tokenizer) -> tks.SpecialTokens:
|
|
23
|
+
def build_specials(tos_tokenizer: 'tos.Tokenizer') -> tks.SpecialTokens:
|
|
21
24
|
# FIXME
|
|
22
25
|
return tks.SpecialTokens([])
|
|
23
26
|
|
|
@@ -28,7 +31,7 @@ def build_specials(tos_tokenizer: tos.Tokenizer) -> tks.SpecialTokens:
|
|
|
28
31
|
class TokenizersTokenizer(tks.BaseTokenizer):
|
|
29
32
|
def __init__(
|
|
30
33
|
self,
|
|
31
|
-
tos_tokenizer: tos.Tokenizer,
|
|
34
|
+
tos_tokenizer: 'tos.Tokenizer',
|
|
32
35
|
) -> None:
|
|
33
36
|
self._tos_tokenizer = check.isinstance(tos_tokenizer, tos.Tokenizer)
|
|
34
37
|
|
|
@@ -38,7 +41,7 @@ class TokenizersTokenizer(tks.BaseTokenizer):
|
|
|
38
41
|
)
|
|
39
42
|
|
|
40
43
|
@property
|
|
41
|
-
def tos_tokenizer(self) -> tos.Tokenizer:
|
|
44
|
+
def tos_tokenizer(self) -> 'tos.Tokenizer':
|
|
42
45
|
return self._tos_tokenizer
|
|
43
46
|
|
|
44
47
|
#
|
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
import typing as ta
|
|
2
2
|
|
|
3
|
-
|
|
4
|
-
|
|
3
|
+
from omlish import lang
|
|
5
4
|
from omlish import typedvalues as tv
|
|
6
5
|
|
|
7
6
|
from ....configs import Config
|
|
@@ -13,6 +12,10 @@ from ....vectors.embeddings import static_check_is_embedding_service
|
|
|
13
12
|
from ....vectors.types import Vector
|
|
14
13
|
|
|
15
14
|
|
|
15
|
+
with lang.auto_proxy_import(globals()):
|
|
16
|
+
import sentence_transformers as stfm
|
|
17
|
+
|
|
18
|
+
|
|
16
19
|
##
|
|
17
20
|
|
|
18
21
|
|
|
@@ -1,17 +1,20 @@
|
|
|
1
1
|
import typing as ta
|
|
2
2
|
|
|
3
|
-
import transformers as tfm
|
|
4
|
-
|
|
5
3
|
from omlish import check
|
|
6
4
|
from omlish import collections as col
|
|
5
|
+
from omlish import lang
|
|
7
6
|
|
|
8
7
|
from .... import tokens as tks
|
|
9
8
|
|
|
10
9
|
|
|
10
|
+
with lang.auto_proxy_import(globals()):
|
|
11
|
+
import transformers as tfm
|
|
12
|
+
|
|
13
|
+
|
|
11
14
|
##
|
|
12
15
|
|
|
13
16
|
|
|
14
|
-
def build_vocab(tfm_tokenizer: tfm.PreTrainedTokenizerBase) -> tks.Vocab:
|
|
17
|
+
def build_vocab(tfm_tokenizer: 'tfm.PreTrainedTokenizerBase') -> tks.Vocab:
|
|
15
18
|
return tks.Vocab([
|
|
16
19
|
(ta.cast(tks.Token, i), tks.TokenStr(s))
|
|
17
20
|
for s, i in tfm_tokenizer.get_vocab().items()
|
|
@@ -32,7 +35,7 @@ SPECIAL_TOKEN_ATTR_MAP: col.BiMap[type[tks.SpecialToken], str] = col.make_bi_map
|
|
|
32
35
|
})
|
|
33
36
|
|
|
34
37
|
|
|
35
|
-
def build_specials(tfm_tokenizer: tfm.PreTrainedTokenizerBase) -> tks.SpecialTokens:
|
|
38
|
+
def build_specials(tfm_tokenizer: 'tfm.PreTrainedTokenizerBase') -> tks.SpecialTokens:
|
|
36
39
|
return tks.SpecialTokens.from_dict({
|
|
37
40
|
st: getattr(tfm_tokenizer, a)
|
|
38
41
|
for st, a in SPECIAL_TOKEN_ATTR_MAP.items()
|
|
@@ -45,7 +48,7 @@ def build_specials(tfm_tokenizer: tfm.PreTrainedTokenizerBase) -> tks.SpecialTok
|
|
|
45
48
|
class TransformersTokenizer(tks.BaseTokenizer):
|
|
46
49
|
def __init__(
|
|
47
50
|
self,
|
|
48
|
-
tfm_tokenizer: tfm.PreTrainedTokenizerBase,
|
|
51
|
+
tfm_tokenizer: 'tfm.PreTrainedTokenizerBase',
|
|
49
52
|
) -> None:
|
|
50
53
|
self._tfm_tokenizer = check.isinstance(tfm_tokenizer, tfm.PreTrainedTokenizerBase)
|
|
51
54
|
|
|
@@ -55,7 +58,7 @@ class TransformersTokenizer(tks.BaseTokenizer):
|
|
|
55
58
|
)
|
|
56
59
|
|
|
57
60
|
@property
|
|
58
|
-
def tfm_tokenizer(self) -> tfm.PreTrainedTokenizerBase:
|
|
61
|
+
def tfm_tokenizer(self) -> 'tfm.PreTrainedTokenizerBase':
|
|
59
62
|
return self._tfm_tokenizer
|
|
60
63
|
|
|
61
64
|
#
|
|
@@ -72,7 +75,7 @@ class TransformersTokenizer(tks.BaseTokenizer):
|
|
|
72
75
|
self,
|
|
73
76
|
tokens: ta.Iterable[tks.Token],
|
|
74
77
|
) -> str:
|
|
75
|
-
return self._tfm_tokenizer.decode(tokens)
|
|
78
|
+
return self._tfm_tokenizer.decode(tokens) # type: ignore[arg-type]
|
|
76
79
|
|
|
77
80
|
|
|
78
81
|
##
|
|
@@ -4,32 +4,57 @@ TODO:
|
|
|
4
4
|
- https://huggingface.co/blog/aifeifei798/transformers-streaming-output
|
|
5
5
|
"""
|
|
6
6
|
import sys
|
|
7
|
+
import threading
|
|
7
8
|
import typing as ta
|
|
8
9
|
|
|
9
|
-
import transformers as tfm
|
|
10
|
-
|
|
11
10
|
from omlish import check
|
|
12
11
|
from omlish import lang
|
|
13
12
|
from omlish import typedvalues as tv
|
|
13
|
+
from omlish.asyncs.asyncio.sync import AsyncioBufferRelay
|
|
14
14
|
|
|
15
15
|
from ....chat.choices.services import ChatChoicesRequest
|
|
16
16
|
from ....chat.choices.services import ChatChoicesResponse
|
|
17
17
|
from ....chat.choices.services import static_check_is_chat_choices_service
|
|
18
|
+
from ....chat.choices.stream.services import ChatChoicesStreamRequest
|
|
19
|
+
from ....chat.choices.stream.services import ChatChoicesStreamResponse
|
|
20
|
+
from ....chat.choices.stream.services import static_check_is_chat_choices_stream_service
|
|
21
|
+
from ....chat.choices.stream.types import AiChoiceDeltas # noqa
|
|
22
|
+
from ....chat.choices.stream.types import AiChoicesDeltas # noqa
|
|
18
23
|
from ....chat.choices.types import AiChoice
|
|
24
|
+
from ....chat.choices.types import ChatChoicesOutputs
|
|
19
25
|
from ....chat.messages import AiMessage
|
|
20
26
|
from ....chat.messages import Message
|
|
21
27
|
from ....chat.messages import SystemMessage
|
|
22
28
|
from ....chat.messages import ToolUseMessage
|
|
23
29
|
from ....chat.messages import ToolUseResultMessage
|
|
24
30
|
from ....chat.messages import UserMessage
|
|
31
|
+
from ....chat.stream.types import ContentAiDelta # noqa
|
|
25
32
|
from ....completion import CompletionRequest
|
|
26
33
|
from ....completion import CompletionResponse
|
|
27
34
|
from ....completion import static_check_is_completion_service
|
|
28
35
|
from ....configs import Config
|
|
29
36
|
from ....models.configs import ModelPath
|
|
37
|
+
from ....resources import UseResources
|
|
38
|
+
from ....stream.services import StreamResponseSink
|
|
39
|
+
from ....stream.services import new_stream_response
|
|
30
40
|
from ...impls.huggingface.configs import HuggingfaceHubToken
|
|
31
41
|
|
|
32
42
|
|
|
43
|
+
with lang.auto_proxy_import(globals()):
|
|
44
|
+
import transformers as tfm
|
|
45
|
+
|
|
46
|
+
from .....backends import transformers as tfm_u
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
##
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
# @omlish-manifest $.minichain.backends.strings.manifests.BackendStringsManifest(
|
|
53
|
+
# ['ChatChoicesService', 'ChatChoicesStreamService'],
|
|
54
|
+
# 'transformers',
|
|
55
|
+
# )
|
|
56
|
+
|
|
57
|
+
|
|
33
58
|
##
|
|
34
59
|
|
|
35
60
|
|
|
@@ -128,13 +153,10 @@ def build_chat_message(m: Message) -> ta.Mapping[str, ta.Any]:
|
|
|
128
153
|
raise TypeError(m)
|
|
129
154
|
|
|
130
155
|
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
# )
|
|
136
|
-
@static_check_is_chat_choices_service
|
|
137
|
-
class TransformersChatChoicesService(lang.ExitStacked):
|
|
156
|
+
##
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
class BaseTransformersChatChoicesService(lang.ExitStacked):
|
|
138
160
|
DEFAULT_MODEL: ta.ClassVar[str] = (
|
|
139
161
|
'meta-llama/Llama-3.2-1B-Instruct'
|
|
140
162
|
)
|
|
@@ -148,7 +170,7 @@ class TransformersChatChoicesService(lang.ExitStacked):
|
|
|
148
170
|
self._huggingface_hub_token = HuggingfaceHubToken.pop_secret(cc, env='HUGGINGFACE_HUB_TOKEN')
|
|
149
171
|
|
|
150
172
|
@lang.cached_function(transient=True)
|
|
151
|
-
def _load_pipeline(self) -> tfm.Pipeline:
|
|
173
|
+
def _load_pipeline(self) -> 'tfm.Pipeline':
|
|
152
174
|
# FIXME: unload
|
|
153
175
|
check.not_none(self._exit_stack)
|
|
154
176
|
|
|
@@ -161,21 +183,118 @@ class TransformersChatChoicesService(lang.ExitStacked):
|
|
|
161
183
|
for pkw_cfg in self._pipeline_kwargs:
|
|
162
184
|
pkw.update(pkw_cfg.v)
|
|
163
185
|
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
)
|
|
186
|
+
with tfm_u.file_cache_patch_context(
|
|
187
|
+
local_first=True,
|
|
188
|
+
local_config_present_is_authoritative=True,
|
|
189
|
+
):
|
|
190
|
+
return tfm.pipeline(
|
|
191
|
+
'text-generation',
|
|
192
|
+
**pkw,
|
|
193
|
+
)
|
|
168
194
|
|
|
195
|
+
|
|
196
|
+
##
|
|
197
|
+
|
|
198
|
+
|
|
199
|
+
# @omlish-manifest $.minichain.registries.manifests.RegistryManifest(
|
|
200
|
+
# name='transformers',
|
|
201
|
+
# aliases=['tfm'],
|
|
202
|
+
# type='ChatChoicesService',
|
|
203
|
+
# )
|
|
204
|
+
@static_check_is_chat_choices_service
|
|
205
|
+
class TransformersChatChoicesService(BaseTransformersChatChoicesService):
|
|
169
206
|
async def invoke(self, request: ChatChoicesRequest) -> ChatChoicesResponse:
|
|
170
207
|
check.empty(request.options)
|
|
171
208
|
|
|
172
209
|
pipeline = self._load_pipeline()
|
|
173
210
|
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
211
|
+
inputs = [
|
|
212
|
+
build_chat_message(m)
|
|
213
|
+
for m in request.v
|
|
214
|
+
]
|
|
215
|
+
|
|
216
|
+
outputs = pipeline(inputs)
|
|
217
|
+
|
|
218
|
+
gts = check.single(outputs)['generated_text']
|
|
219
|
+
ugt, agt = gts
|
|
220
|
+
check.state(ugt['role'] == 'user')
|
|
221
|
+
check.state(agt['role'] == 'assistant')
|
|
222
|
+
|
|
223
|
+
return ChatChoicesResponse([AiChoice([AiMessage(agt['content'])])])
|
|
224
|
+
|
|
225
|
+
|
|
226
|
+
##
|
|
227
|
+
|
|
228
|
+
|
|
229
|
+
# @omlish-manifest $.minichain.registries.manifests.RegistryManifest(
|
|
230
|
+
# name='transformers',
|
|
231
|
+
# type='ChatChoicesStreamService',
|
|
232
|
+
# )
|
|
233
|
+
@static_check_is_chat_choices_stream_service
|
|
234
|
+
class TransformersChatChoicesStreamService(BaseTransformersChatChoicesService):
|
|
235
|
+
async def invoke(self, request: ChatChoicesStreamRequest) -> ChatChoicesStreamResponse:
|
|
236
|
+
check.empty(request.options)
|
|
237
|
+
|
|
238
|
+
pipeline = self._load_pipeline() # noqa
|
|
239
|
+
|
|
240
|
+
inputs = [ # noqa
|
|
241
|
+
build_chat_message(m)
|
|
242
|
+
for m in request.v
|
|
243
|
+
]
|
|
244
|
+
|
|
245
|
+
relay: AsyncioBufferRelay = AsyncioBufferRelay()
|
|
246
|
+
|
|
247
|
+
def streamer_callback(text: str, *, stream_end: bool) -> None:
|
|
248
|
+
if text or stream_end:
|
|
249
|
+
relay.push(text, *([None] if stream_end else []))
|
|
250
|
+
|
|
251
|
+
streamer = tfm_u.CancellableTextStreamer(
|
|
252
|
+
check.not_none(pipeline.tokenizer), # type: ignore[arg-type]
|
|
253
|
+
streamer_callback, # noqa
|
|
254
|
+
skip_prompt=True,
|
|
255
|
+
skip_special_tokens=True,
|
|
179
256
|
)
|
|
180
257
|
|
|
181
|
-
|
|
258
|
+
async with UseResources.or_new(request.options) as rs:
|
|
259
|
+
thread = threading.Thread(
|
|
260
|
+
target=tfm_u.CancellableTextStreamer.ignoring_cancelled(pipeline),
|
|
261
|
+
args=(
|
|
262
|
+
inputs,
|
|
263
|
+
),
|
|
264
|
+
kwargs=dict(
|
|
265
|
+
streamer=streamer,
|
|
266
|
+
),
|
|
267
|
+
)
|
|
268
|
+
|
|
269
|
+
def stop_thread() -> None:
|
|
270
|
+
streamer.cancel()
|
|
271
|
+
# thread.join()
|
|
272
|
+
|
|
273
|
+
rs.enter_context(lang.defer(stop_thread))
|
|
274
|
+
|
|
275
|
+
thread.start()
|
|
276
|
+
|
|
277
|
+
async def inner(sink: StreamResponseSink[AiChoicesDeltas]) -> ta.Sequence[ChatChoicesOutputs] | None:
|
|
278
|
+
while True:
|
|
279
|
+
await relay.wait()
|
|
280
|
+
got = relay.swap()
|
|
281
|
+
|
|
282
|
+
if not got:
|
|
283
|
+
raise RuntimeError
|
|
284
|
+
|
|
285
|
+
if got[-1] is None:
|
|
286
|
+
out = ''.join(got[:-1])
|
|
287
|
+
end = True
|
|
288
|
+
else:
|
|
289
|
+
out = ''.join(got)
|
|
290
|
+
end = False
|
|
291
|
+
|
|
292
|
+
if out:
|
|
293
|
+
await sink.emit(AiChoicesDeltas([AiChoiceDeltas([ContentAiDelta(out)])]))
|
|
294
|
+
|
|
295
|
+
if end:
|
|
296
|
+
break
|
|
297
|
+
|
|
298
|
+
return []
|
|
299
|
+
|
|
300
|
+
return await new_stream_response(rs, inner)
|
|
@@ -108,7 +108,10 @@ class ManifestBackendStringResolver(BackendStringResolver):
|
|
|
108
108
|
|
|
109
109
|
mn: str | None = mdl.name
|
|
110
110
|
|
|
111
|
-
if
|
|
111
|
+
if args.parsed.backend == m.backend_name and mn is not None:
|
|
112
|
+
pass
|
|
113
|
+
|
|
114
|
+
elif mn == m.backend_name:
|
|
112
115
|
if m.model_names is not None:
|
|
113
116
|
mn = m.model_names.resolved_default
|
|
114
117
|
else:
|
|
File without changes
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
from omlish import dataclasses as dc
|
|
2
|
+
|
|
3
|
+
from ....services import Response
|
|
4
|
+
from ..services import ChatChoicesRequest
|
|
5
|
+
from ..services import static_check_is_chat_choices_service
|
|
6
|
+
from ..types import AiChoice
|
|
7
|
+
from ..types import AiChoices
|
|
8
|
+
from .joining import AiChoicesDeltaJoiner
|
|
9
|
+
from .services import ChatChoicesOutputs
|
|
10
|
+
from .services import ChatChoicesStreamOutputs
|
|
11
|
+
from .services import ChatChoicesStreamService
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
##
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
@static_check_is_chat_choices_service
|
|
18
|
+
@dc.dataclass(frozen=True)
|
|
19
|
+
class ChatChoicesStreamServiceChatChoicesService:
|
|
20
|
+
service: ChatChoicesStreamService
|
|
21
|
+
|
|
22
|
+
async def invoke(self, request: ChatChoicesRequest) -> Response[
|
|
23
|
+
AiChoices,
|
|
24
|
+
ChatChoicesOutputs | ChatChoicesStreamOutputs,
|
|
25
|
+
]:
|
|
26
|
+
joiner = AiChoicesDeltaJoiner()
|
|
27
|
+
|
|
28
|
+
async with (resp := await self.service.invoke(request)).v as it: # noqa
|
|
29
|
+
async for cs in it:
|
|
30
|
+
joiner.add(cs.choices)
|
|
31
|
+
|
|
32
|
+
# check.state(resp_v.is_done)
|
|
33
|
+
|
|
34
|
+
# FIXME: outputs lol
|
|
35
|
+
return Response([AiChoice(ms) for ms in joiner.build()])
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
import typing as ta
|
|
2
|
+
|
|
3
|
+
from omlish import check
|
|
4
|
+
|
|
5
|
+
from ...messages import AiChat
|
|
6
|
+
from ...stream.joining import AiDeltaJoiner
|
|
7
|
+
from .types import AiChoiceDeltas
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
##
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class AiChoicesDeltaJoiner:
|
|
14
|
+
def __init__(self) -> None:
|
|
15
|
+
super().__init__()
|
|
16
|
+
|
|
17
|
+
self._seq = 0
|
|
18
|
+
self._channels: list[AiDeltaJoiner] = []
|
|
19
|
+
|
|
20
|
+
def add(self, choices: ta.Sequence[AiChoiceDeltas]) -> None:
|
|
21
|
+
if not self._seq:
|
|
22
|
+
check.empty(self._channels)
|
|
23
|
+
self._channels.extend(AiDeltaJoiner() for _ in range(len(choices)))
|
|
24
|
+
|
|
25
|
+
for chan, c in zip(self._channels, choices, strict=True):
|
|
26
|
+
chan.add(c.deltas)
|
|
27
|
+
|
|
28
|
+
self._seq += 1
|
|
29
|
+
|
|
30
|
+
def build(self) -> list[AiChat]:
|
|
31
|
+
return [list(chan.build()) for chan in self._channels]
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
import abc
|
|
2
|
+
import typing as ta
|
|
3
|
+
|
|
4
|
+
from omlish import lang
|
|
5
|
+
|
|
6
|
+
from ....registries.globals import register_type
|
|
7
|
+
from ....services import Request
|
|
8
|
+
from ....services import Service
|
|
9
|
+
from ....stream.services import StreamResponse
|
|
10
|
+
from ...messages import Chat
|
|
11
|
+
from ..types import ChatChoicesOutputs
|
|
12
|
+
from .types import AiChoicesDeltas
|
|
13
|
+
from .types import ChatChoicesStreamOptions
|
|
14
|
+
from .types import ChatChoicesStreamOutputs
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
##
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
ChatChoicesStreamRequest: ta.TypeAlias = Request[Chat, ChatChoicesStreamOptions]
|
|
21
|
+
|
|
22
|
+
ChatChoicesStreamResponse: ta.TypeAlias = StreamResponse[
|
|
23
|
+
AiChoicesDeltas,
|
|
24
|
+
ChatChoicesOutputs,
|
|
25
|
+
ChatChoicesStreamOutputs,
|
|
26
|
+
]
|
|
27
|
+
|
|
28
|
+
# @omlish-manifest $.minichain.registries.manifests.RegistryTypeManifest
|
|
29
|
+
ChatChoicesStreamService: ta.TypeAlias = Service[ChatChoicesStreamRequest, ChatChoicesStreamResponse]
|
|
30
|
+
|
|
31
|
+
register_type(ChatChoicesStreamService, module=__name__)
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def static_check_is_chat_choices_stream_service[T: ChatChoicesStreamService](t: type[T]) -> type[T]:
|
|
35
|
+
return t
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
##
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
@static_check_is_chat_choices_stream_service
|
|
42
|
+
class AbstractChatChoicesStreamService(lang.Abstract):
|
|
43
|
+
@abc.abstractmethod
|
|
44
|
+
def invoke(self, request: ChatChoicesStreamRequest) -> ta.Awaitable[ChatChoicesStreamResponse]:
|
|
45
|
+
raise NotImplementedError
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
import typing as ta
|
|
2
|
+
|
|
3
|
+
from omlish import dataclasses as dc
|
|
4
|
+
from omlish import lang
|
|
5
|
+
|
|
6
|
+
from ....stream.services import StreamOptions
|
|
7
|
+
from ....types import Option
|
|
8
|
+
from ....types import Output
|
|
9
|
+
from ...stream.types import AiDeltas
|
|
10
|
+
from ..types import ChatChoicesOptions
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
##
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class ChatChoicesStreamOption(Option, lang.Abstract, lang.PackageSealed):
|
|
17
|
+
pass
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
ChatChoicesStreamOptions: ta.TypeAlias = ChatChoicesStreamOption | StreamOptions | ChatChoicesOptions
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
##
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class ChatChoicesStreamOutput(Output, lang.Abstract, lang.PackageSealed):
|
|
27
|
+
pass
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
ChatChoicesStreamOutputs: ta.TypeAlias = ChatChoicesStreamOutput
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
##
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
@dc.dataclass(frozen=True)
|
|
37
|
+
class AiChoiceDeltas(lang.Final):
|
|
38
|
+
deltas: AiDeltas
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
@dc.dataclass(frozen=True)
|
|
42
|
+
class AiChoicesDeltas(lang.Final):
|
|
43
|
+
choices: ta.Sequence[AiChoiceDeltas]
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
from omlish import lang
|
|
2
2
|
from omlish import marshal as msh
|
|
3
3
|
|
|
4
|
-
from .types import AiChoiceDelta
|
|
4
|
+
from .types import AiDelta
|
|
5
5
|
|
|
6
6
|
|
|
7
7
|
##
|
|
@@ -9,8 +9,8 @@ from .types import AiChoiceDelta
|
|
|
9
9
|
|
|
10
10
|
@lang.static_init
|
|
11
11
|
def _install_standard_marshaling() -> None:
|
|
12
|
-
|
|
12
|
+
ad_poly = msh.polymorphism_from_subclasses(AiDelta, naming=msh.Naming.SNAKE)
|
|
13
13
|
msh.install_standard_factories(
|
|
14
|
-
msh.PolymorphismMarshalerFactory(
|
|
15
|
-
msh.PolymorphismUnmarshalerFactory(
|
|
14
|
+
msh.PolymorphismMarshalerFactory(ad_poly),
|
|
15
|
+
msh.PolymorphismUnmarshalerFactory(ad_poly),
|
|
16
16
|
)
|