ommlds 0.0.0.dev440__py3-none-any.whl → 0.0.0.dev480__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
- ommlds/.omlish-manifests.json +332 -35
- ommlds/__about__.py +15 -9
- ommlds/_hacks/__init__.py +4 -0
- ommlds/_hacks/funcs.py +110 -0
- ommlds/_hacks/names.py +158 -0
- ommlds/_hacks/params.py +73 -0
- ommlds/_hacks/patches.py +0 -3
- ommlds/backends/anthropic/protocol/_marshal.py +2 -2
- ommlds/backends/anthropic/protocol/sse/_marshal.py +1 -1
- ommlds/backends/anthropic/protocol/sse/assemble.py +23 -7
- ommlds/backends/anthropic/protocol/sse/events.py +13 -0
- ommlds/backends/anthropic/protocol/types.py +30 -9
- ommlds/backends/google/protocol/__init__.py +3 -0
- ommlds/backends/google/protocol/_marshal.py +16 -0
- ommlds/backends/google/protocol/types.py +626 -0
- ommlds/backends/groq/_marshal.py +23 -0
- ommlds/backends/groq/protocol.py +249 -0
- ommlds/backends/mlx/generation.py +1 -1
- ommlds/backends/mlx/loading.py +58 -1
- ommlds/backends/ollama/__init__.py +0 -0
- ommlds/backends/ollama/protocol.py +170 -0
- ommlds/backends/openai/protocol/__init__.py +9 -28
- ommlds/backends/openai/protocol/_common.py +18 -0
- ommlds/backends/openai/protocol/_marshal.py +27 -0
- ommlds/backends/openai/protocol/chatcompletion/chunk.py +58 -31
- ommlds/backends/openai/protocol/chatcompletion/contentpart.py +49 -44
- ommlds/backends/openai/protocol/chatcompletion/message.py +55 -43
- ommlds/backends/openai/protocol/chatcompletion/request.py +114 -66
- ommlds/backends/openai/protocol/chatcompletion/response.py +71 -45
- ommlds/backends/openai/protocol/chatcompletion/responseformat.py +27 -20
- ommlds/backends/openai/protocol/chatcompletion/tokenlogprob.py +16 -7
- ommlds/backends/openai/protocol/completionusage.py +24 -15
- ommlds/backends/tavily/__init__.py +0 -0
- ommlds/backends/tavily/protocol.py +301 -0
- ommlds/backends/tinygrad/models/llama3/__init__.py +22 -14
- ommlds/backends/transformers/__init__.py +0 -0
- ommlds/backends/transformers/filecache.py +109 -0
- ommlds/backends/transformers/streamers.py +73 -0
- ommlds/cli/asyncs.py +30 -0
- ommlds/cli/backends/catalog.py +93 -0
- ommlds/cli/backends/configs.py +9 -0
- ommlds/cli/backends/inject.py +31 -36
- ommlds/cli/backends/injection.py +16 -0
- ommlds/cli/backends/types.py +46 -0
- ommlds/cli/content/__init__.py +0 -0
- ommlds/cli/content/messages.py +34 -0
- ommlds/cli/content/strings.py +42 -0
- ommlds/cli/inject.py +15 -32
- ommlds/cli/inputs/__init__.py +0 -0
- ommlds/cli/inputs/asyncs.py +32 -0
- ommlds/cli/inputs/sync.py +75 -0
- ommlds/cli/main.py +270 -110
- ommlds/cli/rendering/__init__.py +0 -0
- ommlds/cli/rendering/configs.py +9 -0
- ommlds/cli/rendering/inject.py +31 -0
- ommlds/cli/rendering/markdown.py +52 -0
- ommlds/cli/rendering/raw.py +73 -0
- ommlds/cli/rendering/types.py +21 -0
- ommlds/cli/secrets.py +21 -0
- ommlds/cli/sessions/base.py +1 -1
- ommlds/cli/sessions/chat/chat/__init__.py +0 -0
- ommlds/cli/sessions/chat/chat/ai/__init__.py +0 -0
- ommlds/cli/sessions/chat/chat/ai/configs.py +11 -0
- ommlds/cli/sessions/chat/chat/ai/inject.py +74 -0
- ommlds/cli/sessions/chat/chat/ai/injection.py +14 -0
- ommlds/cli/sessions/chat/chat/ai/rendering.py +70 -0
- ommlds/cli/sessions/chat/chat/ai/services.py +79 -0
- ommlds/cli/sessions/chat/chat/ai/tools.py +44 -0
- ommlds/cli/sessions/chat/chat/ai/types.py +28 -0
- ommlds/cli/sessions/chat/chat/state/__init__.py +0 -0
- ommlds/cli/sessions/chat/chat/state/configs.py +11 -0
- ommlds/cli/sessions/chat/chat/state/inject.py +36 -0
- ommlds/cli/sessions/chat/chat/state/inmemory.py +33 -0
- ommlds/cli/sessions/chat/chat/state/storage.py +52 -0
- ommlds/cli/sessions/chat/chat/state/types.py +38 -0
- ommlds/cli/sessions/chat/chat/user/__init__.py +0 -0
- ommlds/cli/sessions/chat/chat/user/configs.py +17 -0
- ommlds/cli/sessions/chat/chat/user/inject.py +62 -0
- ommlds/cli/sessions/chat/chat/user/interactive.py +31 -0
- ommlds/cli/sessions/chat/chat/user/oneshot.py +25 -0
- ommlds/cli/sessions/chat/chat/user/types.py +15 -0
- ommlds/cli/sessions/chat/configs.py +27 -0
- ommlds/cli/sessions/chat/driver.py +43 -0
- ommlds/cli/sessions/chat/inject.py +33 -65
- ommlds/cli/sessions/chat/phases/__init__.py +0 -0
- ommlds/cli/sessions/chat/phases/inject.py +27 -0
- ommlds/cli/sessions/chat/phases/injection.py +14 -0
- ommlds/cli/sessions/chat/phases/manager.py +29 -0
- ommlds/cli/sessions/chat/phases/types.py +29 -0
- ommlds/cli/sessions/chat/session.py +27 -0
- ommlds/cli/sessions/chat/tools/__init__.py +0 -0
- ommlds/cli/sessions/chat/tools/configs.py +22 -0
- ommlds/cli/sessions/chat/tools/confirmation.py +46 -0
- ommlds/cli/sessions/chat/tools/execution.py +66 -0
- ommlds/cli/sessions/chat/tools/fs/__init__.py +0 -0
- ommlds/cli/sessions/chat/tools/fs/configs.py +12 -0
- ommlds/cli/sessions/chat/tools/fs/inject.py +35 -0
- ommlds/cli/sessions/chat/tools/inject.py +88 -0
- ommlds/cli/sessions/chat/tools/injection.py +44 -0
- ommlds/cli/sessions/chat/tools/rendering.py +58 -0
- ommlds/cli/sessions/chat/tools/todo/__init__.py +0 -0
- ommlds/cli/sessions/chat/tools/todo/configs.py +12 -0
- ommlds/cli/sessions/chat/tools/todo/inject.py +31 -0
- ommlds/cli/sessions/chat/tools/weather/__init__.py +0 -0
- ommlds/cli/sessions/chat/tools/weather/configs.py +12 -0
- ommlds/cli/sessions/chat/tools/weather/inject.py +22 -0
- ommlds/cli/{tools/weather.py → sessions/chat/tools/weather/tools.py} +1 -1
- ommlds/cli/sessions/completion/configs.py +21 -0
- ommlds/cli/sessions/completion/inject.py +42 -0
- ommlds/cli/sessions/completion/session.py +35 -0
- ommlds/cli/sessions/embedding/configs.py +21 -0
- ommlds/cli/sessions/embedding/inject.py +42 -0
- ommlds/cli/sessions/embedding/session.py +33 -0
- ommlds/cli/sessions/inject.py +28 -11
- ommlds/cli/state/__init__.py +0 -0
- ommlds/cli/state/inject.py +28 -0
- ommlds/cli/{state.py → state/storage.py} +41 -24
- ommlds/minichain/__init__.py +84 -24
- ommlds/minichain/_marshal.py +49 -9
- ommlds/minichain/_typedvalues.py +2 -4
- ommlds/minichain/backends/catalogs/base.py +20 -1
- ommlds/minichain/backends/catalogs/simple.py +2 -2
- ommlds/minichain/backends/catalogs/strings.py +10 -8
- ommlds/minichain/backends/impls/anthropic/chat.py +65 -27
- ommlds/minichain/backends/impls/anthropic/names.py +10 -8
- ommlds/minichain/backends/impls/anthropic/protocol.py +109 -0
- ommlds/minichain/backends/impls/anthropic/stream.py +111 -43
- ommlds/minichain/backends/impls/duckduckgo/search.py +1 -1
- ommlds/minichain/backends/impls/dummy/__init__.py +0 -0
- ommlds/minichain/backends/impls/dummy/chat.py +69 -0
- ommlds/minichain/backends/impls/google/chat.py +114 -22
- ommlds/minichain/backends/impls/google/search.py +7 -2
- ommlds/minichain/backends/impls/google/stream.py +219 -0
- ommlds/minichain/backends/impls/google/tools.py +149 -0
- ommlds/minichain/backends/impls/groq/__init__.py +0 -0
- ommlds/minichain/backends/impls/groq/chat.py +75 -0
- ommlds/minichain/backends/impls/groq/names.py +48 -0
- ommlds/minichain/backends/impls/groq/protocol.py +143 -0
- ommlds/minichain/backends/impls/groq/stream.py +125 -0
- ommlds/minichain/backends/impls/llamacpp/chat.py +33 -18
- ommlds/minichain/backends/impls/llamacpp/completion.py +1 -1
- ommlds/minichain/backends/impls/llamacpp/format.py +4 -2
- ommlds/minichain/backends/impls/llamacpp/stream.py +37 -20
- ommlds/minichain/backends/impls/mistral.py +20 -5
- ommlds/minichain/backends/impls/mlx/chat.py +96 -22
- ommlds/minichain/backends/impls/ollama/__init__.py +0 -0
- ommlds/minichain/backends/impls/ollama/chat.py +199 -0
- ommlds/minichain/backends/impls/openai/chat.py +18 -8
- ommlds/minichain/backends/impls/openai/completion.py +10 -3
- ommlds/minichain/backends/impls/openai/embedding.py +10 -3
- ommlds/minichain/backends/impls/openai/format.py +131 -106
- ommlds/minichain/backends/impls/openai/names.py +31 -5
- ommlds/minichain/backends/impls/openai/stream.py +43 -25
- ommlds/minichain/backends/impls/tavily.py +66 -0
- ommlds/minichain/backends/impls/tinygrad/chat.py +23 -16
- ommlds/minichain/backends/impls/transformers/sentence.py +1 -1
- ommlds/minichain/backends/impls/transformers/tokens.py +1 -1
- ommlds/minichain/backends/impls/transformers/transformers.py +155 -34
- ommlds/minichain/backends/strings/parsing.py +1 -1
- ommlds/minichain/backends/strings/resolving.py +4 -1
- ommlds/minichain/chat/_marshal.py +16 -9
- ommlds/minichain/chat/choices/adapters.py +4 -4
- ommlds/minichain/chat/choices/services.py +1 -1
- ommlds/minichain/chat/choices/stream/__init__.py +0 -0
- ommlds/minichain/chat/choices/stream/adapters.py +35 -0
- ommlds/minichain/chat/choices/stream/joining.py +31 -0
- ommlds/minichain/chat/choices/stream/services.py +45 -0
- ommlds/minichain/chat/choices/stream/types.py +43 -0
- ommlds/minichain/chat/choices/types.py +2 -2
- ommlds/minichain/chat/history.py +3 -3
- ommlds/minichain/chat/messages.py +55 -19
- ommlds/minichain/chat/services.py +3 -3
- ommlds/minichain/chat/stream/_marshal.py +16 -0
- ommlds/minichain/chat/stream/joining.py +85 -0
- ommlds/minichain/chat/stream/services.py +15 -21
- ommlds/minichain/chat/stream/types.py +32 -19
- ommlds/minichain/chat/tools/execution.py +8 -7
- ommlds/minichain/chat/tools/ids.py +9 -15
- ommlds/minichain/chat/tools/parsing.py +17 -26
- ommlds/minichain/chat/transforms/base.py +29 -38
- ommlds/minichain/chat/transforms/metadata.py +30 -4
- ommlds/minichain/chat/transforms/services.py +9 -11
- ommlds/minichain/content/_marshal.py +44 -20
- ommlds/minichain/content/json.py +13 -0
- ommlds/minichain/content/materialize.py +14 -21
- ommlds/minichain/content/prepare.py +4 -0
- ommlds/minichain/content/transforms/interleave.py +1 -1
- ommlds/minichain/content/transforms/squeeze.py +1 -1
- ommlds/minichain/content/transforms/stringify.py +1 -1
- ommlds/minichain/json.py +20 -0
- ommlds/minichain/lib/code/__init__.py +0 -0
- ommlds/minichain/lib/code/prompts.py +6 -0
- ommlds/minichain/lib/fs/binfiles.py +108 -0
- ommlds/minichain/lib/fs/context.py +126 -0
- ommlds/minichain/lib/fs/errors.py +101 -0
- ommlds/minichain/lib/fs/suggestions.py +36 -0
- ommlds/minichain/lib/fs/tools/__init__.py +0 -0
- ommlds/minichain/lib/fs/tools/edit.py +104 -0
- ommlds/minichain/lib/fs/tools/ls.py +38 -0
- ommlds/minichain/lib/fs/tools/read.py +115 -0
- ommlds/minichain/lib/fs/tools/recursivels/__init__.py +0 -0
- ommlds/minichain/lib/fs/tools/recursivels/execution.py +40 -0
- ommlds/minichain/lib/todo/__init__.py +0 -0
- ommlds/minichain/lib/todo/context.py +54 -0
- ommlds/minichain/lib/todo/tools/__init__.py +0 -0
- ommlds/minichain/lib/todo/tools/read.py +44 -0
- ommlds/minichain/lib/todo/tools/write.py +335 -0
- ommlds/minichain/lib/todo/types.py +60 -0
- ommlds/minichain/llms/_marshal.py +25 -17
- ommlds/minichain/llms/types.py +4 -0
- ommlds/minichain/registries/globals.py +18 -4
- ommlds/minichain/resources.py +66 -43
- ommlds/minichain/search.py +1 -1
- ommlds/minichain/services/_marshal.py +46 -39
- ommlds/minichain/services/facades.py +3 -3
- ommlds/minichain/services/services.py +1 -1
- ommlds/minichain/standard.py +8 -0
- ommlds/minichain/stream/services.py +152 -38
- ommlds/minichain/stream/wrap.py +22 -24
- ommlds/minichain/tools/_marshal.py +1 -1
- ommlds/minichain/tools/execution/catalog.py +2 -1
- ommlds/minichain/tools/execution/context.py +34 -14
- ommlds/minichain/tools/execution/errors.py +15 -0
- ommlds/minichain/tools/execution/executors.py +8 -3
- ommlds/minichain/tools/execution/reflect.py +40 -5
- ommlds/minichain/tools/fns.py +46 -9
- ommlds/minichain/tools/jsonschema.py +14 -5
- ommlds/minichain/tools/reflect.py +54 -18
- ommlds/minichain/tools/types.py +33 -1
- ommlds/minichain/utils.py +27 -0
- ommlds/minichain/vectors/_marshal.py +11 -10
- ommlds/nanochat/LICENSE +21 -0
- ommlds/nanochat/__init__.py +0 -0
- ommlds/nanochat/rustbpe/LICENSE +21 -0
- ommlds/nanochat/tokenizers.py +406 -0
- ommlds/server/server.py +3 -3
- ommlds/specs/__init__.py +0 -0
- ommlds/specs/mcp/__init__.py +0 -0
- ommlds/specs/mcp/_marshal.py +23 -0
- ommlds/specs/mcp/protocol.py +266 -0
- ommlds/tools/git.py +27 -10
- ommlds/tools/ocr.py +8 -9
- ommlds/wiki/analyze.py +2 -2
- ommlds/wiki/text/mfh.py +1 -5
- ommlds/wiki/text/wtp.py +1 -3
- ommlds/wiki/utils/xml.py +5 -5
- {ommlds-0.0.0.dev440.dist-info → ommlds-0.0.0.dev480.dist-info}/METADATA +24 -21
- ommlds-0.0.0.dev480.dist-info/RECORD +427 -0
- ommlds/cli/backends/standard.py +0 -20
- ommlds/cli/sessions/chat/base.py +0 -42
- ommlds/cli/sessions/chat/interactive.py +0 -73
- ommlds/cli/sessions/chat/printing.py +0 -96
- ommlds/cli/sessions/chat/prompt.py +0 -143
- ommlds/cli/sessions/chat/state.py +0 -109
- ommlds/cli/sessions/chat/tools.py +0 -91
- ommlds/cli/sessions/completion/completion.py +0 -44
- ommlds/cli/sessions/embedding/embedding.py +0 -42
- ommlds/cli/tools/config.py +0 -13
- ommlds/cli/tools/inject.py +0 -64
- ommlds/minichain/chat/stream/adapters.py +0 -69
- ommlds/minichain/lib/fs/ls/execution.py +0 -32
- ommlds-0.0.0.dev440.dist-info/RECORD +0 -303
- /ommlds/{cli/tools → backends/google}/__init__.py +0 -0
- /ommlds/{minichain/lib/fs/ls → backends/groq}/__init__.py +0 -0
- /ommlds/{huggingface.py → backends/huggingface.py} +0 -0
- /ommlds/minichain/lib/fs/{ls → tools/recursivels}/rendering.py +0 -0
- /ommlds/minichain/lib/fs/{ls → tools/recursivels}/running.py +0 -0
- {ommlds-0.0.0.dev440.dist-info → ommlds-0.0.0.dev480.dist-info}/WHEEL +0 -0
- {ommlds-0.0.0.dev440.dist-info → ommlds-0.0.0.dev480.dist-info}/entry_points.txt +0 -0
- {ommlds-0.0.0.dev440.dist-info → ommlds-0.0.0.dev480.dist-info}/licenses/LICENSE +0 -0
- {ommlds-0.0.0.dev440.dist-info → ommlds-0.0.0.dev480.dist-info}/top_level.txt +0 -0

ommlds/minichain/backends/impls/openai/names.py

@@ -30,10 +30,12 @@ _GPT_MODEL_NAMES = [
     'gpt-5-chat-latest',
     'gpt-5-mini',
     'gpt-5-nano',
+
+    'gpt-5.1',
 ]


-MODEL_NAMES = ModelNameCollection(
+CHAT_MODEL_NAMES = ModelNameCollection(
     default='gpt',
     aliases={
         **{
@@ -46,8 +48,8 @@ MODEL_NAMES = ModelNameCollection(
            for n in _GPT_MODEL_NAMES
        },

-        'gpt': 'gpt-
-        'gpt-mini': 'gpt-
+        'gpt': 'gpt-5.1',
+        'gpt-mini': 'gpt-5-mini',

        #

@@ -61,11 +63,35 @@ MODEL_NAMES = ModelNameCollection(


 # @omlish-manifest
-
+_CHAT_BACKEND_STRINGS_MANIFEST = BackendStringsManifest(
     [
         'ChatChoicesService',
         'ChatChoicesStreamService',
     ],
     'openai',
-    model_names=MODEL_NAMES,
+    model_names=CHAT_MODEL_NAMES,
+)
+
+
+##
+
+
+# @omlish-manifest
+_COMPLETION_BACKEND_STRINGS_MANIFEST = BackendStringsManifest(
+    [
+        'CompletionService',
+    ],
+    'openai',
+)
+
+
+##
+
+
+# @omlish-manifest
+_EMBEDDING_BACKEND_STRINGS_MANIFEST = BackendStringsManifest(
+    [
+        'EmbeddingService',
+    ],
+    'openai',
 )
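
The hunks above split the single OpenAI backend-strings manifest into per-service manifests, each resolving model names through a ModelNameCollection with a default and an alias map. As a rough illustration of the resolution behavior the code relies on (CHAT_MODEL_NAMES.resolve(...) is called in stream.py below), here is a minimal, hypothetical re-implementation; the class and method shapes are assumptions for illustration, not the actual omlish/ommlds API.

# Hypothetical sketch of the alias resolution used via CHAT_MODEL_NAMES.resolve(...);
# the real ModelNameCollection lives in ommlds and may differ.
import dataclasses as dc
import typing as ta


@dc.dataclass(frozen=True)
class SketchModelNameCollection:
    default: str
    aliases: ta.Mapping[str, str]

    def resolve(self, name: str | None) -> str:
        # Fall back to the collection default, then follow alias links
        # ('gpt' -> 'gpt-5.1') until a concrete model name is reached.
        cur = name if name is not None else self.default
        seen: set[str] = set()
        while cur in self.aliases:
            if cur in seen:
                raise ValueError(f'alias cycle at {cur!r}')
            seen.add(cur)
            cur = self.aliases[cur]
        return cur


CHAT_MODEL_NAMES_SKETCH = SketchModelNameCollection(
    default='gpt',
    aliases={
        'gpt': 'gpt-5.1',
        'gpt-mini': 'gpt-5-mini',
    },
)

assert CHAT_MODEL_NAMES_SKETCH.resolve(None) == 'gpt-5.1'
assert CHAT_MODEL_NAMES_SKETCH.resolve('gpt-mini') == 'gpt-5-mini'
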
ommlds/minichain/backends/impls/openai/stream.py

@@ -1,28 +1,35 @@
+"""
+https://platform.openai.com/docs/api-reference/responses-streaming
+"""
 import typing as ta

 from omlish import check
+from omlish import marshal as msh
 from omlish import typedvalues as tv
 from omlish.formats import json
 from omlish.http import all as http
 from omlish.http import sse
 from omlish.io.buffers import DelimitingBuffer

+from .....backends.openai import protocol as pt
 from ....chat.choices.services import ChatChoicesOutputs
-from ....chat.stream.services import ChatChoicesStreamRequest
-from ....chat.stream.services import ChatChoicesStreamResponse
-from ....chat.stream.services import static_check_is_chat_choices_stream_service
-from ....chat.stream.types import
-from ....chat.stream.types import
-from ....chat.stream.types import ChatChoicesStreamOption
+from ....chat.choices.stream.services import ChatChoicesStreamRequest
+from ....chat.choices.stream.services import ChatChoicesStreamResponse
+from ....chat.choices.stream.services import static_check_is_chat_choices_stream_service
+from ....chat.choices.stream.types import AiChoiceDeltas
+from ....chat.choices.stream.types import AiChoicesDeltas
+from ....chat.choices.stream.types import ChatChoicesStreamOption
 from ....configs import Config
 from ....resources import ResourcesOption
 from ....resources import UseResources
 from ....standard import ApiKey
 from ....stream.services import StreamOption
+from ....stream.services import StreamResponseSink
 from ....stream.services import new_stream_response
 from .chat import OpenaiChatChoicesService
 from .format import OpenaiChatRequestHandler
-from .names import MODEL_NAMES
+from .format import build_mc_ai_delta
+from .names import CHAT_MODEL_NAMES


 ##
@@ -34,16 +41,22 @@ from .names import MODEL_NAMES
 # )
 @static_check_is_chat_choices_stream_service
 class OpenaiChatChoicesStreamService:
-    def __init__(self, *configs: Config) -> None:
+    def __init__(
+        self,
+        *configs: Config,
+        http_client: http.AsyncHttpClient | None = None,
+    ) -> None:
         super().__init__()

+        self._http_client = http_client
+
         with tv.consume(*configs) as cc:
             self._model_name = cc.pop(OpenaiChatChoicesService.DEFAULT_MODEL_NAME)
             self._api_key = ApiKey.pop_secret(cc, env='OPENAI_API_KEY')

-    READ_CHUNK_SIZE =
+    READ_CHUNK_SIZE: ta.ClassVar[int] = -1

-    def invoke(self, request: ChatChoicesStreamRequest) -> ChatChoicesStreamResponse:
+    async def invoke(self, request: ChatChoicesStreamRequest) -> ChatChoicesStreamResponse:
         # check.isinstance(request, ChatRequest)

         rh = OpenaiChatRequestHandler(
@@ -53,16 +66,16 @@ class OpenaiChatChoicesStreamService:
                 for o in request.options
                 if not isinstance(o, (ChatChoicesStreamOption, StreamOption, ResourcesOption))
             ],
-            model=
+            model=CHAT_MODEL_NAMES.resolve(self._model_name.v),
             mandatory_kwargs=dict(
                 stream=True,
-                stream_options=
+                stream_options=pt.ChatCompletionRequest.StreamOptions(
                     include_usage=True,
                 ),
             ),
         )

-        raw_request = rh.
+        raw_request = msh.marshal(rh.oai_request())

         http_request = http.HttpRequest(
             'https://api.openai.com/v1/chat/completions',
@@ -73,16 +86,15 @@ class OpenaiChatChoicesStreamService:
             data=json.dumps(raw_request).encode('utf-8'),
         )

-        with UseResources.or_new(request.options) as rs:
-            http_client = rs.
-            http_response = rs.
+        async with UseResources.or_new(request.options) as rs:
+            http_client = await rs.enter_async_context(http.manage_async_client(self._http_client))
+            http_response = await rs.enter_async_context(await http_client.stream_request(http_request))

-            def
+            async def inner(sink: StreamResponseSink[AiChoicesDeltas]) -> ta.Sequence[ChatChoicesOutputs]:
                 db = DelimitingBuffer([b'\r', b'\n', b'\r\n'])
                 sd = sse.SseDecoder()
                 while True:
-
-                    b = http_response.stream.read1(self.READ_CHUNK_SIZE)  # type: ignore[attr-defined]
+                    b = await http_response.stream.read1(self.READ_CHUNK_SIZE)
                     for l in db.feed(b):
                         if isinstance(l, DelimitingBuffer.Incomplete):
                             # FIXME: handle
@@ -99,14 +111,20 @@ class OpenaiChatChoicesStreamService:

                         check.state(sj['object'] == 'chat.completion.chunk')

+                        ccc = msh.unmarshal(sj, pt.ChatCompletionChunk)
+
                         # FIXME: stop reason
-                        if not
+                        if not ccc.choices:
                             continue

-
-
-
-
+                        if any(choice.finish_reason for choice in ccc.choices):
+                            check.state(all(choice.finish_reason for choice in ccc.choices))
+                            break
+
+                        await sink.emit(AiChoicesDeltas([
+                            AiChoiceDeltas([build_mc_ai_delta(choice.delta)])
+                            for choice in ccc.choices
+                        ]))

                 if not b:
                     return []
@@ -114,4 +132,4 @@ class OpenaiChatChoicesStreamService:
         # raw_response = json.loads(check.not_none(http_response.data).decode('utf-8'))
         # return rh.build_response(raw_response)

-        return new_stream_response(rs,
+        return await new_stream_response(rs, inner)
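
The streaming path above splits the HTTP body on CR/LF boundaries with a DelimitingBuffer, decodes each line as a server-sent event, and unmarshals every 'chat.completion.chunk' JSON payload until a finish reason arrives. A stdlib-only sketch of that decode loop follows; the buffering here merely approximates omlish's DelimitingBuffer and sse.SseDecoder, which may behave differently.

# Stdlib-only sketch of an SSE chunk-decoding loop like the one in the stream
# service; buffers partial lines, parses 'data:' payloads, stops on '[DONE]'.
import json


def iter_sse_json(chunks):
    buf = b''
    for chunk in chunks:
        buf += chunk
        *lines, buf = buf.split(b'\n')  # keep the trailing partial line buffered
        for line in lines:
            line = line.strip(b'\r')
            if not line.startswith(b'data:'):
                continue
            data = line[len(b'data:'):].strip()
            if data == b'[DONE]':
                return
            yield json.loads(data)


for ev in iter_sse_json([
    b'data: {"object": "chat.completion.chunk", "choices": []}\n\n',
    b'data: [DONE]\n\n',
]):
    assert ev['object'] == 'chat.completion.chunk'
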
ommlds/minichain/backends/impls/tavily.py

@@ -0,0 +1,66 @@
+from omlish import check
+from omlish import marshal as msh
+from omlish import typedvalues as tv
+from omlish.formats import json
+from omlish.http import all as http
+
+from ....backends.tavily import protocol as pt
+from ...search import SearchHit
+from ...search import SearchHits
+from ...search import SearchRequest
+from ...search import SearchResponse
+from ...search import static_check_is_search_service
+from ...standard import ApiKey
+
+
+##
+
+
+# @omlish-manifest $.minichain.registries.manifests.RegistryManifest(
+#     name='tavily',
+#     type='SearchService',
+# )
+@static_check_is_search_service
+class TavilySearchService:
+    def __init__(
+        self,
+        *configs: ApiKey,
+        http_client: http.AsyncHttpClient | None = None,
+    ) -> None:
+        super().__init__()
+
+        self._http_client = http_client
+
+        with tv.consume(*configs) as cc:
+            self._api_key = ApiKey.pop_secret(cc, env='TAVILY_API_KEY')
+
+    async def invoke(self, request: SearchRequest) -> SearchResponse:
+        pt_request = pt.SearchRequest(
+            query=request.v,
+        )
+
+        raw_request = msh.marshal(pt_request)
+
+        http_response = await http.async_request(
+            'https://api.tavily.com/search',
+            headers={
+                http.consts.HEADER_CONTENT_TYPE: http.consts.CONTENT_TYPE_JSON,
+                http.consts.HEADER_AUTH: http.consts.format_bearer_auth_header(check.not_none(self._api_key).reveal()),
+            },
+            data=json.dumps(raw_request).encode('utf-8'),
+            client=self._http_client,
+        )
+
+        raw_response = json.loads(check.not_none(http_response.data).decode('utf-8'))
+
+        pt_response = msh.unmarshal(raw_response, pt.SearchResponse)
+
+        return SearchResponse(SearchHits(
+            l=[
+                SearchHit(
+                    title=r.title,
+                    url=r.url,
+                )
+                for r in pt_response.results or []
+            ],
+        ))
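
The new service above posts a marshaled pt.SearchRequest to https://api.tavily.com/search with a bearer token and maps results[].title/.url into SearchHits. A dependency-free sketch of the same HTTP exchange follows; only the fields visible in the diff (query, results, title, url) are assumed, and nothing else about the Tavily protocol is.

# Stdlib sketch of the HTTP exchange TavilySearchService performs; assumes only
# the request/response fields visible in the diff above.
import json
import os
import urllib.request


def tavily_search(query: str) -> list[tuple[str, str]]:
    req = urllib.request.Request(
        'https://api.tavily.com/search',
        headers={
            'Content-Type': 'application/json',
            'Authorization': f'Bearer {os.environ["TAVILY_API_KEY"]}',
        },
        data=json.dumps({'query': query}).encode('utf-8'),
    )
    with urllib.request.urlopen(req) as resp:
        raw = json.loads(resp.read().decode('utf-8'))
    # Return (title, url) pairs, tolerating a missing 'results' key.
    return [(r['title'], r['url']) for r in raw.get('results') or []]
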
ommlds/minichain/backends/impls/tinygrad/chat.py

@@ -9,20 +9,22 @@ from ....chat.choices.services import ChatChoicesOptions
 from ....chat.choices.services import ChatChoicesRequest
 from ....chat.choices.services import ChatChoicesResponse
 from ....chat.choices.services import static_check_is_chat_choices_service
+from ....chat.choices.stream.services import ChatChoicesStreamRequest
+from ....chat.choices.stream.services import ChatChoicesStreamResponse
+from ....chat.choices.stream.services import static_check_is_chat_choices_stream_service
+from ....chat.choices.stream.types import AiChoiceDeltas
+from ....chat.choices.stream.types import AiChoicesDeltas
 from ....chat.choices.types import AiChoice
+from ....chat.choices.types import ChatChoicesOutputs
 from ....chat.messages import AiMessage
 from ....chat.messages import Chat
 from ....chat.messages import SystemMessage
 from ....chat.messages import UserMessage
-from ....chat.stream.
-from ....chat.stream.services import ChatChoicesStreamRequest
-from ....chat.stream.services import ChatChoicesStreamResponse
-from ....chat.stream.services import static_check_is_chat_choices_stream_service
-from ....chat.stream.types import AiChoiceDelta
-from ....chat.stream.types import AiMessageDelta
+from ....chat.stream.types import ContentAiDelta
 from ....chat.types import ChatOption
 from ....llms.types import LlmOption
 from ....resources import UseResources
+from ....stream.services import StreamResponseSink
 from ....stream.services import new_stream_response


@@ -111,12 +113,12 @@ class BaseTinygradLlama3ChatService(lang.ExitStacked, lang.Abstract):


 # @omlish-manifest $.minichain.registries.manifests.RegistryManifest(
-#     name='
+#     name='tinygrad-llama3',
 #     type='ChatChoicesService',
 # )
 @static_check_is_chat_choices_service
 class TinygradLlama3ChatChoicesService(BaseTinygradLlama3ChatService):
-    def invoke(self, request: ChatChoicesRequest) -> ChatChoicesResponse:
+    async def invoke(self, request: ChatChoicesRequest) -> ChatChoicesResponse:
         llm = self._load_model()
         toks = _prepare_toks(llm, request.v, request.options)

@@ -124,20 +126,20 @@ class TinygradLlama3ChatChoicesService(BaseTinygradLlama3ChatService):
         for s in tgl3.run_llm(llm, toks):
             out.append(s)

-        return ChatChoicesResponse([AiChoice(AiMessage(''.join(out)))])
+        return ChatChoicesResponse([AiChoice([AiMessage(''.join(out))])])


 ##


 # @omlish-manifest $.minichain.registries.manifests.RegistryManifest(
-#     name='
+#     name='tinygrad-llama3',
 #     type='ChatChoicesStreamService',
 # )
 @static_check_is_chat_choices_stream_service
 class TinygradLlama3ChatChoicesStreamService(BaseTinygradLlama3ChatService):
-    def invoke(self, request: ChatChoicesStreamRequest) -> ChatChoicesStreamResponse:
-        with UseResources.or_new(request.options) as rs:
+    async def invoke(self, request: ChatChoicesStreamRequest) -> ChatChoicesStreamResponse:
+        async with UseResources.or_new(request.options) as rs:
             llm = self._load_model()
             toks = _prepare_toks(
                 llm,
@@ -145,12 +147,17 @@ class TinygradLlama3ChatChoicesStreamService(BaseTinygradLlama3ChatService):
                 request.options.get_any((ChatOption, LlmOption)),  # FIXME  # noqa
             )

-            def
+            async def inner(sink: StreamResponseSink[AiChoicesDeltas]) -> ta.Sequence[ChatChoicesOutputs]:
                 for s in tgl3.run_llm(llm, toks):
-
+                    await sink.emit(AiChoicesDeltas([
+                        AiChoiceDeltas([
+                            ContentAiDelta(s),
+                        ]),
+                    ]))
+
                 return []

-            return new_stream_response(rs,
+            return await new_stream_response(rs, inner)


 ##
@@ -161,5 +168,5 @@ class TinygradLlama3ChatChoicesStreamService(BaseTinygradLlama3ChatService):
 #         'ChatChoicesService',
 #         'ChatChoicesStreamService',
 #     ],
-#     '
+#     'tinygrad-llama3',
 # )
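
Both streaming backends now emit the same nested shape: an AiChoicesDeltas holding one AiChoiceDeltas per choice, each carrying deltas such as ContentAiDelta. As a rough sketch of how a consumer might join such a stream back into per-choice text, the dataclasses below are stand-ins for the ommlds types (whose real definitions live in chat/choices/stream/types.py and may differ in field names).

# Stand-in dataclasses for the delta types named in the diff; field names
# ('c', 'l') are assumptions for illustration only.
import dataclasses as dc
import typing as ta


@dc.dataclass(frozen=True)
class ContentAiDelta:
    c: str


@dc.dataclass(frozen=True)
class AiChoiceDeltas:
    l: ta.Sequence[ContentAiDelta]


@dc.dataclass(frozen=True)
class AiChoicesDeltas:
    l: ta.Sequence[AiChoiceDeltas]


def join_choices(stream: ta.Iterable[AiChoicesDeltas]) -> list[str]:
    # Accumulate content deltas positionally, one buffer per choice.
    bufs: list[list[str]] = []
    for cds in stream:
        while len(bufs) < len(cds.l):
            bufs.append([])
        for buf, cd in zip(bufs, cds.l):
            buf.extend(d.c for d in cd.l)
    return [''.join(b) for b in bufs]


assert join_choices([
    AiChoicesDeltas([AiChoiceDeltas([ContentAiDelta('Hel')])]),
    AiChoicesDeltas([AiChoiceDeltas([ContentAiDelta('lo')])]),
]) == ['Hello']
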
ommlds/minichain/backends/impls/transformers/sentence.py

@@ -33,7 +33,7 @@ class SentenceTransformersEmbeddingService:
         with tv.consume(*configs) as cc:
             self._model_path = cc.pop(ModelPath(self.DEFAULT_MODEL))

-    def invoke(self, request: EmbeddingRequest) -> EmbeddingResponse:
+    async def invoke(self, request: EmbeddingRequest) -> EmbeddingResponse:
         mdl = stfm.SentenceTransformer(
             self._model_path.v,
         )
ommlds/minichain/backends/impls/transformers/transformers.py

@@ -4,6 +4,7 @@ TODO:
  - https://huggingface.co/blog/aifeifei798/transformers-streaming-output
 """
 import sys
+import threading
 import typing as ta

 import transformers as tfm
@@ -11,26 +12,50 @@ import transformers as tfm
 from omlish import check
 from omlish import lang
 from omlish import typedvalues as tv
+from omlish.asyncs.asyncio.sync import AsyncioBufferRelay

+from .....backends.transformers.filecache import file_cache_patch_context
+from .....backends.transformers.streamers import CancellableTextStreamer
 from ....chat.choices.services import ChatChoicesRequest
 from ....chat.choices.services import ChatChoicesResponse
 from ....chat.choices.services import static_check_is_chat_choices_service
+from ....chat.choices.stream.services import ChatChoicesStreamRequest
+from ....chat.choices.stream.services import ChatChoicesStreamResponse
+from ....chat.choices.stream.services import static_check_is_chat_choices_stream_service
+from ....chat.choices.stream.types import AiChoiceDeltas  # noqa
+from ....chat.choices.stream.types import AiChoicesDeltas  # noqa
+from ....chat.choices.types import AiChoice
+from ....chat.choices.types import ChatChoicesOutputs
 from ....chat.messages import AiMessage
 from ....chat.messages import Message
 from ....chat.messages import SystemMessage
-from ....chat.messages import
+from ....chat.messages import ToolUseMessage
+from ....chat.messages import ToolUseResultMessage
 from ....chat.messages import UserMessage
+from ....chat.stream.types import ContentAiDelta  # noqa
 from ....completion import CompletionRequest
 from ....completion import CompletionResponse
 from ....completion import static_check_is_completion_service
 from ....configs import Config
 from ....models.configs import ModelPath
+from ....resources import UseResources
+from ....stream.services import StreamResponseSink
+from ....stream.services import new_stream_response
 from ...impls.huggingface.configs import HuggingfaceHubToken


 ##


+# @omlish-manifest $.minichain.backends.strings.manifests.BackendStringsManifest(
+#     ['ChatChoicesService', 'ChatChoicesStreamService'],
+#     'transformers',
+# )
+
+
+##
+
+
 class TransformersPipelineKwargs(Config, tv.ScalarTypedValue[ta.Mapping[str, ta.Any]]):
     pass

@@ -59,7 +84,7 @@ class TransformersCompletionService(lang.ExitStacked):
             self._pipeline_kwargs = cc.pop(TransformersPipelineKwargs, [])
             self._huggingface_hub_token = HuggingfaceHubToken.pop_secret(cc, env='HUGGINGFACE_HUB_TOKEN')

-    def invoke(self, request: CompletionRequest) -> CompletionResponse:
+    async def invoke(self, request: CompletionRequest) -> CompletionResponse:
         pkw: dict[str, ta.Any] = dict(
             model=self._model_path.v,
             device='mps' if sys.platform == 'darwin' else 'cuda',
@@ -94,17 +119,19 @@ def build_chat_message(m: Message) -> ta.Mapping[str, ta.Any]:
         return dict(
             role='assistant',
             content=check.isinstance(m.c, str),
-
-
-
-
-
-
-
-
-
-
-
+        )
+
+    elif isinstance(m, ToolUseMessage):
+        return dict(
+            role='assistant',
+            tool_calls=[dict(
+                id=m.tu.id,
+                function=dict(
+                    arguments=m.tu.args,
+                    name=m.tu.name,
+                ),
+                type='function',
+            )],
         )

     elif isinstance(m, UserMessage):
@@ -113,24 +140,21 @@ def build_chat_message(m: Message) -> ta.Mapping[str, ta.Any]:
             content=check.isinstance(m.c, str),
         )

-    elif isinstance(m,
+    elif isinstance(m, ToolUseResultMessage):
         return dict(
             role='tool',
-            tool_call_id=m.id,
-            content=check.isinstance(m.c, str),
+            tool_call_id=m.tur.id,
+            content=check.isinstance(m.tur.c, str),
         )

     else:
         raise TypeError(m)


-
-
-
-
-# )
-@static_check_is_chat_choices_service
-class TransformersChatChoicesService(lang.ExitStacked):
+##
+
+
+class BaseTransformersChatChoicesService(lang.ExitStacked):
     DEFAULT_MODEL: ta.ClassVar[str] = (
         'meta-llama/Llama-3.2-1B-Instruct'
     )
@@ -157,21 +181,118 @@ class TransformersChatChoicesService(lang.ExitStacked):
         for pkw_cfg in self._pipeline_kwargs:
             pkw.update(pkw_cfg.v)

-
-
-
-        )
+        with file_cache_patch_context(
+            local_first=True,
+            local_config_present_is_authoritative=True,
+        ):
+            return tfm.pipeline(
+                'text-generation',
+                **pkw,
+            )

-
+
+##
+
+
+# @omlish-manifest $.minichain.registries.manifests.RegistryManifest(
+#     name='transformers',
+#     aliases=['tfm'],
+#     type='ChatChoicesService',
+# )
+@static_check_is_chat_choices_service
+class TransformersChatChoicesService(BaseTransformersChatChoicesService):
+    async def invoke(self, request: ChatChoicesRequest) -> ChatChoicesResponse:
         check.empty(request.options)

         pipeline = self._load_pipeline()

-
-
-
-
-
+        inputs = [
+            build_chat_message(m)
+            for m in request.v
+        ]
+
+        outputs = pipeline(inputs)
+
+        gts = check.single(outputs)['generated_text']
+        ugt, agt = gts
+        check.state(ugt['role'] == 'user')
+        check.state(agt['role'] == 'assistant')
+
+        return ChatChoicesResponse([AiChoice([AiMessage(agt['content'])])])
+
+
+##
+
+
+# @omlish-manifest $.minichain.registries.manifests.RegistryManifest(
+#     name='transformers',
+#     type='ChatChoicesStreamService',
+# )
+@static_check_is_chat_choices_stream_service
+class TransformersChatChoicesStreamService(BaseTransformersChatChoicesService):
+    async def invoke(self, request: ChatChoicesStreamRequest) -> ChatChoicesStreamResponse:
+        check.empty(request.options)
+
+        pipeline = self._load_pipeline()  # noqa
+
+        inputs = [  # noqa
+            build_chat_message(m)
+            for m in request.v
+        ]
+
+        relay: AsyncioBufferRelay = AsyncioBufferRelay()
+
+        def streamer_callback(text: str, *, stream_end: bool) -> None:
+            if text or stream_end:
+                relay.push(text, *([None] if stream_end else []))
+
+        streamer = CancellableTextStreamer(
+            check.not_none(pipeline.tokenizer),  # type: ignore[arg-type]
+            streamer_callback,  # noqa
+            skip_prompt=True,
+            skip_special_tokens=True,
         )

-
+        async with UseResources.or_new(request.options) as rs:
+            thread = threading.Thread(
+                target=CancellableTextStreamer.ignoring_cancelled(pipeline),
+                args=(
+                    inputs,
+                ),
+                kwargs=dict(
+                    streamer=streamer,
+                ),
+            )
+
+            def stop_thread() -> None:
+                streamer.cancel()
+                # thread.join()
+
+            rs.enter_context(lang.defer(stop_thread))
+
+            thread.start()
+
+            async def inner(sink: StreamResponseSink[AiChoicesDeltas]) -> ta.Sequence[ChatChoicesOutputs] | None:
+                while True:
+                    await relay.wait()
+                    got = relay.swap()
+
+                    if not got:
+                        raise RuntimeError
+
+                    if got[-1] is None:
+                        out = ''.join(got[:-1])
+                        end = True
+                    else:
+                        out = ''.join(got)
+                        end = False
+
+                    if out:
+                        await sink.emit(AiChoicesDeltas([AiChoiceDeltas([ContentAiDelta(out)])]))
+
+                    if end:
+                        break
+
+                return []
+
+            return await new_stream_response(rs, inner)
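
The transformers stream service runs the pipeline on a worker thread and forwards streamer callbacks into the event loop through an AsyncioBufferRelay, with None as the end-of-stream sentinel. The push/wait/swap API belongs to omlish; a plain-asyncio analogue of the same thread-to-loop relay pattern, using an asyncio.Queue instead, might look like this.

# Plain-asyncio analogue of the thread -> event-loop relay used above: a worker
# thread pushes text chunks plus a None sentinel, a coroutine drains them.
import asyncio
import threading


async def main() -> None:
    loop = asyncio.get_running_loop()
    q: asyncio.Queue[str | None] = asyncio.Queue()

    def worker() -> None:
        # Stands in for the pipeline thread driving a streamer callback.
        for text in ('Hel', 'lo', ' world'):
            loop.call_soon_threadsafe(q.put_nowait, text)
        loop.call_soon_threadsafe(q.put_nowait, None)  # end-of-stream sentinel

    threading.Thread(target=worker).start()

    parts: list[str] = []
    while (got := await q.get()) is not None:
        parts.append(got)
    print(''.join(parts))  # -> 'Hello world'


asyncio.run(main())
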
ommlds/minichain/backends/strings/resolving.py

@@ -108,7 +108,10 @@ class ManifestBackendStringResolver(BackendStringResolver):

             mn: str | None = mdl.name

-            if mn == m.backend_name:
+            if args.parsed.backend == m.backend_name and mn is not None:
+                pass
+
+            elif mn == m.backend_name:
                 if m.model_names is not None:
                     mn = m.model_names.resolved_default
                 else: