ommlds-0.0.0.dev426-py3-none-any.whl → ommlds-0.0.0.dev485-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ommlds/.omlish-manifests.json +336 -39
- ommlds/__about__.py +16 -10
- ommlds/_hacks/__init__.py +4 -0
- ommlds/_hacks/funcs.py +110 -0
- ommlds/_hacks/names.py +158 -0
- ommlds/_hacks/params.py +73 -0
- ommlds/_hacks/patches.py +0 -3
- ommlds/backends/anthropic/protocol/__init__.py +13 -1
- ommlds/backends/anthropic/protocol/_dataclasses.py +1625 -0
- ommlds/backends/anthropic/protocol/_marshal.py +2 -2
- ommlds/backends/anthropic/protocol/sse/_marshal.py +1 -1
- ommlds/backends/anthropic/protocol/sse/assemble.py +23 -7
- ommlds/backends/anthropic/protocol/sse/events.py +13 -0
- ommlds/backends/anthropic/protocol/types.py +40 -8
- ommlds/backends/google/protocol/__init__.py +16 -0
- ommlds/backends/google/protocol/_dataclasses.py +5997 -0
- ommlds/backends/google/protocol/_marshal.py +16 -0
- ommlds/backends/google/protocol/types.py +626 -0
- ommlds/backends/groq/__init__.py +7 -0
- ommlds/backends/groq/_dataclasses.py +3901 -0
- ommlds/backends/groq/_marshal.py +23 -0
- ommlds/backends/groq/protocol.py +249 -0
- ommlds/backends/llamacpp/logging.py +4 -1
- ommlds/backends/mlx/caching.py +7 -3
- ommlds/backends/mlx/cli.py +10 -7
- ommlds/backends/mlx/generation.py +19 -17
- ommlds/backends/mlx/limits.py +10 -6
- ommlds/backends/mlx/loading.py +65 -5
- ommlds/backends/ollama/__init__.py +7 -0
- ommlds/backends/ollama/_dataclasses.py +3458 -0
- ommlds/backends/ollama/protocol.py +170 -0
- ommlds/backends/openai/protocol/__init__.py +24 -29
- ommlds/backends/openai/protocol/_common.py +18 -0
- ommlds/backends/openai/protocol/_dataclasses.py +7708 -0
- ommlds/backends/openai/protocol/_marshal.py +27 -0
- ommlds/backends/openai/protocol/chatcompletion/chunk.py +58 -31
- ommlds/backends/openai/protocol/chatcompletion/contentpart.py +49 -44
- ommlds/backends/openai/protocol/chatcompletion/message.py +55 -43
- ommlds/backends/openai/protocol/chatcompletion/request.py +114 -66
- ommlds/backends/openai/protocol/chatcompletion/response.py +71 -45
- ommlds/backends/openai/protocol/chatcompletion/responseformat.py +27 -20
- ommlds/backends/openai/protocol/chatcompletion/tokenlogprob.py +16 -7
- ommlds/backends/openai/protocol/completionusage.py +24 -15
- ommlds/backends/tavily/__init__.py +7 -0
- ommlds/backends/tavily/_dataclasses.py +1734 -0
- ommlds/backends/tavily/protocol.py +301 -0
- ommlds/backends/tinygrad/models/llama3/__init__.py +22 -14
- ommlds/backends/transformers/__init__.py +14 -0
- ommlds/backends/transformers/filecache.py +109 -0
- ommlds/backends/transformers/streamers.py +73 -0
- ommlds/cli/__init__.py +7 -0
- ommlds/cli/_dataclasses.py +2562 -0
- ommlds/cli/asyncs.py +30 -0
- ommlds/cli/backends/catalog.py +93 -0
- ommlds/cli/backends/configs.py +9 -0
- ommlds/cli/backends/inject.py +31 -36
- ommlds/cli/backends/injection.py +16 -0
- ommlds/cli/backends/types.py +46 -0
- ommlds/cli/content/messages.py +34 -0
- ommlds/cli/content/strings.py +42 -0
- ommlds/cli/inject.py +17 -32
- ommlds/cli/inputs/__init__.py +0 -0
- ommlds/cli/inputs/asyncs.py +32 -0
- ommlds/cli/inputs/sync.py +75 -0
- ommlds/cli/main.py +270 -110
- ommlds/cli/rendering/__init__.py +0 -0
- ommlds/cli/rendering/configs.py +9 -0
- ommlds/cli/rendering/inject.py +31 -0
- ommlds/cli/rendering/markdown.py +52 -0
- ommlds/cli/rendering/raw.py +73 -0
- ommlds/cli/rendering/types.py +21 -0
- ommlds/cli/secrets.py +21 -0
- ommlds/cli/sessions/base.py +1 -1
- ommlds/cli/sessions/chat/chat/__init__.py +0 -0
- ommlds/cli/sessions/chat/chat/ai/__init__.py +0 -0
- ommlds/cli/sessions/chat/chat/ai/configs.py +11 -0
- ommlds/cli/sessions/chat/chat/ai/inject.py +74 -0
- ommlds/cli/sessions/chat/chat/ai/injection.py +14 -0
- ommlds/cli/sessions/chat/chat/ai/rendering.py +70 -0
- ommlds/cli/sessions/chat/chat/ai/services.py +79 -0
- ommlds/cli/sessions/chat/chat/ai/tools.py +44 -0
- ommlds/cli/sessions/chat/chat/ai/types.py +28 -0
- ommlds/cli/sessions/chat/chat/state/__init__.py +0 -0
- ommlds/cli/sessions/chat/chat/state/configs.py +11 -0
- ommlds/cli/sessions/chat/chat/state/inject.py +36 -0
- ommlds/cli/sessions/chat/chat/state/inmemory.py +33 -0
- ommlds/cli/sessions/chat/chat/state/storage.py +52 -0
- ommlds/cli/sessions/chat/chat/state/types.py +38 -0
- ommlds/cli/sessions/chat/chat/user/__init__.py +0 -0
- ommlds/cli/sessions/chat/chat/user/configs.py +17 -0
- ommlds/cli/sessions/chat/chat/user/inject.py +62 -0
- ommlds/cli/sessions/chat/chat/user/interactive.py +31 -0
- ommlds/cli/sessions/chat/chat/user/oneshot.py +25 -0
- ommlds/cli/sessions/chat/chat/user/types.py +15 -0
- ommlds/cli/sessions/chat/configs.py +27 -0
- ommlds/cli/sessions/chat/driver.py +43 -0
- ommlds/cli/sessions/chat/inject.py +33 -65
- ommlds/cli/sessions/chat/phases/__init__.py +0 -0
- ommlds/cli/sessions/chat/phases/inject.py +27 -0
- ommlds/cli/sessions/chat/phases/injection.py +14 -0
- ommlds/cli/sessions/chat/phases/manager.py +29 -0
- ommlds/cli/sessions/chat/phases/types.py +29 -0
- ommlds/cli/sessions/chat/session.py +27 -0
- ommlds/cli/sessions/chat/tools/__init__.py +0 -0
- ommlds/cli/sessions/chat/tools/configs.py +22 -0
- ommlds/cli/sessions/chat/tools/confirmation.py +46 -0
- ommlds/cli/sessions/chat/tools/execution.py +66 -0
- ommlds/cli/sessions/chat/tools/fs/__init__.py +0 -0
- ommlds/cli/sessions/chat/tools/fs/configs.py +12 -0
- ommlds/cli/sessions/chat/tools/fs/inject.py +35 -0
- ommlds/cli/sessions/chat/tools/inject.py +88 -0
- ommlds/cli/sessions/chat/tools/injection.py +44 -0
- ommlds/cli/sessions/chat/tools/rendering.py +58 -0
- ommlds/cli/sessions/chat/tools/todo/__init__.py +0 -0
- ommlds/cli/sessions/chat/tools/todo/configs.py +12 -0
- ommlds/cli/sessions/chat/tools/todo/inject.py +31 -0
- ommlds/cli/sessions/chat/tools/weather/__init__.py +0 -0
- ommlds/cli/sessions/chat/tools/weather/configs.py +12 -0
- ommlds/cli/sessions/chat/tools/weather/inject.py +22 -0
- ommlds/cli/{tools/weather.py → sessions/chat/tools/weather/tools.py} +1 -1
- ommlds/cli/sessions/completion/configs.py +21 -0
- ommlds/cli/sessions/completion/inject.py +42 -0
- ommlds/cli/sessions/completion/session.py +35 -0
- ommlds/cli/sessions/embedding/configs.py +21 -0
- ommlds/cli/sessions/embedding/inject.py +42 -0
- ommlds/cli/sessions/embedding/session.py +33 -0
- ommlds/cli/sessions/inject.py +28 -11
- ommlds/cli/state/__init__.py +0 -0
- ommlds/cli/state/inject.py +28 -0
- ommlds/cli/{state.py → state/storage.py} +41 -24
- ommlds/minichain/__init__.py +84 -24
- ommlds/minichain/_dataclasses.py +15401 -0
- ommlds/minichain/_marshal.py +49 -9
- ommlds/minichain/_typedvalues.py +2 -4
- ommlds/minichain/backends/catalogs/base.py +20 -1
- ommlds/minichain/backends/catalogs/simple.py +2 -2
- ommlds/minichain/backends/catalogs/strings.py +10 -8
- ommlds/minichain/backends/impls/anthropic/chat.py +65 -27
- ommlds/minichain/backends/impls/anthropic/names.py +10 -8
- ommlds/minichain/backends/impls/anthropic/protocol.py +109 -0
- ommlds/minichain/backends/impls/anthropic/stream.py +111 -43
- ommlds/minichain/backends/impls/duckduckgo/search.py +6 -2
- ommlds/minichain/backends/impls/dummy/__init__.py +0 -0
- ommlds/minichain/backends/impls/dummy/chat.py +69 -0
- ommlds/minichain/backends/impls/google/chat.py +114 -22
- ommlds/minichain/backends/impls/google/search.py +7 -2
- ommlds/minichain/backends/impls/google/stream.py +219 -0
- ommlds/minichain/backends/impls/google/tools.py +149 -0
- ommlds/minichain/backends/impls/groq/__init__.py +0 -0
- ommlds/minichain/backends/impls/groq/chat.py +75 -0
- ommlds/minichain/backends/impls/groq/names.py +48 -0
- ommlds/minichain/backends/impls/groq/protocol.py +143 -0
- ommlds/minichain/backends/impls/groq/stream.py +125 -0
- ommlds/minichain/backends/impls/huggingface/repos.py +1 -5
- ommlds/minichain/backends/impls/llamacpp/chat.py +40 -22
- ommlds/minichain/backends/impls/llamacpp/completion.py +9 -5
- ommlds/minichain/backends/impls/llamacpp/format.py +4 -2
- ommlds/minichain/backends/impls/llamacpp/stream.py +43 -23
- ommlds/minichain/backends/impls/mistral.py +20 -5
- ommlds/minichain/backends/impls/mlx/chat.py +101 -24
- ommlds/minichain/backends/impls/ollama/__init__.py +0 -0
- ommlds/minichain/backends/impls/ollama/chat.py +199 -0
- ommlds/minichain/backends/impls/openai/chat.py +18 -8
- ommlds/minichain/backends/impls/openai/completion.py +10 -3
- ommlds/minichain/backends/impls/openai/embedding.py +10 -3
- ommlds/minichain/backends/impls/openai/format.py +131 -106
- ommlds/minichain/backends/impls/openai/names.py +31 -5
- ommlds/minichain/backends/impls/openai/stream.py +43 -25
- ommlds/minichain/backends/impls/sentencepiece/tokens.py +9 -6
- ommlds/minichain/backends/impls/tavily.py +66 -0
- ommlds/minichain/backends/impls/tinygrad/chat.py +30 -20
- ommlds/minichain/backends/impls/tokenizers/tokens.py +9 -6
- ommlds/minichain/backends/impls/transformers/sentence.py +6 -3
- ommlds/minichain/backends/impls/transformers/tokens.py +10 -7
- ommlds/minichain/backends/impls/transformers/transformers.py +160 -37
- ommlds/minichain/backends/strings/parsing.py +1 -1
- ommlds/minichain/backends/strings/resolving.py +4 -1
- ommlds/minichain/chat/_marshal.py +16 -9
- ommlds/minichain/chat/choices/adapters.py +4 -4
- ommlds/minichain/chat/choices/services.py +1 -1
- ommlds/minichain/chat/choices/stream/__init__.py +0 -0
- ommlds/minichain/chat/choices/stream/adapters.py +35 -0
- ommlds/minichain/chat/choices/stream/joining.py +31 -0
- ommlds/minichain/chat/choices/stream/services.py +45 -0
- ommlds/minichain/chat/choices/stream/types.py +43 -0
- ommlds/minichain/chat/choices/types.py +2 -2
- ommlds/minichain/chat/history.py +3 -3
- ommlds/minichain/chat/messages.py +55 -19
- ommlds/minichain/chat/services.py +3 -3
- ommlds/minichain/chat/stream/_marshal.py +16 -0
- ommlds/minichain/chat/stream/joining.py +85 -0
- ommlds/minichain/chat/stream/services.py +15 -21
- ommlds/minichain/chat/stream/types.py +32 -19
- ommlds/minichain/chat/tools/execution.py +8 -7
- ommlds/minichain/chat/tools/ids.py +9 -15
- ommlds/minichain/chat/tools/parsing.py +17 -26
- ommlds/minichain/chat/transforms/base.py +29 -38
- ommlds/minichain/chat/transforms/metadata.py +30 -4
- ommlds/minichain/chat/transforms/services.py +9 -11
- ommlds/minichain/content/_marshal.py +44 -20
- ommlds/minichain/content/json.py +13 -0
- ommlds/minichain/content/materialize.py +14 -21
- ommlds/minichain/content/prepare.py +4 -0
- ommlds/minichain/content/transforms/interleave.py +1 -1
- ommlds/minichain/content/transforms/squeeze.py +1 -1
- ommlds/minichain/content/transforms/stringify.py +1 -1
- ommlds/minichain/json.py +20 -0
- ommlds/minichain/lib/code/__init__.py +0 -0
- ommlds/minichain/lib/code/prompts.py +6 -0
- ommlds/minichain/lib/fs/binfiles.py +108 -0
- ommlds/minichain/lib/fs/context.py +126 -0
- ommlds/minichain/lib/fs/errors.py +101 -0
- ommlds/minichain/lib/fs/suggestions.py +36 -0
- ommlds/minichain/lib/fs/tools/__init__.py +0 -0
- ommlds/minichain/lib/fs/tools/edit.py +104 -0
- ommlds/minichain/lib/fs/tools/ls.py +38 -0
- ommlds/minichain/lib/fs/tools/read.py +115 -0
- ommlds/minichain/lib/fs/tools/recursivels/__init__.py +0 -0
- ommlds/minichain/lib/fs/tools/recursivels/execution.py +40 -0
- ommlds/minichain/lib/todo/__init__.py +0 -0
- ommlds/minichain/lib/todo/context.py +54 -0
- ommlds/minichain/lib/todo/tools/__init__.py +0 -0
- ommlds/minichain/lib/todo/tools/read.py +44 -0
- ommlds/minichain/lib/todo/tools/write.py +335 -0
- ommlds/minichain/lib/todo/types.py +60 -0
- ommlds/minichain/llms/_marshal.py +25 -17
- ommlds/minichain/llms/types.py +4 -0
- ommlds/minichain/registries/globals.py +18 -4
- ommlds/minichain/resources.py +68 -45
- ommlds/minichain/search.py +1 -1
- ommlds/minichain/services/_marshal.py +46 -39
- ommlds/minichain/services/facades.py +3 -3
- ommlds/minichain/services/services.py +1 -1
- ommlds/minichain/standard.py +8 -0
- ommlds/minichain/stream/services.py +152 -38
- ommlds/minichain/stream/wrap.py +22 -24
- ommlds/minichain/text/toolparsing/llamacpp/hermes2.py +3 -2
- ommlds/minichain/text/toolparsing/llamacpp/llama31.py +3 -2
- ommlds/minichain/text/toolparsing/llamacpp/utils.py +3 -2
- ommlds/minichain/tools/_marshal.py +1 -1
- ommlds/minichain/tools/execution/catalog.py +2 -1
- ommlds/minichain/tools/execution/context.py +34 -14
- ommlds/minichain/tools/execution/errors.py +15 -0
- ommlds/minichain/tools/execution/executors.py +8 -3
- ommlds/minichain/tools/execution/reflect.py +40 -5
- ommlds/minichain/tools/fns.py +46 -9
- ommlds/minichain/tools/jsonschema.py +14 -5
- ommlds/minichain/tools/reflect.py +54 -18
- ommlds/minichain/tools/types.py +33 -1
- ommlds/minichain/utils.py +27 -0
- ommlds/minichain/vectors/_marshal.py +11 -10
- ommlds/minichain/vectors/types.py +1 -1
- ommlds/nanochat/LICENSE +21 -0
- ommlds/nanochat/__init__.py +0 -0
- ommlds/nanochat/rustbpe/LICENSE +21 -0
- ommlds/nanochat/tokenizers.py +406 -0
- ommlds/server/cli.py +1 -2
- ommlds/server/server.py +5 -5
- ommlds/server/service.py +1 -1
- ommlds/specs/__init__.py +0 -0
- ommlds/specs/mcp/__init__.py +0 -0
- ommlds/specs/mcp/_marshal.py +23 -0
- ommlds/specs/mcp/clients.py +146 -0
- ommlds/specs/mcp/protocol.py +371 -0
- ommlds/tools/git.py +35 -12
- ommlds/tools/ocr.py +8 -9
- ommlds/wiki/analyze.py +6 -7
- ommlds/wiki/text/mfh.py +1 -5
- ommlds/wiki/text/wtp.py +1 -3
- ommlds/wiki/utils/xml.py +5 -5
- {ommlds-0.0.0.dev426.dist-info → ommlds-0.0.0.dev485.dist-info}/METADATA +24 -21
- ommlds-0.0.0.dev485.dist-info/RECORD +436 -0
- ommlds/cli/backends/standard.py +0 -20
- ommlds/cli/sessions/chat/base.py +0 -42
- ommlds/cli/sessions/chat/interactive.py +0 -73
- ommlds/cli/sessions/chat/printing.py +0 -96
- ommlds/cli/sessions/chat/prompt.py +0 -143
- ommlds/cli/sessions/chat/state.py +0 -109
- ommlds/cli/sessions/chat/tools.py +0 -91
- ommlds/cli/sessions/completion/completion.py +0 -44
- ommlds/cli/sessions/embedding/embedding.py +0 -42
- ommlds/cli/tools/config.py +0 -13
- ommlds/cli/tools/inject.py +0 -64
- ommlds/minichain/chat/stream/adapters.py +0 -69
- ommlds/minichain/lib/fs/ls/execution.py +0 -32
- ommlds-0.0.0.dev426.dist-info/RECORD +0 -303
- /ommlds/{cli/tools → backends/google}/__init__.py +0 -0
- /ommlds/{huggingface.py → backends/huggingface.py} +0 -0
- /ommlds/{minichain/lib/fs/ls → cli/content}/__init__.py +0 -0
- /ommlds/minichain/lib/fs/{ls → tools/recursivels}/rendering.py +0 -0
- /ommlds/minichain/lib/fs/{ls → tools/recursivels}/running.py +0 -0
- {ommlds-0.0.0.dev426.dist-info → ommlds-0.0.0.dev485.dist-info}/WHEEL +0 -0
- {ommlds-0.0.0.dev426.dist-info → ommlds-0.0.0.dev485.dist-info}/entry_points.txt +0 -0
- {ommlds-0.0.0.dev426.dist-info → ommlds-0.0.0.dev485.dist-info}/licenses/LICENSE +0 -0
- {ommlds-0.0.0.dev426.dist-info → ommlds-0.0.0.dev485.dist-info}/top_level.txt +0 -0
ommlds/minichain/backends/impls/groq/stream.py (new file)

@@ -0,0 +1,125 @@
+import typing as ta
+
+from omlish import check
+from omlish import marshal as msh
+from omlish import typedvalues as tv
+from omlish.formats import json
+from omlish.http import all as http
+from omlish.http import sse
+from omlish.io.buffers import DelimitingBuffer
+
+from .....backends.groq import protocol as pt
+from ....chat.choices.services import ChatChoicesOutputs
+from ....chat.choices.stream.services import ChatChoicesStreamRequest
+from ....chat.choices.stream.services import ChatChoicesStreamResponse
+from ....chat.choices.stream.services import static_check_is_chat_choices_stream_service
+from ....chat.choices.stream.types import AiChoicesDeltas
+from ....chat.tools.types import Tool
+from ....configs import Config
+from ....resources import UseResources
+from ....standard import ApiKey
+from ....stream.services import StreamResponseSink
+from ....stream.services import new_stream_response
+from .chat import GroqChatChoicesService
+from .names import MODEL_NAMES
+from .protocol import build_gq_request_messages
+from .protocol import build_gq_request_tool
+from .protocol import build_mc_ai_choice_deltas
+
+
+##
+
+
+# @omlish-manifest $.minichain.registries.manifests.RegistryManifest(
+#     name='groq',
+#     type='ChatChoicesStreamService',
+# )
+@static_check_is_chat_choices_stream_service
+class GroqChatChoicesStreamService:
+    def __init__(
+            self,
+            *configs: Config,
+            http_client: http.AsyncHttpClient | None = None,
+    ) -> None:
+        super().__init__()
+
+        self._http_client = http_client
+
+        with tv.consume(*configs) as cc:
+            self._model_name = cc.pop(GroqChatChoicesService.DEFAULT_MODEL_NAME)
+            self._api_key = ApiKey.pop_secret(cc, env='GROQ_API_KEY')
+
+    READ_CHUNK_SIZE: ta.ClassVar[int] = -1
+
+    async def invoke(self, request: ChatChoicesStreamRequest) -> ChatChoicesStreamResponse:
+        tools: list[pt.ChatCompletionRequest.Tool] = []
+        with tv.TypedValues(*request.options).consume() as oc:
+            t: Tool
+            for t in oc.pop(Tool, []):
+                tools.append(build_gq_request_tool(t))
+
+        gq_request = pt.ChatCompletionRequest(
+            messages=build_gq_request_messages(request.v),
+            model=MODEL_NAMES.resolve(self._model_name.v),
+            tools=tools or None,
+            stream=True,
+        )
+
+        raw_request = msh.marshal(gq_request)
+
+        http_request = http.HttpRequest(
+            'https://api.groq.com/openai/v1/chat/completions',
+            headers={
+                http.consts.HEADER_CONTENT_TYPE: http.consts.CONTENT_TYPE_JSON,
+                http.consts.HEADER_AUTH: http.consts.format_bearer_auth_header(check.not_none(self._api_key).reveal()),
+            },
+            data=json.dumps(raw_request).encode('utf-8'),
+        )
+
+        async with UseResources.or_new(request.options) as rs:
+            http_client = await rs.enter_async_context(http.manage_async_client(self._http_client))
+            http_response = await rs.enter_async_context(await http_client.stream_request(http_request))
+
+            async def inner(sink: StreamResponseSink[AiChoicesDeltas]) -> ta.Sequence[ChatChoicesOutputs]:
+                db = DelimitingBuffer([b'\r', b'\n', b'\r\n'])
+                sd = sse.SseDecoder()
+                while True:
+                    b = await http_response.stream.read1(self.READ_CHUNK_SIZE)
+                    for l in db.feed(b):
+                        if isinstance(l, DelimitingBuffer.Incomplete):
+                            # FIXME: handle
+                            return []
+
+                        # FIXME: https://platform.openai.com/docs/guides/function-calling?api-mode=responses#streaming
+                        for so in sd.process_line(l):
+                            if isinstance(so, sse.SseEvent) and so.type == b'message':
+                                ss = so.data.decode('utf-8')
+                                if ss == '[DONE]':
+                                    return []
+
+                                sj = json.loads(ss)  # ChatCompletionChunk
+
+                                check.state(sj['object'] == 'chat.completion.chunk')
+
+                                ccc = msh.unmarshal(sj, pt.ChatCompletionChunk)
+
+                                # FIXME: stop reason
+                                if not ccc.choices:
+                                    continue
+
+                                if any(choice.finish_reason for choice in ccc.choices):
+                                    check.state(all(choice.finish_reason for choice in ccc.choices))
+                                    break
+
+                                await sink.emit(AiChoicesDeltas([
+                                    build_mc_ai_choice_deltas(choice.delta)
+                                    for choice in ccc.choices
+                                ]))
+
+                    if not b:
+                        return []
+
+            # raw_response = json.loads(check.not_none(http_response.data).decode('utf-8'))
+            # return rh.build_response(raw_response)
+
+            return await new_stream_response(rs, inner)
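The new Groq backend consumes the OpenAI-style SSE wire format (`data: {...}` lines terminated by `data: [DONE]`) that the `/openai/v1/chat/completions` endpoint emits. Below is a minimal self-contained sketch of that parsing pattern, with plain-Python stand-ins for omlish's `DelimitingBuffer`/`SseDecoder` pipeline (`iter_sse_data` and `iter_content_deltas` are illustrative helpers, not part of the package):

```python
import json
import typing as ta


def iter_sse_data(chunks: ta.Iterable[bytes]) -> ta.Iterator[str]:
    # Split a byte stream on line boundaries and yield the payload of each
    # 'data:' line -- a crude stand-in for DelimitingBuffer + SseDecoder.
    buf = b''
    for chunk in chunks:
        buf += chunk
        while (i := buf.find(b'\n')) >= 0:
            line, buf = buf[:i].rstrip(b'\r'), buf[i + 1:]
            if line.startswith(b'data:'):
                yield line[len(b'data:'):].strip().decode('utf-8')


def iter_content_deltas(chunks: ta.Iterable[bytes]) -> ta.Iterator[str]:
    # Decode chat.completion.chunk objects until the [DONE] sentinel,
    # yielding only the incremental content strings.
    for ss in iter_sse_data(chunks):
        if ss == '[DONE]':
            return
        sj = json.loads(ss)
        if sj.get('object') != 'chat.completion.chunk':
            continue
        for choice in sj.get('choices', []):
            if (content := choice.get('delta', {}).get('content')):
                yield content


raw = [
    b'data: {"object": "chat.completion.chunk", "choices": [{"delta": {"content": "Hel"}}]}\n\n',
    b'data: {"object": "chat.completion.chunk", "choices": [{"delta": {"content": "lo"}}]}\n\n',
    b'data: [DONE]\n\n',
]
assert ''.join(iter_content_deltas(raw)) == 'Hello'
```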
ommlds/minichain/backends/impls/huggingface/repos.py

@@ -3,8 +3,6 @@ TODO:
  - local-only check first
  - cat ~/.cache/.../models/.../refs/main -> c5bfd839cd4cda0e5a39a97e00218d9c56e468af
 """
-import typing as ta
-
 from omlish import lang
 
 from ....models.configs import ModelRepo
@@ -12,10 +10,8 @@ from ....models.repos.resolving import ModelRepoResolver
 from ....models.repos.resolving import ResolvedModelRepo
 
 
-if ta.TYPE_CHECKING:
+with lang.auto_proxy_import(globals()):
     import huggingface_hub as hf
-else:
-    hf = lang.proxy_import('huggingface_hub')
 
 
 ##
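This release repeatedly swaps the old `if ta.TYPE_CHECKING: import X` / `else: X = lang.proxy_import(...)` pair for a single `with lang.auto_proxy_import(globals()):` block (here and in the llamacpp modules below). A rough sketch of the lazy-import idea behind such helpers, as an illustration only (omlish's actual implementation differs):

```python
import importlib
import types
import typing as ta


class _LazyModule:
    # Defers the real import until the first attribute access, so importing
    # the enclosing module stays cheap even when the dependency
    # (e.g. huggingface_hub) is heavy or not installed.
    def __init__(self, name: str) -> None:
        self._name = name
        self._mod: types.ModuleType | None = None

    def __getattr__(self, attr: str) -> ta.Any:
        if self._mod is None:
            self._mod = importlib.import_module(self._name)
        return getattr(self._mod, attr)


hf = _LazyModule('huggingface_hub')  # nothing is imported yet
# hf.snapshot_download(...)          # first attribute access triggers the import
```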
ommlds/minichain/backends/impls/llamacpp/chat.py

@@ -2,20 +2,18 @@ import contextlib
 import os.path
 import typing as ta
 
-import llama_cpp as lcc
-
 from omlish import check
 from omlish import lang
 from omlish import typedvalues as tv
 
-from .....backends import llamacpp as lcu
 from ....chat.choices.services import ChatChoicesRequest
 from ....chat.choices.services import ChatChoicesResponse
 from ....chat.choices.services import static_check_is_chat_choices_service
 from ....chat.choices.types import AiChoice
 from ....chat.choices.types import ChatChoicesOptions
 from ....chat.messages import AiMessage
-from ....chat.messages import
+from ....chat.messages import ToolUseMessage
+from ....chat.messages import ToolUseResultMessage
 from ....chat.tools.types import Tool
 from ....configs import Config
 from ....llms.types import MaxTokens
@@ -26,6 +24,21 @@ from .format import ROLES_MAP
 from .format import get_msg_content
 
 
+with lang.auto_proxy_import(globals()):
+    import llama_cpp as lcc
+
+    from .....backends import llamacpp as lcu
+
+
+##
+
+
+# @omlish-manifest $.minichain.backends.strings.manifests.BackendStringsManifest(
+#     ['ChatChoicesService'],
+#     'llamacpp',
+# )
+
+
 ##
@@ -54,7 +67,7 @@ class LlamacppChatChoicesService:
             temperatur=Temperature,
         )
 
-    def invoke(self, request: ChatChoicesRequest) -> ChatChoicesResponse:
+    async def invoke(self, request: ChatChoicesRequest) -> ChatChoicesResponse:
         kwargs: dict = dict(
             # temperature=0,
             max_tokens=1024,
@@ -100,29 +113,34 @@ class LlamacppChatChoicesService:
 
         ims: list = []
         for rm in request.v:
-            if isinstance(rm,
+            if isinstance(rm, ToolUseResultMessage):
                 ims.append(dict(
                     role='tool',
-                    **(dict(id=rm.id) if rm.id is not None else {}),
-                    name=rm.name,
-                    content=check.isinstance(rm.c, str),
+                    **(dict(id=rm.tur.id) if rm.tur.id is not None else {}),
+                    name=rm.tur.name,
+                    content=check.isinstance(rm.tur.c, str),
                 ))
+
             elif isinstance(rm, AiMessage):
-                tcs: list[dict] = []
-                for ter in rm.tool_exec_requests or []:
-                    tcs.append(dict(
-                        id=check.not_none(ter.id),
-                        type='function',
-                        function=dict(
-                            name=ter.name,
-                            arguments=check.isinstance(ter.raw_args, str),
-                        ),
-                    ))
                 ims.append(dict(
                     role=ROLES_MAP[type(rm)],
                     **(dict(content=mc) if (mc := get_msg_content(rm)) is not None else {}),
-                    **(dict(tool_calls=tcs) if tcs else {}),
                 ))
+
+            elif isinstance(rm, ToolUseMessage):
+                ims.append(dict(
+                    role=ROLES_MAP[type(rm)],
+                    content='',
+                    tool_calls=[dict(
+                        id=check.not_none(rm.tu.id),
+                        type='function',
+                        function=dict(
+                            name=rm.tu.name,
+                            arguments=check.isinstance(rm.tu.raw_args, str),
+                        ),
+                    )],
+                ))
+
             else:
                 ims.append(dict(
                     role=ROLES_MAP[type(rm)],
@@ -135,8 +153,8 @@ class LlamacppChatChoicesService:
         )
 
         out: list[AiChoice] = []
-        for c in output['choices']:
+        for c in ta.cast(ta.Any, output)['choices']:
             m = c['message']
-            out.append(AiChoice(AiMessage(m['content'])))
+            out.append(AiChoice([AiMessage(m['content'])]))
 
         return ChatChoicesResponse(out)
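The chat.py rewrite retires the old `rm.tool_exec_requests` loop on `AiMessage` and gives tool use dedicated branches: a `ToolUseMessage` becomes an OpenAI-style assistant message carrying `tool_calls`, and a `ToolUseResultMessage` becomes a `role='tool'` message. The resulting dict shapes, with illustrative values (field layout taken from the diff above):

```python
ims = [
    {
        'role': 'assistant',                    # ToolUseMessage branch
        'content': '',
        'tool_calls': [{
            'id': 'call_1',                     # rm.tu.id
            'type': 'function',
            'function': {
                'name': 'get_weather',          # rm.tu.name
                'arguments': '{"city": "SF"}',  # rm.tu.raw_args (raw JSON string)
            },
        }],
    },
    {
        'role': 'tool',                         # ToolUseResultMessage branch
        'id': 'call_1',                         # rm.tur.id (only if present)
        'name': 'get_weather',                  # rm.tur.name
        'content': '{"temp_f": 61}',            # rm.tur.c
    },
]
```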
ommlds/minichain/backends/impls/llamacpp/completion.py

@@ -2,11 +2,9 @@ import contextlib
 import os.path
 import typing as ta
 
-import llama_cpp as lcc
-
+from omlish import lang
 from omlish import typedvalues as tv
 
-from .....backends import llamacpp as lcu
 from ....completion import CompletionOption
 from ....completion import CompletionRequest
 from ....completion import CompletionResponse
@@ -18,6 +16,12 @@ from ....llms.types import Temperature
 from ....models.configs import ModelPath
 
 
+with lang.auto_proxy_import(globals()):
+    import llama_cpp as lcc
+
+    from .....backends import llamacpp as lcu
+
+
 ##
@@ -51,7 +55,7 @@ class LlamacppCompletionService:
             temperatur=Temperature,
         )
 
-    def invoke(self, request: CompletionRequest) -> CompletionResponse:
+    async def invoke(self, request: CompletionRequest) -> CompletionResponse:
         kwargs: dict = dict(
             # temperature=0,
             max_tokens=1024,
@@ -74,4 +78,4 @@ class LlamacppCompletionService:
             **kwargs,
         )
 
-        return CompletionResponse(output['choices'][0]['text'])
+        return CompletionResponse(ta.cast(ta.Any, output)['choices'][0]['text'])
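The new `ta.cast(ta.Any, output)` wrapper here (and in chat.py above) appears to work around llama-cpp-python's union return type, where the streaming and non-streaming cases share one signature, so subscripting the result directly fails type-checking; the in-code "the types are awful lol" comment in stream.py points the same way. A sketch of the same narrowing done at a call site, under that assumption:

```python
import typing as ta


def first_completion_text(output: ta.Any) -> str:
    # Assumes a non-streaming call, so the result is a plain response dict
    # rather than a chunk iterator.
    completion = ta.cast('dict[str, ta.Any]', output)
    return completion['choices'][0]['text']
```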
ommlds/minichain/backends/impls/llamacpp/format.py

@@ -5,7 +5,8 @@ from omlish import check
 from ....chat.messages import AiMessage
 from ....chat.messages import Message
 from ....chat.messages import SystemMessage
-from ....chat.messages import
+from ....chat.messages import ToolUseMessage
+from ....chat.messages import ToolUseResultMessage
 from ....chat.messages import UserMessage
 
 
@@ -16,7 +17,8 @@ ROLES_MAP: ta.Mapping[type[Message], str] = {
     SystemMessage: 'system',
     UserMessage: 'user',
     AiMessage: 'assistant',
-
+    ToolUseMessage: 'assistant',
+    ToolUseResultMessage: 'tool',
 }
 
 
ommlds/minichain/backends/impls/llamacpp/stream.py

@@ -2,29 +2,42 @@ import contextlib
 import threading
 import typing as ta  # noqa
 
-import llama_cpp as lcc
-
 from omlish import check
 from omlish import lang
 from omlish import typedvalues as tv
 
-from .....backends import llamacpp as lcu
 from ....chat.choices.services import ChatChoicesOutputs
-from ....chat.stream.services import ChatChoicesStreamRequest
-from ....chat.stream.services import ChatChoicesStreamResponse
-from ....chat.stream.services import static_check_is_chat_choices_stream_service
-from ....chat.stream.types import
-from ....chat.stream.types import
-from ....chat.stream.types import
+from ....chat.choices.stream.services import ChatChoicesStreamRequest
+from ....chat.choices.stream.services import ChatChoicesStreamResponse
+from ....chat.choices.stream.services import static_check_is_chat_choices_stream_service
+from ....chat.choices.stream.types import AiChoiceDeltas
+from ....chat.choices.stream.types import AiChoicesDeltas
+from ....chat.stream.types import ContentAiDelta
 from ....configs import Config
 from ....models.configs import ModelPath
 from ....resources import UseResources
+from ....stream.services import StreamResponseSink
 from ....stream.services import new_stream_response
 from .chat import LlamacppChatChoicesService
 from .format import ROLES_MAP
 from .format import get_msg_content
 
 
+with lang.auto_proxy_import(globals()):
+    import llama_cpp as lcc
+
+    from .....backends import llamacpp as lcu
+
+
+##
+
+
+# @omlish-manifest $.minichain.backends.strings.manifests.BackendStringsManifest(
+#     ['ChatChoicesStreamService'],
+#     'llamacpp',
+# )
+
+
 ##
@@ -49,10 +62,10 @@ class LlamacppChatChoicesStreamService(lang.ExitStacked):
             verbose=False,
         )))
 
-    def invoke(self, request: ChatChoicesStreamRequest) -> ChatChoicesStreamResponse:
+    async def invoke(self, request: ChatChoicesStreamRequest) -> ChatChoicesStreamResponse:
         lcu.install_logging_hook()
 
-        with UseResources.or_new(request.options) as rs:
+        async with UseResources.or_new(request.options) as rs:
             rs.enter_context(self._lock)
 
             model: ta.Any = self._load_model()  # FIXME: the types are awful lol
@@ -74,19 +87,26 @@ class LlamacppChatChoicesStreamService(lang.ExitStacked):
 
             rs.enter_context(lang.defer(close_output))
 
-            def
+            async def inner(sink: StreamResponseSink[AiChoicesDeltas]) -> ta.Sequence[ChatChoicesOutputs] | None:
+                last_role: ta.Any = None
+
                 for chunk in output:
                     check.state(chunk['object'] == 'chat.completion.chunk')
-
-
-
-
-
-
-
-
-
-
+
+                    choice = check.single(chunk['choices'])
+
+                    if not (delta := choice.get('delta', {})):
+                        continue
+
+                    # FIXME: check role is assistant
+                    if (role := delta.get('role')) != last_role:
+                        last_role = role
+
+                    # FIXME: stop reason
+
+                    if (content := delta.get('content', '')):
+                        await sink.emit(AiChoicesDeltas([AiChoiceDeltas([ContentAiDelta(content)])]))
+
                 return None
 
-            return new_stream_response(rs,
+            return await new_stream_response(rs, inner)
ommlds/minichain/backends/impls/mistral.py

@@ -21,13 +21,18 @@ from ...chat.messages import UserMessage
 ##
 
 
+# TODO: generalize lol
+class TooManyRequestsMistralError(Exception):
+    pass
+
+
 # @omlish-manifest $.minichain.registries.manifests.RegistryManifest(
 #     name='mistral',
 #     type='ChatChoicesService',
 # )
 @static_check_is_chat_choices_service
 class MistralChatChoicesService:
-    model: ta.ClassVar[str] = 'mistral-
+    model: ta.ClassVar[str] = 'mistral-medium-2508'
 
     ROLES_MAP: ta.ClassVar[ta.Mapping[type[Message], str]] = {
         SystemMessage: 'system',
@@ -35,10 +40,16 @@ class MistralChatChoicesService:
         AiMessage: 'assistant',
     }
 
-    def __init__(
+    def __init__(
+            self,
+            *,
+            api_key: str | None = None,
+            http_client: http.AsyncHttpClient | None = None,
+    ) -> None:
         super().__init__()
 
         self._api_key = api_key
+        self._http_client = http_client
 
     def _get_msg_content(self, m: Message) -> str | None:
         if isinstance(m, AiMessage):
@@ -50,7 +61,7 @@ class MistralChatChoicesService:
         else:
             raise TypeError(m)
 
-    def invoke(
+    async def invoke(
             self,
             request: ChatChoicesRequest,
     ) -> ChatChoicesResponse:
@@ -68,7 +79,7 @@ class MistralChatChoicesService:
             ],
         }
 
-        resp = http.
+        resp = await http.async_request(
             'https://api.mistral.ai/v1/chat/completions',
             method='POST',
             data=json.dumps_compact(req_dct).encode('utf-8'),
@@ -77,11 +88,15 @@ class MistralChatChoicesService:
                 'Accept': 'application/json',
                 'Authorization': f'Bearer {key}',
             },
+            client=self._http_client,
         )
 
+        if resp.status == 429:
+            raise TooManyRequestsMistralError
+
         resp_dct = json.loads(check.not_none(resp.data).decode('utf-8'))
 
         return ChatChoicesResponse([
-            AiChoice(AiMessage(c['message']['content']))
+            AiChoice([AiMessage(c['message']['content'])])
             for c in resp_dct['choices']
         ])
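With 429s now surfaced as a dedicated `TooManyRequestsMistralError`, callers can layer a retry policy on top. A sketch of exponential backoff at the call site, assuming the service and request types shown in the diff (the backoff policy itself is illustrative):

```python
import asyncio
import typing as ta

from ommlds.minichain.backends.impls.mistral import TooManyRequestsMistralError


async def invoke_with_backoff(
        svc: ta.Any,  # e.g. a MistralChatChoicesService
        request: ta.Any,
        *,
        tries: int = 5,
        base_delay: float = 1.0,
) -> ta.Any:
    for attempt in range(tries):
        try:
            return await svc.invoke(request)
        except TooManyRequestsMistralError:
            if attempt == tries - 1:
                raise  # out of retries; let the caller see the 429
            await asyncio.sleep(base_delay * (2 ** attempt))  # 1s, 2s, 4s, ...
    raise AssertionError  # unreachable
```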