ommlds-0.0.0.dev480-py3-none-any.whl → ommlds-0.0.0.dev481-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ommlds/.omlish-manifests.json +18 -18
- ommlds/backends/llamacpp/logging.py +4 -1
- ommlds/backends/mlx/caching.py +7 -3
- ommlds/backends/mlx/cli.py +10 -7
- ommlds/backends/mlx/generation.py +18 -16
- ommlds/backends/mlx/limits.py +10 -6
- ommlds/backends/mlx/loading.py +7 -4
- ommlds/backends/transformers/__init__.py +14 -0
- ommlds/minichain/_dataclasses.py +46282 -0
- ommlds/minichain/backends/impls/duckduckgo/search.py +5 -1
- ommlds/minichain/backends/impls/huggingface/repos.py +1 -5
- ommlds/minichain/backends/impls/llamacpp/chat.py +6 -3
- ommlds/minichain/backends/impls/llamacpp/completion.py +7 -3
- ommlds/minichain/backends/impls/llamacpp/stream.py +6 -3
- ommlds/minichain/backends/impls/mlx/chat.py +6 -3
- ommlds/minichain/backends/impls/sentencepiece/tokens.py +9 -6
- ommlds/minichain/backends/impls/tinygrad/chat.py +7 -4
- ommlds/minichain/backends/impls/tokenizers/tokens.py +9 -6
- ommlds/minichain/backends/impls/transformers/sentence.py +5 -2
- ommlds/minichain/backends/impls/transformers/tokens.py +9 -6
- ommlds/minichain/backends/impls/transformers/transformers.py +10 -8
- ommlds/specs/mcp/clients.py +146 -0
- ommlds/specs/mcp/protocol.py +123 -18
- {ommlds-0.0.0.dev480.dist-info → ommlds-0.0.0.dev481.dist-info}/METADATA +3 -3
- {ommlds-0.0.0.dev480.dist-info → ommlds-0.0.0.dev481.dist-info}/RECORD +29 -27
- {ommlds-0.0.0.dev480.dist-info → ommlds-0.0.0.dev481.dist-info}/WHEEL +0 -0
- {ommlds-0.0.0.dev480.dist-info → ommlds-0.0.0.dev481.dist-info}/entry_points.txt +0 -0
- {ommlds-0.0.0.dev480.dist-info → ommlds-0.0.0.dev481.dist-info}/licenses/LICENSE +0 -0
- {ommlds-0.0.0.dev480.dist-info → ommlds-0.0.0.dev481.dist-info}/top_level.txt +0 -0
ommlds/minichain/backends/impls/duckduckgo/search.py

@@ -1,4 +1,4 @@
-import ddgs
+from omlish import lang
 
 from ....search import SearchHit
 from ....search import SearchHits
@@ -7,6 +7,10 @@ from ....search import SearchResponse
 from ....search import static_check_is_search_service
 
 
+with lang.auto_proxy_import(globals()):
+    import ddgs
+
+
 ##
 
 
ommlds/minichain/backends/impls/huggingface/repos.py

@@ -3,8 +3,6 @@ TODO:
 - local-only check first
 - cat ~/.cache/.../models/.../refs/main -> c5bfd839cd4cda0e5a39a97e00218d9c56e468af
 """
-import typing as ta
-
 from omlish import lang
 
 from ....models.configs import ModelRepo
@@ -12,10 +10,8 @@ from ....models.repos.resolving import ModelRepoResolver
 from ....models.repos.resolving import ResolvedModelRepo
 
 
-if ta.TYPE_CHECKING:
+with lang.auto_proxy_import(globals()):
     import huggingface_hub as hf
-else:
-    hf = lang.proxy_import('huggingface_hub')
 
 
 ##
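The recurring change in this release, visible in the two diffs above and repeated for llama_cpp, mlx, sentencepiece, tinygrad, tokenizers, sentence_transformers, and transformers below, is to move heavy third-party imports out of module scope and into a `with lang.auto_proxy_import(globals()):` block. The huggingface_hub hunk shows the intent most clearly: the old `if ta.TYPE_CHECKING: ... else: hf = lang.proxy_import('huggingface_hub')` idiom collapses into the context manager. A minimal sketch of the resulting module shape, assuming `auto_proxy_import` binds each import made inside the block as a lazy proxy that is only resolved on first use, the way `proxy_import` does for a single module:

```python
from omlish import lang

with lang.auto_proxy_import(globals()):
    import sentencepiece as spm  # bound as a proxy; sentencepiece is not imported yet


##


def load_processor(model_path: str) -> 'spm.SentencePieceProcessor':  # quoted, see the note further down
    # The first real attribute access on `spm` is what triggers the actual import.
    return spm.SentencePieceProcessor(model_file=model_path)
```

Importing such a module stays cheap even when the backend package is large or not installed; the ImportError, if any, only surfaces when the service is actually exercised.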
ommlds/minichain/backends/impls/llamacpp/chat.py

@@ -2,13 +2,10 @@ import contextlib
 import os.path
 import typing as ta
 
-import llama_cpp as lcc
-
 from omlish import check
 from omlish import lang
 from omlish import typedvalues as tv
 
-from .....backends import llamacpp as lcu
 from ....chat.choices.services import ChatChoicesRequest
 from ....chat.choices.services import ChatChoicesResponse
 from ....chat.choices.services import static_check_is_chat_choices_service
@@ -27,6 +24,12 @@ from .format import ROLES_MAP
 from .format import get_msg_content
 
 
+with lang.auto_proxy_import(globals()):
+    import llama_cpp as lcc
+
+    from .....backends import llamacpp as lcu
+
+
 ##
 
 
ommlds/minichain/backends/impls/llamacpp/completion.py

@@ -2,11 +2,9 @@ import contextlib
 import os.path
 import typing as ta
 
-import llama_cpp as lcc
-
+from omlish import lang
 from omlish import typedvalues as tv
 
-from .....backends import llamacpp as lcu
 from ....completion import CompletionOption
 from ....completion import CompletionRequest
 from ....completion import CompletionResponse
@@ -18,6 +16,12 @@ from ....llms.types import Temperature
 from ....models.configs import ModelPath
 
 
+with lang.auto_proxy_import(globals()):
+    import llama_cpp as lcc
+
+    from .....backends import llamacpp as lcu
+
+
 ##
 
 
ommlds/minichain/backends/impls/llamacpp/stream.py

@@ -2,13 +2,10 @@ import contextlib
 import threading
 import typing as ta  # noqa
 
-import llama_cpp as lcc
-
 from omlish import check
 from omlish import lang
 from omlish import typedvalues as tv
 
-from .....backends import llamacpp as lcu
 from ....chat.choices.services import ChatChoicesOutputs
 from ....chat.choices.stream.services import ChatChoicesStreamRequest
 from ....chat.choices.stream.services import ChatChoicesStreamResponse
@@ -26,6 +23,12 @@ from .format import ROLES_MAP
 from .format import get_msg_content
 
 
+with lang.auto_proxy_import(globals()):
+    import llama_cpp as lcc
+
+    from .....backends import llamacpp as lcu
+
+
 ##
 
 
ommlds/minichain/backends/impls/mlx/chat.py

@@ -5,7 +5,6 @@ from omlish import check
 from omlish import lang
 from omlish import typedvalues as tv
 
-from .....backends import mlx as mlxu
 from ....chat.choices.services import ChatChoicesOutputs
 from ....chat.choices.services import ChatChoicesRequest
 from ....chat.choices.services import ChatChoicesResponse
@@ -33,6 +32,10 @@ from ....stream.services import StreamResponseSink
 from ....stream.services import new_stream_response
 
 
+with lang.auto_proxy_import(globals()):
+    from .....backends import mlx as mlxu
+
+
 ##
 
 
@@ -83,7 +86,7 @@ class BaseMlxChatChoicesService(lang.ExitStacked):
             raise TypeError(m)
 
     @lang.cached_function(transient=True)
-    def _load_model(self) -> mlxu.LoadedModel:
+    def _load_model(self) -> 'mlxu.LoadedModel':
         # FIXME: walk state, find all mx.arrays, dealloc/set to empty
         check.not_none(self._exit_stack)
 
@@ -100,7 +103,7 @@ class BaseMlxChatChoicesService(lang.ExitStacked):
         )
 
     @lang.cached_function(transient=True)
-    def _get_tokenizer(self) -> mlxu.tokenization.Tokenizer:
+    def _get_tokenizer(self) -> 'mlxu.tokenization.Tokenizer':
         tokenizer = self._load_model().tokenization.tokenizer
 
         if not (
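The quoting of the return annotations above (`-> 'mlxu.LoadedModel'`, `-> 'mlxu.tokenization.Tokenizer'`) is the necessary companion to the lazy import, and the same change repeats in the sentencepiece, tinygrad, tokenizers, and transformers diffs below. Before Python 3.14's deferred annotations (and absent `from __future__ import annotations`), the annotation expressions in a `def` header are evaluated when the `def` statement executes, so an unquoted `mlxu.LoadedModel` would touch the proxy and pull the backend in at import time; the string form is only resolved if something like `typing.get_type_hints()` asks for it. A self-contained illustration with a stand-in proxy (not omlish's actual implementation):

```python
class _LazyModule:
    """Stand-in for a proxy module: pretends to import the real module on first attribute access."""

    def __init__(self) -> None:
        self.loaded = False

    def __getattr__(self, name: str) -> type:
        self.loaded = True  # a real proxy would perform the import here
        return object       # ... and return the requested attribute


mlxu = _LazyModule()


def quoted() -> 'mlxu.LoadedModel':  # just a string, nothing is looked up at def time
    ...


assert not mlxu.loaded


def unquoted() -> mlxu.LoadedModel:  # evaluated immediately, forcing the "import"
    ...


assert mlxu.loaded  # holds on Python <= 3.13, where def-time evaluation is the default
```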
ommlds/minichain/backends/impls/sentencepiece/tokens.py

@@ -1,23 +1,26 @@
 import typing as ta
 
-import sentencepiece as spm
-
 from omlish import check
+from omlish import lang
 
 from .... import tokens as tks
 
 
+with lang.auto_proxy_import(globals()):
+    import sentencepiece as spm
+
+
 ##
 
 
-def build_vocab(spm_tokenizer: spm.SentencePieceProcessor) -> tks.Vocab:
+def build_vocab(spm_tokenizer: 'spm.SentencePieceProcessor') -> tks.Vocab:
     return tks.Vocab([
         (ta.cast(tks.Token, i), tks.TokenStr(spm_tokenizer.id_to_piece(i)))  # noqa
         for i in range(spm_tokenizer.get_piece_size())  # noqa
     ])
 
 
-def build_specials(spm_tokenizer: spm.SentencePieceProcessor) -> tks.SpecialTokens:
+def build_specials(spm_tokenizer: 'spm.SentencePieceProcessor') -> tks.SpecialTokens:
     # FIXME
     return tks.SpecialTokens([])
 
@@ -28,7 +31,7 @@ def build_specials(spm_tokenizer: spm.SentencePieceProcessor) -> tks.SpecialTokens:
 class SentencepieceTokenizer(tks.BaseTokenizer):
     def __init__(
             self,
-            spm_tokenizer: spm.SentencePieceProcessor,
+            spm_tokenizer: 'spm.SentencePieceProcessor',
     ) -> None:
         self._spm_tokenizer = check.isinstance(spm_tokenizer, spm.SentencePieceProcessor)
 
@@ -38,7 +41,7 @@ class SentencepieceTokenizer(tks.BaseTokenizer):
         )
 
     @property
-    def spm_tokenizer(self) -> spm.SentencePieceProcessor:
+    def spm_tokenizer(self) -> 'spm.SentencePieceProcessor':
         return self._spm_tokenizer
 
     #
ommlds/minichain/backends/impls/tinygrad/chat.py

@@ -4,7 +4,6 @@ import typing as ta
 from omlish import check
 from omlish import lang
 
-from .....backends.tinygrad.models import llama3 as tgl3
 from ....chat.choices.services import ChatChoicesOptions
 from ....chat.choices.services import ChatChoicesRequest
 from ....chat.choices.services import ChatChoicesResponse
@@ -28,6 +27,10 @@ from ....stream.services import StreamResponseSink
 from ....stream.services import new_stream_response
 
 
+with lang.auto_proxy_import(globals()):
+    from .....backends.tinygrad.models import llama3 as tgl3
+
+
 ##
 
 
@@ -39,7 +42,7 @@ def _load_model(
         *,
         size: str | None = None,
         temperature: float | None = None,
-) -> tgl3.Llama3Llm:
+) -> 'tgl3.Llama3Llm':
     if size is None:
         size = DEFAULT_SIZE
     if temperature is None:
@@ -58,7 +61,7 @@ def _load_model(
 
 
 def _prepare_toks(
-        llm: tgl3.Llama3Llm,
+        llm: 'tgl3.Llama3Llm',
         chat: Chat,
         options: ta.Sequence[ChatChoicesOptions],
 ) -> list[int]:
@@ -100,7 +103,7 @@ class BaseTinygradLlama3ChatService(lang.ExitStacked, lang.Abstract):
         self._temperature = temperature
 
     @lang.cached_function(transient=True)
-    def _load_model(self) -> tgl3.Llama3Llm:
+    def _load_model(self) -> 'tgl3.Llama3Llm':
         check.not_none(self._exit_stack)
 
         return _load_model(
ommlds/minichain/backends/impls/tokenizers/tokens.py

@@ -1,23 +1,26 @@
 import typing as ta
 
-import tokenizers as tos
-
 from omlish import check
+from omlish import lang
 
 from .... import tokens as tks
 
 
+with lang.auto_proxy_import(globals()):
+    import tokenizers as tos
+
+
 ##
 
 
-def build_vocab(tos_tokenizer: tos.Tokenizer) -> tks.Vocab:
+def build_vocab(tos_tokenizer: 'tos.Tokenizer') -> tks.Vocab:
     return tks.Vocab([
         (ta.cast(tks.Token, i), tks.TokenStr(s))
         for s, i in tos_tokenizer.get_vocab().items()
     ])
 
 
-def build_specials(tos_tokenizer: tos.Tokenizer) -> tks.SpecialTokens:
+def build_specials(tos_tokenizer: 'tos.Tokenizer') -> tks.SpecialTokens:
     # FIXME
     return tks.SpecialTokens([])
 
@@ -28,7 +31,7 @@ def build_specials(tos_tokenizer: tos.Tokenizer) -> tks.SpecialTokens:
 class TokenizersTokenizer(tks.BaseTokenizer):
     def __init__(
             self,
-            tos_tokenizer: tos.Tokenizer,
+            tos_tokenizer: 'tos.Tokenizer',
     ) -> None:
         self._tos_tokenizer = check.isinstance(tos_tokenizer, tos.Tokenizer)
 
@@ -38,7 +41,7 @@ class TokenizersTokenizer(tks.BaseTokenizer):
         )
 
     @property
-    def tos_tokenizer(self) -> tos.Tokenizer:
+    def tos_tokenizer(self) -> 'tos.Tokenizer':
         return self._tos_tokenizer
 
     #
ommlds/minichain/backends/impls/transformers/sentence.py

@@ -1,7 +1,6 @@
 import typing as ta
 
-import sentence_transformers as stfm
-
+from omlish import lang
 from omlish import typedvalues as tv
 
 from ....configs import Config
@@ -13,6 +12,10 @@ from ....vectors.embeddings import static_check_is_embedding_service
 from ....vectors.types import Vector
 
 
+with lang.auto_proxy_import(globals()):
+    import sentence_transformers as stfm
+
+
 ##
 
 
ommlds/minichain/backends/impls/transformers/tokens.py

@@ -1,17 +1,20 @@
 import typing as ta
 
-import transformers as tfm
-
 from omlish import check
 from omlish import collections as col
+from omlish import lang
 
 from .... import tokens as tks
 
 
+with lang.auto_proxy_import(globals()):
+    import transformers as tfm
+
+
 ##
 
 
-def build_vocab(tfm_tokenizer: tfm.PreTrainedTokenizerBase) -> tks.Vocab:
+def build_vocab(tfm_tokenizer: 'tfm.PreTrainedTokenizerBase') -> tks.Vocab:
     return tks.Vocab([
         (ta.cast(tks.Token, i), tks.TokenStr(s))
         for s, i in tfm_tokenizer.get_vocab().items()
@@ -32,7 +35,7 @@ SPECIAL_TOKEN_ATTR_MAP: col.BiMap[type[tks.SpecialToken], str] = col.make_bi_map({
 })
 
 
-def build_specials(tfm_tokenizer: tfm.PreTrainedTokenizerBase) -> tks.SpecialTokens:
+def build_specials(tfm_tokenizer: 'tfm.PreTrainedTokenizerBase') -> tks.SpecialTokens:
     return tks.SpecialTokens.from_dict({
         st: getattr(tfm_tokenizer, a)
         for st, a in SPECIAL_TOKEN_ATTR_MAP.items()
@@ -45,7 +48,7 @@ def build_specials(tfm_tokenizer: tfm.PreTrainedTokenizerBase) -> tks.SpecialTokens:
 class TransformersTokenizer(tks.BaseTokenizer):
     def __init__(
             self,
-            tfm_tokenizer: tfm.PreTrainedTokenizerBase,
+            tfm_tokenizer: 'tfm.PreTrainedTokenizerBase',
     ) -> None:
         self._tfm_tokenizer = check.isinstance(tfm_tokenizer, tfm.PreTrainedTokenizerBase)
 
@@ -55,7 +58,7 @@ class TransformersTokenizer(tks.BaseTokenizer):
         )
 
     @property
-    def tfm_tokenizer(self) -> tfm.PreTrainedTokenizerBase:
+    def tfm_tokenizer(self) -> 'tfm.PreTrainedTokenizerBase':
         return self._tfm_tokenizer
 
     #
ommlds/minichain/backends/impls/transformers/transformers.py

@@ -7,15 +7,11 @@ import sys
 import threading
 import typing as ta
 
-import transformers as tfm
-
 from omlish import check
 from omlish import lang
 from omlish import typedvalues as tv
 from omlish.asyncs.asyncio.sync import AsyncioBufferRelay
 
-from .....backends.transformers.filecache import file_cache_patch_context
-from .....backends.transformers.streamers import CancellableTextStreamer
 from ....chat.choices.services import ChatChoicesRequest
 from ....chat.choices.services import ChatChoicesResponse
 from ....chat.choices.services import static_check_is_chat_choices_service
@@ -44,6 +40,12 @@ from ....stream.services import new_stream_response
 from ...impls.huggingface.configs import HuggingfaceHubToken
 
 
+with lang.auto_proxy_import(globals()):
+    import transformers as tfm
+
+    from .....backends import transformers as tfm_u
+
+
 ##
 
 
@@ -168,7 +170,7 @@ class BaseTransformersChatChoicesService(lang.ExitStacked):
         self._huggingface_hub_token = HuggingfaceHubToken.pop_secret(cc, env='HUGGINGFACE_HUB_TOKEN')
 
     @lang.cached_function(transient=True)
-    def _load_pipeline(self) -> tfm.Pipeline:
+    def _load_pipeline(self) -> 'tfm.Pipeline':
         # FIXME: unload
         check.not_none(self._exit_stack)
 
@@ -181,7 +183,7 @@ class BaseTransformersChatChoicesService(lang.ExitStacked):
         for pkw_cfg in self._pipeline_kwargs:
             pkw.update(pkw_cfg.v)
 
-        with file_cache_patch_context(
+        with tfm_u.file_cache_patch_context(
                 local_first=True,
                 local_config_present_is_authoritative=True,
         ):
@@ -246,7 +248,7 @@ class TransformersChatChoicesStreamService(BaseTransformersChatChoicesService):
             if text or stream_end:
                 relay.push(text, *([None] if stream_end else []))
 
-        streamer = CancellableTextStreamer(
+        streamer = tfm_u.CancellableTextStreamer(
             check.not_none(pipeline.tokenizer),  # type: ignore[arg-type]
             streamer_callback,  # noqa
             skip_prompt=True,
@@ -255,7 +257,7 @@ class TransformersChatChoicesStreamService(BaseTransformersChatChoicesService):
 
         async with UseResources.or_new(request.options) as rs:
             thread = threading.Thread(
-                target=CancellableTextStreamer.ignoring_cancelled(pipeline),
+                target=tfm_u.CancellableTextStreamer.ignoring_cancelled(pipeline),
                 args=(
                     inputs,
                 ),
ommlds/specs/mcp/clients.py (new file)

@@ -0,0 +1,146 @@
+import contextlib
+import subprocess
+import typing as ta
+
+import anyio.abc
+
+from omlish import check
+from omlish import dataclasses as dc
+from omlish import marshal as msh
+from omlish.asyncs import anyio as aiu
+from omlish.specs import jsonrpc as jr
+
+from . import protocol as pt
+
+
+##
+
+
+class McpServerConnection:
+    def __init__(
+            self,
+            tg: anyio.abc.TaskGroup,
+            stream: anyio.abc.ByteStream,
+            *,
+            default_timeout: float | None = 30.,
+    ) -> None:
+        super().__init__()
+
+        self._conn = jr.Connection(
+            tg,
+            stream,
+            request_handler=self._handle_client_request,
+            notification_handler=self._handle_client_notification,
+            default_timeout=default_timeout,
+        )
+
+    #
+
+    @classmethod
+    def from_process(
+            cls,
+            tg: anyio.abc.TaskGroup,
+            proc: anyio.abc.Process,
+            **kwargs: ta.Any,
+    ) -> 'McpServerConnection':
+        return cls(
+            tg,
+            aiu.StapledByteStream(
+                check.not_none(proc.stdin),
+                check.not_none(proc.stdout),
+            ),
+            **kwargs,
+        )
+
+    @classmethod
+    def open_process(
+            cls,
+            tg: anyio.abc.TaskGroup,
+            cmd: ta.Sequence[str],
+            open_kwargs: ta.Mapping[str, ta.Any] | None = None,
+            **kwargs: ta.Any,
+    ) -> ta.AsyncContextManager[tuple[anyio.abc.Process, 'McpServerConnection']]:
+        @contextlib.asynccontextmanager
+        async def inner():
+            async with await anyio.open_process(
+                cmd,
+                stdin=subprocess.PIPE,
+                stdout=subprocess.PIPE,
+                **open_kwargs or {},
+            ) as proc:
+                async with cls.from_process(
+                    tg,
+                    proc,
+                    **kwargs,
+                ) as client:
+                    yield (proc, client)
+
+        return inner()
+
+    #
+
+    async def __aenter__(self) -> 'McpServerConnection':
+        await self._conn.__aenter__()
+        return self
+
+    async def __aexit__(self, et, e, tb) -> None:
+        await self._conn.__aexit__(et, e, tb)
+
+    #
+
+    async def _handle_client_request(self, _client: jr.Connection, req: jr.Request) -> None:
+        pass
+
+    async def _handle_client_notification(self, _client: jr.Connection, no: jr.Request) -> None:
+        pass
+
+    #
+
+    async def request(self, req: pt.ClientRequest[pt.ClientResultT]) -> pt.ClientResultT:
+        res_cls = pt.MESSAGE_TYPES_BY_JSON_RPC_METHOD_NAME[pt.ClientResult][req.json_rpc_method_name]  # type: ignore[type-abstract]  # noqa
+        req_mv = msh.marshal(req)
+        res_mv = await self._conn.request(req.json_rpc_method_name, req_mv)  # type: ignore[arg-type]
+        res = msh.unmarshal(res_mv, res_cls)
+        return ta.cast(pt.ClientResultT, res)
+
+    async def notify(self, no: pt.Notification) -> None:
+        no_mv = msh.marshal(no)
+        await self._conn.notify(no.json_rpc_method_name, no_mv)  # type: ignore[arg-type]
+
+    #
+
+    async def yield_cursor_request(
+            self,
+            req: pt.CursorClientRequest[pt.CursorClientResultT],
+    ) -> ta.AsyncGenerator[pt.CursorClientResultT]:
+        check.none(req.cursor)
+
+        cursor: str | None = None
+        while True:
+            res = await self.request(dc.replace(req, cursor=cursor))  # noqa
+            yield res
+
+            if (cursor := res.next_cursor) is None:
+                break
+
+    async def list_cursor_request(
+            self,
+            req: pt.CursorClientRequest[pt.CursorClientResultT],
+    ) -> list[pt.CursorClientResultT]:
+        return [res async for res in self.yield_cursor_request(req)]
+
+    #
+
+    async def list_tools(self) -> list[pt.Tool]:
+        return [
+            tool
+            async for res in self.yield_cursor_request(pt.ListToolsRequest())
+            for tool in res.tools
+        ]
+
+    async def list_prompts(self) -> list[pt.Prompt]:
+        return [
+            prompt
+            async for res in self.yield_cursor_request(pt.ListPromptsRequest())
+            for prompt in res.prompts
+        ]
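The new clients.py gives the MCP spec support a stdio client: McpServerConnection wraps an omlish JSON-RPC Connection over a spawned server's stdin/stdout, marshals the typed request/result classes from protocol.py, and walks cursor-paginated list results (yield_cursor_request keeps re-issuing the request with the returned next_cursor until it runs out). A rough usage sketch under a few assumptions: the server command is hypothetical, pt.Tool and pt.Prompt are assumed to expose name fields, and any initialize handshake the protocol requires is skipped here:

```python
import anyio

from ommlds.specs.mcp import clients


async def _main() -> None:
    async with anyio.create_task_group() as tg:
        # open_process() yields the spawned server process plus a connection
        # speaking JSON-RPC over its stdin/stdout.
        async with clients.McpServerConnection.open_process(
                tg,
                ['my-mcp-server', '--stdio'],  # hypothetical server command
        ) as (proc, conn):
            tools = await conn.list_tools()      # drains the paginated tools list
            prompts = await conn.list_prompts()  # likewise for prompts
            print([t.name for t in tools], [p.name for p in prompts])

        # Assumption: cancel anything the connection left running on the task
        # group so the group can exit promptly.
        tg.cancel_scope.cancel()


anyio.run(_main)
```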