ommlds 0.0.0.dev479__py3-none-any.whl → 0.0.0.dev481__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ommlds/.omlish-manifests.json +40 -23
- ommlds/__about__.py +1 -1
- ommlds/backends/llamacpp/logging.py +4 -1
- ommlds/backends/mlx/caching.py +7 -3
- ommlds/backends/mlx/cli.py +10 -7
- ommlds/backends/mlx/generation.py +18 -16
- ommlds/backends/mlx/limits.py +10 -6
- ommlds/backends/mlx/loading.py +7 -4
- ommlds/backends/tavily/__init__.py +0 -0
- ommlds/backends/tavily/protocol.py +301 -0
- ommlds/backends/transformers/__init__.py +14 -0
- ommlds/minichain/__init__.py +1 -0
- ommlds/minichain/_dataclasses.py +46282 -0
- ommlds/minichain/backends/impls/anthropic/chat.py +23 -4
- ommlds/minichain/backends/impls/duckduckgo/search.py +5 -1
- ommlds/minichain/backends/impls/huggingface/repos.py +1 -5
- ommlds/minichain/backends/impls/llamacpp/chat.py +6 -3
- ommlds/minichain/backends/impls/llamacpp/completion.py +7 -3
- ommlds/minichain/backends/impls/llamacpp/stream.py +6 -3
- ommlds/minichain/backends/impls/mlx/chat.py +6 -3
- ommlds/minichain/backends/impls/openai/format.py +2 -0
- ommlds/minichain/backends/impls/openai/names.py +3 -1
- ommlds/minichain/backends/impls/sentencepiece/tokens.py +9 -6
- ommlds/minichain/backends/impls/tavily.py +66 -0
- ommlds/minichain/backends/impls/tinygrad/chat.py +7 -4
- ommlds/minichain/backends/impls/tokenizers/tokens.py +9 -6
- ommlds/minichain/backends/impls/transformers/sentence.py +5 -2
- ommlds/minichain/backends/impls/transformers/tokens.py +9 -6
- ommlds/minichain/backends/impls/transformers/transformers.py +10 -8
- ommlds/minichain/llms/types.py +4 -0
- ommlds/minichain/search.py +1 -1
- ommlds/minichain/standard.py +1 -0
- ommlds/specs/__init__.py +0 -0
- ommlds/specs/mcp/__init__.py +0 -0
- ommlds/specs/mcp/_marshal.py +23 -0
- ommlds/specs/mcp/clients.py +146 -0
- ommlds/specs/mcp/protocol.py +371 -0
- {ommlds-0.0.0.dev479.dist-info → ommlds-0.0.0.dev481.dist-info}/METADATA +5 -5
- {ommlds-0.0.0.dev479.dist-info → ommlds-0.0.0.dev481.dist-info}/RECORD +43 -34
- {ommlds-0.0.0.dev479.dist-info → ommlds-0.0.0.dev481.dist-info}/WHEEL +0 -0
- {ommlds-0.0.0.dev479.dist-info → ommlds-0.0.0.dev481.dist-info}/entry_points.txt +0 -0
- {ommlds-0.0.0.dev479.dist-info → ommlds-0.0.0.dev481.dist-info}/licenses/LICENSE +0 -0
- {ommlds-0.0.0.dev479.dist-info → ommlds-0.0.0.dev481.dist-info}/top_level.txt +0 -0

ommlds/minichain/backends/impls/anthropic/chat.py
CHANGED
@@ -17,6 +17,7 @@ from ....chat.choices.services import ChatChoicesRequest
 from ....chat.choices.services import ChatChoicesResponse
 from ....chat.choices.services import static_check_is_chat_choices_service
 from ....chat.choices.types import AiChoice
+from ....chat.choices.types import ChatChoicesOptions
 from ....chat.messages import AiMessage
 from ....chat.messages import AnyAiMessage
 from ....chat.messages import Message
@@ -24,9 +25,12 @@ from ....chat.messages import SystemMessage
 from ....chat.messages import ToolUseMessage
 from ....chat.messages import UserMessage
 from ....chat.tools.types import Tool
+from ....llms.types import MaxTokens
+from ....llms.types import Temperature
 from ....models.configs import ModelName
 from ....standard import ApiKey
 from ....tools.types import ToolUse
+from ....types import Option
 from .names import MODEL_NAMES
 from .protocol import build_protocol_chat_messages
 from .protocol import build_protocol_tool
@@ -67,16 +71,31 @@ class AnthropicChatChoicesService:
         else:
             raise TypeError(m)

+    DEFAULT_OPTIONS: ta.ClassVar[tv.TypedValues[Option]] = tv.TypedValues[Option](
+        MaxTokens(4096),
+    )
+
+    _OPTION_KWARG_NAMES_MAP: ta.ClassVar[ta.Mapping[str, type[ChatChoicesOptions]]] = dict(
+        temperature=Temperature,
+        max_tokens=MaxTokens,
+    )
+
     async def invoke(
         self,
         request: ChatChoicesRequest,
-        *,
-        max_tokens: int = 4096,  # FIXME: ChatOption
     ) -> ChatChoicesResponse:
         messages, system = build_protocol_chat_messages(request.v)

+        kwargs: dict = dict()
+
         tools: list[pt.ToolSpec] = []
-        with tv.TypedValues(
+        with tv.TypedValues(
+            *self.DEFAULT_OPTIONS,
+            *request.options,
+            override=True,
+        ).consume() as oc:
+            kwargs.update(oc.pop_scalar_kwargs(**self._OPTION_KWARG_NAMES_MAP))
+
             t: Tool
             for t in oc.pop(Tool, []):
                 tools.append(build_protocol_tool(t))
@@ -86,7 +105,7 @@ class AnthropicChatChoicesService:
             system=system,
             messages=messages,
             tools=tools or None,
-
+            **kwargs,
         )

         raw_request = msh.marshal(a_req)
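
A note on the pattern this change introduces: the hard-coded `max_tokens` keyword argument is replaced by typed options, where class-level defaults are consumed together with the request's options (later values override earlier ones) and a small name map turns scalar options into protocol kwargs. Below is a rough plain-Python sketch of that merge, for illustration only; it is not the omlish typedvalues API, and the names here are made up.

# Defaults are laid down first, request options are applied on top with
# override semantics, and a fixed subset of scalar options is translated
# into request kwargs.
DEFAULT_OPTIONS = {'max_tokens': 4096}
OPTION_KWARG_NAMES = ('temperature', 'max_tokens')


def build_kwargs(request_options: dict) -> dict:
    merged = {**DEFAULT_OPTIONS, **request_options}  # later values win, like override=True
    return {k: merged[k] for k in OPTION_KWARG_NAMES if k in merged}


print(build_kwargs({'temperature': 0.2}))  # -> {'temperature': 0.2, 'max_tokens': 4096}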

ommlds/minichain/backends/impls/duckduckgo/search.py
CHANGED
@@ -1,4 +1,4 @@
-import
+from omlish import lang

 from ....search import SearchHit
 from ....search import SearchHits
@@ -7,6 +7,10 @@ from ....search import SearchResponse
 from ....search import static_check_is_search_service


+with lang.auto_proxy_import(globals()):
+    import ddgs
+
+
 ##

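
Most of the remaining changes in this release follow one pattern: heavyweight optional dependencies (ddgs, llama_cpp, sentencepiece, tokenizers, transformers, sentence_transformers, mlx, tinygrad) move out of top-level imports and into a `with lang.auto_proxy_import(globals()):` block, so the backend module stays cheap to import when the dependency is absent or unused. A minimal sketch of the general idea behind such a lazy proxy follows; it is illustrative only, not the omlish implementation, and `lazy_module` is a made-up name.

import importlib
import types


def lazy_module(name: str) -> types.ModuleType:
    class _Proxy(types.ModuleType):
        def __getattr__(self, attr):  # the real import happens on first use
            return getattr(importlib.import_module(name), attr)

    return _Proxy(name)


ddgs = lazy_module('ddgs')  # nothing is imported yet; resolution is deferred to attribute access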

ommlds/minichain/backends/impls/huggingface/repos.py
CHANGED
@@ -3,8 +3,6 @@ TODO:
  - local-only check first
  - cat ~/.cache/.../models/.../refs/main -> c5bfd839cd4cda0e5a39a97e00218d9c56e468af
 """
-import typing as ta
-
 from omlish import lang

 from ....models.configs import ModelRepo
@@ -12,10 +10,8 @@ from ....models.repos.resolving import ModelRepoResolver
 from ....models.repos.resolving import ResolvedModelRepo


-
+with lang.auto_proxy_import(globals()):
     import huggingface_hub as hf
-else:
-    hf = lang.proxy_import('huggingface_hub')


 ##

ommlds/minichain/backends/impls/llamacpp/chat.py
CHANGED
@@ -2,13 +2,10 @@ import contextlib
 import os.path
 import typing as ta

-import llama_cpp as lcc
-
 from omlish import check
 from omlish import lang
 from omlish import typedvalues as tv

-from .....backends import llamacpp as lcu
 from ....chat.choices.services import ChatChoicesRequest
 from ....chat.choices.services import ChatChoicesResponse
 from ....chat.choices.services import static_check_is_chat_choices_service
@@ -27,6 +24,12 @@ from .format import ROLES_MAP
 from .format import get_msg_content


+with lang.auto_proxy_import(globals()):
+    import llama_cpp as lcc
+
+    from .....backends import llamacpp as lcu
+
+
 ##


ommlds/minichain/backends/impls/llamacpp/completion.py
CHANGED
@@ -2,11 +2,9 @@ import contextlib
 import os.path
 import typing as ta

-
-
+from omlish import lang
 from omlish import typedvalues as tv

-from .....backends import llamacpp as lcu
 from ....completion import CompletionOption
 from ....completion import CompletionRequest
 from ....completion import CompletionResponse
@@ -18,6 +16,12 @@ from ....llms.types import Temperature
 from ....models.configs import ModelPath


+with lang.auto_proxy_import(globals()):
+    import llama_cpp as lcc
+
+    from .....backends import llamacpp as lcu
+
+
 ##


ommlds/minichain/backends/impls/llamacpp/stream.py
CHANGED
@@ -2,13 +2,10 @@ import contextlib
 import threading
 import typing as ta  # noqa

-import llama_cpp as lcc
-
 from omlish import check
 from omlish import lang
 from omlish import typedvalues as tv

-from .....backends import llamacpp as lcu
 from ....chat.choices.services import ChatChoicesOutputs
 from ....chat.choices.stream.services import ChatChoicesStreamRequest
 from ....chat.choices.stream.services import ChatChoicesStreamResponse
@@ -26,6 +23,12 @@ from .format import ROLES_MAP
 from .format import get_msg_content


+with lang.auto_proxy_import(globals()):
+    import llama_cpp as lcc
+
+    from .....backends import llamacpp as lcu
+
+
 ##


ommlds/minichain/backends/impls/mlx/chat.py
CHANGED
@@ -5,7 +5,6 @@ from omlish import check
 from omlish import lang
 from omlish import typedvalues as tv

-from .....backends import mlx as mlxu
 from ....chat.choices.services import ChatChoicesOutputs
 from ....chat.choices.services import ChatChoicesRequest
 from ....chat.choices.services import ChatChoicesResponse
@@ -33,6 +32,10 @@ from ....stream.services import StreamResponseSink
 from ....stream.services import new_stream_response


+with lang.auto_proxy_import(globals()):
+    from .....backends import mlx as mlxu
+
+
 ##


@@ -83,7 +86,7 @@ class BaseMlxChatChoicesService(lang.ExitStacked):
             raise TypeError(m)

     @lang.cached_function(transient=True)
-    def _load_model(self) -> mlxu.LoadedModel:
+    def _load_model(self) -> 'mlxu.LoadedModel':
         # FIXME: walk state, find all mx.arrays, dealloc/set to empty
         check.not_none(self._exit_stack)

@@ -100,7 +103,7 @@ class BaseMlxChatChoicesService(lang.ExitStacked):
         )

     @lang.cached_function(transient=True)
-    def _get_tokenizer(self) -> mlxu.tokenization.Tokenizer:
+    def _get_tokenizer(self) -> 'mlxu.tokenization.Tokenizer':
         tokenizer = self._load_model().tokenization.tokenizer

         if not (
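
A side effect of the lazy binding shows up in annotations: return types and parameters that reference lazily imported modules are now quoted strings (e.g. 'mlxu.LoadedModel'), because the name may not refer to a real module at definition time. A tiny illustration, with `mlxu` standing in for the lazily imported module:

# An unquoted annotation would evaluate `mlxu.LoadedModel` when the function is
# defined and could fail; a string annotation defers that until it is introspected.
# (`from __future__ import annotations` is the file-wide equivalent.)
def _load_model() -> 'mlxu.LoadedModel':
    ...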

ommlds/minichain/backends/impls/openai/format.py
CHANGED
@@ -23,6 +23,7 @@ from ....chat.stream.types import PartialToolUseAiDelta
 from ....chat.tools.types import Tool
 from ....content.json import JsonContent
 from ....content.prepare import prepare_content_str
+from ....llms.types import MaxCompletionTokens
 from ....llms.types import MaxTokens
 from ....llms.types import Temperature
 from ....llms.types import TokenUsage
@@ -173,6 +174,7 @@ class OpenaiChatRequestHandler:
     _OPTION_KWARG_NAMES_MAP: ta.ClassVar[ta.Mapping[str, type[ChatChoicesOptions]]] = dict(
         temperature=Temperature,
         max_tokens=MaxTokens,
+        max_completion_tokens=MaxCompletionTokens,
     )

     class _ProcessedOptions(ta.NamedTuple):

ommlds/minichain/backends/impls/openai/names.py
CHANGED
@@ -30,6 +30,8 @@ _GPT_MODEL_NAMES = [
     'gpt-5-chat-latest',
     'gpt-5-mini',
     'gpt-5-nano',
+
+    'gpt-5.1',
 ]


@@ -46,7 +48,7 @@ CHAT_MODEL_NAMES = ModelNameCollection(
         for n in _GPT_MODEL_NAMES
     },

-    'gpt': 'gpt-5',
+    'gpt': 'gpt-5.1',
     'gpt-mini': 'gpt-5-mini',

     #

ommlds/minichain/backends/impls/sentencepiece/tokens.py
CHANGED
@@ -1,23 +1,26 @@
 import typing as ta

-import sentencepiece as spm
-
 from omlish import check
+from omlish import lang

 from .... import tokens as tks


+with lang.auto_proxy_import(globals()):
+    import sentencepiece as spm
+
+
 ##


-def build_vocab(spm_tokenizer: spm.SentencePieceProcessor) -> tks.Vocab:
+def build_vocab(spm_tokenizer: 'spm.SentencePieceProcessor') -> tks.Vocab:
     return tks.Vocab([
         (ta.cast(tks.Token, i), tks.TokenStr(spm_tokenizer.id_to_piece(i)))  # noqa
         for i in range(spm_tokenizer.get_piece_size())  # noqa
     ])


-def build_specials(spm_tokenizer: spm.SentencePieceProcessor) -> tks.SpecialTokens:
+def build_specials(spm_tokenizer: 'spm.SentencePieceProcessor') -> tks.SpecialTokens:
     # FIXME
     return tks.SpecialTokens([])

@@ -28,7 +31,7 @@ def build_specials(spm_tokenizer: spm.SentencePieceProcessor) -> tks.SpecialTokens:
 class SentencepieceTokenizer(tks.BaseTokenizer):
     def __init__(
         self,
-        spm_tokenizer: spm.SentencePieceProcessor,
+        spm_tokenizer: 'spm.SentencePieceProcessor',
     ) -> None:
         self._spm_tokenizer = check.isinstance(spm_tokenizer, spm.SentencePieceProcessor)

@@ -38,7 +41,7 @@ class SentencepieceTokenizer(tks.BaseTokenizer):
         )

     @property
-    def spm_tokenizer(self) -> spm.SentencePieceProcessor:
+    def spm_tokenizer(self) -> 'spm.SentencePieceProcessor':
         return self._spm_tokenizer

     #

ommlds/minichain/backends/impls/tavily.py
ADDED
@@ -0,0 +1,66 @@
+from omlish import check
+from omlish import marshal as msh
+from omlish import typedvalues as tv
+from omlish.formats import json
+from omlish.http import all as http
+
+from ....backends.tavily import protocol as pt
+from ...search import SearchHit
+from ...search import SearchHits
+from ...search import SearchRequest
+from ...search import SearchResponse
+from ...search import static_check_is_search_service
+from ...standard import ApiKey
+
+
+##
+
+
+# @omlish-manifest $.minichain.registries.manifests.RegistryManifest(
+#     name='tavily',
+#     type='SearchService',
+# )
+@static_check_is_search_service
+class TavilySearchService:
+    def __init__(
+        self,
+        *configs: ApiKey,
+        http_client: http.AsyncHttpClient | None = None,
+    ) -> None:
+        super().__init__()
+
+        self._http_client = http_client
+
+        with tv.consume(*configs) as cc:
+            self._api_key = ApiKey.pop_secret(cc, env='TAVILY_API_KEY')
+
+    async def invoke(self, request: SearchRequest) -> SearchResponse:
+        pt_request = pt.SearchRequest(
+            query=request.v,
+        )
+
+        raw_request = msh.marshal(pt_request)
+
+        http_response = await http.async_request(
+            'https://api.tavily.com/search',
+            headers={
+                http.consts.HEADER_CONTENT_TYPE: http.consts.CONTENT_TYPE_JSON,
+                http.consts.HEADER_AUTH: http.consts.format_bearer_auth_header(check.not_none(self._api_key).reveal()),
+            },
+            data=json.dumps(raw_request).encode('utf-8'),
+            client=self._http_client,
+        )
+
+        raw_response = json.loads(check.not_none(http_response.data).decode('utf-8'))
+
+        pt_response = msh.unmarshal(raw_response, pt.SearchResponse)
+
+        return SearchResponse(SearchHits(
+            l=[
+                SearchHit(
+                    title=r.title,
+                    url=r.url,
+                )
+                for r in pt_response.results or []
+            ],
+        ))
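
For reference, here is a standalone sketch of the HTTP exchange the new TavilySearchService performs, using only the standard library (the service itself goes through omlish's async HTTP client and marshaling). The endpoint, bearer-auth header, and `query` field mirror what the diff shows; everything else about the request body is omitted, and the helper name is made up.

import json
import urllib.request


def tavily_search(api_key: str, query: str) -> dict:
    req = urllib.request.Request(
        'https://api.tavily.com/search',
        headers={
            'Content-Type': 'application/json',
            'Authorization': f'Bearer {api_key}',
        },
        data=json.dumps({'query': query}).encode('utf-8'),
    )
    with urllib.request.urlopen(req) as resp:
        # The response is JSON; the service maps its `results` entries to SearchHit(title=..., url=...).
        return json.loads(resp.read().decode('utf-8'))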

ommlds/minichain/backends/impls/tinygrad/chat.py
CHANGED
@@ -4,7 +4,6 @@ import typing as ta
 from omlish import check
 from omlish import lang

-from .....backends.tinygrad.models import llama3 as tgl3
 from ....chat.choices.services import ChatChoicesOptions
 from ....chat.choices.services import ChatChoicesRequest
 from ....chat.choices.services import ChatChoicesResponse
@@ -28,6 +27,10 @@ from ....stream.services import StreamResponseSink
 from ....stream.services import new_stream_response


+with lang.auto_proxy_import(globals()):
+    from .....backends.tinygrad.models import llama3 as tgl3
+
+
 ##


@@ -39,7 +42,7 @@ def _load_model(
     *,
     size: str | None = None,
     temperature: float | None = None,
-) -> tgl3.Llama3Llm:
+) -> 'tgl3.Llama3Llm':
     if size is None:
         size = DEFAULT_SIZE
     if temperature is None:
@@ -58,7 +61,7 @@ def _load_model(


 def _prepare_toks(
-    llm: tgl3.Llama3Llm,
+    llm: 'tgl3.Llama3Llm',
     chat: Chat,
     options: ta.Sequence[ChatChoicesOptions],
 ) -> list[int]:
@@ -100,7 +103,7 @@ class BaseTinygradLlama3ChatService(lang.ExitStacked, lang.Abstract):
         self._temperature = temperature

     @lang.cached_function(transient=True)
-    def _load_model(self) -> tgl3.Llama3Llm:
+    def _load_model(self) -> 'tgl3.Llama3Llm':
         check.not_none(self._exit_stack)

         return _load_model(

ommlds/minichain/backends/impls/tokenizers/tokens.py
CHANGED
@@ -1,23 +1,26 @@
 import typing as ta

-import tokenizers as tos
-
 from omlish import check
+from omlish import lang

 from .... import tokens as tks


+with lang.auto_proxy_import(globals()):
+    import tokenizers as tos
+
+
 ##


-def build_vocab(tos_tokenizer: tos.Tokenizer) -> tks.Vocab:
+def build_vocab(tos_tokenizer: 'tos.Tokenizer') -> tks.Vocab:
     return tks.Vocab([
         (ta.cast(tks.Token, i), tks.TokenStr(s))
         for s, i in tos_tokenizer.get_vocab().items()
     ])


-def build_specials(tos_tokenizer: tos.Tokenizer) -> tks.SpecialTokens:
+def build_specials(tos_tokenizer: 'tos.Tokenizer') -> tks.SpecialTokens:
     # FIXME
     return tks.SpecialTokens([])

@@ -28,7 +31,7 @@ def build_specials(tos_tokenizer: tos.Tokenizer) -> tks.SpecialTokens:
 class TokenizersTokenizer(tks.BaseTokenizer):
     def __init__(
         self,
-        tos_tokenizer: tos.Tokenizer,
+        tos_tokenizer: 'tos.Tokenizer',
     ) -> None:
         self._tos_tokenizer = check.isinstance(tos_tokenizer, tos.Tokenizer)

@@ -38,7 +41,7 @@ class TokenizersTokenizer(tks.BaseTokenizer):
         )

     @property
-    def tos_tokenizer(self) -> tos.Tokenizer:
+    def tos_tokenizer(self) -> 'tos.Tokenizer':
         return self._tos_tokenizer

     #

ommlds/minichain/backends/impls/transformers/sentence.py
CHANGED
@@ -1,7 +1,6 @@
 import typing as ta

-
-
+from omlish import lang
 from omlish import typedvalues as tv

 from ....configs import Config
@@ -13,6 +12,10 @@ from ....vectors.embeddings import static_check_is_embedding_service
 from ....vectors.types import Vector


+with lang.auto_proxy_import(globals()):
+    import sentence_transformers as stfm
+
+
 ##


ommlds/minichain/backends/impls/transformers/tokens.py
CHANGED
@@ -1,17 +1,20 @@
 import typing as ta

-import transformers as tfm
-
 from omlish import check
 from omlish import collections as col
+from omlish import lang

 from .... import tokens as tks


+with lang.auto_proxy_import(globals()):
+    import transformers as tfm
+
+
 ##


-def build_vocab(tfm_tokenizer: tfm.PreTrainedTokenizerBase) -> tks.Vocab:
+def build_vocab(tfm_tokenizer: 'tfm.PreTrainedTokenizerBase') -> tks.Vocab:
     return tks.Vocab([
         (ta.cast(tks.Token, i), tks.TokenStr(s))
         for s, i in tfm_tokenizer.get_vocab().items()
@@ -32,7 +35,7 @@ SPECIAL_TOKEN_ATTR_MAP: col.BiMap[type[tks.SpecialToken], str] = col.make_bi_map
 })


-def build_specials(tfm_tokenizer: tfm.PreTrainedTokenizerBase) -> tks.SpecialTokens:
+def build_specials(tfm_tokenizer: 'tfm.PreTrainedTokenizerBase') -> tks.SpecialTokens:
     return tks.SpecialTokens.from_dict({
         st: getattr(tfm_tokenizer, a)
         for st, a in SPECIAL_TOKEN_ATTR_MAP.items()
@@ -45,7 +48,7 @@ def build_specials(tfm_tokenizer: tfm.PreTrainedTokenizerBase) -> tks.SpecialTokens:
 class TransformersTokenizer(tks.BaseTokenizer):
     def __init__(
         self,
-        tfm_tokenizer: tfm.PreTrainedTokenizerBase,
+        tfm_tokenizer: 'tfm.PreTrainedTokenizerBase',
     ) -> None:
         self._tfm_tokenizer = check.isinstance(tfm_tokenizer, tfm.PreTrainedTokenizerBase)

@@ -55,7 +58,7 @@ class TransformersTokenizer(tks.BaseTokenizer):
         )

     @property
-    def tfm_tokenizer(self) -> tfm.PreTrainedTokenizerBase:
+    def tfm_tokenizer(self) -> 'tfm.PreTrainedTokenizerBase':
         return self._tfm_tokenizer

     #

ommlds/minichain/backends/impls/transformers/transformers.py
CHANGED
@@ -7,15 +7,11 @@ import sys
 import threading
 import typing as ta

-import transformers as tfm
-
 from omlish import check
 from omlish import lang
 from omlish import typedvalues as tv
 from omlish.asyncs.asyncio.sync import AsyncioBufferRelay

-from .....backends.transformers.filecache import file_cache_patch_context
-from .....backends.transformers.streamers import CancellableTextStreamer
 from ....chat.choices.services import ChatChoicesRequest
 from ....chat.choices.services import ChatChoicesResponse
 from ....chat.choices.services import static_check_is_chat_choices_service
@@ -44,6 +40,12 @@ from ....stream.services import new_stream_response
 from ...impls.huggingface.configs import HuggingfaceHubToken


+with lang.auto_proxy_import(globals()):
+    import transformers as tfm
+
+    from .....backends import transformers as tfm_u
+
+
 ##


@@ -168,7 +170,7 @@ class BaseTransformersChatChoicesService(lang.ExitStacked):
             self._huggingface_hub_token = HuggingfaceHubToken.pop_secret(cc, env='HUGGINGFACE_HUB_TOKEN')

     @lang.cached_function(transient=True)
-    def _load_pipeline(self) -> tfm.Pipeline:
+    def _load_pipeline(self) -> 'tfm.Pipeline':
         # FIXME: unload
         check.not_none(self._exit_stack)

@@ -181,7 +183,7 @@ class BaseTransformersChatChoicesService(lang.ExitStacked):
         for pkw_cfg in self._pipeline_kwargs:
             pkw.update(pkw_cfg.v)

-        with file_cache_patch_context(
+        with tfm_u.file_cache_patch_context(
             local_first=True,
             local_config_present_is_authoritative=True,
         ):
@@ -246,7 +248,7 @@ class TransformersChatChoicesStreamService(BaseTransformersChatChoicesService):
             if text or stream_end:
                 relay.push(text, *([None] if stream_end else []))

-        streamer = CancellableTextStreamer(
+        streamer = tfm_u.CancellableTextStreamer(
            check.not_none(pipeline.tokenizer),  # type: ignore[arg-type]
            streamer_callback,  # noqa
            skip_prompt=True,
@@ -255,7 +257,7 @@ class TransformersChatChoicesStreamService(BaseTransformersChatChoicesService):

         async with UseResources.or_new(request.options) as rs:
             thread = threading.Thread(
-                target=CancellableTextStreamer.ignoring_cancelled(pipeline),
+                target=tfm_u.CancellableTextStreamer.ignoring_cancelled(pipeline),
                 args=(
                     inputs,
                 ),

ommlds/minichain/llms/types.py
CHANGED
ommlds/minichain/search.py
CHANGED
ommlds/minichain/standard.py
CHANGED
ommlds/specs/__init__.py
ADDED
File without changes

ommlds/specs/mcp/_marshal.py
ADDED
@@ -0,0 +1,23 @@
+from omlish import lang
+from omlish import marshal as msh
+
+from .protocol import ContentBlock
+
+
+##
+
+
+@lang.static_init
+def _install_standard_marshaling() -> None:
+    for root_cls, tag_field in [
+        (ContentBlock, 'type'),
+    ]:
+        msh.install_standard_factories(*msh.standard_polymorphism_factories(
+            msh.polymorphism_from_subclasses(
+                root_cls,
+                naming=msh.Naming.SNAKE,
+                strip_suffix=msh.AutoStripSuffix,
+            ),
+            msh.FieldTypeTagging(tag_field),
+            unions='partial',
+        ))
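
The new _marshal.py registers polymorphic marshaling for the MCP ContentBlock hierarchy: each subclass is keyed by a snake_case name with the shared suffix stripped, and the key travels inline in a 'type' field. A plain-Python illustration of that idea follows; it is not the omlish.marshal API, and the TextContentBlock subclass is hypothetical.

import dataclasses
import re


@dataclasses.dataclass
class ContentBlock:
    pass


@dataclasses.dataclass
class TextContentBlock(ContentBlock):
    text: str


def _tag(cls: type) -> str:
    # strip the shared suffix, then CamelCase -> snake_case
    name = cls.__name__.removesuffix('ContentBlock') or cls.__name__
    return re.sub(r'(?<!^)(?=[A-Z])', '_', name).lower()


def unmarshal(obj: dict, root: type) -> ContentBlock:
    by_tag = {_tag(sc): sc for sc in root.__subclasses__()}
    kwargs = {k: v for k, v in obj.items() if k != 'type'}
    return by_tag[obj['type']](**kwargs)


print(unmarshal({'type': 'text', 'text': 'hi'}, ContentBlock))  # -> TextContentBlock(text='hi')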