ommlds-0.0.0.dev479-py3-none-any.whl → ommlds-0.0.0.dev481-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. ommlds/.omlish-manifests.json +40 -23
  2. ommlds/__about__.py +1 -1
  3. ommlds/backends/llamacpp/logging.py +4 -1
  4. ommlds/backends/mlx/caching.py +7 -3
  5. ommlds/backends/mlx/cli.py +10 -7
  6. ommlds/backends/mlx/generation.py +18 -16
  7. ommlds/backends/mlx/limits.py +10 -6
  8. ommlds/backends/mlx/loading.py +7 -4
  9. ommlds/backends/tavily/__init__.py +0 -0
  10. ommlds/backends/tavily/protocol.py +301 -0
  11. ommlds/backends/transformers/__init__.py +14 -0
  12. ommlds/minichain/__init__.py +1 -0
  13. ommlds/minichain/_dataclasses.py +46282 -0
  14. ommlds/minichain/backends/impls/anthropic/chat.py +23 -4
  15. ommlds/minichain/backends/impls/duckduckgo/search.py +5 -1
  16. ommlds/minichain/backends/impls/huggingface/repos.py +1 -5
  17. ommlds/minichain/backends/impls/llamacpp/chat.py +6 -3
  18. ommlds/minichain/backends/impls/llamacpp/completion.py +7 -3
  19. ommlds/minichain/backends/impls/llamacpp/stream.py +6 -3
  20. ommlds/minichain/backends/impls/mlx/chat.py +6 -3
  21. ommlds/minichain/backends/impls/openai/format.py +2 -0
  22. ommlds/minichain/backends/impls/openai/names.py +3 -1
  23. ommlds/minichain/backends/impls/sentencepiece/tokens.py +9 -6
  24. ommlds/minichain/backends/impls/tavily.py +66 -0
  25. ommlds/minichain/backends/impls/tinygrad/chat.py +7 -4
  26. ommlds/minichain/backends/impls/tokenizers/tokens.py +9 -6
  27. ommlds/minichain/backends/impls/transformers/sentence.py +5 -2
  28. ommlds/minichain/backends/impls/transformers/tokens.py +9 -6
  29. ommlds/minichain/backends/impls/transformers/transformers.py +10 -8
  30. ommlds/minichain/llms/types.py +4 -0
  31. ommlds/minichain/search.py +1 -1
  32. ommlds/minichain/standard.py +1 -0
  33. ommlds/specs/__init__.py +0 -0
  34. ommlds/specs/mcp/__init__.py +0 -0
  35. ommlds/specs/mcp/_marshal.py +23 -0
  36. ommlds/specs/mcp/clients.py +146 -0
  37. ommlds/specs/mcp/protocol.py +371 -0
  38. {ommlds-0.0.0.dev479.dist-info → ommlds-0.0.0.dev481.dist-info}/METADATA +5 -5
  39. {ommlds-0.0.0.dev479.dist-info → ommlds-0.0.0.dev481.dist-info}/RECORD +43 -34
  40. {ommlds-0.0.0.dev479.dist-info → ommlds-0.0.0.dev481.dist-info}/WHEEL +0 -0
  41. {ommlds-0.0.0.dev479.dist-info → ommlds-0.0.0.dev481.dist-info}/entry_points.txt +0 -0
  42. {ommlds-0.0.0.dev479.dist-info → ommlds-0.0.0.dev481.dist-info}/licenses/LICENSE +0 -0
  43. {ommlds-0.0.0.dev479.dist-info → ommlds-0.0.0.dev481.dist-info}/top_level.txt +0 -0
@@ -17,6 +17,7 @@ from ....chat.choices.services import ChatChoicesRequest
 from ....chat.choices.services import ChatChoicesResponse
 from ....chat.choices.services import static_check_is_chat_choices_service
 from ....chat.choices.types import AiChoice
+from ....chat.choices.types import ChatChoicesOptions
 from ....chat.messages import AiMessage
 from ....chat.messages import AnyAiMessage
 from ....chat.messages import Message
@@ -24,9 +25,12 @@ from ....chat.messages import SystemMessage
 from ....chat.messages import ToolUseMessage
 from ....chat.messages import UserMessage
 from ....chat.tools.types import Tool
+from ....llms.types import MaxTokens
+from ....llms.types import Temperature
 from ....models.configs import ModelName
 from ....standard import ApiKey
 from ....tools.types import ToolUse
+from ....types import Option
 from .names import MODEL_NAMES
 from .protocol import build_protocol_chat_messages
 from .protocol import build_protocol_tool
@@ -67,16 +71,31 @@ class AnthropicChatChoicesService:
         else:
             raise TypeError(m)
 
+    DEFAULT_OPTIONS: ta.ClassVar[tv.TypedValues[Option]] = tv.TypedValues[Option](
+        MaxTokens(4096),
+    )
+
+    _OPTION_KWARG_NAMES_MAP: ta.ClassVar[ta.Mapping[str, type[ChatChoicesOptions]]] = dict(
+        temperature=Temperature,
+        max_tokens=MaxTokens,
+    )
+
     async def invoke(
         self,
         request: ChatChoicesRequest,
-        *,
-        max_tokens: int = 4096,  # FIXME: ChatOption
     ) -> ChatChoicesResponse:
         messages, system = build_protocol_chat_messages(request.v)
 
+        kwargs: dict = dict()
+
         tools: list[pt.ToolSpec] = []
-        with tv.TypedValues(*request.options).consume() as oc:
+        with tv.TypedValues(
+            *self.DEFAULT_OPTIONS,
+            *request.options,
+            override=True,
+        ).consume() as oc:
+            kwargs.update(oc.pop_scalar_kwargs(**self._OPTION_KWARG_NAMES_MAP))
+
             t: Tool
             for t in oc.pop(Tool, []):
                 tools.append(build_protocol_tool(t))
@@ -86,7 +105,7 @@ class AnthropicChatChoicesService:
             system=system,
            messages=messages,
             tools=tools or None,
-            max_tokens=max_tokens,
+            **kwargs,
         )
 
         raw_request = msh.marshal(a_req)
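
The hunks above replace the hardcoded max_tokens keyword argument with the library's typed-value options: a class-level DEFAULT_OPTIONS supplies MaxTokens(4096), per-request options are merged over it with override=True, and scalar options are translated into protocol kwargs through _OPTION_KWARG_NAMES_MAP. A minimal caller-side sketch; the exact ChatChoicesRequest constructor shape is an assumption inferred from this diff, not a documented API:

    # Sketch only: request/option construction inferred from this diff.
    request = ChatChoicesRequest(
        chat,
        [
            Temperature(0.2),  # mapped to temperature=0.2 via _OPTION_KWARG_NAMES_MAP
            MaxTokens(1024),   # overrides DEFAULT_OPTIONS' MaxTokens(4096)
        ],
    )
    response = await AnthropicChatChoicesService(ApiKey('...')).invoke(request)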
@@ -1,4 +1,4 @@
-import ddgs
+from omlish import lang
 
 from ....search import SearchHit
 from ....search import SearchHits
@@ -7,6 +7,10 @@ from ....search import SearchResponse
 from ....search import static_check_is_search_service
 
 
+with lang.auto_proxy_import(globals()):
+    import ddgs
+
+
 ##
 
 
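
This hunk establishes the pattern repeated throughout the rest of this diff: top-level imports of heavyweight third-party backends are moved under lang.auto_proxy_import(globals()), which (as inferred from its usage here) binds the names to lazy proxies so the real import only happens on first attribute access. A rough sketch of the observable behavior:

    from omlish import lang

    with lang.auto_proxy_import(globals()):
        import ddgs  # no import cost paid yet

    def search(query: str):
        # first attribute access resolves the proxy and performs the real import
        return ddgs.DDGS().text(query)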
@@ -3,8 +3,6 @@ TODO:
  - local-only check first
  - cat ~/.cache/.../models/.../refs/main -> c5bfd839cd4cda0e5a39a97e00218d9c56e468af
 """
-import typing as ta
-
 from omlish import lang
 
 from ....models.configs import ModelRepo
@@ -12,10 +10,8 @@ from ....models.repos.resolving import ModelRepoResolver
 from ....models.repos.resolving import ResolvedModelRepo
 
 
-if ta.TYPE_CHECKING:
+with lang.auto_proxy_import(globals()):
     import huggingface_hub as hf
-else:
-    hf = lang.proxy_import('huggingface_hub')
 
 
 ##
@@ -2,13 +2,10 @@ import contextlib
 import os.path
 import typing as ta
 
-import llama_cpp as lcc
-
 from omlish import check
 from omlish import lang
 from omlish import typedvalues as tv
 
-from .....backends import llamacpp as lcu
 from ....chat.choices.services import ChatChoicesRequest
 from ....chat.choices.services import ChatChoicesResponse
 from ....chat.choices.services import static_check_is_chat_choices_service
@@ -27,6 +24,12 @@ from .format import ROLES_MAP
 from .format import get_msg_content
 
 
+with lang.auto_proxy_import(globals()):
+    import llama_cpp as lcc
+
+    from .....backends import llamacpp as lcu
+
+
 ##
 
 
@@ -2,11 +2,9 @@ import contextlib
 import os.path
 import typing as ta
 
-import llama_cpp as lcc
-
+from omlish import lang
 from omlish import typedvalues as tv
 
-from .....backends import llamacpp as lcu
 from ....completion import CompletionOption
 from ....completion import CompletionRequest
 from ....completion import CompletionResponse
@@ -18,6 +16,12 @@ from ....llms.types import Temperature
 from ....models.configs import ModelPath
 
 
+with lang.auto_proxy_import(globals()):
+    import llama_cpp as lcc
+
+    from .....backends import llamacpp as lcu
+
+
 ##
 
 
@@ -2,13 +2,10 @@ import contextlib
 import threading
 import typing as ta  # noqa
 
-import llama_cpp as lcc
-
 from omlish import check
 from omlish import lang
 from omlish import typedvalues as tv
 
-from .....backends import llamacpp as lcu
 from ....chat.choices.services import ChatChoicesOutputs
 from ....chat.choices.stream.services import ChatChoicesStreamRequest
 from ....chat.choices.stream.services import ChatChoicesStreamResponse
@@ -26,6 +23,12 @@ from .format import ROLES_MAP
 from .format import get_msg_content
 
 
+with lang.auto_proxy_import(globals()):
+    import llama_cpp as lcc
+
+    from .....backends import llamacpp as lcu
+
+
 ##
 
 
@@ -5,7 +5,6 @@ from omlish import check
 from omlish import lang
 from omlish import typedvalues as tv
 
-from .....backends import mlx as mlxu
 from ....chat.choices.services import ChatChoicesOutputs
 from ....chat.choices.services import ChatChoicesRequest
 from ....chat.choices.services import ChatChoicesResponse
@@ -33,6 +32,10 @@ from ....stream.services import StreamResponseSink
 from ....stream.services import new_stream_response
 
 
+with lang.auto_proxy_import(globals()):
+    from .....backends import mlx as mlxu
+
+
 ##
 
 
@@ -83,7 +86,7 @@ class BaseMlxChatChoicesService(lang.ExitStacked):
         raise TypeError(m)
 
     @lang.cached_function(transient=True)
-    def _load_model(self) -> mlxu.LoadedModel:
+    def _load_model(self) -> 'mlxu.LoadedModel':
         # FIXME: walk state, find all mx.arrays, dealloc/set to empty
         check.not_none(self._exit_stack)
 
@@ -100,7 +103,7 @@ class BaseMlxChatChoicesService(lang.ExitStacked):
         )
 
     @lang.cached_function(transient=True)
-    def _get_tokenizer(self) -> mlxu.tokenization.Tokenizer:
+    def _get_tokenizer(self) -> 'mlxu.tokenization.Tokenizer':
         tokenizer = self._load_model().tokenization.tokenizer
 
         if not (
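
A knock-on effect of the proxy imports, here and in the hunks that follow: return-type annotations naming the deferred module (mlxu.LoadedModel, tgl3.Llama3Llm, tfm.Pipeline, ...) are quoted. Function annotations are evaluated at definition time, so an unquoted mlxu.LoadedModel would resolve the proxy and trigger the very import the laziness was meant to defer; the string form is only evaluated if something introspects it:

    with lang.auto_proxy_import(globals()):
        from .....backends import mlx as mlxu

    def _load_model(self) -> 'mlxu.LoadedModel':  # quoted: does not touch the proxy at def time
        ...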
@@ -23,6 +23,7 @@ from ....chat.stream.types import PartialToolUseAiDelta
 from ....chat.tools.types import Tool
 from ....content.json import JsonContent
 from ....content.prepare import prepare_content_str
+from ....llms.types import MaxCompletionTokens
 from ....llms.types import MaxTokens
 from ....llms.types import Temperature
 from ....llms.types import TokenUsage
@@ -173,6 +174,7 @@ class OpenaiChatRequestHandler:
    _OPTION_KWARG_NAMES_MAP: ta.ClassVar[ta.Mapping[str, type[ChatChoicesOptions]]] = dict(
        temperature=Temperature,
        max_tokens=MaxTokens,
+       max_completion_tokens=MaxCompletionTokens,
    )
 
    class _ProcessedOptions(ta.NamedTuple):
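
The new MaxCompletionTokens entry rides the same scalar-option-to-kwarg mapping: a request option MaxCompletionTokens(n) becomes max_completion_tokens=n on the outgoing OpenAI-protocol request (the parameter OpenAI introduced to supersede max_tokens on newer models). Roughly, assuming pop_scalar_kwargs behaves as its call sites in this diff suggest:

    # Assumed behavior, inferred from call sites in this diff:
    with tv.TypedValues(*request.options).consume() as oc:
        kwargs = oc.pop_scalar_kwargs(
            temperature=Temperature,
            max_tokens=MaxTokens,
            max_completion_tokens=MaxCompletionTokens,
        )
    # options=[MaxCompletionTokens(2048)]  ->  kwargs == {'max_completion_tokens': 2048}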
@@ -30,6 +30,8 @@ _GPT_MODEL_NAMES = [
     'gpt-5-chat-latest',
     'gpt-5-mini',
     'gpt-5-nano',
+
+    'gpt-5.1',
 ]
 
 
@@ -46,7 +48,7 @@ CHAT_MODEL_NAMES = ModelNameCollection(
         for n in _GPT_MODEL_NAMES
     },
 
-    'gpt': 'gpt-5',
+    'gpt': 'gpt-5.1',
     'gpt-mini': 'gpt-5-mini',
 
     #
@@ -1,23 +1,26 @@
 import typing as ta
 
-import sentencepiece as spm
-
 from omlish import check
+from omlish import lang
 
 from .... import tokens as tks
 
 
+with lang.auto_proxy_import(globals()):
+    import sentencepiece as spm
+
+
 ##
 
 
-def build_vocab(spm_tokenizer: spm.SentencePieceProcessor) -> tks.Vocab:
+def build_vocab(spm_tokenizer: 'spm.SentencePieceProcessor') -> tks.Vocab:
     return tks.Vocab([
         (ta.cast(tks.Token, i), tks.TokenStr(spm_tokenizer.id_to_piece(i)))  # noqa
         for i in range(spm_tokenizer.get_piece_size())  # noqa
     ])
 
 
-def build_specials(spm_tokenizer: spm.SentencePieceProcessor) -> tks.SpecialTokens:
+def build_specials(spm_tokenizer: 'spm.SentencePieceProcessor') -> tks.SpecialTokens:
     # FIXME
     return tks.SpecialTokens([])
 
@@ -28,7 +31,7 @@ def build_specials(spm_tokenizer: spm.SentencePieceProcessor) -> tks.SpecialToke
 class SentencepieceTokenizer(tks.BaseTokenizer):
     def __init__(
         self,
-        spm_tokenizer: spm.SentencePieceProcessor,
+        spm_tokenizer: 'spm.SentencePieceProcessor',
     ) -> None:
         self._spm_tokenizer = check.isinstance(spm_tokenizer, spm.SentencePieceProcessor)
 
@@ -38,7 +41,7 @@ class SentencepieceTokenizer(tks.BaseTokenizer):
     )
 
     @property
-    def spm_tokenizer(self) -> spm.SentencePieceProcessor:
+    def spm_tokenizer(self) -> 'spm.SentencePieceProcessor':
         return self._spm_tokenizer
 
     #
@@ -0,0 +1,66 @@
+from omlish import check
+from omlish import marshal as msh
+from omlish import typedvalues as tv
+from omlish.formats import json
+from omlish.http import all as http
+
+from ....backends.tavily import protocol as pt
+from ...search import SearchHit
+from ...search import SearchHits
+from ...search import SearchRequest
+from ...search import SearchResponse
+from ...search import static_check_is_search_service
+from ...standard import ApiKey
+
+
+##
+
+
+# @omlish-manifest $.minichain.registries.manifests.RegistryManifest(
+#     name='tavily',
+#     type='SearchService',
+# )
+@static_check_is_search_service
+class TavilySearchService:
+    def __init__(
+        self,
+        *configs: ApiKey,
+        http_client: http.AsyncHttpClient | None = None,
+    ) -> None:
+        super().__init__()
+
+        self._http_client = http_client
+
+        with tv.consume(*configs) as cc:
+            self._api_key = ApiKey.pop_secret(cc, env='TAVILY_API_KEY')
+
+    async def invoke(self, request: SearchRequest) -> SearchResponse:
+        pt_request = pt.SearchRequest(
+            query=request.v,
+        )
+
+        raw_request = msh.marshal(pt_request)
+
+        http_response = await http.async_request(
+            'https://api.tavily.com/search',
+            headers={
+                http.consts.HEADER_CONTENT_TYPE: http.consts.CONTENT_TYPE_JSON,
+                http.consts.HEADER_AUTH: http.consts.format_bearer_auth_header(check.not_none(self._api_key).reveal()),
+            },
+            data=json.dumps(raw_request).encode('utf-8'),
+            client=self._http_client,
+        )
+
+        raw_response = json.loads(check.not_none(http_response.data).decode('utf-8'))
+
+        pt_response = msh.unmarshal(raw_response, pt.SearchResponse)
+
+        return SearchResponse(SearchHits(
+            l=[
+                SearchHit(
+                    title=r.title,
+                    url=r.url,
+                )
+                for r in pt_response.results or []
+            ],
+        ))
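
A hypothetical usage sketch of the new service. The shapes are inferred from the code above (request.v carries the query string, the response wraps SearchHits in its l field) and are not a documented API; the key can also come from the TAVILY_API_KEY environment variable:

    import asyncio

    async def main() -> None:
        svc = TavilySearchService(ApiKey('tvly-...'))  # or rely on TAVILY_API_KEY
        resp = await svc.invoke(SearchRequest('model context protocol'))
        for hit in resp.v.l:
            print(hit.title, '->', hit.url)

    asyncio.run(main())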
@@ -4,7 +4,6 @@ import typing as ta
 from omlish import check
 from omlish import lang
 
-from .....backends.tinygrad.models import llama3 as tgl3
 from ....chat.choices.services import ChatChoicesOptions
 from ....chat.choices.services import ChatChoicesRequest
 from ....chat.choices.services import ChatChoicesResponse
@@ -28,6 +27,10 @@ from ....stream.services import StreamResponseSink
 from ....stream.services import new_stream_response
 
 
+with lang.auto_proxy_import(globals()):
+    from .....backends.tinygrad.models import llama3 as tgl3
+
+
 ##
 
 
@@ -39,7 +42,7 @@ def _load_model(
     *,
     size: str | None = None,
     temperature: float | None = None,
-) -> tgl3.Llama3Llm:
+) -> 'tgl3.Llama3Llm':
     if size is None:
         size = DEFAULT_SIZE
     if temperature is None:
@@ -58,7 +61,7 @@
 
 
 def _prepare_toks(
-    llm: tgl3.Llama3Llm,
+    llm: 'tgl3.Llama3Llm',
     chat: Chat,
     options: ta.Sequence[ChatChoicesOptions],
 ) -> list[int]:
@@ -100,7 +103,7 @@ class BaseTinygradLlama3ChatService(lang.ExitStacked, lang.Abstract):
         self._temperature = temperature
 
     @lang.cached_function(transient=True)
-    def _load_model(self) -> tgl3.Llama3Llm:
+    def _load_model(self) -> 'tgl3.Llama3Llm':
         check.not_none(self._exit_stack)
 
         return _load_model(
@@ -1,23 +1,26 @@
 import typing as ta
 
-import tokenizers as tos
-
 from omlish import check
+from omlish import lang
 
 from .... import tokens as tks
 
 
+with lang.auto_proxy_import(globals()):
+    import tokenizers as tos
+
+
 ##
 
 
-def build_vocab(tos_tokenizer: tos.Tokenizer) -> tks.Vocab:
+def build_vocab(tos_tokenizer: 'tos.Tokenizer') -> tks.Vocab:
     return tks.Vocab([
         (ta.cast(tks.Token, i), tks.TokenStr(s))
         for s, i in tos_tokenizer.get_vocab().items()
     ])
 
 
-def build_specials(tos_tokenizer: tos.Tokenizer) -> tks.SpecialTokens:
+def build_specials(tos_tokenizer: 'tos.Tokenizer') -> tks.SpecialTokens:
     # FIXME
     return tks.SpecialTokens([])
 
@@ -28,7 +31,7 @@ def build_specials(tos_tokenizer: tos.Tokenizer) -> tks.SpecialTokens:
 class TokenizersTokenizer(tks.BaseTokenizer):
     def __init__(
         self,
-        tos_tokenizer: tos.Tokenizer,
+        tos_tokenizer: 'tos.Tokenizer',
     ) -> None:
         self._tos_tokenizer = check.isinstance(tos_tokenizer, tos.Tokenizer)
 
@@ -38,7 +41,7 @@ class TokenizersTokenizer(tks.BaseTokenizer):
     )
 
     @property
-    def tos_tokenizer(self) -> tos.Tokenizer:
+    def tos_tokenizer(self) -> 'tos.Tokenizer':
         return self._tos_tokenizer
 
     #
@@ -1,7 +1,6 @@
 import typing as ta
 
-import sentence_transformers as stfm
-
+from omlish import lang
 from omlish import typedvalues as tv
 
 from ....configs import Config
@@ -13,6 +12,10 @@ from ....vectors.embeddings import static_check_is_embedding_service
 from ....vectors.types import Vector
 
 
+with lang.auto_proxy_import(globals()):
+    import sentence_transformers as stfm
+
+
 ##
 
 
@@ -1,17 +1,20 @@
 import typing as ta
 
-import transformers as tfm
-
 from omlish import check
 from omlish import collections as col
+from omlish import lang
 
 from .... import tokens as tks
 
 
+with lang.auto_proxy_import(globals()):
+    import transformers as tfm
+
+
 ##
 
 
-def build_vocab(tfm_tokenizer: tfm.PreTrainedTokenizerBase) -> tks.Vocab:
+def build_vocab(tfm_tokenizer: 'tfm.PreTrainedTokenizerBase') -> tks.Vocab:
     return tks.Vocab([
         (ta.cast(tks.Token, i), tks.TokenStr(s))
         for s, i in tfm_tokenizer.get_vocab().items()
@@ -32,7 +35,7 @@ SPECIAL_TOKEN_ATTR_MAP: col.BiMap[type[tks.SpecialToken], str] = col.make_bi_map
 })
 
 
-def build_specials(tfm_tokenizer: tfm.PreTrainedTokenizerBase) -> tks.SpecialTokens:
+def build_specials(tfm_tokenizer: 'tfm.PreTrainedTokenizerBase') -> tks.SpecialTokens:
     return tks.SpecialTokens.from_dict({
         st: getattr(tfm_tokenizer, a)
         for st, a in SPECIAL_TOKEN_ATTR_MAP.items()
@@ -45,7 +48,7 @@ def build_specials(tfm_tokenizer: tfm.PreTrainedTokenizerBase) -> tks.SpecialTok
 class TransformersTokenizer(tks.BaseTokenizer):
     def __init__(
         self,
-        tfm_tokenizer: tfm.PreTrainedTokenizerBase,
+        tfm_tokenizer: 'tfm.PreTrainedTokenizerBase',
     ) -> None:
         self._tfm_tokenizer = check.isinstance(tfm_tokenizer, tfm.PreTrainedTokenizerBase)
 
@@ -55,7 +58,7 @@ class TransformersTokenizer(tks.BaseTokenizer):
     )
 
     @property
-    def tfm_tokenizer(self) -> tfm.PreTrainedTokenizerBase:
+    def tfm_tokenizer(self) -> 'tfm.PreTrainedTokenizerBase':
         return self._tfm_tokenizer
 
     #
@@ -7,15 +7,11 @@ import sys
 import threading
 import typing as ta
 
-import transformers as tfm
-
 from omlish import check
 from omlish import lang
 from omlish import typedvalues as tv
 from omlish.asyncs.asyncio.sync import AsyncioBufferRelay
 
-from .....backends.transformers.filecache import file_cache_patch_context
-from .....backends.transformers.streamers import CancellableTextStreamer
 from ....chat.choices.services import ChatChoicesRequest
 from ....chat.choices.services import ChatChoicesResponse
 from ....chat.choices.services import static_check_is_chat_choices_service
@@ -44,6 +40,12 @@ from ....stream.services import new_stream_response
 from ...impls.huggingface.configs import HuggingfaceHubToken
 
 
+with lang.auto_proxy_import(globals()):
+    import transformers as tfm
+
+    from .....backends import transformers as tfm_u
+
+
 ##
 
 
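
Besides proxying, the two direct submodule imports (backends.transformers.filecache and .streamers) collapse into the single package alias tfm_u. That lines up with the 14 lines added to ommlds/backends/transformers/__init__.py in the file list, which presumably re-export the needed symbols at the package root; a plausible shape (not shown in this diff, so an assumption):

    # ommlds/backends/transformers/__init__.py (assumed contents)
    from .filecache import (  # noqa
        file_cache_patch_context,
    )
    from .streamers import (  # noqa
        CancellableTextStreamer,
    )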
@@ -168,7 +170,7 @@ class BaseTransformersChatChoicesService(lang.ExitStacked):
         self._huggingface_hub_token = HuggingfaceHubToken.pop_secret(cc, env='HUGGINGFACE_HUB_TOKEN')
 
     @lang.cached_function(transient=True)
-    def _load_pipeline(self) -> tfm.Pipeline:
+    def _load_pipeline(self) -> 'tfm.Pipeline':
         # FIXME: unload
         check.not_none(self._exit_stack)
 
@@ -181,7 +183,7 @@
         for pkw_cfg in self._pipeline_kwargs:
             pkw.update(pkw_cfg.v)
 
-        with file_cache_patch_context(
+        with tfm_u.file_cache_patch_context(
             local_first=True,
             local_config_present_is_authoritative=True,
         ):
@@ -246,7 +248,7 @@ class TransformersChatChoicesStreamService(BaseTransformersChatChoicesService):
             if text or stream_end:
                 relay.push(text, *([None] if stream_end else []))
 
-        streamer = CancellableTextStreamer(
+        streamer = tfm_u.CancellableTextStreamer(
             check.not_none(pipeline.tokenizer),  # type: ignore[arg-type]
             streamer_callback,  # noqa
             skip_prompt=True,
@@ -255,7 +257,7 @@
 
         async with UseResources.or_new(request.options) as rs:
             thread = threading.Thread(
-                target=CancellableTextStreamer.ignoring_cancelled(pipeline),
+                target=tfm_u.CancellableTextStreamer.ignoring_cancelled(pipeline),
                 args=(
                     inputs,
                 ),
@@ -31,6 +31,10 @@ class MaxTokens(LlmOption, tv.UniqueScalarTypedValue[int]):
     pass
 
 
+class MaxCompletionTokens(LlmOption, tv.UniqueScalarTypedValue[int]):
+    pass
+
+
 ##
 
 
@@ -19,7 +19,7 @@ class SearchHit(lang.Final):
     title: str | None
     url: str | None
     description: str | None = None
-    snippets: lang.SequenceNotStr[str] | None
+    snippets: lang.SequenceNotStr[str] | None = None
 
 
 @dc.dataclass(frozen=True, kw_only=True)
@@ -25,6 +25,7 @@ class Device(tv.UniqueScalarTypedValue[ta.Any], Config):
 ##
 
 
+# TODO: ApiEndpointPath, ApiEndpointUrl, ApiBaseUrl, ...
 class ApiUrl(tv.UniqueScalarTypedValue[str], Config):
     pass
 
@@ -0,0 +1,23 @@
+from omlish import lang
+from omlish import marshal as msh
+
+from .protocol import ContentBlock
+
+
+##
+
+
+@lang.static_init
+def _install_standard_marshaling() -> None:
+    for root_cls, tag_field in [
+        (ContentBlock, 'type'),
+    ]:
+        msh.install_standard_factories(*msh.standard_polymorphism_factories(
+            msh.polymorphism_from_subclasses(
+                root_cls,
+                naming=msh.Naming.SNAKE,
+                strip_suffix=msh.AutoStripSuffix,
+            ),
+            msh.FieldTypeTagging(tag_field),
+            unions='partial',
+        ))
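
Net effect of the static-init block above: every ContentBlock subclass gets polymorphic (un)marshaling keyed by an in-band 'type' field, with subclass names snake_cased and the shared class-name suffix stripped. For a hypothetical TextContentBlock subclass (illustrative only, not taken from this diff), the wire shape would be roughly:

    msh.marshal(TextContentBlock(text='hi'))
    # -> {'type': 'text', 'text': 'hi'}

    msh.unmarshal({'type': 'text', 'text': 'hi'}, ContentBlock)
    # -> TextContentBlock(text='hi')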