ommlds 0.0.0.dev468__py3-none-any.whl → 0.0.0.dev470__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of ommlds might be problematic.

Files changed (36)
  1. ommlds/.omlish-manifests.json +7 -7
  2. ommlds/_hacks/__init__.py +4 -0
  3. ommlds/_hacks/funcs.py +110 -0
  4. ommlds/_hacks/names.py +158 -0
  5. ommlds/_hacks/params.py +73 -0
  6. ommlds/_hacks/patches.py +0 -3
  7. ommlds/backends/transformers/filecache.py +109 -0
  8. ommlds/cli/main.py +11 -5
  9. ommlds/cli/sessions/chat/backends/catalog.py +13 -8
  10. ommlds/cli/sessions/chat/backends/inject.py +15 -0
  11. ommlds/cli/sessions/completion/session.py +1 -1
  12. ommlds/cli/sessions/embedding/session.py +1 -1
  13. ommlds/minichain/__init__.py +1 -0
  14. ommlds/minichain/backends/catalogs/base.py +20 -1
  15. ommlds/minichain/backends/catalogs/simple.py +2 -2
  16. ommlds/minichain/backends/catalogs/strings.py +9 -7
  17. ommlds/minichain/backends/impls/anthropic/chat.py +5 -1
  18. ommlds/minichain/backends/impls/anthropic/stream.py +10 -5
  19. ommlds/minichain/backends/impls/google/chat.py +9 -2
  20. ommlds/minichain/backends/impls/google/search.py +6 -1
  21. ommlds/minichain/backends/impls/google/stream.py +10 -5
  22. ommlds/minichain/backends/impls/mistral.py +9 -2
  23. ommlds/minichain/backends/impls/ollama/chat.py +12 -9
  24. ommlds/minichain/backends/impls/openai/chat.py +9 -2
  25. ommlds/minichain/backends/impls/openai/completion.py +9 -2
  26. ommlds/minichain/backends/impls/openai/embedding.py +9 -2
  27. ommlds/minichain/backends/impls/openai/stream.py +10 -5
  28. ommlds/minichain/backends/impls/transformers/transformers.py +64 -26
  29. ommlds/minichain/registries/globals.py +18 -4
  30. ommlds/tools/git.py +4 -1
  31. {ommlds-0.0.0.dev468.dist-info → ommlds-0.0.0.dev470.dist-info}/METADATA +3 -3
  32. {ommlds-0.0.0.dev468.dist-info → ommlds-0.0.0.dev470.dist-info}/RECORD +36 -32
  33. {ommlds-0.0.0.dev468.dist-info → ommlds-0.0.0.dev470.dist-info}/WHEEL +0 -0
  34. {ommlds-0.0.0.dev468.dist-info → ommlds-0.0.0.dev470.dist-info}/entry_points.txt +0 -0
  35. {ommlds-0.0.0.dev468.dist-info → ommlds-0.0.0.dev470.dist-info}/licenses/LICENSE +0 -0
  36. {ommlds-0.0.0.dev468.dist-info → ommlds-0.0.0.dev470.dist-info}/top_level.txt +0 -0
ommlds/minichain/backends/catalogs/base.py CHANGED
@@ -3,15 +3,34 @@ import typing as ta
 
 from omlish import lang
 
+from ...configs import Config
+
+
+T = ta.TypeVar('T')
+
 
 ##
 
 
 class BackendCatalog(lang.Abstract):
+    class Backend(ta.NamedTuple):
+        factory: ta.Callable[..., ta.Any]
+        configs: ta.Sequence[Config] | None
+
     @abc.abstractmethod
-    def get_backend(self, service_cls: ta.Any, name: str, *args: ta.Any, **kwargs: ta.Any) -> ta.Any:
+    def get_backend(self, service_cls: type[T], name: str) -> Backend:
         raise NotImplementedError
 
+    def new_backend(
+            self,
+            service_cls: ta.Any,
+            name: str,
+            *args: ta.Any,
+            **kwargs: ta.Any,
+    ) -> ta.Any:
+        be = self.get_backend(service_cls, name)
+        return be.factory(*be.configs or [], *args, **kwargs)
+
     # #
     #
     # class Bound(lang.Final, ta.Generic[T]):
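The effect of this change is to split lookup from construction: get_backend now returns a small Backend record (a factory plus any configs resolved during lookup), and the new new_backend helper is what actually instantiates it, prepending those configs to whatever the caller passes. A minimal sketch of a conforming catalog (the EchoService type and the 'echo' name are made up for illustration):

import typing as ta

from ommlds.minichain.backends.catalogs.base import BackendCatalog


class EchoService:  # hypothetical service type; just records what it was given
    def __init__(self, *args: ta.Any, **kwargs: ta.Any) -> None:
        self.args, self.kwargs = args, kwargs


class EchoCatalog(BackendCatalog):  # minimal sketch of an implementation
    def get_backend(self, service_cls, name):
        # Lookup only: hand back the factory plus any pre-resolved configs.
        return BackendCatalog.Backend(EchoService, ['model-config'])


# new_backend (inherited from the base class) prepends the resolved configs,
# then appends caller args/kwargs, before calling the factory.
svc = EchoCatalog().new_backend(EchoService, 'echo', temperature=0.2)
assert svc.args == ('model-config',)
assert svc.kwargs == {'temperature': 0.2}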
ommlds/minichain/backends/catalogs/simple.py CHANGED
@@ -40,9 +40,9 @@ class SimpleBackendCatalog(BackendCatalog):
             sc_dct[e.name] = e
         self._dct = dct
 
-    def get_backend(self, service_cls: ta.Any, name: str, *args: ta.Any, **kwargs: ta.Any) -> ta.Any:
+    def get_backend(self, service_cls: ta.Any, name: str, *args: ta.Any, **kwargs: ta.Any) -> BackendCatalog.Backend:
         e = self._dct[service_cls][name]
-        return e.factory_fn(*args, **kwargs)
+        return BackendCatalog.Backend(e.factory_fn, None)
 
 
 ##
ommlds/minichain/backends/catalogs/strings.py CHANGED
@@ -5,7 +5,7 @@ from omlish import check
 from ...models.configs import ModelPath
 from ...models.configs import ModelRepo
 from ...models.repos.resolving import ModelRepoResolver
-from ...registries.globals import registry_new
+from ...registries.globals import get_registry_cls
 from ..strings.parsing import parse_backend_string
 from ..strings.resolving import BackendStringResolver
 from ..strings.resolving import ResolveBackendStringArgs
@@ -30,14 +30,14 @@ class BackendStringBackendCatalog(BackendCatalog):
         self._string_resolver = string_resolver
         self._model_repo_resolver = model_repo_resolver
 
-    def get_backend(self, service_cls: ta.Any, name: str, *args: ta.Any, **kwargs: ta.Any) -> ta.Any:
+    def get_backend(self, service_cls: ta.Any, name: str, *args: ta.Any, **kwargs: ta.Any) -> BackendCatalog.Backend:
         ps = parse_backend_string(name)
         rs = check.not_none(self._string_resolver.resolve_backend_string(ResolveBackendStringArgs(
             service_cls,
             ps,
         )))
 
-        al = list(rs.args or [])
+        al: list = list(rs.args or [])
 
         # FIXME: lol
         if al and isinstance(al[0], ModelRepo):
@@ -46,10 +46,12 @@ class BackendStringBackendCatalog(BackendCatalog):
             mrp = check.not_none(mrr.resolve(mr))
             al = [ModelPath(mrp.path), *al[1:]]
 
-        return registry_new(
+        cls = get_registry_cls(
             service_cls,
             rs.name,
-            *al,
-            *args,
-            **kwargs,
+        )
+
+        return BackendCatalog.Backend(
+            cls,
+            al,
         )
ommlds/minichain/backends/impls/anthropic/chat.py CHANGED
@@ -46,9 +46,12 @@ class AnthropicChatChoicesService:
     def __init__(
             self,
             *configs: ApiKey | ModelName,
+            http_client: http.AsyncHttpClient | None = None,
    ) -> None:
         super().__init__()
 
+        self._http_client = http_client
+
         with tv.consume(*configs) as cc:
             self._api_key = check.not_none(ApiKey.pop_secret(cc, env='ANTHROPIC_API_KEY'))
             self._model_name = cc.pop(self.DEFAULT_MODEL_NAME)
@@ -88,7 +91,7 @@ class AnthropicChatChoicesService:
 
         raw_request = msh.marshal(a_req)
 
-        raw_response = http.request(
+        raw_response = await http.async_request(
             'https://api.anthropic.com/v1/messages',
             headers={
                 http.consts.HEADER_CONTENT_TYPE: http.consts.CONTENT_TYPE_JSON,
@@ -96,6 +99,7 @@ class AnthropicChatChoicesService:
                 b'anthropic-version': b'2023-06-01',
             },
             data=json.dumps(raw_request).encode('utf-8'),
+            client=self._http_client,
         )
 
         response = json.loads(check.not_none(raw_response.data).decode('utf-8'))
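All of the HTTP-backed services in this release follow the same pattern: the constructor grows an optional http_client keyword (defaulting to None), and the blocking http.request(...) call becomes await http.async_request(..., client=self._http_client); presumably a None client lets the helper manage its own connection as before. A hedged construction sketch (assumes ANTHROPIC_API_KEY is set in the environment; no request is issued here):

from omlish.http import all as http

from ommlds.minichain.backends.impls.anthropic.chat import AnthropicChatChoicesService

# Default: no shared client; each call lets http.async_request manage one.
svc = AnthropicChatChoicesService()

# Injected client: any http.AsyncHttpClient works; synchronous callers can
# wrap a sync client in the adapter this release uses in tools/git.py.
svc_shared = AnthropicChatChoicesService(
    http_client=http.SyncAsyncHttpClient(http.client()),
)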
ommlds/minichain/backends/impls/anthropic/stream.py CHANGED
@@ -39,9 +39,15 @@ from .protocol import build_protocol_tool
 # )
 @static_check_is_chat_choices_stream_service
 class AnthropicChatChoicesStreamService:
-    def __init__(self, *configs: Config) -> None:
+    def __init__(
+            self,
+            *configs: Config,
+            http_client: http.AsyncHttpClient | None = None,
+    ) -> None:
         super().__init__()
 
+        self._http_client = http_client
+
         with tv.consume(*configs) as cc:
             self._model_name = cc.pop(AnthropicChatChoicesService.DEFAULT_MODEL_NAME)
             self._api_key = check.not_none(ApiKey.pop_secret(cc, env='ANTHROPIC_API_KEY'))
@@ -84,8 +90,8 @@ class AnthropicChatChoicesStreamService:
         )
 
         async with UseResources.or_new(request.options) as rs:
-            http_client = rs.enter_context(http.client())
-            http_response = rs.enter_context(http_client.stream_request(http_request))
+            http_client = await rs.enter_async_context(http.manage_async_client(self._http_client))
+            http_response = await rs.enter_async_context(await http_client.stream_request(http_request))
 
             async def inner(sink: StreamResponseSink[AiChoicesDeltas]) -> ta.Sequence[ChatChoicesOutputs] | None:
                 msg_start: AnthropicSseDecoderEvents.MessageStart | None = None
@@ -95,8 +101,7 @@ class AnthropicChatChoicesStreamService:
                 db = DelimitingBuffer([b'\r', b'\n', b'\r\n'])
                 sd = sse.SseDecoder()
                 while True:
-                    # FIXME: read1 not on response stream protocol
-                    b = http_response.stream.read1(self.READ_CHUNK_SIZE)  # type: ignore[attr-defined]
+                    b = await http_response.stream.read1(self.READ_CHUNK_SIZE)
                     for l in db.feed(b):
                         if isinstance(l, DelimitingBuffer.Incomplete):
                             # FIXME: handle
ommlds/minichain/backends/impls/google/chat.py CHANGED
@@ -40,9 +40,15 @@ from .tools import build_tool_spec_schema
 class GoogleChatChoicesService:
     DEFAULT_MODEL_NAME: ta.ClassVar[ModelName] = ModelName(check.not_none(MODEL_NAMES.default))
 
-    def __init__(self, *configs: ApiKey | ModelName) -> None:
+    def __init__(
+            self,
+            *configs: ApiKey | ModelName,
+            http_client: http.AsyncHttpClient | None = None,
+    ) -> None:
         super().__init__()
 
+        self._http_client = http_client
+
         with tv.consume(*configs) as cc:
             self._model_name = cc.pop(self.DEFAULT_MODEL_NAME)
             self._api_key = ApiKey.pop_secret(cc, env='GEMINI_API_KEY')
@@ -149,11 +155,12 @@ class GoogleChatChoicesService:
 
         model_name = MODEL_NAMES.resolve(self._model_name.v)
 
-        resp = http.request(
+        resp = await http.async_request(
             f'{self.BASE_URL.rstrip("/")}/{model_name}:generateContent?key={key}',
             headers={'Content-Type': 'application/json'},
             data=json.dumps_compact(req_dct).encode('utf-8'),
             method='POST',
+            client=self._http_client,
         )
 
         resp_dct = json.loads(check.not_none(resp.data).decode('utf-8'))
ommlds/minichain/backends/impls/google/search.py CHANGED
@@ -82,12 +82,16 @@ class CseSearchService:
             self,
             cse_id: str | None = None,
             cse_api_key: str | None = None,
+            *,
+            http_client: http.AsyncHttpClient | None = None,
     ) -> None:
         super().__init__()
 
         self._cse_id = cse_id
         self._cse_api_key = cse_api_key
 
+        self._http_client = http_client
+
     async def invoke(
             self,
             request: SearchRequest,
@@ -97,8 +101,9 @@ class CseSearchService:
             cx=check.non_empty_str(self._cse_id),
             q=request.v,
         ))
-        resp = http.request(
+        resp = await http.async_request(
             f'https://www.googleapis.com/customsearch/v1?{qs}',
+            client=self._http_client,
         )
         out = check.not_none(resp.data)
 
ommlds/minichain/backends/impls/google/stream.py CHANGED
@@ -46,9 +46,15 @@ from .tools import build_tool_spec_schema
 class GoogleChatChoicesStreamService:
     DEFAULT_MODEL_NAME: ta.ClassVar[ModelName] = ModelName(check.not_none(MODEL_NAMES.default))
 
-    def __init__(self, *configs: ApiKey | ModelName) -> None:
+    def __init__(
+            self,
+            *configs: ApiKey | ModelName,
+            http_client: http.AsyncHttpClient | None = None,
+    ) -> None:
         super().__init__()
 
+        self._http_client = http_client
+
         with tv.consume(*configs) as cc:
             self._model_name = cc.pop(self.DEFAULT_MODEL_NAME)
             self._api_key = ApiKey.pop_secret(cc, env='GEMINI_API_KEY')
@@ -163,14 +169,13 @@ class GoogleChatChoicesStreamService:
         )
 
         async with UseResources.or_new(request.options) as rs:
-            http_client = rs.enter_context(http.client())
-            http_response = rs.enter_context(http_client.stream_request(http_request))
+            http_client = await rs.enter_async_context(http.manage_async_client(self._http_client))
+            http_response = await rs.enter_async_context(await http_client.stream_request(http_request))
 
             async def inner(sink: StreamResponseSink[AiChoicesDeltas]) -> ta.Sequence[ChatChoicesOutputs] | None:
                 db = DelimitingBuffer([b'\r', b'\n', b'\r\n'])
                 while True:
-                    # FIXME: read1 not on response stream protocol
-                    b = http_response.stream.read1(self.READ_CHUNK_SIZE)  # type: ignore[attr-defined]
+                    b = await http_response.stream.read1(self.READ_CHUNK_SIZE)
                     for bl in db.feed(b):
                         if isinstance(bl, DelimitingBuffer.Incomplete):
                             # FIXME: handle
ommlds/minichain/backends/impls/mistral.py CHANGED
@@ -40,10 +40,16 @@ class MistralChatChoicesService:
         AiMessage: 'assistant',
     }
 
-    def __init__(self, *, api_key: str | None = None) -> None:
+    def __init__(
+            self,
+            *,
+            api_key: str | None = None,
+            http_client: http.AsyncHttpClient | None = None,
+    ) -> None:
         super().__init__()
 
         self._api_key = api_key
+        self._http_client = http_client
 
     def _get_msg_content(self, m: Message) -> str | None:
         if isinstance(m, AiMessage):
@@ -73,7 +79,7 @@ class MistralChatChoicesService:
             ],
         }
 
-        resp = http.request(
+        resp = await http.async_request(
             'https://api.mistral.ai/v1/chat/completions',
             method='POST',
             data=json.dumps_compact(req_dct).encode('utf-8'),
@@ -82,6 +88,7 @@ class MistralChatChoicesService:
                 'Accept': 'application/json',
                 'Authorization': f'Bearer {key}',
             },
+            client=self._http_client,
         )
 
         if resp.status == 429:
ommlds/minichain/backends/impls/ollama/chat.py CHANGED
@@ -54,16 +54,19 @@ class BaseOllamaChatChoicesService(lang.Abstract):
     def __init__(
             self,
             *configs: ApiUrl | ModelName,
+            http_client: http.AsyncHttpClient | None = None,
     ) -> None:
         super().__init__()
 
+        self._http_client = http_client
+
         with tv.consume(*configs) as cc:
             self._api_url = cc.pop(self.DEFAULT_API_URL)
             self._model_name = cc.pop(self.DEFAULT_MODEL_NAME)
 
     #
 
-    ROLE_MAP: ta.ClassVar[ta.Mapping[type[Message], pt.Role]] = {
+    ROLE_MAP: ta.ClassVar[ta.Mapping[type[Message], pt.Role]] = {  # noqa
         SystemMessage: 'system',
         UserMessage: 'user',
         AiMessage: 'assistant',
@@ -111,10 +114,11 @@ class OllamaChatChoicesService(BaseOllamaChatChoicesService):
 
         raw_request = msh.marshal(a_req)
 
-        raw_response = http.request(
-            self._api_url.v.removesuffix('/') + '/chat',
-            data=json.dumps(raw_request).encode('utf-8'),
-        )
+        async with http.manage_async_client(self._http_client) as http_client:
+            raw_response = await http_client.request(http.HttpRequest(
+                self._api_url.v.removesuffix('/') + '/chat',
+                data=json.dumps(raw_request).encode('utf-8'),
+            ))
 
         json_response = json.loads(check.not_none(raw_response.data).decode('utf-8'))
 
@@ -165,14 +169,13 @@ class OllamaChatChoicesStreamService(BaseOllamaChatChoicesService):
         )
 
         async with UseResources.or_new(request.options) as rs:
-            http_client = rs.enter_context(http.client())
-            http_response = rs.enter_context(http_client.stream_request(http_request))
+            http_client = await rs.enter_async_context(http.manage_async_client(self._http_client))
+            http_response = await rs.enter_async_context(await http_client.stream_request(http_request))
 
             async def inner(sink: StreamResponseSink[AiChoicesDeltas]) -> ta.Sequence[ChatChoicesOutputs] | None:
                 db = DelimitingBuffer([b'\r', b'\n', b'\r\n'])
                 while True:
-                    # FIXME: read1 not on response stream protocol
-                    b = http_response.stream.read1(self.READ_CHUNK_SIZE)  # type: ignore[attr-defined]
+                    b = await http_response.stream.read1(self.READ_CHUNK_SIZE)
                     for l in db.feed(b):
                         if isinstance(l, DelimitingBuffer.Incomplete):
                             # FIXME: handle
ommlds/minichain/backends/impls/openai/chat.py CHANGED
@@ -42,9 +42,15 @@ from .names import MODEL_NAMES
 class OpenaiChatChoicesService:
     DEFAULT_MODEL_NAME: ta.ClassVar[ModelName] = ModelName(check.not_none(MODEL_NAMES.default))
 
-    def __init__(self, *configs: ApiKey | ModelName | DefaultOptions) -> None:
+    def __init__(
+            self,
+            *configs: ApiKey | ModelName | DefaultOptions,
+            http_client: http.AsyncHttpClient | None = None,
+    ) -> None:
         super().__init__()
 
+        self._http_client = http_client
+
         with tv.consume(*configs) as cc:
             self._model_name = cc.pop(self.DEFAULT_MODEL_NAME)
             self._api_key = ApiKey.pop_secret(cc, env='OPENAI_API_KEY')
@@ -68,13 +74,14 @@ class OpenaiChatChoicesService:
 
         raw_request = msh.marshal(rh.oai_request())
 
-        http_response = http.request(
+        http_response = await http.async_request(
             'https://api.openai.com/v1/chat/completions',
             headers={
                 http.consts.HEADER_CONTENT_TYPE: http.consts.CONTENT_TYPE_JSON,
                 http.consts.HEADER_AUTH: http.consts.format_bearer_auth_header(check.not_none(self._api_key).reveal()),
             },
             data=json.dumps(raw_request).encode('utf-8'),
+            client=self._http_client,
         )
 
         raw_response = json.loads(check.not_none(http_response.data).decode('utf-8'))
ommlds/minichain/backends/impls/openai/completion.py CHANGED
@@ -23,9 +23,15 @@ from ....standard import ApiKey
 class OpenaiCompletionService:
     DEFAULT_MODEL_NAME: ta.ClassVar[str] = 'gpt-3.5-turbo-instruct'
 
-    def __init__(self, *configs: Config) -> None:
+    def __init__(
+            self,
+            *configs: Config,
+            http_client: http.AsyncHttpClient | None = None,
+    ) -> None:
         super().__init__()
 
+        self._http_client = http_client
+
         with tv.consume(*configs) as cc:
             self._api_key = ApiKey.pop_secret(cc, env='OPENAI_API_KEY')
 
@@ -41,13 +47,14 @@ class OpenaiCompletionService:
             stream=False,
         )
 
-        raw_response = http.request(
+        raw_response = await http.async_request(
             'https://api.openai.com/v1/completions',
             headers={
                 http.consts.HEADER_CONTENT_TYPE: http.consts.CONTENT_TYPE_JSON,
                 http.consts.HEADER_AUTH: http.consts.format_bearer_auth_header(check.not_none(self._api_key).reveal()),
             },
             data=json.dumps(raw_request).encode('utf-8'),
+            client=self._http_client,
         )
 
         response = json.loads(check.not_none(raw_response.data).decode('utf-8'))
ommlds/minichain/backends/impls/openai/embedding.py CHANGED
@@ -22,9 +22,15 @@ from ....vectors.types import Vector
 class OpenaiEmbeddingService:
     model = 'text-embedding-3-small'
 
-    def __init__(self, *configs: Config) -> None:
+    def __init__(
+            self,
+            *configs: Config,
+            http_client: http.AsyncHttpClient | None = None,
+    ) -> None:
         super().__init__()
 
+        self._http_client = http_client
+
         with tv.consume(*configs) as cc:
             self._api_key = ApiKey.pop_secret(cc, env='OPENAI_API_KEY')
 
@@ -34,13 +40,14 @@ class OpenaiEmbeddingService:
             input=check.isinstance(request.v, str),
         )
 
-        raw_response = http.request(
+        raw_response = await http.async_request(
             'https://api.openai.com/v1/embeddings',
             headers={
                 http.consts.HEADER_CONTENT_TYPE: http.consts.CONTENT_TYPE_JSON,
                 http.consts.HEADER_AUTH: http.consts.format_bearer_auth_header(check.not_none(self._api_key).reveal()),
             },
             data=json.dumps(raw_request).encode('utf-8'),
+            client=self._http_client,
         )
 
         response = json.loads(check.not_none(raw_response.data).decode('utf-8'))
ommlds/minichain/backends/impls/openai/stream.py CHANGED
@@ -41,9 +41,15 @@ from .names import MODEL_NAMES
 # )
 @static_check_is_chat_choices_stream_service
 class OpenaiChatChoicesStreamService:
-    def __init__(self, *configs: Config) -> None:
+    def __init__(
+            self,
+            *configs: Config,
+            http_client: http.AsyncHttpClient | None = None,
+    ) -> None:
         super().__init__()
 
+        self._http_client = http_client
+
         with tv.consume(*configs) as cc:
             self._model_name = cc.pop(OpenaiChatChoicesService.DEFAULT_MODEL_NAME)
             self._api_key = ApiKey.pop_secret(cc, env='OPENAI_API_KEY')
@@ -81,15 +87,14 @@ class OpenaiChatChoicesStreamService:
         )
 
         async with UseResources.or_new(request.options) as rs:
-            http_client = rs.enter_context(http.client())
-            http_response = rs.enter_context(http_client.stream_request(http_request))
+            http_client = await rs.enter_async_context(http.manage_async_client(self._http_client))
+            http_response = await rs.enter_async_context(await http_client.stream_request(http_request))
 
             async def inner(sink: StreamResponseSink[AiChoicesDeltas]) -> ta.Sequence[ChatChoicesOutputs]:
                 db = DelimitingBuffer([b'\r', b'\n', b'\r\n'])
                 sd = sse.SseDecoder()
                 while True:
-                    # FIXME: read1 not on response stream protocol
-                    b = http_response.stream.read1(self.READ_CHUNK_SIZE)  # type: ignore[attr-defined]
+                    b = await http_response.stream.read1(self.READ_CHUNK_SIZE)
                     for l in db.feed(b):
                         if isinstance(l, DelimitingBuffer.Incomplete):
                             # FIXME: handle
ommlds/minichain/backends/impls/transformers/transformers.py CHANGED
@@ -4,6 +4,7 @@ TODO:
  - https://huggingface.co/blog/aifeifei798/transformers-streaming-output
 """
 import sys
+import threading
 import typing as ta
 
 import transformers as tfm
@@ -11,7 +12,10 @@ import transformers as tfm
 from omlish import check
 from omlish import lang
 from omlish import typedvalues as tv
+from omlish.asyncs.asyncio.sync import AsyncioBufferRelay
 
+from .....backends.transformers.filecache import file_cache_patch_context
+from .....backends.transformers.streamers import CancellableTextStreamer
 from ....chat.choices.services import ChatChoicesRequest
 from ....chat.choices.services import ChatChoicesResponse
 from ....chat.choices.services import static_check_is_chat_choices_service
@@ -177,10 +181,14 @@ class BaseTransformersChatChoicesService(lang.ExitStacked):
         for pkw_cfg in self._pipeline_kwargs:
             pkw.update(pkw_cfg.v)
 
-        return tfm.pipeline(
-            'text-generation',
-            **pkw,
-        )
+        with file_cache_patch_context(
+                local_first=True,
+                local_config_present_is_authoritative=True,
+        ):
+            return tfm.pipeline(
+                'text-generation',
+                **pkw,
+            )
 
 
 ##
@@ -232,29 +240,59 @@ class TransformersChatChoicesStreamService(BaseTransformersChatChoicesService):
             for m in request.v
         ]
 
+        relay: AsyncioBufferRelay = AsyncioBufferRelay()
+
+        def streamer_callback(text: str, *, stream_end: bool) -> None:
+            if text or stream_end:
+                relay.push(text, *([None] if stream_end else []))
+
+        streamer = CancellableTextStreamer(
+            check.not_none(pipeline.tokenizer),  # type: ignore[arg-type]
+            streamer_callback,  # noqa
+            skip_prompt=True,
+            skip_special_tokens=True,
+        )
+
         async with UseResources.or_new(request.options) as rs:
+            thread = threading.Thread(
+                target=CancellableTextStreamer.ignoring_cancelled(pipeline),
+                args=(
+                    inputs,
+                ),
+                kwargs=dict(
+                    streamer=streamer,
+                ),
+            )
+
+            def stop_thread() -> None:
+                streamer.cancel()
+                # thread.join()
+
+            rs.enter_context(lang.defer(stop_thread))
+
+            thread.start()
+
             async def inner(sink: StreamResponseSink[AiChoicesDeltas]) -> ta.Sequence[ChatChoicesOutputs] | None:
-                # last_role: ta.Any = None
-                #
-                # for chunk in output:
-                #     check.state(chunk['object'] == 'chat.completion.chunk')
-                #
-                #     choice = check.single(chunk['choices'])
-                #
-                #     if not (delta := choice.get('delta', {})):
-                #         continue
-                #
-                #     # FIXME: check role is assistant
-                #     if (role := delta.get('role')) != last_role:
-                #         last_role = role
-                #
-                #     # FIXME: stop reason
-                #
-                #     if (content := delta.get('content', '')):
-                #         await sink.emit(AiChoicesDeltas([AiChoiceDeltas([ContentAiChoiceDelta(content)])]))
-                #
-                # return None
-
-                raise NotImplementedError
+                while True:
+                    await relay.wait()
+                    got = relay.swap()
+
+                    if not got:
+                        raise RuntimeError
+
+                    if got[-1] is None:
+                        out = ''.join(got[:-1])
+                        end = True
+                    else:
+                        out = ''.join(got)
+                        end = False
+
+                    if out:
+                        await sink.emit(AiChoicesDeltas([AiChoiceDeltas([ContentAiChoiceDelta(out)])]))
+
+                    if end:
+                        break
+
+                return []
 
             return await new_stream_response(rs, inner)
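The new streaming implementation runs the transformers pipeline on a worker thread and hands text back to the async consumer through a buffer relay: the streamer callback push()es chunks (with a trailing None marking end-of-stream), while the async side wait()s and swap()s out whatever has accumulated. The sketch below reproduces that handoff with a small local stand-in instead of omlish's AsyncioBufferRelay, and a fake producer instead of the pipeline, to show the shape of the loop:

import asyncio
import threading


class MiniRelay:
    # Toy stand-in for the buffer relay used above: producers push() items
    # from any thread; the async side wait()s, then swap()s the batch out.
    def __init__(self) -> None:
        self._items: list = []
        self._lock = threading.Lock()
        self._event = asyncio.Event()
        self._loop = asyncio.get_running_loop()

    def push(self, *items) -> None:  # called from the worker thread
        with self._lock:
            self._items.extend(items)
        self._loop.call_soon_threadsafe(self._event.set)

    async def wait(self) -> None:
        await self._event.wait()

    def swap(self) -> list:
        with self._lock:
            out, self._items = self._items, []
        self._event.clear()
        return out


async def main() -> None:
    relay = MiniRelay()

    def produce() -> None:  # stands in for the pipeline + streamer thread
        for tok in ('Hel', 'lo ', 'world'):
            relay.push(tok)
        relay.push(None)  # None marks end-of-stream, as in the service code

    threading.Thread(target=produce).start()

    parts: list[str] = []
    while True:
        await relay.wait()
        got = relay.swap()
        done = bool(got) and got[-1] is None
        parts.extend(t for t in got if t is not None)
        if done:
            break

    print(''.join(parts))  # -> Hello world


asyncio.run(main())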
ommlds/minichain/registries/globals.py CHANGED
@@ -98,20 +98,34 @@ def register_type(
 
 
 @ta.overload
-def registry_new(cls: type[T], name: str, *args: ta.Any, **kwargs: ta.Any) -> T:
+def get_registry_cls(cls: type[T], name: str) -> type[T]:
     ...
 
 
 @ta.overload
-def registry_new(cls: ta.Any, name: str, *args: ta.Any, **kwargs: ta.Any) -> ta.Any:
+def get_registry_cls(cls: ta.Any, name: str) -> ta.Any:
     ...
 
 
-def registry_new(cls, name, *args, **kwargs):
+def get_registry_cls(cls, name, *args, **kwargs):
     be_cls = _GlobalRegistry.instance().get_registry_cls(cls, name)
     if isinstance(cls, type):
         be_cls = check.issubclass(be_cls, cls)  # noqa
-    return be_cls(*args, **kwargs)
+    return be_cls
+
+
+@ta.overload
+def registry_new(cls: type[T], name: str, *args: ta.Any, **kwargs: ta.Any) -> T:
+    ...
+
+
+@ta.overload
+def registry_new(cls: ta.Any, name: str, *args: ta.Any, **kwargs: ta.Any) -> ta.Any:
+    ...
+
+
+def registry_new(cls, name, *args, **kwargs):
+    return get_registry_cls(cls, name)(*args, **kwargs)
 
 
 #
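The registry change mirrors the catalog change: get_registry_cls does the lookup (and subclass check) without constructing anything, which is what BackendStringBackendCatalog now uses to build its deferred Backend record, while registry_new keeps the old look-up-and-instantiate behavior as a thin wrapper. A self-contained stand-in (not the real _GlobalRegistry, whose registration API this example does not reproduce) showing the relationship:

# Hypothetical local stand-in for the split; the real functions consult
# _GlobalRegistry.instance() and check.issubclass the result.
_REG: dict[tuple[type, str], type] = {}


def get_registry_cls(base: type, name: str) -> type:
    return _REG[(base, name)]  # lookup only, nothing is constructed


def registry_new(base: type, name: str, *args, **kwargs):
    # Unchanged observable behavior: resolve the class, then instantiate it.
    return get_registry_cls(base, name)(*args, **kwargs)


class Greeter:
    def __init__(self, who: str) -> None:
        self.who = who


_REG[(Greeter, 'basic')] = Greeter
assert registry_new(Greeter, 'basic', 'world').who == 'world'
assert get_registry_cls(Greeter, 'basic') is Greeter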
ommlds/tools/git.py CHANGED
@@ -16,6 +16,7 @@ from omdev.tools.git.messages import GitMessageGenerator
 from omlish import check
 from omlish import lang
 from omlish.configs.classes import Configurable
+from omlish.http import all as http
 from omlish.subprocesses.sync import subprocesses
 
 from .. import minichain as mc
@@ -76,7 +77,9 @@ class OpenaiGitAiBackend(GitAiBackend['OpenaiGitAiBackend.Config']):
         if (sec := load_secrets().try_get(key.lower())) is not None:
             os.environ[key] = sec.reveal()
 
-        llm = OpenaiChatChoicesService()
+        llm = OpenaiChatChoicesService(
+            http_client=http.SyncAsyncHttpClient(http.client()),
+        )
 
         resp = lang.sync_await(llm.invoke(mc.ChatChoicesRequest(
             [mc.UserMessage(prompt)],
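Because the OpenAI chat service is now async end-to-end, the synchronous git tool adapts a sync HTTP client and drives the coroutine with lang.sync_await. A hedged end-to-end sketch of that pattern (requires OPENAI_API_KEY and performs a real API call; the prompt text is illustrative):

from omlish import lang
from omlish.http import all as http

from ommlds import minichain as mc
from ommlds.minichain.backends.impls.openai.chat import OpenaiChatChoicesService

llm = OpenaiChatChoicesService(
    http_client=http.SyncAsyncHttpClient(http.client()),
)

resp = lang.sync_await(llm.invoke(mc.ChatChoicesRequest(
    [mc.UserMessage('Write a one-line commit message for a typo fix.')],
)))

print(resp)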