ommlds 0.0.0.dev468__py3-none-any.whl → 0.0.0.dev470__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of ommlds might be problematic.

Files changed (36)
  1. ommlds/.omlish-manifests.json +7 -7
  2. ommlds/_hacks/__init__.py +4 -0
  3. ommlds/_hacks/funcs.py +110 -0
  4. ommlds/_hacks/names.py +158 -0
  5. ommlds/_hacks/params.py +73 -0
  6. ommlds/_hacks/patches.py +0 -3
  7. ommlds/backends/transformers/filecache.py +109 -0
  8. ommlds/cli/main.py +11 -5
  9. ommlds/cli/sessions/chat/backends/catalog.py +13 -8
  10. ommlds/cli/sessions/chat/backends/inject.py +15 -0
  11. ommlds/cli/sessions/completion/session.py +1 -1
  12. ommlds/cli/sessions/embedding/session.py +1 -1
  13. ommlds/minichain/__init__.py +1 -0
  14. ommlds/minichain/backends/catalogs/base.py +20 -1
  15. ommlds/minichain/backends/catalogs/simple.py +2 -2
  16. ommlds/minichain/backends/catalogs/strings.py +9 -7
  17. ommlds/minichain/backends/impls/anthropic/chat.py +5 -1
  18. ommlds/minichain/backends/impls/anthropic/stream.py +10 -5
  19. ommlds/minichain/backends/impls/google/chat.py +9 -2
  20. ommlds/minichain/backends/impls/google/search.py +6 -1
  21. ommlds/minichain/backends/impls/google/stream.py +10 -5
  22. ommlds/minichain/backends/impls/mistral.py +9 -2
  23. ommlds/minichain/backends/impls/ollama/chat.py +12 -9
  24. ommlds/minichain/backends/impls/openai/chat.py +9 -2
  25. ommlds/minichain/backends/impls/openai/completion.py +9 -2
  26. ommlds/minichain/backends/impls/openai/embedding.py +9 -2
  27. ommlds/minichain/backends/impls/openai/stream.py +10 -5
  28. ommlds/minichain/backends/impls/transformers/transformers.py +64 -26
  29. ommlds/minichain/registries/globals.py +18 -4
  30. ommlds/tools/git.py +4 -1
  31. {ommlds-0.0.0.dev468.dist-info → ommlds-0.0.0.dev470.dist-info}/METADATA +3 -3
  32. {ommlds-0.0.0.dev468.dist-info → ommlds-0.0.0.dev470.dist-info}/RECORD +36 -32
  33. {ommlds-0.0.0.dev468.dist-info → ommlds-0.0.0.dev470.dist-info}/WHEEL +0 -0
  34. {ommlds-0.0.0.dev468.dist-info → ommlds-0.0.0.dev470.dist-info}/entry_points.txt +0 -0
  35. {ommlds-0.0.0.dev468.dist-info → ommlds-0.0.0.dev470.dist-info}/licenses/LICENSE +0 -0
  36. {ommlds-0.0.0.dev468.dist-info → ommlds-0.0.0.dev470.dist-info}/top_level.txt +0 -0
ommlds/minichain/backends/catalogs/base.py CHANGED
@@ -3,15 +3,34 @@ import typing as ta
 
 from omlish import lang
 
+from ...configs import Config
+
+
+T = ta.TypeVar('T')
+
 
 ##
 
 
 class BackendCatalog(lang.Abstract):
+    class Backend(ta.NamedTuple):
+        factory: ta.Callable[..., ta.Any]
+        configs: ta.Sequence[Config] | None
+
     @abc.abstractmethod
-    def get_backend(self, service_cls: ta.Any, name: str, *args: ta.Any, **kwargs: ta.Any) -> ta.Any:
+    def get_backend(self, service_cls: type[T], name: str) -> Backend:
         raise NotImplementedError
 
+    def new_backend(
+            self,
+            service_cls: ta.Any,
+            name: str,
+            *args: ta.Any,
+            **kwargs: ta.Any,
+    ) -> ta.Any:
+        be = self.get_backend(service_cls, name)
+        return be.factory(*be.configs or [], *args, **kwargs)
+
     # #
     #
     # class Bound(lang.Final, ta.Generic[T]):
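The effect of this change is to split lookup from construction: get_backend now returns a small Backend record (a factory plus any configs resolved during lookup), and the new new_backend helper is what actually instantiates it, prepending those configs to whatever the caller passes. A minimal sketch of a conforming catalog (the EchoService type and the 'echo' name are made up for illustration):

import typing as ta

from ommlds.minichain.backends.catalogs.base import BackendCatalog


class EchoService:  # hypothetical service type; just records what it was given
    def __init__(self, *args: ta.Any, **kwargs: ta.Any) -> None:
        self.args, self.kwargs = args, kwargs


class EchoCatalog(BackendCatalog):  # minimal sketch of an implementation
    def get_backend(self, service_cls, name):
        # Lookup only: hand back the factory plus any pre-resolved configs.
        return BackendCatalog.Backend(EchoService, ['model-config'])


# new_backend (inherited from the base class) prepends the resolved configs,
# then appends caller args/kwargs, before calling the factory.
svc = EchoCatalog().new_backend(EchoService, 'echo', temperature=0.2)
assert svc.args == ('model-config',)
assert svc.kwargs == {'temperature': 0.2}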
ommlds/minichain/backends/catalogs/simple.py CHANGED
@@ -40,9 +40,9 @@ class SimpleBackendCatalog(BackendCatalog):
             sc_dct[e.name] = e
         self._dct = dct
 
-    def get_backend(self, service_cls: ta.Any, name: str, *args: ta.Any, **kwargs: ta.Any) -> ta.Any:
+    def get_backend(self, service_cls: ta.Any, name: str, *args: ta.Any, **kwargs: ta.Any) -> BackendCatalog.Backend:
         e = self._dct[service_cls][name]
-        return e.factory_fn(*args, **kwargs)
+        return BackendCatalog.Backend(e.factory_fn, None)
 
 
 ##
ommlds/minichain/backends/catalogs/strings.py CHANGED
@@ -5,7 +5,7 @@ from omlish import check
 from ...models.configs import ModelPath
 from ...models.configs import ModelRepo
 from ...models.repos.resolving import ModelRepoResolver
-from ...registries.globals import registry_new
+from ...registries.globals import get_registry_cls
 from ..strings.parsing import parse_backend_string
 from ..strings.resolving import BackendStringResolver
 from ..strings.resolving import ResolveBackendStringArgs
@@ -30,14 +30,14 @@ class BackendStringBackendCatalog(BackendCatalog):
         self._string_resolver = string_resolver
         self._model_repo_resolver = model_repo_resolver
 
-    def get_backend(self, service_cls: ta.Any, name: str, *args: ta.Any, **kwargs: ta.Any) -> ta.Any:
+    def get_backend(self, service_cls: ta.Any, name: str, *args: ta.Any, **kwargs: ta.Any) -> BackendCatalog.Backend:
         ps = parse_backend_string(name)
         rs = check.not_none(self._string_resolver.resolve_backend_string(ResolveBackendStringArgs(
             service_cls,
             ps,
         )))
 
-        al = list(rs.args or [])
+        al: list = list(rs.args or [])
 
         # FIXME: lol
         if al and isinstance(al[0], ModelRepo):
@@ -46,10 +46,12 @@ class BackendStringBackendCatalog(BackendCatalog):
             mrp = check.not_none(mrr.resolve(mr))
             al = [ModelPath(mrp.path), *al[1:]]
 
-        return registry_new(
+        cls = get_registry_cls(
             service_cls,
             rs.name,
-            *al,
-            *args,
-            **kwargs,
+        )
+
+        return BackendCatalog.Backend(
+            cls,
+            al,
         )
ommlds/minichain/backends/impls/anthropic/chat.py CHANGED
@@ -46,9 +46,12 @@ class AnthropicChatChoicesService:
     def __init__(
             self,
             *configs: ApiKey | ModelName,
+            http_client: http.AsyncHttpClient | None = None,
    ) -> None:
         super().__init__()
 
+        self._http_client = http_client
+
         with tv.consume(*configs) as cc:
             self._api_key = check.not_none(ApiKey.pop_secret(cc, env='ANTHROPIC_API_KEY'))
             self._model_name = cc.pop(self.DEFAULT_MODEL_NAME)
@@ -88,7 +91,7 @@ class AnthropicChatChoicesService:
 
         raw_request = msh.marshal(a_req)
 
-        raw_response = http.request(
+        raw_response = await http.async_request(
             'https://api.anthropic.com/v1/messages',
             headers={
                 http.consts.HEADER_CONTENT_TYPE: http.consts.CONTENT_TYPE_JSON,
@@ -96,6 +99,7 @@ class AnthropicChatChoicesService:
                 b'anthropic-version': b'2023-06-01',
             },
             data=json.dumps(raw_request).encode('utf-8'),
+            client=self._http_client,
         )
 
         response = json.loads(check.not_none(raw_response.data).decode('utf-8'))
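All of the HTTP-backed services in this release follow the same pattern: the constructor grows an optional http_client keyword (defaulting to None), and the blocking http.request(...) call becomes await http.async_request(..., client=self._http_client); presumably a None client lets the helper manage its own connection as before. A hedged construction sketch (assumes ANTHROPIC_API_KEY is set in the environment; no request is issued here):

from omlish.http import all as http

from ommlds.minichain.backends.impls.anthropic.chat import AnthropicChatChoicesService

# Default: no shared client; each call lets http.async_request manage one.
svc = AnthropicChatChoicesService()

# Injected client: any http.AsyncHttpClient works; synchronous callers can
# wrap a sync client in the adapter this release uses in tools/git.py.
svc_shared = AnthropicChatChoicesService(
    http_client=http.SyncAsyncHttpClient(http.client()),
)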
ommlds/minichain/backends/impls/anthropic/stream.py CHANGED
@@ -39,9 +39,15 @@ from .protocol import build_protocol_tool
 # )
 @static_check_is_chat_choices_stream_service
 class AnthropicChatChoicesStreamService:
-    def __init__(self, *configs: Config) -> None:
+    def __init__(
+            self,
+            *configs: Config,
+            http_client: http.AsyncHttpClient | None = None,
+    ) -> None:
         super().__init__()
 
+        self._http_client = http_client
+
         with tv.consume(*configs) as cc:
             self._model_name = cc.pop(AnthropicChatChoicesService.DEFAULT_MODEL_NAME)
             self._api_key = check.not_none(ApiKey.pop_secret(cc, env='ANTHROPIC_API_KEY'))
@@ -84,8 +90,8 @@ class AnthropicChatChoicesStreamService:
         )
 
         async with UseResources.or_new(request.options) as rs:
-            http_client = rs.enter_context(http.client())
-            http_response = rs.enter_context(http_client.stream_request(http_request))
+            http_client = await rs.enter_async_context(http.manage_async_client(self._http_client))
+            http_response = await rs.enter_async_context(await http_client.stream_request(http_request))
 
             async def inner(sink: StreamResponseSink[AiChoicesDeltas]) -> ta.Sequence[ChatChoicesOutputs] | None:
                 msg_start: AnthropicSseDecoderEvents.MessageStart | None = None
@@ -95,8 +101,7 @@ class AnthropicChatChoicesStreamService:
                 db = DelimitingBuffer([b'\r', b'\n', b'\r\n'])
                 sd = sse.SseDecoder()
                 while True:
-                    # FIXME: read1 not on response stream protocol
-                    b = http_response.stream.read1(self.READ_CHUNK_SIZE)  # type: ignore[attr-defined]
+                    b = await http_response.stream.read1(self.READ_CHUNK_SIZE)
                     for l in db.feed(b):
                         if isinstance(l, DelimitingBuffer.Incomplete):
                             # FIXME: handle
ommlds/minichain/backends/impls/google/chat.py CHANGED
@@ -40,9 +40,15 @@ from .tools import build_tool_spec_schema
 class GoogleChatChoicesService:
     DEFAULT_MODEL_NAME: ta.ClassVar[ModelName] = ModelName(check.not_none(MODEL_NAMES.default))
 
-    def __init__(self, *configs: ApiKey | ModelName) -> None:
+    def __init__(
+            self,
+            *configs: ApiKey | ModelName,
+            http_client: http.AsyncHttpClient | None = None,
+    ) -> None:
         super().__init__()
 
+        self._http_client = http_client
+
         with tv.consume(*configs) as cc:
             self._model_name = cc.pop(self.DEFAULT_MODEL_NAME)
             self._api_key = ApiKey.pop_secret(cc, env='GEMINI_API_KEY')
@@ -149,11 +155,12 @@ class GoogleChatChoicesService:
 
         model_name = MODEL_NAMES.resolve(self._model_name.v)
 
-        resp = http.request(
+        resp = await http.async_request(
             f'{self.BASE_URL.rstrip("/")}/{model_name}:generateContent?key={key}',
             headers={'Content-Type': 'application/json'},
             data=json.dumps_compact(req_dct).encode('utf-8'),
             method='POST',
+            client=self._http_client,
         )
 
         resp_dct = json.loads(check.not_none(resp.data).decode('utf-8'))
ommlds/minichain/backends/impls/google/search.py CHANGED
@@ -82,12 +82,16 @@ class CseSearchService:
             self,
             cse_id: str | None = None,
             cse_api_key: str | None = None,
+            *,
+            http_client: http.AsyncHttpClient | None = None,
     ) -> None:
         super().__init__()
 
         self._cse_id = cse_id
         self._cse_api_key = cse_api_key
 
+        self._http_client = http_client
+
     async def invoke(
             self,
             request: SearchRequest,
@@ -97,8 +101,9 @@ class CseSearchService:
             cx=check.non_empty_str(self._cse_id),
             q=request.v,
         ))
-        resp = http.request(
+        resp = await http.async_request(
             f'https://www.googleapis.com/customsearch/v1?{qs}',
+            client=self._http_client,
         )
         out = check.not_none(resp.data)
 
ommlds/minichain/backends/impls/google/stream.py CHANGED
@@ -46,9 +46,15 @@ from .tools import build_tool_spec_schema
 class GoogleChatChoicesStreamService:
     DEFAULT_MODEL_NAME: ta.ClassVar[ModelName] = ModelName(check.not_none(MODEL_NAMES.default))
 
-    def __init__(self, *configs: ApiKey | ModelName) -> None:
+    def __init__(
+            self,
+            *configs: ApiKey | ModelName,
+            http_client: http.AsyncHttpClient | None = None,
+    ) -> None:
         super().__init__()
 
+        self._http_client = http_client
+
         with tv.consume(*configs) as cc:
             self._model_name = cc.pop(self.DEFAULT_MODEL_NAME)
             self._api_key = ApiKey.pop_secret(cc, env='GEMINI_API_KEY')
@@ -163,14 +169,13 @@ class GoogleChatChoicesStreamService:
         )
 
         async with UseResources.or_new(request.options) as rs:
-            http_client = rs.enter_context(http.client())
-            http_response = rs.enter_context(http_client.stream_request(http_request))
+            http_client = await rs.enter_async_context(http.manage_async_client(self._http_client))
+            http_response = await rs.enter_async_context(await http_client.stream_request(http_request))
 
             async def inner(sink: StreamResponseSink[AiChoicesDeltas]) -> ta.Sequence[ChatChoicesOutputs] | None:
                 db = DelimitingBuffer([b'\r', b'\n', b'\r\n'])
                 while True:
-                    # FIXME: read1 not on response stream protocol
-                    b = http_response.stream.read1(self.READ_CHUNK_SIZE)  # type: ignore[attr-defined]
+                    b = await http_response.stream.read1(self.READ_CHUNK_SIZE)
                     for bl in db.feed(b):
                         if isinstance(bl, DelimitingBuffer.Incomplete):
                             # FIXME: handle
ommlds/minichain/backends/impls/mistral.py CHANGED
@@ -40,10 +40,16 @@ class MistralChatChoicesService:
         AiMessage: 'assistant',
     }
 
-    def __init__(self, *, api_key: str | None = None) -> None:
+    def __init__(
+            self,
+            *,
+            api_key: str | None = None,
+            http_client: http.AsyncHttpClient | None = None,
+    ) -> None:
         super().__init__()
 
         self._api_key = api_key
+        self._http_client = http_client
 
     def _get_msg_content(self, m: Message) -> str | None:
         if isinstance(m, AiMessage):
@@ -73,7 +79,7 @@ class MistralChatChoicesService:
             ],
         }
 
-        resp = http.request(
+        resp = await http.async_request(
             'https://api.mistral.ai/v1/chat/completions',
             method='POST',
             data=json.dumps_compact(req_dct).encode('utf-8'),
@@ -82,6 +88,7 @@ class MistralChatChoicesService:
                 'Accept': 'application/json',
                 'Authorization': f'Bearer {key}',
             },
+            client=self._http_client,
         )
 
         if resp.status == 429:
ommlds/minichain/backends/impls/ollama/chat.py CHANGED
@@ -54,16 +54,19 @@ class BaseOllamaChatChoicesService(lang.Abstract):
     def __init__(
             self,
             *configs: ApiUrl | ModelName,
+            http_client: http.AsyncHttpClient | None = None,
     ) -> None:
         super().__init__()
 
+        self._http_client = http_client
+
         with tv.consume(*configs) as cc:
             self._api_url = cc.pop(self.DEFAULT_API_URL)
             self._model_name = cc.pop(self.DEFAULT_MODEL_NAME)
 
     #
 
-    ROLE_MAP: ta.ClassVar[ta.Mapping[type[Message], pt.Role]] = {
+    ROLE_MAP: ta.ClassVar[ta.Mapping[type[Message], pt.Role]] = {  # noqa
         SystemMessage: 'system',
         UserMessage: 'user',
         AiMessage: 'assistant',
@@ -111,10 +114,11 @@ class OllamaChatChoicesService(BaseOllamaChatChoicesService):
 
         raw_request = msh.marshal(a_req)
 
-        raw_response = http.request(
-            self._api_url.v.removesuffix('/') + '/chat',
-            data=json.dumps(raw_request).encode('utf-8'),
-        )
+        async with http.manage_async_client(self._http_client) as http_client:
+            raw_response = await http_client.request(http.HttpRequest(
+                self._api_url.v.removesuffix('/') + '/chat',
+                data=json.dumps(raw_request).encode('utf-8'),
+            ))
 
         json_response = json.loads(check.not_none(raw_response.data).decode('utf-8'))
 
@@ -165,14 +169,13 @@ class OllamaChatChoicesStreamService(BaseOllamaChatChoicesService):
         )
 
         async with UseResources.or_new(request.options) as rs:
-            http_client = rs.enter_context(http.client())
-            http_response = rs.enter_context(http_client.stream_request(http_request))
+            http_client = await rs.enter_async_context(http.manage_async_client(self._http_client))
+            http_response = await rs.enter_async_context(await http_client.stream_request(http_request))
 
             async def inner(sink: StreamResponseSink[AiChoicesDeltas]) -> ta.Sequence[ChatChoicesOutputs] | None:
                 db = DelimitingBuffer([b'\r', b'\n', b'\r\n'])
                 while True:
-                    # FIXME: read1 not on response stream protocol
-                    b = http_response.stream.read1(self.READ_CHUNK_SIZE)  # type: ignore[attr-defined]
+                    b = await http_response.stream.read1(self.READ_CHUNK_SIZE)
                     for l in db.feed(b):
                         if isinstance(l, DelimitingBuffer.Incomplete):
                             # FIXME: handle
ommlds/minichain/backends/impls/openai/chat.py CHANGED
@@ -42,9 +42,15 @@ from .names import MODEL_NAMES
 class OpenaiChatChoicesService:
     DEFAULT_MODEL_NAME: ta.ClassVar[ModelName] = ModelName(check.not_none(MODEL_NAMES.default))
 
-    def __init__(self, *configs: ApiKey | ModelName | DefaultOptions) -> None:
+    def __init__(
+            self,
+            *configs: ApiKey | ModelName | DefaultOptions,
+            http_client: http.AsyncHttpClient | None = None,
+    ) -> None:
         super().__init__()
 
+        self._http_client = http_client
+
         with tv.consume(*configs) as cc:
             self._model_name = cc.pop(self.DEFAULT_MODEL_NAME)
             self._api_key = ApiKey.pop_secret(cc, env='OPENAI_API_KEY')
@@ -68,13 +74,14 @@ class OpenaiChatChoicesService:
 
         raw_request = msh.marshal(rh.oai_request())
 
-        http_response = http.request(
+        http_response = await http.async_request(
             'https://api.openai.com/v1/chat/completions',
             headers={
                 http.consts.HEADER_CONTENT_TYPE: http.consts.CONTENT_TYPE_JSON,
                 http.consts.HEADER_AUTH: http.consts.format_bearer_auth_header(check.not_none(self._api_key).reveal()),
             },
             data=json.dumps(raw_request).encode('utf-8'),
+            client=self._http_client,
         )
 
         raw_response = json.loads(check.not_none(http_response.data).decode('utf-8'))
ommlds/minichain/backends/impls/openai/completion.py CHANGED
@@ -23,9 +23,15 @@ from ....standard import ApiKey
 class OpenaiCompletionService:
     DEFAULT_MODEL_NAME: ta.ClassVar[str] = 'gpt-3.5-turbo-instruct'
 
-    def __init__(self, *configs: Config) -> None:
+    def __init__(
+            self,
+            *configs: Config,
+            http_client: http.AsyncHttpClient | None = None,
+    ) -> None:
         super().__init__()
 
+        self._http_client = http_client
+
         with tv.consume(*configs) as cc:
             self._api_key = ApiKey.pop_secret(cc, env='OPENAI_API_KEY')
 
@@ -41,13 +47,14 @@ class OpenaiCompletionService:
             stream=False,
         )
 
-        raw_response = http.request(
+        raw_response = await http.async_request(
             'https://api.openai.com/v1/completions',
             headers={
                 http.consts.HEADER_CONTENT_TYPE: http.consts.CONTENT_TYPE_JSON,
                 http.consts.HEADER_AUTH: http.consts.format_bearer_auth_header(check.not_none(self._api_key).reveal()),
             },
             data=json.dumps(raw_request).encode('utf-8'),
+            client=self._http_client,
         )
 
         response = json.loads(check.not_none(raw_response.data).decode('utf-8'))
ommlds/minichain/backends/impls/openai/embedding.py CHANGED
@@ -22,9 +22,15 @@ from ....vectors.types import Vector
 class OpenaiEmbeddingService:
     model = 'text-embedding-3-small'
 
-    def __init__(self, *configs: Config) -> None:
+    def __init__(
+            self,
+            *configs: Config,
+            http_client: http.AsyncHttpClient | None = None,
+    ) -> None:
         super().__init__()
 
+        self._http_client = http_client
+
         with tv.consume(*configs) as cc:
             self._api_key = ApiKey.pop_secret(cc, env='OPENAI_API_KEY')
 
@@ -34,13 +40,14 @@ class OpenaiEmbeddingService:
             input=check.isinstance(request.v, str),
         )
 
-        raw_response = http.request(
+        raw_response = await http.async_request(
             'https://api.openai.com/v1/embeddings',
             headers={
                 http.consts.HEADER_CONTENT_TYPE: http.consts.CONTENT_TYPE_JSON,
                 http.consts.HEADER_AUTH: http.consts.format_bearer_auth_header(check.not_none(self._api_key).reveal()),
             },
             data=json.dumps(raw_request).encode('utf-8'),
+            client=self._http_client,
         )
 
         response = json.loads(check.not_none(raw_response.data).decode('utf-8'))
ommlds/minichain/backends/impls/openai/stream.py CHANGED
@@ -41,9 +41,15 @@ from .names import MODEL_NAMES
 # )
 @static_check_is_chat_choices_stream_service
 class OpenaiChatChoicesStreamService:
-    def __init__(self, *configs: Config) -> None:
+    def __init__(
+            self,
+            *configs: Config,
+            http_client: http.AsyncHttpClient | None = None,
+    ) -> None:
         super().__init__()
 
+        self._http_client = http_client
+
         with tv.consume(*configs) as cc:
             self._model_name = cc.pop(OpenaiChatChoicesService.DEFAULT_MODEL_NAME)
             self._api_key = ApiKey.pop_secret(cc, env='OPENAI_API_KEY')
@@ -81,15 +87,14 @@ class OpenaiChatChoicesStreamService:
         )
 
         async with UseResources.or_new(request.options) as rs:
-            http_client = rs.enter_context(http.client())
-            http_response = rs.enter_context(http_client.stream_request(http_request))
+            http_client = await rs.enter_async_context(http.manage_async_client(self._http_client))
+            http_response = await rs.enter_async_context(await http_client.stream_request(http_request))
 
             async def inner(sink: StreamResponseSink[AiChoicesDeltas]) -> ta.Sequence[ChatChoicesOutputs]:
                 db = DelimitingBuffer([b'\r', b'\n', b'\r\n'])
                 sd = sse.SseDecoder()
                 while True:
-                    # FIXME: read1 not on response stream protocol
-                    b = http_response.stream.read1(self.READ_CHUNK_SIZE)  # type: ignore[attr-defined]
+                    b = await http_response.stream.read1(self.READ_CHUNK_SIZE)
                     for l in db.feed(b):
                         if isinstance(l, DelimitingBuffer.Incomplete):
                             # FIXME: handle
ommlds/minichain/backends/impls/transformers/transformers.py CHANGED
@@ -4,6 +4,7 @@ TODO:
  - https://huggingface.co/blog/aifeifei798/transformers-streaming-output
 """
 import sys
+import threading
 import typing as ta
 
 import transformers as tfm
@@ -11,7 +12,10 @@ import transformers as tfm
 from omlish import check
 from omlish import lang
 from omlish import typedvalues as tv
+from omlish.asyncs.asyncio.sync import AsyncioBufferRelay
 
+from .....backends.transformers.filecache import file_cache_patch_context
+from .....backends.transformers.streamers import CancellableTextStreamer
 from ....chat.choices.services import ChatChoicesRequest
 from ....chat.choices.services import ChatChoicesResponse
 from ....chat.choices.services import static_check_is_chat_choices_service
@@ -177,10 +181,14 @@ class BaseTransformersChatChoicesService(lang.ExitStacked):
         for pkw_cfg in self._pipeline_kwargs:
             pkw.update(pkw_cfg.v)
 
-        return tfm.pipeline(
-            'text-generation',
-            **pkw,
-        )
+        with file_cache_patch_context(
+                local_first=True,
+                local_config_present_is_authoritative=True,
+        ):
+            return tfm.pipeline(
+                'text-generation',
+                **pkw,
+            )
 
 
 ##
@@ -232,29 +240,59 @@ class TransformersChatChoicesStreamService(BaseTransformersChatChoicesService):
             for m in request.v
         ]
 
+        relay: AsyncioBufferRelay = AsyncioBufferRelay()
+
+        def streamer_callback(text: str, *, stream_end: bool) -> None:
+            if text or stream_end:
+                relay.push(text, *([None] if stream_end else []))
+
+        streamer = CancellableTextStreamer(
+            check.not_none(pipeline.tokenizer),  # type: ignore[arg-type]
+            streamer_callback,  # noqa
+            skip_prompt=True,
+            skip_special_tokens=True,
+        )
+
         async with UseResources.or_new(request.options) as rs:
+            thread = threading.Thread(
+                target=CancellableTextStreamer.ignoring_cancelled(pipeline),
+                args=(
+                    inputs,
+                ),
+                kwargs=dict(
+                    streamer=streamer,
+                ),
+            )
+
+            def stop_thread() -> None:
+                streamer.cancel()
+                # thread.join()
+
+            rs.enter_context(lang.defer(stop_thread))
+
+            thread.start()
+
             async def inner(sink: StreamResponseSink[AiChoicesDeltas]) -> ta.Sequence[ChatChoicesOutputs] | None:
-                # last_role: ta.Any = None
-                #
-                # for chunk in output:
-                #     check.state(chunk['object'] == 'chat.completion.chunk')
-                #
-                #     choice = check.single(chunk['choices'])
-                #
-                #     if not (delta := choice.get('delta', {})):
-                #         continue
-                #
-                #     # FIXME: check role is assistant
-                #     if (role := delta.get('role')) != last_role:
-                #         last_role = role
-                #
-                #     # FIXME: stop reason
-                #
-                #     if (content := delta.get('content', '')):
-                #         await sink.emit(AiChoicesDeltas([AiChoiceDeltas([ContentAiChoiceDelta(content)])]))
-                #
-                # return None
-
-                raise NotImplementedError
+                while True:
+                    await relay.wait()
+                    got = relay.swap()
+
+                    if not got:
+                        raise RuntimeError
+
+                    if got[-1] is None:
+                        out = ''.join(got[:-1])
+                        end = True
+                    else:
+                        out = ''.join(got)
+                        end = False
+
+                    if out:
+                        await sink.emit(AiChoicesDeltas([AiChoiceDeltas([ContentAiChoiceDelta(out)])]))
+
+                    if end:
+                        break
+
+                return []
 
             return await new_stream_response(rs, inner)
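The new streaming implementation runs the transformers pipeline on a worker thread and hands text back to the async consumer through a buffer relay: the streamer callback push()es chunks (with a trailing None marking end-of-stream), while the async side wait()s and swap()s out whatever has accumulated. The sketch below reproduces that handoff with a small local stand-in instead of omlish's AsyncioBufferRelay, and a fake producer instead of the pipeline, to show the shape of the loop:

import asyncio
import threading


class MiniRelay:
    # Toy stand-in for the buffer relay used above: producers push() items
    # from any thread; the async side wait()s, then swap()s the batch out.
    def __init__(self) -> None:
        self._items: list = []
        self._lock = threading.Lock()
        self._event = asyncio.Event()
        self._loop = asyncio.get_running_loop()

    def push(self, *items) -> None:  # called from the worker thread
        with self._lock:
            self._items.extend(items)
        self._loop.call_soon_threadsafe(self._event.set)

    async def wait(self) -> None:
        await self._event.wait()

    def swap(self) -> list:
        with self._lock:
            out, self._items = self._items, []
        self._event.clear()
        return out


async def main() -> None:
    relay = MiniRelay()

    def produce() -> None:  # stands in for the pipeline + streamer thread
        for tok in ('Hel', 'lo ', 'world'):
            relay.push(tok)
        relay.push(None)  # None marks end-of-stream, as in the service code

    threading.Thread(target=produce).start()

    parts: list[str] = []
    while True:
        await relay.wait()
        got = relay.swap()
        done = bool(got) and got[-1] is None
        parts.extend(t for t in got if t is not None)
        if done:
            break

    print(''.join(parts))  # -> Hello world


asyncio.run(main())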
ommlds/minichain/registries/globals.py CHANGED
@@ -98,20 +98,34 @@ def register_type(
 
 
 @ta.overload
-def registry_new(cls: type[T], name: str, *args: ta.Any, **kwargs: ta.Any) -> T:
+def get_registry_cls(cls: type[T], name: str) -> type[T]:
     ...
 
 
 @ta.overload
-def registry_new(cls: ta.Any, name: str, *args: ta.Any, **kwargs: ta.Any) -> ta.Any:
+def get_registry_cls(cls: ta.Any, name: str) -> ta.Any:
     ...
 
 
-def registry_new(cls, name, *args, **kwargs):
+def get_registry_cls(cls, name, *args, **kwargs):
     be_cls = _GlobalRegistry.instance().get_registry_cls(cls, name)
     if isinstance(cls, type):
         be_cls = check.issubclass(be_cls, cls)  # noqa
-    return be_cls(*args, **kwargs)
+    return be_cls
+
+
+@ta.overload
+def registry_new(cls: type[T], name: str, *args: ta.Any, **kwargs: ta.Any) -> T:
+    ...
+
+
+@ta.overload
+def registry_new(cls: ta.Any, name: str, *args: ta.Any, **kwargs: ta.Any) -> ta.Any:
+    ...
+
+
+def registry_new(cls, name, *args, **kwargs):
+    return get_registry_cls(cls, name)(*args, **kwargs)
 
 
 #
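The registry change mirrors the catalog change: get_registry_cls does the lookup (and subclass check) without constructing anything, which is what BackendStringBackendCatalog now uses to build its deferred Backend record, while registry_new keeps the old look-up-and-instantiate behavior as a thin wrapper. A self-contained stand-in (not the real _GlobalRegistry, whose registration API this example does not reproduce) showing the relationship:

# Hypothetical local stand-in for the split; the real functions consult
# _GlobalRegistry.instance() and check.issubclass the result.
_REG: dict[tuple[type, str], type] = {}


def get_registry_cls(base: type, name: str) -> type:
    return _REG[(base, name)]  # lookup only, nothing is constructed


def registry_new(base: type, name: str, *args, **kwargs):
    # Unchanged observable behavior: resolve the class, then instantiate it.
    return get_registry_cls(base, name)(*args, **kwargs)


class Greeter:
    def __init__(self, who: str) -> None:
        self.who = who


_REG[(Greeter, 'basic')] = Greeter
assert registry_new(Greeter, 'basic', 'world').who == 'world'
assert get_registry_cls(Greeter, 'basic') is Greeter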
ommlds/tools/git.py CHANGED
@@ -16,6 +16,7 @@ from omdev.tools.git.messages import GitMessageGenerator
 from omlish import check
 from omlish import lang
 from omlish.configs.classes import Configurable
+from omlish.http import all as http
 from omlish.subprocesses.sync import subprocesses
 
 from .. import minichain as mc
@@ -76,7 +77,9 @@ class OpenaiGitAiBackend(GitAiBackend['OpenaiGitAiBackend.Config']):
         if (sec := load_secrets().try_get(key.lower())) is not None:
             os.environ[key] = sec.reveal()
 
-        llm = OpenaiChatChoicesService()
+        llm = OpenaiChatChoicesService(
+            http_client=http.SyncAsyncHttpClient(http.client()),
+        )
 
         resp = lang.sync_await(llm.invoke(mc.ChatChoicesRequest(
             [mc.UserMessage(prompt)],
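Because the OpenAI chat service is now async end-to-end, the synchronous git tool adapts a sync HTTP client and drives the coroutine with lang.sync_await. A hedged end-to-end sketch of that pattern (requires OPENAI_API_KEY and performs a real API call; the prompt text is illustrative):

from omlish import lang
from omlish.http import all as http

from ommlds import minichain as mc
from ommlds.minichain.backends.impls.openai.chat import OpenaiChatChoicesService

llm = OpenaiChatChoicesService(
    http_client=http.SyncAsyncHttpClient(http.client()),
)

resp = lang.sync_await(llm.invoke(mc.ChatChoicesRequest(
    [mc.UserMessage('Write a one-line commit message for a typo fix.')],
)))

print(resp)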