ommlds 0.0.0.dev480__py3-none-any.whl → 0.0.0.dev481__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the changes between the versions exactly as they appear in their public registries.
- ommlds/.omlish-manifests.json +18 -18
- ommlds/backends/llamacpp/logging.py +4 -1
- ommlds/backends/mlx/caching.py +7 -3
- ommlds/backends/mlx/cli.py +10 -7
- ommlds/backends/mlx/generation.py +18 -16
- ommlds/backends/mlx/limits.py +10 -6
- ommlds/backends/mlx/loading.py +7 -4
- ommlds/backends/transformers/__init__.py +14 -0
- ommlds/minichain/_dataclasses.py +46282 -0
- ommlds/minichain/backends/impls/duckduckgo/search.py +5 -1
- ommlds/minichain/backends/impls/huggingface/repos.py +1 -5
- ommlds/minichain/backends/impls/llamacpp/chat.py +6 -3
- ommlds/minichain/backends/impls/llamacpp/completion.py +7 -3
- ommlds/minichain/backends/impls/llamacpp/stream.py +6 -3
- ommlds/minichain/backends/impls/mlx/chat.py +6 -3
- ommlds/minichain/backends/impls/sentencepiece/tokens.py +9 -6
- ommlds/minichain/backends/impls/tinygrad/chat.py +7 -4
- ommlds/minichain/backends/impls/tokenizers/tokens.py +9 -6
- ommlds/minichain/backends/impls/transformers/sentence.py +5 -2
- ommlds/minichain/backends/impls/transformers/tokens.py +9 -6
- ommlds/minichain/backends/impls/transformers/transformers.py +10 -8
- ommlds/specs/mcp/clients.py +146 -0
- ommlds/specs/mcp/protocol.py +123 -18
- {ommlds-0.0.0.dev480.dist-info → ommlds-0.0.0.dev481.dist-info}/METADATA +3 -3
- {ommlds-0.0.0.dev480.dist-info → ommlds-0.0.0.dev481.dist-info}/RECORD +29 -27
- {ommlds-0.0.0.dev480.dist-info → ommlds-0.0.0.dev481.dist-info}/WHEEL +0 -0
- {ommlds-0.0.0.dev480.dist-info → ommlds-0.0.0.dev481.dist-info}/entry_points.txt +0 -0
- {ommlds-0.0.0.dev480.dist-info → ommlds-0.0.0.dev481.dist-info}/licenses/LICENSE +0 -0
- {ommlds-0.0.0.dev480.dist-info → ommlds-0.0.0.dev481.dist-info}/top_level.txt +0 -0
ommlds/.omlish-manifests.json
CHANGED

@@ -78,7 +78,7 @@
     "module": ".minichain.backends.impls.duckduckgo.search",
     "attr": null,
     "file": "ommlds/minichain/backends/impls/duckduckgo/search.py",
-    "line":
+    "line": 17,
     "value": {
         "!.minichain.registries.manifests.RegistryManifest": {
             "module": "ommlds.minichain.backends.impls.duckduckgo.search",

@@ -252,7 +252,7 @@
     "module": ".minichain.backends.impls.huggingface.repos",
     "attr": null,
     "file": "ommlds/minichain/backends/impls/huggingface/repos.py",
-    "line":
+    "line": 20,
     "value": {
         "!.minichain.registries.manifests.RegistryManifest": {
             "module": "ommlds.minichain.backends.impls.huggingface.repos",

@@ -269,7 +269,7 @@
     "module": ".minichain.backends.impls.llamacpp.chat",
     "attr": null,
     "file": "ommlds/minichain/backends/impls/llamacpp/chat.py",
-    "line":
+    "line": 36,
     "value": {
         "!.minichain.backends.strings.manifests.BackendStringsManifest": {
             "service_cls_names": [

@@ -284,7 +284,7 @@
     "module": ".minichain.backends.impls.llamacpp.chat",
     "attr": null,
     "file": "ommlds/minichain/backends/impls/llamacpp/chat.py",
-    "line":
+    "line": 45,
     "value": {
         "!.minichain.registries.manifests.RegistryManifest": {
             "module": "ommlds.minichain.backends.impls.llamacpp.chat",

@@ -299,7 +299,7 @@
     "module": ".minichain.backends.impls.llamacpp.completion",
     "attr": null,
     "file": "ommlds/minichain/backends/impls/llamacpp/completion.py",
-    "line":
+    "line": 28,
     "value": {
         "!.minichain.registries.manifests.RegistryManifest": {
             "module": "ommlds.minichain.backends.impls.llamacpp.completion",

@@ -314,7 +314,7 @@
     "module": ".minichain.backends.impls.llamacpp.stream",
     "attr": null,
     "file": "ommlds/minichain/backends/impls/llamacpp/stream.py",
-    "line":
+    "line": 35,
     "value": {
         "!.minichain.backends.strings.manifests.BackendStringsManifest": {
             "service_cls_names": [

@@ -329,7 +329,7 @@
     "module": ".minichain.backends.impls.llamacpp.stream",
     "attr": null,
     "file": "ommlds/minichain/backends/impls/llamacpp/stream.py",
-    "line":
+    "line": 44,
     "value": {
         "!.minichain.registries.manifests.RegistryManifest": {
             "module": "ommlds.minichain.backends.impls.llamacpp.stream",

@@ -359,7 +359,7 @@
     "module": ".minichain.backends.impls.mlx.chat",
     "attr": null,
     "file": "ommlds/minichain/backends/impls/mlx/chat.py",
-    "line":
+    "line": 42,
     "value": {
         "!.minichain.backends.strings.manifests.BackendStringsManifest": {
             "service_cls_names": [

@@ -375,7 +375,7 @@
     "module": ".minichain.backends.impls.mlx.chat",
     "attr": null,
     "file": "ommlds/minichain/backends/impls/mlx/chat.py",
-    "line":
+    "line": 136,
     "value": {
         "!.minichain.registries.manifests.RegistryManifest": {
             "module": "ommlds.minichain.backends.impls.mlx.chat",

@@ -390,7 +390,7 @@
     "module": ".minichain.backends.impls.mlx.chat",
     "attr": null,
     "file": "ommlds/minichain/backends/impls/mlx/chat.py",
-    "line":
+    "line": 167,
     "value": {
         "!.minichain.registries.manifests.RegistryManifest": {
             "module": "ommlds.minichain.backends.impls.mlx.chat",

@@ -610,7 +610,7 @@
     "module": ".minichain.backends.impls.tinygrad.chat",
     "attr": null,
     "file": "ommlds/minichain/backends/impls/tinygrad/chat.py",
-    "line":
+    "line": 118,
     "value": {
         "!.minichain.registries.manifests.RegistryManifest": {
             "module": "ommlds.minichain.backends.impls.tinygrad.chat",

@@ -625,7 +625,7 @@
     "module": ".minichain.backends.impls.tinygrad.chat",
     "attr": null,
     "file": "ommlds/minichain/backends/impls/tinygrad/chat.py",
-    "line":
+    "line": 138,
     "value": {
         "!.minichain.registries.manifests.RegistryManifest": {
             "module": "ommlds.minichain.backends.impls.tinygrad.chat",

@@ -640,7 +640,7 @@
     "module": ".minichain.backends.impls.tinygrad.chat",
     "attr": null,
     "file": "ommlds/minichain/backends/impls/tinygrad/chat.py",
-    "line":
+    "line": 169,
     "value": {
         "!.minichain.backends.strings.manifests.BackendStringsManifest": {
             "service_cls_names": [

@@ -656,7 +656,7 @@
     "module": ".minichain.backends.impls.transformers.sentence",
     "attr": null,
     "file": "ommlds/minichain/backends/impls/transformers/sentence.py",
-    "line":
+    "line": 22,
     "value": {
         "!.minichain.registries.manifests.RegistryManifest": {
             "module": "ommlds.minichain.backends.impls.transformers.sentence",

@@ -673,7 +673,7 @@
     "module": ".minichain.backends.impls.transformers.transformers",
     "attr": null,
     "file": "ommlds/minichain/backends/impls/transformers/transformers.py",
-    "line":
+    "line": 52,
     "value": {
         "!.minichain.backends.strings.manifests.BackendStringsManifest": {
             "service_cls_names": [

@@ -689,7 +689,7 @@
     "module": ".minichain.backends.impls.transformers.transformers",
     "attr": null,
     "file": "ommlds/minichain/backends/impls/transformers/transformers.py",
-    "line":
+    "line": 68,
     "value": {
         "!.minichain.registries.manifests.RegistryManifest": {
             "module": "ommlds.minichain.backends.impls.transformers.transformers",

@@ -706,7 +706,7 @@
     "module": ".minichain.backends.impls.transformers.transformers",
     "attr": null,
     "file": "ommlds/minichain/backends/impls/transformers/transformers.py",
-    "line":
+    "line": 199,
     "value": {
         "!.minichain.registries.manifests.RegistryManifest": {
             "module": "ommlds.minichain.backends.impls.transformers.transformers",

@@ -723,7 +723,7 @@
     "module": ".minichain.backends.impls.transformers.transformers",
     "attr": null,
     "file": "ommlds/minichain/backends/impls/transformers/transformers.py",
-    "line":
+    "line": 229,
     "value": {
         "!.minichain.registries.manifests.RegistryManifest": {
             "module": "ommlds.minichain.backends.impls.transformers.transformers",

ommlds/backends/llamacpp/logging.py
CHANGED

@@ -1,4 +1,7 @@
 """
+NOTE: This can't be cleaned up too much - the callback can't be a closure to hide its guts because it needs to be
+picklable for multiprocessing.
+
 FIXME:
  - it outputs newline-terminated so buffer and chop on newlines - DelimitingBuffer again
 """

@@ -27,4 +30,4 @@ def llama_log_callback(
 
 @lang.cached_function
 def install_logging_hook() -> None:
-    llama_cpp.llama_log_set(llama_log_callback, ct.c_void_p(0))
+    llama_cpp.llama_log_set(llama_log_callback, ct.c_void_p(0))  # noqa

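The NOTE added above encodes a real constraint: multiprocessing ships callbacks to worker processes by pickling, and pickle can serialize a module-level function by name but not a closure. A minimal, standalone illustration (not part of ommlds) of the difference:

    import pickle


    def module_level_callback(level: int, text: bytes) -> None:
        # Picklable: pickle records a module:qualname reference that a child
        # process can re-import.
        print(level, text.decode())


    def make_closure_callback(prefix: str):
        def closure_callback(level: int, text: bytes) -> None:
            print(prefix, level, text.decode())
        return closure_callback


    pickle.dumps(module_level_callback)  # fine

    try:
        pickle.dumps(make_closure_callback('llama'))
    except (pickle.PicklingError, AttributeError) as e:
        # Fails: a closure is a local object that pickle cannot look up by name.
        print('not picklable:', e)
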
ommlds/backends/mlx/caching.py
CHANGED

@@ -17,7 +17,11 @@
 # https://github.com/ml-explore/mlx-lm/blob/ce2358d297af245b002e690623f00195b6507da0/mlx_lm/generate.py
 import typing as ta
 
-import
+from omlish import lang
+
+
+with lang.auto_proxy_import(globals()):
+    import mlx_lm.models.cache as mlx_lm_models_cache
 
 
 ##

@@ -32,13 +36,13 @@ def maybe_quantize_kv_cache(
 ) -> None:
     if not (
         kv_bits is not None and
-        not isinstance(prompt_cache[0],
+        not isinstance(prompt_cache[0], mlx_lm_models_cache.QuantizedKVCache) and
         prompt_cache[0].offset > quantized_kv_start
     ):
         return
 
     for i in range(len(prompt_cache)):
-        if isinstance(prompt_cache[i],
+        if isinstance(prompt_cache[i], mlx_lm_models_cache.KVCache):
             prompt_cache[i] = prompt_cache[i].to_quantized(
                 bits=kv_bits,
                 group_size=kv_group_size,

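The mlx files below all repeat the same refactor: eager `import mlx...` statements move under `with lang.auto_proxy_import(globals()):`, an omlish helper whose internals are not shown in this diff. As a rough sketch of the general technique it stands for (a module proxy that defers the real import until first attribute access) — illustrative only, not omlish's actual implementation:

    import importlib
    import types
    import typing as ta


    class _LazyModule(types.ModuleType):
        """Stands in for a module, importing it on first attribute access."""

        def __init__(self, name: str) -> None:
            super().__init__(name)
            self._real: ta.Any = None

        def __getattr__(self, attr: str) -> ta.Any:
            if self._real is None:
                self._real = importlib.import_module(self.__name__)
            return getattr(self._real, attr)


    # Hypothetical equivalent of the diff's proxied import:
    mlx_lm_models_cache = _LazyModule('mlx_lm.models.cache')

    # mlx_lm is only imported once something touches the proxy, e.g.:
    # cache = mlx_lm_models_cache.make_prompt_cache(model)

One plausible motivation for this pattern is keeping these modules importable (for manifest scanning and registries) even where the heavy backend packages are absent or slow to load.
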
ommlds/backends/mlx/cli.py
CHANGED

@@ -20,16 +20,19 @@ import json
 import sys
 import typing as ta
 
-
-import mlx_lm.models.cache
-import mlx_lm.sample_utils
-import mlx_lm.utils
+from omlish import lang
 
 from .generation import GenerationParams
 from .generation import generate
 from .loading import load_model
 
 
+with lang.auto_proxy_import(globals()):
+    import mlx.core as mx
+    import mlx_lm.models.cache as mlx_lm_models_cache
+    import mlx_lm.sample_utils as mlx_lm_sample_utils
+
+
 ##
 
 

@@ -214,11 +217,11 @@ def _main() -> None:
     # Load the prompt cache and metadata if a cache file is provided
     using_cache = args.prompt_cache_file is not None
     if using_cache:
-        prompt_cache, metadata =
+        prompt_cache, metadata = mlx_lm_models_cache.load_prompt_cache(
             args.prompt_cache_file,
             return_metadata=True,
         )
-        if isinstance(prompt_cache[0],
+        if isinstance(prompt_cache[0], mlx_lm_models_cache.QuantizedKVCache):
             if args.kv_bits is not None and args.kv_bits != prompt_cache[0].bits:
                 raise ValueError('--kv-bits does not match the kv cache loaded from --prompt-cache-file.')
             if args.kv_group_size != prompt_cache[0].group_size:

@@ -293,7 +296,7 @@ def _main() -> None:
     else:
         prompt = tokenizer.encode(prompt)
 
-    sampler =
+    sampler = mlx_lm_sample_utils.make_sampler(
         args.temp,
         args.top_p,
         args.min_p,

ommlds/backends/mlx/generation.py
CHANGED

@@ -21,10 +21,6 @@ import io
 import sys
 import typing as ta
 
-import mlx.core as mx
-import mlx_lm.models.cache
-from mlx import nn
-
 from omlish import check
 from omlish import lang
 

@@ -33,6 +29,12 @@ from .limits import wired_limit_context
 from .tokenization import Tokenization
 
 
+with lang.auto_proxy_import(globals()):
+    import mlx.core as mx
+    import mlx.nn as mlx_nn
+    import mlx_lm.models.cache as mlx_lm_models_cache
+
+
 ##
 
 

@@ -47,9 +49,9 @@ def _generation_stream():
 class LogitProcessor(ta.Protocol):
     def __call__(
         self,
-        tokens: mx.array,
-        logits: mx.array,
-    ) -> mx.array:
+        tokens: 'mx.array',
+        logits: 'mx.array',
+    ) -> 'mx.array':
         ...
 
 

@@ -99,12 +101,12 @@ class GenerationParams:
 
 class _GenerationStep(ta.NamedTuple):
     token: int
-    logprobs: mx.array
+    logprobs: 'mx.array'
 
 
 def _generate_step(
-    prompt: mx.array,
-    model:
+    prompt: 'mx.array',
+    model: 'mlx_nn.Module',
     params: GenerationParams = GenerationParams(),
 ) -> ta.Generator[_GenerationStep]:
     y = prompt

@@ -113,7 +115,7 @@ def _generate_step(
     # Create the Kv cache for generation
     prompt_cache = params.prompt_cache
     if prompt_cache is None:
-        prompt_cache =
+        prompt_cache = mlx_lm_models_cache.make_prompt_cache(
             model,
             max_kv_size=params.max_kv_size,
         )

@@ -221,7 +223,7 @@ class GenerationOutput:
     token: int
 
     # A vector of log probabilities.
-    logprobs: mx.array
+    logprobs: 'mx.array'
 
     # The number of tokens in the prompt.
     prompt_tokens: int

@@ -234,9 +236,9 @@ class GenerationOutput:
 
 
 def stream_generate(
-    model:
+    model: 'mlx_nn.Module',
     tokenization: Tokenization,
-    prompt: str
+    prompt: ta.Union[str, 'mx.array'],
     params: GenerationParams = GenerationParams(),
 ) -> ta.Generator[GenerationOutput]:
     if not isinstance(prompt, mx.array):

@@ -308,9 +310,9 @@ def stream_generate(
 
 
 def generate(
-    model:
+    model: 'mlx_nn.Module',
     tokenization: Tokenization,
-    prompt: str
+    prompt: ta.Union[str, 'mx.array'],
     params: GenerationParams = GenerationParams(),
     *,
     verbose: bool = False,

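Alongside the proxied imports, every annotation that mentions a proxied module is quoted (`mx.array` becomes `'mx.array'`). That quoting is what keeps the deferral intact: a string annotation is stored verbatim and never evaluated at definition time. A tiny self-contained demonstration, with `heavy_module` as a made-up name:

    class Unevaluated:
        # The string annotation is stored verbatim in __annotations__;
        # `heavy_module` is never looked up at class-definition time.
        value: 'heavy_module.Tensor'  # noqa: F821


    print(Unevaluated.__annotations__)  # {'value': 'heavy_module.Tensor'}

    # An unquoted `value: heavy_module.Tensor` would instead raise NameError
    # here unless heavy_module had already been imported.
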
ommlds/backends/mlx/limits.py
CHANGED

@@ -19,9 +19,13 @@ import contextlib
 import sys
 import typing as ta
 
-
-
-
+from omlish import lang
+
+
+with lang.auto_proxy_import(globals()):
+    import mlx.core as mx
+    import mlx.nn as mlx_nn
+    import mlx.utils as mlx_utils
 
 
 ##

@@ -29,8 +33,8 @@ from mlx import nn
 
 @contextlib.contextmanager
 def wired_limit_context(
-    model:
-    streams: ta.Iterable[mx.Stream] | None = None,
+    model: 'mlx_nn.Module',
+    streams: ta.Iterable['mx.Stream'] | None = None,
 ) -> ta.Generator[None]:
     """
     A context manager to temporarily change the wired limit.

@@ -43,7 +47,7 @@ def wired_limit_context(
         yield
         return
 
-    model_bytes =
+    model_bytes = mlx_utils.tree_reduce(
         lambda acc, x: acc + x.nbytes if isinstance(x, mx.array) else acc,
         model,
ommlds/backends/mlx/loading.py
CHANGED

@@ -1,10 +1,8 @@
+# ruff: noqa: TC002
 import dataclasses as dc
 import pathlib
 import typing as ta
 
-import mlx_lm.utils
-from mlx import nn
-
 from omlish import check
 from omlish import lang
 

@@ -12,6 +10,11 @@ from .tokenization import Tokenization
 from .tokenization import load_tokenization
 
 
+with lang.auto_proxy_import(globals()):
+    import mlx.nn as mlx_nn
+    import mlx_lm.utils
+
+
 ##
 
 

@@ -76,7 +79,7 @@ def get_model_path(
 class LoadedModel:
     path: pathlib.Path
 
-    model:
+    model: 'mlx_nn.Module'
     config: dict
 
     #