PyPI - ommlds - Versions diffs - 0.0.0.dev332__tar.gz → 0.0.0.dev333__tar.gz - Mend

ommlds 0.0.0.dev332__tar.gz → 0.0.0.dev333__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (225) hide show

{ommlds-0.0.0.dev332/ommlds.egg-info → ommlds-0.0.0.dev333}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: ommlds
-Version: 0.0.0.dev332
+Version: 0.0.0.dev333
 Summary: ommlds
 Author: wrmsr
 License: BSD-3-Clause
@@ -12,8 +12,8 @@ Classifier: Operating System :: OS Independent
 Classifier: Operating System :: POSIX
 Requires-Python: >=3.12
 License-File: LICENSE
-Requires-Dist: omdev==0.0.0.dev332
-Requires-Dist: omlish==0.0.0.dev332
+Requires-Dist: omdev==0.0.0.dev333
+Requires-Dist: omlish==0.0.0.dev333
 Provides-Extra: all
 Requires-Dist: llama-cpp-python~=0.3; extra == "all"
 Requires-Dist: mlx~=0.26; extra == "all"

{ommlds-0.0.0.dev332 → ommlds-0.0.0.dev333}/ommlds/backends/tinygrad/models/llama3/attention.py RENAMED Viewed

@@ -57,8 +57,8 @@ class Attention:
             self,
             dim,
             n_heads,
-            n_kv_heads,
-            max_context,
+            n_kv_heads=None,
+            max_context=0,
             linear=nn.Linear,
             qk_norm: float | None = None,
     ) -> None:
@@ -85,7 +85,7 @@ class Attention:
             x: Tensor,
             start_pos: Variable_ | int,
             freqs_cis: Tensor,
-            mask: Tensor | None,
+            mask: Tensor | None = None,
     ) -> Tensor:
         if getenv('WQKV'):
             if not hasattr(self, 'wqkv'):
@@ -114,36 +114,71 @@ class Attention:
         bsz, seqlen, _, _ = xq.shape
         # create kv cache
-        if not hasattr(self, 'cache_kv'):
-            self.cache_kv = (
-                Tensor.zeros(
-                    2,
-                    bsz,
-                    self.max_context,
-                    self.n_kv_heads,
-                    self.head_dim,
-                    dtype=x.dtype,
+        # if not hasattr(self, 'cache_kv'):
+        #     self.cache_kv = (
+        #         Tensor.zeros(
+        #             2,
+        #             bsz,
+        #             self.max_context,
+        #             self.n_kv_heads,
+        #             self.head_dim,
+        #             dtype=x.dtype,
+        #         )
+        #         .contiguous()
+        #         .realize()
+        #     )
+        #     if isinstance(x.device, tuple):
+        #         # TODO: instead of specifying how to shard, it can follow how xk and xv are being sharded
+        #         self.cache_kv.shard_(
+        #             (x.device), axis=3 if getenv('SHARD_KVCACHE') else None,
+        #         ).realize()
+        #
+        # # update the cache
+        # check.state(xk.dtype == xv.dtype == self.cache_kv.dtype, f'{xk.dtype=}, {xv.dtype=}, {self.cache_kv.dtype=}')
+        #
+        # self.cache_kv[:, :, start_pos:start_pos + seqlen, :, :].assign(Tensor.stack(xk, xv)).realize()
+        #
+        # keys = self.cache_kv[0, :, 0:start_pos + seqlen, :, :]
+        # values = self.cache_kv[1, :, 0:start_pos + seqlen, :, :]
+        if self.max_context:
+            if not hasattr(self, 'cache_kv'):
+                self.cache_kv = (
+                    Tensor.zeros(
+                        2,
+                        bsz,
+                        self.max_context,
+                        self.n_kv_heads,
+                        self.head_dim,
+                        dtype=x.dtype,
+                    )
+                    .contiguous()
+                    .realize()
                 )
-                .contiguous()
-                .realize()
+                if isinstance(x.device, tuple):
+                    # TODO: instead of specifying how to shard, it can follow how xk and xv are being sharded
+                    self.cache_kv.shard_(
+                        (x.device),
+                        axis=3 if getenv('SHARD_KVCACHE') else None,
+                    ).realize()
+            # update the cache
+            check.state(
+                xk.dtype == xv.dtype == self.cache_kv.dtype,
+                f'{xk.dtype=}, {xv.dtype=}, {self.cache_kv.dtype=}',
             )
-            if isinstance(x.device, tuple):
-                # TODO: instead of specifying how to shard, it can follow how xk and xv are being sharded
-                self.cache_kv.shard_(
-                    (x.device), axis=3 if getenv('SHARD_KVCACHE') else None,
-                ).realize()
+            self.cache_kv[:, :, start_pos:start_pos + seqlen, :, :].assign(Tensor.stack(xk, xv)).realize()
-        # update the cache
-        check.state(xk.dtype == xv.dtype == self.cache_kv.dtype, f'{xk.dtype=}, {xv.dtype=}, {self.cache_kv.dtype=}')
+            keys = self.cache_kv[0, :, 0:start_pos + seqlen, :, :]
+            values = self.cache_kv[1, :, 0:start_pos + seqlen, :, :]
-        self.cache_kv.shrink(
-            (None, None, (start_pos, start_pos + seqlen), None, None),
-        ).assign(Tensor.stack(xk, xv)).realize()
+        else:
+            check.state(start_pos == 0)
+            keys, values = xk, xv
-        keys = self.cache_kv[0].shrink((None, (0, start_pos + seqlen), None, None))
-        values = self.cache_kv[1].shrink((None, (0, start_pos + seqlen), None, None))
+        keys = repeat_kv(keys, self.n_rep)
+        values = repeat_kv(values, self.n_rep)
-        keys, values = repeat_kv(keys, self.n_rep), repeat_kv(values, self.n_rep)
         xq, keys, values = (
             xq.transpose(1, 2),
             keys.transpose(1, 2),

{ommlds-0.0.0.dev332 → ommlds-0.0.0.dev333}/ommlds/backends/tinygrad/models/llama3/loading.py RENAMED Viewed

@@ -27,7 +27,7 @@ from .transformer import Transformer
 # TODO: model shouldn't be an input here, and n_kv_heads should support None
 def convert_from_huggingface(
         weights: dict[str, Tensor],
-        model: Transformer,
+        n_layers: int,
         n_heads: int,
         n_kv_heads: int,
         permute_layers: bool = True,
@@ -50,35 +50,35 @@ def convert_from_huggingface(
         'model.embed_tokens.weight': 'tok_embeddings.weight',
         **{
             f'model.layers.{l}.input_layernorm.weight': f'layers.{l}.attention_norm.weight'
-            for l in range(len(model.layers))
+            for l in range(n_layers)
         },
         **{
             f'model.layers.{l}.self_attn.{x}_norm.weight': f'layers.{l}.attention.{x}_norm.weight'
             for x in ['q', 'k']
-            for l in range(len(model.layers))
+            for l in range(n_layers)
         },
         **{
             f'model.layers.{l}.self_attn.{x}_proj.weight': f'layers.{l}.attention.w{x}.weight'
             for x in ['q', 'k', 'v', 'o']
-            for l in range(len(model.layers))
+            for l in range(n_layers)
         },
         **{
             f'model.layers.{l}.self_attn.{x}_proj.bias': f'layers.{l}.attention.w{x}.bias'
             for x in ['q', 'k', 'v', 'o']
-            for l in range(len(model.layers))
+            for l in range(n_layers)
         },
         **{
             f'model.layers.{l}.post_attention_layernorm.weight': f'layers.{l}.ffn_norm.weight'
-            for l in range(len(model.layers))
+            for l in range(n_layers)
         },
         **{
             f'model.layers.{l}.mlp.{x}_proj.weight': f'layers.{l}.feed_forward.w{y}.weight'
             for x, y in {'gate': '1', 'down': '2', 'up': '3'}.items()
-            for l in range(len(model.layers))
+            for l in range(n_layers)
         },
         **{
             f'model.layers.{l}.mlp.gate.weight': f'layers.{l}.feed_forward.gate.weight'
-            for l in range(len(model.layers))
+            for l in range(n_layers)
         },
         'model.norm.weight': 'norm.weight',
         'lm_head.weight': 'output.weight',
@@ -107,31 +107,31 @@ def convert_from_huggingface(
 def convert_from_gguf(
         weights: dict[str, Tensor],
-        model: Transformer,
+        n_layers: int,
 ):
     keymap = {
         'token_embd.weight': 'tok_embeddings.weight',
         **{
             f'blk.{l}.attn_norm.weight': f'layers.{l}.attention_norm.weight'
-            for l in range(len(model.layers))
+            for l in range(n_layers)
         },
         **{
             f'blk.{l}.attn_{x}.weight': f'layers.{l}.attention.w{x}.weight'
             for x in ['q', 'k', 'v']
-            for l in range(len(model.layers))
+            for l in range(n_layers)
         },
         **{
             f'blk.{l}.attn_output.weight': f'layers.{l}.attention.wo.weight'
-            for l in range(len(model.layers))
+            for l in range(n_layers)
         },
         **{
             f'blk.{l}.ffn_norm.weight': f'layers.{l}.ffn_norm.weight'
-            for l in range(len(model.layers))
+            for l in range(n_layers)
         },
         **{
             f'blk.{l}.ffn_{x}.weight': f'layers.{l}.feed_forward.w{y}.weight'
             for x, y in {'gate': '1', 'down': '2', 'up': '3'}.items()
-            for l in range(len(model.layers))
+            for l in range(n_layers)
         },
         'output_norm.weight': 'norm.weight',
         'rope_freqs.weight': 'rope_freqs.weight',
@@ -269,8 +269,10 @@ def build_transformer(
     else:
         linear, embedding, quantize_embeds = nn.Linear, nn.Embedding, False
+    model_params = MODEL_PARAMS[model_size]
     model = Transformer(
-        **MODEL_PARAMS[model_size]['args'],
+        **model_params['args'],
         linear=linear,
         embedding=embedding,
         max_context=max_context,
@@ -292,7 +294,7 @@ def build_transformer(
             weights = concat_weights(
                 [
                     load(str(model_path / f'consolidated.{i:02d}.pth'))
-                    for i in range(MODEL_PARAMS[model_size]['files'])
+                    for i in range(model_params['files'])
                 ],
                 device[0] if isinstance(device, tuple) else device,
             )
@@ -303,13 +305,16 @@ def build_transformer(
     if 'model.embed_tokens.weight' in weights:
         weights = convert_from_huggingface(
             weights,
-            model,
-            MODEL_PARAMS[model_size]['args']['n_heads'],
-            MODEL_PARAMS[model_size]['args']['n_kv_heads'],
+            model_params['args']['n_layers'],
+            model_params['args']['n_heads'],
+            model_params['args']['n_kv_heads'],
         )
     elif 'token_embd.weight' in weights:
-        weights = convert_from_gguf(weights, model)
+        weights = convert_from_gguf(
+            weights,
+            model_params['args']['n_layers'],
+        )
     weights = fix_bf16(weights)

{ommlds-0.0.0.dev332 → ommlds-0.0.0.dev333}/ommlds/backends/tinygrad/models/llama3/transformer.py RENAMED Viewed

@@ -130,10 +130,8 @@ class Transformer:
         _bsz, seqlen = tokens.shape
         h = self.tok_embeddings(tokens)
-        self.freqs_cis = self.freqs_cis.cast(h.dtype).realize()
-        freqs_cis = self.freqs_cis.shrink(
-            (None, (start_pos, start_pos + seqlen), None, None, None),
-        )
+        self.freqs_cis = self.freqs_cis.cast(h.dtype).kernelize()
+        freqs_cis = self.freqs_cis[:, start_pos:start_pos + seqlen, :, :, :]
         mask = (
             Tensor.full(
@@ -143,7 +141,7 @@ class Transformer:
                 device=h.device,
             )
             .triu(start_pos + 1)
-            .realize()
+            .kernelize()
         ) if seqlen > 1 else None
         for layer in self.layers:
@@ -152,7 +150,7 @@ class Transformer:
         return sample(
             logits.flatten(), temperature, top_k, top_p, alpha_f, alpha_p,
-        ).realize()
+        ).kernelize()
     def __call__(
             self,
@@ -172,7 +170,7 @@ class Transformer:
         ):
             return self.forward_jit(
                 tokens,
-                Variable('start_pos', 1, self.max_context).bind(start_pos),
+                Variable('start_pos', 1, self.max_context - 1).bind(start_pos),
                 temperature,
                 top_k,
                 top_p,

{ommlds-0.0.0.dev332 → ommlds-0.0.0.dev333}/ommlds/cli/main.py RENAMED Viewed

@@ -70,7 +70,7 @@ def _main() -> None:
     content: mc.Content
     if args.image:
-        content = mc.Image(pimg.open(check.non_empty_str(check.single(args.prompt))))
+        content = mc.ImageContent(pimg.open(check.non_empty_str(check.single(args.prompt))))
     elif args.editor:
         check.arg(not args.prompt)

{ommlds-0.0.0.dev332 → ommlds-0.0.0.dev333}/ommlds/cli/sessions/chat.py RENAMED Viewed

@@ -41,8 +41,7 @@ class ChatState:
 DEFAULT_CHAT_MODEL_BACKEND = 'openai'
-CHAT_MODEL_FACTORIES: ta.Mapping[str, ta.Callable[..., mc.ChatService]] = {
-}
+CHAT_MODEL_FACTORIES: ta.Mapping[str, ta.Callable[..., mc.ChatService]] = {}
 ##

{ommlds-0.0.0.dev332 → ommlds-0.0.0.dev333}/ommlds/minichain/__init__.py RENAMED Viewed

@@ -1,10 +1,6 @@
 # fmt: off
-from .registry import (  # noqa
-    register_type,
-    registry_new,
-    registry_of,
-)
+##
 from .chat.formats import (  # noqa
     JSON_RESPONSE_FORMAT,
@@ -71,23 +67,7 @@ from .chat.types import (  # noqa
     ChatResponseOutput,
 )
-from .completion import (  # noqa
-    CompletionRequestOption,
-    CompletionRequestOptions,
-    CompletionRequest,
-    CompletionResponseOutput,
-    CompletionResponseOutputs,
-    CompletionResponse,
-    CompletionService,
-)
-from .configs import (  # noqa
-    Config,
-    consume_configs,
-)
+##
 from .content.content import (  # noqa
     Content,
@@ -95,13 +75,26 @@ from .content.content import (  # noqa
 )
 from .content.images import (  # noqa
-    Image,
+    ImageContent,
+)
+from .content.list import (  # noqa
+    ListContent,
+)
+from .content.metadata import (  # noqa
+    ContentMetadata,
+    ContentMetadatas,
 )
 from .content.rendering import (  # noqa
     StringRenderer,
 )
+from .content.text import (  # noqa
+    TextContent,
+)
 from .content.transforms import (  # noqa
     ContentTransform,
@@ -109,10 +102,7 @@ from .content.transforms import (  # noqa
     transform_content_strings,
 )
-from .envs import (  # noqa
-    Env,
-    EnvKey,
-)
+##
 from .llms.tokens import (  # noqa
     Token,
@@ -135,6 +125,8 @@ from .llms.services import (  # noqa
     TokenUsageOutput,
 )
+##
 from .services import (  # noqa
     Request,
     RequestOption,
@@ -144,21 +136,7 @@ from .services import (  # noqa
     ServiceFacade,
 )
-from .standard import (  # noqa
-    ModelSpecifier,
-    ModelName,
-    ModelPath,
-    ApiKey,
-    DefaultRequestOptions,
-)
-from .streaming import (  # noqa
-    ResponseGenerator,
-    StreamResponse,
-)
+##
 from .tools.jsonschema import (  # noqa
     build_tool_spec_json_schema,
@@ -189,6 +167,8 @@ from .tools.types import (  # noqa
     ToolExecRequest,
 )
+##
 from .vectors.embeddings import (  # noqa
     EmbeddingRequest,
     EmbeddingRequestOption,
@@ -235,13 +215,78 @@ from .vectors.types import (  # noqa
     Vectorable,
 )
+##
+from .completion import (  # noqa
+    CompletionRequestOption,
+    CompletionRequestOptions,
+    CompletionRequest,
+    CompletionResponseOutput,
+    CompletionResponseOutputs,
+    CompletionResponse,
+    CompletionService,
+)
+from .configs import (  # noqa
+    Config,
+    consume_configs,
+)
+from .envs import (  # noqa
+    Env,
+    EnvKey,
+)
+from .metadata import (  # noqa
+    Metadata,
+    MetadataContainer,
+    CommonMetadata,
+    Uuid,
+)
+from .registry import (  # noqa
+    register_type,
+    registry_new,
+    registry_of,
+)
+from .resources import (  # noqa
+    ResourcesRef,
+    ResourcesRefNotRegisteredError,
+    Resources,
+    ResourceManaged,
+)
+from .standard import (  # noqa
+    ModelSpecifier,
+    ModelName,
+    ModelPath,
+    ApiKey,
+    DefaultRequestOptions,
+)
+from .streaming import (  # noqa
+    ResponseGenerator,
+    StreamResponse,
+)
 ##
 from omlish.lang.imports import _register_conditional_import  # noqa
-_register_conditional_import('omlish.marshal', '.chat.marshal', __package__)
-_register_conditional_import('omlish.marshal', '.content.marshal', __package__)
-_register_conditional_import('omlish.marshal', '.llms.marshal', __package__)
-_register_conditional_import('omlish.marshal', '.tools.marshal', __package__)
+_register_conditional_import('omlish.marshal', '.chat._marshal', __package__)
+_register_conditional_import('omlish.marshal', '.content._marshal', __package__)
+_register_conditional_import('omlish.marshal', '.llms._marshal', __package__)
+_register_conditional_import('omlish.marshal', '.tools._marshal', __package__)

ommlds-0.0.0.dev333/ommlds/minichain/_typedvalues.py ADDED Viewed

@@ -0,0 +1,93 @@
+import operator
+import typing as ta
+from omlish import check
+from omlish import dataclasses as dc
+from omlish import marshal as msh
+from omlish import reflect as rfl
+from omlish import typedvalues as tv
+from omlish.funcs import match as mfs
+from omlish.typedvalues.marshal import build_typed_values_marshaler
+from omlish.typedvalues.marshal import build_typed_values_unmarshaler
+##
+@dc.dataclass()
+class _TypedValuesFieldMarshalerFactory(msh.MarshalerFactoryMatchClass):
+    tvs_rty: rfl.Type
+    @mfs.simple(lambda _, ctx, rty: True)
+    def _build(self, ctx: msh.MarshalContext, rty: rfl.Type) -> msh.Marshaler:
+        return build_typed_values_marshaler(ctx, self.tvs_rty)
+@dc.dataclass()
+class _TypedValuesFieldUnmarshalerFactory(msh.UnmarshalerFactoryMatchClass):
+    tvs_rty: rfl.Type
+    @mfs.simple(lambda _, ctx, rty: True)
+    def _build(self, ctx: msh.UnmarshalContext, rty: rfl.Type) -> msh.Unmarshaler:
+        return build_typed_values_unmarshaler(ctx, self.tvs_rty)
+##
+def _tv_field_coercer(
+        tvc: type[tv.TypedValue] | tuple[type[tv.TypedValue], ...],
+) -> ta.Callable[[ta.Sequence], tv.TypedValues]:
+    if isinstance(tvc, tuple):
+        check.arg(all(issubclass(e, tv.TypedValue) for e in tvc))
+    else:
+        check.issubclass(tvc, tv.TypedValue)
+    def inner(seq):
+        return tv.TypedValues(*[
+            check.isinstance(e, tvc)
+            for e in check.isinstance(seq, ta.Sequence)
+        ])
+    return inner
+def _tv_field_repr(tvs: tv.TypedValues) -> str | None:
+    if not tvs:
+        return None
+    return repr(list(tvs))
+def _tv_field_metadata(
+        tvc: ta.Any,
+        *,
+        marshal_name: str | None = None,
+) -> ta.Mapping:
+    tvc_rty = rfl.type_(tvc)
+    ct: ta.Any
+    if isinstance(tvc_rty, type):
+        ct = check.issubclass(tvc, tv.TypedValue)
+    elif isinstance(tvc_rty, rfl.Union):
+        ct = tuple(check.issubclass(check.not_none(rfl.get_concrete_type(a)), tv.TypedValue) for a in tvc_rty.args)
+    else:
+        raise TypeError(tvc_rty)
+    tvs_rty = rfl.type_(tv.TypedValues[tvc])
+    return {
+        **dc.extra_field_params(
+            coerce=_tv_field_coercer(ct),
+            repr_fn=_tv_field_repr,
+        ),
+        msh.FieldMetadata: msh.FieldMetadata(
+            name=marshal_name,
+            options=msh.FieldOptions(
+                omit_if=operator.not_,
+            ),
+            marshaler_factory=_TypedValuesFieldMarshalerFactory(tvs_rty),
+            unmarshaler_factory=_TypedValuesFieldUnmarshalerFactory(tvs_rty),
+        ),
+    }

{ommlds-0.0.0.dev332 → ommlds-0.0.0.dev333}/ommlds/minichain/backends/transformers/sentence.py RENAMED Viewed

@@ -4,7 +4,7 @@ import sentence_transformers as stfm
 from ...configs import Config
 from ...configs import consume_configs
-from ...content.images import Image
+from ...content.images import ImageContent
 from ...standard import ModelPath
 from ...vectors.embeddings import EmbeddingRequest
 from ...vectors.embeddings import EmbeddingResponse
@@ -40,7 +40,7 @@ class SentenceTransformersEmbeddingService(EmbeddingService):
         v = request.v
         if isinstance(v, str):
             obj = v
-        elif isinstance(v, Image):
+        elif isinstance(v, ImageContent):
             obj = v.i
         else:
             raise TypeError(v)

ommlds 0.0.0.dev332__tar.gz → 0.0.0.dev333__tar.gz

ommlds 0.0.0.dev332__tar.gz → 0.0.0.dev333__tar.gz