lalamo 0.5.2__py3-none-any.whl → 0.5.4__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
- lalamo/__init__.py +15 -2
- lalamo/data/__init__.py +0 -1
- lalamo/data/huggingface_message.py +1 -0
- lalamo/main.py +167 -18
- lalamo/message_processor.py +2 -3
- lalamo/model_import/common.py +120 -27
- lalamo/model_import/decoder_configs/__init__.py +4 -2
- lalamo/model_import/decoder_configs/common.py +62 -21
- lalamo/model_import/decoder_configs/executorch.py +14 -9
- lalamo/model_import/decoder_configs/huggingface/__init__.py +4 -2
- lalamo/model_import/decoder_configs/huggingface/common.py +38 -12
- lalamo/model_import/decoder_configs/huggingface/gemma2.py +15 -10
- lalamo/model_import/decoder_configs/huggingface/gemma3.py +19 -16
- lalamo/model_import/decoder_configs/huggingface/gpt_oss.py +16 -10
- lalamo/model_import/decoder_configs/huggingface/llama.py +16 -11
- lalamo/model_import/decoder_configs/huggingface/llamba.py +23 -14
- lalamo/model_import/decoder_configs/huggingface/mistral.py +16 -11
- lalamo/model_import/decoder_configs/huggingface/modern_bert.py +241 -0
- lalamo/model_import/decoder_configs/huggingface/qwen2.py +17 -10
- lalamo/model_import/decoder_configs/huggingface/qwen3.py +15 -10
- lalamo/model_import/loaders/__init__.py +3 -2
- lalamo/model_import/loaders/executorch.py +24 -12
- lalamo/model_import/loaders/huggingface.py +258 -30
- lalamo/model_import/model_specs/__init__.py +4 -2
- lalamo/model_import/model_specs/common.py +8 -2
- lalamo/model_import/model_specs/gemma.py +5 -1
- lalamo/model_import/model_specs/huggingface.py +1 -1
- lalamo/model_import/model_specs/mirai.py +20 -0
- lalamo/models/__init__.py +10 -0
- lalamo/models/common.py +81 -0
- lalamo/{language_model.py → models/language_model.py} +32 -49
- lalamo/models/router.py +59 -0
- lalamo/modules/__init__.py +33 -16
- lalamo/modules/classifier.py +339 -0
- lalamo/modules/common.py +6 -3
- lalamo/modules/decoder.py +52 -180
- lalamo/modules/mlp.py +28 -5
- lalamo/modules/normalization.py +13 -8
- lalamo/modules/token_mixers/attention.py +10 -6
- lalamo/modules/token_mixers/state/kv_cache.py +14 -4
- lalamo/modules/transformer.py +273 -0
- lalamo/modules/{decoder_layer.py → transformer_layer.py} +62 -45
- lalamo/speculator/__init__.py +6 -2
- lalamo/speculator/estimator.py +91 -0
- lalamo/speculator/inference.py +28 -9
- lalamo/speculator/ngram.py +7 -3
- lalamo/speculator/utils.py +4 -2
- {lalamo-0.5.2.dist-info → lalamo-0.5.4.dist-info}/METADATA +1 -1
- lalamo-0.5.4.dist-info/RECORD +88 -0
- lalamo-0.5.2.dist-info/RECORD +0 -80
- {lalamo-0.5.2.dist-info → lalamo-0.5.4.dist-info}/WHEEL +0 -0
- {lalamo-0.5.2.dist-info → lalamo-0.5.4.dist-info}/entry_points.txt +0 -0
- {lalamo-0.5.2.dist-info → lalamo-0.5.4.dist-info}/licenses/LICENSE +0 -0
- {lalamo-0.5.2.dist-info → lalamo-0.5.4.dist-info}/top_level.txt +0 -0
lalamo/model_import/loaders/huggingface.py

@@ -9,7 +9,6 @@ from lalamo.common import ParameterPath
 from lalamo.modules import (
     Attention,
     Decoder,
-    DecoderLayer,
     DenseMLP,
     FullPrecisionLinear,
     GroupQuantizedLinear,
@@ -18,18 +17,20 @@ from lalamo.modules import (
     MLXQuantizedLinear,
     MLXQuantizedTiedEmbedding,
     MLXSemiQuantizedUntiedEmbedding,
-
+    Normalization,
     SeparableCausalConv,
     TiedEmbedding,
+    TransformerLayer,
     UntiedEmbedding,
 )
+from lalamo.modules.classifier import Classifier
 from lalamo.modules.mlp import MixtureOfExperts, MLPBase
 from lalamo.quantization import QuantizationMode

 from .common import load_parameters
 from .utils import decode_mxfp4, deinterleave_pairwise_columns

-__all__ = ["
+__all__ = ["load_huggingface_decoder"]


 AWQ_UINT4_REVERSE_ORDER = jnp.array([0, 4, 1, 5, 2, 6, 3, 7], dtype=jnp.int32)
@@ -42,15 +43,20 @@ def _reverse_uint4_order(array: Array, reverse_order: Array) -> Array:
     if last_dim % pack_factor != 0:
         return array

-    array_reshaped = rearrange(
+    array_reshaped = rearrange(
+        array,
+        "... (group pack_factor) -> ... group pack_factor",
+        pack_factor=pack_factor,
+    )
     array_reordered = array_reshaped[..., reverse_order]
     return rearrange(array_reordered, "... group pack_factor -> ... (group pack_factor)")


 def unpack_int32(packed_weights: Array, mode: QuantizationMode) -> Array:
-    assert packed_weights.dtype in (
-
-
+    assert packed_weights.dtype in (
+        jnp.int32,
+        jnp.uint32,
+    ), f"Expected packed_weights to be of dtype jnp.(u)int32, got {packed_weights.dtype}"
     assert 32 % mode.bits == 0

     shifts = jnp.arange(0, 32, mode.bits)
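The `shifts = jnp.arange(0, 32, mode.bits)` line above drives the whole unpacking: each 32-bit word is shifted right by every offset at once, and a mask keeps only the low bits (the AWQ constant at the top of the file then undoes the interleaved nibble order). A minimal, self-contained sketch of the same shift-and-mask idiom — not the library function itself; `unpack_uint4_words` is a hypothetical name:

```python
import jax.numpy as jnp

def unpack_uint4_words(packed: jnp.ndarray) -> jnp.ndarray:
    """Hypothetical sketch of the shift-and-mask idiom used by unpack_int32."""
    bits = 4
    shifts = jnp.arange(0, 32, bits)  # bit offsets of the 8 nibbles in each word
    # Broadcast: shift every word right by every offset, then mask the low 4 bits.
    unpacked = (packed[..., None] >> shifts) & ((1 << bits) - 1)
    # Fold the nibble axis back into the last dimension: (..., n) -> (..., n * 8)
    return unpacked.reshape(*packed.shape[:-1], -1)

words = jnp.array([0x76543210], dtype=jnp.uint32)
print(unpack_uint4_words(words))  # [0 1 2 3 4 5 6 7]
```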
@@ -309,7 +315,14 @@ def load_moe(module: MixtureOfExperts, weights_dict: Mapping[str, Array], path:
         )
     else:
         # Fallback: recursively load a standard DenseMLP experts module
-        experts = load_mlp(
+        experts = load_mlp(
+            module.experts,
+            weights_dict,
+            experts_path,
+            "up_proj",
+            "gate_proj",
+            "down_proj",
+        )

     return load_parameters(
         lambda m: (m.router, m.experts),
@@ -319,10 +332,10 @@


 def load_rmsnorm(
-    module:
+    module: Normalization,
     weights_dict: Mapping[str, Array],
     path: ParameterPath,
-) ->
+) -> Normalization:
     scales = weights_dict[path / "weight"]
     return load_parameters(lambda m: (m.scales,), module, (scales,))

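Every loader in this file follows the same out-of-place pattern: a lambda selects the leaves to replace, and a tuple supplies the new arrays. A minimal sketch of how such a `load_parameters` helper can be written on top of `equinox.tree_at` — an assumption about the actual implementation, which lives in `.common`:

```python
from collections.abc import Callable

import equinox as eqx

def load_parameters[M: eqx.Module](
    where: Callable[[M], tuple],
    module: M,
    values: tuple,
) -> M:
    # tree_at returns an updated copy; modules stay immutable PyTrees.
    # is_leaf lets a selected field that is currently None be replaced too.
    return eqx.tree_at(where, module, values, is_leaf=lambda x: x is None)
```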
@@ -357,7 +370,13 @@ def load_attention(
         sinks = module.sinks

     return load_parameters(
-        lambda m: (
+        lambda m: (
+            m.qkv_projection,
+            m.out_projection,
+            m.query_norm,
+            m.key_norm,
+            m.sinks,
+        ),
         module,
         (qkv_projection, out_projection, query_norm, key_norm, sinks),
     )
@@ -420,14 +439,20 @@ def load_mamba2(
         gate_bias = module.gate_bias

     return load_parameters(
-        lambda m: (
+        lambda m: (
+            m.in_projection,
+            m.out_projection,
+            m.conv,
+            m.skip_connection_weight,
+            m.gate_bias,
+        ),
         module,
         (in_projection, out_projection, conv, skip_connection_weight, gate_bias),
     )


-def load_decoder_layer(
-    module: DecoderLayer,
+def load_transformer_layer(
+    module: TransformerLayer,
     weights_dict: Mapping[str, Array],
     mixer_path: ParameterPath,
     mlp_path: ParameterPath,
@@ -438,13 +463,16 @@ def load_decoder_layer(
     up_proj_key: str,
     gate_proj_key: str,
     down_proj_key: str,
-) -> DecoderLayer:
-
-
-
-
-
+) -> TransformerLayer:
+    if module.pre_mixer_norm is not None:
+        pre_attention_norm = load_rmsnorm(
+            module.pre_mixer_norm,
+            weights_dict,
+            mixer_path / pre_mixer_norm_key,
+        )

+    else:
+        pre_attention_norm = None
     # Load mixer (attention or mamba)
     if isinstance(module.mixer, Attention):
         mixer = load_attention(module.mixer, weights_dict, mixer_path / mixer_key)
@@ -474,7 +502,14 @@ def load_decoder_layer(
         mlp_path / pre_mlp_norm_key,
     )

-    mlp = load_mlp(
+    mlp = load_mlp(
+        module.mlp,
+        weights_dict,
+        mlp_path / mlp_key,
+        up_proj_key,
+        gate_proj_key,
+        down_proj_key,
+    )

     if module.post_mlp_norm is not None:
         post_mlp_norm = load_rmsnorm(
@@ -486,9 +521,23 @@ def load_decoder_layer(
         post_mlp_norm = None

     return load_parameters(
-        lambda m: (
+        lambda m: (
+            m.pre_mixer_norm,
+            m.mixer,
+            m.post_mixer_norm,
+            m.pre_mlp_norm,
+            m.mlp,
+            m.post_mlp_norm,
+        ),
         module,
-        (
+        (
+            pre_attention_norm,
+            mixer,
+            post_attention_norm,
+            pre_mlp_norm,
+            mlp,
+            post_mlp_norm,
+        ),
     )


@@ -558,10 +607,14 @@ def load_untied_embedding(
 ) -> UntiedEmbedding:
     input_weights = weights_dict[embedding_path / "weight"]
     output_weights = weights_dict[lm_head_path / "weight"]
-    return load_parameters(
+    return load_parameters(
+        lambda m: (m.input_weights, m.output_weights),
+        module,
+        (input_weights, output_weights),
+    )


-def load_huggingface(
+def load_huggingface_decoder(
     module: Decoder,
     weights_dict: Mapping[str, Array],
 ) -> Decoder:
@@ -629,7 +682,7 @@ def load_huggingface(
         raise TypeError(f"Unsupported embedding type: {type(module.embedding)}")

     decoder_layers = tuple(
-        load_decoder_layer(
+        load_transformer_layer(
             layer,
             weights_dict,
             decoder_path / "layers" / ((i * 2) if alternating_layers else i),
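The `(i * 2) if alternating_layers else i` index above presumably handles checkpoints whose `layers` list interleaves two kinds of entries, so the i-th transformer layer sits at every second slot. A toy illustration of the mapping, with hypothetical layer names:

```python
# Checkpoint layout with alternating entries: transformer layers at 0, 2, 4, ...
checkpoint_layers = ["attn_0", "other_0", "attn_1", "other_1", "attn_2", "other_2"]
alternating_layers = True

for i in range(3):
    index = (i * 2) if alternating_layers else i
    print(i, "->", checkpoint_layers[index])  # 0 -> attn_0, 1 -> attn_1, 2 -> attn_2
```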
@@ -642,12 +695,187 @@ def load_huggingface(
             gate_proj_key,
             down_proj_key,
         )
-        for i, layer in enumerate(module.layers)
+        for i, layer in enumerate(module.transformer.layers)
     )
-
-    output_norm = load_rmsnorm(module.output_norm, weights_dict, decoder_path / norm_key)
+    output_norm = load_rmsnorm(module.transformer.output_norm, weights_dict, decoder_path / norm_key)
     return load_parameters(
-        lambda m: (m.embedding, m.layers, m.output_norm),
+        lambda m: (m.embedding, m.transformer.layers, m.transformer.output_norm),
         module,
         (embedding, decoder_layers, output_norm),
     )
+
+
+def load_huggingface_classifier(
+    module: Classifier,
+    weights_dict: Mapping[str, Array],
+) -> Classifier:
+    def load_tied_embedding_local(
+        module: TiedEmbedding,
+        weights_dict: Mapping[str, Array],
+        decoder_path: ParameterPath,
+    ) -> TiedEmbedding:
+        input_weights = weights_dict[decoder_path / "embeddings" / "tok_embeddings" / "weight"]
+        return load_parameters(lambda m: (m.weights,), module, (input_weights,))
+
+    def load_linear_with_reshufling(
+        module: LinearBase,
+        weights_dict: Mapping[str, Array],
+        path: ParameterPath,
+    ) -> LinearBase:
+        """Loads a linear layer and reshufle some weights in resulting matrix to meet
+        requirements of downstream 'split' in MLP layer in attention."""
+
+        assert not module.has_biases, "Expecting no biases in FullPrecisionLinear"
+        assert isinstance(module, FullPrecisionLinear), "Expecting FullPrecisionLinear module as input"
+
+        weights = weights_dict[path / "weight"]
+        rows, _ = weights.shape
+        shuffled_weights = jnp.vstack((weights[rows // 2 :, :], weights[: rows // 2, :]))
+        return load_parameters(lambda m: (m.weights, m.biases), module, (shuffled_weights, None))
+
+    def load_attention_local(
+        module: Attention,
+        weights_dict: Mapping[str, Array],
+        path: ParameterPath,
+    ) -> Attention:
+        qkv_projection = load_linear(
+            module.qkv_projection,
+            weights_dict,
+            path / "Wqkv",
+            sublayers_to_fuse=None,
+        )
+        out_projection = load_linear(module.out_projection, weights_dict, path / "Wo")
+
+        if module.query_norm is not None:
+            query_norm = load_rmsnorm(module.query_norm, weights_dict, path / "q_norm")
+        else:
+            query_norm = None
+
+        if module.key_norm is not None:
+            key_norm = load_rmsnorm(module.key_norm, weights_dict, path / "k_norm")
+        else:
+            key_norm = None
+
+        return load_parameters(
+            lambda m: (m.qkv_projection, m.out_projection, m.query_norm, m.key_norm),
+            module,
+            (qkv_projection, out_projection, query_norm, key_norm),
+        )
+
+    def load_mlp_local(module: MLPBase, weights_dict: Mapping[str, Array], path: ParameterPath) -> MLPBase:
+        assert isinstance(module, DenseMLP)
+        up_projection = load_linear_with_reshufling(
+            module.up_projection,
+            weights_dict,
+            path / "Wi",
+        )
+        down_projection = load_linear(module.down_projection, weights_dict, path / "Wo")
+        return load_parameters(
+            lambda m: (m.up_projection, m.down_projection),
+            module,
+            (up_projection, down_projection),
+        )
+
+    def load_transformer_layer_local(
+        module: TransformerLayer,
+        weights_dict: Mapping[str, Array],
+        path: ParameterPath,
+    ) -> TransformerLayer:
+        if module.pre_mixer_norm is not None:
+            pre_attention_norm = load_rmsnorm(
+                module.pre_mixer_norm,
+                weights_dict,
+                path / "attn_norm",
+            )
+        else:
+            pre_attention_norm = None
+
+        assert isinstance(module.mixer, Attention)
+        attention = load_attention_local(module.mixer, weights_dict, path / "attn")
+        if module.post_mixer_norm is not None:
+            post_attention_norm = load_rmsnorm(
+                module.post_mixer_norm,
+                weights_dict,
+                path / "post_attention_layernorm",
+            )
+
+            pre_mlp_norm = load_rmsnorm(
+                module.pre_mlp_norm,
+                weights_dict,
+                path / "pre_feedforward_layernorm",
+            )
+        else:
+            post_attention_norm = None
+
+            pre_mlp_norm = load_rmsnorm(
+                module.pre_mlp_norm,
+                weights_dict,
+                path / "mlp_norm",
+            )
+
+        mlp = load_mlp_local(module.mlp, weights_dict, path / "mlp")
+        if module.post_mlp_norm is not None:
+            post_mlp_norm = load_rmsnorm(
+                module.post_mlp_norm,
+                weights_dict,
+                path / "post_feedforward_layernorm",
+            )
+        else:
+            post_mlp_norm = None
+        return load_parameters(
+            lambda m: (
+                m.pre_mixer_norm,
+                m.mixer,
+                m.post_mixer_norm,
+                m.pre_mlp_norm,
+                m.mlp,
+                m.post_mlp_norm,
+            ),
+            module,
+            (
+                pre_attention_norm,
+                attention,
+                post_attention_norm,
+                pre_mlp_norm,
+                mlp,
+                post_mlp_norm,
+            ),
+        )
+
+    base_path = ParameterPath()
+    decoder_path = base_path / "model"
+    head_path = base_path / "head"
+    classifier_path = base_path / "classifier"
+    assert isinstance(module.embedding, TiedEmbedding)
+    embedding = load_tied_embedding_local(module.embedding, weights_dict, decoder_path)
+    embedding_norm = load_rmsnorm(module.embedding_norm, weights_dict, base_path / "model" / "embeddings" / "norm")
+
+    decoder_layers = tuple(
+        load_transformer_layer_local(layer, weights_dict, decoder_path / "layers" / i)
+        for i, layer in enumerate(module.transformer.layers)
+    )
+    output_norm = load_rmsnorm(module.transformer.output_norm, weights_dict, decoder_path / "final_norm")
+    head_dense = load_linear(module.prediction_head.dense, weights_dict, head_path / "dense")
+    head_norm = load_rmsnorm(module.prediction_head.norm, weights_dict, head_path / "norm")
+    head_readout = load_linear(module.prediction_head.readout, weights_dict, classifier_path)
+    return load_parameters(
+        lambda m: (
+            m.embedding,
+            m.embedding_norm,
+            m.transformer.layers,
+            m.transformer.output_norm,
+            m.prediction_head.dense,
+            m.prediction_head.norm,
+            m.prediction_head.readout,
+        ),
+        module,
+        (
+            embedding,
+            embedding_norm,
+            decoder_layers,
+            output_norm,
+            head_dense,
+            head_norm,
+            head_readout,
+        ),
+    )
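The one genuinely non-mechanical step above is `load_linear_with_reshufling`: the two row-halves of the fused `Wi` matrix are stacked in reverse order so that the downstream split inside the MLP reads the sub-matrices in the order the runtime expects. A standalone sketch of that half-swap; the 4×2 toy matrix is illustrative only:

```python
import jax.numpy as jnp

weights = jnp.arange(8).reshape(4, 2)  # stand-in for the fused (2 * hidden, in) matrix
rows, _ = weights.shape

# Swap the top and bottom halves, exactly as in load_linear_with_reshufling.
shuffled = jnp.vstack((weights[rows // 2 :, :], weights[: rows // 2, :]))

# A downstream split now yields the halves in the opposite order.
first, second = jnp.split(shuffled, 2, axis=0)
print(first)   # rows 2..3 of the original matrix
print(second)  # rows 0..1 of the original matrix
```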
lalamo/model_import/model_specs/__init__.py

@@ -1,10 +1,11 @@
-from .common import FileSpec, ModelSpec, UseCase, build_quantized_models
+from .common import FileSpec, ModelSpec, ModelType, UseCase, build_quantized_models
 from .deepseek import DEEPSEEK_MODELS
 from .gemma import GEMMA_MODELS
 from .gpt_oss import GPT_OSS_MODELS
 from .huggingface import HUGGINGFACE_MODELS
 from .llama import LLAMA_MODELS
 from .llamba import LLAMBA_MODELS
+from .mirai import MIRAI_ROUTER_MODELS
 from .mistral import MISTRAL_MODELS

 # from .pleias import PLEIAS_MODELS

@@ -17,6 +18,7 @@ __all__ = [
     "REPO_TO_MODEL",
     "FileSpec",
     "ModelSpec",
+    "ModelType",
     "UseCase",
 ]

@@ -33,9 +35,9 @@ ALL_MODEL_LISTS = [
     POLARIS_MODELS,
     QWEN_MODELS,
     REKA_MODELS,
+    MIRAI_ROUTER_MODELS,
 ]

-
 ALL_MODELS = [model for model_list in ALL_MODEL_LISTS for model in model_list]

lalamo/model_import/model_specs/common.py

@@ -5,7 +5,7 @@ from collections.abc import (
 )
 from contextlib import contextmanager
 from dataclasses import dataclass, field
-from enum import Enum
+from enum import Enum, StrEnum
 from pathlib import Path
 from typing import ClassVar, cast, get_args, get_origin

@@ -22,6 +22,7 @@ __all__ = [
     "FileSpec",
     "JSONFieldSpec",
     "ModelSpec",
+    "ModelType",
     "UseCase",
     "WeightsType",
     "awq_model_spec",

@@ -29,6 +30,11 @@ __all__ = [
 ]


+class ModelType(StrEnum):
+    LANGUAGE_MODEL = "language_model"
+    ROUTER_MODEL = "router_model"
+
+
 def cast_if_float(array: Array, cast_to: DTypeLike) -> Array:
     if array.dtype in [jnp.float16, jnp.bfloat16, jnp.float32, jnp.float64]:
         return array.astype(cast_to)

@@ -50,7 +56,6 @@ class WeightsType(Enum):
             yield MapDictValues(lambda v: cast_if_float(v, float_dtype), weights_dict), metadata_dict or {}
         else:
             import torch
-
             from lalamo.modules.torch_interop import torch_to_jax

             torch_weights = torch.load(filename, map_location="cpu", weights_only=True)

@@ -129,6 +134,7 @@ class ModelSpec:
     assistant_role_name: str = "assistant"
     tool_role_name: str = "tool"
     weights_type: WeightsType = WeightsType.SAFETENSORS
+    model_type: ModelType = ModelType.LANGUAGE_MODEL
     configs: ConfigMap = field(default=ConfigMap())
     use_cases: tuple[UseCase, ...] = tuple()

lalamo/model_import/model_specs/mirai.py
ADDED

@@ -0,0 +1,20 @@
+from lalamo.model_import.decoder_configs.huggingface import ModernBERTConfig
+
+from .common import ConfigMap, FileSpec, ModelSpec, ModelType
+
+__all__ = ["MIRAI_ROUTER_MODELS"]
+
+MIRAI_ROUTER_MODELS = [
+    ModelSpec(
+        vendor="trymirai",
+        family="ModernBERT",
+        name="ModernBERT-Chat-Moderation",
+        size="0.15B",
+        quantization=None,
+        repo="trymirai/chat-moderation-router",
+        config_type=ModernBERTConfig,
+        use_cases=tuple(),
+        model_type=ModelType("router_model"),
+        configs=ConfigMap(chat_template=FileSpec("chat_template.jinja")),
+    ),
+]
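`ModelType` being a `StrEnum` is what makes the `ModelType("router_model")` call in the spec above work: `StrEnum` members are looked up by value and behave as plain strings, so they pass through JSON-like specs unchanged. A quick self-contained illustration:

```python
from enum import StrEnum

class ModelType(StrEnum):
    LANGUAGE_MODEL = "language_model"
    ROUTER_MODEL = "router_model"

assert ModelType("router_model") is ModelType.ROUTER_MODEL  # lookup by value
assert ModelType.ROUTER_MODEL == "router_model"             # compares as a str
print(f"{ModelType.LANGUAGE_MODEL}")                        # prints: language_model
```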
lalamo/models/common.py
ADDED

@@ -0,0 +1,81 @@
+import json
+from abc import ABC, abstractmethod
+from collections.abc import Iterable
+from dataclasses import dataclass, replace
+from pathlib import Path
+from typing import Self
+
+import equinox as eqx
+from jax import Array
+from jax import numpy as jnp
+from tokenizers import Tokenizer
+
+from lalamo.common import DTypeLike, ParameterTree, unflatten_parameters
+from lalamo.message_processor import Message, MessageProcessor, MessageProcessorConfig, UserMessage
+from lalamo.modules import Classifier, Decoder, LalamoModule, config_converter
+from lalamo.modules.classifier import ClassifierConfig, ClassifierResult
+from lalamo.modules.decoder import DecoderConfig, DecoderResult
+from lalamo.utils import open_safetensors
+
+__all__ = [
+    "TextModel",
+    "TextModelConfig",
+]
+
+
+@dataclass(frozen=True)
+class TextModelConfig[ConfigT: ClassifierConfig | DecoderConfig](ABC):
+    model_config: ConfigT
+    message_processor_config: MessageProcessorConfig
+
+    @abstractmethod
+    def init(
+        self,
+        model: LalamoModule,
+        message_processor: MessageProcessor,
+    ) -> LalamoModule[Self]: ...
+
+    @classmethod
+    def load_model(cls, path: Path | str) -> LalamoModule[Self]:
+        if isinstance(path, str):
+            path = Path(path)
+        with open(path / "config.json") as config_file:
+            config_json = json.load(config_file)
+        config = config_converter.structure(config_json["model_config"], cls)
+        with open_safetensors(path / "model.safetensors") as open_results:
+            weights_dict, _ = open_results
+            weights = unflatten_parameters(weights_dict)
+            model = config.model_config.empty().import_weights(weights)
+        tokenizer = Tokenizer.from_file(str(path / "tokenizer.json"))
+        message_processor = MessageProcessor(config.message_processor_config, tokenizer)
+        return config.init(model, message_processor)
+
+
+class TextModel[ConfigT, ModelT: Decoder | Classifier](LalamoModule[ConfigT]):
+    model: ModelT
+    message_processor: MessageProcessor = eqx.field(static=True)
+
+    @property
+    def activation_precision(self) -> DTypeLike:
+        return self.model.activation_precision
+
+    def export_weights(self) -> ParameterTree:
+        return self.model.export_weights()
+
+    def import_weights(
+        self,
+        weights: ParameterTree[Array],
+    ) -> Self:
+        return replace(
+            self,
+            model=self.model.import_weights(weights),
+        )
+
+    def record_trace(self, messages: Iterable[Message] | None = None) -> ClassifierResult | DecoderResult:
+        if messages is None:
+            messages = [UserMessage("Tell me about London")]
+
+        token_ids = jnp.array(self.message_processor.tokenize_request(messages))[None, :]
+        _, num_tokens = token_ids.shape
+        token_positions = jnp.arange(num_tokens)[None, :]
+        return self.model(token_ids=token_ids, token_positions=token_positions, return_activation_trace=True)
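Taken together, `load_model` wires the on-disk layout (`config.json`, `model.safetensors`, `tokenizer.json`) into a ready-to-run `TextModel`. A hypothetical usage sketch; `RouterModelConfig` and the directory path are stand-ins, not names confirmed by this diff:

```python
# Hypothetical names: RouterModelConfig stands in for whichever concrete
# TextModelConfig subclass applies; the directory is a locally exported model.
model = RouterModelConfig.load_model("exports/chat-moderation-router")

# With no messages, record_trace defaults to a single "Tell me about London" turn;
# a Classifier-backed model yields a ClassifierResult, a Decoder a DecoderResult.
trace = model.record_trace()
```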