PyPI - lalamo - Versions diffs - 0.5.8__tar.gz → 0.5.9__tar.gz - Mend

lalamo-0.5.8.tar.gz → lalamo-0.5.9.tar.gz

This diff shows the differences between the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the package versions as they appear in their respective public registries.

Files changed (105)

{lalamo-0.5.8 → lalamo-0.5.9}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: lalamo
-Version: 0.5.8
+Version: 0.5.9
 Summary: JAX library for optimization and export of models for use with the UZU inference engine.
 Requires-Python: <4,>=3.12
 Description-Content-Type: text/markdown

{lalamo-0.5.8 → lalamo-0.5.9}/lalamo/__init__.py RENAMED Viewed

@@ -15,7 +15,7 @@ from lalamo.speculator import (
     SpeculatorTrainingEvent,
 )
-__version__ = "0.5.8"
+__version__ = "0.5.9"
 __all__ = [
     "AssistantMessage",

{lalamo-0.5.8 → lalamo-0.5.9}/lalamo/model_import/common.py RENAMED Viewed

@@ -17,6 +17,7 @@ from lalamo.message_processor import MessageProcessor, MessageProcessorConfig
 from lalamo.models import ClassifierModel, ClassifierModelConfig, GenerationConfig, LanguageModel, LanguageModelConfig
 from lalamo.modules import Classifier, Decoder, LalamoModule
 from lalamo.quantization import QuantizationMode
+from lalamo.utils import process_chat_template
 from .decoder_configs import ForeignClassifierConfig, ForeignConfig, ForeignLMConfig
 from .huggingface_generation_config import HFGenerationConfig
@@ -154,6 +155,7 @@ def import_message_processor(
         if model_spec.configs.chat_template is not None:
             raise ValueError("Conflicting chat template specifications.")
         prompt_template = tokenizer_config.chat_template
+    prompt_template = process_chat_template(prompt_template)
     tokenizer = Tokenizer.from_file(str(tokenizer_file))
     added_tokens = tokenizer_config.added_tokens()

{lalamo-0.5.8 → lalamo-0.5.9}/lalamo/model_import/decoder_configs/huggingface/gemma3.py RENAMED Viewed

@@ -10,7 +10,7 @@ from lalamo.modules.activations import GELU
 from lalamo.modules.linear import FullPrecisionLinearConfig
 from lalamo.modules.mlp import DenseMLPConfig
 from lalamo.modules.normalization import NormalizationConfig, UpcastMode
-from lalamo.modules.rope import LinearScalingRoPEConfig, UnscaledRoPEConfig
+from lalamo.modules.rope import LinearScalingRoPEConfig, UnscaledRoPEConfig, YARNRoPEConfig
 from lalamo.modules.token_mixers.attention import AttentionConfig
 from lalamo.modules.transformer_layer import TransformerLayerConfig
@@ -19,9 +19,6 @@ from .common import HuggingFaceLMConfig
 __all__ = ["HFGemma3Config", "HFGemma3TextConfig"]
-NUM_SLIDING_WINDOW_LAYERS_PER_FULL_ATTENTION_LAYER = 6
 def _round_to_bfloat16(x: float) -> float:
     return jnp.asarray(x).astype(jnp.bfloat16).item()
@@ -32,6 +29,16 @@ class GemmaRoPEScalingConfig:
     rope_type: Literal["linear"]
+@dataclass(frozen=True)
+class YarnRopeScalingConfig:
+    factor: float
+    beta_fast: float
+    beta_slow: float
+    original_max_position_embeddings: int
+    rope_type: Literal["yarn"]
+    truncate: bool = False
 @dataclass(frozen=True)
 class HFGemma3TextConfigRaw:
     hidden_size: int
@@ -39,6 +46,7 @@ class HFGemma3TextConfigRaw:
     model_type: Literal["gemma3_text"]
     num_hidden_layers: int
     sliding_window: int
+    sliding_window_pattern: int
     rms_norm_eps: float = 1e-06
     query_pre_attn_scalar: float = 256.0
     attention_bias: bool = False
@@ -49,7 +57,7 @@ class HFGemma3TextConfigRaw:
     max_position_embeddings: int = 131072
     rope_theta: float = 1000000.0
     rope_local_base_freq: float = 10000.0
-    rope_scaling: GemmaRoPEScalingConfig | None = None
+    rope_scaling: GemmaRoPEScalingConfig | YarnRopeScalingConfig | None = None
     final_logit_softcapping: float | None = None
     vocab_size: int = 262208
@@ -57,7 +65,7 @@ class HFGemma3TextConfigRaw:
     def sliding_window_sizes(self) -> list[int | None]:
         result = []
         for i in range(self.num_hidden_layers):
-            if (i + 1) % NUM_SLIDING_WINDOW_LAYERS_PER_FULL_ATTENTION_LAYER == 0:
+            if (i + 1) % self.sliding_window_pattern == 0:
                 result.append(None)
             else:
                 result.append(self.sliding_window)
@@ -74,7 +82,7 @@ class HFGemma3TextConfigRaw:
         attention_scale = self.query_pre_attn_scalar**-0.5
         embedding_config = TiedEmbeddingConfig(
             input_scale=input_scale,
-            logit_soft_cap=None,
+            logit_soft_cap=self.final_logit_softcapping,
             precision=activation_precision,
         )
         rms_norm_config = NormalizationConfig(
@@ -86,19 +94,33 @@ class HFGemma3TextConfigRaw:
             subtract_mean=False,
         )
-        if self.rope_scaling is not None:
+        if isinstance(self.rope_scaling, GemmaRoPEScalingConfig):
             global_rope_config = LinearScalingRoPEConfig(
                 precision=activation_precision,
                 base=self.rope_theta,
                 max_sequence_length=self.max_position_embeddings,
                 scaling_factor=self.rope_scaling.factor,
             )
-        else:
+        elif isinstance(self.rope_scaling, YarnRopeScalingConfig):
+            global_rope_config = YARNRoPEConfig(
+                precision=activation_precision,
+                base=self.rope_theta,
+                scaling_factor=self.rope_scaling.factor,
+                max_sequence_length=self.max_position_embeddings,
+                original_context_length=self.rope_scaling.original_max_position_embeddings,
+                beta_fast=self.rope_scaling.beta_fast,
+                beta_slow=self.rope_scaling.beta_slow,
+                truncate=self.rope_scaling.truncate,
+            )
+        elif self.rope_scaling is None:
             global_rope_config = UnscaledRoPEConfig(
                 precision=activation_precision,
                 base=self.rope_theta,
                 max_sequence_length=context_length or self.max_position_embeddings,
             )
+        else:
+            raise ValueError("Invalid rope scaling configuration")
         local_rope_config = UnscaledRoPEConfig(
             precision=activation_precision,
             base=self.rope_local_base_freq,

{lalamo-0.5.8 → lalamo-0.5.9}/lalamo/model_import/loaders/huggingface.py RENAMED Viewed

@@ -300,7 +300,7 @@ def load_moe(module: MixtureOfExperts, weights_dict: Mapping[str, Array], path:
         down_w = rearrange(down_w, "e o ib ie -> e o (ib ie)")
         down_b = weights_dict[experts_path / "down_proj_bias"]
         if down_b.ndim == 1:
-            down_b = jnp.broadcast_to(down_b, down_w.shape[:-1] + (down_b.shape[0],))
+            down_b = jnp.broadcast_to(down_b, (*down_w.shape[:-1], down_b.shape[0]))
         down_projection = load_parameters(
             lambda m: (m.weights, m.biases),  # type: ignore

{lalamo-0.5.8 → lalamo-0.5.9}/lalamo/model_import/model_specs/__init__.py RENAMED Viewed

@@ -1,5 +1,6 @@
 from .common import FileSpec, ModelSpec, ModelType, UseCase, build_quantized_models
 from .deepseek import DEEPSEEK_MODELS
+from .essential_ai import RNJ_MODELS
 from .gemma import GEMMA_MODELS
 from .gpt_oss import GPT_OSS_MODELS
 from .huggingface import HUGGINGFACE_MODELS
@@ -36,6 +37,7 @@ ALL_MODEL_LISTS = [
     QWEN_MODELS,
     REKA_MODELS,
     MIRAI_CLASSIFIER_MODELS,
+    RNJ_MODELS,
 ]
 ALL_MODELS = [model for model_list in ALL_MODEL_LISTS for model in model_list]

lalamo-0.5.9/lalamo/model_import/model_specs/essential_ai.py ADDED Viewed

@@ -0,0 +1,17 @@
+from lalamo.model_import.decoder_configs.huggingface import HFGemma3TextConfig
+from .common import ModelSpec
+__all__ = ["RNJ_MODELS"]
+RNJ_MODELS = [
+    ModelSpec(
+        vendor="EssentialAI",
+        family="Rnj-1",
+        name="Rnj-1-Instruct",
+        size="8B",
+        quantization=None,
+        repo="EssentialAI/rnj-1-instruct",
+        config_type=HFGemma3TextConfig,
+    ),
+]

{lalamo-0.5.8 → lalamo-0.5.9}/lalamo/model_import/model_specs/huggingface.py RENAMED Viewed

@@ -14,5 +14,5 @@ HUGGINGFACE_MODELS = [
         repo="HuggingFaceTB/SmolLM2-1.7B-Instruct",
         config_type=HFLlamaConfig,
         use_cases=tuple(),
-    )
+    ),
 ]

{lalamo-0.5.8 → lalamo-0.5.9}/lalamo/utils.py RENAMED Viewed

@@ -24,6 +24,7 @@ __all__ = [
     "MapSequence",
     "jax_uint4_to_packed_uint8",
     "open_safetensors",
+    "process_chat_template",
 ]
@@ -159,3 +160,9 @@ def jax_uint8_to_unpacked_uint4(array: Array) -> Array:
     )
     return unpacked.astype(jnp.uint4)
+def process_chat_template(template: str) -> str:
+    template = template.replace("{% generation %}", "")
+    template = template.replace("{%- endgeneration -%}", "")
+    return template

{lalamo-0.5.8 → lalamo-0.5.9}/lalamo.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: lalamo
-Version: 0.5.8
+Version: 0.5.9
 Summary: JAX library for optimization and export of models for use with the UZU inference engine.
 Requires-Python: <4,>=3.12
 Description-Content-Type: text/markdown

{lalamo-0.5.8 → lalamo-0.5.9}/lalamo.egg-info/SOURCES.txt RENAMED Viewed

@@ -45,6 +45,7 @@ lalamo/model_import/loaders/utils.py
 lalamo/model_import/model_specs/__init__.py
 lalamo/model_import/model_specs/common.py
 lalamo/model_import/model_specs/deepseek.py
+lalamo/model_import/model_specs/essential_ai.py
 lalamo/model_import/model_specs/gemma.py
 lalamo/model_import/model_specs/gpt_oss.py
 lalamo/model_import/model_specs/huggingface.py