lalamo 0.2.7__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52)
  1. lalamo/__init__.py +1 -1
  2. lalamo/common.py +79 -29
  3. lalamo/language_model.py +106 -83
  4. lalamo/main.py +91 -18
  5. lalamo/message_processor.py +170 -0
  6. lalamo/model_import/common.py +159 -43
  7. lalamo/model_import/{configs → decoder_configs}/__init__.py +0 -1
  8. lalamo/model_import/{configs → decoder_configs}/common.py +11 -10
  9. lalamo/model_import/{configs → decoder_configs}/huggingface/common.py +9 -4
  10. lalamo/model_import/{configs → decoder_configs}/huggingface/gemma3.py +2 -2
  11. lalamo/model_import/{configs → decoder_configs}/huggingface/llama.py +2 -2
  12. lalamo/model_import/{configs → decoder_configs}/huggingface/mistral.py +1 -1
  13. lalamo/model_import/{configs → decoder_configs}/huggingface/qwen2.py +1 -1
  14. lalamo/model_import/{configs → decoder_configs}/huggingface/qwen3.py +1 -1
  15. lalamo/model_import/huggingface_generation_config.py +44 -0
  16. lalamo/model_import/huggingface_tokenizer_config.py +85 -0
  17. lalamo/model_import/loaders/common.py +2 -1
  18. lalamo/model_import/loaders/huggingface.py +12 -10
  19. lalamo/model_import/model_specs/__init__.py +3 -2
  20. lalamo/model_import/model_specs/common.py +32 -34
  21. lalamo/model_import/model_specs/deepseek.py +1 -10
  22. lalamo/model_import/model_specs/gemma.py +2 -25
  23. lalamo/model_import/model_specs/huggingface.py +2 -12
  24. lalamo/model_import/model_specs/llama.py +2 -58
  25. lalamo/model_import/model_specs/mistral.py +9 -19
  26. lalamo/model_import/model_specs/pleias.py +3 -13
  27. lalamo/model_import/model_specs/polaris.py +5 -7
  28. lalamo/model_import/model_specs/qwen.py +12 -111
  29. lalamo/model_import/model_specs/reka.py +4 -13
  30. lalamo/modules/__init__.py +2 -1
  31. lalamo/modules/attention.py +90 -10
  32. lalamo/modules/common.py +51 -4
  33. lalamo/modules/decoder.py +90 -8
  34. lalamo/modules/decoder_layer.py +85 -8
  35. lalamo/modules/embedding.py +95 -29
  36. lalamo/modules/kv_cache.py +3 -3
  37. lalamo/modules/linear.py +170 -130
  38. lalamo/modules/mlp.py +40 -7
  39. lalamo/modules/normalization.py +24 -6
  40. lalamo/modules/rope.py +24 -6
  41. lalamo/sampling.py +99 -0
  42. lalamo/utils.py +86 -1
  43. {lalamo-0.2.7.dist-info → lalamo-0.3.0.dist-info}/METADATA +6 -6
  44. lalamo-0.3.0.dist-info/RECORD +58 -0
  45. lalamo-0.2.7.dist-info/RECORD +0 -54
  46. /lalamo/model_import/{configs → decoder_configs}/executorch.py +0 -0
  47. /lalamo/model_import/{configs → decoder_configs}/huggingface/__init__.py +0 -0
  48. /lalamo/model_import/{configs → decoder_configs}/huggingface/gemma2.py +0 -0
  49. {lalamo-0.2.7.dist-info → lalamo-0.3.0.dist-info}/WHEEL +0 -0
  50. {lalamo-0.2.7.dist-info → lalamo-0.3.0.dist-info}/entry_points.txt +0 -0
  51. {lalamo-0.2.7.dist-info → lalamo-0.3.0.dist-info}/licenses/LICENSE +0 -0
  52. {lalamo-0.2.7.dist-info → lalamo-0.3.0.dist-info}/top_level.txt +0 -0
lalamo/message_processor.py
@@ -0,0 +1,170 @@
+import re
+from collections.abc import Iterable
+from dataclasses import dataclass
+from functools import cached_property
+from re import Pattern
+from typing import NotRequired, TypedDict
+
+from jinja2 import Template
+from tokenizers import Tokenizer
+
+__all__ = [
+    "AssistantMessage",
+    "ContentBlock",
+    "Image",
+    "Message",
+    "MessageProcessor",
+    "MessageProcessorConfig",
+    "SystemMessage",
+    "ToolSchema",
+    "UserMessage",
+]
+
+type ToolSchema = None  # WIP
+type Image = None  # WIP
+
+
+class HuggingFaceMessage(TypedDict):
+    role: str
+    content: str
+    tool_calls: NotRequired[list[dict]]
+    reasoning_content: NotRequired[str]
+
+
+class HuggingFaceRequest(TypedDict):
+    add_generation_prompt: bool
+    bos_token: str | None
+    messages: list[HuggingFaceMessage]
+    enable_thinking: NotRequired[bool]
+    tools: NotRequired[dict]
+
+
+@dataclass(frozen=True)
+class Message:
+    pass
+
+
+type ContentBlock = str | Image
+
+
+@dataclass(frozen=True)
+class UserMessage(Message):
+    content: tuple[ContentBlock, ...] | ContentBlock
+
+
+@dataclass(frozen=True)
+class SystemMessage(UserMessage):
+    content: tuple[ContentBlock, ...] | ContentBlock
+
+
+@dataclass(frozen=True)
+class AssistantMessage(Message):
+    chain_of_thought: str | None
+    response: str
+
+
+@dataclass(frozen=True)
+class MessageProcessorConfig:
+    prompt_template: str
+    output_parser_regex: str | None
+    system_role_name: str
+    user_role_name: str
+    assistant_role_name: str
+    bos_token: str | None
+
+    def init(self, tokenizer: Tokenizer) -> "MessageProcessor":
+        return MessageProcessor(
+            config=self,
+            tokenizer=tokenizer,
+        )
+
+
+@dataclass(frozen=True)
+class MessageProcessor:
+    config: MessageProcessorConfig
+    tokenizer: Tokenizer
+
+    @cached_property
+    def prompt_template(self) -> Template:
+        return Template(self.config.prompt_template)
+
+    @cached_property
+    def output_parser_regex(self) -> Pattern | None:
+        if self.config.output_parser_regex is None:
+            return None
+        return re.compile(self.config.output_parser_regex)
+
+    @property
+    def system_role_name(self) -> str:
+        return self.config.system_role_name
+
+    @property
+    def user_role_name(self) -> str:
+        return self.config.user_role_name
+
+    @property
+    def assistant_role_name(self) -> str:
+        return self.config.assistant_role_name
+
+    @property
+    def bos_token(self) -> str | None:
+        return self.config.bos_token
+
+    def message_to_dict(self, message: Message) -> HuggingFaceMessage:
+        match message:
+            case UserMessage(content=content):
+                assert isinstance(content, str)
+                return HuggingFaceMessage(role=self.user_role_name, content=content)
+            case SystemMessage(content=content):
+                assert isinstance(content, str)
+                return HuggingFaceMessage(role=self.system_role_name, content=content)
+            case AssistantMessage(chain_of_thought=chain_of_thought, response=response):
+                result = HuggingFaceMessage(role=self.assistant_role_name, content=response)
+                if chain_of_thought:
+                    result["reasoning_content"] = chain_of_thought
+                return result
+        raise ValueError(f"Unsupported message type: {type(message)}")
+
+    def request_to_dict(
+        self,
+        messages: Iterable[Message],
+        tools: Iterable[ToolSchema] | None = None,
+        enable_thinking: bool | None = None,
+    ) -> HuggingFaceRequest:
+        converted_messages = [self.message_to_dict(message) for message in messages]
+        result = HuggingFaceRequest(add_generation_prompt=True, messages=converted_messages, bos_token=self.bos_token)
+        if enable_thinking is not None:
+            result["enable_thinking"] = enable_thinking
+        if tools is not None:
+            raise NotImplementedError("Tools are not supported yet.")
+        return result
+
+    def render_request(self, messages: Iterable[Message]) -> str:
+        request_dict = self.request_to_dict(messages)
+        return self.prompt_template.render(request_dict)
+
+    def parse_response(self, response: str) -> AssistantMessage:
+        if self.output_parser_regex is None:
+            return AssistantMessage(chain_of_thought=None, response=response)
+        match = self.output_parser_regex.match(response)
+        if match is None:
+            raise ValueError(f"Invalid response format: {response}")
+        return AssistantMessage(**match.groupdict())
+
+    def tokenize(self, text: str) -> list[int]:
+        return self.tokenizer.encode(text, add_special_tokens=False).ids
+
+    def detokenize(self, tokens: list[int]) -> str:
+        return self.tokenizer.decode(tokens, skip_special_tokens=False)
+
+    def __post_init__(self) -> None:
+        if self.output_parser_regex is not None:
+            all_fields = AssistantMessage.__dataclass_fields__
+            required_fields = {k: v for k, v in all_fields.items() if v.type == v.type | None}
+            named_groups = self.output_parser_regex.groupindex
+            invalid_groups = set(named_groups) - set(all_fields)
+            if invalid_groups:
+                raise ValueError(f"Unsupported output fields: {list(invalid_groups)}")
+            for group_name in required_fields:
+                if group_name not in named_groups:
+                    raise ValueError(f"Missing required output field: {group_name}")
lalamo/model_import/common.py
@@ -1,21 +1,32 @@
 import importlib.metadata
+from collections import ChainMap
+from collections.abc import Callable
 from dataclasses import dataclass
 from pathlib import Path
 from typing import NamedTuple
 
 import huggingface_hub
 import jax.numpy as jnp
+from jax import Array
 from jaxtyping import DTypeLike
+from tokenizers import Tokenizer
 
-from lalamo.modules import Decoder, DecoderConfig
+from lalamo.language_model import GenerationConfig, LanguageModel, LanguageModelConfig
+from lalamo.message_processor import MessageProcessor, MessageProcessorConfig
 from lalamo.quantization import QuantizationMode
 
-from .model_specs import REPO_TO_MODEL, ModelSpec, UseCase
+from .huggingface_generation_config import HFGenerationConfig
+from .huggingface_tokenizer_config import HFTokenizerConfig
+from .model_specs import REPO_TO_MODEL, FileSpec, ModelSpec, UseCase
 
 __all__ = [
     "REPO_TO_MODEL",
+    "DownloadingFileEvent",
+    "FinishedDownloadingFileEvent",
+    "InitializingModelEvent",
     "ModelMetadata",
     "ModelSpec",
+    "StatusEvent",
     "import_model",
 ]
 
@@ -23,6 +34,27 @@ __all__ = [
 LALAMO_VERSION = importlib.metadata.version("lalamo")
 
 
+class DownloadingFileEvent(NamedTuple):
+    file: FileSpec
+
+
+class FinishedDownloadingFileEvent(NamedTuple):
+    file: FileSpec
+
+
+class InitializingModelEvent(NamedTuple):
+    pass
+
+
+class FinishedInitializingModelEvent(NamedTuple):
+    pass
+
+
+type StatusEvent = (
+    DownloadingFileEvent | FinishedDownloadingFileEvent | InitializingModelEvent | FinishedInitializingModelEvent
+)
+
+
 @dataclass(frozen=True)
 class ModelMetadata:
     toolchain_version: str
@@ -33,69 +65,154 @@ class ModelMetadata:
     quantization: QuantizationMode | None
     repo: str
     use_cases: tuple[UseCase, ...]
-    model_config: DecoderConfig
-    tokenizer_file_names: tuple[str, ...]
+    model_config: LanguageModelConfig
 
 
-def download_weights(model_spec: ModelSpec, output_dir: Path | str | None = None) -> list[Path]:
-    result = [
-        huggingface_hub.hf_hub_download(
-            repo_id=model_spec.repo,
-            local_dir=output_dir,
-            filename=filename,
-        )
-        for filename in model_spec.weights_file_names
-    ]
-    return [Path(path) for path in result]
-
-
-def download_config_file(model_spec: ModelSpec, output_dir: Path | str | None = None) -> Path:
+def download_file(
+    file_spec: FileSpec,
+    model_repo: str,
+    output_dir: Path | str | None = None,
+    progress_callback: Callable[[StatusEvent], None] | None = None,
+) -> Path:
+    if progress_callback is not None:
+        progress_callback(DownloadingFileEvent(file_spec))
     result = huggingface_hub.hf_hub_download(
-        repo_id=model_spec.repo,
+        repo_id=file_spec.repo or model_repo,
         local_dir=output_dir,
-        filename=model_spec.config_file_name,
+        filename=file_spec.filename,
     )
+    if progress_callback is not None:
+        progress_callback(FinishedDownloadingFileEvent(file_spec))
     return Path(result)
 
 
-def download_tokenizer_files(model_spec: ModelSpec, output_dir: Path | str | None = None) -> tuple[Path, ...]:
-    result = [
-        huggingface_hub.hf_hub_download(
-            repo_id=tokenizer_file_spec.repo or model_spec.repo,
-            local_dir=output_dir,
-            filename=tokenizer_file_spec.filename,
-        )
-        for tokenizer_file_spec in model_spec.tokenizer_files
+def list_weight_files(model_repo: str) -> list[FileSpec]:
+    all_files = huggingface_hub.list_repo_files(model_repo)
+    return [FileSpec(filename) for filename in all_files if filename.endswith(".safetensors")]
+
+
+def download_weights(
+    model_spec: ModelSpec,
+    output_dir: Path | str | None = None,
+    progress_callback: Callable[[StatusEvent], None] | None = None,
+) -> list[Path]:
+    return [
+        download_file(file_spec, model_spec.repo, output_dir, progress_callback)
+        for file_spec in list_weight_files(model_spec.repo)
     ]
-    return tuple(Path(path) for path in result)
+
+
+def download_config_file(
+    model_spec: ModelSpec,
+    output_dir: Path | str | None = None,
+    progress_callback: Callable[[StatusEvent], None] | None = None,
+) -> Path:
+    return download_file(model_spec.configs.model_config, model_spec.repo, output_dir, progress_callback)
 
 
 class ImportResults(NamedTuple):
-    model: Decoder
+    model: LanguageModel
     metadata: ModelMetadata
-    tokenizer_file_paths: tuple[Path, ...]
 
 
-def import_model(
+def import_message_processor(
     model_spec: ModelSpec,
+    output_dir: Path | str | None = None,
+    progress_callback: Callable[[StatusEvent], None] | None = None,
+) -> MessageProcessor:
+    tokenizer_file = download_file(model_spec.configs.tokenizer, model_spec.repo, output_dir, progress_callback)
+    tokenizer_config_file = download_file(
+        model_spec.configs.tokenizer_config,
+        model_spec.repo,
+        output_dir,
+        progress_callback,
+    )
+    tokenizer_config = HFTokenizerConfig.from_json(tokenizer_config_file)
+    if tokenizer_config.chat_template is None:
+        if model_spec.configs.chat_template is None:
+            raise ValueError("Missing chat template.")
+        chat_template_file = download_file(model_spec.configs.chat_template, model_spec.repo, output_dir)
+        prompt_template = chat_template_file.read_text()
+    else:
+        if model_spec.configs.chat_template is not None:
+            raise ValueError("Conflicting chat template specifications.")
+        prompt_template = tokenizer_config.chat_template
+    tokenizer = Tokenizer.from_file(str(tokenizer_file))
+    tokenizer.add_special_tokens(tokenizer_config.added_tokens())
+    message_processor_config = MessageProcessorConfig(
+        prompt_template=prompt_template,
+        output_parser_regex=model_spec.output_parser_regex,
+        system_role_name=model_spec.system_role_name,
+        user_role_name=model_spec.user_role_name,
+        assistant_role_name=model_spec.assistant_role_name,
+        bos_token=tokenizer_config.bos_token,
+    )
+    return MessageProcessor(config=message_processor_config, tokenizer=tokenizer)
+
+
+def import_model(
+    model_spec: ModelSpec | str,
     *,
     context_length: int | None = None,
     precision: DTypeLike | None = None,
     accumulation_precision: DTypeLike = jnp.float32,
+    progress_callback: Callable[[StatusEvent], None] | None = None,
 ) -> ImportResults:
-    foreign_config_file = download_config_file(model_spec)
-    foreign_config = model_spec.config_type.from_json(foreign_config_file)
+    if isinstance(model_spec, str):
+        try:
+            model_spec = REPO_TO_MODEL[model_spec]
+        except KeyError as e:
+            raise ValueError(f"Unknown model: {model_spec}") from e
+
+    foreign_decoder_config_file = download_config_file(model_spec)
+    foreign_decoder_config = model_spec.config_type.from_json(foreign_decoder_config_file)
 
-    tokenizer_file_paths = download_tokenizer_files(model_spec)
     if precision is None:
-        precision = foreign_config.default_precision
+        precision = foreign_decoder_config.default_precision
+
+    weights_paths = download_weights(model_spec, progress_callback=progress_callback)
+    weights_dict: ChainMap[str, Array] = ChainMap(
+        *[model_spec.weights_type.load(weights_path, precision) for weights_path in weights_paths],  # type: ignore
+    )
 
-    weights_paths = download_weights(model_spec)
-    weights_dict = {}
-    for weights_path in weights_paths:
-        weights_dict.update(model_spec.weights_type.load(weights_path, precision))
+    if progress_callback is not None:
+        progress_callback(InitializingModelEvent())
+
+    decoder = foreign_decoder_config.load_decoder(context_length, precision, accumulation_precision, weights_dict)
+
+    if progress_callback is not None:
+        progress_callback(FinishedInitializingModelEvent())
+
+    message_processor = import_message_processor(model_spec)
+
+    stop_token_ids = tuple(foreign_decoder_config.eos_token_ids)
+
+    if model_spec.configs.generation_config is not None:
+        hf_generation_config_file = download_file(model_spec.configs.generation_config, model_spec.repo)
+        hf_generation_config = HFGenerationConfig.from_json(hf_generation_config_file)
+        generation_config = GenerationConfig(
+            stop_token_ids=stop_token_ids,
+            temperature=hf_generation_config.temperature,
+            top_p=hf_generation_config.top_p,
+            top_k=hf_generation_config.top_k,
+            banned_tokens=None,
+        )
+    else:
+        generation_config = GenerationConfig(
+            stop_token_ids=stop_token_ids,
+            temperature=None,
+            top_p=None,
+            top_k=None,
+            banned_tokens=None,
+        )
+
+    language_model_config = LanguageModelConfig(
+        decoder_config=decoder.config,
+        message_processor_config=message_processor.config,
+        generation_config=generation_config,
+    )
 
-    model = foreign_config.load_model(context_length, precision, accumulation_precision, weights_dict)
+    language_model = LanguageModel(language_model_config, decoder, message_processor)
     metadata = ModelMetadata(
         toolchain_version=LALAMO_VERSION,
         vendor=model_spec.vendor,
@@ -105,7 +222,6 @@ def import_model(
         quantization=model_spec.quantization,
         repo=model_spec.repo,
         use_cases=model_spec.use_cases,
-        model_config=model.config,
-        tokenizer_file_names=tuple(p.name for p in tokenizer_file_paths),
+        model_config=language_model_config,
     )
-    return ImportResults(model, metadata, tokenizer_file_paths)
+    return ImportResults(language_model, metadata)
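import_model now accepts either a ModelSpec or a bare repo string, and surfaces download and initialization progress through StatusEvent callbacks. A sketch of the new call shape, assuming these names are re-exported from lalamo.model_import as the __all__ list above suggests, with an illustrative repo id that must be a key of REPO_TO_MODEL:

    from lalamo.model_import import DownloadingFileEvent, StatusEvent, import_model

    def report(event: StatusEvent) -> None:
        # One event when each file starts/finishes downloading, plus a pair
        # of events bracketing decoder initialization.
        if isinstance(event, DownloadingFileEvent):
            print(f"downloading {event.file.filename}...")
        else:
            print(type(event).__name__)

    # Unknown repo strings raise ValueError via the REPO_TO_MODEL lookup.
    model, metadata = import_model("Qwen/Qwen3-0.6B", progress_callback=report)
    print(metadata.repo, metadata.quantization)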
lalamo/model_import/{configs → decoder_configs}/__init__.py
@@ -1,5 +1,4 @@
 from .common import ForeignConfig
-
 # from .executorch import ETLlamaConfig
 from .huggingface import (
     HFGemma2Config,
lalamo/model_import/{configs → decoder_configs}/common.py
@@ -1,11 +1,11 @@
 import json
 from abc import abstractmethod
+from collections.abc import Mapping
 from dataclasses import dataclass
 from pathlib import Path
 from typing import ClassVar, Self
 
 import cattrs
-import jax
 from jaxtyping import Array, DTypeLike
 
 from lalamo.modules import Decoder, DecoderConfig
@@ -18,6 +18,12 @@ class ForeignConfig:
     _converter: ClassVar[cattrs.Converter] = cattrs.Converter()
     _converter.register_structure_hook(int | list[int], lambda v, _: v)
 
+    eos_token_id: int | list[int]
+
+    @property
+    def eos_token_ids(self) -> list[int]:
+        return [self.eos_token_id] if isinstance(self.eos_token_id, int) else self.eos_token_id
+
     @property
     @abstractmethod
     def default_precision(self) -> DTypeLike: ...
@@ -29,11 +35,6 @@ class ForeignConfig:
             config = json.load(f)
         return cls._converter.structure(config, cls)
 
-    def to_json(self, json_path: Path | str) -> None:
-        json_path = Path(json_path)
-        with open(json_path, "w") as f:
-            json.dump(self._converter.unstructure(self), f, indent=2)
-
     def to_decoder_config(
         self,
         context_length: int | None,
@@ -46,17 +47,17 @@ class ForeignConfig:
     def _load_weights(
         cls,
         model: Decoder,
-        weights_dict: dict[str, Array],
+        weights_dict: Mapping[str, Array],
     ) -> Decoder:
         raise NotImplementedError
 
-    def load_model(
+    def load_decoder(
        self,
         context_length: int | None,
         activation_precision: DTypeLike,
         accumulation_precision: DTypeLike,
-        weights_dict: dict[str, Array],
+        weights_dict: Mapping[str, Array],
     ) -> Decoder:
         config = self.to_decoder_config(context_length, activation_precision, accumulation_precision)
-        model = config.random_init(key=jax.random.PRNGKey(0))
+        model = config.empty()
         return self._load_weights(model, weights_dict)
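Two quiet but significant changes land here: load_model becomes load_decoder and builds its module tree with config.empty() instead of config.random_init(key=jax.random.PRNGKey(0)), so weights that are about to be overwritten are never randomly initialized (which is also why the jax import above is dropped). And widening weights_dict from dict to Mapping is what lets import_model pass a ChainMap over the per-shard weight dictionaries without merging them into a single dict.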
lalamo/model_import/{configs → decoder_configs}/huggingface/common.py
@@ -1,18 +1,19 @@
+from collections.abc import Mapping
 from dataclasses import dataclass
 from typing import Literal
 
 import jax.numpy as jnp
 from jaxtyping import Array, DTypeLike
 
-from lalamo.model_import.configs import ForeignConfig
+from lalamo.model_import.decoder_configs import ForeignConfig
 from lalamo.model_import.loaders import load_huggingface
 from lalamo.modules import Decoder
 
 __all__ = [
-    "HuggingFaceConfig",
     "AWQQuantizationConfig",
     "GPTQMetaConfig",
-    "GPTQQuantizationConfig"
+    "GPTQQuantizationConfig",
+    "HuggingFaceConfig",
 ]
 
 
@@ -59,6 +60,10 @@ class GPTQQuantizationConfig:
 class HuggingFaceConfig(ForeignConfig):
     torch_dtype: Literal["bfloat16", "float16", "float32"]
 
+    @property
+    def eos_token_ids(self) -> list[int]:
+        return [self.eos_token_id] if isinstance(self.eos_token_id, int) else self.eos_token_id
+
     @property
     def default_precision(self) -> DTypeLike:
         return jnp.dtype(self.torch_dtype)
@@ -67,6 +72,6 @@ class HuggingFaceConfig(ForeignConfig):
     def _load_weights(
         cls,
         model: Decoder,
-        weights_dict: dict[str, Array],
+        weights_dict: Mapping[str, Array],
     ) -> Decoder:
         return load_huggingface(model, weights_dict)
lalamo/model_import/{configs → decoder_configs}/huggingface/gemma3.py
@@ -97,12 +97,12 @@ class HFGemma3TextConfigRaw:
         global_rope_config = UnscaledRoPEConfig(
             precision=activation_precision,
             base=self.rope_theta,
-            max_sequence_length=self.max_position_embeddings,
+            max_sequence_length=context_length or self.max_position_embeddings,
         )
         local_rope_config = UnscaledRoPEConfig(
             precision=activation_precision,
             base=self.rope_local_base_freq,
-            max_sequence_length=self.max_position_embeddings,
+            max_sequence_length=context_length or self.max_position_embeddings,
         )
 
         linear_config = FullPrecisionLinearConfig(precision=activation_precision)
lalamo/model_import/{configs → decoder_configs}/huggingface/llama.py
@@ -85,13 +85,13 @@ class HFLlamaConfig(HuggingFaceConfig):
             rope_config = UnscaledRoPEConfig(
                 precision=activation_precision,
                 base=self.rope_theta,
-                max_sequence_length=self.max_position_embeddings,
+                max_sequence_length=context_length or self.max_position_embeddings,
             )
         else:
             rope_config = LlamaRoPEConfig(
                 precision=activation_precision,
                 base=self.rope_theta,
-                max_sequence_length=self.max_position_embeddings,
+                max_sequence_length=context_length or self.max_position_embeddings,
                 scaling_factor=self.rope_scaling.factor,
                 original_context_length=self.rope_scaling.original_max_position_embeddings,
                 low_frequency_factor=self.rope_scaling.low_freq_factor,
lalamo/model_import/{configs → decoder_configs}/huggingface/mistral.py
@@ -70,7 +70,7 @@ class HFMistralConfig(HuggingFaceConfig):
         rope_config = UnscaledRoPEConfig(
             precision=activation_precision,
             base=self.rope_theta,
-            max_sequence_length=self.max_position_embeddings,
+            max_sequence_length=context_length or self.max_position_embeddings,
         )
 
         rmsnorm_config = RMSNormConfig(
lalamo/model_import/{configs → decoder_configs}/huggingface/qwen2.py
@@ -84,7 +84,7 @@ class HFQwen2Config(HuggingFaceConfig):
         rope_config = UnscaledRoPEConfig(
             precision=activation_precision,
             base=self.rope_theta,
-            max_sequence_length=self.max_position_embeddings,
+            max_sequence_length=context_length or self.max_position_embeddings,
         )
         rmsnorm_config = RMSNormConfig(
             scale_precision=activation_precision,
lalamo/model_import/{configs → decoder_configs}/huggingface/qwen3.py
@@ -82,7 +82,7 @@ class HFQwen3Config(HuggingFaceConfig):
         rope_config = UnscaledRoPEConfig(
             precision=activation_precision,
             base=self.rope_theta,
-            max_sequence_length=self.max_position_embeddings,
+            max_sequence_length=context_length or self.max_position_embeddings,
         )
         rmsnorm_config = RMSNormConfig(
             scale_precision=activation_precision,
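The same one-line fix recurs across gemma3, llama, mistral, qwen2, and qwen3 above: when a caller passes an explicit context_length, the RoPE tables are now sized to it, falling back to the checkpoint's max_position_embeddings only when context_length is None.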
lalamo/model_import/huggingface_generation_config.py
@@ -0,0 +1,44 @@
+import json
+from dataclasses import dataclass
+from pathlib import Path
+from typing import ClassVar
+
+import cattrs
+
+__all__ = ["HFGenerationConfig"]
+
+
+@dataclass(frozen=True)
+class HFGenerationConfig:
+    _converter: ClassVar[cattrs.Converter] = cattrs.Converter()
+    _converter.register_structure_hook(int | list[int], lambda v, _: v)
+    _converter.register_structure_hook(int | list[int] | None, lambda v, _: v)
+
+    # -------- identity / bookkeeping --------
+    _from_model_config: bool | None = None  # some Mistral & DeepSeek models
+    transformers_version: str | None = None  # library version that saved the file
+
+    # -------- special-token ids -------------
+    bos_token_id: int | None = None
+    eos_token_id: int | list[int] | None = None
+    pad_token_id: int | None = None
+
+    # -------- backend hints -----------------
+    cache_implementation: str | None = None  # “hybrid” for Gemma 3/2
+
+    # -------- sampling strategy -------------
+    do_sample: bool | None = None
+    temperature: float | None = None
+    top_p: float | None = None
+    top_k: int | None = None
+    repetition_penalty: float | None = None
+
+    # -------- length limits -----------------
+    max_length: int | None = None  # seen in Llama 3, Gemma 2/3
+
+    @classmethod
+    def from_json(cls, json_path: Path | str) -> "HFGenerationConfig":
+        json_path = Path(json_path)
+        with open(json_path) as f:
+            config = json.load(f)
+        return cls._converter.structure(config, cls)
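HFGenerationConfig mirrors the fields HuggingFace writes to generation_config.json, with everything optional so partial files still parse. A small sketch of loading one; the file contents and path here are made up for the example:

    import json
    from pathlib import Path

    from lalamo.model_import.huggingface_generation_config import HFGenerationConfig

    # Write a representative generation_config.json (illustrative values).
    payload = {"bos_token_id": 1, "eos_token_id": [2, 3], "do_sample": True, "temperature": 0.7, "top_p": 0.9}
    Path("generation_config.json").write_text(json.dumps(payload))

    config = HFGenerationConfig.from_json("generation_config.json")
    print(config.temperature, config.top_p, config.top_k)  # 0.7 0.9 None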