lalamo-0.2.7-py3-none-any.whl → lalamo-0.3.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lalamo/__init__.py +1 -1
- lalamo/common.py +79 -29
- lalamo/language_model.py +106 -83
- lalamo/main.py +91 -18
- lalamo/message_processor.py +170 -0
- lalamo/model_import/common.py +159 -43
- lalamo/model_import/{configs → decoder_configs}/__init__.py +0 -1
- lalamo/model_import/{configs → decoder_configs}/common.py +11 -10
- lalamo/model_import/{configs → decoder_configs}/huggingface/common.py +9 -4
- lalamo/model_import/{configs → decoder_configs}/huggingface/gemma3.py +2 -2
- lalamo/model_import/{configs → decoder_configs}/huggingface/llama.py +2 -2
- lalamo/model_import/{configs → decoder_configs}/huggingface/mistral.py +1 -1
- lalamo/model_import/{configs → decoder_configs}/huggingface/qwen2.py +1 -1
- lalamo/model_import/{configs → decoder_configs}/huggingface/qwen3.py +1 -1
- lalamo/model_import/huggingface_generation_config.py +44 -0
- lalamo/model_import/huggingface_tokenizer_config.py +85 -0
- lalamo/model_import/loaders/common.py +2 -1
- lalamo/model_import/loaders/huggingface.py +12 -10
- lalamo/model_import/model_specs/__init__.py +3 -2
- lalamo/model_import/model_specs/common.py +32 -34
- lalamo/model_import/model_specs/deepseek.py +1 -10
- lalamo/model_import/model_specs/gemma.py +2 -25
- lalamo/model_import/model_specs/huggingface.py +2 -12
- lalamo/model_import/model_specs/llama.py +2 -58
- lalamo/model_import/model_specs/mistral.py +9 -19
- lalamo/model_import/model_specs/pleias.py +3 -13
- lalamo/model_import/model_specs/polaris.py +5 -7
- lalamo/model_import/model_specs/qwen.py +12 -111
- lalamo/model_import/model_specs/reka.py +4 -13
- lalamo/modules/__init__.py +2 -1
- lalamo/modules/attention.py +90 -10
- lalamo/modules/common.py +51 -4
- lalamo/modules/decoder.py +90 -8
- lalamo/modules/decoder_layer.py +85 -8
- lalamo/modules/embedding.py +95 -29
- lalamo/modules/kv_cache.py +3 -3
- lalamo/modules/linear.py +170 -130
- lalamo/modules/mlp.py +40 -7
- lalamo/modules/normalization.py +24 -6
- lalamo/modules/rope.py +24 -6
- lalamo/sampling.py +99 -0
- lalamo/utils.py +86 -1
- {lalamo-0.2.7.dist-info → lalamo-0.3.0.dist-info}/METADATA +6 -6
- lalamo-0.3.0.dist-info/RECORD +58 -0
- lalamo-0.2.7.dist-info/RECORD +0 -54
- /lalamo/model_import/{configs → decoder_configs}/executorch.py +0 -0
- /lalamo/model_import/{configs → decoder_configs}/huggingface/__init__.py +0 -0
- /lalamo/model_import/{configs → decoder_configs}/huggingface/gemma2.py +0 -0
- {lalamo-0.2.7.dist-info → lalamo-0.3.0.dist-info}/WHEEL +0 -0
- {lalamo-0.2.7.dist-info → lalamo-0.3.0.dist-info}/entry_points.txt +0 -0
- {lalamo-0.2.7.dist-info → lalamo-0.3.0.dist-info}/licenses/LICENSE +0 -0
- {lalamo-0.2.7.dist-info → lalamo-0.3.0.dist-info}/top_level.txt +0 -0
lalamo/model_import/huggingface_tokenizer_config.py (new file)

```diff
@@ -0,0 +1,85 @@
+import json
+from dataclasses import dataclass
+from pathlib import Path
+from typing import ClassVar
+
+import cattrs
+from tokenizers import AddedToken
+
+__all__ = ["HFAddedToken", "HFTokenizerConfig"]
+
+
+@dataclass(frozen=True)
+class HFAddedToken:
+    content: str
+    single_word: bool
+    normalized: bool
+    special: bool
+    lstrip: bool
+    rstrip: bool
+
+    def to_added_token(self) -> AddedToken:
+        return AddedToken(
+            self.content,
+            single_word=self.single_word,
+            normalized=self.normalized,
+            special=self.special,
+            lstrip=self.lstrip,
+            rstrip=self.rstrip,
+        )
+
+
+@dataclass(frozen=True)
+class HFTokenizerConfig:
+    _converter: ClassVar[cattrs.Converter] = cattrs.Converter()
+    _converter.register_structure_hook(int | list[int], lambda v, _: v)
+
+    # ---------- core identity ----------
+    tokenizer_class: str | None = None
+    model_max_length: int | None = None
+    padding_side: str | None = None  # "left" | "right"
+    truncation_side: str | None = None  # "left" | "right"
+    legacy: bool | None = None
+    use_fast: bool | None = None
+    clean_up_tokenization_spaces: bool | None = None
+
+    # ---------- behaviour flags ----------
+    add_bos_token: bool | None = None
+    add_eos_token: bool | None = None
+    add_prefix_space: bool | None = None
+    use_default_system_prompt: bool | None = None
+    spaces_between_special_tokens: bool | None = None
+    do_lower_case: bool | None = None
+
+    # ---------- special tokens ----------
+    bos_token: str | None = None
+    eos_token: str | None = None
+    unk_token: str | None = None
+    pad_token: str | None = None
+    sep_token: str | None = None
+    cls_token: str | None = None
+    mask_token: str | None = None
+    added_tokens_decoder: dict[str, HFAddedToken] | None = None
+
+    # ---------- chat / SentencePiece ----------
+    chat_template: str | None = None
+    sp_model_kwargs: dict | None = None
+
+    # ---------- extras ----------
+    language: str | None = None
+    task: str | None = None
+
+    def added_tokens(self) -> list[AddedToken]:
+        if self.added_tokens_decoder is None:
+            return []
+        return [
+            AddedToken(content=token.content, single_word=token.single_word, normalized=token.normalized)
+            for token in self.added_tokens_decoder.values()
+        ]
+
+    @classmethod
+    def from_json(cls, json_path: Path | str) -> "HFTokenizerConfig":
+        json_path = Path(json_path)
+        with open(json_path) as f:
+            config = json.load(f)
+        return cls._converter.structure(config, cls)
```
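The new `HFTokenizerConfig` mirrors the fields of a Hugging Face `tokenizer_config.json` and structures them via `cattrs`. A minimal usage sketch (the local file path is an assumption):

```python
# Minimal sketch; assumes a tokenizer_config.json has been downloaded locally.
from lalamo.model_import.huggingface_tokenizer_config import HFTokenizerConfig

config = HFTokenizerConfig.from_json("tokenizer_config.json")
print(config.eos_token)                            # e.g. "</s>", if the file defines one
print([t.content for t in config.added_tokens()])  # extra tokens to register with a tokenizer
```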
lalamo/common.py

```diff
@@ -1,6 +1,7 @@
 from collections.abc import Callable, Iterable
 
 import equinox as eqx
+from jax._src.api import ShapeDtypeStruct
 from jax.tree import leaves_with_path
 from jax.tree_util import keystr
 from jaxtyping import Array, PyTree
@@ -18,7 +19,7 @@ def _get_name(leaf: PyTree, tree: PyTree) -> str:
 
 
 def _check_compatible(old_value: PyTree, new_value: PyTree, module: eqx.Module) -> None:
-    if isinstance(old_value, Array) and isinstance(new_value, Array):
+    if isinstance(old_value, (Array, ShapeDtypeStruct)) and isinstance(new_value, Array):
         name = _get_name(old_value, module)
         if old_value.shape != new_value.shape:
             raise ValueError(f"Expected parameter {name} to have shape {old_value.shape}, got {new_value.shape}")
```
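Accepting `ShapeDtypeStruct` alongside concrete arrays lets the compatibility check run against modules whose leaves are abstract shape/dtype placeholders rather than materialized weights. A short illustration of where such leaves come from, assuming they originate from shape-only tracing such as `jax.eval_shape`:

```python
# Shape-only tracing yields ShapeDtypeStruct placeholders instead of real arrays,
# which is why the isinstance check above now accepts both.
import jax
import jax.numpy as jnp

abstract = jax.eval_shape(lambda: jnp.zeros((4, 8), jnp.float32))
print(type(abstract).__name__)         # ShapeDtypeStruct
print(abstract.shape, abstract.dtype)  # (4, 8) float32 -- enough for a shape/dtype check
```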
lalamo/model_import/loaders/huggingface.py

```diff
@@ -1,3 +1,5 @@
+from collections.abc import Mapping
+
 import jax.numpy as jnp
 from einops import rearrange
 from jaxtyping import Array
@@ -80,7 +82,7 @@ def _process_quantized_tensors(
 
 
 def _fuse_full_precision_weights(
-    weights_dict: dict[str, Array],
+    weights_dict: Mapping[str, Array],
     path: ParameterPath,
     sublayers_to_fuse: list[str] | None,
 ) -> Array:
@@ -92,7 +94,7 @@ def _fuse_full_precision_weights(
 
 
 def _fuse_quantized_weights(
-    weights_dict: dict[str, Array],
+    weights_dict: Mapping[str, Array],
     path: ParameterPath,
     sublayers_to_fuse: list[str] | None,
 ) -> tuple[Array, Array, Array]:
@@ -117,7 +119,7 @@ def _fuse_quantized_weights(
 
 def load_linear(
     module: LinearBase,
-    weights_dict: dict[str, Array],
+    weights_dict: Mapping[str, Array],
     path: ParameterPath,
     sublayers_to_fuse: list[str] | None = None,
 ) -> LinearBase:
@@ -162,7 +164,7 @@ def load_linear(
     raise TypeError(f"Unsupported module type for loading: {type(module)}")
 
 
-def load_mlp(module: MLP, weights_dict: dict[str, Array], path: ParameterPath) -> MLP:
+def load_mlp(module: MLP, weights_dict: Mapping[str, Array], path: ParameterPath) -> MLP:
     up_projection = load_linear(module.up_projection, weights_dict, path, sublayers_to_fuse=["up_proj", "gate_proj"])
     down_projection = load_linear(module.down_projection, weights_dict, path / "down_proj")
     return load_parameters(lambda m: (m.up_projection, m.down_projection), module, (up_projection, down_projection))
@@ -170,7 +172,7 @@ def load_mlp(module: MLP, weights_dict: dict[str, Array], path: ParameterPath) -
 
 def load_rmsnorm(
     module: RMSNorm,
-    weights_dict: dict[str, Array],
+    weights_dict: Mapping[str, Array],
     path: ParameterPath,
 ) -> RMSNorm:
     scales = weights_dict[path / "weight"]
@@ -179,7 +181,7 @@ def load_rmsnorm(
 
 def load_attention(
     module: Attention,
-    weights_dict: dict[str, Array],
+    weights_dict: Mapping[str, Array],
     path: ParameterPath,
 ) -> Attention:
     qkv_projection = load_linear(
@@ -209,7 +211,7 @@ def load_attention(
 
 def load_decoder_layer(
     module: DecoderLayer,
-    weights_dict: dict[str, Array],
+    weights_dict: Mapping[str, Array],
     path: ParameterPath,
 ) -> DecoderLayer:
     pre_attention_norm = load_rmsnorm(
@@ -257,7 +259,7 @@ def load_decoder_layer(
 
 def load_tied_embedding(
     module: TiedEmbedding,
-    weights_dict: dict[str, Array],
+    weights_dict: Mapping[str, Array],
     decoder_path: ParameterPath,
 ) -> TiedEmbedding:
     weights = weights_dict[decoder_path / "embed_tokens" / "weight"]
@@ -266,7 +268,7 @@ def load_tied_embedding(
 
 def load_untied_embedding(
     module: UntiedEmbedding,
-    weights_dict: dict[str, Array],
+    weights_dict: Mapping[str, Array],
     decoder_path: ParameterPath,
     lm_head_path: ParameterPath,
 ) -> UntiedEmbedding:
@@ -277,7 +279,7 @@ def load_untied_embedding(
 
 def load_huggingface(
     module: Decoder,
-    weights_dict: dict[str, Array],
+    weights_dict: Mapping[str, Array],
 ) -> Decoder:
     if any(key.startswith("language_model.") for key in weights_dict):
         base_path = ParameterPath("language_model")
```
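Loosening the `weights_dict` annotation from `dict[str, Array]` to `Mapping[str, Array]` means the loaders also accept read-only views such as the `MapDictValues` wrapper that `WeightsType.load` now returns, so tensors can be cast (or converted from torch) lazily on access instead of up front. A rough sketch of such a wrapper, assuming the real `lalamo.utils.MapDictValues` behaves similarly:

```python
# Rough sketch of a lazy value-mapping view satisfying Mapping[str, Array].
# Illustrative only: the real MapDictValues lives in lalamo.utils and may differ.
from collections.abc import Callable, Iterator, Mapping


class MapDictValues(Mapping):
    """Read-only dict view that applies `fn` to each value on access."""

    def __init__(self, fn: Callable, base: Mapping) -> None:
        self._fn = fn
        self._base = base

    def __getitem__(self, key):
        # The transformation happens here, so nothing is materialized up front.
        return self._fn(self._base[key])

    def __iter__(self) -> Iterator:
        return iter(self._base)

    def __len__(self) -> int:
        return len(self._base)


weights = MapDictValues(lambda v: v * 2, {"w": 1, "b": 2})
print(weights["w"])  # 2 -- computed on access
```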
lalamo/model_import/model_specs/__init__.py

```diff
@@ -1,4 +1,4 @@
-from .common import ModelSpec, UseCase, build_quantized_models
+from .common import FileSpec, ModelSpec, UseCase, build_quantized_models
 from .deepseek import DEEPSEEK_MODELS
 from .gemma import GEMMA_MODELS
 from .huggingface import HUGGINGFACE_MODELS
@@ -12,6 +12,7 @@ from .reka import REKA_MODELS
 __all__ = [
     "ALL_MODELS",
     "REPO_TO_MODEL",
+    "FileSpec",
     "ModelSpec",
     "UseCase",
 ]
@@ -23,7 +24,7 @@ ALL_MODEL_LISTS = [
     GEMMA_MODELS,
     HUGGINGFACE_MODELS,
     MISTRAL_MODELS,
-    PLEIAS_MODELS,
+    # PLEIAS_MODELS,  # TODO(norpadon): Add chat template
     POLARIS_MODELS,
     QWEN_MODELS,
     REKA_MODELS,
```
lalamo/model_import/model_specs/common.py

```diff
@@ -1,4 +1,7 @@
-from dataclasses import dataclass
+from collections.abc import (
+    Mapping,
+)
+from dataclasses import dataclass, field
 from enum import Enum
 from pathlib import Path
 
@@ -6,18 +9,19 @@ import jax.numpy as jnp
 from jaxtyping import Array, DTypeLike
 from safetensors.flax import load_file as load_safetensors
 
-from lalamo.model_import.configs import ForeignConfig
+from lalamo.model_import.decoder_configs import ForeignConfig
+from lalamo.modules.torch_interop import torch_to_jax
 from lalamo.quantization import QuantizationMode
+from lalamo.utils import MapDictValues
 
 __all__ = [
-    "HUGGINFACE_GENERATION_CONFIG_FILE",
-    "HUGGINGFACE_TOKENIZER_FILES",
+    "ConfigMap",
+    "FileSpec",
     "ModelSpec",
-    "TokenizerFileSpec",
     "UseCase",
+    "WeightsType",
     "awq_model_spec",
     "build_quantized_models",
-    "huggingface_weight_files",
 ]
 
 
@@ -31,16 +35,14 @@ class WeightsType(Enum):
     SAFETENSORS = "safetensors"
     TORCH = "torch"
 
-    def load(self, filename: Path | str, float_dtype: DTypeLike) -> dict[str, jnp.ndarray]:
+    def load(self, filename: Path | str, float_dtype: DTypeLike) -> Mapping[str, jnp.ndarray]:
         if self == WeightsType.SAFETENSORS:
-            return
+            return MapDictValues(lambda v: cast_if_float(v, float_dtype), load_safetensors(filename))
 
         import torch
 
-        from lalamo.modules.torch_interop import torch_to_jax
-
         torch_weights = torch.load(filename, map_location="cpu", weights_only=True)
-        return
+        return MapDictValues(lambda v: cast_if_float(torch_to_jax(v), float_dtype), torch_weights)
 
 
 class UseCase(Enum):
@@ -48,9 +50,18 @@ class UseCase(Enum):
 
 
 @dataclass(frozen=True)
-class TokenizerFileSpec:
-    repo: str | None
+class FileSpec:
     filename: str
+    repo: str | None = None
+
+
+@dataclass(frozen=True)
+class ConfigMap:
+    model_config: FileSpec = field(default=FileSpec("config.json"))
+    tokenizer: FileSpec = field(default=FileSpec("tokenizer.json"))
+    tokenizer_config: FileSpec = field(default=FileSpec("tokenizer_config.json"))
+    generation_config: FileSpec | None = field(default=FileSpec("generation_config.json"))
+    chat_template: FileSpec | None = None
 
 
 @dataclass(frozen=True)
@@ -62,19 +73,16 @@ class ModelSpec:
     quantization: QuantizationMode | None
     repo: str
     config_type: type[ForeignConfig]
-    config_file_name: str
-    weights_file_names: tuple[str, ...]
-    weights_type: WeightsType
-    tokenizer_files: tuple[TokenizerFileSpec, ...]
+    output_parser_regex: str | None = None
+    system_role_name: str = "system"
+    user_role_name: str = "user"
+    assistant_role_name: str = "assistant"
+    tool_role_name: str = "tool"
+    weights_type: WeightsType = WeightsType.SAFETENSORS
+    configs: ConfigMap = field(default=ConfigMap())
     use_cases: tuple[UseCase, ...] = tuple()
 
 
-def huggingface_weight_files(num_shards: int) -> tuple[str, ...]:
-    if num_shards == 1:
-        return ("model.safetensors",)
-    return tuple(f"model-{i:05d}-of-{num_shards:05d}.safetensors" for i in range(1, num_shards + 1))
-
-
 def awq_model_spec(
     model_spec: ModelSpec,
     repo: str,
@@ -88,10 +96,8 @@ def awq_model_spec(
         quantization=quantization,
         repo=repo,
         config_type=model_spec.config_type,
-
-        weights_file_names=huggingface_weight_files(1),
+        configs=model_spec.configs,
         weights_type=model_spec.weights_type,
-        tokenizer_files=model_spec.tokenizer_files,
         use_cases=model_spec.use_cases,
     )
@@ -115,11 +121,3 @@ def build_quantized_models(model_specs: list[ModelSpec]) -> list[ModelSpec]:
         quantized_model_spec = awq_model_spec(model_spec, quantized_repo)
         quantized_model_specs.append(quantized_model_spec)
     return quantized_model_specs
-
-
-HUGGINGFACE_TOKENIZER_FILES = (
-    TokenizerFileSpec(repo=None, filename="tokenizer.json"),
-    TokenizerFileSpec(repo=None, filename="tokenizer_config.json"),
-)
-
-HUGGINFACE_GENERATION_CONFIG_FILE = TokenizerFileSpec(repo=None, filename="generation_config.json")
```
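`FileSpec` plus `ConfigMap` replace the old `config_file_name` / `weights_file_names` / `tokenizer_files` trio: the Hugging Face defaults (`config.json`, `tokenizer.json`, `tokenizer_config.json`, `generation_config.json`) are baked into `ConfigMap`, and a spec only overrides what differs. A hedged sketch of this, with import paths taken from the diff:

```python
# Sketch of the new config plumbing; paths follow the diff above.
from lalamo.model_import.model_specs.common import ConfigMap, FileSpec

defaults = ConfigMap()              # config.json, tokenizer.json, tokenizer_config.json, ...
print(defaults.tokenizer.filename)  # "tokenizer.json"

# A model that ships its chat template as a separate Jinja file only overrides
# that one entry (repo=None means "same repo as the weights"):
polaris_style = ConfigMap(chat_template=FileSpec("chat_template.jinja"))
```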
lalamo/model_import/model_specs/deepseek.py

```diff
@@ -1,11 +1,7 @@
-from lalamo.model_import.configs import HFQwen2Config
+from lalamo.model_import.decoder_configs import HFQwen2Config
 
 from .common import (
-    HUGGINFACE_GENERATION_CONFIG_FILE,
-    HUGGINGFACE_TOKENIZER_FILES,
     ModelSpec,
-    WeightsType,
-    huggingface_weight_files,
 )
 
 __all__ = ["DEEPSEEK_MODELS"]
@@ -19,10 +15,5 @@ DEEPSEEK_MODELS = [
         quantization=None,
         repo="deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
         config_type=HFQwen2Config,
-        config_file_name="config.json",
-        weights_file_names=huggingface_weight_files(1),
-        weights_type=WeightsType.SAFETENSORS,
-        tokenizer_files=(*HUGGINGFACE_TOKENIZER_FILES, HUGGINFACE_GENERATION_CONFIG_FILE),
-        use_cases=tuple(),
     ),
 ]
```
lalamo/model_import/model_specs/gemma.py

```diff
@@ -1,12 +1,6 @@
-from lalamo.model_import.configs import HFGemma2Config, HFGemma3Config, HFGemma3TextConfig
+from lalamo.model_import.decoder_configs import HFGemma2Config, HFGemma3Config, HFGemma3TextConfig
 
-from .common import (
-    HUGGINFACE_GENERATION_CONFIG_FILE,
-    HUGGINGFACE_TOKENIZER_FILES,
-    ModelSpec,
-    WeightsType,
-    huggingface_weight_files,
-)
+from .common import ModelSpec, WeightsType
 
 __all__ = ["GEMMA_MODELS"]
 
@@ -19,11 +13,6 @@ GEMMA2 = [
         quantization=None,
         repo="google/gemma-2-2b-it",
         config_type=HFGemma2Config,
-        config_file_name="config.json",
-        weights_file_names=huggingface_weight_files(2),
-        weights_type=WeightsType.SAFETENSORS,
-        tokenizer_files=(*HUGGINGFACE_TOKENIZER_FILES, HUGGINFACE_GENERATION_CONFIG_FILE),
-        use_cases=tuple(),
     ),
 ]
 
@@ -36,11 +25,7 @@ GEMMA3 = [
         quantization=None,
         repo="google/gemma-3-1b-it",
         config_type=HFGemma3TextConfig,
-        config_file_name="config.json",
-        weights_file_names=huggingface_weight_files(1),
         weights_type=WeightsType.SAFETENSORS,
-        tokenizer_files=(*HUGGINGFACE_TOKENIZER_FILES, HUGGINFACE_GENERATION_CONFIG_FILE),
-        use_cases=tuple(),
     ),
     ModelSpec(
         vendor="Google",
@@ -50,11 +35,7 @@ GEMMA3 = [
         quantization=None,
         repo="google/gemma-3-4b-it",
         config_type=HFGemma3Config,
-        config_file_name="config.json",
-        weights_file_names=huggingface_weight_files(2),
         weights_type=WeightsType.SAFETENSORS,
-        tokenizer_files=(*HUGGINGFACE_TOKENIZER_FILES, HUGGINFACE_GENERATION_CONFIG_FILE),
-        use_cases=tuple(),
     ),
     ModelSpec(
         vendor="Google",
@@ -64,11 +45,7 @@ GEMMA3 = [
         quantization=None,
         repo="google/gemma-3-27b-it",
         config_type=HFGemma3Config,
-        config_file_name="config.json",
-        weights_file_names=huggingface_weight_files(12),
         weights_type=WeightsType.SAFETENSORS,
-        tokenizer_files=(*HUGGINGFACE_TOKENIZER_FILES, HUGGINFACE_GENERATION_CONFIG_FILE),
-        use_cases=tuple(),
     ),
 ]
 
```
lalamo/model_import/model_specs/huggingface.py

```diff
@@ -1,12 +1,6 @@
-from lalamo.model_import.configs import HFLlamaConfig
+from lalamo.model_import.decoder_configs import HFLlamaConfig
 
-from .common import (
-    HUGGINFACE_GENERATION_CONFIG_FILE,
-    HUGGINGFACE_TOKENIZER_FILES,
-    ModelSpec,
-    WeightsType,
-    huggingface_weight_files,
-)
+from .common import ModelSpec
 
 __all__ = ["HUGGINGFACE_MODELS"]
 
@@ -19,10 +13,6 @@ HUGGINGFACE_MODELS = [
         quantization=None,
         repo="HuggingFaceTB/SmolLM2-1.7B-Instruct",
         config_type=HFLlamaConfig,
-        config_file_name="config.json",
-        weights_file_names=huggingface_weight_files(1),
-        weights_type=WeightsType.SAFETENSORS,
-        tokenizer_files=(*HUGGINGFACE_TOKENIZER_FILES, HUGGINFACE_GENERATION_CONFIG_FILE),
         use_cases=tuple(),
     ),
 ]
```
lalamo/model_import/model_specs/llama.py

```diff
@@ -1,15 +1,6 @@
-from dataclasses import replace
+from lalamo.model_import.decoder_configs import HFLlamaConfig
 
-from lalamo.model_import.configs import HFLlamaConfig
-
-from .common import (
-    HUGGINFACE_GENERATION_CONFIG_FILE,
-    HUGGINGFACE_TOKENIZER_FILES,
-    ModelSpec,
-    TokenizerFileSpec,
-    WeightsType,
-    huggingface_weight_files,
-)
+from .common import ModelSpec
 
 __all__ = ["LLAMA_MODELS"]
 
@@ -22,23 +13,12 @@ LLAMA31 = [
         quantization=None,
         repo="meta-llama/Llama-3.1-8B-Instruct",
         config_type=HFLlamaConfig,
-        config_file_name="config.json",
-        weights_file_names=huggingface_weight_files(4),
-        weights_type=WeightsType.SAFETENSORS,
-        tokenizer_files=(*HUGGINGFACE_TOKENIZER_FILES, HUGGINFACE_GENERATION_CONFIG_FILE),
         use_cases=tuple(),
     ),
 ]
 
 
-def _tokenizer_files_from_another_repo(repo: str) -> tuple[TokenizerFileSpec, ...]:
-    return tuple(
-        replace(spec, repo=repo) for spec in (*HUGGINGFACE_TOKENIZER_FILES, HUGGINFACE_GENERATION_CONFIG_FILE)
-    )
-
-
 LLAMA32 = [
-    # LLAMA
     ModelSpec(
         vendor="Meta",
         family="Llama-3.2",
@@ -47,26 +27,8 @@ LLAMA32 = [
         quantization=None,
         repo="meta-llama/Llama-3.2-1B-Instruct",
         config_type=HFLlamaConfig,
-        config_file_name="config.json",
-        weights_file_names=huggingface_weight_files(1),
-        weights_type=WeightsType.SAFETENSORS,
-        tokenizer_files=(*HUGGINGFACE_TOKENIZER_FILES, HUGGINFACE_GENERATION_CONFIG_FILE),
         use_cases=tuple(),
     ),
-    # ModelSpec(
-    #     vendor="Meta",
-    #     family="Llama-3.2",
-    #     name="Llama-3.2-1B-Instruct-QLoRA",
-    #     size="1B",
-    #     quantization=QuantizationMode.UINT4,
-    #     repo="meta-llama/Llama-3.2-1B-Instruct-QLORA_INT4_EO8",
-    #     config_type=ETLlamaConfig,
-    #     config_file_name="params.json",
-    #     weights_file_names=("consolidated.00.pth",),
-    #     weights_type=WeightsType.TORCH,
-    #     tokenizer_files=_tokenizer_files_from_another_repo("meta-llama/Llama-3.2-1B-Instruct"),
-    #     use_cases=tuple(),
-    # ),
     ModelSpec(
         vendor="Meta",
         family="Llama-3.2",
@@ -75,26 +37,8 @@ LLAMA32 = [
         quantization=None,
         repo="meta-llama/Llama-3.2-3B-Instruct",
         config_type=HFLlamaConfig,
-        config_file_name="config.json",
-        weights_file_names=huggingface_weight_files(2),
-        weights_type=WeightsType.SAFETENSORS,
-        tokenizer_files=(*HUGGINGFACE_TOKENIZER_FILES, HUGGINFACE_GENERATION_CONFIG_FILE),
         use_cases=tuple(),
     ),
-    # ModelSpec(
-    #     vendor="Meta",
-    #     family="Llama-3.2",
-    #     name="Llama-3.2-3B-Instruct-QLoRA",
-    #     size="3B",
-    #     quantization=QuantizationMode.UINT4,
-    #     repo="meta-llama/Llama-3.2-3B-Instruct-QLORA_INT4_EO8",
-    #     config_type=ETLlamaConfig,
-    #     config_file_name="params.json",
-    #     weights_file_names=("consolidated.00.pth",),
-    #     tokenizer_files=_tokenizer_files_from_another_repo("meta-llama/Llama-3.2-3B-Instruct"),
-    #     weights_type=WeightsType.TORCH,
-    #     use_cases=tuple(),
-    # ),
 ]
 
 LLAMA_MODELS = LLAMA31 + LLAMA32
```
lalamo/model_import/model_specs/mistral.py

```diff
@@ -1,15 +1,11 @@
-from dataclasses import replace
-
-from lalamo.model_import.configs import HFMistralConfig
+from lalamo.model_import.decoder_configs import HFMistralConfig
 
 from .common import (
-    HUGGINFACE_GENERATION_CONFIG_FILE,
-    HUGGINGFACE_TOKENIZER_FILES,
+    ConfigMap,
+    FileSpec,
     ModelSpec,
-    TokenizerFileSpec,
     UseCase,
     WeightsType,
-    huggingface_weight_files,
 )
 
 __all__ = ["MISTRAL_MODELS"]
@@ -23,20 +19,13 @@ CODESTRAL = [
         quantization=None,
         repo="mistral-community/Codestral-22B-v0.1",
         config_type=HFMistralConfig,
-        config_file_name="config.json",
-        weights_file_names=huggingface_weight_files(9),
         weights_type=WeightsType.SAFETENSORS,
-        tokenizer_files=(*HUGGINGFACE_TOKENIZER_FILES, HUGGINFACE_GENERATION_CONFIG_FILE),
         use_cases=(UseCase.CODE,),
     ),
 ]
 
 
-def _tokenizer_files_from_another_repo(repo: str) -> tuple[TokenizerFileSpec, ...]:
-    return tuple(
-        replace(spec, repo=repo) for spec in (*HUGGINGFACE_TOKENIZER_FILES, HUGGINFACE_GENERATION_CONFIG_FILE)
-    )
-
+DEVSTRAL_TOKENIZER_REPO = "mistralai/Mistral-Small-3.1-24B-Base-2503"
 
 DEVSTRAL = [
     ModelSpec(
@@ -47,10 +36,11 @@ DEVSTRAL = [
         quantization=None,
         repo="mistralai/Devstral-Small-2505",
         config_type=HFMistralConfig,
-
-
-
-
+        configs=ConfigMap(
+            tokenizer=FileSpec(repo=DEVSTRAL_TOKENIZER_REPO, filename="tokenizer.json"),
+            tokenizer_config=FileSpec(repo=DEVSTRAL_TOKENIZER_REPO, filename="tokenizer_config.json"),
+            generation_config=FileSpec(repo=DEVSTRAL_TOKENIZER_REPO, filename="generation_config.json"),
+        ),
         use_cases=(UseCase.CODE,),
     ),
 ]
```
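With the `replace()`-based tokenizer helper gone, pulling files from a sibling repository is now expressed directly in the spec, as the Devstral entry above shows: a `FileSpec.repo` overrides the model repo for just that file. Restated as a standalone sketch:

```python
# Sketch: per-file repo overrides replace the old replace()-based helper.
from lalamo.model_import.model_specs.common import ConfigMap, FileSpec

TOKENIZER_REPO = "mistralai/Mistral-Small-3.1-24B-Base-2503"

configs = ConfigMap(
    tokenizer=FileSpec("tokenizer.json", repo=TOKENIZER_REPO),
    tokenizer_config=FileSpec("tokenizer_config.json", repo=TOKENIZER_REPO),
    generation_config=FileSpec("generation_config.json", repo=TOKENIZER_REPO),
)
```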
lalamo/model_import/model_specs/pleias.py

```diff
@@ -1,15 +1,10 @@
-from lalamo.model_import.configs import HFLlamaConfig
+from lalamo.model_import.decoder_configs import HFLlamaConfig
 
-from .common import (
-    HUGGINFACE_GENERATION_CONFIG_FILE,
-    HUGGINGFACE_TOKENIZER_FILES,
-    ModelSpec,
-    WeightsType,
-    huggingface_weight_files,
-)
+from .common import ModelSpec
 
 __all__ = ["PLEIAS_MODELS"]
 
+
 PLEIAS_MODELS = [
     ModelSpec(
         vendor="PleIAs",
@@ -19,10 +14,5 @@ PLEIAS_MODELS = [
         quantization=None,
         repo="PleIAs/Pleias-RAG-1B",
         config_type=HFLlamaConfig,
-        config_file_name="config.json",
-        weights_file_names=huggingface_weight_files(1),
-        weights_type=WeightsType.SAFETENSORS,
-        tokenizer_files=(*HUGGINGFACE_TOKENIZER_FILES, HUGGINFACE_GENERATION_CONFIG_FILE),
-        use_cases=tuple(),
     ),
 ]
```
lalamo/model_import/model_specs/polaris.py

```diff
@@ -1,6 +1,6 @@
-from lalamo.model_import.configs import HFQwen3Config
+from lalamo.model_import.decoder_configs import HFQwen3Config
 
-from .common import
+from .common import ConfigMap, FileSpec, ModelSpec
 
 __all__ = ["POLARIS_MODELS"]
 
@@ -13,10 +13,8 @@ POLARIS_MODELS = [
         quantization=None,
         repo="POLARIS-Project/Polaris-4B-Preview",
         config_type=HFQwen3Config,
-
-
-
-        tokenizer_files=(*HUGGINGFACE_TOKENIZER_FILES, TokenizerFileSpec(repo=None, filename="chat_template.jinja")),
-        use_cases=tuple(),
+        configs=ConfigMap(
+            chat_template=FileSpec("chat_template.jinja"),
+        ),
     ),
 ]
```