lalamo 0.5.17__py3-none-any.whl → 0.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40)
  1. lalamo/__init__.py +1 -1
  2. lalamo/commands.py +69 -17
  3. lalamo/common.py +14 -1
  4. lalamo/main.py +148 -27
  5. lalamo/message_processor.py +4 -1
  6. lalamo/model_import/common.py +8 -17
  7. lalamo/model_import/decoder_configs/huggingface/lfm2.py +14 -4
  8. lalamo/model_import/decoder_configs/huggingface/llamba.py +2 -2
  9. lalamo/model_import/decoder_configs/huggingface/modern_bert.py +2 -2
  10. lalamo/model_import/huggingface_generation_config.py +21 -3
  11. lalamo/model_import/loaders/executorch.py +2 -2
  12. lalamo/model_import/loaders/huggingface.py +3 -3
  13. lalamo/model_import/model_specs/common.py +4 -2
  14. lalamo/model_import/model_specs/lfm2.py +41 -9
  15. lalamo/models/language_model.py +7 -6
  16. lalamo/modules/activations.py +1 -1
  17. lalamo/modules/classifier.py +11 -24
  18. lalamo/modules/common.py +4 -1
  19. lalamo/modules/decoder.py +5 -11
  20. lalamo/modules/embedding.py +25 -62
  21. lalamo/modules/linear.py +19 -33
  22. lalamo/modules/mlp.py +9 -19
  23. lalamo/modules/mlx_interop.py +1 -1
  24. lalamo/modules/rope.py +1 -1
  25. lalamo/modules/token_mixers/__init__.py +1 -1
  26. lalamo/modules/token_mixers/attention.py +9 -27
  27. lalamo/modules/token_mixers/mamba.py +9 -24
  28. lalamo/modules/token_mixers/short_conv.py +5 -12
  29. lalamo/modules/transformer.py +10 -20
  30. lalamo/modules/transformer_layer.py +8 -20
  31. lalamo/registry_abc.py +4 -4
  32. lalamo/sampling.py +14 -0
  33. lalamo/speculator/estimator.py +3 -3
  34. lalamo/speculator/ngram.py +1 -1
  35. {lalamo-0.5.17.dist-info → lalamo-0.6.0.dist-info}/METADATA +1 -1
  36. {lalamo-0.5.17.dist-info → lalamo-0.6.0.dist-info}/RECORD +40 -40
  37. {lalamo-0.5.17.dist-info → lalamo-0.6.0.dist-info}/WHEEL +0 -0
  38. {lalamo-0.5.17.dist-info → lalamo-0.6.0.dist-info}/entry_points.txt +0 -0
  39. {lalamo-0.5.17.dist-info → lalamo-0.6.0.dist-info}/licenses/LICENSE +0 -0
  40. {lalamo-0.5.17.dist-info → lalamo-0.6.0.dist-info}/top_level.txt +0 -0
lalamo/__init__.py CHANGED
@@ -27,7 +27,7 @@ from lalamo.speculator import (
     SpeculatorTrainingEvent,
 )
 
-__version__ = "0.5.17"
+__version__ = "0.6.0"
 
 __all__ = [
     "AssistantMessage",
lalamo/commands.py CHANGED
@@ -1,5 +1,5 @@
 import json
-from collections.abc import Callable
+from collections.abc import Callable, Iterable
 from dataclasses import dataclass
 from enum import Enum
 from itertools import chain
@@ -10,7 +10,7 @@ from jaxtyping import DTypeLike
 from lalamo.common import flatten_parameters
 from lalamo.data import import_hf_parquet
 from lalamo.data.lalamo_completions import LalamoCompletion
-from lalamo.message_processor import UserMessage
+from lalamo.message_processor import Message
 from lalamo.model_import import ModelMetadata, ModelSpec, import_model
 from lalamo.model_import.common import (
     DownloadingFileEvent,
@@ -41,8 +41,6 @@ class ConversionCallbacks:
     output_dir: Path
     precision: Precision | None
     context_length: int | None
-    include_traces: bool
-    message_for_trace: str | None
 
     def started(self) -> None:
         pass
@@ -74,16 +72,12 @@ def convert(
     output_dir: Path,
     precision: Precision | None = None,
     context_length: int | None = None,
-    include_traces: bool = False,
-    message_for_trace: str | None = None,
     callbacks_type: Callable[
         [
             ModelSpec,
             Path,
             Precision | None,
             int | None,
-            bool,
-            str | None,
         ],
         ConversionCallbacks,
     ] = ConversionCallbacks,
@@ -93,8 +87,6 @@ def convert(
         output_dir,
         precision,
         context_length,
-        include_traces,
-        message_for_trace,
     )
 
     if precision is not None:
@@ -127,13 +119,6 @@ def convert(
     callbacks.saving_model()
     output_dir.mkdir(parents=True, exist_ok=True)
 
-    if include_traces:
-        message = None if message_for_trace is None else [UserMessage(content=message_for_trace)]
-        result = model.record_trace(message)
-        traces = flatten_parameters(result.export())
-        with Path(output_dir / "traces.safetensors").open("wb") as fd:
-            safe_write(fd, traces)
-
     model.message_processor.tokenizer.save(str(output_dir / "tokenizer.json"))
     weights = flatten_parameters(model.export_weights())
     del model
@@ -148,6 +133,73 @@ def convert(
     callbacks.finished_saving_model()
 
 
+@dataclass
+class TraceCallbacks:
+    model_path: Path
+    output_path: Path
+    messages: Iterable[Message] | None
+
+    def output_exists(self) -> None:
+        raise RuntimeError(f"{self.output_path=} already exists, refusing to overwrite!")
+
+    def started(self) -> None:
+        pass
+
+    def loading_model(self) -> None:
+        pass
+
+    def finished_loading_model(self) -> None:
+        pass
+
+    def tracing_model(self) -> None:
+        pass
+
+    def finished_tracing_model(self) -> None:
+        pass
+
+    def saving_trace(self) -> None:
+        pass
+
+    def finished_saving_trace(self) -> None:
+        pass
+
+
+def trace(
+    model_path: Path,
+    output_path: Path,
+    messages: Iterable[Message] | None = None,
+    callbacks_type: Callable[
+        [
+            Path,
+            Path,
+            Iterable[Message] | None,
+        ],
+        TraceCallbacks,
+    ] = TraceCallbacks,
+) -> None:
+    callbacks = callbacks_type(model_path, output_path, messages)
+
+    if output_path.exists():
+        callbacks.output_exists()
+
+    callbacks.started()
+
+    callbacks.loading_model()
+    model = LanguageModelConfig.load_model(model_path)
+    callbacks.finished_loading_model()
+
+    callbacks.tracing_model()
+    result = model.record_trace(messages)
+    callbacks.finished_tracing_model()
+
+    callbacks.saving_trace()
+    traces = flatten_parameters(result.export())
+    output_path.parent.mkdir(parents=True, exist_ok=True)
+    with Path(output_path).open("wb") as fd:
+        safe_write(fd, traces)
+    callbacks.finished_saving_trace()
+
+
 @dataclass
 class EstimateBatchsizeCallbacks:
     model_path: Path
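The trace logic removed from `convert` above now lives in a standalone `trace` entry point with the same callback-driven shape. A minimal sketch of driving it programmatically with the default no-op callbacks (the model directory path below is hypothetical):

from pathlib import Path

from lalamo.commands import trace
from lalamo.message_processor import UserMessage

# Record activation traces for an already-converted model directory and
# write them out as a safetensors file next to it.
trace(
    model_path=Path("models/my-model"),
    output_path=Path("models/my-model/traces.safetensors"),
    messages=[UserMessage(content="Hello!")],
)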
lalamo/common.py CHANGED
@@ -15,6 +15,8 @@ __all__ = [
     "ParameterTree",
     "dummy_array",
     "flatten_parameters",
+    "require_array",
+    "require_tree",
     "unflatten_parameters",
 ]
 
@@ -29,6 +31,16 @@ type ParameterTree[ArrayType: ArrayLike] = (
 )
 
 
+def require_array[ArrayType: ArrayLike](value: ArrayType | ParameterTree[ArrayType]) -> ArrayType:
+    assert not isinstance(value, (Mapping, Sequence))
+    return value
+
+
+def require_tree[ArrayType: ArrayLike](value: ArrayType | ParameterTree[ArrayType]) -> ParameterTree[ArrayType]:
+    assert not isinstance(value, (Array, ShapeDtypeStruct))
+    return value
+
+
 def dummy_array(shape: int | tuple[int, ...], dtype: DTypeLike) -> Array:
     if isinstance(shape, int):
         shape = (shape,)
@@ -40,9 +52,10 @@ def flatten_parameters[ArrayType: ArrayLike](nested_parameters: ParameterTree[Ar
     if not isinstance(nested_parameters, Mapping):
         nested_parameters = {str(i): value for i, value in enumerate(nested_parameters)}
     for key, value in nested_parameters.items():
+        value = cast("ArrayType | ParameterTree[ArrayType]", value)
         key_path = ParameterPath(key)
         if isinstance(value, (Array, ShapeDtypeStruct)):
-            result[key_path] = value
+            result[key_path] = cast("ArrayType", value)
         else:
             update: dict[str, ArrayType] = {
                 str(key_path / subkey): subvalue for subkey, subvalue in flatten_parameters(value).items()
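The new `require_array`/`require_tree` helpers give callers a typed way to narrow a value declared as `ArrayType | ParameterTree[ArrayType]`, which is what the added `cast` calls in `flatten_parameters` paper over internally. A rough sketch of how the flattening behaves on a nested tree (the key format assumes `ParameterPath` joins segments with `/`):

import jax.numpy as jnp

from lalamo.common import flatten_parameters

# A nested parameter tree: mappings and sequences of arrays.
nested = {"decoder": {"layers": [{"weight": jnp.zeros((2, 2))}]}}

flat = flatten_parameters(nested)
# Expected keys look like "decoder/layers/0/weight"; sequence indices become string keys.
print(list(flat))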
lalamo/main.py CHANGED
@@ -36,11 +36,13 @@ from lalamo.commands import (
     ConversionCallbacks,
     EstimateBatchsizeCallbacks,
     Precision,
+    TraceCallbacks,
     TrainCallbacks,
 )
 from lalamo.commands import collect_traces as _collect_traces
 from lalamo.commands import convert as _convert
 from lalamo.commands import estimate_batchsize as _estimate_batchsize
+from lalamo.commands import trace as _trace
 from lalamo.commands import train as _train
 from lalamo.data.lalamo_completions import LalamoCompletion
 from lalamo.message_processor import UserMessage
@@ -83,7 +85,7 @@ class ModelParser(ParamType):
                 f"\n\nUse the `{SCRIPT_NAME} list-models` command to see the list of currently supported models.",
             )
             error_message = "".join(error_message_parts)
-            self.fail(error_message, param, ctx)
+            return self.fail(error_message, param, ctx)
         return result
 
 
@@ -111,10 +113,18 @@ def chat(
             metavar="MODEL_PATH",
         ),
     ],
+    message: Annotated[
+        str | None,
+        Option(
+            help="Message for non-interactive mode",
+            show_default="None, run interactively",
+        ),
+    ] = None,
 ) -> None:
     with Progress(
         SpinnerColumn(),
         TextColumn("[progress.description]{task.description}"),
+        console=err_console,
         transient=True,
     ) as progress:
         loading_task = progress.add_task("🚀 [cyan]Loading model...[/cyan]")
@@ -123,21 +133,28 @@ def chat(
         warmup_task = progress.add_task("🔥 Warming up compilation cache...")
         list(model.stream_reply_text([UserMessage("")], max_output_length=1))
         progress.remove_task(warmup_task)
-    console.print(f"🤖 Chatting with [blue]{model_path}[/blue]:")
-    messages = []
-    while True:
-        user_text = console.input("[cyan]user> [/cyan]")
-        user_message = UserMessage(user_text)
-        messages.append(user_message)
 
-        console.print("[red]assistant> [/red]", end="")
-        model_response_tokens = []
-        for token in model.stream_reply_text(messages):
+    if message is None:
+        console.print(f"🤖 Chatting with [blue]{model_path}[/blue]:")
+
+        messages = []
+        while True:
+            user_text = console.input("[cyan]user> [/cyan]")
+            user_message = UserMessage(user_text)
+            messages.append(user_message)
+
+            console.print("[red]assistant> [/red]", end="")
+            model_response_tokens = []
+            for token in model.stream_reply_text(messages):
+                console.print(token, end="")
+                model_response_tokens.append(token)
+            console.print()
+            model_response_text = "".join(model_response_tokens)
+            messages.append(model.message_processor.parse_response(model_response_text))
+    else:
+        for token in model.stream_reply_text([UserMessage(message)]):
             console.print(token, end="")
-            model_response_tokens.append(token)
         console.print()
-        model_response_text = "".join(model_response_tokens)
-        messages.append(model.message_processor.parse_response(model_response_text))
 
 
 @app.command(help="Classify given message with a Classifier type of model.")
@@ -178,6 +195,7 @@ class CliConversionCallbacks(ConversionCallbacks):
     overwrite: bool = False
 
     stack: ExitStack = field(default_factory=ExitStack)
+    progress: Progress | None = None
     downloading_tasks: dict[FileSpec, TaskID] = field(default_factory=dict)
     initializing_task: TaskID | None = None
     saving_task: TaskID | None = None
@@ -211,23 +229,33 @@ class CliConversionCallbacks(ConversionCallbacks):
         shutil.rmtree(self.output_dir)
 
     def downloading(self, file_spec: FileSpec) -> None:
+        assert self.progress is not None
+
         self.downloading_tasks[file_spec] = self.progress.add_task(f"Retrieving {file_spec.filename}...")
 
     def finished_downloading(self, file_spec: FileSpec) -> None:
+        assert self.progress is not None
+
         self.progress.remove_task(self.downloading_tasks[file_spec])
 
     def initializing_model(self) -> None:
+        assert self.progress is not None
+
         self.initializing_task = self.progress.add_task("Initializing model...")
 
     def finished_initializing_model(self) -> None:
+        assert self.progress is not None
         assert self.initializing_task is not None
 
         self.progress.remove_task(self.initializing_task)
 
     def saving_model(self) -> None:
+        assert self.progress is not None
+
         self.saving_task = self.progress.add_task(f"💾 Saving the model to {self.output_dir}")
 
     def finished_saving_model(self) -> None:
+        assert self.progress is not None
         assert self.saving_task is not None
 
         self.progress.remove_task(self.saving_task)
@@ -272,24 +300,12 @@ def convert(
             show_default="Model's native maximum context length.",
         ),
     ] = None,
-    include_traces: Annotated[
-        bool,
-        Option(
-            help="Export activation traces for debugging purposes.",
-        ),
-    ] = False,
     overwrite: Annotated[
         bool,
         Option(
             help="Overwrite existing model files.",
         ),
     ] = False,
-    message_for_trace: Annotated[
-        str | None,
-        Option(
-            help="Text message to use as prompt when recording trace",
-        ),
-    ] = None,
 ) -> None:
     if output_dir is None:
         output_dir = DEFAULT_OUTPUT_DIR / model_repo.name
@@ -299,12 +315,117 @@ def convert(
         output_dir,
         precision,
         context_length,
-        include_traces,
-        message_for_trace,
         partial(CliConversionCallbacks, overwrite=overwrite),
     )
 
 
+@dataclass
+class CliTraceCallbacks(TraceCallbacks):
+    overwrite: bool = False
+
+    stack: ExitStack = field(default_factory=ExitStack)
+    progress: Progress | None = None
+    loading_task: TaskID | None = None
+    tracing_task: TaskID | None = None
+    saving_task: TaskID | None = None
+
+    def output_exists(self) -> None:
+        if not self.overwrite and not Confirm().ask(
+            rf"⚠️ Output [cyan]{self.output_path}[/cyan] already exists."
+            r" Do you want to overwrite it?",
+        ):
+            raise Exit
+
+        self.output_path.unlink()
+
+    def started(self) -> None:
+        console.print(f"🔍 Tracing [cyan]{self.model_path}[/cyan]")
+
+        self.progress = self.stack.enter_context(
+            Progress(
+                SpinnerColumn(),
+                TextColumn("[progress.description]{task.description}"),
+                transient=True,
+            ),
+        )
+
+    def loading_model(self) -> None:
+        assert self.progress is not None
+
+        self.loading_task = self.progress.add_task("🧠 Loading model...")
+
+    def finished_loading_model(self) -> None:
+        assert self.progress is not None
+        assert self.loading_task is not None
+
+        self.progress.remove_task(self.loading_task)
+
+    def tracing_model(self) -> None:
+        assert self.progress is not None
+
+        self.tracing_task = self.progress.add_task("🔍 Recording trace...")
+
+    def finished_tracing_model(self) -> None:
+        assert self.progress is not None
+        assert self.tracing_task is not None
+
+        self.progress.remove_task(self.tracing_task)
+
+    def saving_trace(self) -> None:
+        assert self.progress is not None
+
+        self.saving_task = self.progress.add_task(f"💾 Saving trace to {self.output_path}")
+
+    def finished_saving_trace(self) -> None:
+        assert self.progress is not None
+        assert self.saving_task is not None
+
+        self.progress.remove_task(self.saving_task)
+        self.stack.close()
+        console.print(f"💾 Trace saved to [cyan]{self.output_path}[/cyan]")
+
+@app.command(help="Trace a model.")
+def trace(
+    model_path: Annotated[
+        Path,
+        Argument(
+            help="Path to the model directory.",
+            metavar="MODEL_PATH",
+        ),
+    ],
+    output_path: Annotated[
+        Path | None,
+        Option(
+            help="Path to save the trace to.",
+            show_default="${MODEL_PATH}/traces.safetensors",
+        ),
+    ] = None,
+    overwrite: Annotated[
+        bool,
+        Option(
+            help="Overwrite existing trace file.",
+        ),
+    ] = False,
+    message: Annotated[
+        str | None,
+        Option(
+            help="Text message to use as prompt when recording trace",
+        ),
+    ] = None,
+) -> None:
+    if output_path is None:
+        output_path = model_path / "traces.safetensors"
+
+    messages = None if message is None else [UserMessage(content=message)]
+
+    _trace(
+        model_path,
+        output_path,
+        messages,
+        partial(CliTraceCallbacks, overwrite=overwrite),
+    )
+
+
 def _model_size_string_to_int(
     size_str: str,
     _regex: re.Pattern = re.compile(r"(?P<number>(\d+)(\.\d*)?)(?P<suffix>[KMBT])"),
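With tracing split out of `convert`, the CLI gains a dedicated `trace` subcommand, and `chat` gets a `--message` option for one-shot, non-interactive replies. A hedged smoke-test sketch using Typer's test runner (it assumes `app` is the Typer application defined in lalamo/main.py; the model path is hypothetical):

from typer.testing import CliRunner

from lalamo.main import app

runner = CliRunner()

# One-shot reply without entering the interactive loop.
runner.invoke(app, ["chat", "models/my-model", "--message", "Hello!"])

# Record a trace next to the model, overwriting any existing traces.safetensors.
runner.invoke(app, ["trace", "models/my-model", "--overwrite", "--message", "Hello!"])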
lalamo/message_processor.py CHANGED
@@ -169,7 +169,10 @@ class MessageProcessor:
     def __post_init__(self) -> None:
         if self.output_parser_regex is not None:
             all_fields = AssistantMessage.__dataclass_fields__
-            required_fields = {k: v for k, v in all_fields.items() if v.type == v.type | None}
+            # NOTE: str type annotations are assumed to be required
+            required_fields = {
+                k: v for k, v in all_fields.items() if isinstance(v.type, str) or v.type == (v.type | None)
+            }
             named_groups = self.output_parser_regex.groupindex
             invalid_groups = set(named_groups) - set(all_fields)
             if invalid_groups:
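The extra `isinstance(v.type, str)` guard matters because dataclass fields can carry their annotations as plain strings (for example under `from __future__ import annotations` or with quoted forward references), in which case evaluating `v.type | None` would raise a `TypeError`; per the NOTE above, such fields are treated as required. A standalone illustration, not lalamo code:

from dataclasses import dataclass, fields

@dataclass
class Example:
    name: str  # without deferred annotations, f.type is the class `str`

for f in fields(Example):
    # With `from __future__ import annotations` f.type would instead be the
    # string "str", and `f.type | None` would raise a TypeError, which is why
    # the string case is checked first.
    print(f.name, repr(f.type), isinstance(f.type, str))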
lalamo/model_import/common.py CHANGED
@@ -3,7 +3,7 @@ import json
 from collections import ChainMap
 from collections.abc import Callable
 from contextlib import ExitStack
-from dataclasses import dataclass
+from dataclasses import dataclass, replace
 from pathlib import Path
 from typing import NamedTuple
 
@@ -20,7 +20,7 @@ from lalamo.quantization import QuantizationMode
 from lalamo.utils import process_chat_template
 
 from .decoder_configs import ForeignClassifierConfig, ForeignConfig, ForeignLMConfig
-from .huggingface_generation_config import HFGenerationConfig
+from .huggingface_generation_config import HFGenerationConfig, _policy_from_hf_config
 from .huggingface_tokenizer_config import HFTokenizerConfig
 from .model_specs import REPO_TO_MODEL, FileSpec, ModelSpec, ModelType, UseCase
 from .model_specs.common import JSONFieldSpec
@@ -34,6 +34,7 @@ __all__ = [
     "ModelSpec",
     "ModelType",
     "StatusEvent",
+    "download_file",
     "import_model",
 ]
 
@@ -239,24 +240,14 @@ def _import_language_model(
 
     stop_token_ids = tuple(foreign_decoder_config.eos_token_ids)
 
-    if model_spec.configs.generation_config is not None:
+    if isinstance(model_spec.configs.generation_config, GenerationConfig):
+        generation_config = replace(model_spec.configs.generation_config, stop_token_ids=stop_token_ids)
+    elif isinstance(model_spec.configs.generation_config, FileSpec):
         hf_generation_config_file = download_file(model_spec.configs.generation_config, model_spec.repo)
         hf_generation_config = HFGenerationConfig.from_json(hf_generation_config_file)
-        generation_config = GenerationConfig(
-            stop_token_ids=stop_token_ids,
-            temperature=hf_generation_config.temperature,
-            top_p=hf_generation_config.top_p,
-            top_k=hf_generation_config.top_k,
-            banned_tokens=None,
-        )
+        generation_config = _policy_from_hf_config(hf_generation_config, stop_token_ids)
     else:
-        generation_config = GenerationConfig(
-            stop_token_ids=stop_token_ids,
-            temperature=None,
-            top_p=None,
-            top_k=None,
-            banned_tokens=None,
-        )
+        generation_config = GenerationConfig(stop_token_ids)
 
     language_model_config = LanguageModelConfig(
         model_config=decoder.config,
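`_import_language_model` now distinguishes a spec that ships a ready-made `GenerationConfig` from one that points at a Hugging Face generation_config.json via a `FileSpec`; in the first case `dataclasses.replace` only swaps in the discovered stop tokens. A minimal sketch of that branch (the token ids are illustrative):

from dataclasses import replace

from lalamo.models import GenerationConfig

# A spec-provided policy with no stop tokens yet; sampling fields keep their defaults.
spec_config = GenerationConfig(stop_token_ids=())

# The importer preserves the policy and injects the EOS ids from the decoder config.
generation_config = replace(spec_config, stop_token_ids=(2, 32000))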
lalamo/model_import/decoder_configs/huggingface/lfm2.py CHANGED
@@ -2,6 +2,7 @@ from collections.abc import Mapping
 from dataclasses import dataclass
 from typing import Literal
 
+import jax.numpy as jnp
 from jaxtyping import DTypeLike
 
 from lalamo.modules import (
@@ -50,7 +51,6 @@ class HFLFM2Config(HuggingFaceLMConfig):
     conv_L_cache: int  # noqa: N815
     conv_bias: bool
     conv_dim: int
-    conv_dim_out: int
     conv_use_xavier_init: bool
     eos_token_id: int
     hidden_size: int
@@ -64,13 +64,15 @@ class HFLFM2Config(HuggingFaceLMConfig):
     num_key_value_heads: int
     pad_token_id: int
     rope_theta: float
-    torch_dtype: Literal["bfloat16"]
     transformers_version: str
     use_cache: bool
     use_pos_enc: bool
     vocab_size: int
 
+    dtype: Literal["bfloat16", "float16", "float32"] | None = None
+    torch_dtype: Literal["bfloat16", "float16", "float32"] | None = None
     intermediate_size: int | None = None
+    conv_dim_out: int | None = None
     layer_types: list[Literal["conv", "full_attention"]] | None = None
     full_attn_idxs: list[int] | None = None
     tie_embedding: bool = True
@@ -79,6 +81,14 @@ class HFLFM2Config(HuggingFaceLMConfig):
     quantization: QuantizationConfig | None = None
     quantization_config: QuantizationConfig | None = None
 
+    @property
+    def default_precision(self) -> DTypeLike:
+        assert self.dtype is not None or self.torch_dtype is not None, (
+            "at least one of dtype or torch_dtype must be specified"
+        )
+
+        return jnp.dtype(self.dtype or self.torch_dtype)
+
     def to_decoder_config(
         self,
         context_length: int | None,
@@ -200,8 +210,8 @@ class HFLFM2Config(HuggingFaceLMConfig):
             subtract_mean=False,
         )
 
-        if self.intermediate_size is not None:
-            hidden_dim = self.intermediate_size
+        if not self.block_auto_adjust_ff_dim:
+            hidden_dim = self.intermediate_size or self.block_ff_dim
         else:
             hidden_dim_adjusted = self.block_ff_dim * self.block_ffn_dim_multiplier * (2 / 3)
             hidden_dim = int(
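Newer LFM2 checkpoints publish `dtype` instead of (or alongside) `torch_dtype`, so `default_precision` now accepts either and prefers `dtype`. Roughly, the fallback behaves like this standalone sketch:

import jax.numpy as jnp

def resolve_precision(dtype: str | None, torch_dtype: str | None):
    # Mirrors HFLFM2Config.default_precision: prefer `dtype`, fall back to `torch_dtype`.
    assert dtype is not None or torch_dtype is not None
    return jnp.dtype(dtype or torch_dtype)

print(resolve_precision(None, "bfloat16"))       # bfloat16
print(resolve_precision("float32", "bfloat16"))  # float32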
lalamo/model_import/decoder_configs/huggingface/llamba.py CHANGED
@@ -76,7 +76,7 @@ class HFLlambaConfig(HuggingFaceLMConfig):
             logit_soft_cap=None,
             group_size=int(metadata_dict["quantization_kwargs.group_size"]),
             embedding_quantization_mode=QuantizationMode.from_num_bits(
-                int(metadata_dict["quantization_kwargs.bits"])
+                int(metadata_dict["quantization_kwargs.bits"]),
             ),
             activation_quantization_mode=None,
             activation_precision=activation_precision,
@@ -107,7 +107,7 @@ class HFLlambaConfig(HuggingFaceLMConfig):
         linear_config = MLXQuantizedLinearConfig(
             group_size=int(metadata_dict["quantization_kwargs.group_size"]),
             weight_quantization_mode=QuantizationMode.from_num_bits(
-                int(metadata_dict["quantization_kwargs.bits"])
+                int(metadata_dict["quantization_kwargs.bits"]),
             ),
             activation_quantization_mode=None,
             activation_precision=activation_precision,
lalamo/model_import/decoder_configs/huggingface/modern_bert.py CHANGED
@@ -41,7 +41,7 @@ def activation_from_str(activation: str) -> type[Activation]:
         return supported_activations[activation]
 
     raise ValueError(
-        f"Only activations from the following list are supported by Classifier: {supported_activations.keys()}"
+        f"Only activations from the following list are supported by Classifier: {supported_activations.keys()}",
     )
 
 
@@ -97,7 +97,7 @@ class ModernBERTConfig(HuggingFaceClassifierConfig):
         result = [None] * num_layers
         for index in range(len(result)):
             if index % global_attn_every_n_layers != 0:
-                result[index] = self.local_attention  # type: ignore
+                result[index] = self.local_attention
             else:
                 pass
         return tuple(result)
lalamo/model_import/huggingface_generation_config.py CHANGED
@@ -5,7 +5,9 @@ from typing import ClassVar
 
 import cattrs
 
-__all__ = ["HFGenerationConfig"]
+from lalamo.models import GenerationConfig
+
+__all__ = ["HFGenerationConfig", "_policy_from_hf_config"]
 
 
 @dataclass(frozen=True)
@@ -27,10 +29,11 @@ class HFGenerationConfig:
     cache_implementation: str | None = None  # “hybrid” for Gemma 3/2
 
     # -------- sampling strategy -------------
-    do_sample: bool | None = None
+    do_sample: bool | None = False
     temperature: float | None = None
+    min_p: float | None = None
     top_p: float | None = None
-    top_k: int | None = None
+    top_k: int | None = 50
     repetition_penalty: float | None = None
 
     # -------- length limits -----------------
@@ -42,3 +45,18 @@ class HFGenerationConfig:
         with open(json_path) as f:
             config = json.load(f)
         return cls._converter.structure(config, cls)
+
+
+def _policy_from_hf_config(
+    hf_config: HFGenerationConfig,
+    stop_token_ids: tuple[int, ...] = (),
+    banned_tokens: tuple[int, ...] | None = None,
+) -> GenerationConfig:
+    return GenerationConfig(
+        stop_token_ids=stop_token_ids,
+        temperature=hf_config.temperature,
+        top_k=hf_config.top_k,
+        top_p=hf_config.top_p,
+        min_p=hf_config.min_p,
+        banned_tokens=banned_tokens,
+    )
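`_policy_from_hf_config` centralises the mapping from a parsed Hugging Face generation config onto lalamo's `GenerationConfig`, now carrying `min_p` through as well. A short sketch of the round trip (the JSON path is hypothetical):

from lalamo.model_import.huggingface_generation_config import (
    HFGenerationConfig,
    _policy_from_hf_config,
)

# Parse the repo's generation_config.json, then build the sampling policy;
# stop tokens are supplied separately by the importer.
hf_config = HFGenerationConfig.from_json("generation_config.json")
policy = _policy_from_hf_config(hf_config, stop_token_ids=(2,))
print(policy.temperature, policy.top_k, policy.top_p, policy.min_p)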
lalamo/model_import/loaders/huggingface.py CHANGED
@@ -97,7 +97,7 @@ def load_mlp(module: DenseMLP, weights_dict: Mapping[str, Array], path: Paramete
     fused_up_gate_params = merge_linear_params([up_proj_params, gate_proj_params])
 
     return load_parameters(
-        lambda m: (*params_selector(m.up_projection), *params_selector(m.down_projection)),  # type: ignore
+        lambda m: (*params_selector(m.up_projection), *params_selector(m.down_projection)),
         module,
         (*fused_up_gate_params, *down_proj_params),
     )
@@ -177,7 +177,7 @@ def load_attention(
 
     qkv_params = merge_linear_params([q_params, k_params, v_params])
     return load_parameters(
-        lambda m: (*params_selector(m.qkv_projection), *params_selector(m.out_projection)),  # type: ignore
+        lambda m: (*params_selector(m.qkv_projection), *params_selector(m.out_projection)),
         module,
         (*qkv_params, *out_params),
     )