PyPI - lalamo - Versions diffs - 0.3.3__py3-none-any.whl → 0.4.0__py3-none-any.whl - Mend

lalamo 0.3.3__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (53) hide show

lalamo/__init__.py +20 -5
lalamo/data/__init__.py +8 -0
lalamo/data/huggingface_message.py +38 -0
lalamo/data/lalamo_completions.py +43 -0
lalamo/data/utils.py +8 -0
lalamo/language_model.py +152 -69
lalamo/main.py +271 -43
lalamo/message_processor.py +11 -1
lalamo/model_import/common.py +17 -7
lalamo/model_import/decoder_configs/__init__.py +3 -0
lalamo/model_import/decoder_configs/executorch.py +12 -6
lalamo/model_import/decoder_configs/huggingface/__init__.py +2 -0
lalamo/model_import/decoder_configs/huggingface/common.py +1 -3
lalamo/model_import/decoder_configs/huggingface/gemma2.py +11 -5
lalamo/model_import/decoder_configs/huggingface/gemma3.py +14 -5
lalamo/model_import/decoder_configs/huggingface/gpt_oss.py +195 -0
lalamo/model_import/decoder_configs/huggingface/llama.py +38 -8
lalamo/model_import/decoder_configs/huggingface/mistral.py +12 -6
lalamo/model_import/decoder_configs/huggingface/qwen2.py +12 -6
lalamo/model_import/decoder_configs/huggingface/qwen3.py +12 -6
lalamo/model_import/huggingface_tokenizer_config.py +1 -4
lalamo/model_import/loaders/executorch.py +10 -9
lalamo/model_import/loaders/huggingface.py +104 -9
lalamo/model_import/loaders/utils.py +92 -0
lalamo/model_import/model_specs/__init__.py +4 -1
lalamo/model_import/model_specs/common.py +15 -12
lalamo/model_import/model_specs/gpt_oss.py +21 -0
lalamo/modules/__init__.py +35 -7
lalamo/modules/activations.py +24 -14
lalamo/modules/attention.py +73 -20
lalamo/modules/common.py +8 -57
lalamo/modules/decoder.py +48 -34
lalamo/modules/decoder_layer.py +57 -43
lalamo/modules/embedding.py +13 -19
lalamo/modules/kv_cache.py +53 -16
lalamo/modules/linear.py +260 -79
lalamo/modules/mlp.py +395 -23
lalamo/modules/normalization.py +2 -3
lalamo/modules/rope.py +32 -21
lalamo/modules/utils.py +10 -0
lalamo/speculator/__init__.py +11 -0
lalamo/speculator/common.py +22 -0
lalamo/speculator/inference.py +75 -0
lalamo/speculator/ngram.py +154 -0
lalamo/speculator/utils.py +52 -0
lalamo/utils.py +27 -0
{lalamo-0.3.3.dist-info → lalamo-0.4.0.dist-info}/METADATA +11 -4
lalamo-0.4.0.dist-info/RECORD +71 -0
lalamo-0.3.3.dist-info/RECORD +0 -59
{lalamo-0.3.3.dist-info → lalamo-0.4.0.dist-info}/WHEEL +0 -0
{lalamo-0.3.3.dist-info → lalamo-0.4.0.dist-info}/entry_points.txt +0 -0
{lalamo-0.3.3.dist-info → lalamo-0.4.0.dist-info}/licenses/LICENSE +0 -0
{lalamo-0.3.3.dist-info → lalamo-0.4.0.dist-info}/top_level.txt +0 -0

lalamo/speculator/__init__.py ADDED Viewed

@@ -0,0 +1,11 @@
+from .common import Speculator
+from .inference import inference_collect_traces
+from .ngram import NGramSpeculator
+from .utils import train_speculator
+# Names exported by ``from lalamo.speculator import *``; each one is imported above.
+__all__ = [
+    "NGramSpeculator",
+    "Speculator",
+    "inference_collect_traces",
+    "train_speculator",
+]

lalamo/speculator/common.py ADDED Viewed

@@ -0,0 +1,22 @@
+from abc import ABC, abstractmethod
+from collections.abc import Iterable
+from typing import Self
+class Speculator(ABC):
+    """Abstract interface for next-token speculators.
+
+    Deriving from ``ABC`` is what makes the ``@abstractmethod`` markers below effective: a subclass
+    that leaves any of these methods unimplemented cannot be instantiated.
+    """
+    @abstractmethod
+    def train(self, token_ids: Iterable[int], token_logits: Iterable[dict[int, float]]) -> None:
+        """Update the speculator from one sequence.
+
+        Args:
+            token_ids: Token ids of the sequence, in order.
+            token_logits: One mapping per position from candidate token id to its logit.
+        """
+        raise NotImplementedError
+    @abstractmethod
+    def probs(self, seq: Iterable[int]) -> dict[int, float]:
+        """Return a mapping from candidate next-token id to its weight, given the context ``seq``."""
+        raise NotImplementedError
+    @abstractmethod
+    def serialize(self) -> bytes:
+        """Encode the speculator as bytes that ``deserialize`` can read back."""
+        raise NotImplementedError
+    @classmethod
+    @abstractmethod
+    def deserialize(cls, blob: bytes) -> Self:
+        """Rebuild a speculator from bytes produced by ``serialize``."""
+        raise NotImplementedError

lalamo/speculator/inference.py ADDED Viewed

@@ -0,0 +1,75 @@
+from collections.abc import Callable, Iterable
+from itertools import batched, chain
+from typing import NamedTuple
+import jax.numpy as jnp
+from lalamo.data.lalamo_completions import LalamoCompletion
+from lalamo.data.utils import get_prefixes_ending_in_user_message
+from lalamo.language_model import LanguageModel
+from lalamo.message_processor import Message
+class CollectTracesEvent(NamedTuple):
+    """Progress snapshot handed to ``progress_callback`` by ``inference_collect_traces``."""
+    # Running count of prompts for which a completion has been produced so far.
+    sequences_processed: int
+    # Running count of completion tokens kept so far (after stop-token / budget truncation).
+    tokens_generated: int
+def inference_collect_traces(
+    model: LanguageModel,
+    conversations: Iterable[Iterable[Message]],
+    num_top_logits_to_collect: int = 8,
+    batch_size: int = 1,
+    max_input_length: int = 1024,
+    max_output_length: int = 1024,
+    tokens_to_generate: int | None = None,
+    progress_callback: Callable[[CollectTracesEvent], None] | None = None,
+) -> Iterable[LalamoCompletion]:
+    """Generate completions for conversation prefixes and yield them with their top-k logits.
+
+    Every conversation is expanded into its prefixes that end in a user message, each prefix is
+    tokenized, and prefixes longer than ``max_input_length`` tokens are dropped. The remaining
+    prompts are processed in batches of at most ``batch_size``.
+
+    Args:
+        model: Language model used for tokenization and generation.
+        conversations: Conversations to expand into prompts.
+        num_top_logits_to_collect: Number of top logits requested per generated token.
+        batch_size: Maximum number of prompts per generation call.
+        max_input_length: Prompts with more tokens than this are skipped.
+        max_output_length: Forwarded to ``model.generate_tokens``.
+        tokens_to_generate: Optional total budget of completion tokens across all prompts; the
+            completion that crosses the budget is truncated and iteration stops.
+        progress_callback: Called after each batch with the running totals.
+
+    Yields:
+        One ``LalamoCompletion`` per processed prompt: the prompt token ids, the completion token
+        ids (cut after the first stop token), and one ``{token_id: logit}`` dict per completion token.
+    """
+    prefixes = chain.from_iterable(map(get_prefixes_ending_in_user_message, conversations))
+    tokenized_prefixes = map(model.message_processor.tokenize_request, prefixes)
+    filtered_prefixes = filter(lambda conv: len(conv) <= max_input_length, tokenized_prefixes)
+    tokens_generated, sequences_processed = 0, 0
+    for batch in batched(filtered_prefixes, n=batch_size):
+        length_without_padding = jnp.array(list(map(len, batch)))
+        max_len = max(map(len, batch))
+        # Right-pad every prompt with zeros so the batch forms a rectangular array.
+        padded = jnp.array(
+            [jnp.pad(jnp.array(tokens), (0, max_len - len(tokens)), constant_values=0) for tokens in batch],
+        )
+        generated = model.generate_tokens(
+            padded,
+            prompt_lengths_without_padding=length_without_padding,
+            max_output_length=max_output_length,
+            num_top_logits_to_return=num_top_logits_to_collect,
+        )
+        assert generated.top_k_token_ids is not None and generated.top_k_token_logits is not None
+        # Iterate over the prompts actually present: batched() yields a final batch shorter than
+        # batch_size, so looping over range(batch_size) would index past the end of the batch.
+        for conv_idx, prompt_token_ids in enumerate(batch):
+            token_ids = generated.token_ids[conv_idx].tolist()
+            # Keep everything up to and including the first stop token, or the whole output if none.
+            seqlen = next((i + 1 for i, t in enumerate(token_ids) if t in model.stop_token_ids), len(token_ids))
+            if tokens_to_generate is not None:
+                seqlen = min(seqlen, tokens_to_generate - tokens_generated)
+            tokens_generated += seqlen
+            sequences_processed += 1
+            token_ids = token_ids[:seqlen]
+            token_logits_ids = generated.top_k_token_ids[conv_idx, : len(token_ids)].tolist()
+            token_logits_values = generated.top_k_token_logits[conv_idx, : len(token_ids)].tolist()
+            token_logits = [
+                dict(zip(keys, values, strict=True))
+                for keys, values in zip(token_logits_ids, token_logits_values, strict=True)
+            ]
+            yield LalamoCompletion(prompt_token_ids, token_ids, token_logits)
+            if tokens_to_generate is not None and tokens_generated >= tokens_to_generate:
+                break
+        if progress_callback is not None:
+            progress_callback(CollectTracesEvent(sequences_processed, tokens_generated))
+        if tokens_to_generate is not None and tokens_generated >= tokens_to_generate:
+            break

lalamo/speculator/ngram.py ADDED Viewed

@@ -0,0 +1,154 @@
+import struct
+from array import array
+from collections.abc import Iterable
+from dataclasses import dataclass
+from itertools import chain, repeat, tee
+from math import exp
+from typing import Self
+import xxhash
+from .common import Speculator
+# The modulo reduction is not exactly uniform when size is not a power of two.
+# Shouldn't matter in practice though because size <<< 2**64
+def seqhash(tokens: Iterable[int], size: int) -> int:
+    """Hash a token sequence to an integer in ``range(size)``.
+
+    The tokens are packed as little-endian unsigned 32-bit integers and hashed with 64-bit XXH3.
+    """
+    token_list = list(tokens)
+    assert size <= 2**64
+    # A repeat count of zero packs to b"", so the empty sequence needs no special case.
+    packed = struct.pack(f"<{len(token_list)}I", *token_list)
+    digest = xxhash.xxh3_64_intdigest(packed)
+    return digest % size
+def padded_sliding_window(seq: Iterable[int], size:int, pad:int) -> Iterable[tuple[int, ...]]:
+    """Yield, for each position, the ``size`` items that precede it, left-padded with ``pad``.
+
+    The first window consists only of padding and the last window ends with the final item of
+    ``seq``, so ``len(seq) + 1`` windows are produced for ``size >= 1``.
+    """
+    shifted = []
+    for offset, copy in enumerate(tee(seq, size)):
+        # Copy number ``offset`` gets ``size - offset`` pads in front, i.e. it lags by that many steps.
+        shifted.append(chain(repeat(pad, size - offset), copy))
+    return zip(*shifted, strict=False)
+def softmax(logits: Iterable[float]) -> list[float]:
+    """Return the softmax of ``logits`` as a list of probabilities.
+
+    An empty input yields an empty list instead of failing inside ``max()``.
+    """
+    logits = list(logits)
+    if not logits:
+        return []
+    # Subtract the maximum before exponentiating so large logits cannot overflow exp().
+    log_max = max(logits)
+    exp_logs = [exp(logit - log_max) for logit in logits]
+    exp_log_sum = sum(exp_logs)
+    return [exp_log / exp_log_sum for exp_log in exp_logs]
+def online_mean(old_mean: float, sample: float, new_count: int) -> float:
+    """Return the running mean after folding ``sample`` in as observation number ``new_count``."""
+    correction = (sample - old_mean) / new_count
+    return old_mean + correction
+def update_probs(old_mean: dict[int, float], sample: dict[int, float], new_count: int, top_k: int) -> dict[int, float]:
+    """Fold ``sample`` into the running mean ``old_mean`` and keep the ``top_k`` largest entries.
+
+    Keys missing from either mapping count as 0. The kept entries are ordered by descending value
+    (ties broken by ascending key) and renormalized to sum to 1. If the kept entries sum to 0 they
+    are returned unnormalized rather than dividing by zero.
+    """
+    all_keys = set(old_mean.keys()).union(sample.keys())
+    new_probs_all = {k: online_mean(old_mean.get(k, 0), sample.get(k, 0), new_count) for k in all_keys}
+    new_probs_top_k = dict(sorted(new_probs_all.items(), key=lambda x: (-x[1], x[0]))[:top_k])
+    new_probs_sum = sum(new_probs_top_k.values())
+    if new_probs_sum == 0:
+        # Nothing to normalize (e.g. an untouched bucket updated with an empty sample).
+        return new_probs_top_k
+    return {k: v / new_probs_sum for k, v in new_probs_top_k.items()}
+@dataclass(frozen=True, eq=False)
+class NGramSpeculator(Speculator):
+    """N-gram speculator backed by a fixed-size hash table stored in flat arrays.
+
+    A context (the last ``ngram_n - 1`` tokens, left-padded with ``ngram_pad``) is hashed to one of
+    ``hashtable_size`` buckets. Each bucket holds ``ngram_k`` (token id, probability) pairs and one
+    update counter. The context itself is not stored, so contexts that hash to the same bucket
+    share their statistics.
+    """
+    hashtable_size: int  # number of buckets
+    ngram_k: int  # candidates kept per bucket
+    ngram_n: int  # n-gram order; the context length is ngram_n - 1
+    ngram_pad: int  # token id used to left-pad contexts shorter than ngram_n - 1
+    # "Interior mutability": the dataclass is frozen, so these fields are never rebound, but
+    # train() updates the arrays in place through memoryview slices.
+    ngram_keys: array[int]  # bucket b occupies [b * ngram_k, (b + 1) * ngram_k)
+    ngram_values: array[float]  # same layout as ngram_keys
+    ngram_counts: array[int]  # one update counter per bucket
+    def __post_init__(self) -> None:
+        """Reject non-positive table dimensions."""
+        if not self.hashtable_size > 0:
+            raise ValueError(f"{self.hashtable_size=} (must be > 0)")
+        if not self.ngram_k > 0:
+            raise ValueError(f"{self.ngram_k=} (must be > 0)")
+        if not self.ngram_n > 0:
+            raise ValueError(f"{self.ngram_n=} (must be > 0)")
+    @classmethod
+    def new(cls, hashtable_size: int, ngram_k: int, ngram_n: int, ngram_pad: int = 2**32 - 1) -> Self:
+        """Create an untrained speculator.
+
+        Every bucket starts with the keys ``0 .. ngram_k - 1``, all-zero values and a zero counter.
+        """
+        return cls(
+            hashtable_size,
+            ngram_k,
+            ngram_n,
+            ngram_pad,
+            array("I", range(ngram_k)) * hashtable_size,
+            array("f", repeat(0, ngram_k)) * hashtable_size,
+            array("I", [0]) * hashtable_size,
+        )
+    def train(self, token_ids: Iterable[int], token_logits: Iterable[dict[int, float]]) -> None:
+        """Update the table from one sequence.
+
+        For every position, the logits at that position are turned into probabilities with a
+        softmax and averaged into the bucket selected by the tokens preceding that position.
+        """
+        ngram_ctx = self.ngram_n - 1
+        if ngram_ctx > 0:
+            contexts = padded_sliding_window(token_ids, ngram_ctx, self.ngram_pad)
+        else:
+            # Unigram model: every position uses the same (empty) context.
+            contexts = repeat(())
+        for ctx, cur_logits in zip(contexts, token_logits, strict=False):
+            ngram_keys, ngram_values, ngram_counts = self._seq_slice(ctx)
+            ngram_counts[0] = new_count = ngram_counts[0] + 1
+            old_mean = dict(zip(ngram_keys, ngram_values, strict=True))
+            sample = dict(zip(cur_logits.keys(), softmax(cur_logits.values()), strict=True))
+            new_probs = update_probs(old_mean, sample, new_count, self.ngram_k)
+            # memoryview slice assignment needs exactly ngram_k items on the right-hand side;
+            # this assumes the bucket's ngram_k keys are distinct (true for tables built by new()).
+            ngram_keys[:] = array("I", new_probs.keys())
+            ngram_values[:] = array("f", new_probs.values())
+    def probs(self, seq: Iterable[int]) -> dict[int, float]:
+        """Return the stored ``{token_id: probability}`` entries of the bucket selected by ``seq``."""
+        ngram_keys, ngram_values, _ = self._seq_slice(seq)
+        return dict(zip(ngram_keys, ngram_values, strict=True))
+    # python < 3.13 doesn't support memoryview[T], but if T is not specified typechecker incorrectly assumes it's int
+    def _seq_slice(self, seq: Iterable[int]) -> tuple["memoryview[int]", "memoryview[float]", "memoryview[int]"]:
+        """Return writable views of the keys, values and counter of the bucket for ``seq``.
+
+        Only the last ``ngram_n - 1`` tokens of ``seq`` are used; shorter sequences are left-padded
+        with ``ngram_pad``.
+        """
+        seq = list(seq)
+        ngram_ctx = self.ngram_n - 1
+        if ngram_ctx > 0:
+            padded_seq = [*repeat(self.ngram_pad, max(ngram_ctx - len(seq), 0)), *seq[-ngram_ctx :]]
+        else:
+            padded_seq = []
+        seq_hash = seqhash(padded_seq, self.hashtable_size)
+        idx_start = seq_hash * self.ngram_k
+        idx_end = seq_hash * self.ngram_k + self.ngram_k
+        return (
+            memoryview(self.ngram_keys)[idx_start:idx_end],
+            memoryview(self.ngram_values)[idx_start:idx_end], # type: ignore (typechecker bug)
+            memoryview(self.ngram_counts)[seq_hash : (seq_hash + 1)],
+        )
+    def serialize(self) -> bytes:
+        """Encode the speculator as bytes.
+
+        Layout: four little-endian uint32 values (``hashtable_size``, ``ngram_k``, ``ngram_n``,
+        ``ngram_pad``) followed by the raw contents of the keys, values and counts arrays.
+        """
+        hdr = struct.pack("<4I", self.hashtable_size, self.ngram_k, self.ngram_n, self.ngram_pad)
+        return hdr + bytes(self.ngram_keys) + bytes(self.ngram_values) + bytes(self.ngram_counts)
+    @classmethod
+    def deserialize(cls, blob: bytes) -> Self:
+        """Rebuild a speculator from bytes produced by ``serialize``.
+
+        Raises:
+            ValueError: If ``blob`` is shorter than the size implied by its header.
+        """
+        offset = struct.calcsize("<4I")
+        hashtable_size, ngram_k, ngram_n, ngram_pad = struct.unpack("<4I", blob[:offset])
+        # serialize() writes the arrays with bytes(array), so section sizes follow the array item sizes.
+        ngram_keys_len = array("I").itemsize * ngram_k * hashtable_size
+        ngram_values_len = array("f").itemsize * ngram_k * hashtable_size
+        ngram_counts_len = array("I").itemsize * hashtable_size
+        expected_len = offset + ngram_keys_len + ngram_values_len + ngram_counts_len
+        if len(blob) < expected_len:
+            # Slicing a short blob would silently produce undersized arrays that fail much later.
+            raise ValueError(f"truncated speculator blob: expected at least {expected_len} bytes, got {len(blob)}")
+        ngram_keys = array("I", blob[offset : offset + ngram_keys_len])
+        offset += ngram_keys_len
+        ngram_values = array("f", blob[offset : offset + ngram_values_len])
+        offset += ngram_values_len
+        ngram_counts = array("I", blob[offset : offset + ngram_counts_len])
+        return cls(hashtable_size, ngram_k, ngram_n, ngram_pad, ngram_keys, ngram_values, ngram_counts)

lalamo/speculator/utils.py ADDED Viewed

@@ -0,0 +1,52 @@
+import random
+from collections.abc import Callable, Iterable
+from typing import NamedTuple
+from lalamo.data.lalamo_completions import LalamoCompletion
+from lalamo.speculator.common import Speculator
+class SpeculatorTrainingEvent(NamedTuple):
+    """Progress snapshot handed to ``progress_callback`` by ``train_speculator``."""
+    # 1-based index of the trace that has just been trained on.
+    trained_sequences: int
+    # Running total of completion tokens passed to the speculator so far.
+    trained_tokens: int
+def train_speculator(
+    speculator: Speculator,
+    traces: Iterable[LalamoCompletion],
+    tokens_to_train: int | None = None,
+    progress_callback: Callable[[SpeculatorTrainingEvent], None] | None = None,
+) -> None:
+    """Train ``speculator`` on completion traces, optionally limited to a token budget.
+
+    Args:
+        speculator: Speculator whose ``train`` method is called once per trace.
+        traces: Completions providing ``completion_token_ids`` and ``completion_token_logits``.
+        tokens_to_train: Optional cap on the total number of completion tokens used; the trace
+            that crosses the cap is truncated and training stops after it.
+        progress_callback: Called after every trace with the running totals.
+    """
+    has_budget = tokens_to_train is not None
+    total_tokens = 0
+    for sequence_number, trace in enumerate(traces, start=1):
+        # ``None`` as the slice bound keeps the whole trace.
+        limit = None
+        if has_budget and total_tokens + len(trace.completion_token_ids) > tokens_to_train:
+            limit = tokens_to_train - total_tokens
+        ids = trace.completion_token_ids[:limit]
+        logits = trace.completion_token_logits[:limit]
+        speculator.train(ids, logits)
+        total_tokens += len(ids)
+        if progress_callback is not None:
+            progress_callback(SpeculatorTrainingEvent(sequence_number, total_tokens))
+        if has_budget and total_tokens >= tokens_to_train:
+            break
+def test_speculator(
+    speculator: Speculator, sequence: Iterable[int] = (), max_completion_length: int = 32,
+) -> list[int]:
+    """Sample a continuation of ``sequence`` from ``speculator``.
+
+    Up to ``max_completion_length`` tokens are drawn one at a time, each weighted by
+    ``speculator.probs`` for the sequence built so far. Sampling stops early when the returned
+    weights sum to zero (which includes an empty mapping).
+
+    Returns:
+        A new list holding the input tokens followed by the sampled tokens.
+    """
+    # The default is an immutable empty tuple; the input is copied, so the caller's sequence is never mutated.
+    sequence = list(sequence)
+    for _ in range(max_completion_length):
+        probs = speculator.probs(sequence)
+        if sum(probs.values()) == 0:
+            break
+        selected = random.choices(list(probs.keys()), weights=list(probs.values()), k=1)[0]
+        sequence.append(selected)
+    return sequence

lalamo/utils.py CHANGED Viewed

@@ -9,20 +9,47 @@ from collections.abc import (
     Sequence,
     ValuesView,
 )
+from contextlib import contextmanager
 from dataclasses import dataclass
+from pathlib import Path
 from typing import overload
 import einops
 import jax.numpy as jnp
 from jaxtyping import Array
+from safetensors import safe_open
+# Names exported by ``from lalamo.utils import *``.
 __all__ = [
     "MapDictValues",
     "MapSequence",
     "jax_uint4_to_packed_uint8",
+    "open_safetensors",
 ]
+@dataclass(frozen=True)
+class LazyDict[K, V](Mapping[K, V]):
+    """Read-only mapping whose values are produced on demand by ``getter``.
+
+    Only keys listed in ``stored_keys`` are considered present; ``getter`` is called on every
+    lookup of such a key and its result is not cached.
+    """
+    stored_keys: set[K]
+    getter: Callable[[K], V]
+    def __getitem__(self, key: K) -> V:
+        """Return ``getter(key)``, or raise ``KeyError`` if ``key`` is not a stored key."""
+        if key in self.stored_keys:
+            return self.getter(key)
+        raise KeyError(key)
+    def __iter__(self) -> Iterator[K]:
+        """Iterate over the stored keys."""
+        yield from self.stored_keys
+    def __len__(self) -> int:
+        """Return the number of stored keys."""
+        key_count = len(self.stored_keys)
+        return key_count
+@contextmanager
+def open_safetensors(filename: Path | str) -> Iterator[Mapping[str, Array]]:
+    """Open a safetensors file and yield a lazy mapping from tensor name to tensor.
+
+    Tensors are fetched through the open file handle on each lookup, so the mapping must only be
+    used inside the ``with`` block.
+    """
+    with safe_open(filename, framework="flax") as tensor_file:
+        tensor_names = set(tensor_file.keys())
+        yield LazyDict(tensor_names, tensor_file.get_tensor)
 @dataclass(frozen=True)
 class MapIterable[OldT, NewT](Iterable[NewT]):
     map_func: Callable[[OldT], NewT]

{lalamo-0.3.3.dist-info → lalamo-0.4.0.dist-info}/METADATA RENAMED Viewed

@@ -1,17 +1,16 @@
 Metadata-Version: 2.4
 Name: lalamo
-Version: 0.3.3
+Version: 0.4.0
 Summary: JAX library for optimization and export of models for use with the UZU inference engine.
 Requires-Python: <4,>=3.12
 Description-Content-Type: text/markdown
 License-File: LICENSE
-Requires-Dist: cattrs>=24.1.2
+Requires-Dist: cattrs[msgpack]>=24.1.2
 Requires-Dist: click>=8.1.8
 Requires-Dist: einops>=0.8.0
 Requires-Dist: equinox>=0.11.11
 Requires-Dist: huggingface-hub[hf-transfer]>=0.27.1
-Requires-Dist: jax>=0.4.38; sys_platform == "darwin"
-Requires-Dist: jax[cuda]>=0.4.38; sys_platform == "linux"
+Requires-Dist: jax>=0.7.2
 Requires-Dist: jaxtyping>=0.2.36
 Requires-Dist: jinja2>=3.1.6
 Requires-Dist: ml-dtypes>=0.5.1
@@ -21,6 +20,14 @@ Requires-Dist: thefuzz>=0.22.1
 Requires-Dist: tokenizers>=0.21.2
 Requires-Dist: typer>=0.15.1
 Requires-Dist: safetensors>=0.6.2
+Requires-Dist: polars>=1.33.1
+Requires-Dist: xxhash>=3.5.0
+Provides-Extra: cpu
+Requires-Dist: jax[cpu]>=0.7.2; extra == "cpu"
+Provides-Extra: cuda
+Requires-Dist: jax[cuda]>=0.7.2; extra == "cuda"
+Provides-Extra: tpu
+Requires-Dist: jax[tpu]>=0.7.2; extra == "tpu"
 Dynamic: license-file
 <p align="center">

lalamo-0.4.0.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,71 @@
+lalamo/__init__.py,sha256=8lO29V8RZHM7yw3YLj3QhjMywppgG0HtviDrVRepSas,487
+lalamo/common.py,sha256=5NUFD26yQgOnEEk3LaQnce8n-VwJxILkEpFesHZhtQU,3820
+lalamo/language_model.py,sha256=embFPBH8gLMAMzzc5TqTM9KO7TXJkdOMuqLn9vT9bW8,14619
+lalamo/main.py,sha256=qGFb4fDFT7xU98XGVlgGjlx5R4_a5OULz8sTxcuRjxE,19470
+lalamo/message_processor.py,sha256=eDKqOGRTAc4rbwzye0FAGwTnG_nIlerdlLEcn8QlT0Q,5817
+lalamo/quantization.py,sha256=8o6ryIZLzzDYQuvBTboPfaVVdfijAKGpTxOcg3GKVD8,2752
+lalamo/registry_abc.py,sha256=ENjXiD_wEH100fNjG-W5Em1L_EQ0Lf0pdRhRGvf3qZk,2197
+lalamo/sampling.py,sha256=g_dNiJyZrRqoQIiLid4cr6nRT9N5tSz3GtHr8Bt4n-E,3404
+lalamo/utils.py,sha256=51uhz0vVNiX3S5QbP3WTsOF_QVlc5nnoensFTlNLlLQ,3707
+lalamo/data/__init__.py,sha256=9-lii4yjDZkZFkSNJA0Wd5ZOcxnhs5390IWO4JrMMWI,190
+lalamo/data/huggingface_message.py,sha256=AJi4t1DIJ0AtFPQW8rBxNwjnve8RLl5WuZH9DFVxRqU,1114
+lalamo/data/lalamo_completions.py,sha256=U_m3UNSJASUFz3rJq_taZOtL_U4B8Oj-ndkTF-JH-v4,1509
+lalamo/data/utils.py,sha256=B96gLaULyStKYuR8wjFdTpFc6YIDC8EEvGh1eiMe_Ec,338
+lalamo/model_import/__init__.py,sha256=Z8pS9rbKKx1QgUy7KZtHxiNWlZhII3mdovT9d37vAxg,168
+lalamo/model_import/common.py,sha256=JXEjwabvWINU46drKSqasbAaNENjc6jcH7EmMqdOas8,8103
+lalamo/model_import/huggingface_generation_config.py,sha256=mot6VQ6ezCtEhN6VjhnvaU-nR5P5T2BuBUgpFNnWJxU,1495
+lalamo/model_import/huggingface_tokenizer_config.py,sha256=xvwdmio7b9nhn2H3uMBVligiYj58JaCFCvHY3-8dBvM,2502
+lalamo/model_import/decoder_configs/__init__.py,sha256=vPmiVwYAU9NQvgAiK5UzFXLVdsez7rJGgX_rttKSO7c,503
+lalamo/model_import/decoder_configs/common.py,sha256=L1oUilS4KwfnwKyWegoSZ3Tha5CqRjN6gjMhCie4Ymk,1849
+lalamo/model_import/decoder_configs/executorch.py,sha256=peYxvT3sOpeGX3RGBReM_EYEiQyue-xJm61JKmHgLPY,5492
+lalamo/model_import/decoder_configs/huggingface/__init__.py,sha256=Fd7-a_eMT1TsITmrTjbyafFXylqK9uHkGqBbmy8GnjY,518
+lalamo/model_import/decoder_configs/huggingface/common.py,sha256=WgKWhp2AeYosLbx1nZGHM_Xt6unwd_7B5KA1z5tzSr4,1863
+lalamo/model_import/decoder_configs/huggingface/gemma2.py,sha256=Y0cX4LuSzpCG5RUosJug-EZNSAAUJNwoRXETSbFEa-k,4347
+lalamo/model_import/decoder_configs/huggingface/gemma3.py,sha256=cjJ9nTuabbJ7vMAbPYTmjmW2n8YL1hOh9IkKeCZmJ_4,6780
+lalamo/model_import/decoder_configs/huggingface/gpt_oss.py,sha256=UlQT7AGISVbQMeL0Yln60KlEXB6wav7QH_eH-J_ogXU,6914
+lalamo/model_import/decoder_configs/huggingface/llama.py,sha256=KeMUW2dem4-hDL48E5VTdW5dRw4G-NHWa4OJZE76wJk,6720
+lalamo/model_import/decoder_configs/huggingface/mistral.py,sha256=89a0Mj-Yq1zDkVP428UhoqCfm_ufYC0v8UpXGjb6-O8,4411
+lalamo/model_import/decoder_configs/huggingface/qwen2.py,sha256=WdXscId2L94sHVEQceBo50XQp1rVq7nLYZUpKMNsVQM,5226
+lalamo/model_import/decoder_configs/huggingface/qwen3.py,sha256=voMDmAU-UWZpl5G18T3egxhfDKNxY1pslnDnTPiPq0g,5292
+lalamo/model_import/loaders/__init__.py,sha256=Olg7a79phusilNgEa7PTgx1JgQQJLgAVg18T8isp0mw,148
+lalamo/model_import/loaders/common.py,sha256=kkugV-bMQlN1zvGHoj3uc7z0FbXKoMtXEBTvyu4KxK4,1844
+lalamo/model_import/loaders/executorch.py,sha256=GClNwxCbQJ8HNv_NqHEzqjZRRblgzVOf3h79yHT6bWQ,8975
+lalamo/model_import/loaders/huggingface.py,sha256=wd6wP36tLaN22WhEbi3utb8iha0yGbzuEi2lmrU8CUA,14358
+lalamo/model_import/loaders/utils.py,sha256=eiX3WKFRrAfBY-dugodscNInl5o5w3KmVcgma4atpGY,2456
+lalamo/model_import/model_specs/__init__.py,sha256=5S91pDmboMUmvKmHmTLtkcc_TPti5aU3ITlwC-QL0OA,1023
+lalamo/model_import/model_specs/common.py,sha256=xthsCPuU2CM7QSpJrrg6-q6xSnBVmnup-1zCH0BBdB4,5375
+lalamo/model_import/model_specs/deepseek.py,sha256=Umef93_ZBuq93yYsejIRNwj3udoln1gHfrv3SK5jyMo,417
+lalamo/model_import/model_specs/gemma.py,sha256=YGWM-J7jBEL16c_LZ9F_6dgeZnf4sEPLDKngl_3FbrE,1289
+lalamo/model_import/model_specs/gpt_oss.py,sha256=PLo0QGrXKdX61ReTRdyOaP_EH3Dmj5lp3fpJjZRwRVA,542
+lalamo/model_import/model_specs/huggingface.py,sha256=TEkU8y95_hmUWyF-Q5hn0dE2SvXbApghAsQwhWRu4D0,431
+lalamo/model_import/model_specs/llama.py,sha256=Ml-xvRGlXBT9NJhmEpwgNo6C84oBSMYgA1_PrCYGcAw,990
+lalamo/model_import/model_specs/mistral.py,sha256=HAojorjOqsJn2DoMBzYRw8A70qCslhFEsE9AF5xumlg,1278
+lalamo/model_import/model_specs/pleias.py,sha256=5sRpZGYwLdsav6bLiW-459y1Cs9iJKgKkBIuGsOxtsQ,368
+lalamo/model_import/model_specs/polaris.py,sha256=Mw1-6bByjDmPIKlIUIV46CsmV5xUp_laI5Qquo5DmAQ,520
+lalamo/model_import/model_specs/qwen.py,sha256=TA1ApqR0EUTmHheZYoqVMgGUXZrXEznABt6FYT2PoQo,5991
+lalamo/model_import/model_specs/reka.py,sha256=dOUYbEMMvovQdzQuBO_DCsjGI39syhoKCvnxLkNEDCw,423
+lalamo/modules/__init__.py,sha256=TM9diDXhY0OlaHvA0FgtQdFITixOes4VtHhOEE9eR08,2764
+lalamo/modules/activations.py,sha256=NT_A2TJbQ2bDGMVaSIboT2CAaFGxDVPXV3UvlrzfMWo,815
+lalamo/modules/attention.py,sha256=WveC0G1_iA6fZ0sojYG9BYRO6Xw51UIE4q7qc9gGvk0,16287
+lalamo/modules/common.py,sha256=bRq32rwTFfmRrYq7VanlGMPIkqwH0wMtl83D8PmCJl0,2969
+lalamo/modules/decoder.py,sha256=2TpGyakxIBiloCmO5C73YtXQIWvnFmCVyKaJWHL0P0E,12618
+lalamo/modules/decoder_layer.py,sha256=IHNTZxvQUNObglnz-KDJEbiuOfBK4VUmhfo49IjbYUY,12858
+lalamo/modules/embedding.py,sha256=szyu6HOpXPk77f8s9RyKgE5429B0yQ9Jxostsk3-HbY,11994
+lalamo/modules/kv_cache.py,sha256=x67_NvDfXSnbqB2Xb4i6Stb5Nc1I9zBNpKJGVRUSmXs,8908
+lalamo/modules/linear.py,sha256=gDvKJd1KlIjrll-i68AUIaTPU8O3igeErkYneixkf4E,30808
+lalamo/modules/mlp.py,sha256=sLmQr9eBlZ5YGo_Fa8fqrE9ryC432YPFBXP7csP9Y-k,17645
+lalamo/modules/normalization.py,sha256=gdmv4tWLk8CwvIfoS0rY3QTVRXhjHQNNF6YGYjyez1c,2790
+lalamo/modules/rope.py,sha256=HbIv5ESLGNAK47HAtqu1whLLUa20Sb28U8kEs6KclZM,10127
+lalamo/modules/torch_interop.py,sha256=-mujd1zI4ec2w92Hd50RtDa0K3jl6ZSnPxc5r3Fp9nU,916
+lalamo/modules/utils.py,sha256=t_TayWT6g5LtYKhJaod-u_COWaI_VbNd3eYek9Nj0lc,441
+lalamo/speculator/__init__.py,sha256=pLGwZiXZZeH2SaMsIGiqHzXs5AJtbxWweXaKc4fPFHc,262
+lalamo/speculator/common.py,sha256=PudF_gkpe5_nQ-57sAC-foE1xCy_H2Axh5KwRoA86lo,587
+lalamo/speculator/inference.py,sha256=4E7_8jwLwg1bvUUH59cLxjD73-IJ9xv58yCwYMFa1r4,3101
+lalamo/speculator/ngram.py,sha256=3O9akE2IGN_7Mp6l_zwhUpdNvjZB0rAlQm51EuX_-rY,5869
+lalamo/speculator/utils.py,sha256=K5qDGEdWYD47C1xLJQ3bamIHAG3-VEuANf-821VeanE,1688
+lalamo-0.4.0.dist-info/licenses/LICENSE,sha256=diHRfjSEJHD1nnEeMIfMRCjR3UERf8bT3eseD6b1ayA,1072
+lalamo-0.4.0.dist-info/METADATA,sha256=gfzekotZfrrfcex8qJx8X3XZh9SM0DtsIKrxRnUhegw,3041
+lalamo-0.4.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+lalamo-0.4.0.dist-info/entry_points.txt,sha256=qli7qTfnBk5WP10rOGXXEckHMtt-atJMDWd8jN89Uks,43
+lalamo-0.4.0.dist-info/top_level.txt,sha256=VHvWL5JN5XRG36NsN_MieJ7EwRihEOrEjyDaTdFJ-aI,7
+lalamo-0.4.0.dist-info/RECORD,,

lalamo-0.3.3.dist-info/RECORD DELETED Viewed

@@ -1,59 +0,0 @@
-lalamo/__init__.py,sha256=GCS_DTVF-_KdksObU3ceoJbjaxvS4w7fF62TI1NyBh0,217
-lalamo/common.py,sha256=5NUFD26yQgOnEEk3LaQnce8n-VwJxILkEpFesHZhtQU,3820
-lalamo/language_model.py,sha256=wZGucNCPP9ReHn6fYO5TlFHTXun1QIMwaxniq9z7K5Q,10639
-lalamo/main.py,sha256=Ta3ZW-Xw3uNPEVvb2YlBOH9UhhLDV7PZjRCg6giu0ao,12275
-lalamo/message_processor.py,sha256=hxnc2ELd31VfP41sCU-WobCoW06wqZllEmIvCcyAvyo,5445
-lalamo/quantization.py,sha256=8o6ryIZLzzDYQuvBTboPfaVVdfijAKGpTxOcg3GKVD8,2752
-lalamo/registry_abc.py,sha256=ENjXiD_wEH100fNjG-W5Em1L_EQ0Lf0pdRhRGvf3qZk,2197
-lalamo/sampling.py,sha256=g_dNiJyZrRqoQIiLid4cr6nRT9N5tSz3GtHr8Bt4n-E,3404
-lalamo/utils.py,sha256=Jm54CKFc6lJSggXVwF_lgiUuKEtQ55c58U-ALU6oxuk,2922
-lalamo/model_import/__init__.py,sha256=Z8pS9rbKKx1QgUy7KZtHxiNWlZhII3mdovT9d37vAxg,168
-lalamo/model_import/common.py,sha256=vVCKRl9-zifMiut4lMjB5eVXOHQxw8yfD5Q7rDr153c,7649
-lalamo/model_import/huggingface_generation_config.py,sha256=mot6VQ6ezCtEhN6VjhnvaU-nR5P5T2BuBUgpFNnWJxU,1495
-lalamo/model_import/huggingface_tokenizer_config.py,sha256=kTdgH2lle1m7zRtClO3BhgonvfZsAjeDGjPMAd1EU8E,2607
-lalamo/model_import/decoder_configs/__init__.py,sha256=Ru_lI1IjahyXZBSB5J0eWwVM6gn2ilRZW65m2sU2hMw,460
-lalamo/model_import/decoder_configs/common.py,sha256=L1oUilS4KwfnwKyWegoSZ3Tha5CqRjN6gjMhCie4Ymk,1849
-lalamo/model_import/decoder_configs/executorch.py,sha256=Kx_T-B5jumfWf9vj20We4FF0GkSkTmIYeWOss88-qYA,5266
-lalamo/model_import/decoder_configs/huggingface/__init__.py,sha256=kWHUnZDwGQCbA3Ucm-FEDr8zZ2yZ3yviPVftlNgMk30,460
-lalamo/model_import/decoder_configs/huggingface/common.py,sha256=0Q4Q4uklQkWhVBYFS_hCWmdZuEpw8Ch7jhycgbA2_NY,1899
-lalamo/model_import/decoder_configs/huggingface/gemma2.py,sha256=oIefI_ad-7DtzXmisFczkKPuOQ-KkzMkKWTk9likaMs,4101
-lalamo/model_import/decoder_configs/huggingface/gemma3.py,sha256=AUwkxMI90uaFhAthc_FtjnwJcLuHGSzF19BXKhTaWtM,6470
-lalamo/model_import/decoder_configs/huggingface/llama.py,sha256=8CvuCy4V84plGSOdmzIQmAwmxpCjKTo5wViCud5_oLo,5480
-lalamo/model_import/decoder_configs/huggingface/mistral.py,sha256=cxT8I8RFhYE47-gvaSIfaVOau5fQRjFj2XLqLUMZw6g,4166
-lalamo/model_import/decoder_configs/huggingface/qwen2.py,sha256=-sDwpEYe1wwRx1NfXxAhySz061LoV4_WZ2eWbkDo_7k,4981
-lalamo/model_import/decoder_configs/huggingface/qwen3.py,sha256=seSMPJyFCj4z-lf_vvrZj407oytieslxmKdlJSrXcqY,5047
-lalamo/model_import/loaders/__init__.py,sha256=Olg7a79phusilNgEa7PTgx1JgQQJLgAVg18T8isp0mw,148
-lalamo/model_import/loaders/common.py,sha256=kkugV-bMQlN1zvGHoj3uc7z0FbXKoMtXEBTvyu4KxK4,1844
-lalamo/model_import/loaders/executorch.py,sha256=nSvpylK8QL3nBk78P3FabLoyA87E3kv5CCpMfvuZe6Q,8886
-lalamo/model_import/loaders/huggingface.py,sha256=C0spTy9-DhQ8U7bDAyGA_i9dKgUoLR2Dv58t20KUvZs,10590
-lalamo/model_import/model_specs/__init__.py,sha256=UsKmBzNzJHVQquZxWdjGgiznnORVklE-z9Nr9Ertfqc,964
-lalamo/model_import/model_specs/common.py,sha256=MblETxPSVv2-KP7EBlBOEsvsZ8d1_PZfIXBjrpc1G-k,5208
-lalamo/model_import/model_specs/deepseek.py,sha256=Umef93_ZBuq93yYsejIRNwj3udoln1gHfrv3SK5jyMo,417
-lalamo/model_import/model_specs/gemma.py,sha256=YGWM-J7jBEL16c_LZ9F_6dgeZnf4sEPLDKngl_3FbrE,1289
-lalamo/model_import/model_specs/huggingface.py,sha256=TEkU8y95_hmUWyF-Q5hn0dE2SvXbApghAsQwhWRu4D0,431
-lalamo/model_import/model_specs/llama.py,sha256=Ml-xvRGlXBT9NJhmEpwgNo6C84oBSMYgA1_PrCYGcAw,990
-lalamo/model_import/model_specs/mistral.py,sha256=HAojorjOqsJn2DoMBzYRw8A70qCslhFEsE9AF5xumlg,1278
-lalamo/model_import/model_specs/pleias.py,sha256=5sRpZGYwLdsav6bLiW-459y1Cs9iJKgKkBIuGsOxtsQ,368
-lalamo/model_import/model_specs/polaris.py,sha256=Mw1-6bByjDmPIKlIUIV46CsmV5xUp_laI5Qquo5DmAQ,520
-lalamo/model_import/model_specs/qwen.py,sha256=TA1ApqR0EUTmHheZYoqVMgGUXZrXEznABt6FYT2PoQo,5991
-lalamo/model_import/model_specs/reka.py,sha256=dOUYbEMMvovQdzQuBO_DCsjGI39syhoKCvnxLkNEDCw,423
-lalamo/modules/__init__.py,sha256=ipgm-hRI9uLKluW1scsUXH9z2WeZf6bWSLgIFT4roMQ,2198
-lalamo/modules/activations.py,sha256=ZgUd3E4VTAVgCZaj9HhYkXiJuiKrWBzK6so5JGnucOc,532
-lalamo/modules/attention.py,sha256=Rukpx4lSTYHUqsrWoRc8hIruVa5l4hEEt0v-z6ieKaM,14815
-lalamo/modules/common.py,sha256=Fj0CWhw9ymCw5ulaPOc6ShYtqiB8UISXRWt-Cpftgdo,4512
-lalamo/modules/decoder.py,sha256=V1z4jgxxPOU8W-Xow2nxw35tYnaQxScfEFB8ouMcSWE,12197
-lalamo/modules/decoder_layer.py,sha256=rF8VOkNYmnWQxPbyNiVxJZsFaoygsZQtGo_HrfXwt40,12517
-lalamo/modules/embedding.py,sha256=2VopGTsZs6DhmDzBeB2XCrio_BFjty9gtKa99NDKfX4,12567
-lalamo/modules/kv_cache.py,sha256=yzCx6jKuzShu7-UV5ffpM0q21syA1l5YKzSOUHLALYM,7216
-lalamo/modules/linear.py,sha256=AKvNuVmUVOb2-Y4_j0pL1dnC0G0O8UzfYtDvBdTtcOE,24498
-lalamo/modules/mlp.py,sha256=xCxeOlKJLGTnbiGOBq2RkYOeKEqeIb62fdTQM63aM18,3769
-lalamo/modules/normalization.py,sha256=YecmA0hnVVYd473FzVgxunHYA377NWMKZRfuQNB8DQo,2942
-lalamo/modules/rope.py,sha256=IlL4Jz4fw6eTKXXMls1BXBAgbuB3GnP0aVB9GrF1nA8,10012
-lalamo/modules/torch_interop.py,sha256=-mujd1zI4ec2w92Hd50RtDa0K3jl6ZSnPxc5r3Fp9nU,916
-lalamo/modules/utils.py,sha256=5QTdi34kEI5jix7TfTdB0mOYZbzZUul_T1y8eWCA6lQ,262
-lalamo-0.3.3.dist-info/licenses/LICENSE,sha256=diHRfjSEJHD1nnEeMIfMRCjR3UERf8bT3eseD6b1ayA,1072
-lalamo-0.3.3.dist-info/METADATA,sha256=sngvnwPijSDHmBBtGzZ4-KHVg0UB99oE7fK6aPVPO3I,2854
-lalamo-0.3.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-lalamo-0.3.3.dist-info/entry_points.txt,sha256=qli7qTfnBk5WP10rOGXXEckHMtt-atJMDWd8jN89Uks,43
-lalamo-0.3.3.dist-info/top_level.txt,sha256=VHvWL5JN5XRG36NsN_MieJ7EwRihEOrEjyDaTdFJ-aI,7
-lalamo-0.3.3.dist-info/RECORD,,

{lalamo-0.3.3.dist-info → lalamo-0.4.0.dist-info}/WHEEL RENAMED Viewed

File without changes

{lalamo-0.3.3.dist-info → lalamo-0.4.0.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{lalamo-0.3.3.dist-info → lalamo-0.4.0.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{lalamo-0.3.3.dist-info → lalamo-0.4.0.dist-info}/top_level.txt RENAMED Viewed

File without changes

lalamo 0.3.3__py3-none-any.whl → 0.4.0__py3-none-any.whl

lalamo 0.3.3__py3-none-any.whl → 0.4.0__py3-none-any.whl