lalamo 0.3.3__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. lalamo/__init__.py +20 -5
  2. lalamo/data/__init__.py +8 -0
  3. lalamo/data/huggingface_message.py +38 -0
  4. lalamo/data/lalamo_completions.py +43 -0
  5. lalamo/data/utils.py +8 -0
  6. lalamo/language_model.py +152 -69
  7. lalamo/main.py +271 -43
  8. lalamo/message_processor.py +11 -1
  9. lalamo/model_import/common.py +17 -7
  10. lalamo/model_import/decoder_configs/__init__.py +3 -0
  11. lalamo/model_import/decoder_configs/executorch.py +12 -6
  12. lalamo/model_import/decoder_configs/huggingface/__init__.py +2 -0
  13. lalamo/model_import/decoder_configs/huggingface/common.py +1 -3
  14. lalamo/model_import/decoder_configs/huggingface/gemma2.py +11 -5
  15. lalamo/model_import/decoder_configs/huggingface/gemma3.py +14 -5
  16. lalamo/model_import/decoder_configs/huggingface/gpt_oss.py +195 -0
  17. lalamo/model_import/decoder_configs/huggingface/llama.py +38 -8
  18. lalamo/model_import/decoder_configs/huggingface/mistral.py +12 -6
  19. lalamo/model_import/decoder_configs/huggingface/qwen2.py +12 -6
  20. lalamo/model_import/decoder_configs/huggingface/qwen3.py +12 -6
  21. lalamo/model_import/huggingface_tokenizer_config.py +1 -4
  22. lalamo/model_import/loaders/executorch.py +10 -9
  23. lalamo/model_import/loaders/huggingface.py +104 -9
  24. lalamo/model_import/loaders/utils.py +92 -0
  25. lalamo/model_import/model_specs/__init__.py +4 -1
  26. lalamo/model_import/model_specs/common.py +15 -12
  27. lalamo/model_import/model_specs/gpt_oss.py +21 -0
  28. lalamo/modules/__init__.py +35 -7
  29. lalamo/modules/activations.py +24 -14
  30. lalamo/modules/attention.py +73 -20
  31. lalamo/modules/common.py +8 -57
  32. lalamo/modules/decoder.py +48 -34
  33. lalamo/modules/decoder_layer.py +57 -43
  34. lalamo/modules/embedding.py +13 -19
  35. lalamo/modules/kv_cache.py +53 -16
  36. lalamo/modules/linear.py +260 -79
  37. lalamo/modules/mlp.py +395 -23
  38. lalamo/modules/normalization.py +2 -3
  39. lalamo/modules/rope.py +32 -21
  40. lalamo/modules/utils.py +10 -0
  41. lalamo/speculator/__init__.py +11 -0
  42. lalamo/speculator/common.py +22 -0
  43. lalamo/speculator/inference.py +75 -0
  44. lalamo/speculator/ngram.py +154 -0
  45. lalamo/speculator/utils.py +52 -0
  46. lalamo/utils.py +27 -0
  47. {lalamo-0.3.3.dist-info → lalamo-0.4.0.dist-info}/METADATA +11 -4
  48. lalamo-0.4.0.dist-info/RECORD +71 -0
  49. lalamo-0.3.3.dist-info/RECORD +0 -59
  50. {lalamo-0.3.3.dist-info → lalamo-0.4.0.dist-info}/WHEEL +0 -0
  51. {lalamo-0.3.3.dist-info → lalamo-0.4.0.dist-info}/entry_points.txt +0 -0
  52. {lalamo-0.3.3.dist-info → lalamo-0.4.0.dist-info}/licenses/LICENSE +0 -0
  53. {lalamo-0.3.3.dist-info → lalamo-0.4.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,11 @@
1
+ from .common import Speculator
2
+ from .inference import inference_collect_traces
3
+ from .ngram import NGramSpeculator
4
+ from .utils import train_speculator
5
+
6
+ __all__ = [
7
+ "NGramSpeculator",
8
+ "Speculator",
9
+ "inference_collect_traces",
10
+ "train_speculator",
11
+ ]
@@ -0,0 +1,22 @@
1
+ from abc import abstractmethod
2
+ from collections.abc import Iterable
3
+ from typing import Self
4
+
5
+
6
+ class Speculator:
7
+ @abstractmethod
8
+ def train(self, token_ids: Iterable[int], token_logits: Iterable[dict[int, float]]) -> None:
9
+ raise NotImplementedError
10
+
11
+ @abstractmethod
12
+ def probs(self, seq: Iterable[int]) -> dict[int, float]:
13
+ raise NotImplementedError
14
+
15
+ @abstractmethod
16
+ def serialize(self) -> bytes:
17
+ raise NotImplementedError
18
+
19
+ @classmethod
20
+ @abstractmethod
21
+ def deserialize(cls, blob: bytes) -> Self:
22
+ raise NotImplementedError
@@ -0,0 +1,75 @@
1
+ from collections.abc import Callable, Iterable
2
+ from itertools import batched, chain
3
+ from typing import NamedTuple
4
+
5
+ import jax.numpy as jnp
6
+
7
+ from lalamo.data.lalamo_completions import LalamoCompletion
8
+ from lalamo.data.utils import get_prefixes_ending_in_user_message
9
+ from lalamo.language_model import LanguageModel
10
+ from lalamo.message_processor import Message
11
+
12
+
13
class CollectTracesEvent(NamedTuple):
    """Progress snapshot handed to the ``progress_callback`` of ``inference_collect_traces``."""

    # Running count of prompts whose completion has been produced so far.
    sequences_processed: int
    # Running count of completion tokens kept so far.
    tokens_generated: int
16
+
17
+
18
def inference_collect_traces(
    model: LanguageModel,
    conversations: Iterable[Iterable[Message]],
    num_top_logits_to_collect: int = 8,
    batch_size: int = 1,
    max_input_length: int = 1024,
    max_output_length: int = 1024,
    tokens_to_generate: int | None = None,
    progress_callback: Callable[[CollectTracesEvent], None] | None = None,
) -> Iterable[LalamoCompletion]:
    """Generate completions for conversation prefixes and yield them with their top-k logits.

    Every conversation is expanded with ``get_prefixes_ending_in_user_message``, each
    prefix is tokenized with ``model.message_processor.tokenize_request``, and prefixes
    longer than ``max_input_length`` tokens are dropped. The remaining prompts are
    right-padded with 0 and generated in batches.

    Args:
        model: Language model used for tokenization and generation.
        conversations: Conversations to derive prompts from.
        num_top_logits_to_collect: Number of top logits requested per generated token.
        batch_size: Maximum number of prompts per ``generate_tokens`` call.
        max_input_length: Prompts with more tokens than this are skipped.
        max_output_length: Forwarded to ``model.generate_tokens``.
        tokens_to_generate: Optional budget on the total number of completion tokens
            kept; the last completion is truncated to fit and iteration then stops.
        progress_callback: Called after every yielded completion with running totals.

    Yields:
        ``LalamoCompletion(prompt_token_ids, completion_token_ids, token_logits)`` where
        ``token_logits`` holds one ``{token id: logit}`` dict per kept completion token.
    """
    # A non-positive budget leaves nothing to generate; without this guard the slicing
    # below would receive a zero or negative length.
    if tokens_to_generate is not None and tokens_to_generate <= 0:
        return

    prefixes = chain.from_iterable(map(get_prefixes_ending_in_user_message, conversations))

    tokenized_prefixes = map(model.message_processor.tokenize_request, prefixes)
    filtered_prefixes = filter(lambda conv: len(conv) <= max_input_length, tokenized_prefixes)

    tokens_generated, sequences_processed = 0, 0

    for batch in batched(filtered_prefixes, n=batch_size):
        prompt_lengths = [len(tokens) for tokens in batch]
        max_len = max(prompt_lengths)
        length_without_padding = jnp.array(prompt_lengths)

        padded = jnp.array(
            [jnp.pad(jnp.array(tokens), (0, max_len - len(tokens)), constant_values=0) for tokens in batch],
        )

        generated = model.generate_tokens(
            padded,
            prompt_lengths_without_padding=length_without_padding,
            max_output_length=max_output_length,
            num_top_logits_to_return=num_top_logits_to_collect,
        )
        assert generated.top_k_token_ids is not None and generated.top_k_token_logits is not None

        budget_exhausted = False
        # Iterate over the batch itself: `batched` yields a shorter final batch, so
        # indexing up to `batch_size` would run past its end.
        for conv_idx, prompt_token_ids in enumerate(batch):
            token_ids = generated.token_ids[conv_idx].tolist()
            # Keep tokens up to and including the first stop token, if any.
            seqlen = next((i + 1 for i, t in enumerate(token_ids) if t in model.stop_token_ids), len(token_ids))
            if tokens_to_generate is not None:
                seqlen = min(seqlen, tokens_to_generate - tokens_generated)
            tokens_generated += seqlen
            sequences_processed += 1

            token_ids = token_ids[:seqlen]
            token_logits_ids = generated.top_k_token_ids[conv_idx, : len(token_ids)].tolist()
            token_logits_values = generated.top_k_token_logits[conv_idx, : len(token_ids)].tolist()
            token_logits = [
                dict(zip(keys, values, strict=True))
                for keys, values in zip(token_logits_ids, token_logits_values, strict=True)
            ]

            yield LalamoCompletion(prompt_token_ids, token_ids, token_logits)

            # Report progress before checking the budget so the final sequence is
            # reported too.
            if progress_callback is not None:
                progress_callback(CollectTracesEvent(sequences_processed, tokens_generated))

            if tokens_to_generate is not None and tokens_generated >= tokens_to_generate:
                budget_exhausted = True
                break

        if budget_exhausted:
            break
@@ -0,0 +1,154 @@
1
+ import struct
2
+ from array import array
3
+ from collections.abc import Iterable
4
+ from dataclasses import dataclass
5
+ from itertools import chain, repeat, tee
6
+ from math import exp
7
+ from typing import Self
8
+
9
+ import xxhash
10
+
11
+ from .common import Speculator
12
+
13
+
14
def seqhash(tokens: Iterable[int], size: int) -> int:
    """Hash a token sequence into the range ``[0, size)``.

    The tokens are packed as little-endian unsigned 32-bit integers and hashed with
    64-bit xxh3; the digest is then reduced modulo ``size``.

    The result is not exactly uniformly distributed when ``size`` is not a power of
    two. That should not matter in practice because ``size`` is far smaller than 2**64.
    """
    token_list = list(tokens)
    assert size <= 2**64
    packed = struct.pack(f"<{len(token_list)}I", *token_list) if token_list else b""
    return xxhash.xxh3_64_intdigest(packed) % size
25
+
26
+ def padded_sliding_window(seq: Iterable[int], size:int, pad:int) -> Iterable[tuple[int, ...]]:
27
+ seqs = tee(seq, size)
28
+ pads = tuple(repeat(pad, size - i) for i in range(size))
29
+ padded_seqs = tuple(chain(pad, seq) for pad, seq in zip(pads, seqs, strict=True))
30
+ return zip(*padded_seqs, strict=False)
31
+
32
def softmax(logits: Iterable[float]) -> list[float]:
    """Return the softmax of ``logits`` as a list.

    The maximum logit is subtracted before exponentiating so large values do not
    overflow. An empty input raises ``ValueError`` (from ``max``).
    """
    values = list(logits)
    peak = max(values)
    weights = [exp(value - peak) for value in values]
    total = sum(weights)
    return [weight / total for weight in weights]
38
+
39
def online_mean(old_mean: float, sample: float, new_count: int) -> float:
    """Return the running mean after adding ``sample`` as observation number ``new_count``."""
    delta = sample - old_mean
    return old_mean + delta / new_count
41
+
42
def update_probs(old_mean: dict[int, float], sample: dict[int, float], new_count: int, top_k: int) -> dict[int, float]:
    """Fold ``sample`` into the running per-token means and keep the ``top_k`` largest.

    Tokens missing from either mapping count as 0. The merged means are ordered by
    descending value (ties broken by ascending token id), truncated to ``top_k``
    entries and renormalized so the kept values sum to 1.
    """
    merged: dict[int, float] = {}
    for token in old_mean.keys() | sample.keys():
        previous = old_mean.get(token, 0)
        # Incremental mean update: previous + (sample - previous) / new_count.
        merged[token] = previous + (sample.get(token, 0) - previous) / new_count

    ranked = sorted(merged.items(), key=lambda item: (-item[1], item[0]))
    kept = dict(ranked[:top_k])

    total = sum(kept.values())
    return {token: value / total for token, value in kept.items()}
54
+
55
+
56
@dataclass(frozen=True, eq=False)
class NGramSpeculator(Speculator):
    """N-gram speculator backed by a fixed-size hash table.

    A context (the previous ``ngram_n - 1`` tokens, left-padded with ``ngram_pad``) is
    hashed into one of ``hashtable_size`` buckets. Each bucket stores ``ngram_k``
    (token id, mean probability) pairs plus an update counter. Buckets are addressed
    by hash only, so different contexts that collide share one bucket.
    """

    hashtable_size: int  # number of buckets
    ngram_k: int  # (token, probability) slots per bucket
    ngram_n: int  # n-gram order; the context is ngram_n - 1 tokens long
    ngram_pad: int  # token id used to left-pad short contexts

    # "Interior mutability": the dataclass is frozen, but these arrays are updated in
    # place by `train`. Use `new` to build correctly sized, zero-initialized tables.
    ngram_keys: array[int]  # hashtable_size * ngram_k token ids
    ngram_values: array[float]  # hashtable_size * ngram_k probabilities
    ngram_counts: array[int]  # hashtable_size update counters

    def __post_init__(self) -> None:
        """Reject non-positive table dimensions."""
        if not self.hashtable_size > 0:
            raise ValueError(f"{self.hashtable_size=} (must be > 0)")
        if not self.ngram_k > 0:
            raise ValueError(f"{self.ngram_k=} (must be > 0)")
        if not self.ngram_n > 0:
            raise ValueError(f"{self.ngram_n=} (must be > 0)")

    @classmethod
    def new(cls, hashtable_size: int, ngram_k: int, ngram_n: int, ngram_pad: int = 2**32 - 1) -> Self:
        """Create an untrained speculator.

        Every bucket starts with keys ``0 .. ngram_k - 1``, all probabilities 0 and an
        update count of 0.
        """
        return cls(
            hashtable_size,
            ngram_k,
            ngram_n,
            ngram_pad,
            array("I", range(ngram_k)) * hashtable_size,
            array("f", repeat(0, ngram_k)) * hashtable_size,
            array("I", [0]) * hashtable_size,
        )

    def train(self, token_ids: Iterable[int], token_logits: Iterable[dict[int, float]]) -> None:
        """Update bucket statistics from one sequence.

        Args:
            token_ids: Tokens of the sequence; they form the contexts.
            token_logits: One ``{token id: logit}`` mapping per position. Each is
                softmaxed and averaged into the bucket of the context preceding
                that position.
        """
        ngram_ctx = self.ngram_n - 1
        if ngram_ctx > 0:
            contexts = padded_sliding_window(token_ids, ngram_ctx, self.ngram_pad)
        else:
            # Unigram model: every position shares the empty context.
            contexts = repeat(())

        # `contexts` yields one more item than there are tokens (or is infinite), so the
        # non-strict zip stops at the end of `token_logits`.
        for ctx, cur_logits in zip(contexts, token_logits, strict=False):
            ngram_keys, ngram_values, ngram_counts = self._seq_slice(ctx)

            ngram_counts[0] = new_count = ngram_counts[0] + 1

            old_mean = dict(zip(ngram_keys, ngram_values, strict=True))
            sample = dict(zip(cur_logits.keys(), softmax(cur_logits.values()), strict=True))

            new_probs = update_probs(old_mean, sample, new_count, self.ngram_k)

            # Write the kept entries back into the bucket in place through the views.
            ngram_keys[:] = array("I", new_probs.keys())
            ngram_values[:] = array("f", new_probs.values())

    def probs(self, seq: Iterable[int]) -> dict[int, float]:
        """Return the stored ``{token id: probability}`` entries for the context ending ``seq``."""
        ngram_keys, ngram_values, _ = self._seq_slice(seq)
        return dict(zip(ngram_keys, ngram_values, strict=True))

    # python < 3.13 doesn't support memoryview[T], but if T is not specified typechecker incorrectly assumes it's int
    def _seq_slice(self, seq: Iterable[int]) -> tuple["memoryview[int]", "memoryview[float]", "memoryview[int]"]:
        """Return writable views of the keys, values and counter of the bucket for ``seq``.

        Only the last ``ngram_n - 1`` tokens of ``seq`` are used; shorter sequences are
        left-padded with ``ngram_pad``.
        """
        seq = list(seq)
        ngram_ctx = self.ngram_n - 1
        if ngram_ctx > 0:
            padded_seq = [*repeat(self.ngram_pad, max(ngram_ctx - len(seq), 0)), *seq[-ngram_ctx:]]
        else:
            padded_seq = []

        seq_hash = seqhash(padded_seq, self.hashtable_size)
        idx_start = seq_hash * self.ngram_k
        idx_end = idx_start + self.ngram_k

        return (
            memoryview(self.ngram_keys)[idx_start:idx_end],
            memoryview(self.ngram_values)[idx_start:idx_end],  # type: ignore (typechecker bug)
            memoryview(self.ngram_counts)[seq_hash : (seq_hash + 1)],
        )

    def serialize(self) -> bytes:
        """Encode the speculator as a header followed by the raw table contents.

        The header is four little-endian unsigned 32-bit integers (``hashtable_size``,
        ``ngram_k``, ``ngram_n``, ``ngram_pad``); the keys, values and counts arrays
        follow as their raw in-memory bytes.
        """
        hdr = struct.pack("<4I", self.hashtable_size, self.ngram_k, self.ngram_n, self.ngram_pad)
        return hdr + bytes(self.ngram_keys) + bytes(self.ngram_values) + bytes(self.ngram_counts)

    @classmethod
    def deserialize(cls, blob: bytes) -> Self:
        """Rebuild a speculator from the output of ``serialize``.

        Raises:
            struct.error: If ``blob`` is shorter than the 16-byte header.
            ValueError: If ``blob`` is too short to hold the tables described by the
                header, or the header values are rejected by ``__post_init__``.
        """
        offset = struct.calcsize("<4I")

        hashtable_size, ngram_k, ngram_n, ngram_pad = struct.unpack("<4I", blob[:offset])

        # Size the sections with the same item sizes `serialize` wrote them with.
        keys_len = array("I").itemsize * ngram_k * hashtable_size
        values_len = array("f").itemsize * ngram_k * hashtable_size
        counts_len = array("I").itemsize * hashtable_size

        expected_len = offset + keys_len + values_len + counts_len
        if len(blob) < expected_len:
            # A truncated blob would otherwise produce undersized tables that only fail
            # later, in `train` or `probs`.
            raise ValueError(f"blob too short: expected at least {expected_len} bytes, got {len(blob)}")

        ngram_keys = array("I", blob[offset : offset + keys_len])
        offset += keys_len

        ngram_values = array("f", blob[offset : offset + values_len])
        offset += values_len

        ngram_counts = array("I", blob[offset : offset + counts_len])

        return cls(hashtable_size, ngram_k, ngram_n, ngram_pad, ngram_keys, ngram_values, ngram_counts)
@@ -0,0 +1,52 @@
1
+ import random
2
+ from collections.abc import Callable, Iterable
3
+ from typing import NamedTuple
4
+
5
+ from lalamo.data.lalamo_completions import LalamoCompletion
6
+ from lalamo.speculator.common import Speculator
7
+
8
+
9
class SpeculatorTrainingEvent(NamedTuple):
    """Progress snapshot handed to the ``progress_callback`` of ``train_speculator``."""

    # Number of traces consumed so far (1-based running count).
    trained_sequences: int
    # Total number of completion tokens trained on so far.
    trained_tokens: int
12
+
13
+
14
def train_speculator(
    speculator: Speculator,
    traces: Iterable[LalamoCompletion],
    tokens_to_train: int | None = None,
    progress_callback: Callable[[SpeculatorTrainingEvent], None] | None = None,
) -> None:
    """Train ``speculator`` on recorded completions, optionally capped by a token budget.

    Args:
        speculator: Speculator whose ``train`` method is called once per trace.
        traces: Completions providing ``completion_token_ids`` and
            ``completion_token_logits``.
        tokens_to_train: Optional cap on the total number of tokens trained on. The
            trace that crosses the cap is truncated and training then stops.
        progress_callback: Called after each trace with the running totals.
    """
    trained_tokens = 0

    for trained_sequences, trace in enumerate(traces, start=1):
        # `None` keeps the whole trace; otherwise cut it where the budget runs out.
        cutoff = None
        if tokens_to_train is not None and trained_tokens + len(trace.completion_token_ids) > tokens_to_train:
            cutoff = tokens_to_train - trained_tokens

        token_ids = trace.completion_token_ids[:cutoff]
        token_logits = trace.completion_token_logits[:cutoff]

        speculator.train(token_ids, token_logits)
        trained_tokens += len(token_ids)

        if progress_callback is not None:
            progress_callback(SpeculatorTrainingEvent(trained_sequences, trained_tokens))

        if tokens_to_train is not None and trained_tokens >= tokens_to_train:
            break
39
+
40
+
41
def test_speculator(
    speculator: Speculator, sequence: Iterable[int] = (), max_completion_length: int = 32,
) -> list[int]:
    """Sample a continuation of ``sequence`` from ``speculator``.

    Up to ``max_completion_length`` tokens are appended, each drawn at random with the
    weights returned by ``speculator.probs`` for the sequence so far. Sampling stops
    early when those weights sum to 0.

    Args:
        speculator: Speculator to query.
        sequence: Initial tokens; copied, never modified. Defaults to an empty sequence.
        max_completion_length: Maximum number of tokens to append.

    Returns:
        The initial tokens followed by the sampled ones.
    """
    sequence = list(sequence)
    for _ in range(max_completion_length):
        probs = speculator.probs(sequence)
        if sum(probs.values()) == 0:
            break

        selected = random.choices(list(probs.keys()), weights=list(probs.values()), k=1)[0]
        sequence.append(selected)
    return sequence
lalamo/utils.py CHANGED
@@ -9,20 +9,47 @@ from collections.abc import (
9
9
  Sequence,
10
10
  ValuesView,
11
11
  )
12
+ from contextlib import contextmanager
12
13
  from dataclasses import dataclass
14
+ from pathlib import Path
13
15
  from typing import overload
14
16
 
15
17
  import einops
16
18
  import jax.numpy as jnp
17
19
  from jaxtyping import Array
20
+ from safetensors import safe_open
18
21
 
19
22
  __all__ = [
20
23
  "MapDictValues",
21
24
  "MapSequence",
22
25
  "jax_uint4_to_packed_uint8",
26
+ "open_safetensors",
23
27
  ]
24
28
 
25
29
 
30
+ @dataclass(frozen=True)
31
+ class LazyDict[K, V](Mapping[K, V]):
32
+ stored_keys: set[K]
33
+ getter: Callable[[K], V]
34
+
35
+ def __getitem__(self, key: K) -> V:
36
+ if key not in self.stored_keys:
37
+ raise KeyError(key)
38
+ return self.getter(key)
39
+
40
+ def __iter__(self) -> Iterator[K]:
41
+ return iter(self.stored_keys)
42
+
43
+ def __len__(self) -> int:
44
+ return len(self.stored_keys)
45
+
46
+
47
@contextmanager
def open_safetensors(filename: Path | str) -> Iterator[Mapping[str, Array]]:
    """Open a safetensors file and yield a lazy name-to-tensor mapping.

    The yielded mapping fetches each tensor through the open file's ``get_tensor`` on
    lookup, so it is presumably only usable inside the ``with`` block — TODO confirm.
    """
    with safe_open(filename, framework="flax") as tensor_file:
        tensor_names = set(tensor_file.keys())
        yield LazyDict(tensor_names, tensor_file.get_tensor)
51
+
52
+
26
53
  @dataclass(frozen=True)
27
54
  class MapIterable[OldT, NewT](Iterable[NewT]):
28
55
  map_func: Callable[[OldT], NewT]
@@ -1,17 +1,16 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: lalamo
3
- Version: 0.3.3
3
+ Version: 0.4.0
4
4
  Summary: JAX library for optimization and export of models for use with the UZU inference engine.
5
5
  Requires-Python: <4,>=3.12
6
6
  Description-Content-Type: text/markdown
7
7
  License-File: LICENSE
8
- Requires-Dist: cattrs>=24.1.2
8
+ Requires-Dist: cattrs[msgpack]>=24.1.2
9
9
  Requires-Dist: click>=8.1.8
10
10
  Requires-Dist: einops>=0.8.0
11
11
  Requires-Dist: equinox>=0.11.11
12
12
  Requires-Dist: huggingface-hub[hf-transfer]>=0.27.1
13
- Requires-Dist: jax>=0.4.38; sys_platform == "darwin"
14
- Requires-Dist: jax[cuda]>=0.4.38; sys_platform == "linux"
13
+ Requires-Dist: jax>=0.7.2
15
14
  Requires-Dist: jaxtyping>=0.2.36
16
15
  Requires-Dist: jinja2>=3.1.6
17
16
  Requires-Dist: ml-dtypes>=0.5.1
@@ -21,6 +20,14 @@ Requires-Dist: thefuzz>=0.22.1
21
20
  Requires-Dist: tokenizers>=0.21.2
22
21
  Requires-Dist: typer>=0.15.1
23
22
  Requires-Dist: safetensors>=0.6.2
23
+ Requires-Dist: polars>=1.33.1
24
+ Requires-Dist: xxhash>=3.5.0
25
+ Provides-Extra: cpu
26
+ Requires-Dist: jax[cpu]>=0.7.2; extra == "cpu"
27
+ Provides-Extra: cuda
28
+ Requires-Dist: jax[cuda]>=0.7.2; extra == "cuda"
29
+ Provides-Extra: tpu
30
+ Requires-Dist: jax[tpu]>=0.7.2; extra == "tpu"
24
31
  Dynamic: license-file
25
32
 
26
33
  <p align="center">
@@ -0,0 +1,71 @@
1
+ lalamo/__init__.py,sha256=8lO29V8RZHM7yw3YLj3QhjMywppgG0HtviDrVRepSas,487
2
+ lalamo/common.py,sha256=5NUFD26yQgOnEEk3LaQnce8n-VwJxILkEpFesHZhtQU,3820
3
+ lalamo/language_model.py,sha256=embFPBH8gLMAMzzc5TqTM9KO7TXJkdOMuqLn9vT9bW8,14619
4
+ lalamo/main.py,sha256=qGFb4fDFT7xU98XGVlgGjlx5R4_a5OULz8sTxcuRjxE,19470
5
+ lalamo/message_processor.py,sha256=eDKqOGRTAc4rbwzye0FAGwTnG_nIlerdlLEcn8QlT0Q,5817
6
+ lalamo/quantization.py,sha256=8o6ryIZLzzDYQuvBTboPfaVVdfijAKGpTxOcg3GKVD8,2752
7
+ lalamo/registry_abc.py,sha256=ENjXiD_wEH100fNjG-W5Em1L_EQ0Lf0pdRhRGvf3qZk,2197
8
+ lalamo/sampling.py,sha256=g_dNiJyZrRqoQIiLid4cr6nRT9N5tSz3GtHr8Bt4n-E,3404
9
+ lalamo/utils.py,sha256=51uhz0vVNiX3S5QbP3WTsOF_QVlc5nnoensFTlNLlLQ,3707
10
+ lalamo/data/__init__.py,sha256=9-lii4yjDZkZFkSNJA0Wd5ZOcxnhs5390IWO4JrMMWI,190
11
+ lalamo/data/huggingface_message.py,sha256=AJi4t1DIJ0AtFPQW8rBxNwjnve8RLl5WuZH9DFVxRqU,1114
12
+ lalamo/data/lalamo_completions.py,sha256=U_m3UNSJASUFz3rJq_taZOtL_U4B8Oj-ndkTF-JH-v4,1509
13
+ lalamo/data/utils.py,sha256=B96gLaULyStKYuR8wjFdTpFc6YIDC8EEvGh1eiMe_Ec,338
14
+ lalamo/model_import/__init__.py,sha256=Z8pS9rbKKx1QgUy7KZtHxiNWlZhII3mdovT9d37vAxg,168
15
+ lalamo/model_import/common.py,sha256=JXEjwabvWINU46drKSqasbAaNENjc6jcH7EmMqdOas8,8103
16
+ lalamo/model_import/huggingface_generation_config.py,sha256=mot6VQ6ezCtEhN6VjhnvaU-nR5P5T2BuBUgpFNnWJxU,1495
17
+ lalamo/model_import/huggingface_tokenizer_config.py,sha256=xvwdmio7b9nhn2H3uMBVligiYj58JaCFCvHY3-8dBvM,2502
18
+ lalamo/model_import/decoder_configs/__init__.py,sha256=vPmiVwYAU9NQvgAiK5UzFXLVdsez7rJGgX_rttKSO7c,503
19
+ lalamo/model_import/decoder_configs/common.py,sha256=L1oUilS4KwfnwKyWegoSZ3Tha5CqRjN6gjMhCie4Ymk,1849
20
+ lalamo/model_import/decoder_configs/executorch.py,sha256=peYxvT3sOpeGX3RGBReM_EYEiQyue-xJm61JKmHgLPY,5492
21
+ lalamo/model_import/decoder_configs/huggingface/__init__.py,sha256=Fd7-a_eMT1TsITmrTjbyafFXylqK9uHkGqBbmy8GnjY,518
22
+ lalamo/model_import/decoder_configs/huggingface/common.py,sha256=WgKWhp2AeYosLbx1nZGHM_Xt6unwd_7B5KA1z5tzSr4,1863
23
+ lalamo/model_import/decoder_configs/huggingface/gemma2.py,sha256=Y0cX4LuSzpCG5RUosJug-EZNSAAUJNwoRXETSbFEa-k,4347
24
+ lalamo/model_import/decoder_configs/huggingface/gemma3.py,sha256=cjJ9nTuabbJ7vMAbPYTmjmW2n8YL1hOh9IkKeCZmJ_4,6780
25
+ lalamo/model_import/decoder_configs/huggingface/gpt_oss.py,sha256=UlQT7AGISVbQMeL0Yln60KlEXB6wav7QH_eH-J_ogXU,6914
26
+ lalamo/model_import/decoder_configs/huggingface/llama.py,sha256=KeMUW2dem4-hDL48E5VTdW5dRw4G-NHWa4OJZE76wJk,6720
27
+ lalamo/model_import/decoder_configs/huggingface/mistral.py,sha256=89a0Mj-Yq1zDkVP428UhoqCfm_ufYC0v8UpXGjb6-O8,4411
28
+ lalamo/model_import/decoder_configs/huggingface/qwen2.py,sha256=WdXscId2L94sHVEQceBo50XQp1rVq7nLYZUpKMNsVQM,5226
29
+ lalamo/model_import/decoder_configs/huggingface/qwen3.py,sha256=voMDmAU-UWZpl5G18T3egxhfDKNxY1pslnDnTPiPq0g,5292
30
+ lalamo/model_import/loaders/__init__.py,sha256=Olg7a79phusilNgEa7PTgx1JgQQJLgAVg18T8isp0mw,148
31
+ lalamo/model_import/loaders/common.py,sha256=kkugV-bMQlN1zvGHoj3uc7z0FbXKoMtXEBTvyu4KxK4,1844
32
+ lalamo/model_import/loaders/executorch.py,sha256=GClNwxCbQJ8HNv_NqHEzqjZRRblgzVOf3h79yHT6bWQ,8975
33
+ lalamo/model_import/loaders/huggingface.py,sha256=wd6wP36tLaN22WhEbi3utb8iha0yGbzuEi2lmrU8CUA,14358
34
+ lalamo/model_import/loaders/utils.py,sha256=eiX3WKFRrAfBY-dugodscNInl5o5w3KmVcgma4atpGY,2456
35
+ lalamo/model_import/model_specs/__init__.py,sha256=5S91pDmboMUmvKmHmTLtkcc_TPti5aU3ITlwC-QL0OA,1023
36
+ lalamo/model_import/model_specs/common.py,sha256=xthsCPuU2CM7QSpJrrg6-q6xSnBVmnup-1zCH0BBdB4,5375
37
+ lalamo/model_import/model_specs/deepseek.py,sha256=Umef93_ZBuq93yYsejIRNwj3udoln1gHfrv3SK5jyMo,417
38
+ lalamo/model_import/model_specs/gemma.py,sha256=YGWM-J7jBEL16c_LZ9F_6dgeZnf4sEPLDKngl_3FbrE,1289
39
+ lalamo/model_import/model_specs/gpt_oss.py,sha256=PLo0QGrXKdX61ReTRdyOaP_EH3Dmj5lp3fpJjZRwRVA,542
40
+ lalamo/model_import/model_specs/huggingface.py,sha256=TEkU8y95_hmUWyF-Q5hn0dE2SvXbApghAsQwhWRu4D0,431
41
+ lalamo/model_import/model_specs/llama.py,sha256=Ml-xvRGlXBT9NJhmEpwgNo6C84oBSMYgA1_PrCYGcAw,990
42
+ lalamo/model_import/model_specs/mistral.py,sha256=HAojorjOqsJn2DoMBzYRw8A70qCslhFEsE9AF5xumlg,1278
43
+ lalamo/model_import/model_specs/pleias.py,sha256=5sRpZGYwLdsav6bLiW-459y1Cs9iJKgKkBIuGsOxtsQ,368
44
+ lalamo/model_import/model_specs/polaris.py,sha256=Mw1-6bByjDmPIKlIUIV46CsmV5xUp_laI5Qquo5DmAQ,520
45
+ lalamo/model_import/model_specs/qwen.py,sha256=TA1ApqR0EUTmHheZYoqVMgGUXZrXEznABt6FYT2PoQo,5991
46
+ lalamo/model_import/model_specs/reka.py,sha256=dOUYbEMMvovQdzQuBO_DCsjGI39syhoKCvnxLkNEDCw,423
47
+ lalamo/modules/__init__.py,sha256=TM9diDXhY0OlaHvA0FgtQdFITixOes4VtHhOEE9eR08,2764
48
+ lalamo/modules/activations.py,sha256=NT_A2TJbQ2bDGMVaSIboT2CAaFGxDVPXV3UvlrzfMWo,815
49
+ lalamo/modules/attention.py,sha256=WveC0G1_iA6fZ0sojYG9BYRO6Xw51UIE4q7qc9gGvk0,16287
50
+ lalamo/modules/common.py,sha256=bRq32rwTFfmRrYq7VanlGMPIkqwH0wMtl83D8PmCJl0,2969
51
+ lalamo/modules/decoder.py,sha256=2TpGyakxIBiloCmO5C73YtXQIWvnFmCVyKaJWHL0P0E,12618
52
+ lalamo/modules/decoder_layer.py,sha256=IHNTZxvQUNObglnz-KDJEbiuOfBK4VUmhfo49IjbYUY,12858
53
+ lalamo/modules/embedding.py,sha256=szyu6HOpXPk77f8s9RyKgE5429B0yQ9Jxostsk3-HbY,11994
54
+ lalamo/modules/kv_cache.py,sha256=x67_NvDfXSnbqB2Xb4i6Stb5Nc1I9zBNpKJGVRUSmXs,8908
55
+ lalamo/modules/linear.py,sha256=gDvKJd1KlIjrll-i68AUIaTPU8O3igeErkYneixkf4E,30808
56
+ lalamo/modules/mlp.py,sha256=sLmQr9eBlZ5YGo_Fa8fqrE9ryC432YPFBXP7csP9Y-k,17645
57
+ lalamo/modules/normalization.py,sha256=gdmv4tWLk8CwvIfoS0rY3QTVRXhjHQNNF6YGYjyez1c,2790
58
+ lalamo/modules/rope.py,sha256=HbIv5ESLGNAK47HAtqu1whLLUa20Sb28U8kEs6KclZM,10127
59
+ lalamo/modules/torch_interop.py,sha256=-mujd1zI4ec2w92Hd50RtDa0K3jl6ZSnPxc5r3Fp9nU,916
60
+ lalamo/modules/utils.py,sha256=t_TayWT6g5LtYKhJaod-u_COWaI_VbNd3eYek9Nj0lc,441
61
+ lalamo/speculator/__init__.py,sha256=pLGwZiXZZeH2SaMsIGiqHzXs5AJtbxWweXaKc4fPFHc,262
62
+ lalamo/speculator/common.py,sha256=PudF_gkpe5_nQ-57sAC-foE1xCy_H2Axh5KwRoA86lo,587
63
+ lalamo/speculator/inference.py,sha256=4E7_8jwLwg1bvUUH59cLxjD73-IJ9xv58yCwYMFa1r4,3101
64
+ lalamo/speculator/ngram.py,sha256=3O9akE2IGN_7Mp6l_zwhUpdNvjZB0rAlQm51EuX_-rY,5869
65
+ lalamo/speculator/utils.py,sha256=K5qDGEdWYD47C1xLJQ3bamIHAG3-VEuANf-821VeanE,1688
66
+ lalamo-0.4.0.dist-info/licenses/LICENSE,sha256=diHRfjSEJHD1nnEeMIfMRCjR3UERf8bT3eseD6b1ayA,1072
67
+ lalamo-0.4.0.dist-info/METADATA,sha256=gfzekotZfrrfcex8qJx8X3XZh9SM0DtsIKrxRnUhegw,3041
68
+ lalamo-0.4.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
69
+ lalamo-0.4.0.dist-info/entry_points.txt,sha256=qli7qTfnBk5WP10rOGXXEckHMtt-atJMDWd8jN89Uks,43
70
+ lalamo-0.4.0.dist-info/top_level.txt,sha256=VHvWL5JN5XRG36NsN_MieJ7EwRihEOrEjyDaTdFJ-aI,7
71
+ lalamo-0.4.0.dist-info/RECORD,,
@@ -1,59 +0,0 @@
1
- lalamo/__init__.py,sha256=GCS_DTVF-_KdksObU3ceoJbjaxvS4w7fF62TI1NyBh0,217
2
- lalamo/common.py,sha256=5NUFD26yQgOnEEk3LaQnce8n-VwJxILkEpFesHZhtQU,3820
3
- lalamo/language_model.py,sha256=wZGucNCPP9ReHn6fYO5TlFHTXun1QIMwaxniq9z7K5Q,10639
4
- lalamo/main.py,sha256=Ta3ZW-Xw3uNPEVvb2YlBOH9UhhLDV7PZjRCg6giu0ao,12275
5
- lalamo/message_processor.py,sha256=hxnc2ELd31VfP41sCU-WobCoW06wqZllEmIvCcyAvyo,5445
6
- lalamo/quantization.py,sha256=8o6ryIZLzzDYQuvBTboPfaVVdfijAKGpTxOcg3GKVD8,2752
7
- lalamo/registry_abc.py,sha256=ENjXiD_wEH100fNjG-W5Em1L_EQ0Lf0pdRhRGvf3qZk,2197
8
- lalamo/sampling.py,sha256=g_dNiJyZrRqoQIiLid4cr6nRT9N5tSz3GtHr8Bt4n-E,3404
9
- lalamo/utils.py,sha256=Jm54CKFc6lJSggXVwF_lgiUuKEtQ55c58U-ALU6oxuk,2922
10
- lalamo/model_import/__init__.py,sha256=Z8pS9rbKKx1QgUy7KZtHxiNWlZhII3mdovT9d37vAxg,168
11
- lalamo/model_import/common.py,sha256=vVCKRl9-zifMiut4lMjB5eVXOHQxw8yfD5Q7rDr153c,7649
12
- lalamo/model_import/huggingface_generation_config.py,sha256=mot6VQ6ezCtEhN6VjhnvaU-nR5P5T2BuBUgpFNnWJxU,1495
13
- lalamo/model_import/huggingface_tokenizer_config.py,sha256=kTdgH2lle1m7zRtClO3BhgonvfZsAjeDGjPMAd1EU8E,2607
14
- lalamo/model_import/decoder_configs/__init__.py,sha256=Ru_lI1IjahyXZBSB5J0eWwVM6gn2ilRZW65m2sU2hMw,460
15
- lalamo/model_import/decoder_configs/common.py,sha256=L1oUilS4KwfnwKyWegoSZ3Tha5CqRjN6gjMhCie4Ymk,1849
16
- lalamo/model_import/decoder_configs/executorch.py,sha256=Kx_T-B5jumfWf9vj20We4FF0GkSkTmIYeWOss88-qYA,5266
17
- lalamo/model_import/decoder_configs/huggingface/__init__.py,sha256=kWHUnZDwGQCbA3Ucm-FEDr8zZ2yZ3yviPVftlNgMk30,460
18
- lalamo/model_import/decoder_configs/huggingface/common.py,sha256=0Q4Q4uklQkWhVBYFS_hCWmdZuEpw8Ch7jhycgbA2_NY,1899
19
- lalamo/model_import/decoder_configs/huggingface/gemma2.py,sha256=oIefI_ad-7DtzXmisFczkKPuOQ-KkzMkKWTk9likaMs,4101
20
- lalamo/model_import/decoder_configs/huggingface/gemma3.py,sha256=AUwkxMI90uaFhAthc_FtjnwJcLuHGSzF19BXKhTaWtM,6470
21
- lalamo/model_import/decoder_configs/huggingface/llama.py,sha256=8CvuCy4V84plGSOdmzIQmAwmxpCjKTo5wViCud5_oLo,5480
22
- lalamo/model_import/decoder_configs/huggingface/mistral.py,sha256=cxT8I8RFhYE47-gvaSIfaVOau5fQRjFj2XLqLUMZw6g,4166
23
- lalamo/model_import/decoder_configs/huggingface/qwen2.py,sha256=-sDwpEYe1wwRx1NfXxAhySz061LoV4_WZ2eWbkDo_7k,4981
24
- lalamo/model_import/decoder_configs/huggingface/qwen3.py,sha256=seSMPJyFCj4z-lf_vvrZj407oytieslxmKdlJSrXcqY,5047
25
- lalamo/model_import/loaders/__init__.py,sha256=Olg7a79phusilNgEa7PTgx1JgQQJLgAVg18T8isp0mw,148
26
- lalamo/model_import/loaders/common.py,sha256=kkugV-bMQlN1zvGHoj3uc7z0FbXKoMtXEBTvyu4KxK4,1844
27
- lalamo/model_import/loaders/executorch.py,sha256=nSvpylK8QL3nBk78P3FabLoyA87E3kv5CCpMfvuZe6Q,8886
28
- lalamo/model_import/loaders/huggingface.py,sha256=C0spTy9-DhQ8U7bDAyGA_i9dKgUoLR2Dv58t20KUvZs,10590
29
- lalamo/model_import/model_specs/__init__.py,sha256=UsKmBzNzJHVQquZxWdjGgiznnORVklE-z9Nr9Ertfqc,964
30
- lalamo/model_import/model_specs/common.py,sha256=MblETxPSVv2-KP7EBlBOEsvsZ8d1_PZfIXBjrpc1G-k,5208
31
- lalamo/model_import/model_specs/deepseek.py,sha256=Umef93_ZBuq93yYsejIRNwj3udoln1gHfrv3SK5jyMo,417
32
- lalamo/model_import/model_specs/gemma.py,sha256=YGWM-J7jBEL16c_LZ9F_6dgeZnf4sEPLDKngl_3FbrE,1289
33
- lalamo/model_import/model_specs/huggingface.py,sha256=TEkU8y95_hmUWyF-Q5hn0dE2SvXbApghAsQwhWRu4D0,431
34
- lalamo/model_import/model_specs/llama.py,sha256=Ml-xvRGlXBT9NJhmEpwgNo6C84oBSMYgA1_PrCYGcAw,990
35
- lalamo/model_import/model_specs/mistral.py,sha256=HAojorjOqsJn2DoMBzYRw8A70qCslhFEsE9AF5xumlg,1278
36
- lalamo/model_import/model_specs/pleias.py,sha256=5sRpZGYwLdsav6bLiW-459y1Cs9iJKgKkBIuGsOxtsQ,368
37
- lalamo/model_import/model_specs/polaris.py,sha256=Mw1-6bByjDmPIKlIUIV46CsmV5xUp_laI5Qquo5DmAQ,520
38
- lalamo/model_import/model_specs/qwen.py,sha256=TA1ApqR0EUTmHheZYoqVMgGUXZrXEznABt6FYT2PoQo,5991
39
- lalamo/model_import/model_specs/reka.py,sha256=dOUYbEMMvovQdzQuBO_DCsjGI39syhoKCvnxLkNEDCw,423
40
- lalamo/modules/__init__.py,sha256=ipgm-hRI9uLKluW1scsUXH9z2WeZf6bWSLgIFT4roMQ,2198
41
- lalamo/modules/activations.py,sha256=ZgUd3E4VTAVgCZaj9HhYkXiJuiKrWBzK6so5JGnucOc,532
42
- lalamo/modules/attention.py,sha256=Rukpx4lSTYHUqsrWoRc8hIruVa5l4hEEt0v-z6ieKaM,14815
43
- lalamo/modules/common.py,sha256=Fj0CWhw9ymCw5ulaPOc6ShYtqiB8UISXRWt-Cpftgdo,4512
44
- lalamo/modules/decoder.py,sha256=V1z4jgxxPOU8W-Xow2nxw35tYnaQxScfEFB8ouMcSWE,12197
45
- lalamo/modules/decoder_layer.py,sha256=rF8VOkNYmnWQxPbyNiVxJZsFaoygsZQtGo_HrfXwt40,12517
46
- lalamo/modules/embedding.py,sha256=2VopGTsZs6DhmDzBeB2XCrio_BFjty9gtKa99NDKfX4,12567
47
- lalamo/modules/kv_cache.py,sha256=yzCx6jKuzShu7-UV5ffpM0q21syA1l5YKzSOUHLALYM,7216
48
- lalamo/modules/linear.py,sha256=AKvNuVmUVOb2-Y4_j0pL1dnC0G0O8UzfYtDvBdTtcOE,24498
49
- lalamo/modules/mlp.py,sha256=xCxeOlKJLGTnbiGOBq2RkYOeKEqeIb62fdTQM63aM18,3769
50
- lalamo/modules/normalization.py,sha256=YecmA0hnVVYd473FzVgxunHYA377NWMKZRfuQNB8DQo,2942
51
- lalamo/modules/rope.py,sha256=IlL4Jz4fw6eTKXXMls1BXBAgbuB3GnP0aVB9GrF1nA8,10012
52
- lalamo/modules/torch_interop.py,sha256=-mujd1zI4ec2w92Hd50RtDa0K3jl6ZSnPxc5r3Fp9nU,916
53
- lalamo/modules/utils.py,sha256=5QTdi34kEI5jix7TfTdB0mOYZbzZUul_T1y8eWCA6lQ,262
54
- lalamo-0.3.3.dist-info/licenses/LICENSE,sha256=diHRfjSEJHD1nnEeMIfMRCjR3UERf8bT3eseD6b1ayA,1072
55
- lalamo-0.3.3.dist-info/METADATA,sha256=sngvnwPijSDHmBBtGzZ4-KHVg0UB99oE7fK6aPVPO3I,2854
56
- lalamo-0.3.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
57
- lalamo-0.3.3.dist-info/entry_points.txt,sha256=qli7qTfnBk5WP10rOGXXEckHMtt-atJMDWd8jN89Uks,43
58
- lalamo-0.3.3.dist-info/top_level.txt,sha256=VHvWL5JN5XRG36NsN_MieJ7EwRihEOrEjyDaTdFJ-aI,7
59
- lalamo-0.3.3.dist-info/RECORD,,
File without changes