lalamo 0.5.15__py3-none-any.whl → 0.5.16__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lalamo/__init__.py +1 -1
- lalamo/model_import/decoder_configs/huggingface/llama.py +32 -21
- {lalamo-0.5.15.dist-info → lalamo-0.5.16.dist-info}/METADATA +1 -1
- {lalamo-0.5.15.dist-info → lalamo-0.5.16.dist-info}/RECORD +8 -8
- {lalamo-0.5.15.dist-info → lalamo-0.5.16.dist-info}/WHEEL +0 -0
- {lalamo-0.5.15.dist-info → lalamo-0.5.16.dist-info}/entry_points.txt +0 -0
- {lalamo-0.5.15.dist-info → lalamo-0.5.16.dist-info}/licenses/LICENSE +0 -0
- {lalamo-0.5.15.dist-info → lalamo-0.5.16.dist-info}/top_level.txt +0 -0
lalamo/__init__.py
CHANGED
@@ -13,6 +13,7 @@ from lalamo.modules import (
     LlamaRoPEConfig,
     MLXQuantizedLinearConfig,
     MLXQuantizedTiedEmbeddingConfig,
+    MLXQuantizedUntiedEmbeddingConfig,
     NormalizationConfig,
     SiLU,
     TiedEmbeddingConfig,
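Since the diff adds MLXQuantizedUntiedEmbeddingConfig to this top-level re-export block, the new config should be importable from the package root in 0.5.16. A minimal sketch, on the untested assumption that the re-export lands as shown:

# Assumes lalamo 0.5.16 is installed; the hunk above suggests this name is
# now re-exported at the package root alongside the existing tied variant.
from lalamo import MLXQuantizedTiedEmbeddingConfig, MLXQuantizedUntiedEmbeddingConfig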
lalamo/model_import/decoder_configs/huggingface/llama.py
CHANGED

@@ -89,27 +90,37 @@ class HFLlamaConfig(HuggingFaceLMConfig):
     ) -> DecoderConfig:
         quantization = self.quantization or self.quantization_config
         if isinstance(quantization, MLXQuantizationConfig):
-            … (21 deleted lines not rendered in the source diff view)
+            if self.tie_word_embeddings:
+                embedding_config = MLXQuantizedTiedEmbeddingConfig(
+                    input_scale=None,
+                    logit_soft_cap=None,
+                    group_size=quantization.group_size,
+                    embedding_quantization_mode=QuantizationMode.from_num_bits(quantization.bits),
+                    activation_quantization_mode=None,
+                    activation_precision=activation_precision,
+                )
+            else:
+                embedding_config = MLXQuantizedUntiedEmbeddingConfig(
+                    input_scale=None,
+                    logit_soft_cap=None,
+                    group_size=quantization.group_size,
+                    embedding_quantization_mode=QuantizationMode.from_num_bits(quantization.bits),
+                    activation_quantization_mode=None,
+                    activation_precision=activation_precision,
+                )
+        else:  # noqa: PLR5501
+            if self.tie_word_embeddings:
+                embedding_config = TiedEmbeddingConfig(
+                    input_scale=None,
+                    logit_soft_cap=None,
+                    precision=activation_precision,
+                )
+            else:
+                embedding_config = UntiedEmbeddingConfig(
+                    input_scale=None,
+                    logit_soft_cap=None,
+                    precision=activation_precision,
+                )
         if self.rope_scaling is None:
             rope_config = UnscaledRoPEConfig(
                 precision=activation_precision,
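Read as a decision table, the new code picks one of four embedding configs along two axes: MLX-quantized vs. plain (whether an MLXQuantizationConfig is present) and tied vs. untied word embeddings. A minimal sketch of that selection, returning the class names as strings rather than the real lalamo.modules objects:

def select_embedding_config_name(quantized: bool, tie_word_embeddings: bool) -> str:
    # Mirrors the branch structure added in the hunk above: the
    # MLXQuantizationConfig check picks the config family, and
    # tie_word_embeddings picks the tied vs. untied variant within it.
    if quantized:
        if tie_word_embeddings:
            return "MLXQuantizedTiedEmbeddingConfig"
        return "MLXQuantizedUntiedEmbeddingConfig"
    if tie_word_embeddings:
        return "TiedEmbeddingConfig"
    return "UntiedEmbeddingConfig"

# The case this release appears to add support for: an MLX-quantized
# checkpoint without tied word embeddings (prior behavior is not visible
# in this diff, since the deleted lines were not rendered).
assert select_embedding_config_name(True, False) == "MLXQuantizedUntiedEmbeddingConfig"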
{lalamo-0.5.15.dist-info → lalamo-0.5.16.dist-info}/RECORD
RENAMED

@@ -1,4 +1,4 @@
-lalamo/__init__.py,sha256=…
+lalamo/__init__.py,sha256=FjfGsBVSl14mNsDoFJEwXMRUq1-Kg_lessRzlJNG3KM,815
 lalamo/common.py,sha256=5NUFD26yQgOnEEk3LaQnce8n-VwJxILkEpFesHZhtQU,3820
 lalamo/main.py,sha256=GgUT7lT48-XQuAEH7qzsDKG8Lx9iBf-sYBIRhZL9q7E,23978
 lalamo/message_processor.py,sha256=bSUAQg7CemLTnBV4LtPxJBicAalruDCA-JXjkTYPZ8U,5797
@@ -23,7 +23,7 @@ lalamo/model_import/decoder_configs/huggingface/gemma2.py,sha256=g8LH_GlSNyL04WW…
 lalamo/model_import/decoder_configs/huggingface/gemma3.py,sha256=UXiEyNqlD0Czc5Gj3n4hNqNDp9Ml5YzH1XZ6BXj0mgU,10223
 lalamo/model_import/decoder_configs/huggingface/gpt_oss.py,sha256=MBCoPbuWyzbJiBRtHOtpaPHJjQ1UVCAYcVrfIejTnlQ,7446
 lalamo/model_import/decoder_configs/huggingface/lfm2.py,sha256=vrBMxtiKEg0eHNDL_bWM9odlrsab7jlMXEY8vjEB7-c,7595
-lalamo/model_import/decoder_configs/huggingface/llama.py,sha256=…
+lalamo/model_import/decoder_configs/huggingface/llama.py,sha256=pGuBQTY6qpx6CriWwdsLpuTSRS7ECoTP1kt5pSKRlNQ,8549
 lalamo/model_import/decoder_configs/huggingface/llamba.py,sha256=ANB-vQK8U-zVFubZSTDXXt2S70T5SVOGzf7eOVvPzIQ,5773
 lalamo/model_import/decoder_configs/huggingface/mistral.py,sha256=MDGC0ivzJuUpOC11n8vFdcVzqccUyaRw_hkL74mVlAg,4599
 lalamo/model_import/decoder_configs/huggingface/modern_bert.py,sha256=A8nNIMhPVumvPWIFR3RexRc6XkFyUd_3mmNpmvyPEGE,8816
@@ -85,9 +85,9 @@ lalamo/speculator/estimator.py,sha256=4D8dPZCWsrpORb7y8pQ6VsiIg1Cblvvxe6gXCoYtcD…
 lalamo/speculator/inference.py,sha256=5GntUgj0HQLeLn3HIHnVX8EEO0EBzmKeP5-_U7kdFAM,3670
 lalamo/speculator/ngram.py,sha256=95mdfAWhx4d5XOnOwhyhElnvcy6nlUjYhcbJzqDs414,5875
 lalamo/speculator/utils.py,sha256=0wZoMMIzzk0Q-3zq5H5f-JBplePNHxywndkrNtOJOyo,1697
-lalamo-0.5.…
-lalamo-0.5.…
-lalamo-0.5.…
-lalamo-0.5.…
-lalamo-0.5.…
-lalamo-0.5.…
+lalamo-0.5.16.dist-info/licenses/LICENSE,sha256=diHRfjSEJHD1nnEeMIfMRCjR3UERf8bT3eseD6b1ayA,1072
+lalamo-0.5.16.dist-info/METADATA,sha256=dcs0vT9RULTxt4cxJJmfjP-4UJi7ZkrifXAaSMAgKeU,3147
+lalamo-0.5.16.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+lalamo-0.5.16.dist-info/entry_points.txt,sha256=qli7qTfnBk5WP10rOGXXEckHMtt-atJMDWd8jN89Uks,43
+lalamo-0.5.16.dist-info/top_level.txt,sha256=VHvWL5JN5XRG36NsN_MieJ7EwRihEOrEjyDaTdFJ-aI,7
+lalamo-0.5.16.dist-info/RECORD,,
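Each RECORD row has the form path,sha256=<digest>,size, where the digest is an unpadded URL-safe base64 sha256 per the wheel spec. A small sketch that recomputes such a digest for a file extracted from the wheel:

import base64
import hashlib
from pathlib import Path

def record_digest(path: Path) -> str:
    # Wheel RECORD files store sha256 digests as unpadded URL-safe base64.
    raw = hashlib.sha256(path.read_bytes()).digest()
    return "sha256=" + base64.urlsafe_b64encode(raw).rstrip(b"=").decode("ascii")

# Assuming the 0.5.16 wheel has been unpacked into the current directory,
# this should reproduce the RECORD entry above:
# record_digest(Path("lalamo/__init__.py"))
# -> "sha256=FjfGsBVSl14mNsDoFJEwXMRUq1-Kg_lessRzlJNG3KM"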
{lalamo-0.5.15.dist-info → lalamo-0.5.16.dist-info}/WHEEL
File without changes

{lalamo-0.5.15.dist-info → lalamo-0.5.16.dist-info}/entry_points.txt
File without changes

{lalamo-0.5.15.dist-info → lalamo-0.5.16.dist-info}/licenses/LICENSE
File without changes

{lalamo-0.5.15.dist-info → lalamo-0.5.16.dist-info}/top_level.txt
File without changes